"""Prometheus metrics for Fabric routing layer. Exposed via /fabric_metrics (separate from main /metrics to avoid conflicts). Falls back to no-op counters if prometheus_client is not installed. """ import logging import time from typing import Optional logger = logging.getLogger("fabric_metrics") try: from prometheus_client import Counter, Gauge, Histogram, CollectorRegistry, generate_latest PROM_AVAILABLE = True REGISTRY = CollectorRegistry() caps_refresh = Counter( "fabric_caps_refresh_total", "Capabilities refresh attempts", ["status"], registry=REGISTRY, ) caps_stale = Counter( "fabric_caps_stale_total", "Stale capabilities used", registry=REGISTRY, ) model_select = Counter( "fabric_model_select_total", "Model selection decisions", ["chosen_node", "chosen_runtime", "type"], registry=REGISTRY, ) offload_total = Counter( "fabric_offload_total", "Offload attempts", ["status", "node", "type"], registry=REGISTRY, ) breaker_state = Gauge( "fabric_breaker_state", "Circuit breaker state (1=open)", ["node", "type"], registry=REGISTRY, ) score_hist = Histogram( "fabric_score_ms", "Model selection score distribution", buckets=[100, 250, 500, 1000, 2000, 5000, 10000], registry=REGISTRY, ) # ── Voice HA metrics ────────────────────────────────────────────────────── # cap label: "voice_tts" | "voice_llm" | "voice_stt" voice_cap_requests = Counter( "fabric_voice_capability_requests_total", "Voice HA capability routing requests", ["cap", "status"], registry=REGISTRY, ) voice_offload_total = Counter( "fabric_voice_offload_total", "Voice HA offload attempts (node selected + NATS sent)", ["cap", "node", "status"], registry=REGISTRY, ) voice_breaker_state = Gauge( "fabric_voice_breaker_state", "Voice HA circuit breaker per node+cap (1=open)", ["cap", "node"], registry=REGISTRY, ) voice_score_hist = Histogram( "fabric_voice_score_ms", "Voice HA node scoring distribution", ["cap"], buckets=[0, 50, 100, 200, 400, 800, 1600, 3200], registry=REGISTRY, ) except ImportError: PROM_AVAILABLE = False REGISTRY = None logger.info("prometheus_client not installed, fabric metrics disabled") def inc_caps_refresh(status: str): if PROM_AVAILABLE: caps_refresh.labels(status=status).inc() def inc_caps_stale(): if PROM_AVAILABLE: caps_stale.inc() def inc_model_select(node: str, runtime: str, req_type: str): if PROM_AVAILABLE: model_select.labels(chosen_node=node, chosen_runtime=runtime, type=req_type).inc() def inc_offload(status: str, node: str, req_type: str): if PROM_AVAILABLE: offload_total.labels(status=status, node=node, type=req_type).inc() def set_breaker(node: str, req_type: str, is_open: bool): if PROM_AVAILABLE: breaker_state.labels(node=node, type=req_type).set(1 if is_open else 0) def observe_score(score: int): if PROM_AVAILABLE: score_hist.observe(score) def inc_voice_cap_request(cap: str, status: str): if PROM_AVAILABLE: voice_cap_requests.labels(cap=cap, status=status).inc() def inc_voice_offload(cap: str, node: str, status: str): if PROM_AVAILABLE: voice_offload_total.labels(cap=cap, node=node, status=status).inc() def set_voice_breaker(cap: str, node: str, is_open: bool): if PROM_AVAILABLE: voice_breaker_state.labels(cap=cap, node=node).set(1 if is_open else 0) def observe_voice_score(cap: str, score: float): if PROM_AVAILABLE: voice_score_hist.labels(cap=cap).observe(score) def get_metrics_text() -> Optional[bytes]: if PROM_AVAILABLE and REGISTRY: return generate_latest(REGISTRY) return None