"""Prometheus metrics for the Fabric routing layer.

Exposed via /fabric_metrics (separate from main /metrics to avoid conflicts).
All collectors live in a dedicated ``CollectorRegistry`` rather than the
library's default registry.

If ``prometheus_client`` is not installed, every helper in this module turns
into a no-op and ``get_metrics_text`` returns ``None``.
"""

import logging
import time
from typing import Optional

logger = logging.getLogger("fabric_metrics")

try:
    from prometheus_client import (
        Counter,
        Gauge,
        Histogram,
        CollectorRegistry,
        generate_latest,
    )

    PROM_AVAILABLE = True

    # Private registry: nothing in this module touches the default one.
    REGISTRY = CollectorRegistry()

    caps_refresh = Counter(
        "fabric_caps_refresh_total",
        "Capabilities refresh attempts",
        labelnames=["status"],
        registry=REGISTRY,
    )
    caps_stale = Counter(
        "fabric_caps_stale_total",
        "Stale capabilities used",
        registry=REGISTRY,
    )
    model_select = Counter(
        "fabric_model_select_total",
        "Model selection decisions",
        labelnames=["chosen_node", "chosen_runtime", "type"],
        registry=REGISTRY,
    )
    offload_total = Counter(
        "fabric_offload_total",
        "Offload attempts",
        labelnames=["status", "node", "type"],
        registry=REGISTRY,
    )
    breaker_state = Gauge(
        "fabric_breaker_state",
        "Circuit breaker state (1=open)",
        labelnames=["node", "type"],
        registry=REGISTRY,
    )
    score_hist = Histogram(
        "fabric_score_ms",
        "Model selection score distribution",
        buckets=[100, 250, 500, 1000, 2000, 5000, 10000],
        registry=REGISTRY,
    )
except ImportError:
    # Optional dependency missing: the collector names above stay undefined,
    # so every helper below must check PROM_AVAILABLE before touching them.
    PROM_AVAILABLE = False
    REGISTRY = None
    logger.info("prometheus_client not installed, fabric metrics disabled")


def inc_caps_refresh(status: str):
    """Increment ``fabric_caps_refresh_total`` for the given ``status`` label.

    Does nothing when metrics are disabled.
    """
    if not PROM_AVAILABLE:
        return
    caps_refresh.labels(status=status).inc()


def inc_caps_stale():
    """Increment ``fabric_caps_stale_total``.

    Does nothing when metrics are disabled.
    """
    if not PROM_AVAILABLE:
        return
    caps_stale.inc()


def inc_model_select(node: str, runtime: str, req_type: str):
    """Increment ``fabric_model_select_total`` for one selection decision.

    Args:
        node: Value for the ``chosen_node`` label.
        runtime: Value for the ``chosen_runtime`` label.
        req_type: Value for the ``type`` label.

    Does nothing when metrics are disabled.
    """
    if not PROM_AVAILABLE:
        return
    series = model_select.labels(
        chosen_node=node,
        chosen_runtime=runtime,
        type=req_type,
    )
    series.inc()


def inc_offload(status: str, node: str, req_type: str):
    """Increment ``fabric_offload_total`` for one offload attempt.

    Args:
        status: Value for the ``status`` label.
        node: Value for the ``node`` label.
        req_type: Value for the ``type`` label.

    Does nothing when metrics are disabled.
    """
    if not PROM_AVAILABLE:
        return
    series = offload_total.labels(status=status, node=node, type=req_type)
    series.inc()


def set_breaker(node: str, req_type: str, is_open: bool):
    """Set ``fabric_breaker_state`` for a ``node``/``type`` label pair.

    The gauge is set to 1 when ``is_open`` is truthy and to 0 otherwise.
    Does nothing when metrics are disabled.
    """
    if not PROM_AVAILABLE:
        return
    gauge_value = 1 if is_open else 0
    breaker_state.labels(node=node, type=req_type).set(gauge_value)


def observe_score(score: int):
    """Record ``score`` as one observation in the ``fabric_score_ms`` histogram.

    Does nothing when metrics are disabled.
    """
    if not PROM_AVAILABLE:
        return
    score_hist.observe(score)


def get_metrics_text() -> Optional[bytes]:
    """Return the output of ``generate_latest`` for this module's registry.

    Returns:
        The bytes produced by ``generate_latest(REGISTRY)``, or ``None`` when
        ``prometheus_client`` is unavailable or the registry is unset.
    """
    if not PROM_AVAILABLE or not REGISTRY:
        return None
    return generate_latest(REGISTRY)