# microdao-daarion/services/node-worker/fabric_metrics.py

"""Prometheus metrics for Node Worker."""
import logging

logger = logging.getLogger("worker_metrics")

# prometheus_client is an optional dependency. Only the import itself is
# guarded: keeping metric construction out of the ``try`` body means an
# unrelated ImportError raised later cannot be misreported as "not installed".
try:
    from prometheus_client import (
        CollectorRegistry,
        Counter,
        Gauge,
        Histogram,
        generate_latest,
    )
except ImportError:
    # Metrics are disabled; the helper functions in this module check
    # PROM_AVAILABLE before touching any metric object.
    PROM_AVAILABLE = False
    REGISTRY = None
    logger.info("prometheus_client not installed, worker metrics disabled")
else:
    PROM_AVAILABLE = True
    # All worker metrics are registered on this module's own registry
    # (passed explicitly via ``registry=``) rather than the library default.
    REGISTRY = CollectorRegistry()

    # Jobs processed, labelled by request type and final status.
    jobs_total = Counter(
        "node_worker_jobs_total", "Jobs processed",
        ["type", "status"], registry=REGISTRY,
    )
    # Number of jobs currently in flight.
    inflight_gauge = Gauge(
        "node_worker_inflight", "Currently inflight jobs",
        registry=REGISTRY,
    )
    # Job latency in milliseconds, labelled by request type and model;
    # bucket upper bounds are expressed in milliseconds as well.
    latency_hist = Histogram(
        "node_worker_latency_ms", "Job latency in ms",
        ["type", "model"],
        buckets=[100, 250, 500, 1000, 2500, 5000, 15000, 30000],
        registry=REGISTRY,
    )


def inc_job(req_type: str, status: str):
    """Increment the ``jobs_total`` counter for one job.

    Args:
        req_type: Value for the counter's ``type`` label.
        status: Value for the counter's ``status`` label.

    Does nothing when ``PROM_AVAILABLE`` is false.
    """
    if not PROM_AVAILABLE:
        return
    labelled_counter = jobs_total.labels(type=req_type, status=status)
    labelled_counter.inc()


def set_inflight(count: int):
    """Set the ``inflight_gauge`` metric to ``count``.

    Args:
        count: Value to store in the gauge.

    Does nothing when ``PROM_AVAILABLE`` is false.
    """
    if not PROM_AVAILABLE:
        return
    inflight_gauge.set(count)


def observe_latency(req_type: str, model: str, latency_ms: int):
    """Record one observation in the ``latency_hist`` histogram.

    Args:
        req_type: Value for the histogram's ``type`` label.
        model: Value for the histogram's ``model`` label.
        latency_ms: Observed value; the metric is declared as job latency
            in milliseconds.

    Does nothing when ``PROM_AVAILABLE`` is false.
    """
    if not PROM_AVAILABLE:
        return
    labelled_hist = latency_hist.labels(type=req_type, model=model)
    labelled_hist.observe(latency_ms)


def get_metrics_text():
    """Return the serialized contents of the worker metrics registry.

    Returns:
        The result of ``generate_latest(REGISTRY)`` when metrics are enabled
        and a registry exists; ``None`` otherwise.
        NOTE(review): per the prometheus_client documentation this is
        expected to be ``bytes`` in the text exposition format -- confirm
        before decoding at the call site.
    """
    # Guard first: without the library (or a registry) there is nothing to
    # serialize and callers receive None.
    if not (PROM_AVAILABLE and REGISTRY):
        return None
    return generate_latest(REGISTRY)