P3.2+P3.3+P3.4: NODA1 node-worker + NATS auth config + Prometheus counters
P3.2 — Multi-node deployment: - Added node-worker service to docker-compose.node1.yml (NODE_ID=noda1) - NCS NODA1 now has NODE_WORKER_URL for metrics collection - Fixed NODE_ID consistency: router NODA1 uses 'noda1' - NODA2 node-worker/NCS gets NCS_REPORT_URL for latency reporting P3.3 — NATS accounts/auth (opt-in config): - config/nats-server.conf with 3 accounts: SYS, FABRIC, APP - Per-user topic permissions (router, ncs, node_worker) - Leafnode listener :7422 with auth - Not yet activated (requires credential provisioning) P3.4 — Prometheus counters: - Router /fabric_metrics: caps_refresh, caps_stale, model_select, offload_total, breaker_state, score_ms histogram - Node Worker /prom_metrics: jobs_total, inflight gauge, latency_ms histogram - NCS /prom_metrics: runtime_health, runtime_p50/p95, node_wait_ms - All bound to 127.0.0.1 (not externally exposed) Made-with: Cursor
This commit is contained in:
50
services/node-worker/fabric_metrics.py
Normal file
50
services/node-worker/fabric_metrics.py
Normal file
@@ -0,0 +1,50 @@
|
||||
"""Prometheus metrics for Node Worker.

All series are registered on a dedicated ``CollectorRegistry`` so the worker
exposes only its own metrics. If ``prometheus_client`` is not installed, the
module still imports cleanly and every helper degrades to a no-op (callers
check ``PROM_AVAILABLE``).
"""

import logging

logger = logging.getLogger("worker_metrics")

try:
    from prometheus_client import Counter, Gauge, Histogram, CollectorRegistry, generate_latest

    PROM_AVAILABLE = True
    # Dedicated registry: keeps worker metrics separate from the process-wide
    # default registry used by other libraries.
    REGISTRY = CollectorRegistry()

    # Total jobs processed, labeled by request type and completion status.
    jobs_total = Counter(
        "node_worker_jobs_total", "Jobs processed",
        ["type", "status"], registry=REGISTRY,
    )
    # Number of jobs currently being executed.
    inflight_gauge = Gauge(
        "node_worker_inflight", "Currently inflight jobs",
        registry=REGISTRY,
    )
    # Job latency distribution; bucket bounds are milliseconds.
    latency_hist = Histogram(
        "node_worker_latency_ms", "Job latency in ms",
        ["type", "model"],
        buckets=[100, 250, 500, 1000, 2500, 5000, 15000, 30000],
        registry=REGISTRY,
    )

except ImportError:
    # Optional dependency absent: disable metrics but keep the module usable.
    PROM_AVAILABLE = False
    REGISTRY = None
    # Fix: also define the metric objects (the original left them undefined,
    # so any unguarded `fabric_metrics.jobs_total` access raised NameError
    # instead of a clearer failure). None is consistent with REGISTRY above.
    jobs_total = None
    inflight_gauge = None
    latency_hist = None
    logger.info("prometheus_client not installed, worker metrics disabled")
||||
def inc_job(req_type: str, status: str):
    """Count one processed job, labeled by request type and outcome."""
    if not PROM_AVAILABLE:
        return
    jobs_total.labels(type=req_type, status=status).inc()
|
||||
|
||||
|
||||
def set_inflight(count: int):
    """Publish the current number of in-flight jobs to the gauge."""
    if not PROM_AVAILABLE:
        return
    inflight_gauge.set(count)
|
||||
|
||||
|
||||
def observe_latency(req_type: str, model: str, latency_ms: int):
    """Record one job latency sample (milliseconds) in the histogram."""
    if not PROM_AVAILABLE:
        return
    latency_hist.labels(type=req_type, model=model).observe(latency_ms)
|
||||
|
||||
|
||||
def get_metrics_text():
    """Render the registry in Prometheus text exposition format.

    Returns the encoded payload, or None when metrics are disabled
    (prometheus_client missing or no registry present).
    """
    if not (PROM_AVAILABLE and REGISTRY):
        return None
    return generate_latest(REGISTRY)
|
||||
@@ -31,6 +31,16 @@ async def metrics():
|
||||
return worker.get_metrics()
|
||||
|
||||
|
||||
@app.get("/prom_metrics")
async def prom_metrics():
    """Serve worker metrics in Prometheus text exposition format.

    Falls back to a JSON error body when prometheus_client is unavailable
    (fabric_metrics reports no payload in that case).
    """
    from fastapi.responses import Response
    import fabric_metrics as fm

    payload = fm.get_metrics_text()
    if not payload:
        return {"error": "prometheus_client not installed"}
    return Response(content=payload, media_type="text/plain; charset=utf-8")
|
||||
|
||||
|
||||
@app.on_event("startup")
|
||||
async def startup():
|
||||
global _nats_client
|
||||
|
||||
@@ -3,3 +3,4 @@ uvicorn>=0.29.0
|
||||
httpx>=0.27.0
|
||||
nats-py>=2.7.0
|
||||
pydantic>=2.5.0
|
||||
prometheus-client>=0.20.0
|
||||
|
||||
@@ -10,6 +10,7 @@ import config
|
||||
from models import JobRequest, JobResponse, JobError
|
||||
from idempotency import IdempotencyStore
|
||||
from providers import ollama, swapper_vision
|
||||
import fabric_metrics as fm
|
||||
|
||||
logger = logging.getLogger("node-worker")
|
||||
|
||||
@@ -95,17 +96,21 @@ async def _handle_request(msg):
|
||||
|
||||
global _inflight_count
|
||||
_inflight_count += 1
|
||||
fm.set_inflight(_inflight_count)
|
||||
try:
|
||||
async with _semaphore:
|
||||
resp = await _execute(job, remaining)
|
||||
finally:
|
||||
_inflight_count -= 1
|
||||
fm.set_inflight(_inflight_count)
|
||||
|
||||
_idem.put(idem_key, resp)
|
||||
_idem.complete_inflight(idem_key, resp)
|
||||
resp.latency_ms = int((time.time() - t0) * 1000)
|
||||
|
||||
fm.inc_job(job.required_type, resp.status)
|
||||
if resp.status == "ok" and resp.latency_ms > 0:
|
||||
fm.observe_latency(job.required_type, resp.model or "?", resp.latency_ms)
|
||||
buf = _latencies_llm if job.required_type in ("llm", "code") else _latencies_vision
|
||||
buf.append(resp.latency_ms)
|
||||
if len(buf) > _LATENCY_BUFFER:
|
||||
|
||||
Reference in New Issue
Block a user