P3.5-P3.7: 2-layer inventory, capability routing, STT/TTS adapters, Dev Contract

NCS:
- _collect_worker_caps() fetches capability flags from node-worker /caps
- _derive_capabilities() merges served model types + worker provider flags
- installed_artifacts replaces inventory_only (disk scan with DISK_SCAN_PATHS env)
- New endpoints: /capabilities/caps, /capabilities/installed

Node Worker:
- STT_PROVIDER, TTS_PROVIDER, OCR_PROVIDER, IMAGE_PROVIDER env flags
- /caps endpoint returns capabilities + providers for NCS aggregation
- STT adapter (providers/stt_mlx_whisper.py) — remote + local mode
- TTS adapter (providers/tts_mlx_kokoro.py) — remote + local mode
- OCR handler via vision_prompted (ollama_vision with OCR prompt)
- NATS subjects: node.{id}.{stt,tts,ocr,image}.request (one request subject per capability)

Router:
- POST /v1/capability/{stt,tts,ocr,image} — capability-based offload routing
- GET /v1/capabilities — global view with capabilities_by_node
- require_fresh_caps(ttl) preflight guard
- find_nodes_with_capability(cap) + load-based node selection

Ops:
- ops/fabric_snapshot.py — full runtime snapshot collector
- ops/fabric_preflight.sh — quick check + snapshot save + diff
- docs/fabric_contract.md — Dev Contract v0.1 (preflight-first)
- tests/test_fabric_contract.py — CI enforcement (6 tests)

Made-with: Cursor
This commit is contained in:
Apple
2026-02-27 05:24:09 -08:00
parent 194c87f53c
commit 9a36020316
17 changed files with 1352 additions and 21 deletions

View File

@@ -1,5 +1,5 @@
from fastapi import FastAPI, HTTPException, Request
from fastapi.responses import Response
from fastapi.responses import JSONResponse, Response
from pydantic import BaseModel, ConfigDict
from typing import Literal, Optional, Dict, Any, List
import asyncio
@@ -3542,6 +3542,7 @@ async def documents_versions(doc_id: str, agent_id: str, limit: int = 20):
async def list_available_models():
"""List all available models from NCS (global capabilities pool)."""
models = []
caps_by_node = {}
try:
from global_capabilities_client import get_global_capabilities
@@ -3555,6 +3556,7 @@ async def list_available_models():
"size_gb": m.get("size_gb"),
"status": "served",
})
caps_by_node = pool.get("capabilities_by_node", {})
except Exception as e:
logger.warning(f"Cannot get NCS global models: {e}")
@@ -3572,7 +3574,110 @@ async def list_available_models():
except Exception as e:
logger.warning(f"Cannot get Ollama models: {e}")
return {"models": models, "total": len(models)}
return {
"models": models,
"total": len(models),
"capabilities_by_node": caps_by_node,
}
# ── Capability-based offload routing ────────────────────────────────────────
@app.post("/v1/capability/{cap_type}")
async def capability_offload(cap_type: str, request: Request):
    """Route a capability request (stt/tts/ocr/image) to the best node.

    The router picks the node from ``capabilities_by_node``, the offload
    client's unavailable-node list and per-node load; nothing is assumed
    statically about which node serves what.

    Args:
        cap_type: Capability taken from the URL path; must be one of
            ``stt``, ``tts``, ``ocr`` or ``image``.
        request: Incoming request whose body must be a JSON object. An
            optional top-level ``"hints"`` key is moved out of the payload
            into the job envelope.

    Returns:
        A ``JSONResponse``:
        - 200 with the worker result when the offload reports ``status == "ok"``;
        - 400 for an unknown capability type or a body that is not a JSON object;
        - 404 when no node advertises the capability;
        - 502 when the selected node fails the job;
        - 503 when NCS is unavailable, caps are stale, every eligible node
          is unavailable, or NATS is not connected.
    """
    valid_types = {"stt", "tts", "ocr", "image"}
    if cap_type not in valid_types:
        return JSONResponse(status_code=400, content={
            "error": f"Invalid capability type: {cap_type}. Valid: {sorted(valid_types)}",
        })

    if not NCS_AVAILABLE or not global_capabilities_client:
        return JSONResponse(status_code=503, content={
            "error": "NCS not available — cannot route capability requests",
        })

    # Preflight: refuse to route when no fresh capability snapshot is returned.
    gcaps = await global_capabilities_client.require_fresh_caps(ttl=30)
    if gcaps is None:
        return JSONResponse(status_code=503, content={
            "error": "NCS caps stale — preflight failed, refusing to route",
        })

    eligible_nodes = global_capabilities_client.find_nodes_with_capability(cap_type)
    if not eligible_nodes:
        return JSONResponse(status_code=404, content={
            "error": f"No node with capability '{cap_type}' available",
            "capabilities_by_node": gcaps.get("capabilities_by_node", {}),
        })

    unavailable = offload_client.get_unavailable_nodes(cap_type) if offload_client else set()
    # Build the case-insensitive lookup once instead of once per candidate.
    unavailable_lower = {u.lower() for u in unavailable}
    available = [n for n in eligible_nodes if n.lower() not in unavailable_lower]
    if not available:
        return JSONResponse(status_code=503, content={
            "error": f"All nodes with '{cap_type}' are circuit-broken",
            "eligible": eligible_nodes,
            "unavailable": list(unavailable),
        })

    best_node = available[0]
    if len(available) > 1:
        # Lower score wins: 10 points per in-flight job plus a flat penalty
        # for high memory pressure. Ties fall back to node-id ordering
        # because (score, node_id) tuples are compared.
        loads = []
        for nid in available:
            nl = global_capabilities_client.get_node_load(nid)
            score = nl.get("inflight", 0) * 10
            if nl.get("mem_pressure") == "high":
                score += 100
            loads.append((score, nid))
        best_node = min(loads)[1]

    # A malformed or non-object body is a client error, not a server crash.
    try:
        payload = await request.json()
    except ValueError:  # json.JSONDecodeError and UnicodeDecodeError are ValueErrors
        return JSONResponse(status_code=400, content={
            "error": "Request body must be valid JSON",
        })
    if not isinstance(payload, dict):
        return JSONResponse(status_code=400, content={
            "error": "Request body must be a JSON object",
        })

    logger.info(f"[cap.offload] type={cap_type} → node={best_node} (of {available})")

    nats_ok = nc is not None and nats_available
    if not (nats_ok and offload_client):
        return JSONResponse(status_code=503, content={
            "error": "NATS not connected — cannot offload",
        })

    import uuid as _uuid

    # Hints travel in the job envelope, not inside the forwarded payload.
    hints = payload.pop("hints", {})
    job = {
        "job_id": str(_uuid.uuid4()),
        "required_type": cap_type,
        "payload": payload,
        "deadline_ts": int(time.time() * 1000) + 60000,
        "hints": hints,
    }
    result = await offload_client.offload_infer(
        nats_client=nc, node_id=best_node, required_type=cap_type,
        job_payload=job, timeout_ms=60000,
    )
    if result and result.get("status") == "ok":
        return JSONResponse(content=result.get("result", result))

    # The worker may report the error as a dict, a bare string, or null.
    error = result.get("error") if result else None
    if not isinstance(error, dict):
        error = {"message": str(error)} if error else {}
    return JSONResponse(status_code=502, content={
        "error": error.get("message", f"Offload to {best_node} failed"),
        "code": error.get("code", "OFFLOAD_FAILED"),
        "node": best_node,
    })
@app.get("/v1/capabilities")
async def list_global_capabilities():
    """Return the cluster-wide capabilities view.

    Responds with 503 when NCS or its client is not available. Otherwise
    fetches the global capabilities and returns only the node count, the
    per-node details, the capabilities-by-node map and the served count,
    each with an empty/zero fallback when the key is missing.
    """
    ncs_ready = NCS_AVAILABLE and global_capabilities_client
    if not ncs_ready:
        return JSONResponse(status_code=503, content={"error": "NCS not available"})

    snapshot = await global_capabilities_client.get_global_capabilities()

    # (key, fallback) pairs in the order they appear in the response body.
    projected_fields = (
        ("node_count", 0),
        ("nodes", {}),
        ("capabilities_by_node", {}),
        ("served_count", 0),
    )
    view = {key: snapshot.get(key, fallback) for key, fallback in projected_fields}
    return JSONResponse(content=view)
@app.get("/v1/agromatrix/shared-memory/pending")