P3.5-P3.7: 2-layer inventory, capability routing, STT/TTS adapters, Dev Contract

NCS:
- _collect_worker_caps() fetches capability flags from node-worker /caps
- _derive_capabilities() merges served model types + worker provider flags
- installed_artifacts replaces inventory_only (disk scan with DISK_SCAN_PATHS env)
- New endpoints: /capabilities/caps, /capabilities/installed

Node Worker:
- STT_PROVIDER, TTS_PROVIDER, OCR_PROVIDER, IMAGE_PROVIDER env flags
- /caps endpoint returns capabilities + providers for NCS aggregation
- STT adapter (providers/stt_mlx_whisper.py) — remote + local mode
- TTS adapter (providers/tts_mlx_kokoro.py) — remote + local mode
- OCR handler via vision_prompted (ollama_vision with OCR prompt)
- NATS subjects: node.{id}.stt/tts/ocr/image.request

Router:
- POST /v1/capability/{stt,tts,ocr,image} — capability-based offload routing
- GET /v1/capabilities — global view with capabilities_by_node
- require_fresh_caps(ttl) preflight guard
- find_nodes_with_capability(cap) + load-based node selection

Ops:
- ops/fabric_snapshot.py — full runtime snapshot collector
- ops/fabric_preflight.sh — quick check + snapshot save + diff
- docs/fabric_contract.md — Dev Contract v0.1 (preflight-first)
- tests/test_fabric_contract.py — CI enforcement (6 tests)

Made-with: Cursor
This commit is contained in:
Apple
2026-02-27 05:24:09 -08:00
parent 194c87f53c
commit 9a36020316
17 changed files with 1352 additions and 21 deletions

View File

@@ -100,8 +100,8 @@ async def _discover_remote_nodes() -> List[Dict[str, Any]]:
sub = await _nats_client.subscribe(inbox)
try:
await _nats_client.publish_request(
"node.*.capabilities.get", inbox, b""
await _nats_client.publish(
CAPS_DISCOVERY_SUBJECT, b"", reply=inbox,
)
await _nats_client.flush()
@@ -183,6 +183,7 @@ async def get_global_capabilities(force: bool = False) -> Dict[str, Any]:
def _build_global_view() -> Dict[str, Any]:
"""Build a unified view from all cached node capabilities."""
all_served: List[Dict[str, Any]] = []
global_caps: Dict[str, Dict[str, Any]] = {}
for node_id, caps in _node_cache.items():
is_local = (node_id.lower() == LOCAL_NODE_ID.lower())
@@ -194,16 +195,27 @@ def _build_global_view() -> Dict[str, Any]:
"local": is_local,
"node_age_s": round(age, 1),
})
node_caps = caps.get("capabilities", {})
if node_caps:
global_caps[node_id] = {
k: v for k, v in node_caps.items() if k != "providers"
}
all_served.sort(key=lambda m: (0 if m.get("local") else 1, m.get("name", "")))
return {
"local_node": LOCAL_NODE_ID,
"nodes": {nid: {"node_id": nid, "served_count": len(c.get("served_models", [])),
"age_s": round(time.time() - _node_timestamps.get(nid, 0), 1)}
for nid, c in _node_cache.items()},
"nodes": {nid: {
"node_id": nid,
"served_count": len(c.get("served_models", [])),
"installed_count": c.get("installed_count", 0),
"capabilities": c.get("capabilities", {}),
"node_load": c.get("node_load", {}),
"age_s": round(time.time() - _node_timestamps.get(nid, 0), 1),
} for nid, c in _node_cache.items()},
"served_models": all_served,
"served_count": len(all_served),
"capabilities_by_node": global_caps,
"node_count": len(_node_cache),
"updated_at": time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime()),
}
@@ -214,6 +226,44 @@ def get_cached_global() -> Dict[str, Any]:
return _build_global_view()
async def require_fresh_caps(ttl: int = 30) -> Optional[Dict[str, Any]]:
    """Preflight: return global caps only if fresh enough.

    Returns None if NCS data is stale beyond ttl — caller should use
    safe fallback instead of making routing decisions on outdated info.
    """
    # No cached node timestamps at all: force one discovery round first.
    if not _node_timestamps:
        gcaps = await get_global_capabilities(force=True)
        # Discovery produced nothing — no node answered; caller must fall back.
        return gcaps if _node_timestamps else None

    # Freshness is judged by the OLDEST node entry: every node must be recent.
    if (time.time() - min(_node_timestamps.values())) <= ttl:
        return _build_global_view()

    # Stale: attempt one forced refresh, then re-check the oldest entry.
    gcaps = await get_global_capabilities(force=True)
    oldest = min(_node_timestamps.values()) if _node_timestamps else 0
    age = time.time() - oldest
    if age > ttl:
        logger.warning("[preflight] caps stale after refresh, age=%ds", int(age))
        return None
    return gcaps
def find_nodes_with_capability(cap: str) -> List[str]:
    """Return node IDs that have a given capability enabled."""
    # A node qualifies when its cached capabilities map has cap set truthy.
    return [
        nid
        for nid, caps in _node_cache.items()
        if caps.get("capabilities", {}).get(cap, False)
    ]
def get_node_load(node_id: str) -> Dict[str, Any]:
    """Get cached node_load for a specific node."""
    # Unknown node or missing load info both collapse to an empty dict.
    return _node_cache.get(node_id, {}).get("node_load", {})
async def send_offload_request(
node_id: str,
request_type: str,

View File

@@ -1,5 +1,5 @@
from fastapi import FastAPI, HTTPException, Request
from fastapi.responses import Response
from fastapi.responses import JSONResponse, Response
from pydantic import BaseModel, ConfigDict
from typing import Literal, Optional, Dict, Any, List
import asyncio
@@ -3542,6 +3542,7 @@ async def documents_versions(doc_id: str, agent_id: str, limit: int = 20):
async def list_available_models():
"""List all available models from NCS (global capabilities pool)."""
models = []
caps_by_node = {}
try:
from global_capabilities_client import get_global_capabilities
@@ -3555,6 +3556,7 @@ async def list_available_models():
"size_gb": m.get("size_gb"),
"status": "served",
})
caps_by_node = pool.get("capabilities_by_node", {})
except Exception as e:
logger.warning(f"Cannot get NCS global models: {e}")
@@ -3572,7 +3574,110 @@ async def list_available_models():
except Exception as e:
logger.warning(f"Cannot get Ollama models: {e}")
return {"models": models, "total": len(models)}
return {
"models": models,
"total": len(models),
"capabilities_by_node": caps_by_node,
}
# ── Capability-based offload routing ────────────────────────────────────────
@app.post("/v1/capability/{cap_type}")
async def capability_offload(cap_type: str, request: Request):
    """Route a capability request (stt/tts/ocr/image) to the best node.

    Router selects the node based on capabilities_by_node, circuit breaker,
    and node_load — no static assumptions about which node has what.

    Responses:
        200 — remote node's result payload.
        400 — unknown capability type or malformed JSON body.
        404 — no node advertises the requested capability.
        502 — offload was dispatched but the node reported failure.
        503 — NCS unavailable/stale, all nodes circuit-broken, or no NATS.
    """
    valid_types = {"stt", "tts", "ocr", "image"}
    if cap_type not in valid_types:
        return JSONResponse(status_code=400, content={
            "error": f"Invalid capability type: {cap_type}. Valid: {sorted(valid_types)}",
        })
    if not NCS_AVAILABLE or not global_capabilities_client:
        return JSONResponse(status_code=503, content={
            "error": "NCS not available — cannot route capability requests",
        })

    # Preflight guard: refuse to route on capability data older than 30s.
    gcaps = await global_capabilities_client.require_fresh_caps(ttl=30)
    if gcaps is None:
        return JSONResponse(status_code=503, content={
            "error": "NCS caps stale — preflight failed, refusing to route",
        })

    eligible_nodes = global_capabilities_client.find_nodes_with_capability(cap_type)
    if not eligible_nodes:
        return JSONResponse(status_code=404, content={
            "error": f"No node with capability '{cap_type}' available",
            "capabilities_by_node": gcaps.get("capabilities_by_node", {}),
        })

    # Drop circuit-broken nodes; IDs are compared case-insensitively.
    unavailable = offload_client.get_unavailable_nodes(cap_type) if offload_client else set()
    unavailable_lower = {u.lower() for u in unavailable}
    available = [n for n in eligible_nodes if n.lower() not in unavailable_lower]
    if not available:
        return JSONResponse(status_code=503, content={
            "error": f"All nodes with '{cap_type}' are circuit-broken",
            "eligible": eligible_nodes,
            "unavailable": list(unavailable),
        })

    # Least-loaded selection: each inflight request costs 10, high memory
    # pressure adds 100; ties break deterministically on node ID because
    # (score, nid) tuples compare element-wise.
    best_node = available[0]
    if len(available) > 1:
        loads = []
        for nid in available:
            nl = global_capabilities_client.get_node_load(nid)
            score = nl.get("inflight", 0) * 10
            if nl.get("mem_pressure") == "high":
                score += 100
            loads.append((score, nid))
        loads.sort()
        best_node = loads[0][1]

    # Parse the body only after a target exists; reject malformed JSON with
    # an explicit 400 instead of letting the parse error surface as a 500.
    try:
        payload = await request.json()
    except Exception:
        return JSONResponse(status_code=400, content={
            "error": "Request body must be valid JSON",
        })

    logger.info(f"[cap.offload] type={cap_type} → node={best_node} (of {available})")

    nats_ok = nc is not None and nats_available
    if nats_ok and offload_client:
        import uuid as _uuid
        # Extract routing hints BEFORE building the job dict. The original
        # popped inside the dict literal, which only worked because "payload"
        # was bound first and pop() mutated that same object — made explicit
        # here, and guarded so a non-dict JSON body (e.g. a list) cannot
        # raise AttributeError.
        hints = payload.pop("hints", {}) if isinstance(payload, dict) else {}
        job = {
            "job_id": str(_uuid.uuid4()),
            "required_type": cap_type,
            "payload": payload,
            "deadline_ts": int(time.time() * 1000) + 60000,  # 60s deadline (ms epoch)
            "hints": hints,
        }
        result = await offload_client.offload_infer(
            nats_client=nc, node_id=best_node, required_type=cap_type,
            job_payload=job, timeout_ms=60000,
        )
        if result and result.get("status") == "ok":
            # Unwrap the node's result; fall back to the raw envelope if the
            # node omitted a "result" field.
            return JSONResponse(content=result.get("result", result))
        error = result.get("error", {}) if result else {}
        return JSONResponse(status_code=502, content={
            "error": error.get("message", f"Offload to {best_node} failed"),
            "code": error.get("code", "OFFLOAD_FAILED"),
            "node": best_node,
        })
    return JSONResponse(status_code=503, content={
        "error": "NATS not connected — cannot offload",
    })
@app.get("/v1/capabilities")
async def list_global_capabilities():
    """Return full capabilities view across all nodes."""
    if not NCS_AVAILABLE or not global_capabilities_client:
        return JSONResponse(status_code=503, content={"error": "NCS not available"})
    gcaps = await global_capabilities_client.get_global_capabilities()
    # Project only the fields exposed by this endpoint, with safe defaults.
    view = {
        key: gcaps.get(key, default)
        for key, default in (
            ("node_count", 0),
            ("nodes", {}),
            ("capabilities_by_node", {}),
            ("served_count", 0),
        )
    }
    return JSONResponse(content=view)
@app.get("/v1/agromatrix/shared-memory/pending")

View File

@@ -81,7 +81,7 @@ def get_unavailable_nodes(req_type: str) -> Set[str]:
async def offload_infer(
nats_client,
node_id: str,
required_type: Literal["llm", "vision", "stt", "tts"],
required_type: Literal["llm", "vision", "stt", "tts", "ocr", "image"],
job_payload: Dict[str, Any],
timeout_ms: int = 25000,
) -> Optional[Dict[str, Any]]: