P3.5-P3.7: 2-layer inventory, capability routing, STT/TTS adapters, Dev Contract
NCS:
- _collect_worker_caps() fetches capability flags from node-worker /caps
- _derive_capabilities() merges served model types + worker provider flags
- installed_artifacts replaces inventory_only (disk scan with DISK_SCAN_PATHS env)
- New endpoints: /capabilities/caps, /capabilities/installed
Node Worker:
- STT_PROVIDER, TTS_PROVIDER, OCR_PROVIDER, IMAGE_PROVIDER env flags
- /caps endpoint returns capabilities + providers for NCS aggregation
- STT adapter (providers/stt_mlx_whisper.py) — remote + local mode
- TTS adapter (providers/tts_mlx_kokoro.py) — remote + local mode
- OCR handler via vision_prompted (ollama_vision with OCR prompt)
- NATS subjects: node.{id}.stt/tts/ocr/image.request
Router:
- POST /v1/capability/{stt,tts,ocr,image} — capability-based offload routing
- GET /v1/capabilities — global view with capabilities_by_node
- require_fresh_caps(ttl) preflight guard
- find_nodes_with_capability(cap) + load-based node selection
Ops:
- ops/fabric_snapshot.py — full runtime snapshot collector
- ops/fabric_preflight.sh — quick check + snapshot save + diff
- docs/fabric_contract.md — Dev Contract v0.1 (preflight-first)
- tests/test_fabric_contract.py — CI enforcement (6 tests)
Made-with: Cursor
This commit is contained in:
@@ -100,8 +100,8 @@ async def _discover_remote_nodes() -> List[Dict[str, Any]]:
|
||||
sub = await _nats_client.subscribe(inbox)
|
||||
|
||||
try:
|
||||
await _nats_client.publish_request(
|
||||
"node.*.capabilities.get", inbox, b""
|
||||
await _nats_client.publish(
|
||||
CAPS_DISCOVERY_SUBJECT, b"", reply=inbox,
|
||||
)
|
||||
await _nats_client.flush()
|
||||
|
||||
@@ -183,6 +183,7 @@ async def get_global_capabilities(force: bool = False) -> Dict[str, Any]:
|
||||
def _build_global_view() -> Dict[str, Any]:
|
||||
"""Build a unified view from all cached node capabilities."""
|
||||
all_served: List[Dict[str, Any]] = []
|
||||
global_caps: Dict[str, Dict[str, Any]] = {}
|
||||
|
||||
for node_id, caps in _node_cache.items():
|
||||
is_local = (node_id.lower() == LOCAL_NODE_ID.lower())
|
||||
@@ -194,16 +195,27 @@ def _build_global_view() -> Dict[str, Any]:
|
||||
"local": is_local,
|
||||
"node_age_s": round(age, 1),
|
||||
})
|
||||
node_caps = caps.get("capabilities", {})
|
||||
if node_caps:
|
||||
global_caps[node_id] = {
|
||||
k: v for k, v in node_caps.items() if k != "providers"
|
||||
}
|
||||
|
||||
all_served.sort(key=lambda m: (0 if m.get("local") else 1, m.get("name", "")))
|
||||
|
||||
return {
|
||||
"local_node": LOCAL_NODE_ID,
|
||||
"nodes": {nid: {"node_id": nid, "served_count": len(c.get("served_models", [])),
|
||||
"age_s": round(time.time() - _node_timestamps.get(nid, 0), 1)}
|
||||
for nid, c in _node_cache.items()},
|
||||
"nodes": {nid: {
|
||||
"node_id": nid,
|
||||
"served_count": len(c.get("served_models", [])),
|
||||
"installed_count": c.get("installed_count", 0),
|
||||
"capabilities": c.get("capabilities", {}),
|
||||
"node_load": c.get("node_load", {}),
|
||||
"age_s": round(time.time() - _node_timestamps.get(nid, 0), 1),
|
||||
} for nid, c in _node_cache.items()},
|
||||
"served_models": all_served,
|
||||
"served_count": len(all_served),
|
||||
"capabilities_by_node": global_caps,
|
||||
"node_count": len(_node_cache),
|
||||
"updated_at": time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime()),
|
||||
}
|
||||
@@ -214,6 +226,44 @@ def get_cached_global() -> Dict[str, Any]:
|
||||
return _build_global_view()
|
||||
|
||||
|
||||
async def require_fresh_caps(ttl: int = 30) -> Optional[Dict[str, Any]]:
    """Preflight guard: hand back the global capabilities view only when the
    cached per-node data is recent enough.

    Returns None when NCS data is stale beyond ``ttl`` seconds even after a
    forced refresh — the caller should fall back to a safe default instead
    of making routing decisions on outdated information.
    """
    if not _node_timestamps:
        # Nothing cached yet — attempt one forced discovery round.
        refreshed = await get_global_capabilities(force=True)
        return refreshed if _node_timestamps else None

    if (time.time() - min(_node_timestamps.values())) <= ttl:
        # All cached entries are fresh; serve straight from the cache.
        return _build_global_view()

    # Stale — force a refresh, then re-check the oldest entry.
    refreshed = await get_global_capabilities(force=True)
    oldest = min(_node_timestamps.values()) if _node_timestamps else 0
    if (time.time() - oldest) > ttl:
        logger.warning("[preflight] caps stale after refresh, age=%ds", int(time.time() - oldest))
        return None
    return refreshed
|
||||
|
||||
|
||||
def find_nodes_with_capability(cap: str) -> List[str]:
    """Return the IDs of every cached node whose capability flag *cap* is truthy."""
    return [
        node_id
        for node_id, entry in _node_cache.items()
        if entry.get("capabilities", {}).get(cap, False)
    ]
|
||||
|
||||
|
||||
def get_node_load(node_id: str) -> Dict[str, Any]:
    """Return the cached ``node_load`` dict for *node_id* (empty dict if unknown)."""
    return _node_cache.get(node_id, {}).get("node_load", {})
|
||||
|
||||
|
||||
async def send_offload_request(
|
||||
node_id: str,
|
||||
request_type: str,
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
from fastapi import FastAPI, HTTPException, Request
|
||||
from fastapi.responses import Response
|
||||
from fastapi.responses import JSONResponse, Response
|
||||
from pydantic import BaseModel, ConfigDict
|
||||
from typing import Literal, Optional, Dict, Any, List
|
||||
import asyncio
|
||||
@@ -3542,6 +3542,7 @@ async def documents_versions(doc_id: str, agent_id: str, limit: int = 20):
|
||||
async def list_available_models():
|
||||
"""List all available models from NCS (global capabilities pool)."""
|
||||
models = []
|
||||
caps_by_node = {}
|
||||
|
||||
try:
|
||||
from global_capabilities_client import get_global_capabilities
|
||||
@@ -3555,6 +3556,7 @@ async def list_available_models():
|
||||
"size_gb": m.get("size_gb"),
|
||||
"status": "served",
|
||||
})
|
||||
caps_by_node = pool.get("capabilities_by_node", {})
|
||||
except Exception as e:
|
||||
logger.warning(f"Cannot get NCS global models: {e}")
|
||||
|
||||
@@ -3572,7 +3574,110 @@ async def list_available_models():
|
||||
except Exception as e:
|
||||
logger.warning(f"Cannot get Ollama models: {e}")
|
||||
|
||||
return {"models": models, "total": len(models)}
|
||||
return {
|
||||
"models": models,
|
||||
"total": len(models),
|
||||
"capabilities_by_node": caps_by_node,
|
||||
}
|
||||
|
||||
|
||||
# ── Capability-based offload routing ────────────────────────────────────────
|
||||
|
||||
@app.post("/v1/capability/{cap_type}")
async def capability_offload(cap_type: str, request: Request):
    """Route a capability request (stt/tts/ocr/image) to the best node.

    Router selects the node based on capabilities_by_node, circuit breaker,
    and node_load — no static assumptions about which node has what.

    Responses:
        200 — result produced by the selected node
        400 — unknown capability type, or malformed / non-object JSON body
        404 — no node advertises the requested capability
        502 — the selected node failed to process the job
        503 — NCS unavailable/stale, all nodes circuit-broken, or NATS down
    """
    valid_types = {"stt", "tts", "ocr", "image"}
    if cap_type not in valid_types:
        return JSONResponse(status_code=400, content={
            "error": f"Invalid capability type: {cap_type}. Valid: {sorted(valid_types)}",
        })

    if not NCS_AVAILABLE or not global_capabilities_client:
        return JSONResponse(status_code=503, content={
            "error": "NCS not available — cannot route capability requests",
        })

    # Preflight: refuse to route when the capability cache is stale.
    gcaps = await global_capabilities_client.require_fresh_caps(ttl=30)
    if gcaps is None:
        return JSONResponse(status_code=503, content={
            "error": "NCS caps stale — preflight failed, refusing to route",
        })

    eligible_nodes = global_capabilities_client.find_nodes_with_capability(cap_type)
    if not eligible_nodes:
        return JSONResponse(status_code=404, content={
            "error": f"No node with capability '{cap_type}' available",
            "capabilities_by_node": gcaps.get("capabilities_by_node", {}),
        })

    # Filter out circuit-broken nodes (node IDs compared case-insensitively).
    unavailable = offload_client.get_unavailable_nodes(cap_type) if offload_client else set()
    broken = {u.lower() for u in unavailable}
    available = [n for n in eligible_nodes if n.lower() not in broken]
    if not available:
        return JSONResponse(status_code=503, content={
            "error": f"All nodes with '{cap_type}' are circuit-broken",
            "eligible": eligible_nodes,
            "unavailable": list(unavailable),
        })

    # Least-loaded selection: 10 points per in-flight job, +100 under high
    # memory pressure; sort ties break on node id, keeping choice deterministic.
    best_node = available[0]
    if len(available) > 1:
        loads = []
        for nid in available:
            nl = global_capabilities_client.get_node_load(nid)
            score = nl.get("inflight", 0) * 10
            if nl.get("mem_pressure") == "high":
                score += 100
            loads.append((score, nid))
        loads.sort()
        best_node = loads[0][1]

    # Parse the body defensively: malformed or non-object JSON is a client
    # error (400), not an unhandled 500 from request.json()/payload.pop().
    try:
        payload = await request.json()
    except Exception:
        return JSONResponse(status_code=400, content={
            "error": "Request body must be valid JSON",
        })
    if not isinstance(payload, dict):
        return JSONResponse(status_code=400, content={
            "error": "Request body must be a JSON object",
        })

    logger.info(f"[cap.offload] type={cap_type} → node={best_node} (of {available})")

    nats_ok = nc is not None and nats_available
    if nats_ok and offload_client:
        import uuid as _uuid
        # Pop hints BEFORE building the job dict: the original popped inside
        # the literal, mutating `payload` after it was already referenced,
        # which silently depended on dict-literal evaluation order.
        hints = payload.pop("hints", {})
        job = {
            "job_id": str(_uuid.uuid4()),
            "required_type": cap_type,
            "payload": payload,
            "deadline_ts": int(time.time() * 1000) + 60000,  # 60 s deadline
            "hints": hints,
        }
        result = await offload_client.offload_infer(
            nats_client=nc, node_id=best_node, required_type=cap_type,
            job_payload=job, timeout_ms=60000,
        )
        if result and result.get("status") == "ok":
            return JSONResponse(content=result.get("result", result))
        # Workers should report errors as a dict; tolerate anything else.
        error = result.get("error", {}) if result else {}
        if not isinstance(error, dict):
            error = {"message": str(error)}
        return JSONResponse(status_code=502, content={
            "error": error.get("message", f"Offload to {best_node} failed"),
            "code": error.get("code", "OFFLOAD_FAILED"),
            "node": best_node,
        })

    return JSONResponse(status_code=503, content={
        "error": "NATS not connected — cannot offload",
    })
|
||||
|
||||
|
||||
@app.get("/v1/capabilities")
async def list_global_capabilities():
    """Return full capabilities view across all nodes."""
    ncs_ready = NCS_AVAILABLE and global_capabilities_client
    if not ncs_ready:
        return JSONResponse(status_code=503, content={"error": "NCS not available"})

    gcaps = await global_capabilities_client.get_global_capabilities()
    defaults = {
        "node_count": 0,
        "nodes": {},
        "capabilities_by_node": {},
        "served_count": 0,
    }
    body = {key: gcaps.get(key, fallback) for key, fallback in defaults.items()}
    return JSONResponse(content=body)
|
||||
|
||||
|
||||
@app.get("/v1/agromatrix/shared-memory/pending")
|
||||
|
||||
@@ -81,7 +81,7 @@ def get_unavailable_nodes(req_type: str) -> Set[str]:
|
||||
async def offload_infer(
|
||||
nats_client,
|
||||
node_id: str,
|
||||
required_type: Literal["llm", "vision", "stt", "tts"],
|
||||
required_type: Literal["llm", "vision", "stt", "tts", "ocr", "image"],
|
||||
job_payload: Dict[str, Any],
|
||||
timeout_ms: int = 25000,
|
||||
) -> Optional[Dict[str, Any]]:
|
||||
|
||||
Reference in New Issue
Block a user