P3.5-P3.7: 2-layer inventory, capability routing, STT/TTS adapters, Dev Contract
NCS:
- _collect_worker_caps() fetches capability flags from node-worker /caps
- _derive_capabilities() merges served model types + worker provider flags
- installed_artifacts replaces inventory_only (disk scan with DISK_SCAN_PATHS env)
- New endpoints: /capabilities/caps, /capabilities/installed
Node Worker:
- STT_PROVIDER, TTS_PROVIDER, OCR_PROVIDER, IMAGE_PROVIDER env flags
- /caps endpoint returns capabilities + providers for NCS aggregation
- STT adapter (providers/stt_mlx_whisper.py) — remote + local mode
- TTS adapter (providers/tts_mlx_kokoro.py) — remote + local mode
- OCR handler via vision_prompted (ollama_vision with OCR prompt)
- NATS subjects: node.{id}.stt/tts/ocr/image.request
Router:
- POST /v1/capability/{stt,tts,ocr,image} — capability-based offload routing
- GET /v1/capabilities — global view with capabilities_by_node
- require_fresh_caps(ttl) preflight guard
- find_nodes_with_capability(cap) + load-based node selection
Ops:
- ops/fabric_snapshot.py — full runtime snapshot collector
- ops/fabric_preflight.sh — quick check + snapshot save + diff
- docs/fabric_contract.md — Dev Contract v0.1 (preflight-first)
- tests/test_fabric_contract.py — CI enforcement (6 tests)
Made-with: Cursor
This commit is contained in:
@@ -100,8 +100,8 @@ async def _discover_remote_nodes() -> List[Dict[str, Any]]:
|
||||
sub = await _nats_client.subscribe(inbox)
|
||||
|
||||
try:
|
||||
await _nats_client.publish_request(
|
||||
"node.*.capabilities.get", inbox, b""
|
||||
await _nats_client.publish(
|
||||
CAPS_DISCOVERY_SUBJECT, b"", reply=inbox,
|
||||
)
|
||||
await _nats_client.flush()
|
||||
|
||||
@@ -183,6 +183,7 @@ async def get_global_capabilities(force: bool = False) -> Dict[str, Any]:
|
||||
def _build_global_view() -> Dict[str, Any]:
|
||||
"""Build a unified view from all cached node capabilities."""
|
||||
all_served: List[Dict[str, Any]] = []
|
||||
global_caps: Dict[str, Dict[str, Any]] = {}
|
||||
|
||||
for node_id, caps in _node_cache.items():
|
||||
is_local = (node_id.lower() == LOCAL_NODE_ID.lower())
|
||||
@@ -194,16 +195,27 @@ def _build_global_view() -> Dict[str, Any]:
|
||||
"local": is_local,
|
||||
"node_age_s": round(age, 1),
|
||||
})
|
||||
node_caps = caps.get("capabilities", {})
|
||||
if node_caps:
|
||||
global_caps[node_id] = {
|
||||
k: v for k, v in node_caps.items() if k != "providers"
|
||||
}
|
||||
|
||||
all_served.sort(key=lambda m: (0 if m.get("local") else 1, m.get("name", "")))
|
||||
|
||||
return {
|
||||
"local_node": LOCAL_NODE_ID,
|
||||
"nodes": {nid: {"node_id": nid, "served_count": len(c.get("served_models", [])),
|
||||
"age_s": round(time.time() - _node_timestamps.get(nid, 0), 1)}
|
||||
for nid, c in _node_cache.items()},
|
||||
"nodes": {nid: {
|
||||
"node_id": nid,
|
||||
"served_count": len(c.get("served_models", [])),
|
||||
"installed_count": c.get("installed_count", 0),
|
||||
"capabilities": c.get("capabilities", {}),
|
||||
"node_load": c.get("node_load", {}),
|
||||
"age_s": round(time.time() - _node_timestamps.get(nid, 0), 1),
|
||||
} for nid, c in _node_cache.items()},
|
||||
"served_models": all_served,
|
||||
"served_count": len(all_served),
|
||||
"capabilities_by_node": global_caps,
|
||||
"node_count": len(_node_cache),
|
||||
"updated_at": time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime()),
|
||||
}
|
||||
@@ -214,6 +226,44 @@ def get_cached_global() -> Dict[str, Any]:
|
||||
return _build_global_view()
|
||||
|
||||
|
||||
async def require_fresh_caps(ttl: int = 30) -> Optional[Dict[str, Any]]:
    """Preflight guard: hand back global capabilities only when they are fresh.

    A ``None`` result means the NCS cache could not be brought within ``ttl``
    seconds of freshness even after a forced refresh — the caller should fall
    back to a safe default rather than route on outdated information.
    """
    # Cold start: nothing cached yet, so force one discovery round.
    if not _node_timestamps:
        gcaps = await get_global_capabilities(force=True)
        # Still empty after the refresh → no nodes reachable.
        return gcaps if _node_timestamps else None

    # Fast path: the oldest cached entry is within the freshness window.
    if (time.time() - min(_node_timestamps.values())) <= ttl:
        return _build_global_view()

    # Stale: attempt exactly one forced refresh, then re-evaluate age.
    gcaps = await get_global_capabilities(force=True)
    oldest = min(_node_timestamps.values()) if _node_timestamps else 0
    if (time.time() - oldest) > ttl:
        logger.warning("[preflight] caps stale after refresh, age=%ds", int(time.time() - oldest))
        return None
    return gcaps
|
||||
|
||||
|
||||
def find_nodes_with_capability(cap: str) -> List[str]:
    """Return node IDs whose cached capabilities flag ``cap`` as enabled."""
    # Preserve _node_cache insertion order; a missing or falsy flag excludes the node.
    return [
        nid
        for nid, caps in _node_cache.items()
        if caps.get("capabilities", {}).get(cap, False)
    ]
|
||||
|
||||
|
||||
def get_node_load(node_id: str) -> Dict[str, Any]:
    """Return the cached ``node_load`` dict for ``node_id`` ({} if unknown)."""
    # Unknown node or node without load data both yield an empty dict.
    return _node_cache.get(node_id, {}).get("node_load", {})
|
||||
|
||||
|
||||
async def send_offload_request(
|
||||
node_id: str,
|
||||
request_type: str,
|
||||
|
||||
Reference in New Issue
Block a user