P3.5-P3.7: 2-layer inventory, capability routing, STT/TTS adapters, Dev Contract
NCS:
- _collect_worker_caps() fetches capability flags from node-worker /caps
- _derive_capabilities() merges served model types + worker provider flags
- installed_artifacts replaces inventory_only (disk scan with DISK_SCAN_PATHS env)
- New endpoints: /capabilities/caps, /capabilities/installed
Node Worker:
- STT_PROVIDER, TTS_PROVIDER, OCR_PROVIDER, IMAGE_PROVIDER env flags
- /caps endpoint returns capabilities + providers for NCS aggregation
- STT adapter (providers/stt_mlx_whisper.py) — remote + local mode
- TTS adapter (providers/tts_mlx_kokoro.py) — remote + local mode
- OCR handler via vision_prompted (ollama_vision with OCR prompt)
- NATS subjects: node.{id}.stt/tts/ocr/image.request
Router:
- POST /v1/capability/{stt,tts,ocr,image} — capability-based offload routing
- GET /v1/capabilities — global view with capabilities_by_node
- require_fresh_caps(ttl) preflight guard
- find_nodes_with_capability(cap) + load-based node selection
Ops:
- ops/fabric_snapshot.py — full runtime snapshot collector
- ops/fabric_preflight.sh — quick check + snapshot save + diff
- docs/fabric_contract.md — Dev Contract v0.1 (preflight-first)
- tests/test_fabric_contract.py — CI enforcement (6 tests)
Made-with: Cursor
This commit is contained in:
@@ -1,5 +1,5 @@
|
||||
from fastapi import FastAPI, HTTPException, Request
|
||||
from fastapi.responses import Response
|
||||
from fastapi.responses import JSONResponse, Response
|
||||
from pydantic import BaseModel, ConfigDict
|
||||
from typing import Literal, Optional, Dict, Any, List
|
||||
import asyncio
|
||||
@@ -3542,6 +3542,7 @@ async def documents_versions(doc_id: str, agent_id: str, limit: int = 20):
|
||||
async def list_available_models():
|
||||
"""List all available models from NCS (global capabilities pool)."""
|
||||
models = []
|
||||
caps_by_node = {}
|
||||
|
||||
try:
|
||||
from global_capabilities_client import get_global_capabilities
|
||||
@@ -3555,6 +3556,7 @@ async def list_available_models():
|
||||
"size_gb": m.get("size_gb"),
|
||||
"status": "served",
|
||||
})
|
||||
caps_by_node = pool.get("capabilities_by_node", {})
|
||||
except Exception as e:
|
||||
logger.warning(f"Cannot get NCS global models: {e}")
|
||||
|
||||
@@ -3572,7 +3574,110 @@ async def list_available_models():
|
||||
except Exception as e:
|
||||
logger.warning(f"Cannot get Ollama models: {e}")
|
||||
|
||||
return {"models": models, "total": len(models)}
|
||||
return {
|
||||
"models": models,
|
||||
"total": len(models),
|
||||
"capabilities_by_node": caps_by_node,
|
||||
}
|
||||
|
||||
|
||||
# ── Capability-based offload routing ────────────────────────────────────────
|
||||
|
||||
@app.post("/v1/capability/{cap_type}")
async def capability_offload(cap_type: str, request: Request):
    """Route a capability request (stt/tts/ocr/image) to the best node.

    The node is picked from the nodes that advertise ``cap_type``, minus the
    ones the offload client reports as unavailable, preferring the lowest
    load score — no static assumptions about which node has what.

    Args:
        cap_type: Capability name taken from the URL path.
        request: Incoming request; its body must be a JSON object. An
            optional ``hints`` key is moved out of the payload into the job.

    Returns:
        JSONResponse with one of:
        * 200 — the worker result (``result["result"]`` when present).
        * 400 — unknown ``cap_type`` or a body that is not a JSON object.
        * 404 — no node advertises the capability.
        * 502 — the selected node did not answer with ``status == "ok"``.
        * 503 — NCS unavailable/stale, every eligible node unavailable, or
          offloading is not possible (NATS / offload client missing).
    """
    valid_types = {"stt", "tts", "ocr", "image"}
    if cap_type not in valid_types:
        return JSONResponse(status_code=400, content={
            "error": f"Invalid capability type: {cap_type}. Valid: {sorted(valid_types)}",
        })

    if not NCS_AVAILABLE or not global_capabilities_client:
        return JSONResponse(status_code=503, content={
            "error": "NCS not available — cannot route capability requests",
        })

    gcaps = await global_capabilities_client.require_fresh_caps(ttl=30)
    if gcaps is None:
        return JSONResponse(status_code=503, content={
            "error": "NCS caps stale — preflight failed, refusing to route",
        })

    eligible_nodes = global_capabilities_client.find_nodes_with_capability(cap_type)
    if not eligible_nodes:
        return JSONResponse(status_code=404, content={
            "error": f"No node with capability '{cap_type}' available",
            "capabilities_by_node": gcaps.get("capabilities_by_node", {}),
        })

    unavailable = offload_client.get_unavailable_nodes(cap_type) if offload_client else set()
    # Build the case-insensitive lookup once instead of once per eligible node.
    unavailable_lower = {u.lower() for u in unavailable}
    available = [n for n in eligible_nodes if n.lower() not in unavailable_lower]
    if not available:
        return JSONResponse(status_code=503, content={
            "error": f"All nodes with '{cap_type}' are circuit-broken",
            "eligible": eligible_nodes,
            "unavailable": list(unavailable),
        })

    best_node = available[0]
    if len(available) > 1:
        scored = []
        for nid in available:
            # Tolerate a missing load record; such a node scores as idle.
            nl = global_capabilities_client.get_node_load(nid) or {}
            score = nl.get("inflight", 0) * 10
            if nl.get("mem_pressure") == "high":
                score += 100
            scored.append((score, nid))
        # min() over (score, node_id) keeps the same tie-break as sorting.
        best_node = min(scored)[1]

    # A malformed, empty or non-object body is a client error, not a 500.
    try:
        payload = await request.json()
    except ValueError:  # json.JSONDecodeError / UnicodeDecodeError
        payload = None
    if not isinstance(payload, dict):
        return JSONResponse(status_code=400, content={
            "error": "Request body must be a JSON object",
        })

    logger.info(f"[cap.offload] type={cap_type} → node={best_node} (of {available})")

    nats_ok = nc is not None and nats_available
    if nats_ok and offload_client:
        import uuid as _uuid

        timeout_ms = 60000
        # Hints travel next to the payload, not inside it.
        hints = payload.pop("hints", {})
        job = {
            "job_id": str(_uuid.uuid4()),
            "required_type": cap_type,
            "payload": payload,
            "deadline_ts": int(time.time() * 1000) + timeout_ms,
            "hints": hints,
        }
        result = await offload_client.offload_infer(
            nats_client=nc, node_id=best_node, required_type=cap_type,
            job_payload=job, timeout_ms=timeout_ms,
        )
        if result and result.get("status") == "ok":
            return JSONResponse(content=result.get("result", result))
        error = result.get("error", {}) if result else {}
        return JSONResponse(status_code=502, content={
            "error": error.get("message", f"Offload to {best_node} failed"),
            "code": error.get("code", "OFFLOAD_FAILED"),
            "node": best_node,
        })

    return JSONResponse(status_code=503, content={
        "error": "NATS not connected — cannot offload",
    })
|
||||
|
||||
|
||||
@app.get("/v1/capabilities")
async def list_global_capabilities():
    """Expose the aggregated capability snapshot for every known node.

    Responds with 503 when NCS (or its client) is not available; otherwise
    returns the node count, the per-node records, the capabilities grouped
    by node and the served-model count, each falling back to an empty
    value when the snapshot lacks the key.
    """
    ncs_ready = NCS_AVAILABLE and global_capabilities_client
    if not ncs_ready:
        return JSONResponse(status_code=503, content={"error": "NCS not available"})

    snapshot = await global_capabilities_client.get_global_capabilities()

    # (response key, fallback) pairs, in the order they appear in the body.
    fields = (
        ("node_count", 0),
        ("nodes", {}),
        ("capabilities_by_node", {}),
        ("served_count", 0),
    )
    body = {key: snapshot.get(key, fallback) for key, fallback in fields}
    return JSONResponse(content=body)
|
||||
|
||||
|
||||
@app.get("/v1/agromatrix/shared-memory/pending")
|
||||
|
||||
Reference in New Issue
Block a user