P3.5-P3.7: 2-layer inventory, capability routing, STT/TTS adapters, Dev Contract

NCS:
- _collect_worker_caps() fetches capability flags from node-worker /caps
- _derive_capabilities() merges served model types + worker provider flags
- installed_artifacts replaces inventory_only (disk scan with DISK_SCAN_PATHS env)
- New endpoints: /capabilities/caps, /capabilities/installed

Node Worker:
- STT_PROVIDER, TTS_PROVIDER, OCR_PROVIDER, IMAGE_PROVIDER env flags
- /caps endpoint returns capabilities + providers for NCS aggregation
- STT adapter (providers/stt_mlx_whisper.py) — remote + local mode
- TTS adapter (providers/tts_mlx_kokoro.py) — remote + local mode
- OCR handler via vision_prompted (ollama_vision with OCR prompt)
- NATS subjects: node.{id}.stt/tts/ocr/image.request

Router:
- POST /v1/capability/{stt,tts,ocr,image} — capability-based offload routing
- GET /v1/capabilities — global view with capabilities_by_node
- require_fresh_caps(ttl) preflight guard
- find_nodes_with_capability(cap) + load-based node selection

Ops:
- ops/fabric_snapshot.py — full runtime snapshot collector
- ops/fabric_preflight.sh — quick check + snapshot save + diff
- docs/fabric_contract.md — Dev Contract v0.1 (preflight-first)
- tests/test_fabric_contract.py — CI enforcement (6 tests)

Made-with: Cursor
This commit is contained in:
Apple
2026-02-27 05:24:09 -08:00
parent 194c87f53c
commit 9a36020316
17 changed files with 1352 additions and 21 deletions

View File

@@ -20,8 +20,10 @@ app = FastAPI(title="Node Capabilities Service", version="1.0.0")
NODE_ID = os.getenv("NODE_ID", "noda2")
OLLAMA_BASE_URL = os.getenv("OLLAMA_BASE_URL", "http://host.docker.internal:11434")
SWAPPER_URL = os.getenv("SWAPPER_URL", "") # empty = skip Swapper probing
SWAPPER_URL = os.getenv("SWAPPER_URL", "")
LLAMA_SERVER_URL = os.getenv("LLAMA_SERVER_URL", "")
NODE_WORKER_URL = os.getenv("NODE_WORKER_URL", "http://node-worker:8109")
DISK_SCAN_PATHS = os.getenv("DISK_SCAN_PATHS", "") # comma-sep extra dirs
_cache: Dict[str, Any] = {}
_cache_ts: float = 0
@@ -129,30 +131,48 @@ async def _collect_llama_server() -> Optional[Dict[str, Any]]:
return runtime
async def _collect_worker_caps() -> Dict[str, Any]:
    """Probe the local Node Worker's /caps endpoint for capability flags.

    Returns the worker's JSON payload on HTTP 200. On any failure — worker
    down, timeout, or a non-200 status — an empty capability map is returned
    instead, so callers never have to handle None or an exception.
    """
    fallback: Dict[str, Any] = {"capabilities": {}, "providers": {}, "defaults": {}}
    try:
        async with httpx.AsyncClient(timeout=3) as client:
            resp = await client.get(f"{NODE_WORKER_URL}/caps")
            if resp.status_code == 200:
                return resp.json()
    except Exception as exc:
        # Worker may simply not be deployed on this node — degrade quietly.
        logger.debug(f"Worker caps unavailable: {exc}")
    return fallback
def _collect_disk_inventory() -> List[Dict[str, Any]]:
"""Scan known model directories — NOT for routing, only inventory."""
import pathlib
inventory: List[Dict[str, Any]] = []
scan_dirs = [
("cursor_worktrees", pathlib.Path.home() / ".cursor" / "worktrees"),
("jan_ai", pathlib.Path.home() / "Library" / "Application Support" / "Jan"),
("ollama", pathlib.Path.home() / ".ollama" / "models"),
("hf_cache", pathlib.Path.home() / ".cache" / "huggingface" / "hub"),
("comfyui_main", pathlib.Path.home() / "ComfyUI" / "models"),
("comfyui_docs", pathlib.Path.home() / "Documents" / "ComfyUI" / "models"),
("llama_cpp", pathlib.Path.home() / "Library" / "Application Support" / "llama.cpp" / "models"),
("hf_models", pathlib.Path.home() / "hf_models"),
("jan_ai", pathlib.Path.home() / "Library" / "Application Support" / "Jan"),
]
if DISK_SCAN_PATHS:
for p in DISK_SCAN_PATHS.split(","):
p = p.strip()
if p:
scan_dirs.append(("custom", pathlib.Path(p)))
for source, base in scan_dirs:
if not base.exists():
continue
try:
for f in base.rglob("*"):
if f.suffix in (".gguf", ".safetensors", ".bin", ".pt") and f.stat().st_size > 100_000_000:
if f.suffix in (".gguf", ".safetensors", ".bin", ".pt", ".mlx") and f.stat().st_size > 50_000_000:
inventory.append({
"name": f.stem,
"path": str(f.relative_to(pathlib.Path.home())),
"path": str(f),
"source": source,
"size_gb": round(f.stat().st_size / 1e9, 1),
"type": _classify_model(f.stem),
@@ -191,6 +211,24 @@ def _build_served_models(ollama: Dict, swapper: Dict, llama: Optional[Dict]) ->
return served
def _derive_capabilities(served: List[Dict], worker_caps: Dict) -> Dict[str, Any]:
"""Merge served model types + worker provider flags into capability map."""
served_types = {m.get("type", "llm") for m in served}
wc = worker_caps.get("capabilities", {})
wp = worker_caps.get("providers", {})
has_vision = "vision" in served_types or wc.get("vision", False)
return {
"llm": "llm" in served_types or "code" in served_types,
"vision": has_vision,
"stt": wc.get("stt", False),
"tts": wc.get("tts", False),
"ocr": has_vision and wp.get("ocr", "none") != "none",
"image": wc.get("image", False),
"providers": wp,
}
async def _build_capabilities() -> Dict[str, Any]:
global _cache, _cache_ts
@@ -200,6 +238,7 @@ async def _build_capabilities() -> Dict[str, Any]:
ollama = await _collect_ollama()
swapper = await _collect_swapper()
llama = await _collect_llama_server()
worker_caps = await _collect_worker_caps()
disk = _collect_disk_inventory()
served = _build_served_models(ollama, swapper, llama)
@@ -209,6 +248,7 @@ async def _build_capabilities() -> Dict[str, Any]:
node_load = await build_node_load()
runtime_load = await build_runtime_load(runtimes)
capabilities = _derive_capabilities(served, worker_caps)
result = {
"node_id": NODE_ID,
@@ -216,10 +256,12 @@ async def _build_capabilities() -> Dict[str, Any]:
"runtimes": runtimes,
"served_models": served,
"served_count": len(served),
"capabilities": capabilities,
"node_load": node_load,
"runtime_load": runtime_load,
"inventory_only": disk,
"inventory_count": len(disk),
"installed_artifacts": disk,
"installed_count": len(disk),
"worker": worker_caps,
}
_cache = result
@@ -245,6 +287,26 @@ async def capabilities_models():
return JSONResponse(content={"node_id": data["node_id"], "served_models": data["served_models"]})
@app.get("/capabilities/caps")
async def capabilities_caps():
    """Return only the capability flags and raw worker payload for this node."""
    snapshot = await _build_capabilities()
    payload = {
        "node_id": snapshot["node_id"],
        "capabilities": snapshot.get("capabilities", {}),
        "worker": snapshot.get("worker", {}),
    }
    return JSONResponse(content=payload)
@app.get("/capabilities/installed")
async def capabilities_installed():
    """Return the disk-scan inventory (installed artifacts) for this node.

    Inventory is informational only — routing decisions use served models
    and capability flags, never the on-disk artifact list.
    """
    snapshot = await _build_capabilities()
    payload = {
        "node_id": snapshot["node_id"],
        "installed_artifacts": snapshot.get("installed_artifacts", []),
        "installed_count": snapshot.get("installed_count", 0),
    }
    return JSONResponse(content=payload)
@app.post("/capabilities/refresh")
async def capabilities_refresh():
global _cache_ts