P3.5-P3.7: 2-layer inventory, capability routing, STT/TTS adapters, Dev Contract
NCS:
- _collect_worker_caps() fetches capability flags from node-worker /caps
- _derive_capabilities() merges served model types + worker provider flags
- installed_artifacts replaces inventory_only (disk scan with DISK_SCAN_PATHS env)
- New endpoints: /capabilities/caps, /capabilities/installed
Node Worker:
- STT_PROVIDER, TTS_PROVIDER, OCR_PROVIDER, IMAGE_PROVIDER env flags
- /caps endpoint returns capabilities + providers for NCS aggregation
- STT adapter (providers/stt_mlx_whisper.py) — remote + local mode
- TTS adapter (providers/tts_mlx_kokoro.py) — remote + local mode
- OCR handler via vision_prompted (ollama_vision with OCR prompt)
- NATS subjects: node.{id}.stt/tts/ocr/image.request
Router:
- POST /v1/capability/{stt,tts,ocr,image} — capability-based offload routing
- GET /v1/capabilities — global view with capabilities_by_node
- require_fresh_caps(ttl) preflight guard
- find_nodes_with_capability(cap) + load-based node selection
Ops:
- ops/fabric_snapshot.py — full runtime snapshot collector
- ops/fabric_preflight.sh — quick check + snapshot save + diff
- docs/fabric_contract.md — Dev Contract v0.1 (preflight-first)
- tests/test_fabric_contract.py — CI enforcement (6 tests)
Made-with: Cursor
This commit is contained in:
@@ -20,8 +20,10 @@ app = FastAPI(title="Node Capabilities Service", version="1.0.0")
|
||||
|
||||
NODE_ID = os.getenv("NODE_ID", "noda2")
|
||||
OLLAMA_BASE_URL = os.getenv("OLLAMA_BASE_URL", "http://host.docker.internal:11434")
|
||||
SWAPPER_URL = os.getenv("SWAPPER_URL", "") # empty = skip Swapper probing
|
||||
SWAPPER_URL = os.getenv("SWAPPER_URL", "")
|
||||
LLAMA_SERVER_URL = os.getenv("LLAMA_SERVER_URL", "")
|
||||
NODE_WORKER_URL = os.getenv("NODE_WORKER_URL", "http://node-worker:8109")
|
||||
DISK_SCAN_PATHS = os.getenv("DISK_SCAN_PATHS", "") # comma-sep extra dirs
|
||||
|
||||
_cache: Dict[str, Any] = {}
|
||||
_cache_ts: float = 0
|
||||
@@ -129,30 +131,48 @@ async def _collect_llama_server() -> Optional[Dict[str, Any]]:
|
||||
return runtime
|
||||
|
||||
|
||||
async def _collect_worker_caps() -> Dict[str, Any]:
    """Fetch capability flags from the local Node Worker's /caps endpoint.

    Best-effort: when the worker is unreachable, responds with a non-200
    status, or returns an unparseable body, an empty capability map is
    returned so callers never need their own error handling.
    """
    fallback: Dict[str, Any] = {"capabilities": {}, "providers": {}, "defaults": {}}
    try:
        async with httpx.AsyncClient(timeout=3) as client:
            resp = await client.get(f"{NODE_WORKER_URL}/caps")
            if resp.status_code == 200:
                # .json() stays inside the try so decode errors also
                # degrade to the empty fallback instead of propagating.
                return resp.json()
    except Exception as exc:
        logger.debug(f"Worker caps unavailable: {exc}")
    return fallback
|
||||
|
||||
|
||||
def _collect_disk_inventory() -> List[Dict[str, Any]]:
|
||||
"""Scan known model directories — NOT for routing, only inventory."""
|
||||
import pathlib
|
||||
inventory: List[Dict[str, Any]] = []
|
||||
|
||||
scan_dirs = [
|
||||
("cursor_worktrees", pathlib.Path.home() / ".cursor" / "worktrees"),
|
||||
("jan_ai", pathlib.Path.home() / "Library" / "Application Support" / "Jan"),
|
||||
("ollama", pathlib.Path.home() / ".ollama" / "models"),
|
||||
("hf_cache", pathlib.Path.home() / ".cache" / "huggingface" / "hub"),
|
||||
("comfyui_main", pathlib.Path.home() / "ComfyUI" / "models"),
|
||||
("comfyui_docs", pathlib.Path.home() / "Documents" / "ComfyUI" / "models"),
|
||||
("llama_cpp", pathlib.Path.home() / "Library" / "Application Support" / "llama.cpp" / "models"),
|
||||
("hf_models", pathlib.Path.home() / "hf_models"),
|
||||
("jan_ai", pathlib.Path.home() / "Library" / "Application Support" / "Jan"),
|
||||
]
|
||||
if DISK_SCAN_PATHS:
|
||||
for p in DISK_SCAN_PATHS.split(","):
|
||||
p = p.strip()
|
||||
if p:
|
||||
scan_dirs.append(("custom", pathlib.Path(p)))
|
||||
|
||||
for source, base in scan_dirs:
|
||||
if not base.exists():
|
||||
continue
|
||||
try:
|
||||
for f in base.rglob("*"):
|
||||
if f.suffix in (".gguf", ".safetensors", ".bin", ".pt") and f.stat().st_size > 100_000_000:
|
||||
if f.suffix in (".gguf", ".safetensors", ".bin", ".pt", ".mlx") and f.stat().st_size > 50_000_000:
|
||||
inventory.append({
|
||||
"name": f.stem,
|
||||
"path": str(f.relative_to(pathlib.Path.home())),
|
||||
"path": str(f),
|
||||
"source": source,
|
||||
"size_gb": round(f.stat().st_size / 1e9, 1),
|
||||
"type": _classify_model(f.stem),
|
||||
@@ -191,6 +211,24 @@ def _build_served_models(ollama: Dict, swapper: Dict, llama: Optional[Dict]) ->
|
||||
return served
|
||||
|
||||
|
||||
def _derive_capabilities(served: List[Dict], worker_caps: Dict) -> Dict[str, Any]:
|
||||
"""Merge served model types + worker provider flags into capability map."""
|
||||
served_types = {m.get("type", "llm") for m in served}
|
||||
wc = worker_caps.get("capabilities", {})
|
||||
wp = worker_caps.get("providers", {})
|
||||
|
||||
has_vision = "vision" in served_types or wc.get("vision", False)
|
||||
return {
|
||||
"llm": "llm" in served_types or "code" in served_types,
|
||||
"vision": has_vision,
|
||||
"stt": wc.get("stt", False),
|
||||
"tts": wc.get("tts", False),
|
||||
"ocr": has_vision and wp.get("ocr", "none") != "none",
|
||||
"image": wc.get("image", False),
|
||||
"providers": wp,
|
||||
}
|
||||
|
||||
|
||||
async def _build_capabilities() -> Dict[str, Any]:
|
||||
global _cache, _cache_ts
|
||||
|
||||
@@ -200,6 +238,7 @@ async def _build_capabilities() -> Dict[str, Any]:
|
||||
ollama = await _collect_ollama()
|
||||
swapper = await _collect_swapper()
|
||||
llama = await _collect_llama_server()
|
||||
worker_caps = await _collect_worker_caps()
|
||||
disk = _collect_disk_inventory()
|
||||
served = _build_served_models(ollama, swapper, llama)
|
||||
|
||||
@@ -209,6 +248,7 @@ async def _build_capabilities() -> Dict[str, Any]:
|
||||
|
||||
node_load = await build_node_load()
|
||||
runtime_load = await build_runtime_load(runtimes)
|
||||
capabilities = _derive_capabilities(served, worker_caps)
|
||||
|
||||
result = {
|
||||
"node_id": NODE_ID,
|
||||
@@ -216,10 +256,12 @@ async def _build_capabilities() -> Dict[str, Any]:
|
||||
"runtimes": runtimes,
|
||||
"served_models": served,
|
||||
"served_count": len(served),
|
||||
"capabilities": capabilities,
|
||||
"node_load": node_load,
|
||||
"runtime_load": runtime_load,
|
||||
"inventory_only": disk,
|
||||
"inventory_count": len(disk),
|
||||
"installed_artifacts": disk,
|
||||
"installed_count": len(disk),
|
||||
"worker": worker_caps,
|
||||
}
|
||||
|
||||
_cache = result
|
||||
@@ -245,6 +287,26 @@ async def capabilities_models():
|
||||
return JSONResponse(content={"node_id": data["node_id"], "served_models": data["served_models"]})
|
||||
|
||||
|
||||
@app.get("/capabilities/caps")
async def capabilities_caps():
    """Return only the derived capability flags plus the raw worker payload.

    Thin projection over the full (cached) capability snapshot built by
    ``_build_capabilities``; intended for routers doing capability checks.
    """
    snapshot = await _build_capabilities()
    payload = {
        "node_id": snapshot["node_id"],
        "capabilities": snapshot.get("capabilities", {}),
        "worker": snapshot.get("worker", {}),
    }
    return JSONResponse(content=payload)
|
||||
|
||||
|
||||
@app.get("/capabilities/installed")
async def capabilities_installed():
    """Return the on-disk model inventory (installed artifacts) for this node.

    Inventory-only view — these artifacts are NOT used for routing; the
    snapshot comes from the same cached ``_build_capabilities`` result as
    the other /capabilities endpoints.
    """
    snapshot = await _build_capabilities()
    payload = {
        "node_id": snapshot["node_id"],
        "installed_artifacts": snapshot.get("installed_artifacts", []),
        "installed_count": snapshot.get("installed_count", 0),
    }
    return JSONResponse(content=payload)
|
||||
|
||||
|
||||
@app.post("/capabilities/refresh")
|
||||
async def capabilities_refresh():
|
||||
global _cache_ts
|
||||
|
||||
Reference in New Issue
Block a user