P1: NCS-first model selection + NATS capabilities + Grok 4.1
Router model selection:
- New model_select.py: resolve_effective_profile → profile_requirements → select_best_model pipeline. NCS-first with graceful static fallback.
- selection_policies in router-config.node2.yml define the preference order per profile without hardcoding models (e.g. local_default_coder prefers qwen3:14b, then qwen3.5:35b-a3b).
- Cloud profiles (cloud_grok, cloud_deepseek) skip NCS; on cloud failure, fallback_profile is used via NCS for local selection.
- Structured logs: selected_profile, required_type, runtime, model, caps_age_s, fallback_reason on every infer request.

Grok model fix:
- grok-2-1212 no longer exists on the xAI API → updated to grok-4-1-fast-reasoning across all 3 hardcoded locations in main.py and router-config.node2.yml.

NCS NATS request/reply:
- node-capabilities subscribes to node.noda2.capabilities.get (NATS request/reply). Enabled via ENABLE_NATS_CAPS=true in compose.
- The NODA1 router can query NODA2 capabilities over the NATS leafnode without HTTP connectivity.

Verified:
- NCS: 14 served models from Ollama+Swapper+llama-server
- NATS: request/reply returns full capabilities JSON
- Sofiia: cloud_grok → grok-4-1-fast-reasoning (tested, 200 OK)
- Helion: NCS → qwen3:14b via Ollama (caps_age=23.7s cache hit)
- Router health: ok

Made-with: Cursor
This commit is contained in:
@@ -240,6 +240,55 @@ async def capabilities_refresh():
|
||||
return JSONResponse(content={"refreshed": True, "served_count": data["served_count"]})
|
||||
|
||||
|
||||
# ── NATS request/reply (optional) ─────────────────────────────────────────────

# Opt-in switch read from ENABLE_NATS_CAPS; only "true", "1" or "yes"
# (case-insensitive) turn the NATS responder on.
ENABLE_NATS = os.environ.get("ENABLE_NATS_CAPS", "false").lower() in (
    "true",
    "1",
    "yes",
)

# Server to connect to; overridable through the NATS_URL environment variable.
NATS_URL = os.environ.get("NATS_URL", "nats://dagi-nats:4222")

# Request subject for this node, derived from the lower-cased NODE_ID.
NATS_SUBJECT = f"node.{NODE_ID.lower()}.capabilities.get"

# Connected NATS client; stays None until startup_nats() connects successfully.
_nats_client = None
|
||||
|
||||
|
||||
async def _nats_capabilities_handler(msg):
    """Answer a NATS request with this node's capabilities as JSON.

    The capabilities document is produced by ``_build_capabilities()`` and
    published to the request's reply subject. When the message carries no
    reply subject nothing is published. If building or sending the document
    fails, a ``{"error":"internal"}`` payload is sent to the requester
    instead; a failure of that fallback is logged and swallowed so the
    exception never escapes the subscription callback.

    Args:
        msg: The incoming NATS message; only ``msg.reply`` is read.
    """
    import json as _json

    # Snapshot the module-level client once so the success path and the
    # error path use the same connection even if the global is reassigned
    # while this coroutine is suspended.
    client = _nats_client

    try:
        data = await _build_capabilities()
        payload = _json.dumps(data).encode()
        if msg.reply:
            await client.publish(msg.reply, payload)
            logger.debug(f"NATS reply sent to {msg.reply} ({len(payload)} bytes)")
    except Exception as e:
        logger.warning(f"NATS handler error: {e}")
        if msg.reply and client:
            try:
                await client.publish(msg.reply, b'{"error":"internal"}')
            except Exception as reply_err:
                # The connection itself may be the thing that broke; the
                # requester will simply time out in that case.
                logger.warning(f"NATS error reply failed: {reply_err}")
|
||||
|
||||
|
||||
@app.on_event("startup")
async def startup_nats():
    """Connect to NATS and subscribe the capabilities responder.

    Does nothing except log when ``ENABLE_NATS`` is false. Otherwise connects
    to ``NATS_URL`` and subscribes ``_nats_capabilities_handler`` to
    ``NATS_SUBJECT``. Any failure is non-fatal: it is logged and
    ``_nats_client`` is reset to ``None``. A connection that was opened
    before the failure (for example when the subscribe call fails) is closed
    rather than abandoned.
    """
    global _nats_client

    if not ENABLE_NATS:
        logger.info(f"NATS capabilities disabled (ENABLE_NATS_CAPS={ENABLE_NATS})")
        return

    try:
        # Imported lazily so the nats package is only needed when enabled.
        import nats as nats_lib

        _nats_client = await nats_lib.connect(NATS_URL)
        await _nats_client.subscribe(NATS_SUBJECT, cb=_nats_capabilities_handler)
        logger.info(f"✅ NATS subscribed: {NATS_SUBJECT} on {NATS_URL}")
    except Exception as e:
        logger.warning(f"⚠️ NATS init failed (non-fatal): {e}")
        # Detach the global first so the handler and shutdown hook never see
        # a half-initialised client, then release the connection if any.
        stale_client, _nats_client = _nats_client, None
        if stale_client is not None:
            try:
                await stale_client.close()
            except Exception as close_err:
                logger.debug(f"NATS close after failed init also failed: {close_err}")
|
||||
|
||||
|
||||
@app.on_event("shutdown")
async def shutdown_nats():
    """Close the NATS connection on application shutdown, if one exists.

    Closing is best-effort: an error from ``close()`` is logged at debug
    level and otherwise ignored so that it cannot interrupt shutdown.
    """
    if _nats_client:
        try:
            await _nats_client.close()
        except Exception as e:
            # Deliberately not re-raised; record it instead of hiding it.
            logger.debug(f"NATS close failed during shutdown (ignored): {e}")
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Direct-execution entry point: serve the app with uvicorn on all
    # interfaces, using the PORT environment variable (default 8099).
    import uvicorn

    listen_port = int(os.getenv("PORT", "8099"))
    uvicorn.run(app, host="0.0.0.0", port=listen_port)
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
fastapi>=0.110.0
|
||||
uvicorn>=0.29.0
|
||||
httpx>=0.27.0
|
||||
nats-py>=2.7.0
|
||||
|
||||
Reference in New Issue
Block a user