feat: Node Self-Healing, DAGI Audit, Agent Prompts, Infra Invariants

### Backend (city-service)
- Node Registry + Self-Healing API (migration 039)
- Improved get_all_nodes() with robust fallback for node_registry/node_cache
- Agent Prompts Runtime API for DAGI Router integration
- DAGI Router Audit endpoints (phantom/stale detection)
- Node Agents API (Guardian/Steward)
- Node metrics extended (CPU/GPU/RAM/Disk)

### Frontend (apps/web)
- Node Directory with improved error handling
- Node Cabinet with metrics cards
- DAGI Router Card component
- Node Metrics Card component
- useDAGIAudit hook

### Scripts
- check-invariants.py - deploy verification
- node-bootstrap.sh - node self-registration
- node-guardian-loop.py - continuous self-healing
- dagi_agent_audit.py - DAGI audit utility

### Migrations
- 034: Agent prompts seed
- 035: Agent DAGI audit
- 036: Node metrics extended
- 037: Node agents complete
- 038: Agent prompts full coverage
- 039: Node registry self-healing

### Tests
- test_infra_smoke.py
- test_agent_prompts_runtime.py
- test_dagi_router_api.py

### Documentation
- DEPLOY_CHECKLIST_2024_11_30.md
- Multiple TASK_PHASE docs
2025-11-30 13:52:01 -08:00
parent 0c7836af5a
commit bca81dc719
36 changed files with 10630 additions and 55 deletions
--- a/services/router/main.py
+++ b/services/router/main.py
@@ -19,6 +19,7 @@ SWAPPER_URL = os.getenv("SWAPPER_URL", "http://192.168.1.33:8890")
 STT_URL = os.getenv("STT_URL", "http://192.168.1.33:8895")
 VISION_URL = os.getenv("VISION_URL", "http://192.168.1.33:11434")
 OCR_URL = os.getenv("OCR_URL", "http://192.168.1.33:8896")
+CITY_SERVICE_URL = os.getenv("CITY_SERVICE_URL", "http://daarion-city-service:7001")

 # HTTP client for backend services
 http_client: Optional[httpx.AsyncClient] = None
@@ -56,7 +57,27 @@ def load_config():
        }
    }

+def load_router_config():
+    """Load router-config.yml (agents and LLM profiles) from the first path that exists.
+
+    Returns the parsed mapping; falls back to ``{"agents": {}}`` when no file is
+    found or the file does not contain a mapping (e.g. it is empty).
+    """
+    paths = ("router-config.yml", "/app/router-config.yml",
+             "../router-config.yml", "../../router-config.yml")
+    for path in paths:
+        if os.path.exists(path):
+            with open(path, 'r', encoding="utf-8") as f:
+                loaded = yaml.safe_load(f)
+            logger.info(f"✅ Loaded router config from {path}")
+            # safe_load() yields None for an empty file; callers rely on dict .get().
+            return loaded if isinstance(loaded, dict) else {"agents": {}}
+
+    logger.warning("⚠️ router-config.yml not found, using empty config")
+    return {"agents": {}}
+
 config = load_config()
+router_config = load_router_config()

 @app.on_event("startup")
 async def startup_event():
@@ -363,10 +384,30 @@ async def agent_infer(agent_id: str, request: InferRequest):
    - Agent configuration (model, capabilities)
    - Request type (text, vision, audio)
    - Backend availability
+    
+    System prompt is fetched from database via city-service API.
    """
    logger.info(f"🔀 Inference request for agent: {agent_id}")
    logger.info(f"📝 Prompt: {request.prompt[:100]}...")
    
+    # Get system prompt from database or config
+    system_prompt = request.system_prompt
+    
+    if not system_prompt:
+        try:
+            from prompt_builder import get_agent_system_prompt
+            system_prompt = await get_agent_system_prompt(
+                agent_id,
+                city_service_url=CITY_SERVICE_URL,
+                router_config=router_config
+            )
+            logger.info(f"✅ Loaded system prompt from database for {agent_id}")
+        except Exception as e:
+            logger.warning(f"⚠️ Could not load prompt from database: {e}")
+            # Fallback to config
+            agent_config = router_config.get("agents", {}).get(agent_id, {})
+            system_prompt = agent_config.get("system_prompt")
+    
    # Determine which backend to use
    model = request.model or "gpt-oss:latest"
    
@@ -389,7 +430,7 @@ async def agent_infer(agent_id: str, request: InferRequest):
                    json={
                        "model": model,
                        "prompt": request.prompt,
-                        "system": request.system_prompt,
+                        "system": system_prompt,
                        "stream": False,
                        "options": {
                            "num_predict": request.max_tokens,