feat: Node Self-Healing, DAGI Audit, Agent Prompts, Infra Invariants
### Backend (city-service)
- Node Registry + Self-Healing API (migration 039)
- Improved get_all_nodes() with robust fallback for node_registry/node_cache
- Agent Prompts Runtime API for DAGI Router integration
- DAGI Router Audit endpoints (phantom/stale detection)
- Node Agents API (Guardian/Steward)
- Node metrics extended (CPU/GPU/RAM/Disk)

### Frontend (apps/web)
- Node Directory with improved error handling
- Node Cabinet with metrics cards
- DAGI Router Card component
- Node Metrics Card component
- useDAGIAudit hook

### Scripts
- check-invariants.py - deploy verification
- node-bootstrap.sh - node self-registration
- node-guardian-loop.py - continuous self-healing
- dagi_agent_audit.py - DAGI audit utility

### Migrations
- 034: Agent prompts seed
- 035: Agent DAGI audit
- 036: Node metrics extended
- 037: Node agents complete
- 038: Agent prompts full coverage
- 039: Node registry self-healing

### Tests
- test_infra_smoke.py
- test_agent_prompts_runtime.py
- test_dagi_router_api.py

### Documentation
- DEPLOY_CHECKLIST_2024_11_30.md
- Multiple TASK_PHASE docs
This commit is contained in:
@@ -19,6 +19,7 @@ SWAPPER_URL = os.getenv("SWAPPER_URL", "http://192.168.1.33:8890")
|
||||
STT_URL = os.getenv("STT_URL", "http://192.168.1.33:8895")
|
||||
VISION_URL = os.getenv("VISION_URL", "http://192.168.1.33:11434")
|
||||
OCR_URL = os.getenv("OCR_URL", "http://192.168.1.33:8896")
|
||||
CITY_SERVICE_URL = os.getenv("CITY_SERVICE_URL", "http://daarion-city-service:7001")
|
||||
|
||||
# HTTP client for backend services
|
||||
http_client: Optional[httpx.AsyncClient] = None
|
||||
@@ -56,7 +57,27 @@ def load_config():
|
||||
}
|
||||
}
|
||||
|
||||
def load_router_config():
    """Load main router-config.yml with agents and LLM profiles.

    Candidate locations are tried in order; the first one that is a regular
    file is parsed with ``yaml.safe_load``.

    Returns:
        dict: The parsed configuration mapping, or ``{"agents": {}}`` when no
        candidate file exists or the file found does not contain a mapping.
    """
    # Try multiple locations
    candidate_paths = [
        "router-config.yml",
        "/app/router-config.yml",
        "../router-config.yml",
        "../../router-config.yml",
    ]

    for path in candidate_paths:
        # isfile() rather than exists(): a directory with the same name is
        # skipped instead of making open() raise.
        if not os.path.isfile(path):
            continue

        with open(path, 'r', encoding="utf-8") as f:
            loaded = yaml.safe_load(f)

        # safe_load() yields None for an empty document (and may yield a
        # list/scalar); callers use router_config.get(...), so only a
        # mapping is usable.
        if not isinstance(loaded, dict):
            logger.warning(
                f"⚠️ router config at {path} is empty or not a mapping, using empty config"
            )
            return {"agents": {}}

        # Log success only after the file has actually been parsed.
        logger.info(f"✅ Loaded router config from {path}")
        return loaded

    logger.warning("⚠️ router-config.yml not found, using empty config")
    return {"agents": {}}
|
||||
|
||||
config = load_config()
|
||||
router_config = load_router_config()
|
||||
|
||||
@app.on_event("startup")
|
||||
async def startup_event():
|
||||
@@ -363,10 +384,30 @@ async def agent_infer(agent_id: str, request: InferRequest):
|
||||
- Agent configuration (model, capabilities)
|
||||
- Request type (text, vision, audio)
|
||||
- Backend availability
|
||||
|
||||
System prompt is fetched from database via city-service API.
|
||||
"""
|
||||
logger.info(f"🔀 Inference request for agent: {agent_id}")
|
||||
logger.info(f"📝 Prompt: {request.prompt[:100]}...")
|
||||
|
||||
# Get system prompt from database or config
|
||||
system_prompt = request.system_prompt
|
||||
|
||||
if not system_prompt:
|
||||
try:
|
||||
from prompt_builder import get_agent_system_prompt
|
||||
system_prompt = await get_agent_system_prompt(
|
||||
agent_id,
|
||||
city_service_url=CITY_SERVICE_URL,
|
||||
router_config=router_config
|
||||
)
|
||||
logger.info(f"✅ Loaded system prompt from database for {agent_id}")
|
||||
except Exception as e:
|
||||
logger.warning(f"⚠️ Could not load prompt from database: {e}")
|
||||
# Fallback to config
|
||||
agent_config = router_config.get("agents", {}).get(agent_id, {})
|
||||
system_prompt = agent_config.get("system_prompt")
|
||||
|
||||
# Determine which backend to use
|
||||
model = request.model or "gpt-oss:latest"
|
||||
|
||||
@@ -389,7 +430,7 @@ async def agent_infer(agent_id: str, request: InferRequest):
|
||||
json={
|
||||
"model": model,
|
||||
"prompt": request.prompt,
|
||||
"system": request.system_prompt,
|
||||
"system": system_prompt,
|
||||
"stream": False,
|
||||
"options": {
|
||||
"num_predict": request.max_tokens,
|
||||
|
||||
Reference in New Issue
Block a user