feat: DAGI Router v2 - new endpoints, hooks, and UI card
This commit is contained in:
@@ -4103,6 +4103,198 @@ async def get_node_swapper_detail(node_id: str):
|
||||
)
|
||||
|
||||
|
||||
@router.get("/internal/node/{node_id}/dagi-router/health")
async def get_dagi_router_health(node_id: str):
    """
    Get DAGI Router health status for a node.

    Never raises: every failure mode (no router configured for the node,
    router unreachable, non-200 answer, unparsable body) is reported as a
    normal payload with status="down" and a human-readable "error".

    Args:
        node_id: Node identifier used to look up the router base URL.

    Returns:
        dict with keys:
            node_id, status ("up" | "degraded" | "down"), version,
            agent_count, latency_ms (rounded to 2 decimals, or None when no
            response was received) and, for "down" payloads, error.
    """
    import httpx
    import time

    # Node-specific router URLs
    NODE_ROUTER_URLS = {
        "node-1-hetzner-gex44": "http://dagi-router:9102",
        "node-2-macbook-m4max": "http://localhost:9102",  # Local router on NODE2
    }

    def down(error, latency_ms=None):
        # Single place that defines the "down" payload so every failure
        # branch returns the same set of keys.
        return {
            "node_id": node_id,
            "status": "down",
            "version": None,
            "agent_count": 0,
            "latency_ms": latency_ms,
            "error": error,
        }

    base_url = NODE_ROUTER_URLS.get(node_id)
    if not base_url:
        return down("No router URL configured for this node")

    try:
        start = time.monotonic()
        async with httpx.AsyncClient(timeout=3.0) as client:
            resp = await client.get(f"{base_url}/health")
            # Round once so the non-200 and success branches agree.
            latency_ms = round((time.monotonic() - start) * 1000.0, 2)

        if resp.status_code != 200:
            return down(f"Router returned HTTP {resp.status_code}", latency_ms)

        data = resp.json()
        return {
            "node_id": node_id,
            # Anything other than an explicit "healthy" is treated as degraded.
            "status": "up" if data.get("status") == "healthy" else "degraded",
            "version": data.get("version"),
            "agent_count": data.get("agent_count", 0),
            "latency_ms": latency_ms,
        }
    except Exception as e:
        # Deliberate catch-all: a health probe must not turn into a 500.
        logger.warning(f"DAGI Router health check failed for {node_id}: {e}")
        # Some httpx errors (e.g. timeouts) stringify to "", so fall back to
        # the exception class name.
        return down(str(e) or type(e).__name__)
|
||||
|
||||
|
||||
@router.get("/internal/node/{node_id}/dagi-router/agents")
async def get_dagi_router_agents(node_id: str):
    """
    Get list of agents registered with DAGI Router for a node.

    The router's agent list is compared with the agents stored in the DB for
    the same node, and every agent is classified as:

    - "active":  reported by the router and present in the DB
    - "phantom": reported by the router but missing from the DB
    - "stale":   present in the DB but not reported by the router

    Router and DB failures are logged and treated as an empty source, so the
    endpoint still answers with whatever the other side provided.

    Args:
        node_id: Node identifier used to select the router URL and DB rows.

    Returns:
        dict with node_id, total, active, phantom, stale and the combined
        "agents" list.
    """
    import httpx

    NODE_ROUTER_URLS = {
        "node-1-hetzner-gex44": "http://dagi-router:9102",
        "node-2-macbook-m4max": "http://localhost:9102",
    }

    base_url = NODE_ROUTER_URLS.get(node_id)
    router_agents = []

    # Try to get agents from router
    if base_url:
        try:
            async with httpx.AsyncClient(timeout=5.0) as client:
                resp = await client.get(f"{base_url}/agents")
            if resp.status_code == 200:
                data = resp.json()
                payload = data.get("agents", [])
                if isinstance(payload, list):
                    # Drop entries that are not objects; the loop below
                    # relies on .get() and would otherwise raise outside
                    # any try block and turn into a 500.
                    router_agents = [ra for ra in payload if isinstance(ra, dict)]
                else:
                    # e.g. {"agents": null} — treat as "no agents reported".
                    logger.warning(
                        f"Unexpected agents payload from router for {node_id}: "
                        f"{type(payload).__name__}"
                    )
        except Exception as e:
            logger.warning(f"Failed to get agents from router for {node_id}: {e}")

    # Get agents from DB for this node
    try:
        db_agents = await repo_city.get_agents_for_node(node_id)
        db_agent_ids = {a.get("id") or a.get("slug") for a in db_agents}
    except Exception as e:
        logger.warning(f"Failed to get DB agents for {node_id}: {e}")
        db_agents = []
        db_agent_ids = set()

    # Build combined list
    result_agents = []
    router_agent_ids = set()

    for ra in router_agents:
        agent_id = ra.get("id") or ra.get("name") or ra.get("slug")
        if not agent_id:
            continue
        router_agent_ids.add(agent_id)

        # Check if in DB
        has_db_record = agent_id in db_agent_ids

        result_agents.append({
            "id": agent_id,
            "name": ra.get("name"),
            "kind": ra.get("kind"),
            "runtime": ra.get("runtime") or f"{node_id}-router",
            "node_id": node_id,
            "last_seen_at": ra.get("last_seen_at"),
            "status": "active" if has_db_record else "phantom",
            "has_db_record": has_db_record,
        })

    # Add stale agents (in DB but not in router)
    for db_agent in db_agents:
        agent_id = db_agent.get("id") or db_agent.get("slug")
        if agent_id and agent_id not in router_agent_ids:
            result_agents.append({
                "id": agent_id,
                "name": db_agent.get("display_name") or db_agent.get("name"),
                "kind": db_agent.get("kind"),
                "runtime": None,
                "node_id": node_id,
                "last_seen_at": None,
                "status": "stale",
                "has_db_record": True,
            })

    # Count by status in a single pass; only these three statuses are ever
    # assigned above.
    counts = {"active": 0, "phantom": 0, "stale": 0}
    for agent in result_agents:
        counts[agent["status"]] += 1

    return {
        "node_id": node_id,
        "total": len(result_agents),
        "active": counts["active"],
        "phantom": counts["phantom"],
        "stale": counts["stale"],
        "agents": result_agents,
    }
|
||||
|
||||
|
||||
@router.get("/internal/node/{node_id}/dagi-router/summary")
async def get_dagi_router_summary(node_id: str):
    """
    Get combined DAGI Router status summary for a node.

    Combines the router health probe, the router-vs-DB agent comparison and
    the timestamp of the most recent row in dagi_audit_reports for the node.

    Args:
        node_id: Node identifier passed to the health/agents handlers and
            used to filter audit reports.

    Returns:
        dict with node_id, status, version, latency_ms, router_agent_count
        (as reported by the router's health endpoint), db_agent_count
        (agents that have a DB record), active, phantom, stale and
        last_audit_at (ISO-8601 string, or None when there is no audit or
        the lookup failed).
    """
    # Get health
    health = await get_dagi_router_health(node_id)

    # Get agents info
    agents_info = await get_dagi_router_agents(node_id)

    active = agents_info.get("active", 0)
    phantom = agents_info.get("phantom", 0)
    stale = agents_info.get("stale", 0)

    # Get last audit timestamp
    last_audit_at = None
    try:
        pool = await repo_city.get_pool()
        row = await pool.fetchrow("""
            SELECT MAX(created_at) as last_audit
            FROM dagi_audit_reports
            WHERE node_id = $1
        """, node_id)
        if row and row["last_audit"]:
            last_audit_at = row["last_audit"].isoformat()
    except Exception as e:
        # Best effort: the summary is still useful without the audit time.
        logger.warning(f"Failed to get last audit for {node_id}: {e}")

    return {
        "node_id": node_id,
        "status": health.get("status", "down"),
        "version": health.get("version"),
        "latency_ms": health.get("latency_ms"),
        "router_agent_count": health.get("agent_count", 0),
        # "total" from the agents handler also counts phantom agents, which
        # have no DB record; only active + stale agents exist in the DB.
        "db_agent_count": active + stale,
        "active": active,
        "phantom": phantom,
        "stale": stale,
        "last_audit_at": last_audit_at,
    }
|
||||
|
||||
|
||||
@router.get("/internal/node/{node_id}/directory-check")
|
||||
async def check_node_in_directory(node_id: str):
|
||||
"""
|
||||
|
||||
Reference in New Issue
Block a user