fix(node2): Use node_cache router_healthy for DAGI Router agents status

- Fix get_dagi_router_agents to use router_healthy from node_cache first
- Fall back to a direct API health check when the cache is unavailable or reports the router as unhealthy
- This fixes NODE2 agents showing as 'stale' when the router is actually healthy
- Fix CITY_SERVICE_URL in scripts (remove /api/city, use /api)
This commit is contained in:
Apple
2025-12-02 07:02:08 -08:00
parent 80123fd1be
commit 88188ed693
2 changed files with 38 additions and 16 deletions

View File

@@ -4571,23 +4571,34 @@ async def get_dagi_router_agents(node_id: str):
"""
import httpx
# Get router URL from database (node-specific)
endpoints = await repo_city.get_node_endpoints(node_id)
base_url = endpoints.get("router_url")
# First, try to get router health from node_cache (populated by node-guardian)
# This is preferred for remote nodes (like NODE2) where direct connection may not work
router_healthy = False
try:
metrics = await repo_city.get_node_metrics(node_id)
if metrics and metrics.get("router_healthy") is not None:
router_healthy = metrics.get("router_healthy", False)
logger.debug(f"Using router_healthy from node_cache for {node_id}: {router_healthy}")
except Exception as e:
logger.debug(f"Failed to get cached router health for {node_id}: {e}")
# Check if router is healthy
if base_url:
try:
async with httpx.AsyncClient(timeout=3.0) as client:
resp = await client.get(f"{base_url}/health")
if resp.status_code == 200:
data = resp.json()
# Router can return "healthy" or "ok"
status = data.get("status", "").lower()
router_healthy = status in ("healthy", "ok")
except Exception as e:
logger.warning(f"Failed to check router health for {node_id} at {base_url}: {e}")
# Fallback: try direct health check (only works for local nodes like NODE1)
if router_healthy is False:
endpoints = await repo_city.get_node_endpoints(node_id)
base_url = endpoints.get("router_url")
if base_url:
try:
async with httpx.AsyncClient(timeout=3.0) as client:
resp = await client.get(f"{base_url}/health")
if resp.status_code == 200:
data = resp.json()
# Router can return "healthy" or "ok"
status = data.get("status", "").lower()
router_healthy = status in ("healthy", "ok")
logger.debug(f"Direct router health check for {node_id}: {router_healthy}")
except Exception as e:
logger.debug(f"Failed to check router health for {node_id} at {base_url}: {e}")
# Get agents from DB for this node
try: