fix(node2): Add detailed logging for router_healthy determination

This commit is contained in:
Apple
2025-12-02 07:03:50 -08:00
parent 90ebf32de3
commit 08cb8c52cc

View File

@@ -4576,14 +4576,21 @@ async def get_dagi_router_agents(node_id: str):
router_healthy = None
try:
metrics = await repo_city.get_node_metrics(node_id)
if metrics and metrics.get("router_healthy") is not None:
router_healthy = bool(metrics.get("router_healthy", False))
logger.info(f"Using router_healthy from node_cache for {node_id}: {router_healthy}")
if metrics:
router_healthy_raw = metrics.get("router_healthy")
if router_healthy_raw is not None:
router_healthy = bool(router_healthy_raw)
logger.info(f"[{node_id}] Using router_healthy from node_cache: {router_healthy} (raw: {router_healthy_raw})")
else:
logger.debug(f"[{node_id}] router_healthy is None in node_cache")
else:
logger.debug(f"[{node_id}] No metrics found in node_cache")
except Exception as e:
logger.debug(f"Failed to get cached router health for {node_id}: {e}")
logger.warning(f"[{node_id}] Failed to get cached router health: {e}")
# Fallback: try direct health check (only works for local nodes like NODE1)
if router_healthy is None:
logger.info(f"[{node_id}] router_healthy is None, trying direct health check")
endpoints = await repo_city.get_node_endpoints(node_id)
base_url = endpoints.get("router_url")
@@ -4596,9 +4603,16 @@ async def get_dagi_router_agents(node_id: str):
# Router can return "healthy" or "ok"
status = data.get("status", "").lower()
router_healthy = status in ("healthy", "ok")
logger.debug(f"Direct router health check for {node_id}: {router_healthy}")
logger.info(f"[{node_id}] Direct router health check: {router_healthy}")
except Exception as e:
logger.debug(f"Failed to check router health for {node_id} at {base_url}: {e}")
logger.warning(f"[{node_id}] Failed to check router health at {base_url}: {e}")
# Final fallback
if router_healthy is None:
router_healthy = False
logger.warning(f"[{node_id}] router_healthy is None after all checks, defaulting to False")
logger.info(f"[{node_id}] Final router_healthy value: {router_healthy}")
# Get agents from DB for this node
try: