fix(node2): Add detailed logging for router_healthy determination
This commit is contained in:
@@ -4576,14 +4576,21 @@ async def get_dagi_router_agents(node_id: str):
|
||||
router_healthy = None
|
||||
try:
|
||||
metrics = await repo_city.get_node_metrics(node_id)
|
||||
if metrics and metrics.get("router_healthy") is not None:
|
||||
router_healthy = bool(metrics.get("router_healthy", False))
|
||||
logger.info(f"Using router_healthy from node_cache for {node_id}: {router_healthy}")
|
||||
if metrics:
|
||||
router_healthy_raw = metrics.get("router_healthy")
|
||||
if router_healthy_raw is not None:
|
||||
router_healthy = bool(router_healthy_raw)
|
||||
logger.info(f"[{node_id}] Using router_healthy from node_cache: {router_healthy} (raw: {router_healthy_raw})")
|
||||
else:
|
||||
logger.debug(f"[{node_id}] router_healthy is None in node_cache")
|
||||
else:
|
||||
logger.debug(f"[{node_id}] No metrics found in node_cache")
|
||||
except Exception as e:
|
||||
logger.debug(f"Failed to get cached router health for {node_id}: {e}")
|
||||
logger.warning(f"[{node_id}] Failed to get cached router health: {e}")
|
||||
|
||||
# Fallback: try direct health check (only works for local nodes like NODE1)
|
||||
if router_healthy is None:
|
||||
logger.info(f"[{node_id}] router_healthy is None, trying direct health check")
|
||||
endpoints = await repo_city.get_node_endpoints(node_id)
|
||||
base_url = endpoints.get("router_url")
|
||||
|
||||
@@ -4596,9 +4603,16 @@ async def get_dagi_router_agents(node_id: str):
|
||||
# Router can return "healthy" or "ok"
|
||||
status = data.get("status", "").lower()
|
||||
router_healthy = status in ("healthy", "ok")
|
||||
logger.debug(f"Direct router health check for {node_id}: {router_healthy}")
|
||||
logger.info(f"[{node_id}] Direct router health check: {router_healthy}")
|
||||
except Exception as e:
|
||||
logger.debug(f"Failed to check router health for {node_id} at {base_url}: {e}")
|
||||
logger.warning(f"[{node_id}] Failed to check router health at {base_url}: {e}")
|
||||
|
||||
# Final fallback
|
||||
if router_healthy is None:
|
||||
router_healthy = False
|
||||
logger.warning(f"[{node_id}] router_healthy is None after all checks, defaulting to False")
|
||||
|
||||
logger.info(f"[{node_id}] Final router_healthy value: {router_healthy}")
|
||||
|
||||
# Get agents from DB for this node
|
||||
try:
|
||||
|
||||
Reference in New Issue
Block a user