diff --git a/services/city-service/routes_city.py b/services/city-service/routes_city.py index 6b3440b7..c957c7fc 100644 --- a/services/city-service/routes_city.py +++ b/services/city-service/routes_city.py @@ -4576,14 +4576,21 @@ async def get_dagi_router_agents(node_id: str): router_healthy = None try: metrics = await repo_city.get_node_metrics(node_id) - if metrics and metrics.get("router_healthy") is not None: - router_healthy = bool(metrics.get("router_healthy", False)) - logger.info(f"Using router_healthy from node_cache for {node_id}: {router_healthy}") + if metrics: + router_healthy_raw = metrics.get("router_healthy") + if router_healthy_raw is not None: + router_healthy = bool(router_healthy_raw) + logger.info(f"[{node_id}] Using router_healthy from node_cache: {router_healthy} (raw: {router_healthy_raw})") + else: + logger.debug(f"[{node_id}] router_healthy is None in node_cache") + else: + logger.debug(f"[{node_id}] No metrics found in node_cache") except Exception as e: - logger.debug(f"Failed to get cached router health for {node_id}: {e}") + logger.warning(f"[{node_id}] Failed to get cached router health: {e}") # Fallback: try direct health check (only works for local nodes like NODE1) if router_healthy is None: + logger.info(f"[{node_id}] router_healthy is None, trying direct health check") endpoints = await repo_city.get_node_endpoints(node_id) base_url = endpoints.get("router_url") @@ -4596,9 +4603,16 @@ async def get_dagi_router_agents(node_id: str): # Router can return "healthy" or "ok" status = data.get("status", "").lower() router_healthy = status in ("healthy", "ok") - logger.debug(f"Direct router health check for {node_id}: {router_healthy}") + logger.info(f"[{node_id}] Direct router health check: {router_healthy}") except Exception as e: - logger.debug(f"Failed to check router health for {node_id} at {base_url}: {e}") + logger.warning(f"[{node_id}] Failed to check router health at {base_url}: {e}") + + # Final fallback + if router_healthy is None: + router_healthy = False + logger.warning(f"[{node_id}] router_healthy is None after all checks, defaulting to False") + + logger.info(f"[{node_id}] Final router_healthy value: {router_healthy}") # Get agents from DB for this node try: