fix(nodes): Normalize Router/Swapper endpoints and fix NODE2 display
Major changes:
- Normalize get_node_endpoints to use ENV vars (ROUTER_BASE_URL, SWAPPER_BASE_URL)
- Remove node_id-based URL selection logic
- Add fallback direct API call in get_node_swapper_detail
- Fix Swapper API endpoint (/models instead of /api/v1/models)
- Add router_healthy and router_version to node_heartbeat fallback
- Add ENV vars to docker-compose for Router/Swapper URLs

Documentation:
- Add TASK_PHASE_NODE2_ROUTER_SWAPPER_FIX.md with full task description
- Add NODE2_GUARDIAN_SETUP.md with setup instructions

This fixes:
- Swapper models not showing for NODE1 and NODE2
- DAGI Router agents not showing for NODE2
- Router/Swapper showing as Down/Degraded when they're actually up
This commit is contained in:
@@ -4388,43 +4388,86 @@ async def get_node_swapper_detail(node_id: str):
|
||||
"""
|
||||
Get detailed Swapper Service status for a node.
|
||||
Used by Node Cabinet to show loaded models and health.
|
||||
Returns fallback data if metrics not found (instead of 404).
|
||||
|
||||
First tries to get data from node_cache (populated by node-guardian).
|
||||
If not found, attempts direct call to Swapper API as fallback.
|
||||
Returns fallback data if both fail (instead of 404).
|
||||
"""
|
||||
import httpx
|
||||
|
||||
try:
|
||||
# Fetch from node_cache
|
||||
# First, try to fetch from node_cache (preferred - populated by node-guardian)
|
||||
metrics = await repo_city.get_node_metrics(node_id)
|
||||
if not metrics:
|
||||
# Return fallback instead of 404 - allows UI to show pending state
|
||||
logger.info(f"Swapper metrics not found for {node_id}, returning fallback")
|
||||
if metrics:
|
||||
# Parse swapper state (stored as JSONB)
|
||||
state = metrics.get("swapper_state") or {}
|
||||
models_data = state.get("models", [])
|
||||
|
||||
models = [
|
||||
SwapperModel(
|
||||
name=m.get("name", "unknown"),
|
||||
# Swapper uses "status": "loaded" not "loaded": true
|
||||
loaded=m.get("status") == "loaded" or m.get("loaded", False),
|
||||
type=m.get("type"),
|
||||
vram_gb=m.get("size_gb") or m.get("vram_gb")
|
||||
)
|
||||
for m in models_data
|
||||
]
|
||||
|
||||
return NodeSwapperDetail(
|
||||
node_id=node_id,
|
||||
healthy=False,
|
||||
models_loaded=0,
|
||||
models_total=0,
|
||||
models=[]
|
||||
healthy=metrics.get("swapper_healthy", False),
|
||||
models_loaded=metrics.get("swapper_models_loaded", 0),
|
||||
models_total=metrics.get("swapper_models_total", 0),
|
||||
models=models
|
||||
)
|
||||
|
||||
# Parse swapper state (stored as JSONB)
|
||||
state = metrics.get("swapper_state") or {}
|
||||
models_data = state.get("models", [])
|
||||
|
||||
models = [
|
||||
SwapperModel(
|
||||
name=m.get("name", "unknown"),
|
||||
# Swapper uses "status": "loaded" not "loaded": true
|
||||
loaded=m.get("status") == "loaded" or m.get("loaded", False),
|
||||
type=m.get("type"),
|
||||
vram_gb=m.get("size_gb") or m.get("vram_gb")
|
||||
)
|
||||
for m in models_data
|
||||
]
|
||||
# Fallback: try direct call to Swapper API
|
||||
logger.info(f"Swapper metrics not found in cache for {node_id}, trying direct API call")
|
||||
endpoints = await repo_city.get_node_endpoints(node_id)
|
||||
swapper_url = endpoints.get("swapper_url")
|
||||
|
||||
if swapper_url:
|
||||
try:
|
||||
async with httpx.AsyncClient(timeout=5.0) as client:
|
||||
# Try to get models from Swapper (endpoint: /models, not /api/v1/models)
|
||||
resp = await client.get(f"{swapper_url}/models")
|
||||
if resp.status_code == 200:
|
||||
data = resp.json()
|
||||
models_list = data.get("models", []) if isinstance(data, dict) else data
|
||||
|
||||
models = [
|
||||
SwapperModel(
|
||||
name=m.get("name", "unknown"),
|
||||
loaded=m.get("status") == "loaded" or m.get("loaded", False),
|
||||
type=m.get("type"),
|
||||
vram_gb=m.get("size_gb") or m.get("vram_gb")
|
||||
)
|
||||
for m in models_list
|
||||
]
|
||||
|
||||
loaded_count = sum(1 for m in models if m.loaded)
|
||||
|
||||
logger.info(f"✅ Direct Swapper API call successful: {loaded_count}/{len(models)} models loaded")
|
||||
|
||||
return NodeSwapperDetail(
|
||||
node_id=node_id,
|
||||
healthy=True,
|
||||
models_loaded=loaded_count,
|
||||
models_total=len(models),
|
||||
models=models
|
||||
)
|
||||
except Exception as api_error:
|
||||
logger.warning(f"Direct Swapper API call failed for {node_id} at {swapper_url}: {api_error}")
|
||||
|
||||
# Final fallback: return empty state
|
||||
logger.info(f"Swapper data unavailable for {node_id}, returning fallback")
|
||||
return NodeSwapperDetail(
|
||||
node_id=node_id,
|
||||
healthy=metrics.get("swapper_healthy", False),
|
||||
models_loaded=metrics.get("swapper_models_loaded", 0),
|
||||
models_total=metrics.get("swapper_models_total", 0),
|
||||
models=models
|
||||
healthy=False,
|
||||
models_loaded=0,
|
||||
models_total=0,
|
||||
models=[]
|
||||
)
|
||||
except HTTPException:
|
||||
raise
|
||||
|
||||
Reference in New Issue
Block a user