feat: full node isolation - use node-specific swapper_url and router_url from DB

- Add migration 041_node_local_endpoints.sql
- Add get_node_endpoints() to repo_city.py
- Update routes_city.py to use DB endpoints instead of hardcoded URLs
- Update node-guardian-loop.py to use NODE_SWAPPER_URL/NODE_ROUTER_URL env vars
- Update launchd plist for NODE2 with router URL
This commit is contained in:
Apple
2025-12-01 08:01:53 -08:00
parent b25e002db6
commit 9b9a72ffbd
4 changed files with 113 additions and 27 deletions

View File

@@ -4087,17 +4087,14 @@ async def get_dagi_router_health(node_id: str):
"""
Get DAGI Router health status for a node.
Always returns 200 with status="down" if router is unavailable.
Uses node-specific router_url from node_cache.
"""
import httpx
import time
# Node-specific router URLs
NODE_ROUTER_URLS = {
"node-1-hetzner-gex44": "http://dagi-router:9102",
"node-2-macbook-m4max": "http://localhost:9102", # Local router on NODE2
}
base_url = NODE_ROUTER_URLS.get(node_id)
# Get router URL from database (node-specific)
endpoints = await repo_city.get_node_endpoints(node_id)
base_url = endpoints.get("router_url")
if not base_url:
return {
@@ -4125,9 +4122,12 @@ async def get_dagi_router_health(node_id: str):
}
data = resp.json()
# Router can return "healthy" or "ok"
status_val = data.get("status", "").lower()
is_healthy = status_val in ("healthy", "ok")
return {
"node_id": node_id,
"status": "up" if data.get("status") == "healthy" else "degraded",
"status": "up" if is_healthy else "degraded",
"version": data.get("version"),
"agent_count": data.get("agent_count", 0),
"latency_ms": round(latency_ms, 2)
@@ -4149,15 +4149,13 @@ async def get_dagi_router_agents(node_id: str):
Get list of agents for a node.
Since DAGI Router doesn't expose /agents endpoint, we use DB agents
and check router health to determine status.
Uses node-specific router_url from node_cache.
"""
import httpx
NODE_ROUTER_URLS = {
"node-1-hetzner-gex44": "http://dagi-router:9102",
"node-2-macbook-m4max": "http://localhost:9102",
}
base_url = NODE_ROUTER_URLS.get(node_id)
# Get router URL from database (node-specific)
endpoints = await repo_city.get_node_endpoints(node_id)
base_url = endpoints.get("router_url")
router_healthy = False
# Check if router is healthy
@@ -4167,9 +4165,11 @@ async def get_dagi_router_agents(node_id: str):
resp = await client.get(f"{base_url}/health")
if resp.status_code == 200:
data = resp.json()
router_healthy = data.get("status") == "healthy"
# Router can return "healthy" or "ok"
status = data.get("status", "").lower()
router_healthy = status in ("healthy", "ok")
except Exception as e:
logger.warning(f"Failed to check router health for {node_id}: {e}")
logger.warning(f"Failed to check router health for {node_id} at {base_url}: {e}")
# Get agents from DB for this node
try: