feat: full node isolation - use node-specific swapper_url and router_url from DB
- Add migration 041_node_local_endpoints.sql
- Add get_node_endpoints() to repo_city.py
- Update routes_city.py to use DB endpoints instead of hardcoded URLs
- Update node-guardian-loop.py to use NODE_SWAPPER_URL/NODE_ROUTER_URL env vars
- Update launchd plist for NODE2 with router URL
This commit is contained in:
@@ -3287,6 +3287,8 @@ async def get_node_metrics_current(node_id: str) -> Optional[Dict[str, Any]]:
|
||||
COALESCE(swapper_healthy, false) as swapper_healthy,
|
||||
COALESCE(swapper_models_loaded, 0) as swapper_models_loaded,
|
||||
COALESCE(swapper_models_total, 0) as swapper_models_total,
|
||||
router_url,
|
||||
swapper_url,
|
||||
updated_at
|
||||
FROM node_cache
|
||||
WHERE node_id = $1
|
||||
@@ -3341,6 +3343,34 @@ async def get_node_metrics_current(node_id: str) -> Optional[Dict[str, Any]]:
|
||||
return result
|
||||
|
||||
|
||||
async def get_node_endpoints(node_id: str) -> Dict[str, str]:
    """
    Return the router and swapper endpoint URLs for a specific node.

    Reads ``router_url`` and ``swapper_url`` from the ``node_cache`` row
    matching ``node_id``. If the row is missing, or a column is NULL/empty,
    the corresponding NODE1 default is substituted.
    """
    # Fallback values (NODE1, Docker-based)
    fallback = {
        "router_url": "http://dagi-router:9102",
        "swapper_url": "http://swapper-service:8890"
    }

    pool = await get_pool()
    record = await pool.fetchrow("""
        SELECT router_url, swapper_url
        FROM node_cache
        WHERE node_id = $1
    """, node_id)

    if record:
        # Per-key fallback: a falsy stored value yields the default for that key.
        return {key: record[key] or fallback[key] for key in fallback}

    return fallback
|
||||
|
||||
|
||||
async def get_node_metrics(node_id: str) -> Optional[Dict[str, Any]]:
|
||||
"""
|
||||
Отримати розширені метрики ноди (включаючи Swapper).
|
||||
@@ -3353,7 +3383,9 @@ async def get_node_metrics(node_id: str) -> Optional[Dict[str, Any]]:
|
||||
swapper_healthy,
|
||||
swapper_models_loaded,
|
||||
swapper_models_total,
|
||||
swapper_state
|
||||
swapper_state,
|
||||
router_url,
|
||||
swapper_url
|
||||
FROM node_cache
|
||||
WHERE node_id = $1
|
||||
""", node_id)
|
||||
|
||||
@@ -4087,17 +4087,14 @@ async def get_dagi_router_health(node_id: str):
|
||||
"""
|
||||
Get DAGI Router health status for a node.
|
||||
Always returns 200 with status="down" if router is unavailable.
|
||||
Uses node-specific router_url from node_cache.
|
||||
"""
|
||||
import httpx
|
||||
import time
|
||||
|
||||
# Node-specific router URLs
|
||||
NODE_ROUTER_URLS = {
|
||||
"node-1-hetzner-gex44": "http://dagi-router:9102",
|
||||
"node-2-macbook-m4max": "http://localhost:9102", # Local router on NODE2
|
||||
}
|
||||
|
||||
base_url = NODE_ROUTER_URLS.get(node_id)
|
||||
# Get router URL from database (node-specific)
|
||||
endpoints = await repo_city.get_node_endpoints(node_id)
|
||||
base_url = endpoints.get("router_url")
|
||||
|
||||
if not base_url:
|
||||
return {
|
||||
@@ -4125,9 +4122,12 @@ async def get_dagi_router_health(node_id: str):
|
||||
}
|
||||
|
||||
data = resp.json()
|
||||
# Router can return "healthy" or "ok"
|
||||
status_val = data.get("status", "").lower()
|
||||
is_healthy = status_val in ("healthy", "ok")
|
||||
return {
|
||||
"node_id": node_id,
|
||||
"status": "up" if data.get("status") == "healthy" else "degraded",
|
||||
"status": "up" if is_healthy else "degraded",
|
||||
"version": data.get("version"),
|
||||
"agent_count": data.get("agent_count", 0),
|
||||
"latency_ms": round(latency_ms, 2)
|
||||
@@ -4149,15 +4149,13 @@ async def get_dagi_router_agents(node_id: str):
|
||||
Get list of agents for a node.
|
||||
Since DAGI Router doesn't expose /agents endpoint, we use DB agents
|
||||
and check router health to determine status.
|
||||
Uses node-specific router_url from node_cache.
|
||||
"""
|
||||
import httpx
|
||||
|
||||
NODE_ROUTER_URLS = {
|
||||
"node-1-hetzner-gex44": "http://dagi-router:9102",
|
||||
"node-2-macbook-m4max": "http://localhost:9102",
|
||||
}
|
||||
|
||||
base_url = NODE_ROUTER_URLS.get(node_id)
|
||||
# Get router URL from database (node-specific)
|
||||
endpoints = await repo_city.get_node_endpoints(node_id)
|
||||
base_url = endpoints.get("router_url")
|
||||
router_healthy = False
|
||||
|
||||
# Check if router is healthy
|
||||
@@ -4167,9 +4165,11 @@ async def get_dagi_router_agents(node_id: str):
|
||||
resp = await client.get(f"{base_url}/health")
|
||||
if resp.status_code == 200:
|
||||
data = resp.json()
|
||||
router_healthy = data.get("status") == "healthy"
|
||||
# Router can return "healthy" or "ok"
|
||||
status = data.get("status", "").lower()
|
||||
router_healthy = status in ("healthy", "ok")
|
||||
except Exception as e:
|
||||
logger.warning(f"Failed to check router health for {node_id}: {e}")
|
||||
logger.warning(f"Failed to check router health for {node_id} at {base_url}: {e}")
|
||||
|
||||
# Get agents from DB for this node
|
||||
try:
|
||||
|
||||
Reference in New Issue
Block a user