feat: full node isolation - use node-specific swapper_url and router_url from DB
- Add migration 041_node_local_endpoints.sql
- Add get_node_endpoints() to repo_city.py
- Update routes_city.py to use DB endpoints instead of hardcoded URLs
- Update node-guardian-loop.py to use NODE_SWAPPER_URL/NODE_ROUTER_URL env vars
- Update launchd plist for NODE2 with router URL
This commit is contained in:
@@ -16,6 +16,9 @@ Environment variables:
|
||||
NODE_NAME - Назва ноди (для self-registration)
|
||||
NODE_ENVIRONMENT - production/development
|
||||
NODE_ROLES - Ролі через кому
|
||||
NODE_SWAPPER_URL - URL Swapper Service (node-specific)
|
||||
NODE_ROUTER_URL - URL DAGI Router (node-specific)
|
||||
SWAPPER_URL - Legacy alias for NODE_SWAPPER_URL
|
||||
"""
|
||||
|
||||
import argparse
|
||||
@@ -63,7 +66,9 @@ class NodeGuardian:
|
||||
city_url: str,
|
||||
environment: str = "development",
|
||||
roles: list = None,
|
||||
hostname: str = None
|
||||
hostname: str = None,
|
||||
swapper_url: str = None,
|
||||
router_url: str = None
|
||||
):
|
||||
self.node_id = node_id
|
||||
self.node_name = node_name
|
||||
@@ -72,6 +77,20 @@ class NodeGuardian:
|
||||
self.roles = roles or []
|
||||
self.hostname = hostname
|
||||
|
||||
# Node-specific service URLs
|
||||
# Priority: explicit param > NODE_*_URL env > legacy SWAPPER_URL env (swapper only) > built-in default
|
||||
self.swapper_url = (
|
||||
swapper_url or
|
||||
os.getenv("NODE_SWAPPER_URL") or
|
||||
os.getenv("SWAPPER_URL") or
|
||||
"http://swapper-service:8890"
|
||||
)
|
||||
self.router_url = (
|
||||
router_url or
|
||||
os.getenv("NODE_ROUTER_URL") or
|
||||
"http://dagi-router:9102"
|
||||
)
|
||||
|
||||
self.client = httpx.AsyncClient(timeout=10.0)
|
||||
self.healing_attempts = 0
|
||||
self.last_successful_check = None
|
||||
@@ -144,11 +163,15 @@ class NodeGuardian:
|
||||
async def send_heartbeat(self, metrics: Dict = None) -> bool:
|
||||
"""Відправити heartbeat"""
|
||||
try:
|
||||
payload = {"metrics": metrics or {}}
|
||||
# Add node-specific URLs to metrics for storage in node_cache
|
||||
metrics_with_urls = metrics or {}
|
||||
metrics_with_urls["swapper_url"] = self.swapper_url
|
||||
metrics_with_urls["router_url"] = self.router_url
|
||||
|
||||
payload = {"metrics": metrics_with_urls}
|
||||
|
||||
# Log key info for debugging node isolation
|
||||
swapper_url = os.getenv("SWAPPER_URL", "http://swapper-service:8890")
|
||||
logger.info(f"📤 Sending heartbeat: node_id={self.node_id}, swapper_url={swapper_url}")
|
||||
logger.info(f"📤 Sending heartbeat: node_id={self.node_id}, swapper_url={self.swapper_url}, router_url={self.router_url}")
|
||||
|
||||
response = await self.client.post(
|
||||
f"{self.city_url}/city/internal/node/{self.node_id}/heartbeat",
|
||||
@@ -193,7 +216,7 @@ class NodeGuardian:
|
||||
"disk_used": 0,
|
||||
"agent_count_router": 0,
|
||||
"agent_count_system": 0,
|
||||
"dagi_router_url": "http://dagi-router:9102",
|
||||
"dagi_router_url": self.router_url, # Use node-specific URL
|
||||
# Swapper defaults
|
||||
"swapper_healthy": False,
|
||||
"swapper_models_loaded": 0,
|
||||
@@ -201,12 +224,11 @@ class NodeGuardian:
|
||||
"swapper_state": {}
|
||||
}
|
||||
|
||||
# Collect Swapper Metrics
|
||||
swapper_url = os.getenv("SWAPPER_URL", "http://swapper-service:8890")
|
||||
# Collect Swapper Metrics using node-specific URL
|
||||
try:
|
||||
# Check health (Swapper uses /health, not /healthz)
|
||||
try:
|
||||
r = await self.client.get(f"{swapper_url}/health", timeout=3.0)
|
||||
r = await self.client.get(f"{self.swapper_url}/health", timeout=3.0)
|
||||
if r.status_code == 200:
|
||||
health_data = r.json()
|
||||
# Swapper can return "status": "healthy" or "ok"
|
||||
@@ -219,7 +241,7 @@ class NodeGuardian:
|
||||
|
||||
# Check models (Swapper uses /models, not /v1/models)
|
||||
try:
|
||||
r = await self.client.get(f"{swapper_url}/models", timeout=5.0)
|
||||
r = await self.client.get(f"{self.swapper_url}/models", timeout=5.0)
|
||||
if r.status_code == 200:
|
||||
data = r.json()
|
||||
models = data.get("models", [])
|
||||
@@ -229,7 +251,7 @@ class NodeGuardian:
|
||||
metrics["swapper_state"] = data
|
||||
logger.debug(f"🧠 Swapper metrics: healthy={metrics['swapper_healthy']}, loaded={metrics['swapper_models_loaded']}/{metrics['swapper_models_total']}")
|
||||
except Exception as e:
|
||||
logger.warning(f"Failed to fetch Swapper models from {swapper_url}: {e}")
|
||||
logger.warning(f"Failed to fetch Swapper models from {self.swapper_url}: {e}")
|
||||
|
||||
except Exception as e:
|
||||
logger.warning(f"Swapper metrics collection failed: {e}")
|
||||
@@ -347,6 +369,8 @@ async def run_guardian_loop(
|
||||
logger.info(f" Node Name: {node_name}")
|
||||
logger.info(f" Environment: {environment}")
|
||||
logger.info(f" City Service: {city_url}")
|
||||
logger.info(f" Swapper URL: {guardian.swapper_url}")
|
||||
logger.info(f" Router URL: {guardian.router_url}")
|
||||
logger.info(f" Interval: {interval}s")
|
||||
logger.info("=" * 60)
|
||||
|
||||
|
||||
Reference in New Issue
Block a user