feat: DAGI Router v2 - new endpoints, hooks, and UI card

This commit is contained in:
Apple
2025-12-01 05:21:43 -08:00
parent 53f31adbf0
commit e3accd4df0
221 changed files with 999 additions and 261 deletions

View File

@@ -23,3 +23,4 @@ CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "7005"]

View File

@@ -293,3 +293,4 @@ curl http://localhost:7004/internal/messaging/channels/{channel_id}/context

View File

@@ -17,3 +17,4 @@ rules:

View File

@@ -161,3 +161,4 @@ async def shutdown_event():

View File

@@ -37,3 +37,4 @@ class FilterContext(BaseModel):

View File

@@ -9,3 +9,4 @@ PyYAML==6.0.1

View File

@@ -116,3 +116,4 @@ class FilterRules:

View File

@@ -23,3 +23,4 @@ CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "7006"]

View File

@@ -406,3 +406,4 @@ curl -X POST http://localhost:7006/internal/agent-runtime/test-channel \

View File

@@ -22,3 +22,4 @@ memory:

View File

@@ -73,3 +73,4 @@ async def post_message(agent_id: str, channel_id: str, text: str) -> bool:

View File

@@ -36,3 +36,4 @@ class LLMResponse(BaseModel):

View File

@@ -74,3 +74,4 @@ pep_client = PEPClient()

View File

@@ -9,3 +9,4 @@ PyYAML==6.0.1

View File

@@ -173,3 +173,4 @@ Connects to:

View File

@@ -219,3 +219,4 @@ docker run -p 7011:7011 \

View File

@@ -128,3 +128,4 @@ async def require_actor(

View File

@@ -229,3 +229,4 @@ class PasskeyStore:

View File

@@ -126,3 +126,4 @@ async def delete_api_key(

View File

@@ -328,3 +328,4 @@ async def authenticate_finish(

View File

@@ -128,3 +128,4 @@ async def logout(

View File

@@ -208,3 +208,4 @@ def hash_credential_id(credential_id: str) -> str:

View File

@@ -616,3 +616,52 @@ class NodeSwapperDetail(BaseModel):
models_loaded: int
models_total: int
models: List[SwapperModel] = []
# =============================================================================
# DAGI Router
# =============================================================================
class DagiRouterHealth(BaseModel):
    """Health snapshot of the DAGI Router attached to one node."""

    node_id: str
    # Expected values: "up", "down", "degraded"
    status: str
    version: Optional[str] = None
    agent_count: int = 0
    latency_ms: Optional[float] = None
class DagiRouterAgent(BaseModel):
    """A single agent entry as listed for a node's DAGI Router."""

    id: str
    name: Optional[str] = None
    kind: Optional[str] = None
    # Runtime label, e.g. "NODE1-router", "NODE2-router"
    runtime: Optional[str] = None
    node_id: str
    last_seen_at: Optional[datetime] = None
    # Expected values: "active", "phantom", "stale"
    status: str = "active"
    has_db_record: bool = False
class DagiRouterAgentsResponse(BaseModel):
    """Payload of the DAGI Router agents endpoint: per-status counters plus the agent list."""

    node_id: str
    total: int = 0
    active: int = 0
    phantom: int = 0
    stale: int = 0
    agents: List[DagiRouterAgent] = []
class DagiRouterSummary(BaseModel):
    """Condensed DAGI Router status for one node: health, agent counters, last audit time."""

    node_id: str
    # Expected values: "up", "down", "degraded"
    status: str
    version: Optional[str] = None
    latency_ms: Optional[float] = None
    router_agent_count: int = 0
    db_agent_count: int = 0
    active: int = 0
    phantom: int = 0
    stale: int = 0
    last_audit_at: Optional[datetime] = None

View File

@@ -3648,6 +3648,12 @@ async def get_node_agents(node_id: str) -> List[Dict[str, Any]]:
return [dict(row) for row in rows]
# Alias for DAGI Router integration
async def get_agents_for_node(node_id: str) -> List[Dict[str, Any]]:
    """Return the agents of ``node_id`` by delegating to ``get_node_agents``.

    Exists only as an alternative name used by the DAGI Router endpoints.
    """
    node_agents = await get_node_agents(node_id)
    return node_agents
# ==============================================================================
# Node Self-Registration & Self-Healing
# ==============================================================================

View File

@@ -4103,6 +4103,198 @@ async def get_node_swapper_detail(node_id: str):
)
@router.get("/internal/node/{node_id}/dagi-router/health")
async def get_dagi_router_health(node_id: str):
    """
    Get DAGI Router health status for a node.

    Probes ``GET {base_url}/health`` on the router configured for ``node_id``
    and measures the round trip with a monotonic clock.

    Never raises: an unknown node, a transport error, a non-200 reply or an
    unparsable body are all reported as a payload with status="down", so the
    endpoint always answers 200.

    Returns:
        dict with keys ``node_id``, ``status`` ("up", "degraded" or "down"),
        ``version``, ``agent_count`` and ``latency_ms`` (milliseconds rounded
        to 2 decimals, or None when no response was received). An ``error``
        key is present only when no router URL is configured for the node.
    """
    import httpx
    import time

    # Node-specific router URLs
    NODE_ROUTER_URLS = {
        "node-1-hetzner-gex44": "http://dagi-router:9102",
        "node-2-macbook-m4max": "http://localhost:9102",  # Local router on NODE2
    }

    def _down(latency_ms=None, error=None):
        # Single place that builds the "router unavailable" payload so every
        # failure path returns the same shape.
        payload = {
            "node_id": node_id,
            "status": "down",
            "version": None,
            "agent_count": 0,
            "latency_ms": latency_ms,
        }
        if error is not None:
            payload["error"] = error
        return payload

    base_url = NODE_ROUTER_URLS.get(node_id)
    if not base_url:
        return _down(error="No router URL configured for this node")

    try:
        start = time.monotonic()
        async with httpx.AsyncClient(timeout=3.0) as client:
            resp = await client.get(f"{base_url}/health")
            # Rounded once here so the 200 and non-200 paths report latency
            # with the same precision.
            latency_ms = round((time.monotonic() - start) * 1000.0, 2)

        if resp.status_code != 200:
            return _down(latency_ms=latency_ms)

        data = resp.json()
        return {
            "node_id": node_id,
            # The router counts as "up" only when it reports itself "healthy";
            # any other answer from a reachable router is "degraded".
            "status": "up" if data.get("status") == "healthy" else "degraded",
            "version": data.get("version"),
            "agent_count": data.get("agent_count", 0),
            "latency_ms": latency_ms,
        }
    except Exception as e:
        # Deliberately broad: this endpoint is a best-effort probe and must
        # not surface router failures as HTTP errors.
        logger.warning(f"DAGI Router health check failed for {node_id}: {e}")
        return _down()
@router.get("/internal/node/{node_id}/dagi-router/agents")
async def get_dagi_router_agents(node_id: str):
    """
    Get list of agents registered with DAGI Router for a node.

    Merges two sources and classifies every agent:

    * "active"  - reported by the router and present in the DB
    * "phantom" - reported by the router but missing from the DB
    * "stale"   - present in the DB but not reported by the router

    Both lookups are best-effort: if the router or the DB cannot be queried,
    a warning is logged and that source is treated as empty. A router payload
    whose ``agents`` value is not a list, or whose entries are not objects,
    is ignored rather than crashing the request.

    Returns:
        dict with ``node_id``, the counters ``total`` / ``active`` /
        ``phantom`` / ``stale`` and the merged ``agents`` list.
    """
    import httpx

    NODE_ROUTER_URLS = {
        "node-1-hetzner-gex44": "http://dagi-router:9102",
        "node-2-macbook-m4max": "http://localhost:9102",
    }
    base_url = NODE_ROUTER_URLS.get(node_id)
    router_agents = []

    # Try to get agents from router
    if base_url:
        try:
            async with httpx.AsyncClient(timeout=5.0) as client:
                resp = await client.get(f"{base_url}/agents")
                if resp.status_code == 200:
                    data = resp.json()
                    listed = data.get("agents") if isinstance(data, dict) else None
                    # Keep only well-formed entries: the merge loop below
                    # calls .get() on each one outside of this try block.
                    if isinstance(listed, list):
                        router_agents = [ra for ra in listed if isinstance(ra, dict)]
        except Exception as e:
            logger.warning(f"Failed to get agents from router for {node_id}: {e}")

    # Get agents from DB for this node
    try:
        db_agents = await repo_city.get_agents_for_node(node_id)
        db_agent_ids = {a.get("id") or a.get("slug") for a in db_agents}
    except Exception as e:
        logger.warning(f"Failed to get DB agents for {node_id}: {e}")
        db_agents = []
        db_agent_ids = set()

    # Build combined list
    result_agents = []
    router_agent_ids = set()
    for ra in router_agents:
        agent_id = ra.get("id") or ra.get("name") or ra.get("slug")
        if not agent_id:
            continue
        router_agent_ids.add(agent_id)

        # Check if in DB
        has_db_record = agent_id in db_agent_ids
        result_agents.append({
            "id": agent_id,
            "name": ra.get("name"),
            "kind": ra.get("kind"),
            # Fall back to a label derived from the node when the router
            # does not name its runtime.
            "runtime": ra.get("runtime") or f"{node_id}-router",
            "node_id": node_id,
            "last_seen_at": ra.get("last_seen_at"),
            "status": "active" if has_db_record else "phantom",
            "has_db_record": has_db_record,
        })

    # Add stale agents (in DB but not in router)
    for db_agent in db_agents:
        agent_id = db_agent.get("id") or db_agent.get("slug")
        if agent_id and agent_id not in router_agent_ids:
            result_agents.append({
                "id": agent_id,
                "name": db_agent.get("display_name") or db_agent.get("name"),
                "kind": db_agent.get("kind"),
                "runtime": None,
                "node_id": node_id,
                "last_seen_at": None,
                "status": "stale",
                "has_db_record": True,
            })

    # Count by status
    active = sum(1 for a in result_agents if a["status"] == "active")
    phantom = sum(1 for a in result_agents if a["status"] == "phantom")
    stale = sum(1 for a in result_agents if a["status"] == "stale")

    return {
        "node_id": node_id,
        "total": len(result_agents),
        "active": active,
        "phantom": phantom,
        "stale": stale,
        "agents": result_agents,
    }
@router.get("/internal/node/{node_id}/dagi-router/summary")
async def get_dagi_router_summary(node_id: str):
    """
    Get combined DAGI Router status summary for a node.

    Combines the result of ``get_dagi_router_health`` and
    ``get_dagi_router_agents`` with the newest ``created_at`` found in
    ``dagi_audit_reports`` for the node.

    Returns:
        dict with ``node_id``, ``status``, ``version``, ``latency_ms``,
        ``router_agent_count`` (as reported by the router health probe),
        ``db_agent_count`` (listed agents that have a DB record), the
        ``active`` / ``phantom`` / ``stale`` counters and ``last_audit_at``
        (ISO-8601 string, or None when no audit exists or the lookup failed).
    """
    # Get health
    health = await get_dagi_router_health(node_id)

    # Get agents info
    agents_info = await get_dagi_router_agents(node_id)

    # "total" in agents_info also counts phantom agents, which by definition
    # have no DB record, so the DB count is derived from has_db_record.
    db_agent_count = sum(
        1 for agent in agents_info.get("agents", []) if agent.get("has_db_record")
    )

    # Get last audit timestamp
    last_audit_at = None
    try:
        pool = await repo_city.get_pool()
        row = await pool.fetchrow("""
            SELECT MAX(created_at) as last_audit
            FROM dagi_audit_reports
            WHERE node_id = $1
        """, node_id)
        if row and row["last_audit"]:
            last_audit_at = row["last_audit"].isoformat()
    except Exception as e:
        # Best-effort: a failed audit lookup must not fail the whole summary.
        logger.warning(f"Failed to get last audit for {node_id}: {e}")

    return {
        "node_id": node_id,
        "status": health.get("status", "down"),
        "version": health.get("version"),
        "latency_ms": health.get("latency_ms"),
        "router_agent_count": health.get("agent_count", 0),
        "db_agent_count": db_agent_count,
        "active": agents_info.get("active", 0),
        "phantom": agents_info.get("phantom", 0),
        "stale": agents_info.get("stale", 0),
        "last_audit_at": last_audit_at,
    }
@router.get("/internal/node/{node_id}/directory-check")
async def check_node_in_directory(node_id: str):
"""

View File

@@ -23,3 +23,4 @@ CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "7007"]

View File

@@ -333,3 +333,4 @@ Internal DAARION service

View File

@@ -59,3 +59,4 @@ logging:

View File

@@ -194,3 +194,4 @@ if __name__ == "__main__":

View File

@@ -101,3 +101,4 @@ class UsageTracker:

View File

@@ -60,3 +60,4 @@ class UsageLog(BaseModel):

View File

@@ -9,3 +9,4 @@ __all__ = ['BaseProvider', 'OpenAIProvider', 'DeepSeekProvider', 'LocalProvider'

View File

@@ -35,3 +35,4 @@ class BaseProvider(Protocol):

View File

@@ -74,3 +74,4 @@ class DeepSeekProvider:

View File

@@ -96,3 +96,4 @@ class LocalProvider:

View File

@@ -74,3 +74,4 @@ class OpenAIProvider:

View File

@@ -9,3 +9,4 @@ python-multipart==0.0.6

View File

@@ -74,3 +74,4 @@ class ModelRouter:

View File

@@ -645,3 +645,4 @@ Content-Type: application/json

View File

@@ -23,3 +23,4 @@ CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "7008"]

View File

@@ -316,3 +316,4 @@ CREATE TABLE agent_memories_vector (

View File

@@ -8,3 +8,4 @@ __all__ = ['ShortTermBackend', 'VectorStoreBackend', 'KnowledgeBaseBackend']

View File

@@ -74,3 +74,4 @@ class KnowledgeBaseBackend:

View File

@@ -108,3 +108,4 @@ class ShortTermBackend:

View File

@@ -184,3 +184,4 @@ class VectorStoreBackend:

View File

@@ -31,3 +31,4 @@ limits:

View File

@@ -51,3 +51,4 @@ class EmbeddingClient:

View File

@@ -243,3 +243,4 @@ if __name__ == "__main__":

View File

@@ -50,3 +50,4 @@ class MemorySummarizeResponse(BaseModel):

View File

@@ -10,3 +10,4 @@ python-multipart==0.0.6

View File

@@ -23,3 +23,4 @@ CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "7004"]

View File

@@ -360,3 +360,4 @@ DAARION Platform Team

View File

@@ -177,3 +177,4 @@ async def require_microdao_permission(

View File

@@ -229,3 +229,4 @@ def get_monitor_agent_file_urls(agent_id: str, base_url: str = "/") -> Dict[str,

View File

@@ -23,3 +23,4 @@ CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "7012"]

View File

@@ -350,3 +350,4 @@ docker exec postgres psql -U postgres -d daarion \

View File

@@ -59,3 +59,4 @@ defaults:

View File

@@ -206,3 +206,4 @@ def evaluate_usage_access(request: PolicyRequest, policy_store: PolicyStore) ->

View File

@@ -159,3 +159,4 @@ if __name__ == "__main__":

View File

@@ -58,3 +58,4 @@ class PolicyDecision(BaseModel):

View File

@@ -100,3 +100,4 @@ class PolicyStore:

View File

@@ -9,3 +9,4 @@ python-multipart==0.0.6

View File

@@ -23,3 +23,4 @@ CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8000"]

View File

@@ -210,3 +210,4 @@ curl -X POST http://localhost:8000/internal/router/test-messaging \

View File

@@ -10,3 +10,4 @@ messaging_inbound:

View File

@@ -23,3 +23,4 @@ CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "7002"]

View File

@@ -257,3 +257,4 @@ Proprietary — DAARION Ecosystem

View File

@@ -238,3 +238,4 @@ if __name__ == "__main__":

View File

@@ -9,3 +9,4 @@ asyncio-nats-client==0.11.5

View File

@@ -23,3 +23,4 @@ CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "7009"]

View File

@@ -303,3 +303,4 @@ Each tool has a `timeout` (seconds). If execution exceeds timeout, it fails grac

View File

@@ -79,3 +79,4 @@ logging:

View File

@@ -7,3 +7,4 @@ __all__ = ['HTTPExecutor', 'PythonExecutor']

View File

@@ -103,3 +103,4 @@ class HTTPExecutor:

View File

@@ -67,3 +67,4 @@ class PythonExecutor:

View File

@@ -196,3 +196,4 @@ if __name__ == "__main__":

View File

@@ -31,3 +31,4 @@ class ToolCallResult(BaseModel):

View File

@@ -78,3 +78,4 @@ class ToolRegistry:

View File

@@ -9,3 +9,4 @@ python-multipart==0.0.6

View File

@@ -23,3 +23,4 @@ CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "7013"]

View File

@@ -362,3 +362,4 @@ await publish_nats_event("usage.agent", {

View File

@@ -238,3 +238,4 @@ class UsageAggregator:

View File

@@ -183,3 +183,4 @@ class UsageCollector:

View File

@@ -220,3 +220,4 @@ if __name__ == "__main__":

View File

@@ -160,3 +160,4 @@ class UsageQueryResponse(BaseModel):

View File

@@ -9,3 +9,4 @@ python-multipart==0.0.6

View File

@@ -363,3 +363,4 @@ if __name__ == "__main__":