gateway: add redis-backed city metrics poller and /v1/metrics/dashboard
This commit is contained in:
268
gateway-bot/daarion_facade/registry_api.py
Normal file
268
gateway-bot/daarion_facade/registry_api.py
Normal file
@@ -0,0 +1,268 @@
|
||||
import json
|
||||
import os
|
||||
import time
|
||||
from datetime import datetime, timezone
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
import httpx
|
||||
from fastapi import APIRouter
|
||||
from redis.asyncio import Redis
|
||||
|
||||
# Every route declared on this router is served under the /v1 prefix.
router = APIRouter(prefix="/v1", tags=["daarion-facade"])

# Runtime configuration; each value can be overridden through the environment.
# REGISTRY_CACHE_TTL is compared against time.time() differences (seconds).
REGISTRY_CACHE_TTL = int(os.environ.get("REGISTRY_CACHE_TTL", "30"))
MEMORY_SERVICE_URL = os.environ.get("MEMORY_SERVICE_URL", "http://memory-service:8000")
CREWAI_SERVICE_URL = os.environ.get("CREWAI_SERVICE_URL", "http://dagi-staging-crewai-service:9010")
REDIS_URL = os.environ.get("REDIS_URL", "redis://redis:6379/0")
# Redis key holding the JSON dashboard snapshot read by this module.
METRICS_DASHBOARD_KEY = "daarion:metrics:dashboard"

# Module-level caches: "loaded_at" is the time.time() of the last refresh and
# "data" is the value handed back while the entry is younger than the TTL.
_REGISTRY_CACHE: Dict[str, Any] = dict(loaded_at=0.0, data=None)
_DISTRICT_CACHE: Dict[str, Any] = dict(loaded_at=0.0, data=None)
_CREWAI_CACHE: Dict[str, Any] = dict(loaded_at=0.0, data={})
# Shared Redis client; stays None until the first request needs it.
_REDIS: Optional[Redis] = None
|
||||
|
||||
|
||||
def _now_iso() -> str:
    """Return the current time in UTC as an ISO 8601 string (with offset)."""
    current = datetime.now(tz=timezone.utc)
    return current.isoformat()
|
||||
|
||||
|
||||
def _registry_paths() -> List[Path]:
    """List the candidate locations of ``agent_registry.json`` in lookup order."""
    fixed_locations = (
        "/app/gateway-bot/agent_registry.json",
        "/opt/microdao-daarion/config/agent_registry.json",
    )
    candidates = [Path(location) for location in fixed_locations]
    # Last resort: the directory one level above the one holding this module.
    module_parent = Path(__file__).resolve().parents[1]
    candidates.append(module_parent / "agent_registry.json")
    return candidates
|
||||
|
||||
|
||||
def _district_paths() -> List[Path]:
    """List the candidate locations of ``district_registry.json`` in lookup order."""
    file_name = "district_registry.json"
    fixed_candidate = Path("/app/gateway-bot") / file_name
    # Fallback: the directory one level above the one holding this module.
    relative_candidate = Path(__file__).resolve().parents[1] / file_name
    return [fixed_candidate, relative_candidate]
|
||||
|
||||
|
||||
def _load_registry() -> Dict[str, Any]:
    """Return the parsed agent registry, re-reading it at most once per TTL.

    The candidates from ``_registry_paths()`` are tried in order and the first
    one that is a regular file is loaded.  When no candidate qualifies, an
    empty registry (``{"agents": {}}``) is cached and returned instead.

    Returns:
        The decoded JSON document of the first registry file found, or the
        empty fallback registry.

    Raises:
        json.JSONDecodeError: If the selected file is not valid JSON.
        OSError: If the selected file cannot be opened or read.
    """
    now = time.time()
    cached = _REGISTRY_CACHE.get("data")
    age = now - _REGISTRY_CACHE.get("loaded_at", 0.0)
    # Compare against None (not truthiness) so that a valid but empty document
    # is cached as well instead of being re-read on every call.
    if cached is not None and age < REGISTRY_CACHE_TTL:
        return cached

    for path in _registry_paths():
        # is_file() rather than exists(): a directory sitting at the candidate
        # path must be skipped, not opened.
        if not path.is_file():
            continue
        with path.open("r", encoding="utf-8") as f:
            data = json.load(f)
        _REGISTRY_CACHE.update({"loaded_at": now, "data": data})
        return data

    # No registry file anywhere: remember the empty fallback for one TTL.
    data = {"agents": {}}
    _REGISTRY_CACHE.update({"loaded_at": now, "data": data})
    return data
|
||||
|
||||
|
||||
def _load_district_registry() -> Dict[str, Any]:
    """Return the parsed district registry, re-reading it at most once per TTL.

    The candidates from ``_district_paths()`` are tried in order and the first
    one that is a regular file is loaded.  When no candidate qualifies, an
    empty catalog (``{"districts": []}``) is cached and returned instead.

    Returns:
        The decoded JSON document of the first registry file found, or the
        empty fallback catalog.

    Raises:
        json.JSONDecodeError: If the selected file is not valid JSON.
        OSError: If the selected file cannot be opened or read.
    """
    now = time.time()
    cached = _DISTRICT_CACHE.get("data")
    age = now - _DISTRICT_CACHE.get("loaded_at", 0.0)
    # Compare against None (not truthiness) so that a valid but empty document
    # is cached as well instead of being re-read on every call.
    if cached is not None and age < REGISTRY_CACHE_TTL:
        return cached

    for path in _district_paths():
        # is_file() rather than exists(): a directory sitting at the candidate
        # path must be skipped, not opened.
        if not path.is_file():
            continue
        with path.open("r", encoding="utf-8") as f:
            data = json.load(f)
        _DISTRICT_CACHE.update({"loaded_at": now, "data": data})
        return data

    # No registry file anywhere: remember the empty fallback for one TTL.
    data = {"districts": []}
    _DISTRICT_CACHE.update({"loaded_at": now, "data": data})
    return data
|
||||
|
||||
|
||||
async def _redis_client() -> Redis:
    """Return the module-wide Redis client, building it on the first call.

    The client is created from ``REDIS_URL`` with ``decode_responses=True``
    and kept in the ``_REDIS`` global so later calls reuse it.
    """
    global _REDIS
    if _REDIS is not None:
        return _REDIS
    _REDIS = Redis.from_url(REDIS_URL, decode_responses=True)
    return _REDIS
|
||||
|
||||
|
||||
async def _load_cached_dashboard() -> Optional[Dict[str, Any]]:
    """Read the dashboard snapshot stored in Redis, if a usable one exists.

    Returns:
        The JSON object stored under ``METRICS_DASHBOARD_KEY``, or ``None``
        when the key is missing or empty, the stored value is not a JSON
        object, or the Redis read / JSON decoding fails.
    """
    try:
        r = await _redis_client()
        raw = await r.get(METRICS_DASHBOARD_KEY)
        if not raw:
            return None
        payload = json.loads(raw)
    except Exception:
        # Deliberately best-effort: any Redis or decoding failure is treated
        # as a cache miss rather than an error.
        return None

    # Honour the declared return type: a JSON list, string or number is not a
    # dashboard document and must not be handed to the caller as one.
    return payload if isinstance(payload, dict) else None
|
||||
|
||||
|
||||
async def _load_crewai_roles() -> Dict[str, int]:
    """Return the per-agent ``default_roles`` numbers reported by CrewAI.

    The result of each fetch, including an empty one after a failure, is
    cached for ``REGISTRY_CACHE_TTL`` seconds.

    Returns:
        Mapping of agent id (as ``str``) to its integer ``default_roles``
        value, with 0 for entries where that value is missing or not an
        integer.  The mapping is empty when the request fails, the service
        answers with a non-200 status, or the body is not a JSON object.
    """
    now = time.time()
    if now - _CREWAI_CACHE.get("loaded_at", 0.0) < REGISTRY_CACHE_TTL:
        return _CREWAI_CACHE.get("data", {})

    payload: Any = None
    try:
        async with httpx.AsyncClient(timeout=8.0) as client:
            resp = await client.get(f"{CREWAI_SERVICE_URL}/crew/agents")
            if resp.status_code == 200:
                payload = resp.json()
    except Exception:
        # Best-effort lookup: an unreachable service or an undecodable body
        # simply yields no counts.
        payload = None

    out: Dict[str, int] = {}
    if isinstance(payload, dict):
        for aid, info in payload.items():
            # Validate each entry on its own so that one malformed record does
            # not discard the counts already collected for the other agents.
            default_roles = info.get("default_roles") if isinstance(info, dict) else None
            # NOTE(review): non-integer values (e.g. a list of role names) are
            # counted as 0 - confirm the service really reports a number here.
            out[str(aid)] = int(default_roles) if isinstance(default_roles, int) else 0

    _CREWAI_CACHE.update({"loaded_at": now, "data": out})
    return out
|
||||
|
||||
|
||||
@router.get("/registry/agents")
async def get_agents() -> Dict[str, Any]:
    """List the agents of the registry in the facade's public shape.

    Registry entries that are not dicts are skipped.  Each remaining entry is
    combined with the sub-agent count returned by ``_load_crewai_roles()``.

    Returns:
        ``{"items": [...], "total": <number of items>}``.
    """
    registry = _load_registry()
    agents = registry.get("agents", {}) if isinstance(registry, dict) else {}
    subagent_counts = await _load_crewai_roles()

    def _describe(agent_id: Any, cfg: Dict[str, Any]) -> Dict[str, Any]:
        # The first domain is the primary one; any further ones are aliases.
        domains = cfg.get("domains") or []
        primary = domains[0] if domains else "general"
        aliases = domains[1:] if len(domains) > 1 else []
        return {
            "agent_id": agent_id,
            "title": cfg.get("display_name") or agent_id,
            "role": cfg.get("canonical_role") or "",
            "domain_primary": primary,
            "domain_aliases": aliases,
            "visibility": cfg.get("visibility", "public"),
            "status": cfg.get("status", "active"),
            "team": {"subagents_total": subagent_counts.get(agent_id, 0)},
            # Agents without a district are placed in "city-core".
            "district_id": cfg.get("district_id") or "city-core",
            "avatar_url": cfg.get("avatar_url"),
            "health_url": cfg.get("health_url"),
        }

    items: List[Dict[str, Any]] = [
        _describe(agent_id, cfg)
        for agent_id, cfg in agents.items()
        if isinstance(cfg, dict)
    ]
    return {"items": items, "total": len(items)}
|
||||
|
||||
|
||||
@router.get("/registry/districts")
async def get_districts() -> Dict[str, Any]:
    """List all districts known from the catalog or referenced by agents.

    District ids come from two places: the ``districts`` list of the district
    registry and the ``district_id`` of every agent returned by
    ``get_agents()``.  The union of both, sorted by id, is reported; catalog
    values take precedence and missing ones are derived from the id.

    Returns:
        ``{"items": [...], "total": <number of items>}``.
    """
    agents_payload = await get_agents()
    agents = agents_payload.get("items", [])

    by_district: Dict[str, List[Dict[str, Any]]] = {}
    for a in agents:
        # Normalise to str exactly like the catalog keys below, so the two id
        # sources merge into one entry and stay sortable.
        key = str(a.get("district_id") or "city-core")
        by_district.setdefault(key, []).append(a)

    # Mirror the isinstance guard get_agents() applies to its registry, and
    # tolerate a null or non-list "districts" value.
    registry = _load_district_registry()
    catalog = registry.get("districts") if isinstance(registry, dict) else None
    if not isinstance(catalog, list):
        catalog = []
    catalog_by_id: Dict[str, Dict[str, Any]] = {
        str(d.get("district_id")): d
        for d in catalog
        if isinstance(d, dict) and d.get("district_id")
    }

    district_ids = sorted(set(catalog_by_id) | set(by_district))
    items: List[Dict[str, Any]] = []

    for district_id in district_ids:
        members = by_district.get(district_id, [])
        base = catalog_by_id.get(district_id, {})

        # Without an explicit domain, "city-core" maps to the apex domain and
        # every other district to a sub-domain named after its id.
        domain = base.get("domain") or (
            "daarion.city" if district_id == "city-core" else f"{district_id}.daarion.city"
        )

        lead_agent_id = base.get("lead_agent_id")
        if not lead_agent_id:
            if district_id == "city-core" and any(m.get("agent_id") == "daarwizz" for m in members):
                # "daarwizz" leads city-core whenever it is a member.
                lead_agent_id = "daarwizz"
            elif members:
                lead_agent_id = members[0].get("agent_id")
            else:
                lead_agent_id = None

        items.append(
            {
                "district_id": district_id,
                "title": base.get("title") or district_id.replace("-", " ").title(),
                "domain": domain,
                "status": base.get("status", "active"),
                "logo_url": base.get("logo_url"),
                "health_url": base.get("health_url"),
                "well_known": {
                    "manifest": f"https://{domain}/.well-known/daarion-district.json",
                    "health": f"https://{domain}/.well-known/daarion-health.json",
                    "capabilities": f"https://{domain}/.well-known/daarion-capabilities.json",
                },
                "lead_agent_id": lead_agent_id,
                "agents_total": len(members),
            }
        )

    return {"items": items, "total": len(items)}
|
||||
|
||||
|
||||
@router.get("/metrics")
async def get_metrics() -> Dict[str, Any]:
    """Return the global counters: nodes, districts, agents, sub-agents, vectors.

    The vector count is read from the memory service's ``/health`` response
    (``vector_store.memories.vectors_count``); any failure while fetching or
    decoding it leaves the count at 0.  ``nodes`` is always reported as 1.
    """
    agents_payload = await get_agents()
    districts_payload = await get_districts()
    agents = agents_payload.get("items", [])

    vector_total = 0
    try:
        async with httpx.AsyncClient(timeout=5.0) as http:
            health = await http.get(f"{MEMORY_SERVICE_URL}/health")
            if health.status_code == 200:
                store = health.json().get("vector_store", {})
                memories = store.get("memories", {})
                vector_total = int(memories.get("vectors_count", 0))
    except Exception:
        # The memory service is optional for this endpoint.
        vector_total = 0

    district_total = districts_payload.get("total", 0)

    subagent_total = 0
    for agent in agents:
        team = agent.get("team") or {}
        subagent_total += int(team.get("subagents_total", 0))

    return {
        "nodes": 1,
        "districts": district_total,
        "agents": len(agents),
        "subagents": subagent_total,
        "memory_vectors": vector_total,
    }
|
||||
|
||||
|
||||
@router.get("/metrics/dashboard")
async def get_metrics_dashboard() -> Dict[str, Any]:
    """Return the dashboard document, preferring the snapshot cached in Redis.

    When ``_load_cached_dashboard()`` yields nothing, a fallback document is
    assembled from ``get_metrics()`` and ``get_districts()``.  In that
    fallback the per-district ``ok``, ``agents_online``, ``latency_ms`` and
    ``last_check_ts`` fields are ``None``, the global online counters are 0,
    and ``source`` is ``"fallback_registry"``.
    """
    snapshot = await _load_cached_dashboard()
    if snapshot:
        return snapshot

    metrics = await get_metrics()
    districts_payload = await get_districts()
    districts = districts_payload.get("items", [])

    per_district = [
        {
            "district_id": d.get("district_id"),
            "title": d.get("title"),
            "domain": d.get("domain"),
            "status": d.get("status"),
            "ok": None,
            "agents_total": d.get("agents_total", 0),
            "agents_online": None,
            "latency_ms": None,
            "last_check_ts": None,
        }
        for d in districts
    ]

    # (key, default) pairs copied from the metrics payload, in output order.
    copied_counters = (
        ("nodes", 1),
        ("districts", 0),
        ("agents", 0),
        ("subagents", 0),
        ("memory_vectors", 0),
    )
    totals: Dict[str, Any] = {key: metrics.get(key, default) for key, default in copied_counters}
    totals["districts_online"] = 0
    totals["agents_online"] = 0

    return {
        "global": totals,
        "by_district": per_district,
        "updated_at": _now_iso(),
        "source": "fallback_registry",
    }
|
||||
Reference in New Issue
Block a user