New router intelligence modules (26 files): alert_ingest/store, audit_store, architecture_pressure, backlog_generator/store, cost_analyzer, data_governance, dependency_scanner, drift_analyzer, incident_* (5 files), llm_enrichment, platform_priority_digest, provider_budget, release_check_runner, risk_* (6 files), signature_state_store, sofiia_auto_router, tool_governance New services: - sofiia-console: Dockerfile, adapters/, monitor/nodes/ops/voice modules, launchd, react static - memory-service: integration_endpoints, integrations, voice_endpoints, static UI - aurora-service: full app suite (analysis, job_store, orchestrator, reporting, schemas, subagents) - sofiia-supervisor: new supervisor service - aistalk-bridge-lite: Telegram bridge lite - calendar-service: CalDAV calendar service with reminders - mlx-stt-service / mlx-tts-service: Apple Silicon speech services - binance-bot-monitor: market monitor service - node-worker: STT/TTS memory providers New tools (9): agent_email, browser_tool, contract_tool, observability_tool, oncall_tool, pr_reviewer_tool, repo_tool, safe_code_executor, secure_vault New crews: agromatrix_crew (12 modules: depth_classifier, doc_facts, doc_focus, farm_state, light_reply, llm_factory, memory_manager, proactivity, reflection_engine, session_context, style_adapter, telemetry) Tests: 85+ test files for all new modules Made-with: Cursor
46 lines
1.3 KiB
Python
46 lines
1.3 KiB
Python
"""Nodes dashboard: aggregate telemetry from all configured nodes."""
|
|
import logging
|
|
from typing import Any, Dict
|
|
|
|
from .config import load_nodes_registry
|
|
from .monitor import collect_all_nodes
|
|
|
|
# Module-level logger, named after this module via __name__.
logger = logging.getLogger(__name__)
|
|
|
|
|
|
async def get_nodes_dashboard(router_api_key: str = "") -> Dict[str, Any]:
    """Build the payload for ``GET /api/nodes/dashboard``.

    For each node in nodes_registry.yml, collects:
      - router health (ok, latency)
      - gateway health (ok, latency) — optional
      - monitor agent telemetry (heartbeat, SLO, incidents, backends, artifacts)

    All probes run in parallel with per-node timeout.
    Non-fatal: unreachable nodes appear with online=false.

    Args:
        router_api_key: Passed through unchanged to ``collect_all_nodes``.

    Returns:
        A dict with three keys:
          - ``"nodes"``: the per-node results returned by ``collect_all_nodes``.
          - ``"summary"``: ``total`` (number of results), ``online`` (results
            with a truthy ``"online"``) and ``router_ok`` (results with a
            truthy ``"router_ok"``).
          - ``"defaults"``: the registry's ``defaults`` section (``{}`` when
            the section is missing or empty).
    """
    # A key that is present but empty in the YAML registry loads as None, and
    # dict.get(key, default) only applies the default when the key is absent.
    # Normalise None explicitly so an empty section cannot crash the endpoint.
    # NOTE(review): assumes load_nodes_registry() returns a mapping or None.
    reg = load_nodes_registry() or {}

    nodes_cfg = reg.get("nodes")
    if nodes_cfg is None:
        nodes_cfg = {}

    defaults = reg.get("defaults")
    if defaults is None:
        defaults = {}

    # Fall back to 10 seconds when the setting is absent or explicitly null;
    # an explicit 0 is preserved as-is.
    raw_timeout = defaults.get("health_timeout_sec")
    timeout = float(10 if raw_timeout is None else raw_timeout)

    nodes = await collect_all_nodes(
        nodes_cfg,
        router_api_key=router_api_key,
        timeout_per_node=timeout,
    )

    online_count = sum(1 for n in nodes if n.get("online"))
    router_ok_count = sum(1 for n in nodes if n.get("router_ok"))

    return {
        "nodes": nodes,
        "summary": {
            "total": len(nodes),
            "online": online_count,
            "router_ok": router_ok_count,
        },
        "defaults": defaults,
    }
|