snapshot: NODE1 production state 2026-02-09
Complete snapshot of /opt/microdao-daarion/ from NODE1 (144.76.224.179).
This represents the actual running production code that has diverged
significantly from the previous main branch.
Key changes from old main:
- Gateway (http_api.py): expanded from ~40KB to 164KB with full agent support
- Router: new /v1/agents/{id}/infer endpoint with vision + DeepSeek routing
- Behavior Policy: SOWA v2.2 (3-level: FULL/ACK/SILENT)
- Agent Registry: config/agent_registry.yml as single source of truth
- 13 agents configured (was 3)
- Memory service integration
- CrewAI teams and roles
Excluded from snapshot: venv/, .env, data/, backups, .tgz archives
Co-authored-by: Cursor <cursoragent@cursor.com>
This commit is contained in:
100
monitoring/grafana/dashboards/slo_dashboard.json
Normal file
100
monitoring/grafana/dashboards/slo_dashboard.json
Normal file
@@ -0,0 +1,100 @@
|
||||
{
|
||||
"dashboard": {
|
||||
"title": "SLO Dashboard - DAARION Platform",
|
||||
"tags": ["slo", "production", "monitoring"],
|
||||
"timezone": "browser",
|
||||
"panels": [
|
||||
{
|
||||
"id": 1,
|
||||
"title": "Availability SLO (99.9%)",
|
||||
"type": "stat",
|
||||
"targets": [
|
||||
{
|
||||
"expr": "sum(rate(http_requests_total{status!~\"5..\"}[5m])) / sum(rate(http_requests_total[5m])) * 100",
|
||||
"legendFormat": "Success Rate"
|
||||
}
|
||||
],
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"unit": "percent",
|
||||
"thresholds": {
|
||||
"steps": [
|
||||
{"value": 0, "color": "red"},
|
||||
{"value": 99.9, "color": "green"}
|
||||
]
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"id": 2,
|
||||
"title": "Latency SLO (p95 < 2s)",
|
||||
"type": "graph",
|
||||
"targets": [
|
||||
{
|
||||
"expr": "histogram_quantile(0.95, sum(rate(http_request_duration_seconds_bucket{service=\"router\"}[5m])) by (le))",
|
||||
"legendFormat": "p95 Gateway→Router"
|
||||
},
|
||||
{
|
||||
"expr": "histogram_quantile(0.95, sum(rate(nats_message_processing_duration_seconds_bucket[5m])) by (le))",
|
||||
"legendFormat": "p95 E2E Async"
|
||||
}
|
||||
],
|
||||
"yaxes": [
|
||||
{
|
||||
"format": "s",
|
||||
"label": "Latency"
|
||||
}
|
||||
],
|
||||
"gridPos": {"h": 8, "w": 12, "x": 0, "y": 1}
|
||||
},
|
||||
{
|
||||
"id": 3,
|
||||
"title": "Queue Time SLO (p95 < 5s)",
|
||||
"type": "graph",
|
||||
"targets": [
|
||||
{
|
||||
"expr": "max by (consumer) (quantile_over_time(0.95, nats_consumer_lag[5m]))",
|
||||
"legendFormat": "{{consumer}}"
|
||||
}
|
||||
],
|
||||
"yaxes": [
|
||||
{
|
||||
"format": "s",
|
||||
"label": "Queue Time"
|
||||
}
|
||||
],
|
||||
"gridPos": {"h": 8, "w": 12, "x": 12, "y": 1}
|
||||
},
|
||||
{
|
||||
"id": 4,
|
||||
"title": "Error Budget (30 days)",
|
||||
"type": "stat",
|
||||
"targets": [
|
||||
{
|
||||
"expr": "(1 - (sum(rate(http_requests_total{status=~\"5..\"}[30d])) / sum(rate(http_requests_total[30d]))) / (1 - 0.999)) * 100",
|
||||
"legendFormat": "Error Budget Remaining"
|
||||
}
|
||||
],
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"unit": "percent",
|
||||
"thresholds": {
|
||||
"steps": [
|
||||
{"value": 0, "color": "red"},
|
||||
{"value": 50, "color": "yellow"},
|
||||
{"value": 99, "color": "green"}
|
||||
]
|
||||
}
|
||||
}
|
||||
},
|
||||
"gridPos": {"h": 4, "w": 6, "x": 0, "y": 9}
|
||||
}
|
||||
],
|
||||
"refresh": "30s",
|
||||
"time": {
|
||||
"from": "now-1h",
|
||||
"to": "now"
|
||||
}
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user