{
  "dashboard": {
    "title": "SLO Dashboard - DAARION Platform",
    "tags": ["slo", "production", "monitoring"],
    "timezone": "browser",
    "panels": [
      {
        "id": 1,
        "title": "Availability SLO (99.9%)",
        "description": "Share of HTTP requests over the last 5 minutes that did not return a 5xx status. Target: 99.9%.",
        "type": "stat",
        "targets": [
          {
            "expr": "sum(rate(http_requests_total{status!~\"5..\"}[5m])) / sum(rate(http_requests_total[5m])) * 100",
            "legendFormat": "Success Rate"
          }
        ],
        "fieldConfig": {
          "defaults": {
            "unit": "percent",
            "thresholds": {
              "steps": [
                {"value": 0, "color": "red"},
                {"value": 99.9, "color": "green"}
              ]
            }
          }
        }
      },
      {
        "id": 2,
        "title": "Latency SLO (p95 < 2s)",
        "description": "95th-percentile latency computed from 5-minute bucket rates: HTTP request duration for the router service, and NATS message processing duration. Target: below 2s.",
        "type": "graph",
        "targets": [
          {
            "expr": "histogram_quantile(0.95, sum(rate(http_request_duration_seconds_bucket{service=\"router\"}[5m])) by (le))",
            "legendFormat": "p95 Gateway→Router"
          },
          {
            "expr": "histogram_quantile(0.95, sum(rate(nats_message_processing_duration_seconds_bucket[5m])) by (le))",
            "legendFormat": "p95 E2E Async"
          }
        ],
        "yaxes": [
          {
            "format": "s",
            "label": "Latency"
          }
        ],
        "gridPos": {"h": 8, "w": 12, "x": 0, "y": 1}
      },
      {
        "id": 3,
        "title": "Queue Time SLO (p95 < 5s)",
        "description": "95th percentile of nats_consumer_lag over the last 5 minutes, per consumer. Assumes nats_consumer_lag is a gauge reported in seconds; verify against the exporter. Target: below 5s.",
        "type": "graph",
        "targets": [
          {
            "expr": "max by (consumer) (quantile_over_time(0.95, nats_consumer_lag[5m]))",
            "legendFormat": "{{consumer}}"
          }
        ],
        "yaxes": [
          {
            "format": "s",
            "label": "Queue Time"
          }
        ],
        "gridPos": {"h": 8, "w": 12, "x": 12, "y": 1}
      },
      {
        "id": 4,
        "title": "Error Budget (30 days)",
        "description": "Percentage of the 30-day error budget still unspent for a 99.9% availability target. 100% means no 5xx responses in the window; 0% or below means the budget is exhausted.",
        "type": "stat",
        "targets": [
          {
            "expr": "(1 - ((sum(rate(http_requests_total{status=~\"5..\"}[30d])) or vector(0)) / sum(rate(http_requests_total[30d]))) / (1 - 0.999)) * 100",
            "legendFormat": "Error Budget Remaining"
          }
        ],
        "fieldConfig": {
          "defaults": {
            "unit": "percent",
            "thresholds": {
              "steps": [
                {"value": 0, "color": "red"},
                {"value": 50, "color": "yellow"},
                {"value": 99, "color": "green"}
              ]
            }
          }
        },
        "gridPos": {"h": 4, "w": 6, "x": 0, "y": 9}
      }
    ],
    "refresh": "30s",
    "time": {
      "from": "now-1h",
      "to": "now"
    }
  }
}