---
# SLO Policy — DAARION.city
#
# Defines Service Level Objectives per service.
# Used by observability_tool.slo_snapshot and the incident_triage_graph
# slo_context node.
#
# Fields:
#   error_rate_pct  — max allowed error rate (%)
#   latency_p95_ms  — max p95 latency (milliseconds)
#   window_minutes  — default observation window (default: 60)

# Baseline objectives.
defaults:
  window_minutes: 60
  error_rate_pct: 1.0
  latency_p95_ms: 300

# Per-service objectives, keyed by service name.
services:
  gateway:
    error_rate_pct: 1.0
    latency_p95_ms: 300
  router:
    error_rate_pct: 0.5
    latency_p95_ms: 200
  memory-service:
    error_rate_pct: 1.0
    latency_p95_ms: 400
  sofiia-supervisor:
    error_rate_pct: 1.0
    latency_p95_ms: 500

# ─── Voice SLO profiles ──────────────────────────────────────────────────────
# Two profiles aligned with router-config.yml selection_policies.
# Measured via Prometheus metrics emitted by sofiia-console
# /api/telemetry/voice and memory-service voice_endpoints.py.
#
# Prometheus metrics:
#   voice_ttfa_ms{voice_profile}        — Time-to-first-audio (BFF → first playable)
#   voice_e2e_ms{voice_profile}         — User stops speaking → audio plays
#   voice_tts_first_ms{voice_profile}   — First-sentence TTS synthesis
#   voice_tts_compute_ms{engine,voice}  — Memory-service internal TTS
#   voice_queue_underflows_total        — Playback starvation events
voice_slo:
  voice_fast_uk:
    description: "Fast profile: gemma3 → qwen3.5 fallback"
    ttfa_ms_p95: 5000        # TTFA p95 ≤ 5s
    e2e_ms_p95: 9000         # E2E p95 ≤ 9s
    tts_first_ms_p95: 2000   # TTS synthesis p95 ≤ 2s
    underflow_rate_pct: 1.0  # starvation events per 100 voice turns ≤ 1%
    tts_error_rate_pct: 0.5  # edge-tts failures ≤ 0.5%
    window_minutes: 10

  voice_quality_uk:
    description: "Quality profile: qwen3.5 → qwen3:14b fallback"
    ttfa_ms_p95: 7000
    e2e_ms_p95: 12000
    tts_first_ms_p95: 2000   # TTS itself is the same engine
    underflow_rate_pct: 2.0  # slightly relaxed (longer LLM → more gap risk)
    tts_error_rate_pct: 0.5
    window_minutes: 10

  # Canary thresholds (runtime health check, stricter)
  # NOTE(review): nesting under voice_slo is assumed — confirm against the
  # consumer that reads this section.
  # NOTE(review): these are *_max_ms limits of 3000 ms, numerically above the
  # 2000 ms tts_first_ms_p95 targets above — confirm "stricter" is intended.
  canary:
    tts_polina_max_ms: 3000  # live Polina synthesis ≤ 3s
    tts_ostap_max_ms: 3000   # live Ostap synthesis ≤ 3s
    min_audio_bytes: 1000    # valid audio is never empty/tiny