# microdao-daarion/ops/voice_alerts.yml

groups:
  - name: voice_slo
    # Evaluation interval should match Prometheus global evaluation_interval (default 1m).
    # All thresholds align with config/slo_policy.yml voice_slo section.
    rules:

      # ── Alert 1: TTFA p95 breach ──────────────────────────────────────────────
      # Fires when time-to-first-audio p95 for the voice_fast_uk profile stays
      # above the 5000 ms threshold for 10 consecutive minutes (`for: 10m`).
      # Root causes: slow LLM, Ollama overload, model cold-start.
      - alert: VoiceTTFA_P95_Breach_Fast
        # p95 estimated from the per-bucket rate over a 10-minute window.
        expr: |
          histogram_quantile(0.95,
            rate(voice_ttfa_ms_bucket{voice_profile="voice_fast_uk"}[10m])
          ) > 5000
        for: 10m
        labels:
          severity: warning
          team: platform
          profile: voice_fast_uk
        annotations:
          summary: "Voice TTFA p95 breach (fast profile)"
          # $value is already in milliseconds. humanizeDuration interprets its
          # input as seconds (5200 would render as "1h 26m 40s"), so the value is
          # printed as a plain rounded number in front of the "ms" suffix.
          description: >
            voice_fast_uk TTFA p95 = {{ $value | printf "%.0f" }}ms > 5000ms SLO.
            Check: Ollama queue depth, gemma3 model availability, sofiia-console logs.
          runbook: "ops/runbook-alerts.md#voice-ttfa"
          dashboard: "grafana/d/voice-slo/voice-latency"

      # Quality-profile counterpart of the TTFA p95 alert: the 95th percentile of
      # voice_ttfa_ms for voice_quality_uk must stay above 7000 for the whole
      # 10-minute `for` window before the alert fires.
      - alert: VoiceTTFA_P95_Breach_Quality
        expr: |
          histogram_quantile(
            0.95,
            rate(voice_ttfa_ms_bucket{voice_profile="voice_quality_uk"}[10m])
          ) > 7000
        for: 10m
        labels:
          team: platform
          severity: warning
          profile: voice_quality_uk
        annotations:
          summary: 'Voice TTFA p95 breach (quality profile)'
          runbook: 'ops/runbook-alerts.md#voice-ttfa'
          description: >
            voice_quality_uk TTFA p95 = {{ $value }}ms > 7000ms SLO.
            Check: qwen3.5:35b-a3b availability, NODA2 GPU/CPU load.

      # ── Alert 2: Underflow spike ───────────────────────────────────────────────
      # Queue starvation: the underflow counter grows faster than roughly one
      # event per minute (0.017/s * 60 ≈ 1.02/min) and keeps doing so for 5 minutes.
      # Root cause: TTS synthesis slower than playback — LLM too slow, long chunks,
      # or network latency to memory-service.
      - alert: VoiceQueueUnderflow_Spike
        # Per-second rate of underflow events, averaged over the last 5 minutes.
        expr: rate(voice_queue_underflows_total[5m]) > 0.017
        for: 5m
        labels:
          team: platform
          severity: warning
        annotations:
          summary: 'Voice queue starvation detected'
          runbook: 'ops/runbook-alerts.md#voice-underflow'
          description: >
            Queue underflow rate = {{ $value | humanize }}/s (>1/min).
            Audio playback is outrunning TTS synthesis — users hear silence gaps.
            Check: TTS latency (voice_tts_first_ms), chunk size, LLM total time.

      # ── Alert 3: TTS synthesis degradation ────────────────────────────────────
      # First-sentence TTS p95 above 2000 ms for 10 minutes. Points at edge-tts
      # trouble (403 auth errors, Microsoft endpoint throttling, network
      # degradation).
      - alert: VoiceTTS_P95_Degraded
        # No label selector: the quantile is computed for every series of the
        # voice_tts_first_ms histogram.
        expr: |
          histogram_quantile(
            0.95,
            rate(voice_tts_first_ms_bucket[10m])
          ) > 2000
        for: 10m
        labels:
          team: platform
          severity: critical
        annotations:
          summary: 'Voice TTS synthesis degraded (p95 > 2s)'
          runbook: 'ops/runbook-alerts.md#voice-tts-degraded'
          description: >
            voice_tts_first_ms p95 = {{ $value }}ms > 2000ms.
            Likely edge-tts 403 or Microsoft endpoint issue.
            Check: memory-service /voice/health, voice_tts_errors_total{error_type="403"}.

      # ── Alert 4: TTS error rate spike ─────────────────────────────────────────
      # Elevated edge-tts error rate (403, network, synthesis failure): more than
      # 0.05 errors per second, sustained for 3 minutes.
      - alert: VoiceTTS_ErrorRate_High
        # Not aggregated, so each series keeps the `engine` and `error_type`
        # labels that the description template reads.
        expr: rate(voice_tts_errors_total[5m]) > 0.05
        for: 3m
        labels:
          team: platform
          severity: critical
        annotations:
          summary: 'Voice TTS error rate elevated'
          runbook: 'ops/runbook-alerts.md#voice-tts-error'
          description: >
            TTS errors = {{ $value | humanize }}/s.
            Engine: {{ $labels.engine }}, Error type: {{ $labels.error_type }}.
            Users may hear espeak fallback or silence.

      # ── Alert 5: E2E latency breach ───────────────────────────────────────────
      # Round-trip SLO guard for the voice_fast_uk profile: end-to-end p95 above
      # 9000 ms for 15 minutes. Catches combined LLM+TTS degradation.
      - alert: VoiceE2E_P95_Breach
        expr: |
          histogram_quantile(
            0.95,
            rate(voice_e2e_ms_bucket{voice_profile="voice_fast_uk"}[15m])
          ) > 9000
        for: 15m
        labels:
          team: platform
          severity: warning
          profile: voice_fast_uk
        annotations:
          summary: 'Voice E2E latency p95 breach'
          runbook: 'ops/runbook-alerts.md#voice-e2e'
          description: >
            voice_fast_uk E2E p95 = {{ $value }}ms > 9000ms SLO.
            Full pipeline (STT+LLM+TTS) is degraded.