# Config policies (16 files): alert_routing, architecture_pressure, backlog, cost_weights,
#   data_governance, incident_escalation, incident_intelligence, network_allowlist,
#   nodes_registry, observability_sources, rbac_tools_matrix, release_gate, risk_attribution,
#   risk_policy, slo_policy, tool_limits, tools_rollout
# Ops (22 files): Caddyfile, calendar compose, grafana voice dashboard, deployments/incidents
#   logs, runbooks for alerts/audit/backlog/incidents/sofiia/voice, cron jobs, scripts
#   (alert_triage, audit_cleanup, migrate_*, governance, schedule), task_registry,
#   voice alerts/ha/latency/policy
# Docs (30+ files): HUMANIZED_STEPAN v2.7-v3 changelogs and runbooks, NODA1/NODA2 status and
#   setup, audit index and traces, backlog, incident, supervisor, tools, voice, opencode,
#   release, risk, aistalk, spacebot
# Made-with: Cursor
65 lines
2.4 KiB
YAML
65 lines
2.4 KiB
YAML
# SLO Policy — DAARION.city
#
# Defines Service Level Objectives per service.
# Used by observability_tool.slo_snapshot and the incident_triage_graph slo_context node.
#
# Fields:
#   error_rate_pct  — max allowed error rate (%)
#   latency_p95_ms  — max p95 latency (milliseconds)
#   window_minutes  — observation window in minutes (default: 60)

# Baseline objectives.
# NOTE(review): presumably consumers fall back to these values when a service
# entry omits a field (no service below sets window_minutes) — confirm in the
# consuming code.
defaults:
  window_minutes: 60    # observation window, minutes
  error_rate_pct: 1.0   # max allowed error rate, %
  latency_p95_ms: 300   # max p95 latency, milliseconds

# Per-service objectives, keyed by service name.
# Each entry sets error_rate_pct (max error rate, %) and latency_p95_ms
# (max p95 latency, milliseconds); none of them sets window_minutes.
services:
  gateway:
    error_rate_pct: 1.0
    latency_p95_ms: 300
  router:
    error_rate_pct: 0.5
    latency_p95_ms: 200
  memory-service:
    error_rate_pct: 1.0
    latency_p95_ms: 400
  sofiia-supervisor:
    error_rate_pct: 1.0
    latency_p95_ms: 500

# ─── Voice SLO profiles ───────────────────────────────────────────────────────
# Two profiles aligned with router-config.yml selection_policies.
# Measured via Prometheus metrics emitted by sofiia-console /api/telemetry/voice
# and memory-service voice_endpoints.py.
#
# Prometheus metrics:
#   voice_ttfa_ms{voice_profile}        — Time-to-first-audio (BFF → first playable)
#   voice_e2e_ms{voice_profile}         — User stops speaking → audio plays
#   voice_tts_first_ms{voice_profile}   — First-sentence TTS synthesis
#   voice_tts_compute_ms{engine,voice}  — Memory-service internal TTS
#   voice_queue_underflows_total        — Playback starvation events
# Voice objectives: one entry per voice profile, plus canary limits.
# *_ms_p95 values are p95 latency ceilings in milliseconds; *_rate_pct values
# are maximum rates in percent; window_minutes is the observation window.
voice_slo:
  voice_fast_uk:
    description: "Fast profile: gemma3 → qwen3.5 fallback"
    ttfa_ms_p95: 5000         # TTFA p95 ≤ 5s
    e2e_ms_p95: 9000          # E2E p95 ≤ 9s
    tts_first_ms_p95: 2000    # TTS synthesis p95 ≤ 2s
    underflow_rate_pct: 1.0   # starvation events per 100 voice turns ≤ 1%
    tts_error_rate_pct: 0.5   # edge-tts failures ≤ 0.5%
    window_minutes: 10

  voice_quality_uk:
    description: "Quality profile: qwen3.5 → qwen3:14b fallback"
    ttfa_ms_p95: 7000
    e2e_ms_p95: 12000
    tts_first_ms_p95: 2000    # TTS itself is the same engine
    underflow_rate_pct: 2.0   # slightly relaxed (longer LLM → more gap risk)
    tts_error_rate_pct: 0.5
    window_minutes: 10

  # Canary thresholds (runtime health check, stricter)
  # NOTE(review): the original indentation was lost; `canary` is placed under
  # voice_slo here — confirm against the consumer whether it should instead be
  # a top-level key.
  canary:
    tts_polina_max_ms: 3000   # live Polina synthesis ≤ 3s
    tts_ostap_max_ms: 3000    # live Ostap synthesis ≤ 3s
    min_audio_bytes: 1000     # valid audio is never empty/tiny