docs(platform): add policy configs, runbooks, ops scripts and platform documentation

Config policies (16 files): alert_routing, architecture_pressure, backlog,
cost_weights, data_governance, incident_escalation, incident_intelligence,
network_allowlist, nodes_registry, observability_sources, rbac_tools_matrix,
release_gate, risk_attribution, risk_policy, slo_policy, tool_limits, tools_rollout

Ops (22 files): Caddyfile, calendar compose, grafana voice dashboard,
deployments/incidents logs, runbooks for alerts/audit/backlog/incidents/sofiia/voice,
cron jobs, scripts (alert_triage, audit_cleanup, migrate_*, governance, schedule),
task_registry, voice alerts/ha/latency/policy

Docs (30+ files): HUMANIZED_STEPAN v2.7-v3 changelogs and runbooks,
NODA1/NODA2 status and setup, audit index and traces, backlog, incident,
supervisor, tools, voice, opencode, release, risk, aistalk, spacebot

Made-with: Cursor
This commit is contained in:
Apple
2026-03-03 07:14:53 -08:00
parent 129e4ea1fc
commit 67225a39fa
102 changed files with 20060 additions and 0 deletions

341
ops/voice_latency_audit.sh Executable file
View File

@@ -0,0 +1,341 @@
#!/usr/bin/env bash
# voice_latency_audit.sh — Voice pipeline latency audit (12 measured scenarios)
# Usage:
#   bash ops/voice_latency_audit.sh [BFF_URL] [MEMORY_URL]
#   bash ops/voice_latency_audit.sh http://localhost:8002 http://localhost:8000
#
# Scenarios (in execution order):
#   1-3   TTS direct to memory-service: short / medium (Polina) / Ostap
#   4-5   TTS via the BFF proxy: short / medium
#   6-9   Chat latency per model: gemma3 / qwen3:8b / qwen3:14b / qwen3.5:35b-a3b
#   10    Voice health check (GET /voice/health)
#   11    Parallel TTS (2 concurrent requests)
#   12    STT smoke test (1 s silent WAV stub)
# Not covered by this script: cloud fallback simulation (ollama unavailable).
# Strict mode: abort on errors, on unset variables and on failed pipeline stages.
set -o errexit -o nounset -o pipefail

# Service endpoints; both can be overridden positionally.
BFF_URL=${1:-http://localhost:8002}
MEMORY_URL=${2:-http://localhost:8000}

# Every run writes one timestamped JSON report below RESULTS_DIR.
RESULTS_DIR="ops/voice_audit_results"
TS="$(date '+%Y%m%d_%H%M%S')"
RESULTS_FILE="$RESULTS_DIR/audit_$TS.json"

# ANSI colour escapes, stored unexpanded and interpreted when printed.
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[0;33m'
CYAN='\033[0;36m'
NC='\033[0m'
# Print a green PASS line; backslash escapes in the message are interpreted.
pass() {
  printf '%b\n' " ${GREEN}PASS${NC} $1"
}
# Print a red FAIL line; backslash escapes in the message are interpreted.
fail() {
  printf '%b\n' " ${RED}FAIL${NC} $1"
}
# Print a yellow WARN line; backslash escapes in the message are interpreted.
warn() {
  printf '%b\n' " ${YELLOW}WARN${NC} $1"
}
# Print a cyan INFO line; backslash escapes in the message are interpreted.
info() {
  printf '%b\n' " ${CYAN}INFO${NC} $1"
}
# Make sure the report directory exists before anything tries to write to it.
mkdir -p "${RESULTS_DIR}"

# One JSON record (as a string) is appended here per executed scenario.
RESULTS=()
# ── Helper: measure HTTP call ──────────────────────────────────────────────────
#######################################
# Print the current wall-clock time as integer milliseconds since the epoch.
# Prefers bash's EPOCHREALTIME (bash >= 5.0) so that taking a timestamp does
# not require starting a python3 process inside the interval being measured;
# falls back to python3 when the variable is missing or has an unexpected form.
# Outputs: the timestamp on stdout
#######################################
_now_ms() {
  local raw="${EPOCHREALTIME:-}"
  # The decimal separator of EPOCHREALTIME follows the locale ('.' or ',').
  if [[ "$raw" =~ ^([0-9]+)[.,]([0-9]{6})$ ]]; then
    local usec="${BASH_REMATCH[1]}${BASH_REMATCH[2]}"
    echo "$((10#$usec / 1000))"
  else
    python3 -c "import time; print(int(time.time()*1000))"
  fi
}
#######################################
# Time a single HTTP request and print its outcome.
# Globals:   GREEN, YELLOW, RED, NC (read)
# Arguments: $1 - scenario label used in the output
#            $2 - method: POST_JSON or GET (anything else is reported as HTTP 000)
#            $3 - URL
#            $4 - unused; accepted so existing call sites keep working
#            $5 - JSON body for POST_JSON (default: {})
# Outputs:   one JSON record line, then one coloured summary line, on stdout
#######################################
measure() {
  local scenario="$1" method="$2" url="$3"
  # Not written as ${5:-{}}: the expansion ends at the first '}', which would
  # append a stray '}' to every body the caller supplies.
  local body="${5:-}"
  [[ -n "$body" ]] || body='{}'

  local t0 t1 ms http_code="" size tmp status="ok" rc=0
  t0=$(_now_ms)
  tmp=$(mktemp)

  # curl prints %{http_code} even when it exits non-zero (-f on HTTP >= 400,
  # or "000" when no response arrived), so the exit status is captured
  # separately instead of appending a second code to the output.
  case "$method" in
    POST_JSON)
      http_code=$(curl -sf -w "%{http_code}" -X POST "$url" \
        -H "Content-Type: application/json" \
        -d "$body" -o "$tmp" --max-time 15 2>/dev/null) || rc=$?
      ;;
    GET)
      http_code=$(curl -sf -w "%{http_code}" -X GET "$url" \
        -o "$tmp" --max-time 10 2>/dev/null) || rc=$?
      ;;
    *)
      http_code="000"
      ;;
  esac

  t1=$(_now_ms)
  ms=$((t1 - t0))

  # Anything that is not a 3-digit code, and any transport failure that did
  # not yield an HTTP error code, is reported as 000.
  [[ "$http_code" =~ ^[0-9]{3}$ ]] || http_code="000"
  if ((rc != 0)) && ((10#$http_code < 400)); then
    http_code="000"
  fi

  # wc -c works on both GNU and BSD; the arithmetic strips BSD's padding.
  size=$(($(wc -c <"$tmp" 2>/dev/null || echo 0)))
  rm -f "$tmp"

  if [[ "$http_code" == "000" ]] || ((10#$http_code >= 400)); then
    status="error"
  fi

  printf '%s\n' "{\"scenario\":\"$scenario\",\"ms\":$ms,\"http\":\"$http_code\",\"bytes\":$size,\"status\":\"$status\"}"

  if [[ "$status" == "ok" ]]; then
    local label color
    if ((ms <= 2500)); then
      label="FAST"; color="$GREEN"
    elif ((ms <= 6000)); then
      label="OK"; color="$YELLOW"
    else
      label="SLOW"; color="$RED"
    fi
    echo -e " ${color}${label}${NC} ${scenario}: ${ms}ms (HTTP ${http_code}, ${size}B)"
  else
    echo -e " ${RED}FAIL${NC} ${scenario}: HTTP ${http_code} ${ms}ms"
  fi
}
# ── Scenario runner ────────────────────────────────────────────────────────────
#######################################
# Run every audit scenario against the BFF and the memory-service and append
# one JSON record per scenario to RESULTS.
# Globals:   BFF_URL, MEMORY_URL, RED, GREEN, YELLOW, NC (read)
#            RESULTS (appended)
# Arguments: none
# Outputs:   human-readable progress lines on stdout
# Notes:     defines the helper functions _size_bytes, _http_status,
#            _print_graded and _m when it runs.
#######################################
run_scenarios() {
  echo ""
  echo "╔════════════════════════════════════════════╗"
  echo "║ Voice Latency Audit — $(date +%H:%M:%S)"
  echo "╚════════════════════════════════════════════╝"
  echo ""

  # ── 0. Preflight ──────────────────────────────────────────────────────────
  echo "── Preflight ──"
  local bff_ok mem_ok
  bff_ok=$(curl -sf "$BFF_URL/api/memory/status" 2>/dev/null | python3 -c "import json,sys;print(json.load(sys.stdin).get('ok',False))" 2>/dev/null || echo "False")
  mem_ok=$(curl -sf "$MEMORY_URL/health" 2>/dev/null | python3 -c "import json,sys;print(json.load(sys.stdin).get('status','error'))" 2>/dev/null || echo "error")
  if [[ "$bff_ok" == "True" ]]; then
    pass "BFF ok ($BFF_URL)"
  else
    fail "BFF unreachable"
  fi
  if [[ "$mem_ok" == "healthy" ]]; then
    pass "Memory Service ok ($MEMORY_URL)"
  else
    fail "Memory Service: $mem_ok"
  fi

  # ── Helper: file size in bytes (0 if unreadable), GNU/BSD portable ────────
  _size_bytes() {
    local n
    n=$(wc -c 2>/dev/null <"$1") || n=0
    # Arithmetic expansion strips the padding BSD wc adds.
    echo "$((n + 0))"
  }

  # ── Helper: clean up curl's -w '%{http_code}' output ──────────────────────
  # $1 = raw -w output, $2 = curl exit status. curl prints the code even when
  # it exits non-zero, so the two are combined here: anything that is not a
  # 3-digit code, and any transport failure without an HTTP error code,
  # becomes 000.
  _http_status() {
    local code="$1" rc="$2"
    [[ "$code" =~ ^[0-9]{3}$ ]] || code="000"
    if ((rc != 0)) && ((10#$code < 400)); then
      code="000"
    fi
    echo "$code"
  }

  # ── Helper: print one coloured result line ────────────────────────────────
  # $1 status (ok|error), $2 ms, $3 FAST limit, $4 OK limit, $5 text,
  # $6 optional spelling of the OK label. A non-ok status always prints FAIL.
  _print_graded() {
    local status="$1" ms="$2" fast_max="$3" ok_max="$4" text="$5"
    local ok_label="${6:-OK}"
    local lbl col
    if [[ "$status" != "ok" ]]; then
      lbl="FAIL"; col="$RED"
    elif ((ms <= fast_max)); then
      lbl="FAST"; col="$GREEN"
    elif ((ms <= ok_max)); then
      lbl="$ok_label"; col="$YELLOW"
    else
      lbl="SLOW"; col="$RED"
    fi
    echo -e " ${col}${lbl}${NC} ${text}"
  }

  # ── Helper: measure and collect result ────────────────────────────────────
  # $1 scenario, $2 method (POST_JSON, anything else = GET), $3 URL, $4 body.
  _m() {
    local scenario="$1" method="$2" url="$3" body="${4:-}"
    local t0 t1 ms http_code="" size tmp status="ok" rc=0
    t0=$(_now_ms)
    tmp=$(mktemp)
    if [[ "$method" == "POST_JSON" ]]; then
      http_code=$(curl -sf -w "%{http_code}" -X POST "$url" \
        -H "Content-Type: application/json" \
        -d "$body" -o "$tmp" --max-time 30 2>/dev/null) || rc=$?
    else
      http_code=$(curl -sf -w "%{http_code}" "$url" \
        -o "$tmp" --max-time 20 2>/dev/null) || rc=$?
    fi
    t1=$(_now_ms)
    ms=$((t1 - t0))
    http_code=$(_http_status "$http_code" "$rc")
    size=$(_size_bytes "$tmp")
    rm -f "$tmp"
    if [[ "$http_code" == "000" ]] || ((10#$http_code >= 400)); then
      status="error"
    fi
    RESULTS+=("{\"scenario\":\"$scenario\",\"ms\":$ms,\"http\":\"$http_code\",\"bytes\":$size,\"status\":\"$status\"}")
    _print_graded "$status" "$ms" 2500 6000 \
      "${scenario}: ${ms}ms (HTTP ${http_code}, ${size}B)" "OK "
  }

  # ── 1. TTS directly against the memory-service ────────────────────────────
  echo ""
  echo "── TTS Scenarios (direct memory-service) ──"
  _m "TTS_short_polina" "POST_JSON" "$MEMORY_URL/voice/tts" '{"text":"Привіт! Це тест голосу.","voice":"default"}'
  _m "TTS_medium_polina" "POST_JSON" "$MEMORY_URL/voice/tts" '{"text":"Я Sofiia, головний AI-архітектор DAARION.city. Моя роль — розробка AI-рішень.","voice":"uk-UA-PolinaNeural"}'
  _m "TTS_ostap" "POST_JSON" "$MEMORY_URL/voice/tts" '{"text":"Перевірка голосу Остап Нейрал. Технічний тест.","voice":"Ostap"}'

  # ── 2. TTS via BFF proxy ──────────────────────────────────────────────────
  echo ""
  echo "── TTS via BFF proxy ──"
  _m "TTS_bff_short" "POST_JSON" "$BFF_URL/api/voice/tts" '{"text":"Перевірка через BFF.","voice":"default"}'
  _m "TTS_bff_medium" "POST_JSON" "$BFF_URL/api/voice/tts" '{"text":"NODA2 voice pipeline: edge-tts 7.2.7, Polina Neural, Ukrainian. OK.","voice":"uk-UA-PolinaNeural"}'

  # ── 3. Chat: per-model LLM latency comparison ─────────────────────────────
  echo ""
  echo "── Chat: LLM per-model latency (voice turn = 1 sentence) ──"
  # Shared request prefix; each call appends the model and voice profile.
  local _q='{"message":"Одне речення (max 15 слів): що таке NODA2?","model":'
  _m "CHAT_gemma3" "POST_JSON" "$BFF_URL/api/chat/send" "${_q}\"ollama:gemma3:latest\",\"voice_profile\":\"voice_fast_uk\"}"
  _m "CHAT_qwen3_8b" "POST_JSON" "$BFF_URL/api/chat/send" "${_q}\"ollama:qwen3:8b\",\"voice_profile\":\"voice_fast_uk\"}"
  _m "CHAT_qwen3_14b" "POST_JSON" "$BFF_URL/api/chat/send" "${_q}\"ollama:qwen3:14b\",\"voice_profile\":\"voice_fast_uk\"}"
  _m "CHAT_qwen35_35b" "POST_JSON" "$BFF_URL/api/chat/send" "${_q}\"ollama:qwen3.5:35b-a3b\",\"voice_profile\":\"voice_quality_uk\"}"

  # ── 4. Voice health check ─────────────────────────────────────────────────
  echo ""
  echo "── Voice health (live Polina/Ostap synthesis) ──"
  _m "VOICE_HEALTH_live" "GET" "$MEMORY_URL/voice/health"

  # ── 5. Parallel TTS (simulate 2 concurrent users) ─────────────────────────
  echo ""
  echo "── Parallel TTS (2 concurrent) ──"
  local tmp1 tmp2 pid1 pid2 rc1=0 rc2=0
  local t0_par t1_par par_ms s1 s2 par_status="error"
  tmp1=$(mktemp)
  tmp2=$(mktemp)
  t0_par=$(_now_ms)
  curl -sf -X POST "$MEMORY_URL/voice/tts" \
    -H "Content-Type: application/json" \
    -d '{"text":"Перший паралельний запит.","voice":"default"}' \
    -o "$tmp1" --max-time 15 &
  pid1=$!
  curl -sf -X POST "$MEMORY_URL/voice/tts" \
    -H "Content-Type: application/json" \
    -d '{"text":"Другий паралельний запит.","voice":"Ostap"}' \
    -o "$tmp2" --max-time 15 &
  pid2=$!
  # Collect each job's exit status; a failed job must not trip `set -e`.
  wait "$pid1" || rc1=$?
  wait "$pid2" || rc2=$?
  t1_par=$(_now_ms)
  par_ms=$((t1_par - t0_par))
  s1=$(_size_bytes "$tmp1")
  s2=$(_size_bytes "$tmp2")
  rm -f "$tmp1" "$tmp2"
  # Both requests must succeed and return more than 1000 bytes each.
  if ((rc1 == 0 && rc2 == 0 && s1 > 1000 && s2 > 1000)); then
    par_status="ok"
  fi
  RESULTS+=("{\"scenario\":\"TTS_parallel_2x\",\"ms\":$par_ms,\"bytes\":$((s1 + s2)),\"status\":\"$par_status\"}")
  _print_graded "$par_status" "$par_ms" 4000 8000 \
    "TTS_parallel_2x: ${par_ms}ms (s1=${s1}B s2=${s2}B)"

  # ── 6. STT smoke test (synthetic WAV) ─────────────────────────────────────
  echo ""
  echo "── STT smoke (silent WAV stub) ──"
  # Private temp files instead of fixed /tmp names, so concurrent runs do not
  # overwrite each other's files.
  local wav stt_out
  wav=$(mktemp)
  stt_out=$(mktemp)
  # 44-byte WAV header followed by 1 s of 16 kHz mono int16 silence. The
  # Python lines must stay at column 0 because they sit inside a quoted string.
  python3 -c "
import struct, sys
sample_rate = 16000
duration_s = 1
num_samples = sample_rate * duration_s
data_size = num_samples * 2
header = struct.pack('<4sI4s4sIHHIIHH4sI',
    b'RIFF', 36 + data_size, b'WAVE',
    b'fmt ', 16, 1, 1, sample_rate, sample_rate*2, 2, 16,
    b'data', data_size)
sys.stdout.buffer.write(header + bytes(data_size))
" >"$wav" 2>/dev/null || echo "" >"$wav"

  local t0_stt t1_stt stt_ms stt_code="" stt_rc=0 stt_status="error"
  t0_stt=$(_now_ms)
  stt_code=$(curl -sf -w "%{http_code}" -X POST "$MEMORY_URL/voice/stt" \
    -F "audio=@${wav};filename=audio.wav;type=audio/wav" \
    -F "language=uk" \
    -o "$stt_out" \
    --max-time 30 2>/dev/null) || stt_rc=$?
  t1_stt=$(_now_ms)
  stt_ms=$((t1_stt - t0_stt))
  stt_code=$(_http_status "$stt_code" "$stt_rc")
  if [[ "$stt_code" == "200" ]]; then
    stt_status="ok"
  fi
  RESULTS+=("{\"scenario\":\"STT_silence_stub\",\"ms\":$stt_ms,\"http\":\"$stt_code\",\"status\":\"$stt_status\"}")
  _print_graded "$stt_status" "$stt_ms" 2000 5000 \
    "STT_silence_stub: ${stt_ms}ms (HTTP ${stt_code})"
  rm -f "$wav" "$stt_out"
}
# ── Compute p50/p95 ────────────────────────────────────────────────────────────
compute_stats() {
python3 - "${RESULTS[@]}" <<'PYEOF'
import json, sys, statistics
results = []
for arg in sys.argv[1:]:
try:
results.append(json.loads(arg))
except Exception:
pass
if not results:
print("No results to analyze.")
sys.exit(0)
tts_ms = [r["ms"] for r in results if "TTS" in r["scenario"] and r["status"] == "ok"]
stt_ms = [r["ms"] for r in results if "STT" in r["scenario"] and r["status"] == "ok"]
# per-model chat latency
model_map = {
"gemma3": [r["ms"] for r in results if r["scenario"] == "CHAT_gemma3" and r["status"] == "ok"],
"qwen3:8b": [r["ms"] for r in results if r["scenario"] == "CHAT_qwen3_8b" and r["status"] == "ok"],
"qwen3:14b": [r["ms"] for r in results if r["scenario"] == "CHAT_qwen3_14b" and r["status"] == "ok"],
"qwen3.5:35b":[r["ms"] for r in results if r["scenario"] == "CHAT_qwen35_35b"and r["status"] == "ok"],
}
all_chat_ms = [ms for vals in model_map.values() for ms in vals]
def stats(label, vals):
if not vals:
print(f" {label}: no data")
return
s = sorted(vals)
p50 = s[len(s)//2]
p95 = s[int(len(s)*0.95)] if len(s) > 1 else s[-1]
print(f" {label}: p50={p50}ms p95={p95}ms min={min(s)}ms max={max(s)}ms n={len(s)}")
print("")
print("── Latency Summary ──")
stats("TTS (memory-service + proxy)", tts_ms)
stats("Chat (all models)", all_chat_ms)
stats("STT (memory-service)", stt_ms)
print("")
print("── LLM Model Comparison (voice: 1 sentence) ──")
rows = []
for model, vals in model_map.items():
if vals:
s = sorted(vals)
p50 = s[len(s)//2]
rows.append((p50, model, vals[0]))
else:
rows.append((99999, model, None))
rows.sort()
fastest = rows[0][1] if rows else "?"
for rank, (p50, model, ms) in enumerate(rows, 1):
icon = "🥇" if rank == 1 else ("🥈" if rank == 2 else ("🥉" if rank == 3 else " "))
note = " ← fastest (voice_fast default)" if model == fastest else ""
ms_str = f"{ms}ms" if ms else "no data"
print(f" {icon} {model:20s} {ms_str}{note}")
# Bottleneck analysis
all_ok = [r for r in results if r["status"] == "ok"]
all_err = [r for r in results if r["status"] != "ok"]
print(f" Passed: {len(all_ok)}/{len(results)} scenarios")
if all_err:
print(f" Failed: {[r['scenario'] for r in all_err]}")
# SLO check
print("")
print("── SLO Check (voice_fast_uk) ──")
tts_p95 = sorted(tts_ms)[int(len(tts_ms)*0.95)] if len(tts_ms) > 1 else (tts_ms[0] if tts_ms else 9999)
q14_ms = model_map["qwen3:14b"][0] if model_map["qwen3:14b"] else 9999
q35_ms = model_map["qwen3.5:35b"][0] if model_map["qwen3.5:35b"] else 9999
g3_ms = model_map["gemma3"][0] if model_map["gemma3"] else 9999
fast_ms = min(q14_ms, q35_ms, g3_ms)
checks = [
("TTS p95 ≤ 2500ms", tts_p95 <= 2500, f"actual={tts_p95}ms"),
("Fastest model ≤ 9000ms", fast_ms <= 9000, f"actual={fast_ms}ms"),
("qwen3:14b ≤ 12000ms", q14_ms <= 12000, f"actual={q14_ms}ms"),
("qwen3.5 faster than 14b", q35_ms < q14_ms, f"35b={q35_ms}ms vs 14b={q14_ms}ms"),
("qwen3.5 auto-promote worthy", q35_ms < q14_ms * 0.9, f"ratio={round(q35_ms/max(q14_ms,1),2)} (need <0.9)"),
]
for label, ok, detail in checks:
icon = "✅" if ok else "⚠️ "
print(f" {icon} {label} ({detail})")
print("")
# Verdict on qwen3.5 promotion
if q35_ms < q14_ms * 0.9:
print(" ✅ VERDICT: qwen3.5:35b-a3b qualifies for auto_promote in voice_fast_uk")
elif q35_ms < q14_ms:
print(" VERDICT: qwen3.5 is faster but not by 10% — keep as 2nd in prefer list")
else:
print(" ⚠️ VERDICT: qwen3.5 slower than qwen3:14b — move to voice_quality_uk only")
PYEOF
}
# ── Save results ──────────────────────────────────────────────────────────────
#######################################
# Write the collected scenario results as one JSON report.
# Globals:   RESULTS_FILE (read: output path)
#            RESULTS (read when no arguments are given)
# Arguments: optional JSON result records; defaults to the RESULTS array
# Outputs:   the report file; a "Saved:" line on stdout; warnings on stderr
#######################################
save_results() {
  local -a items=("$@")
  if ((${#items[@]} == 0)); then
    # The ${arr[@]+...} form keeps an empty array from tripping `set -u` on
    # older bash versions.
    items=(${RESULTS[@]+"${RESULTS[@]}"})
  fi
  PYTHONIOENCODING=utf-8 python3 - ${items[@]+"${items[@]}"} "$RESULTS_FILE" <<'PYEOF'
import json
import os
import sys
from datetime import datetime, timezone

# The last argument is the output path; everything before it is a record.
*raw_items, out_file = sys.argv[1:]

results = []
for raw in raw_items:
    try:
        results.append(json.loads(raw))
    except ValueError:
        print(f" WARN skipping malformed result: {raw!r}", file=sys.stderr)

doc = {
    "timestamp": datetime.now(timezone.utc).isoformat(),
    "scenarios_total": len(results),
    # A record without a status counts as not passed instead of raising.
    "scenarios_passed": sum(
        1 for r in results if isinstance(r, dict) and r.get("status") == "ok"
    ),
    "results": results,
}

parent = os.path.dirname(out_file)
if parent:
    os.makedirs(parent, exist_ok=True)
# ensure_ascii=False can emit non-ASCII text, so fix the file encoding.
with open(out_file, "w", encoding="utf-8") as f:
    json.dump(doc, f, indent=2, ensure_ascii=False)
print(f"\n Saved: {out_file}")
PYEOF
}
# ── Main ──────────────────────────────────────────────────────────────────────
# Collect the measurements, then hand the same result list to each reporter.
run_scenarios
for step in compute_stats save_results; do
  "$step" "${RESULTS[@]}"
done
printf '\n'
printf '%s\n' "Done. Rerun: bash ops/voice_latency_audit.sh $BFF_URL $MEMORY_URL"