docs(platform): add policy configs, runbooks, ops scripts and platform documentation

Config policies (17 files): alert_routing, architecture_pressure, backlog,
cost_weights, data_governance, incident_escalation, incident_intelligence,
network_allowlist, nodes_registry, observability_sources, rbac_tools_matrix,
release_gate, risk_attribution, risk_policy, slo_policy, tool_limits, tools_rollout

Ops (22 files): Caddyfile, calendar compose, grafana voice dashboard,
deployments/incidents logs, runbooks for alerts/audit/backlog/incidents/sofiia/voice,
cron jobs, scripts (alert_triage, audit_cleanup, migrate_*, governance, schedule),
task_registry, voice alerts/ha/latency/policy

Docs (30+ files): HUMANIZED_STEPAN v2.7-v3 changelogs and runbooks,
NODA1/NODA2 status and setup, audit index and traces, backlog, incident,
supervisor, tools, voice, opencode, release, risk, aistalk, spacebot

Made-with: Cursor
This commit is contained in:
Apple
2026-03-03 07:14:53 -08:00
parent 129e4ea1fc
commit 67225a39fa
102 changed files with 20060 additions and 0 deletions

336
ops/voice_policy_update.py Normal file
View File

@@ -0,0 +1,336 @@
#!/usr/bin/env python3
"""
voice_policy_update.py — Auto-update voice routing policy from audit results.
What it does:
1. Reads the N most recent audit JSON files from ops/voice_audit_results/
2. Computes p50/p95 per model across all runs
3. Generates ops/voice_latency_report.json — canonical latency snapshot
4. Proposes (and optionally applies) updates to router-config.yml:
- prefer_models order (sorted by p50 ascending)
- exclude_models (models with p50 > SLOW_THRESHOLD_MS)
- auto_promote: set/update p95_ratio based on real data
Usage:
python3 ops/voice_policy_update.py [--apply] [--n-runs 5]
python3 ops/voice_policy_update.py # dry-run: print proposals only
python3 ops/voice_policy_update.py --apply # write changes to router-config.yml
Safety:
- Always prints a diff before applying.
- Creates a backup: router-config.yml.bak before any write.
- Never removes a model that has no audit data (unknown ≠ slow).
- Never touches sections other than selection_policies.
"""
import argparse
import json
import os
import re
import shutil
import statistics
from collections import defaultdict
from datetime import datetime, timezone
from pathlib import Path
import yaml
# Repo root resolved from this file's location (ops/ -> repo root).
REPO_ROOT = Path(__file__).parent.parent
# Directory the latency audit script writes audit_*.json results into.
AUDIT_DIR = REPO_ROOT / "ops" / "voice_audit_results"
# Canonical latency snapshot produced by this script (step 3 in module docstring).
REPORT_FILE = REPO_ROOT / "ops" / "voice_latency_report.json"
# Router policy file this script proposes (and with --apply, writes) changes to.
ROUTER_CONFIG = REPO_ROOT / "services" / "router" / "router-config.yml"
# Models with p50 > this are considered "slow" for voice
SLOW_THRESHOLD_MS = 15_000
# Models with p50 > this but ≤ SLOW_THRESHOLD_MS → warn only
WARN_THRESHOLD_MS = 10_000
# auto_promote: apply if ratio < this
AUTO_PROMOTE_RATIO_THRESHOLD = 0.9
# Scenario name → model mapping (from audit script naming convention)
SCENARIO_MODEL_MAP: dict[str, str] = {
    "CHAT_gemma3": "gemma3:latest",
    "CHAT_qwen3_8b": "qwen3:8b",
    "CHAT_qwen3_14b": "qwen3:14b",
    "CHAT_qwen35_35b": "qwen3.5:35b-a3b",
}
def load_audits(n: int = 10) -> list[dict]:
    """Return up to *n* most recent audit JSON payloads from AUDIT_DIR.

    Newest-first order comes from a reverse lexicographic sort of the
    filenames (assumes audit_<timestamp>.json naming — TODO confirm against
    the audit script). Unreadable or corrupt files are skipped with a
    warning rather than aborting the whole run.
    """
    if not AUDIT_DIR.exists():
        return []
    newest_first = sorted(AUDIT_DIR.glob("audit_*.json"), reverse=True)
    loaded: list[dict] = []
    for path in newest_first[:n]:
        try:
            loaded.append(json.loads(path.read_text()))
        except Exception as exc:  # best-effort: one bad file must not stop the rest
            print(f" warn: could not load {path}: {exc}")
    return loaded
def aggregate_model_latency(audits: list[dict]) -> dict[str, dict]:
    """Aggregate per-model latency stats across all audit runs.

    Returns {model_name: {p50, p95, min, max, n, samples}} built only from
    results with status "ok" and a positive "ms" value; scenarios without an
    entry in SCENARIO_MODEL_MAP are ignored.
    """
    by_model: dict[str, list[int]] = defaultdict(list)
    for audit in audits:
        for run in audit.get("results", []):
            model = SCENARIO_MODEL_MAP.get(run.get("scenario", ""))
            if model is None:
                continue
            if run.get("status") == "ok" and run.get("ms", 0) > 0:
                by_model[model].append(run["ms"])
    stats: dict[str, dict] = {}
    for model, values in by_model.items():
        ordered = sorted(values)
        count = len(ordered)
        stats[model] = {
            # Nearest-rank percentiles over the sorted samples; with a single
            # sample p95 falls back to that sample.
            "p50": ordered[count // 2],
            "p95": ordered[int(count * 0.95)] if count > 1 else ordered[-1],
            "min": ordered[0],
            "max": ordered[-1],
            "n": count,
            "samples": ordered,
        }
    return stats
def aggregate_tts_latency(audits: list[dict]) -> dict:
    """Aggregate TTS latency stats across all audit runs.

    Collects "ms" values from successful results whose scenario name contains
    "TTS" and returns {p50, p95, min, max, n}, or {} when there are no usable
    samples.
    """
    tts_ms = []
    for audit in audits:
        for r in audit.get("results", []):
            # Mirror aggregate_model_latency's guard: require status "ok" AND
            # a positive "ms". Without it, an "ok" result missing "ms" raised
            # KeyError and zero readings skewed the percentiles.
            if "TTS" in r.get("scenario", "") and r.get("status") == "ok" and r.get("ms", 0) > 0:
                tts_ms.append(r["ms"])
    if not tts_ms:
        return {}
    s = sorted(tts_ms)
    return {
        # Nearest-rank percentiles; single sample falls back to itself for p95.
        "p50": s[len(s) // 2],
        "p95": s[int(len(s) * 0.95)] if len(s) > 1 else s[-1],
        "min": min(s), "max": max(s), "n": len(s),
    }
def generate_report(model_stats: dict, tts_stats: dict, n_audits: int) -> dict:
    """Assemble the canonical latency report from aggregated stats.

    The report carries: SLO targets, per-model stats annotated with a status
    (ok / warn_borderline / slow_exclude by p50 thresholds), recommended
    prefer/exclude lists for voice_fast_uk, and an auto_promote verdict
    comparing qwen3.5:35b-a3b against qwen3:14b when both have data.
    """
    report = {
        "generated_at": datetime.now(timezone.utc).isoformat(),
        "n_audit_runs": n_audits,
        "slo": {
            "tts_p95_target_ms": 2500,
            "llm_fast_p95_target_ms": 9000,
            "e2e_p95_target_ms": 12000,
        },
        "tts": tts_stats,
        "models": {},
        "voice_fast_uk_recommended_prefer": [],
        "voice_fast_uk_recommended_exclude": [],
        "auto_promote_verdict": None,
    }
    # Rank models fastest-first by p50 and tag each with a status.
    ranked = sorted(model_stats.items(), key=lambda item: item[1]["p50"])
    for model, stats in ranked:
        if stats["p50"] > SLOW_THRESHOLD_MS:
            status = "slow_exclude"
        elif stats["p50"] > WARN_THRESHOLD_MS:
            status = "warn_borderline"
        else:
            status = "ok"
        report["models"][model] = {**stats, "status": status}
    # Split the ranking into prefer (fast enough) and exclude (too slow).
    prefer: list[str] = []
    exclude: list[str] = []
    for model, stats in ranked:
        (prefer if stats["p50"] <= SLOW_THRESHOLD_MS else exclude).append(model)
    # Known-slow models are excluded unconditionally, even without audit data.
    for known_slow in ["glm-4.7-flash:32k", "glm-4.7-flash", "deepseek-r1:70b"]:
        if known_slow not in exclude:
            exclude.append(known_slow)
    report["voice_fast_uk_recommended_prefer"] = prefer
    report["voice_fast_uk_recommended_exclude"] = exclude
    # auto_promote verdict: qwen3.5 (candidate) vs qwen3:14b (baseline).
    q35 = model_stats.get("qwen3.5:35b-a3b", {})
    q14 = model_stats.get("qwen3:14b", {})
    if q35 and q14:
        ratio = q35["p95"] / max(q14["p95"], 1)
        if ratio < AUTO_PROMOTE_RATIO_THRESHOLD:
            verdict = "promote"
        elif ratio < 1.0:
            verdict = "keep_as_second"
        else:
            verdict = "move_to_quality_only"
        report["auto_promote_verdict"] = {
            "candidate": "qwen3.5:35b-a3b",
            "baseline": "qwen3:14b",
            "ratio": round(ratio, 3),
            "threshold": AUTO_PROMOTE_RATIO_THRESHOLD,
            "verdict": verdict,
            "description": {
                "promote": "qwen3.5 qualifies for auto_promote in voice_fast_uk (>10% faster)",
                "keep_as_second": "qwen3.5 faster but <10% — keep as 2nd in prefer list",
                "move_to_quality_only": "qwen3.5 slower than 14b — move to voice_quality_uk only",
            }[verdict],
        }
    return report
def propose_config_updates(report: dict, current_config: dict) -> list[dict]:
    """Diff the latency report against the router config.

    Returns a list of {section, old, new, reason} proposals; only the
    selection_policies.voice_fast_uk subtree is ever targeted.
    """
    proposals: list[dict] = []
    voice_fast = current_config.get("selection_policies", {}).get("voice_fast_uk", {})
    rec_prefer = report.get("voice_fast_uk_recommended_prefer", [])
    rec_exclude = report.get("voice_fast_uk_recommended_exclude", [])
    cur_prefer = voice_fast.get("prefer_models", [])
    cur_exclude = voice_fast.get("exclude_models", [])
    # 1) prefer_models: propose only when we have data and the order differs.
    if rec_prefer and rec_prefer != cur_prefer:
        proposals.append({
            "section": "selection_policies.voice_fast_uk.prefer_models",
            "old": cur_prefer,
            "new": rec_prefer,
            "reason": f"Sorted by p50 from {report['n_audit_runs']} audit run(s)",
        })
    # 2) exclude_models: append newly-discovered slow models, keep existing ones.
    additions = [m for m in rec_exclude if m not in cur_exclude]
    if additions:
        proposals.append({
            "section": "selection_policies.voice_fast_uk.exclude_models",
            "old": cur_exclude,
            "new": cur_exclude + additions,
            "reason": f"Add slow models: {additions}",
        })
    # 3) auto_promote p95 ratio: refresh when missing or drifted by > 0.05.
    ap_verdict = report.get("auto_promote_verdict")
    if ap_verdict:
        condition = voice_fast.get("auto_promote", {}).get("condition", {})
        old_ratio = condition.get("p95_ratio_vs_next_model", None)
        fresh_ratio = ap_verdict["ratio"]
        if old_ratio is None or abs(old_ratio - fresh_ratio) > 0.05:
            proposals.append({
                "section": "selection_policies.voice_fast_uk.auto_promote.condition.p95_ratio_vs_next_model",
                "old": old_ratio,
                "new": round(fresh_ratio, 3),
                "reason": f"Updated from real audit data. Verdict: {ap_verdict['verdict']}",
            })
    return proposals
def apply_updates(proposals: list[dict], config_path: Path) -> None:
    """Write accepted proposals into the YAML config, keeping a .bak backup.

    Each proposal's dotted ``section`` path is walked top-down (creating
    intermediate mappings as needed) and the leaf key set to ``new``.
    """
    # Snapshot the current config before any mutation.
    backup_path = config_path.with_suffix(".yml.bak")
    shutil.copy2(config_path, backup_path)
    print(f"\n Backup created: {backup_path}")
    with open(config_path) as fh:
        config = yaml.safe_load(fh)
    for proposal in proposals:
        dotted = proposal["section"]
        *parents, leaf = dotted.split(".")
        node = config
        for key in parents:
            node = node.setdefault(key, {})
        node[leaf] = proposal["new"]
        print(f" Applied: {dotted}")
        print(f" old: {proposal['old']}")
        print(f" new: {proposal['new']}")
    with open(config_path, "w") as fh:
        yaml.dump(config, fh, allow_unicode=True, default_flow_style=False, sort_keys=False)
    print(f"\n Written: {config_path}")
def print_report_summary(report: dict) -> None:
    """Pretty-print the latency report to stdout (display only, no mutation)."""
    print("\n── Voice Latency Report ──")
    print(f" Generated: {report['generated_at']}")
    print(f" Audit runs analyzed: {report['n_audit_runs']}")
    tts = report.get("tts", {})
    if tts:
        # Flag TTS with a warning icon when p95 misses the 2500ms SLO target.
        slo_flag = "" if tts.get("p95", 9999) <= 2500 else "⚠️ "
        print(f"\n TTS: p50={tts.get('p50')}ms p95={tts.get('p95')}ms {slo_flag}")
    print("\n LLM Model Ranking (by p50):")
    status_icons = {"ok": "", "warn_borderline": "⚠️ ", "slow_exclude": ""}
    for name, stats in report.get("models", {}).items():
        icon = status_icons.get(stats["status"], " ")
        print(f" {icon} {name:25s} p50={stats['p50']}ms p95={stats['p95']}ms n={stats['n']}")
    ap = report.get("auto_promote_verdict")
    if ap:
        verdict_icons = {"promote": "", "keep_as_second": " ", "move_to_quality_only": "⚠️ "}
        verdict_icon = verdict_icons.get(ap["verdict"], " ")
        print(f"\n auto_promote: {verdict_icon} {ap['description']} (ratio={ap['ratio']})")
    print(f"\n Recommended prefer: {report.get('voice_fast_uk_recommended_prefer')}")
    # Display filter only: hide the hard-coded always-exclude models so the
    # line shows just the additions discovered from audit data.
    print(f" Recommended exclude additions: {[m for m in report.get('voice_fast_uk_recommended_exclude', []) if 'glm' not in m and 'deepseek' not in m]}")
def main():
    """CLI entry point: analyze audits, save the report, propose/apply updates."""
    parser = argparse.ArgumentParser(description="Voice policy auto-update from audit data")
    parser.add_argument("--apply", action="store_true", help="Apply updates to router-config.yml")
    parser.add_argument("--n-runs", type=int, default=10, help="Number of recent audit runs to analyze")
    args = parser.parse_args()
    print("╔══════════════════════════════════════════╗")
    print("║ Voice Policy Auto-Update ║")
    print("╚══════════════════════════════════════════╝")
    audits = load_audits(args.n_runs)
    if not audits:
        print("\n No audit results found. Run: bash ops/voice_latency_audit.sh first.")
        return
    print(f"\n Loaded {len(audits)} audit run(s)")
    report = generate_report(
        aggregate_model_latency(audits),
        aggregate_tts_latency(audits),
        len(audits),
    )
    print_report_summary(report)
    # The canonical latency snapshot is written on every run, apply or not.
    with open(REPORT_FILE, "w") as fh:
        json.dump(report, fh, indent=2, ensure_ascii=False)
    print(f"\n Report saved: {REPORT_FILE}")
    with open(ROUTER_CONFIG) as fh:
        current_config = yaml.safe_load(fh)
    proposals = propose_config_updates(report, current_config)
    if not proposals:
        print("\n No config changes needed — policy is up to date.")
    else:
        print(f"\n── {len(proposals)} Proposed Config Update(s) ──")
        for proposal in proposals:
            print(f"\n [{proposal['section']}]")
            print(f" reason: {proposal['reason']}")
            print(f" old: {proposal['old']}")
            print(f" new: {proposal['new']}")
        if args.apply:
            print("\n── Applying updates ──")
            apply_updates(proposals, ROUTER_CONFIG)
            print("\n Done. Re-run tests: python3 -m pytest tests/test_voice_policy.py -v")
        else:
            print("\n Dry-run mode. To apply: python3 ops/voice_policy_update.py --apply")


if __name__ == "__main__":
    main()