feat(platform): add new services, tools, tests and crews modules
New router intelligence modules (26 files): alert_ingest/store, audit_store, architecture_pressure, backlog_generator/store, cost_analyzer, data_governance, dependency_scanner, drift_analyzer, incident_* (5 files), llm_enrichment, platform_priority_digest, provider_budget, release_check_runner, risk_* (6 files), signature_state_store, sofiia_auto_router, tool_governance New services: - sofiia-console: Dockerfile, adapters/, monitor/nodes/ops/voice modules, launchd, react static - memory-service: integration_endpoints, integrations, voice_endpoints, static UI - aurora-service: full app suite (analysis, job_store, orchestrator, reporting, schemas, subagents) - sofiia-supervisor: new supervisor service - aistalk-bridge-lite: Telegram bridge lite - calendar-service: CalDAV calendar service with reminders - mlx-stt-service / mlx-tts-service: Apple Silicon speech services - binance-bot-monitor: market monitor service - node-worker: STT/TTS memory providers New tools (9): agent_email, browser_tool, contract_tool, observability_tool, oncall_tool, pr_reviewer_tool, repo_tool, safe_code_executor, secure_vault New crews: agromatrix_crew (10 modules: depth_classifier, doc_facts, doc_focus, farm_state, light_reply, llm_factory, memory_manager, proactivity, reflection_engine, session_context, style_adapter, telemetry) Tests: 85+ test files for all new modules Made-with: Cursor
This commit is contained in:
285
tests/test_voice_policy.py
Normal file
285
tests/test_voice_policy.py
Normal file
@@ -0,0 +1,285 @@
|
||||
"""
|
||||
Voice routing policy contract tests.
|
||||
|
||||
Rules enforced:
|
||||
1. voice_fast_uk must NOT contain models with known p50 > 15000ms.
|
||||
2. voice_fast_uk must contain at least one model with p50 ≤ 3000ms (fast gate).
|
||||
3. voice_quality_uk deadline_ms ≤ 12000 (bounded — not unlimited chat).
|
||||
4. voice_fast_uk deadline_ms ≤ 9000.
|
||||
5. auto_promote.condition.p95_ratio_vs_next_model < 1.0 (must be faster, not slower).
|
||||
6. exclude_models list must contain known-slow models.
|
||||
7. voice_fast_uk max_tokens (num_predict equivalent) ≤ 256.
|
||||
8. Audit JSON (if present): in the latest run every TTS scenario must have status "ok" and TTS p95 must be ≤ 2500ms.
|
||||
9. voice_guardrails: SOFIIA_VOICE_PROMPT_SUFFIX must contain hard constraints.
|
||||
10. BFF: ChatSendBody must accept voice_profile field.
|
||||
"""
|
||||
import json
|
||||
import os
|
||||
import re
|
||||
from pathlib import Path
|
||||
|
||||
import pytest
|
||||
import yaml
|
||||
|
||||
# Filesystem locations. REPO_ROOT is the parent of the directory that
# contains this test module; every other path hangs off it.
REPO_ROOT = Path(__file__).parent.parent
ROUTER_CONFIG = REPO_ROOT.joinpath("services", "router", "router-config.yml")
AUDIT_DIR = REPO_ROOT.joinpath("ops", "voice_audit_results")
BFF_MAIN = REPO_ROOT.joinpath("services", "sofiia-console", "app", "main.py")

# Models with p50 > 15s in any voice audit run; they are banned from
# voice_fast_uk.
KNOWN_SLOW_MODELS = {"deepseek-r1:70b", "glm-4.7-flash", "glm-4.7-flash:32k"}

# Hard deadline ceilings for the two voice profiles, in milliseconds.
VOICE_QUALITY_MAX_DEADLINE_MS = 12_000
VOICE_FAST_MAX_DEADLINE_MS = 9_000

# SLO: at least one preferred model must historically stay at or below this
# latency (ms).
VOICE_FAST_GATE_MS = 6_000  # gemma3 measured at ~2.6s
|
||||
|
||||
|
||||
@pytest.fixture(scope="module")
def router_config() -> dict:
    """Parse ``ROUTER_CONFIG`` once per test module.

    Returns:
        The mapping produced by ``yaml.safe_load`` for the router config.

    Raises:
        OSError: if the config file cannot be opened.
        AssertionError: if the document does not parse to a mapping
            (for example an empty file, which ``safe_load`` turns into None).
    """
    # Decode explicitly as UTF-8; without ``encoding`` the platform locale
    # decides, which breaks on non-UTF-8 locales as soon as the config holds
    # non-ASCII text.
    with open(ROUTER_CONFIG, encoding="utf-8") as f:
        config = yaml.safe_load(f)
    # Fail here with a clear message instead of an AttributeError on the
    # first ``.get`` call in a dependent fixture.
    assert isinstance(config, dict), (
        f"{ROUTER_CONFIG} must contain a YAML mapping, got {type(config).__name__}"
    )
    return config
|
||||
|
||||
|
||||
@pytest.fixture(scope="module")
def voice_fast(router_config) -> dict:
    """Return the ``voice_fast_uk`` entry of ``selection_policies``.

    Fails with an assertion error when the entry is absent from the
    router configuration.
    """
    available = router_config.get("selection_policies", {})
    is_defined = "voice_fast_uk" in available
    assert is_defined, "selection_policies.voice_fast_uk not found in router-config.yml"
    return available["voice_fast_uk"]
|
||||
|
||||
|
||||
@pytest.fixture(scope="module")
def voice_quality(router_config) -> dict:
    """Return the ``voice_quality_uk`` entry of ``selection_policies``.

    Fails with an assertion error when the entry is absent from the
    router configuration.
    """
    available = router_config.get("selection_policies", {})
    is_defined = "voice_quality_uk" in available
    assert is_defined, "selection_policies.voice_quality_uk not found"
    return available["voice_quality_uk"]
|
||||
|
||||
|
||||
@pytest.fixture(scope="module")
def latest_audit() -> dict | None:
    """Load the most recent voice audit JSON if present.

    "Most recent" means the ``audit_*.json`` path in ``AUDIT_DIR`` that
    compares greatest.
    NOTE(review): this presumes the file names embed a sortable timestamp;
    confirm against the audit script.

    Returns:
        The parsed JSON document, or None when the directory does not exist
        or holds no matching file.
    """
    if not AUDIT_DIR.exists():
        return None
    # max() picks the same file the previous reverse-sort-and-take-first did,
    # without building and sorting the whole list.
    newest = max(AUDIT_DIR.glob("audit_*.json"), default=None)
    if newest is None:
        return None
    # JSON interchange text is UTF-8 (RFC 8259); do not depend on the locale.
    with open(newest, encoding="utf-8") as f:
        return json.load(f)
|
||||
|
||||
|
||||
# ── 1. voice_fast_uk: no slow models ─────────────────────────────────────────
|
||||
|
||||
class TestVoiceFastUkPolicy:
|
||||
|
||||
def test_no_known_slow_models_in_prefer(self, voice_fast):
|
||||
prefer = voice_fast.get("prefer_models", [])
|
||||
for model in prefer:
|
||||
assert model not in KNOWN_SLOW_MODELS, (
|
||||
f"SLOW model '{model}' found in voice_fast_uk.prefer_models — "
|
||||
f"remove or move to voice_quality_uk"
|
||||
)
|
||||
|
||||
def test_known_slow_models_are_excluded(self, voice_fast):
|
||||
excluded = set(voice_fast.get("exclude_models", []))
|
||||
missing = KNOWN_SLOW_MODELS - excluded
|
||||
assert not missing, (
|
||||
f"Known-slow models not in voice_fast_uk.exclude_models: {missing}"
|
||||
)
|
||||
|
||||
def test_deadline_bounded(self, voice_fast):
|
||||
deadline = voice_fast.get("deadline_ms", 0)
|
||||
assert deadline > 0, "deadline_ms must be set"
|
||||
assert deadline <= VOICE_FAST_MAX_DEADLINE_MS, (
|
||||
f"voice_fast_uk.deadline_ms={deadline} exceeds hard limit {VOICE_FAST_MAX_DEADLINE_MS}ms"
|
||||
)
|
||||
|
||||
def test_max_tokens_bounded(self, voice_fast):
|
||||
tokens = voice_fast.get("max_tokens", 999)
|
||||
assert tokens <= 256, (
|
||||
f"voice_fast_uk.max_tokens={tokens} — too high for voice (≤256 required)"
|
||||
)
|
||||
|
||||
def test_prefer_has_fast_model(self, voice_fast):
|
||||
prefer = voice_fast.get("prefer_models", [])
|
||||
assert len(prefer) >= 1, "voice_fast_uk must have at least one prefer_model"
|
||||
# gemma3 (known fast) should be first or second
|
||||
fast_candidates = [m for m in prefer if "gemma3" in m or "8b" in m]
|
||||
assert fast_candidates, (
|
||||
f"voice_fast_uk.prefer_models has no known-fast model (gemma3/8b). "
|
||||
f"Current list: {prefer}"
|
||||
)
|
||||
|
||||
def test_gemma3_is_first_or_second(self, voice_fast):
|
||||
prefer = voice_fast.get("prefer_models", [])
|
||||
gemma3_idx = next((i for i, m in enumerate(prefer) if "gemma3" in m), None)
|
||||
assert gemma3_idx is not None, "gemma3 must be in voice_fast_uk.prefer_models"
|
||||
assert gemma3_idx <= 1, (
|
||||
f"gemma3 should be first or second in prefer_models (found at index {gemma3_idx})"
|
||||
)
|
||||
|
||||
def test_auto_promote_ratio_less_than_one(self, voice_fast):
|
||||
ap = voice_fast.get("auto_promote", {})
|
||||
if not ap:
|
||||
pytest.skip("auto_promote not configured")
|
||||
ratio = ap.get("condition", {}).get("p95_ratio_vs_next_model", 1.0)
|
||||
assert ratio < 1.0, (
|
||||
f"auto_promote.condition.p95_ratio_vs_next_model={ratio} must be < 1.0 "
|
||||
f"(candidate must be faster than next model)"
|
||||
)
|
||||
|
||||
def test_require_caps_includes_tts(self, voice_fast):
|
||||
caps = voice_fast.get("require_caps", [])
|
||||
assert "tts" in caps, "voice_fast_uk must require caps=[tts]"
|
||||
|
||||
|
||||
# ── 2. voice_quality_uk: bounded deadline ────────────────────────────────────
|
||||
|
||||
class TestVoiceQualityUkPolicy:
|
||||
|
||||
def test_deadline_bounded(self, voice_quality):
|
||||
deadline = voice_quality.get("deadline_ms", 0)
|
||||
assert deadline <= VOICE_QUALITY_MAX_DEADLINE_MS, (
|
||||
f"voice_quality_uk.deadline_ms={deadline} exceeds {VOICE_QUALITY_MAX_DEADLINE_MS}ms"
|
||||
)
|
||||
|
||||
def test_no_known_slow_in_prefer(self, voice_quality):
|
||||
prefer = voice_quality.get("prefer_models", [])
|
||||
for model in prefer:
|
||||
assert model not in KNOWN_SLOW_MODELS, (
|
||||
f"SLOW model '{model}' in voice_quality_uk.prefer_models"
|
||||
)
|
||||
|
||||
def test_max_tokens_bounded(self, voice_quality):
|
||||
tokens = voice_quality.get("max_tokens", 999)
|
||||
assert tokens <= 320, (
|
||||
f"voice_quality_uk.max_tokens={tokens} exceeds 320 — high for voice"
|
||||
)
|
||||
|
||||
|
||||
# ── 3. Audit JSON: TTS scenarios must pass ───────────────────────────────────
|
||||
|
||||
class TestAuditResultsContract:
|
||||
|
||||
def test_tts_scenarios_all_pass(self, latest_audit):
|
||||
if latest_audit is None:
|
||||
pytest.skip("No audit results found — run ops/voice_latency_audit.sh first")
|
||||
results = latest_audit.get("results", [])
|
||||
tts_results = [r for r in results if "TTS" in r.get("scenario", "")]
|
||||
assert tts_results, "No TTS scenarios in audit results"
|
||||
failed = [r for r in tts_results if r.get("status") != "ok"]
|
||||
assert not failed, (
|
||||
f"TTS scenarios failed: {[r['scenario'] for r in failed]}"
|
||||
)
|
||||
|
||||
def test_tts_p95_under_slo(self, latest_audit):
|
||||
if latest_audit is None:
|
||||
pytest.skip("No audit results")
|
||||
results = latest_audit.get("results", [])
|
||||
tts_ms = [r["ms"] for r in results if "TTS" in r.get("scenario", "") and r.get("status") == "ok"]
|
||||
if not tts_ms:
|
||||
pytest.skip("No successful TTS results")
|
||||
tts_p95 = sorted(tts_ms)[int(len(tts_ms) * 0.95)] if len(tts_ms) > 1 else tts_ms[0]
|
||||
assert tts_p95 <= 2500, f"TTS p95={tts_p95}ms exceeds SLO of 2500ms"
|
||||
|
||||
def test_no_glm_in_passing_chat_scenarios(self, latest_audit):
|
||||
if latest_audit is None:
|
||||
pytest.skip("No audit results")
|
||||
results = latest_audit.get("results", [])
|
||||
glm_ok = [r for r in results
|
||||
if "glm" in r.get("scenario", "").lower()
|
||||
and r.get("status") == "ok"
|
||||
and r.get("ms", 0) > 15000]
|
||||
assert not glm_ok, (
|
||||
f"GLM model passed but took >15s — it must remain in exclude_models: {glm_ok}"
|
||||
)
|
||||
|
||||
|
||||
# ── 4. Voice guardrails in BFF ────────────────────────────────────────────────
|
||||
|
||||
class TestVoiceGuardrails:
    """Source-level checks of the voice guardrails in ``BFF_MAIN``.

    These tests read the BFF module as text and look for specific
    substrings; they do not import or execute it.
    """

    # Captures the body of the triple-double-quoted literal assigned to
    # SOFIIA_VOICE_PROMPT_SUFFIX.
    _SUFFIX_PATTERN = r'SOFIIA_VOICE_PROMPT_SUFFIX\s*=\s*"""(.*?)"""'

    @staticmethod
    def _bff_source() -> str:
        """Return the text of ``BFF_MAIN`` decoded as UTF-8."""
        # Explicit UTF-8: the checks below search for a Cyrillic literal, so
        # decoding with a non-UTF-8 locale default would fail or mismatch.
        return BFF_MAIN.read_text(encoding="utf-8")

    @classmethod
    def _suffix_body(cls) -> str:
        """Return the text inside the SOFIIA_VOICE_PROMPT_SUFFIX literal."""
        found = re.search(cls._SUFFIX_PATTERN, cls._bff_source(), re.DOTALL)
        assert found, "Could not parse SOFIIA_VOICE_PROMPT_SUFFIX"
        return found.group(1)

    def test_voice_prompt_suffix_exists(self):
        """The BFF source must mention SOFIIA_VOICE_PROMPT_SUFFIX."""
        src = self._bff_source()
        assert "SOFIIA_VOICE_PROMPT_SUFFIX" in src, (
            "SOFIIA_VOICE_PROMPT_SUFFIX not found in sofiia-console/app/main.py"
        )

    def test_voice_prompt_suffix_contains_max_sentences(self):
        """The suffix must state the two-sentence limit (Ukrainian or English)."""
        content = self._suffix_body()
        assert "2 речення" in content or "2 sentence" in content.lower(), (
            "SOFIIA_VOICE_PROMPT_SUFFIX must enforce max 2 sentence constraint"
        )

    def test_voice_prompt_suffix_bans_markdown(self):
        """The suffix must mention markdown, bold or bullet formatting."""
        lowered = self._suffix_body().lower()
        mentions_formatting = any(
            word in lowered for word in ("markdown", "bold", "bullet")
        )
        assert mentions_formatting, (
            "SOFIIA_VOICE_PROMPT_SUFFIX should ban markdown/lists for voice"
        )

    def test_voice_prompt_suffix_bans_think_tags(self):
        """The suffix must mention the ``<think>`` tag explicitly."""
        content = self._suffix_body()
        assert "<think>" in content, (
            "SOFIIA_VOICE_PROMPT_SUFFIX should explicitly mention <think> ban"
        )

    def test_chat_send_body_has_voice_profile_field(self):
        """The BFF source must contain the string ``voice_profile``."""
        src = self._bff_source()
        assert "voice_profile" in src, (
            "ChatSendBody must have voice_profile field in sofiia-console/app/main.py"
        )

    def test_voice_guardrails_applied_per_turn(self):
        """At least two message payloads must use ``_system_prompt``."""
        src = self._bff_source()
        # Check that _system_prompt is used (not hardcoded SOFIIA_SYSTEM_PROMPT)
        # in ollama/glm/grok
        provider_uses_system_prompt = src.count('"content": _system_prompt')
        assert provider_uses_system_prompt >= 2, (
            f"Expected ≥2 providers to use _system_prompt (voice-aware), found {provider_uses_system_prompt}"
        )
|
||||
|
||||
|
||||
# ── 5. Router config structure ────────────────────────────────────────────────
|
||||
|
||||
class TestRouterConfigStructure:
|
||||
|
||||
def test_voice_policies_section_exists(self, router_config):
|
||||
assert "selection_policies" in router_config, \
|
||||
"router-config.yml must have selection_policies section"
|
||||
|
||||
def test_agent_voice_profiles_sofiia(self, router_config):
|
||||
avp = router_config.get("agent_voice_profiles", {})
|
||||
assert "sofiia" in avp, "agent_voice_profiles.sofiia must be defined"
|
||||
sofiia = avp["sofiia"]
|
||||
assert sofiia.get("voice_profile") == "voice_fast_uk"
|
||||
assert sofiia.get("quality_profile") == "voice_quality_uk"
|
||||
|
||||
def test_policies_voice_section_exists(self, router_config):
|
||||
policies = router_config.get("policies", {})
|
||||
assert "voice" in policies, "policies.voice section missing in router-config.yml"
|
||||
v = policies["voice"]
|
||||
assert v.get("deadline_ms_voice", 0) <= 9000
|
||||
assert v.get("max_tokens_voice", 999) <= 256
|
||||
Reference in New Issue
Block a user