New router intelligence modules (26 files): alert_ingest/store, audit_store, architecture_pressure, backlog_generator/store, cost_analyzer, data_governance, dependency_scanner, drift_analyzer, incident_* (5 files), llm_enrichment, platform_priority_digest, provider_budget, release_check_runner, risk_* (6 files), signature_state_store, sofiia_auto_router, tool_governance New services: - sofiia-console: Dockerfile, adapters/, monitor/nodes/ops/voice modules, launchd, react static - memory-service: integration_endpoints, integrations, voice_endpoints, static UI - aurora-service: full app suite (analysis, job_store, orchestrator, reporting, schemas, subagents) - sofiia-supervisor: new supervisor service - aistalk-bridge-lite: Telegram bridge lite - calendar-service: CalDAV calendar service with reminders - mlx-stt-service / mlx-tts-service: Apple Silicon speech services - binance-bot-monitor: market monitor service - node-worker: STT/TTS memory providers New tools (9): agent_email, browser_tool, contract_tool, observability_tool, oncall_tool, pr_reviewer_tool, repo_tool, safe_code_executor, secure_vault New crews: agromatrix_crew (10 modules: depth_classifier, doc_facts, doc_focus, farm_state, light_reply, llm_factory, memory_manager, proactivity, reflection_engine, session_context, style_adapter, telemetry) Tests: 85+ test files for all new modules Made-with: Cursor
217 lines
9.0 KiB
Python
"""
tests/test_llm_enrichment_guard.py — Tests for LLM enrichment guards.

Tests:
- llm_mode=off → never called
- triggers not met → never called even if mode=local
- triggers met + mode=local → called with bounded prompt (input size)
- LLM output does NOT change attribution scores (explanatory only)
- LLM failure → graceful skip (enabled=False)
"""

import sys
from pathlib import Path
from unittest.mock import MagicMock, patch

import pytest

# Make the router service modules importable before the project imports below.
sys.path.insert(0, str(Path(__file__).resolve().parent.parent / "services" / "router"))

from llm_enrichment import (
    maybe_enrich_attribution, _should_trigger, _build_prompt, _clear_dedupe_store,
)
from risk_attribution import _builtin_attr_defaults, _reload_attribution_policy


@pytest.fixture(autouse=True)
def reset_cache():
    """Reset the attribution-policy cache and LLM dedupe store around each test."""
    def _reset():
        _reload_attribution_policy()
        _clear_dedupe_store()

    _reset()
    yield
    _reset()


@pytest.fixture
def attr_policy_off():
    """Built-in attribution policy defaults with LLM enrichment disabled."""
    policy = _builtin_attr_defaults()
    policy["defaults"]["llm_mode"] = "off"
    return policy


@pytest.fixture
def attr_policy_local():
    """Built-in attribution policy defaults with local LLM enrichment enabled."""
    policy = _builtin_attr_defaults()
    policy["defaults"]["llm_mode"] = "local"
    return policy


def _risk_report(band="high", delta_24h=15):
|
|
return {
|
|
"service": "gateway", "env": "prod",
|
|
"score": 75, "band": band,
|
|
"reasons": ["Open P1 incident(s): 1"],
|
|
"trend": {"delta_24h": delta_24h, "delta_7d": None,
|
|
"regression": {"warn": True, "fail": False}},
|
|
}
|
|
|
|
|
|
def _attribution(causes=None):
|
|
return {
|
|
"service": "gateway", "env": "prod",
|
|
"causes": causes or [
|
|
{"type": "deploy", "score": 30, "confidence": "medium",
|
|
"evidence": ["deploy alerts: 2 in last 24h"]},
|
|
],
|
|
"summary": "Likely causes: deploy activity.",
|
|
}
|
|
|
|
|
|
# ─── mode=off guard ───────────────────────────────────────────────────────────


class TestLLMModeOff:
    """llm_mode=off is an absolute guard: the local LLM is never invoked."""

    def test_mode_off_never_calls_llm(self, attr_policy_off):
        with patch("llm_enrichment._call_local_llm") as llm_spy:
            outcome = maybe_enrich_attribution(
                _attribution(), _risk_report(), attr_policy=attr_policy_off
            )
        llm_spy.assert_not_called()
        assert outcome["enabled"] is False
        assert outcome["text"] is None
        assert outcome["mode"] == "off"

    def test_mode_off_even_high_delta(self, attr_policy_off):
        """mode=off means NO LLM regardless of delta."""
        with patch("llm_enrichment._call_local_llm") as llm_spy:
            outcome = maybe_enrich_attribution(
                _attribution(),
                _risk_report(band="critical", delta_24h=50),
                attr_policy=attr_policy_off,
            )
        llm_spy.assert_not_called()
        assert outcome["enabled"] is False


# ─── Triggers guard ───────────────────────────────────────────────────────────


class TestTriggerGuard:
    """Band/delta trigger conditions gate whether enrichment may run at all."""

    def test_triggers_not_met_no_call(self, attr_policy_local):
        """Band=low, delta=5 < warn 10 → triggers not met → no call."""
        quiet_report = _risk_report(band="low", delta_24h=5)
        with patch("llm_enrichment._call_local_llm") as llm_spy:
            outcome = maybe_enrich_attribution(
                _attribution(), quiet_report, attr_policy=attr_policy_local
            )
        llm_spy.assert_not_called()
        assert outcome["enabled"] is False
        assert "skipped_reason" in outcome

    def test_band_high_meets_trigger(self, attr_policy_local):
        """Band=high (in band_in) → trigger met even if delta < warn."""
        elevated = _risk_report(band="high", delta_24h=3)
        assert _should_trigger(elevated, attr_policy_local) is True

    def test_delta_meets_trigger(self, attr_policy_local):
        """delta_24h=10 == risk_delta_warn=10 → trigger met."""
        rising = _risk_report(band="low", delta_24h=10)
        assert _should_trigger(rising, attr_policy_local) is True

    def test_below_triggers(self, attr_policy_local):
        """Band=low, delta=5 → trigger NOT met."""
        quiet = _risk_report(band="low", delta_24h=5)
        assert _should_trigger(quiet, attr_policy_local) is False

    def test_critical_band_meets_trigger(self, attr_policy_local):
        critical = _risk_report(band="critical", delta_24h=0)
        assert _should_trigger(critical, attr_policy_local) is True


# ─── mode=local with triggers ─────────────────────────────────────────────────


class TestLocalModeWithTriggers:
    """mode=local with triggers met: called, bounded, explanatory-only, safe."""

    def test_local_mode_called_when_triggers_met(self, attr_policy_local):
        reply = "Deploy event caused instability."
        with patch("llm_enrichment._call_local_llm", return_value=reply) as llm_spy:
            outcome = maybe_enrich_attribution(
                _attribution(),
                _risk_report(band="high", delta_24h=15),
                attr_policy=attr_policy_local,
            )
        llm_spy.assert_called_once()
        assert outcome["enabled"] is True
        assert outcome["text"] == "Deploy event caused instability."

    def test_prompt_respects_max_chars_in(self, attr_policy_local):
        """Prompt must be truncated to llm_max_chars_in."""
        limit = 100
        attr_policy_local["defaults"]["llm_max_chars_in"] = limit
        prompt = _build_prompt(_attribution(), _risk_report(), max_chars=limit)
        assert len(prompt) <= limit

    def test_llm_output_does_not_change_scores(self, attr_policy_local):
        """LLM text is explanatory only — attribution scores unchanged."""
        original_causes = [{"type": "deploy", "score": 30, "confidence": "medium",
                            "evidence": ["deploy: 2"]}]
        attribution = _attribution(causes=original_causes)

        with patch("llm_enrichment._call_local_llm", return_value="Some LLM text."):
            outcome = maybe_enrich_attribution(
                attribution,
                _risk_report(band="high", delta_24h=15),
                attr_policy=attr_policy_local,
            )

        # Verify attribution dict was NOT mutated by LLM
        assert attribution["causes"][0]["score"] == 30
        assert attribution["causes"][0]["type"] == "deploy"
        assert outcome["text"] == "Some LLM text."

    def test_llm_failure_returns_graceful_skip(self, attr_policy_local):
        """LLM raises → enabled=False, no crash."""
        with patch("llm_enrichment._call_local_llm", return_value=None):
            outcome = maybe_enrich_attribution(
                _attribution(),
                _risk_report(band="high", delta_24h=15),
                attr_policy=attr_policy_local,
            )
        assert outcome["enabled"] is False
        assert outcome["text"] is None

    def test_llm_exception_returns_graceful_skip(self, attr_policy_local):
        """Exception in _call_local_llm → skip gracefully."""
        boom = ConnectionError("no server")
        with patch("llm_enrichment._call_local_llm", side_effect=boom):
            outcome = maybe_enrich_attribution(
                _attribution(),
                _risk_report(band="high", delta_24h=15),
                attr_policy=attr_policy_local,
            )
        assert outcome["enabled"] is False


# ─── enrich_risk_report_with_attribution integration ─────────────────────────


class TestEnrichIntegration:
    """End-to-end wiring of attribution into risk reports via risk_engine."""

    def test_attribution_key_added_to_report(self):
        """Full integration: enrich_risk_report_with_attribution adds attribution key."""
        from risk_engine import enrich_risk_report_with_attribution

        report = {
            "service": "gateway",
            "env": "prod",
            "score": 50,
            "band": "medium",
            "components": {
                "slo": {"violations": 1, "points": 10},
                "followups": {"P0": 0, "P1": 1, "other": 0, "points": 12},
            },
            "reasons": [],
            "trend": None,
        }
        enrich_risk_report_with_attribution(report)
        assert "attribution" in report
        # Either a proper dict or None (non-fatal)
        attribution = report["attribution"]
        if attribution is not None:
            assert "causes" in attribution
            assert "summary" in attribution

    def test_attribution_non_fatal_on_error(self):
        """Even with broken stores, attribution never crashes the report."""
        from risk_engine import enrich_risk_report_with_attribution

        broken_store = MagicMock()
        broken_store.list_alerts.side_effect = RuntimeError("DB down")
        broken_store.top_signatures.side_effect = RuntimeError("down")
        broken_store.list_incidents.side_effect = RuntimeError("down")

        report = {"service": "gateway", "env": "prod", "score": 50, "band": "medium",
                  "components": {}, "reasons": [], "trend": None}
        # Should not raise
        enrich_risk_report_with_attribution(
            report, alert_store=broken_store, incident_store=broken_store
        )
        assert "attribution" in report