Files
microdao-daarion/tests/test_llm_enrichment_guard.py
Apple 129e4ea1fc feat(platform): add new services, tools, tests and crews modules
New router intelligence modules (26 files): alert_ingest/store, audit_store,
architecture_pressure, backlog_generator/store, cost_analyzer, data_governance,
dependency_scanner, drift_analyzer, incident_* (5 files), llm_enrichment,
platform_priority_digest, provider_budget, release_check_runner, risk_* (6 files),
signature_state_store, sofiia_auto_router, tool_governance

New services:
- sofiia-console: Dockerfile, adapters/, monitor/nodes/ops/voice modules, launchd, react static
- memory-service: integration_endpoints, integrations, voice_endpoints, static UI
- aurora-service: full app suite (analysis, job_store, orchestrator, reporting, schemas, subagents)
- sofiia-supervisor: new supervisor service
- aistalk-bridge-lite: Telegram bridge lite
- calendar-service: CalDAV calendar service with reminders
- mlx-stt-service / mlx-tts-service: Apple Silicon speech services
- binance-bot-monitor: market monitor service
- node-worker: STT/TTS memory providers

New tools (9): agent_email, browser_tool, contract_tool, observability_tool,
oncall_tool, pr_reviewer_tool, repo_tool, safe_code_executor, secure_vault

New crews: agromatrix_crew (10 modules: depth_classifier, doc_facts, doc_focus,
farm_state, light_reply, llm_factory, memory_manager, proactivity, reflection_engine,
session_context, style_adapter, telemetry)

Tests: 85+ test files for all new modules
Made-with: Cursor
2026-03-03 07:14:14 -08:00

217 lines
9.0 KiB
Python

"""
tests/test_llm_enrichment_guard.py — Tests for LLM enrichment guards.
Tests:
- llm_mode=off → never called
- triggers not met → never called even if mode=local
- triggers met + mode=local → called with bounded prompt (input size)
- LLM output does NOT change attribution scores (explanatory only)
- LLM failure → graceful skip (enabled=False)
"""
import sys
import pytest
from pathlib import Path
from unittest.mock import patch, MagicMock
sys.path.insert(0, str(Path(__file__).resolve().parent.parent / "services" / "router"))
from llm_enrichment import (
maybe_enrich_attribution, _should_trigger, _build_prompt, _clear_dedupe_store,
)
from risk_attribution import _builtin_attr_defaults, _reload_attribution_policy
@pytest.fixture(autouse=True)
def reset_cache():
    """Give every test a clean attribution-policy cache and LLM dedupe store."""
    def _reset():
        _reload_attribution_policy()
        _clear_dedupe_store()

    _reset()
    yield
    _reset()
@pytest.fixture
def attr_policy_off():
    """Attribution policy whose LLM enrichment mode is disabled ("off")."""
    policy = _builtin_attr_defaults()
    policy["defaults"]["llm_mode"] = "off"
    return policy
@pytest.fixture
def attr_policy_local():
    """Attribution policy that enables enrichment via a local LLM ("local")."""
    policy = _builtin_attr_defaults()
    policy["defaults"]["llm_mode"] = "local"
    return policy
def _risk_report(band="high", delta_24h=15):
return {
"service": "gateway", "env": "prod",
"score": 75, "band": band,
"reasons": ["Open P1 incident(s): 1"],
"trend": {"delta_24h": delta_24h, "delta_7d": None,
"regression": {"warn": True, "fail": False}},
}
def _attribution(causes=None):
return {
"service": "gateway", "env": "prod",
"causes": causes or [
{"type": "deploy", "score": 30, "confidence": "medium",
"evidence": ["deploy alerts: 2 in last 24h"]},
],
"summary": "Likely causes: deploy activity.",
}
# ─── mode=off guard ───────────────────────────────────────────────────────────
class TestLLMModeOff:
    """llm_mode=off must short-circuit before any LLM call is attempted."""

    def test_mode_off_never_calls_llm(self, attr_policy_off):
        with patch("llm_enrichment._call_local_llm") as llm_mock:
            enrichment = maybe_enrich_attribution(
                _attribution(), _risk_report(), attr_policy=attr_policy_off
            )
        llm_mock.assert_not_called()
        assert enrichment["enabled"] is False
        assert enrichment["text"] is None
        assert enrichment["mode"] == "off"

    def test_mode_off_even_high_delta(self, attr_policy_off):
        """mode=off means NO LLM regardless of delta."""
        hot_report = _risk_report(band="critical", delta_24h=50)
        with patch("llm_enrichment._call_local_llm") as llm_mock:
            enrichment = maybe_enrich_attribution(
                _attribution(), hot_report, attr_policy=attr_policy_off
            )
        llm_mock.assert_not_called()
        assert enrichment["enabled"] is False
# ─── Triggers guard ───────────────────────────────────────────────────────────
class TestTriggerGuard:
    """Trigger gate: band membership OR delta-threshold must hold to call the LLM."""

    def test_triggers_not_met_no_call(self, attr_policy_local):
        """Band=low, delta=5 < warn 10 → triggers not met → no call."""
        quiet_report = _risk_report(band="low", delta_24h=5)
        with patch("llm_enrichment._call_local_llm") as llm_mock:
            enrichment = maybe_enrich_attribution(
                _attribution(), quiet_report, attr_policy=attr_policy_local
            )
        llm_mock.assert_not_called()
        assert enrichment["enabled"] is False
        assert "skipped_reason" in enrichment

    def test_band_high_meets_trigger(self, attr_policy_local):
        """Band=high (in band_in) → trigger met even if delta < warn."""
        report = _risk_report(band="high", delta_24h=3)
        assert _should_trigger(report, attr_policy_local) is True

    def test_delta_meets_trigger(self, attr_policy_local):
        """delta_24h=10 == risk_delta_warn=10 → trigger met."""
        report = _risk_report(band="low", delta_24h=10)
        assert _should_trigger(report, attr_policy_local) is True

    def test_below_triggers(self, attr_policy_local):
        """Band=low, delta=5 → trigger NOT met."""
        report = _risk_report(band="low", delta_24h=5)
        assert _should_trigger(report, attr_policy_local) is False

    def test_critical_band_meets_trigger(self, attr_policy_local):
        """Band=critical alone satisfies the trigger, even with zero delta."""
        report = _risk_report(band="critical", delta_24h=0)
        assert _should_trigger(report, attr_policy_local) is True
# ─── mode=local with triggers ─────────────────────────────────────────────────
class TestLocalModeWithTriggers:
    """mode=local with triggers met: LLM is called, stays explanatory-only,
    and any failure (None return or exception) degrades to a graceful skip."""

    def test_local_mode_called_when_triggers_met(self, attr_policy_local):
        """Triggers satisfied → exactly one LLM call; text surfaced verbatim."""
        with patch("llm_enrichment._call_local_llm",
                   return_value="Deploy event caused instability.") as mock_llm:
            result = maybe_enrich_attribution(
                _attribution(), _risk_report(band="high", delta_24h=15),
                attr_policy=attr_policy_local,
            )
            mock_llm.assert_called_once()
        assert result["enabled"] is True
        assert result["text"] == "Deploy event caused instability."

    def test_prompt_respects_max_chars_in(self, attr_policy_local):
        """Prompt must be truncated to llm_max_chars_in."""
        max_in = 100
        # Keep the policy consistent with the bound we pass; _build_prompt
        # itself is driven by the explicit max_chars argument below.
        attr_policy_local["defaults"]["llm_max_chars_in"] = max_in
        prompt = _build_prompt(_attribution(), _risk_report(), max_chars=max_in)
        assert len(prompt) <= max_in

    def test_llm_output_does_not_change_scores(self, attr_policy_local):
        """LLM text is explanatory only — attribution scores unchanged."""
        causes_before = [{"type": "deploy", "score": 30, "confidence": "medium",
                          "evidence": ["deploy: 2"]}]
        attr = _attribution(causes=causes_before)
        with patch("llm_enrichment._call_local_llm", return_value="Some LLM text."):
            result = maybe_enrich_attribution(
                attr, _risk_report(band="high", delta_24h=15),
                attr_policy=attr_policy_local,
            )
        # Verify attribution dict was NOT mutated by LLM
        assert attr["causes"][0]["score"] == 30
        assert attr["causes"][0]["type"] == "deploy"
        assert result["text"] == "Some LLM text."

    def test_llm_failure_returns_graceful_skip(self, attr_policy_local):
        """LLM returns None (failure sentinel) → enabled=False, no crash.

        Fixed docstring: the original said "LLM raises", but this mock returns
        None; the raising path is test_llm_exception_returns_graceful_skip.
        """
        with patch("llm_enrichment._call_local_llm", return_value=None):
            result = maybe_enrich_attribution(
                _attribution(), _risk_report(band="high", delta_24h=15),
                attr_policy=attr_policy_local,
            )
        assert result["enabled"] is False
        assert result["text"] is None

    def test_llm_exception_returns_graceful_skip(self, attr_policy_local):
        """Exception in _call_local_llm → skip gracefully."""
        with patch("llm_enrichment._call_local_llm",
                   side_effect=ConnectionError("no server")):
            result = maybe_enrich_attribution(
                _attribution(), _risk_report(band="high", delta_24h=15),
                attr_policy=attr_policy_local,
            )
        assert result["enabled"] is False
# ─── enrich_risk_report_with_attribution integration ─────────────────────────
class TestEnrichIntegration:
    """enrich_risk_report_with_attribution always attaches an attribution key,
    even when its backing stores are broken."""

    def test_attribution_key_added_to_report(self):
        """Full integration: enrich_risk_report_with_attribution adds attribution key."""
        from risk_engine import enrich_risk_report_with_attribution

        report = {
            "service": "gateway",
            "env": "prod",
            "score": 50,
            "band": "medium",
            "components": {
                "slo": {"violations": 1, "points": 10},
                "followups": {"P0": 0, "P1": 1, "other": 0, "points": 12},
            },
            "reasons": [],
            "trend": None,
        }
        enrich_risk_report_with_attribution(report)
        assert "attribution" in report
        attribution = report["attribution"]
        # Either a proper dict or None (non-fatal)
        if attribution is not None:
            assert "causes" in attribution
            assert "summary" in attribution

    def test_attribution_non_fatal_on_error(self):
        """Even with broken stores, attribution never crashes the report."""
        from risk_engine import enrich_risk_report_with_attribution

        failing_store = MagicMock()
        failing_store.list_alerts.side_effect = RuntimeError("DB down")
        failing_store.top_signatures.side_effect = RuntimeError("down")
        failing_store.list_incidents.side_effect = RuntimeError("down")
        report = {
            "service": "gateway",
            "env": "prod",
            "score": 50,
            "band": "medium",
            "components": {},
            "reasons": [],
            "trend": None,
        }
        # Should not raise
        enrich_risk_report_with_attribution(
            report, alert_store=failing_store, incident_store=failing_store
        )
        assert "attribution" in report