""" tests/test_llm_enrichment_guard.py — Tests for LLM enrichment guards. Tests: - llm_mode=off → never called - triggers not met → never called even if mode=local - triggers met + mode=local → called with bounded prompt (input size) - LLM output does NOT change attribution scores (explanatory only) - LLM failure → graceful skip (enabled=False) """ import sys import pytest from pathlib import Path from unittest.mock import patch, MagicMock sys.path.insert(0, str(Path(__file__).resolve().parent.parent / "services" / "router")) from llm_enrichment import ( maybe_enrich_attribution, _should_trigger, _build_prompt, _clear_dedupe_store, ) from risk_attribution import _builtin_attr_defaults, _reload_attribution_policy @pytest.fixture(autouse=True) def reset_cache(): _reload_attribution_policy() _clear_dedupe_store() yield _reload_attribution_policy() _clear_dedupe_store() @pytest.fixture def attr_policy_off(): p = _builtin_attr_defaults() p["defaults"]["llm_mode"] = "off" return p @pytest.fixture def attr_policy_local(): p = _builtin_attr_defaults() p["defaults"]["llm_mode"] = "local" return p def _risk_report(band="high", delta_24h=15): return { "service": "gateway", "env": "prod", "score": 75, "band": band, "reasons": ["Open P1 incident(s): 1"], "trend": {"delta_24h": delta_24h, "delta_7d": None, "regression": {"warn": True, "fail": False}}, } def _attribution(causes=None): return { "service": "gateway", "env": "prod", "causes": causes or [ {"type": "deploy", "score": 30, "confidence": "medium", "evidence": ["deploy alerts: 2 in last 24h"]}, ], "summary": "Likely causes: deploy activity.", } # ─── mode=off guard ─────────────────────────────────────────────────────────── class TestLLMModeOff: def test_mode_off_never_calls_llm(self, attr_policy_off): with patch("llm_enrichment._call_local_llm") as mock_llm: result = maybe_enrich_attribution(_attribution(), _risk_report(), attr_policy=attr_policy_off) mock_llm.assert_not_called() assert result["enabled"] is False assert result["text"] is None assert result["mode"] == "off" def test_mode_off_even_high_delta(self, attr_policy_off): """mode=off means NO LLM regardless of delta.""" with patch("llm_enrichment._call_local_llm") as mock_llm: result = maybe_enrich_attribution( _attribution(), _risk_report(band="critical", delta_24h=50), attr_policy=attr_policy_off, ) mock_llm.assert_not_called() assert result["enabled"] is False # ─── Triggers guard ─────────────────────────────────────────────────────────── class TestTriggerGuard: def test_triggers_not_met_no_call(self, attr_policy_local): """Band=low, delta=5 < warn 10 → triggers not met → no call.""" report = _risk_report(band="low", delta_24h=5) with patch("llm_enrichment._call_local_llm") as mock_llm: result = maybe_enrich_attribution(_attribution(), report, attr_policy=attr_policy_local) mock_llm.assert_not_called() assert result["enabled"] is False assert "skipped_reason" in result def test_band_high_meets_trigger(self, attr_policy_local): """Band=high (in band_in) → trigger met even if delta < warn.""" report = _risk_report(band="high", delta_24h=3) assert _should_trigger(report, attr_policy_local) is True def test_delta_meets_trigger(self, attr_policy_local): """delta_24h=10 == risk_delta_warn=10 → trigger met.""" report = _risk_report(band="low", delta_24h=10) assert _should_trigger(report, attr_policy_local) is True def test_below_triggers(self, attr_policy_local): """Band=low, delta=5 → trigger NOT met.""" report = _risk_report(band="low", delta_24h=5) assert _should_trigger(report, attr_policy_local) is False def test_critical_band_meets_trigger(self, attr_policy_local): report = _risk_report(band="critical", delta_24h=0) assert _should_trigger(report, attr_policy_local) is True # ─── mode=local with triggers ───────────────────────────────────────────────── class TestLocalModeWithTriggers: def test_local_mode_called_when_triggers_met(self, attr_policy_local): with patch("llm_enrichment._call_local_llm", return_value="Deploy event caused instability.") as mock_llm: result = maybe_enrich_attribution( _attribution(), _risk_report(band="high", delta_24h=15), attr_policy=attr_policy_local, ) mock_llm.assert_called_once() assert result["enabled"] is True assert result["text"] == "Deploy event caused instability." def test_prompt_respects_max_chars_in(self, attr_policy_local): """Prompt must be truncated to llm_max_chars_in.""" max_in = 100 attr_policy_local["defaults"]["llm_max_chars_in"] = max_in prompt = _build_prompt(_attribution(), _risk_report(), max_chars=max_in) assert len(prompt) <= max_in def test_llm_output_does_not_change_scores(self, attr_policy_local): """LLM text is explanatory only — attribution scores unchanged.""" causes_before = [{"type": "deploy", "score": 30, "confidence": "medium", "evidence": ["deploy: 2"]}] attr = _attribution(causes=causes_before) with patch("llm_enrichment._call_local_llm", return_value="Some LLM text."): result = maybe_enrich_attribution( attr, _risk_report(band="high", delta_24h=15), attr_policy=attr_policy_local, ) # Verify attribution dict was NOT mutated by LLM assert attr["causes"][0]["score"] == 30 assert attr["causes"][0]["type"] == "deploy" assert result["text"] == "Some LLM text." def test_llm_failure_returns_graceful_skip(self, attr_policy_local): """LLM raises → enabled=False, no crash.""" with patch("llm_enrichment._call_local_llm", return_value=None): result = maybe_enrich_attribution( _attribution(), _risk_report(band="high", delta_24h=15), attr_policy=attr_policy_local, ) assert result["enabled"] is False assert result["text"] is None def test_llm_exception_returns_graceful_skip(self, attr_policy_local): """Exception in _call_local_llm → skip gracefully.""" with patch("llm_enrichment._call_local_llm", side_effect=ConnectionError("no server")): result = maybe_enrich_attribution( _attribution(), _risk_report(band="high", delta_24h=15), attr_policy=attr_policy_local, ) assert result["enabled"] is False # ─── enrich_risk_report_with_attribution integration ───────────────────────── class TestEnrichIntegration: def test_attribution_key_added_to_report(self): """Full integration: enrich_risk_report_with_attribution adds attribution key.""" from risk_engine import enrich_risk_report_with_attribution report = { "service": "gateway", "env": "prod", "score": 50, "band": "medium", "components": {"slo": {"violations": 1, "points": 10}, "followups": {"P0": 0, "P1": 1, "other": 0, "points": 12}}, "reasons": [], "trend": None, } enrich_risk_report_with_attribution(report) assert "attribution" in report # Either a proper dict or None (non-fatal) if report["attribution"] is not None: assert "causes" in report["attribution"] assert "summary" in report["attribution"] def test_attribution_non_fatal_on_error(self): """Even with broken stores, attribution never crashes the report.""" from risk_engine import enrich_risk_report_with_attribution broken = MagicMock() broken.list_alerts.side_effect = RuntimeError("DB down") broken.top_signatures.side_effect = RuntimeError("down") broken.list_incidents.side_effect = RuntimeError("down") report = {"service": "gateway", "env": "prod", "score": 50, "band": "medium", "components": {}, "reasons": [], "trend": None} # Should not raise enrich_risk_report_with_attribution( report, alert_store=broken, incident_store=broken ) assert "attribution" in report