New router intelligence modules (26 files): alert_ingest/store, audit_store, architecture_pressure, backlog_generator/store, cost_analyzer, data_governance, dependency_scanner, drift_analyzer, incident_* (5 files), llm_enrichment, platform_priority_digest, provider_budget, release_check_runner, risk_* (6 files), signature_state_store, sofiia_auto_router, tool_governance New services: - sofiia-console: Dockerfile, adapters/, monitor/nodes/ops/voice modules, launchd, react static - memory-service: integration_endpoints, integrations, voice_endpoints, static UI - aurora-service: full app suite (analysis, job_store, orchestrator, reporting, schemas, subagents) - sofiia-supervisor: new supervisor service - aistalk-bridge-lite: Telegram bridge lite - calendar-service: CalDAV calendar service with reminders - mlx-stt-service / mlx-tts-service: Apple Silicon speech services - binance-bot-monitor: market monitor service - node-worker: STT/TTS memory providers New tools (9): agent_email, browser_tool, contract_tool, observability_tool, oncall_tool, pr_reviewer_tool, repo_tool, safe_code_executor, secure_vault New crews: agromatrix_crew (10 modules: depth_classifier, doc_facts, doc_focus, farm_state, light_reply, llm_factory, memory_manager, proactivity, reflection_engine, session_context, style_adapter, telemetry) Tests: 85+ test files for all new modules Made-with: Cursor
217 lines
9.0 KiB
Python
"""
tests/test_llm_enrichment_guard.py — Tests for LLM enrichment guards.

Tests:
- llm_mode=off → never called
- triggers not met → never called even if mode=local
- triggers met + mode=local → called with bounded prompt (input size)
- LLM output does NOT change attribution scores (explanatory only)
- LLM failure → graceful skip (enabled=False)
"""

import sys
from pathlib import Path
from unittest.mock import MagicMock, patch

import pytest

# Make the router service modules importable before the project imports below.
sys.path.insert(0, str(Path(__file__).resolve().parent.parent / "services" / "router"))

from llm_enrichment import (
    maybe_enrich_attribution, _should_trigger, _build_prompt, _clear_dedupe_store,
)
from risk_attribution import _builtin_attr_defaults, _reload_attribution_policy


@pytest.fixture(autouse=True)
def reset_cache():
    """Reset the attribution-policy cache and LLM dedupe store around each test."""
    def _reset():
        _reload_attribution_policy()
        _clear_dedupe_store()

    _reset()
    yield
    _reset()


@pytest.fixture
def attr_policy_off():
    """Built-in attribution policy defaults with LLM enrichment disabled."""
    policy = _builtin_attr_defaults()
    policy["defaults"]["llm_mode"] = "off"
    return policy


@pytest.fixture
def attr_policy_local():
    """Built-in attribution policy defaults with local LLM enrichment enabled."""
    policy = _builtin_attr_defaults()
    policy["defaults"]["llm_mode"] = "local"
    return policy


def _risk_report(band="high", delta_24h=15):
|
|
return {
|
|
"service": "gateway", "env": "prod",
|
|
"score": 75, "band": band,
|
|
"reasons": ["Open P1 incident(s): 1"],
|
|
"trend": {"delta_24h": delta_24h, "delta_7d": None,
|
|
"regression": {"warn": True, "fail": False}},
|
|
}
|
|
|
|
|
|
def _attribution(causes=None):
|
|
return {
|
|
"service": "gateway", "env": "prod",
|
|
"causes": causes or [
|
|
{"type": "deploy", "score": 30, "confidence": "medium",
|
|
"evidence": ["deploy alerts: 2 in last 24h"]},
|
|
],
|
|
"summary": "Likely causes: deploy activity.",
|
|
}
|
|
|
|
|
|
# ─── mode=off guard ───────────────────────────────────────────────────────────


class TestLLMModeOff:
    """llm_mode=off is an absolute guard: the local LLM is never invoked."""

    def test_mode_off_never_calls_llm(self, attr_policy_off):
        with patch("llm_enrichment._call_local_llm") as llm_spy:
            outcome = maybe_enrich_attribution(
                _attribution(), _risk_report(), attr_policy=attr_policy_off
            )
        llm_spy.assert_not_called()
        assert outcome["enabled"] is False
        assert outcome["text"] is None
        assert outcome["mode"] == "off"

    def test_mode_off_even_high_delta(self, attr_policy_off):
        """mode=off means NO LLM regardless of delta."""
        with patch("llm_enrichment._call_local_llm") as llm_spy:
            outcome = maybe_enrich_attribution(
                _attribution(),
                _risk_report(band="critical", delta_24h=50),
                attr_policy=attr_policy_off,
            )
        llm_spy.assert_not_called()
        assert outcome["enabled"] is False


# ─── Triggers guard ───────────────────────────────────────────────────────────


class TestTriggerGuard:
    """Band/delta trigger conditions gate whether enrichment may run at all."""

    def test_triggers_not_met_no_call(self, attr_policy_local):
        """Band=low, delta=5 < warn 10 → triggers not met → no call."""
        quiet_report = _risk_report(band="low", delta_24h=5)
        with patch("llm_enrichment._call_local_llm") as llm_spy:
            outcome = maybe_enrich_attribution(
                _attribution(), quiet_report, attr_policy=attr_policy_local
            )
        llm_spy.assert_not_called()
        assert outcome["enabled"] is False
        assert "skipped_reason" in outcome

    def test_band_high_meets_trigger(self, attr_policy_local):
        """Band=high (in band_in) → trigger met even if delta < warn."""
        elevated = _risk_report(band="high", delta_24h=3)
        assert _should_trigger(elevated, attr_policy_local) is True

    def test_delta_meets_trigger(self, attr_policy_local):
        """delta_24h=10 == risk_delta_warn=10 → trigger met."""
        rising = _risk_report(band="low", delta_24h=10)
        assert _should_trigger(rising, attr_policy_local) is True

    def test_below_triggers(self, attr_policy_local):
        """Band=low, delta=5 → trigger NOT met."""
        quiet = _risk_report(band="low", delta_24h=5)
        assert _should_trigger(quiet, attr_policy_local) is False

    def test_critical_band_meets_trigger(self, attr_policy_local):
        critical = _risk_report(band="critical", delta_24h=0)
        assert _should_trigger(critical, attr_policy_local) is True


# ─── mode=local with triggers ─────────────────────────────────────────────────


class TestLocalModeWithTriggers:
    """mode=local with triggers met: called, bounded, explanatory-only, safe."""

    def test_local_mode_called_when_triggers_met(self, attr_policy_local):
        reply = "Deploy event caused instability."
        with patch("llm_enrichment._call_local_llm", return_value=reply) as llm_spy:
            outcome = maybe_enrich_attribution(
                _attribution(),
                _risk_report(band="high", delta_24h=15),
                attr_policy=attr_policy_local,
            )
        llm_spy.assert_called_once()
        assert outcome["enabled"] is True
        assert outcome["text"] == "Deploy event caused instability."

    def test_prompt_respects_max_chars_in(self, attr_policy_local):
        """Prompt must be truncated to llm_max_chars_in."""
        limit = 100
        attr_policy_local["defaults"]["llm_max_chars_in"] = limit
        prompt = _build_prompt(_attribution(), _risk_report(), max_chars=limit)
        assert len(prompt) <= limit

    def test_llm_output_does_not_change_scores(self, attr_policy_local):
        """LLM text is explanatory only — attribution scores unchanged."""
        original_causes = [{"type": "deploy", "score": 30, "confidence": "medium",
                            "evidence": ["deploy: 2"]}]
        attribution = _attribution(causes=original_causes)

        with patch("llm_enrichment._call_local_llm", return_value="Some LLM text."):
            outcome = maybe_enrich_attribution(
                attribution,
                _risk_report(band="high", delta_24h=15),
                attr_policy=attr_policy_local,
            )

        # Verify attribution dict was NOT mutated by LLM
        assert attribution["causes"][0]["score"] == 30
        assert attribution["causes"][0]["type"] == "deploy"
        assert outcome["text"] == "Some LLM text."

    def test_llm_failure_returns_graceful_skip(self, attr_policy_local):
        """LLM raises → enabled=False, no crash."""
        with patch("llm_enrichment._call_local_llm", return_value=None):
            outcome = maybe_enrich_attribution(
                _attribution(),
                _risk_report(band="high", delta_24h=15),
                attr_policy=attr_policy_local,
            )
        assert outcome["enabled"] is False
        assert outcome["text"] is None

    def test_llm_exception_returns_graceful_skip(self, attr_policy_local):
        """Exception in _call_local_llm → skip gracefully."""
        boom = ConnectionError("no server")
        with patch("llm_enrichment._call_local_llm", side_effect=boom):
            outcome = maybe_enrich_attribution(
                _attribution(),
                _risk_report(band="high", delta_24h=15),
                attr_policy=attr_policy_local,
            )
        assert outcome["enabled"] is False


# ─── enrich_risk_report_with_attribution integration ─────────────────────────


class TestEnrichIntegration:
    """End-to-end wiring of attribution into risk reports via risk_engine."""

    def test_attribution_key_added_to_report(self):
        """Full integration: enrich_risk_report_with_attribution adds attribution key."""
        from risk_engine import enrich_risk_report_with_attribution

        report = {
            "service": "gateway",
            "env": "prod",
            "score": 50,
            "band": "medium",
            "components": {
                "slo": {"violations": 1, "points": 10},
                "followups": {"P0": 0, "P1": 1, "other": 0, "points": 12},
            },
            "reasons": [],
            "trend": None,
        }
        enrich_risk_report_with_attribution(report)
        assert "attribution" in report
        # Either a proper dict or None (non-fatal)
        attribution = report["attribution"]
        if attribution is not None:
            assert "causes" in attribution
            assert "summary" in attribution

    def test_attribution_non_fatal_on_error(self):
        """Even with broken stores, attribution never crashes the report."""
        from risk_engine import enrich_risk_report_with_attribution

        broken_store = MagicMock()
        broken_store.list_alerts.side_effect = RuntimeError("DB down")
        broken_store.top_signatures.side_effect = RuntimeError("down")
        broken_store.list_incidents.side_effect = RuntimeError("down")

        report = {"service": "gateway", "env": "prod", "score": 50, "band": "medium",
                  "components": {}, "reasons": [], "trend": None}
        # Should not raise
        enrich_risk_report_with_attribution(
            report, alert_store=broken_store, incident_store=broken_store
        )
        assert "attribution" in report