""" llm_enrichment.py — Optional LLM enrichment for Risk Attribution (strictly bounded). Design constraints: - LLM output is explanatory ONLY — never changes scores or decisions. - Default mode is OFF (llm_mode="off"). - Local mode calls a local HTTP model runner (Ollama-compatible by default). - Triggers are checked before every call: off if delta < warn OR band not high/critical. - Input is hard-truncated to llm_max_chars_in. - Output is hard-truncated to llm_max_chars_out. - Any error → graceful skip, returns {enabled: false, text: null}. Hardening guards (new): - model_allowlist: model must be in allowlist or call is skipped. - max_calls_per_digest: caller passes a mutable counter dict; stops after limit. - per_day_dedupe: in-memory key per (date, service, env) prevents duplicate calls. Usage: from llm_enrichment import maybe_enrich_attribution call_counter = {"count": 0} report["llm_enrichment"] = maybe_enrich_attribution( attribution_report, risk_report, attr_policy, call_counter=call_counter, ) """ from __future__ import annotations import datetime import json import logging from typing import Dict, Optional logger = logging.getLogger(__name__) # ─── Per-day dedupe store (module-level in-memory) ─────────────────────────── # key: "risk_enrich:{YYYY-MM-DD}:{service}:{env}" → True _dedupe_store: Dict[str, bool] = {} def _dedupe_key(service: str, env: str) -> str: date = datetime.datetime.utcnow().strftime("%Y-%m-%d") return f"risk_enrich:{date}:{service}:{env}" def _is_deduped(service: str, env: str) -> bool: return _dedupe_store.get(_dedupe_key(service, env), False) def _mark_deduped(service: str, env: str) -> None: _dedupe_store[_dedupe_key(service, env)] = True def _clear_dedupe_store() -> None: """Test helper to reset per-day dedup state.""" _dedupe_store.clear() # ─── Trigger guard ──────────────────────────────────────────────────────────── def _should_trigger(risk_report: Dict, attr_policy: Dict) -> bool: """ Returns True only if triggers are met: delta_24h >= risk_delta_warn OR band in band_in Both conditions are OR — either is enough. """ triggers = attr_policy.get("llm_triggers", {}) delta_warn = int(triggers.get("risk_delta_warn", 10)) band_in = set(triggers.get("band_in", ["high", "critical"])) band = risk_report.get("band", "low") delta_24h = (risk_report.get("trend") or {}).get("delta_24h") if band in band_in: return True if delta_24h is not None and delta_24h >= delta_warn: return True return False # ─── Prompt builder ─────────────────────────────────────────────────────────── def _build_prompt( attribution_report: Dict, risk_report: Dict, max_chars: int, ) -> str: """Build a compact prompt for local LLM enrichment.""" service = attribution_report.get("service", "?") env = attribution_report.get("env", "prod") score = risk_report.get("score", 0) band = risk_report.get("band", "?") delta = attribution_report.get("delta_24h") causes = attribution_report.get("causes", [])[:3] reasons = risk_report.get("reasons", [])[:4] causes_text = "\n".join( f" - {c['type']} (score={c['score']}, confidence={c['confidence']}): " + "; ".join(c.get("evidence", [])) for c in causes ) reasons_text = "\n".join(f" - {r}" for r in reasons) prompt = ( f"You are a platform reliability assistant. Provide a 2-3 sentence human-readable " f"explanation for a risk spike in service '{service}' (env={env}).\n\n" f"Risk score: {score} ({band}). " + (f"Delta 24h: +{delta}.\n\n" if delta is not None else "\n\n") + f"Risk signals:\n{reasons_text}\n\n" f"Attributed causes:\n{causes_text}\n\n" f"Write a concise explanation (max 3 sentences). Do NOT include scores or numbers " f"from above verbatim. Focus on actionable insight." ) return prompt[:max_chars] # ─── Local model call ───────────────────────────────────────────────────────── def _is_model_allowed(model: str, attr_policy: Dict) -> bool: """Return True if model is in llm_local.model_allowlist (or list is empty/absent).""" allowlist = attr_policy.get("llm_local", {}).get("model_allowlist") if not allowlist: return True # no restriction configured return model in allowlist def _call_local_llm( prompt: str, attr_policy: Dict, max_out: int, ) -> Optional[str]: """ Calls Ollama-compatible local endpoint. Skips if model is not in model_allowlist. Returns text or None on failure. """ llm_cfg = attr_policy.get("llm_local", {}) endpoint = llm_cfg.get("endpoint", "http://localhost:11434/api/generate") model = llm_cfg.get("model", "llama3") timeout = int(llm_cfg.get("timeout_seconds", 15)) if not _is_model_allowed(model, attr_policy): logger.warning("llm_enrichment: model '%s' not in allowlist; skipping", model) return None try: import urllib.request payload = json.dumps({ "model": model, "prompt": prompt, "stream": False, "options": {"num_predict": max_out // 4}, # approx token budget }).encode() req = urllib.request.Request( endpoint, data=payload, headers={"Content-Type": "application/json"}, method="POST", ) with urllib.request.urlopen(req, timeout=timeout) as resp: body = json.loads(resp.read()) text = body.get("response", "") or "" return text[:max_out] if text else None except (Exception, OSError, ConnectionError) as e: logger.warning("llm_enrichment: local LLM call failed: %s", e) return None # ─── Public interface ───────────────────────────────────────────────────────── def maybe_enrich_attribution( attribution_report: Dict, risk_report: Dict, attr_policy: Optional[Dict] = None, *, call_counter: Optional[Dict] = None, ) -> Dict: """ Conditionally enrich attribution_report with LLM text. Hardening guards (checked in order): 1. llm_mode must be "local" (not "off" or "remote") 2. triggers must be met (delta >= warn OR band in high/critical) 3. model must be in model_allowlist 4. max_calls_per_digest not exceeded (via mutable `call_counter` dict) 5. per-day dedupe: (service, env) pair not already enriched today Returns: {"enabled": True/False, "text": str|None, "mode": str} Never raises. LLM output does NOT alter scores. """ if attr_policy is None: try: from risk_attribution import load_attribution_policy attr_policy = load_attribution_policy() except Exception: return {"enabled": False, "text": None, "mode": "off"} mode = (attr_policy.get("defaults") or {}).get("llm_mode", "off") if mode == "off": return {"enabled": False, "text": None, "mode": "off"} # Guard: triggers if not _should_trigger(risk_report, attr_policy): return {"enabled": False, "text": None, "mode": mode, "skipped_reason": "triggers not met"} service = attribution_report.get("service", "") env = attribution_report.get("env", "prod") # Guard: model allowlist (checked early so tests can assert without calling LLM) if mode == "local": llm_local_cfg_early = attr_policy.get("llm_local", {}) model_cfg = llm_local_cfg_early.get("model", "llama3") if not _is_model_allowed(model_cfg, attr_policy): logger.warning("llm_enrichment: model '%s' not in allowlist; skipping", model_cfg) return {"enabled": False, "text": None, "mode": mode, "skipped_reason": f"model '{model_cfg}' not in allowlist"} # Guard: per-day dedupe llm_local_cfg = attr_policy.get("llm_local", {}) if llm_local_cfg.get("per_day_dedupe", True): if _is_deduped(service, env): return {"enabled": False, "text": None, "mode": mode, "skipped_reason": "per_day_dedupe: already enriched today"} # Guard: max_calls_per_digest if call_counter is not None: max_calls = int(llm_local_cfg.get("max_calls_per_digest", 3)) if call_counter.get("count", 0) >= max_calls: return {"enabled": False, "text": None, "mode": mode, "skipped_reason": f"max_calls_per_digest={max_calls} reached"} defaults = attr_policy.get("defaults", {}) max_in = int(defaults.get("llm_max_chars_in", 3500)) max_out = int(defaults.get("llm_max_chars_out", 800)) prompt = _build_prompt(attribution_report, risk_report, max_in) if mode == "local": try: text = _call_local_llm(prompt, attr_policy, max_out) except Exception as e: logger.warning("llm_enrichment: local call raised: %s", e) text = None if text is not None: # Update guards on success _mark_deduped(service, env) if call_counter is not None: call_counter["count"] = call_counter.get("count", 0) + 1 return { "enabled": text is not None, "text": text, "mode": "local", } # mode == "remote" — not implemented; stub for future extensibility logger.debug("llm_enrichment: remote mode not implemented; skipping") return {"enabled": False, "text": None, "mode": "remote", "skipped_reason": "remote not implemented"}