feat(platform): add new services, tools, tests and crews modules
New router intelligence modules (26 files): alert_ingest/store, audit_store, architecture_pressure, backlog_generator/store, cost_analyzer, data_governance, dependency_scanner, drift_analyzer, incident_* (5 files), llm_enrichment, platform_priority_digest, provider_budget, release_check_runner, risk_* (6 files), signature_state_store, sofiia_auto_router, tool_governance New services: - sofiia-console: Dockerfile, adapters/, monitor/nodes/ops/voice modules, launchd, react static - memory-service: integration_endpoints, integrations, voice_endpoints, static UI - aurora-service: full app suite (analysis, job_store, orchestrator, reporting, schemas, subagents) - sofiia-supervisor: new supervisor service - aistalk-bridge-lite: Telegram bridge lite - calendar-service: CalDAV calendar service with reminders - mlx-stt-service / mlx-tts-service: Apple Silicon speech services - binance-bot-monitor: market monitor service - node-worker: STT/TTS memory providers New tools (9): agent_email, browser_tool, contract_tool, observability_tool, oncall_tool, pr_reviewer_tool, repo_tool, safe_code_executor, secure_vault New crews: agromatrix_crew (10 modules: depth_classifier, doc_facts, doc_focus, farm_state, light_reply, llm_factory, memory_manager, proactivity, reflection_engine, session_context, style_adapter, telemetry) Tests: 85+ test files for all new modules Made-with: Cursor
This commit is contained in:
261
services/router/llm_enrichment.py
Normal file
261
services/router/llm_enrichment.py
Normal file
@@ -0,0 +1,261 @@
|
||||
"""
|
||||
llm_enrichment.py — Optional LLM enrichment for Risk Attribution (strictly bounded).
|
||||
|
||||
Design constraints:
|
||||
- LLM output is explanatory ONLY — never changes scores or decisions.
|
||||
- Default mode is OFF (llm_mode="off").
|
||||
- Local mode calls a local HTTP model runner (Ollama-compatible by default).
|
||||
- Triggers are checked before every call: off if delta < warn OR band not high/critical.
|
||||
- Input is hard-truncated to llm_max_chars_in.
|
||||
- Output is hard-truncated to llm_max_chars_out.
|
||||
- Any error → graceful skip, returns {enabled: false, text: null}.
|
||||
|
||||
Hardening guards (new):
|
||||
- model_allowlist: model must be in allowlist or call is skipped.
|
||||
- max_calls_per_digest: caller passes a mutable counter dict; stops after limit.
|
||||
- per_day_dedupe: in-memory key per (date, service, env) prevents duplicate calls.
|
||||
|
||||
Usage:
|
||||
from llm_enrichment import maybe_enrich_attribution
|
||||
call_counter = {"count": 0}
|
||||
report["llm_enrichment"] = maybe_enrich_attribution(
|
||||
attribution_report, risk_report, attr_policy,
|
||||
call_counter=call_counter,
|
||||
)
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import datetime
|
||||
import json
|
||||
import logging
|
||||
from typing import Dict, Optional
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# ─── Per-day dedupe store (module-level in-memory) ───────────────────────────
# key: "risk_enrich:{YYYY-MM-DD}:{service}:{env}" → True
_dedupe_store: Dict[str, bool] = {}


def _dedupe_key(service: str, env: str) -> str:
    """Return today's dedupe key (UTC date) for a (service, env) pair."""
    # Timezone-aware "now": datetime.utcnow() is deprecated since Python 3.12
    # and yields the same calendar date, so the key format is unchanged.
    date = datetime.datetime.now(datetime.timezone.utc).strftime("%Y-%m-%d")
    return f"risk_enrich:{date}:{service}:{env}"


def _is_deduped(service: str, env: str) -> bool:
    """True if (service, env) was already enriched today."""
    return _dedupe_store.get(_dedupe_key(service, env), False)


def _mark_deduped(service: str, env: str) -> None:
    """Record that (service, env) has been enriched today."""
    _dedupe_store[_dedupe_key(service, env)] = True


def _clear_dedupe_store() -> None:
    """Test helper to reset per-day dedup state."""
    _dedupe_store.clear()
|
||||
|
||||
# ─── Trigger guard ────────────────────────────────────────────────────────────


def _should_trigger(risk_report: Dict, attr_policy: Dict) -> bool:
    """
    Decide whether enrichment triggers are met.

    Fires when EITHER condition holds:
      - risk band is in llm_triggers.band_in (default: high/critical), or
      - trend delta_24h >= llm_triggers.risk_delta_warn (default: 10).
    """
    cfg = attr_policy.get("llm_triggers", {})
    warn_threshold = int(cfg.get("risk_delta_warn", 10))
    trigger_bands = set(cfg.get("band_in", ["high", "critical"]))

    current_band = risk_report.get("band", "low")
    trend = risk_report.get("trend") or {}
    delta = trend.get("delta_24h")

    band_hit = current_band in trigger_bands
    delta_hit = delta is not None and delta >= warn_threshold
    return band_hit or delta_hit
|
||||
|
||||
|
||||
# ─── Prompt builder ───────────────────────────────────────────────────────────


def _build_prompt(
    attribution_report: Dict,
    risk_report: Dict,
    max_chars: int,
) -> str:
    """Assemble a compact enrichment prompt, hard-truncated to max_chars."""
    service = attribution_report.get("service", "?")
    env = attribution_report.get("env", "prod")
    score = risk_report.get("score", 0)
    band = risk_report.get("band", "?")
    delta = attribution_report.get("delta_24h")
    top_causes = attribution_report.get("causes", [])[:3]
    top_reasons = risk_report.get("reasons", [])[:4]

    cause_lines = []
    for cause in top_causes:
        evidence = "; ".join(cause.get("evidence", []))
        cause_lines.append(
            f" - {cause['type']} (score={cause['score']}, "
            f"confidence={cause['confidence']}): {evidence}"
        )
    causes_text = "\n".join(cause_lines)
    reasons_text = "\n".join(f" - {r}" for r in top_reasons)

    # Delta is optional; when absent we still need the paragraph break.
    delta_part = f"Delta 24h: +{delta}.\n\n" if delta is not None else "\n\n"

    prompt = (
        f"You are a platform reliability assistant. Provide a 2-3 sentence human-readable "
        f"explanation for a risk spike in service '{service}' (env={env}).\n\n"
        f"Risk score: {score} ({band}). "
        + delta_part
        + f"Risk signals:\n{reasons_text}\n\n"
        f"Attributed causes:\n{causes_text}\n\n"
        f"Write a concise explanation (max 3 sentences). Do NOT include scores or numbers "
        f"from above verbatim. Focus on actionable insight."
    )
    return prompt[:max_chars]
|
||||
|
||||
|
||||
# ─── Local model call ─────────────────────────────────────────────────────────


def _is_model_allowed(model: str, attr_policy: Dict) -> bool:
    """True if *model* passes llm_local.model_allowlist (empty/absent = allow all)."""
    local_cfg = attr_policy.get("llm_local", {})
    allowlist = local_cfg.get("model_allowlist")
    if allowlist:
        return model in allowlist
    return True  # no restriction configured
|
||||
|
||||
|
||||
def _call_local_llm(
    prompt: str,
    attr_policy: Dict,
    max_out: int,
) -> Optional[str]:
    """
    Call an Ollama-compatible local endpoint and return the generated text.

    Reads endpoint/model/timeout from attr_policy["llm_local"]. Skips (returns
    None) if the configured model is not in model_allowlist. The response text
    is hard-truncated to max_out characters. Any failure — network error, bad
    JSON, empty response — returns None; never raises, per the module's
    "any error → graceful skip" contract.
    """
    llm_cfg = attr_policy.get("llm_local", {})
    endpoint = llm_cfg.get("endpoint", "http://localhost:11434/api/generate")
    model = llm_cfg.get("model", "llama3")
    timeout = int(llm_cfg.get("timeout_seconds", 15))

    if not _is_model_allowed(model, attr_policy):
        logger.warning("llm_enrichment: model '%s' not in allowlist; skipping", model)
        return None

    try:
        import urllib.request

        payload = json.dumps({
            "model": model,
            "prompt": prompt,
            "stream": False,
            "options": {"num_predict": max_out // 4},  # approx token budget (~4 chars/token)
        }).encode()
        req = urllib.request.Request(
            endpoint,
            data=payload,
            headers={"Content-Type": "application/json"},
            method="POST",
        )
        with urllib.request.urlopen(req, timeout=timeout) as resp:
            body = json.loads(resp.read())
        text = body.get("response", "") or ""
        return text[:max_out] if text else None
    except Exception as e:
        # Broad catch is intentional: enrichment is best-effort and must never
        # propagate failures. (OSError/ConnectionError are subclasses of
        # Exception, so listing them alongside it — as before — was redundant.)
        logger.warning("llm_enrichment: local LLM call failed: %s", e)
        return None
|
||||
|
||||
|
||||
# ─── Public interface ─────────────────────────────────────────────────────────


def maybe_enrich_attribution(
    attribution_report: Dict,
    risk_report: Dict,
    attr_policy: Optional[Dict] = None,
    *,
    call_counter: Optional[Dict] = None,
) -> Dict:
    """
    Conditionally enrich attribution_report with LLM text.

    Hardening guards (checked in order):
      1. llm_mode must be "local" (not "off" or "remote")
      2. triggers must be met (delta >= warn OR band in high/critical)
      3. model must be in model_allowlist
      4. per-day dedupe: (service, env) pair not already enriched today
      5. max_calls_per_digest not exceeded (via mutable `call_counter` dict)

    Args:
        attribution_report: attribution dict; "service"/"env" feed the dedupe key.
        risk_report: risk dict; "band" and trend "delta_24h" drive triggers.
        attr_policy: policy dict; loaded via risk_attribution.load_attribution_policy()
            when None (load failure degrades to an "off" result).
        call_counter: mutable {"count": int} shared across one digest run;
            incremented only after a successful LLM call.

    Returns:
        {"enabled": True/False, "text": str|None, "mode": str}, plus an optional
        "skipped_reason" key naming the guard that stopped the call.

    Never raises. LLM output does NOT alter scores.
    """
    if attr_policy is None:
        # Best-effort policy load; any failure silently degrades to "off".
        try:
            from risk_attribution import load_attribution_policy
            attr_policy = load_attribution_policy()
        except Exception:
            return {"enabled": False, "text": None, "mode": "off"}

    mode = (attr_policy.get("defaults") or {}).get("llm_mode", "off")

    if mode == "off":
        return {"enabled": False, "text": None, "mode": "off"}

    # Guard: triggers
    if not _should_trigger(risk_report, attr_policy):
        return {"enabled": False, "text": None, "mode": mode,
                "skipped_reason": "triggers not met"}

    service = attribution_report.get("service", "")
    env = attribution_report.get("env", "prod")

    # Guard: model allowlist (checked early so tests can assert without calling LLM)
    if mode == "local":
        llm_local_cfg_early = attr_policy.get("llm_local", {})
        model_cfg = llm_local_cfg_early.get("model", "llama3")
        if not _is_model_allowed(model_cfg, attr_policy):
            logger.warning("llm_enrichment: model '%s' not in allowlist; skipping", model_cfg)
            return {"enabled": False, "text": None, "mode": mode,
                    "skipped_reason": f"model '{model_cfg}' not in allowlist"}

    # Guard: per-day dedupe (on by default)
    llm_local_cfg = attr_policy.get("llm_local", {})
    if llm_local_cfg.get("per_day_dedupe", True):
        if _is_deduped(service, env):
            return {"enabled": False, "text": None, "mode": mode,
                    "skipped_reason": "per_day_dedupe: already enriched today"}

    # Guard: max_calls_per_digest (only enforced when the caller passes a counter)
    if call_counter is not None:
        max_calls = int(llm_local_cfg.get("max_calls_per_digest", 3))
        if call_counter.get("count", 0) >= max_calls:
            return {"enabled": False, "text": None, "mode": mode,
                    "skipped_reason": f"max_calls_per_digest={max_calls} reached"}

    defaults = attr_policy.get("defaults", {})
    max_in = int(defaults.get("llm_max_chars_in", 3500))
    max_out = int(defaults.get("llm_max_chars_out", 800))
    prompt = _build_prompt(attribution_report, risk_report, max_in)

    if mode == "local":
        try:
            text = _call_local_llm(prompt, attr_policy, max_out)
        except Exception as e:
            # _call_local_llm guards internally; this belt-and-braces catch
            # upholds the "never raises" contract regardless.
            logger.warning("llm_enrichment: local call raised: %s", e)
            text = None

        if text is not None:
            # Update guards on success only, so a failed call can be retried.
            _mark_deduped(service, env)
            if call_counter is not None:
                call_counter["count"] = call_counter.get("count", 0) + 1

        return {
            "enabled": text is not None,
            "text": text,
            "mode": "local",
        }

    # mode == "remote" — not implemented; stub for future extensibility
    # NOTE(review): any mode other than "off"/"local" falls through here and is
    # reported as "remote" regardless of the configured value.
    logger.debug("llm_enrichment: remote mode not implemented; skipping")
    return {"enabled": False, "text": None, "mode": "remote",
            "skipped_reason": "remote not implemented"}
|
||||
Reference in New Issue
Block a user