feat(platform): add new services, tools, tests and crews modules
New router intelligence modules (26 files): alert_ingest/store, audit_store, architecture_pressure, backlog_generator/store, cost_analyzer, data_governance, dependency_scanner, drift_analyzer, incident_* (5 files), llm_enrichment, platform_priority_digest, provider_budget, release_check_runner, risk_* (6 files), signature_state_store, sofiia_auto_router, tool_governance New services: - sofiia-console: Dockerfile, adapters/, monitor/nodes/ops/voice modules, launchd, react static - memory-service: integration_endpoints, integrations, voice_endpoints, static UI - aurora-service: full app suite (analysis, job_store, orchestrator, reporting, schemas, subagents) - sofiia-supervisor: new supervisor service - aistalk-bridge-lite: Telegram bridge lite - calendar-service: CalDAV calendar service with reminders - mlx-stt-service / mlx-tts-service: Apple Silicon speech services - binance-bot-monitor: market monitor service - node-worker: STT/TTS memory providers New tools (9): agent_email, browser_tool, contract_tool, observability_tool, oncall_tool, pr_reviewer_tool, repo_tool, safe_code_executor, secure_vault New crews: agromatrix_crew (10 modules: depth_classifier, doc_facts, doc_focus, farm_state, light_reply, llm_factory, memory_manager, proactivity, reflection_engine, session_context, style_adapter, telemetry) Tests: 85+ test files for all new modules Made-with: Cursor
This commit is contained in:
341
services/router/risk_digest.py
Normal file
341
services/router/risk_digest.py
Normal file
@@ -0,0 +1,341 @@
|
||||
"""
|
||||
risk_digest.py — Daily Risk Digest generator (deterministic, no LLM).
|
||||
|
||||
Produces:
|
||||
ops/reports/risk/YYYY-MM-DD.json
|
||||
ops/reports/risk/YYYY-MM-DD.md
|
||||
|
||||
Content:
|
||||
- Top risky services (score desc)
|
||||
- Top regressions (delta_24h desc)
|
||||
- SLO violation summary
|
||||
- Deterministic action list based on risk state
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import datetime
|
||||
import json
|
||||
import logging
|
||||
import math
|
||||
import os
|
||||
from pathlib import Path
|
||||
from typing import Dict, List, Optional
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Deterministic message templates for the digest "Action List" section.
# Rendered via str.format; placeholders per key: service plus one of
# delta (score change), score (current score) or count (item count).
_ACTION_TEMPLATES = {
    "regression_fail": "🚨 **Regression detected**: {service} score +{delta} in 24h. Freeze deployments; inspect recent incidents/followups immediately.",
    "regression_warn": "⚠️ **Score rising**: {service} +{delta} in 24h. Review open incidents and overdue follow-ups.",
    "critical_band": "🔴 **Critical risk**: {service} (score {score}). Oncall review required within 2h.",
    "high_band": "🟠 **High risk**: {service} (score {score}). Coordinate with oncall before next release.",
    "overdue_followups": "📋 **Overdue follow-ups**: {service} has {count} overdue follow-up(s). Close them to reduce risk score.",
    "slo_violation": "📉 **SLO violation**: {service} has {count} active SLO violation(s). Avoid deploying until clear.",
}
def _now_date() -> str:
|
||||
return datetime.datetime.utcnow().strftime("%Y-%m-%d")
|
||||
|
||||
|
||||
def _clamp(text: str, max_chars: int) -> str:
|
||||
if len(text) <= max_chars:
|
||||
return text
|
||||
truncated = text[:max_chars]
|
||||
return truncated + "\n\n_[digest truncated to policy max_chars]_"
|
||||
|
||||
|
||||
def _build_action_list(reports: List[Dict]) -> List[str]:
    """Derive a deterministic action list from the risk reports.

    Inspects at most the first 10 reports and emits up to 20 rendered
    messages from ``_ACTION_TEMPLATES`` covering 24h regressions, risk
    bands, overdue follow-ups and active SLO violations.
    """
    result: List[str] = []
    emit = result.append

    for report in reports[:10]:
        name = report.get("service", "?")
        components = report.get("components", {})
        trend_info = report.get("trend") or {}
        delta = trend_info.get("delta_24h")
        regression = trend_info.get("regression", {})

        # Regression alerts: "fail" outranks "warn"; both require a
        # strictly positive 24h score delta.
        if delta is not None and delta > 0:
            if regression.get("fail"):
                emit(_ACTION_TEMPLATES["regression_fail"].format(
                    service=name, delta=delta))
            elif regression.get("warn"):
                emit(_ACTION_TEMPLATES["regression_warn"].format(
                    service=name, delta=delta))

        # Band alerts only for the two highest severities.
        band = report.get("band", "low")
        if band in ("critical", "high"):
            emit(_ACTION_TEMPLATES[f"{band}_band"].format(
                service=name, score=report.get("score", 0)))

        followups = components.get("followups") or {}
        overdue_total = (
            followups.get("P0", 0)
            + followups.get("P1", 0)
            + followups.get("other", 0)
        )
        if overdue_total:
            emit(_ACTION_TEMPLATES["overdue_followups"].format(
                service=name, count=overdue_total))

        violations = (components.get("slo") or {}).get("violations", 0)
        if violations:
            emit(_ACTION_TEMPLATES["slo_violation"].format(
                service=name, count=violations))

    return result[:20]  # hard cap on digest length
def _build_markdown(
|
||||
date_str: str,
|
||||
env: str,
|
||||
reports: List[Dict],
|
||||
top_regressions: List[Dict],
|
||||
improving: List[Dict],
|
||||
actions: List[str],
|
||||
band_counts: Dict,
|
||||
) -> str:
|
||||
lines = [
|
||||
f"# Risk Digest — {date_str} ({env})",
|
||||
"",
|
||||
f"Generated: {datetime.datetime.utcnow().isoformat()} UTC",
|
||||
"",
|
||||
"## Band Summary",
|
||||
"",
|
||||
"| Band | Count |",
|
||||
"|------|-------|",
|
||||
]
|
||||
for band in ("critical", "high", "medium", "low"):
|
||||
lines.append(f"| {band} | {band_counts.get(band, 0)} |")
|
||||
|
||||
lines += [
|
||||
"",
|
||||
"## Top Risky Services",
|
||||
"",
|
||||
"| Service | Score | Band | Δ24h | Δ7d |",
|
||||
"|---------|-------|------|------|-----|",
|
||||
]
|
||||
for r in reports:
|
||||
t = r.get("trend") or {}
|
||||
d24 = t.get("delta_24h")
|
||||
d7 = t.get("delta_7d")
|
||||
d24_str = (f"+{d24}" if d24 and d24 > 0 else str(d24)) if d24 is not None else "—"
|
||||
d7_str = (f"+{d7}" if d7 and d7 > 0 else str(d7)) if d7 is not None else "—"
|
||||
lines.append(
|
||||
f"| {r['service']} | {r.get('score', 0)} | {r.get('band', '?')} "
|
||||
f"| {d24_str} | {d7_str} |"
|
||||
)
|
||||
|
||||
if top_regressions:
|
||||
lines += ["", "## Top Regressions (Δ24h)", ""]
|
||||
for item in top_regressions:
|
||||
delta = item.get("delta_24h", 0)
|
||||
lines.append(f"- **{item['service']}**: +{delta} points in 24h")
|
||||
|
||||
# ── Likely Causes (Attribution) ───────────────────────────────────────────
|
||||
regressions_with_attribution = [
|
||||
r for r in reports
|
||||
if (r.get("trend") or {}).get("delta_24h") is not None
|
||||
and r["trend"]["delta_24h"] > 0
|
||||
and r.get("attribution") is not None
|
||||
and r["attribution"].get("causes")
|
||||
]
|
||||
regressions_with_attribution = sorted(
|
||||
regressions_with_attribution,
|
||||
key=lambda r: -(r.get("trend") or {}).get("delta_24h", 0),
|
||||
)[:5]
|
||||
|
||||
if regressions_with_attribution:
|
||||
lines += ["", "## Likely Causes (Top Regressions)", ""]
|
||||
for r in regressions_with_attribution:
|
||||
svc = r["service"]
|
||||
attr = r["attribution"]
|
||||
delta = r["trend"]["delta_24h"]
|
||||
summary = attr.get("summary", "")
|
||||
lines.append(f"### {svc} (+{delta} pts)")
|
||||
if summary:
|
||||
lines.append(f"> {summary}")
|
||||
causes = attr.get("causes", [])[:2]
|
||||
for c in causes:
|
||||
evid = "; ".join(c.get("evidence", []))
|
||||
lines.append(
|
||||
f"- **{c['type']}** (confidence: {c.get('confidence', '?')}): {evid}"
|
||||
)
|
||||
# LLM text if available
|
||||
llm = attr.get("llm_enrichment") or {}
|
||||
if llm.get("enabled") and llm.get("text"):
|
||||
lines += ["", f" _LLM insight_: {llm['text'][:400]}"]
|
||||
lines.append("")
|
||||
|
||||
# ── Change Timeline (Top Regressions) ────────────────────────────────────
|
||||
regressions_with_timeline = [
|
||||
r for r in regressions_with_attribution
|
||||
if r.get("attribution") and r["attribution"].get("timeline")
|
||||
]
|
||||
if regressions_with_timeline:
|
||||
lines += ["", "## Change Timeline (Top Regressions)", ""]
|
||||
for r in regressions_with_timeline:
|
||||
svc = r["service"]
|
||||
timeline = r["attribution"]["timeline"][:5] # top 5 per service
|
||||
lines.append(f"### {svc}")
|
||||
for item in timeline:
|
||||
ts = (item.get("ts") or "")[:16]
|
||||
label = item.get("label", "")
|
||||
ev_type = item.get("type", "")
|
||||
lines.append(f"- `{ts}` [{ev_type}] {label}")
|
||||
lines.append("")
|
||||
|
||||
if improving:
|
||||
lines += ["", "## Improving Services (Δ7d)", ""]
|
||||
for item in improving:
|
||||
delta = item.get("delta_7d", 0)
|
||||
lines.append(f"- **{item['service']}**: {delta} points over 7d")
|
||||
|
||||
if actions:
|
||||
lines += ["", "## Action List", ""]
|
||||
for action in actions:
|
||||
lines.append(f"- {action}")
|
||||
|
||||
lines += ["", "---", "_Generated by DAARION.city Risk Digest (deterministic, no LLM by default)_"]
|
||||
return "\n".join(lines)
|
||||
|
||||
|
||||
def daily_digest(
    env: str = "prod",
    *,
    service_reports: Optional[List[Dict]] = None,
    policy: Optional[Dict] = None,
    date_str: Optional[str] = None,
    output_dir: Optional[str] = None,
    write_files: bool = True,
) -> Dict:
    """
    Build and optionally persist the daily risk digest.

    service_reports — pre-fetched+enriched list of RiskReports (with trend).
    policy — risk policy dict; loaded via risk_engine.load_risk_policy() when None.
    date_str — digest date (YYYY-MM-DD); defaults to today's UTC date.
    output_dir — overrides the policy's digest.output_dir when provided.
    write_files — when True, writes <date>.json and <date>.md under output_dir.

    Returns {json_path, md_path, json_data, markdown, date, env}.
    json_path/md_path are None when writing is disabled or fails.
    """
    if policy is None:
        # Imported lazily and only on this branch so callers that always
        # supply a policy do not require risk_engine to be importable.
        from risk_engine import load_risk_policy
        policy = load_risk_policy()

    digest_cfg = policy.get("digest", {})
    top_n = int(digest_cfg.get("top_n", 10))
    max_chars = int(digest_cfg.get("markdown_max_chars", 8000))
    cfg_output_dir = digest_cfg.get("output_dir", "ops/reports/risk")

    effective_output_dir = output_dir or cfg_output_dir
    effective_date = date_str or _now_date()

    # Highest-risk services first, truncated to the configured top_n.
    reports = sorted(service_reports or [], key=lambda r: -r.get("score", 0))[:top_n]

    # Band counts
    band_counts: Dict[str, int] = {"critical": 0, "high": 0, "medium": 0, "low": 0}
    for r in reports:
        b = r.get("band", "low")
        band_counts[b] = band_counts.get(b, 0) + 1

    # Top regressions: services whose score rose over the last 24h.
    top_regressions = sorted(
        [r for r in reports if (r.get("trend") or {}).get("delta_24h") is not None
         and r["trend"]["delta_24h"] > 0],
        key=lambda r: -r["trend"]["delta_24h"],
    )[:5]
    top_regressions_out = [
        {"service": r["service"], "delta_24h": r["trend"]["delta_24h"],
         "attribution_causes": [
             {"type": c["type"], "score": c["score"],
              "confidence": c.get("confidence", "low"),
              "evidence": c.get("evidence", [])[:2],
              "refs": c.get("refs", [])[:3]}
             for c in (r.get("attribution") or {}).get("causes", [])[:2]
         ],
         "timeline_preview": (r.get("attribution") or {}).get("timeline", [])[:3],
         }
        for r in top_regressions
    ]

    # Improving services: largest 7-day score drops.
    improving = sorted(
        [r for r in reports if (r.get("trend") or {}).get("delta_7d") is not None
         and r["trend"]["delta_7d"] < 0],
        key=lambda r: r["trend"]["delta_7d"],
    )[:5]
    improving_out = [
        {"service": r["service"], "delta_7d": r["trend"]["delta_7d"]}
        for r in improving
    ]

    actions = _build_action_list(reports)

    markdown_raw = _build_markdown(
        date_str=effective_date,
        env=env,
        reports=reports,
        top_regressions=top_regressions_out,
        improving=improving_out,
        actions=actions,
        band_counts=band_counts,
    )
    markdown = _clamp(markdown_raw, max_chars)

    json_data = {
        "date": effective_date,
        "env": env,
        # Naive UTC timestamp; .replace(tzinfo=None) preserves the
        # historical utcnow() format while avoiding the deprecated API.
        "generated_at": datetime.datetime.now(datetime.timezone.utc).replace(tzinfo=None).isoformat(),
        "band_counts": band_counts,
        "top_services": [
            {
                "service": r.get("service"),
                "score": r.get("score"),
                "band": r.get("band"),
                "delta_24h": (r.get("trend") or {}).get("delta_24h"),
                "delta_7d": (r.get("trend") or {}).get("delta_7d"),
                "regression": (r.get("trend") or {}).get("regression"),
                "reasons": r.get("reasons", [])[:5],
                "attribution_summary": (r.get("attribution") or {}).get("summary"),
                "top_causes": [
                    {"type": c["type"], "score": c["score"],
                     "confidence": c.get("confidence", "low"),
                     "evidence": c.get("evidence", [])[:2],
                     "refs": c.get("refs", [])[:3]}
                    for c in (r.get("attribution") or {}).get("causes", [])[:2]
                ],
                "timeline_preview": (r.get("attribution") or {}).get("timeline", [])[:3],
                "evidence_refs": (r.get("attribution") or {}).get("evidence_refs", {}),
            }
            for r in reports
        ],
        "top_regressions": top_regressions_out,
        "improving_services": improving_out,
        "actions": actions,
    }

    json_path: Optional[str] = None
    md_path: Optional[str] = None

    if write_files:
        try:
            out = Path(effective_output_dir)
            out.mkdir(parents=True, exist_ok=True)
            json_path = str(out / f"{effective_date}.json")
            md_path = str(out / f"{effective_date}.md")
            # Explicit UTF-8: the markdown contains emoji and em-dashes
            # that would fail under a non-UTF-8 locale default encoding.
            with open(json_path, "w", encoding="utf-8") as f:
                json.dump(json_data, f, indent=2)
            with open(md_path, "w", encoding="utf-8") as f:
                f.write(markdown)
            logger.info("Risk digest written: %s, %s", json_path, md_path)
        except Exception as e:
            # Best-effort persistence: a failed write degrades to an
            # in-memory digest rather than crashing the caller.
            logger.warning("Risk digest write failed: %s", e)
            json_path = md_path = None

    return {
        "date": effective_date,
        "env": env,
        "json_path": json_path,
        "md_path": md_path,
        "json_data": json_data,
        "markdown": markdown,
    }
|
||||
Reference in New Issue
Block a user