feat(platform): add new services, tools, tests and crews modules
New router intelligence modules (26 files): alert_ingest/store, audit_store, architecture_pressure, backlog_generator/store, cost_analyzer, data_governance, dependency_scanner, drift_analyzer, incident_* (5 files), llm_enrichment, platform_priority_digest, provider_budget, release_check_runner, risk_* (6 files), signature_state_store, sofiia_auto_router, tool_governance New services: - sofiia-console: Dockerfile, adapters/, monitor/nodes/ops/voice modules, launchd, react static - memory-service: integration_endpoints, integrations, voice_endpoints, static UI - aurora-service: full app suite (analysis, job_store, orchestrator, reporting, schemas, subagents) - sofiia-supervisor: new supervisor service - aistalk-bridge-lite: Telegram bridge lite - calendar-service: CalDAV calendar service with reminders - mlx-stt-service / mlx-tts-service: Apple Silicon speech services - binance-bot-monitor: market monitor service - node-worker: STT/TTS memory providers New tools (9): agent_email, browser_tool, contract_tool, observability_tool, oncall_tool, pr_reviewer_tool, repo_tool, safe_code_executor, secure_vault New crews: agromatrix_crew (10 modules: depth_classifier, doc_facts, doc_focus, farm_state, light_reply, llm_factory, memory_manager, proactivity, reflection_engine, session_context, style_adapter, telemetry) Tests: 85+ test files for all new modules Made-with: Cursor
This commit is contained in:
143
services/router/incident_intel_utils.py
Normal file
143
services/router/incident_intel_utils.py
Normal file
@@ -0,0 +1,143 @@
|
||||
"""
|
||||
incident_intel_utils.py — Data helpers for Incident Intelligence Layer.
|
||||
|
||||
Provides:
|
||||
- kind extraction from incident (meta fields, then title heuristics)
|
||||
- normalized key fields dict
|
||||
- time-proximity helpers
|
||||
- safe truncation/masking
|
||||
|
||||
No external dependencies beyond stdlib.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import datetime
|
||||
import re
|
||||
from typing import Any, Dict, Optional, Tuple
|
||||
|
||||
# ─── Kind heuristics ──────────────────────────────────────────────────────────
|
||||
|
||||
# Ordered (compiled pattern, kind) pairs used for title heuristics.
# Order matters: consumers take the first pattern that matches, so entries
# listed earlier win when a title mentions several symptoms at once.
_TITLE_KIND_PATTERNS = [
    (re.compile(regex, re.IGNORECASE), kind_name)
    for regex, kind_name in (
        (r'\b(latency|slow|timeout|p9[5-9]|p100)\b', "latency"),
        (r'\b(error.?rate|5xx|http.?error|exception)\b', "error_rate"),
        (r'\b(slo.?breach|slo)\b', "slo_breach"),
        (r'\b(oom|out.?of.?memory|memory.?pressure)\b', "oom"),
        (r'\b(disk|storage|volume.?full|inode)\b', "disk"),
        (r'\b(security|intrusion|cve|vuln|unauthorized)\b', "security"),
        (r'\b(deploy|rollout|release|canary)\b', "deploy"),
        (r'\b(crash.?loop|crashloop|restart)\b', "crashloop"),
        (r'\b(queue|lag|consumer|backlog)\b', "queue"),
        (r'\b(network|connectivity|dns|unreachable)\b', "network"),
    )
]

# Closed set of kind labels accepted verbatim from incident metadata.
_KNOWN_KINDS = frozenset({
    "slo_breach",
    "crashloop",
    "latency",
    "error_rate",
    "disk",
    "oom",
    "deploy",
    "security",
    "custom",
    "network",
    "queue",
})
|
||||
|
||||
|
||||
def extract_kind(incident: Dict) -> str:
    """
    Best-effort kind extraction. Priority:
      1. incident.meta.kind (if it names a known kind)
      2. incident.meta.alert_kind (same condition)
      3. Title heuristics (first matching pattern wins)
      4. 'custom'

    Meta values are compared after stripping whitespace and lower-casing,
    and the normalized label is what gets returned. Malformed input (a
    non-dict ``meta``, non-string kind values, a non-string title) is
    skipped rather than raising, since this helper is best-effort.

    Args:
        incident: Incident record; only ``meta`` and ``title`` are read.

    Returns:
        One of the labels in ``_KNOWN_KINDS``; ``"custom"`` when nothing matches.
    """
    meta = incident.get("meta")
    if not isinstance(meta, dict):
        # Missing, None, or an unexpected type: treat as "no metadata".
        meta = {}

    # Direct meta fields take precedence over any title guess.
    for key in ("kind", "alert_kind"):
        candidate = meta.get(key)
        # Only strings can name a kind; this also avoids hashing lists/dicts.
        if isinstance(candidate, str):
            normalized = candidate.strip().lower()
            if normalized in _KNOWN_KINDS:
                return normalized

    # Title heuristics: patterns are ordered, so the first hit decides.
    title = incident.get("title")
    if isinstance(title, str):
        for pattern, kind_name in _TITLE_KIND_PATTERNS:
            if pattern.search(title):
                return kind_name

    return "custom"
|
||||
|
||||
|
||||
def incident_key_fields(incident: Dict) -> Dict:
    """Collect the fields used for correlation into one flat dict.

    Top-level fields fall back to a default only when the key is absent
    from ``incident``; ``signature`` is read from ``meta.incident_signature``
    and ``kind`` is derived via ``extract_kind``.
    """
    meta_block = incident.get("meta") or {}

    # (field name, fallback used when the key is missing), in output order.
    top_level_defaults = (
        ("id", ""),
        ("service", ""),
        ("env", "prod"),
        ("severity", "P2"),
        ("status", "open"),
        ("started_at", ""),
    )

    fields = {
        name: incident.get(name, fallback)
        for name, fallback in top_level_defaults
    }
    fields["signature"] = meta_block.get("incident_signature", "")
    fields["kind"] = extract_kind(incident)
    return fields
|
||||
|
||||
|
||||
# ─── Time helpers ─────────────────────────────────────────────────────────────
|
||||
|
||||
def parse_iso(ts: str) -> Optional[datetime.datetime]:
    """Parse an ISO-8601 timestamp string into a naive UTC datetime.

    A trailing ``Z`` or an explicit UTC offset (``+02:00``, ``-05:00``) is
    honoured: the value is converted to UTC and the tzinfo is then dropped,
    so every result is naive and safe to subtract from any other result.
    Timestamps without an offset are returned as parsed.

    Args:
        ts: Timestamp text, e.g. ``"2024-01-01T10:00:00Z"``.

    Returns:
        A naive ``datetime``, or ``None`` when ``ts`` is empty, not a string,
        or not parseable by ``datetime.fromisoformat``.
    """
    if not ts or not isinstance(ts, str):
        return None

    text = ts.strip()
    # fromisoformat() only accepts the "Z" suffix on newer Pythons; spell it
    # as an explicit zero offset so it parses everywhere.
    if text.endswith(("Z", "z")):
        text = text[:-1] + "+00:00"

    try:
        parsed = datetime.datetime.fromisoformat(text)
    except ValueError:
        return None

    if parsed.tzinfo is not None:
        # Apply the offset rather than discarding it, then strip tzinfo so
        # callers never end up mixing naive and aware values.
        parsed = parsed.astimezone(datetime.timezone.utc).replace(tzinfo=None)
    return parsed
|
||||
|
||||
|
||||
def minutes_apart(ts_a: str, ts_b: str) -> Optional[float]:
    """Absolute distance between two ISO timestamps, in minutes.

    Returns None when either timestamp cannot be parsed by ``parse_iso``.
    """
    first = parse_iso(ts_a)
    second = parse_iso(ts_b)
    if first is not None and second is not None:
        delta_seconds = (first - second).total_seconds()
        return abs(delta_seconds) / 60.0
    return None
|
||||
|
||||
|
||||
def incidents_within_minutes(inc_a: Dict, inc_b: Dict, within: float) -> bool:
    """Tell whether two incidents' ``started_at`` values are at most `within` minutes apart.

    Returns False when the gap cannot be computed (``minutes_apart`` gave None).
    """
    start_a = inc_a.get("started_at", "")
    start_b = inc_b.get("started_at", "")
    gap_minutes = minutes_apart(start_a, start_b)
    if gap_minutes is None:
        return False
    return gap_minutes <= within
|
||||
|
||||
|
||||
# ─── Text helpers ─────────────────────────────────────────────────────────────
|
||||
|
||||
def safe_truncate(text: str, max_chars: int = 200) -> str:
    """Cut ``text`` to at most ``max_chars`` characters, marking any cut with "…".

    The ellipsis is appended after the kept characters, so a truncated
    result is ``max_chars + 1`` characters long.

    Args:
        text: Text to shorten; any falsy value (``None``, ``""``) yields ``""``.
        max_chars: Maximum number of characters kept from ``text``. Negative
            values are treated as 0 so they cannot slice from the end.

    Returns:
        ``text`` unchanged when it fits, otherwise its prefix followed by "…".
    """
    if not text:
        return ""
    # A negative limit would make text[:max_chars] drop characters from the
    # END and keep almost everything; clamp so the limit is always a prefix length.
    limit = max(max_chars, 0)
    if len(text) <= limit:
        return text
    return text[:limit] + "…"
|
||||
|
||||
|
||||
def mask_signature(sig: str, prefix_len: int = 8) -> str:
    """Show only the first N chars of a SHA-256 signature for readability.

    Args:
        sig: Full signature string; any falsy value yields ``""``.
        prefix_len: Number of leading characters to keep. Negative values
            are treated as 0.

    Returns:
        The leading ``prefix_len`` characters of ``sig``.
    """
    if not sig:
        return ""
    # sig[:-n] would return everything EXCEPT the last n characters, i.e.
    # nearly the whole signature; clamp so a bad length can only hide more.
    keep = max(prefix_len, 0)
    return sig[:keep]
|
||||
|
||||
|
||||
# Rank per severity label; anything not listed ranks after all of them.
_SEVERITY_RANKS = {"P0": 0, "P1": 1, "P2": 2, "P3": 3, "INFO": 4}
_UNKNOWN_SEVERITY_RANK = 5


def severity_rank(sev: str) -> int:
    """Map a severity label to a sortable rank. Lower = more severe.

    Labels are matched ignoring case and surrounding whitespace, so "p1"
    and " P1 " rank the same as "P1".

    Args:
        sev: Severity label such as "P0".."P3" or "INFO".

    Returns:
        0-4 for known labels; 5 for unknown labels and non-string input.
    """
    if not isinstance(sev, str):
        # None, numbers, lists...: unknown rather than a lookup error.
        return _UNKNOWN_SEVERITY_RANK
    return _SEVERITY_RANKS.get(sev.strip().upper(), _UNKNOWN_SEVERITY_RANK)
|
||||
|
||||
|
||||
def format_duration(started_at: str, ended_at: Optional[str]) -> str:
    """Human-readable duration string.

    Args:
        started_at: ISO timestamp of the start.
        ended_at: ISO timestamp of the end, or None/"" if not ended.

    Returns:
        - "unknown" when ``started_at`` cannot be parsed, when the end lies
          before the start, or when the two timestamps cannot be subtracted;
        - "ongoing" when ``ended_at`` is empty or cannot be parsed;
        - otherwise "<n>s" (under a minute), "<n>m" (under an hour, whole
          minutes truncated) or "<x.y>h".
    """
    start = parse_iso(started_at)
    if start is None:
        return "unknown"

    end = parse_iso(ended_at) if ended_at else None
    if end is None:
        # NOTE(review): an unparseable ended_at is reported as "ongoing",
        # same as a missing one — kept as-is; confirm this is intended.
        return "ongoing"

    try:
        secs = (end - start).total_seconds()
    except TypeError:
        # Subtracting a timezone-aware from a naive datetime is not defined.
        return "unknown"

    if secs < 0:
        # End precedes start: inconsistent data, don't render "-7200s".
        return "unknown"
    if secs < 60:
        return f"{int(secs)}s"
    if secs < 3600:
        return f"{int(secs / 60)}m"
    return f"{secs / 3600:.1f}h"
|
||||
Reference in New Issue
Block a user