New router intelligence modules (26 files): alert_ingest/store, audit_store, architecture_pressure, backlog_generator/store, cost_analyzer, data_governance, dependency_scanner, drift_analyzer, incident_* (5 files), llm_enrichment, platform_priority_digest, provider_budget, release_check_runner, risk_* (6 files), signature_state_store, sofiia_auto_router, tool_governance New services: - sofiia-console: Dockerfile, adapters/, monitor/nodes/ops/voice modules, launchd, react static - memory-service: integration_endpoints, integrations, voice_endpoints, static UI - aurora-service: full app suite (analysis, job_store, orchestrator, reporting, schemas, subagents) - sofiia-supervisor: new supervisor service - aistalk-bridge-lite: Telegram bridge lite - calendar-service: CalDAV calendar service with reminders - mlx-stt-service / mlx-tts-service: Apple Silicon speech services - binance-bot-monitor: market monitor service - node-worker: STT/TTS memory providers New tools (9): agent_email, browser_tool, contract_tool, observability_tool, oncall_tool, pr_reviewer_tool, repo_tool, safe_code_executor, secure_vault New crews: agromatrix_crew (10 modules: depth_classifier, doc_facts, doc_focus, farm_state, light_reply, llm_factory, memory_manager, proactivity, reflection_engine, session_context, style_adapter, telemetry) Tests: 85+ test files for all new modules Made-with: Cursor
139 lines
4.4 KiB
Python
139 lines
4.4 KiB
Python
"""
|
|
alert_ingest.py — Alert ingestion business logic.
|
|
|
|
Handles:
|
|
- AlertEvent validation and normalization
|
|
- Dedupe-aware ingestion via AlertStore
|
|
- list/get/ack helpers used by alert_ingest_tool handler
|
|
"""
|
|
from __future__ import annotations
|
|
|
|
import hashlib
|
|
import re
|
|
import logging
|
|
from typing import Any, Dict, List, Optional
|
|
|
|
from alert_store import (
|
|
AlertStore,
|
|
_compute_dedupe_key,
|
|
_redact_text,
|
|
_sanitize_alert,
|
|
MAX_LOG_SAMPLES,
|
|
)
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
# ─── Validation ────────────────────────────────────────────────────────────────

# Closed vocabularies for incoming alert fields.
VALID_SEVERITIES = {"P0", "P1", "P2", "P3", "INFO"}
VALID_KINDS = {
    "slo_breach", "crashloop", "latency", "error_rate",
    "disk", "oom", "deploy", "security", "custom",
}
VALID_ENVS = {"prod", "staging", "dev", "any"}


def validate_alert(data: Dict) -> Optional[str]:
    """Return error string or None if valid.

    Requires `service` and `title`; `severity`, `kind`, and `env` default
    to "P2"/"custom"/"prod" and must come from the closed vocabularies
    above when supplied.
    """
    if not data.get("service"):
        return "alert.service is required"
    if not data.get("title"):
        return "alert.title is required"
    sev = data.get("severity", "P2")
    if sev not in VALID_SEVERITIES:
        # sorted() keeps the message deterministic; interpolating a raw set
        # yields an arbitrary element order per run.
        return f"alert.severity must be one of {sorted(VALID_SEVERITIES)}"
    kind = data.get("kind", "custom")
    if kind not in VALID_KINDS:
        return f"alert.kind must be one of {sorted(VALID_KINDS)}"
    env = data.get("env", "prod")
    # Fix: VALID_ENVS was declared but never enforced, so bogus envs slipped
    # through validation (the default "prod" used by normalize_alert is valid).
    if env not in VALID_ENVS:
        return f"alert.env must be one of {sorted(VALID_ENVS)}"
    return None
|
|
|
|
|
|
def normalize_alert(data: Dict) -> Dict:
    """Sanitize an incoming alert payload and fill in missing defaults.

    Returns a new dict: sensitive content is stripped via _sanitize_alert,
    absent fields get defaults, and evidence log samples are capped at
    MAX_LOG_SAMPLES entries and redacted to 300 chars each.
    """
    alert = _sanitize_alert(data)

    # Defaults for fields a caller may omit; rebuilt per call so the
    # mutable containers are never shared between alerts.
    defaults = {
        "kind": "custom",
        "env": "prod",
        "severity": "P2",
        "labels": {},
        "metrics": {},
        "links": [],
        "evidence": {},
    }
    for field, value in defaults.items():
        alert.setdefault(field, value)

    # Cap and redact log samples inside the evidence payload.
    evidence = alert.get("evidence", {})
    samples = evidence.get("log_samples", [])
    redacted = [_redact_text(sample, 300) for sample in samples[:MAX_LOG_SAMPLES]]
    alert["evidence"] = {**evidence, "log_samples": redacted}
    return alert
|
|
|
|
|
|
# ─── Ingest ────────────────────────────────────────────────────────────────────
|
|
|
|
def ingest_alert(
    store: AlertStore,
    alert_data: Dict,
    dedupe_ttl_minutes: int = 30,
) -> Dict:
    """
    Validate, normalize, and ingest alert with dedupe.
    Returns the store result dict; rejected alerts yield
    {"accepted": False, "error": <reason>} without touching the store.
    """
    validation_error = validate_alert(alert_data)
    if validation_error:
        return {"accepted": False, "error": validation_error}

    normalized = normalize_alert(alert_data)
    return store.ingest(normalized, dedupe_ttl_minutes=dedupe_ttl_minutes)
|
|
|
|
|
|
# ─── List/Get/Ack ──────────────────────────────────────────────────────────────
|
|
|
|
def list_alerts(
    store: AlertStore,
    service: Optional[str] = None,
    env: Optional[str] = None,
    window_minutes: int = 240,
    limit: int = 50,
) -> List[Dict]:
    """List recent alerts from the store, optionally filtered.

    `service` filters by exact service name; `env` filters by environment
    unless it is the wildcard "any". Results are restricted to the last
    `window_minutes` and the page size is hard-capped at 200.
    """
    criteria: Dict[str, Any] = {}
    if service:
        criteria["service"] = service
    # "any" means no environment restriction, so it adds no filter key.
    if env and env != "any":
        criteria["env"] = env
    criteria["window_minutes"] = window_minutes
    # Hard cap protects the store from oversized page requests.
    return store.list_alerts(criteria, limit=min(limit, 200))
|
|
|
|
|
|
def get_alert(store: AlertStore, alert_ref: str) -> Optional[Dict]:
    """Fetch a single alert by reference; None when the store has no match."""
    return store.get_alert(alert_ref)
|
|
|
|
|
|
def ack_alert(store: AlertStore, alert_ref: str, actor: str, note: str = "") -> Optional[Dict]:
    """Acknowledge an alert on behalf of `actor`.

    Returns None for an empty reference; the free-text note is redacted
    (500-char cap) before it reaches the store.
    """
    if not alert_ref:
        return None
    redacted_note = _redact_text(note, 500)
    return store.ack_alert(alert_ref, actor, redacted_note)
|
|
|
|
|
|
# ─── Dedupe helpers ────────────────────────────────────────────────────────────
|
|
|
|
def build_dedupe_key(service: str, env: str, kind: str, fingerprint: str = "") -> str:
    """Public wrapper around the store's dedupe-key derivation."""
    return _compute_dedupe_key(service, env, kind, fingerprint)
|
|
|
|
|
|
def map_alert_severity_to_incident(
    alert_severity: str,
    cap: str = "P1",
) -> str:
    """
    Map alert severity to incident severity, applying a cap.
    e.g. alert P0 with cap P1 → P1.

    Unknown severities fall back to "P2"; an unknown cap falls back to "P1".
    """
    # Most critical first; position in the tuple is the severity rank.
    ranking = ("P0", "P1", "P2", "P3", "INFO")
    severity = alert_severity if alert_severity in ranking else "P2"
    ceiling = cap if cap in ranking else "P1"
    # Incidents are never opened above the cap: keep whichever of the two
    # is less critical (larger index). On a tie max() returns `severity`.
    return max(severity, ceiling, key=ranking.index)
|