Config policies (16 files): alert_routing, architecture_pressure, backlog, cost_weights, data_governance, incident_escalation, incident_intelligence, network_allowlist, nodes_registry, observability_sources, rbac_tools_matrix, release_gate, risk_attribution, risk_policy, slo_policy, tool_limits, tools_rollout Ops (22 files): Caddyfile, calendar compose, grafana voice dashboard, deployments/incidents logs, runbooks for alerts/audit/backlog/incidents/sofiia/voice, cron jobs, scripts (alert_triage, audit_cleanup, migrate_*, governance, schedule), task_registry, voice alerts/ha/latency/policy Docs (30+ files): HUMANIZED_STEPAN v2.7-v3 changelogs and runbooks, NODA1/NODA2 status and setup, audit index and traces, backlog, incident, supervisor, tools, voice, opencode, release, risk, aistalk, spacebot Made-with: Cursor
149 lines
6.0 KiB
Python
149 lines
6.0 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
migrate_alerts_postgres.py — Idempotent DDL for alert tables.
|
|
|
|
Runs safely on existing DBs (ALTER ... ADD COLUMN IF NOT EXISTS).
|
|
|
|
Tables:
|
|
alerts — canonical alert records + state machine
|
|
incident_signature_state — cooldown tracking per incident signature
|
|
|
|
Usage:
|
|
DATABASE_URL=postgresql://user:pass@host/db python3 ops/scripts/migrate_alerts_postgres.py [--dry-run]
|
|
"""
|
|
import os
|
|
import sys
|
|
import textwrap
|
|
|
|
# ─── alerts table ─────────────────────────────────────────────────────────────
|
|
DDL_ALERTS_CREATE = textwrap.dedent("""\
|
|
CREATE TABLE IF NOT EXISTS alerts (
|
|
alert_ref TEXT PRIMARY KEY,
|
|
dedupe_key TEXT NOT NULL,
|
|
source TEXT NOT NULL DEFAULT 'unknown',
|
|
service TEXT NOT NULL,
|
|
env TEXT NOT NULL DEFAULT 'prod',
|
|
severity TEXT NOT NULL DEFAULT 'P2',
|
|
kind TEXT NOT NULL DEFAULT 'custom',
|
|
title TEXT NOT NULL DEFAULT '',
|
|
summary TEXT,
|
|
started_at TIMESTAMPTZ,
|
|
labels JSONB,
|
|
metrics JSONB,
|
|
evidence JSONB,
|
|
links JSONB,
|
|
created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
|
|
last_seen_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
|
|
occurrences INT NOT NULL DEFAULT 1,
|
|
-- State machine (added in v2)
|
|
status TEXT NOT NULL DEFAULT 'new',
|
|
processing_lock_until TIMESTAMPTZ,
|
|
processing_owner TEXT,
|
|
last_error TEXT,
|
|
acked_at TIMESTAMPTZ,
|
|
-- Legacy compat
|
|
ack_status TEXT DEFAULT 'pending',
|
|
ack_actor TEXT,
|
|
ack_note TEXT,
|
|
ack_at TIMESTAMPTZ
|
|
);
|
|
""")
|
|
|
|
# Backward-compat additions (safe on existing tables)
|
|
DDL_ALERTS_ADD_COLUMNS = textwrap.dedent("""\
|
|
ALTER TABLE alerts ADD COLUMN IF NOT EXISTS status TEXT NOT NULL DEFAULT 'new';
|
|
ALTER TABLE alerts ADD COLUMN IF NOT EXISTS claimed_at TIMESTAMPTZ;
|
|
ALTER TABLE alerts ADD COLUMN IF NOT EXISTS processing_lock_until TIMESTAMPTZ;
|
|
ALTER TABLE alerts ADD COLUMN IF NOT EXISTS processing_owner TEXT;
|
|
ALTER TABLE alerts ADD COLUMN IF NOT EXISTS last_error TEXT;
|
|
ALTER TABLE alerts ADD COLUMN IF NOT EXISTS acked_at TIMESTAMPTZ;
|
|
ALTER TABLE alerts ADD COLUMN IF NOT EXISTS ack_status TEXT DEFAULT 'pending';
|
|
ALTER TABLE alerts ADD COLUMN IF NOT EXISTS ack_actor TEXT;
|
|
ALTER TABLE alerts ADD COLUMN IF NOT EXISTS ack_note TEXT;
|
|
ALTER TABLE alerts ADD COLUMN IF NOT EXISTS ack_at TIMESTAMPTZ;
|
|
""")
|
|
|
|
DDL_ALERTS_INDEXES = textwrap.dedent("""\
|
|
CREATE INDEX IF NOT EXISTS idx_alerts_dedupe_key
|
|
ON alerts(dedupe_key, created_at);
|
|
CREATE INDEX IF NOT EXISTS idx_alerts_service_env
|
|
ON alerts(service, env, created_at);
|
|
CREATE INDEX IF NOT EXISTS idx_alerts_severity
|
|
ON alerts(severity, created_at);
|
|
CREATE INDEX IF NOT EXISTS idx_alerts_status
|
|
ON alerts(status, created_at);
|
|
CREATE INDEX IF NOT EXISTS idx_alerts_processing_lock
|
|
ON alerts(processing_lock_until)
|
|
WHERE processing_lock_until IS NOT NULL;
|
|
""")
|
|
|
|
# ─── incident_signature_state table ──────────────────────────────────────────
|
|
DDL_SIG_STATE = textwrap.dedent("""\
|
|
CREATE TABLE IF NOT EXISTS incident_signature_state (
|
|
signature TEXT PRIMARY KEY,
|
|
last_triage_at TIMESTAMPTZ,
|
|
last_alert_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
|
|
triage_count_24h INT NOT NULL DEFAULT 0,
|
|
occurrences_60m INT NOT NULL DEFAULT 0,
|
|
occurrences_60m_bucket_start TIMESTAMPTZ,
|
|
updated_at TIMESTAMPTZ NOT NULL DEFAULT NOW()
|
|
);
|
|
|
|
-- Add new columns to existing table (idempotent)
|
|
ALTER TABLE incident_signature_state
|
|
ADD COLUMN IF NOT EXISTS occurrences_60m INT NOT NULL DEFAULT 0;
|
|
ALTER TABLE incident_signature_state
|
|
ADD COLUMN IF NOT EXISTS occurrences_60m_bucket_start TIMESTAMPTZ;
|
|
|
|
CREATE INDEX IF NOT EXISTS idx_sig_state_updated
|
|
ON incident_signature_state(updated_at);
|
|
CREATE INDEX IF NOT EXISTS idx_sig_state_last_alert
|
|
ON incident_signature_state(last_alert_at);
|
|
""")
|
|
|
|
|
|
def run(dsn: str, dry_run: bool = False) -> None:
|
|
try:
|
|
import psycopg2
|
|
except ImportError:
|
|
print("psycopg2 not installed. Run: pip install psycopg2-binary", file=sys.stderr)
|
|
sys.exit(1)
|
|
|
|
all_ddl = [
|
|
("Create alerts table", DDL_ALERTS_CREATE),
|
|
("Add state machine columns (idempotent)", DDL_ALERTS_ADD_COLUMNS),
|
|
("Create alerts indexes", DDL_ALERTS_INDEXES),
|
|
("Create incident_signature_state table", DDL_SIG_STATE),
|
|
]
|
|
|
|
if dry_run:
|
|
print("=== DRY RUN — DDL that would be executed ===\n")
|
|
for label, ddl in all_ddl:
|
|
print(f"-- {label}\n{ddl}")
|
|
return
|
|
|
|
conn = psycopg2.connect(dsn)
|
|
try:
|
|
conn.autocommit = False
|
|
with conn.cursor() as cur:
|
|
for label, ddl in all_ddl:
|
|
print(f" → {label}")
|
|
cur.execute(ddl)
|
|
conn.commit()
|
|
print("✅ All alert migrations completed successfully.")
|
|
except Exception as e:
|
|
conn.rollback()
|
|
print(f"❌ Migration failed: {e}", file=sys.stderr)
|
|
sys.exit(1)
|
|
finally:
|
|
conn.close()
|
|
|
|
|
|
if __name__ == "__main__":
|
|
dsn = os.getenv("DATABASE_URL") or os.getenv("ALERT_DATABASE_URL")
|
|
if not dsn:
|
|
print("ERROR: DATABASE_URL not set", file=sys.stderr)
|
|
sys.exit(1)
|
|
dry = "--dry-run" in sys.argv
|
|
run(dsn, dry_run=dry)
|