Files
microdao-daarion/services/sofiia-console/app/runbook_artifacts.py
Apple 2962d33a3b feat(sofiia-console): add artifacts list endpoint + team onboarding doc
- runbook_artifacts.py: adds list_run_artifacts() returning files with
  names, paths, sizes, mtime_utc from release_artifacts/<run_id>/
- runbook_runs_router.py: adds GET /api/runbooks/runs/{run_id}/artifacts
- docs/runbook/team-onboarding-console.md: one-page team onboarding doc
  covering access, rehearsal run steps, audit auth model (strict, no
  localhost bypass), artifacts location, abort procedure

Made-with: Cursor
2026-03-03 06:55:49 -08:00

414 lines
16 KiB
Python

"""
Runbook artifacts renderer — PR4.
Generates two markdown artifacts from runbook run DB data (no shell required):
- release_evidence.md (aligned with docs/runbook/release-evidence-template.md)
- post_review.md (aligned with docs/release/sofiia-console-post-release-review-template.md)
Output path: ${SOFIIA_DATA_DIR}/release_artifacts/<run_id>/
"""
from __future__ import annotations
import json
import logging
import os
import time
from datetime import datetime, timezone
from pathlib import Path
from typing import Any, Dict, List, Optional
from . import db as _db
logger = logging.getLogger(__name__)
def _artifacts_dir(run_id: str) -> Path:
    """Return ``${SOFIIA_DATA_DIR}/release_artifacts/<run_id>/`` for a run.

    The base directory is read from the ``SOFIIA_DATA_DIR`` environment
    variable and falls back to ``/tmp/sofiia-data`` when it is unset. The
    directory is only computed here, never created.

    Raises:
        ValueError: if ``run_id`` is empty, is ``.`` or ``..``, or contains a
            path separator, i.e. it would not name a direct child of
            ``release_artifacts``.
    """
    # run_id becomes a path component, so refuse anything that could resolve
    # outside release_artifacts/ (callers may pass it through unvalidated).
    separators = {"/", "\\", os.sep}
    if os.altsep:
        separators.add(os.altsep)
    if (
        not run_id
        or run_id in (".", "..")
        or any(sep in run_id for sep in separators)
    ):
        raise ValueError(f"Invalid run_id: {run_id!r}")

    data_dir = os.getenv("SOFIIA_DATA_DIR", "/tmp/sofiia-data")
    return Path(data_dir) / "release_artifacts" / run_id
def _iso_utc(ts: Optional[float]) -> str:
    """Format a Unix timestamp as ``YYYY-MM-DD HH:MM:SS UTC``.

    Falsy input (``None`` or ``0``) yields an empty string.
    """
    if ts:
        moment = datetime.fromtimestamp(ts, tz=timezone.utc)
        return f"{moment:%Y-%m-%d %H:%M:%S} UTC"
    return ""
def _duration_str(started: Optional[float], finished: Optional[float]) -> str:
    """Describe the elapsed time between two Unix timestamps.

    Returns ``"?"`` when either timestamp is falsy, seconds with one decimal
    (e.g. ``"12.3s"``) for spans under a minute, and minutes with one decimal
    (e.g. ``"2.5min"``) otherwise.
    """
    if not (started and finished):
        return "?"
    elapsed = finished - started
    return f"{elapsed:.1f}s" if elapsed < 60 else f"{elapsed / 60:.1f}min"
async def _load_run(run_id: str) -> Optional[Dict[str, Any]]:
    """Fetch one ``runbook_runs`` row as a dict keyed by column name.

    Returns ``None`` when no row matches ``run_id``.
    """
    # Must stay in the same order as the SELECT list below.
    columns = (
        "run_id",
        "runbook_path",
        "status",
        "current_step",
        "created_at",
        "started_at",
        "finished_at",
        "operator_id",
        "node_id",
        "sofiia_url",
        "evidence_path",
    )
    db_conn = await _db.get_db()
    async with db_conn.execute(
        "SELECT run_id, runbook_path, status, current_step, created_at, started_at, "
        "finished_at, operator_id, node_id, sofiia_url, evidence_path "
        "FROM runbook_runs WHERE run_id = ?",
        (run_id,),
    ) as cursor:
        record = await cursor.fetchone()

    if record:
        return {name: record[pos] for pos, name in enumerate(columns)}
    return None
async def _load_steps(run_id: str) -> List[Dict[str, Any]]:
    """Fetch the steps of a run from ``runbook_steps``, ordered by ``step_index``.

    The ``action_json`` and ``result_json`` columns are decoded with
    ``json.loads``; a falsy column value becomes ``{}``. The decoded result is
    exposed under the ``result`` key.
    """
    db_conn = await _db.get_db()
    async with db_conn.execute(
        "SELECT step_index, title, section, action_type, action_json, status, "
        "result_json, started_at, finished_at "
        "FROM runbook_steps WHERE run_id = ? ORDER BY step_index",
        (run_id,),
    ) as cursor:
        records = await cursor.fetchall()

    steps: List[Dict[str, Any]] = []
    for (
        step_index,
        title,
        section,
        action_type,
        action_raw,
        status,
        result_raw,
        started_at,
        finished_at,
    ) in records or []:
        steps.append(
            {
                "step_index": step_index,
                "title": title,
                "section": section,
                "action_type": action_type,
                "action_json": json.loads(action_raw) if action_raw else {},
                "status": status,
                "result": json.loads(result_raw) if result_raw else {},
                "started_at": started_at,
                "finished_at": finished_at,
            }
        )
    return steps
def _step_status_icon(status: str) -> str:
    """Return the marker shown next to a step status in rendered markdown.

    Statuses without an entry in the table map to an empty string.
    """
    # NOTE(review): "ok", "fail" and "pending" map to empty strings, so they
    # render the same as an unknown status — confirm this is intended.
    icons = {
        "ok": "",
        "warn": "⚠️",
        "fail": "",
        "skipped": "⏭️",
        "pending": "",
    }
    try:
        return icons[status]
    except KeyError:
        return ""
def _format_result_line(step: Dict[str, Any]) -> str:
    """Summarise a step's result as a single line.

    The line starts with the status icon and then depends on ``action_type``:
    ``http_check`` reports method, status code, expected codes and ok/FAIL;
    ``script`` reports the exit code plus root/timeout flags; anything else is
    treated as a manual step and shows up to 80 characters of notes, falling
    back to the status when there are none.
    """
    outcome = step.get("result") or {}
    kind = step.get("action_type", "")
    state = step.get("status", "pending")
    marker = _step_status_icon(state)

    if kind == "http_check":
        method = outcome.get("method", "GET")
        code = outcome.get("status_code", "?")
        expected = outcome.get("expected", [])
        verdict = "ok" if outcome.get("ok") else "FAIL"
        return f"{marker} HTTP {method} → `{code}` (expected: {expected}) — {verdict}"

    if kind == "script":
        flags = []
        if outcome.get("warning", "") == "running_as_root":
            flags.append(" ⚠️ running_as_root")
        if outcome.get("timeout", False):
            flags.append(" ⏰ TIMEOUT")
        return f"{marker} exit_code={outcome.get('exit_code', '?')}{''.join(flags)}"

    # Anything else is rendered as a manual step.
    notes = (outcome.get("notes") or "")[:80]
    return f"{marker} manual — {notes or state}"
# ── Release Evidence ──────────────────────────────────────────────────────────
def _render_release_evidence(run: Dict[str, Any], steps: List[Dict[str, Any]]) -> str:
    """Render the release evidence markdown from a run and its step results.

    Args:
        run: run record; ``run_id``, ``runbook_path`` and ``status`` are
            required keys, ``operator_id``, ``node_id``, ``started_at`` and
            ``finished_at`` are optional.
        steps: step records carrying ``step_index``, ``title``,
            ``action_type``, ``action_json``, ``status``, ``result``,
            ``started_at`` and ``finished_at``.

    Returns:
        The markdown document as one string (lines joined with ``"\\n"``).

    Well-known steps are located by substring match on the step's
    ``action_json`` (``url_path`` for ``http_check``, ``script`` for
    ``script``); the first match wins. Sections 3 and 7 contain fixed text
    (precheck ``OK``, rollback ``no``) that is not derived from step results.
    """
    run_id = run["run_id"]
    operator = run.get("operator_id") or ""
    node_id = run.get("node_id") or "NODA2"
    started = run.get("started_at")
    finished = run.get("finished_at")
    duration = _duration_str(started, finished)

    def _find_step(action_type: str, key: str, needle: str) -> Optional[Dict[str, Any]]:
        """Return the first step of ``action_type`` whose ``action_json[key]`` contains ``needle``."""
        for candidate in steps:
            if candidate["action_type"] != action_type:
                continue
            if needle in str(candidate.get("action_json", {}).get(key, "")):
                return candidate
        return None

    def _step_val(s: Optional[Dict], fallback: str = "") -> str:
        """One-line result summary of a step, or ``fallback`` when the step is absent."""
        return _format_result_line(s) if s else fallback

    def _table_cell(text: Optional[str], limit: int) -> str:
        """Truncate ``text`` and make it safe inside a markdown table cell."""
        # A raw "|" would end the cell and shift every following column, so
        # escape it. Truncate first so an escape sequence is never cut in half.
        return (text or "")[:limit].replace("|", "\\|")

    health_step = _find_step("http_check", "url_path", "/api/health")
    metrics_step = _find_step("http_check", "url_path", "/metrics")
    audit_step = _find_step("http_check", "url_path", "/api/audit")
    preflight_step = _find_step("script", "script", "preflight")
    smoke_step = _find_step("script", "script", "idempotency")
    evidence_step = _find_step("script", "script", "evidence")

    # A preflight that finished with warnings still counts as a pass.
    if not preflight_step:
        preflight_outcome = "not run"
    elif preflight_step.get("status") in ("ok", "warn"):
        preflight_outcome = "PASS"
    else:
        preflight_outcome = "FAIL"

    preflight_warns = ""
    if preflight_step and preflight_step.get("result", {}).get("warning"):
        preflight_warns = f" - {preflight_step['result']['warning']}"

    lines = [
        "# Release Evidence — Sofiia Console",
        "",
        "## 1) Release metadata",
        "",
        f"- Release ID: `{run_id}`",
        f"- Date/Time UTC: {_iso_utc(started)}",
        f"- Runbook: `{run['runbook_path']}`",
        f"- Operator: `{operator}`",
        f"- Target node: `{node_id}`",
        f"- Run status: `{run['status']}`",
        f"- Duration: {duration}",
        "- Change summary:",
        f" - _Generated from runbook run `{run_id}`_",
        "",
        "## 2) Preflight results",
        "",
        "- Command: `STRICT=1 bash ops/preflight_sofiia_console.sh`",
        f"- Status: `{preflight_outcome.upper()}`",
        f"- WARN summary: {preflight_warns}",
        f"- Step detail: {_step_val(preflight_step)}",
        "",
        "## 3) Deploy steps performed",
        "",
        f"- {node_id} precheck: `OK`",
        " - Notes: controlled restart via runbook runner",
        "- Rollout method: manual (guided runbook)",
        "",
        "## 4) Smoke evidence",
        "",
        f"- `GET /api/health`: {_step_val(health_step)}",
        f"- `GET /metrics`: {_step_val(metrics_step)}",
        f"- Idempotency A/B smoke: {_step_val(smoke_step)}",
        f"- `/api/audit` auth check: {_step_val(audit_step)}",
        "",
        "## 5) Post-release checks",
        "",
        f"- Evidence generated: {_step_val(evidence_step)}",
        "- Audit write/read quick check: _manual observation required_",
        "- Retention dry-run: _run manually if needed_",
        "",
        "## 6) All steps summary",
        "",
        "| # | Title | Type | Status | Duration |",
        "|---|-------|------|--------|----------|",
    ]

    for s in steps:
        icon = _step_status_icon(s.get("status", "pending"))
        dur = _duration_str(s.get("started_at"), s.get("finished_at"))
        title = _table_cell(s["title"], 50)
        lines.append(
            f"| {s['step_index']} | {title} | `{s['action_type']}` | {icon} `{s['status']}` | {dur} |"
        )

    lines += [
        "",
        "## 7) Rollback plan & outcome",
        "",
        "- Rollback needed: `no`",
        f"- Final service state: `{run['status']}`",
        "",
        "## 8) Sign-off",
        "",
        "- Generated by: sofiia-console runbook runner",
        f"- Timestamp UTC: {_iso_utc(time.time())}",
        f"- Run ID: `{run_id}`",
        "",
    ]
    return "\n".join(lines)
# ── Post-Release Review ───────────────────────────────────────────────────────
def _render_post_review(run: Dict[str, Any], steps: List[Dict[str, Any]]) -> str:
    """Render the post-release review markdown, pre-filled from run data.

    Args:
        run: run record; ``run_id`` and ``runbook_path`` are required keys,
            ``operator_id``, ``node_id``, ``started_at`` and ``finished_at``
            are optional.
        steps: step records carrying ``step_index``, ``title``,
            ``action_type``, ``action_json``, ``status`` and ``result``.

    Returns:
        The markdown document as one string (lines joined with ``"\\n"``).

    Preflight, smoke and incident sections are filled from step results;
    everything a human has to supply is left as ``[TODO]``. "Rollback needed"
    is fixed text and is not derived from the step results.
    """
    run_id = run["run_id"]
    operator = run.get("operator_id") or ""
    node_id = run.get("node_id") or "NODA2"
    started = run.get("started_at")
    finished = run.get("finished_at")

    def _find_step(action_type: str, key: str, needle: str) -> Optional[Dict[str, Any]]:
        """Return the first step of ``action_type`` whose ``action_json[key]`` contains ``needle``."""
        for candidate in steps:
            if candidate["action_type"] != action_type:
                continue
            if needle in str(candidate.get("action_json", {}).get(key, "")):
                return candidate
        return None

    def _smoke_result(s: Optional[Dict]) -> str:
        """Short verdict for a smoke step: status code or exit code plus pass/fail."""
        if not s:
            return "not run"
        r = s.get("result") or {}
        if s["action_type"] == "http_check":
            code = r.get("status_code", "?")
            return f"`{code}` — {'OK' if r.get('ok') else 'FAIL'}"
        exit_code = r.get("exit_code", "?")
        return f"exit_code={exit_code} ({'PASS' if exit_code == 0 else 'FAIL'})"

    preflight_step = _find_step("script", "script", "preflight")
    smoke_step = _find_step("script", "script", "idempotency")
    health_step = _find_step("http_check", "url_path", "/api/health")
    metrics_step = _find_step("http_check", "url_path", "/metrics")
    audit_step = _find_step("http_check", "url_path", "/api/audit")

    # A preflight that finished with warnings still counts as a pass.
    if not preflight_step:
        preflight_outcome = "not run"
    elif preflight_step.get("status") in ("ok", "warn"):
        preflight_outcome = "PASS"
    else:
        preflight_outcome = "FAIL"

    preflight_warn_items = " - —"
    if preflight_step and preflight_step.get("result", {}).get("warning"):
        preflight_warn_items = f" - {preflight_step['result']['warning']}"

    # Separate start and end so the two timestamps do not run together; an
    # unfinished run shows only the start.
    window = " → ".join(
        stamp for stamp in (_iso_utc(started), _iso_utc(finished)) if stamp
    )

    flagged = [s for s in steps if s.get("status") in ("warn", "fail")]
    if flagged:
        incidents_section = "\n".join(
            f"- Step `{s['step_index']}` ({(s['title'] or '')[:40]}): status=`{s['status']}`"
            for s in flagged
        )
    else:
        incidents_section = "- What happened?: —"

    lines = [
        "# Sofiia Console Post-Release Review",
        "",
        f"_Auto-generated from runbook run `{run_id}`. Fill in sections marked [TODO]._",
        "",
        "## 1) Release Metadata",
        "",
        f"- Date / Time window: {window}",
        f"- Target nodes: `{node_id}`",
        f"- Runbook: `{run['runbook_path']}`",
        f"- Run ID: `{run_id}`",
        f"- Operator(s): `{operator}`",
        "- Deployed SHAs:",
        " - sofiia-console: [TODO]",
        " - router: [TODO]",
        " - gateway: [TODO]",
        " - memory-service: [TODO]",
        "",
        "## 2) Preflight Outcome",
        "",
        f"- STRICT mode result: `{preflight_outcome}`",
        "- WARN items worth noting:",
        preflight_warn_items,
        "",
        "## 3) Smoke Results",
        "",
        f"- `/api/health`: {_smoke_result(health_step)}",
        f"- `/metrics`: {_smoke_result(metrics_step)}",
        f"- Redis idempotency A/B: {_smoke_result(smoke_step)}",
        f"- `/api/audit` auth check (401/200): {_smoke_result(audit_step)}",
        "- Audit write/read quick test: [TODO — manual check]",
        "",
        "## 4) Observed Metrics (first 15-30 min)",
        "",
        "- 5xx count: [TODO]",
        "- `sofiia_rate_limited_total` (chat / operator): [TODO]",
        "- `sofiia_idempotency_replays_total`: [TODO]",
        "- Unexpected spikes?: [TODO]",
        "",
        "## 5) Incidents / Anomalies",
        "",
        incidents_section,
        "- Root cause (if known): —",
        "- Mitigation applied: —",
        "- Rollback needed: `no`",
        "",
        "## 6) What Went Well",
        "",
        "- [TODO]",
        "",
        "## 7) What Was Friction",
        "",
        "- Manual steps: [TODO]",
        "- Confusing logs/output: [TODO]",
        "- Missing visibility: [TODO]",
        "",
        "## 8) Action Items",
        "",
        "- [ ] [TODO]",
        "",
        "---",
        f"_Generated by sofiia-console runbook runner at {_iso_utc(time.time())}_",
        "",
    ]
    return "\n".join(lines)
# ── Public functions ──────────────────────────────────────────────────────────
async def render_release_evidence(run_id: str) -> Dict[str, Any]:
    """
    Generate release evidence markdown from run DB data.

    Saves to ${SOFIIA_DATA_DIR}/release_artifacts/<run_id>/release_evidence.md
    (creating the directory if needed) and stores that path in
    runbook_runs.evidence_path.

    Returns {evidence_path, bytes, created_at, run_id}; ``bytes`` is the size
    of the UTF-8 encoded file.

    Raises:
        ValueError: if no run with ``run_id`` exists.
    """
    run = await _load_run(run_id)
    if not run:
        raise ValueError(f"Run not found: {run_id}")
    steps = await _load_steps(run_id)

    out_dir = _artifacts_dir(run_id)
    out_dir.mkdir(parents=True, exist_ok=True)
    evidence_path = out_dir / "release_evidence.md"

    # Encode once and write the bytes so the reported size is the real file
    # size: the markdown contains non-ASCII characters, so the character count
    # of the string would under-report it.
    payload = _render_release_evidence(run, steps).encode("utf-8")
    evidence_path.write_bytes(payload)
    size = len(payload)

    conn = await _db.get_db()
    await conn.execute(
        "UPDATE runbook_runs SET evidence_path = ? WHERE run_id = ?",
        (str(evidence_path), run_id),
    )
    await conn.commit()

    logger.info("Release evidence written: %s (%d bytes)", evidence_path, size)
    return {
        "evidence_path": str(evidence_path),
        "bytes": size,
        "created_at": _iso_utc(time.time()),
        "run_id": run_id,
    }
async def list_run_artifacts(run_id: str) -> Dict[str, Any]:
    """
    List files in release_artifacts/<run_id>/ with sizes and mtimes.

    Only regular files directly inside the directory are reported, sorted by
    path; subdirectories are skipped. A missing directory yields an empty
    list with ``exists`` set to False.

    Returns {run_id, dir, exists, files: [{name, path, bytes, mtime_utc}]}.
    """
    out_dir = _artifacts_dir(run_id)

    # Evaluate once so "exists" and "files" describe the same moment, and
    # require a directory: iterating a path that is a regular file would raise.
    is_dir = out_dir.is_dir()

    files: List[Dict[str, Any]] = []
    if is_dir:
        for entry in sorted(out_dir.iterdir()):
            if not entry.is_file():
                continue
            info = entry.stat()
            files.append(
                {
                    "name": entry.name,
                    "path": str(entry),
                    "bytes": info.st_size,
                    "mtime_utc": _iso_utc(info.st_mtime),
                }
            )

    return {
        "run_id": run_id,
        "dir": str(out_dir),
        "exists": is_dir,
        "files": files,
    }
async def render_post_review(run_id: str) -> Dict[str, Any]:
    """
    Generate post-release review markdown from run DB data.

    Saves to ${SOFIIA_DATA_DIR}/release_artifacts/<run_id>/post_review.md,
    creating the directory if needed.

    Returns {path, bytes, created_at, run_id}; ``bytes`` is the size of the
    UTF-8 encoded file.

    Raises:
        ValueError: if no run with ``run_id`` exists.
    """
    run = await _load_run(run_id)
    if not run:
        raise ValueError(f"Run not found: {run_id}")
    steps = await _load_steps(run_id)

    out_dir = _artifacts_dir(run_id)
    out_dir.mkdir(parents=True, exist_ok=True)
    review_path = out_dir / "post_review.md"

    # Encode once and write the bytes so the reported size is the real file
    # size: the markdown contains non-ASCII characters, so the character count
    # of the string would under-report it.
    payload = _render_post_review(run, steps).encode("utf-8")
    review_path.write_bytes(payload)
    size = len(payload)

    logger.info("Post-review written: %s (%d bytes)", review_path, size)
    return {
        "path": str(review_path),
        "bytes": size,
        "created_at": _iso_utc(time.time()),
        "run_id": run_id,
    }