microdao-daarion/services/sofiia-console/app/runbook_artifacts.py

"""
Runbook artifacts renderer — PR4.

Generates two markdown artifacts from runbook run DB data (no shell required):
  - release_evidence.md   (aligned with docs/runbook/release-evidence-template.md)
  - post_review.md        (aligned with docs/release/sofiia-console-post-release-review-template.md)

Output path: ${SOFIIA_DATA_DIR}/release_artifacts/<run_id>/
"""
from __future__ import annotations

import json
import logging
import os
import time
from datetime import datetime, timezone
from pathlib import Path
from typing import Any, Dict, List, Optional

from . import db as _db

logger = logging.getLogger(__name__)


def _artifacts_dir(run_id: str) -> Path:
    """${SOFIIA_DATA_DIR}/release_artifacts/<run_id>/"""
    data_dir = os.getenv("SOFIIA_DATA_DIR", "/tmp/sofiia-data")
    return Path(data_dir) / "release_artifacts" / run_id


def _iso_utc(ts: Optional[float]) -> str:
    if not ts:
        return "—"
    return datetime.fromtimestamp(ts, tz=timezone.utc).strftime("%Y-%m-%d %H:%M:%S UTC")


def _duration_str(started: Optional[float], finished: Optional[float]) -> str:
    if not started or not finished:
        return "?"
    secs = finished - started
    if secs < 60:
        return f"{secs:.1f}s"
    return f"{secs / 60:.1f}min"


async def _load_run(run_id: str) -> Optional[Dict[str, Any]]:
    """Fetch one row from ``runbook_runs`` as a dict keyed by column name.

    Returns ``None`` when no row matches *run_id*.
    """
    # Keep this tuple in the same order as the SELECT column list below.
    columns = (
        "run_id",
        "runbook_path",
        "status",
        "current_step",
        "created_at",
        "started_at",
        "finished_at",
        "operator_id",
        "node_id",
        "sofiia_url",
        "evidence_path",
    )
    query = (
        "SELECT run_id, runbook_path, status, current_step, created_at, started_at, "
        "finished_at, operator_id, node_id, sofiia_url, evidence_path "
        "FROM runbook_runs WHERE run_id = ?"
    )
    db_conn = await _db.get_db()
    async with db_conn.execute(query, (run_id,)) as cursor:
        record = await cursor.fetchone()
    if not record:
        return None
    return {name: record[position] for position, name in enumerate(columns)}


async def _load_steps(run_id: str) -> List[Dict[str, Any]]:
    """Fetch all ``runbook_steps`` rows for *run_id*, ordered by step index.

    The ``action_json`` and ``result_json`` columns are decoded from JSON;
    an empty or NULL column becomes ``{}``. The decoded result is exposed
    under the ``result`` key.
    """
    query = (
        "SELECT step_index, title, section, action_type, action_json, status, "
        "result_json, started_at, finished_at "
        "FROM runbook_steps WHERE run_id = ? ORDER BY step_index"
    )
    db_conn = await _db.get_db()
    async with db_conn.execute(query, (run_id,)) as cursor:
        fetched = await cursor.fetchall()

    loaded: List[Dict[str, Any]] = []
    for record in fetched or []:
        (
            step_index,
            title,
            section,
            action_type,
            action_raw,
            status,
            result_raw,
            started_at,
            finished_at,
        ) = record
        loaded.append(
            {
                "step_index": step_index,
                "title": title,
                "section": section,
                "action_type": action_type,
                "action_json": json.loads(action_raw) if action_raw else {},
                "status": status,
                "result": json.loads(result_raw) if result_raw else {},
                "started_at": started_at,
                "finished_at": finished_at,
            }
        )
    return loaded


def _step_status_icon(status: str) -> str:
    return {"ok": "✅", "warn": "⚠️", "fail": "❌", "skipped": "⏭️", "pending": "⏳"}.get(status, "❓")


def _format_result_line(step: Dict[str, Any]) -> str:
    """Return a compact one-line summary of a step's result.

    Args:
        step: Step record. Reads ``result``, ``action_type`` and ``status``;
            all three are optional.

    Returns:
        A single line starting with the status icon. ``http_check`` steps
        show method, status code and expected codes; ``script`` steps show
        the exit code plus root/timeout markers; every other action type is
        treated as a manual step and shows its notes (or the status).
    """
    result = step.get("result") or {}
    action_type = step.get("action_type", "")
    status = step.get("status", "pending")
    icon = _step_status_icon(status)

    if action_type in ("http_check", "script") and not result:
        # Nothing was recorded for this step (e.g. it has not run), so show
        # the status rather than a misleading "FAIL" or "exit_code=?".
        return f"{icon} {action_type} — {status}"

    if action_type == "http_check":
        code = result.get("status_code", "?")
        ok = result.get("ok")
        expected = result.get("expected", [])
        return f"{icon} HTTP {result.get('method','GET')} → `{code}` (expected: {expected}) — {'ok' if ok else 'FAIL'}"
    if action_type == "script":
        exit_code = result.get("exit_code", "?")
        timed_out = result.get("timeout", False)
        warn = result.get("warning", "")
        suffix = " ⚠️ running_as_root" if warn == "running_as_root" else ""
        suffix += " ⏰ TIMEOUT" if timed_out else ""
        return f"{icon} exit_code={exit_code}{suffix}"
    # manual
    # Collapse whitespace first so multi-line notes cannot break the
    # one-line contract (this line is embedded in markdown bullets).
    notes = " ".join(str(result.get("notes") or "").split())[:80]
    return f"{icon} manual — {notes or status}"


# ── Release Evidence ──────────────────────────────────────────────────────────

def _render_release_evidence(run: Dict[str, Any], steps: List[Dict[str, Any]]) -> str:
    """Render the release evidence markdown for one runbook run.

    Args:
        run: Run record. ``run_id``, ``runbook_path`` and ``status`` are read
            directly; the remaining keys are optional.
        steps: Step records in display order. ``step_index`` is read
            directly; the remaining keys are optional.

    Returns:
        The full markdown document as one newline-joined string.
    """
    run_id = run["run_id"]
    operator = run.get("operator_id") or "—"
    node_id = run.get("node_id") or "NODA2"
    started = run.get("started_at")
    duration = _duration_str(started, run.get("finished_at"))

    def _find_step(action_type: str, field: str, needle: str) -> Optional[Dict[str, Any]]:
        """Return the first step of *action_type* whose action *field* contains *needle*."""
        for candidate in steps:
            if candidate.get("action_type") != action_type:
                continue
            action = candidate.get("action_json")
            if isinstance(action, dict) and needle in str(action.get(field, "")):
                return candidate
        return None

    def _step_val(s: Optional[Dict[str, Any]], fallback: str = "—") -> str:
        """Summarise a step in one line, or return *fallback* when it is absent."""
        if not s:
            return fallback
        return _format_result_line(s)

    def _cell(text: Any, limit: int) -> str:
        """Flatten *text* to one line, truncate it, and escape table pipes."""
        flat = " ".join(str(text or "").split())
        # Escape after truncating so an escape sequence is never cut in half.
        return flat[:limit].replace("|", "\\|")

    # Classify steps by action type and by the URL path / script they target.
    health_step = _find_step("http_check", "url_path", "/api/health")
    metrics_step = _find_step("http_check", "url_path", "/metrics")
    audit_step = _find_step("http_check", "url_path", "/api/audit")
    preflight_step = _find_step("script", "script", "preflight")
    smoke_step = _find_step("script", "script", "idempotency")
    evidence_step = _find_step("script", "script", "evidence")

    if preflight_step is None:
        preflight_outcome = "not run"
    elif preflight_step.get("status") in ("ok", "warn"):
        preflight_outcome = "PASS"
    else:
        preflight_outcome = "FAIL"

    # The warning is rendered inline after "WARN summary:", so it must be the
    # bare text; a nested "  - " bullet prefix would show up literally.
    preflight_warning = ((preflight_step or {}).get("result") or {}).get("warning")
    warn_summary = str(preflight_warning) if preflight_warning else "—"

    lines = [
        "# Release Evidence — Sofiia Console",
        "",
        "## 1) Release metadata",
        "",
        f"- Release ID: `{run_id}`",
        f"- Date/Time UTC: {_iso_utc(started)}",
        f"- Runbook: `{run['runbook_path']}`",
        f"- Operator: `{operator}`",
        f"- Target node: `{node_id}`",
        f"- Run status: `{run['status']}`",
        f"- Duration: {duration}",
        "- Change summary:",
        f"  - _Generated from runbook run `{run_id}`_",
        "",
        "## 2) Preflight results",
        "",
        "- Command: `STRICT=1 bash ops/preflight_sofiia_console.sh`",
        f"- Status: `{preflight_outcome.upper()}`",
        f"- WARN summary: {warn_summary}",
        f"- Step detail: {_step_val(preflight_step)}",
        "",
        "## 3) Deploy steps performed",
        "",
        f"- {node_id} precheck: `OK`",
        "  - Notes: controlled restart via runbook runner",
        "- Rollout method: manual (guided runbook)",
        "",
        "## 4) Smoke evidence",
        "",
        f"- `GET /api/health`: {_step_val(health_step)}",
        f"- `GET /metrics`: {_step_val(metrics_step)}",
        f"- Idempotency A/B smoke: {_step_val(smoke_step)}",
        f"- `/api/audit` auth check: {_step_val(audit_step)}",
        "",
        "## 5) Post-release checks",
        "",
        f"- Evidence generated: {_step_val(evidence_step)}",
        "- Audit write/read quick check: _manual observation required_",
        "- Retention dry-run: _run manually if needed_",
        "",
        "## 6) All steps summary",
        "",
        "| # | Title | Type | Status | Duration |",
        "|---|-------|------|--------|----------|",
    ]
    for s in steps:
        # Read the status once so the icon and the text can never disagree.
        status = s.get("status") or "pending"
        icon = _step_status_icon(status)
        dur = _duration_str(s.get("started_at"), s.get("finished_at"))
        title = _cell(s.get("title"), 50)
        lines.append(f"| {s['step_index']} | {title} | `{s.get('action_type')}` | {icon} `{status}` | {dur} |")

    lines += [
        "",
        "## 7) Rollback plan & outcome",
        "",
        "- Rollback needed: `no`",
        f"- Final service state: `{run['status']}`",
        "",
        "## 8) Sign-off",
        "",
        "- Generated by: sofiia-console runbook runner",
        f"- Timestamp UTC: {_iso_utc(time.time())}",
        f"- Run ID: `{run_id}`",
        "",
    ]
    return "\n".join(lines)


# ── Post-Release Review ───────────────────────────────────────────────────────

def _render_post_review(run: Dict[str, Any], steps: List[Dict[str, Any]]) -> str:
    """Render the post-release review markdown, pre-filled from run data.

    Sections that cannot be derived from the run are left as ``[TODO]``
    placeholders for the operator.

    Args:
        run: Run record. ``run_id`` and ``runbook_path`` are read directly;
            the remaining keys are optional.
        steps: Step records in display order.

    Returns:
        The full markdown document as one newline-joined string.
    """
    run_id = run["run_id"]
    operator = run.get("operator_id") or "—"
    node_id = run.get("node_id") or "NODA2"
    started = run.get("started_at")
    finished = run.get("finished_at")

    def _find_step(action_type: str, field: str, needle: str) -> Optional[Dict[str, Any]]:
        """Return the first step of *action_type* whose action *field* contains *needle*."""
        for candidate in steps:
            if candidate.get("action_type") != action_type:
                continue
            action = candidate.get("action_json")
            if isinstance(action, dict) and needle in str(action.get(field, "")):
                return candidate
        return None

    preflight_step = _find_step("script", "script", "preflight")
    smoke_step = _find_step("script", "script", "idempotency")
    health_step = _find_step("http_check", "url_path", "/api/health")
    metrics_step = _find_step("http_check", "url_path", "/metrics")
    audit_step = _find_step("http_check", "url_path", "/api/audit")

    if preflight_step is None:
        preflight_outcome = "not run"
    elif preflight_step.get("status") in ("ok", "warn"):
        preflight_outcome = "PASS"
    else:
        preflight_outcome = "FAIL"

    # Rendered on its own line as a nested bullet under "WARN items worth noting:".
    preflight_warning = ((preflight_step or {}).get("result") or {}).get("warning")
    preflight_warn_items = f"  - {preflight_warning}" if preflight_warning else "  - —"

    def _smoke_result(s: Optional[Dict[str, Any]]) -> str:
        """Summarise one smoke step as a pass/fail fragment."""
        if not s:
            return "not run"
        r = s.get("result") or {}
        if not r:
            # The step exists but recorded nothing, so calling it FAIL would
            # be a guess; report its status instead.
            return f"not run (step status: `{s.get('status') or 'pending'}`)"
        if s.get("action_type") == "http_check":
            code = r.get("status_code", "?")
            return f"`{code}` — {'OK' if r.get('ok') else 'FAIL'}"
        exit_code = r.get("exit_code", "?")
        return f"exit_code={exit_code} ({'PASS' if exit_code == 0 else 'FAIL'})"

    def _one_line(text: Any, limit: int) -> str:
        """Flatten *text* to a single line and truncate it (``None`` becomes empty)."""
        return " ".join(str(text or "").split())[:limit]

    warnings_in_run = [s for s in steps if s.get("status") in ("warn", "fail")]
    if warnings_in_run:
        incidents_section = "\n".join(
            f"- Step `{s.get('step_index')}` ({_one_line(s.get('title'), 40)}): status=`{s.get('status')}`"
            for s in warnings_in_run
        )
    else:
        incidents_section = "- What happened?: —"

    lines = [
        "# Sofiia Console Post-Release Review",
        "",
        f"_Auto-generated from runbook run `{run_id}`. Fill in sections marked [TODO]._",
        "",
        "## 1) Release Metadata",
        "",
        f"- Date / Time window: {_iso_utc(started)} → {_iso_utc(finished)}",
        f"- Target nodes: `{node_id}`",
        f"- Runbook: `{run['runbook_path']}`",
        f"- Run ID: `{run_id}`",
        f"- Operator(s): `{operator}`",
        "- Deployed SHAs:",
        "  - sofiia-console: [TODO]",
        "  - router: [TODO]",
        "  - gateway: [TODO]",
        "  - memory-service: [TODO]",
        "",
        "## 2) Preflight Outcome",
        "",
        f"- STRICT mode result: `{preflight_outcome}`",
        "- WARN items worth noting:",
        preflight_warn_items,
        "",
        "## 3) Smoke Results",
        "",
        f"- `/api/health`: {_smoke_result(health_step)}",
        f"- `/metrics`: {_smoke_result(metrics_step)}",
        f"- Redis idempotency A/B: {_smoke_result(smoke_step)}",
        f"- `/api/audit` auth check (401/200): {_smoke_result(audit_step)}",
        "- Audit write/read quick test: [TODO — manual check]",
        "",
        "## 4) Observed Metrics (first 15-30 min)",
        "",
        "- 5xx count: [TODO]",
        "- `sofiia_rate_limited_total` (chat / operator): [TODO]",
        "- `sofiia_idempotency_replays_total`: [TODO]",
        "- Unexpected spikes?: [TODO]",
        "",
        "## 5) Incidents / Anomalies",
        "",
        incidents_section,
        "- Root cause (if known): —",
        "- Mitigation applied: —",
        "- Rollback needed: `no`",
        "",
        "## 6) What Went Well",
        "",
        "- [TODO]",
        "",
        "## 7) What Was Friction",
        "",
        "- Manual steps: [TODO]",
        "- Confusing logs/output: [TODO]",
        "- Missing visibility: [TODO]",
        "",
        "## 8) Action Items",
        "",
        "- [ ] [TODO]",
        "",
        "---",
        f"_Generated by sofiia-console runbook runner at {_iso_utc(time.time())}_",
        "",
    ]
    return "\n".join(lines)


# ── Public functions ──────────────────────────────────────────────────────────

async def render_release_evidence(run_id: str) -> Dict[str, Any]:
    """
    Generate release evidence markdown from run DB data.

    Saves to ${SOFIIA_DATA_DIR}/release_artifacts/<run_id>/release_evidence.md
    (creating the directory if needed) and stores that path in
    runbook_runs.evidence_path.

    Returns {evidence_path, bytes, created_at, run_id}, where ``bytes`` is the
    size of the written file on disk.

    Raises ValueError when no run with *run_id* exists.
    """
    run = await _load_run(run_id)
    if not run:
        raise ValueError(f"Run not found: {run_id}")
    steps = await _load_steps(run_id)

    out_dir = _artifacts_dir(run_id)
    out_dir.mkdir(parents=True, exist_ok=True)
    evidence_path = out_dir / "release_evidence.md"

    content = _render_release_evidence(run, steps)
    evidence_path.write_text(content, encoding="utf-8")
    # The document contains non-ASCII characters, so len(content) would be a
    # character count, not a byte count. Use the real on-disk size instead.
    size_bytes = evidence_path.stat().st_size

    conn = await _db.get_db()
    await conn.execute(
        "UPDATE runbook_runs SET evidence_path = ? WHERE run_id = ?",
        (str(evidence_path), run_id),
    )
    await conn.commit()

    logger.info("Release evidence written: %s (%d bytes)", evidence_path, size_bytes)
    return {
        "evidence_path": str(evidence_path),
        "bytes": size_bytes,
        "created_at": _iso_utc(time.time()),
        "run_id": run_id,
    }


async def list_run_artifacts(run_id: str) -> Dict[str, Any]:
    """
    List regular files in release_artifacts/<run_id>/ with sizes and mtimes.

    Returns {run_id, dir, exists, files: [{name, path, bytes, mtime_utc}]}.
    Files are sorted by path. ``exists`` is True only when the artifact
    directory is present; a missing directory yields an empty ``files`` list.
    """
    out_dir = _artifacts_dir(run_id)
    # Check once so "exists" and "files" cannot disagree, and so a stray
    # non-directory at this path gives an empty listing rather than an error.
    dir_present = out_dir.is_dir()

    files: List[Dict[str, Any]] = []
    if dir_present:
        for entry in sorted(out_dir.iterdir()):
            if not entry.is_file():
                continue
            info = entry.stat()
            files.append({
                "name": entry.name,
                "path": str(entry),
                "bytes": info.st_size,
                "mtime_utc": _iso_utc(info.st_mtime),
            })
    return {
        "run_id": run_id,
        "dir": str(out_dir),
        "exists": dir_present,
        "files": files,
    }


async def render_post_review(run_id: str) -> Dict[str, Any]:
    """
    Generate post-release review markdown from run DB data.

    Saves to ${SOFIIA_DATA_DIR}/release_artifacts/<run_id>/post_review.md
    (creating the directory if needed).

    Returns {path, bytes, created_at, run_id}, where ``bytes`` is the size of
    the written file on disk.

    Raises ValueError when no run with *run_id* exists.
    """
    run = await _load_run(run_id)
    if not run:
        raise ValueError(f"Run not found: {run_id}")
    steps = await _load_steps(run_id)

    out_dir = _artifacts_dir(run_id)
    out_dir.mkdir(parents=True, exist_ok=True)
    review_path = out_dir / "post_review.md"

    content = _render_post_review(run, steps)
    review_path.write_text(content, encoding="utf-8")
    # The document contains non-ASCII characters, so len(content) would be a
    # character count, not a byte count. Use the real on-disk size instead.
    size_bytes = review_path.stat().st_size

    logger.info("Post-review written: %s (%d bytes)", review_path, size_bytes)
    return {
        "path": str(review_path),
        "bytes": size_bytes,
        "created_at": _iso_utc(time.time()),
        "run_id": run_id,
    }