"""Load nodes_registry.yml and apply environment-variable overrides.

All lookups are env-first: a NODES_<node_id>_* variable always wins over
the YAML registry, and the registry itself degrades to safe defaults when
PyYAML or the file is missing.
"""
import os
from pathlib import Path
from typing import Any, Dict

try:
    import yaml
except ImportError:
    # PyYAML is optional: registry reads fall back to built-in defaults.
    yaml = None

# In Docker: set CONFIG_DIR=/app/config. Else: repo root / config
if os.getenv("CONFIG_DIR"):
    _CONFIG_DIR = Path(os.getenv("CONFIG_DIR")).resolve()
else:
    _REPO_ROOT = Path(__file__).resolve().parent.parent.parent.parent
    _CONFIG_DIR = _REPO_ROOT / "config"
if not _CONFIG_DIR.exists():
    _CONFIG_DIR = Path("config")
_NODES_PATH = _CONFIG_DIR / "nodes_registry.yml"
_NODES_ALT = Path("config/nodes_registry.yml")


def get_nodes_registry_path() -> Path:
    """Return the first existing registry path, else the primary candidate.

    NOTE(review): the original probed Path("config/nodes_registry.yml") a
    second time; it is identical to _NODES_ALT, so the duplicate is dropped.
    """
    for p in (_NODES_PATH, _NODES_ALT):
        if p.exists():
            return p
    return _NODES_PATH


def load_nodes_registry() -> Dict[str, Any]:
    """Parse the registry YAML; on any failure return safe built-in defaults."""
    if yaml:
        for p in (_NODES_PATH, _NODES_ALT):
            if p.exists():
                try:
                    with open(p) as f:
                        return yaml.safe_load(f) or {}
                except Exception:
                    pass  # unreadable/corrupt candidate — try the next one
    return {"nodes": {}, "defaults": {"health_timeout_sec": 10, "tools_timeout_sec": 30}}


def save_nodes_registry(data: Dict[str, Any]) -> Path:
    """Persist *data* to the active registry path.

    Raises:
        RuntimeError: when PyYAML is not installed.
    """
    if not yaml:
        raise RuntimeError("PyYAML is not available")
    path = get_nodes_registry_path()
    path.parent.mkdir(parents=True, exist_ok=True)
    with open(path, "w", encoding="utf-8") as f:
        yaml.safe_dump(data, f, allow_unicode=True, sort_keys=False)
    return path


def get_gateway_url(node_id: str) -> str:
    """Gateway URL for node; env override: NODES_<node_id>_GATEWAY_URL."""
    override = os.getenv(f"NODES_{node_id}_GATEWAY_URL")
    if override:  # single getenv call instead of test-then-refetch
        return override
    nodes = load_nodes_registry().get("nodes", {})
    return (nodes.get(node_id) or {}).get("gateway_url", "")


def get_node_policy(node_id: str) -> Dict[str, Any]:
    """Return operational policy for a node (timeouts, role, retry counts).

    Node-level values win over registry defaults; hard-coded fallbacks apply
    when neither is present.
    """
    reg = load_nodes_registry()
    defaults = reg.get("defaults", {})
    node_cfg = (reg.get("nodes", {}).get(node_id) or {})
    return {
        "node_role": node_cfg.get("node_role", "prod"),
        "gateway_timeout_ms": int(node_cfg.get(
            "gateway_timeout_ms",
            defaults.get("gateway_timeout_ms", 2500),
        )),
        "apply_timeout_ms": int(node_cfg.get(
            "apply_timeout_ms",
            defaults.get("apply_timeout_ms", 10000),
        )),
        "get_retry": int(node_cfg.get("get_retry", defaults.get("get_retry", 1))),
        "post_retry": int(node_cfg.get("post_retry", defaults.get("post_retry", 0))),
        "enabled": node_cfg.get("enabled", True),
    }


def get_router_url(node_id: str) -> str:
    """Router URL for node.

    Priority:
      1) NODES_<node_id>_ROUTER_URL
      2) ROUTER_URL when it targets the current NODE_ID (single-node/local dev)
      3) ROUTER_URL for NODA2 when NODE_ID is absent (legacy startup scripts)
      4) nodes_registry.yml value
      5) hardcoded localhost fallback
    """
    override = os.getenv(f"NODES_{node_id}_ROUTER_URL")
    if override:
        return override
    # Local fallback: when running the console outside Docker, NODE_ID may be
    # absent while only ROUTER_URL is configured (no per-node env override).
    current_node = os.getenv("NODE_ID", "").strip().upper()
    router_url = os.getenv("ROUTER_URL", "").strip()
    target_node = str(node_id).strip().upper()
    if router_url and current_node and current_node == target_node:
        return router_url
    # Compatibility fallback for legacy local startup scripts that target
    # NODA2 but do not export NODE_ID.
    if router_url and not current_node and target_node == "NODA2":
        return router_url
    nodes = load_nodes_registry().get("nodes", {})
    return (nodes.get(node_id) or {}).get("router_url", "http://localhost:8000")


def get_node_ssh_profile(node_id: str) -> Dict[str, Any]:
    """SSH profile for node with env overrides.

    Env overrides:
      NODES_<node_id>_SSH_HOST / _PORT / _USER / _PASSWORD / _PRIVATE_KEY

    Returns a dict that never exposes the secret values themselves — only
    password_set / private_key_set booleans.
    """
    reg = load_nodes_registry()
    node = (reg.get("nodes", {}).get(node_id) or {})
    ssh = dict(node.get("ssh") or {})
    auth = dict(ssh.get("auth") or {})

    prefix = f"NODES_{node_id}_SSH_"
    # "or ''" guards against explicit YAML nulls (e.g. "host:") which would
    # otherwise flow into .strip() as None and raise AttributeError.
    host = os.getenv(f"{prefix}HOST", ssh.get("host") or "").strip()
    user = os.getenv(f"{prefix}USER", ssh.get("user") or "").strip()
    private_key = os.getenv(f"{prefix}PRIVATE_KEY", auth.get("private_key") or "").strip()
    password_env = (auth.get("password_env") or f"{prefix}PASSWORD").strip()
    password = os.getenv(f"{prefix}PASSWORD", os.getenv(password_env, "")).strip()
    try:
        port = int(os.getenv(f"{prefix}PORT", str(ssh.get("port", 22))))
    except Exception:
        port = 22  # malformed port value — fall back to the SSH default

    return {
        "configured": bool(host and user),
        "host": host,
        "ipv6": ssh.get("ipv6", ""),
        "port": port,
        "user": user,
        "host_keys": ssh.get("host_keys", []),
        "auth": {
            "password_env": password_env,
            "password_set": bool(password),
            "private_key_set": bool(private_key),
        },
    }


def get_memory_service_url() -> str:
    """Memory-service URL; env override: MEMORY_SERVICE_URL."""
    url = os.getenv("MEMORY_SERVICE_URL")
    if url:
        return url.rstrip("/")
    defaults = load_nodes_registry().get("defaults", {})
    if defaults.get("memory_service_url"):
        return defaults["memory_service_url"].rstrip("/")
    return "http://localhost:8000"


def get_ollama_url() -> str:
    """Ollama URL; env override: OLLAMA_URL."""
    return os.getenv("OLLAMA_URL", "http://localhost:11434").rstrip("/")


def is_voice_ha_enabled() -> bool:
    """Voice HA feature flag. Set VOICE_HA_ENABLED=true to opt in.

    When enabled, /api/voice/tts and /api/voice/chat/stream use Router
    /v1/capability/voice_tts and /v1/capability/voice_llm endpoints for
    multi-node failover instead of calling memory-service directly.
    Default: False (safe for existing deployments).
    """
    return os.getenv("VOICE_HA_ENABLED", "false").lower() in ("1", "true", "yes")


def get_voice_ha_router_url(node_id: str = "NODA2") -> str:
    """Router URL used for Voice HA offload. Defaults to the same router as LLM."""
    override = os.getenv("VOICE_HA_ROUTER_URL")
    if override:
        return override.rstrip("/")
    return get_router_url(node_id).rstrip("/")
+ Default: False (safe for existing deployments). + """ + return os.getenv("VOICE_HA_ENABLED", "false").lower() in ("1", "true", "yes") + + +def get_voice_ha_router_url(node_id: str = "NODA2") -> str: + """Router URL used for Voice HA offload. Defaults to same router as LLM.""" + override = os.getenv("VOICE_HA_ROUTER_URL") + if override: + return override.rstrip("/") + return get_router_url(node_id).rstrip("/") diff --git a/services/sofiia-console/app/db.py b/services/sofiia-console/app/db.py new file mode 100644 index 00000000..8b12775f --- /dev/null +++ b/services/sofiia-console/app/db.py @@ -0,0 +1,5715 @@ +""" +sofiia-console — SQLite persistence layer (Phase 1). + +Schema: projects, documents, sessions, messages (with branching). +Phase 2 upgrade path: replace aiosqlite with asyncpg by setting DATABASE_URL=postgresql://... + +Usage: + from app.db import get_db, init_db + db = await get_db() + await db.execute(...) +""" +import asyncio +import hashlib +import json +import logging +import os +import re +import time +import uuid +from contextlib import asynccontextmanager +from pathlib import Path +from typing import Any, Dict, List, Optional, Tuple + +logger = logging.getLogger(__name__) + +# Data directory — writable volume mount in Docker +_DATA_DIR = Path(os.getenv("SOFIIA_DATA_DIR", "/app/data")) +_DB_PATH = _DATA_DIR / "sofiia.db" + +_INIT_SQL = """ +PRAGMA journal_mode=WAL; +PRAGMA foreign_keys=ON; + +CREATE TABLE IF NOT EXISTS projects ( + project_id TEXT PRIMARY KEY, + name TEXT NOT NULL, + description TEXT DEFAULT '', + created_at TEXT NOT NULL, + updated_at TEXT NOT NULL +); + +CREATE TABLE IF NOT EXISTS documents ( + doc_id TEXT PRIMARY KEY, + project_id TEXT NOT NULL REFERENCES projects(project_id) ON DELETE CASCADE, + file_id TEXT NOT NULL, + sha256 TEXT NOT NULL, + mime TEXT NOT NULL, + size_bytes INTEGER NOT NULL DEFAULT 0, + filename TEXT NOT NULL, + title TEXT DEFAULT '', + tags TEXT DEFAULT '[]', + created_at TEXT NOT NULL, + extracted_text 
TEXT DEFAULT '' +); +CREATE INDEX IF NOT EXISTS idx_docs_project ON documents(project_id); + +CREATE TABLE IF NOT EXISTS sessions ( + session_id TEXT PRIMARY KEY, + project_id TEXT NOT NULL REFERENCES projects(project_id) ON DELETE CASCADE, + title TEXT DEFAULT '', + started_at TEXT NOT NULL, + last_active TEXT NOT NULL, + turn_count INTEGER DEFAULT 0 +); +CREATE INDEX IF NOT EXISTS idx_sessions_project ON sessions(project_id); + +CREATE TABLE IF NOT EXISTS messages ( + msg_id TEXT PRIMARY KEY, + session_id TEXT NOT NULL REFERENCES sessions(session_id) ON DELETE CASCADE, + role TEXT NOT NULL CHECK(role IN ('user','assistant','system')), + content TEXT NOT NULL, + ts TEXT NOT NULL, + parent_msg_id TEXT, + branch_label TEXT DEFAULT 'main' +); +CREATE INDEX IF NOT EXISTS idx_msgs_session ON messages(session_id); +CREATE INDEX IF NOT EXISTS idx_msgs_parent ON messages(parent_msg_id); + +-- Default "default" project always exists +INSERT OR IGNORE INTO projects(project_id, name, description, created_at, updated_at) +VALUES('default', 'Default', 'Auto-created default project', datetime('now'), datetime('now')); + +-- ── Tasks (Kanban) ───────────────────────────────────────────────────────── +CREATE TABLE IF NOT EXISTS tasks ( + task_id TEXT PRIMARY KEY, + project_id TEXT NOT NULL REFERENCES projects(project_id) ON DELETE CASCADE, + title TEXT NOT NULL, + description TEXT DEFAULT '', + status TEXT DEFAULT 'backlog' CHECK(status IN ('backlog','in_progress','review','done')), + priority TEXT DEFAULT 'normal' CHECK(priority IN ('low','normal','high','urgent')), + labels TEXT DEFAULT '[]', -- JSON array + assignees TEXT DEFAULT '[]', -- JSON array + due_at TEXT, + sort_key REAL DEFAULT 0.0, + created_by TEXT DEFAULT '', + created_at TEXT NOT NULL, + updated_at TEXT NOT NULL +); +CREATE INDEX IF NOT EXISTS idx_tasks_project_status ON tasks(project_id, status); +CREATE INDEX IF NOT EXISTS idx_tasks_project_updated ON tasks(project_id, updated_at DESC); + +-- ── Meetings 
─────────────────────────────────────────────────────────────── +CREATE TABLE IF NOT EXISTS meetings ( + meeting_id TEXT PRIMARY KEY, + project_id TEXT NOT NULL REFERENCES projects(project_id) ON DELETE CASCADE, + title TEXT NOT NULL, + agenda TEXT DEFAULT '', + starts_at TEXT NOT NULL, + duration_min INTEGER DEFAULT 30, + location TEXT DEFAULT '', + attendees TEXT DEFAULT '[]', -- JSON array + created_by TEXT DEFAULT '', + created_at TEXT NOT NULL, + updated_at TEXT NOT NULL +); +CREATE INDEX IF NOT EXISTS idx_meetings_project_starts ON meetings(project_id, starts_at); + +-- ── Dialog Graph ─────────────────────────────────────────────────────────── +CREATE TABLE IF NOT EXISTS dialog_nodes ( + node_id TEXT PRIMARY KEY, + project_id TEXT NOT NULL REFERENCES projects(project_id) ON DELETE CASCADE, + node_type TEXT NOT NULL CHECK(node_type IN ( + 'message','task','doc','meeting','agent_run','ops_run', + 'repo_changeset','pull_request','decision','goal' + )), + ref_id TEXT NOT NULL, + title TEXT DEFAULT '', + summary TEXT DEFAULT '', + props TEXT DEFAULT '{}', -- JSON + created_by TEXT DEFAULT 'system', + created_at TEXT NOT NULL, + updated_at TEXT NOT NULL, + UNIQUE(project_id, node_type, ref_id) +); +CREATE INDEX IF NOT EXISTS idx_dnodes_project_type ON dialog_nodes(project_id, node_type); +CREATE INDEX IF NOT EXISTS idx_dnodes_project_updated ON dialog_nodes(project_id, updated_at DESC); + +CREATE TABLE IF NOT EXISTS dialog_edges ( + edge_id TEXT PRIMARY KEY, + project_id TEXT NOT NULL REFERENCES projects(project_id) ON DELETE CASCADE, + from_node_id TEXT NOT NULL, + to_node_id TEXT NOT NULL, + edge_type TEXT NOT NULL CHECK(edge_type IN ( + 'references','summarizes','derives_task','updates_doc', + 'schedules_meeting','resolves','blocks','relates_to', + 'produced_by','executed_as','reflects_on','supersedes' + )), + props TEXT DEFAULT '{}', -- JSON + created_by TEXT DEFAULT 'system', + created_at TEXT NOT NULL, + CHECK(from_node_id != to_node_id), + 
UNIQUE(project_id, from_node_id, to_node_id, edge_type) +); +CREATE INDEX IF NOT EXISTS idx_dedges_project_from ON dialog_edges(project_id, from_node_id); +CREATE INDEX IF NOT EXISTS idx_dedges_project_to ON dialog_edges(project_id, to_node_id); +CREATE INDEX IF NOT EXISTS idx_dedges_project_type ON dialog_edges(project_id, edge_type); + +CREATE TABLE IF NOT EXISTS dialog_views ( + view_id TEXT PRIMARY KEY, + project_id TEXT NOT NULL REFERENCES projects(project_id) ON DELETE CASCADE, + name TEXT NOT NULL, + filters TEXT DEFAULT '{}', + layout TEXT DEFAULT '{}', + created_by TEXT DEFAULT '', + created_at TEXT NOT NULL, + updated_at TEXT NOT NULL, + UNIQUE(project_id, name) +); +CREATE INDEX IF NOT EXISTS idx_dviews_project ON dialog_views(project_id); + +-- ── Entity Links (generic backlinks) ────────────────────────────────────── +CREATE TABLE IF NOT EXISTS entity_links ( + link_id TEXT PRIMARY KEY, + project_id TEXT NOT NULL REFERENCES projects(project_id) ON DELETE CASCADE, + from_type TEXT NOT NULL, + from_id TEXT NOT NULL, + to_type TEXT NOT NULL, + to_id TEXT NOT NULL, + link_type TEXT DEFAULT 'references', + props TEXT DEFAULT '{}', + created_by TEXT DEFAULT '', + created_at TEXT NOT NULL, + UNIQUE(project_id, from_type, from_id, to_type, to_id, link_type) +); +CREATE INDEX IF NOT EXISTS idx_elinks_project_from ON entity_links(project_id, from_type, from_id); +CREATE INDEX IF NOT EXISTS idx_elinks_project_to ON entity_links(project_id, to_type, to_id); + +-- ── Doc versions ────────────────────────────────────────────────────────── +CREATE TABLE IF NOT EXISTS doc_versions ( + version_id TEXT PRIMARY KEY, + doc_id TEXT NOT NULL REFERENCES documents(doc_id) ON DELETE CASCADE, + content TEXT NOT NULL, + author_id TEXT DEFAULT 'system', + created_at TEXT NOT NULL, + project_id TEXT NOT NULL DEFAULT '', + reason TEXT NOT NULL DEFAULT '' +); +CREATE INDEX IF NOT EXISTS idx_docver_doc ON doc_versions(doc_id, created_at DESC); + +-- ── Graph Learning Layer: Playbooks 
───────────────────────────────────────── + +CREATE TABLE IF NOT EXISTS playbooks ( + playbook_id TEXT PRIMARY KEY, + project_id TEXT NOT NULL REFERENCES projects(project_id) ON DELETE CASCADE, + signal_type TEXT NOT NULL, + context_key TEXT NOT NULL, -- e.g. "label:auth", "ops_action:smoke_gateway", "global" + content TEXT NOT NULL DEFAULT '', -- markdown with embedded JSON frontmatter + fingerprint TEXT NOT NULL, -- sha256(project_id|signal_type|context_key) + uses INTEGER NOT NULL DEFAULT 0, + successes INTEGER NOT NULL DEFAULT 0, + failures INTEGER NOT NULL DEFAULT 0, + success_rate REAL NOT NULL DEFAULT 0.0, + ema_time_to_resolve_h REAL NOT NULL DEFAULT 0.0, + last_used_at TEXT, + created_at TEXT NOT NULL, + updated_at TEXT NOT NULL +); +CREATE UNIQUE INDEX IF NOT EXISTS ux_playbooks_fingerprint + ON playbooks(fingerprint); +CREATE INDEX IF NOT EXISTS idx_playbooks_project_type + ON playbooks(project_id, signal_type); + +-- ── Graph Learning Layer: Lessons ──────────────────────────────────────────── + +CREATE TABLE IF NOT EXISTS lessons ( + lesson_id TEXT PRIMARY KEY, + project_id TEXT NOT NULL REFERENCES projects(project_id) ON DELETE CASCADE, + window TEXT NOT NULL DEFAULT '7d', + date_bucket TEXT NOT NULL, -- ISO week, e.g. 
'2026-W09' + fingerprint TEXT NOT NULL, -- sha256(project_id|window|date_bucket) + status TEXT NOT NULL DEFAULT 'published', + lesson_node_id TEXT NOT NULL DEFAULT '', + doc_version_id TEXT NOT NULL DEFAULT '', + metrics_json TEXT NOT NULL DEFAULT '{}', + impact_score REAL NOT NULL DEFAULT 0.0, + impact_json TEXT NOT NULL DEFAULT '{}', + created_at TEXT NOT NULL, + updated_at TEXT NOT NULL +); +CREATE UNIQUE INDEX IF NOT EXISTS ux_lessons_fingerprint + ON lessons(fingerprint); +CREATE INDEX IF NOT EXISTS idx_lessons_project_bucket + ON lessons(project_id, date_bucket); + +-- ── Strategic CTO Layer ──────────────────────────────────────────────────── + +CREATE TABLE IF NOT EXISTS graph_signals ( + id TEXT PRIMARY KEY, + project_id TEXT NOT NULL REFERENCES projects(project_id) ON DELETE CASCADE, + signal_type TEXT NOT NULL, -- 'release_blocker'|'ops_instability'|'stale_goal'|'risk_cluster'|'run_quality_regression' + severity TEXT NOT NULL DEFAULT 'medium' CHECK(severity IN ('low','medium','high','critical')), + title TEXT NOT NULL, + summary TEXT NOT NULL DEFAULT '', + evidence TEXT NOT NULL DEFAULT '{}', -- JSON: node_ids, metrics, context + status TEXT NOT NULL DEFAULT 'open' CHECK(status IN ('open','ack','resolved','dismissed')), + fingerprint TEXT, -- hash(signal_type+evidence) for idempotent upsert + created_at TEXT NOT NULL, + updated_at TEXT NOT NULL +); +CREATE INDEX IF NOT EXISTS idx_signals_project_status ON graph_signals(project_id, status); +CREATE INDEX IF NOT EXISTS idx_signals_project_severity ON graph_signals(project_id, severity); +CREATE INDEX IF NOT EXISTS idx_signals_fingerprint ON graph_signals(project_id, fingerprint); + +CREATE TABLE IF NOT EXISTS graph_snapshots ( + id TEXT PRIMARY KEY, + project_id TEXT NOT NULL REFERENCES projects(project_id) ON DELETE CASCADE, + scope TEXT NOT NULL DEFAULT 'project', + window TEXT NOT NULL DEFAULT '7d', + date_bucket TEXT NOT NULL, -- ISO date "YYYY-MM-DD" for dedup + metrics TEXT NOT NULL DEFAULT '{}', -- 
JSON + created_at TEXT NOT NULL +); +CREATE UNIQUE INDEX IF NOT EXISTS ux_snapshots_project_window_date + ON graph_snapshots(project_id, scope, window, date_bucket); +CREATE INDEX IF NOT EXISTS idx_snapshots_project_window ON graph_snapshots(project_id, window); + +-- ── Agent Overrides (Projects = Agents, Level 8) ──────────────────────────── + +CREATE TABLE IF NOT EXISTS agent_overrides ( + node_id TEXT NOT NULL, -- 'NODA1' | 'NODA2' + agent_id TEXT NOT NULL, + display_name TEXT, + domain TEXT, + system_prompt_md TEXT, + is_hidden INTEGER NOT NULL DEFAULT 0, + last_applied_hash TEXT, -- sha256 of last successfully applied payload + last_applied_at TEXT, + updated_at TEXT NOT NULL, + PRIMARY KEY (node_id, agent_id) +); +CREATE INDEX IF NOT EXISTS idx_agent_overrides_hidden ON agent_overrides(is_hidden); + +-- ── Agent Override Versions (Safe Apply v2, Agents Ops) ────────────────────── + +CREATE TABLE IF NOT EXISTS agent_override_versions ( + id TEXT PRIMARY KEY, -- uuid4 + node_id TEXT NOT NULL, + agent_id TEXT NOT NULL, + version_hash TEXT NOT NULL, -- sha256(payload_json) + payload_json TEXT NOT NULL, -- full desired state snapshot + created_at TEXT NOT NULL +); +CREATE UNIQUE INDEX IF NOT EXISTS ux_agent_ver_hash + ON agent_override_versions(node_id, agent_id, version_hash); +CREATE INDEX IF NOT EXISTS idx_agent_ver_agent + ON agent_override_versions(node_id, agent_id, created_at DESC); + +-- ── Governance Audit Trail (Level 7) ──────────────────────────────────────── + +CREATE TABLE IF NOT EXISTS governance_events ( + event_id TEXT PRIMARY KEY, + scope TEXT NOT NULL DEFAULT 'project', -- 'project' | 'portfolio' + project_id TEXT NOT NULL, -- real project_id OR 'portfolio' + actor_type TEXT NOT NULL DEFAULT 'system', -- 'user' | 'autopilot' | 'system' + actor_id TEXT, + event_type TEXT NOT NULL, + idempotency_key TEXT NOT NULL, + severity TEXT NOT NULL DEFAULT 'info', -- info|warn|high|critical + status TEXT NOT NULL DEFAULT 'ok', -- ok|error|skipped + 
ref_type TEXT, -- 'signal'|'lesson'|'run'|'gate_decision'|'task' + ref_id TEXT, + evidence_json TEXT NOT NULL DEFAULT '{}', + created_at TEXT NOT NULL +); +CREATE UNIQUE INDEX IF NOT EXISTS ux_governance_events_idem + ON governance_events(idempotency_key); +CREATE INDEX IF NOT EXISTS idx_governance_events_scope_time + ON governance_events(scope, project_id, created_at DESC); +CREATE INDEX IF NOT EXISTS idx_governance_events_type_time + ON governance_events(event_type, created_at DESC); + +-- ── Graph Intelligence (Hygiene + Reflection) ────────────────────────────── +-- These ADD COLUMN statements are idempotent (IF NOT EXISTS requires SQLite 3.37+). +-- On older SQLite they fail silently — init_db() wraps them in a separate try block. +""" + +# Migration SQL run separately (idempotent — each statement is independent) +_MIGRATION_SQL_STMTS = [ + # dialog_nodes: fingerprint, lifecycle, importance + "ALTER TABLE dialog_nodes ADD COLUMN fingerprint TEXT", + "ALTER TABLE dialog_nodes ADD COLUMN lifecycle TEXT NOT NULL DEFAULT 'active'", + "ALTER TABLE dialog_nodes ADD COLUMN importance REAL NOT NULL DEFAULT 0.3", + # dialog_edges: strength + "ALTER TABLE dialog_edges ADD COLUMN strength REAL NOT NULL DEFAULT 0.5", + # Recreate dialog_edges with expanded edge_type CHECK (reflects_on, supersedes) + # The IF NOT EXISTS on the temp table makes this idempotent. 
+ """CREATE TABLE IF NOT EXISTS dialog_edges_v2 ( + edge_id TEXT PRIMARY KEY, + project_id TEXT NOT NULL REFERENCES projects(project_id) ON DELETE CASCADE, + from_node_id TEXT NOT NULL, + to_node_id TEXT NOT NULL, + edge_type TEXT NOT NULL CHECK(edge_type IN ( + 'references','summarizes','derives_task','updates_doc', + 'schedules_meeting','resolves','blocks','relates_to', + 'produced_by','executed_as','reflects_on','supersedes' + )), + props TEXT DEFAULT '{}', + created_by TEXT DEFAULT 'system', + created_at TEXT NOT NULL, + strength REAL NOT NULL DEFAULT 0.5, + CHECK(from_node_id != to_node_id), + UNIQUE(project_id, from_node_id, to_node_id, edge_type) + )""", + "INSERT OR IGNORE INTO dialog_edges_v2 SELECT edge_id,project_id,from_node_id,to_node_id,edge_type,props,created_by,created_at,COALESCE(strength,0.5) FROM dialog_edges", + "DROP TABLE IF EXISTS dialog_edges", + "ALTER TABLE dialog_edges_v2 RENAME TO dialog_edges", + # Re-create indexes after rename + "CREATE INDEX IF NOT EXISTS idx_dedges_project_from ON dialog_edges(project_id, from_node_id)", + "CREATE INDEX IF NOT EXISTS idx_dedges_project_to ON dialog_edges(project_id, to_node_id)", + "CREATE INDEX IF NOT EXISTS idx_dedges_project_type ON dialog_edges(project_id, edge_type)", + "CREATE INDEX IF NOT EXISTS idx_dedges_strength ON dialog_edges(project_id, strength DESC)", + # Indexes for dialog_nodes new columns + "CREATE UNIQUE INDEX IF NOT EXISTS ux_dnodes_fingerprint ON dialog_nodes(project_id, node_type, fingerprint) WHERE fingerprint IS NOT NULL", + "CREATE INDEX IF NOT EXISTS idx_dnodes_lifecycle ON dialog_nodes(project_id, lifecycle)", + "CREATE INDEX IF NOT EXISTS idx_dnodes_importance ON dialog_nodes(project_id, importance DESC)", + # lessons: impact columns (Delta Intelligence v2) + "ALTER TABLE lessons ADD COLUMN impact_score REAL NOT NULL DEFAULT 0.0", + "ALTER TABLE lessons ADD COLUMN impact_json TEXT NOT NULL DEFAULT '{}'", + # doc_versions: add project_id, reason columns to existing old 
schema (idempotent) + "ALTER TABLE doc_versions ADD COLUMN project_id TEXT NOT NULL DEFAULT ''", + "ALTER TABLE doc_versions ADD COLUMN reason TEXT NOT NULL DEFAULT ''", + # agent_overrides: Safe Apply v2 columns (Agents Ops) + "ALTER TABLE agent_overrides ADD COLUMN last_applied_hash TEXT", + "ALTER TABLE agent_overrides ADD COLUMN last_applied_at TEXT", + # agent_override_versions table (idempotent) + """CREATE TABLE IF NOT EXISTS agent_override_versions ( + id TEXT PRIMARY KEY, + node_id TEXT NOT NULL, + agent_id TEXT NOT NULL, + version_hash TEXT NOT NULL, + payload_json TEXT NOT NULL, + created_at TEXT NOT NULL + )""", + "CREATE UNIQUE INDEX IF NOT EXISTS ux_agent_ver_hash ON agent_override_versions(node_id, agent_id, version_hash)", + "CREATE INDEX IF NOT EXISTS idx_agent_ver_agent ON agent_override_versions(node_id, agent_id, created_at)", + # governance_events: idempotency + indexes (Level 7) + """CREATE TABLE IF NOT EXISTS governance_events ( + event_id TEXT PRIMARY KEY, + scope TEXT NOT NULL DEFAULT 'project', + project_id TEXT NOT NULL, + actor_type TEXT NOT NULL DEFAULT 'system', + actor_id TEXT, + event_type TEXT NOT NULL, + idempotency_key TEXT NOT NULL, + severity TEXT NOT NULL DEFAULT 'info', + status TEXT NOT NULL DEFAULT 'ok', + ref_type TEXT, + ref_id TEXT, + evidence_json TEXT NOT NULL DEFAULT '{}', + created_at TEXT NOT NULL + )""", + "CREATE UNIQUE INDEX IF NOT EXISTS ux_governance_events_idem ON governance_events(idempotency_key)", + "CREATE INDEX IF NOT EXISTS idx_governance_events_scope_time ON governance_events(scope, project_id, created_at)", + "CREATE INDEX IF NOT EXISTS idx_governance_events_type_time ON governance_events(event_type, created_at)", +] + +_db_lock = asyncio.Lock() +_db_conn = None # type: Any + + +async def _ensure_aiosqlite(): + try: + import aiosqlite + return aiosqlite + except ImportError: + raise RuntimeError( + "aiosqlite not installed. Add 'aiosqlite>=0.20' to requirements.txt." 
async def _ensure_aiosqlite():
    """Import aiosqlite lazily so this module loads even when it is absent.

    Raises:
        RuntimeError: when the aiosqlite package is not installed.
    """
    try:
        import aiosqlite
        return aiosqlite
    except ImportError:
        raise RuntimeError(
            "aiosqlite not installed. Add 'aiosqlite>=0.20' to requirements.txt."
        )


async def init_db():
    """Create tables and apply idempotent migrations. Called once on startup.

    Safe to call concurrently: the global _db_lock guards connection
    creation, and _db_conn is only assigned once.
    """
    global _db_conn
    aiosqlite = await _ensure_aiosqlite()
    _DATA_DIR.mkdir(parents=True, exist_ok=True)
    async with _db_lock:
        if _db_conn is None:
            _db_conn = await aiosqlite.connect(str(_DB_PATH))
            _db_conn.row_factory = aiosqlite.Row
            await _db_conn.executescript(_INIT_SQL)
            await _db_conn.commit()
            # Apply column migrations — each is idempotent (duplicate column → ignored)
            for stmt in _MIGRATION_SQL_STMTS:
                try:
                    await _db_conn.execute(stmt)
                except Exception as e:
                    # "duplicate column name" / "index already exists" are expected on re-init
                    msg = str(e).lower()
                    if "duplicate column" not in msg and "already exists" not in msg:
                        logger.warning("migration stmt skipped (%s): %s", stmt[:50], e)
            await _db_conn.commit()
            logger.info("DB initialised: %s", _DB_PATH)


async def get_db():
    """Return the shared connection, initialising it on first use."""
    if _db_conn is None:
        await init_db()
    return _db_conn


def _now() -> str:
    """UTC timestamp in ISO-8601 'YYYY-MM-DDTHH:MM:SSZ' form (sortable as text)."""
    from datetime import datetime, timezone
    return datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")


# ── Projects ──────────────────────────────────────────────────────────────────

async def create_project(name: str, description: str = "", project_id: str = None) -> Dict[str, Any]:
    """Insert a project row; project_id defaults to a fresh uuid4."""
    db = await get_db()
    pid = project_id or str(uuid.uuid4())
    now = _now()
    await db.execute(
        "INSERT INTO projects(project_id, name, description, created_at, updated_at) VALUES(?,?,?,?,?)",
        (pid, name, description, now, now),
    )
    await db.commit()
    return {"project_id": pid, "name": name, "description": description, "created_at": now}


async def list_projects() -> List[Dict[str, Any]]:
    """All projects, most recently updated first."""
    db = await get_db()
    async with db.execute("SELECT * FROM projects ORDER BY updated_at DESC") as cur:
        rows = await cur.fetchall()
    return [dict(r) for r in rows]


async def get_project(project_id: str) -> Optional[Dict[str, Any]]:
    """Single project row as a dict, or None when absent."""
    db = await get_db()
    async with db.execute("SELECT * FROM projects WHERE project_id=?", (project_id,)) as cur:
        row = await cur.fetchone()
    return dict(row) if row else None


async def update_project(project_id: str, name: str = None, description: str = None) -> bool:
    """Partially update a project; returns False when no fields were given.

    NOTE(review): returns True even if project_id does not exist — the UPDATE
    simply matches zero rows; callers relying on existence must check first.
    """
    db = await get_db()
    now = _now()
    fields, vals = [], []
    if name is not None:
        fields.append("name=?")
        vals.append(name)
    if description is not None:
        fields.append("description=?")
        vals.append(description)
    if not fields:
        return False
    fields.append("updated_at=?")
    vals.append(now)
    vals.append(project_id)
    await db.execute(f"UPDATE projects SET {', '.join(fields)} WHERE project_id=?", vals)
    await db.commit()
    return True


# ── Documents ─────────────────────────────────────────────────────────────────

def _parse_tags(raw: Any) -> List[Any]:
    """Decode the JSON-encoded tags column; malformed data degrades to []."""
    try:
        return json.loads(raw)
    except Exception:
        return []


async def create_document(
    project_id: str, file_id: str, sha256: str, mime: str,
    size_bytes: int, filename: str, title: str = "",
    tags: List[str] = None, extracted_text: str = "",
) -> Dict[str, Any]:
    """Insert a document row and touch the parent project's updated_at.

    Both statements are committed together (single transaction) so a crash
    cannot leave the project timestamp out of sync with the new document.
    """
    db = await get_db()
    did = str(uuid.uuid4())
    now = _now()
    tags_json = json.dumps(tags or [])
    await db.execute(
        "INSERT INTO documents(doc_id,project_id,file_id,sha256,mime,size_bytes,filename,"
        "title,tags,created_at,extracted_text) VALUES(?,?,?,?,?,?,?,?,?,?,?)",
        (did, project_id, file_id, sha256, mime, size_bytes, filename, title, tags_json, now, extracted_text),
    )
    # Touch project updated_at in the same transaction.
    await db.execute("UPDATE projects SET updated_at=? WHERE project_id=?", (now, project_id))
    await db.commit()
    return {
        "doc_id": did, "project_id": project_id, "file_id": file_id,
        "sha256": sha256, "mime": mime, "size_bytes": size_bytes,
        "filename": filename, "title": title, "tags": tags or [],
        "created_at": now, "extracted_text_preview": extracted_text[:500],
    }


async def list_documents(project_id: str, limit: int = 50) -> List[Dict[str, Any]]:
    """Documents of a project, newest first, tags decoded from JSON."""
    db = await get_db()
    async with db.execute(
        "SELECT doc_id,project_id,file_id,sha256,mime,size_bytes,filename,title,tags,created_at "
        "FROM documents WHERE project_id=? ORDER BY created_at DESC LIMIT ?",
        (project_id, limit),
    ) as cur:
        rows = await cur.fetchall()
    result = []
    for r in rows:
        d = dict(r)
        d["tags"] = _parse_tags(d["tags"])
        result.append(d)
    return result


async def get_document(doc_id: str) -> Optional[Dict[str, Any]]:
    """Single document (including extracted_text), tags decoded, or None."""
    db = await get_db()
    async with db.execute("SELECT * FROM documents WHERE doc_id=?", (doc_id,)) as cur:
        row = await cur.fetchone()
    if not row:
        return None
    d = dict(row)
    d["tags"] = _parse_tags(d["tags"])
    return d


async def search_documents(project_id: str, query: str, limit: int = 20) -> List[Dict[str, Any]]:
    """Keyword search in filename, title, extracted_text (Phase 1 — no embeddings).

    LIKE wildcards in *query* ('%', '_', '\\') are escaped so user input is
    matched literally instead of acting as patterns.
    """
    db = await get_db()
    escaped = query.replace("\\", "\\\\").replace("%", "\\%").replace("_", "\\_")
    pattern = f"%{escaped}%"
    async with db.execute(
        "SELECT doc_id,project_id,filename,title,mime,size_bytes,created_at "
        "FROM documents WHERE project_id=? "
        "AND (filename LIKE ? ESCAPE '\\' OR title LIKE ? ESCAPE '\\' "
        "OR extracted_text LIKE ? ESCAPE '\\') "
        "ORDER BY created_at DESC LIMIT ?",
        (project_id, pattern, pattern, pattern, limit),
    ) as cur:
        rows = await cur.fetchall()
    return [dict(r) for r in rows]


# ── Sessions ──────────────────────────────────────────────────────────────────

async def upsert_session(
    session_id: str, project_id: str = "default", title: str = ""
) -> Dict[str, Any]:
    """Create a session, or touch last_active when it already exists."""
    db = await get_db()
    now = _now()
    existing = await get_session(session_id)
    if existing:
        await db.execute(
            "UPDATE sessions SET last_active=? WHERE session_id=?", (now, session_id)
        )
        await db.commit()
        return {**existing, "last_active": now}
    await db.execute(
        "INSERT INTO sessions(session_id,project_id,title,started_at,last_active,turn_count) "
        "VALUES(?,?,?,?,?,0)",
        (session_id, project_id, title, now, now),
    )
    await db.commit()
    return {
        "session_id": session_id, "project_id": project_id,
        "title": title, "started_at": now, "last_active": now, "turn_count": 0,
    }


async def get_session(session_id: str) -> Optional[Dict[str, Any]]:
    """Single session row as a dict, or None when absent."""
    db = await get_db()
    async with db.execute("SELECT * FROM sessions WHERE session_id=?", (session_id,)) as cur:
        row = await cur.fetchone()
    return dict(row) if row else None


async def list_sessions(project_id: str, limit: int = 30) -> List[Dict[str, Any]]:
    """Sessions of a project, most recently active first."""
    db = await get_db()
    async with db.execute(
        "SELECT * FROM sessions WHERE project_id=? ORDER BY last_active DESC LIMIT ?",
        (project_id, limit),
    ) as cur:
        rows = await cur.fetchall()
    return [dict(r) for r in rows]
async def list_sessions_page(
    project_id: str,
    limit: int = 30,
    before_last_active: Optional[str] = None,
    before_session_id: Optional[str] = None,
) -> List[Dict[str, Any]]:
    """Page sessions by stable key: (last_active DESC, session_id DESC).

    The cursor is exclusive: only rows strictly before
    (before_last_active, before_session_id) are returned. Limit is clamped
    to [1, 500] to protect the DB.
    """
    db = await get_db()
    eff_limit = max(1, min(int(limit), 500))
    if before_last_active:
        # Tie-break by session_id to keep the cursor deterministic. When the
        # caller omits it, '~' (0x7E) sorts after alphanumeric ids, so every
        # row sharing the timestamp is still included.
        sid = before_session_id or "~~~~~~~~"
        sql = (
            "SELECT * FROM sessions "
            "WHERE project_id=? "
            "AND (last_active < ? OR (last_active = ? AND session_id < ?)) "
            "ORDER BY last_active DESC, session_id DESC LIMIT ?"
        )
        params = (project_id, before_last_active, before_last_active, sid, eff_limit)
    else:
        sql = (
            "SELECT * FROM sessions WHERE project_id=? "
            "ORDER BY last_active DESC, session_id DESC LIMIT ?"
        )
        params = (project_id, eff_limit)
    async with db.execute(sql, params) as cur:
        rows = await cur.fetchall()
    return [dict(r) for r in rows]


async def update_session_title(session_id: str, title: str):
    """Rename a session (no-op when session_id does not exist)."""
    db = await get_db()
    await db.execute("UPDATE sessions SET title=? WHERE session_id=?", (title, session_id))
    await db.commit()


# ── Messages ──────────────────────────────────────────────────────────────────

async def save_message(
    session_id: str, role: str, content: str,
    parent_msg_id: str = None, branch_label: str = "main",
) -> Dict[str, Any]:
    """Append a message and bump the session's last_active / turn_count.

    Both statements share one commit so the counters cannot drift from the
    message log.
    """
    db = await get_db()
    mid = str(uuid.uuid4())
    now = _now()
    await db.execute(
        "INSERT INTO messages(msg_id,session_id,role,content,ts,parent_msg_id,branch_label) "
        "VALUES(?,?,?,?,?,?,?)",
        (mid, session_id, role, content, now, parent_msg_id, branch_label),
    )
    await db.execute(
        "UPDATE sessions SET last_active=?, turn_count=turn_count+1 WHERE session_id=?",
        (now, session_id),
    )
    await db.commit()
    return {
        "msg_id": mid, "session_id": session_id, "role": role,
        "content": content, "ts": now,
        "parent_msg_id": parent_msg_id, "branch_label": branch_label,
    }


async def list_messages(
    session_id: str, limit: int = 50, branch_label: str = None,
) -> List[Dict[str, Any]]:
    """Messages of a session in chronological order, optionally one branch.

    The query is built incrementally instead of duplicating two near-identical
    branches (parameters remain bound, never interpolated).
    """
    db = await get_db()
    sql = "SELECT * FROM messages WHERE session_id=?"
    params: List[Any] = [session_id]
    if branch_label:
        sql += " AND branch_label=?"
        params.append(branch_label)
    sql += " ORDER BY ts ASC LIMIT ?"
    params.append(limit)
    async with db.execute(sql, params) as cur:
        rows = await cur.fetchall()
    return [dict(r) for r in rows]


async def list_messages_page(
    session_id: str,
    limit: int = 50,
    before_ts: Optional[str] = None,
    before_msg_id: Optional[str] = None,
) -> List[Dict[str, Any]]:
    """Page messages by stable key: (ts DESC, msg_id DESC), newest first.

    Mirrors list_sessions_page: exclusive cursor, msg_id tie-break with the
    high-sorting '~' sentinel when no id is supplied, limit clamped to 500.
    """
    db = await get_db()
    eff_limit = max(1, min(int(limit), 500))
    if before_ts:
        mid = before_msg_id or "~~~~~~~~"
        sql = (
            "SELECT * FROM messages "
            "WHERE session_id=? "
            "AND (ts < ? OR (ts = ? AND msg_id < ?)) "
            "ORDER BY ts DESC, msg_id DESC LIMIT ?"
        )
        params = (session_id, before_ts, before_ts, mid, eff_limit)
    else:
        sql = (
            "SELECT * FROM messages WHERE session_id=? "
            "ORDER BY ts DESC, msg_id DESC LIMIT ?"
        )
        params = (session_id, eff_limit)
    async with db.execute(sql, params) as cur:
        rows = await cur.fetchall()
    return [dict(r) for r in rows]
+ ) + params = (session_id, before_ts, before_ts, mid, eff_limit) + else: + sql = ( + "SELECT * FROM messages WHERE session_id=? " + "ORDER BY ts DESC, msg_id DESC LIMIT ?" + ) + params = (session_id, eff_limit) + async with db.execute(sql, params) as cur: + rows = await cur.fetchall() + return [dict(r) for r in rows] + + +async def get_dialog_map(session_id: str) -> Dict[str, Any]: + """Return nodes and edges for the dialog map tree. + + Nodes: each message. + Edges: parent_msg_id → msg_id (DAG, enables branching). + """ + msgs = await list_messages(session_id, limit=500) + nodes = [ + { + "id": m["msg_id"], + "role": m["role"], + "preview": m["content"][:80].replace("\n", " "), + "ts": m["ts"], + "branch": m["branch_label"], + } + for m in msgs + ] + edges = [ + {"from": m["parent_msg_id"], "to": m["msg_id"]} + for m in msgs + if m.get("parent_msg_id") + ] + return { + "session_id": session_id, + "nodes": nodes, + "edges": edges, + "branch_labels": list({m["branch_label"] for m in msgs}), + } + + +async def fork_session( + source_session_id: str, from_msg_id: str, new_title: str = "", + project_id: str = "default", +) -> Dict[str, Any]: + """Fork a session from a specific message — creates new session + copies ancestor messages.""" + msgs = await list_messages(source_session_id, limit=500) + # Collect ancestors of from_msg_id (inclusive) + msg_by_id = {m["msg_id"]: m for m in msgs} + ancestors = [] + cur_id = from_msg_id + while cur_id: + m = msg_by_id.get(cur_id) + if not m: + break + ancestors.append(m) + cur_id = m.get("parent_msg_id") + ancestors.reverse() # chronological order + + new_sid = f"fork_{uuid.uuid4().hex[:12]}" + title = new_title or f"Fork from {source_session_id[:8]}" + await upsert_session(new_sid, project_id=project_id, title=title) + + last_mid = None + for m in ancestors: + saved = await save_message( + new_sid, m["role"], m["content"], + parent_msg_id=last_mid, branch_label="main", + ) + last_mid = saved["msg_id"] + + return 
{"new_session_id": new_sid, "copied_turns": len(ancestors), "fork_root": from_msg_id} + + +async def close_db(): + global _db_conn + if _db_conn: + await _db_conn.close() + _db_conn = None + + +# ── Transactional helpers ────────────────────────────────────────────────────── + +@asynccontextmanager +async def transaction(): + """Async context manager for explicit SQLite transaction. + + Usage: + async with transaction(): + await db.execute(...) # part of atomic unit + """ + db = await get_db() + try: + yield db + await db.commit() + except Exception: + await db.rollback() + raise + + +# ── Tasks ────────────────────────────────────────────────────────────────────── + +async def create_task( + project_id: str, + title: str, + description: str = "", + status: str = "backlog", + priority: str = "normal", + labels: List[str] = None, + assignees: List[str] = None, + due_at: str = None, + created_by: str = "", + source_msg_id: str = None, # if set → auto-creates derives_task edge from message node +) -> Dict[str, Any]: + """Create a task and atomically upsert its dialog_node. + + If source_msg_id is provided, also creates a derives_task edge from the + message node to the task node — all within a single SQLite transaction. + """ + db = await get_db() + tid = str(uuid.uuid4()) + node_id = str(uuid.uuid4()) + now = _now() + labels_json = json.dumps(labels or []) + assignees_json = json.dumps(assignees or []) + + async with db.execute( + "SELECT MAX(sort_key) FROM tasks WHERE project_id=? 
AND status=?", (project_id, status) + ) as cur: + row = await cur.fetchone() + sort_key = float((row[0] or 0.0)) + 1.0 + + task_data = { + "task_id": tid, "project_id": project_id, "title": title, + "description": description, "status": status, "priority": priority, + "labels": labels or [], "assignees": assignees or [], + "due_at": due_at, "sort_key": sort_key, "created_by": created_by, + "created_at": now, "updated_at": now, + } + + # Atomic: task + dialog_node (+ optional derives_task edge) + # Note: aiosqlite auto-opens a transaction on first DML, so no explicit BEGIN needed. + try: + await db.execute( + "INSERT INTO tasks(task_id,project_id,title,description,status,priority," + "labels,assignees,due_at,sort_key,created_by,created_at,updated_at) " + "VALUES(?,?,?,?,?,?,?,?,?,?,?,?,?)", + (tid, project_id, title, description, status, priority, + labels_json, assignees_json, due_at, sort_key, created_by, now, now), + ) + props_json = json.dumps({"status": status, "priority": priority, "source": "create_task"}) + await db.execute( + """INSERT INTO dialog_nodes(node_id,project_id,node_type,ref_id,title,summary,props,created_by,created_at,updated_at) + VALUES(?,?,?,?,?,?,?,?,?,?) + ON CONFLICT(project_id,node_type,ref_id) DO UPDATE SET + title=excluded.title, summary=excluded.summary, + props=excluded.props, updated_at=excluded.updated_at""", + (node_id, project_id, "task", tid, title, description[:200], props_json, created_by or "system", now, now), + ) + if source_msg_id: + # Resolve message node (may not exist yet — create stub) + msg_node_id = str(uuid.uuid4()) + await db.execute( + """INSERT INTO dialog_nodes(node_id,project_id,node_type,ref_id,title,summary,props,created_by,created_at,updated_at) + VALUES(?,?,?,?,?,?,?,?,?,?) 
+ ON CONFLICT(project_id,node_type,ref_id) DO NOTHING""", + (msg_node_id, project_id, "message", source_msg_id, f"msg:{source_msg_id[:8]}", "", "{}", "system", now, now), + ) + # Get actual message node_id (may be existing) + async with db.execute( + "SELECT node_id FROM dialog_nodes WHERE project_id=? AND node_type='message' AND ref_id=?", + (project_id, source_msg_id), + ) as cur: + msg_row = await cur.fetchone() + if msg_row: + eid = str(uuid.uuid4()) + await db.execute( + "INSERT INTO dialog_edges(edge_id,project_id,from_node_id,to_node_id,edge_type,props,created_by,created_at) " + "VALUES(?,?,?,?,?,?,?,?) " + "ON CONFLICT(project_id,from_node_id,to_node_id,edge_type) DO NOTHING", + (eid, project_id, msg_row[0], node_id, "derives_task", "{}", created_by or "system", now), + ) + await db.commit() + except Exception as e: + await db.rollback() + logger.error("create_task atomic failed: %s", e) + raise + + task_data["node_id"] = node_id + return task_data + + +async def list_tasks(project_id: str, status: str = None, limit: int = 100) -> List[Dict[str, Any]]: + db = await get_db() + if status: + async with db.execute( + "SELECT * FROM tasks WHERE project_id=? AND status=? ORDER BY sort_key ASC, created_at ASC LIMIT ?", + (project_id, status, limit), + ) as cur: + rows = await cur.fetchall() + else: + async with db.execute( + "SELECT * FROM tasks WHERE project_id=? 
ORDER BY status ASC, sort_key ASC, created_at ASC LIMIT ?", + (project_id, limit), + ) as cur: + rows = await cur.fetchall() + result = [] + for r in rows: + d = dict(r) + for f in ("labels", "assignees"): + try: + d[f] = json.loads(d[f]) + except Exception: + d[f] = [] + result.append(d) + return result + + +async def get_task(task_id: str) -> Optional[Dict[str, Any]]: + db = await get_db() + async with db.execute("SELECT * FROM tasks WHERE task_id=?", (task_id,)) as cur: + row = await cur.fetchone() + if not row: + return None + d = dict(row) + for f in ("labels", "assignees"): + try: + d[f] = json.loads(d[f]) + except Exception: + d[f] = [] + return d + + +async def update_task(task_id: str, **kwargs) -> bool: + db = await get_db() + now = _now() + allowed = {"title", "description", "status", "priority", "labels", "assignees", "due_at", "sort_key"} + fields, vals = [], [] + for k, v in kwargs.items(): + if k not in allowed: + continue + if k in ("labels", "assignees"): + v = json.dumps(v if isinstance(v, list) else []) + fields.append(f"{k}=?") + vals.append(v) + if not fields: + return False + fields.append("updated_at=?"); vals.append(now) + vals.append(task_id) + await db.execute(f"UPDATE tasks SET {', '.join(fields)} WHERE task_id=?", vals) + await db.commit() + return True + + +async def delete_task(task_id: str) -> bool: + db = await get_db() + await db.execute("DELETE FROM tasks WHERE task_id=?", (task_id,)) + await db.commit() + return True + + +# ── Meetings ─────────────────────────────────────────────────────────────────── + +async def create_meeting( + project_id: str, + title: str, + starts_at: str, + agenda: str = "", + duration_min: int = 30, + location: str = "", + attendees: List[str] = None, + created_by: str = "", + source_msg_id: str = None, # if set → auto-creates schedules_meeting edge +) -> Dict[str, Any]: + """Create a meeting and atomically upsert its dialog_node.""" + db = await get_db() + mid = str(uuid.uuid4()) + node_id = 
str(uuid.uuid4())
+    now = _now()
+    attendees_json = json.dumps(attendees or [])
+
+    meeting_data = {
+        "meeting_id": mid, "project_id": project_id, "title": title,
+        "agenda": agenda, "starts_at": starts_at, "duration_min": duration_min,
+        "location": location, "attendees": attendees or [],
+        "created_by": created_by, "created_at": now, "updated_at": now,
+    }
+
+    try:
+        await db.execute(
+            "INSERT INTO meetings(meeting_id,project_id,title,agenda,starts_at,duration_min,"
+            "location,attendees,created_by,created_at,updated_at) VALUES(?,?,?,?,?,?,?,?,?,?,?)",
+            (mid, project_id, title, agenda, starts_at, duration_min,
+             location, attendees_json, created_by, now, now),
+        )
+        props_json = json.dumps({"starts_at": starts_at, "duration_min": duration_min, "source": "create_meeting"})
+        await db.execute(
+            """INSERT INTO dialog_nodes(node_id,project_id,node_type,ref_id,title,summary,props,created_by,created_at,updated_at)
+               VALUES(?,?,?,?,?,?,?,?,?,?)
+               ON CONFLICT(project_id,node_type,ref_id) DO UPDATE SET
+                 title=excluded.title, props=excluded.props, updated_at=excluded.updated_at""",
+            (node_id, project_id, "meeting", mid, title, agenda[:200], props_json, created_by or "system", now, now),
+        )
+        if source_msg_id:
+            msg_node_id = str(uuid.uuid4())
+            await db.execute(
+                """INSERT INTO dialog_nodes(node_id,project_id,node_type,ref_id,title,summary,props,created_by,created_at,updated_at)
+                   VALUES(?,?,?,?,?,?,?,?,?,?)
+                   ON CONFLICT(project_id,node_type,ref_id) DO NOTHING""",
+                (msg_node_id, project_id, "message", source_msg_id, f"msg:{source_msg_id[:8]}", "", "{}", "system", now, now),  # summary="" keeps column list in sync with create_task's message stub
+            )
+            async with db.execute(
+                "SELECT node_id FROM dialog_nodes WHERE project_id=? AND node_type='message' AND ref_id=?",
+                (project_id, source_msg_id),
+            ) as cur:
+                msg_row = await cur.fetchone()
+            if msg_row:
+                eid = str(uuid.uuid4())
+                await db.execute(
+                    "INSERT INTO dialog_edges(edge_id,project_id,from_node_id,to_node_id,edge_type,props,created_by,created_at) "
+                    "VALUES(?,?,?,?,?,?,?,?) 
" + "ON CONFLICT(project_id,from_node_id,to_node_id,edge_type) DO NOTHING", + (eid, project_id, msg_row[0], node_id, "schedules_meeting", "{}", created_by or "system", now), + ) + await db.commit() + except Exception as e: + await db.rollback() + logger.error("create_meeting atomic failed: %s", e) + raise + + meeting_data["node_id"] = node_id + return meeting_data + + +async def list_meetings(project_id: str, limit: int = 50) -> List[Dict[str, Any]]: + db = await get_db() + async with db.execute( + "SELECT * FROM meetings WHERE project_id=? ORDER BY starts_at ASC LIMIT ?", + (project_id, limit), + ) as cur: + rows = await cur.fetchall() + result = [] + for r in rows: + d = dict(r) + try: + d["attendees"] = json.loads(d["attendees"]) + except Exception: + d["attendees"] = [] + result.append(d) + return result + + +async def get_meeting(meeting_id: str) -> Optional[Dict[str, Any]]: + db = await get_db() + async with db.execute("SELECT * FROM meetings WHERE meeting_id=?", (meeting_id,)) as cur: + row = await cur.fetchone() + if not row: + return None + d = dict(row) + try: + d["attendees"] = json.loads(d["attendees"]) + except Exception: + d["attendees"] = [] + return d + + +async def update_meeting(meeting_id: str, **kwargs) -> bool: + db = await get_db() + now = _now() + allowed = {"title", "agenda", "starts_at", "duration_min", "location", "attendees"} + fields, vals = [], [] + for k, v in kwargs.items(): + if k not in allowed: + continue + if k == "attendees": + v = json.dumps(v if isinstance(v, list) else []) + fields.append(f"{k}=?") + vals.append(v) + if not fields: + return False + fields.append("updated_at=?"); vals.append(now) + vals.append(meeting_id) + await db.execute(f"UPDATE meetings SET {', '.join(fields)} WHERE meeting_id=?", vals) + await db.commit() + return True + + +async def delete_meeting(meeting_id: str) -> bool: + db = await get_db() + await db.execute("DELETE FROM meetings WHERE meeting_id=?", (meeting_id,)) + await db.commit() + return True 
+ + +# ── Dialog Graph ─────────────────────────────────────────────────────────────── + +async def upsert_dialog_node( + project_id: str, + node_type: str, + ref_id: str, + title: str = "", + summary: str = "", + props: Dict[str, Any] = None, + created_by: str = "system", +) -> Dict[str, Any]: + """Insert or update a dialog node (UPSERT by project_id+node_type+ref_id).""" + db = await get_db() + now = _now() + props_json = json.dumps(props or {}) + node_id = str(uuid.uuid4()) + # Try insert; on conflict update title/summary/props/updated_at + await db.execute( + """INSERT INTO dialog_nodes(node_id,project_id,node_type,ref_id,title,summary,props,created_by,created_at,updated_at) + VALUES(?,?,?,?,?,?,?,?,?,?) + ON CONFLICT(project_id,node_type,ref_id) DO UPDATE SET + title=excluded.title, summary=excluded.summary, + props=excluded.props, updated_at=excluded.updated_at""", + (node_id, project_id, node_type, ref_id, title, summary, props_json, created_by, now, now), + ) + await db.commit() + # Return the actual node (may be existing) + async with db.execute( + "SELECT * FROM dialog_nodes WHERE project_id=? AND node_type=? AND ref_id=?", + (project_id, node_type, ref_id), + ) as cur: + row = await cur.fetchone() + d = dict(row) if row else {} + if "props" in d: + try: + d["props"] = json.loads(d["props"]) + except Exception: + d["props"] = {} + return d + + +async def create_dialog_edge( + project_id: str, + from_node_id: str, + to_node_id: str, + edge_type: str, + props: Dict[str, Any] = None, + created_by: str = "system", +) -> Dict[str, Any]: + db = await get_db() + eid = str(uuid.uuid4()) + now = _now() + props_json = json.dumps(props or {}) + try: + await db.execute( + "INSERT INTO dialog_edges(edge_id,project_id,from_node_id,to_node_id,edge_type,props,created_by,created_at) " + "VALUES(?,?,?,?,?,?,?,?) 
" + "ON CONFLICT(project_id,from_node_id,to_node_id,edge_type) DO NOTHING", + (eid, project_id, from_node_id, to_node_id, edge_type, props_json, created_by, now), + ) + await db.commit() + except Exception as e: + logger.warning("create_dialog_edge error: %s", e) + return { + "edge_id": eid, "project_id": project_id, + "from_node_id": from_node_id, "to_node_id": to_node_id, + "edge_type": edge_type, "created_at": now, + } + + +async def get_project_dialog_map(project_id: str) -> Dict[str, Any]: + """Return all dialog nodes and edges for a project.""" + db = await get_db() + async with db.execute( + "SELECT * FROM dialog_nodes WHERE project_id=? ORDER BY created_at ASC", + (project_id,), + ) as cur: + node_rows = await cur.fetchall() + async with db.execute( + "SELECT * FROM dialog_edges WHERE project_id=? ORDER BY created_at ASC", + (project_id,), + ) as cur: + edge_rows = await cur.fetchall() + + nodes = [] + for r in node_rows: + d = dict(r) + try: + d["props"] = json.loads(d["props"]) + except Exception: + d["props"] = {} + nodes.append(d) + + edges = [] + for r in edge_rows: + d = dict(r) + try: + d["props"] = json.loads(d["props"]) + except Exception: + d["props"] = {} + edges.append(d) + + return { + "project_id": project_id, + "nodes": nodes, + "edges": edges, + "node_count": len(nodes), + "edge_count": len(edges), + } + + +async def get_dialog_node_by_ref(project_id: str, node_type: str, ref_id: str) -> Optional[Dict[str, Any]]: + db = await get_db() + async with db.execute( + "SELECT * FROM dialog_nodes WHERE project_id=? AND node_type=? 
AND ref_id=?", + (project_id, node_type, ref_id), + ) as cur: + row = await cur.fetchone() + if not row: + return None + d = dict(row) + try: + d["props"] = json.loads(d["props"]) + except Exception: + d["props"] = {} + return d + + +# ── Entity Links ─────────────────────────────────────────────────────────────── + +async def create_entity_link( + project_id: str, + from_type: str, from_id: str, + to_type: str, to_id: str, + link_type: str = "references", + props: Dict[str, Any] = None, + created_by: str = "", +) -> Dict[str, Any]: + db = await get_db() + lid = str(uuid.uuid4()) + now = _now() + props_json = json.dumps(props or {}) + try: + await db.execute( + "INSERT INTO entity_links(link_id,project_id,from_type,from_id,to_type,to_id,link_type,props,created_by,created_at) " + "VALUES(?,?,?,?,?,?,?,?,?,?) " + "ON CONFLICT(project_id,from_type,from_id,to_type,to_id,link_type) DO NOTHING", + (lid, project_id, from_type, from_id, to_type, to_id, link_type, props_json, created_by, now), + ) + await db.commit() + except Exception as e: + logger.warning("create_entity_link error: %s", e) + return { + "link_id": lid, "project_id": project_id, + "from_type": from_type, "from_id": from_id, + "to_type": to_type, "to_id": to_id, + "link_type": link_type, "created_at": now, + } + + +# ── Doc Versions ─────────────────────────────────────────────────────────────── + +async def save_doc_version(doc_id: str, content: str, author_id: str = "system") -> Dict[str, Any]: + db = await get_db() + vid = str(uuid.uuid4()) + now = _now() + await db.execute( + "INSERT INTO doc_versions(version_id,doc_id,content,author_id,created_at) VALUES(?,?,?,?,?)", + (vid, doc_id, content, author_id, now), + ) + await db.commit() + return {"version_id": vid, "doc_id": doc_id, "author_id": author_id, "created_at": now} + + +async def list_doc_versions(doc_id: str, limit: int = 20) -> List[Dict[str, Any]]: + db = await get_db() + async with db.execute( + "SELECT version_id, doc_id, author_id, 
created_at, LENGTH(content) as content_len " + "FROM doc_versions WHERE doc_id=? ORDER BY created_at DESC LIMIT ?", + (doc_id, limit), + ) as cur: + rows = await cur.fetchall() + return [dict(r) for r in rows] + + +async def get_doc_version_content(version_id: str) -> Optional[str]: + db = await get_db() + async with db.execute( + "SELECT content FROM doc_versions WHERE version_id=?", (version_id,) + ) as cur: + row = await cur.fetchone() + return row[0] if row else None + + +# ── Dialog Views ─────────────────────────────────────────────────────────────── + +async def upsert_dialog_view( + project_id: str, + name: str, + filters: Dict[str, Any] = None, + layout: Dict[str, Any] = None, + created_by: str = "", +) -> Dict[str, Any]: + db = await get_db() + vid = str(uuid.uuid4()) + now = _now() + filters_json = json.dumps(filters or {}) + layout_json = json.dumps(layout or {}) + await db.execute( + "INSERT INTO dialog_views(view_id,project_id,name,filters,layout,created_by,created_at,updated_at) " + "VALUES(?,?,?,?,?,?,?,?) " + "ON CONFLICT(project_id,name) DO UPDATE SET " + "filters=excluded.filters, layout=excluded.layout, updated_at=excluded.updated_at", + (vid, project_id, name, filters_json, layout_json, created_by, now, now), + ) + await db.commit() + async with db.execute( + "SELECT * FROM dialog_views WHERE project_id=? AND name=?", (project_id, name) + ) as cur: + row = await cur.fetchone() + if not row: + return {} + d = dict(row) + for f in ("filters", "layout"): + try: + d[f] = json.loads(d[f]) + except Exception: + d[f] = {} + return d + + +async def list_dialog_views(project_id: str) -> List[Dict[str, Any]]: + db = await get_db() + async with db.execute( + "SELECT * FROM dialog_views WHERE project_id=? 
ORDER BY updated_at DESC", (project_id,) + ) as cur: + rows = await cur.fetchall() + result = [] + for r in rows: + d = dict(r) + for f in ("filters", "layout"): + try: + d[f] = json.loads(d[f]) + except Exception: + d[f] = {} + result.append(d) + return result + + +# ── Graph Integrity ──────────────────────────────────────────────────────────── + +async def check_graph_integrity(project_id: str) -> Dict[str, Any]: + """Verify graph consistency for a project. + + Checks: + 1. No orphaned edges (edges referencing non-existent nodes) + 2. No nodes with node_type=task that lack a corresponding task row + 3. No nodes with node_type=meeting that lack a corresponding meeting row + 4. No self-loop edges + 5. Counts summary + + Returns: {"ok": bool, "violations": [...], "stats": {...}} + """ + db = await get_db() + violations = [] + + # Count nodes/edges + async with db.execute( + "SELECT COUNT(*) FROM dialog_nodes WHERE project_id=?", (project_id,) + ) as cur: + node_count = (await cur.fetchone())[0] + async with db.execute( + "SELECT COUNT(*) FROM dialog_edges WHERE project_id=?", (project_id,) + ) as cur: + edge_count = (await cur.fetchone())[0] + async with db.execute( + "SELECT COUNT(*) FROM tasks WHERE project_id=?", (project_id,) + ) as cur: + task_count = (await cur.fetchone())[0] + async with db.execute( + "SELECT COUNT(*) FROM meetings WHERE project_id=?", (project_id,) + ) as cur: + meeting_count = (await cur.fetchone())[0] + + # 1. Orphaned edges (from_node_id not in dialog_nodes) + async with db.execute( + """SELECT e.edge_id, e.from_node_id, e.to_node_id FROM dialog_edges e + WHERE e.project_id=? 
+ AND e.from_node_id NOT IN (SELECT node_id FROM dialog_nodes WHERE project_id=?)""", + (project_id, project_id), + ) as cur: + rows = await cur.fetchall() + if rows: + violations.append({ + "type": "orphaned_edge_from", + "count": len(rows), + "edge_ids": [r[0] for r in rows[:5]], + }) + + async with db.execute( + """SELECT e.edge_id FROM dialog_edges e + WHERE e.project_id=? + AND e.to_node_id NOT IN (SELECT node_id FROM dialog_nodes WHERE project_id=?)""", + (project_id, project_id), + ) as cur: + rows = await cur.fetchall() + if rows: + violations.append({ + "type": "orphaned_edge_to", + "count": len(rows), + "edge_ids": [r[0] for r in rows[:5]], + }) + + # 2. Task nodes without task rows + async with db.execute( + """SELECT n.node_id, n.ref_id FROM dialog_nodes n + WHERE n.project_id=? AND n.node_type='task' + AND n.ref_id NOT IN (SELECT task_id FROM tasks WHERE project_id=?)""", + (project_id, project_id), + ) as cur: + rows = await cur.fetchall() + if rows: + violations.append({ + "type": "dangling_task_nodes", + "count": len(rows), + "node_ids": [r[0] for r in rows[:5]], + }) + + # 3. Meeting nodes without meeting rows + async with db.execute( + """SELECT n.node_id, n.ref_id FROM dialog_nodes n + WHERE n.project_id=? AND n.node_type='meeting' + AND n.ref_id NOT IN (SELECT meeting_id FROM meetings WHERE project_id=?)""", + (project_id, project_id), + ) as cur: + rows = await cur.fetchall() + if rows: + violations.append({ + "type": "dangling_meeting_nodes", + "count": len(rows), + "node_ids": [r[0] for r in rows[:5]], + }) + + # 4. Self-loops (should be blocked by CHECK constraint, verify defensively) + async with db.execute( + "SELECT COUNT(*) FROM dialog_edges WHERE project_id=? 
AND from_node_id=to_node_id", + (project_id,), + ) as cur: + self_loops = (await cur.fetchone())[0] + if self_loops: + violations.append({"type": "self_loop_edges", "count": self_loops}) + + return { + "ok": len(violations) == 0, + "project_id": project_id, + "violations": violations, + "stats": { + "node_count": node_count, + "edge_count": edge_count, + "task_count": task_count, + "meeting_count": meeting_count, + }, + } + + +# ── Evidence Pack Engine ─────────────────────────────────────────────────────── + +async def create_evidence_pack( + project_id: str, + run_id: str, + graph_name: str, + result_data: Dict[str, Any], + created_by: str = "sofiia", +) -> Dict[str, Any]: + """Atomically record an Evidence Pack for a Supervisor run. + + Creates: + 1. agent_run dialog_node (the run itself) + 2. doc_version with evidence markdown + 3. Auto-derived tasks from result["follow_up_tasks"] (if present) + 4. produced_by edges: task_nodes → agent_run_node + + This is the "reasoning ledger" entry point. + Returns: {node_id, doc_version_id, task_ids, edge_ids} + """ + db = await get_db() + now = _now() + + # Build evidence markdown + evidence_md = _build_evidence_markdown(run_id, graph_name, result_data, now) + + # Collect derived tasks from result + follow_up_tasks: List[Dict] = result_data.get("follow_up_tasks", []) + summary = result_data.get("summary", "") or result_data.get("message", "") + + run_node_id = str(uuid.uuid4()) + props_json = json.dumps({ + "run_id": run_id, + "graph": graph_name, + "status": result_data.get("status", "completed"), + "source": "supervisor_run", + }) + + created_task_ids: List[str] = [] + created_edge_ids: List[str] = [] + doc_version_id: Optional[str] = None + + try: + # 1. Upsert agent_run node + await db.execute( + """INSERT INTO dialog_nodes(node_id,project_id,node_type,ref_id,title,summary,props,created_by,created_at,updated_at) + VALUES(?,?,?,?,?,?,?,?,?,?) 
+ ON CONFLICT(project_id,node_type,ref_id) DO UPDATE SET + title=excluded.title, summary=excluded.summary, + props=excluded.props, updated_at=excluded.updated_at""", + (run_node_id, project_id, "agent_run", run_id, + f"{graph_name} run", summary[:200], props_json, created_by, now, now), + ) + # Refresh node_id (may exist already) + async with db.execute( + "SELECT node_id FROM dialog_nodes WHERE project_id=? AND node_type='agent_run' AND ref_id=?", + (project_id, run_id), + ) as cur: + row = await cur.fetchone() + if row: + run_node_id = row[0] + + # 2. Store evidence as doc_version (linked to "default" doc for now, or create one) + evidence_vid = str(uuid.uuid4()) + await db.execute( + "INSERT INTO doc_versions(version_id,doc_id,content,author_id,created_at) " + "SELECT ?, doc_id, ?, ?, ? FROM documents WHERE project_id=? AND filename='evidence_log.md' " + "ORDER BY created_at DESC LIMIT 1", + (evidence_vid, evidence_md, created_by, now, project_id), + ) + # Check if actually inserted (doc may not exist) + async with db.execute( + "SELECT version_id FROM doc_versions WHERE version_id=?", (evidence_vid,) + ) as cur: + if await cur.fetchone(): + doc_version_id = evidence_vid + + # 3. 
Derived tasks from follow_up_tasks + for t in follow_up_tasks[:10]: # cap at 10 + t_title = str(t.get("title", "Follow-up task"))[:200] + t_desc = str(t.get("description", ""))[:500] + t_priority = t.get("priority", "normal") + t_id = str(uuid.uuid4()) + t_node_id = str(uuid.uuid4()) + t_sort_key = float(len(created_task_ids) + 1) + t_labels = json.dumps(["evidence", graph_name]) + await db.execute( + "INSERT INTO tasks(task_id,project_id,title,description,status,priority," + "labels,assignees,due_at,sort_key,created_by,created_at,updated_at) " + "VALUES(?,?,?,?,?,?,?,?,?,?,?,?,?)", + (t_id, project_id, t_title, t_desc, "backlog", t_priority, + t_labels, "[]", None, t_sort_key, created_by, now, now), + ) + # Task dialog node + t_props = json.dumps({"source": "evidence_pack", "run_id": run_id}) + await db.execute( + """INSERT INTO dialog_nodes(node_id,project_id,node_type,ref_id,title,summary,props,created_by,created_at,updated_at) + VALUES(?,?,?,?,?,?,?,?,?,?) + ON CONFLICT(project_id,node_type,ref_id) DO NOTHING""", + (t_node_id, project_id, "task", t_id, t_title, t_desc[:200], t_props, created_by, now, now), + ) + # Edge: agent_run → task (produced_by means task was produced by run) + t_edge_id = str(uuid.uuid4()) + await db.execute( + "INSERT INTO dialog_edges(edge_id,project_id,from_node_id,to_node_id,edge_type,props,created_by,created_at) " + "VALUES(?,?,?,?,?,?,?,?) 
" + "ON CONFLICT(project_id,from_node_id,to_node_id,edge_type) DO NOTHING", + (t_edge_id, project_id, run_node_id, t_node_id, "produced_by", "{}", created_by, now), + ) + created_task_ids.append(t_id) + created_edge_ids.append(t_edge_id) + + await db.commit() + except Exception as e: + await db.rollback() + logger.error("create_evidence_pack atomic failed: %s", e) + raise + + return { + "ok": True, + "run_id": run_id, + "graph_name": graph_name, + "node_id": run_node_id, + "doc_version_id": doc_version_id, + "task_ids": created_task_ids, + "edge_ids": created_edge_ids, + "tasks_created": len(created_task_ids), + "created_at": now, + } + + +def _build_evidence_markdown(run_id: str, graph_name: str, result: Dict[str, Any], now: str) -> str: + """Build a human-readable Evidence Pack markdown from a supervisor run result.""" + lines = [ + f"# Evidence Pack: {graph_name}", + f"", + f"**Run ID:** `{run_id}` ", + f"**Graph:** `{graph_name}` ", + f"**Timestamp:** {now} ", + f"**Status:** {result.get('status', 'completed')}", + "", + ] + summary = result.get("summary") or result.get("message") or "" + if summary: + lines += ["## Summary", "", summary, ""] + + findings = result.get("findings") or result.get("checks") or [] + if findings: + lines += ["## Findings", ""] + for f in findings[:20]: + if isinstance(f, dict): + status_icon = "✅" if f.get("status") in ("ok", "pass", "passed") else "❌" if f.get("status") in ("fail", "failed", "error") else "⚠️" + lines.append(f"- {status_icon} **{f.get('name', 'check')}**: {f.get('detail', f.get('message', ''))}") + else: + lines.append(f"- {f}") + lines.append("") + + recommendations = result.get("recommendations") or [] + if recommendations: + lines += ["## Recommendations", ""] + for r in recommendations[:10]: + lines.append(f"- {r}") + lines.append("") + + follow_ups = result.get("follow_up_tasks") or [] + if follow_ups: + lines += ["## Follow-up Tasks Created", ""] + for t in follow_ups[:10]: + if isinstance(t, dict): + 
lines.append(f"- [{t.get('priority','normal').upper()}] {t.get('title', '')}") + else: + lines.append(f"- {t}") + lines.append("") + + lines += ["---", f"*Generated by Sofiia Evidence Pack Engine*"] + return "\n".join(lines) + + +# ── Graph Intelligence: Importance Scoring ──────────────────────────────────── + +# Base importance by node type (deterministic) +_BASE_IMPORTANCE: Dict[str, float] = { + "decision": 0.95, + "goal": 0.90, + "doc": 0.75, + "task": 0.70, + "pull_request": 0.65, + "ops_run": 0.60, + "meeting": 0.60, + "agent_run": 0.55, + "repo_changeset": 0.60, + "message": 0.15, +} + +# Lifecycle multiplier +_LIFECYCLE_MULTIPLIER: Dict[str, float] = { + "active": 1.0, + "superseded": 0.4, + "archived": 0.2, + "invalid": 0.1, +} + + +def _compute_importance( + node_type: str, + lifecycle: str = "active", + incoming_edge_count: int = 0, + task_status: str = None, + risk_level: str = None, + pinned: bool = False, +) -> float: + base = _BASE_IMPORTANCE.get(node_type, 0.3) + # Task done → halve importance + if node_type == "task" and task_status == "done": + base = 0.35 + score = base * _LIFECYCLE_MULTIPLIER.get(lifecycle, 1.0) + # Bump factors + if incoming_edge_count >= 3: + score = min(1.0, score + 0.05) + if risk_level == "high": + score = min(1.0, score + 0.10) + if pinned: + score = min(1.0, score + 0.10) + return round(score, 4) + + +def _compute_fingerprint(node_type: str, title: str, summary: str = "", labels: List[str] = None) -> str: + """Deterministic SHA-256 fingerprint for dedup. + + Canonical form: normalize whitespace + lowercase. + Same intent = same fingerprint → prevents duplicate nodes. 
def _compute_fingerprint(node_type: str, title: str, summary: str = "", labels: Optional[List[str]] = None) -> str:
    """Deterministic SHA-256 fingerprint for dedup.

    Canonical form: normalize whitespace + lowercase.
    Same intent = same fingerprint → prevents duplicate nodes.

    Returns the first 32 hex chars of SHA-256 over the canonical string.
    """
    def _norm(s: str) -> str:
        # Collapse whitespace runs and lowercase for canonical comparison.
        return re.sub(r"\s+", " ", s.lower().strip())

    if node_type in ("task", "decision", "goal"):
        # Intent = title + first 100 chars of summary (+ sorted labels if any).
        canonical = _norm(title) + "|" + _norm(summary[:100])
        if labels:
            canonical += "|" + ",".join(sorted(_norm(l) for l in labels))
    elif node_type == "agent_run":
        # agent_run fingerprint = title (which contains run_id)
        canonical = _norm(title)
    else:
        # All other types: title plus type tag so identical titles of
        # different node types do not collide.
        canonical = _norm(title) + "|" + node_type
    return hashlib.sha256(canonical.encode()).hexdigest()[:32]


# ── Graph Hygiene Engine ──────────────────────────────────────────────────────

async def run_graph_hygiene(
    project_id: str,
    dry_run: bool = True,
    scope: str = "all",
    since: Optional[str] = None,
) -> Dict[str, Any]:
    """Semantic normalization of the Dialog Graph.

    Steps:
    1. Compute missing fingerprints for all nodes.
    2. Detect duplicate groups (same project_id + node_type + fingerprint).
    3. Mark duplicates lifecycle=archived, keep canonical (latest updated_at).
    4. Add 'supersedes' edge: canonical → archived (for decisions/goals).
    5. Recompute importance scores for all active nodes.
    6. Return structured diff report.

    dry_run=True: compute but do not write changes.
    """
    db = await get_db()
    now = _now()
    changes: List[Dict] = []
    stats = {"nodes_scanned": 0, "fingerprints_computed": 0,
             "duplicates_found": 0, "archived": 0, "importance_updated": 0}

    # -- 1. Fetch nodes (scoped)
    # scope="recent" narrows to nodes touched since `since`; anything else scans all.
    if scope == "recent" and since:
        async with db.execute(
            "SELECT * FROM dialog_nodes WHERE project_id=? AND updated_at >= ? ORDER BY created_at ASC",
            (project_id, since),
        ) as cur:
            nodes = [dict(r) for r in await cur.fetchall()]
    else:
        async with db.execute(
            "SELECT * FROM dialog_nodes WHERE project_id=? ORDER BY created_at ASC",
            (project_id,),
        ) as cur:
            nodes = [dict(r) for r in await cur.fetchall()]
    stats["nodes_scanned"] = len(nodes)

    # -- 2. Compute fingerprints where missing
    fp_map: Dict[str, str] = {}  # node_id → fingerprint
    for n in nodes:
        fp = n.get("fingerprint")
        if not fp:
            props = {}
            try:
                props = json.loads(n.get("props") or "{}")
            except Exception:
                pass  # malformed props → treat as empty (no labels)
            labels = props.get("labels", [])
            fp = _compute_fingerprint(n["node_type"], n.get("title", ""), n.get("summary", ""), labels)
            fp_map[n["node_id"]] = fp
            if not dry_run:
                try:
                    # Guard clause in WHERE keeps the write idempotent.
                    await db.execute(
                        "UPDATE dialog_nodes SET fingerprint=?, updated_at=? WHERE node_id=? AND (fingerprint IS NULL OR fingerprint != ?)",
                        (fp, now, n["node_id"], fp),
                    )
                except Exception:
                    # UNIQUE constraint: another node already has this fingerprint — skip
                    pass
            # NOTE(review): counted in dry_run too — "computed", not "persisted".
            stats["fingerprints_computed"] += 1
        else:
            # Fingerprint already stored — just record it for grouping.
            fp_map[n["node_id"]] = fp

    # -- 3. Group by (node_type, fingerprint) → detect duplicates
    from collections import defaultdict
    groups: Dict[Tuple[str, str], List[Dict]] = defaultdict(list)
    for n in nodes:
        key = (n["node_type"], fp_map.get(n["node_id"], ""))
        groups[key].append(n)

    supersede_pairs: List[Tuple[str, str]] = []  # (canonical_node_id, archived_node_id)
    for (ntype, fp), group in groups.items():
        # Empty fingerprint groups are not meaningful duplicates.
        if len(group) <= 1 or not fp:
            continue
        stats["duplicates_found"] += len(group) - 1
        # Canonical = most recently updated
        group_sorted = sorted(group, key=lambda x: x.get("updated_at", ""), reverse=True)
        canonical = group_sorted[0]
        duplicates = group_sorted[1:]
        for dup in duplicates:
            current_lifecycle = dup.get("lifecycle", "active")
            # Already demoted on a previous pass — leave untouched.
            if current_lifecycle in ("archived", "superseded", "invalid"):
                continue
            # Intent-bearing types are "superseded" (traceable), the rest archived.
            new_lifecycle = "superseded" if ntype in ("decision", "goal", "task") else "archived"
            changes.append({
                "action": "archive_duplicate",
                "node_id": dup["node_id"],
                "node_type": ntype,
                "title": dup.get("title", "")[:60],
                "new_lifecycle": new_lifecycle,
                "canonical_node_id": canonical["node_id"],
            })
            if not dry_run:
                await db.execute(
                    "UPDATE dialog_nodes SET lifecycle=?, updated_at=? WHERE node_id=?",
                    (new_lifecycle, now, dup["node_id"]),
                )
                stats["archived"] += 1
            if ntype in ("decision", "goal"):
                supersede_pairs.append((canonical["node_id"], dup["node_id"]))

    # -- 4. Add supersedes edges (canonical → archived)
    if not dry_run:
        for (canon_id, old_id) in supersede_pairs:
            eid = str(uuid.uuid4())
            try:
                await db.execute(
                    "INSERT INTO dialog_edges(edge_id,project_id,from_node_id,to_node_id,edge_type,props,created_by,created_at,strength) "
                    "VALUES(?,?,?,?,?,?,?,?,?) "
                    "ON CONFLICT(project_id,from_node_id,to_node_id,edge_type) DO NOTHING",
                    (eid, project_id, canon_id, old_id, "supersedes", "{}", "hygiene", now, 0.8),
                )
            except Exception as e:
                logger.debug("supersedes edge skipped: %s", e)

    # -- 5. Recompute importance for all nodes (active only)
    # Fan-in per node feeds the incoming_edge_count bump in _compute_importance.
    incoming_counts: Dict[str, int] = {}
    async with db.execute(
        "SELECT to_node_id, COUNT(*) as cnt FROM dialog_edges WHERE project_id=? GROUP BY to_node_id",
        (project_id,),
    ) as cur:
        for r in await cur.fetchall():
            incoming_counts[r[0]] = r[1]

    for n in nodes:
        node_id = n["node_id"]
        lifecycle = n.get("lifecycle", "active")
        props = {}
        try:
            props = json.loads(n.get("props") or "{}")
        except Exception:
            pass
        task_status = props.get("status") if n["node_type"] == "task" else None
        risk_level = props.get("risk_level")
        pinned = bool(props.get("pinned"))
        new_importance = _compute_importance(
            n["node_type"], lifecycle,
            incoming_edge_count=incoming_counts.get(node_id, 0),
            task_status=task_status,
            risk_level=risk_level,
            pinned=pinned,
        )
        # NOTE(review): `old or 0.3` maps a stored importance of 0.0 to 0.3 —
        # confirm 0.0 is never a legitimate stored value.
        old_importance = n.get("importance", 0.3)
        if abs(new_importance - (old_importance or 0.3)) > 0.001:
            changes.append({
                "action": "update_importance",
                "node_id": node_id,
                "old": old_importance,
                "new": new_importance,
            })
            if not dry_run:
                await db.execute(
                    "UPDATE dialog_nodes SET importance=?, updated_at=? WHERE node_id=?",
                    (new_importance, now, node_id),
                )
                stats["importance_updated"] += 1

    if not dry_run:
        await db.commit()

    return {
        "ok": True,
        "dry_run": dry_run,
        "project_id": project_id,
        "changes": changes,
        "stats": stats,
    }
async def create_run_reflection(
    project_id: str,
    run_id: str,
    evidence_data: Optional[Dict[str, Any]] = None,
    created_by: str = "sofiia",
) -> Dict[str, Any]:
    """Create a Self-Reflection artifact for a completed Supervisor run.

    Analyzes the Evidence Pack and creates:
    1. A 'decision' node (reflection) linked to agent_run via 'reflects_on' edge.
    2. Structured reflection props: completeness score, open risks, missing steps.
    3. Optionally creates follow-up tasks for critical missing steps.

    Returns: {node_id, reflection, edge_id, task_ids}
    """
    db = await get_db()
    now = _now()

    # Load agent_run node to get context
    async with db.execute(
        "SELECT * FROM dialog_nodes WHERE project_id=? AND node_type='agent_run' AND ref_id=?",
        (project_id, run_id),
    ) as cur:
        run_row = await cur.fetchone()

    run_node_id = run_row["node_id"] if run_row else None
    graph_name = ""
    if run_row:
        try:
            run_props = json.loads(run_row["props"] or "{}")
            graph_name = run_props.get("graph", "")
        except Exception:
            pass  # missing/invalid props → graph name stays empty

    data = evidence_data or {}
    findings = data.get("findings") or []
    follow_ups = data.get("follow_up_tasks") or []
    recommendations = data.get("recommendations") or []
    # NOTE(review): `summary` is extracted but never used below — dead local?
    summary = data.get("summary") or ""

    # Analyze completeness: ratio of passing checks among dict-shaped findings.
    passed = sum(1 for f in findings if isinstance(f, dict) and f.get("status") in ("ok", "pass", "passed"))
    failed = sum(1 for f in findings if isinstance(f, dict) and f.get("status") in ("fail", "failed", "error"))
    total = len(findings)
    plan_completeness = round(passed / total, 2) if total > 0 else 0.8  # default optimistic if no findings

    # Up to 5 failing/warning findings become "open risks".
    open_risks = [
        f.get("detail", f.get("message", str(f)))
        for f in findings
        if isinstance(f, dict) and f.get("status") in ("fail", "failed", "error", "warn", "warning")
    ][:5]

    missing_steps = [r for r in recommendations if isinstance(r, str)][:5]
    # Heuristic: more findings / follow-ups → richer evidence, capped at 1.0.
    evidence_quality = round(min(1.0, 0.5 + (len(findings) * 0.05) + (len(follow_ups) * 0.03)), 2)

    reflection = {
        "run_id": run_id,
        "graph_name": graph_name,
        "plan_completeness_score": plan_completeness,
        "evidence_quality_score": evidence_quality,
        "open_risks": open_risks,
        "missing_steps": missing_steps,
        "follow_up_tasks_created": len(follow_ups),
        "recommended_next_actions": recommendations[:3],
        "confidence": "high" if plan_completeness >= 0.8 else "medium" if plan_completeness >= 0.5 else "low",
        "uncertainty_notes": f"{failed} check(s) failed" if failed > 0 else "",
        "reflected_at": now,
    }

    refl_title = f"Reflection: {graph_name} run" if graph_name else f"Reflection: run {run_id[:8]}"
    refl_summary = (
        f"Completeness: {int(plan_completeness * 100)}% | "
        f"Evidence quality: {int(evidence_quality * 100)}% | "
        f"Confidence: {reflection['confidence']}"
    )
    refl_node_id = str(uuid.uuid4())
    created_task_ids: List[str] = []
    edge_id: Optional[str] = None

    try:
        # Upsert reflection decision node
        # Note: fingerprint is NOT set here — Hygiene Engine computes it later.
        # This avoids UNIQUE constraint conflicts on idempotent calls.
        await db.execute(
            """INSERT INTO dialog_nodes(node_id,project_id,node_type,ref_id,title,summary,props,lifecycle,importance,created_by,created_at,updated_at)
               VALUES(?,?,?,?,?,?,?,?,?,?,?,?)
               ON CONFLICT(project_id,node_type,ref_id) DO UPDATE SET
                 title=excluded.title, summary=excluded.summary, props=excluded.props,
                 importance=excluded.importance, updated_at=excluded.updated_at""",
            (refl_node_id, project_id, "decision", f"reflection:{run_id}",
             refl_title, refl_summary, json.dumps(reflection),
             "active", _compute_importance("decision"), created_by, now, now),
        )
        # Refresh actual node_id (may be existing)
        async with db.execute(
            "SELECT node_id FROM dialog_nodes WHERE project_id=? AND node_type='decision' AND ref_id=?",
            (project_id, f"reflection:{run_id}"),
        ) as cur:
            row = await cur.fetchone()
            if row:
                refl_node_id = row[0]

        # Edge: reflection → agent_run via reflects_on
        # NOTE(review): `is not None and run_node_id` — second test subsumes the
        # first; plain truthiness would suffice.
        if run_node_id is not None and run_node_id:
            edge_id = str(uuid.uuid4())
            await db.execute(
                "INSERT INTO dialog_edges(edge_id,project_id,from_node_id,to_node_id,edge_type,props,created_by,created_at,strength) "
                "VALUES(?,?,?,?,?,?,?,?,?) "
                "ON CONFLICT(project_id,from_node_id,to_node_id,edge_type) DO NOTHING",
                (edge_id, project_id, refl_node_id, run_node_id, "reflects_on",
                 json.dumps({"confidence": reflection["confidence"]}), created_by, now, 0.9),
            )

        # Auto-create tasks for critical missing steps
        for risk in open_risks[:2]:  # max 2 auto-tasks from risks
            t_title = f"[RISK] {risk}"[:200]
            t_id = str(uuid.uuid4())
            t_node_id = str(uuid.uuid4())
            t_sort = float(len(created_task_ids) + 100)  # stable sort bucket after manual tasks
            await db.execute(
                "INSERT INTO tasks(task_id,project_id,title,description,status,priority,"
                "labels,assignees,due_at,sort_key,created_by,created_at,updated_at) "
                "VALUES(?,?,?,?,?,?,?,?,?,?,?,?,?)",
                (t_id, project_id, t_title, f"Auto-created from run reflection: {run_id}",
                 "backlog", "high", json.dumps(["reflection", "risk"]), "[]", None,
                 t_sort, created_by, now, now),
            )
            t_props = json.dumps({"source": "reflection", "run_id": run_id})
            # Mirror the task into the dialog graph.
            await db.execute(
                """INSERT INTO dialog_nodes(node_id,project_id,node_type,ref_id,title,props,lifecycle,importance,created_by,created_at,updated_at)
                   VALUES(?,?,?,?,?,?,?,?,?,?,?)
                   ON CONFLICT(project_id,node_type,ref_id) DO NOTHING""",
                (t_node_id, project_id, "task", t_id, t_title, t_props, "active",
                 _compute_importance("task"), created_by, now, now),
            )
            eid2 = str(uuid.uuid4())
            await db.execute(
                "INSERT INTO dialog_edges(edge_id,project_id,from_node_id,to_node_id,edge_type,props,created_by,created_at,strength) "
                "VALUES(?,?,?,?,?,?,?,?,?) "
                "ON CONFLICT(project_id,from_node_id,to_node_id,edge_type) DO NOTHING",
                (eid2, project_id, refl_node_id, t_node_id, "derives_task", "{}", created_by, now, 0.7),
            )
            created_task_ids.append(t_id)

        await db.commit()
    except Exception as e:
        # Any failure rolls back the whole reflection transaction.
        await db.rollback()
        logger.error("create_run_reflection failed: %s", e)
        raise

    return {
        "ok": True,
        "run_id": run_id,
        "node_id": refl_node_id,
        "edge_id": edge_id,
        "reflection": reflection,
        "risk_tasks_created": len(created_task_ids),
        "task_ids": created_task_ids,
    }
async def upsert_ops_run_node(
    project_id: str,
    ops_run_id: str,
    action_id: str,
    node_id: str,
    status: str,  # "ok" | "failed" | "timeout"
    elapsed_ms: int = 0,
    error: str = "",
    started_at: str = "",
    source_run_id: str = "",  # if triggered from a supervisor run
    source_msg_id: str = "",  # if triggered from a message
    created_by: str = "sofiia",
) -> Dict[str, Any]:
    """Create or update an ops_run dialog_node and link it to source artifacts.

    Props schema:
        action_id, ops_node_id, status, elapsed_ms, error, started_at,
        finished_at, source_run_id

    Edges created (if source provided; run takes precedence over message):
        source_run_node --produced_by--> ops_run_node
        source_msg_node --relates_to--> ops_run_node (fallback)

    (Fix: previous docstring named edge types 'executed_as'/'derives_task',
    which never matched the SQL below.)

    Returns: {node_id, edge_id, ops_run_id}
    """
    db = await get_db()
    now = _now()
    if not started_at:
        started_at = now

    # Importance based on status: failed ops are more important
    imp = 0.65 if status != "failed" else 0.80

    props = json.dumps({
        "action_id": action_id,
        "ops_node_id": node_id,  # physical node (NODA1/NODA2)
        "status": status,
        "elapsed_ms": elapsed_ms,
        "error": error,
        "started_at": started_at,
        "finished_at": now,
        "source_run_id": source_run_id,
    })
    title = f"[{status.upper()}] {action_id} @ {node_id}"
    gnode_id = str(uuid.uuid4())

    # Idempotent upsert keyed on (project_id, node_type, ref_id).
    await db.execute(
        """INSERT INTO dialog_nodes(node_id,project_id,node_type,ref_id,title,props,lifecycle,importance,created_by,created_at,updated_at)
           VALUES(?,?,?,?,?,?,?,?,?,?,?)
           ON CONFLICT(project_id,node_type,ref_id) DO UPDATE SET
             title=excluded.title, props=excluded.props,
             importance=excluded.importance, lifecycle=excluded.lifecycle,
             updated_at=excluded.updated_at""",
        (gnode_id, project_id, "ops_run", ops_run_id, title, props,
         "active", imp, created_by, now, now),
    )
    # Refresh actual node_id in case of conflict
    async with db.execute(
        "SELECT node_id FROM dialog_nodes WHERE project_id=? AND node_type='ops_run' AND ref_id=?",
        (project_id, ops_run_id),
    ) as cur:
        row = await cur.fetchone()
        if row:
            gnode_id = row[0]

    edge_id: Optional[str] = None

    # Link to source supervisor run node
    if source_run_id:
        async with db.execute(
            "SELECT node_id FROM dialog_nodes WHERE project_id=? AND node_type='agent_run' AND ref_id=?",
            (project_id, source_run_id),
        ) as cur:
            src_row = await cur.fetchone()
        if src_row:
            edge_id = str(uuid.uuid4())
            await db.execute(
                "INSERT INTO dialog_edges(edge_id,project_id,from_node_id,to_node_id,edge_type,props,created_by,created_at,strength) "
                "VALUES(?,?,?,?,?,?,?,?,?) "
                "ON CONFLICT(project_id,from_node_id,to_node_id,edge_type) DO NOTHING",
                (edge_id, project_id, src_row[0], gnode_id, "produced_by",
                 json.dumps({"action_id": action_id}), created_by, now, 0.8),
            )

    # Link to source message node
    elif source_msg_id:
        async with db.execute(
            "SELECT node_id FROM dialog_nodes WHERE project_id=? AND node_type='message' AND ref_id=?",
            (project_id, source_msg_id),
        ) as cur:
            msg_row = await cur.fetchone()
        if msg_row:
            edge_id = str(uuid.uuid4())
            await db.execute(
                "INSERT INTO dialog_edges(edge_id,project_id,from_node_id,to_node_id,edge_type,props,created_by,created_at,strength) "
                "VALUES(?,?,?,?,?,?,?,?,?) "
                "ON CONFLICT(project_id,from_node_id,to_node_id,edge_type) DO NOTHING",
                (edge_id, project_id, msg_row[0], gnode_id, "relates_to",
                 json.dumps({"action_id": action_id}), created_by, now, 0.6),
            )

    await db.commit()
    return {"node_id": gnode_id, "edge_id": edge_id, "ops_run_id": ops_run_id}


# ── Strategic CTO Layer ──────────────────────────────────────────────────────

def _window_days(window: str) -> int:
    """Parse a window string like '7d', '24h', '30d' into whole days.

    Hour windows are floored to days with a minimum of 1 day.
    Unrecognized suffixes — and now malformed numeric parts such as 'xd'
    (fix: previously raised ValueError) — fall back to 7 days.
    """
    w = window.lower().strip()
    try:
        if w.endswith("h"):
            return max(1, int(w[:-1]) // 24)
        if w.endswith("d"):
            return int(w[:-1])
    except ValueError:
        pass  # malformed numeric part — fall through to the default
    return 7


def _signal_fingerprint(signal_type: str, evidence: Dict[str, Any]) -> str:
    """Deterministic fingerprint for a signal (for idempotent upsert).

    List values are sorted so element ordering in evidence does not change
    the hash; keys are serialized sorted for the same reason.

    Returns the first 32 hex chars of SHA-256 over 'type|canonical-evidence'.
    """
    ev_key = json.dumps(
        {k: sorted(v) if isinstance(v, list) else v for k, v in sorted(evidence.items())},
        sort_keys=True,
    )
    return hashlib.sha256(f"{signal_type}|{ev_key}".encode()).hexdigest()[:32]
async def compute_graph_snapshot(
    project_id: str,
    window: str = "7d",
) -> Dict[str, Any]:
    """Compute and store a graph analytics snapshot for a project.

    Metrics computed (deterministic, no ML):
    - tasks_created / tasks_done in window
    - wip (in_progress + review)
    - cycle_time_proxy_days (median done task age)
    - blocked_tasks_count
    - risk_tasks_open (tasks with [RISK] prefix, not done)
    - agent_runs_total / agent_runs_in_window
    - run_quality_avg (avg plan_completeness from reflections)
    - dedup_events (archived nodes in window)
    - graph_density (edges/nodes)
    - open_signals_count (high/critical)
    - stale_goals_count

    Idempotent: one snapshot per (project_id, scope, window, date_bucket).
    """
    db = await get_db()
    days = _window_days(window)
    now = _now()
    # Date bucket = today's date
    date_bucket = now[:10]

    # Window start (ISO string comparison works for our datetime('now') format)
    # NOTE(review): datetime.utcnow() is deprecated since Python 3.12 —
    # consider datetime.now(timezone.utc) when touching this next.
    import datetime as _dt
    window_start = (_dt.datetime.utcnow() - _dt.timedelta(days=days)).strftime("%Y-%m-%dT%H:%M:%SZ")

    # -- Tasks in window
    async with db.execute(
        "SELECT COUNT(*) FROM tasks WHERE project_id=? AND created_at >= ?",
        (project_id, window_start),
    ) as cur:
        tasks_created = (await cur.fetchone())[0]

    async with db.execute(
        "SELECT COUNT(*) FROM tasks WHERE project_id=? AND status='done' AND updated_at >= ?",
        (project_id, window_start),
    ) as cur:
        tasks_done = (await cur.fetchone())[0]

    async with db.execute(
        "SELECT COUNT(*) FROM tasks WHERE project_id=? AND status IN ('in_progress','review')",
        (project_id,),
    ) as cur:
        wip = (await cur.fetchone())[0]

    # Cycle time: median done task (days between created_at and updated_at)
    # NOTE(review): upper median over at most 50 rows — a proxy, not exact.
    async with db.execute(
        "SELECT created_at, updated_at FROM tasks WHERE project_id=? AND status='done' AND updated_at >= ? LIMIT 50",
        (project_id, window_start),
    ) as cur:
        done_rows = await cur.fetchall()
    cycle_times: List[float] = []
    for r in done_rows:
        try:
            import datetime as _dt2
            c = _dt2.datetime.fromisoformat(r[0].replace("Z", "+00:00"))
            u = _dt2.datetime.fromisoformat(r[1].replace("Z", "+00:00"))
            cycle_times.append((u - c).total_seconds() / 86400)
        except Exception:
            pass  # unparsable timestamps are simply excluded from the median
    cycle_time_proxy = round(sorted(cycle_times)[len(cycle_times) // 2], 2) if cycle_times else 0.0

    # Blocked tasks: open tasks whose graph mirror has an incoming 'blocks' edge.
    async with db.execute(
        """SELECT COUNT(DISTINCT t.task_id) FROM tasks t
           JOIN dialog_nodes dn ON dn.project_id=? AND dn.node_type='task' AND dn.ref_id=t.task_id
           JOIN dialog_edges de ON de.to_node_id=dn.node_id AND de.edge_type='blocks'
           WHERE t.project_id=? AND t.status != 'done'""",
        (project_id, project_id),
    ) as cur:
        blocked_tasks_count = (await cur.fetchone())[0]

    # Risk tasks (title-prefix convention set by the reflection engine)
    async with db.execute(
        "SELECT COUNT(*) FROM tasks WHERE project_id=? AND title LIKE '[RISK]%' AND status != 'done'",
        (project_id,),
    ) as cur:
        risk_tasks_open = (await cur.fetchone())[0]

    # Agent runs
    async with db.execute(
        "SELECT COUNT(*) FROM dialog_nodes WHERE project_id=? AND node_type='agent_run'",
        (project_id,),
    ) as cur:
        agent_runs_total = (await cur.fetchone())[0]

    async with db.execute(
        "SELECT COUNT(*) FROM dialog_nodes WHERE project_id=? AND node_type='agent_run' AND created_at >= ?",
        (project_id, window_start),
    ) as cur:
        agent_runs_in_window = (await cur.fetchone())[0]

    # Run quality avg from reflection nodes
    async with db.execute(
        """SELECT props FROM dialog_nodes WHERE project_id=? AND node_type='decision'
           AND title LIKE 'Reflection:%' AND created_at >= ? LIMIT 20""",
        (project_id, window_start),
    ) as cur:
        refl_rows = await cur.fetchall()
    quality_scores: List[float] = []
    for r in refl_rows:
        try:
            p = json.loads(r[0] or "{}")
            s = p.get("plan_completeness_score")
            if s is not None:
                quality_scores.append(float(s))
        except Exception:
            pass
    # None (not 0.0) when no reflections exist — callers can distinguish.
    run_quality_avg = round(sum(quality_scores) / len(quality_scores), 2) if quality_scores else None

    # Dedup events (archived in window)
    async with db.execute(
        "SELECT COUNT(*) FROM dialog_nodes WHERE project_id=? AND lifecycle IN ('archived','superseded') AND updated_at >= ?",
        (project_id, window_start),
    ) as cur:
        dedup_events = (await cur.fetchone())[0]

    # Graph density = edges per node (min node count 1 avoids div-by-zero)
    async with db.execute("SELECT COUNT(*) FROM dialog_nodes WHERE project_id=?", (project_id,)) as cur:
        n_nodes = (await cur.fetchone())[0]
    async with db.execute("SELECT COUNT(*) FROM dialog_edges WHERE project_id=?", (project_id,)) as cur:
        n_edges = (await cur.fetchone())[0]
    graph_density = round(n_edges / max(n_nodes, 1), 3)

    # Open high/critical signals
    async with db.execute(
        "SELECT COUNT(*) FROM graph_signals WHERE project_id=? AND status='open' AND severity IN ('high','critical')",
        (project_id,),
    ) as cur:
        open_signals_critical = (await cur.fetchone())[0]

    # Stale goals (active, no updates > 14d)
    import datetime as _dt3
    stale_cutoff = (_dt3.datetime.utcnow() - _dt3.timedelta(days=14)).strftime("%Y-%m-%dT%H:%M:%SZ")
    async with db.execute(
        "SELECT COUNT(*) FROM dialog_nodes WHERE project_id=? AND node_type='goal' AND lifecycle='active' AND updated_at < ?",
        (project_id, stale_cutoff),
    ) as cur:
        stale_goals_count = (await cur.fetchone())[0]

    metrics = {
        "window": window,
        "tasks_created": tasks_created,
        "tasks_done": tasks_done,
        "wip": wip,
        "cycle_time_proxy_days": cycle_time_proxy,
        "blocked_tasks_count": blocked_tasks_count,
        "risk_tasks_open": risk_tasks_open,
        "agent_runs_total": agent_runs_total,
        "agent_runs_in_window": agent_runs_in_window,
        "run_quality_avg": run_quality_avg,
        "dedup_events": dedup_events,
        "graph_density": graph_density,
        "node_count": n_nodes,
        "edge_count": n_edges,
        "open_signals_critical": open_signals_critical,
        "stale_goals_count": stale_goals_count,
        "computed_at": now,
    }

    snap_id = str(uuid.uuid4())
    try:
        # Upsert keeps exactly one snapshot per (project, scope, window, day).
        await db.execute(
            """INSERT INTO graph_snapshots(id,project_id,scope,window,date_bucket,metrics,created_at)
               VALUES(?,?,?,?,?,?,?)
               ON CONFLICT(project_id,scope,window,date_bucket) DO UPDATE SET
                 metrics=excluded.metrics, created_at=excluded.created_at""",
            (snap_id, project_id, "project", window, date_bucket, json.dumps(metrics), now),
        )
        await db.commit()
    except Exception as e:
        logger.error("compute_graph_snapshot failed: %s", e)
        raise

    return {"ok": True, "project_id": project_id, "window": window, "date_bucket": date_bucket, "metrics": metrics}
async def get_latest_snapshot(project_id: str, window: str = "7d") -> Optional[Dict[str, Any]]:
    """Return the newest graph snapshot row for (project, window), or None.

    The stored JSON `metrics` column is decoded in place when possible;
    on decode failure the raw string is left untouched.
    """
    db = await get_db()
    query = (
        "SELECT * FROM graph_snapshots WHERE project_id=? AND window=? "
        "ORDER BY created_at DESC LIMIT 1"
    )
    async with db.execute(query, (project_id, window)) as cur:
        row = await cur.fetchone()
    if not row:
        return None
    snapshot = dict(row)
    try:
        snapshot["metrics"] = json.loads(snapshot["metrics"])
    except Exception:
        pass  # leave metrics as the raw stored value
    return snapshot
AND title LIKE '[RISK]%' AND status != 'done' AND priority IN ('high','urgent')", + (project_id,), + ) as cur: + blocker_tasks = await cur.fetchall() + if blocker_tasks: + evidence = { + "release_node_ids": [r[0] for r in release_nodes[:3]], + "blocker_task_ids": [r[0] for r in blocker_tasks[:5]], + "blocker_count": len(blocker_tasks), + } + new_signals.append({ + "signal_type": "release_blocker", + "severity": "critical" if len(blocker_tasks) >= 2 else "high", + "title": f"Release has {len(blocker_tasks)} open critical risk task(s)", + "summary": f"Release decision exists but {len(blocker_tasks)} [RISK] tasks are blocking.", + "evidence": evidence, + }) + + # --- Rule 2: Ops Instability --- + # Primary: count real ops_run nodes with status=failed + async with db.execute( + """SELECT node_id, props, title FROM dialog_nodes + WHERE project_id=? AND node_type='ops_run' AND created_at >= ? LIMIT 50""", + (project_id, window_start), + ) as cur: + ops_run_rows = await cur.fetchall() + failed_ops_runs = [] + failed_ops_actions: List[str] = [] + total_ops_runs = len(ops_run_rows) + for r in ops_run_rows: + try: + p = json.loads(r[1] or "{}") + if p.get("status") == "failed": + failed_ops_runs.append(r[0]) + failed_ops_actions.append(p.get("action_id", "unknown")) + except Exception: + pass + # Secondary: agent_run failures (status in props) + async with db.execute( + """SELECT node_id, props FROM dialog_nodes + WHERE project_id=? AND node_type='agent_run' AND created_at >= ? LIMIT 30""", + (project_id, window_start), + ) as cur: + recent_agent_runs = await cur.fetchall() + failed_agent_runs = [] + for r in recent_agent_runs: + try: + p = json.loads(r[1] or "{}") + if p.get("status") in ("failed", "error"): + failed_agent_runs.append(r[0]) + except Exception: + pass + # Tertiary: low quality reflections (proxy for poor runs) + async with db.execute( + """SELECT node_id, props FROM dialog_nodes WHERE project_id=? 
+ AND node_type='decision' AND title LIKE 'Reflection:%' AND created_at >= ?""", + (project_id, window_start), + ) as cur: + refl_rows = await cur.fetchall() + low_quality_runs = [] + for r in refl_rows: + try: + p = json.loads(r[1] or "{}") + if (p.get("plan_completeness_score") or 1.0) < 0.5: + low_quality_runs.append(r[0]) + except Exception: + pass + # Failure rate: if we have ops_run data, use ratio; otherwise fall back to count + ops_failure_rate = (len(failed_ops_runs) / max(total_ops_runs, 1)) if total_ops_runs > 0 else 0 + total_instability = len(failed_ops_runs) + len(failed_agent_runs) + len(low_quality_runs) + trigger = total_instability >= 2 or (total_ops_runs >= 3 and ops_failure_rate >= 0.33) + if trigger: + evidence = { + "failed_ops_run_node_ids": failed_ops_runs[:5], + "failed_ops_actions": list(dict.fromkeys(failed_ops_actions))[:5], + "failed_agent_run_node_ids": failed_agent_runs[:3], + "low_quality_node_ids": low_quality_runs[:3], + "instability_count": total_instability, + "ops_failure_rate": round(ops_failure_rate, 2), + "total_ops_runs": total_ops_runs, + "window": window, + } + ops_fail_pct = int(ops_failure_rate * 100) + sev = "critical" if ops_failure_rate >= 0.5 and total_ops_runs >= 4 else ("high" if total_instability >= 3 else "medium") + new_signals.append({ + "signal_type": "ops_instability", + "severity": sev, + "title": ( + f"Ops instability: {len(failed_ops_runs)} ops failures ({ops_fail_pct}% rate) + " + f"{len(low_quality_runs)} low-quality runs in {window}" + ), + "summary": ( + f"{len(failed_ops_runs)} ops_run failures out of {total_ops_runs} ops actions " + f"({ops_fail_pct}% failure rate). " + f"Failed actions: {', '.join(set(failed_ops_actions[:3])) or 'n/a'}. " + f"{len(low_quality_runs)} low-quality reflections also detected." + ), + "evidence": evidence, + }) + + # --- Rule 3: Stale Goal --- + async with db.execute( + """SELECT node_id, title, updated_at FROM dialog_nodes + WHERE project_id=? 
AND node_type='goal' AND lifecycle='active' + AND updated_at < ? LIMIT 10""", + (project_id, stale_cutoff), + ) as cur: + stale_goals = await cur.fetchall() + for g in stale_goals: + evidence = {"node_id": g[0], "title": g[1], "last_updated": g[2]} + new_signals.append({ + "signal_type": "stale_goal", + "severity": "medium", + "title": f"Stale goal: '{g[1][:60]}' not updated for 14+ days", + "summary": f"Goal '{g[1]}' has been active but not progressed since {g[2][:10]}.", + "evidence": evidence, + }) + + # --- Rule 4: Risk Cluster --- + async with db.execute( + "SELECT task_id, title, labels FROM tasks WHERE project_id=? AND title LIKE '[RISK]%' AND status != 'done'", + (project_id,), + ) as cur: + risk_tasks = await cur.fetchall() + from collections import Counter as _Counter + label_to_tasks: Dict[str, List[str]] = {} + for r in risk_tasks: + try: + labels = json.loads(r[2] or "[]") + for lbl in labels: + if lbl not in ("evidence", "reflection", "risk", "auto"): + label_to_tasks.setdefault(lbl, []).append(r[0]) + except Exception: + pass + for lbl, tids in label_to_tasks.items(): + if len(tids) >= 3: + evidence = {"label": lbl, "task_ids": tids[:10], "count": len(tids)} + new_signals.append({ + "signal_type": "risk_cluster", + "severity": "high", + "title": f"Risk cluster: {len(tids)} [RISK] tasks under label '{lbl}'", + "summary": f"{len(tids)} open risk tasks share label '{lbl}', indicating a systemic issue.", + "evidence": evidence, + }) + + # --- Rule 5: Run Quality Regression --- + if len(quality_scores := []) == 0: + async with db.execute( + """SELECT props FROM dialog_nodes WHERE project_id=? + AND node_type='decision' AND title LIKE 'Reflection:%' + AND created_at >= ? 
ORDER BY created_at DESC LIMIT 10""", + (project_id, window_start), + ) as cur: + q_rows = await cur.fetchall() + for r in q_rows: + try: + p = json.loads(r[0] or "{}") + s = p.get("plan_completeness_score") + if s is not None: + quality_scores.append(float(s)) + except Exception: + pass + if len(quality_scores) >= 2: + avg_q = sum(quality_scores) / len(quality_scores) + if avg_q < 0.6: + evidence = { + "avg_quality": round(avg_q, 2), + "samples": len(quality_scores), + "window": window, + "scores": quality_scores[:10], + } + new_signals.append({ + "signal_type": "run_quality_regression", + "severity": "high" if avg_q < 0.4 else "medium", + "title": f"Run quality regression: avg completeness {int(avg_q * 100)}% in {window}", + "summary": f"Average plan completeness dropped to {int(avg_q * 100)}% over {len(quality_scores)} runs.", + "evidence": evidence, + }) + + # --- Merge / Reopen / Cooldown --- + # Cooldown per signal_type (hours). After ACK/RESOLVE, reopen only when cooldown expires. + _COOLDOWN_HOURS: Dict[str, int] = { + "release_blocker": 4, + "ops_instability": 6, + "stale_goal": 48, + "risk_cluster": 12, + "run_quality_regression": 24, + } + DEFAULT_COOLDOWN = 12 + + import datetime as _dtm + upserted = 0 + reopened = 0 + refreshed = 0 + diff: List[Dict] = [] + + for sig in new_signals: + fp = _signal_fingerprint(sig["signal_type"], sig["evidence"]) + sig_evidence = dict(sig["evidence"]) + sig_evidence["last_triggered_at"] = now + sig_evidence["cooldown_hours"] = _COOLDOWN_HOURS.get(sig["signal_type"], DEFAULT_COOLDOWN) + + async with db.execute( + "SELECT id, status, severity, evidence, updated_at FROM graph_signals WHERE project_id=? 
AND fingerprint=?", + (project_id, fp), + ) as cur: + existing = await cur.fetchone() + + if existing: + ex_id, ex_status, ex_sev, ex_ev_raw, ex_updated = existing + try: + ex_ev = json.loads(ex_ev_raw or "{}") + except Exception: + ex_ev = {} + + if ex_status in ("resolved", "dismissed"): + # Check cooldown: if closed recently, skip; otherwise reopen + try: + closed_at = _dtm.datetime.fromisoformat(ex_updated.replace("Z", "+00:00")) + elapsed_h = (_dtm.datetime.now(_dtm.timezone.utc) - closed_at).total_seconds() / 3600 + cooldown_h = ex_ev.get("cooldown_hours", DEFAULT_COOLDOWN) + except Exception: + elapsed_h = 999 + cooldown_h = DEFAULT_COOLDOWN + + if elapsed_h < cooldown_h: + diff.append({ + "action": "cooldown", + "signal_type": sig["signal_type"], + "id": ex_id, + "status": ex_status, + "cooldown_remaining_h": round(cooldown_h - elapsed_h, 1), + }) + else: + # Reopen: condition returned, cooldown expired + merged_ev = {**ex_ev, **sig_evidence, "state": "reopened", "reopened_at": now} + diff.append({ + "action": "reopen", + "signal_type": sig["signal_type"], + "id": ex_id, + "prev_status": ex_status, + }) + if not dry_run: + await db.execute( + "UPDATE graph_signals SET status='open', severity=?, title=?, summary=?, " + "evidence=?, updated_at=? 
WHERE id=?", + (sig["severity"], sig["title"], sig["summary"], + json.dumps(merged_ev), now, ex_id), + ) + reopened += 1 + + elif ex_status in ("open", "ack"): + # Already active — check if we should refresh evidence (outside cooldown) + try: + triggered_at_str = ex_ev.get("last_triggered_at", ex_updated) + last_t = _dtm.datetime.fromisoformat(triggered_at_str.replace("Z", "+00:00")) + elapsed_h = (_dtm.datetime.now(_dtm.timezone.utc) - last_t).total_seconds() / 3600 + cooldown_h = ex_ev.get("cooldown_hours", DEFAULT_COOLDOWN) + except Exception: + elapsed_h = 999 + cooldown_h = DEFAULT_COOLDOWN + + if elapsed_h >= cooldown_h: + # Outside cooldown: refresh evidence (condition still active) + merged_ev = {**ex_ev, **sig_evidence, "state": "refreshed", "refreshed_at": now} + diff.append({ + "action": "refresh", + "signal_type": sig["signal_type"], + "id": ex_id, + "status": ex_status, + }) + if not dry_run: + await db.execute( + "UPDATE graph_signals SET evidence=?, severity=?, updated_at=? 
WHERE id=?",
+                            (json.dumps(merged_ev), sig["severity"], now, ex_id),
+                        )
+                        refreshed += 1
+                else:
+                    # Within cooldown: skip silently
+                    diff.append({
+                        "action": "skip_cooldown",
+                        "signal_type": sig["signal_type"],
+                        "id": ex_id,
+                        "status": ex_status,
+                        "cooldown_remaining_h": round(cooldown_h - elapsed_h, 1),
+                    })
+        else:
+            # Brand new signal
+            sig_id = str(uuid.uuid4())
+            diff.append({
+                "action": "new",
+                "signal_type": sig["signal_type"],
+                "severity": sig["severity"],
+                "title": sig["title"],
+                "fingerprint": fp,
+            })
+            if not dry_run:
+                await db.execute(
+                    "INSERT INTO graph_signals(id,project_id,signal_type,severity,title,summary,evidence,status,fingerprint,created_at,updated_at) "
+                    "VALUES(?,?,?,?,?,?,?,?,?,?,?)",
+                    (sig_id, project_id, sig["signal_type"], sig["severity"],
+                     sig["title"], sig["summary"], json.dumps(sig_evidence),
+                     "open", fp, now, now),
+                )
+                upserted += 1
+
+    if not dry_run and (upserted + reopened + refreshed) > 0:
+        await db.commit()
+
+    return {
+        "ok": True,
+        "dry_run": dry_run,
+        "project_id": project_id,
+        "window": window,
+        "signals_generated": len(new_signals),
+        "signals_upserted": upserted,
+        "signals_reopened": reopened,
+        "signals_refreshed": refreshed,
+        "diff": diff,
+    }
+
+
+async def get_graph_signals(
+    project_id: str,
+    status: str = "open",
+    limit: int = 50,
+) -> List[Dict[str, Any]]:
+    db = await get_db()
+    if status == "all":
+        async with db.execute(
+            "SELECT * FROM graph_signals WHERE project_id=? ORDER BY CASE severity WHEN 'urgent' THEN 4 WHEN 'high' THEN 3 WHEN 'medium' THEN 2 WHEN 'low' THEN 1 ELSE 0 END DESC, created_at DESC LIMIT ?",
+            (project_id, limit),
+        ) as cur:
+            rows = await cur.fetchall()
+    else:
+        async with db.execute(
+            "SELECT * FROM graph_signals WHERE project_id=? AND status=? ORDER BY CASE severity WHEN 'urgent' THEN 4 WHEN 'high' THEN 3 WHEN 'medium' THEN 2 WHEN 'low' THEN 1 ELSE 0 END DESC, created_at DESC LIMIT ?",
+            (project_id, status, limit),
+        ) as cur:
+            rows = await cur.fetchall()
+    result = []
+    for r in rows:
+        d = dict(r)
+        try:
+            d["evidence"] = json.loads(d["evidence"])
+        except Exception:
+            d["evidence"] = {}
+        result.append(d)
+    return result
+
+
+async def update_signal_status(
+    signal_id: str,
+    new_status: str,
+) -> Optional[Dict[str, Any]]:
+    """Update signal status: ack|resolve|dismiss."""
+    if new_status not in ("ack", "resolved", "dismissed"):
+        raise ValueError(f"Invalid status: {new_status}")
+    db = await get_db()
+    now = _now()
+    await db.execute(
+        "UPDATE graph_signals SET status=?, updated_at=? WHERE id=?",
+        (new_status, now, signal_id),
+    )
+    await db.commit()
+    async with db.execute("SELECT * FROM graph_signals WHERE id=?", (signal_id,)) as cur:
+        row = await cur.fetchone()
+    if not row:
+        return None
+    d = dict(row)
+    try:
+        d["evidence"] = json.loads(d["evidence"])
+    except Exception:
+        d["evidence"] = {}
+    return d
+
+
+# ── Auto-resolve: Resolution Criteria ────────────────────────────────────────
+
+async def _check_resolution_criteria(
+    project_id: str,
+    signal_type: str,
+    evidence: Dict[str, Any],
+    window_hours: int = 24,
+) -> Dict[str, Any]:
+    """Check if the resolution criteria for a signal are met.
+
+    Returns: {resolved: bool, reason: str, snapshot: dict}
+    """
+    db = await get_db()
+    import datetime as _dtr
+    window_start = (
+        _dtr.datetime.now(_dtr.timezone.utc) - _dtr.timedelta(hours=window_hours)
+    ).strftime("%Y-%m-%dT%H:%M:%SZ")
+    snapshot: Dict[str, Any] = {}
+
+    if signal_type == "release_blocker":
+        # Resolved when: no open [RISK] tasks with high/urgent priority
+        async with db.execute(
+            "SELECT COUNT(*) FROM tasks WHERE project_id=? 
AND title LIKE '[RISK]%' " + "AND status != 'done' AND priority IN ('high','urgent')", + (project_id,), + ) as cur: + remaining = (await cur.fetchone())[0] + snapshot["remaining_risk_tasks"] = remaining + if remaining == 0: + return {"resolved": True, "reason": "No open critical [RISK] tasks remain", "snapshot": snapshot} + return {"resolved": False, "reason": f"{remaining} critical [RISK] tasks still open", "snapshot": snapshot} + + elif signal_type == "ops_instability": + # Resolved when: ops failure rate < 20% in last window_hours with ≥ 5 runs + async with db.execute( + "SELECT COUNT(*), SUM(CASE WHEN json_extract(props,'$.status')='failed' THEN 1 ELSE 0 END) " + "FROM dialog_nodes WHERE project_id=? AND node_type='ops_run' AND created_at >= ?", + (project_id, window_start), + ) as cur: + row = await cur.fetchone() + total_ops, failed_ops = (row[0] or 0), (row[1] or 0) + rate = (failed_ops / max(total_ops, 1)) if total_ops > 0 else 1.0 + snapshot = {"total_ops": total_ops, "failed_ops": failed_ops, "failure_rate": round(rate, 2)} + if total_ops >= 5 and rate < 0.20: + return {"resolved": True, "reason": f"Ops failure rate {int(rate*100)}% < 20% over {total_ops} runs", "snapshot": snapshot} + return {"resolved": False, "reason": f"Rate {int(rate*100)}% or too few runs ({total_ops})", "snapshot": snapshot} + + elif signal_type == "stale_goal": + # Resolved when: the specific goal node was updated recently OR has task edges + goal_node_id = evidence.get("node_id") + if not goal_node_id: + return {"resolved": False, "reason": "No goal node_id in evidence", "snapshot": snapshot} + async with db.execute( + "SELECT updated_at FROM dialog_nodes WHERE node_id=?", (goal_node_id,) + ) as cur: + gn = await cur.fetchone() + if not gn: + return {"resolved": True, "reason": "Goal node no longer exists (archived)", "snapshot": snapshot} + try: + import datetime as _dtg + updated = _dtg.datetime.fromisoformat(gn[0].replace("Z", "+00:00")) + days_since = 
(_dtg.datetime.now(_dtg.timezone.utc) - updated).days + except Exception: + days_since = 999 + # Check for derived tasks + async with db.execute( + "SELECT COUNT(*) FROM dialog_edges WHERE project_id=? AND from_node_id=? AND edge_type='derives_task'", + (project_id, goal_node_id), + ) as cur: + task_edges = (await cur.fetchone())[0] + snapshot = {"days_since_update": days_since, "derived_tasks": task_edges} + if days_since <= 7 or task_edges >= 1: + return {"resolved": True, "reason": f"Goal updated {days_since}d ago or has {task_edges} derived tasks", "snapshot": snapshot} + return {"resolved": False, "reason": f"Goal still stale: {days_since}d, {task_edges} tasks", "snapshot": snapshot} + + elif signal_type == "risk_cluster": + # Resolved when: all [RISK] tasks in cluster are done OR relabeled + label = evidence.get("label", "") + async with db.execute( + "SELECT COUNT(*) FROM tasks WHERE project_id=? AND title LIKE '[RISK]%' " + "AND json_extract(labels,'$') LIKE ? AND status != 'done'", + (project_id, f'%"{label}"%'), + ) as cur: + remaining = (await cur.fetchone())[0] + snapshot = {"label": label, "remaining_risk_tasks": remaining} + if remaining == 0: + return {"resolved": True, "reason": f"All [RISK] tasks for '{label}' are resolved", "snapshot": snapshot} + return {"resolved": False, "reason": f"{remaining} [RISK] tasks still open for '{label}'", "snapshot": snapshot} + + elif signal_type == "run_quality_regression": + # Resolved when: avg completeness >= 0.75 over last 5 reflections + async with db.execute( + """SELECT props FROM dialog_nodes WHERE project_id=? + AND node_type='decision' AND title LIKE 'Reflection:%' + AND created_at >= ? 
ORDER BY created_at DESC LIMIT 5""", + (project_id, window_start), + ) as cur: + rows = await cur.fetchall() + scores: List[float] = [] + for r in rows: + try: + p = json.loads(r[0] or "{}") + s = p.get("plan_completeness_score") + if s is not None: + scores.append(float(s)) + except Exception: + pass + if not scores: + return {"resolved": False, "reason": "No recent reflections to evaluate", "snapshot": snapshot} + avg = sum(scores) / len(scores) + snapshot = {"avg_completeness": round(avg, 2), "samples": len(scores)} + if len(scores) >= 3 and avg >= 0.75: + return {"resolved": True, "reason": f"Avg completeness {int(avg*100)}% >= 75% over {len(scores)} runs", "snapshot": snapshot} + return {"resolved": False, "reason": f"Avg {int(avg*100)}%, need ≥75% over ≥3 runs", "snapshot": snapshot} + + return {"resolved": False, "reason": "No criteria defined for this signal type", "snapshot": snapshot} + + +async def auto_resolve_signals( + project_id: str, + dry_run: bool = True, +) -> Dict[str, Any]: + """Check resolution criteria for all open/ack signals and auto-resolve if met. + + Returns: {ok, dry_run, checked, resolved, diff: [{signal_id, signal_type, reason, ...}]} + """ + db = await get_db() + now = _now() + + async with db.execute( + "SELECT id, signal_type, severity, evidence FROM graph_signals " + "WHERE project_id=? 
AND status IN ('open','ack') ORDER BY created_at ASC", + (project_id,), + ) as cur: + open_signals = await cur.fetchall() + + checked = len(open_signals) + resolved_count = 0 + diff: List[Dict[str, Any]] = [] + + for row in open_signals: + sig_id, sig_type, severity, ev_raw = row + try: + ev = json.loads(ev_raw or "{}") + except Exception: + ev = {} + + criteria = await _check_resolution_criteria(project_id, sig_type, ev) + if criteria["resolved"]: + diff.append({ + "signal_id": sig_id, + "signal_type": sig_type, + "action": "resolved", + "reason": criteria["reason"], + "snapshot": criteria["snapshot"], + }) + if not dry_run: + # Store resolution metadata in evidence + ev["resolved_at"] = now + ev["resolution_reason"] = criteria["reason"] + ev["resolution_snapshot"] = criteria["snapshot"] + await db.execute( + "UPDATE graph_signals SET status='resolved', evidence=?, updated_at=? WHERE id=?", + (json.dumps(ev), now, sig_id), + ) + resolved_count += 1 + else: + diff.append({ + "signal_id": sig_id, + "signal_type": sig_type, + "action": "still_open", + "reason": criteria["reason"], + }) + + if not dry_run and resolved_count > 0: + await db.commit() + + return { + "ok": True, + "dry_run": dry_run, + "project_id": project_id, + "checked": checked, + "resolved": resolved_count, + "diff": diff, + } + + +# ── Mitigation Planner ──────────────────────────────────────────────────────── + +# Deterministic task templates per signal_type. +# Each template: (title_template, priority, labels, description_template) +_MITIGATION_TEMPLATES: Dict[str, List[Dict[str, Any]]] = { + "release_blocker": [ + { + "title": "[Mitigation] Freeze release branch", + "priority": "urgent", + "labels": ["release", "mitigation", "freeze"], + "description": "Immediately freeze the release branch. 
No new merges until all [RISK] tasks are resolved.", + }, + { + "title": "[Mitigation] Triage and prioritize blocking [RISK] tasks", + "priority": "urgent", + "labels": ["release", "mitigation", "triage"], + "description": "Review all open [RISK] tasks. Assign owners, set due dates, and define acceptance criteria for each.", + }, + { + "title": "[Mitigation] Rerun release check after fixes", + "priority": "high", + "labels": ["release", "mitigation", "verify"], + "description": "After closing [RISK] tasks, rerun the release_check workflow to confirm green status.", + }, + { + "title": "[Mitigation] Notify stakeholders of release delay", + "priority": "high", + "labels": ["release", "mitigation", "comms"], + "description": "Send update to stakeholders: release is blocked, estimated fix timeline, and next check date.", + }, + ], + "ops_instability": [ + { + "title": "[Mitigation] Investigate and reproduce failing ops actions", + "priority": "urgent", + "labels": ["ops", "mitigation", "debug"], + "description": "Reproduce each failing ops action locally/staging. Capture full error logs and root cause hypothesis.", + }, + { + "title": "[Mitigation] Add retry logic or circuit breaker for flaky ops", + "priority": "high", + "labels": ["ops", "mitigation", "resilience"], + "description": "For intermittently failing ops: add retry with backoff. For consistently failing: add circuit breaker.", + }, + { + "title": "[Mitigation] Update ops runbook with failure handling steps", + "priority": "normal", + "labels": ["ops", "mitigation", "runbook"], + "description": "Document known failure modes, expected errors, and escalation path in the ops runbook.", + }, + ], + "stale_goal": [ + { + "title": "[Mitigation] Review and update stale goal status", + "priority": "high", + "labels": ["goal", "mitigation", "review"], + "description": "Review the stale goal. 
Either: close it (mark archived), update with current progress, or decompose into actionable tasks.", + }, + { + "title": "[Mitigation] Assign owner to stale goal", + "priority": "high", + "labels": ["goal", "mitigation", "ownership"], + "description": "Stale goals often lack a clear owner. Assign a DRI (directly responsible individual) with a check-in date.", + }, + { + "title": "[Mitigation] Break down stale goal into concrete tasks", + "priority": "normal", + "labels": ["goal", "mitigation", "decompose"], + "description": "If the goal is still valid, decompose it into 3-5 actionable tasks with clear acceptance criteria.", + }, + ], + "risk_cluster": [ + { + "title": "[Mitigation] Group and cluster related risk tasks for batch resolution", + "priority": "urgent", + "labels": ["risk", "mitigation", "cluster"], + "description": "Clustered [RISK] tasks share a root cause. Identify the common denominator and plan a single fix covering multiple risks.", + }, + { + "title": "[Mitigation] Assign component owner to risk cluster", + "priority": "urgent", + "labels": ["risk", "mitigation", "ownership"], + "description": "A cluster of risks in one component needs a dedicated owner. Assign and schedule a risk review session.", + }, + { + "title": "[Mitigation] Define acceptance tests for risk cluster component", + "priority": "high", + "labels": ["risk", "mitigation", "testing"], + "description": "Write acceptance tests covering the risk area. 
These become the exit criteria for resolving the cluster.", + }, + { + "title": "[Mitigation] Architectural review for risk-dense component", + "priority": "normal", + "labels": ["risk", "mitigation", "architecture"], + "description": "Schedule an architecture review for the risk-dense component to identify systemic issues.", + }, + ], + "run_quality_regression": [ + { + "title": "[Mitigation] Audit recent Supervisor runs for missing steps", + "priority": "high", + "labels": ["quality", "mitigation", "audit"], + "description": "Review the last 5-10 supervisor runs. Identify which steps are consistently incomplete or low-confidence.", + }, + { + "title": "[Mitigation] Update workflow prompts with missing step definitions", + "priority": "high", + "labels": ["quality", "mitigation", "prompts"], + "description": "If runs are systematically skipping steps, update the workflow system prompts with explicit step requirements.", + }, + { + "title": "[Mitigation] Add completeness checklist to workflow outputs", + "priority": "normal", + "labels": ["quality", "mitigation", "checklist"], + "description": "Add a structured completeness checklist to each workflow's expected output format.", + }, + ], +} + +_MITIGATION_DEFAULT = [ + { + "title": "[Mitigation] Investigate signal root cause", + "priority": "high", + "labels": ["mitigation", "investigate"], + "description": "Analyze the signal evidence, identify root cause, and define remediation steps.", + }, + { + "title": "[Mitigation] Define and execute remediation plan", + "priority": "high", + "labels": ["mitigation", "remediate"], + "description": "Based on root cause analysis, execute targeted remediation steps.", + }, + { + "title": "[Mitigation] Verify resolution and monitor", + "priority": "normal", + "labels": ["mitigation", "verify"], + "description": "After remediation, verify the signal condition is resolved and monitor for recurrence.", + }, +] + + +async def create_mitigation_plan( + project_id: str, + signal_id: 
str, + created_by: str = "sofiia", +) -> Dict[str, Any]: + """Create a mitigation plan for a graph signal. + + Creates: + 1. A 'decision' node: Mitigation Plan for + 2. Tasks from deterministic templates (by signal_type) + 3. Edges: signal_node --relates_to--> plan_node + 4. Edges: plan_node --derives_task--> task_nodes + + All idempotent: ON CONFLICT DO NOTHING for edges, unique title for plan node. + + Returns: {plan_node_id, task_ids, task_count, signal_type} + """ + db = await get_db() + now = _now() + + # Load signal + async with db.execute( + "SELECT * FROM graph_signals WHERE id=? AND project_id=?", + (signal_id, project_id), + ) as cur: + row = await cur.fetchone() + if not row: + raise ValueError(f"Signal {signal_id} not found in project {project_id}") + sig = dict(row) + try: + sig["evidence"] = json.loads(sig["evidence"]) + except Exception: + sig["evidence"] = {} + + signal_type = sig["signal_type"] + templates = _MITIGATION_TEMPLATES.get(signal_type, _MITIGATION_DEFAULT) + + # Create mitigation plan decision node + plan_title = f"Mitigation Plan: {sig['title'][:80]}" + plan_ref_id = f"mitigation:{signal_id}" + plan_props = json.dumps({ + "signal_id": signal_id, + "signal_type": signal_type, + "severity": sig["severity"], + "template_count": len(templates), + "created_by": created_by, + "auto_generated": True, + }) + plan_node_id = str(uuid.uuid4()) + + await db.execute( + """INSERT INTO dialog_nodes(node_id,project_id,node_type,ref_id,title,props,lifecycle,importance,created_by,created_at,updated_at) + VALUES(?,?,?,?,?,?,?,?,?,?,?) + ON CONFLICT(project_id,node_type,ref_id) DO UPDATE SET + title=excluded.title, props=excluded.props, updated_at=excluded.updated_at""", + (plan_node_id, project_id, "decision", plan_ref_id, plan_title, plan_props, + "active", 0.90, created_by, now, now), + ) + # Refresh real node_id + async with db.execute( + "SELECT node_id FROM dialog_nodes WHERE project_id=? 
AND node_type='decision' AND ref_id=?", + (project_id, plan_ref_id), + ) as cur: + pr = await cur.fetchone() + if pr: + plan_node_id = pr[0] + + # Try to link signal's related graph nodes to plan (from evidence) + ev_node_ids = ( + sig["evidence"].get("node_ids", []) + + sig["evidence"].get("release_node_ids", []) + + sig["evidence"].get("blocker_task_ids", []) + ) + for src_nid in ev_node_ids[:3]: + e = str(uuid.uuid4()) + await db.execute( + "INSERT INTO dialog_edges(edge_id,project_id,from_node_id,to_node_id,edge_type,props,created_by,created_at,strength) " + "VALUES(?,?,?,?,?,?,?,?,?) ON CONFLICT(project_id,from_node_id,to_node_id,edge_type) DO NOTHING", + (e, project_id, src_nid, plan_node_id, "relates_to", + json.dumps({"signal_id": signal_id}), created_by, now, 0.7), + ) + + # Create tasks from templates + task_ids: List[str] = [] + for i, tmpl in enumerate(templates): + t_id = str(uuid.uuid4()) + t_node_id = str(uuid.uuid4()) + t_ref_id = f"mitigation:{signal_id}:{i}" + t_title = tmpl["title"] + t_desc = tmpl.get("description", "") + t_labels = json.dumps(tmpl.get("labels", ["mitigation"])) + t_priority = tmpl.get("priority", "normal") + t_sort = float(i + 200) + + await db.execute( + "INSERT INTO tasks(task_id,project_id,title,description,status,priority,labels,assignees,due_at,sort_key,created_by,created_at,updated_at) " + "VALUES(?,?,?,?,?,?,?,?,?,?,?,?,?)" + " ON CONFLICT DO NOTHING", + (t_id, project_id, t_title, t_desc, "backlog", t_priority, + t_labels, "[]", None, t_sort, created_by, now, now), + ) + # Task dialog node + t_props = json.dumps({"signal_id": signal_id, "signal_type": signal_type, "auto_mitigation": True}) + await db.execute( + """INSERT INTO dialog_nodes(node_id,project_id,node_type,ref_id,title,props,lifecycle,importance,created_by,created_at,updated_at) + VALUES(?,?,?,?,?,?,?,?,?,?,?) 
+ ON CONFLICT(project_id,node_type,ref_id) DO NOTHING""", + (t_node_id, project_id, "task", t_id, t_title, t_props, + "active", _compute_importance("task"), created_by, now, now), + ) + # Refresh actual task node_id + async with db.execute( + "SELECT node_id FROM dialog_nodes WHERE project_id=? AND node_type='task' AND ref_id=?", + (project_id, t_id), + ) as cur: + tnr = await cur.fetchone() + if tnr: + t_node_id = tnr[0] + # Edge: plan → task + e2 = str(uuid.uuid4()) + await db.execute( + "INSERT INTO dialog_edges(edge_id,project_id,from_node_id,to_node_id,edge_type,props,created_by,created_at,strength) " + "VALUES(?,?,?,?,?,?,?,?,?) ON CONFLICT(project_id,from_node_id,to_node_id,edge_type) DO NOTHING", + (e2, project_id, plan_node_id, t_node_id, "derives_task", + json.dumps({"template_index": i}), created_by, now, 0.85), + ) + task_ids.append(t_id) + + # Update signal evidence with plan_node_id (for UI linkage) + ev = sig["evidence"] + ev["plan_node_id"] = plan_node_id + ev["mitigation_task_ids"] = task_ids + await db.execute( + "UPDATE graph_signals SET evidence=?, updated_at=? 
WHERE id=?", + (json.dumps(ev), now, signal_id), + ) + + await db.commit() + return { + "ok": True, + "signal_id": signal_id, + "signal_type": signal_type, + "plan_node_id": plan_node_id, + "task_ids": task_ids, + "task_count": len(task_ids), + } + + +# ── Graph Learning Layer: Playbooks ─────────────────────────────────────────── + +def _safe_key(s: str) -> str: + """Make a string safe for use as a doc_id path segment.""" + import re as _re + return _re.sub(r"[^a-zA-Z0-9_\-]", "_", s)[:40] + + +def _compute_context_key(signal_type: str, evidence: Dict[str, Any]) -> str: + """Derive a stable context_key from a signal's type and evidence.""" + if signal_type == "risk_cluster": + label = evidence.get("label", "") + return f"label:{label}" if label else "global" + elif signal_type == "ops_instability": + actions = evidence.get("failed_ops_actions", []) + if actions: + return f"ops_action:{actions[0]}" + return "global" + elif signal_type == "release_blocker": + return "global" + elif signal_type == "stale_goal": + nid = evidence.get("node_id", "") + return f"goal:{nid}" if nid else "global" + elif signal_type == "run_quality_regression": + workflow = evidence.get("graph_name", evidence.get("workflow", "")) + return f"workflow:{workflow}" if workflow else "global" + return "global" + + +def _playbook_fingerprint(project_id: str, signal_type: str, context_key: str) -> str: + raw = f"{project_id}|{signal_type}|{context_key}" + return hashlib.sha256(raw.encode()).hexdigest()[:32] + + +def _playbook_markdown( + playbook_id: str, + signal_type: str, + context_key: str, + steps: List[Dict[str, Any]], + stats: Dict[str, Any], + examples: List[Dict[str, Any]], +) -> str: + """Render playbook as markdown with embedded JSON frontmatter.""" + frontmatter = json.dumps({ + "kind": "playbook", + "playbook_id": playbook_id, + "signal_type": signal_type, + "context_key": context_key, + "steps": steps, + "stats": stats, + "examples": examples[-10:], # keep last 10 examples + }, 
indent=2) + lines = [ + f"```json", + frontmatter, + "```", + "", + f"# Playbook: {signal_type} / {context_key}", + "", + f"**Signal type:** `{signal_type}` ", + f"**Context:** `{context_key}`", + "", + "## Steps", + "", + ] + for i, s in enumerate(steps, 1): + lines.append(f"{i}. **{s.get('title', '?')}** — priority: {s.get('priority','normal')}, labels: {', '.join(s.get('labels',[]))}") + lines += [ + "", + "## Stats", + "", + f"- Uses: {stats.get('uses', 0)}", + f"- Success rate: {int(stats.get('success_rate', 0) * 100)}%", + f"- Avg time to resolve: {stats.get('ema_time_to_resolve_h', 0):.1f}h", + f"- Last used: {stats.get('last_used_at', 'never')}", + ] + if examples: + lines += ["", "## Examples", ""] + for ex in examples[-3:]: + status = "✅ resolved" if ex.get("resolved") else "❌ not resolved" + lines.append(f"- signal `{ex['signal_id'][:8]}…` → {status}" + + (f" in {ex['time_to_resolve_h']:.1f}h" if ex.get("time_to_resolve_h") else "")) + return "\n".join(lines) + + +def _parse_playbook_examples(content: str) -> List[Dict]: + """Parse examples list from playbook markdown frontmatter.""" + try: + if content.startswith("```json"): + fm_str = content.split("```json\n", 1)[1].split("\n```", 1)[0] + return json.loads(fm_str).get("examples", []) + except Exception: + pass + return [] + + +def _parse_playbook_steps(content: str) -> List[Dict]: + """Parse steps list from playbook markdown frontmatter.""" + try: + if content.startswith("```json"): + fm_str = content.split("```json\n", 1)[1].split("\n```", 1)[0] + return json.loads(fm_str).get("steps", []) + except Exception: + pass + return [] + + +async def upsert_playbook_from_signal( + project_id: str, + signal_id: str, + resolved: bool = False, + time_to_resolve_h: float = 0.0, +) -> Dict[str, Any]: + """Create or update a playbook from a signal's mitigation data. + + Requires the signal to already have plan_node_id + mitigation_task_ids in evidence. 
+ Steps are derived from the actual tasks created by mitigation (learning from action). + Content stored directly in playbooks.content (no FK to documents). + + Returns: {playbook_id, context_key, signal_type, created, stats} + """ + db = await get_db() + now = _now() + + # Load signal + async with db.execute( + "SELECT * FROM graph_signals WHERE id=? AND project_id=?", (signal_id, project_id) + ) as cur: + row = await cur.fetchone() + if not row: + raise ValueError(f"Signal {signal_id} not found") + sig = dict(row) + try: + ev = json.loads(sig["evidence"] or "{}") + except Exception: + ev = {} + + plan_node_id = ev.get("plan_node_id") + mitigation_task_ids = ev.get("mitigation_task_ids", []) + if not plan_node_id: + raise ValueError("Signal has no plan_node_id — run mitigate first") + + signal_type = sig["signal_type"] + context_key = _compute_context_key(signal_type, ev) + fp = _playbook_fingerprint(project_id, signal_type, context_key) + + # Fetch actual task titles/labels/priorities (learning from real tasks) + steps: List[Dict[str, Any]] = [] + for tid in mitigation_task_ids[:10]: + async with db.execute( + "SELECT title, priority, labels FROM tasks WHERE task_id=?", (tid,) + ) as cur: + tr = await cur.fetchone() + if tr: + try: + lbls = json.loads(tr[2] or "[]") + except Exception: + lbls = [] + steps.append({"title": tr[0], "priority": tr[1], "labels": lbls}) + + # Check existing playbook + async with db.execute( + "SELECT * FROM playbooks WHERE fingerprint=?", (fp,) + ) as cur: + pb_row = await cur.fetchone() + + example = { + "signal_id": signal_id, + "plan_node_id": plan_node_id, + "resolved": resolved, + "time_to_resolve_h": round(time_to_resolve_h, 2), + "timestamp": now, + } + + if pb_row: + pb = dict(pb_row) + playbook_id = pb["playbook_id"] + + # Parse existing examples from content + existing_examples = _parse_playbook_examples(pb.get("content", "")) + existing_ids = {e.get("signal_id") for e in existing_examples} + is_new_signal = signal_id not in 
existing_ids + if is_new_signal: + existing_examples.append(example) + + # Update stats + uses = pb["uses"] + (1 if is_new_signal else 0) + successes = pb["successes"] + (1 if resolved and is_new_signal else 0) + failures = pb["failures"] + (1 if not resolved and is_new_signal else 0) + success_rate = successes / max(1, uses) + ema = pb["ema_time_to_resolve_h"] + if resolved and time_to_resolve_h > 0: + ema = 0.7 * ema + 0.3 * time_to_resolve_h if ema > 0 else time_to_resolve_h + + stats = { + "uses": uses, "successes": successes, "failures": failures, + "success_rate": round(success_rate, 3), + "ema_time_to_resolve_h": round(ema, 2), + "last_used_at": now, + } + # Use existing steps if no new steps learned + effective_steps = steps or _parse_playbook_steps(pb.get("content", "")) + md = _playbook_markdown(playbook_id, signal_type, context_key, effective_steps, stats, existing_examples) + await db.execute( + "UPDATE playbooks SET content=?, uses=?, successes=?, failures=?, " + "success_rate=?, ema_time_to_resolve_h=?, last_used_at=?, updated_at=? 
WHERE playbook_id=?", + (md, uses, successes, failures, round(success_rate, 3), round(ema, 2), now, now, playbook_id), + ) + created = False + final_stats = {"uses": uses, "success_rate": round(success_rate, 3)} + else: + # New playbook + playbook_id = f"pb_{fp[:16]}" + uses = 1 + successes = 1 if resolved else 0 + failures = 0 if resolved else 1 + success_rate = 1.0 if resolved else 0.0 + ema = round(time_to_resolve_h, 2) if resolved else 0.0 + stats = { + "uses": uses, "successes": successes, "failures": failures, + "success_rate": round(success_rate, 3), + "ema_time_to_resolve_h": ema, + "last_used_at": now, + } + md = _playbook_markdown(playbook_id, signal_type, context_key, steps, stats, [example]) + await db.execute( + "INSERT INTO playbooks(playbook_id,project_id,signal_type,context_key,content," + "fingerprint,uses,successes,failures,success_rate," + "ema_time_to_resolve_h,last_used_at,created_at,updated_at) " + "VALUES(?,?,?,?,?,?,?,?,?,?,?,?,?,?)", + (playbook_id, project_id, signal_type, context_key, md, + fp, uses, successes, failures, round(success_rate, 3), ema, now, now, now), + ) + created = True + final_stats = {"uses": uses, "success_rate": round(success_rate, 3)} + + await db.commit() + return { + "ok": True, + "playbook_id": playbook_id, + "context_key": context_key, + "signal_type": signal_type, + "created": created, + "stats": final_stats, + } + + +async def list_playbooks( + project_id: str, + signal_type: str = "", + limit: int = 10, +) -> List[Dict[str, Any]]: + """List playbooks for a project, ordered by success_rate desc, uses desc, last_used_at desc.""" + db = await get_db() + if signal_type: + async with db.execute( + "SELECT * FROM playbooks WHERE project_id=? AND signal_type=? " + "ORDER BY success_rate DESC, uses DESC, last_used_at DESC LIMIT ?", + (project_id, signal_type, limit), + ) as cur: + rows = await cur.fetchall() + else: + async with db.execute( + "SELECT * FROM playbooks WHERE project_id=? 
" + "ORDER BY success_rate DESC, uses DESC, last_used_at DESC LIMIT ?", + (project_id, limit), + ) as cur: + rows = await cur.fetchall() + return [dict(r) for r in rows] + + +async def apply_playbook_to_signal( + project_id: str, + signal_id: str, + playbook_id: str, + created_by: str = "sofiia", +) -> Dict[str, Any]: + """Apply a playbook to a signal: create mitigation plan from playbook steps. + + - Loads playbook steps from latest doc_version frontmatter + - Creates Mitigation Plan decision node + tasks atomically + - Links derives_task edges + updates signal evidence + - ACKs signal + - Increments playbook.uses (idempotent: check if signal already in playbook examples) + Returns: {plan_node_id, task_ids, task_count, playbook_id, context_key} + """ + db = await get_db() + now = _now() + + # Load playbook + async with db.execute( + "SELECT * FROM playbooks WHERE playbook_id=? AND project_id=?", (playbook_id, project_id) + ) as cur: + pb_row = await cur.fetchone() + if not pb_row: + raise ValueError(f"Playbook {playbook_id} not found") + pb = dict(pb_row) + + # Load steps directly from playbook content + steps: List[Dict[str, Any]] = _parse_playbook_steps(pb.get("content", "")) + + # Load signal + async with db.execute( + "SELECT * FROM graph_signals WHERE id=? 
async def apply_playbook_to_signal(
    project_id: str,
    signal_id: str,
    playbook_id: str,
    created_by: str = "sofiia",
) -> Dict[str, Any]:
    """Apply a playbook to a signal: create a mitigation plan from its steps.

    Loads the playbook steps from the stored content, upserts a Mitigation
    Plan decision node, creates one backlog task per step plus derives_task
    edges, records the plan on the signal's evidence and ACKs it, and bumps
    playbook usage (idempotent: a signal already listed in the playbook's
    examples is not counted twice).

    Raises:
        ValueError: when the playbook or signal does not exist in the project.

    Returns:
        {ok, plan_node_id, task_ids, task_count, playbook_id, context_key}
    """
    db = await get_db()
    now = _now()

    # The playbook must exist inside this project.
    async with db.execute(
        "SELECT * FROM playbooks WHERE playbook_id=? AND project_id=?", (playbook_id, project_id)
    ) as cur:
        playbook_row = await cur.fetchone()
    if not playbook_row:
        raise ValueError(f"Playbook {playbook_id} not found")
    playbook = dict(playbook_row)

    # Steps are parsed straight out of the playbook markdown body.
    plan_steps: List[Dict[str, Any]] = _parse_playbook_steps(playbook.get("content", ""))

    # The signal must exist inside this project.
    async with db.execute(
        "SELECT * FROM graph_signals WHERE id=? AND project_id=?", (signal_id, project_id)
    ) as cur:
        signal_row = await cur.fetchone()
    if not signal_row:
        raise ValueError(f"Signal {signal_id} not found")
    signal = dict(signal_row)
    try:
        signal_evidence = json.loads(signal["evidence"] or "{}")
    except Exception:
        signal_evidence = {}

    # Mitigation plan decision node, upserted on (project, node_type, ref_id).
    plan_title = f"Mitigation Plan [Playbook]: {signal['title'][:60]}"
    plan_ref_id = f"mitigation:{signal_id}"
    plan_props = json.dumps({
        "signal_id": signal_id,
        "signal_type": signal["signal_type"],
        "playbook_id": playbook_id,
        "context_key": playbook["context_key"],
        "auto_generated": True,
        "from_playbook": True,
    })
    plan_node_id = str(uuid.uuid4())
    plan_importance = _compute_importance("decision")

    await db.execute(
        """INSERT INTO dialog_nodes(node_id,project_id,node_type,ref_id,title,props,lifecycle,importance,created_by,created_at,updated_at)
           VALUES(?,?,?,?,?,?,?,?,?,?,?)
           ON CONFLICT(project_id,node_type,ref_id) DO UPDATE SET
             title=excluded.title, props=excluded.props, updated_at=excluded.updated_at""",
        (plan_node_id, project_id, "decision", plan_ref_id, plan_title, plan_props,
         "active", plan_importance, created_by, now, now),
    )
    # The upsert may have landed on an existing row; read back the canonical id.
    async with db.execute(
        "SELECT node_id FROM dialog_nodes WHERE project_id=? AND node_type='decision' AND ref_id=?",
        (project_id, plan_ref_id),
    ) as cur:
        plan_lookup = await cur.fetchone()
    if plan_lookup:
        plan_node_id = plan_lookup[0]

    # One backlog task per playbook step, each linked back to the plan node.
    task_ids: List[str] = []
    for step_index, step in enumerate(plan_steps):
        task_id = str(uuid.uuid4())
        task_node_id = str(uuid.uuid4())
        task_title = step.get("title", f"Step {step_index + 1}")
        task_labels = json.dumps(step.get("labels", ["playbook", "mitigation"]))
        task_priority = step.get("priority", "normal")
        task_sort = float(step_index + 300)
        await db.execute(
            "INSERT INTO tasks(task_id,project_id,title,description,status,priority,labels,assignees,due_at,sort_key,created_by,created_at,updated_at) "
            "VALUES(?,?,?,?,?,?,?,?,?,?,?,?,?) ON CONFLICT DO NOTHING",
            (task_id, project_id, task_title,
             f"[Playbook: {playbook['context_key']}] {playbook['signal_type']}",
             "backlog", task_priority, task_labels, "[]", None, task_sort, created_by, now, now),
        )
        task_props = json.dumps({"playbook_id": playbook_id, "signal_id": signal_id})
        await db.execute(
            """INSERT INTO dialog_nodes(node_id,project_id,node_type,ref_id,title,props,lifecycle,importance,created_by,created_at,updated_at)
               VALUES(?,?,?,?,?,?,?,?,?,?,?) ON CONFLICT(project_id,node_type,ref_id) DO NOTHING""",
            (task_node_id, project_id, "task", task_id, task_title, task_props,
             "active", _compute_importance("task"), created_by, now, now),
        )
        # Resolve the canonical task node id in case the insert was a no-op.
        async with db.execute(
            "SELECT node_id FROM dialog_nodes WHERE project_id=? AND node_type='task' AND ref_id=?",
            (project_id, task_id),
        ) as cur:
            task_lookup = await cur.fetchone()
        if task_lookup:
            task_node_id = task_lookup[0]
        edge_id = str(uuid.uuid4())
        await db.execute(
            "INSERT INTO dialog_edges(edge_id,project_id,from_node_id,to_node_id,edge_type,props,created_by,created_at,strength) "
            "VALUES(?,?,?,?,?,?,?,?,?) ON CONFLICT(project_id,from_node_id,to_node_id,edge_type) DO NOTHING",
            (edge_id, project_id, plan_node_id, task_node_id, "derives_task",
             json.dumps({"playbook_id": playbook_id}), created_by, now, 0.85),
        )
        task_ids.append(task_id)

    # Record the plan on the signal and ACK it.
    signal_evidence["plan_node_id"] = plan_node_id
    signal_evidence["mitigation_task_ids"] = task_ids
    signal_evidence["playbook_id"] = playbook_id
    await db.execute(
        "UPDATE graph_signals SET evidence=?, status='ack', updated_at=? WHERE id=?",
        (json.dumps(signal_evidence), now, signal_id),
    )

    # Bump usage at most once per signal; the examples embedded in the
    # playbook content act as the idempotency ledger.
    seen_signal_ids = {e.get("signal_id") for e in _parse_playbook_examples(playbook.get("content", ""))}
    if signal_id not in seen_signal_ids:
        await db.execute(
            "UPDATE playbooks SET uses=uses+1, last_used_at=?, updated_at=? WHERE playbook_id=?",
            (now, now, playbook_id),
        )

    await db.commit()
    return {
        "ok": True,
        "plan_node_id": plan_node_id,
        "task_ids": task_ids,
        "task_count": len(task_ids),
        "playbook_id": playbook_id,
        "context_key": playbook["context_key"],
    }
WHERE playbook_id=?", + (uses, successes, round(success_rate, 3), round(ema, 2), now, now, pb["playbook_id"]), + ) + await db.commit() + return pb["playbook_id"] + + +# ── Graph Learning Layer: Lessons ───────────────────────────────────────────── + +def compute_lesson_bucket(now: Optional[str] = None) -> str: + """Return ISO week string for the given datetime or current UTC time. + + Example: '2026-W09' + """ + import datetime as _dtl + if now: + try: + dt = _dtl.datetime.fromisoformat(now.replace("Z", "+00:00")) + except Exception: + dt = _dtl.datetime.utcnow() + else: + dt = _dtl.datetime.utcnow() + year, week, _ = dt.isocalendar() + return f"{year}-W{week:02d}" + + +def _lesson_fingerprint(project_id: str, window: str, date_bucket: str) -> str: + raw = f"{project_id}|{window}|{date_bucket}" + return hashlib.sha256(raw.encode()).hexdigest()[:32] + + +def _improvement_task_fingerprint(project_id: str, bucket: str, label: str) -> str: + raw = f"{project_id}|lesson|{bucket}|{label}" + return hashlib.sha256(raw.encode()).hexdigest()[:24] + + +_SEV_WEIGHT = {"critical": 5, "high": 3, "medium": 2, "low": 1} + +# Delta computation helpers +_DELTA_EPSILONS: Dict[str, float] = { + "quality_avg": 0.02, + "ops_failure_rate": 0.05, + "risk_open": 1.0, + "cycle_time_h": 6.0, + "done": 1.0, + "wip": 1.0, + "runs": 1.0, +} +# "good" direction: "down" means lower is better, "up" means higher is better +_GOOD_DIRECTION: Dict[str, str] = { + "risk_open": "down", + "ops_failure_rate": "down", + "cycle_time_h": "down", + "quality_avg": "up", + "done": "up", +} + + +def _compute_delta_entry(curr: Optional[float], prev: Optional[float], metric: str) -> Dict[str, Any]: + """Compute delta dict for a single metric.""" + if curr is None: + return {"abs": None, "pct": None, "trend": "new"} + if prev is None: + return {"abs": None, "pct": None, "trend": "new"} + abs_delta = curr - prev + eps = _DELTA_EPSILONS.get(metric, 1.0) + if abs(abs_delta) < eps: + trend = "flat" + else: + trend = 
"up" if abs_delta > 0 else "down" + pct = round(abs_delta / prev, 4) if prev != 0 else None + return {"abs": round(abs_delta, 4), "pct": pct, "trend": trend} + + +def _compute_trend_flags(delta: Dict[str, Dict]) -> Dict[str, bool]: + """Compute risk/quality/ops/delivery improving/regressing flags.""" + def _improving(metric: str) -> bool: + d = delta.get(metric, {}) + gd = _GOOD_DIRECTION.get(metric) + t = d.get("trend", "flat") + if t == "flat" or t == "new": + return False + return (gd == "down" and t == "down") or (gd == "up" and t == "up") + + def _regressing(metric: str) -> bool: + d = delta.get(metric, {}) + gd = _GOOD_DIRECTION.get(metric) + t = d.get("trend", "flat") + if t == "flat" or t == "new": + return False + return (gd == "down" and t == "up") or (gd == "up" and t == "down") + + return { + "risk_improving": _improving("risk_open"), + "risk_regressing": _regressing("risk_open"), + "quality_improving": _improving("quality_avg"), + "quality_regressing": _regressing("quality_avg"), + "ops_improving": _improving("ops_failure_rate"), + "ops_regressing": _regressing("ops_failure_rate"), + "delivery_improving": _improving("done") and not _regressing("cycle_time_h"), + "delivery_regressing": _regressing("done") or _regressing("cycle_time_h"), + } + + +def _clamp(v: float, lo: float, hi: float) -> float: + return max(lo, min(hi, v)) + + +async def generate_lessons_report(project_id: str, window: str = "7d") -> Dict[str, Any]: + """Compute a deterministic lesson report from existing data. + + Reads: graph_snapshots, graph_signals, tasks, dialog_nodes (reflections, ops_run), playbooks. + Returns a structured dict (no DB writes). 
+ """ + import datetime as _dtl + db = await get_db() + days = _window_days(window) + window_start = (_dtl.datetime.utcnow() - _dtl.timedelta(days=days)).strftime("%Y-%m-%dT%H:%M:%SZ") + date_bucket = compute_lesson_bucket() + + # ── A) Snapshot summary ──────────────────────────────────────────────────── + snapshot_metrics: Dict[str, Any] = {} + snapshot_id = "" + async with db.execute( + "SELECT id, metrics FROM graph_snapshots WHERE project_id=? AND window=? ORDER BY created_at DESC LIMIT 1", + (project_id, window), + ) as cur: + snap = await cur.fetchone() + if snap: + snapshot_id = snap[0] + try: + snapshot_metrics = json.loads(snap[1] or "{}") + except Exception: + pass + + # ── B) Signals in window ─────────────────────────────────────────────────── + async with db.execute( + "SELECT id, signal_type, severity, status, title, evidence, updated_at " + "FROM graph_signals WHERE project_id=? AND updated_at >= ?", + (project_id, window_start), + ) as cur: + sig_rows = [dict(r) for r in await cur.fetchall()] + + signal_ids = [s["id"] for s in sig_rows] + # group by type + sig_by_type: Dict[str, Dict] = {} + for s in sig_rows: + t = s["signal_type"] + if t not in sig_by_type: + sig_by_type[t] = {"count": 0, "score": 0, "severities": [], "last_seen": ""} + sig_by_type[t]["count"] += 1 + sig_by_type[t]["score"] += _SEV_WEIGHT.get(s["severity"], 1) + sig_by_type[t]["severities"].append(s["severity"]) + if s["updated_at"] > sig_by_type[t]["last_seen"]: + sig_by_type[t]["last_seen"] = s["updated_at"] + top_signals = sorted(sig_by_type.items(), key=lambda x: x[1]["score"], reverse=True)[:5] + + # ── C) Ops / Reliability ─────────────────────────────────────────────────── + async with db.execute( + "SELECT node_id, props FROM dialog_nodes WHERE project_id=? 
AND node_type='ops_run' AND created_at >= ?", + (project_id, window_start), + ) as cur: + ops_rows = [dict(r) for r in await cur.fetchall()] + ops_run_node_ids = [r["node_id"] for r in ops_rows] + ops_total = len(ops_rows) + ops_fails = 0 + failed_actions: Dict[str, int] = {} + for r in ops_rows: + try: + p = json.loads(r["props"] or "{}") + except Exception: + p = {} + if p.get("status") in ("error", "failed"): + ops_fails += 1 + act = p.get("action_id", "unknown") + failed_actions[act] = failed_actions.get(act, 0) + 1 + ops_failure_rate = ops_fails / max(1, ops_total) if ops_total > 0 else 0.0 + top_failed_actions = sorted(failed_actions.items(), key=lambda x: x[1], reverse=True)[:3] + + # Fallback: ops_instability signals + ops_instab_sigs = [s for s in sig_rows if s["signal_type"] == "ops_instability"] + if ops_total == 0 and ops_instab_sigs: + ops_total = len(ops_instab_sigs) + ops_failure_rate = min(1.0, len(ops_instab_sigs) * 0.25) + for s in ops_instab_sigs: + try: + ev = json.loads(s["evidence"] or "{}") + for act in ev.get("failed_ops_actions", []): + failed_actions[act] = failed_actions.get(act, 0) + 1 + except Exception: + pass + top_failed_actions = sorted(failed_actions.items(), key=lambda x: x[1], reverse=True)[:3] + + # ── D) Run quality (reflections) ────────────────────────────────────────── + async with db.execute( + "SELECT node_id, props FROM dialog_nodes " + "WHERE project_id=? 
AND node_type='decision' AND title LIKE 'Run Reflection:%' AND created_at >= ?", + (project_id, window_start), + ) as cur: + ref_rows = [dict(r) for r in await cur.fetchall()] + reflection_node_ids = [r["node_id"] for r in ref_rows] + completeness_scores = [] + for r in ref_rows: + try: + p = json.loads(r["props"] or "{}") + sc = p.get("plan_completeness_score") + if sc is not None: + completeness_scores.append(float(sc)) + except Exception: + pass + avg_completeness = sum(completeness_scores) / max(1, len(completeness_scores)) if completeness_scores else None + pct_below_threshold = ( + sum(1 for s in completeness_scores if s < 0.6) / max(1, len(completeness_scores)) + if completeness_scores else 0.0 + ) + + # ── E) Risk clusters (label → count) ────────────────────────────────────── + risk_cluster_sigs = [s for s in sig_rows if s["signal_type"] == "risk_cluster"] + label_counts: Dict[str, Dict] = {} + for s in risk_cluster_sigs: + try: + ev = json.loads(s["evidence"] or "{}") + except Exception: + ev = {} + lbl = ev.get("label", "unknown") + if lbl not in label_counts: + label_counts[lbl] = {"count": 0, "resolved": 0, "open": 0} + label_counts[lbl]["count"] += 1 + if s["status"] == "resolved": + label_counts[lbl]["resolved"] += 1 + else: + label_counts[lbl]["open"] += 1 + top_risk_labels = sorted(label_counts.items(), key=lambda x: x[1]["count"], reverse=True)[:5] + + # ── F) Playbooks ────────────────────────────────────────────────────────── + async with db.execute( + "SELECT * FROM playbooks WHERE project_id=? 
ORDER BY success_rate DESC, uses DESC LIMIT 5", + (project_id,), + ) as cur: + pb_rows = [dict(r) for r in await cur.fetchall()] + playbook_ids = [p["playbook_id"] for p in pb_rows] + top_by_sr = sorted(pb_rows, key=lambda p: p["success_rate"], reverse=True)[:3] + top_by_uses = sorted(pb_rows, key=lambda p: p["uses"], reverse=True)[:3] + slowest = sorted(pb_rows, key=lambda p: p.get("ema_time_to_resolve_h") or 0, reverse=True)[:1] + + # ── G) Improvement triggers ──────────────────────────────────────────────── + improvement_tasks: List[Dict[str, Any]] = [] + + # Trigger 1: ops instability + ops_instab_count = len([s for s in sig_rows if s["signal_type"] == "ops_instability" + and s["severity"] in ("high", "critical")]) + if ops_instab_count >= 2 or (ops_failure_rate >= 0.33 and ops_total >= 3): + label_str = "ops" + tfp = _improvement_task_fingerprint(project_id, date_bucket, "ops_resilience") + improvement_tasks.append({ + "title": "[LESSON] Improve ops resilience + runbook hardening", + "labels": ["lesson", "improvement", f"bucket:{date_bucket}", "ops"], + "priority": "high", + "description": f"Triggered: ops_instability signals={ops_instab_count}, failure_rate={ops_failure_rate:.1%}", + "fingerprint": tfp, + }) + + # Trigger 2: run quality + rqr_count = len([s for s in sig_rows if s["signal_type"] == "run_quality_regression"]) + if rqr_count >= 1 or (avg_completeness is not None and avg_completeness < 0.75): + tfp = _improvement_task_fingerprint(project_id, date_bucket, "run_quality") + improvement_tasks.append({ + "title": "[LESSON] Workflow prompt/checklist hardening", + "labels": ["lesson", "improvement", f"bucket:{date_bucket}", "quality"], + "priority": "high", + "description": ( + f"Triggered: rqr_signals={rqr_count}, " + f"avg_completeness={avg_completeness:.2f}" if avg_completeness else f"rqr_signals={rqr_count}" + ), + "fingerprint": tfp, + }) + + # Trigger 3: repeating risk cluster label + repeat_labels = [(lbl, d) for lbl, d in label_counts.items() 
if d["count"] >= 2] + if repeat_labels and len(improvement_tasks) < 3: + top_lbl, top_d = repeat_labels[0] + tfp = _improvement_task_fingerprint(project_id, date_bucket, f"risk_{top_lbl}") + improvement_tasks.append({ + "title": f"[LESSON] Assign owner + test gate for [{top_lbl}]", + "labels": ["lesson", "improvement", f"bucket:{date_bucket}", "risk", f"area:{top_lbl}"], + "priority": "normal", + "description": f"Triggered: risk_cluster label '{top_lbl}' appeared {top_d['count']} times, {top_d['open']} still open", + "fingerprint": tfp, + }) + + improvement_tasks = improvement_tasks[:3] # hard cap + + # ── Delta: load previous lesson ──────────────────────────────────────────── + prev_lesson_metrics: Optional[Dict[str, Any]] = None + prev_bucket: Optional[str] = None + async with db.execute( + "SELECT date_bucket, metrics_json FROM lessons " + "WHERE project_id=? AND window=? AND date_bucket < ? " + "ORDER BY date_bucket DESC LIMIT 1", + (project_id, window, date_bucket), + ) as cur: + prev_row = await cur.fetchone() + if prev_row: + prev_bucket = prev_row[0] + try: + prev_full = json.loads(prev_row[1] or "{}") + # Support both old flat format and new {current: ...} format + prev_lesson_metrics = prev_full.get("current") or { + "risk_open": prev_full.get("risk_tasks_open"), + "done": prev_full.get("tasks_done"), + "wip": prev_full.get("wip"), + "quality_avg": prev_full.get("run_quality_avg"), + "ops_failure_rate": prev_full.get("ops_failure_rate"), + "cycle_time_h": ( + (prev_full.get("cycle_time_proxy_days") or 0) * 24 + if prev_full.get("cycle_time_proxy_days") is not None else None + ), + "runs": prev_full.get("agent_runs_in_window"), + } + except Exception: + prev_lesson_metrics = None + + # Build current snapshot summary for delta + curr_quality = round(avg_completeness, 3) if avg_completeness is not None else snapshot_metrics.get("run_quality_avg") + cycle_h = (snapshot_metrics.get("cycle_time_proxy_days") or 0) * 24 if 
snapshot_metrics.get("cycle_time_proxy_days") is not None else None + current_block = { + "risk_open": snapshot_metrics.get("risk_tasks_open"), + "done": snapshot_metrics.get("tasks_done"), + "wip": snapshot_metrics.get("wip"), + "quality_avg": curr_quality, + "ops_failure_rate": round(ops_failure_rate, 3), + "cycle_time_h": round(cycle_h, 1) if cycle_h is not None else None, + "runs": snapshot_metrics.get("agent_runs_in_window"), + } + + # Compute per-metric delta + delta_keys = ["risk_open", "done", "wip", "quality_avg", "ops_failure_rate", "cycle_time_h", "runs"] + delta_block: Dict[str, Any] = {} + for k in delta_keys: + prev_val = prev_lesson_metrics.get(k) if prev_lesson_metrics else None + delta_block[k] = _compute_delta_entry(current_block.get(k), prev_val, k) + + trend_flags = _compute_trend_flags(delta_block) + + # ── Metrics dict ────────────────────────────────────────────────────────── + # Full structured metrics with current / previous / delta / trend_flags + metrics = { + "kind": "lesson_metrics", + "project_id": project_id, + "window": window, + "date_bucket": date_bucket, + # Flat keys for backward compatibility (UI reads these directly) + "wip": snapshot_metrics.get("wip"), + "tasks_done": snapshot_metrics.get("tasks_done"), + "risk_tasks_open": snapshot_metrics.get("risk_tasks_open"), + "cycle_time_proxy_days": snapshot_metrics.get("cycle_time_proxy_days"), + "run_quality_avg": snapshot_metrics.get("run_quality_avg"), + "agent_runs_in_window": snapshot_metrics.get("agent_runs_in_window"), + "signals_in_window": len(sig_rows), + "top_signal_type": top_signals[0][0] if top_signals else None, + "ops_failure_rate": round(ops_failure_rate, 3), + "avg_completeness": round(avg_completeness, 3) if avg_completeness is not None else None, + "improvement_tasks_count": len(improvement_tasks), + # Delta intelligence + "current": current_block, + "previous": {"date_bucket": prev_bucket, **(prev_lesson_metrics or {})} if prev_lesson_metrics else None, + 
"delta": delta_block, + "trend_flags": trend_flags, + "impact": {"impact_score": 0.0}, # populated later by evaluate_lesson_impact + } + + # ── Markdown report ─────────────────────────────────────────────────────── + frontmatter = json.dumps({ + "kind": "lesson", + "window": window, + "date_bucket": date_bucket, + "snapshot_id": snapshot_id, + "signal_ids": signal_ids[:10], + "top_signal_types": [t for t, _ in top_signals[:3]], + "created_improvement_tasks": [t["title"] for t in improvement_tasks], + "trend_flags": trend_flags, + }, indent=2) + + def _sev_mix(sevs: List[str]) -> str: + c = {"critical": 0, "high": 0, "medium": 0, "low": 0} + for s in sevs: + c[s] = c.get(s, 0) + 1 + return " ".join(f"{k}:{v}" for k, v in c.items() if v) + + md_lines = [ + "```json", + frontmatter, + "```", + "", + f"# Lessons Learned — {date_bucket} ({window})", + "", + "## A) Snapshot Summary", + "", + ] + sm = snapshot_metrics + md_lines += [ + f"| Metric | Value |", + f"|--------|-------|", + f"| WIP | {sm.get('wip', '—')} |", + f"| Done | {sm.get('tasks_done', '—')} |", + f"| [RISK] open | {sm.get('risk_tasks_open', '—')} |", + f"| Cycle (days) | {sm.get('cycle_time_proxy_days', '—')} |", + "| Run quality | " + (f"{sm.get('run_quality_avg', 0):.0%}" if sm.get('run_quality_avg') is not None else '—') + " |", + f"| Agent runs | {sm.get('agent_runs_in_window', '—')} |", + "", + "## B) Top Signals", + "", + ] + if top_signals: + md_lines += ["| Type | Count | Score | Severity mix | Last seen |", "|------|-------|-------|-------------|-----------|"] + for t, d in top_signals: + md_lines.append(f"| {t} | {d['count']} | {d['score']} | {_sev_mix(d['severities'])} | {d['last_seen'][:10]} |") + else: + md_lines.append("_No signals in this window._") + md_lines += ["", "## C) Ops / Reliability", ""] + if ops_total > 0: + md_lines += [ + f"- Ops runs: {ops_total} | Failures: {ops_fails} | Failure rate: {ops_failure_rate:.0%}", + ] + if top_failed_actions: + md_lines.append("- Top failed 
actions:") + for act, cnt in top_failed_actions: + md_lines.append(f" - `{act}`: {cnt}x") + else: + md_lines.append("_No ops_run data in this window._") + md_lines += ["", "## D) Run Quality", ""] + if completeness_scores: + md_lines += [ + f"- Reflections: {len(completeness_scores)}", + f"- Avg completeness: {avg_completeness:.0%}", + f"- Below 0.6 threshold: {pct_below_threshold:.0%}", + ] + else: + md_lines.append("_No reflection data in this window._") + md_lines += ["", "## E) Risk Clusters", ""] + if top_risk_labels: + md_lines += ["| Label | Count | Open | Resolved |", "|-------|-------|------|----------|"] + for lbl, d in top_risk_labels: + md_lines.append(f"| {lbl} | {d['count']} | {d['open']} | {d['resolved']} |") + else: + md_lines.append("_No risk cluster signals in this window._") + md_lines += ["", "## F) Playbooks", ""] + if top_by_sr: + md_lines.append("**Best success rate:**") + for p in top_by_sr: + md_lines.append(f"- `{p['context_key']}`: {int(p['success_rate']*100)}% ({p['uses']} uses)") + if top_by_uses: + md_lines.append("\n**Most used:**") + for p in top_by_uses: + md_lines.append(f"- `{p['context_key']}`: {p['uses']} uses, {int(p['success_rate']*100)}% SR") + if slowest: + p = slowest[0] + md_lines.append(f"\n**Slowest resolution:** `{p['context_key']}`: avg {p.get('ema_time_to_resolve_h', 0):.1f}h") + if not pb_rows: + md_lines.append("_No playbooks yet._") + md_lines += ["", "## G) Recommended Improvements", ""] + if improvement_tasks: + for i, t in enumerate(improvement_tasks, 1): + md_lines += [ + f"### {i}. 
{t['title']}", + f"**Priority:** {t['priority']} ", + f"**Labels:** {', '.join(t['labels'])} ", + f"**Rationale:** {t['description']}", + "", + ] + else: + md_lines.append("_No improvement actions triggered this period._") + # ── Trend section ────────────────────────────────────────────────────────── + md_lines += ["", "## H) 📈 Trend vs Previous Week", ""] + if prev_lesson_metrics and prev_bucket: + def _arrow(metric: str) -> str: + d = delta_block.get(metric, {}) + trend = d.get("trend", "flat") + gd = _GOOD_DIRECTION.get(metric) + if trend == "flat": + return "→" + if trend == "up": + return "⬆️ 🟢" if gd == "up" else "⬆️ 🔴" + if trend == "down": + return "⬇️ 🟢" if gd == "down" else "⬇️ 🔴" + return "—" + + def _delta_str(metric: str, fmt: str = ".2f") -> str: + d = delta_block.get(metric, {}) + v = d.get("abs") + if v is None: + return "—" + prefix = "+" if v > 0 else "" + return f"{prefix}{v:{fmt}}" + + md_lines += [ + f"_vs {prev_bucket}_", + "", + "| Metric | Prev | Now | Δ | Trend |", + "|--------|------|-----|---|-------|", + f"| [RISK] open | {prev_lesson_metrics.get('risk_open', '—')} | {current_block.get('risk_open', '—')} | {_delta_str('risk_open', '.0f')} | {_arrow('risk_open')} |", + f"| Quality avg | " + (f"{(prev_lesson_metrics.get('quality_avg') or 0):.0%}" if prev_lesson_metrics.get('quality_avg') is not None else '—') + " | " + (f"{(current_block.get('quality_avg') or 0):.0%}" if current_block.get('quality_avg') is not None else '—') + f" | {_delta_str('quality_avg', '.2f')} | {_arrow('quality_avg')} |", + f"| Ops failure | " + (f"{(prev_lesson_metrics.get('ops_failure_rate') or 0):.0%}" if prev_lesson_metrics.get('ops_failure_rate') is not None else '—') + " | " + (f"{(current_block.get('ops_failure_rate') or 0):.0%}" if current_block.get('ops_failure_rate') is not None else '—') + f" | {_delta_str('ops_failure_rate', '.3f')} | {_arrow('ops_failure_rate')} |", + f"| Done | {prev_lesson_metrics.get('done', '—')} | {current_block.get('done', '—')} 
| {_delta_str('done', '.0f')} | {_arrow('done')} |", + f"| WIP | {prev_lesson_metrics.get('wip', '—')} | {current_block.get('wip', '—')} | {_delta_str('wip', '.0f')} | {_arrow('wip')} |", + ] + tf = trend_flags + improving = [k.replace("_improving", "") for k, v in tf.items() if v and k.endswith("_improving")] + regressing = [k.replace("_regressing", "") for k, v in tf.items() if v and k.endswith("_regressing")] + if improving: + md_lines.append(f"\n✅ **Improving:** {', '.join(improving)}") + if regressing: + md_lines.append(f"⚠️ **Regressing:** {', '.join(regressing)}") + else: + md_lines.append("_No previous lesson to compare against._") + + md_lines += ["", "## Evidence", ""] + md_lines.append(f"- Signals: {len(signal_ids)} (window: {window})") + md_lines.append(f"- Playbooks referenced: {len(playbook_ids)}") + md_lines.append(f"- Ops run nodes: {len(ops_run_node_ids)}") + md_lines.append(f"- Reflections: {len(reflection_node_ids)}") + + markdown = "\n".join(md_lines) + + return { + "date_bucket": date_bucket, + "window": window, + "metrics_json": json.dumps(metrics), + "markdown": markdown, + "evidence": { + "signal_ids": signal_ids, + "playbook_ids": playbook_ids, + "reflection_node_ids": reflection_node_ids, + "ops_run_node_ids": ops_run_node_ids, + "snapshot_id": snapshot_id, + }, + "planned_improvement_tasks": improvement_tasks, + } + + +async def upsert_lesson( + project_id: str, + window: str = "7d", + dry_run: bool = True, + created_by: str = "sofiia", +) -> Dict[str, Any]: + """Generate and optionally persist a Lesson for the current week bucket. + + dry_run=True: compute and return report without writing to DB. + dry_run=False: upsert decision node + doc_version + lessons row + improvement tasks + links. + + Idempotent: same bucket → same lesson_id, lesson_node; new doc_version created only on content change. 
async def upsert_lesson(
    project_id: str,
    window: str = "7d",
    dry_run: bool = True,
    created_by: str = "sofiia",
) -> Dict[str, Any]:
    """Generate and optionally persist a Lesson for the current week bucket.

    dry_run=True: compute and return the report without writing to the DB.
    dry_run=False: upsert decision node + lessons row + improvement tasks + links.

    Idempotent: the same bucket maps to the same lesson_id / lesson node;
    re-running refreshes content and metrics in place.
    """
    report = await generate_lessons_report(project_id, window)
    if dry_run:
        return {
            "dry_run": True,
            "date_bucket": report["date_bucket"],
            "metrics_json": report["metrics_json"],
            "markdown": report["markdown"],
            "evidence": report["evidence"],
            "planned_improvement_tasks": report["planned_improvement_tasks"],
        }

    db = await get_db()
    now = _now()
    date_bucket = report["date_bucket"]
    fp = _lesson_fingerprint(project_id, window, date_bucket)

    # ── Upsert decision node ──────────────────────────────────────────────────
    node_title = f"Lesson: {date_bucket} ({window})"
    node_ref_id = f"lesson:{project_id}:{date_bucket}:{window}"
    node_props = json.dumps({
        "window": window,
        "date_bucket": date_bucket,
        "metrics": json.loads(report["metrics_json"]),
        "signal_ids": report["evidence"]["signal_ids"][:10],
    })
    node_imp = 0.9  # lessons rank high in the graph

    lesson_node_id = str(uuid.uuid4())
    await db.execute(
        """INSERT INTO dialog_nodes(node_id,project_id,node_type,ref_id,title,props,
             lifecycle,importance,fingerprint,created_by,created_at,updated_at)
           VALUES(?,?,?,?,?,?,?,?,?,?,?,?)
           ON CONFLICT(project_id,node_type,ref_id) DO UPDATE SET
             props=excluded.props, updated_at=excluded.updated_at""",
        (lesson_node_id, project_id, "decision", node_ref_id, node_title,
         node_props, "active", node_imp, fp, created_by, now, now),
    )
    # Fetch real node_id (may differ if the node already existed)
    async with db.execute(
        "SELECT node_id FROM dialog_nodes WHERE project_id=? AND node_type='decision' AND ref_id=?",
        (project_id, node_ref_id),
    ) as cur:
        nrow = await cur.fetchone()
    if nrow:
        lesson_node_id = nrow[0]

    # Lessons deliberately do NOT write doc_versions (avoids an FK constraint
    # on the documents table); the markdown is embedded in the lesson node
    # props below, and this id is stored on the lessons row as a soft key.
    doc_version_id = str(uuid.uuid4())

    # ── Upsert lessons row ────────────────────────────────────────────────────
    lesson_id = f"ls_{fp[:16]}"
    async with db.execute(
        "SELECT lesson_id FROM lessons WHERE fingerprint=?", (fp,)
    ) as cur:
        existing = await cur.fetchone()

    if existing:
        lesson_id = existing[0]
        await db.execute(
            "UPDATE lessons SET lesson_node_id=?, doc_version_id=?, metrics_json=?, updated_at=? "
            "WHERE lesson_id=?",
            (lesson_node_id, doc_version_id, report["metrics_json"], now, lesson_id),
        )
    else:
        await db.execute(
            "INSERT INTO lessons(lesson_id,project_id,window,date_bucket,fingerprint,status,"
            "lesson_node_id,doc_version_id,metrics_json,created_at,updated_at) "
            "VALUES(?,?,?,?,?,?,?,?,?,?,?)",
            (lesson_id, project_id, window, date_bucket, fp, "published",
             lesson_node_id, doc_version_id, report["metrics_json"], now, now),
        )
    # Store markdown in the lesson node props (accessible via graph)
    await db.execute(
        "UPDATE dialog_nodes SET props=? WHERE node_id=?",
        (json.dumps({
            "window": window,
            "date_bucket": date_bucket,
            "metrics": json.loads(report["metrics_json"]),
            "markdown": report["markdown"][:4000],  # cap for props
            "signal_ids": report["evidence"]["signal_ids"][:10],
        }), lesson_node_id),
    )

    # ── Create improvement tasks ──────────────────────────────────────────────
    created_task_ids: List[str] = []
    for i, task_spec in enumerate(report["planned_improvement_tasks"]):
        t_fp = task_spec["fingerprint"]
        # Dedup by fingerprint: one task per (project, bucket, label).
        async with db.execute(
            "SELECT node_id FROM dialog_nodes WHERE project_id=? AND fingerprint=?",
            (project_id, t_fp),
        ) as cur:
            tn_existing = await cur.fetchone()

        if tn_existing:
            t_node_id = tn_existing[0]
            # Recover the task_id from the existing node's ref_id.
            async with db.execute(
                "SELECT ref_id FROM dialog_nodes WHERE node_id=?", (t_node_id,)
            ) as cur:
                refrow = await cur.fetchone()
            t_id = refrow[0] if refrow else None
        else:
            t_id = str(uuid.uuid4())
            t_node_id = str(uuid.uuid4())
            t_sort = float(500 + i)
            await db.execute(
                "INSERT INTO tasks(task_id,project_id,title,description,status,priority,"
                "labels,assignees,due_at,sort_key,created_by,created_at,updated_at) "
                "VALUES(?,?,?,?,?,?,?,?,?,?,?,?,?) ON CONFLICT DO NOTHING",
                (t_id, project_id, task_spec["title"], task_spec.get("description", ""),
                 "backlog", task_spec["priority"], json.dumps(task_spec["labels"]),
                 "[]", None, t_sort, created_by, now, now),
            )
            t_props = json.dumps({"lesson_id": lesson_id, "lesson_bucket": date_bucket})
            await db.execute(
                """INSERT INTO dialog_nodes(node_id,project_id,node_type,ref_id,title,props,
                     lifecycle,importance,fingerprint,created_by,created_at,updated_at)
                   VALUES(?,?,?,?,?,?,?,?,?,?,?,?) ON CONFLICT DO NOTHING""",
                (t_node_id, project_id, "task", t_id, task_spec["title"],
                 t_props, "active", _compute_importance("task"),
                 t_fp, created_by, now, now),
            )

        if t_id:
            created_task_ids.append(t_id)
            # Edge: lesson_node → task_node (derives_task)
            eid = str(uuid.uuid4())
            await db.execute(
                "INSERT INTO dialog_edges(edge_id,project_id,from_node_id,to_node_id,edge_type,"
                "props,created_by,created_at,strength) VALUES(?,?,?,?,?,?,?,?,?) "
                "ON CONFLICT(project_id,from_node_id,to_node_id,edge_type) DO NOTHING",
                (eid, project_id, lesson_node_id, t_node_id, "derives_task",
                 json.dumps({"lesson_id": lesson_id}), created_by, now, 0.9),
            )

    # ── Link lesson to signals (entity_links) ─────────────────────────────────
    for sig_id in report["evidence"]["signal_ids"][:10]:
        eid = str(uuid.uuid4())
        await db.execute(
            """INSERT INTO entity_links(link_id,project_id,from_type,from_id,to_type,to_id,
                 link_type,created_by,created_at)
               VALUES(?,?,?,?,?,?,?,?,?)
               ON CONFLICT(project_id,from_type,from_id,to_type,to_id,link_type) DO NOTHING""",
            (eid, project_id, "dialog_node", lesson_node_id, "signal", sig_id,
             "summarizes", created_by, now),
        )

    await db.commit()

    # Best-effort: evaluate the previous bucket's lesson against this one.
    try:
        await evaluate_lesson_impact(project_id, window, current_bucket=date_bucket)
    except Exception:
        pass  # Non-critical

    return {
        "dry_run": False,
        "lesson_id": lesson_id,
        "lesson_node_id": lesson_node_id,
        "doc_version_id": doc_version_id,
        "date_bucket": date_bucket,
        "metrics_json": report["metrics_json"],
        "created_task_ids": created_task_ids,
        "evidence": report["evidence"],
        "planned_improvement_tasks": report["planned_improvement_tasks"],
    }
metrics. + + Called automatically after upsert_lesson succeeds (current bucket), + evaluates the lesson from the previous bucket. + """ + db = await get_db() + if current_bucket is None: + current_bucket = compute_lesson_bucket() + + # Fetch current and prior lessons (gracefully handle missing impact columns) + try: + async with db.execute( + "SELECT lesson_id, date_bucket, metrics_json, impact_score, impact_json " + "FROM lessons WHERE project_id=? AND window=? AND date_bucket<=? " + "ORDER BY date_bucket DESC LIMIT 2", + (project_id, window, current_bucket), + ) as cur: + rows = await cur.fetchall() + except Exception: + # Fallback without impact columns (old schema) + async with db.execute( + "SELECT lesson_id, date_bucket, metrics_json, 0.0, '{}' " + "FROM lessons WHERE project_id=? AND window=? AND date_bucket<=? " + "ORDER BY date_bucket DESC LIMIT 2", + (project_id, window, current_bucket), + ) as cur: + rows = await cur.fetchall() + + if len(rows) < 2: + return None # Need at least two buckets + + curr_row = rows[0] + prev_row = rows[1] + + # Idempotency: skip if already evaluated unless force + if not force: + prev_impact = json.loads(prev_row[4] or "{}") + if prev_impact.get("evaluated_bucket") == curr_row[1]: + return None + + # Parse metrics + def _parse_current(metrics_json: str) -> Dict[str, Any]: + try: + m = json.loads(metrics_json or "{}") + return m.get("current") or { + "risk_open": m.get("risk_tasks_open"), + "done": m.get("tasks_done"), + "wip": m.get("wip"), + "quality_avg": m.get("run_quality_avg") or m.get("avg_completeness"), + "ops_failure_rate": m.get("ops_failure_rate"), + } + except Exception: + return {} + + prev_curr = _parse_current(prev_row[2]) + next_curr = _parse_current(curr_row[2]) + + # Impact formula + def _safe(d: Dict, k: str, default: float = 0.0) -> float: + v = d.get(k) + return float(v) if v is not None else default + + risk_delta = _safe(prev_curr, "risk_open") - _safe(next_curr, "risk_open") + ops_delta = 
_safe(prev_curr, "ops_failure_rate") - _safe(next_curr, "ops_failure_rate") + quality_delta = _safe(next_curr, "quality_avg") - _safe(prev_curr, "quality_avg") + done_delta = _safe(next_curr, "done") - _safe(prev_curr, "done") + + score = ( + 1.0 * _clamp(risk_delta, -5, 5) / 5 + + 1.0 * _clamp(ops_delta, -1, 1) + + 0.5 * _clamp(quality_delta, -0.2, 0.2) / 0.2 + + 0.5 * _clamp(done_delta, -10, 10) / 10 + ) + score = round(score, 4) + + # Fetch improvement task node ids from prior lesson node (via derives_task edges) + prior_lesson_row = await db.execute( + "SELECT lesson_node_id FROM lessons WHERE lesson_id=?", (prev_row[0],) + ) + prior_lesson_row = await prior_lesson_row.fetchone() + improvement_task_node_ids: List[str] = [] + if prior_lesson_row and prior_lesson_row[0]: + async with db.execute( + "SELECT to_node_id FROM dialog_edges " + "WHERE project_id=? AND from_node_id=? AND edge_type='derives_task'", + (project_id, prior_lesson_row[0]), + ) as cur: + task_rows = await cur.fetchall() + improvement_task_node_ids = [r[0] for r in task_rows] + + # Attribution: resolve task statuses via dialog_nodes.ref_id → tasks + done_task_ids: List[str] = [] + all_task_ref_ids: List[str] = [] + if improvement_task_node_ids: + placeholders = ",".join("?" * len(improvement_task_node_ids)) + async with db.execute( + f"SELECT ref_id FROM dialog_nodes WHERE node_id IN ({placeholders}) AND ref_id IS NOT NULL", + improvement_task_node_ids, + ) as cur: + ref_rows = await cur.fetchall() + all_task_ref_ids = [r[0] for r in ref_rows] + if all_task_ref_ids: + tp = ",".join("?" 
* len(all_task_ref_ids)) + async with db.execute( + f"SELECT task_id, status FROM tasks WHERE task_id IN ({tp})", + all_task_ref_ids, + ) as cur: + task_status_rows = await cur.fetchall() + done_task_ids = [r[0] for r in task_status_rows if r[1] == "done"] + + total_tasks = len(all_task_ref_ids) + done_count = len(done_task_ids) + completion_ratio = round(done_count / total_tasks, 4) if total_tasks > 0 else None + + # Determine attribution level + if completion_ratio is None: + attribution_level = "unknown" + attribution_rule = "no_improvement_tasks" + elif completion_ratio >= 0.5 and score >= 0.2: + attribution_level = "strong" + attribution_rule = "completion>=0.5 && impact_score>=0.2" + elif score >= 0.2: + attribution_level = "weak" + attribution_rule = "completion<0.5 && impact_score>=0.2" + else: + attribution_level = "unknown" + attribution_rule = "impact_score<0.2" + + now_iso = _now() + impact_j = json.dumps({ + "evaluated_at": now_iso, + "prior_bucket": prev_row[1], + "evaluated_bucket": curr_row[1], + "risk_open_delta": round(float(next_curr.get("risk_open") or 0) - float(prev_curr.get("risk_open") or 0), 2), + "ops_failure_delta": round(float(next_curr.get("ops_failure_rate") or 0) - float(prev_curr.get("ops_failure_rate") or 0), 4), + "quality_delta": round(quality_delta, 4), + "done_delta": round(done_delta, 2), + "improvements": { + "task_ids": all_task_ref_ids, + "done_task_ids": done_task_ids, + "completion_ratio": completion_ratio, + }, + "attribution": { + "level": attribution_level, + "rule": attribution_rule, + }, + # Legacy field kept for backward compat + "improvement_task_ids": all_task_ref_ids, + "completion_ratio": completion_ratio, + }) + + await db.execute( + "UPDATE lessons SET impact_score=?, impact_json=?, updated_at=? 
WHERE lesson_id=?", + (score, impact_j, now_iso, prev_row[0]), + ) + await db.commit() + + return {"lesson_id": prev_row[0], "impact_score": score, "impact_json": json.loads(impact_j)} + + +# ── Streak / Drift detection ────────────────────────────────────────────────── + +async def get_recent_lessons(project_id: str, limit: int = 3) -> List[Dict[str, Any]]: + """Return the most recent lessons for a project, ordered by date_bucket desc.""" + db = await get_db() + async with db.execute( + "SELECT lesson_id, date_bucket, metrics_json FROM lessons " + "WHERE project_id=? ORDER BY date_bucket DESC LIMIT ?", + (project_id, limit), + ) as cur: + rows = await cur.fetchall() + result = [] + for r in rows: + try: + metrics = json.loads(r[2] or "{}") + except Exception: + metrics = {} + result.append({"lesson_id": r[0], "date_bucket": r[1], "metrics": metrics}) + return result + + +def _extract_streak_dir(flags: Dict, metric: str) -> Optional[str]: + """Return 'regressing', 'improving', or None for a metric at one bucket.""" + if not flags: + return None + if flags.get(f"{metric}_regressing"): + return "regressing" + if flags.get(f"{metric}_improving"): + return "improving" + return "flat" + + +async def compute_lesson_streaks(project_id: str) -> Dict[str, Any]: + """Compute streak length/direction per metric (risk/ops/quality) for a project. 
+ + Returns: + {risk:{dir,len,since_bucket}, ops:{...}, quality:{...}} + """ + recent = await get_recent_lessons(project_id, limit=3) + # ordered most-recent first; we need [B, B-1, B-2] + + metrics = ["risk", "ops", "quality"] + result = {} + for metric in metrics: + dirs = [] + buckets = [] + for r in recent: + tf = r["metrics"].get("trend_flags") or {} + dirs.append(_extract_streak_dir(tf, metric)) + buckets.append(r["date_bucket"]) + + if not dirs: + result[metric] = {"dir": "flat", "len": 0, "since_bucket": None} + continue + + # Check len=3 + if len(dirs) >= 3 and dirs[0] and dirs[0] not in ("flat",) and dirs[0] == dirs[1] == dirs[2]: + result[metric] = {"dir": dirs[0], "len": 3, "since_bucket": buckets[2]} + # Check len=2 + elif len(dirs) >= 2 and dirs[0] and dirs[0] not in ("flat",) and dirs[0] == dirs[1]: + result[metric] = {"dir": dirs[0], "len": 2, "since_bucket": buckets[1]} + else: + result[metric] = {"dir": dirs[0] or "flat", "len": 1 if dirs[0] not in (None, "flat") else 0, "since_bucket": buckets[0] if dirs and dirs[0] not in (None, "flat") else None} + + return result + + +# ── Portfolio strategic signals ─────────────────────────────────────────────── + +_PORTFOLIO_PROJECT_ID = "portfolio" + + +def _portfolio_signal_fingerprint(signal_type: str, bucket: str, project_ids: List[str]) -> str: + raw = f"portfolio|{signal_type}|{bucket}|{','.join(sorted(project_ids))}" + return hashlib.sha256(raw.encode()).hexdigest()[:32] + + +async def recompute_portfolio_signals(window: str = "7d", dry_run: bool = True) -> Dict[str, Any]: + """Create/refresh portfolio drift signals based on streak detection across all projects. + + Uses graph_signals with project_id='portfolio'. 
+ """ + db = await get_db() + current_bucket = compute_lesson_bucket() + now = _now() + + # Gather all projects + async with db.execute("SELECT project_id FROM projects") as cur: + all_pids = [r[0] for r in await cur.fetchall()] + + # Compute streaks per project + project_streaks: Dict[str, Dict] = {} + project_lessons: Dict[str, List[str]] = {} + for pid in all_pids: + streaks = await compute_lesson_streaks(pid) + project_streaks[pid] = streaks + recent = await get_recent_lessons(pid, limit=3) + project_lessons[pid] = [r["lesson_id"] for r in recent] + + # Build drift groups + drift_groups: Dict[str, List[Dict]] = { + "portfolio_risk_drift": [], + "portfolio_ops_drift": [], + "portfolio_quality_drift": [], + } + metric_map = {"risk": "portfolio_risk_drift", "ops": "portfolio_ops_drift", "quality": "portfolio_quality_drift"} + + for pid, streaks in project_streaks.items(): + for metric, signal_type in metric_map.items(): + s = streaks.get(metric, {}) + if s.get("dir") == "regressing" and s.get("len", 0) >= 2: + drift_groups[signal_type].append({ + "project_id": pid, + "streak": s, + "lesson_ids": project_lessons.get(pid, [])[:s.get("len", 2)], + }) + + # Ensure pseudo-project "portfolio" exists in projects table (for FK constraint) + if not dry_run: + await db.execute( + "INSERT OR IGNORE INTO projects(project_id,name,created_at,updated_at) VALUES(?,?,?,?)", + (_PORTFOLIO_PROJECT_ID, "Portfolio (Meta)", now, now), + ) + + changes = [] + for signal_type, affected_projects in drift_groups.items(): + if not affected_projects: + continue + + pids = [p["project_id"] for p in affected_projects] + max_len = max(p["streak"]["len"] for p in affected_projects) + severity = "critical" if max_len >= 3 else "high" + fingerprint = _portfolio_signal_fingerprint(signal_type, current_bucket, pids) + title = { + "portfolio_risk_drift": f"Portfolio Risk Drift — {len(pids)} project(s)", + "portfolio_ops_drift": f"Portfolio Ops Drift — {len(pids)} project(s)", + 
"portfolio_quality_drift": f"Portfolio Quality Drift — {len(pids)} project(s)", + }[signal_type] + summary = f"Streak len≥2 regressing for {len(pids)} project(s): {', '.join(pids[:5])}" + evidence = json.dumps({ + "bucket": current_bucket, + "window": window, + "cooldown_hours": 12, + "projects": affected_projects, + }) + + changes.append({ + "signal_type": signal_type, + "severity": severity, + "fingerprint": fingerprint, + "affected_projects": pids, + "title": title, + }) + + if dry_run: + continue + + # Upsert signal (reuse anti-spam logic: reopen if cooldown expired) + async with db.execute( + "SELECT id, status, updated_at FROM graph_signals WHERE project_id=? AND fingerprint=?", + (_PORTFOLIO_PROJECT_ID, fingerprint), + ) as cur: + existing = await cur.fetchone() + + if existing: + sig_id, status, upd_at = existing[0], existing[1], existing[2] + if status in ("resolved", "dismissed"): + # Reopen + await db.execute( + "UPDATE graph_signals SET status='open', severity=?, title=?, summary=?, evidence=?, updated_at=? WHERE id=?", + (severity, title, summary, evidence, now, sig_id), + ) + else: + sig_id = str(uuid.uuid4()) + await db.execute( + """INSERT INTO graph_signals(id,project_id,signal_type,severity,title,summary,evidence,status,fingerprint,created_at,updated_at) + VALUES(?,?,?,?,?,?,?,?,?,?,?)""", + (sig_id, _PORTFOLIO_PROJECT_ID, signal_type, severity, title, summary, evidence, "open", fingerprint, now, now), + ) + + if not dry_run: + await db.commit() + + return {"dry_run": dry_run, "changes": changes, "bucket": current_bucket, "portfolio_project_id": _PORTFOLIO_PROJECT_ID} + + +async def list_portfolio_signals(status: str = "open") -> List[Dict[str, Any]]: + """List portfolio-level drift signals.""" + db = await get_db() + if status == "all": + async with db.execute( + "SELECT * FROM graph_signals WHERE project_id=? 
ORDER BY severity DESC, created_at DESC LIMIT 30", + (_PORTFOLIO_PROJECT_ID,), + ) as cur: + rows = await cur.fetchall() + else: + async with db.execute( + "SELECT * FROM graph_signals WHERE project_id=? AND status=? ORDER BY severity DESC, created_at DESC LIMIT 30", + (_PORTFOLIO_PROJECT_ID, status), + ) as cur: + rows = await cur.fetchall() + result = [] + for r in rows: + d = dict(r) + try: + d["evidence_parsed"] = json.loads(d.get("evidence") or "{}") + except Exception: + d["evidence_parsed"] = {} + result.append(d) + return result + + +async def list_lessons( + project_id: str, + window: str = "7d", + limit: int = 8, +) -> List[Dict[str, Any]]: + """List lessons for a project, ordered by date_bucket desc.""" + db = await get_db() + if window: + async with db.execute( + "SELECT * FROM lessons WHERE project_id=? AND window=? ORDER BY date_bucket DESC LIMIT ?", + (project_id, window, limit), + ) as cur: + rows = await cur.fetchall() + else: + async with db.execute( + "SELECT * FROM lessons WHERE project_id=? ORDER BY date_bucket DESC LIMIT ?", + (project_id, limit), + ) as cur: + rows = await cur.fetchall() + result = [] + for row in rows: + r = dict(row) + try: + r["metrics"] = json.loads(r.get("metrics_json", "{}")) + except Exception: + r["metrics"] = {} + result.append(r) + return result + + +async def get_lesson_detail(project_id: str, lesson_id: str) -> Optional[Dict[str, Any]]: + """Get full lesson details including markdown (from node props) + linked evidence + delta/impact.""" + db = await get_db() + async with db.execute( + "SELECT * FROM lessons WHERE lesson_id=? 
AND project_id=?", (lesson_id, project_id) + ) as cur: + row = await cur.fetchone() + if not row: + return None + lesson = dict(row) + try: + metrics = json.loads(lesson.get("metrics_json", "{}")) + lesson["metrics"] = metrics + lesson["delta"] = metrics.get("delta") + lesson["trend_flags"] = metrics.get("trend_flags") + lesson["current"] = metrics.get("current") + lesson["previous"] = metrics.get("previous") + except Exception: + lesson["metrics"] = {} + lesson["delta"] = None + lesson["trend_flags"] = None + lesson["current"] = None + lesson["previous"] = None + + # Impact + try: + lesson["impact"] = json.loads(lesson.get("impact_json", "{}")) + except Exception: + lesson["impact"] = {} + + # Get markdown from lesson node props + async with db.execute( + "SELECT props FROM dialog_nodes WHERE node_id=?", (lesson["lesson_node_id"],) + ) as cur: + nrow = await cur.fetchone() + lesson["markdown"] = "" + if nrow: + try: + p = json.loads(nrow[0] or "{}") + lesson["markdown"] = p.get("markdown", "") + except Exception: + pass + + # Get linked signal ids + async with db.execute( + "SELECT to_id FROM entity_links WHERE project_id=? AND from_id=? AND link_type='summarizes'", + (project_id, lesson["lesson_node_id"]), + ) as cur: + sig_links = [r[0] for r in await cur.fetchall()] + lesson["linked_signal_ids"] = sig_links + + # Get improvement task ids via derives_task edges + async with db.execute( + "SELECT dn.ref_id FROM dialog_edges de " + "JOIN dialog_nodes dn ON dn.node_id=de.to_node_id " + "WHERE de.project_id=? AND de.from_node_id=? 
AND de.edge_type='derives_task'", + (project_id, lesson["lesson_node_id"]), + ) as cur: + task_ids = [r[0] for r in await cur.fetchall()] + lesson["improvement_task_ids"] = task_ids + + return lesson + + +# ── Governance Gates (Level 6) ──────────────────────────────────────────────── + +_GATE_BLOCK_RELEASE = "BLOCK_RELEASE" +_GATE_DEGRADE_MODE = "DEGRADE_MODE" +_GATE_PROMPT_FREEZE = "PROMPT_FREEZE" + +_DRIFT_WORKFLOW_MAP: Dict[str, List[str]] = { + "portfolio_ops_drift": ["incident_triage", "alert_triage"], + "portfolio_risk_drift": ["release_check"], + "portfolio_quality_drift": ["postmortem_draft"], +} + +_ALLOWED_AUTO_WORKFLOWS = { + "incident_triage", "alert_triage", "release_check", "postmortem_draft", "workflow_hardening", +} + + +async def evaluate_governance_gates( + project_id: str, + window: str = "7d", + dry_run: bool = True, +) -> Dict[str, Any]: + """Evaluate governance policy gates for a project. + + Gates: + BLOCK_RELEASE: open high/critical [RISK] tasks exist + DEGRADE_MODE: ops_failure_rate > 33% with >=3 runs + PROMPT_FREEZE: quality_regressing streak >= 2 + + Returns {gates:[...], bucket, snapshot_at} and optionally persists as decision node. + """ + db = await get_db() + bucket = compute_lesson_bucket() + now = _now() + gates: List[Dict[str, Any]] = [] + + # ── Gate 1: BLOCK_RELEASE ────────────────────────────────────────────────── + # No JOIN needed — check tasks directly (dialog_node may not always exist for programmatic tasks) + async with db.execute( + "SELECT COUNT(*) FROM tasks " + "WHERE project_id=? 
AND status NOT IN ('done') " + "AND priority IN ('high','urgent') " + "AND (title LIKE '%[RISK]%' OR labels LIKE '%risk%')", + (project_id,), + ) as cur: + risk_count = (await cur.fetchone())[0] + + gates.append({ + "name": _GATE_BLOCK_RELEASE, + "status": "BLOCKED" if risk_count > 0 else "PASS", + "reason": f"{risk_count} open high/critical [RISK] task(s)" if risk_count > 0 else "No open [RISK] tasks", + "evidence": {"open_risk_tasks": risk_count}, + }) + + # ── Gate 2: DEGRADE_MODE ────────────────────────────────────────────────── + # Read directly from graph_snapshots to avoid any row-factory issues + async with db.execute( + "SELECT metrics FROM graph_snapshots WHERE project_id=? AND window=? ORDER BY created_at DESC LIMIT 1", + (project_id, window), + ) as cur: + snap_row = await cur.fetchone() + sm: Dict[str, Any] = {} + if snap_row: + try: + sm = json.loads(snap_row[0]) if isinstance(snap_row[0], str) else (snap_row[0] or {}) + except Exception: + sm = {} + ops_fail_rate = float(sm.get("ops_failure_rate") or 0.0) + ops_runs = int(sm.get("agent_runs_in_window") or sm.get("runs") or 0) + degrade = ops_fail_rate > 0.33 and ops_runs >= 3 + gates.append({ + "name": _GATE_DEGRADE_MODE, + "status": "DEGRADED" if degrade else "PASS", + "reason": f"Ops failure rate {ops_fail_rate:.0%} (>{ops_runs} runs)" if degrade else f"Ops failure rate {ops_fail_rate:.0%} within limits", + "evidence": {"ops_failure_rate": ops_fail_rate, "ops_runs": ops_runs, "threshold": 0.33}, + }) + + # ── Gate 3: PROMPT_FREEZE ───────────────────────────────────────────────── + streaks = await compute_lesson_streaks(project_id) + q_streak = streaks.get("quality", {}) + freeze = q_streak.get("dir") == "regressing" and q_streak.get("len", 0) >= 2 + gates.append({ + "name": _GATE_PROMPT_FREEZE, + "status": "FROZEN" if freeze else "PASS", + "reason": f"Quality regressing ×{q_streak.get('len',0)} buckets" if freeze else "Quality within acceptable range", + "evidence": {"quality_streak": q_streak}, 
+ }) + + result: Dict[str, Any] = { + "project_id": project_id, + "window": window, + "bucket": bucket, + "evaluated_at": now, + "gates": gates, + "summary": { + "blocked": [g["name"] for g in gates if g["status"] != "PASS"], + "all_clear": all(g["status"] == "PASS" for g in gates), + }, + } + + if dry_run: + result["dry_run"] = True + # Audit: gate_previewed + asyncio.ensure_future(append_governance_event( + scope="project", project_id=project_id, actor_type="user", + event_type="gate_previewed", + idempotency_key=f"ge|preview|{project_id}|{bucket}", + severity="info", status="ok", ref_type="gate_decision", + evidence=_make_evidence( + f"Gates previewed — all_clear={result['summary']['all_clear']}", + bucket=bucket, + outputs={"gates": [{"name": g["name"], "status": g["status"], "reason": g["reason"]} for g in gates]}, + ), + )) + return result + + # Persist as decision node + store in props + gate_fingerprint = hashlib.sha256(f"{project_id}|gates|{bucket}".encode()).hexdigest()[:24] + gate_node_id: Optional[str] = None + async with db.execute( + "SELECT node_id FROM dialog_nodes WHERE project_id=? AND fingerprint=? AND node_type='decision'", + (project_id, gate_fingerprint), + ) as cur: + existing_node = await cur.fetchone() + + gate_content = json.dumps(result, indent=2) + if existing_node: + gate_node_id = existing_node[0] + await db.execute( + "UPDATE dialog_nodes SET props=?, updated_at=? 
WHERE node_id=?", + (json.dumps({"markdown": f"```json\n{gate_content}\n```", "gates": result}), now, gate_node_id), + ) + else: + gate_node_id = str(uuid.uuid4()) + blocked = result["summary"]["blocked"] + title = f"Governance Gates: {bucket}" + (f" ⚠️ {', '.join(blocked)}" if blocked else " ✓ Clear") + await db.execute( + "INSERT INTO dialog_nodes(node_id,project_id,node_type,title,summary,props,ref_id,created_by,created_at,updated_at,lifecycle,importance,fingerprint)" + " VALUES(?,?,?,?,?,?,?,?,?,?,?,?,?)", + (gate_node_id, project_id, "decision", title, + f"Gates evaluated: {len(gates)} | Blocked: {len(blocked)}", + json.dumps({"markdown": f"```json\n{gate_content}\n```", "gates": result}), + gate_node_id, "sofiia", now, now, "active", 0.85, gate_fingerprint), + ) + + await db.commit() + result["dry_run"] = False + result["gate_node_id"] = gate_node_id + # Audit: gate_evaluated + asyncio.ensure_future(append_governance_event( + scope="project", project_id=project_id, actor_type="user", + event_type="gate_evaluated", + idempotency_key=f"ge|persist|{project_id}|{bucket}", + severity="warn" if result["summary"]["blocked"] else "info", + status="ok", ref_type="gate_decision", ref_id=gate_node_id, + evidence=_make_evidence( + f"Gates evaluated — blocked={result['summary']['blocked']}", + bucket=bucket, + outputs={"gates": [{"name": g["name"], "status": g["status"]} for g in gates]}, + links={"gate_decision_node_id": gate_node_id}, + ), + )) + return result + + +# ── Auto-plan / Auto-run for Portfolio Drift Signals ───────────────────────── + +def _auto_run_idem_key(signal_type: str, bucket: str, project_id: str, workflow: str) -> str: + return f"ar|{signal_type}|{bucket}|{project_id}|{workflow}" + + +async def auto_plan_drift_signal(signal_id: str) -> Dict[str, Any]: + """Populate evidence.auto_actions.runs with planned entries (dry_run=True) for a portfolio drift signal.""" + db = await get_db() + async with db.execute( + "SELECT id, signal_type, evidence, status 
FROM graph_signals WHERE id=? AND project_id='portfolio'", + (signal_id,), + ) as cur: + row = await cur.fetchone() + if not row: + return {"error": "Signal not found"} + + sig_id, signal_type, evidence_raw, sig_status = row[0], row[1], row[2], row[3] + try: + evidence = json.loads(evidence_raw or "{}") + except Exception: + evidence = {} + + bucket = evidence.get("bucket", compute_lesson_bucket()) + workflows = _DRIFT_WORKFLOW_MAP.get(signal_type, []) + projects_in_ev = [p["project_id"] for p in evidence.get("projects", [])] + + auto_actions = evidence.get("auto_actions", { + "policy": {"mode": "off", "scope": "portfolio_drift"}, + "runs": [], + }) + existing_keys = {r["idempotency_key"] for r in auto_actions.get("runs", [])} + now = _now() + planned = [] + for pid in projects_in_ev: + for wf in workflows: + ikey = _auto_run_idem_key(signal_type, bucket, pid, wf) + if ikey in existing_keys: + continue + entry = { + "idempotency_key": ikey, + "action_type": "workflow_run", + "workflow": wf, + "project_id": pid, + "source_signal_id": sig_id, + "bucket": bucket, + "status": "planned", + "dry_run": True, + "created_at": now, + "updated_at": now, + "run_id": None, + "error": None, + "meta": { + "reason": f"{signal_type} auto-plan", + "streak_len": evidence.get("projects", [{}])[0].get("streak", {}).get("len", 0), + "severity": "", + "initiator": "user", + "attempt": 0, + }, + } + auto_actions.setdefault("runs", []).append(entry) + existing_keys.add(ikey) + planned.append(entry) + + evidence["auto_actions"] = auto_actions + await db.execute( + "UPDATE graph_signals SET evidence=?, updated_at=? 
WHERE id=?", + (json.dumps(evidence), now, sig_id), + ) + await db.commit() + # Audit: drift_planned + if planned: + asyncio.ensure_future(append_governance_event( + scope="portfolio", project_id=_PORTFOLIO_PROJECT_ID, actor_type="user", + event_type="drift_planned", + idempotency_key=f"dp|{sig_id}|{bucket}", + severity="info", status="ok", ref_type="signal", ref_id=sig_id, + evidence=_make_evidence( + f"Drift auto-plan: {len(planned)} run(s) planned for {signal_type}", + bucket=bucket, + outputs={"planned_runs_count": len(planned), + "workflows": list({r["workflow"] for r in planned}), + "projects": list({r["project_id"] for r in planned})}, + links={"signal_id": sig_id}, + ), + )) + return {"signal_id": sig_id, "planned": planned, "total_planned": len(planned)} + + +async def auto_run_drift_signal( + signal_id: str, + dry_run: bool = False, + force: bool = False, + supervisor_url: str = "http://sofiia-supervisor:8080", +) -> Dict[str, Any]: + """Execute planned workflow runs for a portfolio drift signal. + + Reads evidence.auto_actions.runs, fires supervisor runs for queued/planned entries. + Idempotent: skips entries already in queued/running/done. + """ + db = await get_db() + async with db.execute( + "SELECT id, signal_type, evidence FROM graph_signals WHERE id=? 
AND project_id='portfolio'", + (signal_id,), + ) as cur: + row = await cur.fetchone() + if not row: + return {"error": "Signal not found"} + + sig_id, signal_type, evidence_raw = row[0], row[1], row[2] + try: + evidence = json.loads(evidence_raw or "{}") + except Exception: + evidence = {} + + auto_actions = evidence.get("auto_actions", {"policy": {"mode": "off"}, "runs": []}) + runs = auto_actions.get("runs", []) + if not runs: + # Auto-plan first + plan_result = await auto_plan_drift_signal(signal_id) + # Reload + async with db.execute( + "SELECT evidence FROM graph_signals WHERE id=?", (sig_id,) + ) as cur: + new_ev = await cur.fetchone() + evidence = json.loads(new_ev[0] or "{}") + auto_actions = evidence.get("auto_actions", {"runs": []}) + runs = auto_actions.get("runs", []) + + now = _now() + fired: List[Dict] = [] + skipped: List[str] = [] + + for run_entry in runs: + status = run_entry.get("status", "planned") + if status in ("queued", "running", "done") and not force: + skipped.append(run_entry["idempotency_key"]) + continue + if status == "failed" and not force: + skipped.append(run_entry["idempotency_key"]) + continue + + wf = run_entry.get("workflow") + if wf not in _ALLOWED_AUTO_WORKFLOWS: + run_entry["status"] = "skipped" + run_entry["error"] = {"code": "NOT_ALLOWED", "message": f"Workflow '{wf}' not in allowlist"} + skipped.append(run_entry["idempotency_key"]) + continue + + if dry_run: + run_entry["dry_run"] = True + run_entry["status"] = "planned" + fired.append({"ikey": run_entry["idempotency_key"], "workflow": wf, "project_id": run_entry["project_id"], "dry_run": True}) + continue + + # Mark as queued BEFORE calling supervisor (race-safe) + run_entry["status"] = "queued" + run_entry["dry_run"] = False + run_entry["updated_at"] = now + ikey = run_entry["idempotency_key"] + run_entry["meta"]["attempt"] = run_entry.get("meta", {}).get("attempt", 0) + 1 + evidence["auto_actions"] = auto_actions + await db.execute( + "UPDATE graph_signals SET 
evidence=?, updated_at=? WHERE id=?", + (json.dumps(evidence), now, sig_id), + ) + await db.commit() + # Audit: queued + asyncio.ensure_future(append_governance_event( + scope="portfolio", project_id=_PORTFOLIO_PROJECT_ID, actor_type="user", + event_type="drift_run_queued", + idempotency_key=f"dr|queued|{ikey}", + severity="info", status="ok", ref_type="signal", ref_id=sig_id, + evidence=_make_evidence( + f"Run queued: {wf} for {run_entry['project_id']}", + outputs={"workflow": wf, "project_id": run_entry["project_id"]}, + links={"signal_id": sig_id}, + ), + )) + + # Call supervisor (httpx is already a project dependency) + try: + import httpx as _httpx + payload = { + "graph": wf, + "project_id": run_entry["project_id"], + "source_signal_id": sig_id, + "idempotency_key": ikey, + } + async with _httpx.AsyncClient(timeout=10.0) as client: + resp = await client.post(f"{supervisor_url}/api/graphs/{wf}/runs", json=payload) + if resp.status_code in (200, 201, 202): + resp_data = resp.json() + run_entry["run_id"] = resp_data.get("run_id") + run_entry["status"] = "running" + # Audit: started + asyncio.ensure_future(append_governance_event( + scope="portfolio", project_id=_PORTFOLIO_PROJECT_ID, actor_type="system", + event_type="drift_run_started", + idempotency_key=f"dr|started|{ikey}", + severity="info", status="ok", ref_type="run", ref_id=run_entry["run_id"], + evidence=_make_evidence( + f"Run started: {wf} run_id={run_entry['run_id']}", + outputs={"workflow": wf, "run_id": run_entry["run_id"]}, + links={"signal_id": sig_id, "run_id": run_entry["run_id"]}, + ), + )) + else: + err_code = str(resp.status_code) + run_entry["status"] = "failed" + run_entry["error"] = {"code": err_code, "message": resp.text[:200], "at": now} + asyncio.ensure_future(append_governance_event( + scope="portfolio", project_id=_PORTFOLIO_PROJECT_ID, actor_type="system", + event_type="drift_run_failed", + idempotency_key=f"dr|fail|{ikey}|{err_code}", + severity="high", status="error", 
ref_type="signal", ref_id=sig_id, + evidence=_make_evidence( + f"Run failed HTTP {err_code}: {wf}", + outputs={"workflow": wf, "error_code": err_code, "message": resp.text[:200]}, + links={"signal_id": sig_id}, + ), + )) + except Exception as e: + err_msg = str(e)[:200] + run_entry["status"] = "failed" + run_entry["error"] = {"code": "EXCEPTION", "message": err_msg, "at": now} + asyncio.ensure_future(append_governance_event( + scope="portfolio", project_id=_PORTFOLIO_PROJECT_ID, actor_type="system", + event_type="drift_run_failed", + idempotency_key=f"dr|fail|{ikey}|exception", + severity="high", status="error", ref_type="signal", ref_id=sig_id, + evidence=_make_evidence( + f"Run exception: {wf} — {err_msg}", + outputs={"workflow": wf, "error_code": "EXCEPTION", "message": err_msg}, + links={"signal_id": sig_id}, + ), + )) + + run_entry["updated_at"] = now + fired.append({ + "ikey": ikey, + "workflow": wf, + "project_id": run_entry["project_id"], + "status": run_entry["status"], + "run_id": run_entry.get("run_id"), + }) + + # Final persist + evidence["auto_actions"] = auto_actions + await db.execute( + "UPDATE graph_signals SET evidence=?, updated_at=? 
WHERE id=?", + (json.dumps(evidence), now, sig_id), + ) + await db.commit() + + return { + "signal_id": sig_id, + "dry_run": dry_run, + "fired": fired, + "skipped": skipped, + "total_runs": len(runs), + } + + +# ── Governance Audit Trail (Level 7) ───────────────────────────────────────── + +def _make_evidence( + message: str, + bucket: Optional[str] = None, + inputs: Optional[Dict] = None, + outputs: Optional[Dict] = None, + links: Optional[Dict] = None, + timings: Optional[Dict] = None, +) -> Dict[str, Any]: + """Build a v=1 evidence_json payload.""" + return { + "v": 1, + "message": message, + "bucket": bucket or compute_lesson_bucket(), + "inputs": inputs or {}, + "outputs": outputs or {}, + "links": { + "signal_id": None, + "lesson_id": None, + "run_id": None, + "gate_decision_node_id": None, + "task_ids": [], + **(links or {}), + }, + "timings": { + "started_at": None, + "finished_at": None, + "elapsed_ms": None, + **(timings or {}), + }, + } + + +async def append_governance_event( + scope: str, + project_id: str, + actor_type: str, + event_type: str, + idempotency_key: str, + *, + actor_id: Optional[str] = None, + severity: str = "info", + status: str = "ok", + ref_type: Optional[str] = None, + ref_id: Optional[str] = None, + evidence: Optional[Dict[str, Any]] = None, +) -> Optional[str]: + """Append an audit event. INSERT ... 
ON CONFLICT DO NOTHING (idempotent).""" + db = await get_db() + # Ensure pseudo-project exists for portfolio scope + if project_id == _PORTFOLIO_PROJECT_ID: + await db.execute( + "INSERT OR IGNORE INTO projects(project_id,name,created_at,updated_at) VALUES(?,?,?,?)", + (_PORTFOLIO_PROJECT_ID, "portfolio", _now(), _now()), + ) + event_id = str(uuid.uuid4()) + ev_json = json.dumps(evidence or _make_evidence("governance event")) + try: + await db.execute( + "INSERT INTO governance_events" + "(event_id,scope,project_id,actor_type,actor_id,event_type,idempotency_key," + " severity,status,ref_type,ref_id,evidence_json,created_at)" + " VALUES(?,?,?,?,?,?,?,?,?,?,?,?,?)" + " ON CONFLICT(idempotency_key) DO NOTHING", + (event_id, scope, project_id, actor_type, actor_id, event_type, + idempotency_key, severity, status, ref_type, ref_id, ev_json, _now()), + ) + await db.commit() + return event_id + except Exception: + return None + + +async def list_governance_events( + scope: Optional[str] = None, + project_id: Optional[str] = None, + event_type: Optional[str] = None, + status: Optional[str] = None, + since: Optional[str] = None, + limit: int = 100, +) -> List[Dict[str, Any]]: + """Return audit events with optional filters, newest first.""" + db = await get_db() + where_parts: List[str] = [] + params: List[Any] = [] + if scope: + where_parts.append("scope=?"); params.append(scope) + if project_id: + where_parts.append("project_id=?"); params.append(project_id) + if event_type: + where_parts.append("event_type=?"); params.append(event_type) + if status: + where_parts.append("status=?"); params.append(status) + if since: + where_parts.append("created_at >= ?"); params.append(since) + where_sql = ("WHERE " + " AND ".join(where_parts)) if where_parts else "" + params.append(limit) + async with db.execute( + f"SELECT event_id,scope,project_id,actor_type,actor_id,event_type," + f" idempotency_key,severity,status,ref_type,ref_id,evidence_json,created_at" + f" FROM governance_events 
{where_sql}" + f" ORDER BY created_at DESC LIMIT ?", + params, + ) as cur: + rows = await cur.fetchall() + result = [] + for row in rows: + r = dict(row) + try: + r["evidence"] = json.loads(r.pop("evidence_json", "{}")) + except Exception: + r["evidence"] = {} + result.append(r) + return result + + +# ── Agent Overrides (Level 8 + Agents Ops) ──────────────────────────────────── + +def _agent_payload_hash(payload: Dict[str, Any]) -> str: + """Stable sha256 of canonical JSON payload (sorted keys, no timestamps).""" + import hashlib + canon = json.dumps({k: v for k, v in sorted(payload.items()) + if k not in ("updated_at", "last_applied_at", "last_applied_hash")}, + ensure_ascii=False, sort_keys=True) + return hashlib.sha256(canon.encode()).hexdigest()[:16] + + +async def get_agent_override(node_id: str, agent_id: str) -> Optional[Dict[str, Any]]: + db = await get_db() + async with db.execute( + "SELECT node_id,agent_id,display_name,domain,system_prompt_md,is_hidden," + "last_applied_hash,last_applied_at,updated_at" + " FROM agent_overrides WHERE node_id=? AND agent_id=?", + (node_id, agent_id), + ) as cur: + row = await cur.fetchone() + return dict(row) if row else None + + +async def upsert_agent_override( + node_id: str, + agent_id: str, + *, + display_name: Optional[str] = None, + domain: Optional[str] = None, + system_prompt_md: Optional[str] = None, + is_hidden: Optional[bool] = None, + _mark_applied_hash: Optional[str] = None, +) -> Dict[str, Any]: + """Create or update an agent override. Only provided fields are changed. + Automatically writes a version snapshot (idempotent by content hash). 
+ """ + db = await get_db() + existing = await get_agent_override(node_id, agent_id) + now = _now() + + if existing: + if display_name is not None: existing["display_name"] = display_name + if domain is not None: existing["domain"] = domain + if system_prompt_md is not None: existing["system_prompt_md"] = system_prompt_md + if is_hidden is not None: existing["is_hidden"] = int(is_hidden) + if _mark_applied_hash is not None: + existing["last_applied_hash"] = _mark_applied_hash + existing["last_applied_at"] = now + existing["updated_at"] = now + await db.execute( + "UPDATE agent_overrides SET display_name=?,domain=?,system_prompt_md=?," + "is_hidden=?,last_applied_hash=?,last_applied_at=?,updated_at=?" + " WHERE node_id=? AND agent_id=?", + (existing["display_name"], existing["domain"], existing["system_prompt_md"], + existing["is_hidden"], existing.get("last_applied_hash"), + existing.get("last_applied_at"), now, node_id, agent_id), + ) + else: + existing = { + "node_id": node_id, "agent_id": agent_id, + "display_name": display_name, "domain": domain, + "system_prompt_md": system_prompt_md, + "is_hidden": int(is_hidden) if is_hidden is not None else 0, + "last_applied_hash": _mark_applied_hash, + "last_applied_at": now if _mark_applied_hash else None, + "updated_at": now, + } + await db.execute( + "INSERT INTO agent_overrides(node_id,agent_id,display_name,domain,system_prompt_md," + "is_hidden,last_applied_hash,last_applied_at,updated_at)" + " VALUES(?,?,?,?,?,?,?,?,?)", + (node_id, agent_id, existing["display_name"], existing["domain"], + existing["system_prompt_md"], existing["is_hidden"], + existing["last_applied_hash"], existing["last_applied_at"], now), + ) + + # Write version snapshot (idempotent by hash) + payload = { + "display_name": existing.get("display_name"), + "domain": existing.get("domain"), + "system_prompt_md": existing.get("system_prompt_md"), + } + vh = _agent_payload_hash(payload) + await db.execute( + "INSERT OR IGNORE INTO 
agent_override_versions(id,node_id,agent_id,version_hash,payload_json,created_at)" + " VALUES(?,?,?,?,?,?)", + (str(uuid.uuid4()), node_id, agent_id, vh, json.dumps(payload), now), + ) + + await db.commit() + existing["version_hash"] = vh + return existing + + +async def delete_agent_override(node_id: str, agent_id: str) -> bool: + db = await get_db() + await db.execute( + "DELETE FROM agent_overrides WHERE node_id=? AND agent_id=?", (node_id, agent_id) + ) + await db.commit() + return True + + +async def list_agent_overrides(node_id: Optional[str] = None) -> List[Dict[str, Any]]: + db = await get_db() + where = "WHERE node_id=?" if node_id else "" + params = (node_id,) if node_id else () + async with db.execute( + f"SELECT node_id,agent_id,display_name,domain,system_prompt_md,is_hidden," + f"last_applied_hash,last_applied_at,updated_at" + f" FROM agent_overrides {where} ORDER BY node_id,agent_id", + params, + ) as cur: + rows = await cur.fetchall() + return [dict(r) for r in rows] + + +async def list_agent_versions(node_id: str, agent_id: str, limit: int = 10) -> List[Dict[str, Any]]: + """Return version history for an agent (most recent first).""" + db = await get_db() + async with db.execute( + "SELECT id,node_id,agent_id,version_hash,payload_json,created_at" + " FROM agent_override_versions WHERE node_id=? AND agent_id=?" + " ORDER BY created_at DESC LIMIT ?", + (node_id, agent_id, limit), + ) as cur: + rows = await cur.fetchall() + result = [] + for r in rows: + d = dict(r) + try: + d["payload"] = json.loads(d["payload_json"]) + except Exception: + d["payload"] = {} + result.append(d) + return result + + +async def get_agent_version_by_hash(node_id: str, agent_id: str, version_hash: str) -> Optional[Dict[str, Any]]: + db = await get_db() + async with db.execute( + "SELECT id,node_id,agent_id,version_hash,payload_json,created_at" + " FROM agent_override_versions WHERE node_id=? AND agent_id=? 
AND version_hash=?", + (node_id, agent_id, version_hash), + ) as cur: + row = await cur.fetchone() + if not row: + return None + d = dict(row) + try: + d["payload"] = json.loads(d["payload_json"]) + except Exception: + d["payload"] = {} + return d + + +# ── Document versions (doc_service persistence) ─────────────────────────────── + +async def insert_doc_version( + project_id: str, + document_id: str, + version_hash: str, + artifact_path: str, + created_by: str = "system", + reason: str = "", +) -> str: + """Insert a doc version row (idempotent by version_hash). Returns version id.""" + db = await get_db() + now = datetime.utcnow().isoformat() + vid = str(uuid.uuid4()) + await db.execute( + "INSERT OR IGNORE INTO doc_versions" + "(id,project_id,document_id,version_hash,artifact_path,created_at,created_by,reason)" + " VALUES(?,?,?,?,?,?,?,?)", + (vid, project_id, document_id, version_hash, artifact_path, now, created_by, reason), + ) + await db.commit() + # Return the existing id if it was a no-op insert + async with db.execute( + "SELECT id FROM doc_versions WHERE project_id=? AND document_id=? AND version_hash=?", + (project_id, document_id, version_hash), + ) as cur: + row = await cur.fetchone() + return row["id"] if row else vid + + +async def list_doc_versions( + project_id: str, + document_id: str, + limit: int = 20, +) -> List[Dict[str, Any]]: + """Return version history for a document (most recent first).""" + db = await get_db() + async with db.execute( + "SELECT id,project_id,document_id,version_hash,artifact_path,created_at,created_by,reason" + " FROM doc_versions WHERE project_id=? AND document_id=?" 
+ " ORDER BY created_at DESC LIMIT ?", + (project_id, document_id, limit), + ) as cur: + rows = await cur.fetchall() + return [dict(r) for r in rows] + + +async def get_doc_version_by_hash( + project_id: str, + document_id: str, + version_hash: str, +) -> Optional[Dict[str, Any]]: + db = await get_db() + async with db.execute( + "SELECT id,project_id,document_id,version_hash,artifact_path,created_at,created_by,reason" + " FROM doc_versions WHERE project_id=? AND document_id=? AND version_hash=?", + (project_id, document_id, version_hash), + ) as cur: + row = await cur.fetchone() + return dict(row) if row else None diff --git a/services/sofiia-console/app/main.py b/services/sofiia-console/app/main.py index 908b802b..fbfde3e1 100644 --- a/services/sofiia-console/app/main.py +++ b/services/sofiia-console/app/main.py @@ -4,6 +4,7 @@ Runtime contract (project/session/user), full status, WebSocket events, voice proxy, ops, nodes. UI never calls external services directly. """ import asyncio +import base64 import io import json import os @@ -70,6 +71,10 @@ _NODE_ID = os.getenv("NODE_ID", os.getenv("HOSTNAME", "noda2")) # ── Rate limiter ────────────────────────────────────────────────────────────── _rate_buckets: Dict[str, collections.deque] = {} +# ── Chat idempotency cache (TTL in-memory) ─────────────────────────────────── +_IDEMPOTENCY_TTL_SEC = int(os.getenv("CHAT_IDEMPOTENCY_TTL_SEC", "900")) +_idempotency_cache: "collections.OrderedDict[str, Dict[str, Any]]" = collections.OrderedDict() + def _check_rate(key: str, max_calls: int, window_sec: int = 60) -> bool: now = time.monotonic() dq = _rate_buckets.setdefault(key, collections.deque()) @@ -80,6 +85,44 @@ def _check_rate(key: str, max_calls: int, window_sec: int = 60) -> bool: dq.append(now) return True + +def _idem_cleanup(now: Optional[float] = None) -> None: + ts = now if now is not None else time.monotonic() + while _idempotency_cache: + first_key = next(iter(_idempotency_cache)) + exp = 
float((_idempotency_cache[first_key] or {}).get("expires_at", 0)) + if exp > ts: + break + _idempotency_cache.popitem(last=False) + + +def _idem_get(chat_id: str, idem_key: str) -> Optional[Dict[str, Any]]: + _idem_cleanup() + cache_key = f"{chat_id}::{idem_key}" + hit = _idempotency_cache.get(cache_key) + if not hit: + return None + # Touch LRU + _idempotency_cache.move_to_end(cache_key, last=True) + payload = hit.get("payload") + return payload if isinstance(payload, dict) else None + + +def _idem_put(chat_id: str, idem_key: str, payload: Dict[str, Any]) -> None: + if not idem_key: + return + now = time.monotonic() + _idem_cleanup(now) + cache_key = f"{chat_id}::{idem_key}" + _idempotency_cache[cache_key] = { + "expires_at": now + max(60, _IDEMPOTENCY_TTL_SEC), + "payload": payload, + } + _idempotency_cache.move_to_end(cache_key, last=True) + # Bound memory growth + while len(_idempotency_cache) > 5000: + _idempotency_cache.popitem(last=False) + # ── Voice error rings (repro pack for incident diagnosis) ───────────────────── # Circular buffers: last 5 TTS errors and last 5 LLM errors. # Populated by all voice endpoints. Read by /api/voice/degradation_status. 
@@ -1400,7 +1443,7 @@ async def _smart_monitor_run(run_id: str) -> None: run["kling"] = { **kling, "status": "failed", - "error": str(exc)[:320], + "error": str(exc)[:640], } run["status"] = "completed" run["phase"] = "completed_with_kling_failure" @@ -1415,6 +1458,7 @@ async def _smart_monitor_run(run_id: str) -> None: **kling, "task_id": task_id, "status": str(submit.get("status") or "submitted").lower(), + "endpoint": str(submit.get("kling_endpoint") or "video2video"), "submitted_at": _smart_now_iso(), } _smart_append_audit(run, "kling.submitted", {"task_id": task_id}) @@ -2638,7 +2682,7 @@ async def api_aurora_report_pdf(job_id: str) -> StreamingResponse: @app.get("/api/aurora/files/{job_id}/{file_name:path}") -async def api_aurora_file(job_id: str, file_name: str) -> StreamingResponse: +async def api_aurora_file(job_id: str, file_name: str, request: Request) -> StreamingResponse: encoded_job = quote(job_id, safe="") encoded_name = quote(file_name, safe="") paths = [AURORA_SERVICE_URL] @@ -2649,7 +2693,13 @@ async def api_aurora_file(job_id: str, file_name: str) -> StreamingResponse: url = f"{base}/api/aurora/files/{encoded_job}/{encoded_name}" client = httpx.AsyncClient(timeout=httpx.Timeout(10.0, read=300.0)) try: - resp = await client.send(client.build_request("GET", url), stream=True) + upstream_headers: Dict[str, str] = {} + for name in ("range", "if-range", "if-none-match", "if-modified-since"): + value = request.headers.get(name) + if value: + upstream_headers[name] = value + + resp = await client.send(client.build_request("GET", url, headers=upstream_headers), stream=True) if resp.status_code >= 400: body = (await resp.aread()).decode(errors="replace")[:400] await resp.aclose() @@ -2659,7 +2709,22 @@ async def api_aurora_file(job_id: str, file_name: str) -> StreamingResponse: continue raise HTTPException(status_code=resp.status_code, detail=body or f"Aurora file error {resp.status_code}") ct = resp.headers.get("content-type", "application/octet-stream") 
- disp = resp.headers.get("content-disposition", f'inline; filename="{Path(file_name).name}"') + passthrough_headers: Dict[str, str] = {} + for name in ( + "content-disposition", + "content-length", + "content-range", + "accept-ranges", + "etag", + "last-modified", + "cache-control", + ): + value = resp.headers.get(name) + if value: + passthrough_headers[name] = value + if "content-disposition" not in passthrough_headers: + passthrough_headers["content-disposition"] = f'inline; filename="{Path(file_name).name}"' + passthrough_headers.setdefault("cache-control", "no-store") async def _stream(): try: @@ -2671,8 +2736,9 @@ async def api_aurora_file(job_id: str, file_name: str) -> StreamingResponse: return StreamingResponse( _stream(), + status_code=resp.status_code, media_type=ct, - headers={"Content-Disposition": disp, "Cache-Control": "no-store"}, + headers=passthrough_headers, ) except HTTPException: raise @@ -2977,6 +3043,340 @@ class ChatSendBody(BaseModel): voice_profile: Optional[str] = None +CHAT_PROJECT_ID = "chats" + + +class ChatCreateBody(BaseModel): + agent_id: str + node_id: str = "NODA2" + source: str = "console" + external_chat_ref: Optional[str] = None + title: Optional[str] = None + + +class ChatMessageSendBody(BaseModel): + text: str + attachments: List[Dict[str, Any]] = [] + project_id: Optional[str] = None + session_id: Optional[str] = None + user_id: Optional[str] = None + routing: Optional[Dict[str, Any]] = None + client: Optional[Dict[str, Any]] = None + idempotency_key: Optional[str] = None + + +def _make_chat_id(node_id: str, agent_id: str, source: str = "console", external_chat_ref: Optional[str] = None) -> str: + ext = (external_chat_ref or "main").strip() or "main" + return f"chat:{node_id.upper()}:{agent_id.strip().lower()}:{source.strip().lower()}:{ext}" + + +def _parse_chat_id(chat_id: str) -> Dict[str, str]: + raw = (chat_id or "").strip() + parts = raw.split(":", 4) + if len(parts) == 5 and parts[0] == "chat": + return { + "chat_id": 
raw, + "node_id": parts[1].upper(), + "agent_id": parts[2].lower(), + "source": parts[3].lower(), + "external_chat_ref": parts[4], + } + # Legacy fallback: treat arbitrary session_id as local NODA2 chat with sofiia + return { + "chat_id": raw, + "node_id": "NODA2", + "agent_id": "sofiia", + "source": "console", + "external_chat_ref": raw or "main", + } + + +async def _ensure_chat_project() -> None: + proj = await _app_db.get_project(CHAT_PROJECT_ID) + if not proj: + await _app_db.create_project( + name="Chats", + description="Cross-node chat index for Sofiia Console", + project_id=CHAT_PROJECT_ID, + ) + + +def _clean_chat_reply(text: str) -> str: + import re + cleaned = re.sub(r".*?", "", text, flags=re.DOTALL | re.IGNORECASE) + if "" in cleaned.lower(): + cleaned = re.split(r"(?i)", cleaned)[0] + return cleaned.strip() + + +def _cursor_encode(payload: Dict[str, Any]) -> str: + raw = json.dumps(payload, separators=(",", ":"), ensure_ascii=True).encode("utf-8") + return base64.urlsafe_b64encode(raw).decode("ascii") + + +def _cursor_decode(cursor: Optional[str]) -> Dict[str, Any]: + if not cursor: + return {} + try: + decoded = base64.urlsafe_b64decode(cursor.encode("ascii")).decode("utf-8") + data = json.loads(decoded) + return data if isinstance(data, dict) else {} + except Exception: + return {} + + +@app.get("/api/chats") +async def api_chats_list( + nodes: str = Query("NODA1,NODA2"), + agent_id: Optional[str] = Query(None), + q: Optional[str] = Query(None), + limit: int = Query(50, ge=1, le=200), + cursor: Optional[str] = Query(None), + _auth: str = Depends(require_auth), +): + await _ensure_chat_project() + node_filter = {n.strip().upper() for n in nodes.split(",") if n.strip()} + cur = _cursor_decode(cursor) + before_last_active = str(cur.get("last_active") or "").strip() or None + before_chat_id = str(cur.get("chat_id") or "").strip() or None + fetch_limit = max(limit * 5, limit + 1) + sessions = await _app_db.list_sessions_page( + CHAT_PROJECT_ID, + 
limit=fetch_limit, + before_last_active=before_last_active, + before_session_id=before_chat_id, + ) + + items: List[Dict[str, Any]] = [] + agent_filter = (agent_id or "").strip().lower() + q_filter = (q or "").strip().lower() + for s in sessions: + sid = str(s.get("session_id") or "") + if not sid: + continue + info = _parse_chat_id(sid) + if node_filter and info["node_id"] not in node_filter: + continue + if agent_filter and info["agent_id"] != agent_filter: + continue + msgs = await _app_db.list_messages(sid, limit=200) + last = msgs[-1] if msgs else None + item = { + "chat_id": sid, + "title": (s.get("title") or f"{info['agent_id']} • {info['node_id']}").strip(), + "agent_id": info["agent_id"], + "node_id": info["node_id"], + "source": info["source"], + "external_chat_ref": info["external_chat_ref"], + "updated_at": s.get("last_active"), + "last_message": ( + { + "message_id": last.get("msg_id"), + "role": last.get("role"), + "text": (last.get("content") or "")[:280], + "ts": last.get("ts"), + } if last else None + ), + "turn_count": s.get("turn_count", 0), + } + if q_filter: + hay = " ".join( + [ + item["title"], + item["agent_id"], + item["node_id"], + (item["last_message"] or {}).get("text", ""), + ] + ).lower() + if q_filter not in hay: + continue + items.append(item) + if len(items) >= limit: + break + + next_cursor = None + if items: + last_item = items[-1] + next_cursor = _cursor_encode( + { + "last_active": last_item.get("updated_at"), + "chat_id": last_item.get("chat_id"), + } + ) + has_more = len(sessions) >= fetch_limit or len(items) >= limit + return { + "items": items, + "count": len(items), + "nodes": sorted(node_filter), + "project_id": CHAT_PROJECT_ID, + "next_cursor": next_cursor, + "has_more": has_more, + } + + +@app.post("/api/chats") +async def api_chat_create(body: ChatCreateBody, _auth: str = Depends(require_auth)): + await _ensure_chat_project() + cid = _make_chat_id( + node_id=body.node_id, + agent_id=body.agent_id, + source=body.source, 
+ external_chat_ref=body.external_chat_ref, + ) + info = _parse_chat_id(cid) + title = (body.title or f"{info['agent_id']} • {info['node_id']} • {info['source']}").strip() + sess = await _app_db.upsert_session(cid, project_id=CHAT_PROJECT_ID, title=title) + return {"ok": True, "chat": {"chat_id": cid, "title": title, "agent_id": info["agent_id"], "node_id": info["node_id"], "source": info["source"], "external_chat_ref": info["external_chat_ref"], "updated_at": sess.get("last_active")}} + + +@app.get("/api/chats/{chat_id}/messages") +async def api_chat_messages( + chat_id: str, + limit: int = Query(100, ge=1, le=500), + cursor: Optional[str] = Query(None), + _auth: str = Depends(require_auth), +): + cur = _cursor_decode(cursor) + before_ts = str(cur.get("ts") or "").strip() or None + before_message_id = str(cur.get("message_id") or "").strip() or None + rows_desc = await _app_db.list_messages_page( + chat_id, + limit=limit + 1, + before_ts=before_ts, + before_msg_id=before_message_id, + ) + has_more = len(rows_desc) > limit + page_desc = rows_desc[:limit] + rows = list(reversed(page_desc)) + info = _parse_chat_id(chat_id) + messages = [ + { + "message_id": r.get("msg_id"), + "chat_id": chat_id, + "role": r.get("role"), + "text": r.get("content", ""), + "ts": r.get("ts"), + "meta": { + "node_id": info["node_id"], + "agent_id": info["agent_id"], + "source": info["source"], + }, + } + for r in rows + ] + next_cursor = None + if has_more and page_desc: + tail = page_desc[-1] + next_cursor = _cursor_encode({"ts": tail.get("ts"), "message_id": tail.get("msg_id")}) + return { + "items": messages, + "count": len(messages), + "chat_id": chat_id, + "next_cursor": next_cursor, + "has_more": has_more, + } + + +@app.post("/api/chats/{chat_id}/send") +async def api_chat_send_v2(chat_id: str, body: ChatMessageSendBody, request: Request, _auth: str = Depends(require_auth)): + client_ip = request.client.host if request.client else "unknown" + if not 
_check_rate(f"chat_v2:{client_ip}", max_calls=30, window_sec=60): + raise HTTPException(status_code=429, detail="Rate limit: 30 messages/min") + text = (body.text or "").strip() + if not text: + raise HTTPException(status_code=400, detail="text is required") + idem_key = ( + ( + request.headers.get("Idempotency-Key") + or body.idempotency_key + or "" + ).strip() + )[:128] + if idem_key: + cached = _idem_get(chat_id, idem_key) + if cached: + replay = dict(cached) + replay["idempotency"] = {"replayed": True, "key": idem_key} + return replay + + await _ensure_chat_project() + info = _parse_chat_id(chat_id) + target_node = ((body.routing or {}).get("force_node_id") or info["node_id"] or "NODA2").upper() + target_agent = info["agent_id"] or "sofiia" + project_id = body.project_id or CHAT_PROJECT_ID + session_id = body.session_id or chat_id + user_id = body.user_id or "console_user" + title = f"{target_agent} • {target_node} • {info['source']}" + await _app_db.upsert_session(chat_id, project_id=CHAT_PROJECT_ID, title=title) + + user_saved = await _app_db.save_message(chat_id, "user", text[:4096]) + metadata: Dict[str, Any] = { + "project_id": project_id, + "session_id": session_id, + "user_id": user_id, + "client": "sofiia-console", + "chat_id": chat_id, + "node_id": target_node, + "agent_id": target_agent, + "source": info["source"], + "external_chat_ref": info["external_chat_ref"], + "attachments": body.attachments or [], + "client_meta": body.client or {}, + } + base_url = get_router_url(target_node) + if not base_url: + raise HTTPException(status_code=400, detail=f"router_url is not configured for node {target_node}") + try: + out = await infer( + base_url, + target_agent, + text, + model=None, + metadata=metadata, + timeout=300.0, + api_key=ROUTER_API_KEY, + ) + except Exception as e: + _broadcast_bg( + _make_event( + "error", + {"where": "chat_v2.router", "message": str(e)[:180], "chat_id": chat_id, "node_id": target_node, "agent_id": target_agent}, + 
project_id=project_id, + session_id=session_id, + user_id=user_id, + ) + ) + raise HTTPException(status_code=502, detail=str(e)[:300]) + + reply = _clean_chat_reply(out.get("response", out.get("text", ""))) + assistant_saved = await _app_db.save_message(chat_id, "assistant", (reply or "")[:4096], parent_msg_id=user_saved.get("msg_id")) + trace_id = f"chatv2_{session_id}_{uuid.uuid4().hex[:8]}" + result = { + "ok": True, + "accepted": True, + "chat_id": chat_id, + "node_id": target_node, + "agent_id": target_agent, + "trace_id": trace_id, + "message": { + "message_id": assistant_saved.get("msg_id"), + "role": "assistant", + "text": reply, + "ts": assistant_saved.get("ts"), + "meta": { + "node_id": target_node, + "agent_id": target_agent, + "backend": out.get("backend"), + "model": out.get("model"), + }, + }, + } + if idem_key: + _idem_put(chat_id, idem_key, result) + result["idempotency"] = {"replayed": False, "key": idem_key} + return result + + @app.post("/api/chat/send") async def api_chat_send(body: ChatSendBody, request: Request): """BFF chat: Ollama or router. Returns runtime contract fields. 
Rate: 30/min.""" @@ -6821,9 +7221,9 @@ async def console_kling_health() -> Dict[str, Any]: return {"ok": False, "error": str(exc)} -@app.post("/api/aurora/kling/enhance/{job_id}") -async def console_kling_enhance( - job_id: str, +@app.post("/api/aurora/kling/enhance") +async def console_kling_enhance_plain( + job_id: str = Form(...), prompt: str = Form("enhance video quality, improve sharpness and clarity"), negative_prompt: str = Form("noise, blur, artifacts, distortion"), mode: str = Form("pro"), @@ -6846,6 +7246,25 @@ async def console_kling_enhance( ) +@app.post("/api/aurora/kling/enhance/{job_id}") +async def console_kling_enhance( + job_id: str, + prompt: str = Form("enhance video quality, improve sharpness and clarity"), + negative_prompt: str = Form("noise, blur, artifacts, distortion"), + mode: str = Form("pro"), + duration: str = Form("5"), + cfg_scale: float = Form(0.5), +) -> Dict[str, Any]: + return await console_kling_enhance_plain( + job_id=job_id, + prompt=prompt, + negative_prompt=negative_prompt, + mode=mode, + duration=duration, + cfg_scale=cfg_scale, + ) + + @app.get("/api/aurora/kling/status/{job_id}") async def console_kling_status(job_id: str) -> Dict[str, Any]: return await _aurora_request_json("GET", f"/api/aurora/kling/status/{job_id}", timeout=20.0, retries=2)