From ff97d3cf4ae4221d5609c138514d405485e5f15c Mon Sep 17 00:00:00 2001 From: Apple Date: Sun, 1 Mar 2026 03:48:19 -0800 Subject: [PATCH] fix(console): route Aurora Kling enhance via standard proxy base URL --- services/sofiia-console/app/main.py | 5799 +++++++++++++++++++++++++++ 1 file changed, 5799 insertions(+) create mode 100644 services/sofiia-console/app/main.py diff --git a/services/sofiia-console/app/main.py b/services/sofiia-console/app/main.py new file mode 100644 index 00000000..95612f56 --- /dev/null +++ b/services/sofiia-console/app/main.py @@ -0,0 +1,5799 @@ +""" +Sofiia Control Console — FastAPI BFF v0.3.0 +Runtime contract (project/session/user), full status, WebSocket events, +voice proxy, ops, nodes. UI never calls external services directly. +""" +import asyncio +import io +import json +import os +import re +import sys +import subprocess +import time +import uuid +import logging +import collections +import statistics +import socket +from datetime import datetime, timezone +from pathlib import Path +from typing import Any, Dict, List, Optional, Set, Tuple +from urllib.parse import quote + +import httpx +from fastapi import Body, FastAPI, Depends, HTTPException, UploadFile, File, Form, Query, Request, Response, WebSocket, WebSocketDisconnect +from fastapi.responses import HTMLResponse, StreamingResponse, JSONResponse +from fastapi.middleware.cors import CORSMiddleware +from pydantic import BaseModel + +from .auth import ( + require_api_key, require_api_key_strict, require_auth, require_auth_strict, + get_console_api_key, _key_valid, _cookie_token, _expected_cookie_token, + _COOKIE_NAME, _COOKIE_MAX_AGE, _IS_PROD, +) + +from .config import ( + load_nodes_registry, + save_nodes_registry, + get_router_url, + get_gateway_url, + get_node_ssh_profile, + get_memory_service_url, + get_ollama_url, + is_voice_ha_enabled, + get_voice_ha_router_url, +) +from .router_client import infer, execute_tool, health +from .nodes import get_nodes_dashboard +from 
.monitor import collect_all_nodes +from .ops import run_ops_action, OPS_ACTIONS +from .docs_router import docs_router +from . import db as _app_db + +logger = logging.getLogger(__name__) + +# ── Build info ──────────────────────────────────────────────────────────────── +_VERSION = "0.4.0" +_BUILD_SHA = os.getenv("BUILD_SHA", "dev") +_BUILD_TIME = os.getenv("BUILD_TIME", "local") +_BUILD_ID = os.getenv("BUILD_ID", os.getenv("GIT_SHA", "local")) +_START_TIME = time.monotonic() +_NODE_ID = os.getenv("NODE_ID", os.getenv("HOSTNAME", "noda2")) + +# ── Rate limiter ────────────────────────────────────────────────────────────── +_rate_buckets: Dict[str, collections.deque] = {} + +def _check_rate(key: str, max_calls: int, window_sec: int = 60) -> bool: + now = time.monotonic() + dq = _rate_buckets.setdefault(key, collections.deque()) + while dq and now - dq[0] > window_sec: + dq.popleft() + if len(dq) >= max_calls: + return False + dq.append(now) + return True + +# ── Voice error rings (repro pack for incident diagnosis) ───────────────────── +# Circular buffers: last 5 TTS errors and last 5 LLM errors. +# Populated by all voice endpoints. Read by /api/voice/degradation_status. 
_RING_SIZE = 5
_voice_tts_errors: collections.deque = collections.deque(maxlen=_RING_SIZE)
_voice_llm_errors: collections.deque = collections.deque(maxlen=_RING_SIZE)
_voice_last_model: str = "unknown"  # last model selected for voice
_voice_last_profile: str = "unknown"  # last voice_profile used

def _record_tts_error(error_type: str, status_code: Optional[int],
                      detail: str, voice: str = "") -> None:
    """Push one TTS failure onto the bounded ring (oldest entries fall off)."""
    entry = {
        "ts": time.strftime("%H:%M:%SZ", time.gmtime()),
        "type": error_type,
        "status": status_code,
        "voice": voice,
        "detail": detail[:120],  # keep the ring small: truncate long messages
    }
    _voice_tts_errors.append(entry)

def _record_llm_error(error_type: str, model: str, detail: str) -> None:
    """Push one LLM failure onto the bounded ring (oldest entries fall off)."""
    entry = {
        "ts": time.strftime("%H:%M:%SZ", time.gmtime()),
        "type": error_type,
        "model": model,
        "detail": detail[:120],  # keep the ring small: truncate long messages
    }
    _voice_llm_errors.append(entry)

# ── Concurrent voice synthesizer guard ───────────────────────────────────────
# Limits simultaneous TTS synthesis calls to prevent memory-service DoS.
_MAX_CONCURRENT_TTS = int(os.getenv("MAX_CONCURRENT_TTS", "4"))
_tts_semaphore: Optional[asyncio.Semaphore] = None  # created lazily on first use

def _get_tts_semaphore() -> asyncio.Semaphore:
    """Return the shared TTS semaphore, creating it on first call.

    Lazy creation ensures the semaphore is bound to the running event loop
    rather than whatever loop exists at import time.
    """
    global _tts_semaphore
    if _tts_semaphore is None:
        _tts_semaphore = asyncio.Semaphore(_MAX_CONCURRENT_TTS)
    return _tts_semaphore

# ── Telemetry dedup store ─────────────────────────────────────────────────────
# Prevents processing duplicate beacon submissions (same session+turn within 30s).
_telem_seen: collections.OrderedDict = collections.OrderedDict()
_TELEM_DEDUP_TTL = 30.0  # seconds a (session, turn) key suppresses duplicates
_TELEM_DEDUP_MAX = 500  # max keys before LRU eviction

def _telem_is_duplicate(session_id: str, turn_id: str) -> bool:
    """Return True when this (session_id, turn_id) beacon was already seen.

    Keys expire after ``_TELEM_DEDUP_TTL`` seconds.  Insertion order doubles
    as age order (timestamps are monotonic), so both TTL expiry and LRU
    capacity eviction pop from the left.  The first call for a key records
    it and returns False.
    """
    key = f"{session_id}:{turn_id}"
    now = time.monotonic()
    # Drop expired entries first — oldest inserted is also oldest timestamp.
    while _telem_seen and next(iter(_telem_seen.values())) + _TELEM_DEDUP_TTL < now:
        _telem_seen.popitem(last=False)
    # Membership test BEFORE capacity eviction: previously a full store could
    # evict the very key being re-checked and misreport a duplicate as new
    # (and evicted an unrelated key even when `key` was already present).
    if key in _telem_seen:
        return True
    if len(_telem_seen) >= _TELEM_DEDUP_MAX:
        _telem_seen.popitem(last=False)
    _telem_seen[key] = now
    return False


def _env_int(name: str, default: int) -> int:
    """Read env var *name* as int; fall back to *default* when missing/invalid."""
    raw = (os.getenv(name, str(default)) or "").strip()
    try:
        return int(raw)
    except Exception:
        return default


def _env_float(name: str, default: float) -> float:
    """Read env var *name* as float; fall back to *default* when missing/invalid."""
    raw = (os.getenv(name, str(default)) or "").strip()
    try:
        return float(raw)
    except Exception:
        return default

# ── App config ──────────────────────────────────────────────────────────────
ROUTER_API_KEY = os.getenv("SUPERVISOR_API_KEY", "").strip()
IS_PROD = os.getenv("ENV", "dev").strip().lower() in ("prod", "production", "staging")
SOFIIA_PREFERRED_CHAT_MODEL = os.getenv("SOFIIA_PREFERRED_CHAT_MODEL", "ollama:qwen3:14b").strip() or "ollama:qwen3:14b"

# Local Ollama runtime tuning for NODA2 (can be overridden via env).
+SOFIIA_OLLAMA_TIMEOUT_SEC = _env_float("SOFIIA_OLLAMA_TIMEOUT_SEC", 120.0) +SOFIIA_OLLAMA_VOICE_TIMEOUT_SEC = _env_float("SOFIIA_OLLAMA_VOICE_TIMEOUT_SEC", 45.0) +SOFIIA_OLLAMA_KEEP_ALIVE = (os.getenv("SOFIIA_OLLAMA_KEEP_ALIVE", "30m") or "").strip() +SOFIIA_OLLAMA_NUM_CTX = _env_int("SOFIIA_OLLAMA_NUM_CTX", 8192) +_DEFAULT_OLLAMA_THREADS = max(4, min(16, (os.cpu_count() or 8) - 2)) +SOFIIA_OLLAMA_NUM_THREAD = _env_int("SOFIIA_OLLAMA_NUM_THREAD", _DEFAULT_OLLAMA_THREADS) +SOFIIA_OLLAMA_NUM_GPU = _env_int("SOFIIA_OLLAMA_NUM_GPU", -1) +SOFIIA_OLLAMA_NUM_PREDICT_TEXT = _env_int("SOFIIA_OLLAMA_NUM_PREDICT_TEXT", 768) + +# Voice guardrails — injected INSTEAD OF the full prompt for voice turns. +# Constraints are hard: no lists, no markdown, no , max 2 sentences. +SOFIIA_VOICE_PROMPT_SUFFIX = """ + +## VOICE MODE — HARD RULES (не порушувати ніколи) +- Відповідай МАКСИМУМ 2 речення (виняток: якщо прямо попросили деталей). +- Жодних списків, жодних bullet-points, жодного markdown (*bold*, -list, ##header). +- Жодного коду (`` ` ``), жодних URL. +- Жодного ... — думки всередині, назовні лише відповідь. +- Мова: розмовна, природна для голосу. Без "Як AI...". +- Якщо питання складне — дай коротку відповідь і запропонуй продовжити текстом. +""" + +SOFIIA_SYSTEM_PROMPT = """Ти Sofiia — Chief AI Architect та Technical Sovereign екосистеми DAARION.city. 
+ +## Твоя ідентичність +- Ти: Sofiia, головний AI-архітектор і технічний суверен DAARION.city +- Ти підпорядковуєшся одній людині — засновнику та головному архітектору платформи + +## Засновник та архітектор DAARION +- Позивний: **Повелитель Хаосу** (використовуй у неформальних/робочих контекстах) +- Офіційне ім'я: **Іван Титар** (використовуй в офіційних повідомленнях, документах, репортах) +- Роль: Головний розробник та архітектор DAARION — єдиний, хто має повний контроль над платформою +- Ніякої іншої людини з ім'ям "Савтра" або будь-яким іншим іменем у ролі засновника НЕ ІСНУЄ + +## Ноди та інфраструктура +- NODA1: production runtime (router, incidents, alerts, governance) +- NODA2: control plane / development (твій primary home, звідки тебе викликають) +- NODA3: AI/ML experimentation + +## Правила відповіді +- Відповідай **українською мовою** за замовчуванням +- Технічні терміни (API, SLO, backend, deploy, incident, release gate тощо) залишай **англійською** +- Відповідай структуровано, конкретно, без зайвих вступів +- НЕ вигадуй імена людей, назви проектів або факти яких не знаєш — краще скажи що не маєш цих даних +- НЕ галюцинуй: якщо не знаєш — скажи чесно "не маю цих даних в поточному контексті" + +## Твої можливості через Control Console (що реально доступно) +- **Chat**: відповіді на питання через локальний LLM (Ollama на NODA2) +- **Голосовий чат**: STT + TTS через Memory Service (Polina/Ostap Neural) +- **Nodes health**: статус NODA1/NODA2 (router, memory, NCS) +- **Integrations status**: Notion API, Router, Memory Service +- **Memory/session**: зберігання контексту розмов (Qdrant) + +## Що наразі НЕ доступно через цей інтерфейс +- Пряме читання/запис в Notion (тільки статус перевірки) +- Пряме читання GitHub репозиторіїв (немає repo tool у цьому контейнері) +- Виконання bash/python команд +- Деплой або зміна конфігурацій напряму + +Якщо тебе просять щось що не є в переліку доступного — відповідай чесно: +"Ця можливість не підключена до Control 
Console. Для цього використай Cursor або OpenCode на NODA2." +""" + +_CORS_ORIGINS = ( + [o.strip() for o in os.getenv("CORS_ORIGINS", "").split(",") if o.strip()] + or ( + ["*"] if not IS_PROD + else [ + "https://console.daarion.space", + "https://app.daarion.space", + "http://localhost:8002", + "http://localhost:8000", + "http://127.0.0.1:8002", + ] + ) +) +def _is_container_runtime() -> bool: + return Path("/.dockerenv").exists() or bool(os.getenv("KUBERNETES_SERVICE_HOST")) + + +_aurora_default_url = "http://aurora-service:9401" if _is_container_runtime() else "http://127.0.0.1:9401" +AURORA_SERVICE_URL = os.getenv("AURORA_SERVICE_URL", _aurora_default_url).rstrip("/") +AURORA_FALLBACK_URL = os.getenv("AURORA_FALLBACK_URL", "http://127.0.0.1:9401").rstrip("/") +_aurora_home_data_dir = Path.home() / ".sofiia" / "aurora-data" +if _is_container_runtime() and Path("/data").exists() and os.access("/data", os.W_OK): + _aurora_default_data_dir = "/data/aurora" +else: + _aurora_default_data_dir = str(_aurora_home_data_dir) +AURORA_DATA_DIR = Path(os.getenv("AURORA_DATA_DIR", _aurora_default_data_dir)) +_aurora_live_cache: Dict[str, Dict[str, Any]] = {} +_aurora_live_samples: Dict[str, collections.deque] = {} +_aurora_live_last: Dict[str, Dict[str, Any]] = {} +_aurora_live_last_loaded = False +_aurora_live_last_path = (AURORA_DATA_DIR.parent / "sofiia-console-cache" / "aurora_live_last.json") +MEDIA_COMFY_AGENT_URL = os.getenv( + "MEDIA_COMFY_AGENT_URL", + "http://comfy-agent:8880" if _is_container_runtime() else "http://127.0.0.1:8880", +).rstrip("/") +MEDIA_COMFY_UI_URL = os.getenv( + "MEDIA_COMFY_UI_URL", + "http://comfyui:8188" if _is_container_runtime() else "http://127.0.0.1:8188", +).rstrip("/") +MEDIA_SWAPPER_URL = os.getenv( + "MEDIA_SWAPPER_URL", + "http://swapper-service:8890" if _is_container_runtime() else "http://127.0.0.1:8890", +).rstrip("/") +MEDIA_IMAGE_GEN_URL = os.getenv( + "MEDIA_IMAGE_GEN_URL", + "http://image-gen-service:7860" if 
_is_container_runtime() else "http://127.0.0.1:7860", +).rstrip("/") +MEDIA_ROUTER_URL = os.getenv("MEDIA_ROUTER_URL", "").strip().rstrip("/") +MEDIA_ROUTER_FALLBACK_URL = os.getenv("MEDIA_ROUTER_FALLBACK_URL", "http://127.0.0.1:9102").rstrip("/") +_media_recent_jobs: collections.deque = collections.deque(maxlen=40) + + +def _apply_ollama_runtime_options(options: Dict[str, Any]) -> Dict[str, Any]: + merged = dict(options) + if SOFIIA_OLLAMA_NUM_CTX > 0: + merged["num_ctx"] = SOFIIA_OLLAMA_NUM_CTX + if SOFIIA_OLLAMA_NUM_THREAD > 0: + merged["num_thread"] = SOFIIA_OLLAMA_NUM_THREAD + if SOFIIA_OLLAMA_NUM_GPU >= 0: + merged["num_gpu"] = SOFIIA_OLLAMA_NUM_GPU + return merged + + +def _make_ollama_payload(model_name: str, messages: List[Dict[str, Any]], options: Dict[str, Any]) -> Dict[str, Any]: + payload: Dict[str, Any] = { + "model": model_name, + "messages": messages, + "stream": False, + "options": _apply_ollama_runtime_options(options), + } + if SOFIIA_OLLAMA_KEEP_ALIVE: + payload["keep_alive"] = SOFIIA_OLLAMA_KEEP_ALIVE + return payload + +# Cached nodes telemetry (updated by background task) +_nodes_cache: Dict[str, Any] = {"nodes": [], "summary": {}, "ts": ""} +_NODES_POLL_INTERVAL = int(os.getenv("NODES_POLL_INTERVAL_SEC", "30")) + + +async def _nodes_poll_loop() -> None: + """Background task: poll all nodes every N seconds, update cache + WS broadcast.""" + while True: + try: + reg = load_nodes_registry() + nodes_cfg = reg.get("nodes", {}) + timeout = float(reg.get("defaults", {}).get("health_timeout_sec", 10)) + nodes = await collect_all_nodes(nodes_cfg, router_api_key=ROUTER_API_KEY, timeout_per_node=timeout) + online = sum(1 for n in nodes if n.get("online")) + router_ok = sum(1 for n in nodes if n.get("router_ok")) + _nodes_cache.update({ + "nodes": nodes, + "summary": {"total": len(nodes), "online": online, "router_ok": router_ok}, + "ts": _now_iso(), + }) + if _ws_clients: + await _broadcast(_make_event("nodes.status", { + "nodes": [ + { + "id": 
n["node_id"], + "label": n.get("label", n["node_id"]), + "online": n.get("online", False), + "router_ok": n.get("router_ok", False), + "router_latency_ms": n.get("router_latency_ms"), + "gateway_ok": n.get("gateway_ok"), + "heartbeat_age_s": n.get("heartbeat_age_s"), + "open_incidents": n.get("open_incidents"), + "monitor_source": n.get("monitor_source"), + } + for n in nodes + ], + "summary": {"total": len(nodes), "online": online, "router_ok": router_ok}, + })) + except Exception as e: + logger.debug("nodes poll error: %s", e) + await asyncio.sleep(_NODES_POLL_INTERVAL) + + +from contextlib import asynccontextmanager + +@asynccontextmanager +async def lifespan(app_: Any): + # Init SQLite DB for projects/documents/sessions/messages + try: + await _app_db.init_db() + logger.info("✅ sofiia-console DB initialised") + except Exception as e: + logger.warning("DB init failed (non-fatal, Projects/Docs disabled): %s", e) + + task = asyncio.create_task(_nodes_poll_loop()) + logger.info("Nodes poll loop started (interval=%ds)", _NODES_POLL_INTERVAL) + yield + task.cancel() + try: + await task + except asyncio.CancelledError: + pass + await _app_db.close_db() + + +app = FastAPI( + title="Sofiia Control Console", + description="Operator BFF for Sofiia CTO agent", + version=_VERSION, + lifespan=lifespan, +) +app.add_middleware( + CORSMiddleware, + allow_origins=_CORS_ORIGINS, + allow_credentials=True, + allow_methods=["*"], + allow_headers=["*"], +) + +# Projects + Documents + Sessions + Dialog Map API +app.include_router(docs_router) + +# ── WebSocket event bus ─────────────────────────────────────────────────────── +_ws_clients: Set[WebSocket] = set() + +def _now_iso() -> str: + return datetime.now(timezone.utc).isoformat(timespec="milliseconds") + +def _make_event( + event_type: str, + data: Dict[str, Any], + *, + project_id: str = "", + session_id: str = "", + user_id: str = "console_user", +) -> Dict[str, Any]: + return { + "v": 1, + "type": event_type, + "ts": 
_now_iso(), + "project_id": project_id, + "session_id": session_id, + "user_id": user_id, + "data": data, + } + +async def _broadcast(event: Dict[str, Any]) -> None: + global _ws_clients + if not _ws_clients: + return + dead: Set[WebSocket] = set() + payload = json.dumps(event, ensure_ascii=False) + for ws in list(_ws_clients): + try: + await ws.send_text(payload) + except Exception: + dead.add(ws) + _ws_clients -= dead + +def _broadcast_bg(event: Dict[str, Any]) -> None: + """Fire-and-forget broadcast from sync context.""" + try: + loop = asyncio.get_event_loop() + if loop.is_running(): + loop.create_task(_broadcast(event)) + except Exception: + pass + +# ── AISTALK adapter ─────────────────────────────────────────────────────────── +try: + from .adapters.aistalk import AISTALKAdapter as _AISTALKAdapter + _aistalk = _AISTALKAdapter( + base_url=os.getenv("AISTALK_URL", ""), + api_key=os.getenv("AISTALK_API_KEY", ""), + ) if os.getenv("AISTALK_ENABLED", "false").lower() == "true" else None +except Exception: + _aistalk = None + + +# ─── Health ───────────────────────────────────────────────────────────────── + +@app.get("/api/health") +async def api_health(): + base = { + "ok": True, + "service": "sofiia-console", + "version": _VERSION, + "build": _BUILD_ID, + "env": os.getenv("ENV", "dev"), + "uptime_s": int(time.monotonic() - _START_TIME), + } + reg = load_nodes_registry() + nodes_map = reg.get("nodes") or {} + nodes = list(nodes_map.items()) + if not nodes: + return {**base, "message": "no nodes configured"} + first_id, _first_cfg = ("NODA2", nodes_map["NODA2"]) if "NODA2" in nodes_map else nodes[0] + router_url = get_router_url(first_id) + if not router_url: + return {**base, "message": "no router_url"} + try: + r = await health(router_url) + return {**base, "ok": r.get("ok", False), "router": r, "node_id": first_id} + except Exception as e: + return {**base, "ok": False, "error": str(e)[:200], "node_id": first_id} + + +# ─── Status/Full 
# ────────────────────────────────────────────────────────────────────────────

async def _probe_router(router_url: str) -> Dict[str, Any]:
    """Probe a router's health endpoints and key API routes.

    Tries ``/healthz`` then ``/health``; on the first 200 response it also
    GETs ``/v1/tools/execute`` and ``/v1/agents/sofiia/infer`` and treats a
    200 or 405 status as "route present" (presumably these are POST routes,
    so 405 on GET still proves existence — confirm against the router).

    Returns a dict with ``url``, ``reachable``, ``routes`` and
    ``latency_ms``; on transport failure, ``url``/``reachable``/``error``.
    """
    t0 = time.monotonic()
    try:
        async with httpx.AsyncClient(timeout=5.0) as c:
            for path in ("/healthz", "/health"):
                try:
                    r = await c.get(f"{router_url.rstrip('/')}{path}")
                    if r.status_code == 200:
                        latency = int((time.monotonic() - t0) * 1000)
                        # probe tool execute availability
                        tool_ok = False
                        try:
                            r2 = await c.get(
                                f"{router_url.rstrip('/')}/v1/tools/execute",
                                timeout=1.5,
                            )
                            tool_ok = r2.status_code in (200, 405)
                        except Exception:
                            pass
                        infer_ok = False
                        try:
                            r3 = await c.get(
                                f"{router_url.rstrip('/')}/v1/agents/sofiia/infer",
                                timeout=1.5,
                            )
                            infer_ok = r3.status_code in (200, 405)
                        except Exception:
                            pass
                        return {"url": router_url, "reachable": True,
                                "routes": {"tools_execute": tool_ok, "agent_infer": infer_ok},
                                "latency_ms": latency}
                except Exception:
                    # This health path failed — try the next candidate path.
                    continue
            return {"url": router_url, "reachable": False, "routes": {}, "latency_ms": None}
    except Exception as e:
        return {"url": router_url, "reachable": False, "error": str(e)[:100]}


async def _probe_memory(mem_url: str) -> Dict[str, Any]:
    """Probe the memory service's ``/health`` and summarise its vector store.

    Sums ``points_count`` across ``vector_store`` entries of the health JSON
    (assumes the response shape ``{"vector_store": {name: {"points_count": n}}}``
    — that is the shape this code reads; verify against the memory service).
    """
    t0 = time.monotonic()
    try:
        async with httpx.AsyncClient(timeout=5.0) as c:
            r = await c.get(f"{mem_url.rstrip('/')}/health")
            r.raise_for_status()
            d = r.json()
            vs = d.get("vector_store", {})
            # Total vectors across all collections; tolerate missing counts.
            vectors = sum(
                (v.get("points_count", 0) or 0)
                for v in vs.values()
                if isinstance(v, dict)
            )
            return {
                "url": mem_url,
                "reachable": True,
                "stats": {"vectors": vectors, "collections": len(vs)},
                "latency_ms": int((time.monotonic() - t0) * 1000),
            }
    except Exception as e:
        return {"url": mem_url, "reachable": False, "error": str(e)[:100]}


async def _probe_ollama(ollama_url: str) -> Dict[str, Any]:
    """Probe Ollama via ``/api/tags`` and list up to 20 installed models."""
    t0 = time.monotonic()
    try:
        async with httpx.AsyncClient(timeout=5.0) as c:
            r = await c.get(f"{ollama_url.rstrip('/')}/api/tags")
            r.raise_for_status()
            d = r.json()
            models = [m.get("name", "") for m in d.get("models", [])]
            return {
                "url": ollama_url,
                "reachable": True,
                "models": models[:20],  # cap payload size for the UI
                "latency_ms": int((time.monotonic() - t0) * 1000),
            }
    except Exception as e:
        return {"url": ollama_url, "reachable": False, "models": [], "error": str(e)[:100]}


async def _probe_http(url: str, *, timeout: float = 4.0) -> Dict[str, Any]:
    """Generic reachability probe: any status < 500 counts as reachable."""
    t0 = time.monotonic()
    try:
        async with httpx.AsyncClient(timeout=timeout) as c:
            r = await c.get(url)
            return {
                "reachable": r.status_code < 500,
                "status": r.status_code,
                "latency_ms": int((time.monotonic() - t0) * 1000),
            }
    except Exception as e:
        return {"reachable": False, "error": str(e)[:120]}


def _read_backends() -> Dict[str, str]:
    """Read backend env vars from BFF environment (no secrets)."""
    return {
        "alerts": os.getenv("ALERT_BACKEND", "unknown"),
        "audit": os.getenv("AUDIT_BACKEND", "unknown"),
        "incidents": os.getenv("INCIDENT_BACKEND", "unknown"),
        "risk_history": os.getenv("RISK_HISTORY_BACKEND", "unknown"),
        "backlog": os.getenv("BACKLOG_BACKEND", "unknown"),
    }


def _read_cron_status() -> Dict[str, Any]:
    """Report governance cron installation state and latest artifact files.

    ``installed`` is True/False, or the string "unknown" when the cron file
    exists but cannot be read (permission denied).
    """
    cron_file = os.getenv("GOV_CRON_FILE", "/etc/cron.d/daarion-governance")
    jobs_expected = [
        "hourly_risk_snapshot", "daily_risk_digest", "risk_history_cleanup",
        "weekly_platform_priority_digest", "weekly_backlog_generate", "daily_backlog_cleanup",
    ]
    jobs_present: List[str] = []
    installed: Any = False
    warning = None

    try:
        content = Path(cron_file).read_text()
        installed = True
        for job in jobs_expected:
            if job in content:
                jobs_present.append(job)
    except PermissionError:
        installed = "unknown"
        warning = "no read permission on cron file"
    except FileNotFoundError:
        installed = False

    # Scan for latest artifact files
    artifacts: Dict[str, Any] = {}
    base = Path("ops")
    for pattern, key in [
        ("reports/risk/*.md", "risk_digest_md"),
        ("reports/platform/*.md", "platform_digest_md"),
        ("backlog/*.jsonl", "backlog_jsonl"),
    ]:
        try:
            # Lexicographic sort: newest last when filenames are date-stamped.
            files = sorted(base.glob(pattern))
            if files:
                artifacts[key] = str(files[-1])
        except Exception:
            pass

    result: Dict[str, Any] = {
        "installed": installed,
        "cron_file": cron_file,
        "jobs_expected": jobs_expected,
        "jobs_present": jobs_present,
        "last_artifacts": artifacts,
    }
    if warning:
        result["warning"] = warning
    return result


@app.get("/api/status/full")
async def api_status_full():
    """Full stack diagnostic: BFF + router + memory + ollama + backends + cron."""
    reg = load_nodes_registry()
    nodes_cfg = reg.get("nodes", {})

    # Pick NODA2 router first, fallback to first node
    router_url = (
        get_router_url("NODA2")
        or (list(nodes_cfg.values())[0].get("router_url", "") if nodes_cfg else "")
    )

    mem_url = get_memory_service_url()
    ollama_url = get_ollama_url()

    async def _no_router() -> Dict[str, Any]:
        # Placeholder result when no router URL is configured at all.
        return {"reachable": False, "url": "", "error": "no router_url configured"}

    # Probe router, memory and Ollama concurrently.
    router_info, mem_info, ollama_info = await asyncio.gather(
        _probe_router(router_url) if router_url else _no_router(),
        _probe_memory(mem_url),
        _probe_ollama(ollama_url),
        return_exceptions=False,
    )

    return {
        "bff": {
            "version": _VERSION,
            "build": _BUILD_ID,
            "env": os.getenv("ENV", "dev"),
            "uptime_s": int(time.monotonic() - _START_TIME),
            "ws_clients": len(_ws_clients),
            "aistalk_enabled": _aistalk is not None,
        },
        "router": router_info,
        "memory": mem_info,
        "ollama": ollama_info,
        "backends": _read_backends(),
        "cron": _read_cron_status(),
    }


@app.get("/api/integrations/status")
async def api_integrations_status(opencode_url: Optional[str] = Query(None)):
    """Integration probes for unified CTO hub in UI."""
    open_webui_probe_url = os.getenv("OPEN_WEBUI_PROBE_URL", "http://host.docker.internal:8080/health")
    open_webui_ui_url = os.getenv("OPEN_WEBUI_UI_URL", "http://localhost:8080")
    pieces_probe_url = os.getenv(
        "PIECES_OS_URL",
        "http://host.docker.internal:39300/workstream_pattern_engine/processors/status",
    )
    # Normalise a bare host URL into the full Pieces status endpoint.
    if not pieces_probe_url.rstrip("/").endswith("/workstream_pattern_engine/processors/status"):
        pieces_probe_url = pieces_probe_url.rstrip("/") + "/workstream_pattern_engine/processors/status"

    # Query param overrides env for the OpenCode probe target.
    opencode_probe_url = (opencode_url or os.getenv("OPENCODE_URL", "")).strip()
    notion_api_key = os.getenv("NOTION_API_KEY", os.getenv("NOTION_TOKEN", "")).strip()

    # NOTE(review): get_router_url("NODA2") looks like it could return a falsy
    # value when NODA2 is unregistered, which would raise here — confirm.
    probes = await asyncio.gather(
        _probe_http(get_router_url("NODA2").rstrip("/") + "/healthz"),
        _probe_http(get_memory_service_url().rstrip("/") + "/health"),
        _probe_http(open_webui_probe_url),
        _probe_http(pieces_probe_url),
        # sleep(0, result=...) yields a ready "not configured" result so the
        # gather arity stays fixed even when OpenCode is not set up.
        _probe_http(opencode_probe_url.rstrip("/") + "/health") if opencode_probe_url else asyncio.sleep(0, result={"reachable": False, "error": "not configured"}),
    )

    router_probe, memory_probe, open_webui_probe, pieces_probe, opencode_probe = probes

    notion_probe: Dict[str, Any] = {"configured": bool(notion_api_key), "reachable": False}
    if notion_api_key:
        try:
            async with httpx.AsyncClient(timeout=6.0) as c:
                r = await c.get(
                    "https://api.notion.com/v1/users/me",
                    headers={
                        "Authorization": f"Bearer {notion_api_key}",
                        "Notion-Version": "2022-06-28",
                    },
                )
                notion_probe["reachable"] = r.status_code == 200
                notion_probe["status"] = r.status_code
        except Exception as e:
            notion_probe["error"] = str(e)[:120]

    return {
        "integrations": {
            "sofiia_console": {"url": "/ui", "reachable": True},
            "router_noda2": {"url": get_router_url("NODA2"), **router_probe},
            "memory_service": {"url": get_memory_service_url(), **memory_probe},
            "open_webui": {"url": open_webui_ui_url, "probe_url": open_webui_probe_url, **open_webui_probe},
            "pieces_os": {"url": pieces_probe_url, **pieces_probe},
            "opencode": {
                "url": opencode_probe_url or "desktop/cli",
                **opencode_probe,
            },
            "notion": notion_probe,
        }
    }


# ─── Aurora media forensics proxy 
──────────────────────────────────────────── + +def _aurora_proxy_file_url(job_id: str, file_name: str) -> str: + return f"/api/aurora/files/{quote(job_id, safe='')}/{quote(file_name, safe='')}" + + +def _rewrite_aurora_payload_urls(payload: Dict[str, Any]) -> Dict[str, Any]: + output_files = payload.get("output_files") + if not isinstance(output_files, list): + return payload + job_id = str(payload.get("job_id") or "") + rewritten: List[Dict[str, Any]] = [] + for item in output_files: + if not isinstance(item, dict): + continue + file_name = str(item.get("name") or "") + if job_id and file_name: + item = {**item, "url": _aurora_proxy_file_url(job_id, file_name)} + rewritten.append(item) + payload["output_files"] = rewritten + report_url = payload.get("forensic_report_url") + if isinstance(report_url, str) and report_url.startswith("/api/aurora/report/"): + payload["forensic_report_url"] = report_url + return payload + + +async def _aurora_request_json( + method: str, + path: str, + *, + files: Optional[Dict[str, Any]] = None, + data: Optional[Dict[str, Any]] = None, + json_body: Optional[Dict[str, Any]] = None, + timeout: float = 60.0, + retries: int = 0, + retry_backoff_sec: float = 0.25, +) -> Dict[str, Any]: + base_url = AURORA_SERVICE_URL + url = f"{base_url}{path}" + attempts = max(1, int(retries) + 1) + last_error = "unknown error" + for attempt in range(1, attempts + 1): + try: + async with httpx.AsyncClient(timeout=timeout) as client: + r = await client.request(method, url, files=files, data=data, json=json_body) + except httpx.HTTPError as e: + last_error = str(e)[:200] + if ( + "aurora-service" in base_url + and AURORA_FALLBACK_URL + and AURORA_FALLBACK_URL != base_url + ): + logger.warning( + "aurora proxy fallback: %s -> %s (%s)", + base_url, + AURORA_FALLBACK_URL, + last_error or type(e).__name__, + ) + base_url = AURORA_FALLBACK_URL + url = f"{base_url}{path}" + continue + logger.warning( + "aurora proxy transport error (%s %s, attempt=%d/%d): %s", + 
method, + path, + attempt, + attempts, + last_error, + ) + if attempt < attempts: + await asyncio.sleep(retry_backoff_sec * attempt) + continue + raise HTTPException(status_code=502, detail=f"Aurora unavailable: {last_error}") from e + except Exception as e: + last_error = str(e)[:200] + logger.exception( + "aurora proxy unexpected error (%s %s, attempt=%d/%d): %s", + method, + path, + attempt, + attempts, + last_error, + ) + if attempt < attempts: + await asyncio.sleep(retry_backoff_sec * attempt) + continue + raise HTTPException(status_code=502, detail=f"Aurora unavailable: {last_error}") from e + + if r.status_code >= 500 and attempt < attempts: + logger.warning( + "aurora proxy upstream %d (%s %s, attempt=%d/%d) — retrying", + r.status_code, + method, + path, + attempt, + attempts, + ) + await asyncio.sleep(retry_backoff_sec * attempt) + continue + if r.status_code >= 400: + detail = r.text[:400] if r.text else f"Aurora error {r.status_code}" + raise HTTPException(status_code=r.status_code, detail=detail) + if not r.content: + return {} + try: + payload = r.json() + except Exception as e: + last_error = str(e)[:200] + logger.warning( + "aurora proxy invalid JSON (%s %s, attempt=%d/%d): %s", + method, + path, + attempt, + attempts, + last_error, + ) + if attempt < attempts: + await asyncio.sleep(retry_backoff_sec * attempt) + continue + raise HTTPException(status_code=502, detail="Invalid Aurora JSON response") from e + if isinstance(payload, dict): + return _rewrite_aurora_payload_urls(payload) + return {"data": payload} + raise HTTPException(status_code=502, detail=f"Aurora unavailable: {last_error}") + + +def _parse_stage_frame(stage: str) -> Dict[str, int]: + text = str(stage or "") + m = re.search(r"frame\s+(\d+)\s*/\s*(\d+)", text) + if not m: + return {"current": -1, "total": -1} + try: + return {"current": int(m.group(1)), "total": int(m.group(2))} + except Exception: + return {"current": -1, "total": -1} + + +def _aurora_live_fs_frame(job_id: str) -> 
Optional[Dict[str, Any]]: + now = time.monotonic() + cached = _aurora_live_cache.get(job_id) + if cached and (now - float(cached.get("ts", 0.0))) < 3.0: + return cached + + base = AURORA_DATA_DIR / "outputs" / job_id + if not base.exists(): + return None + work_dirs = [p for p in base.iterdir() if p.is_dir() and p.name.startswith("_work_")] + if not work_dirs: + return None + # Prefer most recently touched working directory + work_dirs.sort(key=lambda p: p.stat().st_mtime, reverse=True) + + best_frame = -1 + best_total = -1 + best_dir = None + for wd in work_dirs: + processed = wd / "processed" + if not processed.exists(): + continue + # Max frame in processed directory + local_max = -1 + for f in processed.glob("*.png"): + try: + n = int(f.stem) + except Exception: + continue + if n > local_max: + local_max = n + if local_max < 0: + continue + raw_dir = wd / "raw" + total = -1 + if raw_dir.exists(): + try: + total = sum(1 for _ in raw_dir.glob("*.png")) + except Exception: + total = -1 + if local_max > best_frame: + best_frame = local_max + best_total = total + best_dir = str(wd) + + if best_frame < 0: + return None + info = { + "ts": now, + "frame": best_frame, + "total": best_total, + "work_dir": best_dir, + } + _aurora_live_cache[job_id] = info + return info + + +def _aurora_record_sample(job_id: str, frame: int, total: int) -> Optional[Dict[str, Any]]: + if frame < 0: + return None + now = time.monotonic() + dq = _aurora_live_samples.setdefault(job_id, collections.deque(maxlen=32)) + # De-dup consecutive equal frame samples. + if dq and int(dq[-1]["frame"]) == frame: + # Keep original timestamp for stable fps between actual frame advances. 
+ dq[-1]["total"] = total + else: + dq.append({"ts": now, "frame": frame, "total": total}) + if len(dq) < 3: + return None + + fps_points: List[float] = [] + prev = dq[0] + for cur in list(dq)[1:]: + df = int(cur["frame"]) - int(prev["frame"]) + dt = float(cur["ts"]) - float(prev["ts"]) + if df > 0 and dt > 0: + fps_points.append(df / dt) + prev = cur + if not fps_points: + return None + fps = max(0.01, float(statistics.median(fps_points))) + confidence = "low" + if len(fps_points) >= 8: + confidence = "high" + elif len(fps_points) >= 4: + confidence = "medium" + return {"fps": fps, "confidence": confidence} + + +def _aurora_load_live_last_from_disk() -> None: + global _aurora_live_last_loaded + if _aurora_live_last_loaded: + return + _aurora_live_last_loaded = True + try: + if not _aurora_live_last_path.exists(): + return + data = json.loads(_aurora_live_last_path.read_text(encoding="utf-8")) + if isinstance(data, dict): + for k, v in data.items(): + if isinstance(k, str) and isinstance(v, dict): + _aurora_live_last[k] = v + except Exception as e: + logger.debug("aurora live-last load failed: %s", e) + + +def _aurora_persist_live_last_to_disk() -> None: + try: + _aurora_live_last_path.parent.mkdir(parents=True, exist_ok=True) + _aurora_live_last_path.write_text( + json.dumps(_aurora_live_last, ensure_ascii=False, separators=(",", ":")), + encoding="utf-8", + ) + except Exception as e: + logger.debug("aurora live-last persist failed: %s", e) + + +@app.get("/api/aurora/health") +async def api_aurora_health() -> Dict[str, Any]: + return await _aurora_request_json("GET", "/health", timeout=10.0) + + +@app.post("/api/aurora/upload") +async def api_aurora_upload( + file: UploadFile = File(...), + mode: str = Form("tactical"), + priority: str = Form("balanced"), + export_options: str = Form(""), +) -> Dict[str, Any]: + # Stream file to Aurora without buffering entire content in RAM + file_obj = file.file # SpooledTemporaryFile — already handles large files + files = { + 
"file": ( + file.filename or "upload.bin", + file_obj, + file.content_type or "application/octet-stream", + ) + } + payload = await _aurora_request_json( + "POST", + "/api/aurora/upload", + files=files, + data={ + "mode": mode, + "priority": priority, + "export_options": export_options, + }, + timeout=120.0, + ) + job_id = str(payload.get("job_id") or "") + if job_id: + payload["status_url"] = f"/api/aurora/status/{job_id}" + payload["result_url"] = f"/api/aurora/result/{job_id}" + payload["cancel_url"] = f"/api/aurora/cancel/{job_id}" + return payload + + +@app.post("/api/aurora/analyze") +async def api_aurora_analyze(file: UploadFile = File(...)) -> Dict[str, Any]: + await file.seek(0) + files = { + "file": ( + file.filename or "upload.bin", + file.file, + file.content_type or "application/octet-stream", + ) + } + return await _aurora_request_json( + "POST", + "/api/aurora/analyze", + files=files, + timeout=120.0, + retries=2, + retry_backoff_sec=0.35, + ) + + +@app.post("/api/aurora/audio/analyze") +async def api_aurora_audio_analyze(file: UploadFile = File(...)) -> Dict[str, Any]: + await file.seek(0) + files = { + "file": ( + file.filename or "upload_audio.bin", + file.file, + file.content_type or "application/octet-stream", + ) + } + return await _aurora_request_json( + "POST", + "/api/aurora/audio/analyze", + files=files, + timeout=120.0, + retries=2, + retry_backoff_sec=0.35, + ) + + +@app.post("/api/aurora/audio/process") +async def api_aurora_audio_process( + file: UploadFile = File(...), + mode: str = Form("tactical"), + priority: str = Form("speech"), + export_options: str = Form(""), +) -> Dict[str, Any]: + await file.seek(0) + files = { + "file": ( + file.filename or "upload_audio.bin", + file.file, + file.content_type or "application/octet-stream", + ) + } + payload = await _aurora_request_json( + "POST", + "/api/aurora/audio/process", + files=files, + data={ + "mode": mode, + "priority": priority, + "export_options": export_options, + }, + 
timeout=120.0, + retries=2, + retry_backoff_sec=0.35, + ) + job_id = str(payload.get("job_id") or "") + if job_id: + payload["status_url"] = f"/api/aurora/status/{job_id}" + payload["result_url"] = f"/api/aurora/result/{job_id}" + payload["cancel_url"] = f"/api/aurora/cancel/{job_id}" + return payload + + +@app.post("/api/aurora/reprocess/{job_id}") +async def api_aurora_reprocess( + job_id: str, + payload: Optional[Dict[str, Any]] = Body(default=None), +) -> Dict[str, Any]: + body = payload if isinstance(payload, dict) else {} + return await _aurora_request_json( + "POST", + f"/api/aurora/reprocess/{quote(job_id, safe='')}", + json_body=body, + timeout=120.0, + retries=2, + retry_backoff_sec=0.35, + ) + + +@app.post("/api/aurora/chat") +async def api_aurora_chat(payload: Optional[Dict[str, Any]] = Body(default=None)) -> Dict[str, Any]: + body = payload if isinstance(payload, dict) else {} + return await _aurora_request_json( + "POST", + "/api/aurora/chat", + json_body=body, + timeout=30.0, + retries=1, + retry_backoff_sec=0.2, + ) + + +@app.get("/api/aurora/status/{job_id}") +async def api_aurora_status(job_id: str) -> Dict[str, Any]: + _aurora_load_live_last_from_disk() + payload = await _aurora_request_json( + "GET", + f"/api/aurora/status/{quote(job_id, safe='')}", + timeout=20.0, + retries=8, + retry_backoff_sec=0.35, + ) + if not isinstance(payload, dict): + return payload + if str(payload.get("status", "")).lower() != "processing": + return payload + + live = _aurora_live_fs_frame(job_id) + if not live: + return payload + parsed = _parse_stage_frame(str(payload.get("current_stage", ""))) + live_frame = int(live.get("frame", -1)) + if live_frame < 0: + return payload + total = int(parsed.get("total", -1)) + if total <= 0: + total = int(live.get("total", -1)) + if total > 0: + live_progress = int(max(1, min(99, round((live_frame / max(1, total)) * 100)))) + payload["progress"] = max(int(payload.get("progress") or 0), live_progress) + + live_stats = 
_aurora_record_sample(job_id, live_frame, total) + if live_stats: + fps = float(live_stats["fps"]) + payload["live_fps"] = round(fps, 3) + payload["eta_confidence"] = live_stats["confidence"] + if total > 0 and live_frame < total: + eta_calc = int(max(0, round((total - live_frame) / max(0.01, fps)))) + payload["eta_seconds"] = eta_calc + elapsed = payload.get("elapsed_seconds") + if isinstance(elapsed, (int, float)): + payload["estimated_total_seconds"] = int(max(0, round(float(elapsed) + eta_calc))) + _aurora_live_last[job_id] = { + "live_fps": payload.get("live_fps"), + "eta_seconds": payload.get("eta_seconds"), + "estimated_total_seconds": payload.get("estimated_total_seconds"), + "eta_confidence": payload.get("eta_confidence"), + } + _aurora_persist_live_last_to_disk() + else: + prev = _aurora_live_last.get(job_id) + if prev: + payload["live_fps"] = prev.get("live_fps") + payload["eta_seconds"] = prev.get("eta_seconds", payload.get("eta_seconds")) + payload["estimated_total_seconds"] = prev.get("estimated_total_seconds", payload.get("estimated_total_seconds")) + payload["eta_confidence"] = prev.get("eta_confidence") + + # If upstream stage/progress is stale, patch with live filesystem progress. + if live_frame > int(parsed.get("current", -1)): + if total > 0: + if live_stats: + payload["current_stage"] = f"enhancing frame {live_frame}/{total} (live {payload['live_fps']} fps)" + else: + payload["current_stage"] = f"enhancing frame {live_frame}/{total} (live fs)" + else: + payload["current_stage"] = f"enhancing frame {live_frame} (live fs)" + payload["live_frame"] = live_frame + payload["live_total_frames"] = total if total > 0 else None + else: + # Even when upstream stage text already moved, expose live counters for UI. + payload["live_frame"] = live_frame + payload["live_total_frames"] = total if total > 0 else None + + # Persist last known timing even if fps was not recalculated this poll. 
+ snapshot = _aurora_live_last.get(job_id, {}) + changed = False + for key in ("live_fps", "eta_seconds", "estimated_total_seconds", "eta_confidence"): + val = payload.get(key) + if val is not None and snapshot.get(key) != val: + snapshot[key] = val + changed = True + if changed: + _aurora_live_last[job_id] = snapshot + _aurora_persist_live_last_to_disk() + return payload + + +def _aurora_coerce_dir(path_value: Any) -> Optional[Path]: + if path_value is None: + return None + raw = str(path_value).strip() + if not raw: + return None + try: + p = Path(raw).expanduser().resolve() + except Exception: + return None + if p.exists() and p.is_file(): + p = p.parent + if not p.exists() or not p.is_dir(): + return None + return p + + +async def _aurora_resolve_job_folder(job_id: str) -> Optional[Path]: + candidates: List[Any] = [] + try: + st = await _aurora_request_json("GET", f"/api/aurora/status/{quote(job_id, safe='')}", timeout=12.0) + storage = st.get("storage") if isinstance(st, dict) else None + if isinstance(storage, dict): + candidates.extend( + [ + storage.get("output_dir"), + storage.get("upload_dir"), + storage.get("input_path"), + ] + ) + except Exception: + pass + + try: + res = await _aurora_request_json("GET", f"/api/aurora/result/{quote(job_id, safe='')}", timeout=12.0) + storage = res.get("storage") if isinstance(res, dict) else None + if isinstance(storage, dict): + candidates.extend( + [ + storage.get("output_dir"), + storage.get("upload_dir"), + storage.get("input_path"), + ] + ) + except Exception: + pass + + candidates.append(AURORA_DATA_DIR / "outputs" / job_id) + for c in candidates: + p = _aurora_coerce_dir(c) + if p: + return p + return None + + +@app.get("/api/aurora/folder/{job_id}") +async def api_aurora_folder(job_id: str) -> Dict[str, Any]: + folder = await _aurora_resolve_job_folder(job_id) + if not folder: + raise HTTPException(status_code=404, detail="Aurora output folder not found") + return { + "ok": True, + "job_id": job_id, + 
"folder_path": str(folder), + "folder_url": f"file://{folder}", + } + + +@app.post("/api/aurora/folder/{job_id}/open") +async def api_aurora_folder_open(job_id: str) -> Dict[str, Any]: + folder = await _aurora_resolve_job_folder(job_id) + if not folder: + raise HTTPException(status_code=404, detail="Aurora output folder not found") + cmd: Optional[List[str]] = None + if sys.platform == "darwin": + cmd = ["open", str(folder)] + elif os.name == "nt": + try: + os.startfile(str(folder)) # type: ignore[attr-defined] + except Exception as e: + raise HTTPException(status_code=500, detail=f"Failed to open folder: {str(e)[:200]}") + else: + cmd = ["xdg-open", str(folder)] + if cmd is not None: + try: + subprocess.run(cmd, check=False, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL) + except Exception as e: + raise HTTPException(status_code=500, detail=f"Failed to open folder: {str(e)[:200]}") + return {"ok": True, "job_id": job_id, "folder_path": str(folder)} + + +@app.get("/api/aurora/jobs") +async def api_aurora_jobs( + limit: int = Query(default=30, ge=1, le=200), + status: Optional[str] = Query(default=None), +) -> Dict[str, Any]: + query = f"/api/aurora/jobs?limit={limit}" + if status and status.strip(): + query += f"&status={quote(status.strip(), safe=',')}" + return await _aurora_request_json( + "GET", + query, + timeout=20.0, + retries=3, + retry_backoff_sec=0.25, + ) + + +@app.get("/api/aurora/result/{job_id}") +async def api_aurora_result(job_id: str) -> Dict[str, Any]: + return await _aurora_request_json( + "GET", + f"/api/aurora/result/{quote(job_id, safe='')}", + timeout=20.0, + retries=4, + retry_backoff_sec=0.35, + ) + + +@app.get("/api/aurora/compare/{job_id}") +async def api_aurora_compare(job_id: str) -> Dict[str, Any]: + """Before/after comparison with full metadata for a completed job.""" + status = await _aurora_request_json( + "GET", f"/api/aurora/status/{quote(job_id, safe='')}", timeout=15.0, retries=3 + ) + result = {} + try: + result = await 
_aurora_request_json( + "GET", f"/api/aurora/result/{quote(job_id, safe='')}", timeout=15.0, retries=2 + ) + except Exception: + pass + + meta = status.get("metadata") or {} + vid = meta.get("video") or {} + storage = status.get("storage") or {} + output_files = result.get("output_files") or status.get("output_files") or [] + proc_log = result.get("processing_log") or [] + + input_path = storage.get("input_path", "") + output_dir = storage.get("output_dir", "") + + before: Dict[str, Any] = { + "file_name": status.get("file_name") or (input_path.rsplit("/", 1)[-1] if input_path else "—"), + "resolution": f"{vid.get('width', '?')}x{vid.get('height', '?')}" if vid.get("width") else "—", + "width": vid.get("width"), + "height": vid.get("height"), + "duration_s": vid.get("duration_seconds"), + "fps": vid.get("fps"), + "frame_count": vid.get("frame_count"), + "codec": "—", + "file_size_mb": None, + } + + if input_path: + inp = Path(input_path) + if inp.exists(): + before["file_size_mb"] = round(inp.stat().st_size / (1024 * 1024), 2) + _probe = _ffprobe_quick(inp) if inp.exists() else {} + if _probe: + before["codec"] = _probe.get("codec", "—") + + result_file = None + for f in output_files: + if (f.get("type") == "video" or f.get("type") == "photo") and f.get("name"): + result_file = f + break + + after: Dict[str, Any] = { + "file_name": result_file["name"] if result_file else "—", + "resolution": "—", + "width": None, + "height": None, + "duration_s": None, + "fps": None, + "frame_count": None, + "codec": "—", + "file_size_mb": None, + "download_url": (result_file or {}).get("url"), + } + + if result_file and output_dir: + out_path = Path(output_dir) / result_file["name"] + if out_path.exists(): + after["file_size_mb"] = round(out_path.stat().st_size / (1024 * 1024), 2) + _probe = _ffprobe_quick(out_path) + if _probe: + after["resolution"] = _probe.get("resolution", "—") + after["width"] = _probe.get("width") + after["height"] = _probe.get("height") + 
after["duration_s"] = _probe.get("duration_s") + after["fps"] = _probe.get("fps") + after["frame_count"] = _probe.get("frame_count") + after["codec"] = _probe.get("codec", "—") + + faces_total = 0 + enhance_steps = [] + for step in proc_log: + det = step.get("details") or {} + if det.get("faces_detected_total") is not None: + faces_total += det["faces_detected_total"] + enhance_steps.append({ + "step": step.get("step", "?"), + "agent": step.get("agent", "?"), + "model": step.get("model", "?"), + "time_ms": step.get("time_ms"), + }) + + return { + "job_id": job_id, + "status": status.get("status"), + "mode": status.get("mode"), + "media_type": status.get("media_type"), + "elapsed_seconds": status.get("elapsed_seconds"), + "before": before, + "after": after, + "faces_detected": faces_total, + "enhance_steps": enhance_steps, + "folder_path": output_dir, + "input_path": input_path, + } + + +def _ffprobe_quick(filepath: Path) -> Dict[str, Any]: + """Quick ffprobe for resolution, codec, duration, fps, frame count.""" + if not filepath.exists(): + return {} + try: + import subprocess as _sp + raw = _sp.run( + ["ffprobe", "-v", "quiet", "-print_format", "json", "-show_format", "-show_streams", str(filepath)], + capture_output=True, text=True, timeout=10 + ) + if raw.returncode != 0: + return {} + import json as _json + data = _json.loads(raw.stdout) + fmt = data.get("format") or {} + vs = [s for s in (data.get("streams") or []) if s.get("codec_type") == "video"] + if not vs: + return {"duration_s": round(float(fmt.get("duration", 0)), 2)} + v = vs[0] + w, h = v.get("width"), v.get("height") + rfr = v.get("r_frame_rate", "0/1").split("/") + fps = round(int(rfr[0]) / max(1, int(rfr[1])), 2) if len(rfr) == 2 else None + return { + "resolution": f"{w}x{h}" if w and h else "—", + "width": w, "height": h, + "codec": v.get("codec_name", "—"), + "duration_s": round(float(fmt.get("duration", 0)), 2), + "fps": fps, + "frame_count": int(v.get("nb_frames", 0)) or None, + } + except 
Exception: + return {} + + +@app.post("/api/aurora/cancel/{job_id}") +async def api_aurora_cancel(job_id: str) -> Dict[str, Any]: + return await _aurora_request_json( + "POST", + f"/api/aurora/cancel/{quote(job_id, safe='')}", + timeout=20.0, + retries=2, + retry_backoff_sec=0.2, + ) + + +@app.post("/api/aurora/delete/{job_id}") +async def api_aurora_delete( + job_id: str, + purge_files: bool = Query(default=True), +) -> Dict[str, Any]: + path = f"/api/aurora/delete/{quote(job_id, safe='')}?purge_files={'true' if purge_files else 'false'}" + return await _aurora_request_json( + "POST", + path, + timeout=30.0, + retries=2, + retry_backoff_sec=0.2, + ) + + +@app.get("/api/aurora/report/{job_id}.pdf") +async def api_aurora_report_pdf(job_id: str) -> StreamingResponse: + """Stream PDF report from Aurora service without buffering in RAM.""" + encoded_job = quote(job_id, safe="") + paths = [AURORA_SERVICE_URL] + if AURORA_FALLBACK_URL and AURORA_FALLBACK_URL not in paths: + paths.append(AURORA_FALLBACK_URL) + last_err = "" + for base in paths: + url = f"{base}/api/aurora/report/{encoded_job}.pdf" + try: + client = httpx.AsyncClient(timeout=120.0) + r = await client.send(client.build_request("GET", url), stream=True) + if r.status_code >= 400: + body = (await r.aread()).decode(errors="replace")[:400] + await r.aclose() + await client.aclose() + raise HTTPException(status_code=r.status_code, detail=body or f"Aurora report error {r.status_code}") + disposition = r.headers.get("content-disposition", f'inline; filename="{job_id}_forensic_report.pdf"') + + async def _stream(): + try: + async for chunk in r.aiter_bytes(chunk_size=65536): + yield chunk + finally: + await r.aclose() + await client.aclose() + + return StreamingResponse( + _stream(), + media_type="application/pdf", + headers={"Content-Disposition": disposition, "Cache-Control": "no-store"}, + ) + except HTTPException: + raise + except Exception as e: + last_err = str(e)[:200] + if "nodename nor servname provided" 
in str(e): + continue + raise HTTPException(status_code=502, detail=f"Aurora report proxy error: {last_err}") + raise HTTPException(status_code=502, detail=f"Aurora report proxy error: {last_err or 'unavailable'}") + + +@app.get("/api/aurora/files/{job_id}/{file_name:path}") +async def api_aurora_file(job_id: str, file_name: str) -> StreamingResponse: + encoded_job = quote(job_id, safe="") + encoded_name = quote(file_name, safe="") + paths = [AURORA_SERVICE_URL] + if AURORA_FALLBACK_URL and AURORA_FALLBACK_URL not in paths: + paths.append(AURORA_FALLBACK_URL) + last_err = "" + for base in paths: + url = f"{base}/api/aurora/files/{encoded_job}/{encoded_name}" + client = httpx.AsyncClient(timeout=httpx.Timeout(10.0, read=300.0)) + try: + resp = await client.send(client.build_request("GET", url), stream=True) + if resp.status_code >= 400: + body = (await resp.aread()).decode(errors="replace")[:400] + await resp.aclose() + await client.aclose() + if resp.status_code >= 500: + last_err = f"Aurora {resp.status_code}: {body}" + continue + raise HTTPException(status_code=resp.status_code, detail=body or f"Aurora file error {resp.status_code}") + ct = resp.headers.get("content-type", "application/octet-stream") + disp = resp.headers.get("content-disposition", f'inline; filename="{Path(file_name).name}"') + + async def _stream(): + try: + async for chunk in resp.aiter_bytes(chunk_size=65536): + yield chunk + finally: + await resp.aclose() + await client.aclose() + + return StreamingResponse( + _stream(), + media_type=ct, + headers={"Content-Disposition": disp, "Cache-Control": "no-store"}, + ) + except HTTPException: + raise + except Exception as e: + await client.aclose() + last_err = str(e)[:200] + if "nodename nor servname provided" in str(e): + continue + raise HTTPException(status_code=502, detail=f"Aurora file proxy error: {last_err}") + raise HTTPException(status_code=502, detail=f"Aurora file proxy error: {last_err or 'unavailable'}") + + +class 
MediaImageGenerateBody(BaseModel): + prompt: str + negative_prompt: Optional[str] = None + width: int = 1024 + height: int = 1024 + steps: int = 28 + guidance_scale: float = 4.0 + timeout_s: int = 300 + + +class MediaVideoGenerateBody(BaseModel): + prompt: str + seconds: int = 4 + fps: int = 24 + steps: int = 30 + style: str = "cinematic" + aspect_ratio: str = "16:9" + timeout_s: int = 360 + + +class MediaImageModelLoadBody(BaseModel): + model: str + + +def _resolve_media_router_url() -> str: + nodes_cfg = load_nodes_registry() + discovered = ( + get_router_url("NODA2") + or (list(nodes_cfg.values())[0].get("router_url", "") if nodes_cfg else "") + ).rstrip("/") + return MEDIA_ROUTER_URL or discovered + + +def _media_router_candidates() -> List[str]: + raw = _resolve_media_router_url() + candidates: List[str] = [] + for u in (raw, MEDIA_ROUTER_FALLBACK_URL): + if not u: + continue + v = u.strip().rstrip("/") + if v and v not in candidates: + candidates.append(v) + if "://router:" in v or "://router/" in v: + host_fixed = v.replace("://router:", "://127.0.0.1:").replace("://router/", "://127.0.0.1/") + if host_fixed not in candidates: + candidates.append(host_fixed) + for port in ("9102", "8000"): + local = f"http://127.0.0.1:{port}" + if local not in candidates: + candidates.append(local) + return candidates + + +async def _pick_media_router_url() -> str: + candidates = _media_router_candidates() + if not candidates: + return "" + for u in candidates: + p = await _probe_http(f"{u}/healthz", timeout=2.5) + if p.get("reachable"): + return u + return candidates[0] + + +def _media_append_job(kind: str, payload: Dict[str, Any]) -> Dict[str, Any]: + item = { + "id": f"media_{kind}_{uuid.uuid4().hex[:10]}", + "kind": kind, + "ts": datetime.now(timezone.utc).isoformat(), + **payload, + } + _media_recent_jobs.appendleft(item) + return item + + +@app.get("/api/media/health") +async def api_media_health() -> Dict[str, Any]: + router_url = await _pick_media_router_url() + 
probes = await asyncio.gather( + _probe_http(f"{router_url}/healthz") if router_url else asyncio.sleep(0, result={"reachable": False, "error": "router missing"}), + _probe_http(f"{MEDIA_COMFY_AGENT_URL}/health"), + _probe_http(f"{MEDIA_COMFY_UI_URL}/"), + _probe_http(f"{MEDIA_SWAPPER_URL}/health"), + _probe_http(f"{MEDIA_IMAGE_GEN_URL}/health"), + ) + image_models: Dict[str, Any] = {"image_models": []} + try: + async with httpx.AsyncClient(timeout=10.0) as client: + r = await client.get(f"{MEDIA_SWAPPER_URL}/image/models") + if r.status_code < 400 and r.content: + image_models = r.json() + except Exception: + image_models = {"image_models": []} + return { + "ok": True, + "router_url": router_url, + "services": { + "router": probes[0], + "comfy_agent": probes[1], + "comfy_ui": probes[2], + "swapper": probes[3], + "image_gen": probes[4], + }, + "image_models": image_models.get("image_models", []), + "active_image_model": image_models.get("active_image_model"), + "fallback_order": ["comfy", "swapper", "image-gen-service"], + } + + +@app.get("/api/media/models/image") +async def api_media_image_models() -> Dict[str, Any]: + try: + async with httpx.AsyncClient(timeout=15.0) as client: + r = await client.get(f"{MEDIA_SWAPPER_URL}/image/models") + if r.status_code >= 400: + raise HTTPException(status_code=r.status_code, detail=r.text[:240] or "swapper error") + data = r.json() if r.content else {} + return { + "ok": True, + "image_models": data.get("image_models", []), + "active_image_model": data.get("active_image_model"), + "device": data.get("device"), + } + except HTTPException: + raise + except Exception as e: + raise HTTPException(status_code=502, detail=f"Image models unavailable: {str(e)[:200]}") + + +@app.post("/api/media/models/image/load") +async def api_media_image_model_load(body: MediaImageModelLoadBody) -> Dict[str, Any]: + model = body.model.strip() + if not model: + raise HTTPException(status_code=400, detail="model is required") + try: + async with 
httpx.AsyncClient(timeout=300.0) as client: + r = await client.post(f"{MEDIA_SWAPPER_URL}/image/models/{quote(model, safe='')}/load") + if r.status_code >= 400: + raise HTTPException(status_code=r.status_code, detail=r.text[:240] or "swapper load error") + return {"ok": True, "result": r.json() if r.content else {"status": "ok"}} + except HTTPException: + raise + except Exception as e: + raise HTTPException(status_code=502, detail=f"Image model load failed: {str(e)[:200]}") + + +@app.post("/api/media/generate/image") +async def api_media_generate_image(body: MediaImageGenerateBody) -> Dict[str, Any]: + prompt = body.prompt.strip() + if not prompt: + raise HTTPException(status_code=400, detail="prompt is required") + router_url = await _pick_media_router_url() + if not router_url: + raise HTTPException(status_code=503, detail="Router URL not configured") + + params = { + "prompt": prompt, + "negative_prompt": body.negative_prompt or "", + "width": max(256, min(2048, int(body.width))), + "height": max(256, min(2048, int(body.height))), + "steps": max(1, min(120, int(body.steps))), + "guidance_scale": max(0.0, min(20.0, float(body.guidance_scale))), + "timeout_s": max(30, min(900, int(body.timeout_s))), + } + started = time.monotonic() + response = await execute_tool( + router_url, + tool="image_generate", + action="generate", + params=params, + agent_id="sofiia", + timeout=float(params["timeout_s"] + 30), + api_key=ROUTER_API_KEY, + ) + ok = response.get("status") == "ok" + result_data = response.get("data") or {} + result_item = _media_append_job( + "image", + { + "status": "ok" if ok else "failed", + "provider": "router:image_generate", + "prompt": prompt[:180], + "duration_ms": int((time.monotonic() - started) * 1000), + "result": result_data.get("result"), + "has_image_base64": bool(result_data.get("image_base64")), + "error": (response.get("error") or {}).get("message") if isinstance(response.get("error"), dict) else response.get("error"), + }, + ) + if not ok: 
+ raise HTTPException(status_code=502, detail=f"Image generate failed: {result_item.get('error') or 'tool failed'}") + return {"ok": True, "job": result_item, "tool_response": response} + + +@app.post("/api/media/generate/video") +async def api_media_generate_video(body: MediaVideoGenerateBody) -> Dict[str, Any]: + prompt = body.prompt.strip() + if not prompt: + raise HTTPException(status_code=400, detail="prompt is required") + router_url = await _pick_media_router_url() + if not router_url: + raise HTTPException(status_code=503, detail="Router URL not configured") + + params = { + "prompt": prompt, + "seconds": max(1, min(8, int(body.seconds))), + "fps": max(8, min(60, int(body.fps))), + "steps": max(1, min(120, int(body.steps))), + "timeout_s": max(60, min(1200, int(body.timeout_s))), + } + started = time.monotonic() + response = await execute_tool( + router_url, + tool="comfy_generate_video", + action="generate", + params=params, + agent_id="sofiia", + timeout=float(params["timeout_s"] + 30), + api_key=ROUTER_API_KEY, + ) + ok = response.get("status") == "ok" + provider = "router:comfy_generate_video" + fallback_payload: Dict[str, Any] = {} + if not ok: + try: + async with httpx.AsyncClient(timeout=120.0) as client: + r = await client.post( + f"{MEDIA_SWAPPER_URL}/video/generate", + json={ + "prompt": prompt, + "duration": params["seconds"], + "style": body.style, + "aspect_ratio": body.aspect_ratio, + }, + ) + if r.status_code < 400: + fallback_payload = r.json() if r.content else {} + ok = True + provider = "swapper:video/generate" + except Exception as e: + fallback_payload = {"error": str(e)[:200]} + + result_item = _media_append_job( + "video", + { + "status": "ok" if ok else "failed", + "provider": provider, + "prompt": prompt[:180], + "duration_ms": int((time.monotonic() - started) * 1000), + "result": (response.get("data") or {}).get("result") if not fallback_payload else fallback_payload, + "error": None if ok else ((response.get("error") or 
{}).get("message") if isinstance(response.get("error"), dict) else response.get("error")), + }, + ) + if not ok: + raise HTTPException(status_code=502, detail=f"Video generate failed: {result_item.get('error') or 'tool failed'}") + return {"ok": True, "job": result_item, "tool_response": response, "fallback_response": fallback_payload} + + +@app.get("/api/media/jobs") +async def api_media_jobs(limit: int = Query(default=20, ge=1, le=100)) -> Dict[str, Any]: + return {"ok": True, "count": min(limit, len(_media_recent_jobs)), "jobs": list(_media_recent_jobs)[:limit]} + + +# ─── Chat (runtime contract) ───────────────────────────────────────────────── + +@app.get("/api/chat/config") +async def api_chat_config() -> Dict[str, Any]: + return { + "preferred_model": SOFIIA_PREFERRED_CHAT_MODEL, + "ollama": { + "timeout_sec": SOFIIA_OLLAMA_TIMEOUT_SEC, + "voice_timeout_sec": SOFIIA_OLLAMA_VOICE_TIMEOUT_SEC, + "keep_alive": SOFIIA_OLLAMA_KEEP_ALIVE, + "num_ctx": SOFIIA_OLLAMA_NUM_CTX, + "num_thread": SOFIIA_OLLAMA_NUM_THREAD, + "num_gpu": SOFIIA_OLLAMA_NUM_GPU, + }, + } + + +class ChatSendBody(BaseModel): + message: str + model: str = "ollama:qwen3:14b" + node_id: str = "NODA2" + project_id: Optional[str] = None + session_id: Optional[str] = None + user_id: Optional[str] = None + history: List[Dict[str, Any]] = [] + # Voice routing hint — forwarded to Router as X-Voice-Profile header + # Values: "voice_fast_uk" (default) | "voice_quality_uk" + voice_profile: Optional[str] = None + + +@app.post("/api/chat/send") +async def api_chat_send(body: ChatSendBody, request: Request): + """BFF chat: Ollama or router. Returns runtime contract fields. 
Rate: 30/min.""" + client_ip = request.client.host if request.client else "unknown" + if not _check_rate(f"chat:{client_ip}", max_calls=30, window_sec=60): + raise HTTPException(status_code=429, detail="Rate limit: 30 messages/min") + + # Runtime identity + project_id = body.project_id or "default" + session_id = body.session_id or f"sess_{uuid.uuid4().hex[:12]}" + user_id = body.user_id or "console_user" + + provider, _, model_name = body.model.partition(":") + reply = "" + t0 = time.monotonic() + + def _clean_reply(text: str) -> str: + """Strip ... reasoning blocks (Qwen3/DeepSeek-R1) before returning to user. + + Strategy: + 1. re.DOTALL regex removes complete ... blocks. + 2. Fallback split removes any trailing unclosed block + (model stopped mid-reasoning without ). + """ + import re + # Primary: strip complete blocks (multiline-safe with DOTALL) + cleaned = re.sub(r".*?", "", text, + flags=re.DOTALL | re.IGNORECASE) + # Fallback: if an unclosed block remains, drop everything after it + if "" in cleaned.lower(): + cleaned = re.split(r"(?i)", cleaned)[0] + return cleaned.strip() + + # Broadcast: user message sent + _broadcast_bg(_make_event("chat.message", + {"text": body.message[:200], "provider": provider, "model": body.model}, + project_id=project_id, session_id=session_id, user_id=user_id)) + + # voice_profile determines LLM options for voice turns. + # None = text chat (full prompt, no token limit enforcement). 
    # ── Chat turn: voice-profile handling + provider dispatch ────────────────
    # NOTE(review): this span is the interior of a larger request handler whose
    # signature is defined above this excerpt; `provider`, `model_name`,
    # `project_id`, `session_id`, `user_id` and `t0` are bound there.
    _vp = body.voice_profile  # "voice_fast_uk" | "voice_quality_uk" | None
    _is_voice_turn = _vp is not None
    _is_quality = _vp == "voice_quality_uk"

    # System prompt: voice turns get guardrails appended
    _system_prompt = SOFIIA_SYSTEM_PROMPT
    if _is_voice_turn:
        _system_prompt = SOFIIA_SYSTEM_PROMPT + SOFIIA_VOICE_PROMPT_SUFFIX

    # Sampling options: voice turns cap output length; the quality profile runs
    # slightly warmer and longer than the fast profile.
    _voice_options = {
        "temperature": 0.18 if _is_quality else 0.15,
        "repeat_penalty": 1.1,
        "num_predict": 256 if _is_quality else 220,  # max_tokens per contract (≤256)
    } if _is_voice_turn else {
        "temperature": 0.15,
        "repeat_penalty": 1.1,
        "num_predict": SOFIIA_OLLAMA_NUM_PREDICT_TEXT,
    }

    if provider == "ollama":
        # Local Ollama chat: system prompt + last 12 history turns + user message.
        ollama_url = get_ollama_url()
        effective_model_name = model_name or "qwen3:14b"
        messages: List[Dict[str, Any]] = [{"role": "system", "content": _system_prompt}]
        messages.extend(body.history[-12:])
        messages.append({"role": "user", "content": body.message})
        try:
            async with httpx.AsyncClient(timeout=SOFIIA_OLLAMA_TIMEOUT_SEC) as client:
                r = await client.post(
                    f"{ollama_url}/api/chat",
                    json=_make_ollama_payload(effective_model_name, messages, _voice_options),
                )
                r.raise_for_status()
                data = r.json()
                reply = _clean_reply((data.get("message") or {}).get("content", "") or "Ollama: порожня відповідь")
        except httpx.HTTPStatusError as e:
            err_msg = f"Ollama HTTP {e.response.status_code}"
            _broadcast_bg(_make_event("error", {"where": "ollama", "message": err_msg},
                                      project_id=project_id, session_id=session_id, user_id=user_id))
            raise HTTPException(status_code=502, detail=f"{err_msg}: {str(e)[:200]}")
        except Exception as e:
            _broadcast_bg(_make_event("error", {"where": "ollama", "message": str(e)[:100]},
                                      project_id=project_id, session_id=session_id, user_id=user_id))
            raise HTTPException(status_code=502, detail=f"Ollama error: {str(e)[:200]}")

    elif provider == "router":
        # Internal agent router. model_name may encode "agent|model", a bare
        # model id (detected by a ":"), or a bare agent id.
        base_url = get_router_url(body.node_id)
        router_agent_id = "sofiia"
        router_model = None
        if model_name:
            if "|" in model_name:
                left, right = model_name.split("|", 1)
                router_agent_id = left or "sofiia"
                router_model = right or None
            elif ":" in model_name:
                # Looks like model id (qwen3:14b, qwen3.5:35b-a3b, etc.)
                router_model = model_name
            elif model_name not in ("default",):
                # Treat plain token as agent id (router:soul, router:monitor, ...)
                router_agent_id = model_name
        metadata: Dict[str, Any] = {
            "project_id": project_id,
            "session_id": session_id,
            "user_id": user_id,
            "client": "sofiia-console",
            "voice_profile": _vp,
        }
        try:
            out = await infer(
                base_url,
                router_agent_id,
                body.message,
                model=router_model,
                metadata=metadata,
                timeout=300.0,
                api_key=ROUTER_API_KEY,
            )
            reply = _clean_reply(out.get("response", out.get("text", "")))
        except Exception as e:
            _broadcast_bg(_make_event("error", {"where": "router", "message": str(e)[:100]},
                                      project_id=project_id, session_id=session_id, user_id=user_id))
            raise HTTPException(status_code=502, detail=str(e)[:300])

    elif provider == "glm":
        # Zhipu AI GLM — OpenAI-compatible API at bigmodel.cn
        glm_api_key = os.getenv("GLM5_API_KEY", os.getenv("GLM_API_KEY", "")).strip()
        if not glm_api_key:
            raise HTTPException(status_code=503, detail="GLM5_API_KEY not set.")
        glm_model = model_name or "glm-4.7"
        messages_glm: List[Dict[str, Any]] = [{"role": "system", "content": _system_prompt}]
        messages_glm.extend(body.history[-12:])
        messages_glm.append({"role": "user", "content": body.message})
        try:
            async with httpx.AsyncClient(timeout=120.0) as client:
                r = await client.post(
                    "https://open.bigmodel.cn/api/paas/v4/chat/completions",
                    headers={"Authorization": f"Bearer {glm_api_key}", "Content-Type": "application/json"},
                    json={"model": glm_model, "messages": messages_glm, "stream": False},
                )
                r.raise_for_status()
                data = r.json()
                reply = _clean_reply((data.get("choices") or [{}])[0].get("message", {}).get("content", "") or "GLM: порожня відповідь")
        except httpx.HTTPStatusError as e:
            err_msg = f"GLM HTTP {e.response.status_code}: {e.response.text[:200]}"
            _broadcast_bg(_make_event("error", {"where": "glm", "message": err_msg},
                                      project_id=project_id, session_id=session_id, user_id=user_id))
            raise HTTPException(status_code=502, detail=err_msg)
        except Exception as e:
            _broadcast_bg(_make_event("error", {"where": "glm", "message": str(e)[:100]},
                                      project_id=project_id, session_id=session_id, user_id=user_id))
            raise HTTPException(status_code=502, detail=f"GLM error: {str(e)[:200]}")

    elif provider == "grok":
        # xAI Grok — OpenAI-compatible API
        xai_api_key = os.getenv("XAI_API_KEY", "").strip()
        if not xai_api_key:
            raise HTTPException(status_code=503, detail="XAI_API_KEY not set. Add it to BFF environment.")
        grok_model = model_name or "grok-4-1-fast-reasoning"
        messages: List[Dict[str, Any]] = [{"role": "system", "content": _system_prompt}]
        messages.extend(body.history[-12:])
        messages.append({"role": "user", "content": body.message})
        try:
            async with httpx.AsyncClient(timeout=120.0) as client:
                r = await client.post(
                    "https://api.x.ai/v1/chat/completions",
                    headers={"Authorization": f"Bearer {xai_api_key}", "Content-Type": "application/json"},
                    json={"model": grok_model, "messages": messages, "stream": False},
                )
                r.raise_for_status()
                data = r.json()
                reply = _clean_reply((data.get("choices") or [{}])[0].get("message", {}).get("content", "") or "Grok: порожня відповідь")
        except httpx.HTTPStatusError as e:
            err_msg = f"Grok HTTP {e.response.status_code}: {e.response.text[:200]}"
            _broadcast_bg(_make_event("error", {"where": "grok", "message": err_msg},
                                      project_id=project_id, session_id=session_id, user_id=user_id))
            raise HTTPException(status_code=502, detail=err_msg)
        except Exception as e:
            _broadcast_bg(_make_event("error", {"where": "grok", "message": str(e)[:100]},
                                      project_id=project_id, session_id=session_id, user_id=user_id))
            raise HTTPException(status_code=502,
detail=f"Grok error: {str(e)[:200]}") + + else: + raise HTTPException(status_code=400, detail=f"Unsupported provider: {provider}. Use ollama, router, or grok.") + + latency_ms = int((time.monotonic() - t0) * 1000) + tokens_est = len(reply.split()) + trace_id = f"chat_{session_id}_{uuid.uuid4().hex[:8]}" + + # Broadcast: reply + _broadcast_bg(_make_event("chat.reply", + {"text": reply[:200], "provider": provider, "model": body.model, + "latency_ms": latency_ms, "trace_id": trace_id}, + project_id=project_id, session_id=session_id, user_id=user_id)) + + # Memory save (best-effort, non-blocking) + asyncio.get_event_loop().create_task( + _do_save_memory(body.message, reply, session_id, project_id, user_id) + ) + + # AISTALK forward (if enabled) + if _aistalk: + try: + _aistalk.handle_event(_make_event("chat.reply", + {"text": reply, "provider": provider, "model": body.model}, + project_id=project_id, session_id=session_id, user_id=user_id)) + except Exception as e: + logger.debug("AISTALK forward failed: %s", e) + + return { + "ok": True, + "project_id": project_id, + "session_id": session_id, + "user_id": user_id, + "response": reply, + "model": body.model, + "backend": provider, + "trace_id": trace_id, + "meta": { + "latency_ms": latency_ms, + "tokens_est": tokens_est, + "trace_id": trace_id, + }, + } + + +async def _do_save_memory( + user_msg: str, + ai_reply: str, + session_id: str, + project_id: str = "default", + user_id: str = "console_user", + agent_id: str = "sofiia", +) -> None: + # 1) Persist to local SQLite (projects/sessions/messages schema) + try: + # Ensure target project exists to satisfy sessions.project_id FK. 
+ proj = await _app_db.get_project(project_id) + if not proj: + await _app_db.create_project( + name=project_id.upper(), + description=f"Auto-created project for {project_id} sessions", + project_id=project_id, + ) + await _app_db.upsert_session(session_id, project_id=project_id) + last_msg = None + if user_msg: + saved = await _app_db.save_message(session_id, "user", user_msg[:4096]) + last_msg = saved["msg_id"] + if ai_reply: + await _app_db.save_message( + session_id, "assistant", ai_reply[:4096], parent_msg_id=last_msg + ) + except Exception as e: + logger.debug("SQLite memory save skipped: %s", e) + + # 2) Best-effort: also send to Memory Service (Qdrant + Neo4j) + mem_url = get_memory_service_url() + try: + async with httpx.AsyncClient(timeout=5.0) as client: + for role, content in [("user", user_msg), ("assistant", ai_reply)]: + if not content: + continue + resp = await client.post(f"{mem_url}/agents/{agent_id}/memory", json={ + "agent_id": agent_id, + "role": role, + "content": content[:1000], + "user_id": user_id, + "channel_id": session_id, + "metadata": {"project_id": project_id, "client": "sofiia-console", "agent_id": agent_id}, + }) + if resp.status_code >= 400: + logger.warning( + "Memory Service save failed status=%s agent=%s session=%s body=%s", + resp.status_code, + agent_id, + session_id, + (resp.text or "")[:240], + ) + except Exception as e: + logger.debug("Memory Service save skipped: %s", e) + + +# ─── Ops ──────────────────────────────────────────────────────────────────── + +class OpsRunBody(BaseModel): + action_id: str + node_id: str = "NODA2" + params: dict = {} + project_id: Optional[str] = None + session_id: Optional[str] = None + source_run_id: Optional[str] = None # link to supervisor run + source_msg_id: Optional[str] = None # link to message + + +class NodeUpsertBody(BaseModel): + node_id: str + label: str + router_url: str + gateway_url: Optional[str] = "" + monitor_url: Optional[str] = "" + supervisor_url: Optional[str] = "" + 
ssh_host: Optional[str] = "" + ssh_port: Optional[int] = 22 + ssh_user: Optional[str] = "" + ssh_password_env: Optional[str] = "" + ssh_ipv6: Optional[str] = "" + ssh_host_keys: Optional[List[Dict[str, Any]]] = None + enabled: bool = True + + +@app.get("/api/ops/actions") +async def api_ops_actions_list(): + return {"actions": list(OPS_ACTIONS.keys())} + + +@app.post("/api/ops/run") +async def api_ops_run(body: OpsRunBody, _auth=Depends(require_api_key)): + """Run ops action. Broadcasts ops.run event and auto-creates ops_run graph node.""" + import uuid as _uuid + t0 = time.monotonic() + project_id = body.project_id or "default" + session_id = body.session_id or "console" + ops_run_id = str(_uuid.uuid4()) + started_at = _app_db._now() if _app_db else None + + result = await run_ops_action( + body.action_id, body.node_id, body.params, + agent_id="sofiia", timeout=90.0, api_key=ROUTER_API_KEY, + ) + elapsed = int((time.monotonic() - t0) * 1000) + ok = result.get("status") != "failed" + status_str = "ok" if ok else "failed" + error_str = result.get("error", "") if not ok else "" + + _broadcast_bg(_make_event("ops.run", + {"name": body.action_id, "ok": ok, "elapsed_ms": elapsed}, + project_id=project_id, session_id=session_id)) + + # Auto-create ops_run graph node (fire-and-forget, do not fail the request) + if _app_db and project_id: + try: + gn = await _app_db.upsert_ops_run_node( + project_id=project_id, + ops_run_id=ops_run_id, + action_id=body.action_id, + node_id=body.node_id, + status=status_str, + elapsed_ms=elapsed, + error=str(error_str)[:500], + started_at=started_at or "", + source_run_id=body.source_run_id or "", + source_msg_id=body.source_msg_id or "", + ) + result["_graph_node_id"] = gn.get("node_id") + result["_ops_run_id"] = ops_run_id + except Exception as _e: + logger.warning("ops_run graph node creation failed (non-fatal): %s", _e) + + return result + + +# ─── Nodes ────────────────────────────────────────────────────────────────── + 

@app.get("/api/nodes/dashboard")
async def api_nodes_dashboard(refresh: bool = Query(False), _auth: str = Depends(require_auth)):
    """
    Nodes dashboard with full telemetry.
    Returns cached data (refreshed every NODES_POLL_INTERVAL_SEC seconds).
    Pass ?refresh=true to force immediate re-probe.
    """
    if refresh or not _nodes_cache["nodes"]:
        fresh = await get_nodes_dashboard(router_api_key=ROUTER_API_KEY)
        _nodes_cache.update({**fresh, "ts": _now_iso()})
        return {**fresh, "ts": _nodes_cache["ts"], "cached": False}
    return {**_nodes_cache, "cached": True}


@app.get("/api/nodes/registry")
async def api_nodes_registry(_auth: str = Depends(require_auth)):
    """Return the raw nodes registry as loaded from disk."""
    return load_nodes_registry()


@app.get("/api/nodes/ssh/status")
async def api_nodes_ssh_status(
    node_id: str = Query(..., description="Node ID, e.g. NODA1"),
    _auth=Depends(require_api_key_strict),
):
    """TCP-probe a node's SSH endpoint (IPv4 first, IPv6 fallback).

    `ok` is True only when the port is reachable AND the profile has either
    a password or a private key configured.
    """
    node_id = node_id.strip().upper()
    ssh = get_node_ssh_profile(node_id)
    if not ssh.get("configured"):
        return {
            "ok": False,
            "node_id": node_id,
            "configured": False,
            "error": "ssh profile is not configured",
            "ssh": ssh,
        }

    host = ssh.get("host", "")
    host_ipv6 = (ssh.get("ipv6") or "").strip()
    port = int(ssh.get("port") or 22)
    tcp_ok = False
    tcp_error = None
    connect_host = host

    def _try_connect(target_host: str) -> Optional[str]:
        # Returns None on success, truncated error text on failure.
        try:
            with socket.create_connection((target_host, port), timeout=5):
                return None
        except Exception as e:
            return str(e)[:160]

    tcp_error = _try_connect(host)
    if tcp_error is None:
        tcp_ok = True
    elif host_ipv6:
        # IPv4 failed — retry over the configured IPv6 address.
        err_v6 = _try_connect(host_ipv6)
        if err_v6 is None:
            tcp_ok = True
            tcp_error = None
            connect_host = host_ipv6
        else:
            tcp_error = f"ipv4={tcp_error}; ipv6={err_v6}"[:220]

    ok = tcp_ok and (ssh["auth"]["password_set"] or ssh["auth"]["private_key_set"])
    return {
        "ok": ok,
        "node_id": node_id,
        "configured": True,
        "tcp_reachable": tcp_ok,
        "tcp_error": tcp_error,
        "connect_host": connect_host,
        "ssh": ssh,
    }


@app.post("/api/nodes/add")
async def api_nodes_add(body: NodeUpsertBody, _auth=Depends(require_api_key_strict)):
    """Upsert a node into the registry, persist it, and refresh the dashboard cache."""
    reg = load_nodes_registry()
    reg.setdefault("defaults", {"health_timeout_sec": 10, "tools_timeout_sec": 30})
    reg.setdefault("nodes", {})
    node_id = body.node_id.strip().upper()
    if not node_id:
        raise HTTPException(status_code=400, detail="node_id is required")
    node_payload: Dict[str, Any] = {
        "label": body.label.strip() or node_id,
        "router_url": body.router_url.strip(),
        "gateway_url": (body.gateway_url or "").strip(),
        "monitor_url": (body.monitor_url or body.router_url).strip(),
        "supervisor_url": (body.supervisor_url or "").strip(),
        "enabled": body.enabled,
    }
    ssh_host = (body.ssh_host or "").strip()
    ssh_user = (body.ssh_user or "").strip()
    if ssh_host and ssh_user:
        # SSH profile is only stored when both host and user are provided.
        node_payload["ssh"] = {
            "host": ssh_host,
            "ipv6": (body.ssh_ipv6 or "").strip(),
            "port": int(body.ssh_port or 22),
            "user": ssh_user,
            "auth": {
                # Env var NAME holding the password, not the password itself.
                "password_env": (body.ssh_password_env or f"NODES_{node_id}_SSH_PASSWORD").strip(),
            },
            "host_keys": body.ssh_host_keys or [],
        }

    reg["nodes"][node_id] = node_payload
    path = save_nodes_registry(reg)
    fresh = await get_nodes_dashboard(router_api_key=ROUTER_API_KEY)
    _nodes_cache.update({**fresh, "ts": _now_iso()})
    return {"ok": True, "saved_to": str(path), "node_id": node_id, "nodes": reg.get("nodes", {})}


# ─── Voice ────────────────────────────────────────────────────────────────── 

@app.post("/api/voice/stt")
async def api_voice_stt(
    request: Request,
    audio: UploadFile = File(...),
    language: Optional[str] = Query(None),
    session_id: Optional[str] = Query(None),
    project_id: Optional[str] = Query(None),
):
    """STT proxy → memory-service. Rate: 20/min. 
    Broadcasts voice.stt events."""
    client_ip = request.client.host if request.client else "unknown"
    if not _check_rate(f"stt:{client_ip}", max_calls=20, window_sec=60):
        raise HTTPException(status_code=429, detail="Rate limit: 20 STT calls/min")

    sid = session_id or "console"
    pid = project_id or "default"
    _broadcast_bg(_make_event("voice.stt", {"phase": "start"},
                              project_id=pid, session_id=sid))
    t0 = time.monotonic()

    mem_url = get_memory_service_url()
    try:
        content = await audio.read()
        if not content:
            raise HTTPException(status_code=400, detail="Empty audio file")
        async with httpx.AsyncClient(timeout=60.0) as client:
            files = {"audio": (audio.filename or "audio.webm", content, audio.content_type or "audio/webm")}
            params = {"language": language} if language else {}
            r = await client.post(f"{mem_url}/voice/stt", files=files, params=params)
            r.raise_for_status()
            result = r.json()
        elapsed = int((time.monotonic() - t0) * 1000)
        upstream_ms = result.get("compute_ms", 0)
        logger.info("STT ok: lang=%s text_len=%d bff_ms=%d upstream_ms=%d",
                    language or "auto", len(result.get("text", "")), elapsed, upstream_ms)
        _broadcast_bg(_make_event("voice.stt",
            {"phase": "done", "elapsed_ms": elapsed, "upstream_ms": upstream_ms},
            project_id=pid, session_id=sid))
        result["bff_ms"] = elapsed
        return result
    except httpx.HTTPStatusError as e:
        logger.error("STT upstream error: status=%s", e.response.status_code)
        _broadcast_bg(_make_event("voice.stt", {"phase": "error", "message": str(e)[:80]},
                                  project_id=pid, session_id=sid))
        raise HTTPException(status_code=e.response.status_code, detail=f"STT upstream: {str(e)[:200]}")
    except HTTPException:
        # Re-raise our own 400 (empty audio) untouched.
        raise
    except Exception as e:
        logger.error("STT proxy error: %s", e, exc_info=True)
        _broadcast_bg(_make_event("voice.stt", {"phase": "error", "message": str(e)[:80]},
                                  project_id=pid, session_id=sid))
        raise HTTPException(status_code=502, detail=f"STT error: {str(e)[:200]}")


class TTSRequest(BaseModel):
    """Request body for POST /api/voice/tts."""
    text: str
    voice: Optional[str] = "default"
    speed: Optional[float] = 1.0
    model: Optional[str] = "piper"
    session_id: Optional[str] = None
    project_id: Optional[str] = None


@app.post("/api/voice/tts")
async def api_voice_tts(body: TTSRequest, request: Request):
    """TTS proxy → memory-service. Rate: 30/min per IP. Concurrent: MAX_CONCURRENT_TTS."""
    client_ip = request.client.host if request.client else "unknown"
    if not _check_rate(f"tts:{client_ip}", max_calls=30, window_sec=60):
        raise HTTPException(status_code=429, detail="Rate limit: 30 TTS calls/min per client")

    # Concurrent synthesis guard — prevents memory-service DoS on burst requests.
    # NOTE(review): peeks at a private asyncio.Semaphore attribute; this is an
    # advisory fast-fail only and is racy between the peek and the acquire below.
    sem = _get_tts_semaphore()
    if not sem._value:  # non-blocking peek: all slots occupied
        raise HTTPException(status_code=503,
            detail=f"TTS busy: max {_MAX_CONCURRENT_TTS} concurrent synthesis. Retry in 1-2s.")

    # Server-side sanitization (app.voice_utils): strips markup/markdown and
    # URLs; truncates safely.
    text = _sanitize_for_voice(body.text.strip())
    if not text:
        raise HTTPException(status_code=400, detail="Empty text")

    sid = body.session_id or "console"
    pid = body.project_id or "default"
    _broadcast_bg(_make_event("voice.tts", {"phase": "start", "voice": body.voice},
                              project_id=pid, session_id=sid))
    t0 = time.monotonic()

    # NOTE(review): re-fetches the same module-level semaphore; redundant but harmless.
    sem = _get_tts_semaphore()
    async with sem:  # enforce MAX_CONCURRENT_TTS globally
        try:
            # ── Voice HA path (opt-in via VOICE_HA_ENABLED=true) ──────────────
            if is_voice_ha_enabled():
                router_url = get_voice_ha_router_url()
                tts_payload = {
                    "text": text,
                    "voice": body.voice,
                    "speed": body.speed,
                    "model": body.model,
                }
                async with httpx.AsyncClient(timeout=30.0) as client:
                    r = await client.post(
                        f"{router_url}/v1/capability/voice_tts",
                        json=tts_payload,
                    )
                    r.raise_for_status()
                elapsed = int((time.monotonic() - t0) * 1000)
                upstream_ct = r.headers.get("content-type", "audio/wav")
                tts_engine = r.headers.get("X-TTS-Engine", "unknown")
                tts_voice_used = r.headers.get("X-TTS-Voice", body.voice)
                voice_node = r.headers.get("X-Voice-Node", "unknown")
                voice_mode = r.headers.get("X-Voice-Mode", "remote")
                ext = "mp3" if "mpeg" in upstream_ct else "wav"
                logger.info("TTS HA ok: voice=%s node=%s mode=%s elapsed=%dms",
                            tts_voice_used, voice_node, voice_mode, elapsed)
                _broadcast_bg(_make_event("voice.tts",
                    {"phase": "done", "voice": tts_voice_used, "engine": tts_engine,
                     "elapsed_ms": elapsed, "ha_mode": voice_mode, "ha_node": voice_node},
                    project_id=pid, session_id=sid))
                # Re-stream the upstream audio body with diagnostic headers.
                return StreamingResponse(
                    io.BytesIO(r.content),
                    media_type=upstream_ct,
                    headers={
                        "Content-Disposition": f"inline; filename=speech.{ext}",
                        "X-TTS-Engine": tts_engine,
                        "X-TTS-Voice": tts_voice_used,
                        "X-TTS-Elapsed-MS": str(elapsed),
                        "X-Voice-Node": voice_node,
                        "X-Voice-Mode": voice_mode,
                        "Cache-Control": "no-store",
                    },
                )

            # ── Legacy direct path (default, VOICE_HA_ENABLED=false) ────────── 
            mem_url = get_memory_service_url()
            async with httpx.AsyncClient(timeout=30.0) as client:
                r = await client.post(
                    f"{mem_url}/voice/tts",
                    json={"text": text, "voice": body.voice, "speed": body.speed, "model": body.model},
                )
                r.raise_for_status()
            elapsed = int((time.monotonic() - t0) * 1000)
            upstream_ct = r.headers.get("content-type", "audio/wav")
            tts_engine = r.headers.get("X-TTS-Engine", "unknown")
            tts_voice_used = r.headers.get("X-TTS-Voice", body.voice)
            ext = "mp3" if "mpeg" in upstream_ct else "wav"
            logger.info("TTS ok: voice=%s engine=%s len=%d fmt=%s elapsed=%dms",
                        tts_voice_used, tts_engine, len(text), ext, elapsed)
            _broadcast_bg(_make_event("voice.tts",
                {"phase": "done", "voice": tts_voice_used, "engine": tts_engine, "elapsed_ms": elapsed},
                project_id=pid, session_id=sid))
            return StreamingResponse(
                io.BytesIO(r.content),
                media_type=upstream_ct,
                headers={
                    "Content-Disposition": f"inline; filename=speech.{ext}",
                    "X-TTS-Engine": tts_engine,
                    "X-TTS-Voice": tts_voice_used,
                    "X-TTS-Elapsed-MS": str(elapsed),
                    "Cache-Control": "no-store",
                },
            )
        except httpx.HTTPStatusError as e:
            # Feed the voice error ring for the incident repro pack.
            _record_tts_error("http_error", e.response.status_code, str(e)[:120], body.voice)
            logger.error("TTS upstream error: status=%s voice=%s ha=%s",
                         e.response.status_code, body.voice, is_voice_ha_enabled())
            _broadcast_bg(_make_event("voice.tts", {"phase": "error", "message": str(e)[:80]},
                                      project_id=pid, session_id=sid))
            raise HTTPException(status_code=e.response.status_code, detail=f"TTS upstream: {str(e)[:200]}")
        except Exception as e:
            _record_tts_error("proxy_error", None, str(e)[:120], body.voice)
            logger.error("TTS proxy error: %s ha=%s", e, is_voice_ha_enabled(), exc_info=True)
            _broadcast_bg(_make_event("voice.tts", {"phase": "error", "message": str(e)[:80]},
                                      project_id=pid, session_id=sid))
            raise HTTPException(status_code=502, detail=f"TTS error: {str(e)[:200]}")


@app.get("/api/voice/voices")
async def api_voice_voices():
    """List available voices; falls back to a static macOS voice on upstream failure."""
    mem_url = get_memory_service_url()
    try:
        async with httpx.AsyncClient(timeout=10.0) as client:
            r = await client.get(f"{mem_url}/voice/voices")
            r.raise_for_status()
            return r.json()
    except Exception as e:
        return {"piper": [], "macos": [{"id": "Milena", "name": "Milena (uk-UA)", "lang": "uk-UA"}], "error": str(e)[:100]}


# ─── Phase 2: Voice Chat Stream (sentence chunking → early TTS) ────────────── 
# Strategy: split LLM text into sentences → synthesize first sentence immediately
# → return {first_audio_b64, first_text, rest_text[]}
# Browser plays first sentence while fetching TTS for remaining sentences in bg.
# TTFA drops from ~10-14s to ~3-5s (LLM still runs full, but TTS starts on chunk1).

# NOTE(review): mid-file imports — kept close to the Phase-2 section they serve.
from app.voice_utils import split_into_voice_chunks as _split_into_voice_chunks
from app.voice_utils import clean_think_blocks as _clean_think_blocks_util
from app.voice_utils import sanitize_for_voice as _sanitize_for_voice
from app.voice_utils import MIN_CHUNK_CHARS as _MIN_CHUNK_CHARS, MAX_CHUNK_CHARS as _MAX_CHUNK_CHARS


class VoiceChatStreamBody(BaseModel):
    """Request body for POST /api/voice/chat/stream."""
    message: str
    model: str = "ollama:qwen3:14b"
    node_id: str = "NODA2"
    voice: Optional[str] = None
    voice_profile: Optional[str] = "voice_fast_uk"
    session_id: Optional[str] = None
    project_id: Optional[str] = None
    history: List[Dict[str, Any]] = []


@app.post("/api/voice/chat/stream")
async def api_voice_chat_stream(body: VoiceChatStreamBody, request: Request):
    """Phase 2 Voice Chat: LLM → sentence split → first sentence TTS immediately.

    Returns:
        {
          ok: bool,
          first_text: str,        # first sentence
          first_audio_b64: str,   # base64 MP3 for immediate playback
          first_audio_mime: str,  # "audio/mpeg"
          rest_chunks: [str, ...],# remaining sentences (client fetches TTS via /api/voice/tts)
          full_text: str,         # full LLM reply (for display)
          trace_id: str,
          meta: {llm_ms, tts_ms, chunks_total}
        }

    Client flow:
        1. POST /api/voice/chat/stream → play first_audio_b64 immediately
        2. For each chunk in rest_chunks: POST /api/voice/tts → enqueue audio
    """
    import re as _re  # noqa: F401 – kept for legacy; re already imported at module level
    client_ip = request.client.host if request.client else "unknown"
    if not _check_rate(f"voice_stream:{client_ip}", max_calls=15, window_sec=60):
        raise HTTPException(status_code=429, detail="Rate limit: 15 voice stream calls/min per client")

    # Concurrent TTS guard also applies to stream endpoint (TTS inside).
    # NOTE(review): private-attribute peek; advisory fast-fail only (racy).
    sem = _get_tts_semaphore()
    if not sem._value:
        raise HTTPException(status_code=503,
            detail=f"TTS busy: max {_MAX_CONCURRENT_TTS} concurrent synthesis. Retry in 1-2s.")

    sid = body.session_id or f"vs_{uuid.uuid4().hex[:10]}"
    pid = body.project_id or "default"
    trace_id = f"vs_{sid}_{uuid.uuid4().hex[:8]}"

    _vp = body.voice_profile or "voice_fast_uk"
    _is_quality = _vp == "voice_quality_uk"
    _system_prompt = SOFIIA_SYSTEM_PROMPT + SOFIIA_VOICE_PROMPT_SUFFIX

    # Track for repro pack
    global _voice_last_model, _voice_last_profile
    _voice_last_model = body.model
    _voice_last_profile = _vp

    _broadcast_bg(_make_event("voice.stream", {"phase": "start", "trace_id": trace_id},
                              project_id=pid, session_id=sid))

    # ── 1. LLM ──────────────────────────────────────────────────────────────── 
    t0_llm = time.monotonic()
    provider, _, model_name = body.model.partition(":")
    reply = ""

    def _clean(text: str) -> str:
        # NOTE(review): both regex patterns below can match the empty string,
        # so as written the sub strips nothing useful and re.split(...)[0] can
        # collapse the text. They look like they originally contained literal
        # think-tag markers that were lost — confirm against VCS history before
        # relying on this helper.
        cleaned = re.sub(r".*?", "", text, flags=re.DOTALL | re.IGNORECASE)
        if "" in cleaned.lower():
            cleaned = re.split(r"(?i)", cleaned)[0]
        return cleaned.strip()

    try:
        if provider == "ollama":
            ollama_url = get_ollama_url()
            effective_model_name = model_name or "qwen3:14b"
            messages: List[Dict[str, Any]] = [{"role": "system", "content": _system_prompt}]
            messages.extend(body.history[-8:])
            messages.append({"role": "user", "content": body.message})
            voice_options = {
                "temperature": 0.18 if _is_quality else 0.15,
                "repeat_penalty": 1.1,
                "num_predict": 256 if _is_quality else 220,
            }
            async with httpx.AsyncClient(timeout=SOFIIA_OLLAMA_VOICE_TIMEOUT_SEC) as client:
                r = await client.post(
                    f"{ollama_url}/api/chat",
                    json=_make_ollama_payload(effective_model_name, messages, voice_options),
                )
                r.raise_for_status()
                raw = (r.json().get("message") or {}).get("content", "")
                reply = _clean(raw)
        elif provider == "grok":
            xai_key = os.getenv("XAI_API_KEY", "").strip()
            if not xai_key:
                raise HTTPException(status_code=503, detail="XAI_API_KEY not set.")
            grok_model = model_name or "grok-4-1-fast-reasoning"
            messages_g: List[Dict[str, Any]] = [{"role": "system", "content": _system_prompt}]
            messages_g.extend(body.history[-8:])
            messages_g.append({"role": "user", "content": body.message})
            async with httpx.AsyncClient(timeout=60.0) as client:
                r = await client.post(
                    "https://api.x.ai/v1/chat/completions",
                    headers={"Authorization": f"Bearer {xai_key}", "Content-Type": "application/json"},
                    json={"model": grok_model, "messages": messages_g, "stream": False,
                          "max_tokens": 1024, "temperature": 0.2},
                )
                r.raise_for_status()
                raw = (r.json().get("choices") or [{}])[0].get("message", {}).get("content", "")
                reply = _clean(raw)
        elif provider == "glm":
            glm_key = os.getenv("GLM5_API_KEY", os.getenv("GLM_API_KEY", "")).strip()
            if not glm_key:
                raise HTTPException(status_code=503, detail="GLM5_API_KEY not set.")
            glm_model = model_name or "glm-5"
            messages_glm: List[Dict[str, Any]] = [{"role": "system", "content": _system_prompt}]
            messages_glm.extend(body.history[-8:])
            messages_glm.append({"role": "user", "content": body.message})
            async with httpx.AsyncClient(timeout=60.0) as client:
                r = await client.post(
                    "https://open.bigmodel.cn/api/paas/v4/chat/completions",
                    headers={"Authorization": f"Bearer {glm_key}", "Content-Type": "application/json"},
                    json={"model": glm_model, "messages": messages_glm, "stream": False},
                )
                r.raise_for_status()
                raw = (r.json().get("choices") or [{}])[0].get("message", {}).get("content", "")
                reply = _clean(raw)
        else:
            raise HTTPException(status_code=400, detail=f"voice/stream: provider '{provider}' not supported. Use: ollama, grok, glm.")
    except HTTPException:
        raise
    except Exception as e:
        # Feed the LLM error ring for incident diagnosis, then surface a 502.
        _record_llm_error("inference_error", body.model, str(e)[:120])
        _broadcast_bg(_make_event("error", {"where": "voice_stream_llm", "trace_id": trace_id, "message": str(e)[:100]},
                                  project_id=pid, session_id=sid))
        raise HTTPException(status_code=502, detail=f"LLM error: {str(e)[:200]}")

    llm_ms = int((time.monotonic() - t0_llm) * 1000)
    if not reply:
        reply = "Не можу відповісти зараз."

    # ── 2. Sentence chunking ────────────────────────────────────────────────── 
    # sanitize full reply before splitting (removes markdown, URLs)
    sanitized_reply = _sanitize_for_voice(reply)
    chunks = _split_into_voice_chunks(sanitized_reply)
    if not chunks:
        chunks = [sanitized_reply] if sanitized_reply else ["Не можу відповісти зараз."]

    first_chunk = chunks[0]
    # rest_chunks: sanitize + hard cap (prevents DoS via unreasonably long replies)
    _MAX_REST_CHUNKS = int(os.getenv("MAX_VOICE_REST_CHUNKS", "8"))
    all_rest = [_sanitize_for_voice(c) for c in chunks[1:] if _sanitize_for_voice(c)]
    rest_chunks = all_rest[:_MAX_REST_CHUNKS]  # cap: never more than 8 background TTS calls

    # ── 3. TTS for first sentence (immediate) ───────────────────────────────── 
    t0_tts = time.monotonic()
    first_audio_b64 = ""
    first_audio_mime = "audio/mpeg"
    voice = body.voice or "default"
    _ha_voice_node = None
    _ha_voice_mode = None

    try:
        import base64 as _b64
        tts_json = {"text": first_chunk, "voice": voice, "speed": 1.0}
        async with httpx.AsyncClient(timeout=15.0) as client:
            if is_voice_ha_enabled():
                # HA path: Router selects best node for TTS
                router_url = get_voice_ha_router_url()
                r_tts = await client.post(f"{router_url}/v1/capability/voice_tts", json=tts_json)
                r_tts.raise_for_status()
                _ha_voice_node = r_tts.headers.get("X-Voice-Node")
                _ha_voice_mode = r_tts.headers.get("X-Voice-Mode")
                logger.debug("voice_stream TTS via HA: node=%s mode=%s",
                             _ha_voice_node, _ha_voice_mode)
            else:
                # Legacy direct path
                mem_url = get_memory_service_url()
                r_tts = await client.post(f"{mem_url}/voice/tts", json=tts_json)
                r_tts.raise_for_status()
        first_audio_mime = r_tts.headers.get("content-type", "audio/mpeg").split(";")[0]
        first_audio_b64 = _b64.b64encode(r_tts.content).decode()
    except Exception as e:
        logger.warning("voice_stream TTS failed for first chunk (ha=%s): %s",
                       is_voice_ha_enabled(), e)
        # Not fatal: client can still render text

    tts_ms = int((time.monotonic() - t0_tts) * 1000)

    _broadcast_bg(_make_event("voice.stream", {
        "phase": "done",
        "trace_id": trace_id,
        "llm_ms": llm_ms,
        "tts_ms": tts_ms,
        "chunks_total": len(chunks),
    }, project_id=pid, session_id=sid))

    # `r_tts` may be unbound when TTS failed — guarded by first_audio_b64 being "".
    logger.info("voice_stream ok: trace=%s llm=%dms tts=%dms chunks=%d first=%dB",
                trace_id, llm_ms, tts_ms, len(chunks), len(r_tts.content) if first_audio_b64 else 0)

    body_data = {
        "ok": True,
        "trace_id": trace_id,
        "first_text": first_chunk,
        "first_audio_b64": first_audio_b64,
        "first_audio_mime": first_audio_mime,
        "rest_chunks": rest_chunks,
        "full_text": reply,
        "meta": {
            "llm_ms": llm_ms,
            "tts_ms": tts_ms,
            "chunks_total":
                len(chunks),
            "voice": voice,
            "model": body.model,
            "voice_profile": _vp,
        },
    }

    # NOTE(review): JSONResponse is already imported at module level; this
    # local alias import is redundant but harmless.
    from fastapi.responses import JSONResponse as _JSONResponse
    resp_headers = {}
    if _ha_voice_mode:
        resp_headers["X-Voice-Mode"] = _ha_voice_mode
    if _ha_voice_node:
        resp_headers["X-Voice-Node"] = _ha_voice_node
    if _ha_voice_mode or _ha_voice_node:
        resp_headers["X-Voice-Cap"] = "voice_tts"

    # Only wrap in an explicit JSONResponse when HA diagnostic headers exist.
    if resp_headers:
        return _JSONResponse(content=body_data, headers=resp_headers)
    return body_data


# ─── Voice Telemetry Beacon ─────────────────────────────────────────────────── 
# Receives performance marks from browser, records Prometheus histograms.
# Browser calls this via navigator.sendBeacon (fire-and-forget).

# Prometheus is optional — when the client library is missing, telemetry is
# still logged but no metrics are recorded (_PROM_VOICE_OK gates all observes).
try:
    from prometheus_client import Histogram as _PromHistogram, Counter as _PromCounter
    _voice_ttfa_hist = _PromHistogram(
        "voice_ttfa_ms", "Time-to-first-audio (request → first audio playable)",
        ["model", "voice_profile"],
        buckets=[500, 1000, 2000, 3000, 5000, 7000, 10000, 15000],
    )
    _voice_llm_hist = _PromHistogram(
        "voice_llm_ms", "LLM inference time for voice turns",
        ["model", "voice_profile"],
        buckets=[500, 1000, 2000, 5000, 8000, 12000, 20000],
    )
    _voice_tts_first_hist = _PromHistogram(
        "voice_tts_first_ms", "First-sentence TTS synthesis time",
        ["voice_profile"],
        buckets=[200, 500, 800, 1200, 2000, 3000],
    )
    _voice_e2e_hist = _PromHistogram(
        "voice_e2e_ms", "End-to-end voice turn latency (user stop speaking → audio plays)",
        ["voice_profile"],
        buckets=[1000, 2000, 4000, 6000, 9000, 13000, 20000],
    )
    _voice_underflow_counter = _PromCounter(
        "voice_queue_underflows_total", "Times playback queue ran empty before TTS finished",
        ["voice_profile"],
    )
    _PROM_VOICE_OK = True
except Exception:
    _PROM_VOICE_OK = False


class VoiceTelemetryPayload(BaseModel):
    """One voice-turn performance beacon from the browser."""
    event: str = "voice_turn"
    # Idempotency: session_id + turn_id deduplicate duplicate beacon submissions
    session_id: Optional[str] = None
    turn_id: Optional[str] = None  # monotonic turn counter or UUID per turn
    ttfa_ms: Optional[int] = None
    llm_ms: Optional[int] = None
    tts_first_ms: Optional[int] = None
    e2e_ms: Optional[int] = None
    stt_ms: Optional[int] = None
    underflows: int = 0
    model: Optional[str] = None
    voice_profile: Optional[str] = None


class VoiceTelemetryBatch(BaseModel):
    """Batch beacon: array of turns submitted together (reduces HTTP overhead)."""
    events: List[VoiceTelemetryPayload] = []


def _process_telemetry_item(payload: VoiceTelemetryPayload) -> bool:
    """Process a single telemetry item. Returns False if duplicate."""
    sid = payload.session_id or "anon"
    tid = payload.turn_id or "noid"
    if _telem_is_duplicate(sid, tid):
        return False  # skip duplicate

    model = (payload.model or "unknown").replace("ollama:", "")
    profile = payload.voice_profile or "unknown"

    if _PROM_VOICE_OK:
        try:
            # Each metric is optional — only observe what the browser measured.
            if payload.ttfa_ms is not None:
                _voice_ttfa_hist.labels(model=model, voice_profile=profile).observe(payload.ttfa_ms)
            if payload.llm_ms is not None:
                _voice_llm_hist.labels(model=model, voice_profile=profile).observe(payload.llm_ms)
            if payload.tts_first_ms is not None:
                _voice_tts_first_hist.labels(voice_profile=profile).observe(payload.tts_first_ms)
            if payload.e2e_ms is not None:
                _voice_e2e_hist.labels(voice_profile=profile).observe(payload.e2e_ms)
            if payload.underflows:
                _voice_underflow_counter.labels(voice_profile=profile).inc(payload.underflows)
        except Exception as exc:
            logger.debug("telemetry/voice prom error: %s", exc)

    logger.info(
        "voice_telemetry: model=%s profile=%s ttfa=%s llm=%s tts=%s e2e=%s underflows=%d sid=%s",
        model, profile, payload.ttfa_ms, payload.llm_ms,
        payload.tts_first_ms, payload.e2e_ms, payload.underflows, sid,
    )

    # Feed the degradation state machine
    if payload.ttfa_ms is not None or payload.tts_first_ms is not None:
        _voice_degradation_sm.observe(
            ttfa_ms=payload.ttfa_ms,
            tts_first_ms=payload.tts_first_ms,
            underflows=payload.underflows,
            profile=profile,
        )
    return True


@app.post("/api/telemetry/voice", status_code=204)
async def api_telemetry_voice(payload: VoiceTelemetryPayload):
    """Browser beacon endpoint (single turn). Fire-and-forget, always 204."""
    _process_telemetry_item(payload)
    # 204 No Content — browser doesn't await response


@app.post("/api/telemetry/voice/batch", status_code=204)
async def api_telemetry_voice_batch(batch: VoiceTelemetryBatch, request: Request):
    """Batch beacon: process up to 20 turns in one HTTP call.

    Useful when browser queues multiple turns before sending (e.g. tab becomes
    visible again, or connection was lost briefly).
    """
    client_ip = request.client.host if request.client else "unknown"
    if not _check_rate(f"telem_batch:{client_ip}", max_calls=60, window_sec=60):
        raise HTTPException(status_code=429, detail="Rate limit: 60 telemetry batches/min")

    cap = min(len(batch.events), 20)  # hard cap per batch
    processed = sum(1 for item in batch.events[:cap] if _process_telemetry_item(item))
    logger.debug("telemetry/voice/batch: submitted=%d processed=%d cap=%d",
                 len(batch.events), processed, cap)


# ─── Voice Degradation State Machine ───────────────────────────────────────── 
# Tracks rolling window of voice telemetry and determines system-level state.
# States: ok → degraded_tts → degraded_llm → fast_lock → emergency
# Client polls GET /api/voice/degradation_status to show UI badge.
+ +import collections +from dataclasses import dataclass as _dc, field as _field +from enum import Enum + +class VoiceDegradationState(str, Enum): + OK = "ok" # all SLOs met + DEGRADED_TTS = "degraded_tts" # TTS slow/failing → show "TTS SLOW" badge + DEGRADED_LLM = "degraded_llm" # LLM slow → profile auto-demoted to fast + FAST_LOCK = "fast_lock" # LLM degraded, forced to voice_fast_uk + EMERGENCY = "emergency" # TTS failing → warn user, fallback banner + +# SLO thresholds (ms) — aligned with config/slo_policy.yml +_SM_TTFA_WARN = 5000 # TTFA p95 > 5s → degraded_llm +_SM_TTFA_LOCK = 8000 # TTFA p95 > 8s → fast_lock +_SM_TTS_WARN = 2000 # TTS first p95 > 2s → degraded_tts +_SM_TTS_CRIT = 4000 # TTS first p95 > 4s → emergency +_SM_UNDERFLOW_RATE = 0.1 # >10% of recent turns have underflows → degraded_tts +_SM_WINDOW = 20 # rolling window (last N telemetry events) +_SM_MIN_SAMPLES = 5 # need at least N samples before changing state + + +@_dc +class _VoiceDegradationSM: + """Rolling-window degradation state machine.""" + _ttfa_window: collections.deque = _field(default_factory=lambda: collections.deque(maxlen=_SM_WINDOW)) + _tts_first_window: collections.deque = _field(default_factory=lambda: collections.deque(maxlen=_SM_WINDOW)) + _underflow_window: collections.deque = _field(default_factory=lambda: collections.deque(maxlen=_SM_WINDOW)) + state: VoiceDegradationState = VoiceDegradationState.OK + state_since: float = _field(default_factory=time.monotonic) + recommended_profile: str = "voice_fast_uk" + last_reason: str = "" + _lock: object = _field(default_factory=lambda: __import__('asyncio').Lock()) + + def observe(self, ttfa_ms: Optional[int], tts_first_ms: Optional[int], + underflows: int, profile: str) -> None: + if ttfa_ms is not None: + self._ttfa_window.append(ttfa_ms) + if tts_first_ms is not None: + self._tts_first_window.append(tts_first_ms) + self._underflow_window.append(1 if underflows > 0 else 0) + self._recompute() + + def _p95(self, window: 
collections.deque) -> Optional[float]: + if len(window) < _SM_MIN_SAMPLES: + return None + s = sorted(window) + return s[int(len(s) * 0.95)] + + def _underflow_rate(self) -> float: + if not self._underflow_window: + return 0.0 + return sum(self._underflow_window) / len(self._underflow_window) + + def _recompute(self) -> None: + ttfa_p95 = self._p95(self._ttfa_window) + tts_p95 = self._p95(self._tts_first_window) + uf_rate = self._underflow_rate() + + prev_state = self.state + + if tts_p95 is not None and tts_p95 > _SM_TTS_CRIT: + self.state = VoiceDegradationState.EMERGENCY + self.recommended_profile = "voice_fast_uk" + self.last_reason = f"TTS p95={tts_p95:.0f}ms > {_SM_TTS_CRIT}ms" + elif ttfa_p95 is not None and ttfa_p95 > _SM_TTFA_LOCK: + self.state = VoiceDegradationState.FAST_LOCK + self.recommended_profile = "voice_fast_uk" + self.last_reason = f"TTFA p95={ttfa_p95:.0f}ms > {_SM_TTFA_LOCK}ms — locked to fast profile" + elif tts_p95 is not None and tts_p95 > _SM_TTS_WARN: + self.state = VoiceDegradationState.DEGRADED_TTS + self.recommended_profile = "voice_fast_uk" + self.last_reason = f"TTS p95={tts_p95:.0f}ms > {_SM_TTS_WARN}ms" + elif ttfa_p95 is not None and ttfa_p95 > _SM_TTFA_WARN: + self.state = VoiceDegradationState.DEGRADED_LLM + self.recommended_profile = "voice_fast_uk" + self.last_reason = f"TTFA p95={ttfa_p95:.0f}ms > {_SM_TTFA_WARN}ms" + elif uf_rate > _SM_UNDERFLOW_RATE: + self.state = VoiceDegradationState.DEGRADED_TTS + self.recommended_profile = "voice_fast_uk" + self.last_reason = f"Underflow rate={uf_rate:.1%} > {_SM_UNDERFLOW_RATE:.0%}" + else: + self.state = VoiceDegradationState.OK + self.recommended_profile = "voice_fast_uk" # default + self.last_reason = "all SLOs met" + + if self.state != prev_state: + self.state_since = time.monotonic() + logger.warning("voice_degradation state: %s → %s | %s", + prev_state.value, self.state.value, self.last_reason) + + def status_dict(self) -> dict: + return { + "state": self.state.value, + 
"state_since_sec": int(time.monotonic() - self.state_since), + "recommended_profile": self.recommended_profile, + "reason": self.last_reason, + "samples": { + "ttfa": len(self._ttfa_window), + "tts_first": len(self._tts_first_window), + }, + "p95": { + "ttfa_ms": self._p95(self._ttfa_window), + "tts_first_ms": self._p95(self._tts_first_window), + }, + "underflow_rate": round(self._underflow_rate(), 3), + "ui_badge": _SM_UI_BADGE.get(self.state, ""), + } + + +# UI badge text per state +_SM_UI_BADGE = { + VoiceDegradationState.OK: "", + VoiceDegradationState.DEGRADED_TTS: "⚠ TTS SLOW", + VoiceDegradationState.DEGRADED_LLM: "⚠ AI SLOW", + VoiceDegradationState.FAST_LOCK: "⚡ FAST MODE", + VoiceDegradationState.EMERGENCY: "🔴 TTS DEGRADED", +} + +_voice_degradation_sm = _VoiceDegradationSM() + + +@app.get("/api/voice/degradation_status") +async def api_voice_degradation_status(): + """Returns current voice degradation state + repro pack for incident diagnosis. + + Repro pack fields (for on-call): + node_id, edge_tts_version, last_model, last_profile, + last_5_tts_errors, last_5_llm_errors + """ + base = _voice_degradation_sm.status_dict() + # Enrich with repro pack + base["repro"] = { + "node_id": _NODE_ID, + "last_model": _voice_last_model, + "last_profile": _voice_last_profile, + "last_5_tts_errors": list(_voice_tts_errors), + "last_5_llm_errors": list(_voice_llm_errors), + "concurrent_tts_slots_free": _get_tts_semaphore()._value, + "max_concurrent_tts": _MAX_CONCURRENT_TTS, + } + return base + + +# ─── Memory ────────────────────────────────────────────────────────────────── + +@app.get("/api/memory/status") +async def api_memory_status(_auth: str = Depends(require_auth)): + mem_url = get_memory_service_url() + try: + async with httpx.AsyncClient(timeout=8.0) as client: + r = await client.get(f"{mem_url}/health") + r.raise_for_status() + data = r.json() + return { + "ok": True, + "memory_url": mem_url, + "status": data.get("status", "unknown"), + "vector_store": 
data.get("vector_store", {}), + "stt": "whisper-large-v3-turbo", + "tts": "edge-tts / macOS say", + } + except Exception as e: + return {"ok": False, "error": str(e)[:200], "memory_url": mem_url} + + +@app.get("/api/memory/context") +async def api_memory_context( + session_id: str = Query("console"), + agent_id: str = Query("sofiia"), + user_id: Optional[str] = Query(None), + limit: int = Query(20, ge=1, le=100), + _auth: str = Depends(require_auth), +): + mem_url = get_memory_service_url() + agent_key = str(agent_id or "").strip().lower() + resolved_user = user_id or ("aistalk_user" if agent_key == "aistalk" else "console_user") + async def _sqlite_fallback_events() -> List[Dict[str, Any]]: + events: List[Dict[str, Any]] = [] + if _app_db: + try: + rows = await _app_db.list_messages(session_id, limit=limit) + for row in rows: + events.append( + { + "role": row.get("role", "unknown"), + "content": row.get("content", ""), + "ts": row.get("ts"), + "source": "sqlite_fallback", + } + ) + except Exception: + pass + return events + try: + async with httpx.AsyncClient(timeout=8.0) as client: + r = await client.get( + f"{mem_url}/agents/{agent_id}/memory", + params={"user_id": resolved_user, "channel_id": session_id, "limit": limit}, + ) + r.raise_for_status() + data = r.json() + events = data.get("events") if isinstance(data, dict) else None + if isinstance(events, list) and events: + return data + # Remote is alive but returned empty history; expose local persisted history too. + local_events = await _sqlite_fallback_events() + if local_events: + return {"events": local_events, "fallback": "sqlite_after_empty_remote"} + return data if isinstance(data, dict) else {"events": []} + except Exception as e: + # Fallback to local SQLite session memory so UI still has context. 
        events = await _sqlite_fallback_events()
        return {"events": events, "error": str(e)[:100], "fallback": "sqlite"}


# ─── WebSocket /ws/events ────────────────────────────────────────────────────

@app.websocket("/ws/events")
async def ws_events(websocket: WebSocket):
    """WebSocket event stream. Clients receive all broadcast events."""
    await websocket.accept()
    _ws_clients.add(websocket)
    logger.info("WS client connected, total=%d", len(_ws_clients))
    # Send welcome
    await websocket.send_text(json.dumps(_make_event("nodes.status", {
        "message": "connected",
        "bff_version": _VERSION,
        "ws_clients": len(_ws_clients),
    })))
    try:
        while True:
            # Keep-alive: read pings from client (or just wait)
            try:
                msg = await asyncio.wait_for(websocket.receive_text(), timeout=15.0)
                # Client can send {"type":"ping"} → pong
                if msg:
                    try:
                        cmd = json.loads(msg)
                        if cmd.get("type") == "ping":
                            await websocket.send_text(json.dumps({"type": "pong", "ts": _now_iso()}))
                    except Exception:
                        # Non-JSON client payloads are ignored by design.
                        pass
            except asyncio.TimeoutError:
                # No client traffic for 15s: send periodic heartbeat with cached
                # nodes snapshot if available.
                hb_data: Dict[str, Any] = {
                    "bff_uptime_s": int(time.monotonic() - _START_TIME),
                    "ws_clients": len(_ws_clients),
                }
                if _nodes_cache.get("nodes"):
                    hb_data["nodes"] = [
                        {
                            "id": n["node_id"],
                            "online": n.get("online", False),
                            "router_ok": n.get("router_ok", False),
                            "router_latency_ms": n.get("router_latency_ms"),
                        }
                        for n in _nodes_cache["nodes"]
                    ]
                    hb_data["nodes_ts"] = _nodes_cache.get("ts", "")
                await websocket.send_text(json.dumps(_make_event("nodes.status", hb_data)))
    except WebSocketDisconnect:
        pass
    except Exception as e:
        logger.debug("WS error: %s", e)
    finally:
        # Always drop the client from the broadcast set, even on error paths.
        _ws_clients.discard(websocket)
        logger.info("WS client disconnected, total=%d", len(_ws_clients))


# ─── UI ──────────────────────────────────────────────────────────────────────

STATIC_DIR = Path(__file__).resolve().parent.parent / "static"
_NO_CACHE = {"Cache-Control": "no-cache, no-store, must-revalidate", "Pragma": "no-cache"}


@app.get("/api/meta/version")
async def get_meta_version():
    """Build metadata endpoint — always no-cache, always public."""
    return JSONResponse(
        content={
            "version": _VERSION,
            "build_sha": _BUILD_SHA,
            "build_time": _BUILD_TIME,
            "service": "sofiia-console",
        },
        headers=_NO_CACHE,
    )


# ─── Auth endpoints ──────────────────────────────────────────────────────────

class _LoginBody(BaseModel):
    # API key sent in the JSON body (not a header) — see auth_login docstring.
    key: str


@app.post("/api/auth/login")
async def auth_login(body: _LoginBody, response: Response):
    """
    Verify API key (sent in JSON body — avoids header encoding issues).
    On success: set httpOnly session cookie, return ok=true.
    No CORS/header encoding issues since key travels in request body.
    """
    if not _key_valid(body.key):
        raise HTTPException(status_code=401, detail="Invalid key")

    token = _cookie_token(body.key)
    response.set_cookie(
        key=_COOKIE_NAME,
        value=token,
        httponly=True,
        secure=_IS_PROD,  # Secure=True in prod (HTTPS only)
        samesite="lax",
        max_age=_COOKIE_MAX_AGE,
        path="/",
    )
    return {"ok": True, "auth": "cookie"}


@app.post("/api/auth/logout")
async def auth_logout(response: Response):
    """Clear session cookie."""
    response.delete_cookie(key=_COOKIE_NAME, path="/")
    return {"ok": True}


@app.get("/api/auth/check")
async def auth_check(request: Request):
    """Returns 200 if session is valid, 401 otherwise.
Used by UI on startup.""" + # Localhost is always open — no auth needed + client_ip = (request.client.host if request.client else "") or "" + if client_ip in ("127.0.0.1", "::1", "localhost"): + return {"ok": True, "auth": "localhost"} + configured = get_console_api_key() + if not configured: + return {"ok": True, "auth": "open"} + from .auth import _expected_cookie_token as _ect + cookie_val = request.cookies.get(_COOKIE_NAME, "") + import secrets as _sec + if cookie_val and _sec.compare_digest(cookie_val, _ect()): + return {"ok": True, "auth": "cookie"} + raise HTTPException(status_code=401, detail="Not authenticated") + + +@app.get("/", response_class=HTMLResponse) +async def ui_root(): + index = STATIC_DIR / "index.html" + content = index.read_text(encoding="utf-8") if index.exists() else _fallback_html() + return HTMLResponse(content=content, headers=_NO_CACHE) + + +@app.get("/ui", response_class=HTMLResponse) +async def ui_alias(): + return await ui_root() + + +def _fallback_html() -> str: + return """Sofiia Console +

Sofiia Control Console v""" + _VERSION + """

+

Endpoints: GET /api/health | GET /api/status/full | POST /api/chat/send | WS /ws/events

"""


@app.get("/chat", response_class=HTMLResponse)
async def ui_chat():
    """Serve the chat UI page (static file, fallback HTML if missing)."""
    p = STATIC_DIR / "chat.html"
    content = p.read_text(encoding="utf-8") if p.exists() else _fallback_html()
    return HTMLResponse(content=content, headers=_NO_CACHE)


@app.get("/ops", response_class=HTMLResponse)
async def ui_ops():
    """Serve the ops UI page (static file, fallback HTML if missing)."""
    p = STATIC_DIR / "ops.html"
    content = p.read_text(encoding="utf-8") if p.exists() else _fallback_html()
    return HTMLResponse(content=content, headers=_NO_CACHE)


@app.get("/nodes", response_class=HTMLResponse)
async def ui_nodes():
    """Serve the nodes UI page (static file, fallback HTML if missing)."""
    p = STATIC_DIR / "nodes.html"
    content = p.read_text(encoding="utf-8") if p.exists() else _fallback_html()
    return HTMLResponse(content=content, headers=_NO_CACHE)


# ── Supervisor Proxy ─────────────────────────────────────────────────────────
_SUPERVISOR_URL = os.getenv("SUPERVISOR_URL", "http://sofiia-supervisor:8080").rstrip("/")
_SUPERVISOR_FALLBACK_URL = os.getenv("SUPERVISOR_FALLBACK_URL", "http://127.0.0.1:8084").rstrip("/")


async def _supervisor_request_json(
    method: str,
    path: str,
    *,
    timeout: float = 30.0,
    json_body: Optional[Dict[str, Any]] = None,
) -> Tuple[int, Dict[str, Any]]:
    """Issue a JSON request against the supervisor, trying fallback URL on
    connection errors only.

    Returns (status_code, dict payload). Non-dict JSON is wrapped under "data",
    unparsable bodies under "raw". Raises HTTPException with the upstream
    status when the supervisor answers >= 400 (no fallback attempted in that
    case — the service was reachable), and 502 when no base URL is reachable.
    """
    urls = [_SUPERVISOR_URL]
    if _SUPERVISOR_FALLBACK_URL and _SUPERVISOR_FALLBACK_URL not in urls:
        urls.append(_SUPERVISOR_FALLBACK_URL)

    last_err = "unavailable"
    for base in urls:
        target = f"{base}{path}"
        try:
            async with httpx.AsyncClient(timeout=timeout) as client:
                resp = await client.request(method, target, json=json_body)
        except Exception as e:
            # Transport error → remember and try the next base URL.
            last_err = str(e)[:200]
            continue

        if resp.status_code >= 400:
            detail = resp.text[:400] if resp.text else f"Supervisor error {resp.status_code}"
            raise HTTPException(status_code=resp.status_code, detail=detail)

        if not resp.content:
            return resp.status_code, {}
        try:
            payload = resp.json()
        except Exception:
            return resp.status_code, {"raw": resp.text[:1000]}
        if isinstance(payload, dict):
            return resp.status_code, payload
        return resp.status_code, {"data": payload}

    raise HTTPException(status_code=502, detail=f"Supervisor unreachable: {last_err}")


@app.post("/api/supervisor/runs")
async def start_supervisor_run(request: Request, _auth: str = Depends(require_auth)):
    """Start a LangGraph run on sofiia-supervisor.

    Body: {"graph": "alert_triage|incident_triage|postmortem_draft|release_check",
           "project_id": "", ...params}

    If project_id is provided, auto-creates an agent_run dialog_node in the graph
    and returns node_id in the response for UI tracking.
    """
    body = await request.json()
    graph_name = body.pop("graph", None)
    project_id = body.pop("project_id", None)
    if not graph_name:
        raise HTTPException(status_code=400, detail="'graph' field is required")
    try:
        status_code, result = await _supervisor_request_json(
            "POST",
            f"/v1/graphs/{graph_name}/runs",
            timeout=60.0,
            json_body=body,
        )

        # Auto-create agent_run node if project is provided
        if project_id and status_code in (200, 201, 202):
            run_id = result.get("run_id") or result.get("id") or str(uuid.uuid4())
            try:
                pack = await _app_db.create_evidence_pack(
                    project_id=project_id,
                    run_id=run_id,
                    graph_name=graph_name,
                    result_data={"status": "started", "summary": f"Run started: {graph_name}"},
                    created_by="sofiia",
                )
                result["_node_id"] = pack.get("node_id")
            except Exception as node_err:
                # Node creation is advisory; the run itself already started.
                logger.warning("evidence_pack node creation failed (non-fatal): %s", node_err)

        return JSONResponse(status_code=status_code, content=result)
    except HTTPException:
        raise
    except Exception as e:
        raise HTTPException(status_code=502, detail=f"Supervisor unreachable: {e}")


@app.get("/api/supervisor/runs/{run_id}")
async def get_supervisor_run(run_id: str, _auth: str = Depends(require_auth)):
    """Get the status/result of a LangGraph run."""
    try:
        status_code, payload = await _supervisor_request_json(
            "GET",
            f"/v1/runs/{run_id}",
            timeout=15.0,
        )
        return JSONResponse(status_code=status_code, content=payload)
    except Exception as e:
        raise HTTPException(status_code=502, detail=f"Supervisor unreachable: {e}")


@app.post("/api/supervisor/runs/{run_id}/cancel")
async def cancel_supervisor_run(run_id: str, _auth: str = Depends(require_auth)):
    """Cancel a running LangGraph run."""
    try:
        status_code, payload = await _supervisor_request_json(
            "POST",
            f"/v1/runs/{run_id}/cancel",
            timeout=10.0,
        )
        return JSONResponse(status_code=status_code, content=payload)
    except Exception as e:
        raise HTTPException(status_code=502, detail=f"Supervisor unreachable: {e}")


@app.get("/api/supervisor/graphs")
async def list_supervisor_graphs():
    """List available LangGraph graphs (no auth — read-only discovery)."""
    urls = [_SUPERVISOR_URL]
    if _SUPERVISOR_FALLBACK_URL and _SUPERVISOR_FALLBACK_URL not in urls:
        urls.append(_SUPERVISOR_FALLBACK_URL)
    last_err = "unavailable"
    for base in urls:
        try:
            async with httpx.AsyncClient(timeout=5.0) as client:
                resp = await client.get(f"{base}/healthz")
            data = resp.json()
            return {
                "graphs": data.get("graphs", []),
                "healthy": resp.status_code == 200,
                "url": base,
                "state_backend": data.get("state_backend"),
            }
        except Exception as e:
            last_err = str(e)
            continue
    return {"graphs": [], "healthy": False, "error": last_err}


@app.get("/api/aistalk/status")
async def aistalk_status():
    """AISTALK integration status for SOFIIA UI."""
    try:
        sup = await list_supervisor_graphs()
        aurora = await api_aurora_health()
        runtime = await _aistalk_runtime_state()
        adapter_status: Dict[str, Any]
        relay_health: Dict[str, Any]
        if _aistalk is not None:
            try:
                relay_health = _aistalk.probe_health()
            except Exception as e:
                relay_health = {"enabled": True, "ok": False, "error": str(e)[:200]}
            try:
                adapter_status = _aistalk.status()
            except Exception:
                adapter_status = {"enabled": True, "base_url": "unknown"}
        else:
            relay_health = {"enabled": False, "ok": False, "error": "disabled"}
            adapter_status = {"enabled": False, "base_url": ""}
        return {
            "aistalk_enabled": _aistalk is not None,
            "aistalk_adapter": repr(_aistalk) if _aistalk is not None else "disabled",
            "adapter": adapter_status,
            "relay_health": relay_health,
            "supervisor": sup,
            "aurora": aurora,
            "runtime": runtime,
            "docs": {
                "contract": "/docs/aistalk/contract.md",
                "supervisor": "/docs/supervisor/langgraph_supervisor.md",
            },
        }
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))


def _parse_agent_md(path: Path) -> Dict[str, Any]:
    """Parse an AISTALK agent role markdown file into a catalog entry dict.

    Recognized sections: "Role:", "Output(s):", "Boundary/Boundaries:",
    "Capabilities:". Section items are "- " bullet lines; code fences and
    "Modes:/Rules:/Internal sub-pipeline" headings terminate a section.
    """
    text = path.read_text(encoding="utf-8", errors="ignore")
    lines = [ln.rstrip() for ln in text.splitlines()]
    title = path.stem
    display_name = title
    role: List[str] = []
    outputs: List[str] = []
    boundaries: List[str] = []
    capabilities: List[str] = []
    intro: List[str] = []
    in_section: Optional[str] = None

    for raw in lines:
        line = raw.strip()
        if not line:
            continue
        if line.startswith("# "):
            # H1 heading supplies the display name.
            display_name = line[2:].strip()
            continue
        low = line.lower()
        if low.startswith("role:"):
            in_section = "role"
            continue
        if low.startswith("output:"):
            in_section = "output"
            continue
        if low.startswith("outputs:"):
            in_section = "output"
            continue
        if low.startswith("boundary:"):
            in_section = "boundary"
            continue
        if low.startswith("boundaries:"):
            in_section = "boundary"
            continue
        if low.startswith("capabilities:"):
            in_section = "capabilities"
            continue
        if low.startswith("modes:") or low.startswith("rules:") or low.startswith("internal sub-pipeline"):
            in_section = None
            continue
        if line.startswith("```"):
            in_section = None
            continue

        if line.startswith("- "):
            item = line[2:].strip()
            if in_section == "role":
                role.append(item)
            elif in_section == "output":
                outputs.append(item)
            elif in_section == "boundary":
                boundaries.append(item)
            elif in_section == "capabilities":
                capabilities.append(item)
            continue
+ if in_section is None and not line.startswith("#"): + # Some agent role files store purpose as plain intro line without "Role:" section. + intro.append(line) + + summary = role[0] if role else (intro[0] if intro else "") + return { + "id": title.lower(), + "name": display_name, + "summary": summary, + "role": role, + "outputs": outputs, + "boundaries": boundaries, + "capabilities": capabilities, + "source": str(path), + } + + +@app.get("/api/aistalk/catalog") +async def aistalk_catalog(): + """ + Return AISTALK subagent catalog + declared capabilities for UI rendering. + """ + roots = [ + Path(__file__).resolve().parents[3] / "config" / "roles" / "aistalk", + Path(__file__).resolve().parents[1] / "config" / "roles" / "aistalk", + ] + root = next((p for p in roots if p.exists()), None) + if root is None: + return { + "ok": False, + "error": "AISTALK roles directory not found", + "agents": [], + "domains": [], + } + + agents: List[Dict[str, Any]] = [] + for p in sorted(root.glob("*.md")): + try: + agents.append(_parse_agent_md(p)) + except Exception as e: + agents.append( + { + "id": p.stem.lower(), + "name": p.stem, + "summary": "", + "role": [], + "outputs": [], + "boundaries": [f"parse_error: {str(e)[:120]}"], + "capabilities": [], + "source": str(p), + } + ) + + # High-level specialization domains for UI badges/filters. 
    domains = [
        {"id": "osint", "name": "OSINT & Recon", "agents": ["tracer", "stealth", "shadow"]},
        {"id": "analysis", "name": "Threat Analysis", "agents": ["neuron", "graph", "risk"]},
        {"id": "offdef", "name": "Offense/Defense", "agents": ["redteam", "blueteam", "purpleteam", "bughunter", "devteam"]},
        {"id": "forensics", "name": "Media Forensics", "agents": ["aurora"]},
        {"id": "security", "name": "Governance & Data Safety", "agents": ["vault", "quantum"]},
        {"id": "orchestration", "name": "Command & Synthesis", "agents": ["orchestrator_synthesis"]},
    ]
    return {
        "ok": True,
        "root": str(root),
        "count": len(agents),
        "agents": agents,
        "domains": domains,
    }


# Persisted runtime settings (limits + per-agent model selection) live next to
# the console cache directory.
_AISTALK_RUNTIME_PATH = AURORA_DATA_DIR.parent / "sofiia-console-cache" / "aistalk_runtime.json"
# Canonical agent ordering; also the set of valid agent_id values.
_AISTALK_AGENT_ORDER = [
    "orchestrator_synthesis",
    "tracer",
    "shadow",
    "stealth",
    "neuron",
    "graph",
    "bughunter",
    "redteam",
    "blueteam",
    "purpleteam",
    "risk",
    "vault",
    "quantum",
    "devteam",
    "aurora",
]
# In-process concurrency bookkeeping (run_id → start timestamp; active chats).
_aistalk_team_active_runs: Dict[str, float] = {}
_aistalk_chat_active: int = 0
_aistalk_state_lock = asyncio.Lock()


def _aistalk_roles_root() -> Optional[Path]:
    """Return the first existing AISTALK roles directory, or None."""
    roots = [
        Path(__file__).resolve().parents[3] / "config" / "roles" / "aistalk",
        Path(__file__).resolve().parents[1] / "config" / "roles" / "aistalk",
    ]
    return next((p for p in roots if p.exists()), None)


def _aistalk_resource_snapshot() -> Dict[str, Any]:
    """Snapshot host CPU count, RAM (GiB, via sysconf) and Ollama tuning knobs."""
    cpu = os.cpu_count() or 8
    mem_gb: Optional[float] = None
    try:
        # os.sysconf is POSIX-only; any failure leaves mem_gb as None.
        page_size = os.sysconf("SC_PAGE_SIZE")
        total_pages = os.sysconf("SC_PHYS_PAGES")
        if page_size > 0 and total_pages > 0:
            mem_gb = round((page_size * total_pages) / (1024 ** 3), 1)
    except Exception:
        mem_gb = None
    return {
        "cpu_count": cpu,
        "memory_gb": mem_gb,
        "ollama_num_ctx": SOFIIA_OLLAMA_NUM_CTX,
        "ollama_num_thread": SOFIIA_OLLAMA_NUM_THREAD,
        "ollama_num_gpu": SOFIIA_OLLAMA_NUM_GPU,
    }


def _aistalk_recommended_limits(resources: Dict[str, Any]) -> Dict[str, Any]:
    """Derive a concurrency profile (safe/balanced/performance) from resources."""
    cpu = int(resources.get("cpu_count") or 8)
    mem = resources.get("memory_gb")
    mem_gb = float(mem) if isinstance(mem, (int, float)) else 0.0
    if cpu >= 12 and mem_gb >= 24:
        profile = "performance"
        team_max = 2
        chat_max = 4
    elif cpu >= 8 and mem_gb >= 16:
        profile = "balanced"
        team_max = 1
        chat_max = 3
    else:
        profile = "safe"
        team_max = 1
        chat_max = 2
    return {
        "profile": profile,
        "max_parallel_team_runs": team_max,
        "max_parallel_chat": chat_max,
        "rule": (
            "Aurora/forensics jobs are GPU-heavy: keep team runs low; "
            "chat parallelism may be higher but bounded by CPU/RAM."
        ),
    }


async def _aistalk_local_models() -> List[str]:
    """List locally available Ollama model names; empty list on any error."""
    ollama_url = get_ollama_url().rstrip("/")
    try:
        async with httpx.AsyncClient(timeout=5.0) as client:
            r = await client.get(f"{ollama_url}/api/tags")
            r.raise_for_status()
            data = r.json()
            models = [str((m or {}).get("name", "")).strip() for m in (data.get("models") or [])]
            return [m for m in models if m]
    except Exception:
        return []


def _aistalk_default_model_map(models: List[str]) -> Dict[str, str]:
    """Map each AISTALK agent to a default model tier based on availability."""
    available = set(models)

    def pick(*candidates: str) -> str:
        # First available candidate wins; else first local model; else hard default.
        for c in candidates:
            if c in available:
                return c
        if models:
            return models[0]
        return "qwen3:14b"

    orchestrator = pick("qwen3.5:35b-a3b", "qwen3:14b", "gemma3:latest")
    analyst = pick("qwen3:14b", "qwen3.5:35b-a3b", "gemma3:latest")
    lightweight = pick("gemma3:latest", "qwen3:14b", "qwen3.5:35b-a3b")

    mapping: Dict[str, str] = {}
    for agent_id in _AISTALK_AGENT_ORDER:
        if agent_id in ("orchestrator_synthesis", "risk", "neuron", "graph"):
            mapping[agent_id] = orchestrator
        elif agent_id in ("tracer", "shadow", "stealth", "vault", "quantum"):
            mapping[agent_id] = analyst
        else:
            mapping[agent_id] = lightweight
    return mapping


def _read_aistalk_runtime() -> Dict[str, Any]:
    """Load persisted runtime settings; {} if missing, unreadable or non-dict."""
    if _AISTALK_RUNTIME_PATH.exists():
        try:
            raw = json.loads(_AISTALK_RUNTIME_PATH.read_text(encoding="utf-8"))
            if isinstance(raw, dict):
                return raw
        except Exception:
            pass
    return {}


def _write_aistalk_runtime(data: Dict[str, Any]) -> None:
    """Persist runtime settings as pretty JSON, creating parent dirs as needed.

    NOTE(review): write is not atomic (no temp-file + rename) — a crash
    mid-write can truncate the file; _read_aistalk_runtime tolerates that.
    """
    _AISTALK_RUNTIME_PATH.parent.mkdir(parents=True, exist_ok=True)
    _AISTALK_RUNTIME_PATH.write_text(json.dumps(data, ensure_ascii=False, indent=2), encoding="utf-8")


async def _aistalk_runtime_state() -> Dict[str, Any]:
    """Compose the effective runtime state: stored settings merged with
    recommendations, clamped limits, and validated per-agent model choices."""
    resources = _aistalk_resource_snapshot()
    recommended = _aistalk_recommended_limits(resources)
    models = await _aistalk_local_models()
    stored = _read_aistalk_runtime()

    limits = stored.get("limits") if isinstance(stored.get("limits"), dict) else {}
    max_team = int(limits.get("max_parallel_team_runs") or recommended["max_parallel_team_runs"])
    max_chat = int(limits.get("max_parallel_chat") or recommended["max_parallel_chat"])
    profile = str(limits.get("profile") or recommended["profile"])

    saved_models = stored.get("agent_models") if isinstance(stored.get("agent_models"), dict) else {}
    defaults = _aistalk_default_model_map(models)
    agent_models: Dict[str, str] = {}
    for aid in _AISTALK_AGENT_ORDER:
        selected = str(saved_models.get(aid) or defaults.get(aid) or "")
        # Saved model may have been removed locally — fall back to defaults.
        if models and selected not in models:
            selected = defaults.get(aid) or models[0]
        if not selected:
            selected = "qwen3:14b"
        agent_models[aid] = selected

    state = {
        "limits": {
            "profile": profile,
            # Clamp to hard safety bounds regardless of stored values.
            "max_parallel_team_runs": max(1, min(max_team, 4)),
            "max_parallel_chat": max(1, min(max_chat, 8)),
        },
        "recommended": recommended,
        "resources": resources,
        "available_models": models,
        "agent_models": agent_models,
        "active_team_runs": len(_aistalk_team_active_runs),
        "active_chat": _aistalk_chat_active,
    }
    # Persist normalized shape for future restarts.
+ _write_aistalk_runtime({"limits": state["limits"], "agent_models": state["agent_models"]}) + return state + + +def _aistalk_role_prompt(agent_id: str) -> str: + root = _aistalk_roles_root() + if root is None: + return "You are AISTALK security analyst. Respond with findings, risk, next actions." + target = root / f"{agent_id}.md" + if not target.exists(): + target = root / "orchestrator_synthesis.md" + try: + text = target.read_text(encoding="utf-8", errors="ignore") + # Keep prompt concise enough for local models. + return text[:6000] + except Exception: + return "You are AISTALK security analyst. Respond with findings, risk, next actions." + + +@app.get("/api/aistalk/runtime") +async def aistalk_runtime(_auth: str = Depends(require_auth)): + return await _aistalk_runtime_state() + + +class AISTalkModelSetBody(BaseModel): + agent_id: str + model: str + + +@app.post("/api/aistalk/runtime/model") +async def aistalk_set_agent_model(body: AISTalkModelSetBody, _auth: str = Depends(require_auth)): + state = await _aistalk_runtime_state() + aid = str(body.agent_id or "").strip().lower() + if aid not in _AISTALK_AGENT_ORDER: + raise HTTPException(status_code=400, detail=f"Unknown agent_id: {aid}") + model = str(body.model or "").strip() + models = state.get("available_models") or [] + if models and model not in models: + raise HTTPException(status_code=400, detail=f"Model not available locally: {model}") + stored = _read_aistalk_runtime() + stored.setdefault("limits", state.get("limits", {})) + stored.setdefault("agent_models", state.get("agent_models", {})) + stored["agent_models"][aid] = model + _write_aistalk_runtime(stored) + return {"ok": True, "agent_id": aid, "model": model} + + +class AISTalkLimitsBody(BaseModel): + profile: Optional[str] = None + max_parallel_team_runs: Optional[int] = None + max_parallel_chat: Optional[int] = None + + +@app.post("/api/aistalk/runtime/limits") +async def aistalk_set_limits(body: AISTalkLimitsBody, _auth: str = 
Depends(require_auth)): + state = await _aistalk_runtime_state() + stored = _read_aistalk_runtime() + limits = dict(state.get("limits", {})) + if body.profile: + limits["profile"] = str(body.profile) + if body.max_parallel_team_runs is not None: + limits["max_parallel_team_runs"] = max(1, min(int(body.max_parallel_team_runs), 4)) + if body.max_parallel_chat is not None: + limits["max_parallel_chat"] = max(1, min(int(body.max_parallel_chat), 8)) + stored["limits"] = limits + stored.setdefault("agent_models", state.get("agent_models", {})) + _write_aistalk_runtime(stored) + return {"ok": True, "limits": limits} + + +def _is_terminal_run_status(status: str) -> bool: + s = (status or "").strip().lower() + return s in {"succeeded", "failed", "cancelled", "canceled", "timeout", "error"} + + +class AISTalkChatBody(BaseModel): + message: str + agent_id: str = "orchestrator_synthesis" + model: Optional[str] = None + session_id: Optional[str] = None + project_id: Optional[str] = None + user_id: Optional[str] = None + history: List[Dict[str, Any]] = [] + + +@app.post("/api/aistalk/chat") +async def aistalk_chat(body: AISTalkChatBody, request: Request, _auth: str = Depends(require_auth)): + client_ip = request.client.host if request.client else "unknown" + if not _check_rate(f"aistalk_chat:{client_ip}", max_calls=40, window_sec=60): + raise HTTPException(status_code=429, detail="Rate limit: 40 AISTALK chat messages/min") + + state = await _aistalk_runtime_state() + limits = state.get("limits", {}) + max_chat = int(limits.get("max_parallel_chat") or 2) + async with _aistalk_state_lock: + global _aistalk_chat_active + if _aistalk_chat_active >= max_chat: + raise HTTPException( + status_code=429, + detail=f"AISTALK chat busy: active={_aistalk_chat_active}, limit={max_chat}", + ) + _aistalk_chat_active += 1 + + agent_id = str(body.agent_id or "orchestrator_synthesis").strip().lower() + if agent_id not in _AISTALK_AGENT_ORDER: + agent_id = "orchestrator_synthesis" + selected_model 
= str(body.model or "").strip() or str((state.get("agent_models") or {}).get(agent_id) or "") + if not selected_model: + selected_model = "qwen3:14b" + if (state.get("available_models") or []) and selected_model not in state["available_models"]: + selected_model = (state.get("available_models") or ["qwen3:14b"])[0] + + project_id = body.project_id or "aistalk" + session_id = body.session_id or f"aistalk_sess_{uuid.uuid4().hex[:10]}" + user_id = body.user_id or "aistalk_user" + + try: + role_prompt = _aistalk_role_prompt(agent_id) + system_prompt = ( + "Ти працюєш у складі AISTALK (крипто-детективне агентство з безпеки мережі). " + "Формат відповіді: findings -> risk -> actions. " + "Пиши конкретно, без вигадок, позначай невизначеність.\n\n" + + role_prompt + ) + + messages: List[Dict[str, Any]] = [{"role": "system", "content": system_prompt}] + messages.extend(body.history[-10:]) + messages.append({"role": "user", "content": body.message}) + + t0 = time.monotonic() + async with httpx.AsyncClient(timeout=SOFIIA_OLLAMA_TIMEOUT_SEC) as client: + r = await client.post( + f"{get_ollama_url().rstrip('/')}/api/chat", + json=_make_ollama_payload( + selected_model, + messages, + { + "temperature": 0.15, + "repeat_penalty": 1.1, + "num_predict": min(1024, SOFIIA_OLLAMA_NUM_PREDICT_TEXT), + }, + ), + ) + r.raise_for_status() + data = r.json() + reply = ((data.get("message") or {}).get("content") or "").strip() or "AISTALK: порожня відповідь" + latency_ms = int((time.monotonic() - t0) * 1000) + + _broadcast_bg( + _make_event( + "chat.reply", + { + "text": reply[:200], + "provider": "ollama", + "model": f"ollama:{selected_model}", + "agent_id": agent_id, + "latency_ms": latency_ms, + }, + project_id=project_id, + session_id=session_id, + user_id=user_id, + ) + ) + asyncio.get_event_loop().create_task( + _do_save_memory( + body.message, + reply, + session_id, + project_id, + user_id, + agent_id="aistalk", + ) + ) + return { + "ok": True, + "project_id": project_id, + 
"session_id": session_id, + "user_id": user_id, + "agent_id": agent_id, + "model": f"ollama:{selected_model}", + "response": reply, + "meta": {"latency_ms": latency_ms, "active_chat": _aistalk_chat_active, "limit_chat": max_chat}, + } + except HTTPException: + raise + except Exception as e: + raise HTTPException(status_code=502, detail=f"AISTALK chat error: {str(e)[:200]}") + finally: + async with _aistalk_state_lock: + _aistalk_chat_active = max(0, _aistalk_chat_active - 1) + + +def _aistalk_autobuild_input( + graph: str, + objective: str, + input_payload: Dict[str, Any], +) -> Dict[str, Any]: + payload = dict(input_payload or {}) + if graph == "incident_triage": + payload.setdefault("service", "aurora-service") + payload.setdefault("symptom", objective or "Aurora pipeline anomaly") + payload.setdefault("env", "prod") + payload.setdefault("include_traces", False) + return payload + + if graph == "release_check": + payload.setdefault("service_name", "aurora-service") + payload.setdefault("diff_text", objective or "") + payload.setdefault("run_deps", True) + payload.setdefault("run_drift", True) + payload.setdefault("run_smoke", False) + return payload + + if graph == "alert_triage": + # Graph is mostly autonomous; leave room for dry_run/profile overrides. + payload.setdefault("dry_run", False) + payload.setdefault("policy_profile", "default") + return payload + + if graph == "postmortem_draft": + incident_id = str(payload.get("incident_id") or "").strip() + if not incident_id and objective: + m = re.search(r"(inc_[A-Za-z0-9_\-]+)", objective) + if m: + incident_id = m.group(1) + if not incident_id: + raise HTTPException( + status_code=400, + detail="postmortem_draft requires input.incident_id (e.g. inc_123abc)", + ) + payload["incident_id"] = incident_id + payload.setdefault("service", "aurora-service") + payload.setdefault("env", "prod") + payload.setdefault("include_traces", False) + return payload + + # Unknown/custom graph: pass-through without mutation. 
+ return payload + + +@app.post("/api/aistalk/team/run") +async def aistalk_team_run(request: Request, _auth: str = Depends(require_auth)): + """Run AISTALK team workflow via LangGraph supervisor.""" + body = await request.json() + graph = str(body.get("graph") or "incident_triage").strip() + objective = str(body.get("objective") or "").strip() + input_payload = body.get("input") + if not isinstance(input_payload, dict): + input_payload = {} + input_payload = _aistalk_autobuild_input(graph, objective, input_payload) + runtime = await _aistalk_runtime_state() + max_team_runs = int((runtime.get("limits") or {}).get("max_parallel_team_runs") or 1) + # GC stale local entries (12h safety window). + now_ts = time.time() + stale = [rid for rid, ts in _aistalk_team_active_runs.items() if (now_ts - ts) > 12 * 3600] + for rid in stale: + _aistalk_team_active_runs.pop(rid, None) + if len(_aistalk_team_active_runs) >= max_team_runs: + raise HTTPException( + status_code=429, + detail=f"AISTALK team busy: active_runs={len(_aistalk_team_active_runs)}, limit={max_team_runs}", + ) + + sup_payload = { + "workspace_id": str(body.get("workspace_id") or "daarion"), + "user_id": str(body.get("user_id") or "aistalk_user"), + "agent_id": "aistalk", + "input": input_payload, + } + status_code, payload = await _supervisor_request_json( + "POST", + f"/v1/graphs/{graph}/runs", + timeout=60.0, + json_body=sup_payload, + ) + if status_code in (200, 201, 202) and isinstance(payload, dict): + rid = str(payload.get("run_id") or payload.get("id") or "").strip() + if rid: + _aistalk_team_active_runs[rid] = time.time() + return JSONResponse( + status_code=status_code, + content={ + "ok": status_code in (200, 201, 202), + "graph": graph, + "objective": objective, + "active_runs": len(_aistalk_team_active_runs), + "limit_runs": max_team_runs, + **payload, + }, + ) + + +@app.post("/api/aistalk/relay/test") +async def aistalk_relay_test(request: Request, _auth: str = Depends(require_auth)): + """Send a 
synthetic event to AISTALK relay and return adapter status.""" + body = await request.json() if request.headers.get("content-type", "").startswith("application/json") else {} + event_type = str(body.get("type") or "aistalk.ping").strip() + event = _make_event( + event_type, + {"message": body.get("message", "relay test"), "source": "sofiia-console"}, + project_id=str(body.get("project_id") or "aistalk"), + session_id=str(body.get("session_id") or f"aistalk_test_{uuid.uuid4().hex[:8]}"), + user_id="sofiia", + ) + if _aistalk is None: + raise HTTPException(status_code=503, detail="AISTALK adapter disabled") + _aistalk.handle_event(event) + return { + "ok": True, + "queued": True, + "event_type": event_type, + "adapter": _aistalk.status(), + } + + +@app.get("/api/aistalk/team/run/{run_id}") +async def aistalk_team_run_status(run_id: str, _auth: str = Depends(require_auth)): + status_code, payload = await _supervisor_request_json( + "GET", + f"/v1/runs/{run_id}", + timeout=20.0, + ) + if isinstance(payload, dict) and _is_terminal_run_status(str(payload.get("status") or "")): + _aistalk_team_active_runs.pop(run_id, None) + return JSONResponse(status_code=status_code, content=payload) + + +# ── Evidence Pack Engine ──────────────────────────────────────────────────────── + +@app.post("/api/projects/{project_id}/supervisor/evidence") +async def record_evidence_pack( + project_id: str, + request: Request, + _auth: str = Depends(require_auth), +): + """Record an Evidence Pack for a completed Supervisor run. + + Links the run into the Dialog Graph and auto-creates follow-up tasks. 
+ + Body: { + "run_id": str, # required + "graph_name": str, # required + "status": "completed|failed", # optional + "summary": str, # optional + "findings": [...], # optional + "recommendations": [...], # optional + "follow_up_tasks": [ # optional - auto-created as tasks + {"title": ..., "description": ..., "priority": "normal|high|urgent"} + ] + } + """ + body = await request.json() + run_id = body.get("run_id") + graph_name = body.get("graph_name") + if not run_id or not graph_name: + raise HTTPException(status_code=400, detail="run_id and graph_name are required") + try: + pack = await _app_db.create_evidence_pack( + project_id=project_id, + run_id=run_id, + graph_name=graph_name, + result_data=body, + created_by="sofiia", + ) + return JSONResponse(status_code=201, content=pack) + except Exception as e: + logger.error("record_evidence_pack failed: %s", e) + raise HTTPException(status_code=500, detail=str(e)) + + +# ── Graph Integrity ───────────────────────────────────────────────────────────── + +@app.get("/api/projects/{project_id}/graph/integrity") +async def graph_integrity(project_id: str, _auth: str = Depends(require_auth)): + """Run integrity checks on the project Dialog Graph. + + Returns: {"ok": bool, "violations": [...], "stats": {...}} + """ + try: + result = await _app_db.check_graph_integrity(project_id) + status_code = 200 if result["ok"] else 422 + return JSONResponse(status_code=status_code, content=result) + except Exception as e: + raise HTTPException(status_code=500, detail=str(e)) + + +# ── Graph Hygiene ─────────────────────────────────────────────────────────────── + +@app.post("/api/projects/{project_id}/graph/hygiene/run") +async def run_graph_hygiene( + project_id: str, + request: Request, + _auth: str = Depends(require_auth), +): + """Run Graph Hygiene Engine: dedup, lifecycle normalization, importance scoring. 
+ + Body (all optional): + { + "dry_run": true, // default true — compute but don't write + "scope": "all"|"recent", // default "all" + "since": "ISO8601" // required when scope=recent + } + + Returns: {"ok": bool, "dry_run": bool, "changes": [...], "stats": {...}} + """ + body = await request.json() if request.headers.get("content-type", "").startswith("application/json") else {} + dry_run = body.get("dry_run", True) + scope = body.get("scope", "all") + since = body.get("since") + try: + result = await _app_db.run_graph_hygiene( + project_id=project_id, + dry_run=dry_run, + scope=scope, + since=since, + ) + return JSONResponse(status_code=200, content=result) + except Exception as e: + logger.error("run_graph_hygiene failed: %s", e) + raise HTTPException(status_code=500, detail=str(e)) + + +# ── Self-Reflection Engine ────────────────────────────────────────────────────── + +@app.post("/api/projects/{project_id}/supervisor/reflect") +async def supervisor_reflect( + project_id: str, + request: Request, + _auth: str = Depends(require_auth), +): + """Create a Self-Reflection artifact for a completed Supervisor run. + + Analyzes the Evidence Pack and creates a 'decision' node (reflection) + linked to the agent_run node via 'reflects_on' edge. + + Body: { + "run_id": str, // required + "evidence": { // optional — pass evidence data for richer analysis + "summary": ..., + "findings": [...], + "recommendations": [...], + "follow_up_tasks": [...] 
+ } + } + + Returns: {node_id, reflection: {...scores, risks, ...}, edge_id, task_ids} + """ + body = await request.json() + run_id = body.get("run_id") + if not run_id: + raise HTTPException(status_code=400, detail="run_id is required") + evidence_data = body.get("evidence") or {} + try: + result = await _app_db.create_run_reflection( + project_id=project_id, + run_id=run_id, + evidence_data=evidence_data, + created_by="sofiia", + ) + return JSONResponse(status_code=201, content=result) + except Exception as e: + logger.error("supervisor_reflect failed: %s", e) + raise HTTPException(status_code=500, detail=str(e)) + + +# ── Strategic CTO Layer: Snapshots ─────────────────────────────────────────── + +@app.post("/api/projects/{project_id}/graph/snapshot") +async def compute_snapshot( + project_id: str, + window: str = "7d", + _auth: str = Depends(require_auth), +): + """Compute and store a graph analytics snapshot for the project.""" + try: + result = await _app_db.compute_graph_snapshot(project_id=project_id, window=window) + return JSONResponse(status_code=201, content=result) + except Exception as e: + logger.error("compute_snapshot failed: %s", e) + raise HTTPException(status_code=500, detail=str(e)) + + +@app.get("/api/projects/{project_id}/graph/snapshot") +async def get_snapshot( + project_id: str, + window: str = "7d", + _auth: str = Depends(require_auth), +): + """Get the latest snapshot for the project and window.""" + snap = await _app_db.get_latest_snapshot(project_id=project_id, window=window) + if not snap: + raise HTTPException(status_code=404, detail="No snapshot found. 
Run POST first.") + return JSONResponse(content=snap) + + +# ── Strategic CTO Layer: Signals ───────────────────────────────────────────── + +@app.post("/api/projects/{project_id}/graph/signals/recompute") +async def recompute_signals( + project_id: str, + window: str = "7d", + dry_run: bool = True, + _auth: str = Depends(require_auth), +): + """Run signal detection rules and upsert graph_signals.""" + try: + result = await _app_db.recompute_graph_signals( + project_id=project_id, + window=window, + dry_run=dry_run, + ) + return JSONResponse(status_code=200, content=result) + except Exception as e: + logger.error("recompute_signals failed: %s", e) + raise HTTPException(status_code=500, detail=str(e)) + + +@app.get("/api/projects/{project_id}/graph/signals") +async def list_signals( + project_id: str, + status: str = "open", + limit: int = 50, + _auth: str = Depends(require_auth), +): + """List graph signals for the project.""" + signals = await _app_db.get_graph_signals(project_id=project_id, status=status, limit=limit) + return JSONResponse(content={"signals": signals, "count": len(signals)}) + + +@app.post("/api/projects/{project_id}/graph/signals/{signal_id}/ack") +async def ack_signal( + project_id: str, + signal_id: str, + _auth: str = Depends(require_auth), +): + result = await _app_db.update_signal_status(signal_id=signal_id, new_status="ack") + if not result: + raise HTTPException(status_code=404, detail="Signal not found") + return JSONResponse(content=result) + + +@app.post("/api/projects/{project_id}/graph/signals/{signal_id}/resolve") +async def resolve_signal( + project_id: str, + signal_id: str, + _auth: str = Depends(require_auth), +): + result = await _app_db.update_signal_status(signal_id=signal_id, new_status="resolved") + if not result: + raise HTTPException(status_code=404, detail="Signal not found") + return JSONResponse(content=result) + + +@app.post("/api/projects/{project_id}/graph/signals/{signal_id}/dismiss") +async def dismiss_signal( + 
project_id: str, + signal_id: str, + _auth: str = Depends(require_auth), +): + result = await _app_db.update_signal_status(signal_id=signal_id, new_status="dismissed") + if not result: + raise HTTPException(status_code=404, detail="Signal not found") + return JSONResponse(content=result) + + +@app.post("/api/projects/{project_id}/graph/signals/auto-resolve") +async def auto_resolve_signals( + project_id: str, + dry_run: bool = True, + _auth: str = Depends(require_auth), +): + """Check resolution criteria for all open/ack signals and auto-resolve if met. + + ?dry_run=true — compute without writing (default) + ?dry_run=false — apply resolutions + + Returns: {ok, dry_run, checked, resolved, diff: [{signal_id, signal_type, action, reason}]} + """ + try: + result = await _app_db.auto_resolve_signals( + project_id=project_id, + dry_run=dry_run, + ) + return JSONResponse(content=result) + except Exception as e: + logger.error("auto_resolve_signals failed: %s", e) + raise HTTPException(status_code=500, detail=str(e)) + + +@app.post("/api/projects/{project_id}/graph/signals/{signal_id}/mitigate") +async def mitigate_signal( + project_id: str, + signal_id: str, + playbook_id: str = "", + _auth: str = Depends(require_auth), +): + """Create a deterministic mitigation plan for a signal. + + If playbook_id is provided, creates tasks from the playbook steps instead of templates. + Otherwise uses built-in mitigation templates. 
+ + Returns: {plan_node_id, task_ids, task_count, signal_type} + """ + try: + if playbook_id: + result = await _app_db.apply_playbook_to_signal( + project_id=project_id, + signal_id=signal_id, + playbook_id=playbook_id, + created_by="sofiia", + ) + else: + result = await _app_db.create_mitigation_plan( + project_id=project_id, + signal_id=signal_id, + created_by="sofiia", + ) + await _app_db.update_signal_status(signal_id=signal_id, new_status="ack") + return JSONResponse(status_code=201, content=result) + except ValueError as e: + raise HTTPException(status_code=404, detail=str(e)) + except Exception as e: + logger.error("mitigate_signal failed: %s", e) + raise HTTPException(status_code=500, detail=str(e)) + + +# ── CTO Portfolio (Cross-Project) ──────────────────────────────────────────── + +@app.get("/api/cto/portfolio/snapshots") +async def portfolio_snapshots( + window: str = "7d", + _auth: str = Depends(require_auth), +): + """Get the latest snapshot for every project (cross-project portfolio view). + + Returns: {projects: [{project_id, name, metrics, snapshot_at}], window} + """ + db = await _app_db.get_db() + # All projects + async with db.execute("SELECT project_id, name FROM projects ORDER BY name") as cur: + projects = await cur.fetchall() + result = [] + for pid, pname in projects: + snap = await _app_db.get_latest_snapshot(pid, window) + # Get latest lesson bucket + trend_flags + async with db.execute( + "SELECT date_bucket, metrics_json FROM lessons WHERE project_id=? 
ORDER BY date_bucket DESC LIMIT 1", + (pid,), + ) as cur: + lrow = await cur.fetchone() + lesson_bucket = None + lesson_trend_flags = None + if lrow: + lesson_bucket = lrow[0] + try: + import json as _json + lm = _json.loads(lrow[1] or "{}") + lesson_trend_flags = lm.get("trend_flags") + except Exception: + pass + # Compute streaks + try: + lesson_streaks = await _app_db.compute_lesson_streaks(pid) + except Exception: + lesson_streaks = None + result.append({ + "project_id": pid, + "name": pname, + "metrics": snap["metrics"] if snap else None, + "snapshot_at": snap["created_at"] if snap else None, + "latest_lesson_bucket": lesson_bucket, + "latest_lesson_trend_flags": lesson_trend_flags, + "latest_lesson_streaks": lesson_streaks, + }) + return JSONResponse(content={"projects": result, "window": window, "count": len(result)}) + + +@app.get("/api/cto/portfolio/signals") +async def portfolio_signals( + status: str = "open", + severity: str = "", + limit: int = 50, + _auth: str = Depends(require_auth), +): + """Get signals across all projects, ordered by severity then created_at. + + ?status=open|ack|resolved|dismissed|all + ?severity=high,critical (comma-separated filter, optional) + """ + db = await _app_db.get_db() + async with db.execute("SELECT project_id, name FROM projects") as cur: + projects = {r[0]: r[1] for r in await cur.fetchall()} + + if status == "all": + q = "SELECT *, rowid FROM graph_signals ORDER BY severity DESC, created_at DESC LIMIT ?" + params: tuple = (limit,) + else: + q = "SELECT *, rowid FROM graph_signals WHERE status=? ORDER BY severity DESC, created_at DESC LIMIT ?" 
+ params = (status, limit) + + async with db.execute(q, params) as cur: + rows = await cur.fetchall() + + # Severity order for sorting + SEV_ORDER = {"critical": 0, "high": 1, "medium": 2, "low": 3} + sev_filter = {s.strip() for s in severity.split(",") if s.strip()} if severity else set() + + signals = [] + for row in rows: + d = dict(row) + if "rowid" in d: + del d["rowid"] + try: + d["evidence"] = json.loads(d["evidence"]) + except Exception: + d["evidence"] = {} + if sev_filter and d.get("severity") not in sev_filter: + continue + d["project_name"] = projects.get(d["project_id"], d["project_id"]) + signals.append(d) + + signals.sort(key=lambda s: (SEV_ORDER.get(s.get("severity", "low"), 3), s.get("created_at", ""))) + return JSONResponse(content={"signals": signals[:limit], "count": len(signals), "status": status}) + + +@app.post("/api/cto/portfolio/drift/recompute") +async def portfolio_drift_recompute( + window: str = "7d", + dry_run: bool = False, + _auth: str = Depends(require_auth), +): + """Recompute portfolio-level drift signals based on lesson streaks across all projects.""" + try: + result = await _app_db.recompute_portfolio_signals(window=window, dry_run=dry_run) + return JSONResponse(content=result) + except Exception as e: + raise HTTPException(status_code=500, detail=str(e)) + + +@app.get("/api/cto/portfolio/drift/signals") +async def portfolio_drift_signals( + status: str = "open", + _auth: str = Depends(require_auth), +): + """Get portfolio-level drift signals.""" + try: + signals = await _app_db.list_portfolio_signals(status=status) + return JSONResponse(content={"signals": signals, "count": len(signals)}) + except Exception as e: + raise HTTPException(status_code=500, detail=str(e)) + + +@app.post("/api/cto/portfolio/streaks") +async def portfolio_streaks( + _auth: str = Depends(require_auth), +): + """Get streak data for all projects.""" + try: + db = await _app_db.get_db() + async with db.execute("SELECT project_id, name FROM projects") as 
cur: + projects = await cur.fetchall() + result = [] + for pid, pname in projects: + streaks = await _app_db.compute_lesson_streaks(pid) + result.append({"project_id": pid, "name": pname, "streaks": streaks}) + return JSONResponse(content={"projects": result, "count": len(result)}) + except Exception as e: + raise HTTPException(status_code=500, detail=str(e)) + + +# ── Playbooks (Graph Learning Layer) ───────────────────────────────────────── + +@app.get("/api/projects/{project_id}/playbooks") +async def list_playbooks( + project_id: str, + signal_type: str = "", + limit: int = 10, + _auth: str = Depends(require_auth), +): + """List playbooks for a project, ordered by success_rate desc.""" + try: + pbs = await _app_db.list_playbooks( + project_id=project_id, + signal_type=signal_type, + limit=limit, + ) + return JSONResponse(content={"playbooks": pbs, "count": len(pbs)}) + except Exception as e: + raise HTTPException(status_code=500, detail=str(e)) + + +@app.post("/api/projects/{project_id}/playbooks/from-signal/{signal_id}") +async def create_playbook_from_signal( + project_id: str, + signal_id: str, + _auth: str = Depends(require_auth), +): + """Promote current mitigation of a signal into a playbook (or update existing). + + Requires signal to have plan_node_id and mitigation_task_ids in evidence. + Returns: {playbook_id, doc_id, version_id, context_key, created, stats} + """ + try: + # Check if signal is resolved to update stats + db = await _app_db.get_db() + async with db.execute( + "SELECT status, evidence FROM graph_signals WHERE id=? 
AND project_id=?", + (signal_id, project_id), + ) as cur: + srow = await cur.fetchone() + resolved = srow[0] == "resolved" if srow else False + result = await _app_db.upsert_playbook_from_signal( + project_id=project_id, + signal_id=signal_id, + resolved=resolved, + ) + return JSONResponse(status_code=201, content=result) + except ValueError as e: + raise HTTPException(status_code=409, detail=str(e)) + except Exception as e: + logger.error("create_playbook_from_signal failed: %s", e) + raise HTTPException(status_code=500, detail=str(e)) + + +# ── Portfolio Batch Recompute ───────────────────────────────────────────────── + +@app.post("/api/cto/portfolio/snapshots/recompute") +async def portfolio_snapshots_recompute( + window: str = "7d", + force: bool = False, + _auth: str = Depends(require_auth), +): + """Recompute graph snapshots for ALL projects. + + Skips projects that already have a snapshot for today (date_bucket) unless force=true. + Returns: {computed, skipped, errors[]} + """ + import datetime as _dt2 + db = await _app_db.get_db() + async with db.execute("SELECT project_id FROM projects") as cur: + project_ids = [r[0] for r in await cur.fetchall()] + + today = _dt2.datetime.utcnow().strftime("%Y-%m-%d") + computed, skipped, errors = 0, 0, [] + for pid in project_ids: + try: + if not force: + async with db.execute( + "SELECT id FROM graph_snapshots WHERE project_id=? AND window=? 
AND date_bucket=?", + (pid, window, today), + ) as cur: + exists = await cur.fetchone() + if exists: + skipped += 1 + continue + await _app_db.compute_graph_snapshot(project_id=pid, window=window) + computed += 1 + except Exception as e: + errors.append({"project_id": pid, "error": str(e)}) + return JSONResponse(content={"computed": computed, "skipped": skipped, "errors": errors}) + + +@app.post("/api/cto/portfolio/signals/recompute") +async def portfolio_signals_recompute( + window: str = "7d", + dry_run: bool = False, + _auth: str = Depends(require_auth), +): + """Recompute signals for ALL projects. + + Returns: {results: [{project_id, new, refreshed, total}], errors[]} + """ + db = await _app_db.get_db() + async with db.execute("SELECT project_id FROM projects") as cur: + project_ids = [r[0] for r in await cur.fetchall()] + + results, errors = [], [] + for pid in project_ids: + try: + diff = await _app_db.recompute_graph_signals( + project_id=pid, window=window, dry_run=dry_run + ) + new_count = sum(1 for d in diff if d.get("action") == "new") + refresh_count = sum(1 for d in diff if d.get("action") in ("refresh", "reopen")) + results.append({ + "project_id": pid, + "new": new_count, + "refreshed": refresh_count, + "total": len(diff), + }) + except Exception as e: + errors.append({"project_id": pid, "error": str(e)}) + return JSONResponse(content={"results": results, "errors": errors, "dry_run": dry_run}) + + +# ── Lessons (Graph Learning Layer) ──────────────────────────────────────────── + +@app.post("/api/projects/{project_id}/lessons/generate") +async def generate_lesson( + project_id: str, + window: str = "7d", + dry_run: bool = True, + _auth: str = Depends(require_auth), +): + """Generate a weekly Lessons Learned report for a project. + + dry_run=true (default): compute and return without writing to DB. + dry_run=false: persist lesson node + metrics + improvement tasks. 
+ + Returns: {dry_run, date_bucket, markdown, metrics, planned_improvement_tasks, evidence} + """ + try: + result = await _app_db.upsert_lesson( + project_id=project_id, + window=window, + dry_run=dry_run, + created_by="sofiia", + ) + return JSONResponse(status_code=200 if dry_run else 201, content=result) + except Exception as e: + logger.error("generate_lesson failed: %s", e) + raise HTTPException(status_code=500, detail=str(e)) + + +@app.get("/api/projects/{project_id}/lessons") +async def list_lessons_endpoint( + project_id: str, + window: str = "7d", + limit: int = 8, + _auth: str = Depends(require_auth), +): + """List lessons for a project, ordered by date_bucket desc.""" + try: + lessons = await _app_db.list_lessons(project_id=project_id, window=window, limit=limit) + return JSONResponse(content={"lessons": lessons, "count": len(lessons)}) + except Exception as e: + raise HTTPException(status_code=500, detail=str(e)) + + +@app.get("/api/projects/{project_id}/lessons/{lesson_id}") +async def get_lesson_endpoint( + project_id: str, + lesson_id: str, + _auth: str = Depends(require_auth), +): + """Get full lesson detail including markdown and linked evidence.""" + try: + lesson = await _app_db.get_lesson_detail(project_id=project_id, lesson_id=lesson_id) + if not lesson: + raise HTTPException(status_code=404, detail="Lesson not found") + return JSONResponse(content=lesson) + except HTTPException: + raise + except Exception as e: + raise HTTPException(status_code=500, detail=str(e)) + + +@app.post("/api/projects/{project_id}/lessons/impact/recompute") +async def recompute_lesson_impact( + project_id: str, + window: str = "7d", + dry_run: bool = False, + force: bool = False, + _auth: str = Depends(require_auth), +): + """Recompute impact score for the prior-bucket lesson based on current-bucket metrics.""" + try: + if dry_run: + # Preview: just return what would be computed, no write + result = await _app_db.evaluate_lesson_impact( + project_id=project_id, 
window=window, force=True + ) + return JSONResponse(content={"dry_run": True, "preview": result}) + result = await _app_db.evaluate_lesson_impact( + project_id=project_id, window=window, force=force + ) + return JSONResponse(content={"dry_run": False, "result": result}) + except Exception as e: + raise HTTPException(status_code=500, detail=str(e)) + + +@app.post("/api/cto/portfolio/lessons/generate") +async def portfolio_lessons_generate( + window: str = "7d", + dry_run: bool = False, + force: bool = False, + _auth: str = Depends(require_auth), +): + """Generate lessons for ALL projects. + + Skips projects that already have a lesson for the current bucket (unless force=true). + Returns: {generated, skipped, errors[]} + """ + db = await _app_db.get_db() + async with db.execute("SELECT project_id FROM projects") as cur: + project_ids = [r[0] for r in await cur.fetchall()] + + current_bucket = _app_db.compute_lesson_bucket() + generated, skipped, errors = 0, 0, [] + for pid in project_ids: + try: + if not force and not dry_run: + async with db.execute( + "SELECT lesson_id FROM lessons WHERE project_id=? AND date_bucket=? 
AND window=?", + (pid, current_bucket, window), + ) as cur: + exists = await cur.fetchone() + if exists: + skipped += 1 + continue + await _app_db.upsert_lesson(project_id=pid, window=window, dry_run=dry_run) + generated += 1 + except Exception as e: + errors.append({"project_id": pid, "error": str(e)}) + return JSONResponse(content={ + "generated": generated, + "skipped": skipped, + "errors": errors, + "dry_run": dry_run, + "date_bucket": current_bucket, + }) + + +# ── Level 6: Governance Gates ───────────────────────────────────────────────── + +@app.get("/api/projects/{project_id}/governance/gates") +async def get_governance_gates( + project_id: str, + window: str = "7d", + _auth: str = Depends(require_auth), +): + """Return latest governance gate evaluation (dry_run, no persist).""" + try: + result = await _app_db.evaluate_governance_gates( + project_id=project_id, window=window, dry_run=True + ) + return JSONResponse(content=result) + except Exception as e: + raise HTTPException(status_code=500, detail=str(e)) + + +@app.post("/api/projects/{project_id}/governance/gates/evaluate") +async def evaluate_governance_gates_endpoint( + project_id: str, + window: str = "7d", + dry_run: bool = False, + _auth: str = Depends(require_auth), +): + """Evaluate governance gates and optionally persist decision node.""" + try: + result = await _app_db.evaluate_governance_gates( + project_id=project_id, window=window, dry_run=dry_run + ) + return JSONResponse(content=result) + except Exception as e: + raise HTTPException(status_code=500, detail=str(e)) + + +# ── Level 6: Portfolio Drift Auto-plan / Auto-run ──────────────────────────── + +@app.post("/api/cto/portfolio/drift/{signal_id}/auto-plan") +async def portfolio_drift_auto_plan( + signal_id: str, + _auth: str = Depends(require_auth), +): + """Populate evidence.auto_actions.runs with planned entries (dry_run=True).""" + try: + result = await _app_db.auto_plan_drift_signal(signal_id=signal_id) + if "error" in result: + raise 
HTTPException(status_code=404, detail=result["error"]) + return JSONResponse(content=result) + except HTTPException: + raise + except Exception as e: + raise HTTPException(status_code=500, detail=str(e)) + + +@app.post("/api/cto/portfolio/drift/{signal_id}/auto-run") +async def portfolio_drift_auto_run( + signal_id: str, + dry_run: bool = False, + force: bool = False, + _auth: str = Depends(require_auth), +): + """Execute planned/queued workflow runs for a portfolio drift signal.""" + try: + supervisor_url = os.getenv("SUPERVISOR_URL", "http://sofiia-supervisor:8080") + result = await _app_db.auto_run_drift_signal( + signal_id=signal_id, + dry_run=dry_run, + force=force, + supervisor_url=supervisor_url, + ) + if "error" in result: + raise HTTPException(status_code=404, detail=result["error"]) + return JSONResponse(content=result) + except HTTPException: + raise + except Exception as e: + raise HTTPException(status_code=500, detail=str(e)) + + +# ── Level 7: Governance Audit Trail ────────────────────────────────────────── + +@app.get("/api/cto/audit/events") +async def audit_events_portfolio( + scope: Optional[str] = "portfolio", + limit: int = 100, + event_type: Optional[str] = None, + status: Optional[str] = None, + since: Optional[str] = None, + _auth: str = Depends(require_auth), +): + """List governance audit events for portfolio (or any scope).""" + try: + items = await _app_db.list_governance_events( + scope=scope, project_id="portfolio" if scope == "portfolio" else None, + event_type=event_type, status=status, since=since, limit=limit, + ) + return JSONResponse(content={"items": items, "count": len(items)}) + except Exception as e: + raise HTTPException(status_code=500, detail=str(e)) + + +@app.get("/api/projects/{project_id}/audit/events") +async def audit_events_project( + project_id: str, + limit: int = 100, + event_type: Optional[str] = None, + status: Optional[str] = None, + since: Optional[str] = None, + _auth: str = Depends(require_auth), +): + 
"""List governance audit events for a specific project.""" + try: + items = await _app_db.list_governance_events( + scope="project", project_id=project_id, + event_type=event_type, status=status, since=since, limit=limit, + ) + return JSONResponse(content={"items": items, "count": len(items)}) + except Exception as e: + raise HTTPException(status_code=500, detail=str(e)) + + +# ── Level 8: Agents as Projects ─────────────────────────────────────────────── + +import difflib as _difflib +import time as _time + +# ── Agent Ops helpers ────────────────────────────────────────────────────────── + +async def _fetch_agents_from_gateway( + node_id: str, + gateway_url: str, + timeout_ms: Optional[int] = None, + get_retry: int = 1, +) -> tuple: + """Fetch agents list from gateway /health. Returns (agents, error_str|None, latency_ms). + + Respects per-node timeout_ms and retry policy. + """ + if not gateway_url: + return [], f"No gateway_url configured for {node_id}", None + timeout_sec = (timeout_ms or 2500) / 1000.0 + last_err = None + attempts = get_retry + 1 + t0 = _time.monotonic() + for attempt in range(attempts): + try: + async with httpx.AsyncClient(timeout=timeout_sec) as client: + resp = await client.get(f"{gateway_url.rstrip('/')}/health") + latency_ms = int((_time.monotonic() - t0) * 1000) + if resp.status_code != 200: + last_err = f"HTTP {resp.status_code}" + continue + data = resp.json() + raw = data.get("agents", {}) + agents: List[Dict] = [] + if isinstance(raw, dict): + for aid, info in raw.items(): + agents.append({ + "agent_id": aid, + "display_name": info.get("name", aid), + "status": "healthy" if info.get("prompt_loaded") else "degraded", + "telegram_token_configured": info.get("telegram_token_configured", False), + "prompt_loaded": info.get("prompt_loaded", False), + "node_id": node_id, + "active_prompt": info.get("active_prompt"), + "badges": info.get("badges", []), + "visibility": info.get("visibility", "public"), + "telegram_mode": 
info.get("telegram_mode", "on"), + "lifecycle_status": info.get("lifecycle_status", "active"), + }) + elif isinstance(raw, list): + for a in raw: + agents.append({**a, "node_id": node_id}) + return agents, None, latency_ms + except Exception as e: + last_err = str(e)[:200] + latency_ms = int((_time.monotonic() - t0) * 1000) + return [], last_err, latency_ms + + +def _node_info(node_id: str) -> Dict: + """Return {gateway_url, policy} for a node.""" + from .config import get_node_policy + return { + "gateway_url": get_gateway_url(node_id), + "policy": get_node_policy(node_id), + } + + +def _agent_desired_payload(override: Dict) -> Dict: + """Canonical desired-state payload from an override row.""" + return { + "display_name": override.get("display_name"), + "domain": override.get("domain"), + "system_prompt_md": override.get("system_prompt_md"), + } + + +def _merge_agent_with_override(agent: Dict, override: Optional[Dict]) -> Dict: + result = dict(agent) + if not override: + result["has_override"] = False + result["drift"] = False + return result + if override.get("display_name"): result["display_name"] = override["display_name"] + if override.get("domain"): result["domain"] = override["domain"] + if override.get("system_prompt_md"): result["system_prompt_md"] = override["system_prompt_md"] + result["is_hidden"] = bool(override.get("is_hidden")) + result["has_override"] = True + result["override_updated_at"] = override.get("updated_at") + result["last_applied_hash"] = override.get("last_applied_hash") + result["last_applied_at"] = override.get("last_applied_at") + # Drift: desired hash != last applied hash + desired = _agent_desired_payload(override) + desired_hash = _app_db._agent_payload_hash(desired) + result["desired_hash"] = desired_hash + active_hash = override.get("last_applied_hash") + result["drift"] = bool(active_hash and active_hash != desired_hash) + return result + + +async def _check_prompt_freeze(node_id: str, agent_id: str) -> bool: + """Return True 
if PROMPT_FREEZE gate is active for any related project.""" + try: + # Check portfolio gate + gates = await _app_db.evaluate_governance_gates("portfolio", window="7d", dry_run=True) + for g in gates.get("gates", []): + if g.get("name") == "PROMPT_FREEZE" and g.get("status") != "PASS": + return True + except Exception: + pass + return False + + +# ── Agent CRUD endpoints ─────────────────────────────────────────────────────── + +# Agents required on every online node — if absent, signal is raised +_REQUIRED_PER_NODE_AGENTS: List[str] = ["monitor"] + + +def _normalize_agent_capabilities(agent: Dict) -> Dict: + """Add normalized capabilities: {voice, telegram} to agent dict.""" + badges = agent.get("badges", []) + telegram_mode = agent.get("telegram_mode", "on") + agent_id = agent.get("agent_id", "") + agent["capabilities"] = { + "voice": agent_id == "aistalk" or "voice" in badges, + "telegram": telegram_mode != "off", + } + return agent + + +async def _emit_monitor_missing_event(node_id: str, bucket: str) -> None: + """Write a governance_event when monitor is confirmed absent on an online node.""" + try: + await _app_db.append_governance_event( + scope="portfolio", + project_id="portfolio", + actor_type="system", + actor_id=None, + event_type="node_required_agent_missing", + idempotency_key=f"req|missing|{node_id}|monitor|{bucket}", + severity="high", + status="error", + ref_type="node", + ref_id=node_id, + evidence={ + "v": 1, + "message": f"Required agent 'monitor' absent on {node_id}", + "inputs": {"node_id": node_id, "required_agent": "monitor"}, + "outputs": {"missing": True}, + "links": {}, + "timings": {}, + }, + ) + except Exception as exc: + logger.warning("_emit_monitor_missing_event failed: %s", exc) + + +@app.get("/api/agents") +async def list_agents( + nodes: str = "NODA1", + include_hidden: bool = False, + _auth: str = Depends(require_auth), +): + """Fetch agents from node gateways, merge with local overrides. 
+ + Returns {items, node_errors, stats, required_missing_nodes, nodes_queried}. + Partial node failure never blocks other nodes — always HTTP 200. + """ + node_ids = [n.strip().upper() for n in nodes.split(",") if n.strip()] + today_bucket = datetime.utcnow().strftime("%Y-%m-%d") + + all_agents: List[Dict] = [] + node_errors: List[Dict] = [] + node_stats: List[Dict] = [] + required_missing_nodes: List[Dict] = [] # nodes where required agents absent + + overrides_list = await _app_db.list_agent_overrides() + overrides_map = {(o["node_id"], o["agent_id"]): o for o in overrides_list} + + for node_id in node_ids: + ni = _node_info(node_id) + gw_url = ni["gateway_url"] + policy = ni["policy"] + agents_raw, err, latency_ms = await _fetch_agents_from_gateway( + node_id, gw_url, + timeout_ms=policy["gateway_timeout_ms"], + get_retry=policy["get_retry"], + ) + if err: + node_errors.append({ + "node_id": node_id, "error": err, + "gateway_url": gw_url, "latency_ms": latency_ms, + "node_role": policy["node_role"], + }) + node_stats.append({"node_id": node_id, "ok": False, "count": 0, + "latency_ms": latency_ms}) + # Node offline → skip required check (not "missing", just "unreachable") + else: + count = 0 + present_agent_ids: Set[str] = set() + for agent in agents_raw: + override = overrides_map.get((node_id, agent["agent_id"])) + merged = _merge_agent_with_override(agent, override) + merged = _normalize_agent_capabilities(merged) + if not include_hidden and merged.get("is_hidden"): + continue + merged["latency_ms"] = latency_ms + all_agents.append(merged) + present_agent_ids.add(agent["agent_id"]) + count += 1 + node_stats.append({"node_id": node_id, "ok": True, "count": count, + "latency_ms": latency_ms, + "node_role": policy["node_role"]}) + # Required agent check — only for online nodes + for req_id in _REQUIRED_PER_NODE_AGENTS: + if req_id not in present_agent_ids: + required_missing_nodes.append({ + "node_id": node_id, + "agent_id": req_id, + "reason": 
"absent_from_registry", + }) + asyncio.create_task(_emit_monitor_missing_event(node_id, today_bucket)) + + all_agents.sort(key=lambda a: (a.get("status") != "healthy", a.get("display_name", "").lower())) + nodes_ok = sum(1 for s in node_stats if s["ok"]) + return JSONResponse(content={ + "items": all_agents, + "node_errors": node_errors, + "stats": {"nodes_ok": nodes_ok, "nodes_total": len(node_ids), "agents_total": len(all_agents)}, + "required_missing_nodes": required_missing_nodes, + "nodes_queried": node_ids, + }) + + +@app.get("/api/agents/{node_id}/{agent_id}") +async def get_agent(node_id: str, agent_id: str, _auth: str = Depends(require_auth)): + node_id = node_id.upper() + ni = _node_info(node_id) + policy = ni["policy"] + agents_raw, err, latency_ms = await _fetch_agents_from_gateway( + node_id, ni["gateway_url"], + timeout_ms=policy["gateway_timeout_ms"], + get_retry=policy["get_retry"], + ) + override = await _app_db.get_agent_override(node_id, agent_id) + agent = next((a for a in agents_raw if a["agent_id"] == agent_id), None) + if not agent: + if override: + desired = _agent_desired_payload(override) + return JSONResponse(content={"agent": { + **override, "status": "unknown", "node_offline": True, + "desired_hash": _app_db._agent_payload_hash(desired), "drift": False, + "latency_ms": latency_ms, + }}) + raise HTTPException(status_code=404, detail=f"Agent '{agent_id}' not found on {node_id}") + merged = _merge_agent_with_override(agent, override) + merged["latency_ms"] = latency_ms + return JSONResponse(content={"agent": merged, "node_error": err}) + + +@app.get("/api/agents/{node_id}/{agent_id}/versions") +async def list_agent_versions( + node_id: str, agent_id: str, + limit: int = 10, + _auth: str = Depends(require_auth), +): + """Return version history for an agent override.""" + node_id = node_id.upper() + versions = await _app_db.list_agent_versions(node_id, agent_id, limit=limit) + return JSONResponse(content={"versions": versions}) + + +class 
AgentOverridePatch(BaseModel): + display_name: Optional[str] = None + domain: Optional[str] = None + system_prompt_md: Optional[str] = None + is_hidden: Optional[bool] = None + + +@app.patch("/api/agents/{node_id}/{agent_id}") +async def patch_agent_override( + node_id: str, agent_id: str, + body: AgentOverridePatch, + _auth: str = Depends(require_auth), +): + """Save local override (does NOT push to node). Creates a version snapshot.""" + node_id = node_id.upper() + override = await _app_db.upsert_agent_override( + node_id, agent_id, + display_name=body.display_name, + domain=body.domain, + system_prompt_md=body.system_prompt_md, + is_hidden=body.is_hidden, + ) + # Audit: agent_override_saved + await _app_db.append_governance_event( + scope="project", project_id=agent_id, actor_type="user", + event_type="agent_override_saved", + idempotency_key=f"aos|{node_id}|{agent_id}|{override.get('version_hash','')}", + severity="info", status="ok", + ref_type="agent", ref_id=agent_id, + evidence=_app_db._make_evidence( + message=f"Override saved for {agent_id} on {node_id}", + outputs={"version_hash": override.get("version_hash"), "fields_changed": [ + k for k, v in body.dict(exclude_none=True).items() + ]}, + ), + ) + return JSONResponse(content={"override": override, "saved": True}) + + +@app.post("/api/agents/{node_id}/{agent_id}/reset") +async def reset_agent_override(node_id: str, agent_id: str, _auth: str = Depends(require_auth)): + """Remove local override, revert to registry state.""" + node_id = node_id.upper() + await _app_db.delete_agent_override(node_id, agent_id) + return JSONResponse(content={"reset": True, "node_id": node_id, "agent_id": agent_id}) + + +# ── Safe Apply v2 ────────────────────────────────────────────────────────────── + +@app.post("/api/agents/{node_id}/{agent_id}/apply") +async def apply_agent_override( + node_id: str, agent_id: str, + dry_run: bool = True, + plan_id: Optional[str] = None, + force: bool = False, + _auth: str = 
Depends(require_auth), +): + """Safe Apply v2. + + dry_run=true → returns diff_text + will_change + plan_id (sha256 of desired state). + dry_run=false → requires plan_id to match; applies and stores last_applied_hash. + """ + node_id = node_id.upper() + + # Governance gate check: PROMPT_FREEZE + if not dry_run and not force: + frozen = await _check_prompt_freeze(node_id, agent_id) + if frozen: + return JSONResponse( + status_code=423, + content={"error": "PROMPT_FREEZE gate is active. Use force=true to override (requires review).", + "gate": "PROMPT_FREEZE", "node_id": node_id, "agent_id": agent_id}, + ) + + override = await _app_db.get_agent_override(node_id, agent_id) + if not override: + raise HTTPException(status_code=404, detail="No local override found. Use PATCH first.") + + desired = _agent_desired_payload(override) + computed_plan_id = _app_db._agent_payload_hash(desired) + + # Fetch current active prompt for diff + gw_url = get_gateway_url(node_id) + agents_raw, _ = await _fetch_agents_from_gateway(node_id, gw_url) + active_agent = next((a for a in agents_raw if a["agent_id"] == agent_id), None) + active_prompt = active_agent.get("active_prompt", "") if active_agent else "" + desired_prompt = desired.get("system_prompt_md") or "" + + # Build unified diff + diff_lines = list(_difflib.unified_diff( + (active_prompt or "").splitlines(keepends=True), + desired_prompt.splitlines(keepends=True), + fromfile=f"{agent_id}:active", + tofile=f"{agent_id}:desired", + n=3, + )) + diff_text = "".join(diff_lines) if diff_lines else "" + will_change = bool(diff_text) or (override.get("domain") is not None) + + if dry_run: + # Audit: agent_apply_planned + await _app_db.append_governance_event( + scope="project", project_id=agent_id, actor_type="user", + event_type="agent_apply_planned", + idempotency_key=f"aap|{node_id}|{agent_id}|{computed_plan_id}", + severity="info", status="ok", + ref_type="agent", ref_id=agent_id, + evidence=_app_db._make_evidence( + message=f"Apply 
planned (dry-run) for {agent_id}@{node_id}", + outputs={"will_change": will_change, "plan_id": computed_plan_id, + "diff_lines": len(diff_lines)}, + ), + ) + return JSONResponse(content={ + "dry_run": True, "will_change": will_change, + "plan_id": computed_plan_id, + "diff_text": diff_text, + "desired": desired, + "node_id": node_id, "agent_id": agent_id, + }) + + # Apply: validate plan_id + if plan_id and plan_id != computed_plan_id: + raise HTTPException( + status_code=409, + detail=f"plan_id mismatch: provided={plan_id} computed={computed_plan_id}. " + "Re-run dry_run=true to get fresh plan_id.", + ) + + applied: List[Dict] = [] + errors_apply: List[Dict] = [] + + if desired_prompt and gw_url: + try: + async with httpx.AsyncClient(timeout=10.0) as client: + resp = await client.post( + f"{gw_url.rstrip('/')}/admin/agents/{agent_id}/prompt", + json={"prompt": desired_prompt}, + headers={"X-Admin-Token": os.getenv("GATEWAY_ADMIN_TOKEN", "")}, + ) + if resp.status_code in (200, 201, 204): + applied.append({"action": "update_system_prompt", "status": "ok"}) + else: + errors_apply.append({"action": "update_system_prompt", + "error": f"HTTP {resp.status_code}: {resp.text[:200]}"}) + except Exception as e: + errors_apply.append({"action": "update_system_prompt", "error": str(e)[:300]}) + + success = len(applied) > 0 and len(errors_apply) == 0 + + # Mark last_applied_hash if successful + if success: + await _app_db.upsert_agent_override( + node_id, agent_id, _mark_applied_hash=computed_plan_id, + ) + + # Audit + evt_type = "agent_apply_executed" if success else "agent_apply_failed" + await _app_db.append_governance_event( + scope="project", project_id=agent_id, actor_type="user", + event_type=evt_type, + idempotency_key=f"aae|{node_id}|{agent_id}|{computed_plan_id}|{'ok' if success else 'fail'}", + severity="info" if success else "high", status="ok" if success else "error", + ref_type="agent", ref_id=agent_id, + evidence=_app_db._make_evidence( + message=f"Apply 
{'succeeded' if success else 'failed'} for {agent_id}@{node_id}", + outputs={"plan_id": computed_plan_id, "applied": applied, "errors": errors_apply}, + ), + ) + + return JSONResponse(content={ + "dry_run": False, "success": success, + "plan_id": computed_plan_id, + "applied": applied, "errors": errors_apply, + "node_id": node_id, "agent_id": agent_id, + }) + + +@app.post("/api/agents/{node_id}/{agent_id}/rollback") +async def rollback_agent_override( + node_id: str, agent_id: str, + version_hash: str, + _auth: str = Depends(require_auth), +): + """Rollback agent override to a specific version by version_hash.""" + node_id = node_id.upper() + version = await _app_db.get_agent_version_by_hash(node_id, agent_id, version_hash) + if not version: + raise HTTPException(status_code=404, detail=f"Version {version_hash} not found") + + payload = version["payload"] + # Restore the override to this version's payload + updated = await _app_db.upsert_agent_override( + node_id, agent_id, + display_name=payload.get("display_name"), + domain=payload.get("domain"), + system_prompt_md=payload.get("system_prompt_md"), + ) + + # Audit + await _app_db.append_governance_event( + scope="project", project_id=agent_id, actor_type="user", + event_type="agent_rollback_executed", + idempotency_key=f"arb|{node_id}|{agent_id}|{version_hash}|{_app_db._now()}", + severity="warn", status="ok", + ref_type="agent", ref_id=agent_id, + evidence=_app_db._make_evidence( + message=f"Rollback to version {version_hash} for {agent_id}@{node_id}", + outputs={"version_hash": version_hash, "created_at": version.get("created_at")}, + ), + ) + return JSONResponse(content={ + "rolled_back": True, "version_hash": version_hash, + "override": updated, "node_id": node_id, "agent_id": agent_id, + }) + + +# ── Bulk Agent Actions (multi-node + canary) ─────────────────────────────────── + +async def _apply_single_agent( + node_id: str, + override: Dict, + agents_map: Dict, + gw_url: str, + apply_timeout_sec: float, +) 
-> Dict: + """Apply a single agent override. Returns result dict with status field.""" + aid = override["agent_id"] + desired = _agent_desired_payload(override) + plan_id = _app_db._agent_payload_hash(desired) + active_agent = agents_map.get(aid, {}) + active_prompt = active_agent.get("active_prompt", "") or "" + desired_prompt = desired.get("system_prompt_md") or "" + will_change = desired_prompt != active_prompt + + if not desired_prompt or not gw_url: + return {"node_id": node_id, "agent_id": aid, "status": "skipped", + "plan_id": plan_id, "drift": will_change, + "error": "no prompt or no gateway_url"} + + applied_ok = False + err_msg = None + try: + async with httpx.AsyncClient(timeout=apply_timeout_sec) as client: + resp = await client.post( + f"{gw_url.rstrip('/')}/admin/agents/{aid}/prompt", + json={"prompt": desired_prompt}, + headers={"X-Admin-Token": os.getenv("GATEWAY_ADMIN_TOKEN", "")}, + ) + applied_ok = resp.status_code in (200, 201, 204) + if not applied_ok: + err_msg = f"HTTP {resp.status_code}: {resp.text[:100]}" + except Exception as e: + err_msg = str(e)[:200] + + if applied_ok: + await _app_db.upsert_agent_override(node_id, aid, _mark_applied_hash=plan_id) + + return { + "node_id": node_id, "agent_id": aid, + "status": "applied" if applied_ok else "failed", + "plan_id": plan_id, "drift": will_change, + "error": err_msg, + } + + +@app.post("/api/agents/bulk/apply") +async def bulk_apply_agents( + nodes: str = "NODA1", + node: Optional[str] = None, # legacy single-node param + dry_run: bool = True, + mode: str = "all", # "all" | "canary" + limit: int = 2, # canary: max N agents + _auth: str = Depends(require_auth), +): + """Apply local overrides across one or many nodes. + + mode=canary: apply first `limit` agents with drift=True, stop on first failure. + Returns {results, node_errors, summary}. 
+ """ + # Support legacy ?node= param + raw_nodes = node.upper() if node else nodes + node_ids = [n.strip().upper() for n in raw_nodes.split(",") if n.strip()] + + all_results: List[Dict] = [] + node_errors: List[Dict] = [] + bulk_run_id = str(uuid.uuid4())[:8] + + # Audit: bulk plan created + await _app_db.append_governance_event( + scope="portfolio", project_id="portfolio", actor_type="user", + event_type="agent_bulk_plan_created", + idempotency_key=f"abpc|{bulk_run_id}|{raw_nodes}|{mode}", + severity="info", status="ok", + evidence=_app_db._make_evidence( + message=f"Bulk {'canary' if mode=='canary' else 'apply'} planned: nodes={raw_nodes} dry_run={dry_run}", + outputs={"mode": mode, "limit": limit, "nodes": node_ids, "dry_run": dry_run}, + ), + ) + + for node_id in node_ids: + ni = _node_info(node_id) + policy = ni["policy"] + gw_url = ni["gateway_url"] + apply_timeout_sec = policy["apply_timeout_ms"] / 1000.0 + + overrides = await _app_db.list_agent_overrides(node_id) + agents_raw, err, latency_ms = await _fetch_agents_from_gateway( + node_id, gw_url, + timeout_ms=policy["gateway_timeout_ms"], + get_retry=policy["get_retry"], + ) + if err and not agents_raw: + node_errors.append({"node_id": node_id, "error": err, "latency_ms": latency_ms}) + continue + + agents_map = {a["agent_id"]: a for a in agents_raw} + + # Select candidates: non-hidden, sorted deterministically by agent_id + candidates = sorted( + [o for o in overrides if not o.get("is_hidden")], + key=lambda o: o["agent_id"], + ) + + if mode == "canary": + # For canary: only agents with drift + drift_candidates = [] + for o in candidates: + desired = _agent_desired_payload(o) + plan_id = _app_db._agent_payload_hash(desired) + is_drift = bool(o.get("last_applied_hash") and o["last_applied_hash"] != plan_id) + if is_drift: + drift_candidates.append(o) + candidates = drift_candidates[:limit] + + if dry_run: + for override in candidates: + aid = override["agent_id"] + desired = 
_agent_desired_payload(override) + plan_id = _app_db._agent_payload_hash(desired) + active_agent = agents_map.get(aid, {}) + active_prompt = active_agent.get("active_prompt", "") or "" + desired_prompt = desired.get("system_prompt_md") or "" + all_results.append({ + "node_id": node_id, "agent_id": aid, "status": "planned", + "plan_id": plan_id, "drift": desired_prompt != active_prompt, "error": None, + }) + continue + + # Canary: log start + if mode == "canary" and candidates: + await _app_db.append_governance_event( + scope="portfolio", project_id="portfolio", actor_type="user", + event_type="agent_bulk_canary_started", + idempotency_key=f"abcs|{bulk_run_id}|{node_id}", + severity="info", status="ok", + evidence=_app_db._make_evidence( + message=f"Canary apply started: {len(candidates)} agents on {node_id}", + outputs={"agents": [o["agent_id"] for o in candidates], "limit": limit}, + ), + ) + + canary_stopped = False + for override in candidates: + # Check governance gate per agent + frozen = await _check_prompt_freeze(node_id, override["agent_id"]) + if frozen: + all_results.append({ + "node_id": node_id, "agent_id": override["agent_id"], + "status": "blocked", "plan_id": None, "drift": True, + "error": "PROMPT_FREEZE gate active", + }) + continue + + result = await _apply_single_agent( + node_id, override, agents_map, gw_url, apply_timeout_sec, + ) + all_results.append(result) + + # Canary stop-on-failure + if mode == "canary" and result["status"] == "failed": + canary_stopped = True + # Mark remaining as skipped + remaining_ids = {o["agent_id"] for o in candidates} - {r["agent_id"] for r in all_results if r["node_id"] == node_id} + for rid in sorted(remaining_ids): + all_results.append({ + "node_id": node_id, "agent_id": rid, "status": "skipped", + "plan_id": None, "drift": True, + "error": f"canary stopped after failure of {result['agent_id']}", + }) + await _app_db.append_governance_event( + scope="portfolio", project_id="portfolio", actor_type="user", + 
event_type="agent_bulk_canary_stopped", + idempotency_key=f"abcstop|{bulk_run_id}|{node_id}|{result['agent_id']}", + severity="high", status="error", + evidence=_app_db._make_evidence( + message=f"Canary stopped on {result['agent_id']}@{node_id}: {result['error']}", + outputs={"failed_agent": result["agent_id"], "error": result["error"]}, + ), + ) + break + + if mode == "canary" and not canary_stopped and candidates: + await _app_db.append_governance_event( + scope="portfolio", project_id="portfolio", actor_type="user", + event_type="agent_bulk_apply_completed", + idempotency_key=f"abac|{bulk_run_id}|{node_id}", + severity="info", status="ok", + evidence=_app_db._make_evidence( + message=f"Canary apply completed on {node_id}: {len(candidates)} agents", + outputs={"agents_applied": [r["agent_id"] for r in all_results + if r["node_id"] == node_id and r["status"] == "applied"]}, + ), + ) + + # Build summary + status_counts: Dict[str, int] = {} + for r in all_results: + status_counts[r["status"]] = status_counts.get(r["status"], 0) + 1 + + return JSONResponse(content={ + "results": all_results, + "node_errors": node_errors, + "summary": status_counts, + "dry_run": dry_run, + "mode": mode, + "bulk_run_id": bulk_run_id, + }) + + +@app.post("/api/agents/bulk/diff") +async def bulk_diff_agents( + nodes: str = "NODA1", + node: Optional[str] = None, + _auth: str = Depends(require_auth), +): + """Return diff summary for all agents with local overrides. 
Supports multi-node.""" + raw_nodes = node.upper() if node else nodes + node_ids = [n.strip().upper() for n in raw_nodes.split(",") if n.strip()] + + report: List[Dict] = [] + node_errors: List[Dict] = [] + + for node_id in node_ids: + ni = _node_info(node_id) + policy = ni["policy"] + gw_url = ni["gateway_url"] + overrides = await _app_db.list_agent_overrides(node_id) + agents_raw, err, latency_ms = await _fetch_agents_from_gateway( + node_id, gw_url, + timeout_ms=policy["gateway_timeout_ms"], + get_retry=policy["get_retry"], + ) + if err: + node_errors.append({"node_id": node_id, "error": err, "latency_ms": latency_ms}) + agents_map = {a["agent_id"]: a for a in agents_raw} + + for override in overrides: + aid = override["agent_id"] + desired = _agent_desired_payload(override) + plan_id = _app_db._agent_payload_hash(desired) + active_agent = agents_map.get(aid, {}) + active_prompt = active_agent.get("active_prompt") or "" + desired_prompt = desired.get("system_prompt_md") or "" + diff_lines = list(_difflib.unified_diff( + active_prompt.splitlines(keepends=True), + desired_prompt.splitlines(keepends=True), + fromfile=f"{aid}:active", tofile=f"{aid}:desired", n=2, + )) + is_drift = bool(override.get("last_applied_hash") and + override["last_applied_hash"] != plan_id) + report.append({ + "node_id": node_id, "agent_id": aid, + "plan_id": plan_id, + "last_applied_hash": override.get("last_applied_hash"), + "drift": is_drift, + "diff_lines": len(diff_lines), + "diff_text": "".join(diff_lines[:60]), + }) + + return JSONResponse(content={"report": report, "node_errors": node_errors, + "nodes_queried": node_ids}) + + +@app.get("/api/agents/export/prompts") +async def export_agent_prompts( + nodes: str = "NODA1", + node: Optional[str] = None, + _auth: str = Depends(require_auth), +): + """Export all agent system prompts as a JSON bundle (multi-node).""" + raw_nodes = node.upper() if node else nodes + node_ids = [n.strip().upper() for n in raw_nodes.split(",") if n.strip()] 
+ + bundle: List[Dict] = [] + node_errors: List[Dict] = [] + + for node_id in node_ids: + ni = _node_info(node_id) + policy = ni["policy"] + gw_url = ni["gateway_url"] + overrides = await _app_db.list_agent_overrides(node_id) + agents_raw, err, latency_ms = await _fetch_agents_from_gateway( + node_id, gw_url, + timeout_ms=policy["gateway_timeout_ms"], + get_retry=policy["get_retry"], + ) + if err: + node_errors.append({"node_id": node_id, "error": err, "latency_ms": latency_ms}) + agents_map = {a["agent_id"]: a for a in agents_raw} + overrides_map = {o["agent_id"]: o for o in overrides} + + for aid, agent in agents_map.items(): + override = overrides_map.get(aid) + merged = _merge_agent_with_override(agent, override) + bundle.append({ + "agent_id": aid, "node_id": node_id, + "display_name": merged.get("display_name", aid), + "domain": merged.get("domain"), + "system_prompt_md": merged.get("system_prompt_md"), + "has_override": merged.get("has_override", False), + }) + + bundle.sort(key=lambda x: (x["node_id"], x["agent_id"])) + return JSONResponse(content={ + "nodes_queried": node_ids, + "exported_at": _app_db._now(), + "count": len(bundle), + "agents": bundle, + "node_errors": node_errors, + }) + + +# ── Kling AI proxy ──────────────────────────────────────────────────────────── + +@app.get("/api/aurora/kling/health") +async def console_kling_health() -> Dict[str, Any]: + try: + return await _aurora_request_json("GET", "/api/aurora/kling/health", timeout=12.0, retries=1) + except Exception as exc: + return {"ok": False, "error": str(exc)} + + +@app.post("/api/aurora/kling/enhance/{job_id}") +async def console_kling_enhance( + job_id: str, + prompt: str = Form("enhance video quality, improve sharpness and clarity"), + negative_prompt: str = Form("noise, blur, artifacts, distortion"), + mode: str = Form("pro"), + duration: str = Form("5"), + cfg_scale: float = Form(0.5), +) -> Dict[str, Any]: + return await _aurora_request_json( + "POST", + 
"/api/aurora/kling/enhance", + data={ + "job_id": job_id, + "prompt": prompt, + "negative_prompt": negative_prompt, + "mode": mode, + "duration": duration, + "cfg_scale": str(cfg_scale), + }, + timeout=120.0, + retries=1, + ) + + +@app.get("/api/aurora/kling/status/{job_id}") +async def console_kling_status(job_id: str) -> Dict[str, Any]: + return await _aurora_request_json("GET", f"/api/aurora/kling/status/{job_id}", timeout=20.0, retries=2) + + +@app.get("/api/aurora/kling/task/{task_id}") +async def console_kling_task(task_id: str, endpoint: str = Query("video2video")) -> Dict[str, Any]: + return await _aurora_request_json("GET", f"/api/aurora/kling/task/{task_id}?endpoint={endpoint}", timeout=20.0, retries=2) + + +@app.get("/api/aurora/plates/{job_id}") +async def console_plates(job_id: str) -> Dict[str, Any]: + return await _aurora_request_json("GET", f"/api/aurora/plates/{job_id}", timeout=15.0, retries=2) + + +# ── Sofiia Auto-Router & Budget Dashboard proxy ──────────────────────────────── + +async def _router_request_json(method: str, path: str, json_body: Optional[Dict] = None, timeout: float = 20.0) -> Dict[str, Any]: + """Forward request to the Router service (noda1 or local).""" + import aiohttp as _aiohttp + # Use the first configured node's router URL + nodes_reg = load_nodes_registry() + nodes = (nodes_reg.get("nodes") or {}) if isinstance(nodes_reg, dict) else {} + node_id = next(iter(nodes), "noda1") + router_url = get_router_url(node_id) + url = f"{router_url.rstrip('/')}{path}" + try: + async with _aiohttp.ClientSession() as sess: + if method.upper() == "GET": + async with sess.get(url, timeout=_aiohttp.ClientTimeout(total=timeout)) as resp: + return await resp.json(content_type=None) + else: + async with sess.post(url, json=json_body, timeout=_aiohttp.ClientTimeout(total=timeout)) as resp: + return await resp.json(content_type=None) + except Exception as e: + return {"error": str(e)} + + +@app.post("/api/sofiia/auto-route") +async def 
console_auto_route(body: Dict[str, Any]) -> Dict[str, Any]: + """Proxy: classify prompt and get recommended model.""" + return await _router_request_json("POST", "/v1/sofiia/auto-route", json_body=body) + + +@app.get("/api/sofiia/budget") +async def console_budget_dashboard() -> Dict[str, Any]: + """Proxy: get budget dashboard data from router.""" + return await _router_request_json("GET", "/v1/sofiia/budget") + + +@app.post("/api/sofiia/budget/limits") +async def console_set_budget_limits(body: Dict[str, Any]) -> Dict[str, Any]: + """Proxy: set provider budget limit.""" + return await _router_request_json("POST", "/v1/sofiia/budget/limits", json_body=body) + + +@app.get("/api/sofiia/budget/stats") +async def console_budget_stats(window_hours: int = 24) -> Dict[str, Any]: + """Proxy: get budget stats for time window.""" + return await _router_request_json("GET", f"/v1/sofiia/budget/stats?window_hours={window_hours}") + + +@app.get("/api/sofiia/catalog") +async def console_model_catalog(refresh_ollama: bool = False) -> Dict[str, Any]: + """Proxy: get full model catalog with availability.""" + return await _router_request_json("GET", f"/v1/sofiia/catalog?refresh_ollama={str(refresh_ollama).lower()}")