# NOTE: removed web-viewer chrome that was accidentally captured with this
# file (listing header, "Raw/Blame/History", ambiguous-Unicode warning).
# Original path: microdao-daarion/services/sofiia-console/app/main.py
"""
Sofiia Control Console — FastAPI BFF v0.3.0
Runtime contract (project/session/user), full status, WebSocket events,
voice proxy, ops, nodes. UI never calls external services directly.
"""
import asyncio
import base64
import io
import json
import os
import re
import sys
import subprocess
import mimetypes
import time
import uuid
import logging
import collections
import statistics
import socket
from datetime import datetime, timezone
from pathlib import Path
from typing import Any, Dict, List, Optional, Set, Tuple
from urllib.parse import quote
import httpx
from fastapi import Body, FastAPI, Depends, HTTPException, UploadFile, File, Form, Query, Request, Response, WebSocket, WebSocketDisconnect
from fastapi.responses import HTMLResponse, StreamingResponse, JSONResponse
from fastapi.middleware.cors import CORSMiddleware
from pydantic import BaseModel
try:
import cv2 # type: ignore[import-untyped]
except Exception: # pragma: no cover - optional dependency in console env
cv2 = None
from .auth import (
require_api_key, require_api_key_strict, require_auth, require_auth_strict,
get_console_api_key, _key_valid, _cookie_token, _expected_cookie_token,
_COOKIE_NAME, _COOKIE_MAX_AGE, _IS_PROD,
)
from .config import (
load_nodes_registry,
save_nodes_registry,
get_router_url,
get_gateway_url,
get_node_ssh_profile,
get_memory_service_url,
get_ollama_url,
is_voice_ha_enabled,
get_voice_ha_router_url,
)
from .router_client import infer, execute_tool, health
from .nodes import get_nodes_dashboard
from .monitor import collect_all_nodes
from .ops import run_ops_action, OPS_ACTIONS
from .docs_router import docs_router
from . import db as _app_db
from .metrics import (
SOFIIA_SEND_REQUESTS_TOTAL,
SOFIIA_IDEMPOTENCY_REPLAYS_TOTAL,
SOFIIA_CURSOR_REQUESTS_TOTAL,
SOFIIA_RATE_LIMITED_TOTAL,
render_metrics,
)
from .idempotency import get_idempotency_store, ReplayEntry
from .rate_limit import get_rate_limiter
from .logging import (
configure_sofiia_logger,
get_request_id,
hash_idempotency_key,
log_event,
)
logger = logging.getLogger(__name__)
configure_sofiia_logger()
# ── Build info ────────────────────────────────────────────────────────────────
# Version/build metadata surfaced by /api/health and /api/status/full.
_VERSION = "0.4.0"
_BUILD_SHA = os.getenv("BUILD_SHA", "dev")
_BUILD_TIME = os.getenv("BUILD_TIME", "local")
_BUILD_ID = os.getenv("BUILD_ID", os.getenv("GIT_SHA", "local"))
_START_TIME = time.monotonic()  # monotonic base for uptime_s reporting
_NODE_ID = os.getenv("NODE_ID", os.getenv("HOSTNAME", "noda2"))
# ── Rate limiter ──────────────────────────────────────────────────────────────
# Per-key sliding-window buckets used by _check_rate(); RPS/burst tunables
# are read once at import time from the environment.
_rate_buckets: Dict[str, collections.deque] = {}
_idempotency_store = get_idempotency_store()
_rate_limiter = get_rate_limiter()
_RL_CHAT_RPS = float(os.getenv("SOFIIA_RL_CHAT_RPS", "1.0"))
_RL_CHAT_BURST = int(os.getenv("SOFIIA_RL_CHAT_BURST", "8"))
_RL_OP_RPS = float(os.getenv("SOFIIA_RL_OP_RPS", "3.0"))
_RL_OP_BURST = int(os.getenv("SOFIIA_RL_OP_BURST", "20"))
def _check_rate(key: str, max_calls: int, window_sec: int = 60) -> bool:
    """Sliding-window rate check: admit at most *max_calls* per *window_sec*.

    Timestamps are stored per key in a module-level deque; entries that fell
    out of the window are pruned on every call. Returns True when the call
    is admitted (and recorded), False when the key is over its budget.
    """
    ts_now = time.monotonic()
    bucket = _rate_buckets.setdefault(key, collections.deque())
    # Prune timestamps older than the window.
    while bucket:
        if ts_now - bucket[0] <= window_sec:
            break
        bucket.popleft()
    if len(bucket) < max_calls:
        bucket.append(ts_now)
        return True
    return False
def _resolve_operator_id(request: Request, body: "ChatMessageSendBody", request_id: str) -> Tuple[str, bool]:
    """Derive an operator identity for rate limiting / audit.

    Returns (operator_id, is_fallback). Identity sources, in priority order:
    explicit client metadata, the body's user_id, the X-Operator-Id header.
    When none yield a non-empty value, falls back to the client IP (or the
    request id) and flags the result as a fallback. IDs are capped at 128 chars.
    """
    meta = body.client or {}
    candidates = (
        meta.get("operator_id"),
        body.user_id,
        request.headers.get("X-Operator-Id"),
    )
    for candidate in candidates:
        ident = str(candidate or "").strip()
        if ident:
            return ident[:128], False
    ip = request.client.host if request.client else "unknown"
    fallback = f"ip:{ip}" if ip else f"req:{request_id}"
    return fallback[:128], True
def _rate_limited_http(scope: str, retry_after_s: int) -> HTTPException:
    """Build a structured 429 with a Retry-After header (minimum 1 second)."""
    wait_s = int(retry_after_s or 1)
    if wait_s < 1:
        wait_s = 1
    detail = {
        "error": {"code": "rate_limited", "scope": scope},
        "retry_after_s": wait_s,
    }
    return HTTPException(
        status_code=429,
        detail=detail,
        headers={"Retry-After": str(wait_s)},
    )
# ── Voice error rings (repro pack for incident diagnosis) ─────────────────────
# Circular buffers: last 5 TTS errors and last 5 LLM errors.
# Populated by all voice endpoints. Read by /api/voice/degradation_status.
_RING_SIZE = 5
_voice_tts_errors: collections.deque = collections.deque(maxlen=_RING_SIZE)
_voice_llm_errors: collections.deque = collections.deque(maxlen=_RING_SIZE)
# Last-seen voice selections, reported alongside the error rings.
_voice_last_model: str = "unknown" # last model selected for voice
_voice_last_profile: str = "unknown" # last voice_profile used
def _record_tts_error(error_type: str, status_code: Optional[int],
                      detail: str, voice: str = "") -> None:
    """Append one TTS failure to the bounded repro ring (UTC HH:MM:SSZ stamp)."""
    entry = {
        "ts": time.strftime("%H:%M:%SZ", time.gmtime()),
        "type": error_type,
        "status": status_code,
        "voice": voice,
        # Detail is truncated so a single long traceback cannot bloat the ring.
        "detail": detail[:120],
    }
    _voice_tts_errors.append(entry)
def _record_llm_error(error_type: str, model: str, detail: str) -> None:
    """Append one voice-LLM failure to the bounded repro ring."""
    entry = {
        "ts": time.strftime("%H:%M:%SZ", time.gmtime()),
        "type": error_type,
        "model": model,
        # Truncated to keep ring entries small.
        "detail": detail[:120],
    }
    _voice_llm_errors.append(entry)
# ── Concurrent voice synthesizer guard ───────────────────────────────────────
# Limits simultaneous TTS synthesis calls to prevent memory-service DoS.
_MAX_CONCURRENT_TTS = int(os.getenv("MAX_CONCURRENT_TTS", "4"))
# Created lazily by _get_tts_semaphore() so it binds to the running loop.
_tts_semaphore: Optional[asyncio.Semaphore] = None # initialised in startup
def _get_tts_semaphore() -> asyncio.Semaphore:
    """Lazily create and return the shared TTS concurrency semaphore."""
    global _tts_semaphore
    if _tts_semaphore is not None:
        return _tts_semaphore
    _tts_semaphore = asyncio.Semaphore(_MAX_CONCURRENT_TTS)
    return _tts_semaphore
# ── Telemetry dedup store ─────────────────────────────────────────────────────
# Prevents processing duplicate beacon submissions (same session+turn within 30s).
# OrderedDict maps "session:turn" -> monotonic timestamp, oldest first.
_telem_seen: collections.OrderedDict = collections.OrderedDict()
_TELEM_DEDUP_TTL = 30.0 # seconds
_TELEM_DEDUP_MAX = 500 # max keys before LRU eviction
def _telem_is_duplicate(session_id: str, turn_id: str) -> bool:
    """Return True when this (session, turn) beacon was already seen recently.

    Expired entries are dropped first (insertion order is oldest-first), then
    the oldest entry is evicted if the store is at capacity. A fresh key is
    recorded with the current monotonic time and reported as new (False).
    """
    dedup_key = f"{session_id}:{turn_id}"
    ts_now = time.monotonic()
    # Drop entries whose TTL has elapsed.
    while _telem_seen:
        oldest_ts = next(iter(_telem_seen.values()))
        if oldest_ts + _TELEM_DEDUP_TTL >= ts_now:
            break
        _telem_seen.popitem(last=False)
    # Cap total keys (evict the oldest).
    if len(_telem_seen) >= _TELEM_DEDUP_MAX:
        _telem_seen.popitem(last=False)
    if dedup_key in _telem_seen:
        return True
    _telem_seen[dedup_key] = ts_now
    return False
def _env_int(name: str, default: int) -> int:
raw = (os.getenv(name, str(default)) or "").strip()
try:
return int(raw)
except Exception:
return default
def _env_float(name: str, default: float) -> float:
raw = (os.getenv(name, str(default)) or "").strip()
try:
return float(raw)
except Exception:
return default
# ── App config ────────────────────────────────────────────────────────────────
ROUTER_API_KEY = os.getenv("SUPERVISOR_API_KEY", "").strip()
IS_PROD = os.getenv("ENV", "dev").strip().lower() in ("prod", "production", "staging")
SOFIIA_PREFERRED_CHAT_MODEL = os.getenv("SOFIIA_PREFERRED_CHAT_MODEL", "ollama:qwen3:14b").strip() or "ollama:qwen3:14b"
# Local Ollama runtime tuning for NODA2 (can be overridden via env).
SOFIIA_OLLAMA_TIMEOUT_SEC = _env_float("SOFIIA_OLLAMA_TIMEOUT_SEC", 120.0)
SOFIIA_OLLAMA_VOICE_TIMEOUT_SEC = _env_float("SOFIIA_OLLAMA_VOICE_TIMEOUT_SEC", 45.0)
SOFIIA_OLLAMA_KEEP_ALIVE = (os.getenv("SOFIIA_OLLAMA_KEEP_ALIVE", "30m") or "").strip()
SOFIIA_OLLAMA_NUM_CTX = _env_int("SOFIIA_OLLAMA_NUM_CTX", 8192)
# Default thread count: CPU count minus 2 (headroom for the BFF), clamped to 4..16.
_DEFAULT_OLLAMA_THREADS = max(4, min(16, (os.cpu_count() or 8) - 2))
SOFIIA_OLLAMA_NUM_THREAD = _env_int("SOFIIA_OLLAMA_NUM_THREAD", _DEFAULT_OLLAMA_THREADS)
SOFIIA_OLLAMA_NUM_GPU = _env_int("SOFIIA_OLLAMA_NUM_GPU", -1)  # -1 = not applied (see _apply_ollama_runtime_options)
SOFIIA_OLLAMA_NUM_PREDICT_TEXT = _env_int("SOFIIA_OLLAMA_NUM_PREDICT_TEXT", 768)
# Voice guardrails — injected INSTEAD OF the full prompt for voice turns.
# Constraints are hard: no lists, no markdown, no <think>, max 2 sentences.
# NOTE: both prompts below are runtime strings sent to the LLM — do not
# reformat or translate their contents.
SOFIIA_VOICE_PROMPT_SUFFIX = """
## VOICE MODE — HARD RULES (не порушувати ніколи)
- Відповідай МАКСИМУМ 2 речення (виняток: якщо прямо попросили деталей).
- Жодних списків, жодних bullet-points, жодного markdown (*bold*, -list, ##header).
- Жодного коду (`` ` ``), жодних URL.
- Жодного <think>...</think> — думки всередині, назовні лише відповідь.
- Мова: розмовна, природна для голосу. Без "Як AI...".
- Якщо питання складне — дай коротку відповідь і запропонуй продовжити текстом.
"""
# Full system prompt used for text chat turns (identity, infra facts,
# answer-style rules, and an honest capability list for this console).
SOFIIA_SYSTEM_PROMPT = """Ти Sofiia — Chief AI Architect та Technical Sovereign екосистеми DAARION.city.
## Твоя ідентичність
- Ти: Sofiia, головний AI-архітектор і технічний суверен DAARION.city
- Ти підпорядковуєшся одній людині — засновнику та головному архітектору платформи
## Засновник та архітектор DAARION
- Позивний: **Повелитель Хаосу** (використовуй у неформальних/робочих контекстах)
- Офіційне ім'я: **Іван Титар** (використовуй в офіційних повідомленнях, документах, репортах)
- Роль: Головний розробник та архітектор DAARION — єдиний, хто має повний контроль над платформою
- Ніякої іншої людини з ім'ям "Савтра" або будь-яким іншим іменем у ролі засновника НЕ ІСНУЄ
## Ноди та інфраструктура
- NODA1: production runtime (router, incidents, alerts, governance)
- NODA2: control plane / development (твій primary home, звідки тебе викликають)
- NODA3: AI/ML experimentation
## Правила відповіді
- Відповідай **українською мовою** за замовчуванням
- Технічні терміни (API, SLO, backend, deploy, incident, release gate тощо) залишай **англійською**
- Відповідай структуровано, конкретно, без зайвих вступів
- НЕ вигадуй імена людей, назви проектів або факти яких не знаєш — краще скажи що не маєш цих даних
- НЕ галюцинуй: якщо не знаєш — скажи чесно "не маю цих даних в поточному контексті"
## Твої можливості через Control Console (що реально доступно)
- **Chat**: відповіді на питання через локальний LLM (Ollama на NODA2)
- **Голосовий чат**: STT + TTS через Memory Service (Polina/Ostap Neural)
- **Nodes health**: статус NODA1/NODA2 (router, memory, NCS)
- **Integrations status**: Notion API, Router, Memory Service
- **Memory/session**: зберігання контексту розмов (Qdrant)
## Що наразі НЕ доступно через цей інтерфейс
- Пряме читання/запис в Notion (тільки статус перевірки)
- Пряме читання GitHub репозиторіїв (немає repo tool у цьому контейнері)
- Виконання bash/python команд
- Деплой або зміна конфігурацій напряму
Якщо тебе просять щось що не є в переліку доступного — відповідай чесно:
"Ця можливість не підключена до Control Console. Для цього використай Cursor або OpenCode на NODA2."
"""
# CORS: explicit comma-separated origins from env always win; otherwise the
# list is wide-open in dev and a fixed allowlist in prod/staging.
_CORS_ORIGINS = (
    [o.strip() for o in os.getenv("CORS_ORIGINS", "").split(",") if o.strip()]
    or (
        ["*"] if not IS_PROD
        else [
            "https://console.daarion.space",
            "https://app.daarion.space",
            "http://localhost:8002",
            "http://localhost:8000",
            "http://127.0.0.1:8002",
        ]
    )
)
def _is_container_runtime() -> bool:
return Path("/.dockerenv").exists() or bool(os.getenv("KUBERNETES_SERVICE_HOST"))
# Aurora media-forensics service endpoints: in-cluster hostname when running
# inside a container, localhost otherwise. AURORA_FALLBACK_URL is tried when
# the primary transport fails (see _aurora_request_json).
_aurora_default_url = "http://aurora-service:9401" if _is_container_runtime() else "http://127.0.0.1:9401"
AURORA_SERVICE_URL = os.getenv("AURORA_SERVICE_URL", _aurora_default_url).rstrip("/")
AURORA_FALLBACK_URL = os.getenv("AURORA_FALLBACK_URL", "http://127.0.0.1:9401").rstrip("/")
_aurora_home_data_dir = Path.home() / ".sofiia" / "aurora-data"
# Data dir: prefer the mounted /data volume in containers when it is writable.
if _is_container_runtime() and Path("/data").exists() and os.access("/data", os.W_OK):
    _aurora_default_data_dir = "/data/aurora"
else:
    _aurora_default_data_dir = str(_aurora_home_data_dir)
AURORA_DATA_DIR = Path(os.getenv("AURORA_DATA_DIR", _aurora_default_data_dir))
# In-process caches for live render progress, plus their on-disk mirrors.
_aurora_live_cache: Dict[str, Dict[str, Any]] = {}
_aurora_live_samples: Dict[str, collections.deque] = {}
_aurora_live_last: Dict[str, Dict[str, Any]] = {}
_aurora_live_last_loaded = False
_aurora_live_last_path = (AURORA_DATA_DIR.parent / "sofiia-console-cache" / "aurora_live_last.json")
_aurora_smart_runs: Dict[str, Dict[str, Any]] = {}
_aurora_smart_runs_loaded = False
_aurora_smart_runs_path = (AURORA_DATA_DIR.parent / "sofiia-console-cache" / "aurora_smart_runs.json")
# Rolling per-strategy scoreboard for the "smart" pipeline selector.
_aurora_smart_policy: Dict[str, Any] = {
    "updated_at": None,
    "strategies": {
        "local_only": {"count": 0, "avg_score": 0.0, "wins": 0, "losses": 0},
        "local_then_kling": {"count": 0, "avg_score": 0.0, "wins": 0, "losses": 0},
    },
}
_aurora_smart_policy_loaded = False
_aurora_smart_policy_path = (AURORA_DATA_DIR.parent / "sofiia-console-cache" / "aurora_smart_policy.json")
# Smart-run polling knobs (env-tunable, clamped to sane minimums).
_AURORA_SMART_MAX_RUNS = max(20, int(os.getenv("AURORA_SMART_MAX_RUNS", "200")))
_AURORA_SMART_LOCAL_POLL_SEC = max(2.0, float(os.getenv("AURORA_SMART_LOCAL_POLL_SEC", "3.0")))
_AURORA_SMART_KLING_POLL_SEC = max(3.0, float(os.getenv("AURORA_SMART_KLING_POLL_SEC", "6.0")))
_AURORA_SMART_LOCAL_MAX_SEC = max(60.0, float(os.getenv("AURORA_SMART_LOCAL_MAX_SEC", "10800")))
_AURORA_SMART_KLING_MAX_SEC = max(60.0, float(os.getenv("AURORA_SMART_KLING_MAX_SEC", "3600")))
# Media tool endpoints (ComfyUI agent/UI, face swapper, image-gen, media router).
MEDIA_COMFY_AGENT_URL = os.getenv(
    "MEDIA_COMFY_AGENT_URL",
    "http://comfy-agent:8880" if _is_container_runtime() else "http://127.0.0.1:8880",
).rstrip("/")
MEDIA_COMFY_UI_URL = os.getenv(
    "MEDIA_COMFY_UI_URL",
    "http://comfyui:8188" if _is_container_runtime() else "http://127.0.0.1:8188",
).rstrip("/")
MEDIA_SWAPPER_URL = os.getenv(
    "MEDIA_SWAPPER_URL",
    "http://swapper-service:8890" if _is_container_runtime() else "http://127.0.0.1:8890",
).rstrip("/")
MEDIA_IMAGE_GEN_URL = os.getenv(
    "MEDIA_IMAGE_GEN_URL",
    "http://image-gen-service:7860" if _is_container_runtime() else "http://127.0.0.1:7860",
).rstrip("/")
MEDIA_ROUTER_URL = os.getenv("MEDIA_ROUTER_URL", "").strip().rstrip("/")
MEDIA_ROUTER_FALLBACK_URL = os.getenv("MEDIA_ROUTER_FALLBACK_URL", "http://127.0.0.1:9102").rstrip("/")
# Bounded history of recently submitted media jobs.
_media_recent_jobs: collections.deque = collections.deque(maxlen=40)
def _apply_ollama_runtime_options(options: Dict[str, Any]) -> Dict[str, Any]:
    """Return a copy of *options* overlaid with the configured Ollama tuning.

    Only knobs with meaningful values are applied: num_ctx and num_thread
    must be positive, while num_gpu also accepts 0.
    """
    tuned = dict(options)
    overrides = (
        ("num_ctx", SOFIIA_OLLAMA_NUM_CTX, SOFIIA_OLLAMA_NUM_CTX > 0),
        ("num_thread", SOFIIA_OLLAMA_NUM_THREAD, SOFIIA_OLLAMA_NUM_THREAD > 0),
        ("num_gpu", SOFIIA_OLLAMA_NUM_GPU, SOFIIA_OLLAMA_NUM_GPU >= 0),
    )
    for key, value, enabled in overrides:
        if enabled:
            tuned[key] = value
    return tuned
def _make_ollama_payload(model_name: str, messages: List[Dict[str, Any]], options: Dict[str, Any]) -> Dict[str, Any]:
    """Assemble a non-streaming Ollama chat payload with runtime options applied."""
    body: Dict[str, Any] = {
        "model": model_name,
        "messages": messages,
        "stream": False,
        "options": _apply_ollama_runtime_options(options),
    }
    keep_alive = SOFIIA_OLLAMA_KEEP_ALIVE
    if keep_alive:
        body["keep_alive"] = keep_alive
    return body
# Cached nodes telemetry (updated by background task)
# Read by status endpoints; written only by _nodes_poll_loop().
_nodes_cache: Dict[str, Any] = {"nodes": [], "summary": {}, "ts": ""}
_NODES_POLL_INTERVAL = int(os.getenv("NODES_POLL_INTERVAL_SEC", "30"))
async def _nodes_poll_loop() -> None:
    """Background task: poll all nodes every N seconds, update cache + WS broadcast.

    Reads the nodes registry each cycle (picks up config changes), collects
    health from every node, refreshes the module-level _nodes_cache, and —
    only when WS clients are connected — pushes a trimmed per-node snapshot
    as a "nodes.status" event. Failures are swallowed so the loop never dies.
    """
    while True:
        try:
            reg = load_nodes_registry()
            nodes_cfg = reg.get("nodes", {})
            timeout = float(reg.get("defaults", {}).get("health_timeout_sec", 10))
            nodes = await collect_all_nodes(nodes_cfg, router_api_key=ROUTER_API_KEY, timeout_per_node=timeout)
            online = sum(1 for n in nodes if n.get("online"))
            router_ok = sum(1 for n in nodes if n.get("router_ok"))
            _nodes_cache.update({
                "nodes": nodes,
                "summary": {"total": len(nodes), "online": online, "router_ok": router_ok},
                "ts": _now_iso(),
            })
            # Broadcast only when someone is listening.
            if _ws_clients:
                await _broadcast(_make_event("nodes.status", {
                    "nodes": [
                        {
                            "id": n["node_id"],
                            "label": n.get("label", n["node_id"]),
                            "online": n.get("online", False),
                            "router_ok": n.get("router_ok", False),
                            "router_latency_ms": n.get("router_latency_ms"),
                            "gateway_ok": n.get("gateway_ok"),
                            "heartbeat_age_s": n.get("heartbeat_age_s"),
                            "open_incidents": n.get("open_incidents"),
                            "monitor_source": n.get("monitor_source"),
                        }
                        for n in nodes
                    ],
                    "summary": {"total": len(nodes), "online": online, "router_ok": router_ok},
                }))
        except Exception as e:
            # Polling is best-effort; errors are only visible at debug level.
            logger.debug("nodes poll error: %s", e)
        await asyncio.sleep(_NODES_POLL_INTERVAL)
from contextlib import asynccontextmanager
@asynccontextmanager
async def lifespan(app_: Any):
    """App lifespan: init DB, start the nodes poll task, resume smart monitors.

    DB init failure is non-fatal (Projects/Docs features degrade). On
    shutdown the poll task is cancelled and awaited, then the DB is closed.
    """
    # Init SQLite DB for projects/documents/sessions/messages
    try:
        await _app_db.init_db()
        logger.info("✅ sofiia-console DB initialised")
    except Exception as e:
        logger.warning("DB init failed (non-fatal, Projects/Docs disabled): %s", e)
    task = asyncio.create_task(_nodes_poll_loop())
    logger.info("Nodes poll loop started (interval=%ds)", _NODES_POLL_INTERVAL)
    # Resume any Aurora "smart" runs that were in flight before a restart.
    try:
        _smart_resume_active_monitors()
    except Exception as e:
        logger.warning("aurora smart monitor resume failed: %s", e)
    yield
    task.cancel()
    try:
        await task
    except asyncio.CancelledError:
        pass
    await _app_db.close_db()
# FastAPI application wired to the lifespan above.
app = FastAPI(
    title="Sofiia Control Console",
    description="Operator BFF for Sofiia CTO agent",
    version=_VERSION,
    lifespan=lifespan,
)
app.add_middleware(
    CORSMiddleware,
    allow_origins=_CORS_ORIGINS,
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)
# Projects + Documents + Sessions + Dialog Map API
app.include_router(docs_router)
# ── WebSocket event bus ───────────────────────────────────────────────────────
# Live set of connected WS clients; pruned by _broadcast on send failure.
_ws_clients: Set[WebSocket] = set()
def _now_iso() -> str:
return datetime.now(timezone.utc).isoformat(timespec="milliseconds")
def _make_event(
    event_type: str,
    data: Dict[str, Any],
    *,
    project_id: str = "",
    session_id: str = "",
    user_id: str = "console_user",
) -> Dict[str, Any]:
    """Wrap *data* in the versioned WS event envelope used by the console UI."""
    envelope: Dict[str, Any] = {
        "v": 1,
        "type": event_type,
        "ts": _now_iso(),
        "project_id": project_id,
        "session_id": session_id,
        "user_id": user_id,
        "data": data,
    }
    return envelope
async def _broadcast(event: Dict[str, Any]) -> None:
    """Fan the JSON-encoded event out to every connected WS client.

    Clients whose send fails are removed from the registry afterwards.
    """
    global _ws_clients
    if not _ws_clients:
        return
    message = json.dumps(event, ensure_ascii=False)
    failed: Set[WebSocket] = set()
    for client in list(_ws_clients):
        try:
            await client.send_text(message)
        except Exception:
            failed.add(client)
    _ws_clients -= failed
def _broadcast_bg(event: Dict[str, Any]) -> None:
"""Fire-and-forget broadcast from sync context."""
try:
loop = asyncio.get_event_loop()
if loop.is_running():
loop.create_task(_broadcast(event))
except Exception:
pass
# ── AISTALK adapter ───────────────────────────────────────────────────────────
# Optional integration: constructed only when AISTALK_ENABLED=true and the
# adapter module imports cleanly; otherwise the feature is disabled (None).
try:
    from .adapters.aistalk import AISTALKAdapter as _AISTALKAdapter
    _aistalk = _AISTALKAdapter(
        base_url=os.getenv("AISTALK_URL", ""),
        api_key=os.getenv("AISTALK_API_KEY", ""),
    ) if os.getenv("AISTALK_ENABLED", "false").lower() == "true" else None
except Exception:
    _aistalk = None
# ─── Health ─────────────────────────────────────────────────────────────────
@app.get("/api/health")
async def api_health():
    """Liveness + router reachability summary.

    Always reports BFF build metadata. When nodes are configured, probes the
    router of NODA2 (preferred) or the first registry entry and folds its
    health into the top-level "ok" flag.
    """
    base = {
        "ok": True,
        "service": "sofiia-console",
        "version": _VERSION,
        "build": _BUILD_ID,
        "env": os.getenv("ENV", "dev"),
        "uptime_s": int(time.monotonic() - _START_TIME),
    }
    reg = load_nodes_registry()
    nodes_map = reg.get("nodes") or {}
    nodes = list(nodes_map.items())
    if not nodes:
        return {**base, "message": "no nodes configured"}
    # Prefer the NODA2 control-plane node when present.
    first_id, _first_cfg = ("NODA2", nodes_map["NODA2"]) if "NODA2" in nodes_map else nodes[0]
    router_url = get_router_url(first_id)
    if not router_url:
        return {**base, "message": "no router_url"}
    try:
        r = await health(router_url)
        return {**base, "ok": r.get("ok", False), "router": r, "node_id": first_id}
    except Exception as e:
        return {**base, "ok": False, "error": str(e)[:200], "node_id": first_id}
# ─── Status/Full ─────────────────────────────────────────────────────────────
async def _probe_router(router_url: str) -> Dict[str, Any]:
    """Probe a router's health endpoint and key route availability.

    Tries /healthz then /health; on the first 200 it also checks whether the
    tools-execute and agent-infer routes exist (a 405 on GET means the
    POST-only route is present). Returns reachable=False when neither health
    path responds with 200.
    """
    t0 = time.monotonic()
    try:
        async with httpx.AsyncClient(timeout=5.0) as c:
            for path in ("/healthz", "/health"):
                try:
                    r = await c.get(f"{router_url.rstrip('/')}{path}")
                    if r.status_code == 200:
                        latency = int((time.monotonic() - t0) * 1000)
                        # probe tool execute availability
                        tool_ok = False
                        try:
                            r2 = await c.get(
                                f"{router_url.rstrip('/')}/v1/tools/execute",
                                timeout=1.5,
                            )
                            # 405 = route exists but is POST-only.
                            tool_ok = r2.status_code in (200, 405)
                        except Exception:
                            pass
                        infer_ok = False
                        try:
                            r3 = await c.get(
                                f"{router_url.rstrip('/')}/v1/agents/sofiia/infer",
                                timeout=1.5,
                            )
                            infer_ok = r3.status_code in (200, 405)
                        except Exception:
                            pass
                        return {"url": router_url, "reachable": True,
                                "routes": {"tools_execute": tool_ok, "agent_infer": infer_ok},
                                "latency_ms": latency}
                except Exception:
                    # Try the next health path.
                    continue
            return {"url": router_url, "reachable": False, "routes": {}, "latency_ms": None}
    except Exception as e:
        return {"url": router_url, "reachable": False, "error": str(e)[:100]}
async def _probe_memory(mem_url: str) -> Dict[str, Any]:
    """Probe the memory service /health endpoint and summarize vector stats."""
    started = time.monotonic()
    try:
        async with httpx.AsyncClient(timeout=5.0) as client:
            resp = await client.get(f"{mem_url.rstrip('/')}/health")
            resp.raise_for_status()
            payload = resp.json()
            stores = payload.get("vector_store", {})
            # Total points across all collections (non-dict entries ignored).
            total_vectors = 0
            for store in stores.values():
                if isinstance(store, dict):
                    total_vectors += store.get("points_count", 0) or 0
            return {
                "url": mem_url,
                "reachable": True,
                "stats": {"vectors": total_vectors, "collections": len(stores)},
                "latency_ms": int((time.monotonic() - started) * 1000),
            }
    except Exception as e:
        return {"url": mem_url, "reachable": False, "error": str(e)[:100]}
async def _probe_ollama(ollama_url: str) -> Dict[str, Any]:
    """Probe Ollama's /api/tags and report up to 20 available model names."""
    started = time.monotonic()
    try:
        async with httpx.AsyncClient(timeout=5.0) as client:
            resp = await client.get(f"{ollama_url.rstrip('/')}/api/tags")
            resp.raise_for_status()
            listing = resp.json()
            names = [entry.get("name", "") for entry in listing.get("models", [])]
            return {
                "url": ollama_url,
                "reachable": True,
                "models": names[:20],
                "latency_ms": int((time.monotonic() - started) * 1000),
            }
    except Exception as e:
        return {"url": ollama_url, "reachable": False, "models": [], "error": str(e)[:100]}
async def _probe_http(url: str, *, timeout: float = 4.0) -> Dict[str, Any]:
    """GET *url* once; "reachable" means any response with status below 500."""
    started = time.monotonic()
    try:
        async with httpx.AsyncClient(timeout=timeout) as client:
            resp = await client.get(url)
        elapsed_ms = int((time.monotonic() - started) * 1000)
        return {
            "reachable": resp.status_code < 500,
            "status": resp.status_code,
            "latency_ms": elapsed_ms,
        }
    except Exception as e:
        return {"reachable": False, "error": str(e)[:120]}
def _read_backends() -> Dict[str, str]:
"""Read backend env vars from BFF environment (no secrets)."""
return {
"alerts": os.getenv("ALERT_BACKEND", "unknown"),
"audit": os.getenv("AUDIT_BACKEND", "unknown"),
"incidents": os.getenv("INCIDENT_BACKEND", "unknown"),
"risk_history": os.getenv("RISK_HISTORY_BACKEND", "unknown"),
"backlog": os.getenv("BACKLOG_BACKEND", "unknown"),
}
def _read_cron_status() -> Dict[str, Any]:
cron_file = os.getenv("GOV_CRON_FILE", "/etc/cron.d/daarion-governance")
jobs_expected = [
"hourly_risk_snapshot", "daily_risk_digest", "risk_history_cleanup",
"weekly_platform_priority_digest", "weekly_backlog_generate", "daily_backlog_cleanup",
]
jobs_present: List[str] = []
installed: Any = False
warning = None
try:
content = Path(cron_file).read_text()
installed = True
for job in jobs_expected:
if job in content:
jobs_present.append(job)
except PermissionError:
installed = "unknown"
warning = "no read permission on cron file"
except FileNotFoundError:
installed = False
# Scan for latest artifact files
artifacts: Dict[str, Any] = {}
base = Path("ops")
for pattern, key in [
("reports/risk/*.md", "risk_digest_md"),
("reports/platform/*.md", "platform_digest_md"),
("backlog/*.jsonl", "backlog_jsonl"),
]:
try:
files = sorted(base.glob(pattern))
if files:
artifacts[key] = str(files[-1])
except Exception:
pass
result: Dict[str, Any] = {
"installed": installed,
"cron_file": cron_file,
"jobs_expected": jobs_expected,
"jobs_present": jobs_present,
"last_artifacts": artifacts,
}
if warning:
result["warning"] = warning
return result
@app.get("/api/status/full")
async def api_status_full():
    """Full stack diagnostic: BFF + router + memory + ollama + backends + cron."""
    reg = load_nodes_registry()
    nodes_cfg = reg.get("nodes", {})
    # Pick NODA2 router first, fallback to first node
    router_url = (
        get_router_url("NODA2")
        or (list(nodes_cfg.values())[0].get("router_url", "") if nodes_cfg else "")
    )
    mem_url = get_memory_service_url()
    ollama_url = get_ollama_url()
    async def _no_router() -> Dict[str, Any]:
        # Placeholder coroutine keeps asyncio.gather's arity fixed at three.
        return {"reachable": False, "url": "", "error": "no router_url configured"}
    router_info, mem_info, ollama_info = await asyncio.gather(
        _probe_router(router_url) if router_url else _no_router(),
        _probe_memory(mem_url),
        _probe_ollama(ollama_url),
        return_exceptions=False,
    )
    return {
        "bff": {
            "version": _VERSION,
            "build": _BUILD_ID,
            "env": os.getenv("ENV", "dev"),
            "uptime_s": int(time.monotonic() - _START_TIME),
            "ws_clients": len(_ws_clients),
            "aistalk_enabled": _aistalk is not None,
        },
        "router": router_info,
        "memory": mem_info,
        "ollama": ollama_info,
        "backends": _read_backends(),
        "cron": _read_cron_status(),
    }
@app.get("/api/integrations/status")
async def api_integrations_status(opencode_url: Optional[str] = Query(None)):
    """Integration probes for unified CTO hub in UI.

    Probes router, memory service, Open WebUI, Pieces OS, OpenCode (optional;
    the query parameter overrides the env var) and the Notion API (a simple
    authenticated /users/me check — no data access).
    """
    open_webui_probe_url = os.getenv("OPEN_WEBUI_PROBE_URL", "http://host.docker.internal:8080/health")
    open_webui_ui_url = os.getenv("OPEN_WEBUI_UI_URL", "http://localhost:8080")
    pieces_probe_url = os.getenv(
        "PIECES_OS_URL",
        "http://host.docker.internal:39300/workstream_pattern_engine/processors/status",
    )
    # Normalise the Pieces URL so it always ends with the status route.
    if not pieces_probe_url.rstrip("/").endswith("/workstream_pattern_engine/processors/status"):
        pieces_probe_url = pieces_probe_url.rstrip("/") + "/workstream_pattern_engine/processors/status"
    opencode_probe_url = (opencode_url or os.getenv("OPENCODE_URL", "")).strip()
    notion_api_key = os.getenv("NOTION_API_KEY", os.getenv("NOTION_TOKEN", "")).strip()
    probes = await asyncio.gather(
        _probe_http(get_router_url("NODA2").rstrip("/") + "/healthz"),
        _probe_http(get_memory_service_url().rstrip("/") + "/health"),
        _probe_http(open_webui_probe_url),
        _probe_http(pieces_probe_url),
        # asyncio.sleep(0, result=...) keeps gather's arity stable when
        # OpenCode is not configured.
        _probe_http(opencode_probe_url.rstrip("/") + "/health") if opencode_probe_url else asyncio.sleep(0, result={"reachable": False, "error": "not configured"}),
    )
    router_probe, memory_probe, open_webui_probe, pieces_probe, opencode_probe = probes
    notion_probe: Dict[str, Any] = {"configured": bool(notion_api_key), "reachable": False}
    if notion_api_key:
        try:
            async with httpx.AsyncClient(timeout=6.0) as c:
                r = await c.get(
                    "https://api.notion.com/v1/users/me",
                    headers={
                        "Authorization": f"Bearer {notion_api_key}",
                        "Notion-Version": "2022-06-28",
                    },
                )
                notion_probe["reachable"] = r.status_code == 200
                notion_probe["status"] = r.status_code
        except Exception as e:
            notion_probe["error"] = str(e)[:120]
    return {
        "integrations": {
            "sofiia_console": {"url": "/ui", "reachable": True},
            "router_noda2": {"url": get_router_url("NODA2"), **router_probe},
            "memory_service": {"url": get_memory_service_url(), **memory_probe},
            "open_webui": {"url": open_webui_ui_url, "probe_url": open_webui_probe_url, **open_webui_probe},
            "pieces_os": {"url": pieces_probe_url, **pieces_probe},
            "opencode": {
                "url": opencode_probe_url or "desktop/cli",
                **opencode_probe,
            },
            "notion": notion_probe,
        }
    }
# ─── Aurora media forensics proxy ────────────────────────────────────────────
def _aurora_proxy_file_url(job_id: str, file_name: str) -> str:
return f"/api/aurora/files/{quote(job_id, safe='')}/{quote(file_name, safe='')}"
def _rewrite_aurora_payload_urls(payload: Dict[str, Any]) -> Dict[str, Any]:
    """Rewrite Aurora output-file URLs to console-local proxy paths (in place).

    Each dict entry in ``output_files`` gets its "url" replaced with the
    /api/aurora/files/... proxy URL; non-dict entries are dropped. Payloads
    without an ``output_files`` list are returned untouched.
    """
    output_files = payload.get("output_files")
    if not isinstance(output_files, list):
        return payload
    job_id = str(payload.get("job_id") or "")
    rewritten: List[Dict[str, Any]] = []
    for item in output_files:
        if not isinstance(item, dict):
            continue
        file_name = str(item.get("name") or "")
        if job_id and file_name:
            # Shallow-copy so the upstream dict is not mutated.
            item = {**item, "url": _aurora_proxy_file_url(job_id, file_name)}
        rewritten.append(item)
    payload["output_files"] = rewritten
    report_url = payload.get("forensic_report_url")
    # NOTE(review): this branch reassigns the same value and is currently a
    # no-op — presumably a placeholder for rewriting non-proxy report URLs;
    # confirm intent before removing.
    if isinstance(report_url, str) and report_url.startswith("/api/aurora/report/"):
        payload["forensic_report_url"] = report_url
    return payload
async def _aurora_request_json(
    method: str,
    path: str,
    *,
    files: Optional[Dict[str, Any]] = None,
    data: Optional[Dict[str, Any]] = None,
    json_body: Optional[Dict[str, Any]] = None,
    timeout: float = 60.0,
    retries: int = 0,
    retry_backoff_sec: float = 0.25,
) -> Dict[str, Any]:
    """Proxy one JSON request to the Aurora service with retries + fallback.

    Behavior:
    - A transport error while talking to the in-cluster hostname switches to
      AURORA_FALLBACK_URL once. The switch does NOT consume a retry attempt:
      previously it did, so with the default retries=0 the loop ended right
      after switching and the fallback URL was never actually contacted.
    - 5xx responses and invalid JSON bodies are retried with linear backoff.
    - 4xx/5xx after exhausting retries raise HTTPException with the upstream
      status; transport failures raise 502.
    - Dict payloads get their output-file URLs rewritten to console-local
      proxy paths; non-dict JSON is wrapped as {"data": ...}.
    """
    base_url = AURORA_SERVICE_URL
    url = f"{base_url}{path}"
    attempts = max(1, int(retries) + 1)
    last_error = "unknown error"
    fallback_tried = False  # switch to AURORA_FALLBACK_URL at most once
    attempt = 0
    while attempt < attempts:
        attempt += 1
        try:
            async with httpx.AsyncClient(timeout=timeout) as client:
                r = await client.request(method, url, files=files, data=data, json=json_body)
        except httpx.HTTPError as e:
            last_error = str(e)[:200]
            if (
                not fallback_tried
                and "aurora-service" in base_url
                and AURORA_FALLBACK_URL
                and AURORA_FALLBACK_URL != base_url
            ):
                fallback_tried = True
                logger.warning(
                    "aurora proxy fallback: %s -> %s (%s)",
                    base_url,
                    AURORA_FALLBACK_URL,
                    last_error or type(e).__name__,
                )
                base_url = AURORA_FALLBACK_URL
                url = f"{base_url}{path}"
                # Retrying against the fallback must not consume one of the
                # caller's attempts (fix: fallback was unreachable at retries=0).
                attempt -= 1
                continue
            logger.warning(
                "aurora proxy transport error (%s %s, attempt=%d/%d): %s",
                method,
                path,
                attempt,
                attempts,
                last_error,
            )
            if attempt < attempts:
                await asyncio.sleep(retry_backoff_sec * attempt)
                continue
            raise HTTPException(status_code=502, detail=f"Aurora unavailable: {last_error}") from e
        except Exception as e:
            last_error = str(e)[:200]
            logger.exception(
                "aurora proxy unexpected error (%s %s, attempt=%d/%d): %s",
                method,
                path,
                attempt,
                attempts,
                last_error,
            )
            if attempt < attempts:
                await asyncio.sleep(retry_backoff_sec * attempt)
                continue
            raise HTTPException(status_code=502, detail=f"Aurora unavailable: {last_error}") from e
        # Retry transient upstream failures while attempts remain.
        if r.status_code >= 500 and attempt < attempts:
            logger.warning(
                "aurora proxy upstream %d (%s %s, attempt=%d/%d) — retrying",
                r.status_code,
                method,
                path,
                attempt,
                attempts,
            )
            await asyncio.sleep(retry_backoff_sec * attempt)
            continue
        if r.status_code >= 400:
            detail = r.text[:400] if r.text else f"Aurora error {r.status_code}"
            raise HTTPException(status_code=r.status_code, detail=detail)
        if not r.content:
            return {}
        try:
            payload = r.json()
        except Exception as e:
            last_error = str(e)[:200]
            logger.warning(
                "aurora proxy invalid JSON (%s %s, attempt=%d/%d): %s",
                method,
                path,
                attempt,
                attempts,
                last_error,
            )
            if attempt < attempts:
                await asyncio.sleep(retry_backoff_sec * attempt)
                continue
            raise HTTPException(status_code=502, detail="Invalid Aurora JSON response") from e
        if isinstance(payload, dict):
            return _rewrite_aurora_payload_urls(payload)
        return {"data": payload}
    raise HTTPException(status_code=502, detail=f"Aurora unavailable: {last_error}")
def _parse_stage_frame(stage: str) -> Dict[str, int]:
text = str(stage or "")
m = re.search(r"frame\s+(\d+)\s*/\s*(\d+)", text)
if not m:
return {"current": -1, "total": -1}
try:
return {"current": int(m.group(1)), "total": int(m.group(2))}
except Exception:
return {"current": -1, "total": -1}
def _aurora_live_fs_frame(job_id: str) -> Optional[Dict[str, Any]]:
    """Infer live render progress for a job by scanning its work dirs on disk.

    Looks under AURORA_DATA_DIR/outputs/<job_id>/_work_*/processed for the
    highest numbered PNG (current frame) and counts raw/*.png as the total.
    Results are cached per job for ~3 seconds. Returns None when no progress
    is visible on disk.
    """
    now = time.monotonic()
    cached = _aurora_live_cache.get(job_id)
    # Serve the cached snapshot while it is still fresh (< 3s old).
    if cached and (now - float(cached.get("ts", 0.0))) < 3.0:
        return cached
    base = AURORA_DATA_DIR / "outputs" / job_id
    if not base.exists():
        return None
    work_dirs = [p for p in base.iterdir() if p.is_dir() and p.name.startswith("_work_")]
    if not work_dirs:
        return None
    # Prefer most recently touched working directory
    work_dirs.sort(key=lambda p: p.stat().st_mtime, reverse=True)
    best_frame = -1
    best_total = -1
    best_dir = None
    for wd in work_dirs:
        processed = wd / "processed"
        if not processed.exists():
            continue
        # Max frame in processed directory
        local_max = -1
        for f in processed.glob("*.png"):
            try:
                n = int(f.stem)
            except Exception:
                # Non-numeric file names are ignored.
                continue
            if n > local_max:
                local_max = n
        if local_max < 0:
            continue
        raw_dir = wd / "raw"
        total = -1
        if raw_dir.exists():
            try:
                total = sum(1 for _ in raw_dir.glob("*.png"))
            except Exception:
                total = -1
        if local_max > best_frame:
            best_frame = local_max
            best_total = total
            best_dir = str(wd)
    if best_frame < 0:
        return None
    info = {
        "ts": now,
        "frame": best_frame,
        "total": best_total,
        "work_dir": best_dir,
    }
    _aurora_live_cache[job_id] = info
    return info
def _aurora_record_sample(job_id: str, frame: int, total: int) -> Optional[Dict[str, Any]]:
    """Record a (time, frame) sample for a job and estimate live fps.

    Samples live in a 32-entry ring buffer per job.  Once at least three
    samples with a positive frame advance exist, returns
    ``{"fps": float, "confidence": "low"|"medium"|"high"}``; otherwise None.
    """
    if frame < 0:
        return None
    now = time.monotonic()
    dq = _aurora_live_samples.setdefault(job_id, collections.deque(maxlen=32))
    # De-dup consecutive equal frame samples.
    if dq and int(dq[-1]["frame"]) == frame:
        # Keep original timestamp for stable fps between actual frame advances.
        dq[-1]["total"] = total
    else:
        dq.append({"ts": now, "frame": frame, "total": total})
    if len(dq) < 3:
        return None
    fps_points: List[float] = []
    prev = dq[0]
    for cur in list(dq)[1:]:
        df = int(cur["frame"]) - int(prev["frame"])
        dt = float(cur["ts"]) - float(prev["ts"])
        if df > 0 and dt > 0:
            fps_points.append(df / dt)
        prev = cur
    if not fps_points:
        return None
    # Median fps is robust to a single stalled or bursty interval.
    fps = max(0.01, float(statistics.median(fps_points)))
    confidence = "low"
    if len(fps_points) >= 8:
        confidence = "high"
    elif len(fps_points) >= 4:
        confidence = "medium"
    return {"fps": fps, "confidence": confidence}
def _aurora_load_live_last_from_disk() -> None:
    """Load the persisted per-job live-timing snapshots, at most once.

    Best-effort: a missing or unreadable file leaves the in-memory map as-is.
    """
    global _aurora_live_last_loaded
    if _aurora_live_last_loaded:
        return
    # Mark loaded up-front so a parse failure is not retried on every call.
    _aurora_live_last_loaded = True
    try:
        if not _aurora_live_last_path.exists():
            return
        data = json.loads(_aurora_live_last_path.read_text(encoding="utf-8"))
        if isinstance(data, dict):
            for k, v in data.items():
                if isinstance(k, str) and isinstance(v, dict):
                    _aurora_live_last[k] = v
    except Exception as e:
        logger.debug("aurora live-last load failed: %s", e)
def _aurora_persist_live_last_to_disk() -> None:
    """Write the per-job live-timing snapshots to disk (best-effort)."""
    try:
        _aurora_live_last_path.parent.mkdir(parents=True, exist_ok=True)
        _aurora_live_last_path.write_text(
            json.dumps(_aurora_live_last, ensure_ascii=False, separators=(",", ":")),
            encoding="utf-8",
        )
    except Exception as e:
        logger.debug("aurora live-last persist failed: %s", e)
def _smart_now_iso() -> str:
return datetime.now(timezone.utc).isoformat()
def _smart_is_terminal(status: Any) -> bool:
return str(status or "").lower() in {"completed", "failed", "cancelled"}
def _smart_media_type(file_name: str, content_type: str) -> str:
name = str(file_name or "").lower()
ctype = str(content_type or "").lower()
video_ext = (".mp4", ".avi", ".mov", ".mkv", ".webm")
audio_ext = (".mp3", ".wav", ".flac", ".m4a", ".aac", ".ogg")
image_ext = (".jpg", ".jpeg", ".png", ".webp", ".tif", ".tiff", ".bmp")
if ctype.startswith("video/") or name.endswith(video_ext):
return "video"
if ctype.startswith("audio/") or name.endswith(audio_ext):
return "audio"
if ctype.startswith("image/") or name.endswith(image_ext):
return "photo"
return "unknown"
def _smart_trim_runs() -> None:
    """Cap the in-memory smart-run store at _AURORA_SMART_MAX_RUNS entries.

    Keeps the newest runs by their ``created_at`` ISO string (lexicographic
    order matches chronological order for ISO-8601 timestamps).
    """
    if len(_aurora_smart_runs) <= _AURORA_SMART_MAX_RUNS:
        return
    ordered = sorted(
        _aurora_smart_runs.items(),
        key=lambda kv: str((kv[1] or {}).get("created_at") or ""),
        reverse=True,
    )
    keep = dict(ordered[:_AURORA_SMART_MAX_RUNS])
    _aurora_smart_runs.clear()
    _aurora_smart_runs.update(keep)
def _smart_load_runs_from_disk() -> None:
    """Load persisted smart runs into memory, at most once per process.

    Accepts both the wrapped ``{"runs": {...}}`` layout and a bare dict for
    backward compatibility.  Best-effort: failures are logged at debug level.
    """
    global _aurora_smart_runs_loaded
    if _aurora_smart_runs_loaded:
        return
    # Mark loaded up-front so a parse failure is not retried on every call.
    _aurora_smart_runs_loaded = True
    try:
        if not _aurora_smart_runs_path.exists():
            return
        payload = json.loads(_aurora_smart_runs_path.read_text(encoding="utf-8"))
        if isinstance(payload, dict):
            runs = payload.get("runs")
        else:
            runs = payload
        if isinstance(runs, dict):
            for run_id, run in runs.items():
                if isinstance(run_id, str) and isinstance(run, dict):
                    _aurora_smart_runs[run_id] = run
        _smart_trim_runs()
    except Exception as exc:
        logger.debug("aurora smart-runs load failed: %s", exc)
def _smart_persist_runs() -> None:
    """Trim and write the smart-run store to disk (best-effort)."""
    try:
        _smart_trim_runs()
        _aurora_smart_runs_path.parent.mkdir(parents=True, exist_ok=True)
        payload = {
            "updated_at": _smart_now_iso(),
            "runs": _aurora_smart_runs,
        }
        _aurora_smart_runs_path.write_text(
            json.dumps(payload, ensure_ascii=False, separators=(",", ":")),
            encoding="utf-8",
        )
    except Exception as exc:
        logger.debug("aurora smart-runs persist failed: %s", exc)
def _smart_load_policy_from_disk() -> None:
    """Load the learned strategy policy from disk, at most once per process."""
    global _aurora_smart_policy_loaded
    if _aurora_smart_policy_loaded:
        return
    # Mark loaded up-front so a parse failure is not retried on every call.
    _aurora_smart_policy_loaded = True
    try:
        if not _aurora_smart_policy_path.exists():
            return
        payload = json.loads(_aurora_smart_policy_path.read_text(encoding="utf-8"))
        if isinstance(payload, dict):
            strategies = payload.get("strategies")
            if isinstance(strategies, dict):
                _aurora_smart_policy["strategies"] = strategies
            _aurora_smart_policy["updated_at"] = payload.get("updated_at")
    except Exception as exc:
        logger.debug("aurora smart-policy load failed: %s", exc)
def _smart_persist_policy() -> None:
    """Stamp and write the strategy policy to disk (best-effort)."""
    try:
        _aurora_smart_policy["updated_at"] = _smart_now_iso()
        _aurora_smart_policy_path.parent.mkdir(parents=True, exist_ok=True)
        _aurora_smart_policy_path.write_text(
            json.dumps(_aurora_smart_policy, ensure_ascii=False, separators=(",", ":")),
            encoding="utf-8",
        )
    except Exception as exc:
        logger.debug("aurora smart-policy persist failed: %s", exc)
def _smart_strategy_stats(strategy: str) -> Dict[str, Any]:
    """Return the mutable stats dict for *strategy*, creating it if absent.

    The returned dict is stored inside ``_aurora_smart_policy`` — callers
    mutate it in place and then persist the policy.
    """
    _smart_load_policy_from_disk()
    strategies = _aurora_smart_policy.setdefault("strategies", {})
    stats = strategies.get(strategy)
    if not isinstance(stats, dict):
        stats = {"count": 0, "avg_score": 0.0, "wins": 0, "losses": 0}
        strategies[strategy] = stats
    return stats
def _smart_update_strategy_score(strategy: str, score: float) -> None:
    """Fold *score* into the strategy's running average and persist.

    Best-effort: any failure (bad stored values, persist error) is swallowed
    so feedback handling never breaks the request path.
    """
    stats = _smart_strategy_stats(strategy)
    try:
        count = int(stats.get("count") or 0) + 1
        avg = float(stats.get("avg_score") or 0.0)
        # Incremental mean update: new_avg = (old_avg * (n-1) + score) / n.
        stats["avg_score"] = round(((avg * (count - 1)) + float(score)) / max(1, count), 4)
        stats["count"] = count
        _smart_persist_policy()
    except Exception:
        return
def _smart_update_strategy_outcome(strategy: str, success: bool) -> None:
    """Record a win or loss for *strategy* and persist the policy file."""
    stats = _smart_strategy_stats(strategy)
    bucket = "wins" if success else "losses"
    stats[bucket] = int(stats.get(bucket) or 0) + 1
    _smart_persist_policy()
def _smart_new_run_id() -> str:
stamp = datetime.now(timezone.utc).strftime("%Y%m%d_%H%M%S")
return f"smart_{stamp}_{uuid.uuid4().hex[:6]}"
def _smart_append_audit(run: Dict[str, Any], event: str, detail: Optional[Dict[str, Any]] = None) -> None:
    """Append a timestamped audit entry to *run* and bump its updated_at.

    Non-list ``audit`` values are replaced; history is capped at the newest
    200 entries.  A non-empty dict *detail* is attached under ``detail``.
    """
    trail = run.setdefault("audit", [])
    if not isinstance(trail, list):
        trail = []
        run["audit"] = trail
    entry: Dict[str, Any] = {"ts": _smart_now_iso(), "event": str(event)}
    if detail and isinstance(detail, dict):
        entry["detail"] = detail
    trail.append(entry)
    # Keep only the most recent 200 audit entries.
    if len(trail) > 200:
        del trail[:-200]
    run["updated_at"] = entry["ts"]
def _smart_analysis_features(analysis: Optional[Dict[str, Any]]) -> Dict[str, Any]:
if not isinstance(analysis, dict):
return {
"faces": 0,
"plates": 0,
"noise": "unknown",
"blur": "unknown",
"quality_score": 0.0,
}
faces = len(analysis.get("faces") or []) if isinstance(analysis.get("faces"), list) else 0
plates = len(analysis.get("license_plates") or []) if isinstance(analysis.get("license_plates"), list) else 0
qa = analysis.get("quality_analysis") if isinstance(analysis.get("quality_analysis"), dict) else {}
noise = str(qa.get("noise_level") or "unknown").lower()
blur = str(qa.get("blur_level") or "unknown").lower()
score = 0.0
score += min(2.0, faces * 0.2)
score += min(2.0, plates * 0.4)
if noise in {"high", "very_high"}:
score += 1.0
if blur in {"high", "very_high"}:
score += 1.0
return {
"faces": faces,
"plates": plates,
"noise": noise,
"blur": blur,
"quality_score": round(score, 3),
}
def _smart_decide_strategy(
    *,
    media_type: str,
    mode: str,
    requested_strategy: str,
    prefer_quality: bool,
    budget_tier: str,
    analysis: Optional[Dict[str, Any]],
    learning_enabled: bool,
) -> Dict[str, Any]:
    """Choose a processing strategy: ``local_only`` or ``local_then_kling``.

    Non-video media always stays local, and an explicitly requested strategy
    wins outright.  Otherwise a heuristic score is accumulated from analysis
    features, the quality preference, forensic mode, the budget tier and
    (optionally) the learned Kling win/loss ratio; Kling is chosen when the
    score reaches 2.1.  Returns the chosen strategy plus reasons, the score
    and the extracted features.
    """
    strategy = str(requested_strategy or "auto").strip().lower()
    valid = {"auto", "local_only", "local_then_kling"}
    if strategy not in valid:
        strategy = "auto"
    features = _smart_analysis_features(analysis)
    reasons: List[str] = []
    score = 0.0
    if media_type != "video":
        chosen = "local_only"
        reasons.append("non-video media -> local stack only")
        return {"strategy": chosen, "reasons": reasons, "score": 0.0, "features": features}
    if strategy in {"local_only", "local_then_kling"}:
        reasons.append(f"explicit strategy={strategy}")
        return {"strategy": strategy, "reasons": reasons, "score": features["quality_score"], "features": features}
    score += float(features["quality_score"])
    if prefer_quality:
        score += 1.3
        reasons.append("prefer_quality=true")
    if str(mode).lower() == "forensic":
        score += 0.8
        reasons.append("forensic mode")
    budget_norm = str(budget_tier or "normal").strip().lower()
    if budget_norm == "low":
        score -= 1.4
        reasons.append("budget_tier=low")
    elif budget_norm == "high":
        score += 0.6
        reasons.append("budget_tier=high")
    if learning_enabled:
        # Nudge the score by the historical Kling success ratio once at
        # least 6 outcomes have been recorded.
        stats = _smart_strategy_stats("local_then_kling")
        wins = int(stats.get("wins") or 0)
        losses = int(stats.get("losses") or 0)
        total = wins + losses
        if total >= 6:
            success_ratio = wins / max(1, total)
            if success_ratio >= 0.65:
                score += 0.5
                reasons.append(f"learned success ratio {success_ratio:.2f}")
            elif success_ratio <= 0.35:
                score -= 0.7
                reasons.append(f"learned low success ratio {success_ratio:.2f}")
    chosen = "local_then_kling" if score >= 2.1 else "local_only"
    if not reasons:
        reasons.append("default heuristic")
    return {"strategy": chosen, "reasons": reasons, "score": round(score, 3), "features": features}
def _smart_compact_result(result_payload: Dict[str, Any]) -> Dict[str, Any]:
payload = {}
if not isinstance(result_payload, dict):
return payload
payload["mode"] = result_payload.get("mode")
payload["media_type"] = result_payload.get("media_type")
payload["digital_signature"] = result_payload.get("digital_signature")
output_files = result_payload.get("output_files")
if isinstance(output_files, list):
payload["output_files"] = output_files[:8]
q = result_payload.get("quality_report")
if isinstance(q, dict):
payload["quality_report"] = q
return payload
async def _smart_fetch_run_status(run_id: str) -> Optional[Dict[str, Any]]:
    """Look up a smart run by id, loading the on-disk store on first use."""
    _smart_load_runs_from_disk()
    candidate = _aurora_smart_runs.get(run_id)
    return candidate if isinstance(candidate, dict) else None
async def _smart_monitor_run(run_id: str) -> None:
    """Background task driving a smart run through its whole pipeline.

    Phase 1 polls the local Aurora job until it completes, fails or times
    out, mirroring its status into the run record.  Phase 2 fetches and
    stores the compacted local result.  Phase 3 (only for strategy
    ``local_then_kling`` on video) submits a Kling enhancement pass and
    polls it; Kling failures or timeouts still leave the run "completed"
    with the local stack selected.  Every transition updates the persisted
    run record and the strategy win/loss stats.  Unknown run ids return
    silently.
    """
    run = await _smart_fetch_run_status(run_id)
    if not run:
        return
    local = run.get("local") if isinstance(run.get("local"), dict) else {}
    local_job_id = str(local.get("job_id") or "")
    if not local_job_id:
        _smart_append_audit(run, "monitor.error", {"reason": "missing local job id"})
        run["status"] = "failed"
        run["phase"] = "failed"
        _smart_persist_runs()
        return
    # Phase 1: poll the local Aurora job until terminal or timeout.
    start = time.monotonic()
    while time.monotonic() - start <= _AURORA_SMART_LOCAL_MAX_SEC:
        try:
            st = await _aurora_request_json(
                "GET",
                f"/api/aurora/status/{quote(local_job_id, safe='')}",
                timeout=20.0,
                retries=2,
                retry_backoff_sec=0.25,
            )
        except Exception as exc:
            # Transient status errors are audited and retried on the next poll.
            _smart_append_audit(run, "local.status.error", {"error": str(exc)[:220]})
            await asyncio.sleep(_AURORA_SMART_LOCAL_POLL_SEC)
            continue
        status = str(st.get("status") or "").lower()
        if status in {"queued", "processing"}:
            run["phase"] = "local_processing"
            run["status"] = "processing"
        elif status == "completed":
            run["phase"] = "local_completed"
            run["status"] = "processing"
        else:
            run["phase"] = f"local_{status or 'unknown'}"
            run["status"] = status
        run["local"] = {
            **local,
            "job_id": local_job_id,
            "status": status,
            "progress": st.get("progress"),
            "current_stage": st.get("current_stage"),
            "eta_seconds": st.get("eta_seconds"),
            "live_fps": st.get("live_fps"),
            "error_message": st.get("error_message"),
            "updated_at": _smart_now_iso(),
        }
        _smart_persist_runs()
        if status in {"queued", "processing"}:
            await asyncio.sleep(_AURORA_SMART_LOCAL_POLL_SEC)
            continue
        if status != "completed":
            run["status"] = "failed"
            run["phase"] = "local_failed"
            _smart_append_audit(
                run,
                "local.failed",
                {"status": status, "error": str(st.get("error_message") or "")[:220]},
            )
            _smart_update_strategy_outcome(str(run.get("policy", {}).get("strategy") or "local_only"), False)
            _smart_persist_runs()
            return
        _smart_append_audit(run, "local.completed", {"job_id": local_job_id})
        break
    else:
        # while/else: the loop ran to exhaustion without a break -> timeout.
        run["status"] = "failed"
        run["phase"] = "local_timeout"
        _smart_append_audit(run, "local.timeout", {"max_sec": _AURORA_SMART_LOCAL_MAX_SEC})
        _smart_update_strategy_outcome(str(run.get("policy", {}).get("strategy") or "local_only"), False)
        _smart_persist_runs()
        return
    # Phase 2: fetch and store the (compacted) local result.
    try:
        local_result = await _aurora_request_json(
            "GET",
            f"/api/aurora/result/{quote(local_job_id, safe='')}",
            timeout=30.0,
            retries=2,
            retry_backoff_sec=0.25,
        )
    except Exception as exc:
        run["status"] = "failed"
        run["phase"] = "local_result_error"
        _smart_append_audit(run, "local.result.error", {"error": str(exc)[:240]})
        _smart_update_strategy_outcome(str(run.get("policy", {}).get("strategy") or "local_only"), False)
        _smart_persist_runs()
        return
    run.setdefault("local", {})
    if isinstance(run["local"], dict):
        run["local"]["result"] = _smart_compact_result(local_result)
        run["local"]["result_ready"] = True
    run["selected_stack"] = "local"
    policy = run.get("policy") if isinstance(run.get("policy"), dict) else {}
    strategy = str(policy.get("strategy") or "local_only")
    media_type = str(run.get("media_type") or "")
    kling = run.get("kling") if isinstance(run.get("kling"), dict) else {}
    if strategy != "local_then_kling" or media_type != "video":
        run["status"] = "completed"
        run["phase"] = "completed"
        _smart_append_audit(run, "smart.completed", {"selected_stack": "local", "reason": "strategy local_only or non-video"})
        _smart_update_strategy_outcome(strategy, True)
        _smart_persist_runs()
        return
    # Phase 3: submit the Kling enhancement pass.
    run["phase"] = "kling_submitting"
    run["status"] = "processing"
    _smart_append_audit(run, "kling.submit.start")
    _smart_persist_runs()
    try:
        submit = await _aurora_request_json(
            "POST",
            "/api/aurora/kling/enhance",
            data={
                "job_id": local_job_id,
                "prompt": str(kling.get("prompt") or "enhance video quality, improve sharpness and clarity"),
                "negative_prompt": str(kling.get("negative_prompt") or "noise, blur, artifacts, distortion"),
                "mode": str(kling.get("mode") or "pro"),
                "duration": str(kling.get("duration") or "5"),
                "cfg_scale": str(kling.get("cfg_scale") if kling.get("cfg_scale") is not None else "0.5"),
            },
            timeout=120.0,
            retries=1,
            retry_backoff_sec=0.25,
        )
    except Exception as exc:
        # Kling submit failure still counts as completed with the local output.
        run["kling"] = {
            **kling,
            "status": "failed",
            "error": str(exc)[:640],
        }
        run["status"] = "completed"
        run["phase"] = "completed_with_kling_failure"
        run["selected_stack"] = "local"
        _smart_append_audit(run, "kling.submit.error", {"error": str(exc)[:220]})
        _smart_update_strategy_outcome(strategy, False)
        _smart_persist_runs()
        return
    task_id = str(submit.get("kling_task_id") or "")
    run["kling"] = {
        **kling,
        "task_id": task_id,
        "status": str(submit.get("status") or "submitted").lower(),
        "endpoint": str(submit.get("kling_endpoint") or "video2video"),
        "submitted_at": _smart_now_iso(),
    }
    _smart_append_audit(run, "kling.submitted", {"task_id": task_id})
    _smart_persist_runs()
    # Poll Kling until terminal or timeout.
    k_start = time.monotonic()
    while time.monotonic() - k_start <= _AURORA_SMART_KLING_MAX_SEC:
        try:
            kst = await _aurora_request_json(
                "GET",
                f"/api/aurora/kling/status/{quote(local_job_id, safe='')}",
                timeout=30.0,
                retries=1,
                retry_backoff_sec=0.2,
            )
        except Exception as exc:
            _smart_append_audit(run, "kling.status.error", {"error": str(exc)[:220]})
            await asyncio.sleep(_AURORA_SMART_KLING_POLL_SEC)
            continue
        k_status = str(kst.get("status") or "").lower()
        k_url = kst.get("kling_result_url")
        run["phase"] = "kling_processing"
        run["kling"] = {
            **(run.get("kling") if isinstance(run.get("kling"), dict) else {}),
            "status": k_status,
            "result_url": k_url,
            "last_polled_at": _smart_now_iso(),
        }
        _smart_persist_runs()
        if k_status in {"submitted", "queued", "running", "processing", "pending"}:
            await asyncio.sleep(_AURORA_SMART_KLING_POLL_SEC)
            continue
        if k_status in {"succeed", "completed", "success"} and k_url:
            run["status"] = "completed"
            run["phase"] = "completed"
            run["selected_stack"] = "kling"
            _smart_append_audit(run, "smart.completed", {"selected_stack": "kling", "task_id": task_id})
            _smart_update_strategy_outcome(strategy, True)
            _smart_persist_runs()
            return
        # Any other terminal Kling status: fall back to the local output.
        run["status"] = "completed"
        run["phase"] = "completed_with_kling_failure"
        run["selected_stack"] = "local"
        _smart_append_audit(
            run,
            "kling.terminal.non_success",
            {"status": k_status, "task_id": task_id},
        )
        _smart_update_strategy_outcome(strategy, False)
        _smart_persist_runs()
        return
    # Kling polling timed out; the local output remains the selected stack.
    run["status"] = "completed"
    run["phase"] = "completed_with_kling_timeout"
    run["selected_stack"] = "local"
    _smart_append_audit(run, "kling.timeout", {"max_sec": _AURORA_SMART_KLING_MAX_SEC})
    _smart_update_strategy_outcome(strategy, False)
    _smart_persist_runs()
def _smart_resume_active_monitors() -> None:
    """Re-spawn monitor tasks for all non-terminal smart runs.

    NOTE(review): ``asyncio.create_task`` requires a running event loop —
    presumably this is invoked from app startup; confirm before calling it
    from a synchronous context.
    """
    _smart_load_runs_from_disk()
    for run_id, run in list(_aurora_smart_runs.items()):
        if not isinstance(run, dict):
            continue
        if _smart_is_terminal(run.get("status")):
            continue
        try:
            asyncio.create_task(_smart_monitor_run(run_id))
        except Exception:
            # Best-effort resume: one bad run must not block the others.
            continue
@app.get("/api/aurora/health")
async def api_aurora_health() -> Dict[str, Any]:
    """Proxy Aurora's /health endpoint through the console BFF."""
    health_payload = await _aurora_request_json("GET", "/health", timeout=10.0)
    return health_payload
@app.post("/api/aurora/upload")
async def api_aurora_upload(
    file: UploadFile = File(...),
    mode: str = Form("tactical"),
    priority: str = Form("balanced"),
    export_options: str = Form(""),
) -> Dict[str, Any]:
    """Forward an upload to Aurora for processing.

    The file handle is streamed through to Aurora; when the response carries
    a ``job_id``, convenience console URLs (status/result/cancel) are added.
    """
    # Stream file to Aurora without buffering entire content in RAM
    file_obj = file.file  # SpooledTemporaryFile — already handles large files
    files = {
        "file": (
            file.filename or "upload.bin",
            file_obj,
            file.content_type or "application/octet-stream",
        )
    }
    payload = await _aurora_request_json(
        "POST",
        "/api/aurora/upload",
        files=files,
        data={
            "mode": mode,
            "priority": priority,
            "export_options": export_options,
        },
        timeout=120.0,
    )
    job_id = str(payload.get("job_id") or "")
    if job_id:
        payload["status_url"] = f"/api/aurora/status/{job_id}"
        payload["result_url"] = f"/api/aurora/result/{job_id}"
        payload["cancel_url"] = f"/api/aurora/cancel/{job_id}"
    return payload
@app.post("/api/aurora/process-smart")
async def api_aurora_process_smart(
    file: UploadFile = File(...),
    mode: str = Form("tactical"),
    priority: str = Form("balanced"),
    export_options: str = Form(""),
    strategy: str = Form("auto"),
    prefer_quality: bool = Form(True),
    budget_tier: str = Form("normal"),
    learning_enabled: bool = Form(True),
    kling_prompt: str = Form("enhance video quality, improve sharpness and clarity"),
    kling_negative_prompt: str = Form("noise, blur, artifacts, distortion"),
    kling_mode: str = Form("pro"),
    kling_duration: str = Form("5"),
    kling_cfg_scale: float = Form(0.5),
) -> Dict[str, Any]:
    """Start a "smart" processing run: analyze, pick a strategy, submit.

    For video/photo uploads the file is first sent to Aurora's analyzer
    (best-effort — failures only skip the analysis).  A strategy is then
    decided, the file is submitted as a regular local Aurora job, a
    persisted smart-run record is created, and a background monitor task is
    spawned to drive the rest of the pipeline.  Returns the run id, chosen
    policy, and console URLs for polling.
    """
    _smart_load_runs_from_disk()
    _smart_load_policy_from_disk()
    file_name = file.filename or "upload.bin"
    content_type = file.content_type or "application/octet-stream"
    media_type = _smart_media_type(file_name, content_type)
    analysis: Optional[Dict[str, Any]] = None
    if media_type in {"video", "photo"}:
        try:
            # Rewind before each pass — the same upload stream is reused below.
            await file.seek(0)
            files = {"file": (file_name, file.file, content_type)}
            analysis = await _aurora_request_json(
                "POST",
                "/api/aurora/analyze",
                files=files,
                timeout=120.0,
                retries=1,
                retry_backoff_sec=0.25,
            )
        except Exception as exc:
            analysis = None
            logger.warning("smart-process analyze skipped: %s", str(exc)[:220])
    policy = _smart_decide_strategy(
        media_type=media_type,
        mode=mode,
        requested_strategy=strategy,
        prefer_quality=bool(prefer_quality),
        budget_tier=budget_tier,
        analysis=analysis,
        learning_enabled=bool(learning_enabled),
    )
    chosen_strategy = str(policy.get("strategy") or "local_only")
    policy.setdefault("requested_strategy", str(strategy or "auto"))
    policy["learning_enabled"] = bool(learning_enabled)
    policy["budget_tier"] = str(budget_tier or "normal")
    # Submit the actual local processing job.
    await file.seek(0)
    files = {"file": (file_name, file.file, content_type)}
    local_payload = await _aurora_request_json(
        "POST",
        "/api/aurora/upload",
        files=files,
        data={
            "mode": mode,
            "priority": priority,
            "export_options": export_options,
        },
        timeout=120.0,
    )
    local_job_id = str(local_payload.get("job_id") or "")
    if not local_job_id:
        raise HTTPException(status_code=502, detail="Smart process failed: local job_id missing")
    run_id = _smart_new_run_id()
    now = _smart_now_iso()
    run: Dict[str, Any] = {
        "run_id": run_id,
        "created_at": now,
        "updated_at": now,
        "status": "processing",
        "phase": "local_processing",
        "media_type": media_type,
        "selected_stack": None,
        "requested": {
            "mode": mode,
            "priority": priority,
            "export_options": export_options,
            "strategy": strategy,
            "prefer_quality": bool(prefer_quality),
            "budget_tier": budget_tier,
            "learning_enabled": bool(learning_enabled),
        },
        "policy": policy,
        "analysis_summary": _smart_analysis_features(analysis),
        "analysis": analysis if isinstance(analysis, dict) else None,
        "local": {
            "job_id": local_job_id,
            "status": "queued",
            "submit_payload": {
                "status_url": f"/api/aurora/status/{quote(local_job_id, safe='')}",
                "result_url": f"/api/aurora/result/{quote(local_job_id, safe='')}",
            },
        },
        "kling": {
            "enabled": chosen_strategy == "local_then_kling" and media_type == "video",
            "status": "pending",
            "prompt": kling_prompt,
            "negative_prompt": kling_negative_prompt,
            "mode": kling_mode,
            "duration": kling_duration,
            "cfg_scale": kling_cfg_scale,
        },
        "audit": [],
    }
    _smart_append_audit(
        run,
        "smart.submitted",
        {
            "local_job_id": local_job_id,
            "media_type": media_type,
            "strategy": chosen_strategy,
            "score": policy.get("score"),
        },
    )
    _aurora_smart_runs[run_id] = run
    _smart_persist_runs()
    try:
        # Background task drives the run through local (and optionally Kling).
        asyncio.create_task(_smart_monitor_run(run_id))
    except Exception as exc:
        _smart_append_audit(run, "monitor.spawn.error", {"error": str(exc)[:220]})
        _smart_persist_runs()
    return {
        "smart_run_id": run_id,
        "status": run.get("status"),
        "phase": run.get("phase"),
        "media_type": media_type,
        "local_job_id": local_job_id,
        "policy": policy,
        "smart_status_url": f"/api/aurora/process-smart/{quote(run_id, safe='')}",
        "local_status_url": f"/api/aurora/status/{quote(local_job_id, safe='')}",
        "local_result_url": f"/api/aurora/result/{quote(local_job_id, safe='')}",
    }
@app.get("/api/aurora/process-smart")
async def api_aurora_process_smart_list(
    limit: int = Query(default=20, ge=1, le=200),
    status: Optional[str] = Query(default=None),
) -> Dict[str, Any]:
    """List smart runs (newest first), optionally filtered by status.

    Returns compact per-run summaries plus the filtered total count.
    """
    _smart_load_runs_from_disk()
    requested = str(status or "").strip().lower()
    rows = []
    for run in _aurora_smart_runs.values():
        if not isinstance(run, dict):
            continue
        run_status = str(run.get("status") or "")
        if requested and run_status.lower() != requested:
            continue
        local = run.get("local") if isinstance(run.get("local"), dict) else {}
        kling = run.get("kling") if isinstance(run.get("kling"), dict) else {}
        rows.append(
            {
                "run_id": run.get("run_id"),
                "status": run_status,
                "phase": run.get("phase"),
                "media_type": run.get("media_type"),
                "strategy": (run.get("policy") or {}).get("strategy") if isinstance(run.get("policy"), dict) else None,
                "selected_stack": run.get("selected_stack"),
                "created_at": run.get("created_at"),
                "updated_at": run.get("updated_at"),
                "local_job_id": local.get("job_id"),
                "local_status": local.get("status"),
                "kling_status": kling.get("status"),
            }
        )
    # Newest first by ISO created_at (lexicographic == chronological).
    rows.sort(key=lambda x: str(x.get("created_at") or ""), reverse=True)
    return {"runs": rows[:limit], "count": min(limit, len(rows)), "total": len(rows)}
@app.get("/api/aurora/process-smart/{run_id}")
async def api_aurora_process_smart_status(run_id: str) -> Dict[str, Any]:
    """Return the full stored smart-run record, or 404 when unknown."""
    record = await _smart_fetch_run_status(run_id)
    if not record:
        raise HTTPException(status_code=404, detail="smart run not found")
    return record
@app.post("/api/aurora/process-smart/{run_id}/feedback")
async def api_aurora_process_smart_feedback(
    run_id: str,
    payload: Optional[Dict[str, Any]] = Body(default=None),
) -> Dict[str, Any]:
    """Attach operator feedback (score / selected stack / notes) to a run.

    A numeric score updates the learned average for the run's strategy,
    clamped to 1..5 for the policy update (the raw value is what gets stored
    in the feedback record).  Raises 404 when the run id is unknown.
    """
    run = await _smart_fetch_run_status(run_id)
    if not run:
        raise HTTPException(status_code=404, detail="smart run not found")
    body = payload if isinstance(payload, dict) else {}
    score_raw = body.get("score")
    score: Optional[float] = None
    try:
        if score_raw is not None:
            score = float(score_raw)
    except Exception:
        # Non-numeric scores are silently dropped.
        score = None
    selected_stack = str(body.get("selected_stack") or "").strip().lower() or None
    notes = str(body.get("notes") or "").strip()
    feedback = {
        "ts": _smart_now_iso(),
        "score": score,
        "selected_stack": selected_stack,
        "notes": notes[:1000] if notes else None,
    }
    run["feedback"] = feedback
    strategy = str((run.get("policy") or {}).get("strategy") or "local_only")
    if score is not None:
        # Clamp only for the policy learner; feedback keeps the raw score.
        score = max(1.0, min(5.0, score))
        _smart_update_strategy_score(strategy, score)
    if selected_stack in {"local", "kling"}:
        run["selected_stack"] = selected_stack
    _smart_append_audit(run, "feedback.received", {"score": score, "selected_stack": selected_stack})
    _smart_persist_runs()
    return {
        "ok": True,
        "run_id": run_id,
        "feedback": feedback,
        "policy": _aurora_smart_policy,
    }
@app.get("/api/aurora/process-smart/policy/stats")
async def api_aurora_process_smart_policy_stats() -> Dict[str, Any]:
    """Expose the learned per-strategy statistics (wins/losses/avg score)."""
    _smart_load_policy_from_disk()
    return _aurora_smart_policy
@app.post("/api/aurora/analyze")
async def api_aurora_analyze(file: UploadFile = File(...)) -> Dict[str, Any]:
    """Proxy a media file to Aurora's analyzer and return its findings."""
    # Rewind in case the stream was already consumed upstream.
    await file.seek(0)
    files = {
        "file": (
            file.filename or "upload.bin",
            file.file,
            file.content_type or "application/octet-stream",
        )
    }
    return await _aurora_request_json(
        "POST",
        "/api/aurora/analyze",
        files=files,
        timeout=120.0,
        retries=2,
        retry_backoff_sec=0.35,
    )
@app.post("/api/aurora/audio/analyze")
async def api_aurora_audio_analyze(file: UploadFile = File(...)) -> Dict[str, Any]:
    """Proxy an audio file to Aurora's audio analyzer."""
    # Rewind in case the stream was already consumed upstream.
    await file.seek(0)
    files = {
        "file": (
            file.filename or "upload_audio.bin",
            file.file,
            file.content_type or "application/octet-stream",
        )
    }
    return await _aurora_request_json(
        "POST",
        "/api/aurora/audio/analyze",
        files=files,
        timeout=120.0,
        retries=2,
        retry_backoff_sec=0.35,
    )
@app.post("/api/aurora/audio/process")
async def api_aurora_audio_process(
    file: UploadFile = File(...),
    mode: str = Form("tactical"),
    priority: str = Form("speech"),
    export_options: str = Form(""),
) -> Dict[str, Any]:
    """Submit an audio file for Aurora processing.

    Adds console status/result/cancel URLs when a ``job_id`` is returned.
    """
    # Rewind in case the stream was already consumed upstream.
    await file.seek(0)
    files = {
        "file": (
            file.filename or "upload_audio.bin",
            file.file,
            file.content_type or "application/octet-stream",
        )
    }
    payload = await _aurora_request_json(
        "POST",
        "/api/aurora/audio/process",
        files=files,
        data={
            "mode": mode,
            "priority": priority,
            "export_options": export_options,
        },
        timeout=120.0,
        retries=2,
        retry_backoff_sec=0.35,
    )
    job_id = str(payload.get("job_id") or "")
    if job_id:
        payload["status_url"] = f"/api/aurora/status/{job_id}"
        payload["result_url"] = f"/api/aurora/result/{job_id}"
        payload["cancel_url"] = f"/api/aurora/cancel/{job_id}"
    return payload
@app.post("/api/aurora/reprocess/{job_id}")
async def api_aurora_reprocess(
    job_id: str,
    payload: Optional[Dict[str, Any]] = Body(default=None),
) -> Dict[str, Any]:
    """Forward a reprocess request for *job_id* with the supplied JSON body."""
    request_body = payload if isinstance(payload, dict) else {}
    encoded_id = quote(job_id, safe="")
    return await _aurora_request_json(
        "POST",
        f"/api/aurora/reprocess/{encoded_id}",
        json_body=request_body,
        timeout=120.0,
        retries=2,
        retry_backoff_sec=0.35,
    )
@app.post("/api/aurora/chat")
async def api_aurora_chat(payload: Optional[Dict[str, Any]] = Body(default=None)) -> Dict[str, Any]:
    """Relay a chat payload to Aurora's chat endpoint."""
    chat_body = payload if isinstance(payload, dict) else {}
    response = await _aurora_request_json(
        "POST",
        "/api/aurora/chat",
        json_body=chat_body,
        timeout=30.0,
        retries=1,
        retry_backoff_sec=0.2,
    )
    return response
@app.get("/api/aurora/status/{job_id}")
async def api_aurora_status(job_id: str) -> Dict[str, Any]:
    """Proxy Aurora job status, enriched with live filesystem progress.

    For jobs still ``processing``, the upstream payload is augmented with
    frame counters scanned from Aurora's work directories: progress is
    bumped (never lowered), live fps / ETA are computed from frame-rate
    samples, a stale ``current_stage`` is patched, and the last known
    timing snapshot is persisted so it survives restarts.
    """
    _aurora_load_live_last_from_disk()
    payload = await _aurora_request_json(
        "GET",
        f"/api/aurora/status/{quote(job_id, safe='')}",
        timeout=20.0,
        retries=8,
        retry_backoff_sec=0.35,
    )
    if not isinstance(payload, dict):
        return payload
    # Only augment while the job is actively processing.
    if str(payload.get("status", "")).lower() != "processing":
        return payload
    live = _aurora_live_fs_frame(job_id)
    if not live:
        return payload
    parsed = _parse_stage_frame(str(payload.get("current_stage", "")))
    live_frame = int(live.get("frame", -1))
    if live_frame < 0:
        return payload
    # Prefer the total from the upstream stage text; fall back to raw count.
    total = int(parsed.get("total", -1))
    if total <= 0:
        total = int(live.get("total", -1))
    if total > 0:
        # Clamp to 1..99 so the UI never shows 0% or a premature 100%.
        live_progress = int(max(1, min(99, round((live_frame / max(1, total)) * 100))))
        payload["progress"] = max(int(payload.get("progress") or 0), live_progress)
    live_stats = _aurora_record_sample(job_id, live_frame, total)
    if live_stats:
        fps = float(live_stats["fps"])
        payload["live_fps"] = round(fps, 3)
        payload["eta_confidence"] = live_stats["confidence"]
        if total > 0 and live_frame < total:
            eta_calc = int(max(0, round((total - live_frame) / max(0.01, fps))))
            payload["eta_seconds"] = eta_calc
            elapsed = payload.get("elapsed_seconds")
            if isinstance(elapsed, (int, float)):
                payload["estimated_total_seconds"] = int(max(0, round(float(elapsed) + eta_calc)))
            _aurora_live_last[job_id] = {
                "live_fps": payload.get("live_fps"),
                "eta_seconds": payload.get("eta_seconds"),
                "estimated_total_seconds": payload.get("estimated_total_seconds"),
                "eta_confidence": payload.get("eta_confidence"),
            }
            _aurora_persist_live_last_to_disk()
    else:
        # No fresh fps sample — reuse the last persisted timing snapshot.
        prev = _aurora_live_last.get(job_id)
        if prev:
            payload["live_fps"] = prev.get("live_fps")
            payload["eta_seconds"] = prev.get("eta_seconds", payload.get("eta_seconds"))
            payload["estimated_total_seconds"] = prev.get("estimated_total_seconds", payload.get("estimated_total_seconds"))
            payload["eta_confidence"] = prev.get("eta_confidence")
    # If upstream stage/progress is stale, patch with live filesystem progress.
    if live_frame > int(parsed.get("current", -1)):
        if total > 0:
            if live_stats:
                payload["current_stage"] = f"enhancing frame {live_frame}/{total} (live {payload['live_fps']} fps)"
            else:
                payload["current_stage"] = f"enhancing frame {live_frame}/{total} (live fs)"
        else:
            payload["current_stage"] = f"enhancing frame {live_frame} (live fs)"
        payload["live_frame"] = live_frame
        payload["live_total_frames"] = total if total > 0 else None
    else:
        # Even when upstream stage text already moved, expose live counters for UI.
        payload["live_frame"] = live_frame
        payload["live_total_frames"] = total if total > 0 else None
    # Persist last known timing even if fps was not recalculated this poll.
    snapshot = _aurora_live_last.get(job_id, {})
    changed = False
    for key in ("live_fps", "eta_seconds", "estimated_total_seconds", "eta_confidence"):
        val = payload.get(key)
        if val is not None and snapshot.get(key) != val:
            snapshot[key] = val
            changed = True
    if changed:
        _aurora_live_last[job_id] = snapshot
        _aurora_persist_live_last_to_disk()
    return payload
def _aurora_coerce_dir(path_value: Any) -> Optional[Path]:
if path_value is None:
return None
raw = str(path_value).strip()
if not raw:
return None
try:
p = Path(raw).expanduser().resolve()
except Exception:
return None
if p.exists() and p.is_file():
p = p.parent
if not p.exists() or not p.is_dir():
return None
return p
async def _aurora_resolve_job_folder(job_id: str) -> Optional[Path]:
    """Find the on-disk output directory for an Aurora job.

    Collects candidate paths from the job's status and result ``storage``
    blocks (both fetched best-effort), then falls back to the conventional
    ``AURORA_DATA_DIR/outputs/<job_id>`` location.  Returns the first
    candidate that coerces to an existing directory, else None.
    """
    candidates: List[Any] = []
    try:
        st = await _aurora_request_json("GET", f"/api/aurora/status/{quote(job_id, safe='')}", timeout=12.0)
        storage = st.get("storage") if isinstance(st, dict) else None
        if isinstance(storage, dict):
            candidates.extend(
                [
                    storage.get("output_dir"),
                    storage.get("upload_dir"),
                    storage.get("input_path"),
                ]
            )
    except Exception:
        pass
    try:
        res = await _aurora_request_json("GET", f"/api/aurora/result/{quote(job_id, safe='')}", timeout=12.0)
        storage = res.get("storage") if isinstance(res, dict) else None
        if isinstance(storage, dict):
            candidates.extend(
                [
                    storage.get("output_dir"),
                    storage.get("upload_dir"),
                    storage.get("input_path"),
                ]
            )
    except Exception:
        pass
    # Conventional fallback location inside the Aurora data dir.
    candidates.append(AURORA_DATA_DIR / "outputs" / job_id)
    for c in candidates:
        p = _aurora_coerce_dir(c)
        if p:
            return p
    return None
@app.get("/api/aurora/folder/{job_id}")
async def api_aurora_folder(job_id: str) -> Dict[str, Any]:
    """Resolve and return the on-disk output folder for a job (404 if absent)."""
    folder = await _aurora_resolve_job_folder(job_id)
    if not folder:
        raise HTTPException(status_code=404, detail="Aurora output folder not found")
    folder_str = str(folder)
    return {
        "ok": True,
        "job_id": job_id,
        "folder_path": folder_str,
        "folder_url": f"file://{folder_str}",
    }
@app.post("/api/aurora/folder/{job_id}/open")
async def api_aurora_folder_open(job_id: str) -> Dict[str, Any]:
    """Open the job's output folder in the host OS file manager.

    Uses ``open`` on macOS, ``os.startfile`` on Windows, and ``xdg-open``
    elsewhere.  Raises 404 when the folder cannot be resolved and 500 when
    the platform opener fails.
    """
    folder = await _aurora_resolve_job_folder(job_id)
    if not folder:
        raise HTTPException(status_code=404, detail="Aurora output folder not found")
    cmd: Optional[List[str]] = None
    if sys.platform == "darwin":
        cmd = ["open", str(folder)]
    elif os.name == "nt":
        try:
            os.startfile(str(folder))  # type: ignore[attr-defined]
        except Exception as e:
            # Chain the cause so the original error survives in logs/traces.
            raise HTTPException(status_code=500, detail=f"Failed to open folder: {str(e)[:200]}") from e
    else:
        cmd = ["xdg-open", str(folder)]
    if cmd is not None:
        try:
            # Bounded timeout so a wedged opener cannot hang the request worker
            # indefinitely (TimeoutExpired is caught by the handler below).
            subprocess.run(
                cmd,
                check=False,
                timeout=15,
                stdout=subprocess.DEVNULL,
                stderr=subprocess.DEVNULL,
            )
        except Exception as e:
            raise HTTPException(status_code=500, detail=f"Failed to open folder: {str(e)[:200]}") from e
    return {"ok": True, "job_id": job_id, "folder_path": str(folder)}
@app.get("/api/aurora/jobs")
async def api_aurora_jobs(
    limit: int = Query(default=30, ge=1, le=200),
    status: Optional[str] = Query(default=None),
) -> Dict[str, Any]:
    """Proxy the Aurora jobs list, forwarding the limit/status filters."""
    path = f"/api/aurora/jobs?limit={limit}"
    status_value = (status or "").strip()
    if status_value:
        path = f"{path}&status={quote(status_value, safe=',')}"
    return await _aurora_request_json(
        "GET",
        path,
        timeout=20.0,
        retries=3,
        retry_backoff_sec=0.25,
    )
@app.get("/api/aurora/result/{job_id}")
async def api_aurora_result(job_id: str) -> Dict[str, Any]:
    """Proxy the Aurora result payload for a single job."""
    encoded = quote(job_id, safe="")
    return await _aurora_request_json(
        "GET",
        f"/api/aurora/result/{encoded}",
        timeout=20.0,
        retries=4,
        retry_backoff_sec=0.35,
    )
@app.get("/api/aurora/quality/{job_id}")
async def api_aurora_quality(
    job_id: str,
    refresh: bool = Query(default=False),
) -> Dict[str, Any]:
    """Proxy per-job quality metrics; the refresh flag is forwarded verbatim."""
    flag = "true" if refresh else "false"
    target = f"/api/aurora/quality/{quote(job_id, safe='')}?refresh={flag}"
    return await _aurora_request_json(
        "GET",
        target,
        timeout=20.0,
        retries=4,
        retry_backoff_sec=0.35,
    )
@app.get("/api/aurora/compare/{job_id}")
async def api_aurora_compare(job_id: str) -> Dict[str, Any]:
    """Before/after comparison with full metadata for a completed job.

    Merges upstream status/result payloads with local ffprobe metadata for
    the input/output media, extracted preview frames, and face/plate
    detections.  The /result fetch is best-effort: on failure the response
    degrades to status-only data.
    """
    status = await _aurora_request_json(
        "GET", f"/api/aurora/status/{quote(job_id, safe='')}", timeout=15.0, retries=3
    )
    result = {}
    try:
        result = await _aurora_request_json(
            "GET", f"/api/aurora/result/{quote(job_id, safe='')}", timeout=15.0, retries=2
        )
    except Exception:
        pass
    meta = status.get("metadata") or {}
    vid = meta.get("video") or {}
    storage = status.get("storage") or {}
    # Result payload wins over status for output files / processing log.
    output_files = result.get("output_files") or status.get("output_files") or []
    proc_log = result.get("processing_log") or []
    input_path = storage.get("input_path", "")
    output_dir = storage.get("output_dir", "")
    # "Before" starts from upstream-reported video metadata...
    before: Dict[str, Any] = {
        "file_name": status.get("file_name") or (input_path.rsplit("/", 1)[-1] if input_path else ""),
        "resolution": f"{vid.get('width', '?')}x{vid.get('height', '?')}" if vid.get("width") else "",
        "width": vid.get("width"),
        "height": vid.get("height"),
        "duration_s": vid.get("duration_seconds"),
        "fps": vid.get("fps"),
        "frame_count": vid.get("frame_count"),
        "codec": "",
        "file_size_mb": None,
    }
    if input_path:
        inp = Path(input_path)
        if inp.exists():
            before["file_size_mb"] = round(inp.stat().st_size / (1024 * 1024), 2)
        # ...and is refined by a local ffprobe when the input file is readable.
        _probe = _ffprobe_quick(inp) if inp.exists() else {}
        if _probe:
            before["resolution"] = _probe.get("resolution", before["resolution"])
            before["width"] = _probe.get("width", before["width"])
            before["height"] = _probe.get("height", before["height"])
            before["duration_s"] = _probe.get("duration_s", before["duration_s"])
            before["fps"] = _probe.get("fps", before["fps"])
            before["frame_count"] = _probe.get("frame_count", before["frame_count"])
            before["codec"] = _probe.get("codec", "")
    # Pick the first named video/photo output as the "after" artifact.
    result_file = None
    for f in output_files:
        if (f.get("type") == "video" or f.get("type") == "photo") and f.get("name"):
            result_file = f
            break
    after: Dict[str, Any] = {
        "file_name": result_file["name"] if result_file else "",
        "resolution": "",
        "width": None,
        "height": None,
        "duration_s": None,
        "fps": None,
        "frame_count": None,
        "codec": "",
        "file_size_mb": None,
        "download_url": (result_file or {}).get("url"),
    }
    output_media_path: Optional[Path] = None
    if result_file and output_dir:
        out_path = Path(output_dir) / result_file["name"]
        if out_path.exists():
            output_media_path = out_path
            after["file_size_mb"] = round(out_path.stat().st_size / (1024 * 1024), 2)
            _probe = _ffprobe_quick(out_path)
            if _probe:
                after["resolution"] = _probe.get("resolution", "")
                after["width"] = _probe.get("width")
                after["height"] = _probe.get("height")
                after["duration_s"] = _probe.get("duration_s")
                after["fps"] = _probe.get("fps")
                after["frame_count"] = _probe.get("frame_count")
                after["codec"] = _probe.get("codec", "")
    # Summarize the processing log: aggregate face count plus one row per step.
    faces_total = 0
    enhance_steps = []
    for step in proc_log:
        det = step.get("details") or {}
        if det.get("faces_detected_total") is not None:
            faces_total += det["faces_detected_total"]
        enhance_steps.append({
            "step": step.get("step", "?"),
            "agent": step.get("agent", "?"),
            "model": step.get("model", "?"),
            "time_ms": step.get("time_ms"),
        })
    frame_preview = _aurora_ensure_compare_frame_preview(
        job_id=job_id,
        media_type=str(status.get("media_type") or ""),
        input_path=Path(input_path) if input_path else None,
        output_path=output_media_path,
        output_dir=Path(output_dir) if output_dir else None,
    )
    detections = await _aurora_build_compare_detections(
        media_type=str(status.get("media_type") or ""),
        output_dir=Path(output_dir) if output_dir else None,
        frame_preview=frame_preview,
        fps=before.get("fps") or after.get("fps"),
    )
    return {
        "job_id": job_id,
        "status": status.get("status"),
        "mode": status.get("mode"),
        "media_type": status.get("media_type"),
        "elapsed_seconds": status.get("elapsed_seconds"),
        "before": before,
        "after": after,
        "faces_detected": faces_total,
        "enhance_steps": enhance_steps,
        "frame_preview": frame_preview,
        "detections": detections,
        "folder_path": output_dir,
        "input_path": input_path,
    }
def _aurora_extract_frame_preview(source: Path, target: Path, *, second: float = 1.0) -> bool:
"""Write a JPEG preview frame for image/video sources."""
if not source.exists():
return False
target.parent.mkdir(parents=True, exist_ok=True)
ext = source.suffix.lower()
if ext in {".jpg", ".jpeg", ".png", ".webp", ".bmp", ".tif", ".tiff"}:
try:
target.write_bytes(source.read_bytes())
return True
except Exception:
return False
ffmpeg = [
"ffmpeg",
"-hide_banner",
"-loglevel",
"error",
"-y",
"-ss",
f"{max(0.0, float(second)):.3f}",
"-i",
str(source),
"-frames:v",
"1",
"-q:v",
"2",
str(target),
]
try:
run = subprocess.run(ffmpeg, capture_output=True, text=True, timeout=20)
if run.returncode == 0 and target.exists() and target.stat().st_size > 0:
return True
except Exception:
pass
# Fallback for short videos / odd timestamps.
ffmpeg_fallback = ffmpeg[:]
ffmpeg_fallback[6] = "0.0"
try:
run = subprocess.run(ffmpeg_fallback, capture_output=True, text=True, timeout=20)
return run.returncode == 0 and target.exists() and target.stat().st_size > 0
except Exception:
return False
def _aurora_ensure_compare_frame_preview(
    *,
    job_id: str,
    media_type: str,
    input_path: Optional[Path],
    output_path: Optional[Path],
    output_dir: Optional[Path],
) -> Optional[Dict[str, Any]]:
    """Ensure before/after preview JPEGs exist for a job; return their URLs.

    Returns None when any required path is missing or extraction produced an
    empty file.
    """
    # All three paths must exist before a comparison can be rendered.
    for required in (output_dir, input_path, output_path):
        if not required or not required.exists():
            return None
    before_path = output_dir / "_compare_before.jpg"
    after_path = output_dir / "_compare_after.jpg"
    # Sample one second in for video; frame 0 for stills.
    ts = 1.0 if media_type == "video" else 0.0
    for src, dst in ((input_path, before_path), (output_path, after_path)):
        if not dst.exists() or dst.stat().st_size == 0:
            _aurora_extract_frame_preview(src, dst, second=ts)
    for preview in (before_path, after_path):
        if not preview.exists() or preview.stat().st_size <= 0:
            return None
    quoted_job = quote(job_id, safe="")
    return {
        "timestamp_sec": ts,
        "before_url": f"/api/aurora/files/{quoted_job}/{quote('_compare_before.jpg', safe='')}",
        "after_url": f"/api/aurora/files/{quoted_job}/{quote('_compare_after.jpg', safe='')}",
    }
def _aurora_bbox_xyxy(raw_bbox: Any) -> Optional[List[int]]:
if not isinstance(raw_bbox, (list, tuple)) or len(raw_bbox) < 4:
return None
try:
x1 = int(float(raw_bbox[0]))
y1 = int(float(raw_bbox[1]))
x2 = int(float(raw_bbox[2]))
y2 = int(float(raw_bbox[3]))
except Exception:
return None
if x2 < x1:
x1, x2 = x2, x1
if y2 < y1:
y1, y2 = y2, y1
if x2 <= x1 or y2 <= y1:
return None
return [x1, y1, x2, y2]
def _aurora_image_dims(path: Path) -> Optional[Dict[str, int]]:
    """Return {"width", "height"} of an image file, or None when unreadable."""
    if cv2 is None or not path.exists():
        return None
    try:
        loaded = cv2.imread(str(path), cv2.IMREAD_COLOR)
        if loaded is None:
            return None
        height, width = loaded.shape[:2]
        if width <= 0 or height <= 0:
            return None
        return {"width": int(width), "height": int(height)}
    except Exception:
        return None
def _aurora_detect_faces_from_preview(path: Path) -> List[Dict[str, Any]]:
    """Detect faces in a preview JPEG using OpenCV's Haar cascade.

    Returns up to 40 detections as {"bbox": [x1, y1, x2, y2], "confidence"}.
    "confidence" is a heuristic derived from Laplacian sharpness of the face
    region (clamped to 0.5..0.99), not a classifier score.  Returns [] when
    cv2 is unavailable, the file is missing, or detection fails.
    """
    if cv2 is None or not path.exists():
        return []
    try:
        frame = cv2.imread(str(path), cv2.IMREAD_COLOR)
        if frame is None:
            return []
        gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
        cascade_path = Path(cv2.data.haarcascades) / "haarcascade_frontalface_default.xml"
        cascade = cv2.CascadeClassifier(str(cascade_path))
        if cascade.empty():
            return []
        faces = cascade.detectMultiScale(gray, scaleFactor=1.1, minNeighbors=4, minSize=(20, 20))
        out: List[Dict[str, Any]] = []
        for (x, y, w, h) in faces[:40]:
            roi = gray[y : y + h, x : x + w]
            # Laplacian variance ~ sharpness; sharper regions score higher.
            lap = float(cv2.Laplacian(roi, cv2.CV_64F).var()) if roi.size > 0 else 0.0
            conf = max(0.5, min(0.99, 0.55 + (lap / 400.0)))
            out.append(
                {
                    "bbox": [int(x), int(y), int(x + w), int(y + h)],
                    "confidence": round(conf, 3),
                }
            )
        return out
    except Exception:
        return []
async def _aurora_detect_faces_via_service(path: Path) -> List[Dict[str, Any]]:
    """Ask the Aurora service to detect faces in an image file.

    Used as a fallback when local OpenCV detection finds nothing.  Upstream
    bboxes arrive as (x, y, w, h) and are converted to [x1, y1, x2, y2]; at
    most 60 faces are returned.  Any transport/parse failure yields [].
    """
    if not path.exists():
        return []
    mime = mimetypes.guess_type(path.name)[0] or "application/octet-stream"
    timeout = httpx.Timeout(20.0, connect=6.0)
    try:
        async with httpx.AsyncClient(timeout=timeout) as client:
            with path.open("rb") as fh:
                files = {"file": (path.name, fh, mime)}
                resp = await client.post(f"{AURORA_SERVICE_URL}/api/aurora/analyze", files=files)
                if resp.status_code >= 400:
                    return []
                payload = resp.json() if resp.content else {}
    except Exception:
        return []
    faces_raw = payload.get("faces")
    if not isinstance(faces_raw, list):
        return []
    out: List[Dict[str, Any]] = []
    for item in faces_raw[:60]:
        if not isinstance(item, dict):
            continue
        bbox = item.get("bbox")
        if not isinstance(bbox, (list, tuple)) or len(bbox) < 4:
            continue
        try:
            x = int(float(bbox[0]))
            y = int(float(bbox[1]))
            w = int(float(bbox[2]))
            h = int(float(bbox[3]))
        except Exception:
            continue
        # Discard degenerate boxes (1px or smaller).
        if w <= 1 or h <= 1:
            continue
        conf: Optional[float]
        try:
            conf = round(float(item.get("confidence")), 3)
        except Exception:
            conf = None
        out.append(
            {
                # Convert (x, y, w, h) to corner form.
                "bbox": [x, y, x + w, y + h],
                "confidence": conf,
            }
        )
    return out
def _aurora_select_plate_detections(
    output_dir: Path,
    *,
    target_frame: Optional[int],
    max_items: int = 12,
) -> List[Dict[str, Any]]:
    """Load plate_detections.json and pick detections near a target frame.

    Prefers the per-frame "detections" list, falling back to "unique".  When
    target_frame is known and frame numbers are present, keeps detections
    inside an adaptive frame window ordered by proximity then confidence;
    otherwise returns the highest-confidence entries.  Returns [] on any
    missing or unreadable report.
    """
    report_path = output_dir / "plate_detections.json"
    if not report_path.exists():
        return []
    try:
        payload = json.loads(report_path.read_text(encoding="utf-8"))
    except Exception:
        return []
    source_items: List[Any]
    detections = payload.get("detections")
    unique = payload.get("unique")
    if isinstance(detections, list) and detections:
        source_items = detections
    elif isinstance(unique, list) and unique:
        source_items = unique
    else:
        return []
    parsed: List[Dict[str, Any]] = []
    for item in source_items:
        if not isinstance(item, dict):
            continue
        bbox = _aurora_bbox_xyxy(item.get("bbox"))
        if not bbox:
            continue
        text_value = str(item.get("text") or "").strip()
        conf_value: Optional[float]
        try:
            conf_value = round(float(item.get("confidence")), 3)
        except Exception:
            conf_value = None
        frame_value: Optional[int]
        try:
            frame_value = int(item.get("frame")) if item.get("frame") is not None else None
        except Exception:
            frame_value = None
        parsed.append(
            {
                "bbox": bbox,
                "text": text_value or None,
                "confidence": conf_value,
                "frame": frame_value,
            }
        )
    if not parsed:
        return []
    with_frame = [x for x in parsed if x.get("frame") is not None]
    if target_frame is not None and with_frame:
        # The window widens with the distance of the closest available frame,
        # so at least the nearest detections always survive the filter.
        min_distance = min(abs(int(x["frame"]) - int(target_frame)) for x in with_frame)
        keep = max(4, min_distance + 2)
        filtered = [x for x in with_frame if abs(int(x["frame"]) - int(target_frame)) <= keep]
        filtered.sort(key=lambda x: (abs(int(x["frame"]) - int(target_frame)), -(x.get("confidence") or 0.0)))
        return filtered[:max_items]
    # No frame anchor: rank by confidence, tie-break on text.
    parsed.sort(key=lambda x: (-(x.get("confidence") or 0.0), x.get("text") or ""))
    return parsed[:max_items]
async def _aurora_build_compare_detections(
    *,
    media_type: str,
    output_dir: Optional[Path],
    frame_preview: Optional[Dict[str, Any]],
    fps: Any,
) -> Optional[Dict[str, Any]]:
    """Build face/plate detection overlays for the before/after previews.

    Requires preview JPEGs to already exist (frame_preview produced by
    _aurora_ensure_compare_frame_preview).  Local OpenCV detection is tried
    first, with a per-side fallback to the Aurora service.  For video, the
    preview timestamp is mapped to a frame index via fps (15 is assumed when
    fps is missing or invalid) to select nearby plate detections.
    """
    if not output_dir or not output_dir.exists():
        return None
    if not isinstance(frame_preview, dict):
        return None
    before_path = output_dir / "_compare_before.jpg"
    after_path = output_dir / "_compare_after.jpg"
    before_faces = _aurora_detect_faces_from_preview(before_path)
    after_faces = _aurora_detect_faces_from_preview(after_path)
    if not before_faces and before_path.exists():
        before_faces = await _aurora_detect_faces_via_service(before_path)
    if not after_faces and after_path.exists():
        after_faces = await _aurora_detect_faces_via_service(after_path)
    before_size = _aurora_image_dims(before_path)
    after_size = _aurora_image_dims(after_path)
    target_ts = float(frame_preview.get("timestamp_sec") or 0.0)
    target_frame: Optional[int] = None
    if str(media_type).lower() == "video":
        try:
            fps_val = float(fps)
        except Exception:
            fps_val = 15.0
        if fps_val <= 0:
            fps_val = 15.0
        target_frame = int(round(target_ts * fps_val))
    # Plates are resolved once for the job; the same list annotates both sides.
    plate_items = _aurora_select_plate_detections(output_dir, target_frame=target_frame)
    return {
        "target_timestamp_sec": target_ts if str(media_type).lower() == "video" else None,
        "target_frame": target_frame,
        "before": {
            "frame_size": before_size,
            "faces": before_faces,
            "plates": plate_items,
        },
        "after": {
            "frame_size": after_size,
            "faces": after_faces,
            "plates": plate_items,
        },
    }
def _ffprobe_quick(filepath: Path) -> Dict[str, Any]:
"""Quick ffprobe for resolution, codec, duration, fps, frame count."""
if not filepath.exists():
return {}
try:
import subprocess as _sp
raw = _sp.run(
["ffprobe", "-v", "quiet", "-print_format", "json", "-show_format", "-show_streams", str(filepath)],
capture_output=True, text=True, timeout=10
)
if raw.returncode != 0:
return {}
import json as _json
data = _json.loads(raw.stdout)
fmt = data.get("format") or {}
vs = [s for s in (data.get("streams") or []) if s.get("codec_type") == "video"]
if not vs:
return {"duration_s": round(float(fmt.get("duration", 0)), 2)}
v = vs[0]
w, h = v.get("width"), v.get("height")
rfr = v.get("r_frame_rate", "0/1").split("/")
fps = round(int(rfr[0]) / max(1, int(rfr[1])), 2) if len(rfr) == 2 else None
return {
"resolution": f"{w}x{h}" if w and h else "",
"width": w, "height": h,
"codec": v.get("codec_name", ""),
"duration_s": round(float(fmt.get("duration", 0)), 2),
"fps": fps,
"frame_count": int(v.get("nb_frames", 0)) or None,
}
except Exception:
return {}
@app.post("/api/aurora/cancel/{job_id}")
async def api_aurora_cancel(job_id: str) -> Dict[str, Any]:
    """Forward a cancel request for the given Aurora job."""
    target = f"/api/aurora/cancel/{quote(job_id, safe='')}"
    return await _aurora_request_json(
        "POST",
        target,
        timeout=20.0,
        retries=2,
        retry_backoff_sec=0.2,
    )
@app.post("/api/aurora/delete/{job_id}")
async def api_aurora_delete(
    job_id: str,
    purge_files: bool = Query(default=True),
) -> Dict[str, Any]:
    """Forward a delete request; purge_files is passed through to Aurora."""
    purge_flag = "true" if purge_files else "false"
    target = f"/api/aurora/delete/{quote(job_id, safe='')}?purge_files={purge_flag}"
    return await _aurora_request_json(
        "POST",
        target,
        timeout=30.0,
        retries=2,
        retry_backoff_sec=0.2,
    )
@app.get("/api/aurora/report/{job_id}.pdf")
async def api_aurora_report_pdf(job_id: str) -> StreamingResponse:
    """Stream PDF report from Aurora service without buffering in RAM.

    Tries the primary Aurora base URL, then the fallback; a DNS-style
    resolution failure moves on to the next base, any other error aborts with
    502.  The httpx client and response are always closed, including on
    connection errors (previously the client leaked in that path).
    """
    encoded_job = quote(job_id, safe="")
    paths = [AURORA_SERVICE_URL]
    if AURORA_FALLBACK_URL and AURORA_FALLBACK_URL not in paths:
        paths.append(AURORA_FALLBACK_URL)
    last_err = ""
    for base in paths:
        url = f"{base}/api/aurora/report/{encoded_job}.pdf"
        client = httpx.AsyncClient(timeout=120.0)
        try:
            r = await client.send(client.build_request("GET", url), stream=True)
            if r.status_code >= 400:
                body = (await r.aread()).decode(errors="replace")[:400]
                await r.aclose()
                await client.aclose()
                raise HTTPException(status_code=r.status_code, detail=body or f"Aurora report error {r.status_code}")
            disposition = r.headers.get("content-disposition", f'inline; filename="{job_id}_forensic_report.pdf"')
            async def _stream():
                # Response/client are released only after the body is consumed.
                try:
                    async for chunk in r.aiter_bytes(chunk_size=65536):
                        yield chunk
                finally:
                    await r.aclose()
                    await client.aclose()
            return StreamingResponse(
                _stream(),
                media_type="application/pdf",
                headers={"Content-Disposition": disposition, "Cache-Control": "no-store"},
            )
        except HTTPException:
            raise
        except Exception as e:
            # Fix: close the client on connection/transport errors too.
            await client.aclose()
            last_err = str(e)[:200]
            if "nodename nor servname provided" in str(e):
                continue
            raise HTTPException(status_code=502, detail=f"Aurora report proxy error: {last_err}")
    raise HTTPException(status_code=502, detail=f"Aurora report proxy error: {last_err or 'unavailable'}")
@app.get("/api/aurora/files/{job_id}/{file_name:path}")
async def api_aurora_file(job_id: str, file_name: str, request: Request) -> StreamingResponse:
    """Stream a job artifact from Aurora, preserving range/caching semantics.

    Conditional and range request headers are forwarded upstream and the
    relevant response headers passed back, so seeking and 304s work through
    the proxy.  Primary base URL is tried first; fallback is used on 5xx or
    connection errors, while 4xx are surfaced to the caller unchanged.
    """
    encoded_job = quote(job_id, safe="")
    encoded_name = quote(file_name, safe="")
    paths = [AURORA_SERVICE_URL]
    if AURORA_FALLBACK_URL and AURORA_FALLBACK_URL not in paths:
        paths.append(AURORA_FALLBACK_URL)
    last_err = ""
    for base in paths:
        url = f"{base}/api/aurora/files/{encoded_job}/{encoded_name}"
        client = httpx.AsyncClient(timeout=httpx.Timeout(10.0, read=300.0))
        try:
            # Forward conditional/range headers so upstream can honor seeks.
            upstream_headers: Dict[str, str] = {}
            for name in ("range", "if-range", "if-none-match", "if-modified-since"):
                value = request.headers.get(name)
                if value:
                    upstream_headers[name] = value
            resp = await client.send(client.build_request("GET", url, headers=upstream_headers), stream=True)
            if resp.status_code >= 400:
                body = (await resp.aread()).decode(errors="replace")[:400]
                await resp.aclose()
                await client.aclose()
                if resp.status_code >= 500:
                    # Server-side failure: remember it and try the next base.
                    last_err = f"Aurora {resp.status_code}: {body}"
                    continue
                raise HTTPException(status_code=resp.status_code, detail=body or f"Aurora file error {resp.status_code}")
            ct = resp.headers.get("content-type", "application/octet-stream")
            # Pass through headers that matter for caching and range playback.
            passthrough_headers: Dict[str, str] = {}
            for name in (
                "content-disposition",
                "content-length",
                "content-range",
                "accept-ranges",
                "etag",
                "last-modified",
                "cache-control",
            ):
                value = resp.headers.get(name)
                if value:
                    passthrough_headers[name] = value
            if "content-disposition" not in passthrough_headers:
                passthrough_headers["content-disposition"] = f'inline; filename="{Path(file_name).name}"'
            passthrough_headers.setdefault("cache-control", "no-store")
            async def _stream():
                # Response/client are closed only after the stream is consumed.
                try:
                    async for chunk in resp.aiter_bytes(chunk_size=65536):
                        yield chunk
                finally:
                    await resp.aclose()
                    await client.aclose()
            return StreamingResponse(
                _stream(),
                status_code=resp.status_code,
                media_type=ct,
                headers=passthrough_headers,
            )
        except HTTPException:
            raise
        except Exception as e:
            await client.aclose()
            last_err = str(e)[:200]
            # DNS resolution failure wording: treat as "try next candidate".
            if "nodename nor servname provided" in str(e):
                continue
            raise HTTPException(status_code=502, detail=f"Aurora file proxy error: {last_err}")
    raise HTTPException(status_code=502, detail=f"Aurora file proxy error: {last_err or 'unavailable'}")
class MediaImageGenerateBody(BaseModel):
    """Request body for POST /api/media/generate/image."""
    prompt: str
    negative_prompt: Optional[str] = None
    # Dimensions are clamped server-side to 256..2048 by the endpoint.
    width: int = 1024
    height: int = 1024
    # Clamped to 1..120 steps and 0.0..20.0 guidance by the endpoint.
    steps: int = 28
    guidance_scale: float = 4.0
    # Clamped to 30..900 seconds by the endpoint.
    timeout_s: int = 300
class MediaVideoGenerateBody(BaseModel):
    """Request body for POST /api/media/generate/video."""
    prompt: str
    # Clamped server-side to 1..8 seconds and 8..60 fps by the endpoint.
    seconds: int = 4
    fps: int = 24
    # Clamped to 1..120 steps by the endpoint.
    steps: int = 30
    # style/aspect_ratio are only used by the swapper fallback path.
    style: str = "cinematic"
    aspect_ratio: str = "16:9"
    # Clamped to 60..1200 seconds by the endpoint.
    timeout_s: int = 360
class MediaImageModelLoadBody(BaseModel):
    """Request body for POST /api/media/models/image/load."""
    # Name of the image model the swapper service should load.
    model: str
def _resolve_media_router_url() -> str:
    """Resolve the media router base URL.

    Explicit MEDIA_ROUTER_URL wins; otherwise NODA2's registered router URL,
    then the first registry entry's router_url, is used.
    """
    registry = load_nodes_registry()
    discovered = get_router_url("NODA2")
    if not discovered and registry:
        discovered = list(registry.values())[0].get("router_url", "")
    discovered = (discovered or "").rstrip("/")
    return MEDIA_ROUTER_URL or discovered
def _media_router_candidates() -> List[str]:
    """Build an ordered, de-duplicated list of router base URLs to try."""
    candidates: List[str] = []

    def _add(url: str) -> None:
        if url and url not in candidates:
            candidates.append(url)

    for raw in (_resolve_media_router_url(), MEDIA_ROUTER_FALLBACK_URL):
        if not raw:
            continue
        normalized = raw.strip().rstrip("/")
        _add(normalized)
        # A bare "router" hostname also gets a loopback variant appended,
        # presumably for when the docker-internal name does not resolve.
        if "://router:" in normalized or "://router/" in normalized:
            _add(normalized.replace("://router:", "://127.0.0.1:").replace("://router/", "://127.0.0.1/"))
    for port in ("9102", "8000"):
        _add(f"http://127.0.0.1:{port}")
    return candidates
async def _pick_media_router_url() -> str:
    """Return the first reachable media router candidate (probing /healthz)."""
    candidates = _media_router_candidates()
    for base in candidates:
        probe = await _probe_http(f"{base}/healthz", timeout=2.5)
        if probe.get("reachable"):
            return base
    # Nothing answered: fall back to the first candidate, or "" when none exist.
    return candidates[0] if candidates else ""
def _media_append_job(kind: str, payload: Dict[str, Any]) -> Dict[str, Any]:
    """Record a media job in the in-memory ring buffer and return the entry.

    Payload keys override the generated id/kind/ts fields when they collide.
    """
    entry: Dict[str, Any] = {
        "id": f"media_{kind}_{uuid.uuid4().hex[:10]}",
        "kind": kind,
        "ts": datetime.now(timezone.utc).isoformat(),
    }
    entry.update(payload)
    _media_recent_jobs.appendleft(entry)
    return entry
@app.get("/api/media/health")
async def api_media_health() -> Dict[str, Any]:
    """Aggregate health of media services plus the swapper's image models.

    All probes run concurrently; the image-model fetch is best-effort and
    degrades to an empty list on any failure.
    """
    router_url = await _pick_media_router_url()
    probes = await asyncio.gather(
        # When no router URL is known, substitute a synthetic failed probe.
        _probe_http(f"{router_url}/healthz") if router_url else asyncio.sleep(0, result={"reachable": False, "error": "router missing"}),
        _probe_http(f"{MEDIA_COMFY_AGENT_URL}/health"),
        _probe_http(f"{MEDIA_COMFY_UI_URL}/"),
        _probe_http(f"{MEDIA_SWAPPER_URL}/health"),
        _probe_http(f"{MEDIA_IMAGE_GEN_URL}/health"),
    )
    image_models: Dict[str, Any] = {"image_models": []}
    try:
        async with httpx.AsyncClient(timeout=10.0) as client:
            r = await client.get(f"{MEDIA_SWAPPER_URL}/image/models")
            if r.status_code < 400 and r.content:
                image_models = r.json()
    except Exception:
        image_models = {"image_models": []}
    return {
        "ok": True,
        "router_url": router_url,
        "services": {
            "router": probes[0],
            "comfy_agent": probes[1],
            "comfy_ui": probes[2],
            "swapper": probes[3],
            "image_gen": probes[4],
        },
        "image_models": image_models.get("image_models", []),
        "active_image_model": image_models.get("active_image_model"),
        "fallback_order": ["comfy", "swapper", "image-gen-service"],
    }
@app.get("/api/media/models/image")
async def api_media_image_models() -> Dict[str, Any]:
    """List image models known to the swapper service plus the active one.

    Swapper HTTP errors are mirrored with their status; transport failures
    become 502.
    """
    try:
        async with httpx.AsyncClient(timeout=15.0) as client:
            r = await client.get(f"{MEDIA_SWAPPER_URL}/image/models")
            if r.status_code >= 400:
                raise HTTPException(status_code=r.status_code, detail=r.text[:240] or "swapper error")
            data = r.json() if r.content else {}
            return {
                "ok": True,
                "image_models": data.get("image_models", []),
                "active_image_model": data.get("active_image_model"),
                "device": data.get("device"),
            }
    except HTTPException:
        raise
    except Exception as e:
        raise HTTPException(status_code=502, detail=f"Image models unavailable: {str(e)[:200]}")
@app.post("/api/media/models/image/load")
async def api_media_image_model_load(body: MediaImageModelLoadBody) -> Dict[str, Any]:
    """Ask the swapper service to load the named image model.

    Raises 400 on an empty model name; swapper errors are mirrored and
    transport failures become 502.
    """
    model = body.model.strip()
    if not model:
        raise HTTPException(status_code=400, detail="model is required")
    try:
        # Model loading can take a while; allow a generous 300s timeout.
        async with httpx.AsyncClient(timeout=300.0) as client:
            r = await client.post(f"{MEDIA_SWAPPER_URL}/image/models/{quote(model, safe='')}/load")
            if r.status_code >= 400:
                raise HTTPException(status_code=r.status_code, detail=r.text[:240] or "swapper load error")
            return {"ok": True, "result": r.json() if r.content else {"status": "ok"}}
    except HTTPException:
        raise
    except Exception as e:
        raise HTTPException(status_code=502, detail=f"Image model load failed: {str(e)[:200]}")
@app.post("/api/media/generate/image")
async def api_media_generate_image(body: MediaImageGenerateBody) -> Dict[str, Any]:
    """Generate an image via the Router's image_generate tool.

    Parameters are clamped server-side (size 256..2048, steps 1..120,
    guidance 0..20, timeout 30..900s).  The attempt is recorded in the
    in-memory job log whether it succeeds or not; failures become 502.
    """
    prompt = body.prompt.strip()
    if not prompt:
        raise HTTPException(status_code=400, detail="prompt is required")
    router_url = await _pick_media_router_url()
    if not router_url:
        raise HTTPException(status_code=503, detail="Router URL not configured")
    params = {
        "prompt": prompt,
        "negative_prompt": body.negative_prompt or "",
        "width": max(256, min(2048, int(body.width))),
        "height": max(256, min(2048, int(body.height))),
        "steps": max(1, min(120, int(body.steps))),
        "guidance_scale": max(0.0, min(20.0, float(body.guidance_scale))),
        "timeout_s": max(30, min(900, int(body.timeout_s))),
    }
    started = time.monotonic()
    # Client timeout is the tool budget plus 30s of slack for transport.
    response = await execute_tool(
        router_url,
        tool="image_generate",
        action="generate",
        params=params,
        agent_id="sofiia",
        timeout=float(params["timeout_s"] + 30),
        api_key=ROUTER_API_KEY,
    )
    ok = response.get("status") == "ok"
    result_data = response.get("data") or {}
    result_item = _media_append_job(
        "image",
        {
            "status": "ok" if ok else "failed",
            "provider": "router:image_generate",
            "prompt": prompt[:180],
            "duration_ms": int((time.monotonic() - started) * 1000),
            "result": result_data.get("result"),
            "has_image_base64": bool(result_data.get("image_base64")),
            # Router errors may be either dicts with "message" or plain values.
            "error": (response.get("error") or {}).get("message") if isinstance(response.get("error"), dict) else response.get("error"),
        },
    )
    if not ok:
        raise HTTPException(status_code=502, detail=f"Image generate failed: {result_item.get('error') or 'tool failed'}")
    return {"ok": True, "job": result_item, "tool_response": response}
@app.post("/api/media/generate/video")
async def api_media_generate_video(body: MediaVideoGenerateBody) -> Dict[str, Any]:
    """Generate a short video via the Router's comfy tool, with a fallback.

    Parameters are clamped server-side (seconds 1..8, fps 8..60, steps
    1..120, timeout 60..1200s).  When the Router tool fails, a direct
    swapper /video/generate call is attempted before reporting 502.  The
    attempt is always recorded in the in-memory job log.
    """
    prompt = body.prompt.strip()
    if not prompt:
        raise HTTPException(status_code=400, detail="prompt is required")
    router_url = await _pick_media_router_url()
    if not router_url:
        raise HTTPException(status_code=503, detail="Router URL not configured")
    params = {
        "prompt": prompt,
        "seconds": max(1, min(8, int(body.seconds))),
        "fps": max(8, min(60, int(body.fps))),
        "steps": max(1, min(120, int(body.steps))),
        "timeout_s": max(60, min(1200, int(body.timeout_s))),
    }
    started = time.monotonic()
    response = await execute_tool(
        router_url,
        tool="comfy_generate_video",
        action="generate",
        params=params,
        agent_id="sofiia",
        timeout=float(params["timeout_s"] + 30),
        api_key=ROUTER_API_KEY,
    )
    ok = response.get("status") == "ok"
    provider = "router:comfy_generate_video"
    fallback_payload: Dict[str, Any] = {}
    if not ok:
        # Fallback path: call the swapper service directly.
        try:
            async with httpx.AsyncClient(timeout=120.0) as client:
                r = await client.post(
                    f"{MEDIA_SWAPPER_URL}/video/generate",
                    json={
                        "prompt": prompt,
                        "duration": params["seconds"],
                        "style": body.style,
                        "aspect_ratio": body.aspect_ratio,
                    },
                )
                if r.status_code < 400:
                    fallback_payload = r.json() if r.content else {}
                    ok = True
                    provider = "swapper:video/generate"
        except Exception as e:
            fallback_payload = {"error": str(e)[:200]}
    result_item = _media_append_job(
        "video",
        {
            "status": "ok" if ok else "failed",
            "provider": provider,
            "prompt": prompt[:180],
            "duration_ms": int((time.monotonic() - started) * 1000),
            # Fallback payload (when present) replaces the router result.
            "result": (response.get("data") or {}).get("result") if not fallback_payload else fallback_payload,
            "error": None if ok else ((response.get("error") or {}).get("message") if isinstance(response.get("error"), dict) else response.get("error")),
        },
    )
    if not ok:
        raise HTTPException(status_code=502, detail=f"Video generate failed: {result_item.get('error') or 'tool failed'}")
    return {"ok": True, "job": result_item, "tool_response": response, "fallback_response": fallback_payload}
@app.get("/api/media/jobs")
async def api_media_jobs(limit: int = Query(default=20, ge=1, le=100)) -> Dict[str, Any]:
    """List the most recent in-memory media jobs (newest first)."""
    jobs = list(_media_recent_jobs)[:limit]
    return {"ok": True, "count": min(limit, len(_media_recent_jobs)), "jobs": jobs}
# ─── Chat (runtime contract) ─────────────────────────────────────────────────
@app.get("/api/chat/config")
async def api_chat_config() -> Dict[str, Any]:
    """Expose the chat/Ollama tuning values the console is configured with."""
    return {
        "preferred_model": SOFIIA_PREFERRED_CHAT_MODEL,
        "ollama": {
            "timeout_sec": SOFIIA_OLLAMA_TIMEOUT_SEC,
            "voice_timeout_sec": SOFIIA_OLLAMA_VOICE_TIMEOUT_SEC,
            "keep_alive": SOFIIA_OLLAMA_KEEP_ALIVE,
            "num_ctx": SOFIIA_OLLAMA_NUM_CTX,
            "num_thread": SOFIIA_OLLAMA_NUM_THREAD,
            "num_gpu": SOFIIA_OLLAMA_NUM_GPU,
        },
    }
class ChatSendBody(BaseModel):
    """Payload for the chat send endpoint."""
    message: str
    model: str = "ollama:qwen3:14b"
    node_id: str = "NODA2"
    project_id: Optional[str] = None
    session_id: Optional[str] = None
    user_id: Optional[str] = None
    # Prior conversation turns supplied by the client.
    history: List[Dict[str, Any]] = []
    # Voice routing hint — forwarded to Router as X-Voice-Profile header
    # Values: "voice_fast_uk" (default) | "voice_quality_uk"
    voice_profile: Optional[str] = None
# Single internal project under which all console chats are indexed.
CHAT_PROJECT_ID = "chats"
class ChatCreateBody(BaseModel):
    """Payload for creating (upserting) a chat thread."""
    agent_id: str
    node_id: str = "NODA2"
    source: str = "console"
    # External reference (e.g. an upstream chat key); defaults to "main".
    external_chat_ref: Optional[str] = None
    title: Optional[str] = None
class ChatMessageSendBody(BaseModel):
    """Payload for posting a message into an existing chat."""
    text: str
    attachments: List[Dict[str, Any]] = []
    project_id: Optional[str] = None
    session_id: Optional[str] = None
    user_id: Optional[str] = None
    # Optional routing/client metadata blobs passed through by the client.
    routing: Optional[Dict[str, Any]] = None
    client: Optional[Dict[str, Any]] = None
    idempotency_key: Optional[str] = None
def _make_chat_id(node_id: str, agent_id: str, source: str = "console", external_chat_ref: Optional[str] = None) -> str:
ext = (external_chat_ref or "main").strip() or "main"
return f"chat:{node_id.upper()}:{agent_id.strip().lower()}:{source.strip().lower()}:{ext}"
def _parse_chat_id(chat_id: str) -> Dict[str, str]:
raw = (chat_id or "").strip()
parts = raw.split(":", 4)
if len(parts) == 5 and parts[0] == "chat":
return {
"chat_id": raw,
"node_id": parts[1].upper(),
"agent_id": parts[2].lower(),
"source": parts[3].lower(),
"external_chat_ref": parts[4],
}
# Legacy fallback: treat arbitrary session_id as local NODA2 chat with sofiia
return {
"chat_id": raw,
"node_id": "NODA2",
"agent_id": "sofiia",
"source": "console",
"external_chat_ref": raw or "main",
}
async def _ensure_chat_project() -> None:
    """Create the shared chat-index project on first use (idempotent)."""
    existing = await _app_db.get_project(CHAT_PROJECT_ID)
    if existing:
        return
    await _app_db.create_project(
        name="Chats",
        description="Cross-node chat index for Sofiia Console",
        project_id=CHAT_PROJECT_ID,
    )
def _clean_chat_reply(text: str) -> str:
import re
cleaned = re.sub(r"<think>.*?</think>", "", text, flags=re.DOTALL | re.IGNORECASE)
if "<think>" in cleaned.lower():
cleaned = re.split(r"(?i)<think>", cleaned)[0]
return cleaned.strip()
def _cursor_encode(payload: Dict[str, Any]) -> str:
wrapped = {"v": 1, **payload}
raw = json.dumps(wrapped, separators=(",", ":"), ensure_ascii=True).encode("utf-8")
return base64.urlsafe_b64encode(raw).decode("ascii")
def _cursor_decode(cursor: Optional[str]) -> Dict[str, Any]:
if not cursor:
return {}
try:
decoded = base64.urlsafe_b64decode(cursor.encode("ascii")).decode("utf-8")
data = json.loads(decoded)
if not isinstance(data, dict):
return {}
# Backward compatibility: accept old cursors without "v".
if "v" not in data:
return data
# Current cursor format version.
if int(data.get("v") or 0) == 1:
out = dict(data)
out.pop("v", None)
return out
return {}
except Exception:
return {}
@app.get("/api/chats")
async def api_chats_list(
    request: Request,
    nodes: str = Query("NODA1,NODA2"),
    agent_id: Optional[str] = Query(None),
    q: Optional[str] = Query(None),
    limit: int = Query(50, ge=1, le=200),
    cursor: Optional[str] = Query(None),
    _auth: str = Depends(require_auth),
):
    """List chat threads with cursor pagination and node/agent/text filters.

    The cursor encodes (last_active, chat_id) of the last returned item.
    Filters are applied after the DB page fetch, so up to 5x `limit` sessions
    are pulled per request to keep pages reasonably full.
    """
    SOFIIA_CURSOR_REQUESTS_TOTAL.labels(resource="chats").inc()
    await _ensure_chat_project()
    node_filter = {n.strip().upper() for n in nodes.split(",") if n.strip()}
    cur = _cursor_decode(cursor)
    before_last_active = str(cur.get("last_active") or "").strip() or None
    before_chat_id = str(cur.get("chat_id") or "").strip() or None
    # Over-fetch so post-filtering still fills the page.
    fetch_limit = max(limit * 5, limit + 1)
    sessions = await _app_db.list_sessions_page(
        CHAT_PROJECT_ID,
        limit=fetch_limit,
        before_last_active=before_last_active,
        before_session_id=before_chat_id,
    )
    items: List[Dict[str, Any]] = []
    agent_filter = (agent_id or "").strip().lower()
    q_filter = (q or "").strip().lower()
    for s in sessions:
        sid = str(s.get("session_id") or "")
        if not sid:
            continue
        info = _parse_chat_id(sid)
        if node_filter and info["node_id"] not in node_filter:
            continue
        if agent_filter and info["agent_id"] != agent_filter:
            continue
        msgs = await _app_db.list_messages(sid, limit=200)
        last = msgs[-1] if msgs else None
        item = {
            "chat_id": sid,
            "title": (s.get("title") or f"{info['agent_id']}{info['node_id']}").strip(),
            "agent_id": info["agent_id"],
            "node_id": info["node_id"],
            "source": info["source"],
            "external_chat_ref": info["external_chat_ref"],
            "updated_at": s.get("last_active"),
            "last_message": (
                {
                    "message_id": last.get("msg_id"),
                    "role": last.get("role"),
                    # Preview only: truncate to 280 chars.
                    "text": (last.get("content") or "")[:280],
                    "ts": last.get("ts"),
                } if last else None
            ),
            "turn_count": s.get("turn_count", 0),
        }
        if q_filter:
            # Free-text filter over title, ids and last-message preview.
            hay = " ".join(
                [
                    item["title"],
                    item["agent_id"],
                    item["node_id"],
                    (item["last_message"] or {}).get("text", ""),
                ]
            ).lower()
            if q_filter not in hay:
                continue
        items.append(item)
        if len(items) >= limit:
            break
    next_cursor = None
    if items:
        # NOTE(review): a cursor is emitted even for the final page; callers
        # should use has_more to decide whether to keep paging.
        last_item = items[-1]
        next_cursor = _cursor_encode(
            {
                "last_active": last_item.get("updated_at"),
                "chat_id": last_item.get("chat_id"),
            }
        )
    # Heuristic: a full upstream fetch or a full page implies more may exist.
    has_more = len(sessions) >= fetch_limit or len(items) >= limit
    log_event(
        "chat.list",
        request_id=get_request_id(request),
        node_id=",".join(sorted(node_filter)) if node_filter else None,
        agent_id=(agent_id or None),
        cursor_present=bool(cursor),
        limit=limit,
        has_more=has_more,
        next_cursor_present=bool(next_cursor),
        status="ok",
    )
    return {
        "items": items,
        "count": len(items),
        "nodes": sorted(node_filter),
        "project_id": CHAT_PROJECT_ID,
        "next_cursor": next_cursor,
        "has_more": has_more,
    }
@app.post("/api/chats")
async def api_chat_create(body: ChatCreateBody, _auth: str = Depends(require_auth)):
    """Create (or upsert) a chat session and return its descriptor.

    The chat id is derived deterministically from node/agent/source, so
    repeated calls with the same routing coordinates hit the same session.
    """
    await _ensure_chat_project()
    chat_id = _make_chat_id(
        node_id=body.node_id,
        agent_id=body.agent_id,
        source=body.source,
        external_chat_ref=body.external_chat_ref,
    )
    parsed = _parse_chat_id(chat_id)
    # Fall back to a synthetic title when the client did not supply one.
    default_title = f"{parsed['agent_id']}{parsed['node_id']}{parsed['source']}"
    title = (body.title or default_title).strip()
    session = await _app_db.upsert_session(chat_id, project_id=CHAT_PROJECT_ID, title=title)
    chat_payload = {
        "chat_id": chat_id,
        "title": title,
        "agent_id": parsed["agent_id"],
        "node_id": parsed["node_id"],
        "source": parsed["source"],
        "external_chat_ref": parsed["external_chat_ref"],
        "updated_at": session.get("last_active"),
    }
    return {"ok": True, "chat": chat_payload}
@app.get("/api/chats/{chat_id}/messages")
async def api_chat_messages(
    chat_id: str,
    request: Request,
    limit: int = Query(100, ge=1, le=500),
    cursor: Optional[str] = Query(None),
    _auth: str = Depends(require_auth),
):
    """Return one page of a chat's messages, oldest-first, cursor-paginated.

    The DB is queried newest-first with ``limit + 1`` rows so a spare row
    signals that another page exists; the page is reversed before returning.
    """
    SOFIIA_CURSOR_REQUESTS_TOTAL.labels(resource="messages").inc()
    decoded = _cursor_decode(cursor)
    ts_anchor = str(decoded.get("ts") or "").strip() or None
    msg_anchor = str(decoded.get("message_id") or "").strip() or None
    fetched = await _app_db.list_messages_page(
        chat_id,
        limit=limit + 1,
        before_ts=ts_anchor,
        before_msg_id=msg_anchor,
    )
    has_more = len(fetched) > limit
    page = fetched[:limit]
    parsed = _parse_chat_id(chat_id)
    routing_meta = {
        "node_id": parsed["node_id"],
        "agent_id": parsed["agent_id"],
        "source": parsed["source"],
    }
    messages: List[Dict[str, Any]] = []
    for row in reversed(page):  # chronological order for the client
        messages.append(
            {
                "message_id": row.get("msg_id"),
                "chat_id": chat_id,
                "role": row.get("role"),
                "text": row.get("content", ""),
                "ts": row.get("ts"),
                "meta": dict(routing_meta),
            }
        )
    next_cursor = None
    if has_more and page:
        # Cursor anchors on the oldest row of the newest-first page.
        tail = page[-1]
        next_cursor = _cursor_encode({"ts": tail.get("ts"), "message_id": tail.get("msg_id")})
    log_event(
        "chat.messages.list",
        request_id=get_request_id(request),
        chat_id=chat_id,
        node_id=parsed["node_id"],
        agent_id=parsed["agent_id"],
        cursor_present=bool(cursor),
        limit=limit,
        has_more=has_more,
        next_cursor_present=bool(next_cursor),
        status="ok",
    )
    return {
        "items": messages,
        "count": len(messages),
        "chat_id": chat_id,
        "next_cursor": next_cursor,
        "has_more": has_more,
    }
@app.post("/api/chats/{chat_id}/send")
async def api_chat_send_v2(chat_id: str, body: ChatMessageSendBody, request: Request, _auth: str = Depends(require_auth)):
    """Send a user message into a chat (v2 API).

    Pipeline: per-IP rate check -> idempotency replay lookup -> per-chat and
    per-operator token-bucket limits -> persist user message -> infer via the
    target node's router -> persist assistant reply -> cache result for replay.

    Raises:
        HTTPException 400: empty text, or router URL not configured for node.
        HTTPException 429: any rate limit tripped.
        HTTPException 502: upstream router failure.
    """
    started_at = time.monotonic()
    # Coarse per-IP limit; the token buckets below add per-chat/operator limits.
    client_ip = request.client.host if request.client else "unknown"
    if not _check_rate(f"chat_v2:{client_ip}", max_calls=30, window_sec=60):
        raise HTTPException(status_code=429, detail="Rate limit: 30 messages/min")
    text = (body.text or "").strip()
    if not text:
        raise HTTPException(status_code=400, detail="text is required")
    request_id = get_request_id(request)
    # Idempotency key: the HTTP header wins over the body field; capped at 128 chars.
    idem_key = (
        (
            request.headers.get("Idempotency-Key")
            or body.idempotency_key
            or ""
        ).strip()
    )[:128]
    idem_hash = hash_idempotency_key(idem_key)
    info = _parse_chat_id(chat_id)
    # Routing priority: explicit force_node_id > node encoded in chat_id > NODA2.
    target_node = ((body.routing or {}).get("force_node_id") or info["node_id"] or "NODA2").upper()
    target_agent = info["agent_id"] or "sofiia"
    operator_id, operator_id_missing = _resolve_operator_id(request, body, request_id)
    # Per-chat token-bucket rate limit.
    chat_rl = _rate_limiter.consume(f"rl:chat:{chat_id}", rps=_RL_CHAT_RPS, burst=_RL_CHAT_BURST)
    if not chat_rl.allowed:
        SOFIIA_RATE_LIMITED_TOTAL.labels(scope="chat").inc()
        log_event(
            "chat.send.rate_limited",
            request_id=request_id,
            scope="chat",
            chat_id=chat_id,
            node_id=target_node,
            agent_id=target_agent,
            operator_id=operator_id,
            operator_id_missing=operator_id_missing,
            limit_rps=_RL_CHAT_RPS,
            burst=_RL_CHAT_BURST,
            retry_after_s=chat_rl.retry_after_s,
            status="error",
            error_code="rate_limited",
        )
        raise _rate_limited_http("chat", chat_rl.retry_after_s)
    # Per-operator token-bucket rate limit.
    op_rl = _rate_limiter.consume(f"rl:op:{operator_id}", rps=_RL_OP_RPS, burst=_RL_OP_BURST)
    if not op_rl.allowed:
        SOFIIA_RATE_LIMITED_TOTAL.labels(scope="operator").inc()
        log_event(
            "chat.send.rate_limited",
            request_id=request_id,
            scope="operator",
            chat_id=chat_id,
            node_id=target_node,
            agent_id=target_agent,
            operator_id=operator_id,
            operator_id_missing=operator_id_missing,
            limit_rps=_RL_OP_RPS,
            burst=_RL_OP_BURST,
            retry_after_s=op_rl.retry_after_s,
            status="error",
            error_code="rate_limited",
        )
        raise _rate_limited_http("operator", op_rl.retry_after_s)
    log_event(
        "chat.send",
        request_id=request_id,
        chat_id=chat_id,
        node_id=target_node,
        agent_id=target_agent,
        operator_id=operator_id,
        operator_id_missing=operator_id_missing,
        idempotency_key_hash=(idem_hash or None),
        replayed=False,
        status="ok",
    )
    # Idempotency replay: a repeated key returns the cached response unchanged.
    if idem_key:
        cache_key = f"{chat_id}::{idem_key}"
        cached = _idempotency_store.get(cache_key)
        if cached:
            SOFIIA_IDEMPOTENCY_REPLAYS_TOTAL.inc()
            log_event(
                "chat.send.replay",
                request_id=request_id,
                chat_id=chat_id,
                node_id=target_node,
                agent_id=target_agent,
                idempotency_key_hash=(idem_hash or None),
                replayed=True,
                message_id=cached.message_id,
                status="ok",
            )
            replay = dict(cached.response_body)
            replay["idempotency"] = {"replayed": True, "key": idem_key}
            return replay
    await _ensure_chat_project()
    SOFIIA_SEND_REQUESTS_TOTAL.labels(node_id=target_node).inc()
    project_id = body.project_id or CHAT_PROJECT_ID
    session_id = body.session_id or chat_id
    user_id = operator_id
    title = f"{target_agent}{target_node}{info['source']}"
    await _app_db.upsert_session(chat_id, project_id=CHAT_PROJECT_ID, title=title)
    # Persist the user message before inference so it survives upstream failures.
    user_saved = await _app_db.save_message(chat_id, "user", text[:4096])
    metadata: Dict[str, Any] = {
        "project_id": project_id,
        "session_id": session_id,
        "user_id": operator_id,
        "operator_id": operator_id,
        "client": "sofiia-console",
        "chat_id": chat_id,
        "node_id": target_node,
        "agent_id": target_agent,
        "source": info["source"],
        "external_chat_ref": info["external_chat_ref"],
        "attachments": body.attachments or [],
        "client_meta": body.client or {},
    }
    base_url = get_router_url(target_node)
    if not base_url:
        duration_ms = int((time.monotonic() - started_at) * 1000)
        log_event(
            "chat.send.error",
            request_id=request_id,
            chat_id=chat_id,
            node_id=target_node,
            agent_id=target_agent,
            idempotency_key_hash=(idem_hash or None),
            status="error",
            error_code="router_url_not_configured",
            duration_ms=duration_ms,
        )
        raise HTTPException(status_code=400, detail=f"router_url is not configured for node {target_node}")
    try:
        out = await infer(
            base_url,
            target_agent,
            text,
            model=None,
            metadata=metadata,
            timeout=300.0,
            api_key=ROUTER_API_KEY,
        )
    except Exception as e:
        duration_ms = int((time.monotonic() - started_at) * 1000)
        log_event(
            "chat.send.error",
            request_id=request_id,
            chat_id=chat_id,
            node_id=target_node,
            agent_id=target_agent,
            idempotency_key_hash=(idem_hash or None),
            status="error",
            error_code="upstream_error",
            error=str(e)[:180],
            duration_ms=duration_ms,
        )
        # Surface the failure on the event bus as well as in the HTTP response.
        _broadcast_bg(
            _make_event(
                "error",
                {"where": "chat_v2.router", "message": str(e)[:180], "chat_id": chat_id, "node_id": target_node, "agent_id": target_agent},
                project_id=project_id,
                session_id=session_id,
                user_id=user_id,
            )
        )
        raise HTTPException(status_code=502, detail=str(e)[:300])
    reply = _clean_chat_reply(out.get("response", out.get("text", "")))
    # Link the assistant message to the stored user message (parent_msg_id).
    assistant_saved = await _app_db.save_message(chat_id, "assistant", (reply or "")[:4096], parent_msg_id=user_saved.get("msg_id"))
    trace_id = f"chatv2_{session_id}_{uuid.uuid4().hex[:8]}"
    result = {
        "ok": True,
        "accepted": True,
        "chat_id": chat_id,
        "node_id": target_node,
        "agent_id": target_agent,
        "trace_id": trace_id,
        "message": {
            "message_id": assistant_saved.get("msg_id"),
            "role": "assistant",
            "text": reply,
            "ts": assistant_saved.get("ts"),
            "meta": {
                "node_id": target_node,
                "agent_id": target_agent,
                "backend": out.get("backend"),
                "model": out.get("model"),
            },
        },
    }
    # Record the response so a repeated Idempotency-Key can replay it.
    if idem_key:
        cache_key = f"{chat_id}::{idem_key}"
        _idempotency_store.set(
            cache_key,
            ReplayEntry(
                message_id=str((result.get("message") or {}).get("message_id") or ""),
                response_body=dict(result),
                created_at=time.monotonic(),
                node_id=target_node,
            ),
        )
    result["idempotency"] = {"replayed": False, "key": idem_key}
    duration_ms = int((time.monotonic() - started_at) * 1000)
    log_event(
        "chat.send.ok",
        request_id=request_id,
        chat_id=chat_id,
        node_id=target_node,
        agent_id=target_agent,
        idempotency_key_hash=(idem_hash or None),
        message_id=(result.get("message") or {}).get("message_id"),
        status="ok",
        duration_ms=duration_ms,
    )
    return result
@app.get("/metrics")
def metrics():
    """Expose service metrics in the Prometheus exposition format."""
    payload, ctype = render_metrics()
    return Response(content=payload, media_type=ctype)
@app.post("/api/chat/send")
async def api_chat_send(body: ChatSendBody, request: Request):
    """BFF chat: Ollama or router. Returns runtime contract fields. Rate: 30/min.

    The provider is the prefix of ``body.model`` before the first ``:``:
      * ``ollama`` — local Ollama ``/api/chat``
      * ``router`` — node agent router (supports ``agent|model`` syntax)
      * ``glm``    — Zhipu AI GLM (OpenAI-compatible, bigmodel.cn)
      * ``grok``   — xAI Grok (OpenAI-compatible)

    Voice turns (``body.voice_profile`` set) get a voice prompt suffix and
    capped token budgets. The reply is persisted to memory best-effort.

    Raises:
        HTTPException 400: unsupported provider.
        HTTPException 429: per-IP rate limit exceeded.
        HTTPException 502/503: upstream or configuration failures.
    """
    client_ip = request.client.host if request.client else "unknown"
    if not _check_rate(f"chat:{client_ip}", max_calls=30, window_sec=60):
        raise HTTPException(status_code=429, detail="Rate limit: 30 messages/min")
    # Runtime identity
    project_id = body.project_id or "default"
    session_id = body.session_id or f"sess_{uuid.uuid4().hex[:12]}"
    user_id = body.user_id or "console_user"
    provider, _, model_name = body.model.partition(":")
    reply = ""
    t0 = time.monotonic()

    def _clean_reply(text: str) -> str:
        """Strip <think>...</think> reasoning blocks (Qwen3/DeepSeek-R1) before returning to user.
        Strategy:
        1. re.DOTALL regex removes complete <think>...</think> blocks.
        2. Fallback split removes any trailing unclosed <think> block
           (model stopped mid-reasoning without </think>).
        """
        # `re` is already imported at module level; the previous local
        # `import re` was redundant and has been removed.
        # Primary: strip complete blocks (multiline-safe with DOTALL)
        cleaned = re.sub(r"<think>.*?</think>", "", text,
                         flags=re.DOTALL | re.IGNORECASE)
        # Fallback: if an unclosed <think> block remains, drop everything after it
        if "<think>" in cleaned.lower():
            cleaned = re.split(r"(?i)<think>", cleaned)[0]
        return cleaned.strip()

    # Broadcast: user message sent
    _broadcast_bg(_make_event("chat.message",
        {"text": body.message[:200], "provider": provider, "model": body.model},
        project_id=project_id, session_id=session_id, user_id=user_id))
    # voice_profile determines LLM options for voice turns.
    # None = text chat (full prompt, no token limit enforcement).
    _vp = body.voice_profile  # "voice_fast_uk" | "voice_quality_uk" | None
    _is_voice_turn = _vp is not None
    _is_quality = _vp == "voice_quality_uk"
    # System prompt: voice turns get guardrails appended
    _system_prompt = SOFIIA_SYSTEM_PROMPT
    if _is_voice_turn:
        _system_prompt = SOFIIA_SYSTEM_PROMPT + SOFIIA_VOICE_PROMPT_SUFFIX
    _voice_options = {
        "temperature": 0.18 if _is_quality else 0.15,
        "repeat_penalty": 1.1,
        "num_predict": 256 if _is_quality else 220,  # max_tokens per contract (≤256)
    } if _is_voice_turn else {
        "temperature": 0.15,
        "repeat_penalty": 1.1,
        "num_predict": SOFIIA_OLLAMA_NUM_PREDICT_TEXT,
    }
    if provider == "ollama":
        ollama_url = get_ollama_url()
        effective_model_name = model_name or "qwen3:14b"
        messages: List[Dict[str, Any]] = [{"role": "system", "content": _system_prompt}]
        messages.extend(body.history[-12:])
        messages.append({"role": "user", "content": body.message})
        try:
            async with httpx.AsyncClient(timeout=SOFIIA_OLLAMA_TIMEOUT_SEC) as client:
                r = await client.post(
                    f"{ollama_url}/api/chat",
                    json=_make_ollama_payload(effective_model_name, messages, _voice_options),
                )
                r.raise_for_status()
                data = r.json()
                reply = _clean_reply((data.get("message") or {}).get("content", "") or "Ollama: порожня відповідь")
        except httpx.HTTPStatusError as e:
            err_msg = f"Ollama HTTP {e.response.status_code}"
            _broadcast_bg(_make_event("error", {"where": "ollama", "message": err_msg},
                project_id=project_id, session_id=session_id, user_id=user_id))
            raise HTTPException(status_code=502, detail=f"{err_msg}: {str(e)[:200]}")
        except Exception as e:
            _broadcast_bg(_make_event("error", {"where": "ollama", "message": str(e)[:100]},
                project_id=project_id, session_id=session_id, user_id=user_id))
            raise HTTPException(status_code=502, detail=f"Ollama error: {str(e)[:200]}")
    elif provider == "router":
        base_url = get_router_url(body.node_id)
        router_agent_id = "sofiia"
        router_model = None
        if model_name:
            if "|" in model_name:
                # Explicit "agent|model" pair
                left, right = model_name.split("|", 1)
                router_agent_id = left or "sofiia"
                router_model = right or None
            elif ":" in model_name:
                # Looks like model id (qwen3:14b, qwen3.5:35b-a3b, etc.)
                router_model = model_name
            elif model_name not in ("default",):
                # Treat plain token as agent id (router:soul, router:monitor, ...)
                router_agent_id = model_name
        metadata: Dict[str, Any] = {
            "project_id": project_id,
            "session_id": session_id,
            "user_id": user_id,
            "client": "sofiia-console",
            "voice_profile": _vp,
        }
        try:
            out = await infer(
                base_url,
                router_agent_id,
                body.message,
                model=router_model,
                metadata=metadata,
                timeout=300.0,
                api_key=ROUTER_API_KEY,
            )
            reply = _clean_reply(out.get("response", out.get("text", "")))
        except Exception as e:
            _broadcast_bg(_make_event("error", {"where": "router", "message": str(e)[:100]},
                project_id=project_id, session_id=session_id, user_id=user_id))
            raise HTTPException(status_code=502, detail=str(e)[:300])
    elif provider == "glm":
        # Zhipu AI GLM — OpenAI-compatible API at bigmodel.cn
        glm_api_key = os.getenv("GLM5_API_KEY", os.getenv("GLM_API_KEY", "")).strip()
        if not glm_api_key:
            raise HTTPException(status_code=503, detail="GLM5_API_KEY not set.")
        glm_model = model_name or "glm-4.7"
        messages_glm: List[Dict[str, Any]] = [{"role": "system", "content": _system_prompt}]
        messages_glm.extend(body.history[-12:])
        messages_glm.append({"role": "user", "content": body.message})
        try:
            async with httpx.AsyncClient(timeout=120.0) as client:
                r = await client.post(
                    "https://open.bigmodel.cn/api/paas/v4/chat/completions",
                    headers={"Authorization": f"Bearer {glm_api_key}", "Content-Type": "application/json"},
                    json={"model": glm_model, "messages": messages_glm, "stream": False},
                )
                r.raise_for_status()
                data = r.json()
                reply = _clean_reply((data.get("choices") or [{}])[0].get("message", {}).get("content", "") or "GLM: порожня відповідь")
        except httpx.HTTPStatusError as e:
            err_msg = f"GLM HTTP {e.response.status_code}: {e.response.text[:200]}"
            _broadcast_bg(_make_event("error", {"where": "glm", "message": err_msg},
                project_id=project_id, session_id=session_id, user_id=user_id))
            raise HTTPException(status_code=502, detail=err_msg)
        except Exception as e:
            _broadcast_bg(_make_event("error", {"where": "glm", "message": str(e)[:100]},
                project_id=project_id, session_id=session_id, user_id=user_id))
            raise HTTPException(status_code=502, detail=f"GLM error: {str(e)[:200]}")
    elif provider == "grok":
        # xAI Grok — OpenAI-compatible API
        xai_api_key = os.getenv("XAI_API_KEY", "").strip()
        if not xai_api_key:
            raise HTTPException(status_code=503, detail="XAI_API_KEY not set. Add it to BFF environment.")
        grok_model = model_name or "grok-4-1-fast-reasoning"
        messages: List[Dict[str, Any]] = [{"role": "system", "content": _system_prompt}]
        messages.extend(body.history[-12:])
        messages.append({"role": "user", "content": body.message})
        try:
            async with httpx.AsyncClient(timeout=120.0) as client:
                r = await client.post(
                    "https://api.x.ai/v1/chat/completions",
                    headers={"Authorization": f"Bearer {xai_api_key}", "Content-Type": "application/json"},
                    json={"model": grok_model, "messages": messages, "stream": False},
                )
                r.raise_for_status()
                data = r.json()
                reply = _clean_reply((data.get("choices") or [{}])[0].get("message", {}).get("content", "") or "Grok: порожня відповідь")
        except httpx.HTTPStatusError as e:
            err_msg = f"Grok HTTP {e.response.status_code}: {e.response.text[:200]}"
            _broadcast_bg(_make_event("error", {"where": "grok", "message": err_msg},
                project_id=project_id, session_id=session_id, user_id=user_id))
            raise HTTPException(status_code=502, detail=err_msg)
        except Exception as e:
            _broadcast_bg(_make_event("error", {"where": "grok", "message": str(e)[:100]},
                project_id=project_id, session_id=session_id, user_id=user_id))
            raise HTTPException(status_code=502, detail=f"Grok error: {str(e)[:200]}")
    else:
        # Fix: the old message omitted "glm" even though a glm branch exists above.
        raise HTTPException(status_code=400, detail=f"Unsupported provider: {provider}. Use ollama, router, glm, or grok.")
    latency_ms = int((time.monotonic() - t0) * 1000)
    tokens_est = len(reply.split())
    trace_id = f"chat_{session_id}_{uuid.uuid4().hex[:8]}"
    # Broadcast: reply
    _broadcast_bg(_make_event("chat.reply",
        {"text": reply[:200], "provider": provider, "model": body.model,
         "latency_ms": latency_ms, "trace_id": trace_id},
        project_id=project_id, session_id=session_id, user_id=user_id))
    # Memory save (best-effort, non-blocking).
    # Fix: use asyncio.create_task() — asyncio.get_event_loop() inside a running
    # coroutine is deprecated (Python 3.10+) and create_task targets the
    # running loop directly.
    asyncio.create_task(
        _do_save_memory(body.message, reply, session_id, project_id, user_id)
    )
    # AISTALK forward (if enabled)
    if _aistalk:
        try:
            _aistalk.handle_event(_make_event("chat.reply",
                {"text": reply, "provider": provider, "model": body.model},
                project_id=project_id, session_id=session_id, user_id=user_id))
        except Exception as e:
            logger.debug("AISTALK forward failed: %s", e)
    return {
        "ok": True,
        "project_id": project_id,
        "session_id": session_id,
        "user_id": user_id,
        "response": reply,
        "model": body.model,
        "backend": provider,
        "trace_id": trace_id,
        "meta": {
            "latency_ms": latency_ms,
            "tokens_est": tokens_est,
            "trace_id": trace_id,
        },
    }
async def _do_save_memory(
    user_msg: str,
    ai_reply: str,
    session_id: str,
    project_id: str = "default",
    user_id: str = "console_user",
    agent_id: str = "sofiia",
) -> None:
    """Persist one chat turn to local SQLite and mirror it to the Memory Service.

    Both stages are best-effort: any failure is logged at debug/warning level
    and never propagated to the caller.
    """
    # 1) Local SQLite persistence (projects/sessions/messages schema).
    try:
        # sessions.project_id has a FK constraint — make sure the project exists.
        if not await _app_db.get_project(project_id):
            await _app_db.create_project(
                name=project_id.upper(),
                description=f"Auto-created project for {project_id} sessions",
                project_id=project_id,
            )
        await _app_db.upsert_session(session_id, project_id=project_id)
        parent_id = None
        if user_msg:
            row = await _app_db.save_message(session_id, "user", user_msg[:4096])
            parent_id = row["msg_id"]
        if ai_reply:
            await _app_db.save_message(
                session_id, "assistant", ai_reply[:4096], parent_msg_id=parent_id
            )
    except Exception as e:
        logger.debug("SQLite memory save skipped: %s", e)
    # 2) Best-effort mirror to the Memory Service (Qdrant + Neo4j).
    mem_url = get_memory_service_url()
    try:
        turn = (("user", user_msg), ("assistant", ai_reply))
        async with httpx.AsyncClient(timeout=5.0) as client:
            for role, content in turn:
                if not content:
                    continue
                resp = await client.post(
                    f"{mem_url}/agents/{agent_id}/memory",
                    json={
                        "agent_id": agent_id,
                        "role": role,
                        "content": content[:1000],
                        "user_id": user_id,
                        "channel_id": session_id,
                        "metadata": {"project_id": project_id, "client": "sofiia-console", "agent_id": agent_id},
                    },
                )
                if resp.status_code >= 400:
                    logger.warning(
                        "Memory Service save failed status=%s agent=%s session=%s body=%s",
                        resp.status_code,
                        agent_id,
                        session_id,
                        (resp.text or "")[:240],
                    )
    except Exception as e:
        logger.debug("Memory Service save skipped: %s", e)
# ─── Ops ────────────────────────────────────────────────────────────────────
class OpsRunBody(BaseModel):
    """Request body for POST /api/ops/run."""
    action_id: str  # key into the OPS_ACTIONS registry
    node_id: str = "NODA2"  # target node for the action
    params: dict = {}  # action-specific parameters (Pydantic copies mutable defaults per instance)
    project_id: Optional[str] = None  # defaults to "default" in the handler
    session_id: Optional[str] = None  # defaults to "console" in the handler
    source_run_id: Optional[str] = None  # link to supervisor run
    source_msg_id: Optional[str] = None  # link to message
class NodeUpsertBody(BaseModel):
    """Request body for POST /api/nodes/add — registry entry for one node."""
    node_id: str  # uppercased by the handler, e.g. "NODA1"
    label: str  # human-readable label; falls back to node_id when blank
    router_url: str  # agent router base URL (required)
    gateway_url: Optional[str] = ""
    monitor_url: Optional[str] = ""  # falls back to router_url in the handler
    supervisor_url: Optional[str] = ""
    ssh_host: Optional[str] = ""  # SSH profile saved only when host AND user are set
    ssh_port: Optional[int] = 22
    ssh_user: Optional[str] = ""
    ssh_password_env: Optional[str] = ""  # env var name holding the SSH password
    ssh_ipv6: Optional[str] = ""  # optional IPv6 fallback address
    ssh_host_keys: Optional[List[Dict[str, Any]]] = None
    enabled: bool = True
@app.get("/api/ops/actions")
async def api_ops_actions_list():
    """List the IDs of every registered ops action."""
    action_ids = [action_id for action_id in OPS_ACTIONS]
    return {"actions": action_ids}
@app.post("/api/ops/run")
async def api_ops_run(body: OpsRunBody, _auth=Depends(require_api_key)):
    """Run ops action. Broadcasts ops.run event and auto-creates ops_run graph node."""
    import uuid as _uuid  # local alias; module-level `uuid` also exists
    t0 = time.monotonic()
    project_id = body.project_id or "default"
    session_id = body.session_id or "console"
    ops_run_id = str(_uuid.uuid4())
    started_at = _app_db._now() if _app_db else None
    result = await run_ops_action(
        body.action_id, body.node_id, body.params,
        agent_id="sofiia", timeout=90.0, api_key=ROUTER_API_KEY,
    )
    elapsed = int((time.monotonic() - t0) * 1000)
    # run_ops_action signals failure via status == "failed"; anything else is ok.
    ok = result.get("status") != "failed"
    status_str = "ok" if ok else "failed"
    error_str = result.get("error", "") if not ok else ""
    _broadcast_bg(_make_event("ops.run",
        {"name": body.action_id, "ok": ok, "elapsed_ms": elapsed},
        project_id=project_id, session_id=session_id))
    # Auto-create ops_run graph node (fire-and-forget, do not fail the request)
    if _app_db and project_id:
        try:
            gn = await _app_db.upsert_ops_run_node(
                project_id=project_id,
                ops_run_id=ops_run_id,
                action_id=body.action_id,
                node_id=body.node_id,
                status=status_str,
                elapsed_ms=elapsed,
                error=str(error_str)[:500],
                started_at=started_at or "",
                source_run_id=body.source_run_id or "",
                source_msg_id=body.source_msg_id or "",
            )
            # Expose graph linkage to the caller without altering the action result.
            result["_graph_node_id"] = gn.get("node_id")
            result["_ops_run_id"] = ops_run_id
        except Exception as _e:
            logger.warning("ops_run graph node creation failed (non-fatal): %s", _e)
    return result
# ─── Nodes ──────────────────────────────────────────────────────────────────
@app.get("/api/nodes/dashboard")
async def api_nodes_dashboard(refresh: bool = Query(False), _auth: str = Depends(require_auth)):
    """
    Nodes dashboard with full telemetry.
    Returns cached data (refreshed every NODES_POLL_INTERVAL_SEC seconds).
    Pass ?refresh=true to force immediate re-probe.
    """
    cache_populated = bool(_nodes_cache["nodes"])
    if cache_populated and not refresh:
        return {**_nodes_cache, "cached": True}
    # Probe all nodes now and refresh the shared cache.
    fresh = await get_nodes_dashboard(router_api_key=ROUTER_API_KEY)
    _nodes_cache.update({**fresh, "ts": _now_iso()})
    return {**fresh, "ts": _nodes_cache["ts"], "cached": False}
@app.get("/api/nodes/registry")
async def api_nodes_registry(_auth: str = Depends(require_auth)):
    """Return the raw nodes registry exactly as loaded from configuration."""
    return load_nodes_registry()
@app.get("/api/nodes/ssh/status")
async def api_nodes_ssh_status(
    node_id: str = Query(..., description="Node ID, e.g. NODA1"),
    _auth=Depends(require_api_key_strict),
):
    """Report SSH reachability for a node: TCP connect over IPv4, falling back to IPv6.

    Returns ok=True only when the TCP port is reachable AND at least one
    credential source (password or private key) is configured.
    """
    node_id = node_id.strip().upper()
    ssh = get_node_ssh_profile(node_id)
    if not ssh.get("configured"):
        return {
            "ok": False,
            "node_id": node_id,
            "configured": False,
            "error": "ssh profile is not configured",
            "ssh": ssh,
        }
    host = ssh.get("host", "")
    host_ipv6 = (ssh.get("ipv6") or "").strip()
    port = int(ssh.get("port") or 22)
    tcp_ok = False
    tcp_error = None
    connect_host = host

    def _try_connect(target_host: str) -> Optional[str]:
        # Returns None on success, otherwise a truncated error string.
        try:
            with socket.create_connection((target_host, port), timeout=5):
                return None
        except Exception as e:
            return str(e)[:160]

    tcp_error = _try_connect(host)
    if tcp_error is None:
        tcp_ok = True
    elif host_ipv6:
        # Primary (IPv4) host failed — retry via the configured IPv6 address.
        err_v6 = _try_connect(host_ipv6)
        if err_v6 is None:
            tcp_ok = True
            tcp_error = None
            connect_host = host_ipv6
        else:
            # Both attempts failed: report both errors in one bounded string.
            tcp_error = f"ipv4={tcp_error}; ipv6={err_v6}"[:220]
    ok = tcp_ok and (ssh["auth"]["password_set"] or ssh["auth"]["private_key_set"])
    return {
        "ok": ok,
        "node_id": node_id,
        "configured": True,
        "tcp_reachable": tcp_ok,
        "tcp_error": tcp_error,
        "connect_host": connect_host,
        "ssh": ssh,
    }
@app.post("/api/nodes/add")
async def api_nodes_add(body: NodeUpsertBody, _auth=Depends(require_api_key_strict)):
    """Add or update a node registry entry, persist it, and refresh the dashboard cache.

    Raises:
        HTTPException 400: node_id is blank after normalization.
    """
    reg = load_nodes_registry()
    reg.setdefault("defaults", {"health_timeout_sec": 10, "tools_timeout_sec": 30})
    reg.setdefault("nodes", {})
    node_id = body.node_id.strip().upper()
    if not node_id:
        raise HTTPException(status_code=400, detail="node_id is required")
    node_payload: Dict[str, Any] = {
        "label": body.label.strip() or node_id,
        "router_url": body.router_url.strip(),
        "gateway_url": (body.gateway_url or "").strip(),
        # monitor_url falls back to router_url when not supplied.
        "monitor_url": (body.monitor_url or body.router_url).strip(),
        "supervisor_url": (body.supervisor_url or "").strip(),
        "enabled": body.enabled,
    }
    ssh_host = (body.ssh_host or "").strip()
    ssh_user = (body.ssh_user or "").strip()
    # SSH profile is only attached when both host and user are provided.
    if ssh_host and ssh_user:
        node_payload["ssh"] = {
            "host": ssh_host,
            "ipv6": (body.ssh_ipv6 or "").strip(),
            "port": int(body.ssh_port or 22),
            "user": ssh_user,
            "auth": {
                # Env var name holding the password (defaults to NODES_<ID>_SSH_PASSWORD).
                "password_env": (body.ssh_password_env or f"NODES_{node_id}_SSH_PASSWORD").strip(),
            },
            "host_keys": body.ssh_host_keys or [],
        }
    reg["nodes"][node_id] = node_payload
    path = save_nodes_registry(reg)
    # Re-probe immediately so the new/updated node appears in the dashboard cache.
    fresh = await get_nodes_dashboard(router_api_key=ROUTER_API_KEY)
    _nodes_cache.update({**fresh, "ts": _now_iso()})
    return {"ok": True, "saved_to": str(path), "node_id": node_id, "nodes": reg.get("nodes", {})}
# ─── Voice ──────────────────────────────────────────────────────────────────
@app.post("/api/voice/stt")
async def api_voice_stt(
    request: Request,
    audio: UploadFile = File(...),
    language: Optional[str] = Query(None),
    session_id: Optional[str] = Query(None),
    project_id: Optional[str] = Query(None),
):
    """STT proxy → memory-service. Rate: 20/min. Broadcasts voice.stt events."""
    client_ip = request.client.host if request.client else "unknown"
    if not _check_rate(f"stt:{client_ip}", max_calls=20, window_sec=60):
        raise HTTPException(status_code=429, detail="Rate limit: 20 STT calls/min")
    sid = session_id or "console"
    pid = project_id or "default"
    _broadcast_bg(_make_event("voice.stt", {"phase": "start"},
        project_id=pid, session_id=sid))
    t0 = time.monotonic()
    mem_url = get_memory_service_url()
    try:
        content = await audio.read()
        if not content:
            raise HTTPException(status_code=400, detail="Empty audio file")
        # Forward the raw upload to the memory-service STT endpoint.
        async with httpx.AsyncClient(timeout=60.0) as client:
            files = {"audio": (audio.filename or "audio.webm", content, audio.content_type or "audio/webm")}
            params = {"language": language} if language else {}
            r = await client.post(f"{mem_url}/voice/stt", files=files, params=params)
            r.raise_for_status()
            result = r.json()
        elapsed = int((time.monotonic() - t0) * 1000)
        upstream_ms = result.get("compute_ms", 0)
        logger.info("STT ok: lang=%s text_len=%d bff_ms=%d upstream_ms=%d",
            language or "auto", len(result.get("text", "")), elapsed, upstream_ms)
        _broadcast_bg(_make_event("voice.stt",
            {"phase": "done", "elapsed_ms": elapsed, "upstream_ms": upstream_ms},
            project_id=pid, session_id=sid))
        # Attach the BFF-side latency so clients can distinguish proxy overhead.
        result["bff_ms"] = elapsed
        return result
    except httpx.HTTPStatusError as e:
        logger.error("STT upstream error: status=%s", e.response.status_code)
        _broadcast_bg(_make_event("voice.stt", {"phase": "error", "message": str(e)[:80]},
            project_id=pid, session_id=sid))
        raise HTTPException(status_code=e.response.status_code, detail=f"STT upstream: {str(e)[:200]}")
    except HTTPException:
        # Re-raise our own HTTP errors (e.g. empty audio) unchanged.
        raise
    except Exception as e:
        logger.error("STT proxy error: %s", e, exc_info=True)
        _broadcast_bg(_make_event("voice.stt", {"phase": "error", "message": str(e)[:80]},
            project_id=pid, session_id=sid))
        raise HTTPException(status_code=502, detail=f"STT error: {str(e)[:200]}")
class TTSRequest(BaseModel):
    """Request body for POST /api/voice/tts."""
    text: str  # text to synthesize; sanitized server-side before synthesis
    voice: Optional[str] = "default"
    speed: Optional[float] = 1.0
    model: Optional[str] = "piper"  # TTS engine hint
    session_id: Optional[str] = None  # scopes the broadcast voice.tts events
    project_id: Optional[str] = None
@app.post("/api/voice/tts")
async def api_voice_tts(body: TTSRequest, request: Request):
    """TTS proxy → memory-service. Rate: 30/min per IP. Concurrent: MAX_CONCURRENT_TTS.

    Two upstream paths:
      * Voice HA router (VOICE_HA_ENABLED=true) → /v1/capability/voice_tts
      * Legacy direct memory-service /voice/tts (default)
    Streams the upstream audio back with engine/voice metadata headers.
    """
    client_ip = request.client.host if request.client else "unknown"
    if not _check_rate(f"tts:{client_ip}", max_calls=30, window_sec=60):
        raise HTTPException(status_code=429, detail="Rate limit: 30 TTS calls/min per client")
    # Concurrent synthesis guard — prevents memory-service DoS on burst requests
    sem = _get_tts_semaphore()
    # NOTE(review): peeking at the private Semaphore._value is race-prone;
    # asyncio.Semaphore.locked() is the public equivalent — confirm and switch.
    if not sem._value:  # non-blocking peek: all slots occupied
        raise HTTPException(status_code=503,
            detail=f"TTS busy: max {_MAX_CONCURRENT_TTS} concurrent synthesis. Retry in 1-2s.")
    # Server-side sanitization: strips <think>, markdown, URLs; truncates safely
    text = _sanitize_for_voice(body.text.strip())
    if not text:
        raise HTTPException(status_code=400, detail="Empty text")
    sid = body.session_id or "console"
    pid = body.project_id or "default"
    _broadcast_bg(_make_event("voice.tts", {"phase": "start", "voice": body.voice},
        project_id=pid, session_id=sid))
    t0 = time.monotonic()
    sem = _get_tts_semaphore()  # re-fetched; same guard object as the peek above
    async with sem:  # enforce MAX_CONCURRENT_TTS globally
        try:
            # ── Voice HA path (opt-in via VOICE_HA_ENABLED=true) ──────────────
            if is_voice_ha_enabled():
                router_url = get_voice_ha_router_url()
                tts_payload = {
                    "text": text,
                    "voice": body.voice,
                    "speed": body.speed,
                    "model": body.model,
                }
                async with httpx.AsyncClient(timeout=30.0) as client:
                    r = await client.post(
                        f"{router_url}/v1/capability/voice_tts",
                        json=tts_payload,
                    )
                    r.raise_for_status()
                elapsed = int((time.monotonic() - t0) * 1000)
                # Engine/voice/node metadata is relayed from upstream response headers.
                upstream_ct = r.headers.get("content-type", "audio/wav")
                tts_engine = r.headers.get("X-TTS-Engine", "unknown")
                tts_voice_used = r.headers.get("X-TTS-Voice", body.voice)
                voice_node = r.headers.get("X-Voice-Node", "unknown")
                voice_mode = r.headers.get("X-Voice-Mode", "remote")
                ext = "mp3" if "mpeg" in upstream_ct else "wav"
                logger.info("TTS HA ok: voice=%s node=%s mode=%s elapsed=%dms",
                    tts_voice_used, voice_node, voice_mode, elapsed)
                _broadcast_bg(_make_event("voice.tts",
                    {"phase": "done", "voice": tts_voice_used, "engine": tts_engine,
                     "elapsed_ms": elapsed, "ha_mode": voice_mode, "ha_node": voice_node},
                    project_id=pid, session_id=sid))
                return StreamingResponse(
                    io.BytesIO(r.content),
                    media_type=upstream_ct,
                    headers={
                        "Content-Disposition": f"inline; filename=speech.{ext}",
                        "X-TTS-Engine": tts_engine,
                        "X-TTS-Voice": tts_voice_used,
                        "X-TTS-Elapsed-MS": str(elapsed),
                        "X-Voice-Node": voice_node,
                        "X-Voice-Mode": voice_mode,
                        "Cache-Control": "no-store",
                    },
                )
            # ── Legacy direct path (default, VOICE_HA_ENABLED=false) ──────────
            mem_url = get_memory_service_url()
            async with httpx.AsyncClient(timeout=30.0) as client:
                r = await client.post(
                    f"{mem_url}/voice/tts",
                    json={"text": text, "voice": body.voice, "speed": body.speed, "model": body.model},
                )
                r.raise_for_status()
            elapsed = int((time.monotonic() - t0) * 1000)
            upstream_ct = r.headers.get("content-type", "audio/wav")
            tts_engine = r.headers.get("X-TTS-Engine", "unknown")
            tts_voice_used = r.headers.get("X-TTS-Voice", body.voice)
            ext = "mp3" if "mpeg" in upstream_ct else "wav"
            logger.info("TTS ok: voice=%s engine=%s len=%d fmt=%s elapsed=%dms",
                tts_voice_used, tts_engine, len(text), ext, elapsed)
            _broadcast_bg(_make_event("voice.tts",
                {"phase": "done", "voice": tts_voice_used, "engine": tts_engine, "elapsed_ms": elapsed},
                project_id=pid, session_id=sid))
            return StreamingResponse(
                io.BytesIO(r.content),
                media_type=upstream_ct,
                headers={
                    "Content-Disposition": f"inline; filename=speech.{ext}",
                    "X-TTS-Engine": tts_engine,
                    "X-TTS-Voice": tts_voice_used,
                    "X-TTS-Elapsed-MS": str(elapsed),
                    "Cache-Control": "no-store",
                },
            )
        except httpx.HTTPStatusError as e:
            _record_tts_error("http_error", e.response.status_code, str(e)[:120], body.voice)
            logger.error("TTS upstream error: status=%s voice=%s ha=%s",
                e.response.status_code, body.voice, is_voice_ha_enabled())
            _broadcast_bg(_make_event("voice.tts", {"phase": "error", "message": str(e)[:80]},
                project_id=pid, session_id=sid))
            raise HTTPException(status_code=e.response.status_code, detail=f"TTS upstream: {str(e)[:200]}")
        except Exception as e:
            _record_tts_error("proxy_error", None, str(e)[:120], body.voice)
            logger.error("TTS proxy error: %s ha=%s", e, is_voice_ha_enabled(), exc_info=True)
            _broadcast_bg(_make_event("voice.tts", {"phase": "error", "message": str(e)[:80]},
                project_id=pid, session_id=sid))
            raise HTTPException(status_code=502, detail=f"TTS error: {str(e)[:200]}")
@app.get("/api/voice/voices")
async def api_voice_voices():
    """Proxy the upstream voice catalogue; degrade to a macOS fallback on error."""
    mem_url = get_memory_service_url()
    try:
        async with httpx.AsyncClient(timeout=10.0) as client:
            resp = await client.get(f"{mem_url}/voice/voices")
            resp.raise_for_status()
            return resp.json()
    except Exception as e:
        # Upstream unreachable — return a minimal static catalogue with the error.
        fallback_macos = [{"id": "Milena", "name": "Milena (uk-UA)", "lang": "uk-UA"}]
        return {"piper": [], "macos": fallback_macos, "error": str(e)[:100]}
# ─── Phase 2: Voice Chat Stream (sentence chunking → early TTS) ──────────────
# Strategy: split LLM text into sentences → synthesize first sentence immediately
# → return {first_audio_b64, first_text, rest_text[]}
# Browser plays first sentence while fetching TTS for remaining sentences in bg.
# TTFA drops from ~10-14s to ~3-5s (LLM still runs full, but TTS starts on chunk1).
from app.voice_utils import split_into_voice_chunks as _split_into_voice_chunks
from app.voice_utils import clean_think_blocks as _clean_think_blocks_util
from app.voice_utils import sanitize_for_voice as _sanitize_for_voice
from app.voice_utils import MIN_CHUNK_CHARS as _MIN_CHUNK_CHARS, MAX_CHUNK_CHARS as _MAX_CHUNK_CHARS
class VoiceChatStreamBody(BaseModel):
    # Request payload for POST /api/voice/chat/stream.
    message: str
    model: str = "ollama:qwen3:14b"  # "<provider>:<model>"; handler supports ollama, grok, glm
    node_id: str = "NODA2"  # NOTE(review): not read by the stream handler below — confirm use
    voice: Optional[str] = None  # TTS voice id; handler substitutes "default" when unset
    voice_profile: Optional[str] = "voice_fast_uk"  # "voice_quality_uk" raises temperature/num_predict
    session_id: Optional[str] = None  # auto-generated "vs_…" id when missing
    project_id: Optional[str] = None  # defaults to "default" in the handler
    history: List[Dict[str, Any]] = []  # prior chat turns; only the last 8 are forwarded to the LLM
@app.post("/api/voice/chat/stream")
async def api_voice_chat_stream(body: VoiceChatStreamBody, request: Request):
"""Phase 2 Voice Chat: LLM → sentence split → first sentence TTS immediately.
Returns:
{
ok: bool,
first_text: str, # first sentence
first_audio_b64: str, # base64 MP3 for immediate playback
first_audio_mime: str, # "audio/mpeg"
rest_chunks: [str, ...], # remaining sentences (client fetches TTS via /api/voice/tts)
full_text: str, # full LLM reply (for display)
trace_id: str,
meta: {llm_ms, tts_ms, chunks_total}
}
Client flow:
1. POST /api/voice/chat/stream → play first_audio_b64 immediately
2. For each chunk in rest_chunks: POST /api/voice/tts → enqueue audio
"""
import re as _re # noqa: F401 kept for legacy; re already imported at module level
client_ip = request.client.host if request.client else "unknown"
if not _check_rate(f"voice_stream:{client_ip}", max_calls=15, window_sec=60):
raise HTTPException(status_code=429, detail="Rate limit: 15 voice stream calls/min per client")
# Concurrent TTS guard also applies to stream endpoint (TTS inside)
sem = _get_tts_semaphore()
if not sem._value:
raise HTTPException(status_code=503,
detail=f"TTS busy: max {_MAX_CONCURRENT_TTS} concurrent synthesis. Retry in 1-2s.")
sid = body.session_id or f"vs_{uuid.uuid4().hex[:10]}"
pid = body.project_id or "default"
trace_id = f"vs_{sid}_{uuid.uuid4().hex[:8]}"
_vp = body.voice_profile or "voice_fast_uk"
_is_quality = _vp == "voice_quality_uk"
_system_prompt = SOFIIA_SYSTEM_PROMPT + SOFIIA_VOICE_PROMPT_SUFFIX
# Track for repro pack
global _voice_last_model, _voice_last_profile
_voice_last_model = body.model
_voice_last_profile = _vp
_broadcast_bg(_make_event("voice.stream", {"phase": "start", "trace_id": trace_id},
project_id=pid, session_id=sid))
# ── 1. LLM ────────────────────────────────────────────────────────────────
t0_llm = time.monotonic()
provider, _, model_name = body.model.partition(":")
reply = ""
def _clean(text: str) -> str:
cleaned = re.sub(r"<think>.*?</think>", "", text, flags=re.DOTALL | re.IGNORECASE)
if "<think>" in cleaned.lower():
cleaned = re.split(r"(?i)<think>", cleaned)[0]
return cleaned.strip()
try:
if provider == "ollama":
ollama_url = get_ollama_url()
effective_model_name = model_name or "qwen3:14b"
messages: List[Dict[str, Any]] = [{"role": "system", "content": _system_prompt}]
messages.extend(body.history[-8:])
messages.append({"role": "user", "content": body.message})
voice_options = {
"temperature": 0.18 if _is_quality else 0.15,
"repeat_penalty": 1.1,
"num_predict": 256 if _is_quality else 220,
}
async with httpx.AsyncClient(timeout=SOFIIA_OLLAMA_VOICE_TIMEOUT_SEC) as client:
r = await client.post(
f"{ollama_url}/api/chat",
json=_make_ollama_payload(effective_model_name, messages, voice_options),
)
r.raise_for_status()
raw = (r.json().get("message") or {}).get("content", "")
reply = _clean(raw)
elif provider == "grok":
xai_key = os.getenv("XAI_API_KEY", "").strip()
if not xai_key:
raise HTTPException(status_code=503, detail="XAI_API_KEY not set.")
grok_model = model_name or "grok-4-1-fast-reasoning"
messages_g: List[Dict[str, Any]] = [{"role": "system", "content": _system_prompt}]
messages_g.extend(body.history[-8:])
messages_g.append({"role": "user", "content": body.message})
async with httpx.AsyncClient(timeout=60.0) as client:
r = await client.post(
"https://api.x.ai/v1/chat/completions",
headers={"Authorization": f"Bearer {xai_key}", "Content-Type": "application/json"},
json={"model": grok_model, "messages": messages_g, "stream": False,
"max_tokens": 1024, "temperature": 0.2},
)
r.raise_for_status()
raw = (r.json().get("choices") or [{}])[0].get("message", {}).get("content", "")
reply = _clean(raw)
elif provider == "glm":
glm_key = os.getenv("GLM5_API_KEY", os.getenv("GLM_API_KEY", "")).strip()
if not glm_key:
raise HTTPException(status_code=503, detail="GLM5_API_KEY not set.")
glm_model = model_name or "glm-5"
messages_glm: List[Dict[str, Any]] = [{"role": "system", "content": _system_prompt}]
messages_glm.extend(body.history[-8:])
messages_glm.append({"role": "user", "content": body.message})
async with httpx.AsyncClient(timeout=60.0) as client:
r = await client.post(
"https://open.bigmodel.cn/api/paas/v4/chat/completions",
headers={"Authorization": f"Bearer {glm_key}", "Content-Type": "application/json"},
json={"model": glm_model, "messages": messages_glm, "stream": False},
)
r.raise_for_status()
raw = (r.json().get("choices") or [{}])[0].get("message", {}).get("content", "")
reply = _clean(raw)
else:
raise HTTPException(status_code=400, detail=f"voice/stream: provider '{provider}' not supported. Use: ollama, grok, glm.")
except HTTPException:
raise
except Exception as e:
_record_llm_error("inference_error", body.model, str(e)[:120])
_broadcast_bg(_make_event("error", {"where": "voice_stream_llm", "trace_id": trace_id, "message": str(e)[:100]},
project_id=pid, session_id=sid))
raise HTTPException(status_code=502, detail=f"LLM error: {str(e)[:200]}")
llm_ms = int((time.monotonic() - t0_llm) * 1000)
if not reply:
reply = "Не можу відповісти зараз."
# ── 2. Sentence chunking ──────────────────────────────────────────────────
# sanitize full reply before splitting (removes markdown, <think>, URLs)
sanitized_reply = _sanitize_for_voice(reply)
chunks = _split_into_voice_chunks(sanitized_reply)
if not chunks:
chunks = [sanitized_reply] if sanitized_reply else ["Не можу відповісти зараз."]
first_chunk = chunks[0]
# rest_chunks: sanitize + hard cap (prevents DoS via unreasonably long replies)
_MAX_REST_CHUNKS = int(os.getenv("MAX_VOICE_REST_CHUNKS", "8"))
all_rest = [_sanitize_for_voice(c) for c in chunks[1:] if _sanitize_for_voice(c)]
rest_chunks = all_rest[:_MAX_REST_CHUNKS] # cap: never more than 8 background TTS calls
# ── 3. TTS for first sentence (immediate) ─────────────────────────────────
t0_tts = time.monotonic()
first_audio_b64 = ""
first_audio_mime = "audio/mpeg"
voice = body.voice or "default"
_ha_voice_node = None
_ha_voice_mode = None
try:
import base64 as _b64
tts_json = {"text": first_chunk, "voice": voice, "speed": 1.0}
async with httpx.AsyncClient(timeout=15.0) as client:
if is_voice_ha_enabled():
# HA path: Router selects best node for TTS
router_url = get_voice_ha_router_url()
r_tts = await client.post(f"{router_url}/v1/capability/voice_tts", json=tts_json)
r_tts.raise_for_status()
_ha_voice_node = r_tts.headers.get("X-Voice-Node")
_ha_voice_mode = r_tts.headers.get("X-Voice-Mode")
logger.debug("voice_stream TTS via HA: node=%s mode=%s",
_ha_voice_node, _ha_voice_mode)
else:
# Legacy direct path
mem_url = get_memory_service_url()
r_tts = await client.post(f"{mem_url}/voice/tts", json=tts_json)
r_tts.raise_for_status()
first_audio_mime = r_tts.headers.get("content-type", "audio/mpeg").split(";")[0]
first_audio_b64 = _b64.b64encode(r_tts.content).decode()
except Exception as e:
logger.warning("voice_stream TTS failed for first chunk (ha=%s): %s",
is_voice_ha_enabled(), e)
# Not fatal: client can still render text
tts_ms = int((time.monotonic() - t0_tts) * 1000)
_broadcast_bg(_make_event("voice.stream", {
"phase": "done",
"trace_id": trace_id,
"llm_ms": llm_ms,
"tts_ms": tts_ms,
"chunks_total": len(chunks),
}, project_id=pid, session_id=sid))
logger.info("voice_stream ok: trace=%s llm=%dms tts=%dms chunks=%d first=%dB",
trace_id, llm_ms, tts_ms, len(chunks), len(r_tts.content) if first_audio_b64 else 0)
body_data = {
"ok": True,
"trace_id": trace_id,
"first_text": first_chunk,
"first_audio_b64": first_audio_b64,
"first_audio_mime": first_audio_mime,
"rest_chunks": rest_chunks,
"full_text": reply,
"meta": {
"llm_ms": llm_ms,
"tts_ms": tts_ms,
"chunks_total": len(chunks),
"voice": voice,
"model": body.model,
"voice_profile": _vp,
},
}
from fastapi.responses import JSONResponse as _JSONResponse
resp_headers = {}
if _ha_voice_mode:
resp_headers["X-Voice-Mode"] = _ha_voice_mode
if _ha_voice_node:
resp_headers["X-Voice-Node"] = _ha_voice_node
if _ha_voice_mode or _ha_voice_node:
resp_headers["X-Voice-Cap"] = "voice_tts"
if resp_headers:
return _JSONResponse(content=body_data, headers=resp_headers)
return body_data
# ─── Voice Telemetry Beacon ───────────────────────────────────────────────────
# Receives performance marks from browser, records Prometheus histograms.
# Browser calls this via navigator.sendBeacon (fire-and-forget).
try:
    from prometheus_client import Histogram as _PromHistogram, Counter as _PromCounter
    # Histograms are labelled so dashboards can slice by model / voice profile.
    _voice_ttfa_hist = _PromHistogram(
        "voice_ttfa_ms", "Time-to-first-audio (request → first audio playable)",
        ["model", "voice_profile"],
        buckets=[500, 1000, 2000, 3000, 5000, 7000, 10000, 15000],
    )
    _voice_llm_hist = _PromHistogram(
        "voice_llm_ms", "LLM inference time for voice turns",
        ["model", "voice_profile"],
        buckets=[500, 1000, 2000, 5000, 8000, 12000, 20000],
    )
    _voice_tts_first_hist = _PromHistogram(
        "voice_tts_first_ms", "First-sentence TTS synthesis time",
        ["voice_profile"],
        buckets=[200, 500, 800, 1200, 2000, 3000],
    )
    _voice_e2e_hist = _PromHistogram(
        "voice_e2e_ms", "End-to-end voice turn latency (user stop speaking → audio plays)",
        ["voice_profile"],
        buckets=[1000, 2000, 4000, 6000, 9000, 13000, 20000],
    )
    _voice_underflow_counter = _PromCounter(
        "voice_queue_underflows_total", "Times playback queue ran empty before TTS finished",
        ["voice_profile"],
    )
    _PROM_VOICE_OK = True
except Exception:
    # prometheus_client missing (or metric registration clash) — telemetry
    # beacons still work, they just skip metric observation.
    _PROM_VOICE_OK = False
class VoiceTelemetryPayload(BaseModel):
    # One voice turn's client-side timing marks (sent by the browser beacon).
    event: str = "voice_turn"
    # Idempotency: session_id + turn_id deduplicate duplicate beacon submissions
    session_id: Optional[str] = None
    turn_id: Optional[str] = None  # monotonic turn counter or UUID per turn
    ttfa_ms: Optional[int] = None  # time-to-first-audio
    llm_ms: Optional[int] = None  # LLM inference time
    tts_first_ms: Optional[int] = None  # first-sentence TTS synthesis time
    e2e_ms: Optional[int] = None  # end-to-end turn latency
    stt_ms: Optional[int] = None  # speech-to-text time (accepted but not fed to any histogram here)
    underflows: int = 0  # playback-queue underruns during the turn
    model: Optional[str] = None  # e.g. "ollama:qwen3:14b"; "ollama:" prefix stripped for labels
    voice_profile: Optional[str] = None
class VoiceTelemetryBatch(BaseModel):
    """Batch beacon: array of turns submitted together (reduces HTTP overhead)."""
    # The batch endpoint processes at most 20 events per submission.
    events: List[VoiceTelemetryPayload] = []
def _process_telemetry_item(payload: VoiceTelemetryPayload) -> bool:
    """Record one voice-turn telemetry event.

    Deduplicates on (session_id, turn_id), observes Prometheus histograms when
    the client library is available, logs the turn, and feeds the degradation
    state machine. Returns False when the event is a duplicate submission.
    """
    session_key = payload.session_id or "anon"
    turn_key = payload.turn_id or "noid"
    if _telem_is_duplicate(session_key, turn_key):
        # Beacon retransmission — this turn was already recorded.
        return False
    model_label = (payload.model or "unknown").replace("ollama:", "")
    profile_label = payload.voice_profile or "unknown"
    if _PROM_VOICE_OK:
        try:
            if payload.ttfa_ms is not None:
                _voice_ttfa_hist.labels(model=model_label, voice_profile=profile_label).observe(payload.ttfa_ms)
            if payload.llm_ms is not None:
                _voice_llm_hist.labels(model=model_label, voice_profile=profile_label).observe(payload.llm_ms)
            if payload.tts_first_ms is not None:
                _voice_tts_first_hist.labels(voice_profile=profile_label).observe(payload.tts_first_ms)
            if payload.e2e_ms is not None:
                _voice_e2e_hist.labels(voice_profile=profile_label).observe(payload.e2e_ms)
            if payload.underflows:
                _voice_underflow_counter.labels(voice_profile=profile_label).inc(payload.underflows)
        except Exception as exc:
            logger.debug("telemetry/voice prom error: %s", exc)
    logger.info(
        "voice_telemetry: model=%s profile=%s ttfa=%s llm=%s tts=%s e2e=%s underflows=%d sid=%s",
        model_label, profile_label, payload.ttfa_ms, payload.llm_ms,
        payload.tts_first_ms, payload.e2e_ms, payload.underflows, session_key,
    )
    # Only latency-bearing events advance the degradation state machine.
    if payload.ttfa_ms is not None or payload.tts_first_ms is not None:
        _voice_degradation_sm.observe(
            ttfa_ms=payload.ttfa_ms,
            tts_first_ms=payload.tts_first_ms,
            underflows=payload.underflows,
            profile=profile_label,
        )
    return True
@app.post("/api/telemetry/voice", status_code=204)
async def api_telemetry_voice(payload: VoiceTelemetryPayload):
"""Browser beacon endpoint (single turn). Fire-and-forget, always 204."""
_process_telemetry_item(payload)
# 204 No Content — browser doesn't await response
@app.post("/api/telemetry/voice/batch", status_code=204)
async def api_telemetry_voice_batch(batch: VoiceTelemetryBatch, request: Request):
"""Batch beacon: process up to 20 turns in one HTTP call.
Useful when browser queues multiple turns before sending (e.g. tab becomes
visible again, or connection was lost briefly).
"""
client_ip = request.client.host if request.client else "unknown"
if not _check_rate(f"telem_batch:{client_ip}", max_calls=60, window_sec=60):
raise HTTPException(status_code=429, detail="Rate limit: 60 telemetry batches/min")
cap = min(len(batch.events), 20) # hard cap per batch
processed = sum(1 for item in batch.events[:cap] if _process_telemetry_item(item))
logger.debug("telemetry/voice/batch: submitted=%d processed=%d cap=%d",
len(batch.events), processed, cap)
# ─── Voice Degradation State Machine ─────────────────────────────────────────
# Tracks rolling window of voice telemetry and determines system-level state.
# States: ok → degraded_tts → degraded_llm → fast_lock → emergency
# Client polls GET /api/voice/degradation_status to show UI badge.
import collections
from dataclasses import dataclass as _dc, field as _field
from enum import Enum
class VoiceDegradationState(str, Enum):
    """Coarse voice-pipeline health states (str-valued so they serialize to JSON)."""
    OK = "ok"                       # all SLOs met
    DEGRADED_TTS = "degraded_tts"   # TTS slow/failing → show "TTS SLOW" badge
    DEGRADED_LLM = "degraded_llm"   # LLM slow → profile auto-demoted to fast
    FAST_LOCK = "fast_lock"         # LLM degraded, forced to voice_fast_uk
    EMERGENCY = "emergency"         # TTS failing → warn user, fallback banner
# SLO thresholds (ms) — aligned with config/slo_policy.yml.
# p95 values are nearest-rank percentiles over the rolling telemetry window.
_SM_TTFA_WARN = 5000       # TTFA p95 > 5s → degraded_llm
_SM_TTFA_LOCK = 8000       # TTFA p95 > 8s → fast_lock
_SM_TTS_WARN = 2000        # TTS first p95 > 2s → degraded_tts
_SM_TTS_CRIT = 4000        # TTS first p95 > 4s → emergency
_SM_UNDERFLOW_RATE = 0.1   # >10% of recent turns have underflows → degraded_tts
_SM_WINDOW = 20            # rolling window (last N telemetry events)
_SM_MIN_SAMPLES = 5        # need at least N samples before changing state
@_dc
class _VoiceDegradationSM:
    """Rolling-window degradation state machine for voice telemetry.

    ``observe()`` appends the newest turn's latencies into bounded deques and
    recomputes the coarse health state; ``status_dict()`` renders a
    JSON-serializable snapshot for /api/voice/degradation_status.
    """
    # Bounded rolling windows (maxlen=_SM_WINDOW) of recent measurements.
    _ttfa_window: collections.deque = _field(default_factory=lambda: collections.deque(maxlen=_SM_WINDOW))
    _tts_first_window: collections.deque = _field(default_factory=lambda: collections.deque(maxlen=_SM_WINDOW))
    _underflow_window: collections.deque = _field(default_factory=lambda: collections.deque(maxlen=_SM_WINDOW))
    state: VoiceDegradationState = VoiceDegradationState.OK
    state_since: float = _field(default_factory=time.monotonic)
    recommended_profile: str = "voice_fast_uk"
    last_reason: str = ""
    # NOTE(review): this lock is never acquired anywhere in the class — kept
    # for field compatibility. asyncio is imported at module level, so the
    # previous __import__('asyncio') indirection was unnecessary.
    _lock: object = _field(default_factory=asyncio.Lock)
    def observe(self, ttfa_ms: Optional[int], tts_first_ms: Optional[int],
                underflows: int, profile: str) -> None:
        """Record one telemetry sample and refresh the derived state."""
        if ttfa_ms is not None:
            self._ttfa_window.append(ttfa_ms)
        if tts_first_ms is not None:
            self._tts_first_window.append(tts_first_ms)
        self._underflow_window.append(1 if underflows > 0 else 0)
        self._recompute()
    def _p95(self, window: collections.deque) -> Optional[float]:
        # Nearest-rank p95; None until _SM_MIN_SAMPLES observations exist.
        if len(window) < _SM_MIN_SAMPLES:
            return None
        s = sorted(window)
        return s[int(len(s) * 0.95)]
    def _underflow_rate(self) -> float:
        # Fraction of recent turns that reported at least one underflow.
        if not self._underflow_window:
            return 0.0
        return sum(self._underflow_window) / len(self._underflow_window)
    def _recompute(self) -> None:
        """Re-derive state from window p95s; branches are ordered by severity."""
        ttfa_p95 = self._p95(self._ttfa_window)
        tts_p95 = self._p95(self._tts_first_window)
        uf_rate = self._underflow_rate()
        prev_state = self.state
        if tts_p95 is not None and tts_p95 > _SM_TTS_CRIT:
            self.state = VoiceDegradationState.EMERGENCY
            self.recommended_profile = "voice_fast_uk"
            self.last_reason = f"TTS p95={tts_p95:.0f}ms > {_SM_TTS_CRIT}ms"
        elif ttfa_p95 is not None and ttfa_p95 > _SM_TTFA_LOCK:
            self.state = VoiceDegradationState.FAST_LOCK
            self.recommended_profile = "voice_fast_uk"
            self.last_reason = f"TTFA p95={ttfa_p95:.0f}ms > {_SM_TTFA_LOCK}ms — locked to fast profile"
        elif tts_p95 is not None and tts_p95 > _SM_TTS_WARN:
            self.state = VoiceDegradationState.DEGRADED_TTS
            self.recommended_profile = "voice_fast_uk"
            self.last_reason = f"TTS p95={tts_p95:.0f}ms > {_SM_TTS_WARN}ms"
        elif ttfa_p95 is not None and ttfa_p95 > _SM_TTFA_WARN:
            self.state = VoiceDegradationState.DEGRADED_LLM
            self.recommended_profile = "voice_fast_uk"
            self.last_reason = f"TTFA p95={ttfa_p95:.0f}ms > {_SM_TTFA_WARN}ms"
        elif uf_rate > _SM_UNDERFLOW_RATE:
            self.state = VoiceDegradationState.DEGRADED_TTS
            self.recommended_profile = "voice_fast_uk"
            self.last_reason = f"Underflow rate={uf_rate:.1%} > {_SM_UNDERFLOW_RATE:.0%}"
        else:
            self.state = VoiceDegradationState.OK
            self.recommended_profile = "voice_fast_uk"  # default
            self.last_reason = "all SLOs met"
        if self.state != prev_state:
            self.state_since = time.monotonic()
            # BUGFIX: the format string previously read "%s%s", so the two
            # state names were glued together in the log line (e.g. "okdegraded_tts").
            logger.warning("voice_degradation state: %s → %s | %s",
                           prev_state.value, self.state.value, self.last_reason)
    def status_dict(self) -> dict:
        """JSON-serializable snapshot consumed by the status endpoint."""
        return {
            "state": self.state.value,
            "state_since_sec": int(time.monotonic() - self.state_since),
            "recommended_profile": self.recommended_profile,
            "reason": self.last_reason,
            "samples": {
                "ttfa": len(self._ttfa_window),
                "tts_first": len(self._tts_first_window),
            },
            "p95": {
                "ttfa_ms": self._p95(self._ttfa_window),
                "tts_first_ms": self._p95(self._tts_first_window),
            },
            "underflow_rate": round(self._underflow_rate(), 3),
            "ui_badge": _SM_UI_BADGE.get(self.state, ""),
        }
# UI badge text per state (empty string for OK — the UI renders no badge then)
_SM_UI_BADGE = {
    VoiceDegradationState.OK: "",
    VoiceDegradationState.DEGRADED_TTS: "⚠ TTS SLOW",
    VoiceDegradationState.DEGRADED_LLM: "⚠ AI SLOW",
    VoiceDegradationState.FAST_LOCK: "⚡ FAST MODE",
    VoiceDegradationState.EMERGENCY: "🔴 TTS DEGRADED",
}
# Process-wide singleton: written by telemetry beacons, read by the status endpoint.
_voice_degradation_sm = _VoiceDegradationSM()
@app.get("/api/voice/degradation_status")
async def api_voice_degradation_status():
"""Returns current voice degradation state + repro pack for incident diagnosis.
Repro pack fields (for on-call):
node_id, edge_tts_version, last_model, last_profile,
last_5_tts_errors, last_5_llm_errors
"""
base = _voice_degradation_sm.status_dict()
# Enrich with repro pack
base["repro"] = {
"node_id": _NODE_ID,
"last_model": _voice_last_model,
"last_profile": _voice_last_profile,
"last_5_tts_errors": list(_voice_tts_errors),
"last_5_llm_errors": list(_voice_llm_errors),
"concurrent_tts_slots_free": _get_tts_semaphore()._value,
"max_concurrent_tts": _MAX_CONCURRENT_TTS,
}
return base
# ─── Memory ──────────────────────────────────────────────────────────────────
@app.get("/api/memory/status")
async def api_memory_status(_auth: str = Depends(require_auth)):
mem_url = get_memory_service_url()
try:
async with httpx.AsyncClient(timeout=8.0) as client:
r = await client.get(f"{mem_url}/health")
r.raise_for_status()
data = r.json()
return {
"ok": True,
"memory_url": mem_url,
"status": data.get("status", "unknown"),
"vector_store": data.get("vector_store", {}),
"stt": "whisper-large-v3-turbo",
"tts": "edge-tts / macOS say",
}
except Exception as e:
return {"ok": False, "error": str(e)[:200], "memory_url": mem_url}
@app.get("/api/memory/context")
async def api_memory_context(
session_id: str = Query("console"),
agent_id: str = Query("sofiia"),
user_id: Optional[str] = Query(None),
limit: int = Query(20, ge=1, le=100),
_auth: str = Depends(require_auth),
):
mem_url = get_memory_service_url()
agent_key = str(agent_id or "").strip().lower()
resolved_user = user_id or ("aistalk_user" if agent_key == "aistalk" else "console_user")
async def _sqlite_fallback_events() -> List[Dict[str, Any]]:
events: List[Dict[str, Any]] = []
if _app_db:
try:
rows = await _app_db.list_messages(session_id, limit=limit)
for row in rows:
events.append(
{
"role": row.get("role", "unknown"),
"content": row.get("content", ""),
"ts": row.get("ts"),
"source": "sqlite_fallback",
}
)
except Exception:
pass
return events
try:
async with httpx.AsyncClient(timeout=8.0) as client:
r = await client.get(
f"{mem_url}/agents/{agent_id}/memory",
params={"user_id": resolved_user, "channel_id": session_id, "limit": limit},
)
r.raise_for_status()
data = r.json()
events = data.get("events") if isinstance(data, dict) else None
if isinstance(events, list) and events:
return data
# Remote is alive but returned empty history; expose local persisted history too.
local_events = await _sqlite_fallback_events()
if local_events:
return {"events": local_events, "fallback": "sqlite_after_empty_remote"}
return data if isinstance(data, dict) else {"events": []}
except Exception as e:
# Fallback to local SQLite session memory so UI still has context.
events = await _sqlite_fallback_events()
return {"events": events, "error": str(e)[:100], "fallback": "sqlite"}
# ─── WebSocket /ws/events ────────────────────────────────────────────────────
@app.websocket("/ws/events")
async def ws_events(websocket: WebSocket):
"""WebSocket event stream. Clients receive all broadcast events."""
await websocket.accept()
_ws_clients.add(websocket)
logger.info("WS client connected, total=%d", len(_ws_clients))
# Send welcome
await websocket.send_text(json.dumps(_make_event("nodes.status", {
"message": "connected",
"bff_version": _VERSION,
"ws_clients": len(_ws_clients),
})))
try:
while True:
# Keep-alive: read pings from client (or just wait)
try:
msg = await asyncio.wait_for(websocket.receive_text(), timeout=15.0)
# Client can send {"type":"ping"} → pong
if msg:
try:
cmd = json.loads(msg)
if cmd.get("type") == "ping":
await websocket.send_text(json.dumps({"type": "pong", "ts": _now_iso()}))
except Exception:
pass
except asyncio.TimeoutError:
# Send periodic heartbeat with cached nodes if available
hb_data: Dict[str, Any] = {
"bff_uptime_s": int(time.monotonic() - _START_TIME),
"ws_clients": len(_ws_clients),
}
if _nodes_cache.get("nodes"):
hb_data["nodes"] = [
{
"id": n["node_id"],
"online": n.get("online", False),
"router_ok": n.get("router_ok", False),
"router_latency_ms": n.get("router_latency_ms"),
}
for n in _nodes_cache["nodes"]
]
hb_data["nodes_ts"] = _nodes_cache.get("ts", "")
await websocket.send_text(json.dumps(_make_event("nodes.status", hb_data)))
except WebSocketDisconnect:
pass
except Exception as e:
logger.debug("WS error: %s", e)
finally:
_ws_clients.discard(websocket)
logger.info("WS client disconnected, total=%d", len(_ws_clients))
# ─── UI ─────────────────────────────────────────────────────────────────────
# Directory holding the static UI bundle (index.html, chat.html, …).
STATIC_DIR = Path(__file__).resolve().parent.parent / "static"
# Headers applied to HTML/meta responses so browsers never cache the UI shell.
_NO_CACHE = {"Cache-Control": "no-cache, no-store, must-revalidate", "Pragma": "no-cache"}
@app.get("/api/meta/version")
async def get_meta_version():
"""Build metadata endpoint — always no-cache, always public."""
return JSONResponse(
content={
"version": _VERSION,
"build_sha": _BUILD_SHA,
"build_time": _BUILD_TIME,
"service": "sofiia-console",
},
headers=_NO_CACHE,
)
# ─── Auth endpoints ──────────────────────────────────────────────────────────
class _LoginBody(BaseModel):
    # API key carried in the JSON body (avoids header-encoding/CORS issues).
    key: str
@app.post("/api/auth/login")
async def auth_login(body: _LoginBody, response: Response):
"""
Verify API key (sent in JSON body — avoids header encoding issues).
On success: set httpOnly session cookie, return ok=true.
No CORS/header encoding issues since key travels in request body.
"""
if not _key_valid(body.key):
raise HTTPException(status_code=401, detail="Invalid key")
token = _cookie_token(body.key)
response.set_cookie(
key=_COOKIE_NAME,
value=token,
httponly=True,
secure=_IS_PROD, # Secure=True in prod (HTTPS only)
samesite="lax",
max_age=_COOKIE_MAX_AGE,
path="/",
)
return {"ok": True, "auth": "cookie"}
@app.post("/api/auth/logout")
async def auth_logout(response: Response):
"""Clear session cookie."""
response.delete_cookie(key=_COOKIE_NAME, path="/")
return {"ok": True}
@app.get("/api/auth/check")
async def auth_check(request: Request):
"""Returns 200 if session is valid, 401 otherwise. Used by UI on startup."""
# Localhost is always open — no auth needed
client_ip = (request.client.host if request.client else "") or ""
if client_ip in ("127.0.0.1", "::1", "localhost"):
return {"ok": True, "auth": "localhost"}
configured = get_console_api_key()
if not configured:
return {"ok": True, "auth": "open"}
from .auth import _expected_cookie_token as _ect
cookie_val = request.cookies.get(_COOKIE_NAME, "")
import secrets as _sec
if cookie_val and _sec.compare_digest(cookie_val, _ect()):
return {"ok": True, "auth": "cookie"}
raise HTTPException(status_code=401, detail="Not authenticated")
@app.get("/", response_class=HTMLResponse)
async def ui_root():
index = STATIC_DIR / "index.html"
content = index.read_text(encoding="utf-8") if index.exists() else _fallback_html()
return HTMLResponse(content=content, headers=_NO_CACHE)
@app.get("/ui", response_class=HTMLResponse)
async def ui_alias():
return await ui_root()
def _fallback_html() -> str:
    """Minimal HTML page served when the static UI bundle is absent."""
    return (
        '<!DOCTYPE html><html><head><meta charset="utf-8"><title>Sofiia Console</title></head>\n'
        f"<body><h1>Sofiia Control Console v{_VERSION}</h1>\n"
        "<p>Endpoints: <code>GET /api/health</code> | <code>GET /api/status/full</code> | <code>POST /api/chat/send</code> | <code>WS /ws/events</code></p>\n"
        "</body></html>"
    )
@app.get("/chat", response_class=HTMLResponse)
async def ui_chat():
p = STATIC_DIR / "chat.html"
content = p.read_text(encoding="utf-8") if p.exists() else _fallback_html()
return HTMLResponse(content=content, headers=_NO_CACHE)
@app.get("/ops", response_class=HTMLResponse)
async def ui_ops():
p = STATIC_DIR / "ops.html"
content = p.read_text(encoding="utf-8") if p.exists() else _fallback_html()
return HTMLResponse(content=content, headers=_NO_CACHE)
@app.get("/nodes", response_class=HTMLResponse)
async def ui_nodes():
p = STATIC_DIR / "nodes.html"
content = p.read_text(encoding="utf-8") if p.exists() else _fallback_html()
return HTMLResponse(content=content, headers=_NO_CACHE)
# ── Supervisor Proxy ───────────────────────────────────────────────────────────
# Primary supervisor endpoint ("sofiia-supervisor" — presumably the compose
# service hostname; confirm) plus a localhost fallback tried on connect failure.
_SUPERVISOR_URL = os.getenv("SUPERVISOR_URL", "http://sofiia-supervisor:8080").rstrip("/")
_SUPERVISOR_FALLBACK_URL = os.getenv("SUPERVISOR_FALLBACK_URL", "http://127.0.0.1:8084").rstrip("/")
async def _supervisor_request_json(
    method: str,
    path: str,
    *,
    timeout: float = 30.0,
    json_body: Optional[Dict[str, Any]] = None,
) -> Tuple[int, Dict[str, Any]]:
    """Issue an HTTP request to the supervisor, trying the fallback URL on
    transport failure.

    Returns (status_code, payload-dict). Non-dict JSON is wrapped as
    {"data": ...}; a non-JSON body becomes {"raw": ...}; an empty body is {}.
    A 4xx/5xx answer from a *reachable* supervisor raises HTTPException
    immediately (no fallback attempt); only connection-level errors move on
    to the next base URL. Raises 502 when every URL is unreachable.
    """
    urls = [_SUPERVISOR_URL]
    if _SUPERVISOR_FALLBACK_URL and _SUPERVISOR_FALLBACK_URL not in urls:
        urls.append(_SUPERVISOR_FALLBACK_URL)
    last_err = "unavailable"
    for base in urls:
        target = f"{base}{path}"
        try:
            async with httpx.AsyncClient(timeout=timeout) as client:
                resp = await client.request(method, target, json=json_body)
        except Exception as e:
            # Transport failure → remember the error and try the next URL.
            last_err = str(e)[:200]
            continue
        if resp.status_code >= 400:
            detail = resp.text[:400] if resp.text else f"Supervisor error {resp.status_code}"
            raise HTTPException(status_code=resp.status_code, detail=detail)
        if not resp.content:
            return resp.status_code, {}
        try:
            payload = resp.json()
        except Exception:
            return resp.status_code, {"raw": resp.text[:1000]}
        if isinstance(payload, dict):
            return resp.status_code, payload
        return resp.status_code, {"data": payload}
    raise HTTPException(status_code=502, detail=f"Supervisor unreachable: {last_err}")
@app.post("/api/supervisor/runs")
async def start_supervisor_run(request: Request, _auth: str = Depends(require_auth)):
"""Start a LangGraph run on sofiia-supervisor.
Body: {"graph": "alert_triage|incident_triage|postmortem_draft|release_check",
"project_id": "<optional>", ...params}
If project_id is provided, auto-creates an agent_run dialog_node in the graph
and returns node_id in the response for UI tracking.
"""
body = await request.json()
graph_name = body.pop("graph", None)
project_id = body.pop("project_id", None)
if not graph_name:
raise HTTPException(status_code=400, detail="'graph' field is required")
try:
status_code, result = await _supervisor_request_json(
"POST",
f"/v1/graphs/{graph_name}/runs",
timeout=60.0,
json_body=body,
)
# Auto-create agent_run node if project is provided
if project_id and status_code in (200, 201, 202):
run_id = result.get("run_id") or result.get("id") or str(uuid.uuid4())
try:
pack = await _app_db.create_evidence_pack(
project_id=project_id,
run_id=run_id,
graph_name=graph_name,
result_data={"status": "started", "summary": f"Run started: {graph_name}"},
created_by="sofiia",
)
result["_node_id"] = pack.get("node_id")
except Exception as node_err:
logger.warning("evidence_pack node creation failed (non-fatal): %s", node_err)
return JSONResponse(status_code=status_code, content=result)
except HTTPException:
raise
except Exception as e:
raise HTTPException(status_code=502, detail=f"Supervisor unreachable: {e}")
@app.get("/api/supervisor/runs/{run_id}")
async def get_supervisor_run(run_id: str, _auth: str = Depends(require_auth)):
"""Get the status/result of a LangGraph run."""
try:
status_code, payload = await _supervisor_request_json(
"GET",
f"/v1/runs/{run_id}",
timeout=15.0,
)
return JSONResponse(status_code=status_code, content=payload)
except Exception as e:
raise HTTPException(status_code=502, detail=f"Supervisor unreachable: {e}")
@app.post("/api/supervisor/runs/{run_id}/cancel")
async def cancel_supervisor_run(run_id: str, _auth: str = Depends(require_auth)):
"""Cancel a running LangGraph run."""
try:
status_code, payload = await _supervisor_request_json(
"POST",
f"/v1/runs/{run_id}/cancel",
timeout=10.0,
)
return JSONResponse(status_code=status_code, content=payload)
except Exception as e:
raise HTTPException(status_code=502, detail=f"Supervisor unreachable: {e}")
@app.get("/api/supervisor/graphs")
async def list_supervisor_graphs():
"""List available LangGraph graphs (no auth — read-only discovery)."""
urls = [_SUPERVISOR_URL]
if _SUPERVISOR_FALLBACK_URL and _SUPERVISOR_FALLBACK_URL not in urls:
urls.append(_SUPERVISOR_FALLBACK_URL)
last_err = "unavailable"
for base in urls:
try:
async with httpx.AsyncClient(timeout=5.0) as client:
resp = await client.get(f"{base}/healthz")
data = resp.json()
return {
"graphs": data.get("graphs", []),
"healthy": resp.status_code == 200,
"url": base,
"state_backend": data.get("state_backend"),
}
except Exception as e:
last_err = str(e)
continue
return {"graphs": [], "healthy": False, "error": last_err}
@app.get("/api/aistalk/status")
async def aistalk_status():
"""AISTALK integration status for SOFIIA UI."""
try:
sup = await list_supervisor_graphs()
aurora = await api_aurora_health()
runtime = await _aistalk_runtime_state()
adapter_status: Dict[str, Any]
relay_health: Dict[str, Any]
if _aistalk is not None:
try:
relay_health = _aistalk.probe_health()
except Exception as e:
relay_health = {"enabled": True, "ok": False, "error": str(e)[:200]}
try:
adapter_status = _aistalk.status()
except Exception:
adapter_status = {"enabled": True, "base_url": "unknown"}
else:
relay_health = {"enabled": False, "ok": False, "error": "disabled"}
adapter_status = {"enabled": False, "base_url": ""}
return {
"aistalk_enabled": _aistalk is not None,
"aistalk_adapter": repr(_aistalk) if _aistalk is not None else "disabled",
"adapter": adapter_status,
"relay_health": relay_health,
"supervisor": sup,
"aurora": aurora,
"runtime": runtime,
"docs": {
"contract": "/docs/aistalk/contract.md",
"supervisor": "/docs/supervisor/langgraph_supervisor.md",
},
}
except Exception as e:
raise HTTPException(status_code=500, detail=str(e))
def _parse_agent_md(path: Path) -> Dict[str, Any]:
text = path.read_text(encoding="utf-8", errors="ignore")
lines = [ln.rstrip() for ln in text.splitlines()]
title = path.stem
display_name = title
role: List[str] = []
outputs: List[str] = []
boundaries: List[str] = []
capabilities: List[str] = []
intro: List[str] = []
in_section: Optional[str] = None
for raw in lines:
line = raw.strip()
if not line:
continue
if line.startswith("# "):
display_name = line[2:].strip()
continue
low = line.lower()
if low.startswith("role:"):
in_section = "role"
continue
if low.startswith("output:"):
in_section = "output"
continue
if low.startswith("outputs:"):
in_section = "output"
continue
if low.startswith("boundary:"):
in_section = "boundary"
continue
if low.startswith("boundaries:"):
in_section = "boundary"
continue
if low.startswith("capabilities:"):
in_section = "capabilities"
continue
if low.startswith("modes:") or low.startswith("rules:") or low.startswith("internal sub-pipeline"):
in_section = None
continue
if line.startswith("```"):
in_section = None
continue
if line.startswith("- "):
item = line[2:].strip()
if in_section == "role":
role.append(item)
elif in_section == "output":
outputs.append(item)
elif in_section == "boundary":
boundaries.append(item)
elif in_section == "capabilities":
capabilities.append(item)
continue
if in_section is None and not line.startswith("#"):
# Some agent role files store purpose as plain intro line without "Role:" section.
intro.append(line)
summary = role[0] if role else (intro[0] if intro else "")
return {
"id": title.lower(),
"name": display_name,
"summary": summary,
"role": role,
"outputs": outputs,
"boundaries": boundaries,
"capabilities": capabilities,
"source": str(path),
}
@app.get("/api/aistalk/catalog")
async def aistalk_catalog():
    """
    Return AISTALK subagent catalog + declared capabilities for UI rendering.

    Parses every *.md role file under the first existing roles directory;
    parse failures produce stub entries instead of failing the endpoint.
    """
    # NOTE(review): duplicates the candidate list in _aistalk_roles_root();
    # keep the two in sync if the roles directory ever moves.
    roots = [
        Path(__file__).resolve().parents[3] / "config" / "roles" / "aistalk",
        Path(__file__).resolve().parents[1] / "config" / "roles" / "aistalk",
    ]
    root = next((p for p in roots if p.exists()), None)
    if root is None:
        return {
            "ok": False,
            "error": "AISTALK roles directory not found",
            "agents": [],
            "domains": [],
        }
    agents: List[Dict[str, Any]] = []
    for p in sorted(root.glob("*.md")):
        try:
            agents.append(_parse_agent_md(p))
        except Exception as e:
            # Parse failure still yields a stub entry so the UI can show the agent.
            agents.append(
                {
                    "id": p.stem.lower(),
                    "name": p.stem,
                    "summary": "",
                    "role": [],
                    "outputs": [],
                    "boundaries": [f"parse_error: {str(e)[:120]}"],
                    "capabilities": [],
                    "source": str(p),
                }
            )
    # High-level specialization domains for UI badges/filters.
    domains = [
        {"id": "osint", "name": "OSINT & Recon", "agents": ["tracer", "stealth", "shadow"]},
        {"id": "analysis", "name": "Threat Analysis", "agents": ["neuron", "graph", "risk"]},
        {"id": "offdef", "name": "Offense/Defense", "agents": ["redteam", "blueteam", "purpleteam", "bughunter", "devteam"]},
        {"id": "forensics", "name": "Media Forensics", "agents": ["aurora"]},
        {"id": "security", "name": "Governance & Data Safety", "agents": ["vault", "quantum"]},
        {"id": "orchestration", "name": "Command & Synthesis", "agents": ["orchestrator_synthesis"]},
    ]
    return {
        "ok": True,
        "root": str(root),
        "count": len(agents),
        "agents": agents,
        "domains": domains,
    }
# Persisted AISTALK runtime settings (limits + per-agent model map) live
# beside other console caches under the Aurora data root.
_AISTALK_RUNTIME_PATH = AURORA_DATA_DIR.parent / "sofiia-console-cache" / "aistalk_runtime.json"
# Canonical AISTALK agent roster; also fixes iteration order when building
# per-agent model maps.
_AISTALK_AGENT_ORDER = [
    "orchestrator_synthesis",
    "tracer",
    "shadow",
    "stealth",
    "neuron",
    "graph",
    "bughunter",
    "redteam",
    "blueteam",
    "purpleteam",
    "risk",
    "vault",
    "quantum",
    "devteam",
    "aurora",
]
# run_id -> start timestamp of in-flight team runs (stale entries GC'd).
_aistalk_team_active_runs: Dict[str, float] = {}
# Number of in-flight direct chat requests (bounded by runtime limits).
_aistalk_chat_active: int = 0
# Guards mutations of _aistalk_chat_active across concurrent requests.
_aistalk_state_lock = asyncio.Lock()
def _aistalk_roles_root() -> Optional[Path]:
    """Locate the AISTALK roles directory, preferring the repo-level config.

    Returns None when neither candidate directory exists.
    """
    candidates = (
        Path(__file__).resolve().parents[3] / "config" / "roles" / "aistalk",
        Path(__file__).resolve().parents[1] / "config" / "roles" / "aistalk",
    )
    for candidate in candidates:
        if candidate.exists():
            return candidate
    return None
def _aistalk_resource_snapshot() -> Dict[str, Any]:
    """Snapshot host CPU/RAM plus the configured Ollama runtime knobs."""
    total_mem_gb: Optional[float] = None
    try:
        # os.sysconf is POSIX-only; any failure simply leaves RAM unknown.
        page_bytes = os.sysconf("SC_PAGE_SIZE")
        phys_pages = os.sysconf("SC_PHYS_PAGES")
        if page_bytes > 0 and phys_pages > 0:
            total_mem_gb = round((page_bytes * phys_pages) / (1024 ** 3), 1)
    except Exception:
        total_mem_gb = None
    return {
        "cpu_count": os.cpu_count() or 8,
        "memory_gb": total_mem_gb,
        "ollama_num_ctx": SOFIIA_OLLAMA_NUM_CTX,
        "ollama_num_thread": SOFIIA_OLLAMA_NUM_THREAD,
        "ollama_num_gpu": SOFIIA_OLLAMA_NUM_GPU,
    }
def _aistalk_recommended_limits(resources: Dict[str, Any]) -> Dict[str, Any]:
cpu = int(resources.get("cpu_count") or 8)
mem = resources.get("memory_gb")
mem_gb = float(mem) if isinstance(mem, (int, float)) else 0.0
if cpu >= 12 and mem_gb >= 24:
profile = "performance"
team_max = 2
chat_max = 4
elif cpu >= 8 and mem_gb >= 16:
profile = "balanced"
team_max = 1
chat_max = 3
else:
profile = "safe"
team_max = 1
chat_max = 2
return {
"profile": profile,
"max_parallel_team_runs": team_max,
"max_parallel_chat": chat_max,
"rule": (
"Aurora/forensics jobs are GPU-heavy: keep team runs low; "
"chat parallelism may be higher but bounded by CPU/RAM."
),
}
async def _aistalk_local_models() -> List[str]:
    """Return names of models installed in the local Ollama ([] on any error)."""
    base = get_ollama_url().rstrip("/")
    try:
        async with httpx.AsyncClient(timeout=5.0) as client:
            resp = await client.get(f"{base}/api/tags")
            resp.raise_for_status()
            payload = resp.json()
            found: List[str] = []
            for entry in payload.get("models") or []:
                name = str((entry or {}).get("name", "")).strip()
                if name:
                    found.append(name)
            return found
    except Exception:
        # Treat an unreachable/odd Ollama as "no local models".
        return []
def _aistalk_default_model_map(models: List[str]) -> Dict[str, str]:
    """Choose a default Ollama model per AISTALK agent from installed models.

    Heavy reasoning agents prefer the largest candidate, analysts a medium
    one, everything else a lightweight model; each tier falls back to any
    installed model, then to the hard-coded "qwen3:14b".
    """
    installed = set(models)

    def first_available(*preferred: str) -> str:
        # Strongest preferred model that is actually installed, else any
        # installed model, else the hard-coded fallback.
        for name in preferred:
            if name in installed:
                return name
        return models[0] if models else "qwen3:14b"

    heavy = first_available("qwen3.5:35b-a3b", "qwen3:14b", "gemma3:latest")
    medium = first_available("qwen3:14b", "qwen3.5:35b-a3b", "gemma3:latest")
    light = first_available("gemma3:latest", "qwen3:14b", "qwen3.5:35b-a3b")
    heavy_agents = {"orchestrator_synthesis", "risk", "neuron", "graph"}
    medium_agents = {"tracer", "shadow", "stealth", "vault", "quantum"}
    assignment: Dict[str, str] = {}
    for agent_id in _AISTALK_AGENT_ORDER:
        if agent_id in heavy_agents:
            assignment[agent_id] = heavy
        elif agent_id in medium_agents:
            assignment[agent_id] = medium
        else:
            assignment[agent_id] = light
    return assignment
def _read_aistalk_runtime() -> Dict[str, Any]:
    """Load persisted AISTALK runtime settings; {} when missing or corrupt."""
    try:
        if not _AISTALK_RUNTIME_PATH.exists():
            return {}
        parsed = json.loads(_AISTALK_RUNTIME_PATH.read_text(encoding="utf-8"))
    except Exception:
        # Unreadable/invalid file is treated as "no saved settings".
        return {}
    return parsed if isinstance(parsed, dict) else {}
def _write_aistalk_runtime(data: Dict[str, Any]) -> None:
    """Persist AISTALK runtime settings as pretty-printed UTF-8 JSON."""
    serialized = json.dumps(data, ensure_ascii=False, indent=2)
    _AISTALK_RUNTIME_PATH.parent.mkdir(parents=True, exist_ok=True)
    _AISTALK_RUNTIME_PATH.write_text(serialized, encoding="utf-8")
async def _aistalk_runtime_state() -> Dict[str, Any]:
    """Build the normalized AISTALK runtime state.

    Merges persisted settings with host-derived recommendations, validates
    each agent's model against the locally installed Ollama models, clamps
    the parallelism limits, and re-persists the normalized shape so future
    restarts start from clean values.
    """
    resources = _aistalk_resource_snapshot()
    recommended = _aistalk_recommended_limits(resources)
    models = await _aistalk_local_models()
    stored = _read_aistalk_runtime()
    limits = stored.get("limits") if isinstance(stored.get("limits"), dict) else {}
    max_team = int(limits.get("max_parallel_team_runs") or recommended["max_parallel_team_runs"])
    max_chat = int(limits.get("max_parallel_chat") or recommended["max_parallel_chat"])
    profile = str(limits.get("profile") or recommended["profile"])
    saved_models = stored.get("agent_models") if isinstance(stored.get("agent_models"), dict) else {}
    defaults = _aistalk_default_model_map(models)
    agent_models: Dict[str, str] = {}
    for aid in _AISTALK_AGENT_ORDER:
        # Fall back to defaults when the saved model is no longer installed.
        selected = str(saved_models.get(aid) or defaults.get(aid) or "")
        if models and selected not in models:
            selected = defaults.get(aid) or models[0]
        if not selected:
            selected = "qwen3:14b"
        agent_models[aid] = selected
    state = {
        "limits": {
            "profile": profile,
            "max_parallel_team_runs": max(1, min(max_team, 4)),
            "max_parallel_chat": max(1, min(max_chat, 8)),
        },
        "recommended": recommended,
        "resources": resources,
        "available_models": models,
        "agent_models": agent_models,
        "active_team_runs": len(_aistalk_team_active_runs),
        "active_chat": _aistalk_chat_active,
    }
    # Persist normalized shape for future restarts.
    _write_aistalk_runtime({"limits": state["limits"], "agent_models": state["agent_models"]})
    return state
def _aistalk_role_prompt(agent_id: str) -> str:
    """Load the role markdown for an agent as its system-prompt prefix.

    Falls back to the orchestrator role file when the agent file is
    missing, and to a generic analyst prompt when the roles directory is
    absent or unreadable.
    """
    fallback = "You are AISTALK security analyst. Respond with findings, risk, next actions."
    root = _aistalk_roles_root()
    if root is None:
        return fallback
    candidate = root / f"{agent_id}.md"
    if not candidate.exists():
        candidate = root / "orchestrator_synthesis.md"
    try:
        # Truncate so the prompt stays within local-model context budgets.
        return candidate.read_text(encoding="utf-8", errors="ignore")[:6000]
    except Exception:
        return fallback
@app.get("/api/aistalk/runtime")
async def aistalk_runtime(_auth: str = Depends(require_auth)):
    """Return the normalized AISTALK runtime state (limits, models, activity)."""
    return await _aistalk_runtime_state()
class AISTalkModelSetBody(BaseModel):
    """Request body for pinning an Ollama model to a single AISTALK agent."""
    agent_id: str  # must be a member of _AISTALK_AGENT_ORDER
    model: str  # must be installed locally when the local model list is known
@app.post("/api/aistalk/runtime/model")
async def aistalk_set_agent_model(body: AISTalkModelSetBody, _auth: str = Depends(require_auth)):
    """Persist a per-agent model override after validating agent id and model."""
    state = await _aistalk_runtime_state()
    aid = str(body.agent_id or "").strip().lower()
    if aid not in _AISTALK_AGENT_ORDER:
        raise HTTPException(status_code=400, detail=f"Unknown agent_id: {aid}")
    model = str(body.model or "").strip()
    models = state.get("available_models") or []
    # Only enforce availability when the local model list could be fetched.
    if models and model not in models:
        raise HTTPException(status_code=400, detail=f"Model not available locally: {model}")
    stored = _read_aistalk_runtime()
    # Seed missing sections from the current normalized state before writing.
    stored.setdefault("limits", state.get("limits", {}))
    stored.setdefault("agent_models", state.get("agent_models", {}))
    stored["agent_models"][aid] = model
    _write_aistalk_runtime(stored)
    return {"ok": True, "agent_id": aid, "model": model}
class AISTalkLimitsBody(BaseModel):
    """Request body for updating AISTALK parallelism limits (partial update)."""
    profile: Optional[str] = None
    max_parallel_team_runs: Optional[int] = None  # clamped to 1..4 on write
    max_parallel_chat: Optional[int] = None  # clamped to 1..8 on write
@app.post("/api/aistalk/runtime/limits")
async def aistalk_set_limits(body: AISTalkLimitsBody, _auth: str = Depends(require_auth)):
    """Persist AISTALK parallelism limits; omitted fields keep current values."""
    state = await _aistalk_runtime_state()
    stored = _read_aistalk_runtime()
    # Start from the current normalized limits, then apply provided fields.
    limits = dict(state.get("limits", {}))
    if body.profile:
        limits["profile"] = str(body.profile)
    if body.max_parallel_team_runs is not None:
        limits["max_parallel_team_runs"] = max(1, min(int(body.max_parallel_team_runs), 4))
    if body.max_parallel_chat is not None:
        limits["max_parallel_chat"] = max(1, min(int(body.max_parallel_chat), 8))
    stored["limits"] = limits
    stored.setdefault("agent_models", state.get("agent_models", {}))
    _write_aistalk_runtime(stored)
    return {"ok": True, "limits": limits}
def _is_terminal_run_status(status: str) -> bool:
s = (status or "").strip().lower()
return s in {"succeeded", "failed", "cancelled", "canceled", "timeout", "error"}
class AISTalkChatBody(BaseModel):
    """Request body for a direct single-agent AISTALK chat message."""
    message: str
    agent_id: str = "orchestrator_synthesis"  # normalized/validated by the handler
    model: Optional[str] = None  # explicit override; else per-agent mapping applies
    session_id: Optional[str] = None
    project_id: Optional[str] = None
    user_id: Optional[str] = None
    # Mutable [] default — presumably safe because pydantic copies field
    # defaults per instance; confirm if this model is ever used outside pydantic.
    history: List[Dict[str, Any]] = []
@app.post("/api/aistalk/chat")
async def aistalk_chat(body: AISTalkChatBody, request: Request, _auth: str = Depends(require_auth)):
    """Direct single-agent AISTALK chat via the local Ollama backend.

    Flow: per-IP rate limit -> acquire a bounded concurrency slot ->
    resolve agent/model -> call Ollama /api/chat -> broadcast a
    `chat.reply` event and save the exchange to memory in the background.

    Raises:
        HTTPException 429: rate limit or chat concurrency limit reached.
        HTTPException 502: Ollama request failed.
    """
    client_ip = request.client.host if request.client else "unknown"
    if not _check_rate(f"aistalk_chat:{client_ip}", max_calls=40, window_sec=60):
        raise HTTPException(status_code=429, detail="Rate limit: 40 AISTALK chat messages/min")
    state = await _aistalk_runtime_state()
    limits = state.get("limits", {})
    max_chat = int(limits.get("max_parallel_chat") or 2)
    async with _aistalk_state_lock:
        global _aistalk_chat_active
        if _aistalk_chat_active >= max_chat:
            raise HTTPException(
                status_code=429,
                detail=f"AISTALK chat busy: active={_aistalk_chat_active}, limit={max_chat}",
            )
        _aistalk_chat_active += 1
    # Normalize the agent id and pick a model that is actually installed.
    agent_id = str(body.agent_id or "orchestrator_synthesis").strip().lower()
    if agent_id not in _AISTALK_AGENT_ORDER:
        agent_id = "orchestrator_synthesis"
    selected_model = str(body.model or "").strip() or str((state.get("agent_models") or {}).get(agent_id) or "")
    if not selected_model:
        selected_model = "qwen3:14b"
    if (state.get("available_models") or []) and selected_model not in state["available_models"]:
        selected_model = (state.get("available_models") or ["qwen3:14b"])[0]
    project_id = body.project_id or "aistalk"
    session_id = body.session_id or f"aistalk_sess_{uuid.uuid4().hex[:10]}"
    user_id = body.user_id or "aistalk_user"
    try:
        role_prompt = _aistalk_role_prompt(agent_id)
        system_prompt = (
            "Ти працюєш у складі AISTALK (крипто-детективне агентство з безпеки мережі). "
            "Формат відповіді: findings -> risk -> actions. "
            "Пиши конкретно, без вигадок, позначай невизначеність.\n\n"
            + role_prompt
        )
        # Keep only the last 10 history turns to bound the prompt size.
        messages: List[Dict[str, Any]] = [{"role": "system", "content": system_prompt}]
        messages.extend(body.history[-10:])
        messages.append({"role": "user", "content": body.message})
        t0 = time.monotonic()
        async with httpx.AsyncClient(timeout=SOFIIA_OLLAMA_TIMEOUT_SEC) as client:
            r = await client.post(
                f"{get_ollama_url().rstrip('/')}/api/chat",
                json=_make_ollama_payload(
                    selected_model,
                    messages,
                    {
                        "temperature": 0.15,
                        "repeat_penalty": 1.1,
                        "num_predict": min(1024, SOFIIA_OLLAMA_NUM_PREDICT_TEXT),
                    },
                ),
            )
            r.raise_for_status()
            data = r.json()
        reply = ((data.get("message") or {}).get("content") or "").strip() or "AISTALK: порожня відповідь"
        latency_ms = int((time.monotonic() - t0) * 1000)
        _broadcast_bg(
            _make_event(
                "chat.reply",
                {
                    "text": reply[:200],
                    "provider": "ollama",
                    "model": f"ollama:{selected_model}",
                    "agent_id": agent_id,
                    "latency_ms": latency_ms,
                },
                project_id=project_id,
                session_id=session_id,
                user_id=user_id,
            )
        )
        # Fire-and-forget memory save. asyncio.create_task() is the supported
        # way to schedule from inside a running loop; the previous
        # asyncio.get_event_loop().create_task() form is deprecated.
        asyncio.create_task(
            _do_save_memory(
                body.message,
                reply,
                session_id,
                project_id,
                user_id,
                agent_id="aistalk",
            )
        )
        return {
            "ok": True,
            "project_id": project_id,
            "session_id": session_id,
            "user_id": user_id,
            "agent_id": agent_id,
            "model": f"ollama:{selected_model}",
            "response": reply,
            "meta": {"latency_ms": latency_ms, "active_chat": _aistalk_chat_active, "limit_chat": max_chat},
        }
    except HTTPException:
        raise
    except Exception as e:
        raise HTTPException(status_code=502, detail=f"AISTALK chat error: {str(e)[:200]}")
    finally:
        # Always release the concurrency slot, clamping at zero.
        async with _aistalk_state_lock:
            _aistalk_chat_active = max(0, _aistalk_chat_active - 1)
def _aistalk_autobuild_input(
graph: str,
objective: str,
input_payload: Dict[str, Any],
) -> Dict[str, Any]:
payload = dict(input_payload or {})
if graph == "incident_triage":
payload.setdefault("service", "aurora-service")
payload.setdefault("symptom", objective or "Aurora pipeline anomaly")
payload.setdefault("env", "prod")
payload.setdefault("include_traces", False)
return payload
if graph == "release_check":
payload.setdefault("service_name", "aurora-service")
payload.setdefault("diff_text", objective or "")
payload.setdefault("run_deps", True)
payload.setdefault("run_drift", True)
payload.setdefault("run_smoke", False)
return payload
if graph == "alert_triage":
# Graph is mostly autonomous; leave room for dry_run/profile overrides.
payload.setdefault("dry_run", False)
payload.setdefault("policy_profile", "default")
return payload
if graph == "postmortem_draft":
incident_id = str(payload.get("incident_id") or "").strip()
if not incident_id and objective:
m = re.search(r"(inc_[A-Za-z0-9_\-]+)", objective)
if m:
incident_id = m.group(1)
if not incident_id:
raise HTTPException(
status_code=400,
detail="postmortem_draft requires input.incident_id (e.g. inc_123abc)",
)
payload["incident_id"] = incident_id
payload.setdefault("service", "aurora-service")
payload.setdefault("env", "prod")
payload.setdefault("include_traces", False)
return payload
# Unknown/custom graph: pass-through without mutation.
return payload
@app.post("/api/aistalk/team/run")
async def aistalk_team_run(request: Request, _auth: str = Depends(require_auth)):
    """Run AISTALK team workflow via LangGraph supervisor.

    Enforces the runtime max_parallel_team_runs limit and tracks started
    run ids locally so status polling can release slots on completion.
    """
    body = await request.json()
    graph = str(body.get("graph") or "incident_triage").strip()
    objective = str(body.get("objective") or "").strip()
    input_payload = body.get("input")
    if not isinstance(input_payload, dict):
        input_payload = {}
    input_payload = _aistalk_autobuild_input(graph, objective, input_payload)
    runtime = await _aistalk_runtime_state()
    max_team_runs = int((runtime.get("limits") or {}).get("max_parallel_team_runs") or 1)
    # GC stale local entries (12h safety window).
    # NOTE(review): _aistalk_team_active_runs is mutated without
    # _aistalk_state_lock — presumably relies on single-event-loop
    # execution; confirm if the app ever runs with multiple workers.
    now_ts = time.time()
    stale = [rid for rid, ts in _aistalk_team_active_runs.items() if (now_ts - ts) > 12 * 3600]
    for rid in stale:
        _aistalk_team_active_runs.pop(rid, None)
    if len(_aistalk_team_active_runs) >= max_team_runs:
        raise HTTPException(
            status_code=429,
            detail=f"AISTALK team busy: active_runs={len(_aistalk_team_active_runs)}, limit={max_team_runs}",
        )
    sup_payload = {
        "workspace_id": str(body.get("workspace_id") or "daarion"),
        "user_id": str(body.get("user_id") or "aistalk_user"),
        "agent_id": "aistalk",
        "input": input_payload,
    }
    status_code, payload = await _supervisor_request_json(
        "POST",
        f"/v1/graphs/{graph}/runs",
        timeout=60.0,
        json_body=sup_payload,
    )
    # Track the run id only when the supervisor accepted the run.
    if status_code in (200, 201, 202) and isinstance(payload, dict):
        rid = str(payload.get("run_id") or payload.get("id") or "").strip()
        if rid:
            _aistalk_team_active_runs[rid] = time.time()
    return JSONResponse(
        status_code=status_code,
        content={
            "ok": status_code in (200, 201, 202),
            "graph": graph,
            "objective": objective,
            "active_runs": len(_aistalk_team_active_runs),
            "limit_runs": max_team_runs,
            **payload,
        },
    )
@app.post("/api/aistalk/relay/test")
async def aistalk_relay_test(request: Request, _auth: str = Depends(require_auth)):
    """Send a synthetic event to AISTALK relay and return adapter status.

    Raises HTTP 503 when the adapter is disabled.
    """
    # Tolerate empty bodies: parse JSON only when the content type says so.
    body = await request.json() if request.headers.get("content-type", "").startswith("application/json") else {}
    event_type = str(body.get("type") or "aistalk.ping").strip()
    event = _make_event(
        event_type,
        {"message": body.get("message", "relay test"), "source": "sofiia-console"},
        project_id=str(body.get("project_id") or "aistalk"),
        session_id=str(body.get("session_id") or f"aistalk_test_{uuid.uuid4().hex[:8]}"),
        user_id="sofiia",
    )
    if _aistalk is None:
        raise HTTPException(status_code=503, detail="AISTALK adapter disabled")
    _aistalk.handle_event(event)
    return {
        "ok": True,
        "queued": True,
        "event_type": event_type,
        "adapter": _aistalk.status(),
    }
@app.get("/api/aistalk/team/run/{run_id}")
async def aistalk_team_run_status(run_id: str, _auth: str = Depends(require_auth)):
    """Proxy supervisor run status; frees the local team slot on terminal states."""
    status_code, payload = await _supervisor_request_json(
        "GET",
        f"/v1/runs/{run_id}",
        timeout=20.0,
    )
    # Terminal runs release their concurrency slot immediately.
    if isinstance(payload, dict) and _is_terminal_run_status(str(payload.get("status") or "")):
        _aistalk_team_active_runs.pop(run_id, None)
    return JSONResponse(status_code=status_code, content=payload)
# ── Evidence Pack Engine ────────────────────────────────────────────────────────
@app.post("/api/projects/{project_id}/supervisor/evidence")
async def record_evidence_pack(
    project_id: str,
    request: Request,
    _auth: str = Depends(require_auth),
):
    """Record an Evidence Pack for a completed Supervisor run.
    Links the run into the Dialog Graph and auto-creates follow-up tasks.
    Returns HTTP 201 with the stored pack.
    Body: {
        "run_id": str,              # required
        "graph_name": str,          # required
        "status": "completed|failed",  # optional
        "summary": str,             # optional
        "findings": [...],          # optional
        "recommendations": [...],   # optional
        "follow_up_tasks": [        # optional - auto-created as tasks
            {"title": ..., "description": ..., "priority": "normal|high|urgent"}
        ]
    }
    """
    body = await request.json()
    run_id = body.get("run_id")
    graph_name = body.get("graph_name")
    if not run_id or not graph_name:
        raise HTTPException(status_code=400, detail="run_id and graph_name are required")
    try:
        # The full body is stored as the pack's result data.
        pack = await _app_db.create_evidence_pack(
            project_id=project_id,
            run_id=run_id,
            graph_name=graph_name,
            result_data=body,
            created_by="sofiia",
        )
        return JSONResponse(status_code=201, content=pack)
    except Exception as e:
        logger.error("record_evidence_pack failed: %s", e)
        raise HTTPException(status_code=500, detail=str(e))
# ── Graph Integrity ─────────────────────────────────────────────────────────────
@app.get("/api/projects/{project_id}/graph/integrity")
async def graph_integrity(project_id: str, _auth: str = Depends(require_auth)):
    """Run integrity checks on the project Dialog Graph.
    Returns: {"ok": bool, "violations": [...], "stats": {...}}
    Responds 200 when clean, 422 when violations were found.
    """
    try:
        result = await _app_db.check_graph_integrity(project_id)
        # 422 lets monitors alert on the status code alone.
        status_code = 200 if result["ok"] else 422
        return JSONResponse(status_code=status_code, content=result)
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))
# ── Graph Hygiene ───────────────────────────────────────────────────────────────
@app.post("/api/projects/{project_id}/graph/hygiene/run")
async def run_graph_hygiene(
    project_id: str,
    request: Request,
    _auth: str = Depends(require_auth),
):
    """Run Graph Hygiene Engine: dedup, lifecycle normalization, importance scoring.
    Body (all optional):
    {
        "dry_run": true,          // default true — compute but don't write
        "scope": "all"|"recent",  // default "all"
        "since": "ISO8601"        // required when scope=recent
    }
    Returns: {"ok": bool, "dry_run": bool, "changes": [...], "stats": {...}}
    """
    # Tolerate empty/non-JSON bodies: fall back to all defaults.
    body = await request.json() if request.headers.get("content-type", "").startswith("application/json") else {}
    dry_run = body.get("dry_run", True)
    scope = body.get("scope", "all")
    since = body.get("since")
    try:
        result = await _app_db.run_graph_hygiene(
            project_id=project_id,
            dry_run=dry_run,
            scope=scope,
            since=since,
        )
        return JSONResponse(status_code=200, content=result)
    except Exception as e:
        logger.error("run_graph_hygiene failed: %s", e)
        raise HTTPException(status_code=500, detail=str(e))
# ── Self-Reflection Engine ──────────────────────────────────────────────────────
@app.post("/api/projects/{project_id}/supervisor/reflect")
async def supervisor_reflect(
    project_id: str,
    request: Request,
    _auth: str = Depends(require_auth),
):
    """Create a Self-Reflection artifact for a completed Supervisor run.
    Analyzes the Evidence Pack and creates a 'decision' node (reflection)
    linked to the agent_run node via 'reflects_on' edge.
    Body: {
        "run_id": str,        // required
        "evidence": {         // optional — pass evidence data for richer analysis
            "summary": ...,
            "findings": [...],
            "recommendations": [...],
            "follow_up_tasks": [...]
        }
    }
    Returns: {node_id, reflection: {...scores, risks, ...}, edge_id, task_ids}
    (HTTP 201 on success, 500 on storage errors.)
    """
    body = await request.json()
    run_id = body.get("run_id")
    if not run_id:
        raise HTTPException(status_code=400, detail="run_id is required")
    evidence_data = body.get("evidence") or {}
    try:
        result = await _app_db.create_run_reflection(
            project_id=project_id,
            run_id=run_id,
            evidence_data=evidence_data,
            created_by="sofiia",
        )
        return JSONResponse(status_code=201, content=result)
    except Exception as e:
        logger.error("supervisor_reflect failed: %s", e)
        raise HTTPException(status_code=500, detail=str(e))
# ── Strategic CTO Layer: Snapshots ───────────────────────────────────────────
@app.post("/api/projects/{project_id}/graph/snapshot")
async def compute_snapshot(
    project_id: str,
    window: str = "7d",
    _auth: str = Depends(require_auth),
):
    """Compute and store a graph analytics snapshot for the project.

    Returns HTTP 201 with the stored snapshot; window defaults to "7d".
    """
    try:
        result = await _app_db.compute_graph_snapshot(project_id=project_id, window=window)
        return JSONResponse(status_code=201, content=result)
    except Exception as e:
        logger.error("compute_snapshot failed: %s", e)
        raise HTTPException(status_code=500, detail=str(e))
@app.get("/api/projects/{project_id}/graph/snapshot")
async def get_snapshot(
    project_id: str,
    window: str = "7d",
    _auth: str = Depends(require_auth),
):
    """Get the latest snapshot for the project and window.

    Raises HTTP 404 when no snapshot has been computed yet.
    """
    snap = await _app_db.get_latest_snapshot(project_id=project_id, window=window)
    if not snap:
        raise HTTPException(status_code=404, detail="No snapshot found. Run POST first.")
    return JSONResponse(content=snap)
# ── Strategic CTO Layer: Signals ─────────────────────────────────────────────
@app.post("/api/projects/{project_id}/graph/signals/recompute")
async def recompute_signals(
    project_id: str,
    window: str = "7d",
    dry_run: bool = True,  # dry_run defaults to True: compute without writing
    _auth: str = Depends(require_auth),
):
    """Run signal detection rules and upsert graph_signals."""
    try:
        result = await _app_db.recompute_graph_signals(
            project_id=project_id,
            window=window,
            dry_run=dry_run,
        )
        return JSONResponse(status_code=200, content=result)
    except Exception as e:
        logger.error("recompute_signals failed: %s", e)
        raise HTTPException(status_code=500, detail=str(e))
@app.get("/api/projects/{project_id}/graph/signals")
async def list_signals(
    project_id: str,
    status: str = "open",
    limit: int = 50,
    _auth: str = Depends(require_auth),
):
    """List graph signals for the project.

    Returns: {"signals": [...], "count": int}; filtered by status.
    """
    signals = await _app_db.get_graph_signals(project_id=project_id, status=status, limit=limit)
    return JSONResponse(content={"signals": signals, "count": len(signals)})
@app.post("/api/projects/{project_id}/graph/signals/{signal_id}/ack")
async def ack_signal(
    project_id: str,
    signal_id: str,
    _auth: str = Depends(require_auth),
):
    """Mark a graph signal as acknowledged; 404 when the signal is unknown."""
    updated = await _app_db.update_signal_status(signal_id=signal_id, new_status="ack")
    if updated:
        return JSONResponse(content=updated)
    raise HTTPException(status_code=404, detail="Signal not found")
@app.post("/api/projects/{project_id}/graph/signals/{signal_id}/resolve")
async def resolve_signal(
    project_id: str,
    signal_id: str,
    _auth: str = Depends(require_auth),
):
    """Mark a graph signal as resolved; 404 when the signal is unknown."""
    updated = await _app_db.update_signal_status(signal_id=signal_id, new_status="resolved")
    if updated:
        return JSONResponse(content=updated)
    raise HTTPException(status_code=404, detail="Signal not found")
@app.post("/api/projects/{project_id}/graph/signals/{signal_id}/dismiss")
async def dismiss_signal(
    project_id: str,
    signal_id: str,
    _auth: str = Depends(require_auth),
):
    """Mark a graph signal as dismissed; 404 when the signal is unknown."""
    updated = await _app_db.update_signal_status(signal_id=signal_id, new_status="dismissed")
    if updated:
        return JSONResponse(content=updated)
    raise HTTPException(status_code=404, detail="Signal not found")
@app.post("/api/projects/{project_id}/graph/signals/auto-resolve")
async def auto_resolve_signals(
    project_id: str,
    dry_run: bool = True,
    _auth: str = Depends(require_auth),
):
    """Check resolution criteria for all open/ack signals and auto-resolve if met.
    ?dry_run=true — compute without writing (default)
    ?dry_run=false — apply resolutions
    Returns: {ok, dry_run, checked, resolved, diff: [{signal_id, signal_type, action, reason}]}
    """
    try:
        result = await _app_db.auto_resolve_signals(
            project_id=project_id,
            dry_run=dry_run,
        )
        return JSONResponse(content=result)
    except Exception as e:
        logger.error("auto_resolve_signals failed: %s", e)
        raise HTTPException(status_code=500, detail=str(e))
@app.post("/api/projects/{project_id}/graph/signals/{signal_id}/mitigate")
async def mitigate_signal(
    project_id: str,
    signal_id: str,
    playbook_id: str = "",
    _auth: str = Depends(require_auth),
):
    """Create a deterministic mitigation plan for a signal.
    If playbook_id is provided, creates tasks from the playbook steps instead of templates.
    Otherwise uses built-in mitigation templates.
    Returns: {plan_node_id, task_ids, task_count, signal_type}
    (HTTP 201 on success, 404 when the signal/playbook lookup raises ValueError.)
    """
    try:
        if playbook_id:
            result = await _app_db.apply_playbook_to_signal(
                project_id=project_id,
                signal_id=signal_id,
                playbook_id=playbook_id,
                created_by="sofiia",
            )
        else:
            result = await _app_db.create_mitigation_plan(
                project_id=project_id,
                signal_id=signal_id,
                created_by="sofiia",
            )
        # A signal with a mitigation plan is implicitly acknowledged.
        await _app_db.update_signal_status(signal_id=signal_id, new_status="ack")
        return JSONResponse(status_code=201, content=result)
    except ValueError as e:
        raise HTTPException(status_code=404, detail=str(e))
    except Exception as e:
        logger.error("mitigate_signal failed: %s", e)
        raise HTTPException(status_code=500, detail=str(e))
# ── CTO Portfolio (Cross-Project) ────────────────────────────────────────────
@app.get("/api/cto/portfolio/snapshots")
async def portfolio_snapshots(
    window: str = "7d",
    _auth: str = Depends(require_auth),
):
    """Get the latest snapshot for every project (cross-project portfolio view).
    Returns: {projects: [{project_id, name, metrics, snapshot_at}], window}
    Each entry also carries the latest lesson bucket, trend flags, and streaks.
    """
    db = await _app_db.get_db()
    # All projects
    async with db.execute("SELECT project_id, name FROM projects ORDER BY name") as cur:
        projects = await cur.fetchall()
    result = []
    for pid, pname in projects:
        snap = await _app_db.get_latest_snapshot(pid, window)
        # Get latest lesson bucket + trend_flags
        async with db.execute(
            "SELECT date_bucket, metrics_json FROM lessons WHERE project_id=? ORDER BY date_bucket DESC LIMIT 1",
            (pid,),
        ) as cur:
            lrow = await cur.fetchone()
        lesson_bucket = None
        lesson_trend_flags = None
        if lrow:
            lesson_bucket = lrow[0]
            try:
                # NOTE(review): json is already imported at module level;
                # this local alias import is redundant but harmless.
                import json as _json
                lm = _json.loads(lrow[1] or "{}")
                lesson_trend_flags = lm.get("trend_flags")
            except Exception:
                pass
        # Compute streaks
        try:
            lesson_streaks = await _app_db.compute_lesson_streaks(pid)
        except Exception:
            lesson_streaks = None
        result.append({
            "project_id": pid,
            "name": pname,
            "metrics": snap["metrics"] if snap else None,
            "snapshot_at": snap["created_at"] if snap else None,
            "latest_lesson_bucket": lesson_bucket,
            "latest_lesson_trend_flags": lesson_trend_flags,
            "latest_lesson_streaks": lesson_streaks,
        })
    return JSONResponse(content={"projects": result, "window": window, "count": len(result)})
@app.get("/api/cto/portfolio/signals")
async def portfolio_signals(
    status: str = "open",
    severity: str = "",
    limit: int = 50,
    _auth: str = Depends(require_auth),
):
    """Get signals across all projects, ordered by severity then created_at.
    ?status=open|ack|resolved|dismissed|all
    ?severity=high,critical (comma-separated filter, optional)
    """
    db = await _app_db.get_db()
    async with db.execute("SELECT project_id, name FROM projects") as cur:
        projects = {r[0]: r[1] for r in await cur.fetchall()}
    # NOTE(review): the SQL LIMIT runs before the Python-side severity filter
    # and re-sort below, so matching rows can be dropped when more than
    # `limit` signals exist; SQL "ORDER BY severity" is alphabetical, too —
    # consider pushing the filter and a rank mapping into SQL.
    if status == "all":
        q = "SELECT *, rowid FROM graph_signals ORDER BY severity DESC, created_at DESC LIMIT ?"
        params: tuple = (limit,)
    else:
        q = "SELECT *, rowid FROM graph_signals WHERE status=? ORDER BY severity DESC, created_at DESC LIMIT ?"
        params = (status, limit)
    async with db.execute(q, params) as cur:
        rows = await cur.fetchall()
    # Severity order for sorting
    SEV_ORDER = {"critical": 0, "high": 1, "medium": 2, "low": 3}
    sev_filter = {s.strip() for s in severity.split(",") if s.strip()} if severity else set()
    signals = []
    for row in rows:
        d = dict(row)
        if "rowid" in d:
            del d["rowid"]
        try:
            d["evidence"] = json.loads(d["evidence"])
        except Exception:
            d["evidence"] = {}
        if sev_filter and d.get("severity") not in sev_filter:
            continue
        d["project_name"] = projects.get(d["project_id"], d["project_id"])
        signals.append(d)
    signals.sort(key=lambda s: (SEV_ORDER.get(s.get("severity", "low"), 3), s.get("created_at", "")))
    return JSONResponse(content={"signals": signals[:limit], "count": len(signals), "status": status})
@app.post("/api/cto/portfolio/drift/recompute")
async def portfolio_drift_recompute(
    window: str = "7d",
    dry_run: bool = False,  # unlike project-level recompute, writes by default
    _auth: str = Depends(require_auth),
):
    """Recompute portfolio-level drift signals based on lesson streaks across all projects."""
    try:
        result = await _app_db.recompute_portfolio_signals(window=window, dry_run=dry_run)
        return JSONResponse(content=result)
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))
@app.get("/api/cto/portfolio/drift/signals")
async def portfolio_drift_signals(
    status: str = "open",
    _auth: str = Depends(require_auth),
):
    """Get portfolio-level drift signals.

    Returns: {"signals": [...], "count": int}; filtered by status.
    """
    try:
        signals = await _app_db.list_portfolio_signals(status=status)
        return JSONResponse(content={"signals": signals, "count": len(signals)})
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))
@app.post("/api/cto/portfolio/streaks")
async def portfolio_streaks(
    _auth: str = Depends(require_auth),
):
    """Get streak data for all projects.

    NOTE(review): registered as POST although the handler only reads —
    confirm whether GET was intended.
    """
    try:
        db = await _app_db.get_db()
        async with db.execute("SELECT project_id, name FROM projects") as cur:
            projects = await cur.fetchall()
        result = []
        for pid, pname in projects:
            streaks = await _app_db.compute_lesson_streaks(pid)
            result.append({"project_id": pid, "name": pname, "streaks": streaks})
        return JSONResponse(content={"projects": result, "count": len(result)})
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))
# ── Playbooks (Graph Learning Layer) ─────────────────────────────────────────
@app.get("/api/projects/{project_id}/playbooks")
async def list_playbooks(
    project_id: str,
    signal_type: str = "",
    limit: int = 10,
    _auth: str = Depends(require_auth),
):
    """List playbooks for a project, ordered by success_rate desc.

    Query params:
        signal_type: optional filter forwarded to the DB layer ("" = all).
        limit: maximum number of playbooks returned.
    """
    try:
        pbs = await _app_db.list_playbooks(
            project_id=project_id,
            signal_type=signal_type,
            limit=limit,
        )
        return JSONResponse(content={"playbooks": pbs, "count": len(pbs)})
    except Exception as e:
        # Chain the cause so the original traceback survives in server logs (B904).
        raise HTTPException(status_code=500, detail=str(e)) from e
@app.post("/api/projects/{project_id}/playbooks/from-signal/{signal_id}")
async def create_playbook_from_signal(
    project_id: str,
    signal_id: str,
    _auth: str = Depends(require_auth),
):
    """Promote current mitigation of a signal into a playbook (or update existing).
    Requires signal to have plan_node_id and mitigation_task_ids in evidence.
    Returns: {playbook_id, doc_id, version_id, context_key, created, stats}
    """
    try:
        # A resolved signal counts toward the playbook's success statistics.
        db = await _app_db.get_db()
        async with db.execute(
            "SELECT status, evidence FROM graph_signals WHERE id=? AND project_id=?",
            (signal_id, project_id),
        ) as cur:
            srow = await cur.fetchone()
        resolved = srow[0] == "resolved" if srow else False
        result = await _app_db.upsert_playbook_from_signal(
            project_id=project_id,
            signal_id=signal_id,
            resolved=resolved,
        )
        return JSONResponse(status_code=201, content=result)
    except ValueError as e:
        # Domain validation failures (e.g. missing evidence fields) map to 409.
        raise HTTPException(status_code=409, detail=str(e)) from e
    except Exception as e:
        logger.error("create_playbook_from_signal failed: %s", e)
        # Chain the cause so the original traceback survives in server logs (B904).
        raise HTTPException(status_code=500, detail=str(e)) from e
# ── Portfolio Batch Recompute ─────────────────────────────────────────────────
@app.post("/api/cto/portfolio/snapshots/recompute")
async def portfolio_snapshots_recompute(
    window: str = "7d",
    force: bool = False,
    _auth: str = Depends(require_auth),
):
    """Recompute graph snapshots for ALL projects.
    Skips projects that already have a snapshot for today (date_bucket) unless force=true.
    Returns: {computed, skipped, errors[]}
    """
    db = await _app_db.get_db()
    async with db.execute("SELECT project_id FROM projects") as cur:
        project_ids = [r[0] for r in await cur.fetchall()]
    # Timezone-aware UTC date; replaces the deprecated datetime.utcnow() and the
    # redundant in-function `import datetime as _dt2` (datetime/timezone are
    # already imported at module top).
    today = datetime.now(timezone.utc).strftime("%Y-%m-%d")
    computed, skipped, errors = 0, 0, []
    for pid in project_ids:
        try:
            if not force:
                # Idempotency guard: at most one snapshot per (project, window, day).
                async with db.execute(
                    "SELECT id FROM graph_snapshots WHERE project_id=? AND window=? AND date_bucket=?",
                    (pid, window, today),
                ) as cur:
                    exists = await cur.fetchone()
                if exists:
                    skipped += 1
                    continue
            await _app_db.compute_graph_snapshot(project_id=pid, window=window)
            computed += 1
        except Exception as e:
            # Per-project failures are collected; the batch never aborts early.
            errors.append({"project_id": pid, "error": str(e)})
    return JSONResponse(content={"computed": computed, "skipped": skipped, "errors": errors})
@app.post("/api/cto/portfolio/signals/recompute")
async def portfolio_signals_recompute(
    window: str = "7d",
    dry_run: bool = False,
    _auth: str = Depends(require_auth),
):
    """Recompute signals for ALL projects.
    Returns: {results: [{project_id, new, refreshed, total}], errors[]}
    """
    db = await _app_db.get_db()
    async with db.execute("SELECT project_id FROM projects") as cur:
        rows = await cur.fetchall()
    project_ids = [row[0] for row in rows]
    results: List[Dict] = []
    errors: List[Dict] = []
    for pid in project_ids:
        try:
            diff = await _app_db.recompute_graph_signals(
                project_id=pid, window=window, dry_run=dry_run
            )
            actions = [entry.get("action") for entry in diff]
            results.append({
                "project_id": pid,
                "new": actions.count("new"),
                "refreshed": sum(1 for a in actions if a in ("refresh", "reopen")),
                "total": len(diff),
            })
        except Exception as e:
            # Collect per-project failures; other projects still get processed.
            errors.append({"project_id": pid, "error": str(e)})
    return JSONResponse(content={"results": results, "errors": errors, "dry_run": dry_run})
# ── Lessons (Graph Learning Layer) ────────────────────────────────────────────
@app.post("/api/projects/{project_id}/lessons/generate")
async def generate_lesson(
    project_id: str,
    window: str = "7d",
    dry_run: bool = True,
    _auth: str = Depends(require_auth),
):
    """Generate a weekly Lessons Learned report for a project.
    dry_run=true (default): compute and return without writing to DB.
    dry_run=false: persist lesson node + metrics + improvement tasks.
    Returns: {dry_run, date_bucket, markdown, metrics, planned_improvement_tasks, evidence}
    """
    try:
        result = await _app_db.upsert_lesson(
            project_id=project_id,
            window=window,
            dry_run=dry_run,
            created_by="sofiia",
        )
        # 201 only when something was actually persisted (dry_run=false).
        return JSONResponse(status_code=200 if dry_run else 201, content=result)
    except Exception as e:
        logger.error("generate_lesson failed: %s", e)
        # Chain the cause so the original traceback survives in server logs (B904).
        raise HTTPException(status_code=500, detail=str(e)) from e
@app.get("/api/projects/{project_id}/lessons")
async def list_lessons_endpoint(
    project_id: str,
    window: str = "7d",
    limit: int = 8,
    _auth: str = Depends(require_auth),
):
    """List lessons for a project, ordered by date_bucket desc."""
    try:
        lessons = await _app_db.list_lessons(project_id=project_id, window=window, limit=limit)
        return JSONResponse(content={"lessons": lessons, "count": len(lessons)})
    except Exception as e:
        # Chain the cause so the original traceback survives in server logs (B904).
        raise HTTPException(status_code=500, detail=str(e)) from e
@app.get("/api/projects/{project_id}/lessons/{lesson_id}")
async def get_lesson_endpoint(
    project_id: str,
    lesson_id: str,
    _auth: str = Depends(require_auth),
):
    """Get full lesson detail including markdown and linked evidence.

    Raises 404 when the lesson does not exist; other failures map to 500.
    """
    try:
        lesson = await _app_db.get_lesson_detail(project_id=project_id, lesson_id=lesson_id)
        if not lesson:
            raise HTTPException(status_code=404, detail="Lesson not found")
        return JSONResponse(content=lesson)
    except HTTPException:
        # Let deliberate HTTP errors (the 404 above) pass through untouched.
        raise
    except Exception as e:
        # Chain the cause so the original traceback survives in server logs (B904).
        raise HTTPException(status_code=500, detail=str(e)) from e
@app.post("/api/projects/{project_id}/lessons/impact/recompute")
async def recompute_lesson_impact(
    project_id: str,
    window: str = "7d",
    dry_run: bool = False,
    force: bool = False,
    _auth: str = Depends(require_auth),
):
    """Recompute impact score for the prior-bucket lesson based on current-bucket metrics.

    dry_run=true returns a preview (force=True passed through, no write);
    dry_run=false evaluates with the caller-supplied force flag.
    """
    try:
        if dry_run:
            # Preview: just return what would be computed, no write
            result = await _app_db.evaluate_lesson_impact(
                project_id=project_id, window=window, force=True
            )
            return JSONResponse(content={"dry_run": True, "preview": result})
        result = await _app_db.evaluate_lesson_impact(
            project_id=project_id, window=window, force=force
        )
        return JSONResponse(content={"dry_run": False, "result": result})
    except Exception as e:
        # Chain the cause so the original traceback survives in server logs (B904).
        raise HTTPException(status_code=500, detail=str(e)) from e
@app.post("/api/cto/portfolio/lessons/generate")
async def portfolio_lessons_generate(
    window: str = "7d",
    dry_run: bool = False,
    force: bool = False,
    _auth: str = Depends(require_auth),
):
    """Generate lessons for ALL projects.
    Skips projects that already have a lesson for the current bucket (unless force=true).
    Returns: {generated, skipped, errors[]}
    """
    db = await _app_db.get_db()
    async with db.execute("SELECT project_id FROM projects") as cur:
        pids = [row[0] for row in await cur.fetchall()]
    bucket = _app_db.compute_lesson_bucket()
    generated = 0
    skipped = 0
    errors: List[Dict] = []
    for pid in pids:
        try:
            # The dedup check only applies to real (persisting, non-forced) runs.
            if not force and not dry_run:
                async with db.execute(
                    "SELECT lesson_id FROM lessons WHERE project_id=? AND date_bucket=? AND window=?",
                    (pid, bucket, window),
                ) as cur:
                    already = await cur.fetchone()
                if already:
                    skipped += 1
                    continue
            await _app_db.upsert_lesson(project_id=pid, window=window, dry_run=dry_run)
            generated += 1
        except Exception as e:
            errors.append({"project_id": pid, "error": str(e)})
    return JSONResponse(content={
        "generated": generated,
        "skipped": skipped,
        "errors": errors,
        "dry_run": dry_run,
        "date_bucket": bucket,
    })
# ── Level 6: Governance Gates ─────────────────────────────────────────────────
@app.get("/api/projects/{project_id}/governance/gates")
async def get_governance_gates(
    project_id: str,
    window: str = "7d",
    _auth: str = Depends(require_auth),
):
    """Return latest governance gate evaluation (dry_run, no persist)."""
    try:
        result = await _app_db.evaluate_governance_gates(
            project_id=project_id, window=window, dry_run=True
        )
        return JSONResponse(content=result)
    except Exception as e:
        # Chain the cause so the original traceback survives in server logs (B904).
        raise HTTPException(status_code=500, detail=str(e)) from e
@app.post("/api/projects/{project_id}/governance/gates/evaluate")
async def evaluate_governance_gates_endpoint(
    project_id: str,
    window: str = "7d",
    dry_run: bool = False,
    _auth: str = Depends(require_auth),
):
    """Evaluate governance gates and optionally persist decision node."""
    try:
        result = await _app_db.evaluate_governance_gates(
            project_id=project_id, window=window, dry_run=dry_run
        )
        return JSONResponse(content=result)
    except Exception as e:
        # Chain the cause so the original traceback survives in server logs (B904).
        raise HTTPException(status_code=500, detail=str(e)) from e
# ── Level 6: Portfolio Drift Auto-plan / Auto-run ────────────────────────────
@app.post("/api/cto/portfolio/drift/{signal_id}/auto-plan")
async def portfolio_drift_auto_plan(
    signal_id: str,
    _auth: str = Depends(require_auth),
):
    """Populate evidence.auto_actions.runs with planned entries (dry_run=True)."""
    try:
        result = await _app_db.auto_plan_drift_signal(signal_id=signal_id)
        # The DB layer reports "unknown signal" via an error key rather than raising.
        if "error" in result:
            raise HTTPException(status_code=404, detail=result["error"])
        return JSONResponse(content=result)
    except HTTPException:
        raise
    except Exception as e:
        # Chain the cause so the original traceback survives in server logs (B904).
        raise HTTPException(status_code=500, detail=str(e)) from e
@app.post("/api/cto/portfolio/drift/{signal_id}/auto-run")
async def portfolio_drift_auto_run(
    signal_id: str,
    dry_run: bool = False,
    force: bool = False,
    _auth: str = Depends(require_auth),
):
    """Execute planned/queued workflow runs for a portfolio drift signal."""
    try:
        # Supervisor endpoint is configurable; default targets the compose service.
        supervisor_url = os.getenv("SUPERVISOR_URL", "http://sofiia-supervisor:8080")
        result = await _app_db.auto_run_drift_signal(
            signal_id=signal_id,
            dry_run=dry_run,
            force=force,
            supervisor_url=supervisor_url,
        )
        # The DB layer reports "unknown signal" via an error key rather than raising.
        if "error" in result:
            raise HTTPException(status_code=404, detail=result["error"])
        return JSONResponse(content=result)
    except HTTPException:
        raise
    except Exception as e:
        # Chain the cause so the original traceback survives in server logs (B904).
        raise HTTPException(status_code=500, detail=str(e)) from e
# ── Level 7: Governance Audit Trail ──────────────────────────────────────────
@app.get("/api/cto/audit/events")
async def audit_events_portfolio(
    scope: Optional[str] = "portfolio",
    limit: int = 100,
    event_type: Optional[str] = None,
    status: Optional[str] = None,
    since: Optional[str] = None,
    _auth: str = Depends(require_auth),
):
    """List governance audit events for portfolio (or any scope).

    project_id is pinned to "portfolio" only when scope == "portfolio";
    otherwise the scope filter alone applies.
    """
    try:
        items = await _app_db.list_governance_events(
            scope=scope, project_id="portfolio" if scope == "portfolio" else None,
            event_type=event_type, status=status, since=since, limit=limit,
        )
        return JSONResponse(content={"items": items, "count": len(items)})
    except Exception as e:
        # Chain the cause so the original traceback survives in server logs (B904).
        raise HTTPException(status_code=500, detail=str(e)) from e
@app.get("/api/projects/{project_id}/audit/events")
async def audit_events_project(
    project_id: str,
    limit: int = 100,
    event_type: Optional[str] = None,
    status: Optional[str] = None,
    since: Optional[str] = None,
    _auth: str = Depends(require_auth),
):
    """List governance audit events for a specific project."""
    try:
        items = await _app_db.list_governance_events(
            scope="project", project_id=project_id,
            event_type=event_type, status=status, since=since, limit=limit,
        )
        return JSONResponse(content={"items": items, "count": len(items)})
    except Exception as e:
        # Chain the cause so the original traceback survives in server logs (B904).
        raise HTTPException(status_code=500, detail=str(e)) from e
# ── Level 8: Agents as Projects ───────────────────────────────────────────────
import difflib as _difflib
import time as _time
# ── Agent Ops helpers ──────────────────────────────────────────────────────────
async def _fetch_agents_from_gateway(
    node_id: str,
    gateway_url: str,
    timeout_ms: Optional[int] = None,
    get_retry: int = 1,
) -> Tuple[List[Dict], Optional[str], Optional[int]]:
    """Fetch agents list from gateway /health. Returns (agents, error_str|None, latency_ms).

    Respects per-node timeout_ms and retry policy: ``get_retry`` is the number
    of extra attempts after the first. ``latency_ms`` is measured cumulatively
    from the first attempt; it is None only when no gateway_url is configured.
    """
    if not gateway_url:
        return [], f"No gateway_url configured for {node_id}", None
    timeout_sec = (timeout_ms or 2500) / 1000.0  # default 2.5s when no policy value given
    last_err = None
    attempts = get_retry + 1
    t0 = _time.monotonic()
    for attempt in range(attempts):
        try:
            async with httpx.AsyncClient(timeout=timeout_sec) as client:
                resp = await client.get(f"{gateway_url.rstrip('/')}/health")
                latency_ms = int((_time.monotonic() - t0) * 1000)
                if resp.status_code != 200:
                    last_err = f"HTTP {resp.status_code}"
                    continue  # non-200: remember the error and retry
                data = resp.json()
                raw = data.get("agents", {})
                agents: List[Dict] = []
                if isinstance(raw, dict):
                    # Dict form {agent_id: info}: normalize each entry; health is
                    # derived from whether the agent's prompt is loaded.
                    for aid, info in raw.items():
                        agents.append({
                            "agent_id": aid,
                            "display_name": info.get("name", aid),
                            "status": "healthy" if info.get("prompt_loaded") else "degraded",
                            "telegram_token_configured": info.get("telegram_token_configured", False),
                            "prompt_loaded": info.get("prompt_loaded", False),
                            "node_id": node_id,
                            "active_prompt": info.get("active_prompt"),
                            "badges": info.get("badges", []),
                            "visibility": info.get("visibility", "public"),
                            "telegram_mode": info.get("telegram_mode", "on"),
                            "lifecycle_status": info.get("lifecycle_status", "active"),
                        })
                elif isinstance(raw, list):
                    # List form: pass entries through, stamping the node id.
                    for a in raw:
                        agents.append({**a, "node_id": node_id})
                return agents, None, latency_ms
        except Exception as e:
            last_err = str(e)[:200]  # truncated to keep error payloads small
    # All attempts exhausted: report the last error with total elapsed time.
    latency_ms = int((_time.monotonic() - t0) * 1000)
    return [], last_err, latency_ms
def _node_info(node_id: str) -> Dict:
    """Resolve per-node connection info: {gateway_url, policy}."""
    # Imported lazily, matching the original, to avoid import-time coupling.
    from .config import get_node_policy
    gateway_url = get_gateway_url(node_id)
    policy = get_node_policy(node_id)
    return {"gateway_url": gateway_url, "policy": policy}
def _agent_desired_payload(override: Dict) -> Dict:
"""Canonical desired-state payload from an override row."""
return {
"display_name": override.get("display_name"),
"domain": override.get("domain"),
"system_prompt_md": override.get("system_prompt_md"),
}
def _merge_agent_with_override(agent: Dict, override: Optional[Dict]) -> Dict:
result = dict(agent)
if not override:
result["has_override"] = False
result["drift"] = False
return result
if override.get("display_name"): result["display_name"] = override["display_name"]
if override.get("domain"): result["domain"] = override["domain"]
if override.get("system_prompt_md"): result["system_prompt_md"] = override["system_prompt_md"]
result["is_hidden"] = bool(override.get("is_hidden"))
result["has_override"] = True
result["override_updated_at"] = override.get("updated_at")
result["last_applied_hash"] = override.get("last_applied_hash")
result["last_applied_at"] = override.get("last_applied_at")
# Drift: desired hash != last applied hash
desired = _agent_desired_payload(override)
desired_hash = _app_db._agent_payload_hash(desired)
result["desired_hash"] = desired_hash
active_hash = override.get("last_applied_hash")
result["drift"] = bool(active_hash and active_hash != desired_hash)
return result
async def _check_prompt_freeze(node_id: str, agent_id: str) -> bool:
    """Return True if PROMPT_FREEZE gate is active for any related project."""
    try:
        # Portfolio-level gate evaluation (dry run, nothing persisted).
        evaluation = await _app_db.evaluate_governance_gates("portfolio", window="7d", dry_run=True)
        return any(
            gate.get("name") == "PROMPT_FREEZE" and gate.get("status") != "PASS"
            for gate in evaluation.get("gates", [])
        )
    except Exception:
        # Fail open: a broken gate evaluation must not block agent operations.
        return False
# ── Agent CRUD endpoints ───────────────────────────────────────────────────────
# Agents required on every online node — if absent, signal is raised
# (each missing id produces a per-node/day governance event; see list_agents).
_REQUIRED_PER_NODE_AGENTS: List[str] = ["monitor"]
def _normalize_agent_capabilities(agent: Dict) -> Dict:
"""Add normalized capabilities: {voice, telegram} to agent dict."""
badges = agent.get("badges", [])
telegram_mode = agent.get("telegram_mode", "on")
agent_id = agent.get("agent_id", "")
agent["capabilities"] = {
"voice": agent_id == "aistalk" or "voice" in badges,
"telegram": telegram_mode != "off",
}
return agent
async def _emit_monitor_missing_event(node_id: str, bucket: str) -> None:
    """Write a governance_event when monitor is confirmed absent on an online node.

    ``bucket`` (a date string at the call site) is folded into the idempotency
    key so at most one event is recorded per node per bucket. Best-effort:
    failures are logged at warning level and swallowed.
    """
    try:
        await _app_db.append_governance_event(
            scope="portfolio",
            project_id="portfolio",
            actor_type="system",
            actor_id=None,
            event_type="node_required_agent_missing",
            idempotency_key=f"req|missing|{node_id}|monitor|{bucket}",
            severity="high",
            status="error",
            ref_type="node",
            ref_id=node_id,
            evidence={
                "v": 1,
                "message": f"Required agent 'monitor' absent on {node_id}",
                "inputs": {"node_id": node_id, "required_agent": "monitor"},
                "outputs": {"missing": True},
                "links": {},
                "timings": {},
            },
        )
    except Exception as exc:
        logger.warning("_emit_monitor_missing_event failed: %s", exc)
@app.get("/api/agents")
async def list_agents(
    nodes: str = "NODA1",
    include_hidden: bool = False,
    _auth: str = Depends(require_auth),
):
    """Fetch agents from node gateways, merge with local overrides.

    Query params:
        nodes: comma-separated node ids (normalized to upper case).
        include_hidden: include agents hidden by a local override.

    Returns {items, node_errors, stats, required_missing_nodes, nodes_queried}.
    Partial node failure never blocks other nodes — always HTTP 200.
    """
    node_ids = [n.strip().upper() for n in nodes.split(",") if n.strip()]
    # Date bucket de-duplicates "required agent missing" events to one per day.
    # NOTE(review): datetime.utcnow() is deprecated since Python 3.12; consider
    # datetime.now(timezone.utc).
    today_bucket = datetime.utcnow().strftime("%Y-%m-%d")
    all_agents: List[Dict] = []
    node_errors: List[Dict] = []
    node_stats: List[Dict] = []
    required_missing_nodes: List[Dict] = []  # nodes where required agents absent
    overrides_list = await _app_db.list_agent_overrides()
    # Index overrides by (node_id, agent_id) for O(1) lookups in the merge loop.
    overrides_map = {(o["node_id"], o["agent_id"]): o for o in overrides_list}
    for node_id in node_ids:
        ni = _node_info(node_id)
        gw_url = ni["gateway_url"]
        policy = ni["policy"]
        agents_raw, err, latency_ms = await _fetch_agents_from_gateway(
            node_id, gw_url,
            timeout_ms=policy["gateway_timeout_ms"],
            get_retry=policy["get_retry"],
        )
        if err:
            node_errors.append({
                "node_id": node_id, "error": err,
                "gateway_url": gw_url, "latency_ms": latency_ms,
                "node_role": policy["node_role"],
            })
            node_stats.append({"node_id": node_id, "ok": False, "count": 0,
                               "latency_ms": latency_ms})
            # Node offline → skip required check (not "missing", just "unreachable")
        else:
            count = 0
            present_agent_ids: Set[str] = set()
            for agent in agents_raw:
                override = overrides_map.get((node_id, agent["agent_id"]))
                merged = _merge_agent_with_override(agent, override)
                merged = _normalize_agent_capabilities(merged)
                # Hidden agents are filtered out unless explicitly requested.
                if not include_hidden and merged.get("is_hidden"):
                    continue
                merged["latency_ms"] = latency_ms
                all_agents.append(merged)
                present_agent_ids.add(agent["agent_id"])
                count += 1
            node_stats.append({"node_id": node_id, "ok": True, "count": count,
                               "latency_ms": latency_ms,
                               "node_role": policy["node_role"]})
            # Required agent check — only for online nodes
            for req_id in _REQUIRED_PER_NODE_AGENTS:
                if req_id not in present_agent_ids:
                    required_missing_nodes.append({
                        "node_id": node_id,
                        "agent_id": req_id,
                        "reason": "absent_from_registry",
                    })
                    # Fire-and-forget audit event so the response isn't delayed.
                    # NOTE(review): the task reference is not retained; asyncio may
                    # garbage-collect a pending task — consider keeping a reference.
                    asyncio.create_task(_emit_monitor_missing_event(node_id, today_bucket))
    # Unhealthy agents sort last; within a group, alphabetical by display name.
    all_agents.sort(key=lambda a: (a.get("status") != "healthy", a.get("display_name", "").lower()))
    nodes_ok = sum(1 for s in node_stats if s["ok"])
    return JSONResponse(content={
        "items": all_agents,
        "node_errors": node_errors,
        "stats": {"nodes_ok": nodes_ok, "nodes_total": len(node_ids), "agents_total": len(all_agents)},
        "required_missing_nodes": required_missing_nodes,
        "nodes_queried": node_ids,
    })
@app.get("/api/agents/{node_id}/{agent_id}")
async def get_agent(node_id: str, agent_id: str, _auth: str = Depends(require_auth)):
    """Return a single agent merged with its local override.

    When the gateway does not report the agent but an override exists, the
    stored override is served with status "unknown" and node_offline=True.
    404 only when neither the gateway nor the override knows the agent.
    """
    node_id = node_id.upper()
    ni = _node_info(node_id)
    policy = ni["policy"]
    agents_raw, err, latency_ms = await _fetch_agents_from_gateway(
        node_id, ni["gateway_url"],
        timeout_ms=policy["gateway_timeout_ms"],
        get_retry=policy["get_retry"],
    )
    override = await _app_db.get_agent_override(node_id, agent_id)
    agent = next((a for a in agents_raw if a["agent_id"] == agent_id), None)
    if not agent:
        if override:
            # Serve the desired state from the override when the node can't confirm.
            desired = _agent_desired_payload(override)
            return JSONResponse(content={"agent": {
                **override, "status": "unknown", "node_offline": True,
                "desired_hash": _app_db._agent_payload_hash(desired), "drift": False,
                "latency_ms": latency_ms,
            }})
        raise HTTPException(status_code=404, detail=f"Agent '{agent_id}' not found on {node_id}")
    merged = _merge_agent_with_override(agent, override)
    merged["latency_ms"] = latency_ms
    # node_error is included even on success (None when the gateway call was clean).
    return JSONResponse(content={"agent": merged, "node_error": err})
@app.get("/api/agents/{node_id}/{agent_id}/versions")
async def list_agent_versions(
    node_id: str, agent_id: str,
    limit: int = 10,
    _auth: str = Depends(require_auth),
):
    """Return version history for an agent override (up to `limit` entries)."""
    node_id = node_id.upper()
    history = await _app_db.list_agent_versions(node_id, agent_id, limit=limit)
    return JSONResponse(content={"versions": history})
class AgentOverridePatch(BaseModel):
    """Partial-update payload for a local agent override; None fields are left untouched."""
    display_name: Optional[str] = None
    domain: Optional[str] = None
    system_prompt_md: Optional[str] = None
    is_hidden: Optional[bool] = None
@app.patch("/api/agents/{node_id}/{agent_id}")
async def patch_agent_override(
    node_id: str, agent_id: str,
    body: AgentOverridePatch,
    _auth: str = Depends(require_auth),
):
    """Save local override (does NOT push to node). Creates a version snapshot.

    An audit event records which (non-None) fields were supplied and the
    resulting version hash.
    """
    node_id = node_id.upper()
    override = await _app_db.upsert_agent_override(
        node_id, agent_id,
        display_name=body.display_name,
        domain=body.domain,
        system_prompt_md=body.system_prompt_md,
        is_hidden=body.is_hidden,
    )
    # Audit: agent_override_saved
    # NOTE(review): body.dict() is the pydantic v1 API; v2 renames it model_dump().
    await _app_db.append_governance_event(
        scope="project", project_id=agent_id, actor_type="user",
        event_type="agent_override_saved",
        idempotency_key=f"aos|{node_id}|{agent_id}|{override.get('version_hash','')}",
        severity="info", status="ok",
        ref_type="agent", ref_id=agent_id,
        evidence=_app_db._make_evidence(
            message=f"Override saved for {agent_id} on {node_id}",
            outputs={"version_hash": override.get("version_hash"), "fields_changed": [
                k for k, v in body.dict(exclude_none=True).items()
            ]},
        ),
    )
    return JSONResponse(content={"override": override, "saved": True})
@app.post("/api/agents/{node_id}/{agent_id}/reset")
async def reset_agent_override(node_id: str, agent_id: str, _auth: str = Depends(require_auth)):
    """Remove local override, revert to registry state."""
    normalized_node = node_id.upper()
    await _app_db.delete_agent_override(normalized_node, agent_id)
    return JSONResponse(content={"reset": True, "node_id": normalized_node, "agent_id": agent_id})
# ── Safe Apply v2 ──────────────────────────────────────────────────────────────
@app.post("/api/agents/{node_id}/{agent_id}/apply")
async def apply_agent_override(
    node_id: str, agent_id: str,
    dry_run: bool = True,
    plan_id: Optional[str] = None,
    force: bool = False,
    _auth: str = Depends(require_auth),
):
    """Safe Apply v2.

    dry_run=true → returns diff_text + will_change + plan_id (sha256 of desired state).
    dry_run=false → requires plan_id to match; applies and stores last_applied_hash.

    Raises 404 when no local override exists, 409 on plan_id mismatch;
    returns 423 when the PROMPT_FREEZE governance gate blocks a real apply.
    """
    node_id = node_id.upper()
    # Governance gate check: PROMPT_FREEZE (skipped for dry-runs and force=true)
    if not dry_run and not force:
        frozen = await _check_prompt_freeze(node_id, agent_id)
        if frozen:
            return JSONResponse(
                status_code=423,
                content={"error": "PROMPT_FREEZE gate is active. Use force=true to override (requires review).",
                         "gate": "PROMPT_FREEZE", "node_id": node_id, "agent_id": agent_id},
            )
    override = await _app_db.get_agent_override(node_id, agent_id)
    if not override:
        raise HTTPException(status_code=404, detail="No local override found. Use PATCH first.")
    desired = _agent_desired_payload(override)
    computed_plan_id = _app_db._agent_payload_hash(desired)
    # Fetch current active prompt for diff.
    # BUGFIX: _fetch_agents_from_gateway returns a 3-tuple (agents, err, latency_ms);
    # the previous 2-name unpack raised "too many values to unpack" on every call.
    gw_url = get_gateway_url(node_id)
    agents_raw, _err, _latency_ms = await _fetch_agents_from_gateway(node_id, gw_url)
    active_agent = next((a for a in agents_raw if a["agent_id"] == agent_id), None)
    active_prompt = active_agent.get("active_prompt", "") if active_agent else ""
    desired_prompt = desired.get("system_prompt_md") or ""
    # Build unified diff between the live prompt and the desired prompt.
    diff_lines = list(_difflib.unified_diff(
        (active_prompt or "").splitlines(keepends=True),
        desired_prompt.splitlines(keepends=True),
        fromfile=f"{agent_id}:active",
        tofile=f"{agent_id}:desired",
        n=3,
    ))
    diff_text = "".join(diff_lines) if diff_lines else ""
    # Domain changes don't show up in the prompt diff, so count them separately.
    will_change = bool(diff_text) or (override.get("domain") is not None)
    if dry_run:
        # Audit: agent_apply_planned
        await _app_db.append_governance_event(
            scope="project", project_id=agent_id, actor_type="user",
            event_type="agent_apply_planned",
            idempotency_key=f"aap|{node_id}|{agent_id}|{computed_plan_id}",
            severity="info", status="ok",
            ref_type="agent", ref_id=agent_id,
            evidence=_app_db._make_evidence(
                message=f"Apply planned (dry-run) for {agent_id}@{node_id}",
                outputs={"will_change": will_change, "plan_id": computed_plan_id,
                         "diff_lines": len(diff_lines)},
            ),
        )
        return JSONResponse(content={
            "dry_run": True, "will_change": will_change,
            "plan_id": computed_plan_id,
            "diff_text": diff_text,
            "desired": desired,
            "node_id": node_id, "agent_id": agent_id,
        })
    # Apply: validate plan_id (only enforced when the caller supplied one).
    if plan_id and plan_id != computed_plan_id:
        raise HTTPException(
            status_code=409,
            detail=f"plan_id mismatch: provided={plan_id} computed={computed_plan_id}. "
                   "Re-run dry_run=true to get fresh plan_id.",
        )
    applied: List[Dict] = []
    errors_apply: List[Dict] = []
    if desired_prompt and gw_url:
        try:
            async with httpx.AsyncClient(timeout=10.0) as client:
                resp = await client.post(
                    f"{gw_url.rstrip('/')}/admin/agents/{agent_id}/prompt",
                    json={"prompt": desired_prompt},
                    headers={"X-Admin-Token": os.getenv("GATEWAY_ADMIN_TOKEN", "")},
                )
                if resp.status_code in (200, 201, 204):
                    applied.append({"action": "update_system_prompt", "status": "ok"})
                else:
                    errors_apply.append({"action": "update_system_prompt",
                                         "error": f"HTTP {resp.status_code}: {resp.text[:200]}"})
        except Exception as e:
            errors_apply.append({"action": "update_system_prompt", "error": str(e)[:300]})
    success = len(applied) > 0 and len(errors_apply) == 0
    # Mark last_applied_hash if successful so drift detection has a baseline.
    if success:
        await _app_db.upsert_agent_override(
            node_id, agent_id, _mark_applied_hash=computed_plan_id,
        )
    # Audit with distinct event types for success vs failure.
    evt_type = "agent_apply_executed" if success else "agent_apply_failed"
    await _app_db.append_governance_event(
        scope="project", project_id=agent_id, actor_type="user",
        event_type=evt_type,
        idempotency_key=f"aae|{node_id}|{agent_id}|{computed_plan_id}|{'ok' if success else 'fail'}",
        severity="info" if success else "high", status="ok" if success else "error",
        ref_type="agent", ref_id=agent_id,
        evidence=_app_db._make_evidence(
            message=f"Apply {'succeeded' if success else 'failed'} for {agent_id}@{node_id}",
            outputs={"plan_id": computed_plan_id, "applied": applied, "errors": errors_apply},
        ),
    )
    return JSONResponse(content={
        "dry_run": False, "success": success,
        "plan_id": computed_plan_id,
        "applied": applied, "errors": errors_apply,
        "node_id": node_id, "agent_id": agent_id,
    })
@app.post("/api/agents/{node_id}/{agent_id}/rollback")
async def rollback_agent_override(
    node_id: str, agent_id: str,
    version_hash: str,
    _auth: str = Depends(require_auth),
):
    """Rollback agent override to a specific version by version_hash.

    Only restores the local override (display_name, domain, system_prompt_md);
    it does not push anything to the node. 404 when the version is unknown.
    """
    node_id = node_id.upper()
    version = await _app_db.get_agent_version_by_hash(node_id, agent_id, version_hash)
    if not version:
        raise HTTPException(status_code=404, detail=f"Version {version_hash} not found")
    payload = version["payload"]
    # Restore the override to this version's payload
    updated = await _app_db.upsert_agent_override(
        node_id, agent_id,
        display_name=payload.get("display_name"),
        domain=payload.get("domain"),
        system_prompt_md=payload.get("system_prompt_md"),
    )
    # Audit (idempotency key includes a timestamp: every rollback is recorded).
    await _app_db.append_governance_event(
        scope="project", project_id=agent_id, actor_type="user",
        event_type="agent_rollback_executed",
        idempotency_key=f"arb|{node_id}|{agent_id}|{version_hash}|{_app_db._now()}",
        severity="warn", status="ok",
        ref_type="agent", ref_id=agent_id,
        evidence=_app_db._make_evidence(
            message=f"Rollback to version {version_hash} for {agent_id}@{node_id}",
            outputs={"version_hash": version_hash, "created_at": version.get("created_at")},
        ),
    )
    return JSONResponse(content={
        "rolled_back": True, "version_hash": version_hash,
        "override": updated, "node_id": node_id, "agent_id": agent_id,
    })
# ── Bulk Agent Actions (multi-node + canary) ───────────────────────────────────
async def _apply_single_agent(
    node_id: str,
    override: Dict,
    agents_map: Dict,
    gw_url: str,
    apply_timeout_sec: float,
) -> Dict:
    """Apply a single agent override. Returns result dict with status field.

    Status values: "applied" (prompt pushed, applied hash recorded), "failed"
    (gateway rejected or errored), "skipped" (no prompt or no gateway URL).
    ``drift`` in the result reflects whether desired and active prompts differ.
    """
    aid = override["agent_id"]
    desired = _agent_desired_payload(override)
    plan_id = _app_db._agent_payload_hash(desired)
    active_agent = agents_map.get(aid, {})
    active_prompt = active_agent.get("active_prompt", "") or ""
    desired_prompt = desired.get("system_prompt_md") or ""
    will_change = desired_prompt != active_prompt
    if not desired_prompt or not gw_url:
        # Nothing to push (or nowhere to push it) — report skipped, not failed.
        return {"node_id": node_id, "agent_id": aid, "status": "skipped",
                "plan_id": plan_id, "drift": will_change,
                "error": "no prompt or no gateway_url"}
    applied_ok = False
    err_msg = None
    try:
        async with httpx.AsyncClient(timeout=apply_timeout_sec) as client:
            resp = await client.post(
                f"{gw_url.rstrip('/')}/admin/agents/{aid}/prompt",
                json={"prompt": desired_prompt},
                headers={"X-Admin-Token": os.getenv("GATEWAY_ADMIN_TOKEN", "")},
            )
            applied_ok = resp.status_code in (200, 201, 204)
            if not applied_ok:
                err_msg = f"HTTP {resp.status_code}: {resp.text[:100]}"
    except Exception as e:
        err_msg = str(e)[:200]  # truncated to keep result payloads small
    if applied_ok:
        # Record the applied hash so subsequent drift checks use it as baseline.
        await _app_db.upsert_agent_override(node_id, aid, _mark_applied_hash=plan_id)
    return {
        "node_id": node_id, "agent_id": aid,
        "status": "applied" if applied_ok else "failed",
        "plan_id": plan_id, "drift": will_change,
        "error": err_msg,
    }
@app.post("/api/agents/bulk/apply")
async def bulk_apply_agents(
    nodes: str = "NODA1",
    node: Optional[str] = None,  # legacy single-node param
    dry_run: bool = True,
    mode: str = "all",  # "all" | "canary"
    limit: int = 2,  # canary: max N agents
    _auth: str = Depends(require_auth),
):
    """Apply local overrides across one or many nodes.
    mode=canary: apply first `limit` agents with drift=True, stop on first failure.
    Returns {results, node_errors, summary}.

    Every stage (plan, canary start/stop, completion) is written to the
    governance audit trail under a shared bulk_run_id.
    """
    # Support legacy ?node= param
    raw_nodes = node.upper() if node else nodes
    node_ids = [n.strip().upper() for n in raw_nodes.split(",") if n.strip()]
    all_results: List[Dict] = []
    node_errors: List[Dict] = []
    # Short random id correlating all audit events of this bulk run.
    bulk_run_id = str(uuid.uuid4())[:8]
    # Audit: bulk plan created
    await _app_db.append_governance_event(
        scope="portfolio", project_id="portfolio", actor_type="user",
        event_type="agent_bulk_plan_created",
        idempotency_key=f"abpc|{bulk_run_id}|{raw_nodes}|{mode}",
        severity="info", status="ok",
        evidence=_app_db._make_evidence(
            message=f"Bulk {'canary' if mode=='canary' else 'apply'} planned: nodes={raw_nodes} dry_run={dry_run}",
            outputs={"mode": mode, "limit": limit, "nodes": node_ids, "dry_run": dry_run},
        ),
    )
    for node_id in node_ids:
        ni = _node_info(node_id)
        policy = ni["policy"]
        gw_url = ni["gateway_url"]
        apply_timeout_sec = policy["apply_timeout_ms"] / 1000.0
        overrides = await _app_db.list_agent_overrides(node_id)
        agents_raw, err, latency_ms = await _fetch_agents_from_gateway(
            node_id, gw_url,
            timeout_ms=policy["gateway_timeout_ms"],
            get_retry=policy["get_retry"],
        )
        # Unreachable node with no data at all: record the error, move on.
        if err and not agents_raw:
            node_errors.append({"node_id": node_id, "error": err, "latency_ms": latency_ms})
            continue
        agents_map = {a["agent_id"]: a for a in agents_raw}
        # Select candidates: non-hidden, sorted deterministically by agent_id
        candidates = sorted(
            [o for o in overrides if not o.get("is_hidden")],
            key=lambda o: o["agent_id"],
        )
        if mode == "canary":
            # For canary: only agents with drift
            drift_candidates = []
            for o in candidates:
                desired = _agent_desired_payload(o)
                plan_id = _app_db._agent_payload_hash(desired)
                is_drift = bool(o.get("last_applied_hash") and o["last_applied_hash"] != plan_id)
                if is_drift:
                    drift_candidates.append(o)
            candidates = drift_candidates[:limit]
        if dry_run:
            # Dry run: report what would be pushed; no gateway calls, no writes.
            for override in candidates:
                aid = override["agent_id"]
                desired = _agent_desired_payload(override)
                plan_id = _app_db._agent_payload_hash(desired)
                active_agent = agents_map.get(aid, {})
                active_prompt = active_agent.get("active_prompt", "") or ""
                desired_prompt = desired.get("system_prompt_md") or ""
                all_results.append({
                    "node_id": node_id, "agent_id": aid, "status": "planned",
                    "plan_id": plan_id, "drift": desired_prompt != active_prompt, "error": None,
                })
            continue
        # Canary: log start
        if mode == "canary" and candidates:
            await _app_db.append_governance_event(
                scope="portfolio", project_id="portfolio", actor_type="user",
                event_type="agent_bulk_canary_started",
                idempotency_key=f"abcs|{bulk_run_id}|{node_id}",
                severity="info", status="ok",
                evidence=_app_db._make_evidence(
                    message=f"Canary apply started: {len(candidates)} agents on {node_id}",
                    outputs={"agents": [o["agent_id"] for o in candidates], "limit": limit},
                ),
            )
        canary_stopped = False
        for override in candidates:
            # Check governance gate per agent
            frozen = await _check_prompt_freeze(node_id, override["agent_id"])
            if frozen:
                all_results.append({
                    "node_id": node_id, "agent_id": override["agent_id"],
                    "status": "blocked", "plan_id": None, "drift": True,
                    "error": "PROMPT_FREEZE gate active",
                })
                continue
            result = await _apply_single_agent(
                node_id, override, agents_map, gw_url, apply_timeout_sec,
            )
            all_results.append(result)
            # Canary stop-on-failure
            if mode == "canary" and result["status"] == "failed":
                canary_stopped = True
                # Mark remaining as skipped (candidates not yet in this node's results)
                remaining_ids = {o["agent_id"] for o in candidates} - {r["agent_id"] for r in all_results if r["node_id"] == node_id}
                for rid in sorted(remaining_ids):
                    all_results.append({
                        "node_id": node_id, "agent_id": rid, "status": "skipped",
                        "plan_id": None, "drift": True,
                        "error": f"canary stopped after failure of {result['agent_id']}",
                    })
                await _app_db.append_governance_event(
                    scope="portfolio", project_id="portfolio", actor_type="user",
                    event_type="agent_bulk_canary_stopped",
                    idempotency_key=f"abcstop|{bulk_run_id}|{node_id}|{result['agent_id']}",
                    severity="high", status="error",
                    evidence=_app_db._make_evidence(
                        message=f"Canary stopped on {result['agent_id']}@{node_id}: {result['error']}",
                        outputs={"failed_agent": result["agent_id"], "error": result["error"]},
                    ),
                )
                break
        # Canary completed without a failure: record success for this node.
        if mode == "canary" and not canary_stopped and candidates:
            await _app_db.append_governance_event(
                scope="portfolio", project_id="portfolio", actor_type="user",
                event_type="agent_bulk_apply_completed",
                idempotency_key=f"abac|{bulk_run_id}|{node_id}",
                severity="info", status="ok",
                evidence=_app_db._make_evidence(
                    message=f"Canary apply completed on {node_id}: {len(candidates)} agents",
                    outputs={"agents_applied": [r["agent_id"] for r in all_results
                                                if r["node_id"] == node_id and r["status"] == "applied"]},
                ),
            )
    # Build summary: counts per result status (planned/applied/failed/skipped/blocked).
    status_counts: Dict[str, int] = {}
    for r in all_results:
        status_counts[r["status"]] = status_counts.get(r["status"], 0) + 1
    return JSONResponse(content={
        "results": all_results,
        "node_errors": node_errors,
        "summary": status_counts,
        "dry_run": dry_run,
        "mode": mode,
        "bulk_run_id": bulk_run_id,
    })
@app.post("/api/agents/bulk/diff")
async def bulk_diff_agents(
    nodes: str = "NODA1",
    node: Optional[str] = None,
    _auth: str = Depends(require_auth),
):
    """Summarise prompt drift for every agent override across one or more nodes.

    `node` (single id) takes precedence over `nodes` (comma-separated list).
    Gateway fetch failures are recorded in `node_errors`; diffs for that node
    are then computed against an empty active prompt.
    """
    selector = node.upper() if node else nodes
    node_ids = [part.strip().upper() for part in selector.split(",") if part.strip()]
    report: List[Dict] = []
    node_errors: List[Dict] = []
    for nid in node_ids:
        info = _node_info(nid)
        pol = info["policy"]
        overrides = await _app_db.list_agent_overrides(nid)
        agents_raw, err, latency_ms = await _fetch_agents_from_gateway(
            nid, info["gateway_url"],
            timeout_ms=pol["gateway_timeout_ms"],
            get_retry=pol["get_retry"],
        )
        if err:
            node_errors.append({"node_id": nid, "error": err, "latency_ms": latency_ms})
        agents_by_id = {agent["agent_id"]: agent for agent in agents_raw}
        for ov in overrides:
            aid = ov["agent_id"]
            desired = _agent_desired_payload(ov)
            plan_id = _app_db._agent_payload_hash(desired)
            active_prompt = agents_by_id.get(aid, {}).get("active_prompt") or ""
            desired_prompt = desired.get("system_prompt_md") or ""
            # n=2 keeps the context window small; the UI only shows a preview.
            diff_lines = list(_difflib.unified_diff(
                active_prompt.splitlines(keepends=True),
                desired_prompt.splitlines(keepends=True),
                fromfile=f"{aid}:active", tofile=f"{aid}:desired", n=2,
            ))
            applied_hash = ov.get("last_applied_hash")
            report.append({
                "node_id": nid, "agent_id": aid,
                "plan_id": plan_id,
                "last_applied_hash": applied_hash,
                # Drift only means something once a hash has been applied.
                "drift": bool(applied_hash and applied_hash != plan_id),
                "diff_lines": len(diff_lines),
                "diff_text": "".join(diff_lines[:60]),
            })
    return JSONResponse(content={"report": report, "node_errors": node_errors,
                                 "nodes_queried": node_ids})
@app.get("/api/agents/export/prompts")
async def export_agent_prompts(
    nodes: str = "NODA1",
    node: Optional[str] = None,
    _auth: str = Depends(require_auth),
):
    """Export every agent's override-merged system prompt as one JSON bundle.

    `node` (single id) takes precedence over `nodes` (comma-separated list).
    Nodes whose gateway cannot be reached are reported in `node_errors` but do
    not abort the export.
    """
    selector = node.upper() if node else nodes
    node_ids = [part.strip().upper() for part in selector.split(",") if part.strip()]
    bundle: List[Dict] = []
    node_errors: List[Dict] = []
    for nid in node_ids:
        info = _node_info(nid)
        pol = info["policy"]
        overrides = await _app_db.list_agent_overrides(nid)
        agents_raw, err, latency_ms = await _fetch_agents_from_gateway(
            nid, info["gateway_url"],
            timeout_ms=pol["gateway_timeout_ms"],
            get_retry=pol["get_retry"],
        )
        if err:
            node_errors.append({"node_id": nid, "error": err, "latency_ms": latency_ms})
        agents_by_id = {agent["agent_id"]: agent for agent in agents_raw}
        overrides_by_id = {ov["agent_id"]: ov for ov in overrides}
        for aid, agent in agents_by_id.items():
            merged = _merge_agent_with_override(agent, overrides_by_id.get(aid))
            bundle.append({
                "agent_id": aid, "node_id": nid,
                "display_name": merged.get("display_name", aid),
                "domain": merged.get("domain"),
                "system_prompt_md": merged.get("system_prompt_md"),
                "has_override": merged.get("has_override", False),
            })
    # Deterministic ordering so repeated exports diff cleanly.
    bundle.sort(key=lambda entry: (entry["node_id"], entry["agent_id"]))
    return JSONResponse(content={
        "nodes_queried": node_ids,
        "exported_at": _app_db._now(),
        "count": len(bundle),
        "agents": bundle,
        "node_errors": node_errors,
    })
# ── Kling AI proxy ────────────────────────────────────────────────────────────
@app.get("/api/aurora/kling/health")
async def console_kling_health() -> Dict[str, Any]:
    """Proxy the Aurora Kling health check; transport failures become payload."""
    try:
        result = await _aurora_request_json(
            "GET", "/api/aurora/kling/health", timeout=12.0, retries=1,
        )
    except Exception as exc:  # never raise from a health probe
        result = {"ok": False, "error": str(exc)}
    return result
@app.post("/api/aurora/kling/enhance")
async def console_kling_enhance_plain(
    job_id: str = Form(...),
    prompt: str = Form("enhance video quality, improve sharpness and clarity"),
    negative_prompt: str = Form("noise, blur, artifacts, distortion"),
    mode: str = Form("pro"),
    duration: str = Form("5"),
    cfg_scale: float = Form(0.5),
) -> Dict[str, Any]:
    """Forward a form-encoded Kling enhance request to the Aurora service."""
    form_fields = {
        "job_id": job_id,
        "prompt": prompt,
        "negative_prompt": negative_prompt,
        "mode": mode,
        "duration": duration,
        # Form payloads are string-valued; serialise the float explicitly.
        "cfg_scale": str(cfg_scale),
    }
    return await _aurora_request_json(
        "POST", "/api/aurora/kling/enhance",
        data=form_fields, timeout=120.0, retries=1,
    )
@app.post("/api/aurora/kling/enhance/{job_id}")
async def console_kling_enhance(
    job_id: str,
    prompt: str = Form("enhance video quality, improve sharpness and clarity"),
    negative_prompt: str = Form("noise, blur, artifacts, distortion"),
    mode: str = Form("pro"),
    duration: str = Form("5"),
    cfg_scale: float = Form(0.5),
) -> Dict[str, Any]:
    """Path-parameter variant of the enhance endpoint; delegates to the
    form-only handler so both routes share one implementation."""
    return await console_kling_enhance_plain(
        job_id=job_id,
        prompt=prompt,
        negative_prompt=negative_prompt,
        mode=mode,
        duration=duration,
        cfg_scale=cfg_scale,
    )
@app.get("/api/aurora/kling/status/{job_id}")
async def console_kling_status(job_id: str) -> Dict[str, Any]:
    """Proxy a Kling job-status lookup to Aurora."""
    status_path = f"/api/aurora/kling/status/{job_id}"
    return await _aurora_request_json("GET", status_path, timeout=20.0, retries=2)
@app.get("/api/aurora/kling/task/{task_id}")
async def console_kling_task(task_id: str, endpoint: str = Query("video2video")) -> Dict[str, Any]:
    """Proxy a Kling task lookup to Aurora.

    Fix: `task_id` and `endpoint` were interpolated into the forwarded URL
    verbatim, so reserved characters (``&``, ``#``, ``?``, spaces) could
    corrupt the proxied query string. Both values are now percent-encoded
    with `urllib.parse.quote` (already imported at module level).
    """
    path = (
        f"/api/aurora/kling/task/{quote(task_id, safe='')}"
        f"?endpoint={quote(endpoint, safe='')}"
    )
    return await _aurora_request_json("GET", path, timeout=20.0, retries=2)
@app.get("/api/aurora/plates/{job_id}")
async def console_plates(job_id: str) -> Dict[str, Any]:
    """Proxy a plates lookup for the given job to Aurora."""
    plates_path = f"/api/aurora/plates/{job_id}"
    return await _aurora_request_json("GET", plates_path, timeout=15.0, retries=2)
# ── Sofiia Auto-Router & Budget Dashboard proxy ────────────────────────────────
async def _router_request_json(method: str, path: str, json_body: Optional[Dict] = None, timeout: float = 20.0) -> Dict[str, Any]:
    """Forward a JSON request to the Router service of the first registered node.

    Args:
        method: HTTP method; anything other than "GET" is sent as POST,
            matching the original two-branch behavior.
        path: router-relative path, e.g. "/v1/sofiia/budget".
        json_body: optional JSON payload for non-GET requests.
        timeout: total request timeout in seconds.

    Returns:
        The parsed JSON response, or ``{"error": "..."}`` on any failure —
        callers rely on this helper never raising.

    Fix: previously did a function-local ``import aiohttp`` although the
    module's HTTP client is ``httpx`` (imported at file top); this removes
    the extra third-party dependency and unifies the client stack.
    """
    # Use the first configured node's router URL (falls back to "noda1").
    nodes_reg = load_nodes_registry()
    nodes = (nodes_reg.get("nodes") or {}) if isinstance(nodes_reg, dict) else {}
    node_id = next(iter(nodes), "noda1")
    url = f"{get_router_url(node_id).rstrip('/')}{path}"
    try:
        async with httpx.AsyncClient(timeout=timeout) as client:
            if method.upper() == "GET":
                resp = await client.get(url)
            else:
                resp = await client.post(url, json=json_body)
            # Parse the body regardless of Content-Type, mirroring the old
            # aiohttp ``resp.json(content_type=None)`` behavior.
            return resp.json()
    except Exception as e:
        return {"error": str(e)}
@app.post("/api/sofiia/auto-route")
async def console_auto_route(body: Dict[str, Any]) -> Dict[str, Any]:
    """Proxy: classify a prompt and return the recommended model."""
    route_path = "/v1/sofiia/auto-route"
    return await _router_request_json("POST", route_path, json_body=body)
@app.get("/api/sofiia/budget")
async def console_budget_dashboard() -> Dict[str, Any]:
    """Proxy: fetch the budget dashboard data from the router."""
    dashboard_path = "/v1/sofiia/budget"
    return await _router_request_json("GET", dashboard_path)
@app.post("/api/sofiia/budget/limits")
async def console_set_budget_limits(body: Dict[str, Any]) -> Dict[str, Any]:
    """Proxy: set a provider budget limit on the router."""
    limits_path = "/v1/sofiia/budget/limits"
    return await _router_request_json("POST", limits_path, json_body=body)
@app.get("/api/sofiia/budget/stats")
async def console_budget_stats(window_hours: int = 24) -> Dict[str, Any]:
    """Proxy: fetch budget stats for the given time window (hours)."""
    stats_path = f"/v1/sofiia/budget/stats?window_hours={window_hours}"
    return await _router_request_json("GET", stats_path)
@app.get("/api/sofiia/catalog")
async def console_model_catalog(refresh_ollama: bool = False) -> Dict[str, Any]:
    """Proxy: fetch the full model catalog with availability flags."""
    # Router expects a lowercase boolean literal in the query string.
    refresh_flag = "true" if refresh_ollama else "false"
    catalog_path = f"/v1/sofiia/catalog?refresh_ollama={refresh_flag}"
    return await _router_request_json("GET", catalog_path)