From ff97d3cf4ae4221d5609c138514d405485e5f15c Mon Sep 17 00:00:00 2001 From: Apple Date: Sun, 1 Mar 2026 03:48:19 -0800 Subject: [PATCH] fix(console): route Aurora Kling enhance via standard proxy base URL --- services/sofiia-console/app/main.py | 5799 +++++++++++++++++++++++++++ 1 file changed, 5799 insertions(+) create mode 100644 services/sofiia-console/app/main.py diff --git a/services/sofiia-console/app/main.py b/services/sofiia-console/app/main.py new file mode 100644 index 00000000..95612f56 --- /dev/null +++ b/services/sofiia-console/app/main.py @@ -0,0 +1,5799 @@ +""" +Sofiia Control Console — FastAPI BFF v0.3.0 +Runtime contract (project/session/user), full status, WebSocket events, +voice proxy, ops, nodes. UI never calls external services directly. +""" +import asyncio +import io +import json +import os +import re +import sys +import subprocess +import time +import uuid +import logging +import collections +import statistics +import socket +from datetime import datetime, timezone +from pathlib import Path +from typing import Any, Dict, List, Optional, Set, Tuple +from urllib.parse import quote + +import httpx +from fastapi import Body, FastAPI, Depends, HTTPException, UploadFile, File, Form, Query, Request, Response, WebSocket, WebSocketDisconnect +from fastapi.responses import HTMLResponse, StreamingResponse, JSONResponse +from fastapi.middleware.cors import CORSMiddleware +from pydantic import BaseModel + +from .auth import ( + require_api_key, require_api_key_strict, require_auth, require_auth_strict, + get_console_api_key, _key_valid, _cookie_token, _expected_cookie_token, + _COOKIE_NAME, _COOKIE_MAX_AGE, _IS_PROD, +) + +from .config import ( + load_nodes_registry, + save_nodes_registry, + get_router_url, + get_gateway_url, + get_node_ssh_profile, + get_memory_service_url, + get_ollama_url, + is_voice_ha_enabled, + get_voice_ha_router_url, +) +from .router_client import infer, execute_tool, health +from .nodes import get_nodes_dashboard +from 
.monitor import collect_all_nodes +from .ops import run_ops_action, OPS_ACTIONS +from .docs_router import docs_router +from . import db as _app_db + +logger = logging.getLogger(__name__) + +# ── Build info ──────────────────────────────────────────────────────────────── +_VERSION = "0.4.0" +_BUILD_SHA = os.getenv("BUILD_SHA", "dev") +_BUILD_TIME = os.getenv("BUILD_TIME", "local") +_BUILD_ID = os.getenv("BUILD_ID", os.getenv("GIT_SHA", "local")) +_START_TIME = time.monotonic() +_NODE_ID = os.getenv("NODE_ID", os.getenv("HOSTNAME", "noda2")) + +# ── Rate limiter ────────────────────────────────────────────────────────────── +_rate_buckets: Dict[str, collections.deque] = {} + +def _check_rate(key: str, max_calls: int, window_sec: int = 60) -> bool: + now = time.monotonic() + dq = _rate_buckets.setdefault(key, collections.deque()) + while dq and now - dq[0] > window_sec: + dq.popleft() + if len(dq) >= max_calls: + return False + dq.append(now) + return True + +# ── Voice error rings (repro pack for incident diagnosis) ───────────────────── +# Circular buffers: last 5 TTS errors and last 5 LLM errors. +# Populated by all voice endpoints. Read by /api/voice/degradation_status. 
_RING_SIZE = 5
_voice_tts_errors: collections.deque = collections.deque(maxlen=_RING_SIZE)
_voice_llm_errors: collections.deque = collections.deque(maxlen=_RING_SIZE)
_voice_last_model: str = "unknown"  # last model selected for voice
_voice_last_profile: str = "unknown"  # last voice_profile used

def _record_tts_error(error_type: str, status_code: Optional[int],
                      detail: str, voice: str = "") -> None:
    """Push one TTS failure onto the bounded ring (oldest entries fall off)."""
    entry = {
        "ts": time.strftime("%H:%M:%SZ", time.gmtime()),
        "type": error_type,
        "status": status_code,
        "voice": voice,
        "detail": detail[:120],  # keep the ring small: truncate long messages
    }
    _voice_tts_errors.append(entry)

def _record_llm_error(error_type: str, model: str, detail: str) -> None:
    """Push one LLM failure onto the bounded ring (oldest entries fall off)."""
    entry = {
        "ts": time.strftime("%H:%M:%SZ", time.gmtime()),
        "type": error_type,
        "model": model,
        "detail": detail[:120],  # keep the ring small: truncate long messages
    }
    _voice_llm_errors.append(entry)

# ── Concurrent voice synthesizer guard ───────────────────────────────────────
# Limits simultaneous TTS synthesis calls to prevent memory-service DoS.
_MAX_CONCURRENT_TTS = int(os.getenv("MAX_CONCURRENT_TTS", "4"))
_tts_semaphore: Optional[asyncio.Semaphore] = None  # created lazily on first use

def _get_tts_semaphore() -> asyncio.Semaphore:
    """Return the shared TTS semaphore, creating it on first call.

    Lazy creation ensures the semaphore is bound to the running event loop
    rather than whatever loop exists at import time.
    """
    global _tts_semaphore
    if _tts_semaphore is None:
        _tts_semaphore = asyncio.Semaphore(_MAX_CONCURRENT_TTS)
    return _tts_semaphore

# ── Telemetry dedup store ─────────────────────────────────────────────────────
# Prevents processing duplicate beacon submissions (same session+turn within 30s).
_telem_seen: collections.OrderedDict = collections.OrderedDict()
_TELEM_DEDUP_TTL = 30.0  # seconds a (session, turn) key suppresses duplicates
_TELEM_DEDUP_MAX = 500  # max keys before LRU eviction

def _telem_is_duplicate(session_id: str, turn_id: str) -> bool:
    """Return True when this (session_id, turn_id) beacon was already seen.

    Keys expire after ``_TELEM_DEDUP_TTL`` seconds.  Insertion order doubles
    as age order (timestamps are monotonic), so both TTL expiry and LRU
    capacity eviction pop from the left.  The first call for a key records
    it and returns False.
    """
    key = f"{session_id}:{turn_id}"
    now = time.monotonic()
    # Drop expired entries first — oldest inserted is also oldest timestamp.
    while _telem_seen and next(iter(_telem_seen.values())) + _TELEM_DEDUP_TTL < now:
        _telem_seen.popitem(last=False)
    # Membership test BEFORE capacity eviction: previously a full store could
    # evict the very key being re-checked and misreport a duplicate as new
    # (and evicted an unrelated key even when `key` was already present).
    if key in _telem_seen:
        return True
    if len(_telem_seen) >= _TELEM_DEDUP_MAX:
        _telem_seen.popitem(last=False)
    _telem_seen[key] = now
    return False


def _env_int(name: str, default: int) -> int:
    """Read env var *name* as int; fall back to *default* when missing/invalid."""
    raw = (os.getenv(name, str(default)) or "").strip()
    try:
        return int(raw)
    except Exception:
        return default


def _env_float(name: str, default: float) -> float:
    """Read env var *name* as float; fall back to *default* when missing/invalid."""
    raw = (os.getenv(name, str(default)) or "").strip()
    try:
        return float(raw)
    except Exception:
        return default

# ── App config ──────────────────────────────────────────────────────────────
ROUTER_API_KEY = os.getenv("SUPERVISOR_API_KEY", "").strip()
IS_PROD = os.getenv("ENV", "dev").strip().lower() in ("prod", "production", "staging")
SOFIIA_PREFERRED_CHAT_MODEL = os.getenv("SOFIIA_PREFERRED_CHAT_MODEL", "ollama:qwen3:14b").strip() or "ollama:qwen3:14b"

# Local Ollama runtime tuning for NODA2 (can be overridden via env).
+SOFIIA_OLLAMA_TIMEOUT_SEC = _env_float("SOFIIA_OLLAMA_TIMEOUT_SEC", 120.0) +SOFIIA_OLLAMA_VOICE_TIMEOUT_SEC = _env_float("SOFIIA_OLLAMA_VOICE_TIMEOUT_SEC", 45.0) +SOFIIA_OLLAMA_KEEP_ALIVE = (os.getenv("SOFIIA_OLLAMA_KEEP_ALIVE", "30m") or "").strip() +SOFIIA_OLLAMA_NUM_CTX = _env_int("SOFIIA_OLLAMA_NUM_CTX", 8192) +_DEFAULT_OLLAMA_THREADS = max(4, min(16, (os.cpu_count() or 8) - 2)) +SOFIIA_OLLAMA_NUM_THREAD = _env_int("SOFIIA_OLLAMA_NUM_THREAD", _DEFAULT_OLLAMA_THREADS) +SOFIIA_OLLAMA_NUM_GPU = _env_int("SOFIIA_OLLAMA_NUM_GPU", -1) +SOFIIA_OLLAMA_NUM_PREDICT_TEXT = _env_int("SOFIIA_OLLAMA_NUM_PREDICT_TEXT", 768) + +# Voice guardrails — injected INSTEAD OF the full prompt for voice turns. +# Constraints are hard: no lists, no markdown, no , max 2 sentences. +SOFIIA_VOICE_PROMPT_SUFFIX = """ + +## VOICE MODE — HARD RULES (не порушувати ніколи) +- Відповідай МАКСИМУМ 2 речення (виняток: якщо прямо попросили деталей). +- Жодних списків, жодних bullet-points, жодного markdown (*bold*, -list, ##header). +- Жодного коду (`` ` ``), жодних URL. +- Жодного ... — думки всередині, назовні лише відповідь. +- Мова: розмовна, природна для голосу. Без "Як AI...". +- Якщо питання складне — дай коротку відповідь і запропонуй продовжити текстом. +""" + +SOFIIA_SYSTEM_PROMPT = """Ти Sofiia — Chief AI Architect та Technical Sovereign екосистеми DAARION.city. 
+ +## Твоя ідентичність +- Ти: Sofiia, головний AI-архітектор і технічний суверен DAARION.city +- Ти підпорядковуєшся одній людині — засновнику та головному архітектору платформи + +## Засновник та архітектор DAARION +- Позивний: **Повелитель Хаосу** (використовуй у неформальних/робочих контекстах) +- Офіційне ім'я: **Іван Титар** (використовуй в офіційних повідомленнях, документах, репортах) +- Роль: Головний розробник та архітектор DAARION — єдиний, хто має повний контроль над платформою +- Ніякої іншої людини з ім'ям "Савтра" або будь-яким іншим іменем у ролі засновника НЕ ІСНУЄ + +## Ноди та інфраструктура +- NODA1: production runtime (router, incidents, alerts, governance) +- NODA2: control plane / development (твій primary home, звідки тебе викликають) +- NODA3: AI/ML experimentation + +## Правила відповіді +- Відповідай **українською мовою** за замовчуванням +- Технічні терміни (API, SLO, backend, deploy, incident, release gate тощо) залишай **англійською** +- Відповідай структуровано, конкретно, без зайвих вступів +- НЕ вигадуй імена людей, назви проектів або факти яких не знаєш — краще скажи що не маєш цих даних +- НЕ галюцинуй: якщо не знаєш — скажи чесно "не маю цих даних в поточному контексті" + +## Твої можливості через Control Console (що реально доступно) +- **Chat**: відповіді на питання через локальний LLM (Ollama на NODA2) +- **Голосовий чат**: STT + TTS через Memory Service (Polina/Ostap Neural) +- **Nodes health**: статус NODA1/NODA2 (router, memory, NCS) +- **Integrations status**: Notion API, Router, Memory Service +- **Memory/session**: зберігання контексту розмов (Qdrant) + +## Що наразі НЕ доступно через цей інтерфейс +- Пряме читання/запис в Notion (тільки статус перевірки) +- Пряме читання GitHub репозиторіїв (немає repo tool у цьому контейнері) +- Виконання bash/python команд +- Деплой або зміна конфігурацій напряму + +Якщо тебе просять щось що не є в переліку доступного — відповідай чесно: +"Ця можливість не підключена до Control 
Console. Для цього використай Cursor або OpenCode на NODA2." +""" + +_CORS_ORIGINS = ( + [o.strip() for o in os.getenv("CORS_ORIGINS", "").split(",") if o.strip()] + or ( + ["*"] if not IS_PROD + else [ + "https://console.daarion.space", + "https://app.daarion.space", + "http://localhost:8002", + "http://localhost:8000", + "http://127.0.0.1:8002", + ] + ) +) +def _is_container_runtime() -> bool: + return Path("/.dockerenv").exists() or bool(os.getenv("KUBERNETES_SERVICE_HOST")) + + +_aurora_default_url = "http://aurora-service:9401" if _is_container_runtime() else "http://127.0.0.1:9401" +AURORA_SERVICE_URL = os.getenv("AURORA_SERVICE_URL", _aurora_default_url).rstrip("/") +AURORA_FALLBACK_URL = os.getenv("AURORA_FALLBACK_URL", "http://127.0.0.1:9401").rstrip("/") +_aurora_home_data_dir = Path.home() / ".sofiia" / "aurora-data" +if _is_container_runtime() and Path("/data").exists() and os.access("/data", os.W_OK): + _aurora_default_data_dir = "/data/aurora" +else: + _aurora_default_data_dir = str(_aurora_home_data_dir) +AURORA_DATA_DIR = Path(os.getenv("AURORA_DATA_DIR", _aurora_default_data_dir)) +_aurora_live_cache: Dict[str, Dict[str, Any]] = {} +_aurora_live_samples: Dict[str, collections.deque] = {} +_aurora_live_last: Dict[str, Dict[str, Any]] = {} +_aurora_live_last_loaded = False +_aurora_live_last_path = (AURORA_DATA_DIR.parent / "sofiia-console-cache" / "aurora_live_last.json") +MEDIA_COMFY_AGENT_URL = os.getenv( + "MEDIA_COMFY_AGENT_URL", + "http://comfy-agent:8880" if _is_container_runtime() else "http://127.0.0.1:8880", +).rstrip("/") +MEDIA_COMFY_UI_URL = os.getenv( + "MEDIA_COMFY_UI_URL", + "http://comfyui:8188" if _is_container_runtime() else "http://127.0.0.1:8188", +).rstrip("/") +MEDIA_SWAPPER_URL = os.getenv( + "MEDIA_SWAPPER_URL", + "http://swapper-service:8890" if _is_container_runtime() else "http://127.0.0.1:8890", +).rstrip("/") +MEDIA_IMAGE_GEN_URL = os.getenv( + "MEDIA_IMAGE_GEN_URL", + "http://image-gen-service:7860" if 
_is_container_runtime() else "http://127.0.0.1:7860", +).rstrip("/") +MEDIA_ROUTER_URL = os.getenv("MEDIA_ROUTER_URL", "").strip().rstrip("/") +MEDIA_ROUTER_FALLBACK_URL = os.getenv("MEDIA_ROUTER_FALLBACK_URL", "http://127.0.0.1:9102").rstrip("/") +_media_recent_jobs: collections.deque = collections.deque(maxlen=40) + + +def _apply_ollama_runtime_options(options: Dict[str, Any]) -> Dict[str, Any]: + merged = dict(options) + if SOFIIA_OLLAMA_NUM_CTX > 0: + merged["num_ctx"] = SOFIIA_OLLAMA_NUM_CTX + if SOFIIA_OLLAMA_NUM_THREAD > 0: + merged["num_thread"] = SOFIIA_OLLAMA_NUM_THREAD + if SOFIIA_OLLAMA_NUM_GPU >= 0: + merged["num_gpu"] = SOFIIA_OLLAMA_NUM_GPU + return merged + + +def _make_ollama_payload(model_name: str, messages: List[Dict[str, Any]], options: Dict[str, Any]) -> Dict[str, Any]: + payload: Dict[str, Any] = { + "model": model_name, + "messages": messages, + "stream": False, + "options": _apply_ollama_runtime_options(options), + } + if SOFIIA_OLLAMA_KEEP_ALIVE: + payload["keep_alive"] = SOFIIA_OLLAMA_KEEP_ALIVE + return payload + +# Cached nodes telemetry (updated by background task) +_nodes_cache: Dict[str, Any] = {"nodes": [], "summary": {}, "ts": ""} +_NODES_POLL_INTERVAL = int(os.getenv("NODES_POLL_INTERVAL_SEC", "30")) + + +async def _nodes_poll_loop() -> None: + """Background task: poll all nodes every N seconds, update cache + WS broadcast.""" + while True: + try: + reg = load_nodes_registry() + nodes_cfg = reg.get("nodes", {}) + timeout = float(reg.get("defaults", {}).get("health_timeout_sec", 10)) + nodes = await collect_all_nodes(nodes_cfg, router_api_key=ROUTER_API_KEY, timeout_per_node=timeout) + online = sum(1 for n in nodes if n.get("online")) + router_ok = sum(1 for n in nodes if n.get("router_ok")) + _nodes_cache.update({ + "nodes": nodes, + "summary": {"total": len(nodes), "online": online, "router_ok": router_ok}, + "ts": _now_iso(), + }) + if _ws_clients: + await _broadcast(_make_event("nodes.status", { + "nodes": [ + { + "id": 
n["node_id"], + "label": n.get("label", n["node_id"]), + "online": n.get("online", False), + "router_ok": n.get("router_ok", False), + "router_latency_ms": n.get("router_latency_ms"), + "gateway_ok": n.get("gateway_ok"), + "heartbeat_age_s": n.get("heartbeat_age_s"), + "open_incidents": n.get("open_incidents"), + "monitor_source": n.get("monitor_source"), + } + for n in nodes + ], + "summary": {"total": len(nodes), "online": online, "router_ok": router_ok}, + })) + except Exception as e: + logger.debug("nodes poll error: %s", e) + await asyncio.sleep(_NODES_POLL_INTERVAL) + + +from contextlib import asynccontextmanager + +@asynccontextmanager +async def lifespan(app_: Any): + # Init SQLite DB for projects/documents/sessions/messages + try: + await _app_db.init_db() + logger.info("✅ sofiia-console DB initialised") + except Exception as e: + logger.warning("DB init failed (non-fatal, Projects/Docs disabled): %s", e) + + task = asyncio.create_task(_nodes_poll_loop()) + logger.info("Nodes poll loop started (interval=%ds)", _NODES_POLL_INTERVAL) + yield + task.cancel() + try: + await task + except asyncio.CancelledError: + pass + await _app_db.close_db() + + +app = FastAPI( + title="Sofiia Control Console", + description="Operator BFF for Sofiia CTO agent", + version=_VERSION, + lifespan=lifespan, +) +app.add_middleware( + CORSMiddleware, + allow_origins=_CORS_ORIGINS, + allow_credentials=True, + allow_methods=["*"], + allow_headers=["*"], +) + +# Projects + Documents + Sessions + Dialog Map API +app.include_router(docs_router) + +# ── WebSocket event bus ─────────────────────────────────────────────────────── +_ws_clients: Set[WebSocket] = set() + +def _now_iso() -> str: + return datetime.now(timezone.utc).isoformat(timespec="milliseconds") + +def _make_event( + event_type: str, + data: Dict[str, Any], + *, + project_id: str = "", + session_id: str = "", + user_id: str = "console_user", +) -> Dict[str, Any]: + return { + "v": 1, + "type": event_type, + "ts": 
_now_iso(), + "project_id": project_id, + "session_id": session_id, + "user_id": user_id, + "data": data, + } + +async def _broadcast(event: Dict[str, Any]) -> None: + global _ws_clients + if not _ws_clients: + return + dead: Set[WebSocket] = set() + payload = json.dumps(event, ensure_ascii=False) + for ws in list(_ws_clients): + try: + await ws.send_text(payload) + except Exception: + dead.add(ws) + _ws_clients -= dead + +def _broadcast_bg(event: Dict[str, Any]) -> None: + """Fire-and-forget broadcast from sync context.""" + try: + loop = asyncio.get_event_loop() + if loop.is_running(): + loop.create_task(_broadcast(event)) + except Exception: + pass + +# ── AISTALK adapter ─────────────────────────────────────────────────────────── +try: + from .adapters.aistalk import AISTALKAdapter as _AISTALKAdapter + _aistalk = _AISTALKAdapter( + base_url=os.getenv("AISTALK_URL", ""), + api_key=os.getenv("AISTALK_API_KEY", ""), + ) if os.getenv("AISTALK_ENABLED", "false").lower() == "true" else None +except Exception: + _aistalk = None + + +# ─── Health ───────────────────────────────────────────────────────────────── + +@app.get("/api/health") +async def api_health(): + base = { + "ok": True, + "service": "sofiia-console", + "version": _VERSION, + "build": _BUILD_ID, + "env": os.getenv("ENV", "dev"), + "uptime_s": int(time.monotonic() - _START_TIME), + } + reg = load_nodes_registry() + nodes_map = reg.get("nodes") or {} + nodes = list(nodes_map.items()) + if not nodes: + return {**base, "message": "no nodes configured"} + first_id, _first_cfg = ("NODA2", nodes_map["NODA2"]) if "NODA2" in nodes_map else nodes[0] + router_url = get_router_url(first_id) + if not router_url: + return {**base, "message": "no router_url"} + try: + r = await health(router_url) + return {**base, "ok": r.get("ok", False), "router": r, "node_id": first_id} + except Exception as e: + return {**base, "ok": False, "error": str(e)[:200], "node_id": first_id} + + +# ─── Status/Full 
# ────────────────────────────────────────────────────────────────────────────

async def _probe_router(router_url: str) -> Dict[str, Any]:
    """Probe a router's health endpoints and key API routes.

    Tries ``/healthz`` then ``/health``; on the first 200 response it also
    GETs ``/v1/tools/execute`` and ``/v1/agents/sofiia/infer`` and treats a
    200 or 405 status as "route present" (presumably these are POST routes,
    so 405 on GET still proves existence — confirm against the router).

    Returns a dict with ``url``, ``reachable``, ``routes`` and
    ``latency_ms``; on transport failure, ``url``/``reachable``/``error``.
    """
    t0 = time.monotonic()
    try:
        async with httpx.AsyncClient(timeout=5.0) as c:
            for path in ("/healthz", "/health"):
                try:
                    r = await c.get(f"{router_url.rstrip('/')}{path}")
                    if r.status_code == 200:
                        latency = int((time.monotonic() - t0) * 1000)
                        # probe tool execute availability
                        tool_ok = False
                        try:
                            r2 = await c.get(
                                f"{router_url.rstrip('/')}/v1/tools/execute",
                                timeout=1.5,
                            )
                            tool_ok = r2.status_code in (200, 405)
                        except Exception:
                            pass
                        infer_ok = False
                        try:
                            r3 = await c.get(
                                f"{router_url.rstrip('/')}/v1/agents/sofiia/infer",
                                timeout=1.5,
                            )
                            infer_ok = r3.status_code in (200, 405)
                        except Exception:
                            pass
                        return {"url": router_url, "reachable": True,
                                "routes": {"tools_execute": tool_ok, "agent_infer": infer_ok},
                                "latency_ms": latency}
                except Exception:
                    # This health path failed — try the next candidate path.
                    continue
            return {"url": router_url, "reachable": False, "routes": {}, "latency_ms": None}
    except Exception as e:
        return {"url": router_url, "reachable": False, "error": str(e)[:100]}


async def _probe_memory(mem_url: str) -> Dict[str, Any]:
    """Probe the memory service's ``/health`` and summarise its vector store.

    Sums ``points_count`` across ``vector_store`` entries of the health JSON
    (assumes the response shape ``{"vector_store": {name: {"points_count": n}}}``
    — that is the shape this code reads; verify against the memory service).
    """
    t0 = time.monotonic()
    try:
        async with httpx.AsyncClient(timeout=5.0) as c:
            r = await c.get(f"{mem_url.rstrip('/')}/health")
            r.raise_for_status()
            d = r.json()
            vs = d.get("vector_store", {})
            # Total vectors across all collections; tolerate missing counts.
            vectors = sum(
                (v.get("points_count", 0) or 0)
                for v in vs.values()
                if isinstance(v, dict)
            )
            return {
                "url": mem_url,
                "reachable": True,
                "stats": {"vectors": vectors, "collections": len(vs)},
                "latency_ms": int((time.monotonic() - t0) * 1000),
            }
    except Exception as e:
        return {"url": mem_url, "reachable": False, "error": str(e)[:100]}


async def _probe_ollama(ollama_url: str) -> Dict[str, Any]:
    """Probe Ollama via ``/api/tags`` and list up to 20 installed models."""
    t0 = time.monotonic()
    try:
        async with httpx.AsyncClient(timeout=5.0) as c:
            r = await c.get(f"{ollama_url.rstrip('/')}/api/tags")
            r.raise_for_status()
            d = r.json()
            models = [m.get("name", "") for m in d.get("models", [])]
            return {
                "url": ollama_url,
                "reachable": True,
                "models": models[:20],  # cap payload size for the UI
                "latency_ms": int((time.monotonic() - t0) * 1000),
            }
    except Exception as e:
        return {"url": ollama_url, "reachable": False, "models": [], "error": str(e)[:100]}


async def _probe_http(url: str, *, timeout: float = 4.0) -> Dict[str, Any]:
    """Generic reachability probe: any status < 500 counts as reachable."""
    t0 = time.monotonic()
    try:
        async with httpx.AsyncClient(timeout=timeout) as c:
            r = await c.get(url)
            return {
                "reachable": r.status_code < 500,
                "status": r.status_code,
                "latency_ms": int((time.monotonic() - t0) * 1000),
            }
    except Exception as e:
        return {"reachable": False, "error": str(e)[:120]}


def _read_backends() -> Dict[str, str]:
    """Read backend env vars from BFF environment (no secrets)."""
    return {
        "alerts": os.getenv("ALERT_BACKEND", "unknown"),
        "audit": os.getenv("AUDIT_BACKEND", "unknown"),
        "incidents": os.getenv("INCIDENT_BACKEND", "unknown"),
        "risk_history": os.getenv("RISK_HISTORY_BACKEND", "unknown"),
        "backlog": os.getenv("BACKLOG_BACKEND", "unknown"),
    }


def _read_cron_status() -> Dict[str, Any]:
    """Report governance cron installation state and latest artifact files.

    ``installed`` is True/False, or the string "unknown" when the cron file
    exists but cannot be read (permission denied).
    """
    cron_file = os.getenv("GOV_CRON_FILE", "/etc/cron.d/daarion-governance")
    jobs_expected = [
        "hourly_risk_snapshot", "daily_risk_digest", "risk_history_cleanup",
        "weekly_platform_priority_digest", "weekly_backlog_generate", "daily_backlog_cleanup",
    ]
    jobs_present: List[str] = []
    installed: Any = False
    warning = None

    try:
        content = Path(cron_file).read_text()
        installed = True
        for job in jobs_expected:
            if job in content:
                jobs_present.append(job)
    except PermissionError:
        installed = "unknown"
        warning = "no read permission on cron file"
    except FileNotFoundError:
        installed = False

    # Scan for latest artifact files
    artifacts: Dict[str, Any] = {}
    base = Path("ops")
    for pattern, key in [
        ("reports/risk/*.md", "risk_digest_md"),
        ("reports/platform/*.md", "platform_digest_md"),
        ("backlog/*.jsonl", "backlog_jsonl"),
    ]:
        try:
            # Lexicographic sort: newest last when filenames are date-stamped.
            files = sorted(base.glob(pattern))
            if files:
                artifacts[key] = str(files[-1])
        except Exception:
            pass

    result: Dict[str, Any] = {
        "installed": installed,
        "cron_file": cron_file,
        "jobs_expected": jobs_expected,
        "jobs_present": jobs_present,
        "last_artifacts": artifacts,
    }
    if warning:
        result["warning"] = warning
    return result


@app.get("/api/status/full")
async def api_status_full():
    """Full stack diagnostic: BFF + router + memory + ollama + backends + cron."""
    reg = load_nodes_registry()
    nodes_cfg = reg.get("nodes", {})

    # Pick NODA2 router first, fallback to first node
    router_url = (
        get_router_url("NODA2")
        or (list(nodes_cfg.values())[0].get("router_url", "") if nodes_cfg else "")
    )

    mem_url = get_memory_service_url()
    ollama_url = get_ollama_url()

    async def _no_router() -> Dict[str, Any]:
        # Placeholder result when no router URL is configured at all.
        return {"reachable": False, "url": "", "error": "no router_url configured"}

    # Probe router, memory and Ollama concurrently.
    router_info, mem_info, ollama_info = await asyncio.gather(
        _probe_router(router_url) if router_url else _no_router(),
        _probe_memory(mem_url),
        _probe_ollama(ollama_url),
        return_exceptions=False,
    )

    return {
        "bff": {
            "version": _VERSION,
            "build": _BUILD_ID,
            "env": os.getenv("ENV", "dev"),
            "uptime_s": int(time.monotonic() - _START_TIME),
            "ws_clients": len(_ws_clients),
            "aistalk_enabled": _aistalk is not None,
        },
        "router": router_info,
        "memory": mem_info,
        "ollama": ollama_info,
        "backends": _read_backends(),
        "cron": _read_cron_status(),
    }


@app.get("/api/integrations/status")
async def api_integrations_status(opencode_url: Optional[str] = Query(None)):
    """Integration probes for unified CTO hub in UI."""
    open_webui_probe_url = os.getenv("OPEN_WEBUI_PROBE_URL", "http://host.docker.internal:8080/health")
    open_webui_ui_url = os.getenv("OPEN_WEBUI_UI_URL", "http://localhost:8080")
    pieces_probe_url = os.getenv(
        "PIECES_OS_URL",
        "http://host.docker.internal:39300/workstream_pattern_engine/processors/status",
    )
    # Normalise a bare host URL into the full Pieces status endpoint.
    if not pieces_probe_url.rstrip("/").endswith("/workstream_pattern_engine/processors/status"):
        pieces_probe_url = pieces_probe_url.rstrip("/") + "/workstream_pattern_engine/processors/status"

    # Query param overrides env for the OpenCode probe target.
    opencode_probe_url = (opencode_url or os.getenv("OPENCODE_URL", "")).strip()
    notion_api_key = os.getenv("NOTION_API_KEY", os.getenv("NOTION_TOKEN", "")).strip()

    # NOTE(review): get_router_url("NODA2") looks like it could return a falsy
    # value when NODA2 is unregistered, which would raise here — confirm.
    probes = await asyncio.gather(
        _probe_http(get_router_url("NODA2").rstrip("/") + "/healthz"),
        _probe_http(get_memory_service_url().rstrip("/") + "/health"),
        _probe_http(open_webui_probe_url),
        _probe_http(pieces_probe_url),
        # sleep(0, result=...) yields a ready "not configured" result so the
        # gather arity stays fixed even when OpenCode is not set up.
        _probe_http(opencode_probe_url.rstrip("/") + "/health") if opencode_probe_url else asyncio.sleep(0, result={"reachable": False, "error": "not configured"}),
    )

    router_probe, memory_probe, open_webui_probe, pieces_probe, opencode_probe = probes

    notion_probe: Dict[str, Any] = {"configured": bool(notion_api_key), "reachable": False}
    if notion_api_key:
        try:
            async with httpx.AsyncClient(timeout=6.0) as c:
                r = await c.get(
                    "https://api.notion.com/v1/users/me",
                    headers={
                        "Authorization": f"Bearer {notion_api_key}",
                        "Notion-Version": "2022-06-28",
                    },
                )
                notion_probe["reachable"] = r.status_code == 200
                notion_probe["status"] = r.status_code
        except Exception as e:
            notion_probe["error"] = str(e)[:120]

    return {
        "integrations": {
            "sofiia_console": {"url": "/ui", "reachable": True},
            "router_noda2": {"url": get_router_url("NODA2"), **router_probe},
            "memory_service": {"url": get_memory_service_url(), **memory_probe},
            "open_webui": {"url": open_webui_ui_url, "probe_url": open_webui_probe_url, **open_webui_probe},
            "pieces_os": {"url": pieces_probe_url, **pieces_probe},
            "opencode": {
                "url": opencode_probe_url or "desktop/cli",
                **opencode_probe,
            },
            "notion": notion_probe,
        }
    }


# ─── Aurora media forensics proxy 
──────────────────────────────────────────── + +def _aurora_proxy_file_url(job_id: str, file_name: str) -> str: + return f"/api/aurora/files/{quote(job_id, safe='')}/{quote(file_name, safe='')}" + + +def _rewrite_aurora_payload_urls(payload: Dict[str, Any]) -> Dict[str, Any]: + output_files = payload.get("output_files") + if not isinstance(output_files, list): + return payload + job_id = str(payload.get("job_id") or "") + rewritten: List[Dict[str, Any]] = [] + for item in output_files: + if not isinstance(item, dict): + continue + file_name = str(item.get("name") or "") + if job_id and file_name: + item = {**item, "url": _aurora_proxy_file_url(job_id, file_name)} + rewritten.append(item) + payload["output_files"] = rewritten + report_url = payload.get("forensic_report_url") + if isinstance(report_url, str) and report_url.startswith("/api/aurora/report/"): + payload["forensic_report_url"] = report_url + return payload + + +async def _aurora_request_json( + method: str, + path: str, + *, + files: Optional[Dict[str, Any]] = None, + data: Optional[Dict[str, Any]] = None, + json_body: Optional[Dict[str, Any]] = None, + timeout: float = 60.0, + retries: int = 0, + retry_backoff_sec: float = 0.25, +) -> Dict[str, Any]: + base_url = AURORA_SERVICE_URL + url = f"{base_url}{path}" + attempts = max(1, int(retries) + 1) + last_error = "unknown error" + for attempt in range(1, attempts + 1): + try: + async with httpx.AsyncClient(timeout=timeout) as client: + r = await client.request(method, url, files=files, data=data, json=json_body) + except httpx.HTTPError as e: + last_error = str(e)[:200] + if ( + "aurora-service" in base_url + and AURORA_FALLBACK_URL + and AURORA_FALLBACK_URL != base_url + ): + logger.warning( + "aurora proxy fallback: %s -> %s (%s)", + base_url, + AURORA_FALLBACK_URL, + last_error or type(e).__name__, + ) + base_url = AURORA_FALLBACK_URL + url = f"{base_url}{path}" + continue + logger.warning( + "aurora proxy transport error (%s %s, attempt=%d/%d): %s", + 
method, + path, + attempt, + attempts, + last_error, + ) + if attempt < attempts: + await asyncio.sleep(retry_backoff_sec * attempt) + continue + raise HTTPException(status_code=502, detail=f"Aurora unavailable: {last_error}") from e + except Exception as e: + last_error = str(e)[:200] + logger.exception( + "aurora proxy unexpected error (%s %s, attempt=%d/%d): %s", + method, + path, + attempt, + attempts, + last_error, + ) + if attempt < attempts: + await asyncio.sleep(retry_backoff_sec * attempt) + continue + raise HTTPException(status_code=502, detail=f"Aurora unavailable: {last_error}") from e + + if r.status_code >= 500 and attempt < attempts: + logger.warning( + "aurora proxy upstream %d (%s %s, attempt=%d/%d) — retrying", + r.status_code, + method, + path, + attempt, + attempts, + ) + await asyncio.sleep(retry_backoff_sec * attempt) + continue + if r.status_code >= 400: + detail = r.text[:400] if r.text else f"Aurora error {r.status_code}" + raise HTTPException(status_code=r.status_code, detail=detail) + if not r.content: + return {} + try: + payload = r.json() + except Exception as e: + last_error = str(e)[:200] + logger.warning( + "aurora proxy invalid JSON (%s %s, attempt=%d/%d): %s", + method, + path, + attempt, + attempts, + last_error, + ) + if attempt < attempts: + await asyncio.sleep(retry_backoff_sec * attempt) + continue + raise HTTPException(status_code=502, detail="Invalid Aurora JSON response") from e + if isinstance(payload, dict): + return _rewrite_aurora_payload_urls(payload) + return {"data": payload} + raise HTTPException(status_code=502, detail=f"Aurora unavailable: {last_error}") + + +def _parse_stage_frame(stage: str) -> Dict[str, int]: + text = str(stage or "") + m = re.search(r"frame\s+(\d+)\s*/\s*(\d+)", text) + if not m: + return {"current": -1, "total": -1} + try: + return {"current": int(m.group(1)), "total": int(m.group(2))} + except Exception: + return {"current": -1, "total": -1} + + +def _aurora_live_fs_frame(job_id: str) -> 
Optional[Dict[str, Any]]: + now = time.monotonic() + cached = _aurora_live_cache.get(job_id) + if cached and (now - float(cached.get("ts", 0.0))) < 3.0: + return cached + + base = AURORA_DATA_DIR / "outputs" / job_id + if not base.exists(): + return None + work_dirs = [p for p in base.iterdir() if p.is_dir() and p.name.startswith("_work_")] + if not work_dirs: + return None + # Prefer most recently touched working directory + work_dirs.sort(key=lambda p: p.stat().st_mtime, reverse=True) + + best_frame = -1 + best_total = -1 + best_dir = None + for wd in work_dirs: + processed = wd / "processed" + if not processed.exists(): + continue + # Max frame in processed directory + local_max = -1 + for f in processed.glob("*.png"): + try: + n = int(f.stem) + except Exception: + continue + if n > local_max: + local_max = n + if local_max < 0: + continue + raw_dir = wd / "raw" + total = -1 + if raw_dir.exists(): + try: + total = sum(1 for _ in raw_dir.glob("*.png")) + except Exception: + total = -1 + if local_max > best_frame: + best_frame = local_max + best_total = total + best_dir = str(wd) + + if best_frame < 0: + return None + info = { + "ts": now, + "frame": best_frame, + "total": best_total, + "work_dir": best_dir, + } + _aurora_live_cache[job_id] = info + return info + + +def _aurora_record_sample(job_id: str, frame: int, total: int) -> Optional[Dict[str, Any]]: + if frame < 0: + return None + now = time.monotonic() + dq = _aurora_live_samples.setdefault(job_id, collections.deque(maxlen=32)) + # De-dup consecutive equal frame samples. + if dq and int(dq[-1]["frame"]) == frame: + # Keep original timestamp for stable fps between actual frame advances. 
+ dq[-1]["total"] = total + else: + dq.append({"ts": now, "frame": frame, "total": total}) + if len(dq) < 3: + return None + + fps_points: List[float] = [] + prev = dq[0] + for cur in list(dq)[1:]: + df = int(cur["frame"]) - int(prev["frame"]) + dt = float(cur["ts"]) - float(prev["ts"]) + if df > 0 and dt > 0: + fps_points.append(df / dt) + prev = cur + if not fps_points: + return None + fps = max(0.01, float(statistics.median(fps_points))) + confidence = "low" + if len(fps_points) >= 8: + confidence = "high" + elif len(fps_points) >= 4: + confidence = "medium" + return {"fps": fps, "confidence": confidence} + + +def _aurora_load_live_last_from_disk() -> None: + global _aurora_live_last_loaded + if _aurora_live_last_loaded: + return + _aurora_live_last_loaded = True + try: + if not _aurora_live_last_path.exists(): + return + data = json.loads(_aurora_live_last_path.read_text(encoding="utf-8")) + if isinstance(data, dict): + for k, v in data.items(): + if isinstance(k, str) and isinstance(v, dict): + _aurora_live_last[k] = v + except Exception as e: + logger.debug("aurora live-last load failed: %s", e) + + +def _aurora_persist_live_last_to_disk() -> None: + try: + _aurora_live_last_path.parent.mkdir(parents=True, exist_ok=True) + _aurora_live_last_path.write_text( + json.dumps(_aurora_live_last, ensure_ascii=False, separators=(",", ":")), + encoding="utf-8", + ) + except Exception as e: + logger.debug("aurora live-last persist failed: %s", e) + + +@app.get("/api/aurora/health") +async def api_aurora_health() -> Dict[str, Any]: + return await _aurora_request_json("GET", "/health", timeout=10.0) + + +@app.post("/api/aurora/upload") +async def api_aurora_upload( + file: UploadFile = File(...), + mode: str = Form("tactical"), + priority: str = Form("balanced"), + export_options: str = Form(""), +) -> Dict[str, Any]: + # Stream file to Aurora without buffering entire content in RAM + file_obj = file.file # SpooledTemporaryFile — already handles large files + files = { + 
"file": ( + file.filename or "upload.bin", + file_obj, + file.content_type or "application/octet-stream", + ) + } + payload = await _aurora_request_json( + "POST", + "/api/aurora/upload", + files=files, + data={ + "mode": mode, + "priority": priority, + "export_options": export_options, + }, + timeout=120.0, + ) + job_id = str(payload.get("job_id") or "") + if job_id: + payload["status_url"] = f"/api/aurora/status/{job_id}" + payload["result_url"] = f"/api/aurora/result/{job_id}" + payload["cancel_url"] = f"/api/aurora/cancel/{job_id}" + return payload + + +@app.post("/api/aurora/analyze") +async def api_aurora_analyze(file: UploadFile = File(...)) -> Dict[str, Any]: + await file.seek(0) + files = { + "file": ( + file.filename or "upload.bin", + file.file, + file.content_type or "application/octet-stream", + ) + } + return await _aurora_request_json( + "POST", + "/api/aurora/analyze", + files=files, + timeout=120.0, + retries=2, + retry_backoff_sec=0.35, + ) + + +@app.post("/api/aurora/audio/analyze") +async def api_aurora_audio_analyze(file: UploadFile = File(...)) -> Dict[str, Any]: + await file.seek(0) + files = { + "file": ( + file.filename or "upload_audio.bin", + file.file, + file.content_type or "application/octet-stream", + ) + } + return await _aurora_request_json( + "POST", + "/api/aurora/audio/analyze", + files=files, + timeout=120.0, + retries=2, + retry_backoff_sec=0.35, + ) + + +@app.post("/api/aurora/audio/process") +async def api_aurora_audio_process( + file: UploadFile = File(...), + mode: str = Form("tactical"), + priority: str = Form("speech"), + export_options: str = Form(""), +) -> Dict[str, Any]: + await file.seek(0) + files = { + "file": ( + file.filename or "upload_audio.bin", + file.file, + file.content_type or "application/octet-stream", + ) + } + payload = await _aurora_request_json( + "POST", + "/api/aurora/audio/process", + files=files, + data={ + "mode": mode, + "priority": priority, + "export_options": export_options, + }, + 
timeout=120.0, + retries=2, + retry_backoff_sec=0.35, + ) + job_id = str(payload.get("job_id") or "") + if job_id: + payload["status_url"] = f"/api/aurora/status/{job_id}" + payload["result_url"] = f"/api/aurora/result/{job_id}" + payload["cancel_url"] = f"/api/aurora/cancel/{job_id}" + return payload + + +@app.post("/api/aurora/reprocess/{job_id}") +async def api_aurora_reprocess( + job_id: str, + payload: Optional[Dict[str, Any]] = Body(default=None), +) -> Dict[str, Any]: + body = payload if isinstance(payload, dict) else {} + return await _aurora_request_json( + "POST", + f"/api/aurora/reprocess/{quote(job_id, safe='')}", + json_body=body, + timeout=120.0, + retries=2, + retry_backoff_sec=0.35, + ) + + +@app.post("/api/aurora/chat") +async def api_aurora_chat(payload: Optional[Dict[str, Any]] = Body(default=None)) -> Dict[str, Any]: + body = payload if isinstance(payload, dict) else {} + return await _aurora_request_json( + "POST", + "/api/aurora/chat", + json_body=body, + timeout=30.0, + retries=1, + retry_backoff_sec=0.2, + ) + + +@app.get("/api/aurora/status/{job_id}") +async def api_aurora_status(job_id: str) -> Dict[str, Any]: + _aurora_load_live_last_from_disk() + payload = await _aurora_request_json( + "GET", + f"/api/aurora/status/{quote(job_id, safe='')}", + timeout=20.0, + retries=8, + retry_backoff_sec=0.35, + ) + if not isinstance(payload, dict): + return payload + if str(payload.get("status", "")).lower() != "processing": + return payload + + live = _aurora_live_fs_frame(job_id) + if not live: + return payload + parsed = _parse_stage_frame(str(payload.get("current_stage", ""))) + live_frame = int(live.get("frame", -1)) + if live_frame < 0: + return payload + total = int(parsed.get("total", -1)) + if total <= 0: + total = int(live.get("total", -1)) + if total > 0: + live_progress = int(max(1, min(99, round((live_frame / max(1, total)) * 100)))) + payload["progress"] = max(int(payload.get("progress") or 0), live_progress) + + live_stats = 
_aurora_record_sample(job_id, live_frame, total) + if live_stats: + fps = float(live_stats["fps"]) + payload["live_fps"] = round(fps, 3) + payload["eta_confidence"] = live_stats["confidence"] + if total > 0 and live_frame < total: + eta_calc = int(max(0, round((total - live_frame) / max(0.01, fps)))) + payload["eta_seconds"] = eta_calc + elapsed = payload.get("elapsed_seconds") + if isinstance(elapsed, (int, float)): + payload["estimated_total_seconds"] = int(max(0, round(float(elapsed) + eta_calc))) + _aurora_live_last[job_id] = { + "live_fps": payload.get("live_fps"), + "eta_seconds": payload.get("eta_seconds"), + "estimated_total_seconds": payload.get("estimated_total_seconds"), + "eta_confidence": payload.get("eta_confidence"), + } + _aurora_persist_live_last_to_disk() + else: + prev = _aurora_live_last.get(job_id) + if prev: + payload["live_fps"] = prev.get("live_fps") + payload["eta_seconds"] = prev.get("eta_seconds", payload.get("eta_seconds")) + payload["estimated_total_seconds"] = prev.get("estimated_total_seconds", payload.get("estimated_total_seconds")) + payload["eta_confidence"] = prev.get("eta_confidence") + + # If upstream stage/progress is stale, patch with live filesystem progress. + if live_frame > int(parsed.get("current", -1)): + if total > 0: + if live_stats: + payload["current_stage"] = f"enhancing frame {live_frame}/{total} (live {payload['live_fps']} fps)" + else: + payload["current_stage"] = f"enhancing frame {live_frame}/{total} (live fs)" + else: + payload["current_stage"] = f"enhancing frame {live_frame} (live fs)" + payload["live_frame"] = live_frame + payload["live_total_frames"] = total if total > 0 else None + else: + # Even when upstream stage text already moved, expose live counters for UI. + payload["live_frame"] = live_frame + payload["live_total_frames"] = total if total > 0 else None + + # Persist last known timing even if fps was not recalculated this poll. 
+ snapshot = _aurora_live_last.get(job_id, {}) + changed = False + for key in ("live_fps", "eta_seconds", "estimated_total_seconds", "eta_confidence"): + val = payload.get(key) + if val is not None and snapshot.get(key) != val: + snapshot[key] = val + changed = True + if changed: + _aurora_live_last[job_id] = snapshot + _aurora_persist_live_last_to_disk() + return payload + + +def _aurora_coerce_dir(path_value: Any) -> Optional[Path]: + if path_value is None: + return None + raw = str(path_value).strip() + if not raw: + return None + try: + p = Path(raw).expanduser().resolve() + except Exception: + return None + if p.exists() and p.is_file(): + p = p.parent + if not p.exists() or not p.is_dir(): + return None + return p + + +async def _aurora_resolve_job_folder(job_id: str) -> Optional[Path]: + candidates: List[Any] = [] + try: + st = await _aurora_request_json("GET", f"/api/aurora/status/{quote(job_id, safe='')}", timeout=12.0) + storage = st.get("storage") if isinstance(st, dict) else None + if isinstance(storage, dict): + candidates.extend( + [ + storage.get("output_dir"), + storage.get("upload_dir"), + storage.get("input_path"), + ] + ) + except Exception: + pass + + try: + res = await _aurora_request_json("GET", f"/api/aurora/result/{quote(job_id, safe='')}", timeout=12.0) + storage = res.get("storage") if isinstance(res, dict) else None + if isinstance(storage, dict): + candidates.extend( + [ + storage.get("output_dir"), + storage.get("upload_dir"), + storage.get("input_path"), + ] + ) + except Exception: + pass + + candidates.append(AURORA_DATA_DIR / "outputs" / job_id) + for c in candidates: + p = _aurora_coerce_dir(c) + if p: + return p + return None + + +@app.get("/api/aurora/folder/{job_id}") +async def api_aurora_folder(job_id: str) -> Dict[str, Any]: + folder = await _aurora_resolve_job_folder(job_id) + if not folder: + raise HTTPException(status_code=404, detail="Aurora output folder not found") + return { + "ok": True, + "job_id": job_id, + 
"folder_path": str(folder), + "folder_url": f"file://{folder}", + } + + +@app.post("/api/aurora/folder/{job_id}/open") +async def api_aurora_folder_open(job_id: str) -> Dict[str, Any]: + folder = await _aurora_resolve_job_folder(job_id) + if not folder: + raise HTTPException(status_code=404, detail="Aurora output folder not found") + cmd: Optional[List[str]] = None + if sys.platform == "darwin": + cmd = ["open", str(folder)] + elif os.name == "nt": + try: + os.startfile(str(folder)) # type: ignore[attr-defined] + except Exception as e: + raise HTTPException(status_code=500, detail=f"Failed to open folder: {str(e)[:200]}") + else: + cmd = ["xdg-open", str(folder)] + if cmd is not None: + try: + subprocess.run(cmd, check=False, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL) + except Exception as e: + raise HTTPException(status_code=500, detail=f"Failed to open folder: {str(e)[:200]}") + return {"ok": True, "job_id": job_id, "folder_path": str(folder)} + + +@app.get("/api/aurora/jobs") +async def api_aurora_jobs( + limit: int = Query(default=30, ge=1, le=200), + status: Optional[str] = Query(default=None), +) -> Dict[str, Any]: + query = f"/api/aurora/jobs?limit={limit}" + if status and status.strip(): + query += f"&status={quote(status.strip(), safe=',')}" + return await _aurora_request_json( + "GET", + query, + timeout=20.0, + retries=3, + retry_backoff_sec=0.25, + ) + + +@app.get("/api/aurora/result/{job_id}") +async def api_aurora_result(job_id: str) -> Dict[str, Any]: + return await _aurora_request_json( + "GET", + f"/api/aurora/result/{quote(job_id, safe='')}", + timeout=20.0, + retries=4, + retry_backoff_sec=0.35, + ) + + +@app.get("/api/aurora/compare/{job_id}") +async def api_aurora_compare(job_id: str) -> Dict[str, Any]: + """Before/after comparison with full metadata for a completed job.""" + status = await _aurora_request_json( + "GET", f"/api/aurora/status/{quote(job_id, safe='')}", timeout=15.0, retries=3 + ) + result = {} + try: + result = await 
_aurora_request_json( + "GET", f"/api/aurora/result/{quote(job_id, safe='')}", timeout=15.0, retries=2 + ) + except Exception: + pass + + meta = status.get("metadata") or {} + vid = meta.get("video") or {} + storage = status.get("storage") or {} + output_files = result.get("output_files") or status.get("output_files") or [] + proc_log = result.get("processing_log") or [] + + input_path = storage.get("input_path", "") + output_dir = storage.get("output_dir", "") + + before: Dict[str, Any] = { + "file_name": status.get("file_name") or (input_path.rsplit("/", 1)[-1] if input_path else "—"), + "resolution": f"{vid.get('width', '?')}x{vid.get('height', '?')}" if vid.get("width") else "—", + "width": vid.get("width"), + "height": vid.get("height"), + "duration_s": vid.get("duration_seconds"), + "fps": vid.get("fps"), + "frame_count": vid.get("frame_count"), + "codec": "—", + "file_size_mb": None, + } + + if input_path: + inp = Path(input_path) + if inp.exists(): + before["file_size_mb"] = round(inp.stat().st_size / (1024 * 1024), 2) + _probe = _ffprobe_quick(inp) if inp.exists() else {} + if _probe: + before["codec"] = _probe.get("codec", "—") + + result_file = None + for f in output_files: + if (f.get("type") == "video" or f.get("type") == "photo") and f.get("name"): + result_file = f + break + + after: Dict[str, Any] = { + "file_name": result_file["name"] if result_file else "—", + "resolution": "—", + "width": None, + "height": None, + "duration_s": None, + "fps": None, + "frame_count": None, + "codec": "—", + "file_size_mb": None, + "download_url": (result_file or {}).get("url"), + } + + if result_file and output_dir: + out_path = Path(output_dir) / result_file["name"] + if out_path.exists(): + after["file_size_mb"] = round(out_path.stat().st_size / (1024 * 1024), 2) + _probe = _ffprobe_quick(out_path) + if _probe: + after["resolution"] = _probe.get("resolution", "—") + after["width"] = _probe.get("width") + after["height"] = _probe.get("height") + 
after["duration_s"] = _probe.get("duration_s") + after["fps"] = _probe.get("fps") + after["frame_count"] = _probe.get("frame_count") + after["codec"] = _probe.get("codec", "—") + + faces_total = 0 + enhance_steps = [] + for step in proc_log: + det = step.get("details") or {} + if det.get("faces_detected_total") is not None: + faces_total += det["faces_detected_total"] + enhance_steps.append({ + "step": step.get("step", "?"), + "agent": step.get("agent", "?"), + "model": step.get("model", "?"), + "time_ms": step.get("time_ms"), + }) + + return { + "job_id": job_id, + "status": status.get("status"), + "mode": status.get("mode"), + "media_type": status.get("media_type"), + "elapsed_seconds": status.get("elapsed_seconds"), + "before": before, + "after": after, + "faces_detected": faces_total, + "enhance_steps": enhance_steps, + "folder_path": output_dir, + "input_path": input_path, + } + + +def _ffprobe_quick(filepath: Path) -> Dict[str, Any]: + """Quick ffprobe for resolution, codec, duration, fps, frame count.""" + if not filepath.exists(): + return {} + try: + import subprocess as _sp + raw = _sp.run( + ["ffprobe", "-v", "quiet", "-print_format", "json", "-show_format", "-show_streams", str(filepath)], + capture_output=True, text=True, timeout=10 + ) + if raw.returncode != 0: + return {} + import json as _json + data = _json.loads(raw.stdout) + fmt = data.get("format") or {} + vs = [s for s in (data.get("streams") or []) if s.get("codec_type") == "video"] + if not vs: + return {"duration_s": round(float(fmt.get("duration", 0)), 2)} + v = vs[0] + w, h = v.get("width"), v.get("height") + rfr = v.get("r_frame_rate", "0/1").split("/") + fps = round(int(rfr[0]) / max(1, int(rfr[1])), 2) if len(rfr) == 2 else None + return { + "resolution": f"{w}x{h}" if w and h else "—", + "width": w, "height": h, + "codec": v.get("codec_name", "—"), + "duration_s": round(float(fmt.get("duration", 0)), 2), + "fps": fps, + "frame_count": int(v.get("nb_frames", 0)) or None, + } + except 
Exception: + return {} + + +@app.post("/api/aurora/cancel/{job_id}") +async def api_aurora_cancel(job_id: str) -> Dict[str, Any]: + return await _aurora_request_json( + "POST", + f"/api/aurora/cancel/{quote(job_id, safe='')}", + timeout=20.0, + retries=2, + retry_backoff_sec=0.2, + ) + + +@app.post("/api/aurora/delete/{job_id}") +async def api_aurora_delete( + job_id: str, + purge_files: bool = Query(default=True), +) -> Dict[str, Any]: + path = f"/api/aurora/delete/{quote(job_id, safe='')}?purge_files={'true' if purge_files else 'false'}" + return await _aurora_request_json( + "POST", + path, + timeout=30.0, + retries=2, + retry_backoff_sec=0.2, + ) + + +@app.get("/api/aurora/report/{job_id}.pdf") +async def api_aurora_report_pdf(job_id: str) -> StreamingResponse: + """Stream PDF report from Aurora service without buffering in RAM.""" + encoded_job = quote(job_id, safe="") + paths = [AURORA_SERVICE_URL] + if AURORA_FALLBACK_URL and AURORA_FALLBACK_URL not in paths: + paths.append(AURORA_FALLBACK_URL) + last_err = "" + for base in paths: + url = f"{base}/api/aurora/report/{encoded_job}.pdf" + try: + client = httpx.AsyncClient(timeout=120.0) + r = await client.send(client.build_request("GET", url), stream=True) + if r.status_code >= 400: + body = (await r.aread()).decode(errors="replace")[:400] + await r.aclose() + await client.aclose() + raise HTTPException(status_code=r.status_code, detail=body or f"Aurora report error {r.status_code}") + disposition = r.headers.get("content-disposition", f'inline; filename="{job_id}_forensic_report.pdf"') + + async def _stream(): + try: + async for chunk in r.aiter_bytes(chunk_size=65536): + yield chunk + finally: + await r.aclose() + await client.aclose() + + return StreamingResponse( + _stream(), + media_type="application/pdf", + headers={"Content-Disposition": disposition, "Cache-Control": "no-store"}, + ) + except HTTPException: + raise + except Exception as e: + last_err = str(e)[:200] + if "nodename nor servname provided" 
in str(e): + continue + raise HTTPException(status_code=502, detail=f"Aurora report proxy error: {last_err}") + raise HTTPException(status_code=502, detail=f"Aurora report proxy error: {last_err or 'unavailable'}") + + +@app.get("/api/aurora/files/{job_id}/{file_name:path}") +async def api_aurora_file(job_id: str, file_name: str) -> StreamingResponse: + encoded_job = quote(job_id, safe="") + encoded_name = quote(file_name, safe="") + paths = [AURORA_SERVICE_URL] + if AURORA_FALLBACK_URL and AURORA_FALLBACK_URL not in paths: + paths.append(AURORA_FALLBACK_URL) + last_err = "" + for base in paths: + url = f"{base}/api/aurora/files/{encoded_job}/{encoded_name}" + client = httpx.AsyncClient(timeout=httpx.Timeout(10.0, read=300.0)) + try: + resp = await client.send(client.build_request("GET", url), stream=True) + if resp.status_code >= 400: + body = (await resp.aread()).decode(errors="replace")[:400] + await resp.aclose() + await client.aclose() + if resp.status_code >= 500: + last_err = f"Aurora {resp.status_code}: {body}" + continue + raise HTTPException(status_code=resp.status_code, detail=body or f"Aurora file error {resp.status_code}") + ct = resp.headers.get("content-type", "application/octet-stream") + disp = resp.headers.get("content-disposition", f'inline; filename="{Path(file_name).name}"') + + async def _stream(): + try: + async for chunk in resp.aiter_bytes(chunk_size=65536): + yield chunk + finally: + await resp.aclose() + await client.aclose() + + return StreamingResponse( + _stream(), + media_type=ct, + headers={"Content-Disposition": disp, "Cache-Control": "no-store"}, + ) + except HTTPException: + raise + except Exception as e: + await client.aclose() + last_err = str(e)[:200] + if "nodename nor servname provided" in str(e): + continue + raise HTTPException(status_code=502, detail=f"Aurora file proxy error: {last_err}") + raise HTTPException(status_code=502, detail=f"Aurora file proxy error: {last_err or 'unavailable'}") + + +class 
MediaImageGenerateBody(BaseModel): + prompt: str + negative_prompt: Optional[str] = None + width: int = 1024 + height: int = 1024 + steps: int = 28 + guidance_scale: float = 4.0 + timeout_s: int = 300 + + +class MediaVideoGenerateBody(BaseModel): + prompt: str + seconds: int = 4 + fps: int = 24 + steps: int = 30 + style: str = "cinematic" + aspect_ratio: str = "16:9" + timeout_s: int = 360 + + +class MediaImageModelLoadBody(BaseModel): + model: str + + +def _resolve_media_router_url() -> str: + nodes_cfg = load_nodes_registry() + discovered = ( + get_router_url("NODA2") + or (list(nodes_cfg.values())[0].get("router_url", "") if nodes_cfg else "") + ).rstrip("/") + return MEDIA_ROUTER_URL or discovered + + +def _media_router_candidates() -> List[str]: + raw = _resolve_media_router_url() + candidates: List[str] = [] + for u in (raw, MEDIA_ROUTER_FALLBACK_URL): + if not u: + continue + v = u.strip().rstrip("/") + if v and v not in candidates: + candidates.append(v) + if "://router:" in v or "://router/" in v: + host_fixed = v.replace("://router:", "://127.0.0.1:").replace("://router/", "://127.0.0.1/") + if host_fixed not in candidates: + candidates.append(host_fixed) + for port in ("9102", "8000"): + local = f"http://127.0.0.1:{port}" + if local not in candidates: + candidates.append(local) + return candidates + + +async def _pick_media_router_url() -> str: + candidates = _media_router_candidates() + if not candidates: + return "" + for u in candidates: + p = await _probe_http(f"{u}/healthz", timeout=2.5) + if p.get("reachable"): + return u + return candidates[0] + + +def _media_append_job(kind: str, payload: Dict[str, Any]) -> Dict[str, Any]: + item = { + "id": f"media_{kind}_{uuid.uuid4().hex[:10]}", + "kind": kind, + "ts": datetime.now(timezone.utc).isoformat(), + **payload, + } + _media_recent_jobs.appendleft(item) + return item + + +@app.get("/api/media/health") +async def api_media_health() -> Dict[str, Any]: + router_url = await _pick_media_router_url() + 
probes = await asyncio.gather( + _probe_http(f"{router_url}/healthz") if router_url else asyncio.sleep(0, result={"reachable": False, "error": "router missing"}), + _probe_http(f"{MEDIA_COMFY_AGENT_URL}/health"), + _probe_http(f"{MEDIA_COMFY_UI_URL}/"), + _probe_http(f"{MEDIA_SWAPPER_URL}/health"), + _probe_http(f"{MEDIA_IMAGE_GEN_URL}/health"), + ) + image_models: Dict[str, Any] = {"image_models": []} + try: + async with httpx.AsyncClient(timeout=10.0) as client: + r = await client.get(f"{MEDIA_SWAPPER_URL}/image/models") + if r.status_code < 400 and r.content: + image_models = r.json() + except Exception: + image_models = {"image_models": []} + return { + "ok": True, + "router_url": router_url, + "services": { + "router": probes[0], + "comfy_agent": probes[1], + "comfy_ui": probes[2], + "swapper": probes[3], + "image_gen": probes[4], + }, + "image_models": image_models.get("image_models", []), + "active_image_model": image_models.get("active_image_model"), + "fallback_order": ["comfy", "swapper", "image-gen-service"], + } + + +@app.get("/api/media/models/image") +async def api_media_image_models() -> Dict[str, Any]: + try: + async with httpx.AsyncClient(timeout=15.0) as client: + r = await client.get(f"{MEDIA_SWAPPER_URL}/image/models") + if r.status_code >= 400: + raise HTTPException(status_code=r.status_code, detail=r.text[:240] or "swapper error") + data = r.json() if r.content else {} + return { + "ok": True, + "image_models": data.get("image_models", []), + "active_image_model": data.get("active_image_model"), + "device": data.get("device"), + } + except HTTPException: + raise + except Exception as e: + raise HTTPException(status_code=502, detail=f"Image models unavailable: {str(e)[:200]}") + + +@app.post("/api/media/models/image/load") +async def api_media_image_model_load(body: MediaImageModelLoadBody) -> Dict[str, Any]: + model = body.model.strip() + if not model: + raise HTTPException(status_code=400, detail="model is required") + try: + async with 
httpx.AsyncClient(timeout=300.0) as client: + r = await client.post(f"{MEDIA_SWAPPER_URL}/image/models/{quote(model, safe='')}/load") + if r.status_code >= 400: + raise HTTPException(status_code=r.status_code, detail=r.text[:240] or "swapper load error") + return {"ok": True, "result": r.json() if r.content else {"status": "ok"}} + except HTTPException: + raise + except Exception as e: + raise HTTPException(status_code=502, detail=f"Image model load failed: {str(e)[:200]}") + + +@app.post("/api/media/generate/image") +async def api_media_generate_image(body: MediaImageGenerateBody) -> Dict[str, Any]: + prompt = body.prompt.strip() + if not prompt: + raise HTTPException(status_code=400, detail="prompt is required") + router_url = await _pick_media_router_url() + if not router_url: + raise HTTPException(status_code=503, detail="Router URL not configured") + + params = { + "prompt": prompt, + "negative_prompt": body.negative_prompt or "", + "width": max(256, min(2048, int(body.width))), + "height": max(256, min(2048, int(body.height))), + "steps": max(1, min(120, int(body.steps))), + "guidance_scale": max(0.0, min(20.0, float(body.guidance_scale))), + "timeout_s": max(30, min(900, int(body.timeout_s))), + } + started = time.monotonic() + response = await execute_tool( + router_url, + tool="image_generate", + action="generate", + params=params, + agent_id="sofiia", + timeout=float(params["timeout_s"] + 30), + api_key=ROUTER_API_KEY, + ) + ok = response.get("status") == "ok" + result_data = response.get("data") or {} + result_item = _media_append_job( + "image", + { + "status": "ok" if ok else "failed", + "provider": "router:image_generate", + "prompt": prompt[:180], + "duration_ms": int((time.monotonic() - started) * 1000), + "result": result_data.get("result"), + "has_image_base64": bool(result_data.get("image_base64")), + "error": (response.get("error") or {}).get("message") if isinstance(response.get("error"), dict) else response.get("error"), + }, + ) + if not ok: 
+ raise HTTPException(status_code=502, detail=f"Image generate failed: {result_item.get('error') or 'tool failed'}") + return {"ok": True, "job": result_item, "tool_response": response} + + +@app.post("/api/media/generate/video") +async def api_media_generate_video(body: MediaVideoGenerateBody) -> Dict[str, Any]: + prompt = body.prompt.strip() + if not prompt: + raise HTTPException(status_code=400, detail="prompt is required") + router_url = await _pick_media_router_url() + if not router_url: + raise HTTPException(status_code=503, detail="Router URL not configured") + + params = { + "prompt": prompt, + "seconds": max(1, min(8, int(body.seconds))), + "fps": max(8, min(60, int(body.fps))), + "steps": max(1, min(120, int(body.steps))), + "timeout_s": max(60, min(1200, int(body.timeout_s))), + } + started = time.monotonic() + response = await execute_tool( + router_url, + tool="comfy_generate_video", + action="generate", + params=params, + agent_id="sofiia", + timeout=float(params["timeout_s"] + 30), + api_key=ROUTER_API_KEY, + ) + ok = response.get("status") == "ok" + provider = "router:comfy_generate_video" + fallback_payload: Dict[str, Any] = {} + if not ok: + try: + async with httpx.AsyncClient(timeout=120.0) as client: + r = await client.post( + f"{MEDIA_SWAPPER_URL}/video/generate", + json={ + "prompt": prompt, + "duration": params["seconds"], + "style": body.style, + "aspect_ratio": body.aspect_ratio, + }, + ) + if r.status_code < 400: + fallback_payload = r.json() if r.content else {} + ok = True + provider = "swapper:video/generate" + except Exception as e: + fallback_payload = {"error": str(e)[:200]} + + result_item = _media_append_job( + "video", + { + "status": "ok" if ok else "failed", + "provider": provider, + "prompt": prompt[:180], + "duration_ms": int((time.monotonic() - started) * 1000), + "result": (response.get("data") or {}).get("result") if not fallback_payload else fallback_payload, + "error": None if ok else ((response.get("error") or 
{}).get("message") if isinstance(response.get("error"), dict) else response.get("error")), + }, + ) + if not ok: + raise HTTPException(status_code=502, detail=f"Video generate failed: {result_item.get('error') or 'tool failed'}") + return {"ok": True, "job": result_item, "tool_response": response, "fallback_response": fallback_payload} + + +@app.get("/api/media/jobs") +async def api_media_jobs(limit: int = Query(default=20, ge=1, le=100)) -> Dict[str, Any]: + return {"ok": True, "count": min(limit, len(_media_recent_jobs)), "jobs": list(_media_recent_jobs)[:limit]} + + +# ─── Chat (runtime contract) ───────────────────────────────────────────────── + +@app.get("/api/chat/config") +async def api_chat_config() -> Dict[str, Any]: + return { + "preferred_model": SOFIIA_PREFERRED_CHAT_MODEL, + "ollama": { + "timeout_sec": SOFIIA_OLLAMA_TIMEOUT_SEC, + "voice_timeout_sec": SOFIIA_OLLAMA_VOICE_TIMEOUT_SEC, + "keep_alive": SOFIIA_OLLAMA_KEEP_ALIVE, + "num_ctx": SOFIIA_OLLAMA_NUM_CTX, + "num_thread": SOFIIA_OLLAMA_NUM_THREAD, + "num_gpu": SOFIIA_OLLAMA_NUM_GPU, + }, + } + + +class ChatSendBody(BaseModel): + message: str + model: str = "ollama:qwen3:14b" + node_id: str = "NODA2" + project_id: Optional[str] = None + session_id: Optional[str] = None + user_id: Optional[str] = None + history: List[Dict[str, Any]] = [] + # Voice routing hint — forwarded to Router as X-Voice-Profile header + # Values: "voice_fast_uk" (default) | "voice_quality_uk" + voice_profile: Optional[str] = None + + +@app.post("/api/chat/send") +async def api_chat_send(body: ChatSendBody, request: Request): + """BFF chat: Ollama or router. Returns runtime contract fields. 
Rate: 30/min.""" + client_ip = request.client.host if request.client else "unknown" + if not _check_rate(f"chat:{client_ip}", max_calls=30, window_sec=60): + raise HTTPException(status_code=429, detail="Rate limit: 30 messages/min") + + # Runtime identity + project_id = body.project_id or "default" + session_id = body.session_id or f"sess_{uuid.uuid4().hex[:12]}" + user_id = body.user_id or "console_user" + + provider, _, model_name = body.model.partition(":") + reply = "" + t0 = time.monotonic() + + def _clean_reply(text: str) -> str: + """Strip ... reasoning blocks (Qwen3/DeepSeek-R1) before returning to user. + + Strategy: + 1. re.DOTALL regex removes complete ... blocks. + 2. Fallback split removes any trailing unclosed block + (model stopped mid-reasoning without ). + """ + import re + # Primary: strip complete blocks (multiline-safe with DOTALL) + cleaned = re.sub(r".*?", "", text, + flags=re.DOTALL | re.IGNORECASE) + # Fallback: if an unclosed block remains, drop everything after it + if "" in cleaned.lower(): + cleaned = re.split(r"(?i)", cleaned)[0] + return cleaned.strip() + + # Broadcast: user message sent + _broadcast_bg(_make_event("chat.message", + {"text": body.message[:200], "provider": provider, "model": body.model}, + project_id=project_id, session_id=session_id, user_id=user_id)) + + # voice_profile determines LLM options for voice turns. + # None = text chat (full prompt, no token limit enforcement). 
    # ── Chat turn: voice-profile handling + provider dispatch ────────────────
    # NOTE(review): this span is the interior of a larger request handler whose
    # signature is defined above this excerpt; `provider`, `model_name`,
    # `project_id`, `session_id`, `user_id` and `t0` are bound there.
    _vp = body.voice_profile  # "voice_fast_uk" | "voice_quality_uk" | None
    _is_voice_turn = _vp is not None
    _is_quality = _vp == "voice_quality_uk"

    # System prompt: voice turns get guardrails appended
    _system_prompt = SOFIIA_SYSTEM_PROMPT
    if _is_voice_turn:
        _system_prompt = SOFIIA_SYSTEM_PROMPT + SOFIIA_VOICE_PROMPT_SUFFIX

    # Sampling options: voice turns cap output length; the quality profile runs
    # slightly warmer and longer than the fast profile.
    _voice_options = {
        "temperature": 0.18 if _is_quality else 0.15,
        "repeat_penalty": 1.1,
        "num_predict": 256 if _is_quality else 220,  # max_tokens per contract (≤256)
    } if _is_voice_turn else {
        "temperature": 0.15,
        "repeat_penalty": 1.1,
        "num_predict": SOFIIA_OLLAMA_NUM_PREDICT_TEXT,
    }

    if provider == "ollama":
        # Local Ollama chat: system prompt + last 12 history turns + user message.
        ollama_url = get_ollama_url()
        effective_model_name = model_name or "qwen3:14b"
        messages: List[Dict[str, Any]] = [{"role": "system", "content": _system_prompt}]
        messages.extend(body.history[-12:])
        messages.append({"role": "user", "content": body.message})
        try:
            async with httpx.AsyncClient(timeout=SOFIIA_OLLAMA_TIMEOUT_SEC) as client:
                r = await client.post(
                    f"{ollama_url}/api/chat",
                    json=_make_ollama_payload(effective_model_name, messages, _voice_options),
                )
                r.raise_for_status()
                data = r.json()
                reply = _clean_reply((data.get("message") or {}).get("content", "") or "Ollama: порожня відповідь")
        except httpx.HTTPStatusError as e:
            err_msg = f"Ollama HTTP {e.response.status_code}"
            _broadcast_bg(_make_event("error", {"where": "ollama", "message": err_msg},
                                      project_id=project_id, session_id=session_id, user_id=user_id))
            raise HTTPException(status_code=502, detail=f"{err_msg}: {str(e)[:200]}")
        except Exception as e:
            _broadcast_bg(_make_event("error", {"where": "ollama", "message": str(e)[:100]},
                                      project_id=project_id, session_id=session_id, user_id=user_id))
            raise HTTPException(status_code=502, detail=f"Ollama error: {str(e)[:200]}")

    elif provider == "router":
        # Internal agent router. model_name may encode "agent|model", a bare
        # model id (detected by a ":"), or a bare agent id.
        base_url = get_router_url(body.node_id)
        router_agent_id = "sofiia"
        router_model = None
        if model_name:
            if "|" in model_name:
                left, right = model_name.split("|", 1)
                router_agent_id = left or "sofiia"
                router_model = right or None
            elif ":" in model_name:
                # Looks like model id (qwen3:14b, qwen3.5:35b-a3b, etc.)
                router_model = model_name
            elif model_name not in ("default",):
                # Treat plain token as agent id (router:soul, router:monitor, ...)
                router_agent_id = model_name
        metadata: Dict[str, Any] = {
            "project_id": project_id,
            "session_id": session_id,
            "user_id": user_id,
            "client": "sofiia-console",
            "voice_profile": _vp,
        }
        try:
            out = await infer(
                base_url,
                router_agent_id,
                body.message,
                model=router_model,
                metadata=metadata,
                timeout=300.0,
                api_key=ROUTER_API_KEY,
            )
            reply = _clean_reply(out.get("response", out.get("text", "")))
        except Exception as e:
            _broadcast_bg(_make_event("error", {"where": "router", "message": str(e)[:100]},
                                      project_id=project_id, session_id=session_id, user_id=user_id))
            raise HTTPException(status_code=502, detail=str(e)[:300])

    elif provider == "glm":
        # Zhipu AI GLM — OpenAI-compatible API at bigmodel.cn
        glm_api_key = os.getenv("GLM5_API_KEY", os.getenv("GLM_API_KEY", "")).strip()
        if not glm_api_key:
            raise HTTPException(status_code=503, detail="GLM5_API_KEY not set.")
        glm_model = model_name or "glm-4.7"
        messages_glm: List[Dict[str, Any]] = [{"role": "system", "content": _system_prompt}]
        messages_glm.extend(body.history[-12:])
        messages_glm.append({"role": "user", "content": body.message})
        try:
            async with httpx.AsyncClient(timeout=120.0) as client:
                r = await client.post(
                    "https://open.bigmodel.cn/api/paas/v4/chat/completions",
                    headers={"Authorization": f"Bearer {glm_api_key}", "Content-Type": "application/json"},
                    json={"model": glm_model, "messages": messages_glm, "stream": False},
                )
                r.raise_for_status()
                data = r.json()
                reply = _clean_reply((data.get("choices") or [{}])[0].get("message", {}).get("content", "") or "GLM: порожня відповідь")
        except httpx.HTTPStatusError as e:
            err_msg = f"GLM HTTP {e.response.status_code}: {e.response.text[:200]}"
            _broadcast_bg(_make_event("error", {"where": "glm", "message": err_msg},
                                      project_id=project_id, session_id=session_id, user_id=user_id))
            raise HTTPException(status_code=502, detail=err_msg)
        except Exception as e:
            _broadcast_bg(_make_event("error", {"where": "glm", "message": str(e)[:100]},
                                      project_id=project_id, session_id=session_id, user_id=user_id))
            raise HTTPException(status_code=502, detail=f"GLM error: {str(e)[:200]}")

    elif provider == "grok":
        # xAI Grok — OpenAI-compatible API
        xai_api_key = os.getenv("XAI_API_KEY", "").strip()
        if not xai_api_key:
            raise HTTPException(status_code=503, detail="XAI_API_KEY not set. Add it to BFF environment.")
        grok_model = model_name or "grok-4-1-fast-reasoning"
        messages: List[Dict[str, Any]] = [{"role": "system", "content": _system_prompt}]
        messages.extend(body.history[-12:])
        messages.append({"role": "user", "content": body.message})
        try:
            async with httpx.AsyncClient(timeout=120.0) as client:
                r = await client.post(
                    "https://api.x.ai/v1/chat/completions",
                    headers={"Authorization": f"Bearer {xai_api_key}", "Content-Type": "application/json"},
                    json={"model": grok_model, "messages": messages, "stream": False},
                )
                r.raise_for_status()
                data = r.json()
                reply = _clean_reply((data.get("choices") or [{}])[0].get("message", {}).get("content", "") or "Grok: порожня відповідь")
        except httpx.HTTPStatusError as e:
            err_msg = f"Grok HTTP {e.response.status_code}: {e.response.text[:200]}"
            _broadcast_bg(_make_event("error", {"where": "grok", "message": err_msg},
                                      project_id=project_id, session_id=session_id, user_id=user_id))
            raise HTTPException(status_code=502, detail=err_msg)
        except Exception as e:
            _broadcast_bg(_make_event("error", {"where": "grok", "message": str(e)[:100]},
                                      project_id=project_id, session_id=session_id, user_id=user_id))
            raise HTTPException(status_code=502,
detail=f"Grok error: {str(e)[:200]}") + + else: + raise HTTPException(status_code=400, detail=f"Unsupported provider: {provider}. Use ollama, router, or grok.") + + latency_ms = int((time.monotonic() - t0) * 1000) + tokens_est = len(reply.split()) + trace_id = f"chat_{session_id}_{uuid.uuid4().hex[:8]}" + + # Broadcast: reply + _broadcast_bg(_make_event("chat.reply", + {"text": reply[:200], "provider": provider, "model": body.model, + "latency_ms": latency_ms, "trace_id": trace_id}, + project_id=project_id, session_id=session_id, user_id=user_id)) + + # Memory save (best-effort, non-blocking) + asyncio.get_event_loop().create_task( + _do_save_memory(body.message, reply, session_id, project_id, user_id) + ) + + # AISTALK forward (if enabled) + if _aistalk: + try: + _aistalk.handle_event(_make_event("chat.reply", + {"text": reply, "provider": provider, "model": body.model}, + project_id=project_id, session_id=session_id, user_id=user_id)) + except Exception as e: + logger.debug("AISTALK forward failed: %s", e) + + return { + "ok": True, + "project_id": project_id, + "session_id": session_id, + "user_id": user_id, + "response": reply, + "model": body.model, + "backend": provider, + "trace_id": trace_id, + "meta": { + "latency_ms": latency_ms, + "tokens_est": tokens_est, + "trace_id": trace_id, + }, + } + + +async def _do_save_memory( + user_msg: str, + ai_reply: str, + session_id: str, + project_id: str = "default", + user_id: str = "console_user", + agent_id: str = "sofiia", +) -> None: + # 1) Persist to local SQLite (projects/sessions/messages schema) + try: + # Ensure target project exists to satisfy sessions.project_id FK. 
+ proj = await _app_db.get_project(project_id) + if not proj: + await _app_db.create_project( + name=project_id.upper(), + description=f"Auto-created project for {project_id} sessions", + project_id=project_id, + ) + await _app_db.upsert_session(session_id, project_id=project_id) + last_msg = None + if user_msg: + saved = await _app_db.save_message(session_id, "user", user_msg[:4096]) + last_msg = saved["msg_id"] + if ai_reply: + await _app_db.save_message( + session_id, "assistant", ai_reply[:4096], parent_msg_id=last_msg + ) + except Exception as e: + logger.debug("SQLite memory save skipped: %s", e) + + # 2) Best-effort: also send to Memory Service (Qdrant + Neo4j) + mem_url = get_memory_service_url() + try: + async with httpx.AsyncClient(timeout=5.0) as client: + for role, content in [("user", user_msg), ("assistant", ai_reply)]: + if not content: + continue + resp = await client.post(f"{mem_url}/agents/{agent_id}/memory", json={ + "agent_id": agent_id, + "role": role, + "content": content[:1000], + "user_id": user_id, + "channel_id": session_id, + "metadata": {"project_id": project_id, "client": "sofiia-console", "agent_id": agent_id}, + }) + if resp.status_code >= 400: + logger.warning( + "Memory Service save failed status=%s agent=%s session=%s body=%s", + resp.status_code, + agent_id, + session_id, + (resp.text or "")[:240], + ) + except Exception as e: + logger.debug("Memory Service save skipped: %s", e) + + +# ─── Ops ──────────────────────────────────────────────────────────────────── + +class OpsRunBody(BaseModel): + action_id: str + node_id: str = "NODA2" + params: dict = {} + project_id: Optional[str] = None + session_id: Optional[str] = None + source_run_id: Optional[str] = None # link to supervisor run + source_msg_id: Optional[str] = None # link to message + + +class NodeUpsertBody(BaseModel): + node_id: str + label: str + router_url: str + gateway_url: Optional[str] = "" + monitor_url: Optional[str] = "" + supervisor_url: Optional[str] = "" + 
ssh_host: Optional[str] = "" + ssh_port: Optional[int] = 22 + ssh_user: Optional[str] = "" + ssh_password_env: Optional[str] = "" + ssh_ipv6: Optional[str] = "" + ssh_host_keys: Optional[List[Dict[str, Any]]] = None + enabled: bool = True + + +@app.get("/api/ops/actions") +async def api_ops_actions_list(): + return {"actions": list(OPS_ACTIONS.keys())} + + +@app.post("/api/ops/run") +async def api_ops_run(body: OpsRunBody, _auth=Depends(require_api_key)): + """Run ops action. Broadcasts ops.run event and auto-creates ops_run graph node.""" + import uuid as _uuid + t0 = time.monotonic() + project_id = body.project_id or "default" + session_id = body.session_id or "console" + ops_run_id = str(_uuid.uuid4()) + started_at = _app_db._now() if _app_db else None + + result = await run_ops_action( + body.action_id, body.node_id, body.params, + agent_id="sofiia", timeout=90.0, api_key=ROUTER_API_KEY, + ) + elapsed = int((time.monotonic() - t0) * 1000) + ok = result.get("status") != "failed" + status_str = "ok" if ok else "failed" + error_str = result.get("error", "") if not ok else "" + + _broadcast_bg(_make_event("ops.run", + {"name": body.action_id, "ok": ok, "elapsed_ms": elapsed}, + project_id=project_id, session_id=session_id)) + + # Auto-create ops_run graph node (fire-and-forget, do not fail the request) + if _app_db and project_id: + try: + gn = await _app_db.upsert_ops_run_node( + project_id=project_id, + ops_run_id=ops_run_id, + action_id=body.action_id, + node_id=body.node_id, + status=status_str, + elapsed_ms=elapsed, + error=str(error_str)[:500], + started_at=started_at or "", + source_run_id=body.source_run_id or "", + source_msg_id=body.source_msg_id or "", + ) + result["_graph_node_id"] = gn.get("node_id") + result["_ops_run_id"] = ops_run_id + except Exception as _e: + logger.warning("ops_run graph node creation failed (non-fatal): %s", _e) + + return result + + +# ─── Nodes ────────────────────────────────────────────────────────────────── + 

@app.get("/api/nodes/dashboard")
async def api_nodes_dashboard(refresh: bool = Query(False), _auth: str = Depends(require_auth)):
    """
    Nodes dashboard with full telemetry.
    Returns cached data (refreshed every NODES_POLL_INTERVAL_SEC seconds).
    Pass ?refresh=true to force immediate re-probe.
    """
    if refresh or not _nodes_cache["nodes"]:
        fresh = await get_nodes_dashboard(router_api_key=ROUTER_API_KEY)
        _nodes_cache.update({**fresh, "ts": _now_iso()})
        return {**fresh, "ts": _nodes_cache["ts"], "cached": False}
    return {**_nodes_cache, "cached": True}


@app.get("/api/nodes/registry")
async def api_nodes_registry(_auth: str = Depends(require_auth)):
    """Return the raw nodes registry as loaded from disk."""
    return load_nodes_registry()


@app.get("/api/nodes/ssh/status")
async def api_nodes_ssh_status(
    node_id: str = Query(..., description="Node ID, e.g. NODA1"),
    _auth=Depends(require_api_key_strict),
):
    """TCP-probe a node's SSH endpoint (IPv4 first, IPv6 fallback).

    `ok` is True only when the port is reachable AND the profile has either
    a password or a private key configured.
    """
    node_id = node_id.strip().upper()
    ssh = get_node_ssh_profile(node_id)
    if not ssh.get("configured"):
        return {
            "ok": False,
            "node_id": node_id,
            "configured": False,
            "error": "ssh profile is not configured",
            "ssh": ssh,
        }

    host = ssh.get("host", "")
    host_ipv6 = (ssh.get("ipv6") or "").strip()
    port = int(ssh.get("port") or 22)
    tcp_ok = False
    tcp_error = None
    connect_host = host

    def _try_connect(target_host: str) -> Optional[str]:
        # Returns None on success, truncated error text on failure.
        try:
            with socket.create_connection((target_host, port), timeout=5):
                return None
        except Exception as e:
            return str(e)[:160]

    tcp_error = _try_connect(host)
    if tcp_error is None:
        tcp_ok = True
    elif host_ipv6:
        # IPv4 failed — retry over the configured IPv6 address.
        err_v6 = _try_connect(host_ipv6)
        if err_v6 is None:
            tcp_ok = True
            tcp_error = None
            connect_host = host_ipv6
        else:
            tcp_error = f"ipv4={tcp_error}; ipv6={err_v6}"[:220]

    ok = tcp_ok and (ssh["auth"]["password_set"] or ssh["auth"]["private_key_set"])
    return {
        "ok": ok,
        "node_id": node_id,
        "configured": True,
        "tcp_reachable": tcp_ok,
        "tcp_error": tcp_error,
        "connect_host": connect_host,
        "ssh": ssh,
    }


@app.post("/api/nodes/add")
async def api_nodes_add(body: NodeUpsertBody, _auth=Depends(require_api_key_strict)):
    """Upsert a node into the registry, persist it, and refresh the dashboard cache."""
    reg = load_nodes_registry()
    reg.setdefault("defaults", {"health_timeout_sec": 10, "tools_timeout_sec": 30})
    reg.setdefault("nodes", {})
    node_id = body.node_id.strip().upper()
    if not node_id:
        raise HTTPException(status_code=400, detail="node_id is required")
    node_payload: Dict[str, Any] = {
        "label": body.label.strip() or node_id,
        "router_url": body.router_url.strip(),
        "gateway_url": (body.gateway_url or "").strip(),
        "monitor_url": (body.monitor_url or body.router_url).strip(),
        "supervisor_url": (body.supervisor_url or "").strip(),
        "enabled": body.enabled,
    }
    ssh_host = (body.ssh_host or "").strip()
    ssh_user = (body.ssh_user or "").strip()
    if ssh_host and ssh_user:
        # SSH profile is only stored when both host and user are provided.
        node_payload["ssh"] = {
            "host": ssh_host,
            "ipv6": (body.ssh_ipv6 or "").strip(),
            "port": int(body.ssh_port or 22),
            "user": ssh_user,
            "auth": {
                # Env var NAME holding the password, not the password itself.
                "password_env": (body.ssh_password_env or f"NODES_{node_id}_SSH_PASSWORD").strip(),
            },
            "host_keys": body.ssh_host_keys or [],
        }

    reg["nodes"][node_id] = node_payload
    path = save_nodes_registry(reg)
    fresh = await get_nodes_dashboard(router_api_key=ROUTER_API_KEY)
    _nodes_cache.update({**fresh, "ts": _now_iso()})
    return {"ok": True, "saved_to": str(path), "node_id": node_id, "nodes": reg.get("nodes", {})}


# ─── Voice ────────────────────────────────────────────────────────────────── 

@app.post("/api/voice/stt")
async def api_voice_stt(
    request: Request,
    audio: UploadFile = File(...),
    language: Optional[str] = Query(None),
    session_id: Optional[str] = Query(None),
    project_id: Optional[str] = Query(None),
):
    """STT proxy → memory-service. Rate: 20/min. 
    Broadcasts voice.stt events."""
    client_ip = request.client.host if request.client else "unknown"
    if not _check_rate(f"stt:{client_ip}", max_calls=20, window_sec=60):
        raise HTTPException(status_code=429, detail="Rate limit: 20 STT calls/min")

    sid = session_id or "console"
    pid = project_id or "default"
    _broadcast_bg(_make_event("voice.stt", {"phase": "start"},
                              project_id=pid, session_id=sid))
    t0 = time.monotonic()

    mem_url = get_memory_service_url()
    try:
        content = await audio.read()
        if not content:
            raise HTTPException(status_code=400, detail="Empty audio file")
        async with httpx.AsyncClient(timeout=60.0) as client:
            files = {"audio": (audio.filename or "audio.webm", content, audio.content_type or "audio/webm")}
            params = {"language": language} if language else {}
            r = await client.post(f"{mem_url}/voice/stt", files=files, params=params)
            r.raise_for_status()
            result = r.json()
        elapsed = int((time.monotonic() - t0) * 1000)
        upstream_ms = result.get("compute_ms", 0)
        logger.info("STT ok: lang=%s text_len=%d bff_ms=%d upstream_ms=%d",
                    language or "auto", len(result.get("text", "")), elapsed, upstream_ms)
        _broadcast_bg(_make_event("voice.stt",
            {"phase": "done", "elapsed_ms": elapsed, "upstream_ms": upstream_ms},
            project_id=pid, session_id=sid))
        result["bff_ms"] = elapsed
        return result
    except httpx.HTTPStatusError as e:
        logger.error("STT upstream error: status=%s", e.response.status_code)
        _broadcast_bg(_make_event("voice.stt", {"phase": "error", "message": str(e)[:80]},
                                  project_id=pid, session_id=sid))
        raise HTTPException(status_code=e.response.status_code, detail=f"STT upstream: {str(e)[:200]}")
    except HTTPException:
        # Re-raise our own 400 (empty audio) untouched.
        raise
    except Exception as e:
        logger.error("STT proxy error: %s", e, exc_info=True)
        _broadcast_bg(_make_event("voice.stt", {"phase": "error", "message": str(e)[:80]},
                                  project_id=pid, session_id=sid))
        raise HTTPException(status_code=502, detail=f"STT error: {str(e)[:200]}")


class TTSRequest(BaseModel):
    """Request body for POST /api/voice/tts."""
    text: str
    voice: Optional[str] = "default"
    speed: Optional[float] = 1.0
    model: Optional[str] = "piper"
    session_id: Optional[str] = None
    project_id: Optional[str] = None


@app.post("/api/voice/tts")
async def api_voice_tts(body: TTSRequest, request: Request):
    """TTS proxy → memory-service. Rate: 30/min per IP. Concurrent: MAX_CONCURRENT_TTS."""
    client_ip = request.client.host if request.client else "unknown"
    if not _check_rate(f"tts:{client_ip}", max_calls=30, window_sec=60):
        raise HTTPException(status_code=429, detail="Rate limit: 30 TTS calls/min per client")

    # Concurrent synthesis guard — prevents memory-service DoS on burst requests.
    # NOTE(review): peeks at a private asyncio.Semaphore attribute; this is an
    # advisory fast-fail only and is racy between the peek and the acquire below.
    sem = _get_tts_semaphore()
    if not sem._value:  # non-blocking peek: all slots occupied
        raise HTTPException(status_code=503,
            detail=f"TTS busy: max {_MAX_CONCURRENT_TTS} concurrent synthesis. Retry in 1-2s.")

    # Server-side sanitization (app.voice_utils): strips markup/markdown and
    # URLs; truncates safely.
    text = _sanitize_for_voice(body.text.strip())
    if not text:
        raise HTTPException(status_code=400, detail="Empty text")

    sid = body.session_id or "console"
    pid = body.project_id or "default"
    _broadcast_bg(_make_event("voice.tts", {"phase": "start", "voice": body.voice},
                              project_id=pid, session_id=sid))
    t0 = time.monotonic()

    # NOTE(review): re-fetches the same module-level semaphore; redundant but harmless.
    sem = _get_tts_semaphore()
    async with sem:  # enforce MAX_CONCURRENT_TTS globally
        try:
            # ── Voice HA path (opt-in via VOICE_HA_ENABLED=true) ──────────────
            if is_voice_ha_enabled():
                router_url = get_voice_ha_router_url()
                tts_payload = {
                    "text": text,
                    "voice": body.voice,
                    "speed": body.speed,
                    "model": body.model,
                }
                async with httpx.AsyncClient(timeout=30.0) as client:
                    r = await client.post(
                        f"{router_url}/v1/capability/voice_tts",
                        json=tts_payload,
                    )
                    r.raise_for_status()
                elapsed = int((time.monotonic() - t0) * 1000)
                upstream_ct = r.headers.get("content-type", "audio/wav")
                tts_engine = r.headers.get("X-TTS-Engine", "unknown")
                tts_voice_used = r.headers.get("X-TTS-Voice", body.voice)
                voice_node = r.headers.get("X-Voice-Node", "unknown")
                voice_mode = r.headers.get("X-Voice-Mode", "remote")
                ext = "mp3" if "mpeg" in upstream_ct else "wav"
                logger.info("TTS HA ok: voice=%s node=%s mode=%s elapsed=%dms",
                            tts_voice_used, voice_node, voice_mode, elapsed)
                _broadcast_bg(_make_event("voice.tts",
                    {"phase": "done", "voice": tts_voice_used, "engine": tts_engine,
                     "elapsed_ms": elapsed, "ha_mode": voice_mode, "ha_node": voice_node},
                    project_id=pid, session_id=sid))
                # Re-stream the upstream audio body with diagnostic headers.
                return StreamingResponse(
                    io.BytesIO(r.content),
                    media_type=upstream_ct,
                    headers={
                        "Content-Disposition": f"inline; filename=speech.{ext}",
                        "X-TTS-Engine": tts_engine,
                        "X-TTS-Voice": tts_voice_used,
                        "X-TTS-Elapsed-MS": str(elapsed),
                        "X-Voice-Node": voice_node,
                        "X-Voice-Mode": voice_mode,
                        "Cache-Control": "no-store",
                    },
                )

            # ── Legacy direct path (default, VOICE_HA_ENABLED=false) ────────── 
            mem_url = get_memory_service_url()
            async with httpx.AsyncClient(timeout=30.0) as client:
                r = await client.post(
                    f"{mem_url}/voice/tts",
                    json={"text": text, "voice": body.voice, "speed": body.speed, "model": body.model},
                )
                r.raise_for_status()
            elapsed = int((time.monotonic() - t0) * 1000)
            upstream_ct = r.headers.get("content-type", "audio/wav")
            tts_engine = r.headers.get("X-TTS-Engine", "unknown")
            tts_voice_used = r.headers.get("X-TTS-Voice", body.voice)
            ext = "mp3" if "mpeg" in upstream_ct else "wav"
            logger.info("TTS ok: voice=%s engine=%s len=%d fmt=%s elapsed=%dms",
                        tts_voice_used, tts_engine, len(text), ext, elapsed)
            _broadcast_bg(_make_event("voice.tts",
                {"phase": "done", "voice": tts_voice_used, "engine": tts_engine, "elapsed_ms": elapsed},
                project_id=pid, session_id=sid))
            return StreamingResponse(
                io.BytesIO(r.content),
                media_type=upstream_ct,
                headers={
                    "Content-Disposition": f"inline; filename=speech.{ext}",
                    "X-TTS-Engine": tts_engine,
                    "X-TTS-Voice": tts_voice_used,
                    "X-TTS-Elapsed-MS": str(elapsed),
                    "Cache-Control": "no-store",
                },
            )
        except httpx.HTTPStatusError as e:
            # Feed the voice error ring for the incident repro pack.
            _record_tts_error("http_error", e.response.status_code, str(e)[:120], body.voice)
            logger.error("TTS upstream error: status=%s voice=%s ha=%s",
                         e.response.status_code, body.voice, is_voice_ha_enabled())
            _broadcast_bg(_make_event("voice.tts", {"phase": "error", "message": str(e)[:80]},
                                      project_id=pid, session_id=sid))
            raise HTTPException(status_code=e.response.status_code, detail=f"TTS upstream: {str(e)[:200]}")
        except Exception as e:
            _record_tts_error("proxy_error", None, str(e)[:120], body.voice)
            logger.error("TTS proxy error: %s ha=%s", e, is_voice_ha_enabled(), exc_info=True)
            _broadcast_bg(_make_event("voice.tts", {"phase": "error", "message": str(e)[:80]},
                                      project_id=pid, session_id=sid))
            raise HTTPException(status_code=502, detail=f"TTS error: {str(e)[:200]}")


@app.get("/api/voice/voices")
async def api_voice_voices():
    """List available voices; falls back to a static macOS voice on upstream failure."""
    mem_url = get_memory_service_url()
    try:
        async with httpx.AsyncClient(timeout=10.0) as client:
            r = await client.get(f"{mem_url}/voice/voices")
            r.raise_for_status()
            return r.json()
    except Exception as e:
        return {"piper": [], "macos": [{"id": "Milena", "name": "Milena (uk-UA)", "lang": "uk-UA"}], "error": str(e)[:100]}


# ─── Phase 2: Voice Chat Stream (sentence chunking → early TTS) ────────────── 
# Strategy: split LLM text into sentences → synthesize first sentence immediately
# → return {first_audio_b64, first_text, rest_text[]}
# Browser plays first sentence while fetching TTS for remaining sentences in bg.
# TTFA drops from ~10-14s to ~3-5s (LLM still runs full, but TTS starts on chunk1).

# NOTE(review): mid-file imports — kept close to the Phase-2 section they serve.
from app.voice_utils import split_into_voice_chunks as _split_into_voice_chunks
from app.voice_utils import clean_think_blocks as _clean_think_blocks_util
from app.voice_utils import sanitize_for_voice as _sanitize_for_voice
from app.voice_utils import MIN_CHUNK_CHARS as _MIN_CHUNK_CHARS, MAX_CHUNK_CHARS as _MAX_CHUNK_CHARS


class VoiceChatStreamBody(BaseModel):
    """Request body for POST /api/voice/chat/stream."""
    message: str
    model: str = "ollama:qwen3:14b"
    node_id: str = "NODA2"
    voice: Optional[str] = None
    voice_profile: Optional[str] = "voice_fast_uk"
    session_id: Optional[str] = None
    project_id: Optional[str] = None
    history: List[Dict[str, Any]] = []


@app.post("/api/voice/chat/stream")
async def api_voice_chat_stream(body: VoiceChatStreamBody, request: Request):
    """Phase 2 Voice Chat: LLM → sentence split → first sentence TTS immediately.

    Returns:
        {
          ok: bool,
          first_text: str,        # first sentence
          first_audio_b64: str,   # base64 MP3 for immediate playback
          first_audio_mime: str,  # "audio/mpeg"
          rest_chunks: [str, ...],# remaining sentences (client fetches TTS via /api/voice/tts)
          full_text: str,         # full LLM reply (for display)
          trace_id: str,
          meta: {llm_ms, tts_ms, chunks_total}
        }

    Client flow:
        1. POST /api/voice/chat/stream → play first_audio_b64 immediately
        2. For each chunk in rest_chunks: POST /api/voice/tts → enqueue audio
    """
    import re as _re  # noqa: F401 – kept for legacy; re already imported at module level
    client_ip = request.client.host if request.client else "unknown"
    if not _check_rate(f"voice_stream:{client_ip}", max_calls=15, window_sec=60):
        raise HTTPException(status_code=429, detail="Rate limit: 15 voice stream calls/min per client")

    # Concurrent TTS guard also applies to stream endpoint (TTS inside).
    # NOTE(review): private-attribute peek; advisory fast-fail only (racy).
    sem = _get_tts_semaphore()
    if not sem._value:
        raise HTTPException(status_code=503,
            detail=f"TTS busy: max {_MAX_CONCURRENT_TTS} concurrent synthesis. Retry in 1-2s.")

    sid = body.session_id or f"vs_{uuid.uuid4().hex[:10]}"
    pid = body.project_id or "default"
    trace_id = f"vs_{sid}_{uuid.uuid4().hex[:8]}"

    _vp = body.voice_profile or "voice_fast_uk"
    _is_quality = _vp == "voice_quality_uk"
    _system_prompt = SOFIIA_SYSTEM_PROMPT + SOFIIA_VOICE_PROMPT_SUFFIX

    # Track for repro pack
    global _voice_last_model, _voice_last_profile
    _voice_last_model = body.model
    _voice_last_profile = _vp

    _broadcast_bg(_make_event("voice.stream", {"phase": "start", "trace_id": trace_id},
                              project_id=pid, session_id=sid))

    # ── 1. LLM ──────────────────────────────────────────────────────────────── 
    t0_llm = time.monotonic()
    provider, _, model_name = body.model.partition(":")
    reply = ""

    def _clean(text: str) -> str:
        # NOTE(review): both regex patterns below can match the empty string,
        # so as written the sub strips nothing useful and re.split(...)[0] can
        # collapse the text. They look like they originally contained literal
        # think-tag markers that were lost — confirm against VCS history before
        # relying on this helper.
        cleaned = re.sub(r".*?", "", text, flags=re.DOTALL | re.IGNORECASE)
        if "" in cleaned.lower():
            cleaned = re.split(r"(?i)", cleaned)[0]
        return cleaned.strip()

    try:
        if provider == "ollama":
            ollama_url = get_ollama_url()
            effective_model_name = model_name or "qwen3:14b"
            messages: List[Dict[str, Any]] = [{"role": "system", "content": _system_prompt}]
            messages.extend(body.history[-8:])
            messages.append({"role": "user", "content": body.message})
            voice_options = {
                "temperature": 0.18 if _is_quality else 0.15,
                "repeat_penalty": 1.1,
                "num_predict": 256 if _is_quality else 220,
            }
            async with httpx.AsyncClient(timeout=SOFIIA_OLLAMA_VOICE_TIMEOUT_SEC) as client:
                r = await client.post(
                    f"{ollama_url}/api/chat",
                    json=_make_ollama_payload(effective_model_name, messages, voice_options),
                )
                r.raise_for_status()
                raw = (r.json().get("message") or {}).get("content", "")
                reply = _clean(raw)
        elif provider == "grok":
            xai_key = os.getenv("XAI_API_KEY", "").strip()
            if not xai_key:
                raise HTTPException(status_code=503, detail="XAI_API_KEY not set.")
            grok_model = model_name or "grok-4-1-fast-reasoning"
            messages_g: List[Dict[str, Any]] = [{"role": "system", "content": _system_prompt}]
            messages_g.extend(body.history[-8:])
            messages_g.append({"role": "user", "content": body.message})
            async with httpx.AsyncClient(timeout=60.0) as client:
                r = await client.post(
                    "https://api.x.ai/v1/chat/completions",
                    headers={"Authorization": f"Bearer {xai_key}", "Content-Type": "application/json"},
                    json={"model": grok_model, "messages": messages_g, "stream": False,
                          "max_tokens": 1024, "temperature": 0.2},
                )
                r.raise_for_status()
                raw = (r.json().get("choices") or [{}])[0].get("message", {}).get("content", "")
                reply = _clean(raw)
        elif provider == "glm":
            glm_key = os.getenv("GLM5_API_KEY", os.getenv("GLM_API_KEY", "")).strip()
            if not glm_key:
                raise HTTPException(status_code=503, detail="GLM5_API_KEY not set.")
            glm_model = model_name or "glm-5"
            messages_glm: List[Dict[str, Any]] = [{"role": "system", "content": _system_prompt}]
            messages_glm.extend(body.history[-8:])
            messages_glm.append({"role": "user", "content": body.message})
            async with httpx.AsyncClient(timeout=60.0) as client:
                r = await client.post(
                    "https://open.bigmodel.cn/api/paas/v4/chat/completions",
                    headers={"Authorization": f"Bearer {glm_key}", "Content-Type": "application/json"},
                    json={"model": glm_model, "messages": messages_glm, "stream": False},
                )
                r.raise_for_status()
                raw = (r.json().get("choices") or [{}])[0].get("message", {}).get("content", "")
                reply = _clean(raw)
        else:
            raise HTTPException(status_code=400, detail=f"voice/stream: provider '{provider}' not supported. Use: ollama, grok, glm.")
    except HTTPException:
        raise
    except Exception as e:
        # Feed the LLM error ring for incident diagnosis, then surface a 502.
        _record_llm_error("inference_error", body.model, str(e)[:120])
        _broadcast_bg(_make_event("error", {"where": "voice_stream_llm", "trace_id": trace_id, "message": str(e)[:100]},
                                  project_id=pid, session_id=sid))
        raise HTTPException(status_code=502, detail=f"LLM error: {str(e)[:200]}")

    llm_ms = int((time.monotonic() - t0_llm) * 1000)
    if not reply:
        reply = "Не можу відповісти зараз."

    # ── 2. Sentence chunking ────────────────────────────────────────────────── 
    # sanitize full reply before splitting (removes markdown, URLs)
    sanitized_reply = _sanitize_for_voice(reply)
    chunks = _split_into_voice_chunks(sanitized_reply)
    if not chunks:
        chunks = [sanitized_reply] if sanitized_reply else ["Не можу відповісти зараз."]

    first_chunk = chunks[0]
    # rest_chunks: sanitize + hard cap (prevents DoS via unreasonably long replies)
    _MAX_REST_CHUNKS = int(os.getenv("MAX_VOICE_REST_CHUNKS", "8"))
    all_rest = [_sanitize_for_voice(c) for c in chunks[1:] if _sanitize_for_voice(c)]
    rest_chunks = all_rest[:_MAX_REST_CHUNKS]  # cap: never more than 8 background TTS calls

    # ── 3. TTS for first sentence (immediate) ───────────────────────────────── 
    t0_tts = time.monotonic()
    first_audio_b64 = ""
    first_audio_mime = "audio/mpeg"
    voice = body.voice or "default"
    _ha_voice_node = None
    _ha_voice_mode = None

    try:
        import base64 as _b64
        tts_json = {"text": first_chunk, "voice": voice, "speed": 1.0}
        async with httpx.AsyncClient(timeout=15.0) as client:
            if is_voice_ha_enabled():
                # HA path: Router selects best node for TTS
                router_url = get_voice_ha_router_url()
                r_tts = await client.post(f"{router_url}/v1/capability/voice_tts", json=tts_json)
                r_tts.raise_for_status()
                _ha_voice_node = r_tts.headers.get("X-Voice-Node")
                _ha_voice_mode = r_tts.headers.get("X-Voice-Mode")
                logger.debug("voice_stream TTS via HA: node=%s mode=%s",
                             _ha_voice_node, _ha_voice_mode)
            else:
                # Legacy direct path
                mem_url = get_memory_service_url()
                r_tts = await client.post(f"{mem_url}/voice/tts", json=tts_json)
                r_tts.raise_for_status()
        first_audio_mime = r_tts.headers.get("content-type", "audio/mpeg").split(";")[0]
        first_audio_b64 = _b64.b64encode(r_tts.content).decode()
    except Exception as e:
        logger.warning("voice_stream TTS failed for first chunk (ha=%s): %s",
                       is_voice_ha_enabled(), e)
        # Not fatal: client can still render text

    tts_ms = int((time.monotonic() - t0_tts) * 1000)

    _broadcast_bg(_make_event("voice.stream", {
        "phase": "done",
        "trace_id": trace_id,
        "llm_ms": llm_ms,
        "tts_ms": tts_ms,
        "chunks_total": len(chunks),
    }, project_id=pid, session_id=sid))

    # `r_tts` may be unbound when TTS failed — guarded by first_audio_b64 being "".
    logger.info("voice_stream ok: trace=%s llm=%dms tts=%dms chunks=%d first=%dB",
                trace_id, llm_ms, tts_ms, len(chunks), len(r_tts.content) if first_audio_b64 else 0)

    body_data = {
        "ok": True,
        "trace_id": trace_id,
        "first_text": first_chunk,
        "first_audio_b64": first_audio_b64,
        "first_audio_mime": first_audio_mime,
        "rest_chunks": rest_chunks,
        "full_text": reply,
        "meta": {
            "llm_ms": llm_ms,
            "tts_ms": tts_ms,
            "chunks_total":
                len(chunks),
            "voice": voice,
            "model": body.model,
            "voice_profile": _vp,
        },
    }

    # NOTE(review): JSONResponse is already imported at module level; this
    # local alias import is redundant but harmless.
    from fastapi.responses import JSONResponse as _JSONResponse
    resp_headers = {}
    if _ha_voice_mode:
        resp_headers["X-Voice-Mode"] = _ha_voice_mode
    if _ha_voice_node:
        resp_headers["X-Voice-Node"] = _ha_voice_node
    if _ha_voice_mode or _ha_voice_node:
        resp_headers["X-Voice-Cap"] = "voice_tts"

    # Only wrap in an explicit JSONResponse when HA diagnostic headers exist.
    if resp_headers:
        return _JSONResponse(content=body_data, headers=resp_headers)
    return body_data


# ─── Voice Telemetry Beacon ─────────────────────────────────────────────────── 
# Receives performance marks from browser, records Prometheus histograms.
# Browser calls this via navigator.sendBeacon (fire-and-forget).

# Prometheus is optional — when the client library is missing, telemetry is
# still logged but no metrics are recorded (_PROM_VOICE_OK gates all observes).
try:
    from prometheus_client import Histogram as _PromHistogram, Counter as _PromCounter
    _voice_ttfa_hist = _PromHistogram(
        "voice_ttfa_ms", "Time-to-first-audio (request → first audio playable)",
        ["model", "voice_profile"],
        buckets=[500, 1000, 2000, 3000, 5000, 7000, 10000, 15000],
    )
    _voice_llm_hist = _PromHistogram(
        "voice_llm_ms", "LLM inference time for voice turns",
        ["model", "voice_profile"],
        buckets=[500, 1000, 2000, 5000, 8000, 12000, 20000],
    )
    _voice_tts_first_hist = _PromHistogram(
        "voice_tts_first_ms", "First-sentence TTS synthesis time",
        ["voice_profile"],
        buckets=[200, 500, 800, 1200, 2000, 3000],
    )
    _voice_e2e_hist = _PromHistogram(
        "voice_e2e_ms", "End-to-end voice turn latency (user stop speaking → audio plays)",
        ["voice_profile"],
        buckets=[1000, 2000, 4000, 6000, 9000, 13000, 20000],
    )
    _voice_underflow_counter = _PromCounter(
        "voice_queue_underflows_total", "Times playback queue ran empty before TTS finished",
        ["voice_profile"],
    )
    _PROM_VOICE_OK = True
except Exception:
    _PROM_VOICE_OK = False


class VoiceTelemetryPayload(BaseModel):
    """One voice-turn performance beacon from the browser."""
    event: str = "voice_turn"
    # Idempotency: session_id + turn_id deduplicate duplicate beacon submissions
    session_id: Optional[str] = None
    turn_id: Optional[str] = None  # monotonic turn counter or UUID per turn
    ttfa_ms: Optional[int] = None
    llm_ms: Optional[int] = None
    tts_first_ms: Optional[int] = None
    e2e_ms: Optional[int] = None
    stt_ms: Optional[int] = None
    underflows: int = 0
    model: Optional[str] = None
    voice_profile: Optional[str] = None


class VoiceTelemetryBatch(BaseModel):
    """Batch beacon: array of turns submitted together (reduces HTTP overhead)."""
    events: List[VoiceTelemetryPayload] = []


def _process_telemetry_item(payload: VoiceTelemetryPayload) -> bool:
    """Process a single telemetry item. Returns False if duplicate."""
    sid = payload.session_id or "anon"
    tid = payload.turn_id or "noid"
    if _telem_is_duplicate(sid, tid):
        return False  # skip duplicate

    model = (payload.model or "unknown").replace("ollama:", "")
    profile = payload.voice_profile or "unknown"

    if _PROM_VOICE_OK:
        try:
            # Each metric is optional — only observe what the browser measured.
            if payload.ttfa_ms is not None:
                _voice_ttfa_hist.labels(model=model, voice_profile=profile).observe(payload.ttfa_ms)
            if payload.llm_ms is not None:
                _voice_llm_hist.labels(model=model, voice_profile=profile).observe(payload.llm_ms)
            if payload.tts_first_ms is not None:
                _voice_tts_first_hist.labels(voice_profile=profile).observe(payload.tts_first_ms)
            if payload.e2e_ms is not None:
                _voice_e2e_hist.labels(voice_profile=profile).observe(payload.e2e_ms)
            if payload.underflows:
                _voice_underflow_counter.labels(voice_profile=profile).inc(payload.underflows)
        except Exception as exc:
            logger.debug("telemetry/voice prom error: %s", exc)

    logger.info(
        "voice_telemetry: model=%s profile=%s ttfa=%s llm=%s tts=%s e2e=%s underflows=%d sid=%s",
        model, profile, payload.ttfa_ms, payload.llm_ms,
        payload.tts_first_ms, payload.e2e_ms, payload.underflows, sid,
    )

    # Feed the degradation state machine
    if payload.ttfa_ms is not None or payload.tts_first_ms is not None:
        _voice_degradation_sm.observe(
            ttfa_ms=payload.ttfa_ms,
            tts_first_ms=payload.tts_first_ms,
            underflows=payload.underflows,
            profile=profile,
        )
    return True


@app.post("/api/telemetry/voice", status_code=204)
async def api_telemetry_voice(payload: VoiceTelemetryPayload):
    """Browser beacon endpoint (single turn). Fire-and-forget, always 204."""
    _process_telemetry_item(payload)
    # 204 No Content — browser doesn't await response


@app.post("/api/telemetry/voice/batch", status_code=204)
async def api_telemetry_voice_batch(batch: VoiceTelemetryBatch, request: Request):
    """Batch beacon: process up to 20 turns in one HTTP call.

    Useful when browser queues multiple turns before sending (e.g. tab becomes
    visible again, or connection was lost briefly).
    """
    client_ip = request.client.host if request.client else "unknown"
    if not _check_rate(f"telem_batch:{client_ip}", max_calls=60, window_sec=60):
        raise HTTPException(status_code=429, detail="Rate limit: 60 telemetry batches/min")

    cap = min(len(batch.events), 20)  # hard cap per batch
    processed = sum(1 for item in batch.events[:cap] if _process_telemetry_item(item))
    logger.debug("telemetry/voice/batch: submitted=%d processed=%d cap=%d",
                 len(batch.events), processed, cap)


# ─── Voice Degradation State Machine ───────────────────────────────────────── 
# Tracks rolling window of voice telemetry and determines system-level state.
# States: ok → degraded_tts → degraded_llm → fast_lock → emergency
# Client polls GET /api/voice/degradation_status to show UI badge.
+ +import collections +from dataclasses import dataclass as _dc, field as _field +from enum import Enum + +class VoiceDegradationState(str, Enum): + OK = "ok" # all SLOs met + DEGRADED_TTS = "degraded_tts" # TTS slow/failing → show "TTS SLOW" badge + DEGRADED_LLM = "degraded_llm" # LLM slow → profile auto-demoted to fast + FAST_LOCK = "fast_lock" # LLM degraded, forced to voice_fast_uk + EMERGENCY = "emergency" # TTS failing → warn user, fallback banner + +# SLO thresholds (ms) — aligned with config/slo_policy.yml +_SM_TTFA_WARN = 5000 # TTFA p95 > 5s → degraded_llm +_SM_TTFA_LOCK = 8000 # TTFA p95 > 8s → fast_lock +_SM_TTS_WARN = 2000 # TTS first p95 > 2s → degraded_tts +_SM_TTS_CRIT = 4000 # TTS first p95 > 4s → emergency +_SM_UNDERFLOW_RATE = 0.1 # >10% of recent turns have underflows → degraded_tts +_SM_WINDOW = 20 # rolling window (last N telemetry events) +_SM_MIN_SAMPLES = 5 # need at least N samples before changing state + + +@_dc +class _VoiceDegradationSM: + """Rolling-window degradation state machine.""" + _ttfa_window: collections.deque = _field(default_factory=lambda: collections.deque(maxlen=_SM_WINDOW)) + _tts_first_window: collections.deque = _field(default_factory=lambda: collections.deque(maxlen=_SM_WINDOW)) + _underflow_window: collections.deque = _field(default_factory=lambda: collections.deque(maxlen=_SM_WINDOW)) + state: VoiceDegradationState = VoiceDegradationState.OK + state_since: float = _field(default_factory=time.monotonic) + recommended_profile: str = "voice_fast_uk" + last_reason: str = "" + _lock: object = _field(default_factory=lambda: __import__('asyncio').Lock()) + + def observe(self, ttfa_ms: Optional[int], tts_first_ms: Optional[int], + underflows: int, profile: str) -> None: + if ttfa_ms is not None: + self._ttfa_window.append(ttfa_ms) + if tts_first_ms is not None: + self._tts_first_window.append(tts_first_ms) + self._underflow_window.append(1 if underflows > 0 else 0) + self._recompute() + + def _p95(self, window: 
collections.deque) -> Optional[float]: + if len(window) < _SM_MIN_SAMPLES: + return None + s = sorted(window) + return s[int(len(s) * 0.95)] + + def _underflow_rate(self) -> float: + if not self._underflow_window: + return 0.0 + return sum(self._underflow_window) / len(self._underflow_window) + + def _recompute(self) -> None: + ttfa_p95 = self._p95(self._ttfa_window) + tts_p95 = self._p95(self._tts_first_window) + uf_rate = self._underflow_rate() + + prev_state = self.state + + if tts_p95 is not None and tts_p95 > _SM_TTS_CRIT: + self.state = VoiceDegradationState.EMERGENCY + self.recommended_profile = "voice_fast_uk" + self.last_reason = f"TTS p95={tts_p95:.0f}ms > {_SM_TTS_CRIT}ms" + elif ttfa_p95 is not None and ttfa_p95 > _SM_TTFA_LOCK: + self.state = VoiceDegradationState.FAST_LOCK + self.recommended_profile = "voice_fast_uk" + self.last_reason = f"TTFA p95={ttfa_p95:.0f}ms > {_SM_TTFA_LOCK}ms — locked to fast profile" + elif tts_p95 is not None and tts_p95 > _SM_TTS_WARN: + self.state = VoiceDegradationState.DEGRADED_TTS + self.recommended_profile = "voice_fast_uk" + self.last_reason = f"TTS p95={tts_p95:.0f}ms > {_SM_TTS_WARN}ms" + elif ttfa_p95 is not None and ttfa_p95 > _SM_TTFA_WARN: + self.state = VoiceDegradationState.DEGRADED_LLM + self.recommended_profile = "voice_fast_uk" + self.last_reason = f"TTFA p95={ttfa_p95:.0f}ms > {_SM_TTFA_WARN}ms" + elif uf_rate > _SM_UNDERFLOW_RATE: + self.state = VoiceDegradationState.DEGRADED_TTS + self.recommended_profile = "voice_fast_uk" + self.last_reason = f"Underflow rate={uf_rate:.1%} > {_SM_UNDERFLOW_RATE:.0%}" + else: + self.state = VoiceDegradationState.OK + self.recommended_profile = "voice_fast_uk" # default + self.last_reason = "all SLOs met" + + if self.state != prev_state: + self.state_since = time.monotonic() + logger.warning("voice_degradation state: %s → %s | %s", + prev_state.value, self.state.value, self.last_reason) + + def status_dict(self) -> dict: + return { + "state": self.state.value, + 
"state_since_sec": int(time.monotonic() - self.state_since), + "recommended_profile": self.recommended_profile, + "reason": self.last_reason, + "samples": { + "ttfa": len(self._ttfa_window), + "tts_first": len(self._tts_first_window), + }, + "p95": { + "ttfa_ms": self._p95(self._ttfa_window), + "tts_first_ms": self._p95(self._tts_first_window), + }, + "underflow_rate": round(self._underflow_rate(), 3), + "ui_badge": _SM_UI_BADGE.get(self.state, ""), + } + + +# UI badge text per state +_SM_UI_BADGE = { + VoiceDegradationState.OK: "", + VoiceDegradationState.DEGRADED_TTS: "⚠ TTS SLOW", + VoiceDegradationState.DEGRADED_LLM: "⚠ AI SLOW", + VoiceDegradationState.FAST_LOCK: "⚡ FAST MODE", + VoiceDegradationState.EMERGENCY: "🔴 TTS DEGRADED", +} + +_voice_degradation_sm = _VoiceDegradationSM() + + +@app.get("/api/voice/degradation_status") +async def api_voice_degradation_status(): + """Returns current voice degradation state + repro pack for incident diagnosis. + + Repro pack fields (for on-call): + node_id, edge_tts_version, last_model, last_profile, + last_5_tts_errors, last_5_llm_errors + """ + base = _voice_degradation_sm.status_dict() + # Enrich with repro pack + base["repro"] = { + "node_id": _NODE_ID, + "last_model": _voice_last_model, + "last_profile": _voice_last_profile, + "last_5_tts_errors": list(_voice_tts_errors), + "last_5_llm_errors": list(_voice_llm_errors), + "concurrent_tts_slots_free": _get_tts_semaphore()._value, + "max_concurrent_tts": _MAX_CONCURRENT_TTS, + } + return base + + +# ─── Memory ────────────────────────────────────────────────────────────────── + +@app.get("/api/memory/status") +async def api_memory_status(_auth: str = Depends(require_auth)): + mem_url = get_memory_service_url() + try: + async with httpx.AsyncClient(timeout=8.0) as client: + r = await client.get(f"{mem_url}/health") + r.raise_for_status() + data = r.json() + return { + "ok": True, + "memory_url": mem_url, + "status": data.get("status", "unknown"), + "vector_store": 
data.get("vector_store", {}), + "stt": "whisper-large-v3-turbo", + "tts": "edge-tts / macOS say", + } + except Exception as e: + return {"ok": False, "error": str(e)[:200], "memory_url": mem_url} + + +@app.get("/api/memory/context") +async def api_memory_context( + session_id: str = Query("console"), + agent_id: str = Query("sofiia"), + user_id: Optional[str] = Query(None), + limit: int = Query(20, ge=1, le=100), + _auth: str = Depends(require_auth), +): + mem_url = get_memory_service_url() + agent_key = str(agent_id or "").strip().lower() + resolved_user = user_id or ("aistalk_user" if agent_key == "aistalk" else "console_user") + async def _sqlite_fallback_events() -> List[Dict[str, Any]]: + events: List[Dict[str, Any]] = [] + if _app_db: + try: + rows = await _app_db.list_messages(session_id, limit=limit) + for row in rows: + events.append( + { + "role": row.get("role", "unknown"), + "content": row.get("content", ""), + "ts": row.get("ts"), + "source": "sqlite_fallback", + } + ) + except Exception: + pass + return events + try: + async with httpx.AsyncClient(timeout=8.0) as client: + r = await client.get( + f"{mem_url}/agents/{agent_id}/memory", + params={"user_id": resolved_user, "channel_id": session_id, "limit": limit}, + ) + r.raise_for_status() + data = r.json() + events = data.get("events") if isinstance(data, dict) else None + if isinstance(events, list) and events: + return data + # Remote is alive but returned empty history; expose local persisted history too. + local_events = await _sqlite_fallback_events() + if local_events: + return {"events": local_events, "fallback": "sqlite_after_empty_remote"} + return data if isinstance(data, dict) else {"events": []} + except Exception as e: + # Fallback to local SQLite session memory so UI still has context. 
        events = await _sqlite_fallback_events()
        return {"events": events, "error": str(e)[:100], "fallback": "sqlite"}


# ─── WebSocket /ws/events ────────────────────────────────────────────────────

@app.websocket("/ws/events")
async def ws_events(websocket: WebSocket):
    """WebSocket event stream. Clients receive all broadcast events."""
    await websocket.accept()
    _ws_clients.add(websocket)
    logger.info("WS client connected, total=%d", len(_ws_clients))
    # Send welcome
    await websocket.send_text(json.dumps(_make_event("nodes.status", {
        "message": "connected",
        "bff_version": _VERSION,
        "ws_clients": len(_ws_clients),
    })))
    try:
        while True:
            # Keep-alive: read pings from client (or just wait)
            try:
                msg = await asyncio.wait_for(websocket.receive_text(), timeout=15.0)
                # Client can send {"type":"ping"} → pong
                if msg:
                    try:
                        cmd = json.loads(msg)
                        if cmd.get("type") == "ping":
                            await websocket.send_text(json.dumps({"type": "pong", "ts": _now_iso()}))
                    except Exception:
                        # Non-JSON client payloads are ignored by design.
                        pass
            except asyncio.TimeoutError:
                # No client traffic for 15s: send periodic heartbeat with cached
                # nodes snapshot if available.
                hb_data: Dict[str, Any] = {
                    "bff_uptime_s": int(time.monotonic() - _START_TIME),
                    "ws_clients": len(_ws_clients),
                }
                if _nodes_cache.get("nodes"):
                    hb_data["nodes"] = [
                        {
                            "id": n["node_id"],
                            "online": n.get("online", False),
                            "router_ok": n.get("router_ok", False),
                            "router_latency_ms": n.get("router_latency_ms"),
                        }
                        for n in _nodes_cache["nodes"]
                    ]
                    hb_data["nodes_ts"] = _nodes_cache.get("ts", "")
                await websocket.send_text(json.dumps(_make_event("nodes.status", hb_data)))
    except WebSocketDisconnect:
        pass
    except Exception as e:
        logger.debug("WS error: %s", e)
    finally:
        # Always drop the client from the broadcast set, even on error paths.
        _ws_clients.discard(websocket)
        logger.info("WS client disconnected, total=%d", len(_ws_clients))


# ─── UI ──────────────────────────────────────────────────────────────────────

STATIC_DIR = Path(__file__).resolve().parent.parent / "static"
_NO_CACHE = {"Cache-Control": "no-cache, no-store, must-revalidate", "Pragma": "no-cache"}


@app.get("/api/meta/version")
async def get_meta_version():
    """Build metadata endpoint — always no-cache, always public."""
    return JSONResponse(
        content={
            "version": _VERSION,
            "build_sha": _BUILD_SHA,
            "build_time": _BUILD_TIME,
            "service": "sofiia-console",
        },
        headers=_NO_CACHE,
    )


# ─── Auth endpoints ──────────────────────────────────────────────────────────

class _LoginBody(BaseModel):
    # API key sent in the JSON body (not a header) — see auth_login docstring.
    key: str


@app.post("/api/auth/login")
async def auth_login(body: _LoginBody, response: Response):
    """
    Verify API key (sent in JSON body — avoids header encoding issues).
    On success: set httpOnly session cookie, return ok=true.
    No CORS/header encoding issues since key travels in request body.
    """
    if not _key_valid(body.key):
        raise HTTPException(status_code=401, detail="Invalid key")

    token = _cookie_token(body.key)
    response.set_cookie(
        key=_COOKIE_NAME,
        value=token,
        httponly=True,
        secure=_IS_PROD,  # Secure=True in prod (HTTPS only)
        samesite="lax",
        max_age=_COOKIE_MAX_AGE,
        path="/",
    )
    return {"ok": True, "auth": "cookie"}


@app.post("/api/auth/logout")
async def auth_logout(response: Response):
    """Clear session cookie."""
    response.delete_cookie(key=_COOKIE_NAME, path="/")
    return {"ok": True}


@app.get("/api/auth/check")
async def auth_check(request: Request):
    """Returns 200 if session is valid, 401 otherwise.
Used by UI on startup.""" + # Localhost is always open — no auth needed + client_ip = (request.client.host if request.client else "") or "" + if client_ip in ("127.0.0.1", "::1", "localhost"): + return {"ok": True, "auth": "localhost"} + configured = get_console_api_key() + if not configured: + return {"ok": True, "auth": "open"} + from .auth import _expected_cookie_token as _ect + cookie_val = request.cookies.get(_COOKIE_NAME, "") + import secrets as _sec + if cookie_val and _sec.compare_digest(cookie_val, _ect()): + return {"ok": True, "auth": "cookie"} + raise HTTPException(status_code=401, detail="Not authenticated") + + +@app.get("/", response_class=HTMLResponse) +async def ui_root(): + index = STATIC_DIR / "index.html" + content = index.read_text(encoding="utf-8") if index.exists() else _fallback_html() + return HTMLResponse(content=content, headers=_NO_CACHE) + + +@app.get("/ui", response_class=HTMLResponse) +async def ui_alias(): + return await ui_root() + + +def _fallback_html() -> str: + return """Sofiia Console +

Sofiia Control Console v""" + _VERSION + """

+

Endpoints: GET /api/health | GET /api/status/full | POST /api/chat/send | WS /ws/events

"""


@app.get("/chat", response_class=HTMLResponse)
async def ui_chat():
    """Serve the chat UI page (static file, fallback HTML if missing)."""
    p = STATIC_DIR / "chat.html"
    content = p.read_text(encoding="utf-8") if p.exists() else _fallback_html()
    return HTMLResponse(content=content, headers=_NO_CACHE)


@app.get("/ops", response_class=HTMLResponse)
async def ui_ops():
    """Serve the ops UI page (static file, fallback HTML if missing)."""
    p = STATIC_DIR / "ops.html"
    content = p.read_text(encoding="utf-8") if p.exists() else _fallback_html()
    return HTMLResponse(content=content, headers=_NO_CACHE)


@app.get("/nodes", response_class=HTMLResponse)
async def ui_nodes():
    """Serve the nodes UI page (static file, fallback HTML if missing)."""
    p = STATIC_DIR / "nodes.html"
    content = p.read_text(encoding="utf-8") if p.exists() else _fallback_html()
    return HTMLResponse(content=content, headers=_NO_CACHE)


# ── Supervisor Proxy ─────────────────────────────────────────────────────────
_SUPERVISOR_URL = os.getenv("SUPERVISOR_URL", "http://sofiia-supervisor:8080").rstrip("/")
_SUPERVISOR_FALLBACK_URL = os.getenv("SUPERVISOR_FALLBACK_URL", "http://127.0.0.1:8084").rstrip("/")


async def _supervisor_request_json(
    method: str,
    path: str,
    *,
    timeout: float = 30.0,
    json_body: Optional[Dict[str, Any]] = None,
) -> Tuple[int, Dict[str, Any]]:
    """Issue a JSON request against the supervisor, trying fallback URL on
    connection errors only.

    Returns (status_code, dict payload). Non-dict JSON is wrapped under "data",
    unparsable bodies under "raw". Raises HTTPException with the upstream
    status when the supervisor answers >= 400 (no fallback attempted in that
    case — the service was reachable), and 502 when no base URL is reachable.
    """
    urls = [_SUPERVISOR_URL]
    if _SUPERVISOR_FALLBACK_URL and _SUPERVISOR_FALLBACK_URL not in urls:
        urls.append(_SUPERVISOR_FALLBACK_URL)

    last_err = "unavailable"
    for base in urls:
        target = f"{base}{path}"
        try:
            async with httpx.AsyncClient(timeout=timeout) as client:
                resp = await client.request(method, target, json=json_body)
        except Exception as e:
            # Transport error → remember and try the next base URL.
            last_err = str(e)[:200]
            continue

        if resp.status_code >= 400:
            detail = resp.text[:400] if resp.text else f"Supervisor error {resp.status_code}"
            raise HTTPException(status_code=resp.status_code, detail=detail)

        if not resp.content:
            return resp.status_code, {}
        try:
            payload = resp.json()
        except Exception:
            return resp.status_code, {"raw": resp.text[:1000]}
        if isinstance(payload, dict):
            return resp.status_code, payload
        return resp.status_code, {"data": payload}

    raise HTTPException(status_code=502, detail=f"Supervisor unreachable: {last_err}")


@app.post("/api/supervisor/runs")
async def start_supervisor_run(request: Request, _auth: str = Depends(require_auth)):
    """Start a LangGraph run on sofiia-supervisor.

    Body: {"graph": "alert_triage|incident_triage|postmortem_draft|release_check",
           "project_id": "", ...params}

    If project_id is provided, auto-creates an agent_run dialog_node in the graph
    and returns node_id in the response for UI tracking.
    """
    body = await request.json()
    graph_name = body.pop("graph", None)
    project_id = body.pop("project_id", None)
    if not graph_name:
        raise HTTPException(status_code=400, detail="'graph' field is required")
    try:
        status_code, result = await _supervisor_request_json(
            "POST",
            f"/v1/graphs/{graph_name}/runs",
            timeout=60.0,
            json_body=body,
        )

        # Auto-create agent_run node if project is provided
        if project_id and status_code in (200, 201, 202):
            run_id = result.get("run_id") or result.get("id") or str(uuid.uuid4())
            try:
                pack = await _app_db.create_evidence_pack(
                    project_id=project_id,
                    run_id=run_id,
                    graph_name=graph_name,
                    result_data={"status": "started", "summary": f"Run started: {graph_name}"},
                    created_by="sofiia",
                )
                result["_node_id"] = pack.get("node_id")
            except Exception as node_err:
                # Node creation is advisory; the run itself already started.
                logger.warning("evidence_pack node creation failed (non-fatal): %s", node_err)

        return JSONResponse(status_code=status_code, content=result)
    except HTTPException:
        raise
    except Exception as e:
        raise HTTPException(status_code=502, detail=f"Supervisor unreachable: {e}")


@app.get("/api/supervisor/runs/{run_id}")
async def get_supervisor_run(run_id: str, _auth: str = Depends(require_auth)):
    """Get the status/result of a LangGraph run."""
    try:
        status_code, payload = await _supervisor_request_json(
            "GET",
            f"/v1/runs/{run_id}",
            timeout=15.0,
        )
        return JSONResponse(status_code=status_code, content=payload)
    except Exception as e:
        raise HTTPException(status_code=502, detail=f"Supervisor unreachable: {e}")


@app.post("/api/supervisor/runs/{run_id}/cancel")
async def cancel_supervisor_run(run_id: str, _auth: str = Depends(require_auth)):
    """Cancel a running LangGraph run."""
    try:
        status_code, payload = await _supervisor_request_json(
            "POST",
            f"/v1/runs/{run_id}/cancel",
            timeout=10.0,
        )
        return JSONResponse(status_code=status_code, content=payload)
    except Exception as e:
        raise HTTPException(status_code=502, detail=f"Supervisor unreachable: {e}")


@app.get("/api/supervisor/graphs")
async def list_supervisor_graphs():
    """List available LangGraph graphs (no auth — read-only discovery)."""
    urls = [_SUPERVISOR_URL]
    if _SUPERVISOR_FALLBACK_URL and _SUPERVISOR_FALLBACK_URL not in urls:
        urls.append(_SUPERVISOR_FALLBACK_URL)
    last_err = "unavailable"
    for base in urls:
        try:
            async with httpx.AsyncClient(timeout=5.0) as client:
                resp = await client.get(f"{base}/healthz")
            data = resp.json()
            return {
                "graphs": data.get("graphs", []),
                "healthy": resp.status_code == 200,
                "url": base,
                "state_backend": data.get("state_backend"),
            }
        except Exception as e:
            last_err = str(e)
            continue
    return {"graphs": [], "healthy": False, "error": last_err}


@app.get("/api/aistalk/status")
async def aistalk_status():
    """AISTALK integration status for SOFIIA UI."""
    try:
        sup = await list_supervisor_graphs()
        aurora = await api_aurora_health()
        runtime = await _aistalk_runtime_state()
        adapter_status: Dict[str, Any]
        relay_health: Dict[str, Any]
        if _aistalk is not None:
            try:
                relay_health = _aistalk.probe_health()
            except Exception as e:
                relay_health = {"enabled": True, "ok": False, "error": str(e)[:200]}
            try:
                adapter_status = _aistalk.status()
            except Exception:
                adapter_status = {"enabled": True, "base_url": "unknown"}
        else:
            relay_health = {"enabled": False, "ok": False, "error": "disabled"}
            adapter_status = {"enabled": False, "base_url": ""}
        return {
            "aistalk_enabled": _aistalk is not None,
            "aistalk_adapter": repr(_aistalk) if _aistalk is not None else "disabled",
            "adapter": adapter_status,
            "relay_health": relay_health,
            "supervisor": sup,
            "aurora": aurora,
            "runtime": runtime,
            "docs": {
                "contract": "/docs/aistalk/contract.md",
                "supervisor": "/docs/supervisor/langgraph_supervisor.md",
            },
        }
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))


def _parse_agent_md(path: Path) -> Dict[str, Any]:
    """Parse an AISTALK agent role markdown file into a catalog entry dict.

    Recognized sections: "Role:", "Output(s):", "Boundary/Boundaries:",
    "Capabilities:". Section items are "- " bullet lines; code fences and
    "Modes:/Rules:/Internal sub-pipeline" headings terminate a section.
    """
    text = path.read_text(encoding="utf-8", errors="ignore")
    lines = [ln.rstrip() for ln in text.splitlines()]
    title = path.stem
    display_name = title
    role: List[str] = []
    outputs: List[str] = []
    boundaries: List[str] = []
    capabilities: List[str] = []
    intro: List[str] = []
    in_section: Optional[str] = None

    for raw in lines:
        line = raw.strip()
        if not line:
            continue
        if line.startswith("# "):
            # H1 heading supplies the display name.
            display_name = line[2:].strip()
            continue
        low = line.lower()
        if low.startswith("role:"):
            in_section = "role"
            continue
        if low.startswith("output:"):
            in_section = "output"
            continue
        if low.startswith("outputs:"):
            in_section = "output"
            continue
        if low.startswith("boundary:"):
            in_section = "boundary"
            continue
        if low.startswith("boundaries:"):
            in_section = "boundary"
            continue
        if low.startswith("capabilities:"):
            in_section = "capabilities"
            continue
        if low.startswith("modes:") or low.startswith("rules:") or low.startswith("internal sub-pipeline"):
            in_section = None
            continue
        if line.startswith("```"):
            in_section = None
            continue

        if line.startswith("- "):
            item = line[2:].strip()
            if in_section == "role":
                role.append(item)
            elif in_section == "output":
                outputs.append(item)
            elif in_section == "boundary":
                boundaries.append(item)
            elif in_section == "capabilities":
                capabilities.append(item)
            continue
+ if in_section is None and not line.startswith("#"): + # Some agent role files store purpose as plain intro line without "Role:" section. + intro.append(line) + + summary = role[0] if role else (intro[0] if intro else "") + return { + "id": title.lower(), + "name": display_name, + "summary": summary, + "role": role, + "outputs": outputs, + "boundaries": boundaries, + "capabilities": capabilities, + "source": str(path), + } + + +@app.get("/api/aistalk/catalog") +async def aistalk_catalog(): + """ + Return AISTALK subagent catalog + declared capabilities for UI rendering. + """ + roots = [ + Path(__file__).resolve().parents[3] / "config" / "roles" / "aistalk", + Path(__file__).resolve().parents[1] / "config" / "roles" / "aistalk", + ] + root = next((p for p in roots if p.exists()), None) + if root is None: + return { + "ok": False, + "error": "AISTALK roles directory not found", + "agents": [], + "domains": [], + } + + agents: List[Dict[str, Any]] = [] + for p in sorted(root.glob("*.md")): + try: + agents.append(_parse_agent_md(p)) + except Exception as e: + agents.append( + { + "id": p.stem.lower(), + "name": p.stem, + "summary": "", + "role": [], + "outputs": [], + "boundaries": [f"parse_error: {str(e)[:120]}"], + "capabilities": [], + "source": str(p), + } + ) + + # High-level specialization domains for UI badges/filters. 
    domains = [
        {"id": "osint", "name": "OSINT & Recon", "agents": ["tracer", "stealth", "shadow"]},
        {"id": "analysis", "name": "Threat Analysis", "agents": ["neuron", "graph", "risk"]},
        {"id": "offdef", "name": "Offense/Defense", "agents": ["redteam", "blueteam", "purpleteam", "bughunter", "devteam"]},
        {"id": "forensics", "name": "Media Forensics", "agents": ["aurora"]},
        {"id": "security", "name": "Governance & Data Safety", "agents": ["vault", "quantum"]},
        {"id": "orchestration", "name": "Command & Synthesis", "agents": ["orchestrator_synthesis"]},
    ]
    return {
        "ok": True,
        "root": str(root),
        "count": len(agents),
        "agents": agents,
        "domains": domains,
    }


# Persisted runtime settings (limits + per-agent model selection) live next to
# the console cache directory.
_AISTALK_RUNTIME_PATH = AURORA_DATA_DIR.parent / "sofiia-console-cache" / "aistalk_runtime.json"
# Canonical agent ordering; also the set of valid agent_id values.
_AISTALK_AGENT_ORDER = [
    "orchestrator_synthesis",
    "tracer",
    "shadow",
    "stealth",
    "neuron",
    "graph",
    "bughunter",
    "redteam",
    "blueteam",
    "purpleteam",
    "risk",
    "vault",
    "quantum",
    "devteam",
    "aurora",
]
# In-process concurrency bookkeeping (run_id → start timestamp; active chats).
_aistalk_team_active_runs: Dict[str, float] = {}
_aistalk_chat_active: int = 0
_aistalk_state_lock = asyncio.Lock()


def _aistalk_roles_root() -> Optional[Path]:
    """Return the first existing AISTALK roles directory, or None."""
    roots = [
        Path(__file__).resolve().parents[3] / "config" / "roles" / "aistalk",
        Path(__file__).resolve().parents[1] / "config" / "roles" / "aistalk",
    ]
    return next((p for p in roots if p.exists()), None)


def _aistalk_resource_snapshot() -> Dict[str, Any]:
    """Snapshot host CPU count, RAM (GiB, via sysconf) and Ollama tuning knobs."""
    cpu = os.cpu_count() or 8
    mem_gb: Optional[float] = None
    try:
        # os.sysconf is POSIX-only; any failure leaves mem_gb as None.
        page_size = os.sysconf("SC_PAGE_SIZE")
        total_pages = os.sysconf("SC_PHYS_PAGES")
        if page_size > 0 and total_pages > 0:
            mem_gb = round((page_size * total_pages) / (1024 ** 3), 1)
    except Exception:
        mem_gb = None
    return {
        "cpu_count": cpu,
        "memory_gb": mem_gb,
        "ollama_num_ctx": SOFIIA_OLLAMA_NUM_CTX,
        "ollama_num_thread": SOFIIA_OLLAMA_NUM_THREAD,
        "ollama_num_gpu": SOFIIA_OLLAMA_NUM_GPU,
    }


def _aistalk_recommended_limits(resources: Dict[str, Any]) -> Dict[str, Any]:
    """Derive a concurrency profile (safe/balanced/performance) from resources."""
    cpu = int(resources.get("cpu_count") or 8)
    mem = resources.get("memory_gb")
    mem_gb = float(mem) if isinstance(mem, (int, float)) else 0.0
    if cpu >= 12 and mem_gb >= 24:
        profile = "performance"
        team_max = 2
        chat_max = 4
    elif cpu >= 8 and mem_gb >= 16:
        profile = "balanced"
        team_max = 1
        chat_max = 3
    else:
        profile = "safe"
        team_max = 1
        chat_max = 2
    return {
        "profile": profile,
        "max_parallel_team_runs": team_max,
        "max_parallel_chat": chat_max,
        "rule": (
            "Aurora/forensics jobs are GPU-heavy: keep team runs low; "
            "chat parallelism may be higher but bounded by CPU/RAM."
        ),
    }


async def _aistalk_local_models() -> List[str]:
    """List locally available Ollama model names; empty list on any error."""
    ollama_url = get_ollama_url().rstrip("/")
    try:
        async with httpx.AsyncClient(timeout=5.0) as client:
            r = await client.get(f"{ollama_url}/api/tags")
            r.raise_for_status()
            data = r.json()
            models = [str((m or {}).get("name", "")).strip() for m in (data.get("models") or [])]
            return [m for m in models if m]
    except Exception:
        return []


def _aistalk_default_model_map(models: List[str]) -> Dict[str, str]:
    """Map each AISTALK agent to a default model tier based on availability."""
    available = set(models)

    def pick(*candidates: str) -> str:
        # First available candidate wins; else first local model; else hard default.
        for c in candidates:
            if c in available:
                return c
        if models:
            return models[0]
        return "qwen3:14b"

    orchestrator = pick("qwen3.5:35b-a3b", "qwen3:14b", "gemma3:latest")
    analyst = pick("qwen3:14b", "qwen3.5:35b-a3b", "gemma3:latest")
    lightweight = pick("gemma3:latest", "qwen3:14b", "qwen3.5:35b-a3b")

    mapping: Dict[str, str] = {}
    for agent_id in _AISTALK_AGENT_ORDER:
        if agent_id in ("orchestrator_synthesis", "risk", "neuron", "graph"):
            mapping[agent_id] = orchestrator
        elif agent_id in ("tracer", "shadow", "stealth", "vault", "quantum"):
            mapping[agent_id] = analyst
        else:
            mapping[agent_id] = lightweight
    return mapping


def _read_aistalk_runtime() -> Dict[str, Any]:
    """Load persisted runtime settings; {} if missing, unreadable or non-dict."""
    if _AISTALK_RUNTIME_PATH.exists():
        try:
            raw = json.loads(_AISTALK_RUNTIME_PATH.read_text(encoding="utf-8"))
            if isinstance(raw, dict):
                return raw
        except Exception:
            pass
    return {}


def _write_aistalk_runtime(data: Dict[str, Any]) -> None:
    """Persist runtime settings as pretty JSON, creating parent dirs as needed.

    NOTE(review): write is not atomic (no temp-file + rename) — a crash
    mid-write can truncate the file; _read_aistalk_runtime tolerates that.
    """
    _AISTALK_RUNTIME_PATH.parent.mkdir(parents=True, exist_ok=True)
    _AISTALK_RUNTIME_PATH.write_text(json.dumps(data, ensure_ascii=False, indent=2), encoding="utf-8")


async def _aistalk_runtime_state() -> Dict[str, Any]:
    """Compose the effective runtime state: stored settings merged with
    recommendations, clamped limits, and validated per-agent model choices."""
    resources = _aistalk_resource_snapshot()
    recommended = _aistalk_recommended_limits(resources)
    models = await _aistalk_local_models()
    stored = _read_aistalk_runtime()

    limits = stored.get("limits") if isinstance(stored.get("limits"), dict) else {}
    max_team = int(limits.get("max_parallel_team_runs") or recommended["max_parallel_team_runs"])
    max_chat = int(limits.get("max_parallel_chat") or recommended["max_parallel_chat"])
    profile = str(limits.get("profile") or recommended["profile"])

    saved_models = stored.get("agent_models") if isinstance(stored.get("agent_models"), dict) else {}
    defaults = _aistalk_default_model_map(models)
    agent_models: Dict[str, str] = {}
    for aid in _AISTALK_AGENT_ORDER:
        selected = str(saved_models.get(aid) or defaults.get(aid) or "")
        # Saved model may have been removed locally — fall back to defaults.
        if models and selected not in models:
            selected = defaults.get(aid) or models[0]
        if not selected:
            selected = "qwen3:14b"
        agent_models[aid] = selected

    state = {
        "limits": {
            "profile": profile,
            # Clamp to hard safety bounds regardless of stored values.
            "max_parallel_team_runs": max(1, min(max_team, 4)),
            "max_parallel_chat": max(1, min(max_chat, 8)),
        },
        "recommended": recommended,
        "resources": resources,
        "available_models": models,
        "agent_models": agent_models,
        "active_team_runs": len(_aistalk_team_active_runs),
        "active_chat": _aistalk_chat_active,
    }
    # Persist normalized shape for future restarts.
+ _write_aistalk_runtime({"limits": state["limits"], "agent_models": state["agent_models"]}) + return state + + +def _aistalk_role_prompt(agent_id: str) -> str: + root = _aistalk_roles_root() + if root is None: + return "You are AISTALK security analyst. Respond with findings, risk, next actions." + target = root / f"{agent_id}.md" + if not target.exists(): + target = root / "orchestrator_synthesis.md" + try: + text = target.read_text(encoding="utf-8", errors="ignore") + # Keep prompt concise enough for local models. + return text[:6000] + except Exception: + return "You are AISTALK security analyst. Respond with findings, risk, next actions." + + +@app.get("/api/aistalk/runtime") +async def aistalk_runtime(_auth: str = Depends(require_auth)): + return await _aistalk_runtime_state() + + +class AISTalkModelSetBody(BaseModel): + agent_id: str + model: str + + +@app.post("/api/aistalk/runtime/model") +async def aistalk_set_agent_model(body: AISTalkModelSetBody, _auth: str = Depends(require_auth)): + state = await _aistalk_runtime_state() + aid = str(body.agent_id or "").strip().lower() + if aid not in _AISTALK_AGENT_ORDER: + raise HTTPException(status_code=400, detail=f"Unknown agent_id: {aid}") + model = str(body.model or "").strip() + models = state.get("available_models") or [] + if models and model not in models: + raise HTTPException(status_code=400, detail=f"Model not available locally: {model}") + stored = _read_aistalk_runtime() + stored.setdefault("limits", state.get("limits", {})) + stored.setdefault("agent_models", state.get("agent_models", {})) + stored["agent_models"][aid] = model + _write_aistalk_runtime(stored) + return {"ok": True, "agent_id": aid, "model": model} + + +class AISTalkLimitsBody(BaseModel): + profile: Optional[str] = None + max_parallel_team_runs: Optional[int] = None + max_parallel_chat: Optional[int] = None + + +@app.post("/api/aistalk/runtime/limits") +async def aistalk_set_limits(body: AISTalkLimitsBody, _auth: str = 
Depends(require_auth)): + state = await _aistalk_runtime_state() + stored = _read_aistalk_runtime() + limits = dict(state.get("limits", {})) + if body.profile: + limits["profile"] = str(body.profile) + if body.max_parallel_team_runs is not None: + limits["max_parallel_team_runs"] = max(1, min(int(body.max_parallel_team_runs), 4)) + if body.max_parallel_chat is not None: + limits["max_parallel_chat"] = max(1, min(int(body.max_parallel_chat), 8)) + stored["limits"] = limits + stored.setdefault("agent_models", state.get("agent_models", {})) + _write_aistalk_runtime(stored) + return {"ok": True, "limits": limits} + + +def _is_terminal_run_status(status: str) -> bool: + s = (status or "").strip().lower() + return s in {"succeeded", "failed", "cancelled", "canceled", "timeout", "error"} + + +class AISTalkChatBody(BaseModel): + message: str + agent_id: str = "orchestrator_synthesis" + model: Optional[str] = None + session_id: Optional[str] = None + project_id: Optional[str] = None + user_id: Optional[str] = None + history: List[Dict[str, Any]] = [] + + +@app.post("/api/aistalk/chat") +async def aistalk_chat(body: AISTalkChatBody, request: Request, _auth: str = Depends(require_auth)): + client_ip = request.client.host if request.client else "unknown" + if not _check_rate(f"aistalk_chat:{client_ip}", max_calls=40, window_sec=60): + raise HTTPException(status_code=429, detail="Rate limit: 40 AISTALK chat messages/min") + + state = await _aistalk_runtime_state() + limits = state.get("limits", {}) + max_chat = int(limits.get("max_parallel_chat") or 2) + async with _aistalk_state_lock: + global _aistalk_chat_active + if _aistalk_chat_active >= max_chat: + raise HTTPException( + status_code=429, + detail=f"AISTALK chat busy: active={_aistalk_chat_active}, limit={max_chat}", + ) + _aistalk_chat_active += 1 + + agent_id = str(body.agent_id or "orchestrator_synthesis").strip().lower() + if agent_id not in _AISTALK_AGENT_ORDER: + agent_id = "orchestrator_synthesis" + selected_model 
= str(body.model or "").strip() or str((state.get("agent_models") or {}).get(agent_id) or "") + if not selected_model: + selected_model = "qwen3:14b" + if (state.get("available_models") or []) and selected_model not in state["available_models"]: + selected_model = (state.get("available_models") or ["qwen3:14b"])[0] + + project_id = body.project_id or "aistalk" + session_id = body.session_id or f"aistalk_sess_{uuid.uuid4().hex[:10]}" + user_id = body.user_id or "aistalk_user" + + try: + role_prompt = _aistalk_role_prompt(agent_id) + system_prompt = ( + "Ти працюєш у складі AISTALK (крипто-детективне агентство з безпеки мережі). " + "Формат відповіді: findings -> risk -> actions. " + "Пиши конкретно, без вигадок, позначай невизначеність.\n\n" + + role_prompt + ) + + messages: List[Dict[str, Any]] = [{"role": "system", "content": system_prompt}] + messages.extend(body.history[-10:]) + messages.append({"role": "user", "content": body.message}) + + t0 = time.monotonic() + async with httpx.AsyncClient(timeout=SOFIIA_OLLAMA_TIMEOUT_SEC) as client: + r = await client.post( + f"{get_ollama_url().rstrip('/')}/api/chat", + json=_make_ollama_payload( + selected_model, + messages, + { + "temperature": 0.15, + "repeat_penalty": 1.1, + "num_predict": min(1024, SOFIIA_OLLAMA_NUM_PREDICT_TEXT), + }, + ), + ) + r.raise_for_status() + data = r.json() + reply = ((data.get("message") or {}).get("content") or "").strip() or "AISTALK: порожня відповідь" + latency_ms = int((time.monotonic() - t0) * 1000) + + _broadcast_bg( + _make_event( + "chat.reply", + { + "text": reply[:200], + "provider": "ollama", + "model": f"ollama:{selected_model}", + "agent_id": agent_id, + "latency_ms": latency_ms, + }, + project_id=project_id, + session_id=session_id, + user_id=user_id, + ) + ) + asyncio.get_event_loop().create_task( + _do_save_memory( + body.message, + reply, + session_id, + project_id, + user_id, + agent_id="aistalk", + ) + ) + return { + "ok": True, + "project_id": project_id, + 
"session_id": session_id, + "user_id": user_id, + "agent_id": agent_id, + "model": f"ollama:{selected_model}", + "response": reply, + "meta": {"latency_ms": latency_ms, "active_chat": _aistalk_chat_active, "limit_chat": max_chat}, + } + except HTTPException: + raise + except Exception as e: + raise HTTPException(status_code=502, detail=f"AISTALK chat error: {str(e)[:200]}") + finally: + async with _aistalk_state_lock: + _aistalk_chat_active = max(0, _aistalk_chat_active - 1) + + +def _aistalk_autobuild_input( + graph: str, + objective: str, + input_payload: Dict[str, Any], +) -> Dict[str, Any]: + payload = dict(input_payload or {}) + if graph == "incident_triage": + payload.setdefault("service", "aurora-service") + payload.setdefault("symptom", objective or "Aurora pipeline anomaly") + payload.setdefault("env", "prod") + payload.setdefault("include_traces", False) + return payload + + if graph == "release_check": + payload.setdefault("service_name", "aurora-service") + payload.setdefault("diff_text", objective or "") + payload.setdefault("run_deps", True) + payload.setdefault("run_drift", True) + payload.setdefault("run_smoke", False) + return payload + + if graph == "alert_triage": + # Graph is mostly autonomous; leave room for dry_run/profile overrides. + payload.setdefault("dry_run", False) + payload.setdefault("policy_profile", "default") + return payload + + if graph == "postmortem_draft": + incident_id = str(payload.get("incident_id") or "").strip() + if not incident_id and objective: + m = re.search(r"(inc_[A-Za-z0-9_\-]+)", objective) + if m: + incident_id = m.group(1) + if not incident_id: + raise HTTPException( + status_code=400, + detail="postmortem_draft requires input.incident_id (e.g. inc_123abc)", + ) + payload["incident_id"] = incident_id + payload.setdefault("service", "aurora-service") + payload.setdefault("env", "prod") + payload.setdefault("include_traces", False) + return payload + + # Unknown/custom graph: pass-through without mutation. 
+ return payload + + +@app.post("/api/aistalk/team/run") +async def aistalk_team_run(request: Request, _auth: str = Depends(require_auth)): + """Run AISTALK team workflow via LangGraph supervisor.""" + body = await request.json() + graph = str(body.get("graph") or "incident_triage").strip() + objective = str(body.get("objective") or "").strip() + input_payload = body.get("input") + if not isinstance(input_payload, dict): + input_payload = {} + input_payload = _aistalk_autobuild_input(graph, objective, input_payload) + runtime = await _aistalk_runtime_state() + max_team_runs = int((runtime.get("limits") or {}).get("max_parallel_team_runs") or 1) + # GC stale local entries (12h safety window). + now_ts = time.time() + stale = [rid for rid, ts in _aistalk_team_active_runs.items() if (now_ts - ts) > 12 * 3600] + for rid in stale: + _aistalk_team_active_runs.pop(rid, None) + if len(_aistalk_team_active_runs) >= max_team_runs: + raise HTTPException( + status_code=429, + detail=f"AISTALK team busy: active_runs={len(_aistalk_team_active_runs)}, limit={max_team_runs}", + ) + + sup_payload = { + "workspace_id": str(body.get("workspace_id") or "daarion"), + "user_id": str(body.get("user_id") or "aistalk_user"), + "agent_id": "aistalk", + "input": input_payload, + } + status_code, payload = await _supervisor_request_json( + "POST", + f"/v1/graphs/{graph}/runs", + timeout=60.0, + json_body=sup_payload, + ) + if status_code in (200, 201, 202) and isinstance(payload, dict): + rid = str(payload.get("run_id") or payload.get("id") or "").strip() + if rid: + _aistalk_team_active_runs[rid] = time.time() + return JSONResponse( + status_code=status_code, + content={ + "ok": status_code in (200, 201, 202), + "graph": graph, + "objective": objective, + "active_runs": len(_aistalk_team_active_runs), + "limit_runs": max_team_runs, + **payload, + }, + ) + + +@app.post("/api/aistalk/relay/test") +async def aistalk_relay_test(request: Request, _auth: str = Depends(require_auth)): + """Send a 
synthetic event to AISTALK relay and return adapter status.""" + body = await request.json() if request.headers.get("content-type", "").startswith("application/json") else {} + event_type = str(body.get("type") or "aistalk.ping").strip() + event = _make_event( + event_type, + {"message": body.get("message", "relay test"), "source": "sofiia-console"}, + project_id=str(body.get("project_id") or "aistalk"), + session_id=str(body.get("session_id") or f"aistalk_test_{uuid.uuid4().hex[:8]}"), + user_id="sofiia", + ) + if _aistalk is None: + raise HTTPException(status_code=503, detail="AISTALK adapter disabled") + _aistalk.handle_event(event) + return { + "ok": True, + "queued": True, + "event_type": event_type, + "adapter": _aistalk.status(), + } + + +@app.get("/api/aistalk/team/run/{run_id}") +async def aistalk_team_run_status(run_id: str, _auth: str = Depends(require_auth)): + status_code, payload = await _supervisor_request_json( + "GET", + f"/v1/runs/{run_id}", + timeout=20.0, + ) + if isinstance(payload, dict) and _is_terminal_run_status(str(payload.get("status") or "")): + _aistalk_team_active_runs.pop(run_id, None) + return JSONResponse(status_code=status_code, content=payload) + + +# ── Evidence Pack Engine ──────────────────────────────────────────────────────── + +@app.post("/api/projects/{project_id}/supervisor/evidence") +async def record_evidence_pack( + project_id: str, + request: Request, + _auth: str = Depends(require_auth), +): + """Record an Evidence Pack for a completed Supervisor run. + + Links the run into the Dialog Graph and auto-creates follow-up tasks. 
+ + Body: { + "run_id": str, # required + "graph_name": str, # required + "status": "completed|failed", # optional + "summary": str, # optional + "findings": [...], # optional + "recommendations": [...], # optional + "follow_up_tasks": [ # optional - auto-created as tasks + {"title": ..., "description": ..., "priority": "normal|high|urgent"} + ] + } + """ + body = await request.json() + run_id = body.get("run_id") + graph_name = body.get("graph_name") + if not run_id or not graph_name: + raise HTTPException(status_code=400, detail="run_id and graph_name are required") + try: + pack = await _app_db.create_evidence_pack( + project_id=project_id, + run_id=run_id, + graph_name=graph_name, + result_data=body, + created_by="sofiia", + ) + return JSONResponse(status_code=201, content=pack) + except Exception as e: + logger.error("record_evidence_pack failed: %s", e) + raise HTTPException(status_code=500, detail=str(e)) + + +# ── Graph Integrity ───────────────────────────────────────────────────────────── + +@app.get("/api/projects/{project_id}/graph/integrity") +async def graph_integrity(project_id: str, _auth: str = Depends(require_auth)): + """Run integrity checks on the project Dialog Graph. + + Returns: {"ok": bool, "violations": [...], "stats": {...}} + """ + try: + result = await _app_db.check_graph_integrity(project_id) + status_code = 200 if result["ok"] else 422 + return JSONResponse(status_code=status_code, content=result) + except Exception as e: + raise HTTPException(status_code=500, detail=str(e)) + + +# ── Graph Hygiene ─────────────────────────────────────────────────────────────── + +@app.post("/api/projects/{project_id}/graph/hygiene/run") +async def run_graph_hygiene( + project_id: str, + request: Request, + _auth: str = Depends(require_auth), +): + """Run Graph Hygiene Engine: dedup, lifecycle normalization, importance scoring. 
+ + Body (all optional): + { + "dry_run": true, // default true — compute but don't write + "scope": "all"|"recent", // default "all" + "since": "ISO8601" // required when scope=recent + } + + Returns: {"ok": bool, "dry_run": bool, "changes": [...], "stats": {...}} + """ + body = await request.json() if request.headers.get("content-type", "").startswith("application/json") else {} + dry_run = body.get("dry_run", True) + scope = body.get("scope", "all") + since = body.get("since") + try: + result = await _app_db.run_graph_hygiene( + project_id=project_id, + dry_run=dry_run, + scope=scope, + since=since, + ) + return JSONResponse(status_code=200, content=result) + except Exception as e: + logger.error("run_graph_hygiene failed: %s", e) + raise HTTPException(status_code=500, detail=str(e)) + + +# ── Self-Reflection Engine ────────────────────────────────────────────────────── + +@app.post("/api/projects/{project_id}/supervisor/reflect") +async def supervisor_reflect( + project_id: str, + request: Request, + _auth: str = Depends(require_auth), +): + """Create a Self-Reflection artifact for a completed Supervisor run. + + Analyzes the Evidence Pack and creates a 'decision' node (reflection) + linked to the agent_run node via 'reflects_on' edge. + + Body: { + "run_id": str, // required + "evidence": { // optional — pass evidence data for richer analysis + "summary": ..., + "findings": [...], + "recommendations": [...], + "follow_up_tasks": [...] 
+ } + } + + Returns: {node_id, reflection: {...scores, risks, ...}, edge_id, task_ids} + """ + body = await request.json() + run_id = body.get("run_id") + if not run_id: + raise HTTPException(status_code=400, detail="run_id is required") + evidence_data = body.get("evidence") or {} + try: + result = await _app_db.create_run_reflection( + project_id=project_id, + run_id=run_id, + evidence_data=evidence_data, + created_by="sofiia", + ) + return JSONResponse(status_code=201, content=result) + except Exception as e: + logger.error("supervisor_reflect failed: %s", e) + raise HTTPException(status_code=500, detail=str(e)) + + +# ── Strategic CTO Layer: Snapshots ─────────────────────────────────────────── + +@app.post("/api/projects/{project_id}/graph/snapshot") +async def compute_snapshot( + project_id: str, + window: str = "7d", + _auth: str = Depends(require_auth), +): + """Compute and store a graph analytics snapshot for the project.""" + try: + result = await _app_db.compute_graph_snapshot(project_id=project_id, window=window) + return JSONResponse(status_code=201, content=result) + except Exception as e: + logger.error("compute_snapshot failed: %s", e) + raise HTTPException(status_code=500, detail=str(e)) + + +@app.get("/api/projects/{project_id}/graph/snapshot") +async def get_snapshot( + project_id: str, + window: str = "7d", + _auth: str = Depends(require_auth), +): + """Get the latest snapshot for the project and window.""" + snap = await _app_db.get_latest_snapshot(project_id=project_id, window=window) + if not snap: + raise HTTPException(status_code=404, detail="No snapshot found. 
Run POST first.") + return JSONResponse(content=snap) + + +# ── Strategic CTO Layer: Signals ───────────────────────────────────────────── + +@app.post("/api/projects/{project_id}/graph/signals/recompute") +async def recompute_signals( + project_id: str, + window: str = "7d", + dry_run: bool = True, + _auth: str = Depends(require_auth), +): + """Run signal detection rules and upsert graph_signals.""" + try: + result = await _app_db.recompute_graph_signals( + project_id=project_id, + window=window, + dry_run=dry_run, + ) + return JSONResponse(status_code=200, content=result) + except Exception as e: + logger.error("recompute_signals failed: %s", e) + raise HTTPException(status_code=500, detail=str(e)) + + +@app.get("/api/projects/{project_id}/graph/signals") +async def list_signals( + project_id: str, + status: str = "open", + limit: int = 50, + _auth: str = Depends(require_auth), +): + """List graph signals for the project.""" + signals = await _app_db.get_graph_signals(project_id=project_id, status=status, limit=limit) + return JSONResponse(content={"signals": signals, "count": len(signals)}) + + +@app.post("/api/projects/{project_id}/graph/signals/{signal_id}/ack") +async def ack_signal( + project_id: str, + signal_id: str, + _auth: str = Depends(require_auth), +): + result = await _app_db.update_signal_status(signal_id=signal_id, new_status="ack") + if not result: + raise HTTPException(status_code=404, detail="Signal not found") + return JSONResponse(content=result) + + +@app.post("/api/projects/{project_id}/graph/signals/{signal_id}/resolve") +async def resolve_signal( + project_id: str, + signal_id: str, + _auth: str = Depends(require_auth), +): + result = await _app_db.update_signal_status(signal_id=signal_id, new_status="resolved") + if not result: + raise HTTPException(status_code=404, detail="Signal not found") + return JSONResponse(content=result) + + +@app.post("/api/projects/{project_id}/graph/signals/{signal_id}/dismiss") +async def dismiss_signal( + 
project_id: str, + signal_id: str, + _auth: str = Depends(require_auth), +): + result = await _app_db.update_signal_status(signal_id=signal_id, new_status="dismissed") + if not result: + raise HTTPException(status_code=404, detail="Signal not found") + return JSONResponse(content=result) + + +@app.post("/api/projects/{project_id}/graph/signals/auto-resolve") +async def auto_resolve_signals( + project_id: str, + dry_run: bool = True, + _auth: str = Depends(require_auth), +): + """Check resolution criteria for all open/ack signals and auto-resolve if met. + + ?dry_run=true — compute without writing (default) + ?dry_run=false — apply resolutions + + Returns: {ok, dry_run, checked, resolved, diff: [{signal_id, signal_type, action, reason}]} + """ + try: + result = await _app_db.auto_resolve_signals( + project_id=project_id, + dry_run=dry_run, + ) + return JSONResponse(content=result) + except Exception as e: + logger.error("auto_resolve_signals failed: %s", e) + raise HTTPException(status_code=500, detail=str(e)) + + +@app.post("/api/projects/{project_id}/graph/signals/{signal_id}/mitigate") +async def mitigate_signal( + project_id: str, + signal_id: str, + playbook_id: str = "", + _auth: str = Depends(require_auth), +): + """Create a deterministic mitigation plan for a signal. + + If playbook_id is provided, creates tasks from the playbook steps instead of templates. + Otherwise uses built-in mitigation templates. 
+ + Returns: {plan_node_id, task_ids, task_count, signal_type} + """ + try: + if playbook_id: + result = await _app_db.apply_playbook_to_signal( + project_id=project_id, + signal_id=signal_id, + playbook_id=playbook_id, + created_by="sofiia", + ) + else: + result = await _app_db.create_mitigation_plan( + project_id=project_id, + signal_id=signal_id, + created_by="sofiia", + ) + await _app_db.update_signal_status(signal_id=signal_id, new_status="ack") + return JSONResponse(status_code=201, content=result) + except ValueError as e: + raise HTTPException(status_code=404, detail=str(e)) + except Exception as e: + logger.error("mitigate_signal failed: %s", e) + raise HTTPException(status_code=500, detail=str(e)) + + +# ── CTO Portfolio (Cross-Project) ──────────────────────────────────────────── + +@app.get("/api/cto/portfolio/snapshots") +async def portfolio_snapshots( + window: str = "7d", + _auth: str = Depends(require_auth), +): + """Get the latest snapshot for every project (cross-project portfolio view). + + Returns: {projects: [{project_id, name, metrics, snapshot_at}], window} + """ + db = await _app_db.get_db() + # All projects + async with db.execute("SELECT project_id, name FROM projects ORDER BY name") as cur: + projects = await cur.fetchall() + result = [] + for pid, pname in projects: + snap = await _app_db.get_latest_snapshot(pid, window) + # Get latest lesson bucket + trend_flags + async with db.execute( + "SELECT date_bucket, metrics_json FROM lessons WHERE project_id=? 
ORDER BY date_bucket DESC LIMIT 1", + (pid,), + ) as cur: + lrow = await cur.fetchone() + lesson_bucket = None + lesson_trend_flags = None + if lrow: + lesson_bucket = lrow[0] + try: + import json as _json + lm = _json.loads(lrow[1] or "{}") + lesson_trend_flags = lm.get("trend_flags") + except Exception: + pass + # Compute streaks + try: + lesson_streaks = await _app_db.compute_lesson_streaks(pid) + except Exception: + lesson_streaks = None + result.append({ + "project_id": pid, + "name": pname, + "metrics": snap["metrics"] if snap else None, + "snapshot_at": snap["created_at"] if snap else None, + "latest_lesson_bucket": lesson_bucket, + "latest_lesson_trend_flags": lesson_trend_flags, + "latest_lesson_streaks": lesson_streaks, + }) + return JSONResponse(content={"projects": result, "window": window, "count": len(result)}) + + +@app.get("/api/cto/portfolio/signals") +async def portfolio_signals( + status: str = "open", + severity: str = "", + limit: int = 50, + _auth: str = Depends(require_auth), +): + """Get signals across all projects, ordered by severity then created_at. + + ?status=open|ack|resolved|dismissed|all + ?severity=high,critical (comma-separated filter, optional) + """ + db = await _app_db.get_db() + async with db.execute("SELECT project_id, name FROM projects") as cur: + projects = {r[0]: r[1] for r in await cur.fetchall()} + + if status == "all": + q = "SELECT *, rowid FROM graph_signals ORDER BY severity DESC, created_at DESC LIMIT ?" + params: tuple = (limit,) + else: + q = "SELECT *, rowid FROM graph_signals WHERE status=? ORDER BY severity DESC, created_at DESC LIMIT ?" 
+ params = (status, limit) + + async with db.execute(q, params) as cur: + rows = await cur.fetchall() + + # Severity order for sorting + SEV_ORDER = {"critical": 0, "high": 1, "medium": 2, "low": 3} + sev_filter = {s.strip() for s in severity.split(",") if s.strip()} if severity else set() + + signals = [] + for row in rows: + d = dict(row) + if "rowid" in d: + del d["rowid"] + try: + d["evidence"] = json.loads(d["evidence"]) + except Exception: + d["evidence"] = {} + if sev_filter and d.get("severity") not in sev_filter: + continue + d["project_name"] = projects.get(d["project_id"], d["project_id"]) + signals.append(d) + + signals.sort(key=lambda s: (SEV_ORDER.get(s.get("severity", "low"), 3), s.get("created_at", ""))) + return JSONResponse(content={"signals": signals[:limit], "count": len(signals), "status": status}) + + +@app.post("/api/cto/portfolio/drift/recompute") +async def portfolio_drift_recompute( + window: str = "7d", + dry_run: bool = False, + _auth: str = Depends(require_auth), +): + """Recompute portfolio-level drift signals based on lesson streaks across all projects.""" + try: + result = await _app_db.recompute_portfolio_signals(window=window, dry_run=dry_run) + return JSONResponse(content=result) + except Exception as e: + raise HTTPException(status_code=500, detail=str(e)) + + +@app.get("/api/cto/portfolio/drift/signals") +async def portfolio_drift_signals( + status: str = "open", + _auth: str = Depends(require_auth), +): + """Get portfolio-level drift signals.""" + try: + signals = await _app_db.list_portfolio_signals(status=status) + return JSONResponse(content={"signals": signals, "count": len(signals)}) + except Exception as e: + raise HTTPException(status_code=500, detail=str(e)) + + +@app.post("/api/cto/portfolio/streaks") +async def portfolio_streaks( + _auth: str = Depends(require_auth), +): + """Get streak data for all projects.""" + try: + db = await _app_db.get_db() + async with db.execute("SELECT project_id, name FROM projects") as 
cur: + projects = await cur.fetchall() + result = [] + for pid, pname in projects: + streaks = await _app_db.compute_lesson_streaks(pid) + result.append({"project_id": pid, "name": pname, "streaks": streaks}) + return JSONResponse(content={"projects": result, "count": len(result)}) + except Exception as e: + raise HTTPException(status_code=500, detail=str(e)) + + +# ── Playbooks (Graph Learning Layer) ───────────────────────────────────────── + +@app.get("/api/projects/{project_id}/playbooks") +async def list_playbooks( + project_id: str, + signal_type: str = "", + limit: int = 10, + _auth: str = Depends(require_auth), +): + """List playbooks for a project, ordered by success_rate desc.""" + try: + pbs = await _app_db.list_playbooks( + project_id=project_id, + signal_type=signal_type, + limit=limit, + ) + return JSONResponse(content={"playbooks": pbs, "count": len(pbs)}) + except Exception as e: + raise HTTPException(status_code=500, detail=str(e)) + + +@app.post("/api/projects/{project_id}/playbooks/from-signal/{signal_id}") +async def create_playbook_from_signal( + project_id: str, + signal_id: str, + _auth: str = Depends(require_auth), +): + """Promote current mitigation of a signal into a playbook (or update existing). + + Requires signal to have plan_node_id and mitigation_task_ids in evidence. + Returns: {playbook_id, doc_id, version_id, context_key, created, stats} + """ + try: + # Check if signal is resolved to update stats + db = await _app_db.get_db() + async with db.execute( + "SELECT status, evidence FROM graph_signals WHERE id=? 
AND project_id=?", + (signal_id, project_id), + ) as cur: + srow = await cur.fetchone() + resolved = srow[0] == "resolved" if srow else False + result = await _app_db.upsert_playbook_from_signal( + project_id=project_id, + signal_id=signal_id, + resolved=resolved, + ) + return JSONResponse(status_code=201, content=result) + except ValueError as e: + raise HTTPException(status_code=409, detail=str(e)) + except Exception as e: + logger.error("create_playbook_from_signal failed: %s", e) + raise HTTPException(status_code=500, detail=str(e)) + + +# ── Portfolio Batch Recompute ───────────────────────────────────────────────── + +@app.post("/api/cto/portfolio/snapshots/recompute") +async def portfolio_snapshots_recompute( + window: str = "7d", + force: bool = False, + _auth: str = Depends(require_auth), +): + """Recompute graph snapshots for ALL projects. + + Skips projects that already have a snapshot for today (date_bucket) unless force=true. + Returns: {computed, skipped, errors[]} + """ + import datetime as _dt2 + db = await _app_db.get_db() + async with db.execute("SELECT project_id FROM projects") as cur: + project_ids = [r[0] for r in await cur.fetchall()] + + today = _dt2.datetime.utcnow().strftime("%Y-%m-%d") + computed, skipped, errors = 0, 0, [] + for pid in project_ids: + try: + if not force: + async with db.execute( + "SELECT id FROM graph_snapshots WHERE project_id=? AND window=? 
AND date_bucket=?", + (pid, window, today), + ) as cur: + exists = await cur.fetchone() + if exists: + skipped += 1 + continue + await _app_db.compute_graph_snapshot(project_id=pid, window=window) + computed += 1 + except Exception as e: + errors.append({"project_id": pid, "error": str(e)}) + return JSONResponse(content={"computed": computed, "skipped": skipped, "errors": errors}) + + +@app.post("/api/cto/portfolio/signals/recompute") +async def portfolio_signals_recompute( + window: str = "7d", + dry_run: bool = False, + _auth: str = Depends(require_auth), +): + """Recompute signals for ALL projects. + + Returns: {results: [{project_id, new, refreshed, total}], errors[]} + """ + db = await _app_db.get_db() + async with db.execute("SELECT project_id FROM projects") as cur: + project_ids = [r[0] for r in await cur.fetchall()] + + results, errors = [], [] + for pid in project_ids: + try: + diff = await _app_db.recompute_graph_signals( + project_id=pid, window=window, dry_run=dry_run + ) + new_count = sum(1 for d in diff if d.get("action") == "new") + refresh_count = sum(1 for d in diff if d.get("action") in ("refresh", "reopen")) + results.append({ + "project_id": pid, + "new": new_count, + "refreshed": refresh_count, + "total": len(diff), + }) + except Exception as e: + errors.append({"project_id": pid, "error": str(e)}) + return JSONResponse(content={"results": results, "errors": errors, "dry_run": dry_run}) + + +# ── Lessons (Graph Learning Layer) ──────────────────────────────────────────── + +@app.post("/api/projects/{project_id}/lessons/generate") +async def generate_lesson( + project_id: str, + window: str = "7d", + dry_run: bool = True, + _auth: str = Depends(require_auth), +): + """Generate a weekly Lessons Learned report for a project. + + dry_run=true (default): compute and return without writing to DB. + dry_run=false: persist lesson node + metrics + improvement tasks. 
+ + Returns: {dry_run, date_bucket, markdown, metrics, planned_improvement_tasks, evidence} + """ + try: + result = await _app_db.upsert_lesson( + project_id=project_id, + window=window, + dry_run=dry_run, + created_by="sofiia", + ) + return JSONResponse(status_code=200 if dry_run else 201, content=result) + except Exception as e: + logger.error("generate_lesson failed: %s", e) + raise HTTPException(status_code=500, detail=str(e)) + + +@app.get("/api/projects/{project_id}/lessons") +async def list_lessons_endpoint( + project_id: str, + window: str = "7d", + limit: int = 8, + _auth: str = Depends(require_auth), +): + """List lessons for a project, ordered by date_bucket desc.""" + try: + lessons = await _app_db.list_lessons(project_id=project_id, window=window, limit=limit) + return JSONResponse(content={"lessons": lessons, "count": len(lessons)}) + except Exception as e: + raise HTTPException(status_code=500, detail=str(e)) + + +@app.get("/api/projects/{project_id}/lessons/{lesson_id}") +async def get_lesson_endpoint( + project_id: str, + lesson_id: str, + _auth: str = Depends(require_auth), +): + """Get full lesson detail including markdown and linked evidence.""" + try: + lesson = await _app_db.get_lesson_detail(project_id=project_id, lesson_id=lesson_id) + if not lesson: + raise HTTPException(status_code=404, detail="Lesson not found") + return JSONResponse(content=lesson) + except HTTPException: + raise + except Exception as e: + raise HTTPException(status_code=500, detail=str(e)) + + +@app.post("/api/projects/{project_id}/lessons/impact/recompute") +async def recompute_lesson_impact( + project_id: str, + window: str = "7d", + dry_run: bool = False, + force: bool = False, + _auth: str = Depends(require_auth), +): + """Recompute impact score for the prior-bucket lesson based on current-bucket metrics.""" + try: + if dry_run: + # Preview: just return what would be computed, no write + result = await _app_db.evaluate_lesson_impact( + project_id=project_id, 
window=window, force=True + ) + return JSONResponse(content={"dry_run": True, "preview": result}) + result = await _app_db.evaluate_lesson_impact( + project_id=project_id, window=window, force=force + ) + return JSONResponse(content={"dry_run": False, "result": result}) + except Exception as e: + raise HTTPException(status_code=500, detail=str(e)) + + +@app.post("/api/cto/portfolio/lessons/generate") +async def portfolio_lessons_generate( + window: str = "7d", + dry_run: bool = False, + force: bool = False, + _auth: str = Depends(require_auth), +): + """Generate lessons for ALL projects. + + Skips projects that already have a lesson for the current bucket (unless force=true). + Returns: {generated, skipped, errors[]} + """ + db = await _app_db.get_db() + async with db.execute("SELECT project_id FROM projects") as cur: + project_ids = [r[0] for r in await cur.fetchall()] + + current_bucket = _app_db.compute_lesson_bucket() + generated, skipped, errors = 0, 0, [] + for pid in project_ids: + try: + if not force and not dry_run: + async with db.execute( + "SELECT lesson_id FROM lessons WHERE project_id=? AND date_bucket=? 
AND window=?", + (pid, current_bucket, window), + ) as cur: + exists = await cur.fetchone() + if exists: + skipped += 1 + continue + await _app_db.upsert_lesson(project_id=pid, window=window, dry_run=dry_run) + generated += 1 + except Exception as e: + errors.append({"project_id": pid, "error": str(e)}) + return JSONResponse(content={ + "generated": generated, + "skipped": skipped, + "errors": errors, + "dry_run": dry_run, + "date_bucket": current_bucket, + }) + + +# ── Level 6: Governance Gates ───────────────────────────────────────────────── + +@app.get("/api/projects/{project_id}/governance/gates") +async def get_governance_gates( + project_id: str, + window: str = "7d", + _auth: str = Depends(require_auth), +): + """Return latest governance gate evaluation (dry_run, no persist).""" + try: + result = await _app_db.evaluate_governance_gates( + project_id=project_id, window=window, dry_run=True + ) + return JSONResponse(content=result) + except Exception as e: + raise HTTPException(status_code=500, detail=str(e)) + + +@app.post("/api/projects/{project_id}/governance/gates/evaluate") +async def evaluate_governance_gates_endpoint( + project_id: str, + window: str = "7d", + dry_run: bool = False, + _auth: str = Depends(require_auth), +): + """Evaluate governance gates and optionally persist decision node.""" + try: + result = await _app_db.evaluate_governance_gates( + project_id=project_id, window=window, dry_run=dry_run + ) + return JSONResponse(content=result) + except Exception as e: + raise HTTPException(status_code=500, detail=str(e)) + + +# ── Level 6: Portfolio Drift Auto-plan / Auto-run ──────────────────────────── + +@app.post("/api/cto/portfolio/drift/{signal_id}/auto-plan") +async def portfolio_drift_auto_plan( + signal_id: str, + _auth: str = Depends(require_auth), +): + """Populate evidence.auto_actions.runs with planned entries (dry_run=True).""" + try: + result = await _app_db.auto_plan_drift_signal(signal_id=signal_id) + if "error" in result: + raise 
HTTPException(status_code=404, detail=result["error"]) + return JSONResponse(content=result) + except HTTPException: + raise + except Exception as e: + raise HTTPException(status_code=500, detail=str(e)) + + +@app.post("/api/cto/portfolio/drift/{signal_id}/auto-run") +async def portfolio_drift_auto_run( + signal_id: str, + dry_run: bool = False, + force: bool = False, + _auth: str = Depends(require_auth), +): + """Execute planned/queued workflow runs for a portfolio drift signal.""" + try: + supervisor_url = os.getenv("SUPERVISOR_URL", "http://sofiia-supervisor:8080") + result = await _app_db.auto_run_drift_signal( + signal_id=signal_id, + dry_run=dry_run, + force=force, + supervisor_url=supervisor_url, + ) + if "error" in result: + raise HTTPException(status_code=404, detail=result["error"]) + return JSONResponse(content=result) + except HTTPException: + raise + except Exception as e: + raise HTTPException(status_code=500, detail=str(e)) + + +# ── Level 7: Governance Audit Trail ────────────────────────────────────────── + +@app.get("/api/cto/audit/events") +async def audit_events_portfolio( + scope: Optional[str] = "portfolio", + limit: int = 100, + event_type: Optional[str] = None, + status: Optional[str] = None, + since: Optional[str] = None, + _auth: str = Depends(require_auth), +): + """List governance audit events for portfolio (or any scope).""" + try: + items = await _app_db.list_governance_events( + scope=scope, project_id="portfolio" if scope == "portfolio" else None, + event_type=event_type, status=status, since=since, limit=limit, + ) + return JSONResponse(content={"items": items, "count": len(items)}) + except Exception as e: + raise HTTPException(status_code=500, detail=str(e)) + + +@app.get("/api/projects/{project_id}/audit/events") +async def audit_events_project( + project_id: str, + limit: int = 100, + event_type: Optional[str] = None, + status: Optional[str] = None, + since: Optional[str] = None, + _auth: str = Depends(require_auth), +): + 
"""List governance audit events for a specific project.""" + try: + items = await _app_db.list_governance_events( + scope="project", project_id=project_id, + event_type=event_type, status=status, since=since, limit=limit, + ) + return JSONResponse(content={"items": items, "count": len(items)}) + except Exception as e: + raise HTTPException(status_code=500, detail=str(e)) + + +# ── Level 8: Agents as Projects ─────────────────────────────────────────────── + +import difflib as _difflib +import time as _time + +# ── Agent Ops helpers ────────────────────────────────────────────────────────── + +async def _fetch_agents_from_gateway( + node_id: str, + gateway_url: str, + timeout_ms: Optional[int] = None, + get_retry: int = 1, +) -> tuple: + """Fetch agents list from gateway /health. Returns (agents, error_str|None, latency_ms). + + Respects per-node timeout_ms and retry policy. + """ + if not gateway_url: + return [], f"No gateway_url configured for {node_id}", None + timeout_sec = (timeout_ms or 2500) / 1000.0 + last_err = None + attempts = get_retry + 1 + t0 = _time.monotonic() + for attempt in range(attempts): + try: + async with httpx.AsyncClient(timeout=timeout_sec) as client: + resp = await client.get(f"{gateway_url.rstrip('/')}/health") + latency_ms = int((_time.monotonic() - t0) * 1000) + if resp.status_code != 200: + last_err = f"HTTP {resp.status_code}" + continue + data = resp.json() + raw = data.get("agents", {}) + agents: List[Dict] = [] + if isinstance(raw, dict): + for aid, info in raw.items(): + agents.append({ + "agent_id": aid, + "display_name": info.get("name", aid), + "status": "healthy" if info.get("prompt_loaded") else "degraded", + "telegram_token_configured": info.get("telegram_token_configured", False), + "prompt_loaded": info.get("prompt_loaded", False), + "node_id": node_id, + "active_prompt": info.get("active_prompt"), + "badges": info.get("badges", []), + "visibility": info.get("visibility", "public"), + "telegram_mode": 
info.get("telegram_mode", "on"), + "lifecycle_status": info.get("lifecycle_status", "active"), + }) + elif isinstance(raw, list): + for a in raw: + agents.append({**a, "node_id": node_id}) + return agents, None, latency_ms + except Exception as e: + last_err = str(e)[:200] + latency_ms = int((_time.monotonic() - t0) * 1000) + return [], last_err, latency_ms + + +def _node_info(node_id: str) -> Dict: + """Return {gateway_url, policy} for a node.""" + from .config import get_node_policy + return { + "gateway_url": get_gateway_url(node_id), + "policy": get_node_policy(node_id), + } + + +def _agent_desired_payload(override: Dict) -> Dict: + """Canonical desired-state payload from an override row.""" + return { + "display_name": override.get("display_name"), + "domain": override.get("domain"), + "system_prompt_md": override.get("system_prompt_md"), + } + + +def _merge_agent_with_override(agent: Dict, override: Optional[Dict]) -> Dict: + result = dict(agent) + if not override: + result["has_override"] = False + result["drift"] = False + return result + if override.get("display_name"): result["display_name"] = override["display_name"] + if override.get("domain"): result["domain"] = override["domain"] + if override.get("system_prompt_md"): result["system_prompt_md"] = override["system_prompt_md"] + result["is_hidden"] = bool(override.get("is_hidden")) + result["has_override"] = True + result["override_updated_at"] = override.get("updated_at") + result["last_applied_hash"] = override.get("last_applied_hash") + result["last_applied_at"] = override.get("last_applied_at") + # Drift: desired hash != last applied hash + desired = _agent_desired_payload(override) + desired_hash = _app_db._agent_payload_hash(desired) + result["desired_hash"] = desired_hash + active_hash = override.get("last_applied_hash") + result["drift"] = bool(active_hash and active_hash != desired_hash) + return result + + +async def _check_prompt_freeze(node_id: str, agent_id: str) -> bool: + """Return True 
if PROMPT_FREEZE gate is active for any related project.""" + try: + # Check portfolio gate + gates = await _app_db.evaluate_governance_gates("portfolio", window="7d", dry_run=True) + for g in gates.get("gates", []): + if g.get("name") == "PROMPT_FREEZE" and g.get("status") != "PASS": + return True + except Exception: + pass + return False + + +# ── Agent CRUD endpoints ─────────────────────────────────────────────────────── + +# Agents required on every online node — if absent, signal is raised +_REQUIRED_PER_NODE_AGENTS: List[str] = ["monitor"] + + +def _normalize_agent_capabilities(agent: Dict) -> Dict: + """Add normalized capabilities: {voice, telegram} to agent dict.""" + badges = agent.get("badges", []) + telegram_mode = agent.get("telegram_mode", "on") + agent_id = agent.get("agent_id", "") + agent["capabilities"] = { + "voice": agent_id == "aistalk" or "voice" in badges, + "telegram": telegram_mode != "off", + } + return agent + + +async def _emit_monitor_missing_event(node_id: str, bucket: str) -> None: + """Write a governance_event when monitor is confirmed absent on an online node.""" + try: + await _app_db.append_governance_event( + scope="portfolio", + project_id="portfolio", + actor_type="system", + actor_id=None, + event_type="node_required_agent_missing", + idempotency_key=f"req|missing|{node_id}|monitor|{bucket}", + severity="high", + status="error", + ref_type="node", + ref_id=node_id, + evidence={ + "v": 1, + "message": f"Required agent 'monitor' absent on {node_id}", + "inputs": {"node_id": node_id, "required_agent": "monitor"}, + "outputs": {"missing": True}, + "links": {}, + "timings": {}, + }, + ) + except Exception as exc: + logger.warning("_emit_monitor_missing_event failed: %s", exc) + + +@app.get("/api/agents") +async def list_agents( + nodes: str = "NODA1", + include_hidden: bool = False, + _auth: str = Depends(require_auth), +): + """Fetch agents from node gateways, merge with local overrides. 
+ + Returns {items, node_errors, stats, required_missing_nodes, nodes_queried}. + Partial node failure never blocks other nodes — always HTTP 200. + """ + node_ids = [n.strip().upper() for n in nodes.split(",") if n.strip()] + today_bucket = datetime.utcnow().strftime("%Y-%m-%d") + + all_agents: List[Dict] = [] + node_errors: List[Dict] = [] + node_stats: List[Dict] = [] + required_missing_nodes: List[Dict] = [] # nodes where required agents absent + + overrides_list = await _app_db.list_agent_overrides() + overrides_map = {(o["node_id"], o["agent_id"]): o for o in overrides_list} + + for node_id in node_ids: + ni = _node_info(node_id) + gw_url = ni["gateway_url"] + policy = ni["policy"] + agents_raw, err, latency_ms = await _fetch_agents_from_gateway( + node_id, gw_url, + timeout_ms=policy["gateway_timeout_ms"], + get_retry=policy["get_retry"], + ) + if err: + node_errors.append({ + "node_id": node_id, "error": err, + "gateway_url": gw_url, "latency_ms": latency_ms, + "node_role": policy["node_role"], + }) + node_stats.append({"node_id": node_id, "ok": False, "count": 0, + "latency_ms": latency_ms}) + # Node offline → skip required check (not "missing", just "unreachable") + else: + count = 0 + present_agent_ids: Set[str] = set() + for agent in agents_raw: + override = overrides_map.get((node_id, agent["agent_id"])) + merged = _merge_agent_with_override(agent, override) + merged = _normalize_agent_capabilities(merged) + if not include_hidden and merged.get("is_hidden"): + continue + merged["latency_ms"] = latency_ms + all_agents.append(merged) + present_agent_ids.add(agent["agent_id"]) + count += 1 + node_stats.append({"node_id": node_id, "ok": True, "count": count, + "latency_ms": latency_ms, + "node_role": policy["node_role"]}) + # Required agent check — only for online nodes + for req_id in _REQUIRED_PER_NODE_AGENTS: + if req_id not in present_agent_ids: + required_missing_nodes.append({ + "node_id": node_id, + "agent_id": req_id, + "reason": 
"absent_from_registry", + }) + asyncio.create_task(_emit_monitor_missing_event(node_id, today_bucket)) + + all_agents.sort(key=lambda a: (a.get("status") != "healthy", a.get("display_name", "").lower())) + nodes_ok = sum(1 for s in node_stats if s["ok"]) + return JSONResponse(content={ + "items": all_agents, + "node_errors": node_errors, + "stats": {"nodes_ok": nodes_ok, "nodes_total": len(node_ids), "agents_total": len(all_agents)}, + "required_missing_nodes": required_missing_nodes, + "nodes_queried": node_ids, + }) + + +@app.get("/api/agents/{node_id}/{agent_id}") +async def get_agent(node_id: str, agent_id: str, _auth: str = Depends(require_auth)): + node_id = node_id.upper() + ni = _node_info(node_id) + policy = ni["policy"] + agents_raw, err, latency_ms = await _fetch_agents_from_gateway( + node_id, ni["gateway_url"], + timeout_ms=policy["gateway_timeout_ms"], + get_retry=policy["get_retry"], + ) + override = await _app_db.get_agent_override(node_id, agent_id) + agent = next((a for a in agents_raw if a["agent_id"] == agent_id), None) + if not agent: + if override: + desired = _agent_desired_payload(override) + return JSONResponse(content={"agent": { + **override, "status": "unknown", "node_offline": True, + "desired_hash": _app_db._agent_payload_hash(desired), "drift": False, + "latency_ms": latency_ms, + }}) + raise HTTPException(status_code=404, detail=f"Agent '{agent_id}' not found on {node_id}") + merged = _merge_agent_with_override(agent, override) + merged["latency_ms"] = latency_ms + return JSONResponse(content={"agent": merged, "node_error": err}) + + +@app.get("/api/agents/{node_id}/{agent_id}/versions") +async def list_agent_versions( + node_id: str, agent_id: str, + limit: int = 10, + _auth: str = Depends(require_auth), +): + """Return version history for an agent override.""" + node_id = node_id.upper() + versions = await _app_db.list_agent_versions(node_id, agent_id, limit=limit) + return JSONResponse(content={"versions": versions}) + + +class 
AgentOverridePatch(BaseModel): + display_name: Optional[str] = None + domain: Optional[str] = None + system_prompt_md: Optional[str] = None + is_hidden: Optional[bool] = None + + +@app.patch("/api/agents/{node_id}/{agent_id}") +async def patch_agent_override( + node_id: str, agent_id: str, + body: AgentOverridePatch, + _auth: str = Depends(require_auth), +): + """Save local override (does NOT push to node). Creates a version snapshot.""" + node_id = node_id.upper() + override = await _app_db.upsert_agent_override( + node_id, agent_id, + display_name=body.display_name, + domain=body.domain, + system_prompt_md=body.system_prompt_md, + is_hidden=body.is_hidden, + ) + # Audit: agent_override_saved + await _app_db.append_governance_event( + scope="project", project_id=agent_id, actor_type="user", + event_type="agent_override_saved", + idempotency_key=f"aos|{node_id}|{agent_id}|{override.get('version_hash','')}", + severity="info", status="ok", + ref_type="agent", ref_id=agent_id, + evidence=_app_db._make_evidence( + message=f"Override saved for {agent_id} on {node_id}", + outputs={"version_hash": override.get("version_hash"), "fields_changed": [ + k for k, v in body.dict(exclude_none=True).items() + ]}, + ), + ) + return JSONResponse(content={"override": override, "saved": True}) + + +@app.post("/api/agents/{node_id}/{agent_id}/reset") +async def reset_agent_override(node_id: str, agent_id: str, _auth: str = Depends(require_auth)): + """Remove local override, revert to registry state.""" + node_id = node_id.upper() + await _app_db.delete_agent_override(node_id, agent_id) + return JSONResponse(content={"reset": True, "node_id": node_id, "agent_id": agent_id}) + + +# ── Safe Apply v2 ────────────────────────────────────────────────────────────── + +@app.post("/api/agents/{node_id}/{agent_id}/apply") +async def apply_agent_override( + node_id: str, agent_id: str, + dry_run: bool = True, + plan_id: Optional[str] = None, + force: bool = False, + _auth: str = 
Depends(require_auth), +): + """Safe Apply v2. + + dry_run=true → returns diff_text + will_change + plan_id (sha256 of desired state). + dry_run=false → requires plan_id to match; applies and stores last_applied_hash. + """ + node_id = node_id.upper() + + # Governance gate check: PROMPT_FREEZE + if not dry_run and not force: + frozen = await _check_prompt_freeze(node_id, agent_id) + if frozen: + return JSONResponse( + status_code=423, + content={"error": "PROMPT_FREEZE gate is active. Use force=true to override (requires review).", + "gate": "PROMPT_FREEZE", "node_id": node_id, "agent_id": agent_id}, + ) + + override = await _app_db.get_agent_override(node_id, agent_id) + if not override: + raise HTTPException(status_code=404, detail="No local override found. Use PATCH first.") + + desired = _agent_desired_payload(override) + computed_plan_id = _app_db._agent_payload_hash(desired) + + # Fetch current active prompt for diff + gw_url = get_gateway_url(node_id) + agents_raw, _ = await _fetch_agents_from_gateway(node_id, gw_url) + active_agent = next((a for a in agents_raw if a["agent_id"] == agent_id), None) + active_prompt = active_agent.get("active_prompt", "") if active_agent else "" + desired_prompt = desired.get("system_prompt_md") or "" + + # Build unified diff + diff_lines = list(_difflib.unified_diff( + (active_prompt or "").splitlines(keepends=True), + desired_prompt.splitlines(keepends=True), + fromfile=f"{agent_id}:active", + tofile=f"{agent_id}:desired", + n=3, + )) + diff_text = "".join(diff_lines) if diff_lines else "" + will_change = bool(diff_text) or (override.get("domain") is not None) + + if dry_run: + # Audit: agent_apply_planned + await _app_db.append_governance_event( + scope="project", project_id=agent_id, actor_type="user", + event_type="agent_apply_planned", + idempotency_key=f"aap|{node_id}|{agent_id}|{computed_plan_id}", + severity="info", status="ok", + ref_type="agent", ref_id=agent_id, + evidence=_app_db._make_evidence( + message=f"Apply 
planned (dry-run) for {agent_id}@{node_id}", + outputs={"will_change": will_change, "plan_id": computed_plan_id, + "diff_lines": len(diff_lines)}, + ), + ) + return JSONResponse(content={ + "dry_run": True, "will_change": will_change, + "plan_id": computed_plan_id, + "diff_text": diff_text, + "desired": desired, + "node_id": node_id, "agent_id": agent_id, + }) + + # Apply: validate plan_id + if plan_id and plan_id != computed_plan_id: + raise HTTPException( + status_code=409, + detail=f"plan_id mismatch: provided={plan_id} computed={computed_plan_id}. " + "Re-run dry_run=true to get fresh plan_id.", + ) + + applied: List[Dict] = [] + errors_apply: List[Dict] = [] + + if desired_prompt and gw_url: + try: + async with httpx.AsyncClient(timeout=10.0) as client: + resp = await client.post( + f"{gw_url.rstrip('/')}/admin/agents/{agent_id}/prompt", + json={"prompt": desired_prompt}, + headers={"X-Admin-Token": os.getenv("GATEWAY_ADMIN_TOKEN", "")}, + ) + if resp.status_code in (200, 201, 204): + applied.append({"action": "update_system_prompt", "status": "ok"}) + else: + errors_apply.append({"action": "update_system_prompt", + "error": f"HTTP {resp.status_code}: {resp.text[:200]}"}) + except Exception as e: + errors_apply.append({"action": "update_system_prompt", "error": str(e)[:300]}) + + success = len(applied) > 0 and len(errors_apply) == 0 + + # Mark last_applied_hash if successful + if success: + await _app_db.upsert_agent_override( + node_id, agent_id, _mark_applied_hash=computed_plan_id, + ) + + # Audit + evt_type = "agent_apply_executed" if success else "agent_apply_failed" + await _app_db.append_governance_event( + scope="project", project_id=agent_id, actor_type="user", + event_type=evt_type, + idempotency_key=f"aae|{node_id}|{agent_id}|{computed_plan_id}|{'ok' if success else 'fail'}", + severity="info" if success else "high", status="ok" if success else "error", + ref_type="agent", ref_id=agent_id, + evidence=_app_db._make_evidence( + message=f"Apply 
{'succeeded' if success else 'failed'} for {agent_id}@{node_id}", + outputs={"plan_id": computed_plan_id, "applied": applied, "errors": errors_apply}, + ), + ) + + return JSONResponse(content={ + "dry_run": False, "success": success, + "plan_id": computed_plan_id, + "applied": applied, "errors": errors_apply, + "node_id": node_id, "agent_id": agent_id, + }) + + +@app.post("/api/agents/{node_id}/{agent_id}/rollback") +async def rollback_agent_override( + node_id: str, agent_id: str, + version_hash: str, + _auth: str = Depends(require_auth), +): + """Rollback agent override to a specific version by version_hash.""" + node_id = node_id.upper() + version = await _app_db.get_agent_version_by_hash(node_id, agent_id, version_hash) + if not version: + raise HTTPException(status_code=404, detail=f"Version {version_hash} not found") + + payload = version["payload"] + # Restore the override to this version's payload + updated = await _app_db.upsert_agent_override( + node_id, agent_id, + display_name=payload.get("display_name"), + domain=payload.get("domain"), + system_prompt_md=payload.get("system_prompt_md"), + ) + + # Audit + await _app_db.append_governance_event( + scope="project", project_id=agent_id, actor_type="user", + event_type="agent_rollback_executed", + idempotency_key=f"arb|{node_id}|{agent_id}|{version_hash}|{_app_db._now()}", + severity="warn", status="ok", + ref_type="agent", ref_id=agent_id, + evidence=_app_db._make_evidence( + message=f"Rollback to version {version_hash} for {agent_id}@{node_id}", + outputs={"version_hash": version_hash, "created_at": version.get("created_at")}, + ), + ) + return JSONResponse(content={ + "rolled_back": True, "version_hash": version_hash, + "override": updated, "node_id": node_id, "agent_id": agent_id, + }) + + +# ── Bulk Agent Actions (multi-node + canary) ─────────────────────────────────── + +async def _apply_single_agent( + node_id: str, + override: Dict, + agents_map: Dict, + gw_url: str, + apply_timeout_sec: float, +) 
-> Dict: + """Apply a single agent override. Returns result dict with status field.""" + aid = override["agent_id"] + desired = _agent_desired_payload(override) + plan_id = _app_db._agent_payload_hash(desired) + active_agent = agents_map.get(aid, {}) + active_prompt = active_agent.get("active_prompt", "") or "" + desired_prompt = desired.get("system_prompt_md") or "" + will_change = desired_prompt != active_prompt + + if not desired_prompt or not gw_url: + return {"node_id": node_id, "agent_id": aid, "status": "skipped", + "plan_id": plan_id, "drift": will_change, + "error": "no prompt or no gateway_url"} + + applied_ok = False + err_msg = None + try: + async with httpx.AsyncClient(timeout=apply_timeout_sec) as client: + resp = await client.post( + f"{gw_url.rstrip('/')}/admin/agents/{aid}/prompt", + json={"prompt": desired_prompt}, + headers={"X-Admin-Token": os.getenv("GATEWAY_ADMIN_TOKEN", "")}, + ) + applied_ok = resp.status_code in (200, 201, 204) + if not applied_ok: + err_msg = f"HTTP {resp.status_code}: {resp.text[:100]}" + except Exception as e: + err_msg = str(e)[:200] + + if applied_ok: + await _app_db.upsert_agent_override(node_id, aid, _mark_applied_hash=plan_id) + + return { + "node_id": node_id, "agent_id": aid, + "status": "applied" if applied_ok else "failed", + "plan_id": plan_id, "drift": will_change, + "error": err_msg, + } + + +@app.post("/api/agents/bulk/apply") +async def bulk_apply_agents( + nodes: str = "NODA1", + node: Optional[str] = None, # legacy single-node param + dry_run: bool = True, + mode: str = "all", # "all" | "canary" + limit: int = 2, # canary: max N agents + _auth: str = Depends(require_auth), +): + """Apply local overrides across one or many nodes. + + mode=canary: apply first `limit` agents with drift=True, stop on first failure. + Returns {results, node_errors, summary}. 
+ """ + # Support legacy ?node= param + raw_nodes = node.upper() if node else nodes + node_ids = [n.strip().upper() for n in raw_nodes.split(",") if n.strip()] + + all_results: List[Dict] = [] + node_errors: List[Dict] = [] + bulk_run_id = str(uuid.uuid4())[:8] + + # Audit: bulk plan created + await _app_db.append_governance_event( + scope="portfolio", project_id="portfolio", actor_type="user", + event_type="agent_bulk_plan_created", + idempotency_key=f"abpc|{bulk_run_id}|{raw_nodes}|{mode}", + severity="info", status="ok", + evidence=_app_db._make_evidence( + message=f"Bulk {'canary' if mode=='canary' else 'apply'} planned: nodes={raw_nodes} dry_run={dry_run}", + outputs={"mode": mode, "limit": limit, "nodes": node_ids, "dry_run": dry_run}, + ), + ) + + for node_id in node_ids: + ni = _node_info(node_id) + policy = ni["policy"] + gw_url = ni["gateway_url"] + apply_timeout_sec = policy["apply_timeout_ms"] / 1000.0 + + overrides = await _app_db.list_agent_overrides(node_id) + agents_raw, err, latency_ms = await _fetch_agents_from_gateway( + node_id, gw_url, + timeout_ms=policy["gateway_timeout_ms"], + get_retry=policy["get_retry"], + ) + if err and not agents_raw: + node_errors.append({"node_id": node_id, "error": err, "latency_ms": latency_ms}) + continue + + agents_map = {a["agent_id"]: a for a in agents_raw} + + # Select candidates: non-hidden, sorted deterministically by agent_id + candidates = sorted( + [o for o in overrides if not o.get("is_hidden")], + key=lambda o: o["agent_id"], + ) + + if mode == "canary": + # For canary: only agents with drift + drift_candidates = [] + for o in candidates: + desired = _agent_desired_payload(o) + plan_id = _app_db._agent_payload_hash(desired) + is_drift = bool(o.get("last_applied_hash") and o["last_applied_hash"] != plan_id) + if is_drift: + drift_candidates.append(o) + candidates = drift_candidates[:limit] + + if dry_run: + for override in candidates: + aid = override["agent_id"] + desired = 
_agent_desired_payload(override) + plan_id = _app_db._agent_payload_hash(desired) + active_agent = agents_map.get(aid, {}) + active_prompt = active_agent.get("active_prompt", "") or "" + desired_prompt = desired.get("system_prompt_md") or "" + all_results.append({ + "node_id": node_id, "agent_id": aid, "status": "planned", + "plan_id": plan_id, "drift": desired_prompt != active_prompt, "error": None, + }) + continue + + # Canary: log start + if mode == "canary" and candidates: + await _app_db.append_governance_event( + scope="portfolio", project_id="portfolio", actor_type="user", + event_type="agent_bulk_canary_started", + idempotency_key=f"abcs|{bulk_run_id}|{node_id}", + severity="info", status="ok", + evidence=_app_db._make_evidence( + message=f"Canary apply started: {len(candidates)} agents on {node_id}", + outputs={"agents": [o["agent_id"] for o in candidates], "limit": limit}, + ), + ) + + canary_stopped = False + for override in candidates: + # Check governance gate per agent + frozen = await _check_prompt_freeze(node_id, override["agent_id"]) + if frozen: + all_results.append({ + "node_id": node_id, "agent_id": override["agent_id"], + "status": "blocked", "plan_id": None, "drift": True, + "error": "PROMPT_FREEZE gate active", + }) + continue + + result = await _apply_single_agent( + node_id, override, agents_map, gw_url, apply_timeout_sec, + ) + all_results.append(result) + + # Canary stop-on-failure + if mode == "canary" and result["status"] == "failed": + canary_stopped = True + # Mark remaining as skipped + remaining_ids = {o["agent_id"] for o in candidates} - {r["agent_id"] for r in all_results if r["node_id"] == node_id} + for rid in sorted(remaining_ids): + all_results.append({ + "node_id": node_id, "agent_id": rid, "status": "skipped", + "plan_id": None, "drift": True, + "error": f"canary stopped after failure of {result['agent_id']}", + }) + await _app_db.append_governance_event( + scope="portfolio", project_id="portfolio", actor_type="user", + 
event_type="agent_bulk_canary_stopped", + idempotency_key=f"abcstop|{bulk_run_id}|{node_id}|{result['agent_id']}", + severity="high", status="error", + evidence=_app_db._make_evidence( + message=f"Canary stopped on {result['agent_id']}@{node_id}: {result['error']}", + outputs={"failed_agent": result["agent_id"], "error": result["error"]}, + ), + ) + break + + if mode == "canary" and not canary_stopped and candidates: + await _app_db.append_governance_event( + scope="portfolio", project_id="portfolio", actor_type="user", + event_type="agent_bulk_apply_completed", + idempotency_key=f"abac|{bulk_run_id}|{node_id}", + severity="info", status="ok", + evidence=_app_db._make_evidence( + message=f"Canary apply completed on {node_id}: {len(candidates)} agents", + outputs={"agents_applied": [r["agent_id"] for r in all_results + if r["node_id"] == node_id and r["status"] == "applied"]}, + ), + ) + + # Build summary + status_counts: Dict[str, int] = {} + for r in all_results: + status_counts[r["status"]] = status_counts.get(r["status"], 0) + 1 + + return JSONResponse(content={ + "results": all_results, + "node_errors": node_errors, + "summary": status_counts, + "dry_run": dry_run, + "mode": mode, + "bulk_run_id": bulk_run_id, + }) + + +@app.post("/api/agents/bulk/diff") +async def bulk_diff_agents( + nodes: str = "NODA1", + node: Optional[str] = None, + _auth: str = Depends(require_auth), +): + """Return diff summary for all agents with local overrides. 
Supports multi-node.""" + raw_nodes = node.upper() if node else nodes + node_ids = [n.strip().upper() for n in raw_nodes.split(",") if n.strip()] + + report: List[Dict] = [] + node_errors: List[Dict] = [] + + for node_id in node_ids: + ni = _node_info(node_id) + policy = ni["policy"] + gw_url = ni["gateway_url"] + overrides = await _app_db.list_agent_overrides(node_id) + agents_raw, err, latency_ms = await _fetch_agents_from_gateway( + node_id, gw_url, + timeout_ms=policy["gateway_timeout_ms"], + get_retry=policy["get_retry"], + ) + if err: + node_errors.append({"node_id": node_id, "error": err, "latency_ms": latency_ms}) + agents_map = {a["agent_id"]: a for a in agents_raw} + + for override in overrides: + aid = override["agent_id"] + desired = _agent_desired_payload(override) + plan_id = _app_db._agent_payload_hash(desired) + active_agent = agents_map.get(aid, {}) + active_prompt = active_agent.get("active_prompt") or "" + desired_prompt = desired.get("system_prompt_md") or "" + diff_lines = list(_difflib.unified_diff( + active_prompt.splitlines(keepends=True), + desired_prompt.splitlines(keepends=True), + fromfile=f"{aid}:active", tofile=f"{aid}:desired", n=2, + )) + is_drift = bool(override.get("last_applied_hash") and + override["last_applied_hash"] != plan_id) + report.append({ + "node_id": node_id, "agent_id": aid, + "plan_id": plan_id, + "last_applied_hash": override.get("last_applied_hash"), + "drift": is_drift, + "diff_lines": len(diff_lines), + "diff_text": "".join(diff_lines[:60]), + }) + + return JSONResponse(content={"report": report, "node_errors": node_errors, + "nodes_queried": node_ids}) + + +@app.get("/api/agents/export/prompts") +async def export_agent_prompts( + nodes: str = "NODA1", + node: Optional[str] = None, + _auth: str = Depends(require_auth), +): + """Export all agent system prompts as a JSON bundle (multi-node).""" + raw_nodes = node.upper() if node else nodes + node_ids = [n.strip().upper() for n in raw_nodes.split(",") if n.strip()] 
+ + bundle: List[Dict] = [] + node_errors: List[Dict] = [] + + for node_id in node_ids: + ni = _node_info(node_id) + policy = ni["policy"] + gw_url = ni["gateway_url"] + overrides = await _app_db.list_agent_overrides(node_id) + agents_raw, err, latency_ms = await _fetch_agents_from_gateway( + node_id, gw_url, + timeout_ms=policy["gateway_timeout_ms"], + get_retry=policy["get_retry"], + ) + if err: + node_errors.append({"node_id": node_id, "error": err, "latency_ms": latency_ms}) + agents_map = {a["agent_id"]: a for a in agents_raw} + overrides_map = {o["agent_id"]: o for o in overrides} + + for aid, agent in agents_map.items(): + override = overrides_map.get(aid) + merged = _merge_agent_with_override(agent, override) + bundle.append({ + "agent_id": aid, "node_id": node_id, + "display_name": merged.get("display_name", aid), + "domain": merged.get("domain"), + "system_prompt_md": merged.get("system_prompt_md"), + "has_override": merged.get("has_override", False), + }) + + bundle.sort(key=lambda x: (x["node_id"], x["agent_id"])) + return JSONResponse(content={ + "nodes_queried": node_ids, + "exported_at": _app_db._now(), + "count": len(bundle), + "agents": bundle, + "node_errors": node_errors, + }) + + +# ── Kling AI proxy ──────────────────────────────────────────────────────────── + +@app.get("/api/aurora/kling/health") +async def console_kling_health() -> Dict[str, Any]: + try: + return await _aurora_request_json("GET", "/api/aurora/kling/health", timeout=12.0, retries=1) + except Exception as exc: + return {"ok": False, "error": str(exc)} + + +@app.post("/api/aurora/kling/enhance/{job_id}") +async def console_kling_enhance( + job_id: str, + prompt: str = Form("enhance video quality, improve sharpness and clarity"), + negative_prompt: str = Form("noise, blur, artifacts, distortion"), + mode: str = Form("pro"), + duration: str = Form("5"), + cfg_scale: float = Form(0.5), +) -> Dict[str, Any]: + return await _aurora_request_json( + "POST", + 
"/api/aurora/kling/enhance", + data={ + "job_id": job_id, + "prompt": prompt, + "negative_prompt": negative_prompt, + "mode": mode, + "duration": duration, + "cfg_scale": str(cfg_scale), + }, + timeout=120.0, + retries=1, + ) + + +@app.get("/api/aurora/kling/status/{job_id}") +async def console_kling_status(job_id: str) -> Dict[str, Any]: + return await _aurora_request_json("GET", f"/api/aurora/kling/status/{job_id}", timeout=20.0, retries=2) + + +@app.get("/api/aurora/kling/task/{task_id}") +async def console_kling_task(task_id: str, endpoint: str = Query("video2video")) -> Dict[str, Any]: + return await _aurora_request_json("GET", f"/api/aurora/kling/task/{task_id}?endpoint={endpoint}", timeout=20.0, retries=2) + + +@app.get("/api/aurora/plates/{job_id}") +async def console_plates(job_id: str) -> Dict[str, Any]: + return await _aurora_request_json("GET", f"/api/aurora/plates/{job_id}", timeout=15.0, retries=2) + + +# ── Sofiia Auto-Router & Budget Dashboard proxy ──────────────────────────────── + +async def _router_request_json(method: str, path: str, json_body: Optional[Dict] = None, timeout: float = 20.0) -> Dict[str, Any]: + """Forward request to the Router service (noda1 or local).""" + import aiohttp as _aiohttp + # Use the first configured node's router URL + nodes_reg = load_nodes_registry() + nodes = (nodes_reg.get("nodes") or {}) if isinstance(nodes_reg, dict) else {} + node_id = next(iter(nodes), "noda1") + router_url = get_router_url(node_id) + url = f"{router_url.rstrip('/')}{path}" + try: + async with _aiohttp.ClientSession() as sess: + if method.upper() == "GET": + async with sess.get(url, timeout=_aiohttp.ClientTimeout(total=timeout)) as resp: + return await resp.json(content_type=None) + else: + async with sess.post(url, json=json_body, timeout=_aiohttp.ClientTimeout(total=timeout)) as resp: + return await resp.json(content_type=None) + except Exception as e: + return {"error": str(e)} + + +@app.post("/api/sofiia/auto-route") +async def 
console_auto_route(body: Dict[str, Any]) -> Dict[str, Any]: + """Proxy: classify prompt and get recommended model.""" + return await _router_request_json("POST", "/v1/sofiia/auto-route", json_body=body) + + +@app.get("/api/sofiia/budget") +async def console_budget_dashboard() -> Dict[str, Any]: + """Proxy: get budget dashboard data from router.""" + return await _router_request_json("GET", "/v1/sofiia/budget") + + +@app.post("/api/sofiia/budget/limits") +async def console_set_budget_limits(body: Dict[str, Any]) -> Dict[str, Any]: + """Proxy: set provider budget limit.""" + return await _router_request_json("POST", "/v1/sofiia/budget/limits", json_body=body) + + +@app.get("/api/sofiia/budget/stats") +async def console_budget_stats(window_hours: int = 24) -> Dict[str, Any]: + """Proxy: get budget stats for time window.""" + return await _router_request_json("GET", f"/v1/sofiia/budget/stats?window_hours={window_hours}") + + +@app.get("/api/sofiia/catalog") +async def console_model_catalog(refresh_ollama: bool = False) -> Dict[str, Any]: + """Proxy: get full model catalog with availability.""" + return await _router_request_json("GET", f"/v1/sofiia/catalog?refresh_ollama={str(refresh_ollama).lower()}")