# NOTE: removed web-viewer chrome that was accidentally captured with this
# file (listing header, "Raw/Blame/History", ambiguous-Unicode warning).
# Original path: microdao-daarion/services/sofiia-console/app/main.py
"""
Sofiia Control Console — FastAPI BFF v0.3.0
Runtime contract (project/session/user), full status, WebSocket events,
voice proxy, ops, nodes. UI never calls external services directly.
"""
import asyncio
import base64
import io
import json
import os
import re
import sys
import subprocess
import mimetypes
import time
import uuid
import logging
import collections
import statistics
import socket
from datetime import datetime, timezone
from pathlib import Path
from typing import Any, Dict, List, Optional, Set, Tuple
from urllib.parse import quote
import httpx
from fastapi import Body, FastAPI, Depends, HTTPException, UploadFile, File, Form, Query, Request, Response, WebSocket, WebSocketDisconnect
from fastapi.responses import HTMLResponse, StreamingResponse, JSONResponse
from fastapi.middleware.cors import CORSMiddleware
from pydantic import BaseModel
try:
import cv2 # type: ignore[import-untyped]
except Exception: # pragma: no cover - optional dependency in console env
cv2 = None
from .auth import (
require_api_key, require_api_key_strict, require_auth, require_auth_strict,
get_console_api_key, _key_valid, _cookie_token, _expected_cookie_token,
_COOKIE_NAME, _COOKIE_MAX_AGE, _IS_PROD,
)
from .config import (
load_nodes_registry,
save_nodes_registry,
get_router_url,
get_gateway_url,
get_node_ssh_profile,
get_memory_service_url,
get_ollama_url,
is_voice_ha_enabled,
get_voice_ha_router_url,
)
from .router_client import infer, execute_tool, health
from .nodes import get_nodes_dashboard
from .monitor import collect_all_nodes
from .ops import run_ops_action, OPS_ACTIONS
from .docs_router import docs_router
from . import db as _app_db
from .metrics import (
SOFIIA_SEND_REQUESTS_TOTAL,
SOFIIA_IDEMPOTENCY_REPLAYS_TOTAL,
SOFIIA_CURSOR_REQUESTS_TOTAL,
SOFIIA_RATE_LIMITED_TOTAL,
render_metrics,
)
from .idempotency import get_idempotency_store, ReplayEntry
from .rate_limit import get_rate_limiter
from .logging import (
configure_sofiia_logger,
get_request_id,
hash_idempotency_key,
log_event,
)
logger = logging.getLogger(__name__)
configure_sofiia_logger()
# ── Build info ────────────────────────────────────────────────────────────────
# Version/build metadata surfaced by /api/health and /api/status/full.
_VERSION = "0.4.0"
_BUILD_SHA = os.getenv("BUILD_SHA", "dev")
_BUILD_TIME = os.getenv("BUILD_TIME", "local")
_BUILD_ID = os.getenv("BUILD_ID", os.getenv("GIT_SHA", "local"))
_START_TIME = time.monotonic()  # monotonic base for uptime_s reporting
_NODE_ID = os.getenv("NODE_ID", os.getenv("HOSTNAME", "noda2"))
# ── Rate limiter ──────────────────────────────────────────────────────────────
# Per-key sliding-window buckets used by _check_rate(); RPS/burst tunables
# are read once at import time from the environment.
_rate_buckets: Dict[str, collections.deque] = {}
_idempotency_store = get_idempotency_store()
_rate_limiter = get_rate_limiter()
_RL_CHAT_RPS = float(os.getenv("SOFIIA_RL_CHAT_RPS", "1.0"))
_RL_CHAT_BURST = int(os.getenv("SOFIIA_RL_CHAT_BURST", "8"))
_RL_OP_RPS = float(os.getenv("SOFIIA_RL_OP_RPS", "3.0"))
_RL_OP_BURST = int(os.getenv("SOFIIA_RL_OP_BURST", "20"))
def _check_rate(key: str, max_calls: int, window_sec: int = 60) -> bool:
    """Sliding-window rate check: admit at most *max_calls* per *window_sec*.

    Timestamps are stored per key in a module-level deque; entries that fell
    out of the window are pruned on every call. Returns True when the call
    is admitted (and recorded), False when the key is over its budget.
    """
    ts_now = time.monotonic()
    bucket = _rate_buckets.setdefault(key, collections.deque())
    # Prune timestamps older than the window.
    while bucket:
        if ts_now - bucket[0] <= window_sec:
            break
        bucket.popleft()
    if len(bucket) < max_calls:
        bucket.append(ts_now)
        return True
    return False
def _resolve_operator_id(request: Request, body: "ChatMessageSendBody", request_id: str) -> Tuple[str, bool]:
    """Derive an operator identity for rate limiting / audit.

    Returns (operator_id, is_fallback). Identity sources, in priority order:
    explicit client metadata, the body's user_id, the X-Operator-Id header.
    When none yield a non-empty value, falls back to the client IP (or the
    request id) and flags the result as a fallback. IDs are capped at 128 chars.
    """
    meta = body.client or {}
    candidates = (
        meta.get("operator_id"),
        body.user_id,
        request.headers.get("X-Operator-Id"),
    )
    for candidate in candidates:
        ident = str(candidate or "").strip()
        if ident:
            return ident[:128], False
    ip = request.client.host if request.client else "unknown"
    fallback = f"ip:{ip}" if ip else f"req:{request_id}"
    return fallback[:128], True
def _rate_limited_http(scope: str, retry_after_s: int) -> HTTPException:
    """Build a structured 429 with a Retry-After header (minimum 1 second)."""
    wait_s = int(retry_after_s or 1)
    if wait_s < 1:
        wait_s = 1
    detail = {
        "error": {"code": "rate_limited", "scope": scope},
        "retry_after_s": wait_s,
    }
    return HTTPException(
        status_code=429,
        detail=detail,
        headers={"Retry-After": str(wait_s)},
    )
# ── Voice error rings (repro pack for incident diagnosis) ─────────────────────
# Circular buffers: last 5 TTS errors and last 5 LLM errors.
# Populated by all voice endpoints. Read by /api/voice/degradation_status.
_RING_SIZE = 5
_voice_tts_errors: collections.deque = collections.deque(maxlen=_RING_SIZE)
_voice_llm_errors: collections.deque = collections.deque(maxlen=_RING_SIZE)
# Last-seen voice selections, reported alongside the error rings.
_voice_last_model: str = "unknown" # last model selected for voice
_voice_last_profile: str = "unknown" # last voice_profile used
def _record_tts_error(error_type: str, status_code: Optional[int],
                      detail: str, voice: str = "") -> None:
    """Append one TTS failure to the bounded repro ring (UTC HH:MM:SSZ stamp)."""
    entry = {
        "ts": time.strftime("%H:%M:%SZ", time.gmtime()),
        "type": error_type,
        "status": status_code,
        "voice": voice,
        # Detail is truncated so a single long traceback cannot bloat the ring.
        "detail": detail[:120],
    }
    _voice_tts_errors.append(entry)
def _record_llm_error(error_type: str, model: str, detail: str) -> None:
    """Append one voice-LLM failure to the bounded repro ring."""
    entry = {
        "ts": time.strftime("%H:%M:%SZ", time.gmtime()),
        "type": error_type,
        "model": model,
        # Truncated to keep ring entries small.
        "detail": detail[:120],
    }
    _voice_llm_errors.append(entry)
# ── Concurrent voice synthesizer guard ───────────────────────────────────────
# Limits simultaneous TTS synthesis calls to prevent memory-service DoS.
_MAX_CONCURRENT_TTS = int(os.getenv("MAX_CONCURRENT_TTS", "4"))
# Created lazily by _get_tts_semaphore() so it binds to the running loop.
_tts_semaphore: Optional[asyncio.Semaphore] = None # initialised in startup
def _get_tts_semaphore() -> asyncio.Semaphore:
    """Lazily create and return the shared TTS concurrency semaphore."""
    global _tts_semaphore
    if _tts_semaphore is not None:
        return _tts_semaphore
    _tts_semaphore = asyncio.Semaphore(_MAX_CONCURRENT_TTS)
    return _tts_semaphore
# ── Telemetry dedup store ─────────────────────────────────────────────────────
# Prevents processing duplicate beacon submissions (same session+turn within 30s).
# OrderedDict maps "session:turn" -> monotonic timestamp, oldest first.
_telem_seen: collections.OrderedDict = collections.OrderedDict()
_TELEM_DEDUP_TTL = 30.0 # seconds
_TELEM_DEDUP_MAX = 500 # max keys before LRU eviction
def _telem_is_duplicate(session_id: str, turn_id: str) -> bool:
    """Return True when this (session, turn) beacon was already seen recently.

    Expired entries are dropped first (insertion order is oldest-first), then
    the oldest entry is evicted if the store is at capacity. A fresh key is
    recorded with the current monotonic time and reported as new (False).
    """
    dedup_key = f"{session_id}:{turn_id}"
    ts_now = time.monotonic()
    # Drop entries whose TTL has elapsed.
    while _telem_seen:
        oldest_ts = next(iter(_telem_seen.values()))
        if oldest_ts + _TELEM_DEDUP_TTL >= ts_now:
            break
        _telem_seen.popitem(last=False)
    # Cap total keys (evict the oldest).
    if len(_telem_seen) >= _TELEM_DEDUP_MAX:
        _telem_seen.popitem(last=False)
    if dedup_key in _telem_seen:
        return True
    _telem_seen[dedup_key] = ts_now
    return False
def _env_int(name: str, default: int) -> int:
raw = (os.getenv(name, str(default)) or "").strip()
try:
return int(raw)
except Exception:
return default
def _env_float(name: str, default: float) -> float:
raw = (os.getenv(name, str(default)) or "").strip()
try:
return float(raw)
except Exception:
return default
# ── App config ────────────────────────────────────────────────────────────────
ROUTER_API_KEY = os.getenv("SUPERVISOR_API_KEY", "").strip()
IS_PROD = os.getenv("ENV", "dev").strip().lower() in ("prod", "production", "staging")
SOFIIA_PREFERRED_CHAT_MODEL = os.getenv("SOFIIA_PREFERRED_CHAT_MODEL", "ollama:qwen3:14b").strip() or "ollama:qwen3:14b"
# Local Ollama runtime tuning for NODA2 (can be overridden via env).
SOFIIA_OLLAMA_TIMEOUT_SEC = _env_float("SOFIIA_OLLAMA_TIMEOUT_SEC", 120.0)
SOFIIA_OLLAMA_VOICE_TIMEOUT_SEC = _env_float("SOFIIA_OLLAMA_VOICE_TIMEOUT_SEC", 45.0)
SOFIIA_OLLAMA_KEEP_ALIVE = (os.getenv("SOFIIA_OLLAMA_KEEP_ALIVE", "30m") or "").strip()
SOFIIA_OLLAMA_NUM_CTX = _env_int("SOFIIA_OLLAMA_NUM_CTX", 8192)
# Default thread count: CPU count minus 2 (headroom for the BFF), clamped to 4..16.
_DEFAULT_OLLAMA_THREADS = max(4, min(16, (os.cpu_count() or 8) - 2))
SOFIIA_OLLAMA_NUM_THREAD = _env_int("SOFIIA_OLLAMA_NUM_THREAD", _DEFAULT_OLLAMA_THREADS)
SOFIIA_OLLAMA_NUM_GPU = _env_int("SOFIIA_OLLAMA_NUM_GPU", -1)  # -1 = not applied (see _apply_ollama_runtime_options)
SOFIIA_OLLAMA_NUM_PREDICT_TEXT = _env_int("SOFIIA_OLLAMA_NUM_PREDICT_TEXT", 768)
# Voice guardrails — injected INSTEAD OF the full prompt for voice turns.
# Constraints are hard: no lists, no markdown, no <think>, max 2 sentences.
# NOTE: both prompts below are runtime strings sent to the LLM — do not
# reformat or translate their contents.
SOFIIA_VOICE_PROMPT_SUFFIX = """
## VOICE MODE — HARD RULES (не порушувати ніколи)
- Відповідай МАКСИМУМ 2 речення (виняток: якщо прямо попросили деталей).
- Жодних списків, жодних bullet-points, жодного markdown (*bold*, -list, ##header).
- Жодного коду (`` ` ``), жодних URL.
- Жодного <think>...</think> — думки всередині, назовні лише відповідь.
- Мова: розмовна, природна для голосу. Без "Як AI...".
- Якщо питання складне — дай коротку відповідь і запропонуй продовжити текстом.
"""
# Full system prompt used for text chat turns (identity, infra facts,
# answer-style rules, and an honest capability list for this console).
SOFIIA_SYSTEM_PROMPT = """Ти Sofiia — Chief AI Architect та Technical Sovereign екосистеми DAARION.city.
## Твоя ідентичність
- Ти: Sofiia, головний AI-архітектор і технічний суверен DAARION.city
- Ти підпорядковуєшся одній людині — засновнику та головному архітектору платформи
## Засновник та архітектор DAARION
- Позивний: **Повелитель Хаосу** (використовуй у неформальних/робочих контекстах)
- Офіційне ім'я: **Іван Титар** (використовуй в офіційних повідомленнях, документах, репортах)
- Роль: Головний розробник та архітектор DAARION — єдиний, хто має повний контроль над платформою
- Ніякої іншої людини з ім'ям "Савтра" або будь-яким іншим іменем у ролі засновника НЕ ІСНУЄ
## Ноди та інфраструктура
- NODA1: production runtime (router, incidents, alerts, governance)
- NODA2: control plane / development (твій primary home, звідки тебе викликають)
- NODA3: AI/ML experimentation
## Правила відповіді
- Відповідай **українською мовою** за замовчуванням
- Технічні терміни (API, SLO, backend, deploy, incident, release gate тощо) залишай **англійською**
- Відповідай структуровано, конкретно, без зайвих вступів
- НЕ вигадуй імена людей, назви проектів або факти яких не знаєш — краще скажи що не маєш цих даних
- НЕ галюцинуй: якщо не знаєш — скажи чесно "не маю цих даних в поточному контексті"
## Твої можливості через Control Console (що реально доступно)
- **Chat**: відповіді на питання через локальний LLM (Ollama на NODA2)
- **Голосовий чат**: STT + TTS через Memory Service (Polina/Ostap Neural)
- **Nodes health**: статус NODA1/NODA2 (router, memory, NCS)
- **Integrations status**: Notion API, Router, Memory Service
- **Memory/session**: зберігання контексту розмов (Qdrant)
## Що наразі НЕ доступно через цей інтерфейс
- Пряме читання/запис в Notion (тільки статус перевірки)
- Пряме читання GitHub репозиторіїв (немає repo tool у цьому контейнері)
- Виконання bash/python команд
- Деплой або зміна конфігурацій напряму
Якщо тебе просять щось що не є в переліку доступного — відповідай чесно:
"Ця можливість не підключена до Control Console. Для цього використай Cursor або OpenCode на NODA2."
"""
# CORS: explicit comma-separated origins from env always win; otherwise the
# list is wide-open in dev and a fixed allowlist in prod/staging.
_CORS_ORIGINS = (
    [o.strip() for o in os.getenv("CORS_ORIGINS", "").split(",") if o.strip()]
    or (
        ["*"] if not IS_PROD
        else [
            "https://console.daarion.space",
            "https://app.daarion.space",
            "http://localhost:8002",
            "http://localhost:8000",
            "http://127.0.0.1:8002",
        ]
    )
)
def _is_container_runtime() -> bool:
return Path("/.dockerenv").exists() or bool(os.getenv("KUBERNETES_SERVICE_HOST"))
# Aurora media-forensics service endpoints: in-cluster hostname when running
# inside a container, localhost otherwise. AURORA_FALLBACK_URL is tried when
# the primary transport fails (see _aurora_request_json).
_aurora_default_url = "http://aurora-service:9401" if _is_container_runtime() else "http://127.0.0.1:9401"
AURORA_SERVICE_URL = os.getenv("AURORA_SERVICE_URL", _aurora_default_url).rstrip("/")
AURORA_FALLBACK_URL = os.getenv("AURORA_FALLBACK_URL", "http://127.0.0.1:9401").rstrip("/")
_aurora_home_data_dir = Path.home() / ".sofiia" / "aurora-data"
# Data dir: prefer the mounted /data volume in containers when it is writable.
if _is_container_runtime() and Path("/data").exists() and os.access("/data", os.W_OK):
    _aurora_default_data_dir = "/data/aurora"
else:
    _aurora_default_data_dir = str(_aurora_home_data_dir)
AURORA_DATA_DIR = Path(os.getenv("AURORA_DATA_DIR", _aurora_default_data_dir))
# In-process caches for live render progress, plus their on-disk mirrors.
_aurora_live_cache: Dict[str, Dict[str, Any]] = {}
_aurora_live_samples: Dict[str, collections.deque] = {}
_aurora_live_last: Dict[str, Dict[str, Any]] = {}
_aurora_live_last_loaded = False
_aurora_live_last_path = (AURORA_DATA_DIR.parent / "sofiia-console-cache" / "aurora_live_last.json")
_aurora_smart_runs: Dict[str, Dict[str, Any]] = {}
_aurora_smart_runs_loaded = False
_aurora_smart_runs_path = (AURORA_DATA_DIR.parent / "sofiia-console-cache" / "aurora_smart_runs.json")
# Rolling per-strategy scoreboard for the "smart" pipeline selector.
_aurora_smart_policy: Dict[str, Any] = {
    "updated_at": None,
    "strategies": {
        "local_only": {"count": 0, "avg_score": 0.0, "wins": 0, "losses": 0},
        "local_then_kling": {"count": 0, "avg_score": 0.0, "wins": 0, "losses": 0},
    },
}
_aurora_smart_policy_loaded = False
_aurora_smart_policy_path = (AURORA_DATA_DIR.parent / "sofiia-console-cache" / "aurora_smart_policy.json")
# Smart-run polling knobs (env-tunable, clamped to sane minimums).
_AURORA_SMART_MAX_RUNS = max(20, int(os.getenv("AURORA_SMART_MAX_RUNS", "200")))
_AURORA_SMART_LOCAL_POLL_SEC = max(2.0, float(os.getenv("AURORA_SMART_LOCAL_POLL_SEC", "3.0")))
_AURORA_SMART_KLING_POLL_SEC = max(3.0, float(os.getenv("AURORA_SMART_KLING_POLL_SEC", "6.0")))
_AURORA_SMART_LOCAL_MAX_SEC = max(60.0, float(os.getenv("AURORA_SMART_LOCAL_MAX_SEC", "10800")))
_AURORA_SMART_KLING_MAX_SEC = max(60.0, float(os.getenv("AURORA_SMART_KLING_MAX_SEC", "3600")))
# Media tool endpoints (ComfyUI agent/UI, face swapper, image-gen, media router).
MEDIA_COMFY_AGENT_URL = os.getenv(
    "MEDIA_COMFY_AGENT_URL",
    "http://comfy-agent:8880" if _is_container_runtime() else "http://127.0.0.1:8880",
).rstrip("/")
MEDIA_COMFY_UI_URL = os.getenv(
    "MEDIA_COMFY_UI_URL",
    "http://comfyui:8188" if _is_container_runtime() else "http://127.0.0.1:8188",
).rstrip("/")
MEDIA_SWAPPER_URL = os.getenv(
    "MEDIA_SWAPPER_URL",
    "http://swapper-service:8890" if _is_container_runtime() else "http://127.0.0.1:8890",
).rstrip("/")
MEDIA_IMAGE_GEN_URL = os.getenv(
    "MEDIA_IMAGE_GEN_URL",
    "http://image-gen-service:7860" if _is_container_runtime() else "http://127.0.0.1:7860",
).rstrip("/")
MEDIA_ROUTER_URL = os.getenv("MEDIA_ROUTER_URL", "").strip().rstrip("/")
MEDIA_ROUTER_FALLBACK_URL = os.getenv("MEDIA_ROUTER_FALLBACK_URL", "http://127.0.0.1:9102").rstrip("/")
# Bounded history of recently submitted media jobs.
_media_recent_jobs: collections.deque = collections.deque(maxlen=40)
def _apply_ollama_runtime_options(options: Dict[str, Any]) -> Dict[str, Any]:
    """Return a copy of *options* overlaid with the configured Ollama tuning.

    Only knobs with meaningful values are applied: num_ctx and num_thread
    must be positive, while num_gpu also accepts 0.
    """
    tuned = dict(options)
    overrides = (
        ("num_ctx", SOFIIA_OLLAMA_NUM_CTX, SOFIIA_OLLAMA_NUM_CTX > 0),
        ("num_thread", SOFIIA_OLLAMA_NUM_THREAD, SOFIIA_OLLAMA_NUM_THREAD > 0),
        ("num_gpu", SOFIIA_OLLAMA_NUM_GPU, SOFIIA_OLLAMA_NUM_GPU >= 0),
    )
    for key, value, enabled in overrides:
        if enabled:
            tuned[key] = value
    return tuned
def _make_ollama_payload(model_name: str, messages: List[Dict[str, Any]], options: Dict[str, Any]) -> Dict[str, Any]:
    """Assemble a non-streaming Ollama chat payload with runtime options applied."""
    body: Dict[str, Any] = {
        "model": model_name,
        "messages": messages,
        "stream": False,
        "options": _apply_ollama_runtime_options(options),
    }
    keep_alive = SOFIIA_OLLAMA_KEEP_ALIVE
    if keep_alive:
        body["keep_alive"] = keep_alive
    return body
# Cached nodes telemetry (updated by background task)
# Read by status endpoints; written only by _nodes_poll_loop().
_nodes_cache: Dict[str, Any] = {"nodes": [], "summary": {}, "ts": ""}
_NODES_POLL_INTERVAL = int(os.getenv("NODES_POLL_INTERVAL_SEC", "30"))
async def _nodes_poll_loop() -> None:
    """Background task: poll all nodes every N seconds, update cache + WS broadcast.

    Reads the nodes registry each cycle (picks up config changes), collects
    health from every node, refreshes the module-level _nodes_cache, and —
    only when WS clients are connected — pushes a trimmed per-node snapshot
    as a "nodes.status" event. Failures are swallowed so the loop never dies.
    """
    while True:
        try:
            reg = load_nodes_registry()
            nodes_cfg = reg.get("nodes", {})
            timeout = float(reg.get("defaults", {}).get("health_timeout_sec", 10))
            nodes = await collect_all_nodes(nodes_cfg, router_api_key=ROUTER_API_KEY, timeout_per_node=timeout)
            online = sum(1 for n in nodes if n.get("online"))
            router_ok = sum(1 for n in nodes if n.get("router_ok"))
            _nodes_cache.update({
                "nodes": nodes,
                "summary": {"total": len(nodes), "online": online, "router_ok": router_ok},
                "ts": _now_iso(),
            })
            # Broadcast only when someone is listening.
            if _ws_clients:
                await _broadcast(_make_event("nodes.status", {
                    "nodes": [
                        {
                            "id": n["node_id"],
                            "label": n.get("label", n["node_id"]),
                            "online": n.get("online", False),
                            "router_ok": n.get("router_ok", False),
                            "router_latency_ms": n.get("router_latency_ms"),
                            "gateway_ok": n.get("gateway_ok"),
                            "heartbeat_age_s": n.get("heartbeat_age_s"),
                            "open_incidents": n.get("open_incidents"),
                            "monitor_source": n.get("monitor_source"),
                        }
                        for n in nodes
                    ],
                    "summary": {"total": len(nodes), "online": online, "router_ok": router_ok},
                }))
        except Exception as e:
            # Polling is best-effort; errors are only visible at debug level.
            logger.debug("nodes poll error: %s", e)
        await asyncio.sleep(_NODES_POLL_INTERVAL)
from contextlib import asynccontextmanager
@asynccontextmanager
async def lifespan(app_: Any):
    """App lifespan: init DB, start the nodes poll task, resume smart monitors.

    DB init failure is non-fatal (Projects/Docs features degrade). On
    shutdown the poll task is cancelled and awaited, then the DB is closed.
    """
    # Init SQLite DB for projects/documents/sessions/messages
    try:
        await _app_db.init_db()
        logger.info("✅ sofiia-console DB initialised")
    except Exception as e:
        logger.warning("DB init failed (non-fatal, Projects/Docs disabled): %s", e)
    task = asyncio.create_task(_nodes_poll_loop())
    logger.info("Nodes poll loop started (interval=%ds)", _NODES_POLL_INTERVAL)
    # Resume any Aurora "smart" runs that were in flight before a restart.
    try:
        _smart_resume_active_monitors()
    except Exception as e:
        logger.warning("aurora smart monitor resume failed: %s", e)
    yield
    task.cancel()
    try:
        await task
    except asyncio.CancelledError:
        pass
    await _app_db.close_db()
# FastAPI application wired to the lifespan above.
app = FastAPI(
    title="Sofiia Control Console",
    description="Operator BFF for Sofiia CTO agent",
    version=_VERSION,
    lifespan=lifespan,
)
app.add_middleware(
    CORSMiddleware,
    allow_origins=_CORS_ORIGINS,
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)
# Projects + Documents + Sessions + Dialog Map API
app.include_router(docs_router)
# ── WebSocket event bus ───────────────────────────────────────────────────────
# Live set of connected WS clients; pruned by _broadcast on send failure.
_ws_clients: Set[WebSocket] = set()
def _now_iso() -> str:
return datetime.now(timezone.utc).isoformat(timespec="milliseconds")
def _make_event(
    event_type: str,
    data: Dict[str, Any],
    *,
    project_id: str = "",
    session_id: str = "",
    user_id: str = "console_user",
) -> Dict[str, Any]:
    """Wrap *data* in the versioned WS event envelope used by the console UI."""
    envelope: Dict[str, Any] = {
        "v": 1,
        "type": event_type,
        "ts": _now_iso(),
        "project_id": project_id,
        "session_id": session_id,
        "user_id": user_id,
        "data": data,
    }
    return envelope
async def _broadcast(event: Dict[str, Any]) -> None:
    """Fan the JSON-encoded event out to every connected WS client.

    Clients whose send fails are removed from the registry afterwards.
    """
    global _ws_clients
    if not _ws_clients:
        return
    message = json.dumps(event, ensure_ascii=False)
    failed: Set[WebSocket] = set()
    for client in list(_ws_clients):
        try:
            await client.send_text(message)
        except Exception:
            failed.add(client)
    _ws_clients -= failed
def _broadcast_bg(event: Dict[str, Any]) -> None:
"""Fire-and-forget broadcast from sync context."""
try:
loop = asyncio.get_event_loop()
if loop.is_running():
loop.create_task(_broadcast(event))
except Exception:
pass
# ── AISTALK adapter ───────────────────────────────────────────────────────────
# Optional integration: constructed only when AISTALK_ENABLED=true and the
# adapter module imports cleanly; otherwise the feature is disabled (None).
try:
    from .adapters.aistalk import AISTALKAdapter as _AISTALKAdapter
    _aistalk = _AISTALKAdapter(
        base_url=os.getenv("AISTALK_URL", ""),
        api_key=os.getenv("AISTALK_API_KEY", ""),
    ) if os.getenv("AISTALK_ENABLED", "false").lower() == "true" else None
except Exception:
    _aistalk = None
# ─── Health ─────────────────────────────────────────────────────────────────
@app.get("/api/health")
async def api_health():
    """Liveness + router reachability summary.

    Always reports BFF build metadata. When nodes are configured, probes the
    router of NODA2 (preferred) or the first registry entry and folds its
    health into the top-level "ok" flag.
    """
    base = {
        "ok": True,
        "service": "sofiia-console",
        "version": _VERSION,
        "build": _BUILD_ID,
        "env": os.getenv("ENV", "dev"),
        "uptime_s": int(time.monotonic() - _START_TIME),
    }
    reg = load_nodes_registry()
    nodes_map = reg.get("nodes") or {}
    nodes = list(nodes_map.items())
    if not nodes:
        return {**base, "message": "no nodes configured"}
    # Prefer the NODA2 control-plane node when present.
    first_id, _first_cfg = ("NODA2", nodes_map["NODA2"]) if "NODA2" in nodes_map else nodes[0]
    router_url = get_router_url(first_id)
    if not router_url:
        return {**base, "message": "no router_url"}
    try:
        r = await health(router_url)
        return {**base, "ok": r.get("ok", False), "router": r, "node_id": first_id}
    except Exception as e:
        return {**base, "ok": False, "error": str(e)[:200], "node_id": first_id}
# ─── Status/Full ─────────────────────────────────────────────────────────────
async def _probe_router(router_url: str) -> Dict[str, Any]:
    """Probe a router's health endpoint and key route availability.

    Tries /healthz then /health; on the first 200 it also checks whether the
    tools-execute and agent-infer routes exist (a 405 on GET means the
    POST-only route is present). Returns reachable=False when neither health
    path responds with 200.
    """
    t0 = time.monotonic()
    try:
        async with httpx.AsyncClient(timeout=5.0) as c:
            for path in ("/healthz", "/health"):
                try:
                    r = await c.get(f"{router_url.rstrip('/')}{path}")
                    if r.status_code == 200:
                        latency = int((time.monotonic() - t0) * 1000)
                        # probe tool execute availability
                        tool_ok = False
                        try:
                            r2 = await c.get(
                                f"{router_url.rstrip('/')}/v1/tools/execute",
                                timeout=1.5,
                            )
                            # 405 = route exists but is POST-only.
                            tool_ok = r2.status_code in (200, 405)
                        except Exception:
                            pass
                        infer_ok = False
                        try:
                            r3 = await c.get(
                                f"{router_url.rstrip('/')}/v1/agents/sofiia/infer",
                                timeout=1.5,
                            )
                            infer_ok = r3.status_code in (200, 405)
                        except Exception:
                            pass
                        return {"url": router_url, "reachable": True,
                                "routes": {"tools_execute": tool_ok, "agent_infer": infer_ok},
                                "latency_ms": latency}
                except Exception:
                    # Try the next health path.
                    continue
            return {"url": router_url, "reachable": False, "routes": {}, "latency_ms": None}
    except Exception as e:
        return {"url": router_url, "reachable": False, "error": str(e)[:100]}
async def _probe_memory(mem_url: str) -> Dict[str, Any]:
    """Probe the memory service /health endpoint and summarize vector stats."""
    started = time.monotonic()
    try:
        async with httpx.AsyncClient(timeout=5.0) as client:
            resp = await client.get(f"{mem_url.rstrip('/')}/health")
            resp.raise_for_status()
            payload = resp.json()
            stores = payload.get("vector_store", {})
            # Total points across all collections (non-dict entries ignored).
            total_vectors = 0
            for store in stores.values():
                if isinstance(store, dict):
                    total_vectors += store.get("points_count", 0) or 0
            return {
                "url": mem_url,
                "reachable": True,
                "stats": {"vectors": total_vectors, "collections": len(stores)},
                "latency_ms": int((time.monotonic() - started) * 1000),
            }
    except Exception as e:
        return {"url": mem_url, "reachable": False, "error": str(e)[:100]}
async def _probe_ollama(ollama_url: str) -> Dict[str, Any]:
    """Probe Ollama's /api/tags and report up to 20 available model names."""
    started = time.monotonic()
    try:
        async with httpx.AsyncClient(timeout=5.0) as client:
            resp = await client.get(f"{ollama_url.rstrip('/')}/api/tags")
            resp.raise_for_status()
            listing = resp.json()
            names = [entry.get("name", "") for entry in listing.get("models", [])]
            return {
                "url": ollama_url,
                "reachable": True,
                "models": names[:20],
                "latency_ms": int((time.monotonic() - started) * 1000),
            }
    except Exception as e:
        return {"url": ollama_url, "reachable": False, "models": [], "error": str(e)[:100]}
async def _probe_http(url: str, *, timeout: float = 4.0) -> Dict[str, Any]:
    """GET *url* once; "reachable" means any response with status below 500."""
    started = time.monotonic()
    try:
        async with httpx.AsyncClient(timeout=timeout) as client:
            resp = await client.get(url)
        elapsed_ms = int((time.monotonic() - started) * 1000)
        return {
            "reachable": resp.status_code < 500,
            "status": resp.status_code,
            "latency_ms": elapsed_ms,
        }
    except Exception as e:
        return {"reachable": False, "error": str(e)[:120]}
def _read_backends() -> Dict[str, str]:
"""Read backend env vars from BFF environment (no secrets)."""
return {
"alerts": os.getenv("ALERT_BACKEND", "unknown"),
"audit": os.getenv("AUDIT_BACKEND", "unknown"),
"incidents": os.getenv("INCIDENT_BACKEND", "unknown"),
"risk_history": os.getenv("RISK_HISTORY_BACKEND", "unknown"),
"backlog": os.getenv("BACKLOG_BACKEND", "unknown"),
}
def _read_cron_status() -> Dict[str, Any]:
cron_file = os.getenv("GOV_CRON_FILE", "/etc/cron.d/daarion-governance")
jobs_expected = [
"hourly_risk_snapshot", "daily_risk_digest", "risk_history_cleanup",
"weekly_platform_priority_digest", "weekly_backlog_generate", "daily_backlog_cleanup",
]
jobs_present: List[str] = []
installed: Any = False
warning = None
try:
content = Path(cron_file).read_text()
installed = True
for job in jobs_expected:
if job in content:
jobs_present.append(job)
except PermissionError:
installed = "unknown"
warning = "no read permission on cron file"
except FileNotFoundError:
installed = False
# Scan for latest artifact files
artifacts: Dict[str, Any] = {}
base = Path("ops")
for pattern, key in [
("reports/risk/*.md", "risk_digest_md"),
("reports/platform/*.md", "platform_digest_md"),
("backlog/*.jsonl", "backlog_jsonl"),
]:
try:
files = sorted(base.glob(pattern))
if files:
artifacts[key] = str(files[-1])
except Exception:
pass
result: Dict[str, Any] = {
"installed": installed,
"cron_file": cron_file,
"jobs_expected": jobs_expected,
"jobs_present": jobs_present,
"last_artifacts": artifacts,
}
if warning:
result["warning"] = warning
return result
@app.get("/api/status/full")
async def api_status_full():
    """Full stack diagnostic: BFF + router + memory + ollama + backends + cron."""
    reg = load_nodes_registry()
    nodes_cfg = reg.get("nodes", {})
    # Pick NODA2 router first, fallback to first node
    router_url = (
        get_router_url("NODA2")
        or (list(nodes_cfg.values())[0].get("router_url", "") if nodes_cfg else "")
    )
    mem_url = get_memory_service_url()
    ollama_url = get_ollama_url()
    async def _no_router() -> Dict[str, Any]:
        # Placeholder coroutine keeps asyncio.gather's arity fixed at three.
        return {"reachable": False, "url": "", "error": "no router_url configured"}
    router_info, mem_info, ollama_info = await asyncio.gather(
        _probe_router(router_url) if router_url else _no_router(),
        _probe_memory(mem_url),
        _probe_ollama(ollama_url),
        return_exceptions=False,
    )
    return {
        "bff": {
            "version": _VERSION,
            "build": _BUILD_ID,
            "env": os.getenv("ENV", "dev"),
            "uptime_s": int(time.monotonic() - _START_TIME),
            "ws_clients": len(_ws_clients),
            "aistalk_enabled": _aistalk is not None,
        },
        "router": router_info,
        "memory": mem_info,
        "ollama": ollama_info,
        "backends": _read_backends(),
        "cron": _read_cron_status(),
    }
@app.get("/api/integrations/status")
async def api_integrations_status(opencode_url: Optional[str] = Query(None)):
    """Integration probes for unified CTO hub in UI.

    Probes router, memory service, Open WebUI, Pieces OS, OpenCode (optional;
    the query parameter overrides the env var) and the Notion API (a simple
    authenticated /users/me check — no data access).
    """
    open_webui_probe_url = os.getenv("OPEN_WEBUI_PROBE_URL", "http://host.docker.internal:8080/health")
    open_webui_ui_url = os.getenv("OPEN_WEBUI_UI_URL", "http://localhost:8080")
    pieces_probe_url = os.getenv(
        "PIECES_OS_URL",
        "http://host.docker.internal:39300/workstream_pattern_engine/processors/status",
    )
    # Normalise the Pieces URL so it always ends with the status route.
    if not pieces_probe_url.rstrip("/").endswith("/workstream_pattern_engine/processors/status"):
        pieces_probe_url = pieces_probe_url.rstrip("/") + "/workstream_pattern_engine/processors/status"
    opencode_probe_url = (opencode_url or os.getenv("OPENCODE_URL", "")).strip()
    notion_api_key = os.getenv("NOTION_API_KEY", os.getenv("NOTION_TOKEN", "")).strip()
    probes = await asyncio.gather(
        _probe_http(get_router_url("NODA2").rstrip("/") + "/healthz"),
        _probe_http(get_memory_service_url().rstrip("/") + "/health"),
        _probe_http(open_webui_probe_url),
        _probe_http(pieces_probe_url),
        # asyncio.sleep(0, result=...) keeps gather's arity stable when
        # OpenCode is not configured.
        _probe_http(opencode_probe_url.rstrip("/") + "/health") if opencode_probe_url else asyncio.sleep(0, result={"reachable": False, "error": "not configured"}),
    )
    router_probe, memory_probe, open_webui_probe, pieces_probe, opencode_probe = probes
    notion_probe: Dict[str, Any] = {"configured": bool(notion_api_key), "reachable": False}
    if notion_api_key:
        try:
            async with httpx.AsyncClient(timeout=6.0) as c:
                r = await c.get(
                    "https://api.notion.com/v1/users/me",
                    headers={
                        "Authorization": f"Bearer {notion_api_key}",
                        "Notion-Version": "2022-06-28",
                    },
                )
                notion_probe["reachable"] = r.status_code == 200
                notion_probe["status"] = r.status_code
        except Exception as e:
            notion_probe["error"] = str(e)[:120]
    return {
        "integrations": {
            "sofiia_console": {"url": "/ui", "reachable": True},
            "router_noda2": {"url": get_router_url("NODA2"), **router_probe},
            "memory_service": {"url": get_memory_service_url(), **memory_probe},
            "open_webui": {"url": open_webui_ui_url, "probe_url": open_webui_probe_url, **open_webui_probe},
            "pieces_os": {"url": pieces_probe_url, **pieces_probe},
            "opencode": {
                "url": opencode_probe_url or "desktop/cli",
                **opencode_probe,
            },
            "notion": notion_probe,
        }
    }
# ─── Aurora media forensics proxy ────────────────────────────────────────────
def _aurora_proxy_file_url(job_id: str, file_name: str) -> str:
return f"/api/aurora/files/{quote(job_id, safe='')}/{quote(file_name, safe='')}"
def _rewrite_aurora_payload_urls(payload: Dict[str, Any]) -> Dict[str, Any]:
    """Rewrite Aurora output-file URLs to console-local proxy paths (in place).

    Each dict entry in ``output_files`` gets its "url" replaced with the
    /api/aurora/files/... proxy URL; non-dict entries are dropped. Payloads
    without an ``output_files`` list are returned untouched.
    """
    output_files = payload.get("output_files")
    if not isinstance(output_files, list):
        return payload
    job_id = str(payload.get("job_id") or "")
    rewritten: List[Dict[str, Any]] = []
    for item in output_files:
        if not isinstance(item, dict):
            continue
        file_name = str(item.get("name") or "")
        if job_id and file_name:
            # Shallow-copy so the upstream dict is not mutated.
            item = {**item, "url": _aurora_proxy_file_url(job_id, file_name)}
        rewritten.append(item)
    payload["output_files"] = rewritten
    report_url = payload.get("forensic_report_url")
    # NOTE(review): this branch reassigns the same value and is currently a
    # no-op — presumably a placeholder for rewriting non-proxy report URLs;
    # confirm intent before removing.
    if isinstance(report_url, str) and report_url.startswith("/api/aurora/report/"):
        payload["forensic_report_url"] = report_url
    return payload
async def _aurora_request_json(
    method: str,
    path: str,
    *,
    files: Optional[Dict[str, Any]] = None,
    data: Optional[Dict[str, Any]] = None,
    json_body: Optional[Dict[str, Any]] = None,
    timeout: float = 60.0,
    retries: int = 0,
    retry_backoff_sec: float = 0.25,
) -> Dict[str, Any]:
    """Proxy one JSON request to the Aurora service with retries + fallback.

    Behavior:
    - A transport error while talking to the in-cluster hostname switches to
      AURORA_FALLBACK_URL once. The switch does NOT consume a retry attempt:
      previously it did, so with the default retries=0 the loop ended right
      after switching and the fallback URL was never actually contacted.
    - 5xx responses and invalid JSON bodies are retried with linear backoff.
    - 4xx/5xx after exhausting retries raise HTTPException with the upstream
      status; transport failures raise 502.
    - Dict payloads get their output-file URLs rewritten to console-local
      proxy paths; non-dict JSON is wrapped as {"data": ...}.
    """
    base_url = AURORA_SERVICE_URL
    url = f"{base_url}{path}"
    attempts = max(1, int(retries) + 1)
    last_error = "unknown error"
    fallback_tried = False  # switch to AURORA_FALLBACK_URL at most once
    attempt = 0
    while attempt < attempts:
        attempt += 1
        try:
            async with httpx.AsyncClient(timeout=timeout) as client:
                r = await client.request(method, url, files=files, data=data, json=json_body)
        except httpx.HTTPError as e:
            last_error = str(e)[:200]
            if (
                not fallback_tried
                and "aurora-service" in base_url
                and AURORA_FALLBACK_URL
                and AURORA_FALLBACK_URL != base_url
            ):
                fallback_tried = True
                logger.warning(
                    "aurora proxy fallback: %s -> %s (%s)",
                    base_url,
                    AURORA_FALLBACK_URL,
                    last_error or type(e).__name__,
                )
                base_url = AURORA_FALLBACK_URL
                url = f"{base_url}{path}"
                # Retrying against the fallback must not consume one of the
                # caller's attempts (fix: fallback was unreachable at retries=0).
                attempt -= 1
                continue
            logger.warning(
                "aurora proxy transport error (%s %s, attempt=%d/%d): %s",
                method,
                path,
                attempt,
                attempts,
                last_error,
            )
            if attempt < attempts:
                await asyncio.sleep(retry_backoff_sec * attempt)
                continue
            raise HTTPException(status_code=502, detail=f"Aurora unavailable: {last_error}") from e
        except Exception as e:
            last_error = str(e)[:200]
            logger.exception(
                "aurora proxy unexpected error (%s %s, attempt=%d/%d): %s",
                method,
                path,
                attempt,
                attempts,
                last_error,
            )
            if attempt < attempts:
                await asyncio.sleep(retry_backoff_sec * attempt)
                continue
            raise HTTPException(status_code=502, detail=f"Aurora unavailable: {last_error}") from e
        # Retry transient upstream failures while attempts remain.
        if r.status_code >= 500 and attempt < attempts:
            logger.warning(
                "aurora proxy upstream %d (%s %s, attempt=%d/%d) — retrying",
                r.status_code,
                method,
                path,
                attempt,
                attempts,
            )
            await asyncio.sleep(retry_backoff_sec * attempt)
            continue
        if r.status_code >= 400:
            detail = r.text[:400] if r.text else f"Aurora error {r.status_code}"
            raise HTTPException(status_code=r.status_code, detail=detail)
        if not r.content:
            return {}
        try:
            payload = r.json()
        except Exception as e:
            last_error = str(e)[:200]
            logger.warning(
                "aurora proxy invalid JSON (%s %s, attempt=%d/%d): %s",
                method,
                path,
                attempt,
                attempts,
                last_error,
            )
            if attempt < attempts:
                await asyncio.sleep(retry_backoff_sec * attempt)
                continue
            raise HTTPException(status_code=502, detail="Invalid Aurora JSON response") from e
        if isinstance(payload, dict):
            return _rewrite_aurora_payload_urls(payload)
        return {"data": payload}
    raise HTTPException(status_code=502, detail=f"Aurora unavailable: {last_error}")
def _parse_stage_frame(stage: str) -> Dict[str, int]:
text = str(stage or "")
m = re.search(r"frame\s+(\d+)\s*/\s*(\d+)", text)
if not m:
return {"current": -1, "total": -1}
try:
return {"current": int(m.group(1)), "total": int(m.group(2))}
except Exception:
return {"current": -1, "total": -1}
def _aurora_live_fs_frame(job_id: str) -> Optional[Dict[str, Any]]:
    """Infer live render progress for a job by scanning its work dirs on disk.

    Looks under AURORA_DATA_DIR/outputs/<job_id>/_work_*/processed for the
    highest numbered PNG (current frame) and counts raw/*.png as the total.
    Results are cached per job for ~3 seconds. Returns None when no progress
    is visible on disk.
    """
    now = time.monotonic()
    cached = _aurora_live_cache.get(job_id)
    # Serve the cached snapshot while it is still fresh (< 3s old).
    if cached and (now - float(cached.get("ts", 0.0))) < 3.0:
        return cached
    base = AURORA_DATA_DIR / "outputs" / job_id
    if not base.exists():
        return None
    work_dirs = [p for p in base.iterdir() if p.is_dir() and p.name.startswith("_work_")]
    if not work_dirs:
        return None
    # Prefer most recently touched working directory
    work_dirs.sort(key=lambda p: p.stat().st_mtime, reverse=True)
    best_frame = -1
    best_total = -1
    best_dir = None
    for wd in work_dirs:
        processed = wd / "processed"
        if not processed.exists():
            continue
        # Max frame in processed directory
        local_max = -1
        for f in processed.glob("*.png"):
            try:
                n = int(f.stem)
            except Exception:
                # Non-numeric file names are ignored.
                continue
            if n > local_max:
                local_max = n
        if local_max < 0:
            continue
        raw_dir = wd / "raw"
        total = -1
        if raw_dir.exists():
            try:
                total = sum(1 for _ in raw_dir.glob("*.png"))
            except Exception:
                total = -1
        if local_max > best_frame:
            best_frame = local_max
            best_total = total
            best_dir = str(wd)
    if best_frame < 0:
        return None
    info = {
        "ts": now,
        "frame": best_frame,
        "total": best_total,
        "work_dir": best_dir,
    }
    _aurora_live_cache[job_id] = info
    return info
def _aurora_record_sample(job_id: str, frame: int, total: int) -> Optional[Dict[str, Any]]:
    """Record a (time, frame) sample for a job and estimate live fps.

    Samples live in a 32-entry ring buffer per job.  Once at least three
    samples with a positive frame advance exist, returns
    ``{"fps": float, "confidence": "low"|"medium"|"high"}``; otherwise None.
    """
    if frame < 0:
        return None
    now = time.monotonic()
    dq = _aurora_live_samples.setdefault(job_id, collections.deque(maxlen=32))
    # De-dup consecutive equal frame samples.
    if dq and int(dq[-1]["frame"]) == frame:
        # Keep original timestamp for stable fps between actual frame advances.
        dq[-1]["total"] = total
    else:
        dq.append({"ts": now, "frame": frame, "total": total})
    if len(dq) < 3:
        return None
    fps_points: List[float] = []
    prev = dq[0]
    for cur in list(dq)[1:]:
        df = int(cur["frame"]) - int(prev["frame"])
        dt = float(cur["ts"]) - float(prev["ts"])
        if df > 0 and dt > 0:
            fps_points.append(df / dt)
        prev = cur
    if not fps_points:
        return None
    # Median fps is robust to a single stalled or bursty interval.
    fps = max(0.01, float(statistics.median(fps_points)))
    confidence = "low"
    if len(fps_points) >= 8:
        confidence = "high"
    elif len(fps_points) >= 4:
        confidence = "medium"
    return {"fps": fps, "confidence": confidence}
def _aurora_load_live_last_from_disk() -> None:
    """Load the persisted per-job live-timing snapshots, at most once.

    Best-effort: a missing or unreadable file leaves the in-memory map as-is.
    """
    global _aurora_live_last_loaded
    if _aurora_live_last_loaded:
        return
    # Mark loaded up-front so a parse failure is not retried on every call.
    _aurora_live_last_loaded = True
    try:
        if not _aurora_live_last_path.exists():
            return
        data = json.loads(_aurora_live_last_path.read_text(encoding="utf-8"))
        if isinstance(data, dict):
            for k, v in data.items():
                if isinstance(k, str) and isinstance(v, dict):
                    _aurora_live_last[k] = v
    except Exception as e:
        logger.debug("aurora live-last load failed: %s", e)
def _aurora_persist_live_last_to_disk() -> None:
    """Write the per-job live-timing snapshots to disk (best-effort)."""
    try:
        _aurora_live_last_path.parent.mkdir(parents=True, exist_ok=True)
        _aurora_live_last_path.write_text(
            json.dumps(_aurora_live_last, ensure_ascii=False, separators=(",", ":")),
            encoding="utf-8",
        )
    except Exception as e:
        logger.debug("aurora live-last persist failed: %s", e)
def _smart_now_iso() -> str:
return datetime.now(timezone.utc).isoformat()
def _smart_is_terminal(status: Any) -> bool:
return str(status or "").lower() in {"completed", "failed", "cancelled"}
def _smart_media_type(file_name: str, content_type: str) -> str:
name = str(file_name or "").lower()
ctype = str(content_type or "").lower()
video_ext = (".mp4", ".avi", ".mov", ".mkv", ".webm")
audio_ext = (".mp3", ".wav", ".flac", ".m4a", ".aac", ".ogg")
image_ext = (".jpg", ".jpeg", ".png", ".webp", ".tif", ".tiff", ".bmp")
if ctype.startswith("video/") or name.endswith(video_ext):
return "video"
if ctype.startswith("audio/") or name.endswith(audio_ext):
return "audio"
if ctype.startswith("image/") or name.endswith(image_ext):
return "photo"
return "unknown"
def _smart_trim_runs() -> None:
    """Cap the in-memory smart-run store at _AURORA_SMART_MAX_RUNS entries.

    Keeps the newest runs by their ``created_at`` ISO string (lexicographic
    order matches chronological order for ISO-8601 timestamps).
    """
    if len(_aurora_smart_runs) <= _AURORA_SMART_MAX_RUNS:
        return
    ordered = sorted(
        _aurora_smart_runs.items(),
        key=lambda kv: str((kv[1] or {}).get("created_at") or ""),
        reverse=True,
    )
    keep = dict(ordered[:_AURORA_SMART_MAX_RUNS])
    _aurora_smart_runs.clear()
    _aurora_smart_runs.update(keep)
def _smart_load_runs_from_disk() -> None:
    """Load persisted smart runs into memory, at most once per process.

    Accepts both the wrapped ``{"runs": {...}}`` layout and a bare dict for
    backward compatibility.  Best-effort: failures are logged at debug level.
    """
    global _aurora_smart_runs_loaded
    if _aurora_smart_runs_loaded:
        return
    # Mark loaded up-front so a parse failure is not retried on every call.
    _aurora_smart_runs_loaded = True
    try:
        if not _aurora_smart_runs_path.exists():
            return
        payload = json.loads(_aurora_smart_runs_path.read_text(encoding="utf-8"))
        if isinstance(payload, dict):
            runs = payload.get("runs")
        else:
            runs = payload
        if isinstance(runs, dict):
            for run_id, run in runs.items():
                if isinstance(run_id, str) and isinstance(run, dict):
                    _aurora_smart_runs[run_id] = run
        _smart_trim_runs()
    except Exception as exc:
        logger.debug("aurora smart-runs load failed: %s", exc)
def _smart_persist_runs() -> None:
    """Trim and write the smart-run store to disk (best-effort)."""
    try:
        _smart_trim_runs()
        _aurora_smart_runs_path.parent.mkdir(parents=True, exist_ok=True)
        payload = {
            "updated_at": _smart_now_iso(),
            "runs": _aurora_smart_runs,
        }
        _aurora_smart_runs_path.write_text(
            json.dumps(payload, ensure_ascii=False, separators=(",", ":")),
            encoding="utf-8",
        )
    except Exception as exc:
        logger.debug("aurora smart-runs persist failed: %s", exc)
def _smart_load_policy_from_disk() -> None:
    """Load the learned strategy policy from disk, at most once per process."""
    global _aurora_smart_policy_loaded
    if _aurora_smart_policy_loaded:
        return
    # Mark loaded up-front so a parse failure is not retried on every call.
    _aurora_smart_policy_loaded = True
    try:
        if not _aurora_smart_policy_path.exists():
            return
        payload = json.loads(_aurora_smart_policy_path.read_text(encoding="utf-8"))
        if isinstance(payload, dict):
            strategies = payload.get("strategies")
            if isinstance(strategies, dict):
                _aurora_smart_policy["strategies"] = strategies
            _aurora_smart_policy["updated_at"] = payload.get("updated_at")
    except Exception as exc:
        logger.debug("aurora smart-policy load failed: %s", exc)
def _smart_persist_policy() -> None:
    """Stamp and write the strategy policy to disk (best-effort)."""
    try:
        _aurora_smart_policy["updated_at"] = _smart_now_iso()
        _aurora_smart_policy_path.parent.mkdir(parents=True, exist_ok=True)
        _aurora_smart_policy_path.write_text(
            json.dumps(_aurora_smart_policy, ensure_ascii=False, separators=(",", ":")),
            encoding="utf-8",
        )
    except Exception as exc:
        logger.debug("aurora smart-policy persist failed: %s", exc)
def _smart_strategy_stats(strategy: str) -> Dict[str, Any]:
    """Return the mutable stats dict for *strategy*, creating it if absent.

    The returned dict is stored inside ``_aurora_smart_policy`` — callers
    mutate it in place and then persist the policy.
    """
    _smart_load_policy_from_disk()
    strategies = _aurora_smart_policy.setdefault("strategies", {})
    stats = strategies.get(strategy)
    if not isinstance(stats, dict):
        stats = {"count": 0, "avg_score": 0.0, "wins": 0, "losses": 0}
        strategies[strategy] = stats
    return stats
def _smart_update_strategy_score(strategy: str, score: float) -> None:
    """Fold *score* into the strategy's running average and persist.

    Best-effort: any failure (bad stored values, persist error) is swallowed
    so feedback handling never breaks the request path.
    """
    stats = _smart_strategy_stats(strategy)
    try:
        count = int(stats.get("count") or 0) + 1
        avg = float(stats.get("avg_score") or 0.0)
        # Incremental mean update: new_avg = (old_avg * (n-1) + score) / n.
        stats["avg_score"] = round(((avg * (count - 1)) + float(score)) / max(1, count), 4)
        stats["count"] = count
        _smart_persist_policy()
    except Exception:
        return
def _smart_update_strategy_outcome(strategy: str, success: bool) -> None:
    """Record a win or loss for *strategy* and persist the policy file."""
    stats = _smart_strategy_stats(strategy)
    bucket = "wins" if success else "losses"
    stats[bucket] = int(stats.get(bucket) or 0) + 1
    _smart_persist_policy()
def _smart_new_run_id() -> str:
stamp = datetime.now(timezone.utc).strftime("%Y%m%d_%H%M%S")
return f"smart_{stamp}_{uuid.uuid4().hex[:6]}"
def _smart_append_audit(run: Dict[str, Any], event: str, detail: Optional[Dict[str, Any]] = None) -> None:
    """Append a timestamped audit entry to *run* and bump its updated_at.

    Non-list ``audit`` values are replaced; history is capped at the newest
    200 entries.  A non-empty dict *detail* is attached under ``detail``.
    """
    trail = run.setdefault("audit", [])
    if not isinstance(trail, list):
        trail = []
        run["audit"] = trail
    entry: Dict[str, Any] = {"ts": _smart_now_iso(), "event": str(event)}
    if detail and isinstance(detail, dict):
        entry["detail"] = detail
    trail.append(entry)
    # Keep only the most recent 200 audit entries.
    if len(trail) > 200:
        del trail[:-200]
    run["updated_at"] = entry["ts"]
def _smart_analysis_features(analysis: Optional[Dict[str, Any]]) -> Dict[str, Any]:
if not isinstance(analysis, dict):
return {
"faces": 0,
"plates": 0,
"noise": "unknown",
"blur": "unknown",
"quality_score": 0.0,
}
faces = len(analysis.get("faces") or []) if isinstance(analysis.get("faces"), list) else 0
plates = len(analysis.get("license_plates") or []) if isinstance(analysis.get("license_plates"), list) else 0
qa = analysis.get("quality_analysis") if isinstance(analysis.get("quality_analysis"), dict) else {}
noise = str(qa.get("noise_level") or "unknown").lower()
blur = str(qa.get("blur_level") or "unknown").lower()
score = 0.0
score += min(2.0, faces * 0.2)
score += min(2.0, plates * 0.4)
if noise in {"high", "very_high"}:
score += 1.0
if blur in {"high", "very_high"}:
score += 1.0
return {
"faces": faces,
"plates": plates,
"noise": noise,
"blur": blur,
"quality_score": round(score, 3),
}
def _smart_decide_strategy(
    *,
    media_type: str,
    mode: str,
    requested_strategy: str,
    prefer_quality: bool,
    budget_tier: str,
    analysis: Optional[Dict[str, Any]],
    learning_enabled: bool,
) -> Dict[str, Any]:
    """Choose a processing strategy: ``local_only`` or ``local_then_kling``.

    Non-video media always stays local, and an explicitly requested strategy
    wins outright.  Otherwise a heuristic score is accumulated from analysis
    features, the quality preference, forensic mode, the budget tier and
    (optionally) the learned Kling win/loss ratio; Kling is chosen when the
    score reaches 2.1.  Returns the chosen strategy plus reasons, the score
    and the extracted features.
    """
    strategy = str(requested_strategy or "auto").strip().lower()
    valid = {"auto", "local_only", "local_then_kling"}
    if strategy not in valid:
        strategy = "auto"
    features = _smart_analysis_features(analysis)
    reasons: List[str] = []
    score = 0.0
    if media_type != "video":
        chosen = "local_only"
        reasons.append("non-video media -> local stack only")
        return {"strategy": chosen, "reasons": reasons, "score": 0.0, "features": features}
    if strategy in {"local_only", "local_then_kling"}:
        reasons.append(f"explicit strategy={strategy}")
        return {"strategy": strategy, "reasons": reasons, "score": features["quality_score"], "features": features}
    score += float(features["quality_score"])
    if prefer_quality:
        score += 1.3
        reasons.append("prefer_quality=true")
    if str(mode).lower() == "forensic":
        score += 0.8
        reasons.append("forensic mode")
    budget_norm = str(budget_tier or "normal").strip().lower()
    if budget_norm == "low":
        score -= 1.4
        reasons.append("budget_tier=low")
    elif budget_norm == "high":
        score += 0.6
        reasons.append("budget_tier=high")
    if learning_enabled:
        # Nudge the score by the historical Kling success ratio once at
        # least 6 outcomes have been recorded.
        stats = _smart_strategy_stats("local_then_kling")
        wins = int(stats.get("wins") or 0)
        losses = int(stats.get("losses") or 0)
        total = wins + losses
        if total >= 6:
            success_ratio = wins / max(1, total)
            if success_ratio >= 0.65:
                score += 0.5
                reasons.append(f"learned success ratio {success_ratio:.2f}")
            elif success_ratio <= 0.35:
                score -= 0.7
                reasons.append(f"learned low success ratio {success_ratio:.2f}")
    chosen = "local_then_kling" if score >= 2.1 else "local_only"
    if not reasons:
        reasons.append("default heuristic")
    return {"strategy": chosen, "reasons": reasons, "score": round(score, 3), "features": features}
def _smart_compact_result(result_payload: Dict[str, Any]) -> Dict[str, Any]:
payload = {}
if not isinstance(result_payload, dict):
return payload
payload["mode"] = result_payload.get("mode")
payload["media_type"] = result_payload.get("media_type")
payload["digital_signature"] = result_payload.get("digital_signature")
output_files = result_payload.get("output_files")
if isinstance(output_files, list):
payload["output_files"] = output_files[:8]
q = result_payload.get("quality_report")
if isinstance(q, dict):
payload["quality_report"] = q
return payload
async def _smart_fetch_run_status(run_id: str) -> Optional[Dict[str, Any]]:
    """Look up a smart run by id, loading the on-disk store on first use."""
    _smart_load_runs_from_disk()
    candidate = _aurora_smart_runs.get(run_id)
    return candidate if isinstance(candidate, dict) else None
async def _smart_monitor_run(run_id: str) -> None:
    """Background task driving a smart run through its whole pipeline.

    Phase 1 polls the local Aurora job until it completes, fails or times
    out, mirroring its status into the run record.  Phase 2 fetches and
    stores the compacted local result.  Phase 3 (only for strategy
    ``local_then_kling`` on video) submits a Kling enhancement pass and
    polls it; Kling failures or timeouts still leave the run "completed"
    with the local stack selected.  Every transition updates the persisted
    run record and the strategy win/loss stats.  Unknown run ids return
    silently.
    """
    run = await _smart_fetch_run_status(run_id)
    if not run:
        return
    local = run.get("local") if isinstance(run.get("local"), dict) else {}
    local_job_id = str(local.get("job_id") or "")
    if not local_job_id:
        _smart_append_audit(run, "monitor.error", {"reason": "missing local job id"})
        run["status"] = "failed"
        run["phase"] = "failed"
        _smart_persist_runs()
        return
    # Phase 1: poll the local Aurora job until terminal or timeout.
    start = time.monotonic()
    while time.monotonic() - start <= _AURORA_SMART_LOCAL_MAX_SEC:
        try:
            st = await _aurora_request_json(
                "GET",
                f"/api/aurora/status/{quote(local_job_id, safe='')}",
                timeout=20.0,
                retries=2,
                retry_backoff_sec=0.25,
            )
        except Exception as exc:
            # Transient status errors are audited and retried on the next poll.
            _smart_append_audit(run, "local.status.error", {"error": str(exc)[:220]})
            await asyncio.sleep(_AURORA_SMART_LOCAL_POLL_SEC)
            continue
        status = str(st.get("status") or "").lower()
        if status in {"queued", "processing"}:
            run["phase"] = "local_processing"
            run["status"] = "processing"
        elif status == "completed":
            run["phase"] = "local_completed"
            run["status"] = "processing"
        else:
            run["phase"] = f"local_{status or 'unknown'}"
            run["status"] = status
        run["local"] = {
            **local,
            "job_id": local_job_id,
            "status": status,
            "progress": st.get("progress"),
            "current_stage": st.get("current_stage"),
            "eta_seconds": st.get("eta_seconds"),
            "live_fps": st.get("live_fps"),
            "error_message": st.get("error_message"),
            "updated_at": _smart_now_iso(),
        }
        _smart_persist_runs()
        if status in {"queued", "processing"}:
            await asyncio.sleep(_AURORA_SMART_LOCAL_POLL_SEC)
            continue
        if status != "completed":
            run["status"] = "failed"
            run["phase"] = "local_failed"
            _smart_append_audit(
                run,
                "local.failed",
                {"status": status, "error": str(st.get("error_message") or "")[:220]},
            )
            _smart_update_strategy_outcome(str(run.get("policy", {}).get("strategy") or "local_only"), False)
            _smart_persist_runs()
            return
        _smart_append_audit(run, "local.completed", {"job_id": local_job_id})
        break
    else:
        # while/else: the loop ran to exhaustion without a break -> timeout.
        run["status"] = "failed"
        run["phase"] = "local_timeout"
        _smart_append_audit(run, "local.timeout", {"max_sec": _AURORA_SMART_LOCAL_MAX_SEC})
        _smart_update_strategy_outcome(str(run.get("policy", {}).get("strategy") or "local_only"), False)
        _smart_persist_runs()
        return
    # Phase 2: fetch and store the (compacted) local result.
    try:
        local_result = await _aurora_request_json(
            "GET",
            f"/api/aurora/result/{quote(local_job_id, safe='')}",
            timeout=30.0,
            retries=2,
            retry_backoff_sec=0.25,
        )
    except Exception as exc:
        run["status"] = "failed"
        run["phase"] = "local_result_error"
        _smart_append_audit(run, "local.result.error", {"error": str(exc)[:240]})
        _smart_update_strategy_outcome(str(run.get("policy", {}).get("strategy") or "local_only"), False)
        _smart_persist_runs()
        return
    run.setdefault("local", {})
    if isinstance(run["local"], dict):
        run["local"]["result"] = _smart_compact_result(local_result)
        run["local"]["result_ready"] = True
    run["selected_stack"] = "local"
    policy = run.get("policy") if isinstance(run.get("policy"), dict) else {}
    strategy = str(policy.get("strategy") or "local_only")
    media_type = str(run.get("media_type") or "")
    kling = run.get("kling") if isinstance(run.get("kling"), dict) else {}
    if strategy != "local_then_kling" or media_type != "video":
        run["status"] = "completed"
        run["phase"] = "completed"
        _smart_append_audit(run, "smart.completed", {"selected_stack": "local", "reason": "strategy local_only or non-video"})
        _smart_update_strategy_outcome(strategy, True)
        _smart_persist_runs()
        return
    # Phase 3: submit the Kling enhancement pass.
    run["phase"] = "kling_submitting"
    run["status"] = "processing"
    _smart_append_audit(run, "kling.submit.start")
    _smart_persist_runs()
    try:
        submit = await _aurora_request_json(
            "POST",
            "/api/aurora/kling/enhance",
            data={
                "job_id": local_job_id,
                "prompt": str(kling.get("prompt") or "enhance video quality, improve sharpness and clarity"),
                "negative_prompt": str(kling.get("negative_prompt") or "noise, blur, artifacts, distortion"),
                "mode": str(kling.get("mode") or "pro"),
                "duration": str(kling.get("duration") or "5"),
                "cfg_scale": str(kling.get("cfg_scale") if kling.get("cfg_scale") is not None else "0.5"),
            },
            timeout=120.0,
            retries=1,
            retry_backoff_sec=0.25,
        )
    except Exception as exc:
        # Kling submit failure still counts as completed with the local output.
        run["kling"] = {
            **kling,
            "status": "failed",
            "error": str(exc)[:640],
        }
        run["status"] = "completed"
        run["phase"] = "completed_with_kling_failure"
        run["selected_stack"] = "local"
        _smart_append_audit(run, "kling.submit.error", {"error": str(exc)[:220]})
        _smart_update_strategy_outcome(strategy, False)
        _smart_persist_runs()
        return
    task_id = str(submit.get("kling_task_id") or "")
    run["kling"] = {
        **kling,
        "task_id": task_id,
        "status": str(submit.get("status") or "submitted").lower(),
        "endpoint": str(submit.get("kling_endpoint") or "video2video"),
        "submitted_at": _smart_now_iso(),
    }
    _smart_append_audit(run, "kling.submitted", {"task_id": task_id})
    _smart_persist_runs()
    # Poll Kling until terminal or timeout.
    k_start = time.monotonic()
    while time.monotonic() - k_start <= _AURORA_SMART_KLING_MAX_SEC:
        try:
            kst = await _aurora_request_json(
                "GET",
                f"/api/aurora/kling/status/{quote(local_job_id, safe='')}",
                timeout=30.0,
                retries=1,
                retry_backoff_sec=0.2,
            )
        except Exception as exc:
            _smart_append_audit(run, "kling.status.error", {"error": str(exc)[:220]})
            await asyncio.sleep(_AURORA_SMART_KLING_POLL_SEC)
            continue
        k_status = str(kst.get("status") or "").lower()
        k_url = kst.get("kling_result_url")
        run["phase"] = "kling_processing"
        run["kling"] = {
            **(run.get("kling") if isinstance(run.get("kling"), dict) else {}),
            "status": k_status,
            "result_url": k_url,
            "last_polled_at": _smart_now_iso(),
        }
        _smart_persist_runs()
        if k_status in {"submitted", "queued", "running", "processing", "pending"}:
            await asyncio.sleep(_AURORA_SMART_KLING_POLL_SEC)
            continue
        if k_status in {"succeed", "completed", "success"} and k_url:
            run["status"] = "completed"
            run["phase"] = "completed"
            run["selected_stack"] = "kling"
            _smart_append_audit(run, "smart.completed", {"selected_stack": "kling", "task_id": task_id})
            _smart_update_strategy_outcome(strategy, True)
            _smart_persist_runs()
            return
        # Any other terminal Kling status: fall back to the local output.
        run["status"] = "completed"
        run["phase"] = "completed_with_kling_failure"
        run["selected_stack"] = "local"
        _smart_append_audit(
            run,
            "kling.terminal.non_success",
            {"status": k_status, "task_id": task_id},
        )
        _smart_update_strategy_outcome(strategy, False)
        _smart_persist_runs()
        return
    # Kling polling timed out; the local output remains the selected stack.
    run["status"] = "completed"
    run["phase"] = "completed_with_kling_timeout"
    run["selected_stack"] = "local"
    _smart_append_audit(run, "kling.timeout", {"max_sec": _AURORA_SMART_KLING_MAX_SEC})
    _smart_update_strategy_outcome(strategy, False)
    _smart_persist_runs()
def _smart_resume_active_monitors() -> None:
    """Re-spawn monitor tasks for all non-terminal smart runs.

    NOTE(review): ``asyncio.create_task`` requires a running event loop —
    presumably this is invoked from app startup; confirm before calling it
    from a synchronous context.
    """
    _smart_load_runs_from_disk()
    for run_id, run in list(_aurora_smart_runs.items()):
        if not isinstance(run, dict):
            continue
        if _smart_is_terminal(run.get("status")):
            continue
        try:
            asyncio.create_task(_smart_monitor_run(run_id))
        except Exception:
            # Best-effort resume: one bad run must not block the others.
            continue
@app.get("/api/aurora/health")
async def api_aurora_health() -> Dict[str, Any]:
    """Proxy Aurora's /health endpoint through the console BFF."""
    health_payload = await _aurora_request_json("GET", "/health", timeout=10.0)
    return health_payload
@app.post("/api/aurora/upload")
async def api_aurora_upload(
    file: UploadFile = File(...),
    mode: str = Form("tactical"),
    priority: str = Form("balanced"),
    export_options: str = Form(""),
) -> Dict[str, Any]:
    """Forward an upload to Aurora for processing.

    The file handle is streamed through to Aurora; when the response carries
    a ``job_id``, convenience console URLs (status/result/cancel) are added.
    """
    # Stream file to Aurora without buffering entire content in RAM
    file_obj = file.file  # SpooledTemporaryFile — already handles large files
    files = {
        "file": (
            file.filename or "upload.bin",
            file_obj,
            file.content_type or "application/octet-stream",
        )
    }
    payload = await _aurora_request_json(
        "POST",
        "/api/aurora/upload",
        files=files,
        data={
            "mode": mode,
            "priority": priority,
            "export_options": export_options,
        },
        timeout=120.0,
    )
    job_id = str(payload.get("job_id") or "")
    if job_id:
        payload["status_url"] = f"/api/aurora/status/{job_id}"
        payload["result_url"] = f"/api/aurora/result/{job_id}"
        payload["cancel_url"] = f"/api/aurora/cancel/{job_id}"
    return payload
@app.post("/api/aurora/process-smart")
async def api_aurora_process_smart(
    file: UploadFile = File(...),
    mode: str = Form("tactical"),
    priority: str = Form("balanced"),
    export_options: str = Form(""),
    strategy: str = Form("auto"),
    prefer_quality: bool = Form(True),
    budget_tier: str = Form("normal"),
    learning_enabled: bool = Form(True),
    kling_prompt: str = Form("enhance video quality, improve sharpness and clarity"),
    kling_negative_prompt: str = Form("noise, blur, artifacts, distortion"),
    kling_mode: str = Form("pro"),
    kling_duration: str = Form("5"),
    kling_cfg_scale: float = Form(0.5),
) -> Dict[str, Any]:
    """Start a "smart" processing run: analyze, pick a strategy, submit.

    For video/photo uploads the file is first sent to Aurora's analyzer
    (best-effort — failures only skip the analysis).  A strategy is then
    decided, the file is submitted as a regular local Aurora job, a
    persisted smart-run record is created, and a background monitor task is
    spawned to drive the rest of the pipeline.  Returns the run id, chosen
    policy, and console URLs for polling.
    """
    _smart_load_runs_from_disk()
    _smart_load_policy_from_disk()
    file_name = file.filename or "upload.bin"
    content_type = file.content_type or "application/octet-stream"
    media_type = _smart_media_type(file_name, content_type)
    analysis: Optional[Dict[str, Any]] = None
    if media_type in {"video", "photo"}:
        try:
            # Rewind before each pass — the same upload stream is reused below.
            await file.seek(0)
            files = {"file": (file_name, file.file, content_type)}
            analysis = await _aurora_request_json(
                "POST",
                "/api/aurora/analyze",
                files=files,
                timeout=120.0,
                retries=1,
                retry_backoff_sec=0.25,
            )
        except Exception as exc:
            analysis = None
            logger.warning("smart-process analyze skipped: %s", str(exc)[:220])
    policy = _smart_decide_strategy(
        media_type=media_type,
        mode=mode,
        requested_strategy=strategy,
        prefer_quality=bool(prefer_quality),
        budget_tier=budget_tier,
        analysis=analysis,
        learning_enabled=bool(learning_enabled),
    )
    chosen_strategy = str(policy.get("strategy") or "local_only")
    policy.setdefault("requested_strategy", str(strategy or "auto"))
    policy["learning_enabled"] = bool(learning_enabled)
    policy["budget_tier"] = str(budget_tier or "normal")
    # Submit the actual local processing job.
    await file.seek(0)
    files = {"file": (file_name, file.file, content_type)}
    local_payload = await _aurora_request_json(
        "POST",
        "/api/aurora/upload",
        files=files,
        data={
            "mode": mode,
            "priority": priority,
            "export_options": export_options,
        },
        timeout=120.0,
    )
    local_job_id = str(local_payload.get("job_id") or "")
    if not local_job_id:
        raise HTTPException(status_code=502, detail="Smart process failed: local job_id missing")
    run_id = _smart_new_run_id()
    now = _smart_now_iso()
    run: Dict[str, Any] = {
        "run_id": run_id,
        "created_at": now,
        "updated_at": now,
        "status": "processing",
        "phase": "local_processing",
        "media_type": media_type,
        "selected_stack": None,
        "requested": {
            "mode": mode,
            "priority": priority,
            "export_options": export_options,
            "strategy": strategy,
            "prefer_quality": bool(prefer_quality),
            "budget_tier": budget_tier,
            "learning_enabled": bool(learning_enabled),
        },
        "policy": policy,
        "analysis_summary": _smart_analysis_features(analysis),
        "analysis": analysis if isinstance(analysis, dict) else None,
        "local": {
            "job_id": local_job_id,
            "status": "queued",
            "submit_payload": {
                "status_url": f"/api/aurora/status/{quote(local_job_id, safe='')}",
                "result_url": f"/api/aurora/result/{quote(local_job_id, safe='')}",
            },
        },
        "kling": {
            "enabled": chosen_strategy == "local_then_kling" and media_type == "video",
            "status": "pending",
            "prompt": kling_prompt,
            "negative_prompt": kling_negative_prompt,
            "mode": kling_mode,
            "duration": kling_duration,
            "cfg_scale": kling_cfg_scale,
        },
        "audit": [],
    }
    _smart_append_audit(
        run,
        "smart.submitted",
        {
            "local_job_id": local_job_id,
            "media_type": media_type,
            "strategy": chosen_strategy,
            "score": policy.get("score"),
        },
    )
    _aurora_smart_runs[run_id] = run
    _smart_persist_runs()
    try:
        # Background task drives the run through local (and optionally Kling).
        asyncio.create_task(_smart_monitor_run(run_id))
    except Exception as exc:
        _smart_append_audit(run, "monitor.spawn.error", {"error": str(exc)[:220]})
        _smart_persist_runs()
    return {
        "smart_run_id": run_id,
        "status": run.get("status"),
        "phase": run.get("phase"),
        "media_type": media_type,
        "local_job_id": local_job_id,
        "policy": policy,
        "smart_status_url": f"/api/aurora/process-smart/{quote(run_id, safe='')}",
        "local_status_url": f"/api/aurora/status/{quote(local_job_id, safe='')}",
        "local_result_url": f"/api/aurora/result/{quote(local_job_id, safe='')}",
    }
@app.get("/api/aurora/process-smart")
async def api_aurora_process_smart_list(
    limit: int = Query(default=20, ge=1, le=200),
    status: Optional[str] = Query(default=None),
) -> Dict[str, Any]:
    """List smart runs (newest first), optionally filtered by status.

    Returns compact per-run summaries plus the filtered total count.
    """
    _smart_load_runs_from_disk()
    requested = str(status or "").strip().lower()
    rows = []
    for run in _aurora_smart_runs.values():
        if not isinstance(run, dict):
            continue
        run_status = str(run.get("status") or "")
        if requested and run_status.lower() != requested:
            continue
        local = run.get("local") if isinstance(run.get("local"), dict) else {}
        kling = run.get("kling") if isinstance(run.get("kling"), dict) else {}
        rows.append(
            {
                "run_id": run.get("run_id"),
                "status": run_status,
                "phase": run.get("phase"),
                "media_type": run.get("media_type"),
                "strategy": (run.get("policy") or {}).get("strategy") if isinstance(run.get("policy"), dict) else None,
                "selected_stack": run.get("selected_stack"),
                "created_at": run.get("created_at"),
                "updated_at": run.get("updated_at"),
                "local_job_id": local.get("job_id"),
                "local_status": local.get("status"),
                "kling_status": kling.get("status"),
            }
        )
    # Newest first by ISO created_at (lexicographic == chronological).
    rows.sort(key=lambda x: str(x.get("created_at") or ""), reverse=True)
    return {"runs": rows[:limit], "count": min(limit, len(rows)), "total": len(rows)}
@app.get("/api/aurora/process-smart/{run_id}")
async def api_aurora_process_smart_status(run_id: str) -> Dict[str, Any]:
    """Return the full stored smart-run record, or 404 when unknown."""
    record = await _smart_fetch_run_status(run_id)
    if not record:
        raise HTTPException(status_code=404, detail="smart run not found")
    return record
@app.post("/api/aurora/process-smart/{run_id}/feedback")
async def api_aurora_process_smart_feedback(
    run_id: str,
    payload: Optional[Dict[str, Any]] = Body(default=None),
) -> Dict[str, Any]:
    """Attach operator feedback (score / selected stack / notes) to a run.

    A numeric score updates the learned average for the run's strategy,
    clamped to 1..5 for the policy update (the raw value is what gets stored
    in the feedback record).  Raises 404 when the run id is unknown.
    """
    run = await _smart_fetch_run_status(run_id)
    if not run:
        raise HTTPException(status_code=404, detail="smart run not found")
    body = payload if isinstance(payload, dict) else {}
    score_raw = body.get("score")
    score: Optional[float] = None
    try:
        if score_raw is not None:
            score = float(score_raw)
    except Exception:
        # Non-numeric scores are silently dropped.
        score = None
    selected_stack = str(body.get("selected_stack") or "").strip().lower() or None
    notes = str(body.get("notes") or "").strip()
    feedback = {
        "ts": _smart_now_iso(),
        "score": score,
        "selected_stack": selected_stack,
        "notes": notes[:1000] if notes else None,
    }
    run["feedback"] = feedback
    strategy = str((run.get("policy") or {}).get("strategy") or "local_only")
    if score is not None:
        # Clamp only for the policy learner; feedback keeps the raw score.
        score = max(1.0, min(5.0, score))
        _smart_update_strategy_score(strategy, score)
    if selected_stack in {"local", "kling"}:
        run["selected_stack"] = selected_stack
    _smart_append_audit(run, "feedback.received", {"score": score, "selected_stack": selected_stack})
    _smart_persist_runs()
    return {
        "ok": True,
        "run_id": run_id,
        "feedback": feedback,
        "policy": _aurora_smart_policy,
    }
@app.get("/api/aurora/process-smart/policy/stats")
async def api_aurora_process_smart_policy_stats() -> Dict[str, Any]:
    """Expose the learned per-strategy statistics (wins/losses/avg score)."""
    _smart_load_policy_from_disk()
    return _aurora_smart_policy
@app.post("/api/aurora/analyze")
async def api_aurora_analyze(file: UploadFile = File(...)) -> Dict[str, Any]:
    """Proxy a media file to Aurora's analyzer and return its findings."""
    # Rewind in case the stream was already consumed upstream.
    await file.seek(0)
    files = {
        "file": (
            file.filename or "upload.bin",
            file.file,
            file.content_type or "application/octet-stream",
        )
    }
    return await _aurora_request_json(
        "POST",
        "/api/aurora/analyze",
        files=files,
        timeout=120.0,
        retries=2,
        retry_backoff_sec=0.35,
    )
@app.post("/api/aurora/audio/analyze")
async def api_aurora_audio_analyze(file: UploadFile = File(...)) -> Dict[str, Any]:
    """Proxy an audio file to Aurora's audio analyzer."""
    # Rewind in case the stream was already consumed upstream.
    await file.seek(0)
    files = {
        "file": (
            file.filename or "upload_audio.bin",
            file.file,
            file.content_type or "application/octet-stream",
        )
    }
    return await _aurora_request_json(
        "POST",
        "/api/aurora/audio/analyze",
        files=files,
        timeout=120.0,
        retries=2,
        retry_backoff_sec=0.35,
    )
@app.post("/api/aurora/audio/process")
async def api_aurora_audio_process(
    file: UploadFile = File(...),
    mode: str = Form("tactical"),
    priority: str = Form("speech"),
    export_options: str = Form(""),
) -> Dict[str, Any]:
    """Submit an audio file for Aurora processing.

    Adds console status/result/cancel URLs when a ``job_id`` is returned.
    """
    # Rewind in case the stream was already consumed upstream.
    await file.seek(0)
    files = {
        "file": (
            file.filename or "upload_audio.bin",
            file.file,
            file.content_type or "application/octet-stream",
        )
    }
    payload = await _aurora_request_json(
        "POST",
        "/api/aurora/audio/process",
        files=files,
        data={
            "mode": mode,
            "priority": priority,
            "export_options": export_options,
        },
        timeout=120.0,
        retries=2,
        retry_backoff_sec=0.35,
    )
    job_id = str(payload.get("job_id") or "")
    if job_id:
        payload["status_url"] = f"/api/aurora/status/{job_id}"
        payload["result_url"] = f"/api/aurora/result/{job_id}"
        payload["cancel_url"] = f"/api/aurora/cancel/{job_id}"
    return payload
@app.post("/api/aurora/reprocess/{job_id}")
async def api_aurora_reprocess(
    job_id: str,
    payload: Optional[Dict[str, Any]] = Body(default=None),
) -> Dict[str, Any]:
    """Forward a reprocess request for *job_id* with the supplied JSON body."""
    request_body = payload if isinstance(payload, dict) else {}
    encoded_id = quote(job_id, safe="")
    return await _aurora_request_json(
        "POST",
        f"/api/aurora/reprocess/{encoded_id}",
        json_body=request_body,
        timeout=120.0,
        retries=2,
        retry_backoff_sec=0.35,
    )
@app.post("/api/aurora/chat")
async def api_aurora_chat(payload: Optional[Dict[str, Any]] = Body(default=None)) -> Dict[str, Any]:
    """Relay a chat payload to Aurora's chat endpoint."""
    chat_body = payload if isinstance(payload, dict) else {}
    response = await _aurora_request_json(
        "POST",
        "/api/aurora/chat",
        json_body=chat_body,
        timeout=30.0,
        retries=1,
        retry_backoff_sec=0.2,
    )
    return response
@app.get("/api/aurora/status/{job_id}")
async def api_aurora_status(job_id: str) -> Dict[str, Any]:
    """Proxy Aurora job status, enriched with live filesystem progress.

    For jobs still ``processing``, the upstream payload is augmented with
    frame counters scanned from Aurora's work directories: progress is
    bumped (never lowered), live fps / ETA are computed from frame-rate
    samples, a stale ``current_stage`` is patched, and the last known
    timing snapshot is persisted so it survives restarts.
    """
    _aurora_load_live_last_from_disk()
    payload = await _aurora_request_json(
        "GET",
        f"/api/aurora/status/{quote(job_id, safe='')}",
        timeout=20.0,
        retries=8,
        retry_backoff_sec=0.35,
    )
    if not isinstance(payload, dict):
        return payload
    # Only augment while the job is actively processing.
    if str(payload.get("status", "")).lower() != "processing":
        return payload
    live = _aurora_live_fs_frame(job_id)
    if not live:
        return payload
    parsed = _parse_stage_frame(str(payload.get("current_stage", "")))
    live_frame = int(live.get("frame", -1))
    if live_frame < 0:
        return payload
    # Prefer the total from the upstream stage text; fall back to raw count.
    total = int(parsed.get("total", -1))
    if total <= 0:
        total = int(live.get("total", -1))
    if total > 0:
        # Clamp to 1..99 so the UI never shows 0% or a premature 100%.
        live_progress = int(max(1, min(99, round((live_frame / max(1, total)) * 100))))
        payload["progress"] = max(int(payload.get("progress") or 0), live_progress)
    live_stats = _aurora_record_sample(job_id, live_frame, total)
    if live_stats:
        fps = float(live_stats["fps"])
        payload["live_fps"] = round(fps, 3)
        payload["eta_confidence"] = live_stats["confidence"]
        if total > 0 and live_frame < total:
            eta_calc = int(max(0, round((total - live_frame) / max(0.01, fps))))
            payload["eta_seconds"] = eta_calc
            elapsed = payload.get("elapsed_seconds")
            if isinstance(elapsed, (int, float)):
                payload["estimated_total_seconds"] = int(max(0, round(float(elapsed) + eta_calc)))
            _aurora_live_last[job_id] = {
                "live_fps": payload.get("live_fps"),
                "eta_seconds": payload.get("eta_seconds"),
                "estimated_total_seconds": payload.get("estimated_total_seconds"),
                "eta_confidence": payload.get("eta_confidence"),
            }
            _aurora_persist_live_last_to_disk()
    else:
        # No fresh fps sample — reuse the last persisted timing snapshot.
        prev = _aurora_live_last.get(job_id)
        if prev:
            payload["live_fps"] = prev.get("live_fps")
            payload["eta_seconds"] = prev.get("eta_seconds", payload.get("eta_seconds"))
            payload["estimated_total_seconds"] = prev.get("estimated_total_seconds", payload.get("estimated_total_seconds"))
            payload["eta_confidence"] = prev.get("eta_confidence")
    # If upstream stage/progress is stale, patch with live filesystem progress.
    if live_frame > int(parsed.get("current", -1)):
        if total > 0:
            if live_stats:
                payload["current_stage"] = f"enhancing frame {live_frame}/{total} (live {payload['live_fps']} fps)"
            else:
                payload["current_stage"] = f"enhancing frame {live_frame}/{total} (live fs)"
        else:
            payload["current_stage"] = f"enhancing frame {live_frame} (live fs)"
        payload["live_frame"] = live_frame
        payload["live_total_frames"] = total if total > 0 else None
    else:
        # Even when upstream stage text already moved, expose live counters for UI.
        payload["live_frame"] = live_frame
        payload["live_total_frames"] = total if total > 0 else None
    # Persist last known timing even if fps was not recalculated this poll.
    snapshot = _aurora_live_last.get(job_id, {})
    changed = False
    for key in ("live_fps", "eta_seconds", "estimated_total_seconds", "eta_confidence"):
        val = payload.get(key)
        if val is not None and snapshot.get(key) != val:
            snapshot[key] = val
            changed = True
    if changed:
        _aurora_live_last[job_id] = snapshot
        _aurora_persist_live_last_to_disk()
    return payload
def _aurora_coerce_dir(path_value: Any) -> Optional[Path]:
if path_value is None:
return None
raw = str(path_value).strip()
if not raw:
return None
try:
p = Path(raw).expanduser().resolve()
except Exception:
return None
if p.exists() and p.is_file():
p = p.parent
if not p.exists() or not p.is_dir():
return None
return p
async def _aurora_resolve_job_folder(job_id: str) -> Optional[Path]:
    """Find the on-disk output directory for an Aurora job.

    Collects candidate paths from the job's status and result ``storage``
    blocks (both fetched best-effort), then falls back to the conventional
    ``AURORA_DATA_DIR/outputs/<job_id>`` location.  Returns the first
    candidate that coerces to an existing directory, else None.
    """
    candidates: List[Any] = []
    try:
        st = await _aurora_request_json("GET", f"/api/aurora/status/{quote(job_id, safe='')}", timeout=12.0)
        storage = st.get("storage") if isinstance(st, dict) else None
        if isinstance(storage, dict):
            candidates.extend(
                [
                    storage.get("output_dir"),
                    storage.get("upload_dir"),
                    storage.get("input_path"),
                ]
            )
    except Exception:
        pass
    try:
        res = await _aurora_request_json("GET", f"/api/aurora/result/{quote(job_id, safe='')}", timeout=12.0)
        storage = res.get("storage") if isinstance(res, dict) else None
        if isinstance(storage, dict):
            candidates.extend(
                [
                    storage.get("output_dir"),
                    storage.get("upload_dir"),
                    storage.get("input_path"),
                ]
            )
    except Exception:
        pass
    # Conventional fallback location inside the Aurora data dir.
    candidates.append(AURORA_DATA_DIR / "outputs" / job_id)
    for c in candidates:
        p = _aurora_coerce_dir(c)
        if p:
            return p
    return None
@app.get("/api/aurora/folder/{job_id}")
async def api_aurora_folder(job_id: str) -> Dict[str, Any]:
    """Resolve and return the on-disk output folder for a job (404 if absent)."""
    folder = await _aurora_resolve_job_folder(job_id)
    if not folder:
        raise HTTPException(status_code=404, detail="Aurora output folder not found")
    folder_str = str(folder)
    return {
        "ok": True,
        "job_id": job_id,
        "folder_path": folder_str,
        "folder_url": f"file://{folder_str}",
    }
@app.post("/api/aurora/folder/{job_id}/open")
async def api_aurora_folder_open(job_id: str) -> Dict[str, Any]:
    """Open the job's output folder in the host OS file manager.

    Uses ``open`` on macOS, ``os.startfile`` on Windows, and ``xdg-open``
    elsewhere.  Raises 404 when the folder cannot be resolved and 500 when
    the platform opener fails.
    """
    folder = await _aurora_resolve_job_folder(job_id)
    if not folder:
        raise HTTPException(status_code=404, detail="Aurora output folder not found")
    cmd: Optional[List[str]] = None
    if sys.platform == "darwin":
        cmd = ["open", str(folder)]
    elif os.name == "nt":
        try:
            os.startfile(str(folder))  # type: ignore[attr-defined]
        except Exception as e:
            # Chain the cause so the original error survives in logs/traces.
            raise HTTPException(status_code=500, detail=f"Failed to open folder: {str(e)[:200]}") from e
    else:
        cmd = ["xdg-open", str(folder)]
    if cmd is not None:
        try:
            # Bounded timeout so a wedged opener cannot hang the request worker
            # indefinitely (TimeoutExpired is caught by the handler below).
            subprocess.run(
                cmd,
                check=False,
                timeout=15,
                stdout=subprocess.DEVNULL,
                stderr=subprocess.DEVNULL,
            )
        except Exception as e:
            raise HTTPException(status_code=500, detail=f"Failed to open folder: {str(e)[:200]}") from e
    return {"ok": True, "job_id": job_id, "folder_path": str(folder)}
@app.get("/api/aurora/jobs")
async def api_aurora_jobs(
    limit: int = Query(default=30, ge=1, le=200),
    status: Optional[str] = Query(default=None),
) -> Dict[str, Any]:
    """Proxy the Aurora jobs list, forwarding the limit/status filters."""
    path = f"/api/aurora/jobs?limit={limit}"
    status_value = (status or "").strip()
    if status_value:
        path = f"{path}&status={quote(status_value, safe=',')}"
    return await _aurora_request_json(
        "GET",
        path,
        timeout=20.0,
        retries=3,
        retry_backoff_sec=0.25,
    )
@app.get("/api/aurora/result/{job_id}")
async def api_aurora_result(job_id: str) -> Dict[str, Any]:
    """Proxy the Aurora result payload for a single job."""
    encoded = quote(job_id, safe="")
    return await _aurora_request_json(
        "GET",
        f"/api/aurora/result/{encoded}",
        timeout=20.0,
        retries=4,
        retry_backoff_sec=0.35,
    )
@app.get("/api/aurora/quality/{job_id}")
async def api_aurora_quality(
    job_id: str,
    refresh: bool = Query(default=False),
) -> Dict[str, Any]:
    """Proxy per-job quality metrics; the refresh flag is forwarded verbatim."""
    flag = "true" if refresh else "false"
    target = f"/api/aurora/quality/{quote(job_id, safe='')}?refresh={flag}"
    return await _aurora_request_json(
        "GET",
        target,
        timeout=20.0,
        retries=4,
        retry_backoff_sec=0.35,
    )
@app.get("/api/aurora/compare/{job_id}")
async def api_aurora_compare(job_id: str) -> Dict[str, Any]:
    """Before/after comparison with full metadata for a completed job.

    Merges upstream status/result payloads with local ffprobe metadata for
    the input/output media, extracted preview frames, and face/plate
    detections.  The /result fetch is best-effort: on failure the response
    degrades to status-only data.
    """
    status = await _aurora_request_json(
        "GET", f"/api/aurora/status/{quote(job_id, safe='')}", timeout=15.0, retries=3
    )
    result = {}
    try:
        result = await _aurora_request_json(
            "GET", f"/api/aurora/result/{quote(job_id, safe='')}", timeout=15.0, retries=2
        )
    except Exception:
        pass
    meta = status.get("metadata") or {}
    vid = meta.get("video") or {}
    storage = status.get("storage") or {}
    # Result payload wins over status for output files / processing log.
    output_files = result.get("output_files") or status.get("output_files") or []
    proc_log = result.get("processing_log") or []
    input_path = storage.get("input_path", "")
    output_dir = storage.get("output_dir", "")
    # "Before" starts from upstream-reported video metadata...
    before: Dict[str, Any] = {
        "file_name": status.get("file_name") or (input_path.rsplit("/", 1)[-1] if input_path else ""),
        "resolution": f"{vid.get('width', '?')}x{vid.get('height', '?')}" if vid.get("width") else "",
        "width": vid.get("width"),
        "height": vid.get("height"),
        "duration_s": vid.get("duration_seconds"),
        "fps": vid.get("fps"),
        "frame_count": vid.get("frame_count"),
        "codec": "",
        "file_size_mb": None,
    }
    if input_path:
        inp = Path(input_path)
        if inp.exists():
            before["file_size_mb"] = round(inp.stat().st_size / (1024 * 1024), 2)
        # ...and is refined by a local ffprobe when the input file is readable.
        _probe = _ffprobe_quick(inp) if inp.exists() else {}
        if _probe:
            before["resolution"] = _probe.get("resolution", before["resolution"])
            before["width"] = _probe.get("width", before["width"])
            before["height"] = _probe.get("height", before["height"])
            before["duration_s"] = _probe.get("duration_s", before["duration_s"])
            before["fps"] = _probe.get("fps", before["fps"])
            before["frame_count"] = _probe.get("frame_count", before["frame_count"])
            before["codec"] = _probe.get("codec", "")
    # Pick the first named video/photo output as the "after" artifact.
    result_file = None
    for f in output_files:
        if (f.get("type") == "video" or f.get("type") == "photo") and f.get("name"):
            result_file = f
            break
    after: Dict[str, Any] = {
        "file_name": result_file["name"] if result_file else "",
        "resolution": "",
        "width": None,
        "height": None,
        "duration_s": None,
        "fps": None,
        "frame_count": None,
        "codec": "",
        "file_size_mb": None,
        "download_url": (result_file or {}).get("url"),
    }
    output_media_path: Optional[Path] = None
    if result_file and output_dir:
        out_path = Path(output_dir) / result_file["name"]
        if out_path.exists():
            output_media_path = out_path
            after["file_size_mb"] = round(out_path.stat().st_size / (1024 * 1024), 2)
            _probe = _ffprobe_quick(out_path)
            if _probe:
                after["resolution"] = _probe.get("resolution", "")
                after["width"] = _probe.get("width")
                after["height"] = _probe.get("height")
                after["duration_s"] = _probe.get("duration_s")
                after["fps"] = _probe.get("fps")
                after["frame_count"] = _probe.get("frame_count")
                after["codec"] = _probe.get("codec", "")
    # Summarize the processing log: aggregate face count plus one row per step.
    faces_total = 0
    enhance_steps = []
    for step in proc_log:
        det = step.get("details") or {}
        if det.get("faces_detected_total") is not None:
            faces_total += det["faces_detected_total"]
        enhance_steps.append({
            "step": step.get("step", "?"),
            "agent": step.get("agent", "?"),
            "model": step.get("model", "?"),
            "time_ms": step.get("time_ms"),
        })
    frame_preview = _aurora_ensure_compare_frame_preview(
        job_id=job_id,
        media_type=str(status.get("media_type") or ""),
        input_path=Path(input_path) if input_path else None,
        output_path=output_media_path,
        output_dir=Path(output_dir) if output_dir else None,
    )
    detections = await _aurora_build_compare_detections(
        media_type=str(status.get("media_type") or ""),
        output_dir=Path(output_dir) if output_dir else None,
        frame_preview=frame_preview,
        fps=before.get("fps") or after.get("fps"),
    )
    return {
        "job_id": job_id,
        "status": status.get("status"),
        "mode": status.get("mode"),
        "media_type": status.get("media_type"),
        "elapsed_seconds": status.get("elapsed_seconds"),
        "before": before,
        "after": after,
        "faces_detected": faces_total,
        "enhance_steps": enhance_steps,
        "frame_preview": frame_preview,
        "detections": detections,
        "folder_path": output_dir,
        "input_path": input_path,
    }
def _aurora_extract_frame_preview(source: Path, target: Path, *, second: float = 1.0) -> bool:
"""Write a JPEG preview frame for image/video sources."""
if not source.exists():
return False
target.parent.mkdir(parents=True, exist_ok=True)
ext = source.suffix.lower()
if ext in {".jpg", ".jpeg", ".png", ".webp", ".bmp", ".tif", ".tiff"}:
try:
target.write_bytes(source.read_bytes())
return True
except Exception:
return False
ffmpeg = [
"ffmpeg",
"-hide_banner",
"-loglevel",
"error",
"-y",
"-ss",
f"{max(0.0, float(second)):.3f}",
"-i",
str(source),
"-frames:v",
"1",
"-q:v",
"2",
str(target),
]
try:
run = subprocess.run(ffmpeg, capture_output=True, text=True, timeout=20)
if run.returncode == 0 and target.exists() and target.stat().st_size > 0:
return True
except Exception:
pass
# Fallback for short videos / odd timestamps.
ffmpeg_fallback = ffmpeg[:]
ffmpeg_fallback[6] = "0.0"
try:
run = subprocess.run(ffmpeg_fallback, capture_output=True, text=True, timeout=20)
return run.returncode == 0 and target.exists() and target.stat().st_size > 0
except Exception:
return False
def _aurora_ensure_compare_frame_preview(
    *,
    job_id: str,
    media_type: str,
    input_path: Optional[Path],
    output_path: Optional[Path],
    output_dir: Optional[Path],
) -> Optional[Dict[str, Any]]:
    """Ensure before/after preview JPEGs exist for a job; return their URLs.

    Returns None when any required path is missing or extraction produced an
    empty file.
    """
    # All three paths must exist before a comparison can be rendered.
    for required in (output_dir, input_path, output_path):
        if not required or not required.exists():
            return None
    before_path = output_dir / "_compare_before.jpg"
    after_path = output_dir / "_compare_after.jpg"
    # Sample one second in for video; frame 0 for stills.
    ts = 1.0 if media_type == "video" else 0.0
    for src, dst in ((input_path, before_path), (output_path, after_path)):
        if not dst.exists() or dst.stat().st_size == 0:
            _aurora_extract_frame_preview(src, dst, second=ts)
    for preview in (before_path, after_path):
        if not preview.exists() or preview.stat().st_size <= 0:
            return None
    quoted_job = quote(job_id, safe="")
    return {
        "timestamp_sec": ts,
        "before_url": f"/api/aurora/files/{quoted_job}/{quote('_compare_before.jpg', safe='')}",
        "after_url": f"/api/aurora/files/{quoted_job}/{quote('_compare_after.jpg', safe='')}",
    }
def _aurora_bbox_xyxy(raw_bbox: Any) -> Optional[List[int]]:
if not isinstance(raw_bbox, (list, tuple)) or len(raw_bbox) < 4:
return None
try:
x1 = int(float(raw_bbox[0]))
y1 = int(float(raw_bbox[1]))
x2 = int(float(raw_bbox[2]))
y2 = int(float(raw_bbox[3]))
except Exception:
return None
if x2 < x1:
x1, x2 = x2, x1
if y2 < y1:
y1, y2 = y2, y1
if x2 <= x1 or y2 <= y1:
return None
return [x1, y1, x2, y2]
def _aurora_image_dims(path: Path) -> Optional[Dict[str, int]]:
    """Return {"width", "height"} of an image file, or None when unreadable."""
    if cv2 is None or not path.exists():
        return None
    try:
        loaded = cv2.imread(str(path), cv2.IMREAD_COLOR)
        if loaded is None:
            return None
        height, width = loaded.shape[:2]
        if width <= 0 or height <= 0:
            return None
        return {"width": int(width), "height": int(height)}
    except Exception:
        return None
def _aurora_detect_faces_from_preview(path: Path) -> List[Dict[str, Any]]:
    """Detect faces in a preview JPEG using OpenCV's Haar cascade.

    Returns up to 40 detections as {"bbox": [x1, y1, x2, y2], "confidence"}.
    "confidence" is a heuristic derived from Laplacian sharpness of the face
    region (clamped to 0.5..0.99), not a classifier score.  Returns [] when
    cv2 is unavailable, the file is missing, or detection fails.
    """
    if cv2 is None or not path.exists():
        return []
    try:
        frame = cv2.imread(str(path), cv2.IMREAD_COLOR)
        if frame is None:
            return []
        gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
        cascade_path = Path(cv2.data.haarcascades) / "haarcascade_frontalface_default.xml"
        cascade = cv2.CascadeClassifier(str(cascade_path))
        if cascade.empty():
            return []
        faces = cascade.detectMultiScale(gray, scaleFactor=1.1, minNeighbors=4, minSize=(20, 20))
        out: List[Dict[str, Any]] = []
        for (x, y, w, h) in faces[:40]:
            roi = gray[y : y + h, x : x + w]
            # Laplacian variance ~ sharpness; sharper regions score higher.
            lap = float(cv2.Laplacian(roi, cv2.CV_64F).var()) if roi.size > 0 else 0.0
            conf = max(0.5, min(0.99, 0.55 + (lap / 400.0)))
            out.append(
                {
                    "bbox": [int(x), int(y), int(x + w), int(y + h)],
                    "confidence": round(conf, 3),
                }
            )
        return out
    except Exception:
        return []
async def _aurora_detect_faces_via_service(path: Path) -> List[Dict[str, Any]]:
    """Ask the Aurora service to detect faces in an image file.

    Used as a fallback when local OpenCV detection finds nothing.  Upstream
    bboxes arrive as (x, y, w, h) and are converted to [x1, y1, x2, y2]; at
    most 60 faces are returned.  Any transport/parse failure yields [].
    """
    if not path.exists():
        return []
    mime = mimetypes.guess_type(path.name)[0] or "application/octet-stream"
    timeout = httpx.Timeout(20.0, connect=6.0)
    try:
        async with httpx.AsyncClient(timeout=timeout) as client:
            with path.open("rb") as fh:
                files = {"file": (path.name, fh, mime)}
                resp = await client.post(f"{AURORA_SERVICE_URL}/api/aurora/analyze", files=files)
                if resp.status_code >= 400:
                    return []
                payload = resp.json() if resp.content else {}
    except Exception:
        return []
    faces_raw = payload.get("faces")
    if not isinstance(faces_raw, list):
        return []
    out: List[Dict[str, Any]] = []
    for item in faces_raw[:60]:
        if not isinstance(item, dict):
            continue
        bbox = item.get("bbox")
        if not isinstance(bbox, (list, tuple)) or len(bbox) < 4:
            continue
        try:
            x = int(float(bbox[0]))
            y = int(float(bbox[1]))
            w = int(float(bbox[2]))
            h = int(float(bbox[3]))
        except Exception:
            continue
        # Discard degenerate boxes (1px or smaller).
        if w <= 1 or h <= 1:
            continue
        conf: Optional[float]
        try:
            conf = round(float(item.get("confidence")), 3)
        except Exception:
            conf = None
        out.append(
            {
                # Convert (x, y, w, h) to corner form.
                "bbox": [x, y, x + w, y + h],
                "confidence": conf,
            }
        )
    return out
def _aurora_select_plate_detections(
    output_dir: Path,
    *,
    target_frame: Optional[int],
    max_items: int = 12,
) -> List[Dict[str, Any]]:
    """Load plate_detections.json and pick detections near a target frame.

    Prefers the per-frame "detections" list, falling back to "unique".  When
    target_frame is known and frame numbers are present, keeps detections
    inside an adaptive frame window ordered by proximity then confidence;
    otherwise returns the highest-confidence entries.  Returns [] on any
    missing or unreadable report.
    """
    report_path = output_dir / "plate_detections.json"
    if not report_path.exists():
        return []
    try:
        payload = json.loads(report_path.read_text(encoding="utf-8"))
    except Exception:
        return []
    source_items: List[Any]
    detections = payload.get("detections")
    unique = payload.get("unique")
    if isinstance(detections, list) and detections:
        source_items = detections
    elif isinstance(unique, list) and unique:
        source_items = unique
    else:
        return []
    parsed: List[Dict[str, Any]] = []
    for item in source_items:
        if not isinstance(item, dict):
            continue
        bbox = _aurora_bbox_xyxy(item.get("bbox"))
        if not bbox:
            continue
        text_value = str(item.get("text") or "").strip()
        conf_value: Optional[float]
        try:
            conf_value = round(float(item.get("confidence")), 3)
        except Exception:
            conf_value = None
        frame_value: Optional[int]
        try:
            frame_value = int(item.get("frame")) if item.get("frame") is not None else None
        except Exception:
            frame_value = None
        parsed.append(
            {
                "bbox": bbox,
                "text": text_value or None,
                "confidence": conf_value,
                "frame": frame_value,
            }
        )
    if not parsed:
        return []
    with_frame = [x for x in parsed if x.get("frame") is not None]
    if target_frame is not None and with_frame:
        # The window widens with the distance of the closest available frame,
        # so at least the nearest detections always survive the filter.
        min_distance = min(abs(int(x["frame"]) - int(target_frame)) for x in with_frame)
        keep = max(4, min_distance + 2)
        filtered = [x for x in with_frame if abs(int(x["frame"]) - int(target_frame)) <= keep]
        filtered.sort(key=lambda x: (abs(int(x["frame"]) - int(target_frame)), -(x.get("confidence") or 0.0)))
        return filtered[:max_items]
    # No frame anchor: rank by confidence, tie-break on text.
    parsed.sort(key=lambda x: (-(x.get("confidence") or 0.0), x.get("text") or ""))
    return parsed[:max_items]
async def _aurora_build_compare_detections(
    *,
    media_type: str,
    output_dir: Optional[Path],
    frame_preview: Optional[Dict[str, Any]],
    fps: Any,
) -> Optional[Dict[str, Any]]:
    """Build face/plate detection overlays for the before/after previews.

    Requires preview JPEGs to already exist (frame_preview produced by
    _aurora_ensure_compare_frame_preview).  Local OpenCV detection is tried
    first, with a per-side fallback to the Aurora service.  For video, the
    preview timestamp is mapped to a frame index via fps (15 is assumed when
    fps is missing or invalid) to select nearby plate detections.
    """
    if not output_dir or not output_dir.exists():
        return None
    if not isinstance(frame_preview, dict):
        return None
    before_path = output_dir / "_compare_before.jpg"
    after_path = output_dir / "_compare_after.jpg"
    before_faces = _aurora_detect_faces_from_preview(before_path)
    after_faces = _aurora_detect_faces_from_preview(after_path)
    if not before_faces and before_path.exists():
        before_faces = await _aurora_detect_faces_via_service(before_path)
    if not after_faces and after_path.exists():
        after_faces = await _aurora_detect_faces_via_service(after_path)
    before_size = _aurora_image_dims(before_path)
    after_size = _aurora_image_dims(after_path)
    target_ts = float(frame_preview.get("timestamp_sec") or 0.0)
    target_frame: Optional[int] = None
    if str(media_type).lower() == "video":
        try:
            fps_val = float(fps)
        except Exception:
            fps_val = 15.0
        if fps_val <= 0:
            fps_val = 15.0
        target_frame = int(round(target_ts * fps_val))
    # Plates are resolved once for the job; the same list annotates both sides.
    plate_items = _aurora_select_plate_detections(output_dir, target_frame=target_frame)
    return {
        "target_timestamp_sec": target_ts if str(media_type).lower() == "video" else None,
        "target_frame": target_frame,
        "before": {
            "frame_size": before_size,
            "faces": before_faces,
            "plates": plate_items,
        },
        "after": {
            "frame_size": after_size,
            "faces": after_faces,
            "plates": plate_items,
        },
    }
def _ffprobe_quick(filepath: Path) -> Dict[str, Any]:
"""Quick ffprobe for resolution, codec, duration, fps, frame count."""
if not filepath.exists():
return {}
try:
import subprocess as _sp
raw = _sp.run(
["ffprobe", "-v", "quiet", "-print_format", "json", "-show_format", "-show_streams", str(filepath)],
capture_output=True, text=True, timeout=10
)
if raw.returncode != 0:
return {}
import json as _json
data = _json.loads(raw.stdout)
fmt = data.get("format") or {}
vs = [s for s in (data.get("streams") or []) if s.get("codec_type") == "video"]
if not vs:
return {"duration_s": round(float(fmt.get("duration", 0)), 2)}
v = vs[0]
w, h = v.get("width"), v.get("height")
rfr = v.get("r_frame_rate", "0/1").split("/")
fps = round(int(rfr[0]) / max(1, int(rfr[1])), 2) if len(rfr) == 2 else None
return {
"resolution": f"{w}x{h}" if w and h else "",
"width": w, "height": h,
"codec": v.get("codec_name", ""),
"duration_s": round(float(fmt.get("duration", 0)), 2),
"fps": fps,
"frame_count": int(v.get("nb_frames", 0)) or None,
}
except Exception:
return {}
@app.post("/api/aurora/cancel/{job_id}")
async def api_aurora_cancel(job_id: str) -> Dict[str, Any]:
    """Forward a cancel request for the given Aurora job."""
    target = f"/api/aurora/cancel/{quote(job_id, safe='')}"
    return await _aurora_request_json(
        "POST",
        target,
        timeout=20.0,
        retries=2,
        retry_backoff_sec=0.2,
    )
@app.post("/api/aurora/delete/{job_id}")
async def api_aurora_delete(
    job_id: str,
    purge_files: bool = Query(default=True),
) -> Dict[str, Any]:
    """Forward a delete request; purge_files is passed through to Aurora."""
    purge_flag = "true" if purge_files else "false"
    target = f"/api/aurora/delete/{quote(job_id, safe='')}?purge_files={purge_flag}"
    return await _aurora_request_json(
        "POST",
        target,
        timeout=30.0,
        retries=2,
        retry_backoff_sec=0.2,
    )
@app.get("/api/aurora/report/{job_id}.pdf")
async def api_aurora_report_pdf(job_id: str) -> StreamingResponse:
    """Stream PDF report from Aurora service without buffering in RAM.

    Tries the primary Aurora base URL, then the fallback; a DNS-style
    resolution failure moves on to the next base, any other error aborts with
    502.  The httpx client and response are always closed, including on
    connection errors (previously the client leaked in that path).
    """
    encoded_job = quote(job_id, safe="")
    paths = [AURORA_SERVICE_URL]
    if AURORA_FALLBACK_URL and AURORA_FALLBACK_URL not in paths:
        paths.append(AURORA_FALLBACK_URL)
    last_err = ""
    for base in paths:
        url = f"{base}/api/aurora/report/{encoded_job}.pdf"
        client = httpx.AsyncClient(timeout=120.0)
        try:
            r = await client.send(client.build_request("GET", url), stream=True)
            if r.status_code >= 400:
                body = (await r.aread()).decode(errors="replace")[:400]
                await r.aclose()
                await client.aclose()
                raise HTTPException(status_code=r.status_code, detail=body or f"Aurora report error {r.status_code}")
            disposition = r.headers.get("content-disposition", f'inline; filename="{job_id}_forensic_report.pdf"')
            async def _stream():
                # Response/client are released only after the body is consumed.
                try:
                    async for chunk in r.aiter_bytes(chunk_size=65536):
                        yield chunk
                finally:
                    await r.aclose()
                    await client.aclose()
            return StreamingResponse(
                _stream(),
                media_type="application/pdf",
                headers={"Content-Disposition": disposition, "Cache-Control": "no-store"},
            )
        except HTTPException:
            raise
        except Exception as e:
            # Fix: close the client on connection/transport errors too.
            await client.aclose()
            last_err = str(e)[:200]
            if "nodename nor servname provided" in str(e):
                continue
            raise HTTPException(status_code=502, detail=f"Aurora report proxy error: {last_err}")
    raise HTTPException(status_code=502, detail=f"Aurora report proxy error: {last_err or 'unavailable'}")
@app.get("/api/aurora/files/{job_id}/{file_name:path}")
async def api_aurora_file(job_id: str, file_name: str, request: Request) -> StreamingResponse:
    """Stream a job artifact from Aurora, preserving range/caching semantics.

    Conditional and range request headers are forwarded upstream and the
    relevant response headers passed back, so seeking and 304s work through
    the proxy.  Primary base URL is tried first; fallback is used on 5xx or
    connection errors, while 4xx are surfaced to the caller unchanged.
    """
    encoded_job = quote(job_id, safe="")
    encoded_name = quote(file_name, safe="")
    paths = [AURORA_SERVICE_URL]
    if AURORA_FALLBACK_URL and AURORA_FALLBACK_URL not in paths:
        paths.append(AURORA_FALLBACK_URL)
    last_err = ""
    for base in paths:
        url = f"{base}/api/aurora/files/{encoded_job}/{encoded_name}"
        client = httpx.AsyncClient(timeout=httpx.Timeout(10.0, read=300.0))
        try:
            # Forward conditional/range headers so upstream can honor seeks.
            upstream_headers: Dict[str, str] = {}
            for name in ("range", "if-range", "if-none-match", "if-modified-since"):
                value = request.headers.get(name)
                if value:
                    upstream_headers[name] = value
            resp = await client.send(client.build_request("GET", url, headers=upstream_headers), stream=True)
            if resp.status_code >= 400:
                body = (await resp.aread()).decode(errors="replace")[:400]
                await resp.aclose()
                await client.aclose()
                if resp.status_code >= 500:
                    # Server-side failure: remember it and try the next base.
                    last_err = f"Aurora {resp.status_code}: {body}"
                    continue
                raise HTTPException(status_code=resp.status_code, detail=body or f"Aurora file error {resp.status_code}")
            ct = resp.headers.get("content-type", "application/octet-stream")
            # Pass through headers that matter for caching and range playback.
            passthrough_headers: Dict[str, str] = {}
            for name in (
                "content-disposition",
                "content-length",
                "content-range",
                "accept-ranges",
                "etag",
                "last-modified",
                "cache-control",
            ):
                value = resp.headers.get(name)
                if value:
                    passthrough_headers[name] = value
            if "content-disposition" not in passthrough_headers:
                passthrough_headers["content-disposition"] = f'inline; filename="{Path(file_name).name}"'
            passthrough_headers.setdefault("cache-control", "no-store")
            async def _stream():
                # Response/client are closed only after the stream is consumed.
                try:
                    async for chunk in resp.aiter_bytes(chunk_size=65536):
                        yield chunk
                finally:
                    await resp.aclose()
                    await client.aclose()
            return StreamingResponse(
                _stream(),
                status_code=resp.status_code,
                media_type=ct,
                headers=passthrough_headers,
            )
        except HTTPException:
            raise
        except Exception as e:
            await client.aclose()
            last_err = str(e)[:200]
            # DNS resolution failure wording: treat as "try next candidate".
            if "nodename nor servname provided" in str(e):
                continue
            raise HTTPException(status_code=502, detail=f"Aurora file proxy error: {last_err}")
    raise HTTPException(status_code=502, detail=f"Aurora file proxy error: {last_err or 'unavailable'}")
class MediaImageGenerateBody(BaseModel):
    """Request body for POST /api/media/generate/image."""
    prompt: str
    negative_prompt: Optional[str] = None
    # Dimensions are clamped server-side to 256..2048 by the endpoint.
    width: int = 1024
    height: int = 1024
    # Clamped to 1..120 steps and 0.0..20.0 guidance by the endpoint.
    steps: int = 28
    guidance_scale: float = 4.0
    # Clamped to 30..900 seconds by the endpoint.
    timeout_s: int = 300
class MediaVideoGenerateBody(BaseModel):
    """Request body for POST /api/media/generate/video."""
    prompt: str
    # Clamped server-side to 1..8 seconds and 8..60 fps by the endpoint.
    seconds: int = 4
    fps: int = 24
    # Clamped to 1..120 steps by the endpoint.
    steps: int = 30
    # style/aspect_ratio are only used by the swapper fallback path.
    style: str = "cinematic"
    aspect_ratio: str = "16:9"
    # Clamped to 60..1200 seconds by the endpoint.
    timeout_s: int = 360
class MediaImageModelLoadBody(BaseModel):
    """Request body for POST /api/media/models/image/load."""
    # Name of the image model the swapper service should load.
    model: str
def _resolve_media_router_url() -> str:
    """Resolve the media router base URL.

    Explicit MEDIA_ROUTER_URL wins; otherwise NODA2's registered router URL,
    then the first registry entry's router_url, is used.
    """
    registry = load_nodes_registry()
    discovered = get_router_url("NODA2")
    if not discovered and registry:
        discovered = list(registry.values())[0].get("router_url", "")
    discovered = (discovered or "").rstrip("/")
    return MEDIA_ROUTER_URL or discovered
def _media_router_candidates() -> List[str]:
    """Build an ordered, de-duplicated list of router base URLs to try."""
    candidates: List[str] = []

    def _add(url: str) -> None:
        if url and url not in candidates:
            candidates.append(url)

    for raw in (_resolve_media_router_url(), MEDIA_ROUTER_FALLBACK_URL):
        if not raw:
            continue
        normalized = raw.strip().rstrip("/")
        _add(normalized)
        # A bare "router" hostname also gets a loopback variant appended,
        # presumably for when the docker-internal name does not resolve.
        if "://router:" in normalized or "://router/" in normalized:
            _add(normalized.replace("://router:", "://127.0.0.1:").replace("://router/", "://127.0.0.1/"))
    for port in ("9102", "8000"):
        _add(f"http://127.0.0.1:{port}")
    return candidates
async def _pick_media_router_url() -> str:
    """Return the first reachable media router candidate (probing /healthz)."""
    candidates = _media_router_candidates()
    for base in candidates:
        probe = await _probe_http(f"{base}/healthz", timeout=2.5)
        if probe.get("reachable"):
            return base
    # Nothing answered: fall back to the first candidate, or "" when none exist.
    return candidates[0] if candidates else ""
def _media_append_job(kind: str, payload: Dict[str, Any]) -> Dict[str, Any]:
    """Record a media job in the in-memory ring buffer and return the entry.

    Payload keys override the generated id/kind/ts fields when they collide.
    """
    entry: Dict[str, Any] = {
        "id": f"media_{kind}_{uuid.uuid4().hex[:10]}",
        "kind": kind,
        "ts": datetime.now(timezone.utc).isoformat(),
    }
    entry.update(payload)
    _media_recent_jobs.appendleft(entry)
    return entry
@app.get("/api/media/health")
async def api_media_health() -> Dict[str, Any]:
    """Aggregate health of media services plus the swapper's image models.

    All probes run concurrently; the image-model fetch is best-effort and
    degrades to an empty list on any failure.
    """
    router_url = await _pick_media_router_url()
    probes = await asyncio.gather(
        # When no router URL is known, substitute a synthetic failed probe.
        _probe_http(f"{router_url}/healthz") if router_url else asyncio.sleep(0, result={"reachable": False, "error": "router missing"}),
        _probe_http(f"{MEDIA_COMFY_AGENT_URL}/health"),
        _probe_http(f"{MEDIA_COMFY_UI_URL}/"),
        _probe_http(f"{MEDIA_SWAPPER_URL}/health"),
        _probe_http(f"{MEDIA_IMAGE_GEN_URL}/health"),
    )
    image_models: Dict[str, Any] = {"image_models": []}
    try:
        async with httpx.AsyncClient(timeout=10.0) as client:
            r = await client.get(f"{MEDIA_SWAPPER_URL}/image/models")
            if r.status_code < 400 and r.content:
                image_models = r.json()
    except Exception:
        image_models = {"image_models": []}
    return {
        "ok": True,
        "router_url": router_url,
        "services": {
            "router": probes[0],
            "comfy_agent": probes[1],
            "comfy_ui": probes[2],
            "swapper": probes[3],
            "image_gen": probes[4],
        },
        "image_models": image_models.get("image_models", []),
        "active_image_model": image_models.get("active_image_model"),
        "fallback_order": ["comfy", "swapper", "image-gen-service"],
    }
@app.get("/api/media/models/image")
async def api_media_image_models() -> Dict[str, Any]:
    """List image models known to the swapper service plus the active one.

    Swapper HTTP errors are mirrored with their status; transport failures
    become 502.
    """
    try:
        async with httpx.AsyncClient(timeout=15.0) as client:
            r = await client.get(f"{MEDIA_SWAPPER_URL}/image/models")
            if r.status_code >= 400:
                raise HTTPException(status_code=r.status_code, detail=r.text[:240] or "swapper error")
            data = r.json() if r.content else {}
            return {
                "ok": True,
                "image_models": data.get("image_models", []),
                "active_image_model": data.get("active_image_model"),
                "device": data.get("device"),
            }
    except HTTPException:
        raise
    except Exception as e:
        raise HTTPException(status_code=502, detail=f"Image models unavailable: {str(e)[:200]}")
@app.post("/api/media/models/image/load")
async def api_media_image_model_load(body: MediaImageModelLoadBody) -> Dict[str, Any]:
    """Ask the swapper service to load the named image model.

    Raises 400 on an empty model name; swapper errors are mirrored and
    transport failures become 502.
    """
    model = body.model.strip()
    if not model:
        raise HTTPException(status_code=400, detail="model is required")
    try:
        # Model loading can take a while; allow a generous 300s timeout.
        async with httpx.AsyncClient(timeout=300.0) as client:
            r = await client.post(f"{MEDIA_SWAPPER_URL}/image/models/{quote(model, safe='')}/load")
            if r.status_code >= 400:
                raise HTTPException(status_code=r.status_code, detail=r.text[:240] or "swapper load error")
            return {"ok": True, "result": r.json() if r.content else {"status": "ok"}}
    except HTTPException:
        raise
    except Exception as e:
        raise HTTPException(status_code=502, detail=f"Image model load failed: {str(e)[:200]}")
@app.post("/api/media/generate/image")
async def api_media_generate_image(body: MediaImageGenerateBody) -> Dict[str, Any]:
    """Generate an image via the Router's image_generate tool.

    Parameters are clamped server-side (size 256..2048, steps 1..120,
    guidance 0..20, timeout 30..900s).  The attempt is recorded in the
    in-memory job log whether it succeeds or not; failures become 502.
    """
    prompt = body.prompt.strip()
    if not prompt:
        raise HTTPException(status_code=400, detail="prompt is required")
    router_url = await _pick_media_router_url()
    if not router_url:
        raise HTTPException(status_code=503, detail="Router URL not configured")
    params = {
        "prompt": prompt,
        "negative_prompt": body.negative_prompt or "",
        "width": max(256, min(2048, int(body.width))),
        "height": max(256, min(2048, int(body.height))),
        "steps": max(1, min(120, int(body.steps))),
        "guidance_scale": max(0.0, min(20.0, float(body.guidance_scale))),
        "timeout_s": max(30, min(900, int(body.timeout_s))),
    }
    started = time.monotonic()
    # Client timeout is the tool budget plus 30s of slack for transport.
    response = await execute_tool(
        router_url,
        tool="image_generate",
        action="generate",
        params=params,
        agent_id="sofiia",
        timeout=float(params["timeout_s"] + 30),
        api_key=ROUTER_API_KEY,
    )
    ok = response.get("status") == "ok"
    result_data = response.get("data") or {}
    result_item = _media_append_job(
        "image",
        {
            "status": "ok" if ok else "failed",
            "provider": "router:image_generate",
            "prompt": prompt[:180],
            "duration_ms": int((time.monotonic() - started) * 1000),
            "result": result_data.get("result"),
            "has_image_base64": bool(result_data.get("image_base64")),
            # Router errors may be either dicts with "message" or plain values.
            "error": (response.get("error") or {}).get("message") if isinstance(response.get("error"), dict) else response.get("error"),
        },
    )
    if not ok:
        raise HTTPException(status_code=502, detail=f"Image generate failed: {result_item.get('error') or 'tool failed'}")
    return {"ok": True, "job": result_item, "tool_response": response}
@app.post("/api/media/generate/video")
async def api_media_generate_video(body: MediaVideoGenerateBody) -> Dict[str, Any]:
    """Generate a short video via the Router's comfy tool, with a fallback.

    Parameters are clamped server-side (seconds 1..8, fps 8..60, steps
    1..120, timeout 60..1200s).  When the Router tool fails, a direct
    swapper /video/generate call is attempted before reporting 502.  The
    attempt is always recorded in the in-memory job log.
    """
    prompt = body.prompt.strip()
    if not prompt:
        raise HTTPException(status_code=400, detail="prompt is required")
    router_url = await _pick_media_router_url()
    if not router_url:
        raise HTTPException(status_code=503, detail="Router URL not configured")
    params = {
        "prompt": prompt,
        "seconds": max(1, min(8, int(body.seconds))),
        "fps": max(8, min(60, int(body.fps))),
        "steps": max(1, min(120, int(body.steps))),
        "timeout_s": max(60, min(1200, int(body.timeout_s))),
    }
    started = time.monotonic()
    response = await execute_tool(
        router_url,
        tool="comfy_generate_video",
        action="generate",
        params=params,
        agent_id="sofiia",
        timeout=float(params["timeout_s"] + 30),
        api_key=ROUTER_API_KEY,
    )
    ok = response.get("status") == "ok"
    provider = "router:comfy_generate_video"
    fallback_payload: Dict[str, Any] = {}
    if not ok:
        # Fallback path: call the swapper service directly.
        try:
            async with httpx.AsyncClient(timeout=120.0) as client:
                r = await client.post(
                    f"{MEDIA_SWAPPER_URL}/video/generate",
                    json={
                        "prompt": prompt,
                        "duration": params["seconds"],
                        "style": body.style,
                        "aspect_ratio": body.aspect_ratio,
                    },
                )
                if r.status_code < 400:
                    fallback_payload = r.json() if r.content else {}
                    ok = True
                    provider = "swapper:video/generate"
        except Exception as e:
            fallback_payload = {"error": str(e)[:200]}
    result_item = _media_append_job(
        "video",
        {
            "status": "ok" if ok else "failed",
            "provider": provider,
            "prompt": prompt[:180],
            "duration_ms": int((time.monotonic() - started) * 1000),
            # Fallback payload (when present) replaces the router result.
            "result": (response.get("data") or {}).get("result") if not fallback_payload else fallback_payload,
            "error": None if ok else ((response.get("error") or {}).get("message") if isinstance(response.get("error"), dict) else response.get("error")),
        },
    )
    if not ok:
        raise HTTPException(status_code=502, detail=f"Video generate failed: {result_item.get('error') or 'tool failed'}")
    return {"ok": True, "job": result_item, "tool_response": response, "fallback_response": fallback_payload}
@app.get("/api/media/jobs")
async def api_media_jobs(limit: int = Query(default=20, ge=1, le=100)) -> Dict[str, Any]:
    """List the most recent in-memory media jobs (newest first)."""
    jobs = list(_media_recent_jobs)[:limit]
    return {"ok": True, "count": min(limit, len(_media_recent_jobs)), "jobs": jobs}
# ─── Chat (runtime contract) ─────────────────────────────────────────────────
@app.get("/api/chat/config")
async def api_chat_config() -> Dict[str, Any]:
    """Expose the chat/Ollama tuning values the console is configured with."""
    return {
        "preferred_model": SOFIIA_PREFERRED_CHAT_MODEL,
        "ollama": {
            "timeout_sec": SOFIIA_OLLAMA_TIMEOUT_SEC,
            "voice_timeout_sec": SOFIIA_OLLAMA_VOICE_TIMEOUT_SEC,
            "keep_alive": SOFIIA_OLLAMA_KEEP_ALIVE,
            "num_ctx": SOFIIA_OLLAMA_NUM_CTX,
            "num_thread": SOFIIA_OLLAMA_NUM_THREAD,
            "num_gpu": SOFIIA_OLLAMA_NUM_GPU,
        },
    }
class ChatSendBody(BaseModel):
    """Payload for the chat send endpoint."""
    message: str
    model: str = "ollama:qwen3:14b"
    node_id: str = "NODA2"
    project_id: Optional[str] = None
    session_id: Optional[str] = None
    user_id: Optional[str] = None
    # Prior conversation turns supplied by the client.
    history: List[Dict[str, Any]] = []
    # Voice routing hint — forwarded to Router as X-Voice-Profile header
    # Values: "voice_fast_uk" (default) | "voice_quality_uk"
    voice_profile: Optional[str] = None
# Single internal project under which all console chats are indexed.
CHAT_PROJECT_ID = "chats"
class ChatCreateBody(BaseModel):
    """Payload for creating (upserting) a chat thread."""
    agent_id: str
    node_id: str = "NODA2"
    source: str = "console"
    # External reference (e.g. an upstream chat key); defaults to "main".
    external_chat_ref: Optional[str] = None
    title: Optional[str] = None
class ChatMessageSendBody(BaseModel):
    """Payload for posting a message into an existing chat."""
    text: str
    attachments: List[Dict[str, Any]] = []
    project_id: Optional[str] = None
    session_id: Optional[str] = None
    user_id: Optional[str] = None
    # Optional routing/client metadata blobs passed through by the client.
    routing: Optional[Dict[str, Any]] = None
    client: Optional[Dict[str, Any]] = None
    idempotency_key: Optional[str] = None
def _make_chat_id(node_id: str, agent_id: str, source: str = "console", external_chat_ref: Optional[str] = None) -> str:
ext = (external_chat_ref or "main").strip() or "main"
return f"chat:{node_id.upper()}:{agent_id.strip().lower()}:{source.strip().lower()}:{ext}"
def _parse_chat_id(chat_id: str) -> Dict[str, str]:
raw = (chat_id or "").strip()
parts = raw.split(":", 4)
if len(parts) == 5 and parts[0] == "chat":
return {
"chat_id": raw,
"node_id": parts[1].upper(),
"agent_id": parts[2].lower(),
"source": parts[3].lower(),
"external_chat_ref": parts[4],
}
# Legacy fallback: treat arbitrary session_id as local NODA2 chat with sofiia
return {
"chat_id": raw,
"node_id": "NODA2",
"agent_id": "sofiia",
"source": "console",
"external_chat_ref": raw or "main",
}
async def _ensure_chat_project() -> None:
    """Create the shared chat-index project on first use (idempotent)."""
    existing = await _app_db.get_project(CHAT_PROJECT_ID)
    if existing:
        return
    await _app_db.create_project(
        name="Chats",
        description="Cross-node chat index for Sofiia Console",
        project_id=CHAT_PROJECT_ID,
    )
def _clean_chat_reply(text: str) -> str:
import re
cleaned = re.sub(r"<think>.*?</think>", "", text, flags=re.DOTALL | re.IGNORECASE)
if "<think>" in cleaned.lower():
cleaned = re.split(r"(?i)<think>", cleaned)[0]
return cleaned.strip()
def _cursor_encode(payload: Dict[str, Any]) -> str:
wrapped = {"v": 1, **payload}
raw = json.dumps(wrapped, separators=(",", ":"), ensure_ascii=True).encode("utf-8")
return base64.urlsafe_b64encode(raw).decode("ascii")
def _cursor_decode(cursor: Optional[str]) -> Dict[str, Any]:
if not cursor:
return {}
try:
decoded = base64.urlsafe_b64decode(cursor.encode("ascii")).decode("utf-8")
data = json.loads(decoded)
if not isinstance(data, dict):
return {}
# Backward compatibility: accept old cursors without "v".
if "v" not in data:
return data
# Current cursor format version.
if int(data.get("v") or 0) == 1:
out = dict(data)
out.pop("v", None)
return out
return {}
except Exception:
return {}
@app.get("/api/chats")
async def api_chats_list(
    request: Request,
    nodes: str = Query("NODA1,NODA2"),
    agent_id: Optional[str] = Query(None),
    q: Optional[str] = Query(None),
    limit: int = Query(50, ge=1, le=200),
    cursor: Optional[str] = Query(None),
    _auth: str = Depends(require_auth),
):
    """List chat threads with cursor pagination and node/agent/text filters.

    The cursor encodes (last_active, chat_id) of the last returned item.
    Filters are applied after the DB page fetch, so up to 5x `limit` sessions
    are pulled per request to keep pages reasonably full.
    """
    SOFIIA_CURSOR_REQUESTS_TOTAL.labels(resource="chats").inc()
    await _ensure_chat_project()
    node_filter = {n.strip().upper() for n in nodes.split(",") if n.strip()}
    cur = _cursor_decode(cursor)
    before_last_active = str(cur.get("last_active") or "").strip() or None
    before_chat_id = str(cur.get("chat_id") or "").strip() or None
    # Over-fetch so post-filtering still fills the page.
    fetch_limit = max(limit * 5, limit + 1)
    sessions = await _app_db.list_sessions_page(
        CHAT_PROJECT_ID,
        limit=fetch_limit,
        before_last_active=before_last_active,
        before_session_id=before_chat_id,
    )
    items: List[Dict[str, Any]] = []
    agent_filter = (agent_id or "").strip().lower()
    q_filter = (q or "").strip().lower()
    for s in sessions:
        sid = str(s.get("session_id") or "")
        if not sid:
            continue
        info = _parse_chat_id(sid)
        if node_filter and info["node_id"] not in node_filter:
            continue
        if agent_filter and info["agent_id"] != agent_filter:
            continue
        msgs = await _app_db.list_messages(sid, limit=200)
        last = msgs[-1] if msgs else None
        item = {
            "chat_id": sid,
            "title": (s.get("title") or f"{info['agent_id']}{info['node_id']}").strip(),
            "agent_id": info["agent_id"],
            "node_id": info["node_id"],
            "source": info["source"],
            "external_chat_ref": info["external_chat_ref"],
            "updated_at": s.get("last_active"),
            "last_message": (
                {
                    "message_id": last.get("msg_id"),
                    "role": last.get("role"),
                    # Preview only: truncate to 280 chars.
                    "text": (last.get("content") or "")[:280],
                    "ts": last.get("ts"),
                } if last else None
            ),
            "turn_count": s.get("turn_count", 0),
        }
        if q_filter:
            # Free-text filter over title, ids and last-message preview.
            hay = " ".join(
                [
                    item["title"],
                    item["agent_id"],
                    item["node_id"],
                    (item["last_message"] or {}).get("text", ""),
                ]
            ).lower()
            if q_filter not in hay:
                continue
        items.append(item)
        if len(items) >= limit:
            break
    next_cursor = None
    if items:
        # NOTE(review): a cursor is emitted even for the final page; callers
        # should use has_more to decide whether to keep paging.
        last_item = items[-1]
        next_cursor = _cursor_encode(
            {
                "last_active": last_item.get("updated_at"),
                "chat_id": last_item.get("chat_id"),
            }
        )
    # Heuristic: a full upstream fetch or a full page implies more may exist.
    has_more = len(sessions) >= fetch_limit or len(items) >= limit
    log_event(
        "chat.list",
        request_id=get_request_id(request),
        node_id=",".join(sorted(node_filter)) if node_filter else None,
        agent_id=(agent_id or None),
        cursor_present=bool(cursor),
        limit=limit,
        has_more=has_more,
        next_cursor_present=bool(next_cursor),
        status="ok",
    )
    return {
        "items": items,
        "count": len(items),
        "nodes": sorted(node_filter),
        "project_id": CHAT_PROJECT_ID,
        "next_cursor": next_cursor,
        "has_more": has_more,
    }
@app.post("/api/chats")
async def api_chat_create(body: ChatCreateBody, _auth: str = Depends(require_auth)):
    """Create (or upsert) a chat session and return its descriptor.

    The chat id is derived deterministically from node/agent/source, so
    repeated calls with the same routing coordinates hit the same session.
    """
    await _ensure_chat_project()
    chat_id = _make_chat_id(
        node_id=body.node_id,
        agent_id=body.agent_id,
        source=body.source,
        external_chat_ref=body.external_chat_ref,
    )
    parsed = _parse_chat_id(chat_id)
    # Fall back to a synthetic title when the client did not supply one.
    default_title = f"{parsed['agent_id']}{parsed['node_id']}{parsed['source']}"
    title = (body.title or default_title).strip()
    session = await _app_db.upsert_session(chat_id, project_id=CHAT_PROJECT_ID, title=title)
    chat_payload = {
        "chat_id": chat_id,
        "title": title,
        "agent_id": parsed["agent_id"],
        "node_id": parsed["node_id"],
        "source": parsed["source"],
        "external_chat_ref": parsed["external_chat_ref"],
        "updated_at": session.get("last_active"),
    }
    return {"ok": True, "chat": chat_payload}
@app.get("/api/chats/{chat_id}/messages")
async def api_chat_messages(
    chat_id: str,
    request: Request,
    limit: int = Query(100, ge=1, le=500),
    cursor: Optional[str] = Query(None),
    _auth: str = Depends(require_auth),
):
    """Return one page of a chat's messages, oldest-first, cursor-paginated.

    The DB is queried newest-first with ``limit + 1`` rows so a spare row
    signals that another page exists; the page is reversed before returning.
    """
    SOFIIA_CURSOR_REQUESTS_TOTAL.labels(resource="messages").inc()
    decoded = _cursor_decode(cursor)
    ts_anchor = str(decoded.get("ts") or "").strip() or None
    msg_anchor = str(decoded.get("message_id") or "").strip() or None
    fetched = await _app_db.list_messages_page(
        chat_id,
        limit=limit + 1,
        before_ts=ts_anchor,
        before_msg_id=msg_anchor,
    )
    has_more = len(fetched) > limit
    page = fetched[:limit]
    parsed = _parse_chat_id(chat_id)
    routing_meta = {
        "node_id": parsed["node_id"],
        "agent_id": parsed["agent_id"],
        "source": parsed["source"],
    }
    messages: List[Dict[str, Any]] = []
    for row in reversed(page):  # chronological order for the client
        messages.append(
            {
                "message_id": row.get("msg_id"),
                "chat_id": chat_id,
                "role": row.get("role"),
                "text": row.get("content", ""),
                "ts": row.get("ts"),
                "meta": dict(routing_meta),
            }
        )
    next_cursor = None
    if has_more and page:
        # Cursor anchors on the oldest row of the newest-first page.
        tail = page[-1]
        next_cursor = _cursor_encode({"ts": tail.get("ts"), "message_id": tail.get("msg_id")})
    log_event(
        "chat.messages.list",
        request_id=get_request_id(request),
        chat_id=chat_id,
        node_id=parsed["node_id"],
        agent_id=parsed["agent_id"],
        cursor_present=bool(cursor),
        limit=limit,
        has_more=has_more,
        next_cursor_present=bool(next_cursor),
        status="ok",
    )
    return {
        "items": messages,
        "count": len(messages),
        "chat_id": chat_id,
        "next_cursor": next_cursor,
        "has_more": has_more,
    }
@app.post("/api/chats/{chat_id}/send")
async def api_chat_send_v2(chat_id: str, body: ChatMessageSendBody, request: Request, _auth: str = Depends(require_auth)):
    """Send a user message into a chat (v2 API).

    Pipeline: per-IP rate check -> idempotency replay lookup -> per-chat and
    per-operator token-bucket limits -> persist user message -> infer via the
    target node's router -> persist assistant reply -> cache result for replay.

    Raises:
        HTTPException 400: empty text, or router URL not configured for node.
        HTTPException 429: any rate limit tripped.
        HTTPException 502: upstream router failure.
    """
    started_at = time.monotonic()
    # Coarse per-IP limit; the token buckets below add per-chat/operator limits.
    client_ip = request.client.host if request.client else "unknown"
    if not _check_rate(f"chat_v2:{client_ip}", max_calls=30, window_sec=60):
        raise HTTPException(status_code=429, detail="Rate limit: 30 messages/min")
    text = (body.text or "").strip()
    if not text:
        raise HTTPException(status_code=400, detail="text is required")
    request_id = get_request_id(request)
    # Idempotency key: the HTTP header wins over the body field; capped at 128 chars.
    idem_key = (
        (
            request.headers.get("Idempotency-Key")
            or body.idempotency_key
            or ""
        ).strip()
    )[:128]
    idem_hash = hash_idempotency_key(idem_key)
    info = _parse_chat_id(chat_id)
    # Routing priority: explicit force_node_id > node encoded in chat_id > NODA2.
    target_node = ((body.routing or {}).get("force_node_id") or info["node_id"] or "NODA2").upper()
    target_agent = info["agent_id"] or "sofiia"
    operator_id, operator_id_missing = _resolve_operator_id(request, body, request_id)
    # Per-chat token-bucket rate limit.
    chat_rl = _rate_limiter.consume(f"rl:chat:{chat_id}", rps=_RL_CHAT_RPS, burst=_RL_CHAT_BURST)
    if not chat_rl.allowed:
        SOFIIA_RATE_LIMITED_TOTAL.labels(scope="chat").inc()
        log_event(
            "chat.send.rate_limited",
            request_id=request_id,
            scope="chat",
            chat_id=chat_id,
            node_id=target_node,
            agent_id=target_agent,
            operator_id=operator_id,
            operator_id_missing=operator_id_missing,
            limit_rps=_RL_CHAT_RPS,
            burst=_RL_CHAT_BURST,
            retry_after_s=chat_rl.retry_after_s,
            status="error",
            error_code="rate_limited",
        )
        raise _rate_limited_http("chat", chat_rl.retry_after_s)
    # Per-operator token-bucket rate limit.
    op_rl = _rate_limiter.consume(f"rl:op:{operator_id}", rps=_RL_OP_RPS, burst=_RL_OP_BURST)
    if not op_rl.allowed:
        SOFIIA_RATE_LIMITED_TOTAL.labels(scope="operator").inc()
        log_event(
            "chat.send.rate_limited",
            request_id=request_id,
            scope="operator",
            chat_id=chat_id,
            node_id=target_node,
            agent_id=target_agent,
            operator_id=operator_id,
            operator_id_missing=operator_id_missing,
            limit_rps=_RL_OP_RPS,
            burst=_RL_OP_BURST,
            retry_after_s=op_rl.retry_after_s,
            status="error",
            error_code="rate_limited",
        )
        raise _rate_limited_http("operator", op_rl.retry_after_s)
    log_event(
        "chat.send",
        request_id=request_id,
        chat_id=chat_id,
        node_id=target_node,
        agent_id=target_agent,
        operator_id=operator_id,
        operator_id_missing=operator_id_missing,
        idempotency_key_hash=(idem_hash or None),
        replayed=False,
        status="ok",
    )
    # Idempotency replay: a repeated key returns the cached response unchanged.
    if idem_key:
        cache_key = f"{chat_id}::{idem_key}"
        cached = _idempotency_store.get(cache_key)
        if cached:
            SOFIIA_IDEMPOTENCY_REPLAYS_TOTAL.inc()
            log_event(
                "chat.send.replay",
                request_id=request_id,
                chat_id=chat_id,
                node_id=target_node,
                agent_id=target_agent,
                idempotency_key_hash=(idem_hash or None),
                replayed=True,
                message_id=cached.message_id,
                status="ok",
            )
            replay = dict(cached.response_body)
            replay["idempotency"] = {"replayed": True, "key": idem_key}
            return replay
    await _ensure_chat_project()
    SOFIIA_SEND_REQUESTS_TOTAL.labels(node_id=target_node).inc()
    project_id = body.project_id or CHAT_PROJECT_ID
    session_id = body.session_id or chat_id
    user_id = operator_id
    title = f"{target_agent}{target_node}{info['source']}"
    await _app_db.upsert_session(chat_id, project_id=CHAT_PROJECT_ID, title=title)
    # Persist the user message before inference so it survives upstream failures.
    user_saved = await _app_db.save_message(chat_id, "user", text[:4096])
    metadata: Dict[str, Any] = {
        "project_id": project_id,
        "session_id": session_id,
        "user_id": operator_id,
        "operator_id": operator_id,
        "client": "sofiia-console",
        "chat_id": chat_id,
        "node_id": target_node,
        "agent_id": target_agent,
        "source": info["source"],
        "external_chat_ref": info["external_chat_ref"],
        "attachments": body.attachments or [],
        "client_meta": body.client or {},
    }
    base_url = get_router_url(target_node)
    if not base_url:
        duration_ms = int((time.monotonic() - started_at) * 1000)
        log_event(
            "chat.send.error",
            request_id=request_id,
            chat_id=chat_id,
            node_id=target_node,
            agent_id=target_agent,
            idempotency_key_hash=(idem_hash or None),
            status="error",
            error_code="router_url_not_configured",
            duration_ms=duration_ms,
        )
        raise HTTPException(status_code=400, detail=f"router_url is not configured for node {target_node}")
    try:
        out = await infer(
            base_url,
            target_agent,
            text,
            model=None,
            metadata=metadata,
            timeout=300.0,
            api_key=ROUTER_API_KEY,
        )
    except Exception as e:
        duration_ms = int((time.monotonic() - started_at) * 1000)
        log_event(
            "chat.send.error",
            request_id=request_id,
            chat_id=chat_id,
            node_id=target_node,
            agent_id=target_agent,
            idempotency_key_hash=(idem_hash or None),
            status="error",
            error_code="upstream_error",
            error=str(e)[:180],
            duration_ms=duration_ms,
        )
        # Surface the failure on the event bus as well as in the HTTP response.
        _broadcast_bg(
            _make_event(
                "error",
                {"where": "chat_v2.router", "message": str(e)[:180], "chat_id": chat_id, "node_id": target_node, "agent_id": target_agent},
                project_id=project_id,
                session_id=session_id,
                user_id=user_id,
            )
        )
        raise HTTPException(status_code=502, detail=str(e)[:300])
    reply = _clean_chat_reply(out.get("response", out.get("text", "")))
    # Link the assistant message to the stored user message (parent_msg_id).
    assistant_saved = await _app_db.save_message(chat_id, "assistant", (reply or "")[:4096], parent_msg_id=user_saved.get("msg_id"))
    trace_id = f"chatv2_{session_id}_{uuid.uuid4().hex[:8]}"
    result = {
        "ok": True,
        "accepted": True,
        "chat_id": chat_id,
        "node_id": target_node,
        "agent_id": target_agent,
        "trace_id": trace_id,
        "message": {
            "message_id": assistant_saved.get("msg_id"),
            "role": "assistant",
            "text": reply,
            "ts": assistant_saved.get("ts"),
            "meta": {
                "node_id": target_node,
                "agent_id": target_agent,
                "backend": out.get("backend"),
                "model": out.get("model"),
            },
        },
    }
    # Record the response so a repeated Idempotency-Key can replay it.
    if idem_key:
        cache_key = f"{chat_id}::{idem_key}"
        _idempotency_store.set(
            cache_key,
            ReplayEntry(
                message_id=str((result.get("message") or {}).get("message_id") or ""),
                response_body=dict(result),
                created_at=time.monotonic(),
                node_id=target_node,
            ),
        )
    result["idempotency"] = {"replayed": False, "key": idem_key}
    duration_ms = int((time.monotonic() - started_at) * 1000)
    log_event(
        "chat.send.ok",
        request_id=request_id,
        chat_id=chat_id,
        node_id=target_node,
        agent_id=target_agent,
        idempotency_key_hash=(idem_hash or None),
        message_id=(result.get("message") or {}).get("message_id"),
        status="ok",
        duration_ms=duration_ms,
    )
    return result
@app.get("/metrics")
def metrics():
    """Expose service metrics in the Prometheus exposition format."""
    payload, ctype = render_metrics()
    return Response(content=payload, media_type=ctype)
@app.post("/api/chat/send")
async def api_chat_send(body: ChatSendBody, request: Request):
    """BFF chat: Ollama or router. Returns runtime contract fields. Rate: 30/min.

    The provider is the prefix of ``body.model`` before the first ``:``:
      * ``ollama`` — local Ollama ``/api/chat``
      * ``router`` — node agent router (supports ``agent|model`` syntax)
      * ``glm``    — Zhipu AI GLM (OpenAI-compatible, bigmodel.cn)
      * ``grok``   — xAI Grok (OpenAI-compatible)

    Voice turns (``body.voice_profile`` set) get a voice prompt suffix and
    capped token budgets. The reply is persisted to memory best-effort.

    Raises:
        HTTPException 400: unsupported provider.
        HTTPException 429: per-IP rate limit exceeded.
        HTTPException 502/503: upstream or configuration failures.
    """
    client_ip = request.client.host if request.client else "unknown"
    if not _check_rate(f"chat:{client_ip}", max_calls=30, window_sec=60):
        raise HTTPException(status_code=429, detail="Rate limit: 30 messages/min")
    # Runtime identity
    project_id = body.project_id or "default"
    session_id = body.session_id or f"sess_{uuid.uuid4().hex[:12]}"
    user_id = body.user_id or "console_user"
    provider, _, model_name = body.model.partition(":")
    reply = ""
    t0 = time.monotonic()

    def _clean_reply(text: str) -> str:
        """Strip <think>...</think> reasoning blocks (Qwen3/DeepSeek-R1) before returning to user.
        Strategy:
        1. re.DOTALL regex removes complete <think>...</think> blocks.
        2. Fallback split removes any trailing unclosed <think> block
           (model stopped mid-reasoning without </think>).
        """
        # `re` is already imported at module level; the previous local
        # `import re` was redundant and has been removed.
        # Primary: strip complete blocks (multiline-safe with DOTALL)
        cleaned = re.sub(r"<think>.*?</think>", "", text,
                         flags=re.DOTALL | re.IGNORECASE)
        # Fallback: if an unclosed <think> block remains, drop everything after it
        if "<think>" in cleaned.lower():
            cleaned = re.split(r"(?i)<think>", cleaned)[0]
        return cleaned.strip()

    # Broadcast: user message sent
    _broadcast_bg(_make_event("chat.message",
        {"text": body.message[:200], "provider": provider, "model": body.model},
        project_id=project_id, session_id=session_id, user_id=user_id))
    # voice_profile determines LLM options for voice turns.
    # None = text chat (full prompt, no token limit enforcement).
    _vp = body.voice_profile  # "voice_fast_uk" | "voice_quality_uk" | None
    _is_voice_turn = _vp is not None
    _is_quality = _vp == "voice_quality_uk"
    # System prompt: voice turns get guardrails appended
    _system_prompt = SOFIIA_SYSTEM_PROMPT
    if _is_voice_turn:
        _system_prompt = SOFIIA_SYSTEM_PROMPT + SOFIIA_VOICE_PROMPT_SUFFIX
    _voice_options = {
        "temperature": 0.18 if _is_quality else 0.15,
        "repeat_penalty": 1.1,
        "num_predict": 256 if _is_quality else 220,  # max_tokens per contract (≤256)
    } if _is_voice_turn else {
        "temperature": 0.15,
        "repeat_penalty": 1.1,
        "num_predict": SOFIIA_OLLAMA_NUM_PREDICT_TEXT,
    }
    if provider == "ollama":
        ollama_url = get_ollama_url()
        effective_model_name = model_name or "qwen3:14b"
        messages: List[Dict[str, Any]] = [{"role": "system", "content": _system_prompt}]
        messages.extend(body.history[-12:])
        messages.append({"role": "user", "content": body.message})
        try:
            async with httpx.AsyncClient(timeout=SOFIIA_OLLAMA_TIMEOUT_SEC) as client:
                r = await client.post(
                    f"{ollama_url}/api/chat",
                    json=_make_ollama_payload(effective_model_name, messages, _voice_options),
                )
                r.raise_for_status()
                data = r.json()
                reply = _clean_reply((data.get("message") or {}).get("content", "") or "Ollama: порожня відповідь")
        except httpx.HTTPStatusError as e:
            err_msg = f"Ollama HTTP {e.response.status_code}"
            _broadcast_bg(_make_event("error", {"where": "ollama", "message": err_msg},
                project_id=project_id, session_id=session_id, user_id=user_id))
            raise HTTPException(status_code=502, detail=f"{err_msg}: {str(e)[:200]}")
        except Exception as e:
            _broadcast_bg(_make_event("error", {"where": "ollama", "message": str(e)[:100]},
                project_id=project_id, session_id=session_id, user_id=user_id))
            raise HTTPException(status_code=502, detail=f"Ollama error: {str(e)[:200]}")
    elif provider == "router":
        base_url = get_router_url(body.node_id)
        router_agent_id = "sofiia"
        router_model = None
        if model_name:
            if "|" in model_name:
                # Explicit "agent|model" pair
                left, right = model_name.split("|", 1)
                router_agent_id = left or "sofiia"
                router_model = right or None
            elif ":" in model_name:
                # Looks like model id (qwen3:14b, qwen3.5:35b-a3b, etc.)
                router_model = model_name
            elif model_name not in ("default",):
                # Treat plain token as agent id (router:soul, router:monitor, ...)
                router_agent_id = model_name
        metadata: Dict[str, Any] = {
            "project_id": project_id,
            "session_id": session_id,
            "user_id": user_id,
            "client": "sofiia-console",
            "voice_profile": _vp,
        }
        try:
            out = await infer(
                base_url,
                router_agent_id,
                body.message,
                model=router_model,
                metadata=metadata,
                timeout=300.0,
                api_key=ROUTER_API_KEY,
            )
            reply = _clean_reply(out.get("response", out.get("text", "")))
        except Exception as e:
            _broadcast_bg(_make_event("error", {"where": "router", "message": str(e)[:100]},
                project_id=project_id, session_id=session_id, user_id=user_id))
            raise HTTPException(status_code=502, detail=str(e)[:300])
    elif provider == "glm":
        # Zhipu AI GLM — OpenAI-compatible API at bigmodel.cn
        glm_api_key = os.getenv("GLM5_API_KEY", os.getenv("GLM_API_KEY", "")).strip()
        if not glm_api_key:
            raise HTTPException(status_code=503, detail="GLM5_API_KEY not set.")
        glm_model = model_name or "glm-4.7"
        messages_glm: List[Dict[str, Any]] = [{"role": "system", "content": _system_prompt}]
        messages_glm.extend(body.history[-12:])
        messages_glm.append({"role": "user", "content": body.message})
        try:
            async with httpx.AsyncClient(timeout=120.0) as client:
                r = await client.post(
                    "https://open.bigmodel.cn/api/paas/v4/chat/completions",
                    headers={"Authorization": f"Bearer {glm_api_key}", "Content-Type": "application/json"},
                    json={"model": glm_model, "messages": messages_glm, "stream": False},
                )
                r.raise_for_status()
                data = r.json()
                reply = _clean_reply((data.get("choices") or [{}])[0].get("message", {}).get("content", "") or "GLM: порожня відповідь")
        except httpx.HTTPStatusError as e:
            err_msg = f"GLM HTTP {e.response.status_code}: {e.response.text[:200]}"
            _broadcast_bg(_make_event("error", {"where": "glm", "message": err_msg},
                project_id=project_id, session_id=session_id, user_id=user_id))
            raise HTTPException(status_code=502, detail=err_msg)
        except Exception as e:
            _broadcast_bg(_make_event("error", {"where": "glm", "message": str(e)[:100]},
                project_id=project_id, session_id=session_id, user_id=user_id))
            raise HTTPException(status_code=502, detail=f"GLM error: {str(e)[:200]}")
    elif provider == "grok":
        # xAI Grok — OpenAI-compatible API
        xai_api_key = os.getenv("XAI_API_KEY", "").strip()
        if not xai_api_key:
            raise HTTPException(status_code=503, detail="XAI_API_KEY not set. Add it to BFF environment.")
        grok_model = model_name or "grok-4-1-fast-reasoning"
        messages: List[Dict[str, Any]] = [{"role": "system", "content": _system_prompt}]
        messages.extend(body.history[-12:])
        messages.append({"role": "user", "content": body.message})
        try:
            async with httpx.AsyncClient(timeout=120.0) as client:
                r = await client.post(
                    "https://api.x.ai/v1/chat/completions",
                    headers={"Authorization": f"Bearer {xai_api_key}", "Content-Type": "application/json"},
                    json={"model": grok_model, "messages": messages, "stream": False},
                )
                r.raise_for_status()
                data = r.json()
                reply = _clean_reply((data.get("choices") or [{}])[0].get("message", {}).get("content", "") or "Grok: порожня відповідь")
        except httpx.HTTPStatusError as e:
            err_msg = f"Grok HTTP {e.response.status_code}: {e.response.text[:200]}"
            _broadcast_bg(_make_event("error", {"where": "grok", "message": err_msg},
                project_id=project_id, session_id=session_id, user_id=user_id))
            raise HTTPException(status_code=502, detail=err_msg)
        except Exception as e:
            _broadcast_bg(_make_event("error", {"where": "grok", "message": str(e)[:100]},
                project_id=project_id, session_id=session_id, user_id=user_id))
            raise HTTPException(status_code=502, detail=f"Grok error: {str(e)[:200]}")
    else:
        # Fix: the old message omitted "glm" even though a glm branch exists above.
        raise HTTPException(status_code=400, detail=f"Unsupported provider: {provider}. Use ollama, router, glm, or grok.")
    latency_ms = int((time.monotonic() - t0) * 1000)
    tokens_est = len(reply.split())
    trace_id = f"chat_{session_id}_{uuid.uuid4().hex[:8]}"
    # Broadcast: reply
    _broadcast_bg(_make_event("chat.reply",
        {"text": reply[:200], "provider": provider, "model": body.model,
         "latency_ms": latency_ms, "trace_id": trace_id},
        project_id=project_id, session_id=session_id, user_id=user_id))
    # Memory save (best-effort, non-blocking).
    # Fix: use asyncio.create_task() — asyncio.get_event_loop() inside a running
    # coroutine is deprecated (Python 3.10+) and create_task targets the
    # running loop directly.
    asyncio.create_task(
        _do_save_memory(body.message, reply, session_id, project_id, user_id)
    )
    # AISTALK forward (if enabled)
    if _aistalk:
        try:
            _aistalk.handle_event(_make_event("chat.reply",
                {"text": reply, "provider": provider, "model": body.model},
                project_id=project_id, session_id=session_id, user_id=user_id))
        except Exception as e:
            logger.debug("AISTALK forward failed: %s", e)
    return {
        "ok": True,
        "project_id": project_id,
        "session_id": session_id,
        "user_id": user_id,
        "response": reply,
        "model": body.model,
        "backend": provider,
        "trace_id": trace_id,
        "meta": {
            "latency_ms": latency_ms,
            "tokens_est": tokens_est,
            "trace_id": trace_id,
        },
    }
async def _do_save_memory(
    user_msg: str,
    ai_reply: str,
    session_id: str,
    project_id: str = "default",
    user_id: str = "console_user",
    agent_id: str = "sofiia",
) -> None:
    """Persist one chat turn to local SQLite and mirror it to the Memory Service.

    Both stages are best-effort: any failure is logged at debug/warning level
    and never propagated to the caller.
    """
    # 1) Local SQLite persistence (projects/sessions/messages schema).
    try:
        # sessions.project_id has a FK constraint — make sure the project exists.
        if not await _app_db.get_project(project_id):
            await _app_db.create_project(
                name=project_id.upper(),
                description=f"Auto-created project for {project_id} sessions",
                project_id=project_id,
            )
        await _app_db.upsert_session(session_id, project_id=project_id)
        parent_id = None
        if user_msg:
            row = await _app_db.save_message(session_id, "user", user_msg[:4096])
            parent_id = row["msg_id"]
        if ai_reply:
            await _app_db.save_message(
                session_id, "assistant", ai_reply[:4096], parent_msg_id=parent_id
            )
    except Exception as e:
        logger.debug("SQLite memory save skipped: %s", e)
    # 2) Best-effort mirror to the Memory Service (Qdrant + Neo4j).
    mem_url = get_memory_service_url()
    try:
        turn = (("user", user_msg), ("assistant", ai_reply))
        async with httpx.AsyncClient(timeout=5.0) as client:
            for role, content in turn:
                if not content:
                    continue
                resp = await client.post(
                    f"{mem_url}/agents/{agent_id}/memory",
                    json={
                        "agent_id": agent_id,
                        "role": role,
                        "content": content[:1000],
                        "user_id": user_id,
                        "channel_id": session_id,
                        "metadata": {"project_id": project_id, "client": "sofiia-console", "agent_id": agent_id},
                    },
                )
                if resp.status_code >= 400:
                    logger.warning(
                        "Memory Service save failed status=%s agent=%s session=%s body=%s",
                        resp.status_code,
                        agent_id,
                        session_id,
                        (resp.text or "")[:240],
                    )
    except Exception as e:
        logger.debug("Memory Service save skipped: %s", e)
# ─── Ops ────────────────────────────────────────────────────────────────────
class OpsRunBody(BaseModel):
    """Request body for POST /api/ops/run."""
    action_id: str  # key into the OPS_ACTIONS registry
    node_id: str = "NODA2"  # target node for the action
    params: dict = {}  # action-specific parameters (Pydantic copies mutable defaults per instance)
    project_id: Optional[str] = None  # defaults to "default" in the handler
    session_id: Optional[str] = None  # defaults to "console" in the handler
    source_run_id: Optional[str] = None  # link to supervisor run
    source_msg_id: Optional[str] = None  # link to message
class NodeUpsertBody(BaseModel):
    """Request body for POST /api/nodes/add — registry entry for one node."""
    node_id: str  # uppercased by the handler, e.g. "NODA1"
    label: str  # human-readable label; falls back to node_id when blank
    router_url: str  # agent router base URL (required)
    gateway_url: Optional[str] = ""
    monitor_url: Optional[str] = ""  # falls back to router_url in the handler
    supervisor_url: Optional[str] = ""
    ssh_host: Optional[str] = ""  # SSH profile saved only when host AND user are set
    ssh_port: Optional[int] = 22
    ssh_user: Optional[str] = ""
    ssh_password_env: Optional[str] = ""  # env var name holding the SSH password
    ssh_ipv6: Optional[str] = ""  # optional IPv6 fallback address
    ssh_host_keys: Optional[List[Dict[str, Any]]] = None
    enabled: bool = True
@app.get("/api/ops/actions")
async def api_ops_actions_list():
    """List the IDs of every registered ops action."""
    action_ids = [action_id for action_id in OPS_ACTIONS]
    return {"actions": action_ids}
@app.post("/api/ops/run")
async def api_ops_run(body: OpsRunBody, _auth=Depends(require_api_key)):
    """Run ops action. Broadcasts ops.run event and auto-creates ops_run graph node."""
    import uuid as _uuid  # local alias; module-level `uuid` also exists
    t0 = time.monotonic()
    project_id = body.project_id or "default"
    session_id = body.session_id or "console"
    ops_run_id = str(_uuid.uuid4())
    started_at = _app_db._now() if _app_db else None
    result = await run_ops_action(
        body.action_id, body.node_id, body.params,
        agent_id="sofiia", timeout=90.0, api_key=ROUTER_API_KEY,
    )
    elapsed = int((time.monotonic() - t0) * 1000)
    # run_ops_action signals failure via status == "failed"; anything else is ok.
    ok = result.get("status") != "failed"
    status_str = "ok" if ok else "failed"
    error_str = result.get("error", "") if not ok else ""
    _broadcast_bg(_make_event("ops.run",
        {"name": body.action_id, "ok": ok, "elapsed_ms": elapsed},
        project_id=project_id, session_id=session_id))
    # Auto-create ops_run graph node (fire-and-forget, do not fail the request)
    if _app_db and project_id:
        try:
            gn = await _app_db.upsert_ops_run_node(
                project_id=project_id,
                ops_run_id=ops_run_id,
                action_id=body.action_id,
                node_id=body.node_id,
                status=status_str,
                elapsed_ms=elapsed,
                error=str(error_str)[:500],
                started_at=started_at or "",
                source_run_id=body.source_run_id or "",
                source_msg_id=body.source_msg_id or "",
            )
            # Expose graph linkage to the caller without altering the action result.
            result["_graph_node_id"] = gn.get("node_id")
            result["_ops_run_id"] = ops_run_id
        except Exception as _e:
            logger.warning("ops_run graph node creation failed (non-fatal): %s", _e)
    return result
# ─── Nodes ──────────────────────────────────────────────────────────────────
@app.get("/api/nodes/dashboard")
async def api_nodes_dashboard(refresh: bool = Query(False), _auth: str = Depends(require_auth)):
    """
    Nodes dashboard with full telemetry.
    Returns cached data (refreshed every NODES_POLL_INTERVAL_SEC seconds).
    Pass ?refresh=true to force immediate re-probe.
    """
    cache_populated = bool(_nodes_cache["nodes"])
    if cache_populated and not refresh:
        return {**_nodes_cache, "cached": True}
    # Probe all nodes now and refresh the shared cache.
    fresh = await get_nodes_dashboard(router_api_key=ROUTER_API_KEY)
    _nodes_cache.update({**fresh, "ts": _now_iso()})
    return {**fresh, "ts": _nodes_cache["ts"], "cached": False}
@app.get("/api/nodes/registry")
async def api_nodes_registry(_auth: str = Depends(require_auth)):
    """Return the raw nodes registry exactly as loaded from configuration."""
    return load_nodes_registry()
@app.get("/api/nodes/ssh/status")
async def api_nodes_ssh_status(
    node_id: str = Query(..., description="Node ID, e.g. NODA1"),
    _auth=Depends(require_api_key_strict),
):
    """Report SSH reachability for a node: TCP connect over IPv4, falling back to IPv6.

    Returns ok=True only when the TCP port is reachable AND at least one
    credential source (password or private key) is configured.
    """
    node_id = node_id.strip().upper()
    ssh = get_node_ssh_profile(node_id)
    if not ssh.get("configured"):
        return {
            "ok": False,
            "node_id": node_id,
            "configured": False,
            "error": "ssh profile is not configured",
            "ssh": ssh,
        }
    host = ssh.get("host", "")
    host_ipv6 = (ssh.get("ipv6") or "").strip()
    port = int(ssh.get("port") or 22)
    tcp_ok = False
    tcp_error = None
    connect_host = host

    def _try_connect(target_host: str) -> Optional[str]:
        # Returns None on success, otherwise a truncated error string.
        try:
            with socket.create_connection((target_host, port), timeout=5):
                return None
        except Exception as e:
            return str(e)[:160]

    tcp_error = _try_connect(host)
    if tcp_error is None:
        tcp_ok = True
    elif host_ipv6:
        # Primary (IPv4) host failed — retry via the configured IPv6 address.
        err_v6 = _try_connect(host_ipv6)
        if err_v6 is None:
            tcp_ok = True
            tcp_error = None
            connect_host = host_ipv6
        else:
            # Both attempts failed: report both errors in one bounded string.
            tcp_error = f"ipv4={tcp_error}; ipv6={err_v6}"[:220]
    ok = tcp_ok and (ssh["auth"]["password_set"] or ssh["auth"]["private_key_set"])
    return {
        "ok": ok,
        "node_id": node_id,
        "configured": True,
        "tcp_reachable": tcp_ok,
        "tcp_error": tcp_error,
        "connect_host": connect_host,
        "ssh": ssh,
    }
@app.post("/api/nodes/add")
async def api_nodes_add(body: NodeUpsertBody, _auth=Depends(require_api_key_strict)):
    """Add or update a node registry entry, persist it, and refresh the dashboard cache.

    Raises:
        HTTPException 400: node_id is blank after normalization.
    """
    reg = load_nodes_registry()
    reg.setdefault("defaults", {"health_timeout_sec": 10, "tools_timeout_sec": 30})
    reg.setdefault("nodes", {})
    node_id = body.node_id.strip().upper()
    if not node_id:
        raise HTTPException(status_code=400, detail="node_id is required")
    node_payload: Dict[str, Any] = {
        "label": body.label.strip() or node_id,
        "router_url": body.router_url.strip(),
        "gateway_url": (body.gateway_url or "").strip(),
        # monitor_url falls back to router_url when not supplied.
        "monitor_url": (body.monitor_url or body.router_url).strip(),
        "supervisor_url": (body.supervisor_url or "").strip(),
        "enabled": body.enabled,
    }
    ssh_host = (body.ssh_host or "").strip()
    ssh_user = (body.ssh_user or "").strip()
    # SSH profile is only attached when both host and user are provided.
    if ssh_host and ssh_user:
        node_payload["ssh"] = {
            "host": ssh_host,
            "ipv6": (body.ssh_ipv6 or "").strip(),
            "port": int(body.ssh_port or 22),
            "user": ssh_user,
            "auth": {
                # Env var name holding the password (defaults to NODES_<ID>_SSH_PASSWORD).
                "password_env": (body.ssh_password_env or f"NODES_{node_id}_SSH_PASSWORD").strip(),
            },
            "host_keys": body.ssh_host_keys or [],
        }
    reg["nodes"][node_id] = node_payload
    path = save_nodes_registry(reg)
    # Re-probe immediately so the new/updated node appears in the dashboard cache.
    fresh = await get_nodes_dashboard(router_api_key=ROUTER_API_KEY)
    _nodes_cache.update({**fresh, "ts": _now_iso()})
    return {"ok": True, "saved_to": str(path), "node_id": node_id, "nodes": reg.get("nodes", {})}
# ─── Voice ──────────────────────────────────────────────────────────────────
@app.post("/api/voice/stt")
async def api_voice_stt(
    request: Request,
    audio: UploadFile = File(...),
    language: Optional[str] = Query(None),
    session_id: Optional[str] = Query(None),
    project_id: Optional[str] = Query(None),
):
    """STT proxy → memory-service. Rate: 20/min. Broadcasts voice.stt events."""
    client_ip = request.client.host if request.client else "unknown"
    if not _check_rate(f"stt:{client_ip}", max_calls=20, window_sec=60):
        raise HTTPException(status_code=429, detail="Rate limit: 20 STT calls/min")
    sid = session_id or "console"
    pid = project_id or "default"
    _broadcast_bg(_make_event("voice.stt", {"phase": "start"},
        project_id=pid, session_id=sid))
    t0 = time.monotonic()
    mem_url = get_memory_service_url()
    try:
        content = await audio.read()
        if not content:
            raise HTTPException(status_code=400, detail="Empty audio file")
        # Forward the raw upload to the memory-service STT endpoint.
        async with httpx.AsyncClient(timeout=60.0) as client:
            files = {"audio": (audio.filename or "audio.webm", content, audio.content_type or "audio/webm")}
            params = {"language": language} if language else {}
            r = await client.post(f"{mem_url}/voice/stt", files=files, params=params)
            r.raise_for_status()
            result = r.json()
        elapsed = int((time.monotonic() - t0) * 1000)
        upstream_ms = result.get("compute_ms", 0)
        logger.info("STT ok: lang=%s text_len=%d bff_ms=%d upstream_ms=%d",
            language or "auto", len(result.get("text", "")), elapsed, upstream_ms)
        _broadcast_bg(_make_event("voice.stt",
            {"phase": "done", "elapsed_ms": elapsed, "upstream_ms": upstream_ms},
            project_id=pid, session_id=sid))
        # Attach the BFF-side latency so clients can distinguish proxy overhead.
        result["bff_ms"] = elapsed
        return result
    except httpx.HTTPStatusError as e:
        logger.error("STT upstream error: status=%s", e.response.status_code)
        _broadcast_bg(_make_event("voice.stt", {"phase": "error", "message": str(e)[:80]},
            project_id=pid, session_id=sid))
        raise HTTPException(status_code=e.response.status_code, detail=f"STT upstream: {str(e)[:200]}")
    except HTTPException:
        # Re-raise our own HTTP errors (e.g. empty audio) unchanged.
        raise
    except Exception as e:
        logger.error("STT proxy error: %s", e, exc_info=True)
        _broadcast_bg(_make_event("voice.stt", {"phase": "error", "message": str(e)[:80]},
            project_id=pid, session_id=sid))
        raise HTTPException(status_code=502, detail=f"STT error: {str(e)[:200]}")
class TTSRequest(BaseModel):
    """Request body for POST /api/voice/tts."""
    text: str  # text to synthesize; sanitized server-side before synthesis
    voice: Optional[str] = "default"
    speed: Optional[float] = 1.0
    model: Optional[str] = "piper"  # TTS engine hint
    session_id: Optional[str] = None  # scopes the broadcast voice.tts events
    project_id: Optional[str] = None
@app.post("/api/voice/tts")
async def api_voice_tts(body: TTSRequest, request: Request):
    """TTS proxy → memory-service. Rate: 30/min per IP. Concurrent: MAX_CONCURRENT_TTS.

    Two upstream paths:
      * Voice HA router (VOICE_HA_ENABLED=true) → /v1/capability/voice_tts
      * Legacy direct memory-service /voice/tts (default)
    Streams the upstream audio back with engine/voice metadata headers.
    """
    client_ip = request.client.host if request.client else "unknown"
    if not _check_rate(f"tts:{client_ip}", max_calls=30, window_sec=60):
        raise HTTPException(status_code=429, detail="Rate limit: 30 TTS calls/min per client")
    # Concurrent synthesis guard — prevents memory-service DoS on burst requests
    sem = _get_tts_semaphore()
    # NOTE(review): peeking at the private Semaphore._value is race-prone;
    # asyncio.Semaphore.locked() is the public equivalent — confirm and switch.
    if not sem._value:  # non-blocking peek: all slots occupied
        raise HTTPException(status_code=503,
            detail=f"TTS busy: max {_MAX_CONCURRENT_TTS} concurrent synthesis. Retry in 1-2s.")
    # Server-side sanitization: strips <think>, markdown, URLs; truncates safely
    text = _sanitize_for_voice(body.text.strip())
    if not text:
        raise HTTPException(status_code=400, detail="Empty text")
    sid = body.session_id or "console"
    pid = body.project_id or "default"
    _broadcast_bg(_make_event("voice.tts", {"phase": "start", "voice": body.voice},
        project_id=pid, session_id=sid))
    t0 = time.monotonic()
    sem = _get_tts_semaphore()  # re-fetched; same guard object as the peek above
    async with sem:  # enforce MAX_CONCURRENT_TTS globally
        try:
            # ── Voice HA path (opt-in via VOICE_HA_ENABLED=true) ──────────────
            if is_voice_ha_enabled():
                router_url = get_voice_ha_router_url()
                tts_payload = {
                    "text": text,
                    "voice": body.voice,
                    "speed": body.speed,
                    "model": body.model,
                }
                async with httpx.AsyncClient(timeout=30.0) as client:
                    r = await client.post(
                        f"{router_url}/v1/capability/voice_tts",
                        json=tts_payload,
                    )
                    r.raise_for_status()
                elapsed = int((time.monotonic() - t0) * 1000)
                # Engine/voice/node metadata is relayed from upstream response headers.
                upstream_ct = r.headers.get("content-type", "audio/wav")
                tts_engine = r.headers.get("X-TTS-Engine", "unknown")
                tts_voice_used = r.headers.get("X-TTS-Voice", body.voice)
                voice_node = r.headers.get("X-Voice-Node", "unknown")
                voice_mode = r.headers.get("X-Voice-Mode", "remote")
                ext = "mp3" if "mpeg" in upstream_ct else "wav"
                logger.info("TTS HA ok: voice=%s node=%s mode=%s elapsed=%dms",
                    tts_voice_used, voice_node, voice_mode, elapsed)
                _broadcast_bg(_make_event("voice.tts",
                    {"phase": "done", "voice": tts_voice_used, "engine": tts_engine,
                     "elapsed_ms": elapsed, "ha_mode": voice_mode, "ha_node": voice_node},
                    project_id=pid, session_id=sid))
                return StreamingResponse(
                    io.BytesIO(r.content),
                    media_type=upstream_ct,
                    headers={
                        "Content-Disposition": f"inline; filename=speech.{ext}",
                        "X-TTS-Engine": tts_engine,
                        "X-TTS-Voice": tts_voice_used,
                        "X-TTS-Elapsed-MS": str(elapsed),
                        "X-Voice-Node": voice_node,
                        "X-Voice-Mode": voice_mode,
                        "Cache-Control": "no-store",
                    },
                )
            # ── Legacy direct path (default, VOICE_HA_ENABLED=false) ──────────
            mem_url = get_memory_service_url()
            async with httpx.AsyncClient(timeout=30.0) as client:
                r = await client.post(
                    f"{mem_url}/voice/tts",
                    json={"text": text, "voice": body.voice, "speed": body.speed, "model": body.model},
                )
                r.raise_for_status()
            elapsed = int((time.monotonic() - t0) * 1000)
            upstream_ct = r.headers.get("content-type", "audio/wav")
            tts_engine = r.headers.get("X-TTS-Engine", "unknown")
            tts_voice_used = r.headers.get("X-TTS-Voice", body.voice)
            ext = "mp3" if "mpeg" in upstream_ct else "wav"
            logger.info("TTS ok: voice=%s engine=%s len=%d fmt=%s elapsed=%dms",
                tts_voice_used, tts_engine, len(text), ext, elapsed)
            _broadcast_bg(_make_event("voice.tts",
                {"phase": "done", "voice": tts_voice_used, "engine": tts_engine, "elapsed_ms": elapsed},
                project_id=pid, session_id=sid))
            return StreamingResponse(
                io.BytesIO(r.content),
                media_type=upstream_ct,
                headers={
                    "Content-Disposition": f"inline; filename=speech.{ext}",
                    "X-TTS-Engine": tts_engine,
                    "X-TTS-Voice": tts_voice_used,
                    "X-TTS-Elapsed-MS": str(elapsed),
                    "Cache-Control": "no-store",
                },
            )
        except httpx.HTTPStatusError as e:
            _record_tts_error("http_error", e.response.status_code, str(e)[:120], body.voice)
            logger.error("TTS upstream error: status=%s voice=%s ha=%s",
                e.response.status_code, body.voice, is_voice_ha_enabled())
            _broadcast_bg(_make_event("voice.tts", {"phase": "error", "message": str(e)[:80]},
                project_id=pid, session_id=sid))
            raise HTTPException(status_code=e.response.status_code, detail=f"TTS upstream: {str(e)[:200]}")
        except Exception as e:
            _record_tts_error("proxy_error", None, str(e)[:120], body.voice)
            logger.error("TTS proxy error: %s ha=%s", e, is_voice_ha_enabled(), exc_info=True)
            _broadcast_bg(_make_event("voice.tts", {"phase": "error", "message": str(e)[:80]},
                project_id=pid, session_id=sid))
            raise HTTPException(status_code=502, detail=f"TTS error: {str(e)[:200]}")
@app.get("/api/voice/voices")
async def api_voice_voices():
    """Proxy the upstream voice catalogue; degrade to a macOS fallback on error."""
    mem_url = get_memory_service_url()
    try:
        async with httpx.AsyncClient(timeout=10.0) as client:
            resp = await client.get(f"{mem_url}/voice/voices")
            resp.raise_for_status()
            return resp.json()
    except Exception as e:
        # Upstream unreachable — return a minimal static catalogue with the error.
        fallback_macos = [{"id": "Milena", "name": "Milena (uk-UA)", "lang": "uk-UA"}]
        return {"piper": [], "macos": fallback_macos, "error": str(e)[:100]}
# ─── Phase 2: Voice Chat Stream (sentence chunking → early TTS) ──────────────
# Strategy: split LLM text into sentences → synthesize first sentence immediately
# → return {first_audio_b64, first_text, rest_text[]}
# Browser plays first sentence while fetching TTS for remaining sentences in bg.
# TTFA drops from ~10-14s to ~3-5s (LLM still runs full, but TTS starts on chunk1).
from app.voice_utils import split_into_voice_chunks as _split_into_voice_chunks
from app.voice_utils import clean_think_blocks as _clean_think_blocks_util
from app.voice_utils import sanitize_for_voice as _sanitize_for_voice
from app.voice_utils import MIN_CHUNK_CHARS as _MIN_CHUNK_CHARS, MAX_CHUNK_CHARS as _MAX_CHUNK_CHARS
class VoiceChatStreamBody(BaseModel):
    # Request payload for POST /api/voice/chat/stream.
    message: str
    model: str = "ollama:qwen3:14b"  # "<provider>:<model>"; handler supports ollama, grok, glm
    node_id: str = "NODA2"  # NOTE(review): not read by the stream handler below — confirm use
    voice: Optional[str] = None  # TTS voice id; handler substitutes "default" when unset
    voice_profile: Optional[str] = "voice_fast_uk"  # "voice_quality_uk" raises temperature/num_predict
    session_id: Optional[str] = None  # auto-generated "vs_…" id when missing
    project_id: Optional[str] = None  # defaults to "default" in the handler
    history: List[Dict[str, Any]] = []  # prior chat turns; only the last 8 are forwarded to the LLM
@app.post("/api/voice/chat/stream")
async def api_voice_chat_stream(body: VoiceChatStreamBody, request: Request):
"""Phase 2 Voice Chat: LLM → sentence split → first sentence TTS immediately.
Returns:
{
ok: bool,
first_text: str, # first sentence
first_audio_b64: str, # base64 MP3 for immediate playback
first_audio_mime: str, # "audio/mpeg"
rest_chunks: [str, ...], # remaining sentences (client fetches TTS via /api/voice/tts)
full_text: str, # full LLM reply (for display)
trace_id: str,
meta: {llm_ms, tts_ms, chunks_total}
}
Client flow:
1. POST /api/voice/chat/stream → play first_audio_b64 immediately
2. For each chunk in rest_chunks: POST /api/voice/tts → enqueue audio
"""
import re as _re # noqa: F401 kept for legacy; re already imported at module level
client_ip = request.client.host if request.client else "unknown"
if not _check_rate(f"voice_stream:{client_ip}", max_calls=15, window_sec=60):
raise HTTPException(status_code=429, detail="Rate limit: 15 voice stream calls/min per client")
# Concurrent TTS guard also applies to stream endpoint (TTS inside)
sem = _get_tts_semaphore()
if not sem._value:
raise HTTPException(status_code=503,
detail=f"TTS busy: max {_MAX_CONCURRENT_TTS} concurrent synthesis. Retry in 1-2s.")
sid = body.session_id or f"vs_{uuid.uuid4().hex[:10]}"
pid = body.project_id or "default"
trace_id = f"vs_{sid}_{uuid.uuid4().hex[:8]}"
_vp = body.voice_profile or "voice_fast_uk"
_is_quality = _vp == "voice_quality_uk"
_system_prompt = SOFIIA_SYSTEM_PROMPT + SOFIIA_VOICE_PROMPT_SUFFIX
# Track for repro pack
global _voice_last_model, _voice_last_profile
_voice_last_model = body.model
_voice_last_profile = _vp
_broadcast_bg(_make_event("voice.stream", {"phase": "start", "trace_id": trace_id},
project_id=pid, session_id=sid))
# ── 1. LLM ────────────────────────────────────────────────────────────────
t0_llm = time.monotonic()
provider, _, model_name = body.model.partition(":")
reply = ""
def _clean(text: str) -> str:
cleaned = re.sub(r"<think>.*?</think>", "", text, flags=re.DOTALL | re.IGNORECASE)
if "<think>" in cleaned.lower():
cleaned = re.split(r"(?i)<think>", cleaned)[0]
return cleaned.strip()
try:
if provider == "ollama":
ollama_url = get_ollama_url()
effective_model_name = model_name or "qwen3:14b"
messages: List[Dict[str, Any]] = [{"role": "system", "content": _system_prompt}]
messages.extend(body.history[-8:])
messages.append({"role": "user", "content": body.message})
voice_options = {
"temperature": 0.18 if _is_quality else 0.15,
"repeat_penalty": 1.1,
"num_predict": 256 if _is_quality else 220,
}
async with httpx.AsyncClient(timeout=SOFIIA_OLLAMA_VOICE_TIMEOUT_SEC) as client:
r = await client.post(
f"{ollama_url}/api/chat",
json=_make_ollama_payload(effective_model_name, messages, voice_options),
)
r.raise_for_status()
raw = (r.json().get("message") or {}).get("content", "")
reply = _clean(raw)
elif provider == "grok":
xai_key = os.getenv("XAI_API_KEY", "").strip()
if not xai_key:
raise HTTPException(status_code=503, detail="XAI_API_KEY not set.")
grok_model = model_name or "grok-4-1-fast-reasoning"
messages_g: List[Dict[str, Any]] = [{"role": "system", "content": _system_prompt}]
messages_g.extend(body.history[-8:])
messages_g.append({"role": "user", "content": body.message})
async with httpx.AsyncClient(timeout=60.0) as client:
r = await client.post(
"https://api.x.ai/v1/chat/completions",
headers={"Authorization": f"Bearer {xai_key}", "Content-Type": "application/json"},
json={"model": grok_model, "messages": messages_g, "stream": False,
"max_tokens": 1024, "temperature": 0.2},
)
r.raise_for_status()
raw = (r.json().get("choices") or [{}])[0].get("message", {}).get("content", "")
reply = _clean(raw)
elif provider == "glm":
glm_key = os.getenv("GLM5_API_KEY", os.getenv("GLM_API_KEY", "")).strip()
if not glm_key:
raise HTTPException(status_code=503, detail="GLM5_API_KEY not set.")
glm_model = model_name or "glm-5"
messages_glm: List[Dict[str, Any]] = [{"role": "system", "content": _system_prompt}]
messages_glm.extend(body.history[-8:])
messages_glm.append({"role": "user", "content": body.message})
async with httpx.AsyncClient(timeout=60.0) as client:
r = await client.post(
"https://open.bigmodel.cn/api/paas/v4/chat/completions",
headers={"Authorization": f"Bearer {glm_key}", "Content-Type": "application/json"},
json={"model": glm_model, "messages": messages_glm, "stream": False},
)
r.raise_for_status()
raw = (r.json().get("choices") or [{}])[0].get("message", {}).get("content", "")
reply = _clean(raw)
else:
raise HTTPException(status_code=400, detail=f"voice/stream: provider '{provider}' not supported. Use: ollama, grok, glm.")
except HTTPException:
raise
except Exception as e:
_record_llm_error("inference_error", body.model, str(e)[:120])
_broadcast_bg(_make_event("error", {"where": "voice_stream_llm", "trace_id": trace_id, "message": str(e)[:100]},
project_id=pid, session_id=sid))
raise HTTPException(status_code=502, detail=f"LLM error: {str(e)[:200]}")
llm_ms = int((time.monotonic() - t0_llm) * 1000)
if not reply:
reply = "Не можу відповісти зараз."
# ── 2. Sentence chunking ──────────────────────────────────────────────────
# sanitize full reply before splitting (removes markdown, <think>, URLs)
sanitized_reply = _sanitize_for_voice(reply)
chunks = _split_into_voice_chunks(sanitized_reply)
if not chunks:
chunks = [sanitized_reply] if sanitized_reply else ["Не можу відповісти зараз."]
first_chunk = chunks[0]
# rest_chunks: sanitize + hard cap (prevents DoS via unreasonably long replies)
_MAX_REST_CHUNKS = int(os.getenv("MAX_VOICE_REST_CHUNKS", "8"))
all_rest = [_sanitize_for_voice(c) for c in chunks[1:] if _sanitize_for_voice(c)]
rest_chunks = all_rest[:_MAX_REST_CHUNKS] # cap: never more than 8 background TTS calls
# ── 3. TTS for first sentence (immediate) ─────────────────────────────────
t0_tts = time.monotonic()
first_audio_b64 = ""
first_audio_mime = "audio/mpeg"
voice = body.voice or "default"
_ha_voice_node = None
_ha_voice_mode = None
try:
import base64 as _b64
tts_json = {"text": first_chunk, "voice": voice, "speed": 1.0}
async with httpx.AsyncClient(timeout=15.0) as client:
if is_voice_ha_enabled():
# HA path: Router selects best node for TTS
router_url = get_voice_ha_router_url()
r_tts = await client.post(f"{router_url}/v1/capability/voice_tts", json=tts_json)
r_tts.raise_for_status()
_ha_voice_node = r_tts.headers.get("X-Voice-Node")
_ha_voice_mode = r_tts.headers.get("X-Voice-Mode")
logger.debug("voice_stream TTS via HA: node=%s mode=%s",
_ha_voice_node, _ha_voice_mode)
else:
# Legacy direct path
mem_url = get_memory_service_url()
r_tts = await client.post(f"{mem_url}/voice/tts", json=tts_json)
r_tts.raise_for_status()
first_audio_mime = r_tts.headers.get("content-type", "audio/mpeg").split(";")[0]
first_audio_b64 = _b64.b64encode(r_tts.content).decode()
except Exception as e:
logger.warning("voice_stream TTS failed for first chunk (ha=%s): %s",
is_voice_ha_enabled(), e)
# Not fatal: client can still render text
tts_ms = int((time.monotonic() - t0_tts) * 1000)
_broadcast_bg(_make_event("voice.stream", {
"phase": "done",
"trace_id": trace_id,
"llm_ms": llm_ms,
"tts_ms": tts_ms,
"chunks_total": len(chunks),
}, project_id=pid, session_id=sid))
logger.info("voice_stream ok: trace=%s llm=%dms tts=%dms chunks=%d first=%dB",
trace_id, llm_ms, tts_ms, len(chunks), len(r_tts.content) if first_audio_b64 else 0)
body_data = {
"ok": True,
"trace_id": trace_id,
"first_text": first_chunk,
"first_audio_b64": first_audio_b64,
"first_audio_mime": first_audio_mime,
"rest_chunks": rest_chunks,
"full_text": reply,
"meta": {
"llm_ms": llm_ms,
"tts_ms": tts_ms,
"chunks_total": len(chunks),
"voice": voice,
"model": body.model,
"voice_profile": _vp,
},
}
from fastapi.responses import JSONResponse as _JSONResponse
resp_headers = {}
if _ha_voice_mode:
resp_headers["X-Voice-Mode"] = _ha_voice_mode
if _ha_voice_node:
resp_headers["X-Voice-Node"] = _ha_voice_node
if _ha_voice_mode or _ha_voice_node:
resp_headers["X-Voice-Cap"] = "voice_tts"
if resp_headers:
return _JSONResponse(content=body_data, headers=resp_headers)
return body_data
# ─── Voice Telemetry Beacon ───────────────────────────────────────────────────
# Receives performance marks from browser, records Prometheus histograms.
# Browser calls this via navigator.sendBeacon (fire-and-forget).
try:
    from prometheus_client import Histogram as _PromHistogram, Counter as _PromCounter
    # Histograms are labelled so dashboards can slice by model / voice profile.
    _voice_ttfa_hist = _PromHistogram(
        "voice_ttfa_ms", "Time-to-first-audio (request → first audio playable)",
        ["model", "voice_profile"],
        buckets=[500, 1000, 2000, 3000, 5000, 7000, 10000, 15000],
    )
    _voice_llm_hist = _PromHistogram(
        "voice_llm_ms", "LLM inference time for voice turns",
        ["model", "voice_profile"],
        buckets=[500, 1000, 2000, 5000, 8000, 12000, 20000],
    )
    _voice_tts_first_hist = _PromHistogram(
        "voice_tts_first_ms", "First-sentence TTS synthesis time",
        ["voice_profile"],
        buckets=[200, 500, 800, 1200, 2000, 3000],
    )
    _voice_e2e_hist = _PromHistogram(
        "voice_e2e_ms", "End-to-end voice turn latency (user stop speaking → audio plays)",
        ["voice_profile"],
        buckets=[1000, 2000, 4000, 6000, 9000, 13000, 20000],
    )
    _voice_underflow_counter = _PromCounter(
        "voice_queue_underflows_total", "Times playback queue ran empty before TTS finished",
        ["voice_profile"],
    )
    _PROM_VOICE_OK = True
except Exception:
    # prometheus_client missing (or metric registration clash) — telemetry
    # beacons still work, they just skip metric observation.
    _PROM_VOICE_OK = False
class VoiceTelemetryPayload(BaseModel):
    # One voice turn's client-side timing marks (sent by the browser beacon).
    event: str = "voice_turn"
    # Idempotency: session_id + turn_id deduplicate duplicate beacon submissions
    session_id: Optional[str] = None
    turn_id: Optional[str] = None  # monotonic turn counter or UUID per turn
    ttfa_ms: Optional[int] = None  # time-to-first-audio
    llm_ms: Optional[int] = None  # LLM inference time
    tts_first_ms: Optional[int] = None  # first-sentence TTS synthesis time
    e2e_ms: Optional[int] = None  # end-to-end turn latency
    stt_ms: Optional[int] = None  # speech-to-text time (accepted but not fed to any histogram here)
    underflows: int = 0  # playback-queue underruns during the turn
    model: Optional[str] = None  # e.g. "ollama:qwen3:14b"; "ollama:" prefix stripped for labels
    voice_profile: Optional[str] = None
class VoiceTelemetryBatch(BaseModel):
    """Batch beacon: array of turns submitted together (reduces HTTP overhead)."""
    # The batch endpoint processes at most 20 events per submission.
    events: List[VoiceTelemetryPayload] = []
def _process_telemetry_item(payload: VoiceTelemetryPayload) -> bool:
    """Record one voice-turn telemetry event.

    Deduplicates on (session_id, turn_id), observes Prometheus histograms when
    the client library is available, logs the turn, and feeds the degradation
    state machine. Returns False when the event is a duplicate submission.
    """
    session_key = payload.session_id or "anon"
    turn_key = payload.turn_id or "noid"
    if _telem_is_duplicate(session_key, turn_key):
        # Beacon retransmission — this turn was already recorded.
        return False
    model_label = (payload.model or "unknown").replace("ollama:", "")
    profile_label = payload.voice_profile or "unknown"
    if _PROM_VOICE_OK:
        try:
            if payload.ttfa_ms is not None:
                _voice_ttfa_hist.labels(model=model_label, voice_profile=profile_label).observe(payload.ttfa_ms)
            if payload.llm_ms is not None:
                _voice_llm_hist.labels(model=model_label, voice_profile=profile_label).observe(payload.llm_ms)
            if payload.tts_first_ms is not None:
                _voice_tts_first_hist.labels(voice_profile=profile_label).observe(payload.tts_first_ms)
            if payload.e2e_ms is not None:
                _voice_e2e_hist.labels(voice_profile=profile_label).observe(payload.e2e_ms)
            if payload.underflows:
                _voice_underflow_counter.labels(voice_profile=profile_label).inc(payload.underflows)
        except Exception as exc:
            logger.debug("telemetry/voice prom error: %s", exc)
    logger.info(
        "voice_telemetry: model=%s profile=%s ttfa=%s llm=%s tts=%s e2e=%s underflows=%d sid=%s",
        model_label, profile_label, payload.ttfa_ms, payload.llm_ms,
        payload.tts_first_ms, payload.e2e_ms, payload.underflows, session_key,
    )
    # Only latency-bearing events advance the degradation state machine.
    if payload.ttfa_ms is not None or payload.tts_first_ms is not None:
        _voice_degradation_sm.observe(
            ttfa_ms=payload.ttfa_ms,
            tts_first_ms=payload.tts_first_ms,
            underflows=payload.underflows,
            profile=profile_label,
        )
    return True
@app.post("/api/telemetry/voice", status_code=204)
async def api_telemetry_voice(payload: VoiceTelemetryPayload):
"""Browser beacon endpoint (single turn). Fire-and-forget, always 204."""
_process_telemetry_item(payload)
# 204 No Content — browser doesn't await response
@app.post("/api/telemetry/voice/batch", status_code=204)
async def api_telemetry_voice_batch(batch: VoiceTelemetryBatch, request: Request):
"""Batch beacon: process up to 20 turns in one HTTP call.
Useful when browser queues multiple turns before sending (e.g. tab becomes
visible again, or connection was lost briefly).
"""
client_ip = request.client.host if request.client else "unknown"
if not _check_rate(f"telem_batch:{client_ip}", max_calls=60, window_sec=60):
raise HTTPException(status_code=429, detail="Rate limit: 60 telemetry batches/min")
cap = min(len(batch.events), 20) # hard cap per batch
processed = sum(1 for item in batch.events[:cap] if _process_telemetry_item(item))
logger.debug("telemetry/voice/batch: submitted=%d processed=%d cap=%d",
len(batch.events), processed, cap)
# ─── Voice Degradation State Machine ─────────────────────────────────────────
# Tracks rolling window of voice telemetry and determines system-level state.
# States: ok → degraded_tts → degraded_llm → fast_lock → emergency
# Client polls GET /api/voice/degradation_status to show UI badge.
import collections
from dataclasses import dataclass as _dc, field as _field
from enum import Enum
class VoiceDegradationState(str, Enum):
    """Coarse voice-pipeline health states (str-valued so they serialize to JSON)."""
    OK = "ok"                       # all SLOs met
    DEGRADED_TTS = "degraded_tts"   # TTS slow/failing → show "TTS SLOW" badge
    DEGRADED_LLM = "degraded_llm"   # LLM slow → profile auto-demoted to fast
    FAST_LOCK = "fast_lock"         # LLM degraded, forced to voice_fast_uk
    EMERGENCY = "emergency"         # TTS failing → warn user, fallback banner
# SLO thresholds (ms) — aligned with config/slo_policy.yml.
# p95 values are nearest-rank percentiles over the rolling telemetry window.
_SM_TTFA_WARN = 5000       # TTFA p95 > 5s → degraded_llm
_SM_TTFA_LOCK = 8000       # TTFA p95 > 8s → fast_lock
_SM_TTS_WARN = 2000        # TTS first p95 > 2s → degraded_tts
_SM_TTS_CRIT = 4000        # TTS first p95 > 4s → emergency
_SM_UNDERFLOW_RATE = 0.1   # >10% of recent turns have underflows → degraded_tts
_SM_WINDOW = 20            # rolling window (last N telemetry events)
_SM_MIN_SAMPLES = 5        # need at least N samples before changing state
@_dc
class _VoiceDegradationSM:
    """Rolling-window degradation state machine for voice telemetry.

    ``observe()`` appends the newest turn's latencies into bounded deques and
    recomputes the coarse health state; ``status_dict()`` renders a
    JSON-serializable snapshot for /api/voice/degradation_status.
    """
    # Bounded rolling windows (maxlen=_SM_WINDOW) of recent measurements.
    _ttfa_window: collections.deque = _field(default_factory=lambda: collections.deque(maxlen=_SM_WINDOW))
    _tts_first_window: collections.deque = _field(default_factory=lambda: collections.deque(maxlen=_SM_WINDOW))
    _underflow_window: collections.deque = _field(default_factory=lambda: collections.deque(maxlen=_SM_WINDOW))
    state: VoiceDegradationState = VoiceDegradationState.OK
    state_since: float = _field(default_factory=time.monotonic)
    recommended_profile: str = "voice_fast_uk"
    last_reason: str = ""
    # NOTE(review): this lock is never acquired anywhere in the class — kept
    # for field compatibility. asyncio is imported at module level, so the
    # previous __import__('asyncio') indirection was unnecessary.
    _lock: object = _field(default_factory=asyncio.Lock)
    def observe(self, ttfa_ms: Optional[int], tts_first_ms: Optional[int],
                underflows: int, profile: str) -> None:
        """Record one telemetry sample and refresh the derived state."""
        if ttfa_ms is not None:
            self._ttfa_window.append(ttfa_ms)
        if tts_first_ms is not None:
            self._tts_first_window.append(tts_first_ms)
        self._underflow_window.append(1 if underflows > 0 else 0)
        self._recompute()
    def _p95(self, window: collections.deque) -> Optional[float]:
        # Nearest-rank p95; None until _SM_MIN_SAMPLES observations exist.
        if len(window) < _SM_MIN_SAMPLES:
            return None
        s = sorted(window)
        return s[int(len(s) * 0.95)]
    def _underflow_rate(self) -> float:
        # Fraction of recent turns that reported at least one underflow.
        if not self._underflow_window:
            return 0.0
        return sum(self._underflow_window) / len(self._underflow_window)
    def _recompute(self) -> None:
        """Re-derive state from window p95s; branches are ordered by severity."""
        ttfa_p95 = self._p95(self._ttfa_window)
        tts_p95 = self._p95(self._tts_first_window)
        uf_rate = self._underflow_rate()
        prev_state = self.state
        if tts_p95 is not None and tts_p95 > _SM_TTS_CRIT:
            self.state = VoiceDegradationState.EMERGENCY
            self.recommended_profile = "voice_fast_uk"
            self.last_reason = f"TTS p95={tts_p95:.0f}ms > {_SM_TTS_CRIT}ms"
        elif ttfa_p95 is not None and ttfa_p95 > _SM_TTFA_LOCK:
            self.state = VoiceDegradationState.FAST_LOCK
            self.recommended_profile = "voice_fast_uk"
            self.last_reason = f"TTFA p95={ttfa_p95:.0f}ms > {_SM_TTFA_LOCK}ms — locked to fast profile"
        elif tts_p95 is not None and tts_p95 > _SM_TTS_WARN:
            self.state = VoiceDegradationState.DEGRADED_TTS
            self.recommended_profile = "voice_fast_uk"
            self.last_reason = f"TTS p95={tts_p95:.0f}ms > {_SM_TTS_WARN}ms"
        elif ttfa_p95 is not None and ttfa_p95 > _SM_TTFA_WARN:
            self.state = VoiceDegradationState.DEGRADED_LLM
            self.recommended_profile = "voice_fast_uk"
            self.last_reason = f"TTFA p95={ttfa_p95:.0f}ms > {_SM_TTFA_WARN}ms"
        elif uf_rate > _SM_UNDERFLOW_RATE:
            self.state = VoiceDegradationState.DEGRADED_TTS
            self.recommended_profile = "voice_fast_uk"
            self.last_reason = f"Underflow rate={uf_rate:.1%} > {_SM_UNDERFLOW_RATE:.0%}"
        else:
            self.state = VoiceDegradationState.OK
            self.recommended_profile = "voice_fast_uk"  # default
            self.last_reason = "all SLOs met"
        if self.state != prev_state:
            self.state_since = time.monotonic()
            # BUGFIX: the format string previously read "%s%s", so the two
            # state names were glued together in the log line (e.g. "okdegraded_tts").
            logger.warning("voice_degradation state: %s → %s | %s",
                           prev_state.value, self.state.value, self.last_reason)
    def status_dict(self) -> dict:
        """JSON-serializable snapshot consumed by the status endpoint."""
        return {
            "state": self.state.value,
            "state_since_sec": int(time.monotonic() - self.state_since),
            "recommended_profile": self.recommended_profile,
            "reason": self.last_reason,
            "samples": {
                "ttfa": len(self._ttfa_window),
                "tts_first": len(self._tts_first_window),
            },
            "p95": {
                "ttfa_ms": self._p95(self._ttfa_window),
                "tts_first_ms": self._p95(self._tts_first_window),
            },
            "underflow_rate": round(self._underflow_rate(), 3),
            "ui_badge": _SM_UI_BADGE.get(self.state, ""),
        }
# UI badge text per state (empty string for OK — the UI renders no badge then)
_SM_UI_BADGE = {
    VoiceDegradationState.OK: "",
    VoiceDegradationState.DEGRADED_TTS: "⚠ TTS SLOW",
    VoiceDegradationState.DEGRADED_LLM: "⚠ AI SLOW",
    VoiceDegradationState.FAST_LOCK: "⚡ FAST MODE",
    VoiceDegradationState.EMERGENCY: "🔴 TTS DEGRADED",
}
# Process-wide singleton: written by telemetry beacons, read by the status endpoint.
_voice_degradation_sm = _VoiceDegradationSM()
@app.get("/api/voice/degradation_status")
async def api_voice_degradation_status():
"""Returns current voice degradation state + repro pack for incident diagnosis.
Repro pack fields (for on-call):
node_id, edge_tts_version, last_model, last_profile,
last_5_tts_errors, last_5_llm_errors
"""
base = _voice_degradation_sm.status_dict()
# Enrich with repro pack
base["repro"] = {
"node_id": _NODE_ID,
"last_model": _voice_last_model,
"last_profile": _voice_last_profile,
"last_5_tts_errors": list(_voice_tts_errors),
"last_5_llm_errors": list(_voice_llm_errors),
"concurrent_tts_slots_free": _get_tts_semaphore()._value,
"max_concurrent_tts": _MAX_CONCURRENT_TTS,
}
return base
# ─── Memory ──────────────────────────────────────────────────────────────────
@app.get("/api/memory/status")
async def api_memory_status(_auth: str = Depends(require_auth)):
mem_url = get_memory_service_url()
try:
async with httpx.AsyncClient(timeout=8.0) as client:
r = await client.get(f"{mem_url}/health")
r.raise_for_status()
data = r.json()
return {
"ok": True,
"memory_url": mem_url,
"status": data.get("status", "unknown"),
"vector_store": data.get("vector_store", {}),
"stt": "whisper-large-v3-turbo",
"tts": "edge-tts / macOS say",
}
except Exception as e:
return {"ok": False, "error": str(e)[:200], "memory_url": mem_url}
@app.get("/api/memory/context")
async def api_memory_context(
session_id: str = Query("console"),
agent_id: str = Query("sofiia"),
user_id: Optional[str] = Query(None),
limit: int = Query(20, ge=1, le=100),
_auth: str = Depends(require_auth),
):
mem_url = get_memory_service_url()
agent_key = str(agent_id or "").strip().lower()
resolved_user = user_id or ("aistalk_user" if agent_key == "aistalk" else "console_user")
async def _sqlite_fallback_events() -> List[Dict[str, Any]]:
events: List[Dict[str, Any]] = []
if _app_db:
try:
rows = await _app_db.list_messages(session_id, limit=limit)
for row in rows:
events.append(
{
"role": row.get("role", "unknown"),
"content": row.get("content", ""),
"ts": row.get("ts"),
"source": "sqlite_fallback",
}
)
except Exception:
pass
return events
try:
async with httpx.AsyncClient(timeout=8.0) as client:
r = await client.get(
f"{mem_url}/agents/{agent_id}/memory",
params={"user_id": resolved_user, "channel_id": session_id, "limit": limit},
)
r.raise_for_status()
data = r.json()
events = data.get("events") if isinstance(data, dict) else None
if isinstance(events, list) and events:
return data
# Remote is alive but returned empty history; expose local persisted history too.
local_events = await _sqlite_fallback_events()
if local_events:
return {"events": local_events, "fallback": "sqlite_after_empty_remote"}
return data if isinstance(data, dict) else {"events": []}
except Exception as e:
# Fallback to local SQLite session memory so UI still has context.
events = await _sqlite_fallback_events()
return {"events": events, "error": str(e)[:100], "fallback": "sqlite"}
# ─── WebSocket /ws/events ────────────────────────────────────────────────────
@app.websocket("/ws/events")
async def ws_events(websocket: WebSocket):
"""WebSocket event stream. Clients receive all broadcast events."""
await websocket.accept()
_ws_clients.add(websocket)
logger.info("WS client connected, total=%d", len(_ws_clients))
# Send welcome
await websocket.send_text(json.dumps(_make_event("nodes.status", {
"message": "connected",
"bff_version": _VERSION,
"ws_clients": len(_ws_clients),
})))
try:
while True:
# Keep-alive: read pings from client (or just wait)
try:
msg = await asyncio.wait_for(websocket.receive_text(), timeout=15.0)
# Client can send {"type":"ping"} → pong
if msg:
try:
cmd = json.loads(msg)
if cmd.get("type") == "ping":
await websocket.send_text(json.dumps({"type": "pong", "ts": _now_iso()}))
except Exception:
pass
except asyncio.TimeoutError:
# Send periodic heartbeat with cached nodes if available
hb_data: Dict[str, Any] = {
"bff_uptime_s": int(time.monotonic() - _START_TIME),
"ws_clients": len(_ws_clients),
}
if _nodes_cache.get("nodes"):
hb_data["nodes"] = [
{
"id": n["node_id"],
"online": n.get("online", False),
"router_ok": n.get("router_ok", False),
"router_latency_ms": n.get("router_latency_ms"),
}
for n in _nodes_cache["nodes"]
]
hb_data["nodes_ts"] = _nodes_cache.get("ts", "")
await websocket.send_text(json.dumps(_make_event("nodes.status", hb_data)))
except WebSocketDisconnect:
pass
except Exception as e:
logger.debug("WS error: %s", e)
finally:
_ws_clients.discard(websocket)
logger.info("WS client disconnected, total=%d", len(_ws_clients))
# ─── UI ─────────────────────────────────────────────────────────────────────
# Directory holding the static UI bundle (index.html, chat.html, …).
STATIC_DIR = Path(__file__).resolve().parent.parent / "static"
# Headers applied to HTML/meta responses so browsers never cache the UI shell.
_NO_CACHE = {"Cache-Control": "no-cache, no-store, must-revalidate", "Pragma": "no-cache"}
@app.get("/api/meta/version")
async def get_meta_version():
"""Build metadata endpoint — always no-cache, always public."""
return JSONResponse(
content={
"version": _VERSION,
"build_sha": _BUILD_SHA,
"build_time": _BUILD_TIME,
"service": "sofiia-console",
},
headers=_NO_CACHE,
)
# ─── Auth endpoints ──────────────────────────────────────────────────────────
class _LoginBody(BaseModel):
    # API key carried in the JSON body (avoids header-encoding/CORS issues).
    key: str
@app.post("/api/auth/login")
async def auth_login(body: _LoginBody, response: Response):
"""
Verify API key (sent in JSON body — avoids header encoding issues).
On success: set httpOnly session cookie, return ok=true.
No CORS/header encoding issues since key travels in request body.
"""
if not _key_valid(body.key):
raise HTTPException(status_code=401, detail="Invalid key")
token = _cookie_token(body.key)
response.set_cookie(
key=_COOKIE_NAME,
value=token,
httponly=True,
secure=_IS_PROD, # Secure=True in prod (HTTPS only)
samesite="lax",
max_age=_COOKIE_MAX_AGE,
path="/",
)
return {"ok": True, "auth": "cookie"}
@app.post("/api/auth/logout")
async def auth_logout(response: Response):
"""Clear session cookie."""
response.delete_cookie(key=_COOKIE_NAME, path="/")
return {"ok": True}
@app.get("/api/auth/check")
async def auth_check(request: Request):
"""Returns 200 if session is valid, 401 otherwise. Used by UI on startup."""
# Localhost is always open — no auth needed
client_ip = (request.client.host if request.client else "") or ""
if client_ip in ("127.0.0.1", "::1", "localhost"):
return {"ok": True, "auth": "localhost"}
configured = get_console_api_key()
if not configured:
return {"ok": True, "auth": "open"}
from .auth import _expected_cookie_token as _ect
cookie_val = request.cookies.get(_COOKIE_NAME, "")
import secrets as _sec
if cookie_val and _sec.compare_digest(cookie_val, _ect()):
return {"ok": True, "auth": "cookie"}
raise HTTPException(status_code=401, detail="Not authenticated")
@app.get("/", response_class=HTMLResponse)
async def ui_root():
index = STATIC_DIR / "index.html"
content = index.read_text(encoding="utf-8") if index.exists() else _fallback_html()
return HTMLResponse(content=content, headers=_NO_CACHE)
@app.get("/ui", response_class=HTMLResponse)
async def ui_alias():
return await ui_root()
def _fallback_html() -> str:
    """Minimal HTML page served when the static UI bundle is absent."""
    return (
        '<!DOCTYPE html><html><head><meta charset="utf-8"><title>Sofiia Console</title></head>\n'
        f"<body><h1>Sofiia Control Console v{_VERSION}</h1>\n"
        "<p>Endpoints: <code>GET /api/health</code> | <code>GET /api/status/full</code> | <code>POST /api/chat/send</code> | <code>WS /ws/events</code></p>\n"
        "</body></html>"
    )
@app.get("/chat", response_class=HTMLResponse)
async def ui_chat():
p = STATIC_DIR / "chat.html"
content = p.read_text(encoding="utf-8") if p.exists() else _fallback_html()
return HTMLResponse(content=content, headers=_NO_CACHE)
@app.get("/ops", response_class=HTMLResponse)
async def ui_ops():
p = STATIC_DIR / "ops.html"
content = p.read_text(encoding="utf-8") if p.exists() else _fallback_html()
return HTMLResponse(content=content, headers=_NO_CACHE)
@app.get("/nodes", response_class=HTMLResponse)
async def ui_nodes():
p = STATIC_DIR / "nodes.html"
content = p.read_text(encoding="utf-8") if p.exists() else _fallback_html()
return HTMLResponse(content=content, headers=_NO_CACHE)
# ── Supervisor Proxy ───────────────────────────────────────────────────────────
# Primary supervisor endpoint ("sofiia-supervisor" — presumably the compose
# service hostname; confirm) plus a localhost fallback tried on connect failure.
_SUPERVISOR_URL = os.getenv("SUPERVISOR_URL", "http://sofiia-supervisor:8080").rstrip("/")
_SUPERVISOR_FALLBACK_URL = os.getenv("SUPERVISOR_FALLBACK_URL", "http://127.0.0.1:8084").rstrip("/")
async def _supervisor_request_json(
    method: str,
    path: str,
    *,
    timeout: float = 30.0,
    json_body: Optional[Dict[str, Any]] = None,
) -> Tuple[int, Dict[str, Any]]:
    """Issue an HTTP request to the supervisor, trying the fallback URL on
    transport failure.

    Returns (status_code, payload-dict). Non-dict JSON is wrapped as
    {"data": ...}; a non-JSON body becomes {"raw": ...}; an empty body is {}.
    A 4xx/5xx answer from a *reachable* supervisor raises HTTPException
    immediately (no fallback attempt); only connection-level errors move on
    to the next base URL. Raises 502 when every URL is unreachable.
    """
    urls = [_SUPERVISOR_URL]
    if _SUPERVISOR_FALLBACK_URL and _SUPERVISOR_FALLBACK_URL not in urls:
        urls.append(_SUPERVISOR_FALLBACK_URL)
    last_err = "unavailable"
    for base in urls:
        target = f"{base}{path}"
        try:
            async with httpx.AsyncClient(timeout=timeout) as client:
                resp = await client.request(method, target, json=json_body)
        except Exception as e:
            # Transport failure → remember the error and try the next URL.
            last_err = str(e)[:200]
            continue
        if resp.status_code >= 400:
            detail = resp.text[:400] if resp.text else f"Supervisor error {resp.status_code}"
            raise HTTPException(status_code=resp.status_code, detail=detail)
        if not resp.content:
            return resp.status_code, {}
        try:
            payload = resp.json()
        except Exception:
            return resp.status_code, {"raw": resp.text[:1000]}
        if isinstance(payload, dict):
            return resp.status_code, payload
        return resp.status_code, {"data": payload}
    raise HTTPException(status_code=502, detail=f"Supervisor unreachable: {last_err}")
@app.post("/api/supervisor/runs")
async def start_supervisor_run(request: Request, _auth: str = Depends(require_auth)):
"""Start a LangGraph run on sofiia-supervisor.
Body: {"graph": "alert_triage|incident_triage|postmortem_draft|release_check",
"project_id": "<optional>", ...params}
If project_id is provided, auto-creates an agent_run dialog_node in the graph
and returns node_id in the response for UI tracking.
"""
body = await request.json()
graph_name = body.pop("graph", None)
project_id = body.pop("project_id", None)
if not graph_name:
raise HTTPException(status_code=400, detail="'graph' field is required")
try:
status_code, result = await _supervisor_request_json(
"POST",
f"/v1/graphs/{graph_name}/runs",
timeout=60.0,
json_body=body,
)
# Auto-create agent_run node if project is provided
if project_id and status_code in (200, 201, 202):
run_id = result.get("run_id") or result.get("id") or str(uuid.uuid4())
try:
pack = await _app_db.create_evidence_pack(
project_id=project_id,
run_id=run_id,
graph_name=graph_name,
result_data={"status": "started", "summary": f"Run started: {graph_name}"},
created_by="sofiia",
)
result["_node_id"] = pack.get("node_id")
except Exception as node_err:
logger.warning("evidence_pack node creation failed (non-fatal): %s", node_err)
return JSONResponse(status_code=status_code, content=result)
except HTTPException:
raise
except Exception as e:
raise HTTPException(status_code=502, detail=f"Supervisor unreachable: {e}")
@app.get("/api/supervisor/runs/{run_id}")
async def get_supervisor_run(run_id: str, _auth: str = Depends(require_auth)):
"""Get the status/result of a LangGraph run."""
try:
status_code, payload = await _supervisor_request_json(
"GET",
f"/v1/runs/{run_id}",
timeout=15.0,
)
return JSONResponse(status_code=status_code, content=payload)
except Exception as e:
raise HTTPException(status_code=502, detail=f"Supervisor unreachable: {e}")
@app.post("/api/supervisor/runs/{run_id}/cancel")
async def cancel_supervisor_run(run_id: str, _auth: str = Depends(require_auth)):
"""Cancel a running LangGraph run."""
try:
status_code, payload = await _supervisor_request_json(
"POST",
f"/v1/runs/{run_id}/cancel",
timeout=10.0,
)
return JSONResponse(status_code=status_code, content=payload)
except Exception as e:
raise HTTPException(status_code=502, detail=f"Supervisor unreachable: {e}")
@app.get("/api/supervisor/graphs")
async def list_supervisor_graphs():
"""List available LangGraph graphs (no auth — read-only discovery)."""
urls = [_SUPERVISOR_URL]
if _SUPERVISOR_FALLBACK_URL and _SUPERVISOR_FALLBACK_URL not in urls:
urls.append(_SUPERVISOR_FALLBACK_URL)
last_err = "unavailable"
for base in urls:
try:
async with httpx.AsyncClient(timeout=5.0) as client:
resp = await client.get(f"{base}/healthz")
data = resp.json()
return {
"graphs": data.get("graphs", []),
"healthy": resp.status_code == 200,
"url": base,
"state_backend": data.get("state_backend"),
}
except Exception as e:
last_err = str(e)
continue
return {"graphs": [], "healthy": False, "error": last_err}
@app.get("/api/aistalk/status")
async def aistalk_status():
"""AISTALK integration status for SOFIIA UI."""
try:
sup = await list_supervisor_graphs()
aurora = await api_aurora_health()
runtime = await _aistalk_runtime_state()
adapter_status: Dict[str, Any]
relay_health: Dict[str, Any]
if _aistalk is not None:
try:
relay_health = _aistalk.probe_health()
except Exception as e:
relay_health = {"enabled": True, "ok": False, "error": str(e)[:200]}
try:
adapter_status = _aistalk.status()
except Exception:
adapter_status = {"enabled": True, "base_url": "unknown"}
else:
relay_health = {"enabled": False, "ok": False, "error": "disabled"}
adapter_status = {"enabled": False, "base_url": ""}
return {
"aistalk_enabled": _aistalk is not None,
"aistalk_adapter": repr(_aistalk) if _aistalk is not None else "disabled",
"adapter": adapter_status,
"relay_health": relay_health,
"supervisor": sup,
"aurora": aurora,
"runtime": runtime,
"docs": {
"contract": "/docs/aistalk/contract.md",
"supervisor": "/docs/supervisor/langgraph_supervisor.md",
},
}
except Exception as e:
raise HTTPException(status_code=500, detail=str(e))
def _parse_agent_md(path: Path) -> Dict[str, Any]:
text = path.read_text(encoding="utf-8", errors="ignore")
lines = [ln.rstrip() for ln in text.splitlines()]
title = path.stem
display_name = title
role: List[str] = []
outputs: List[str] = []
boundaries: List[str] = []
capabilities: List[str] = []
intro: List[str] = []
in_section: Optional[str] = None
for raw in lines:
line = raw.strip()
if not line:
continue
if line.startswith("# "):
display_name = line[2:].strip()
continue
low = line.lower()
if low.startswith("role:"):
in_section = "role"
continue
if low.startswith("output:"):
in_section = "output"
continue
if low.startswith("outputs:"):
in_section = "output"
continue
if low.startswith("boundary:"):
in_section = "boundary"
continue
if low.startswith("boundaries:"):
in_section = "boundary"
continue
if low.startswith("capabilities:"):
in_section = "capabilities"
continue
if low.startswith("modes:") or low.startswith("rules:") or low.startswith("internal sub-pipeline"):
in_section = None
continue
if line.startswith("```"):
in_section = None
continue
if line.startswith("- "):
item = line[2:].strip()
if in_section == "role":
role.append(item)
elif in_section == "output":
outputs.append(item)
elif in_section == "boundary":
boundaries.append(item)
elif in_section == "capabilities":
capabilities.append(item)
continue
if in_section is None and not line.startswith("#"):
# Some agent role files store purpose as plain intro line without "Role:" section.
intro.append(line)
summary = role[0] if role else (intro[0] if intro else "")
return {
"id": title.lower(),
"name": display_name,
"summary": summary,
"role": role,
"outputs": outputs,
"boundaries": boundaries,
"capabilities": capabilities,
"source": str(path),
}
@app.get("/api/aistalk/catalog")
async def aistalk_catalog():
    """
    Return AISTALK subagent catalog + declared capabilities for UI rendering.

    Parses every *.md role file under the first existing roles directory;
    parse failures produce stub entries instead of failing the endpoint.
    """
    # NOTE(review): duplicates the candidate list in _aistalk_roles_root();
    # keep the two in sync if the roles directory ever moves.
    roots = [
        Path(__file__).resolve().parents[3] / "config" / "roles" / "aistalk",
        Path(__file__).resolve().parents[1] / "config" / "roles" / "aistalk",
    ]
    root = next((p for p in roots if p.exists()), None)
    if root is None:
        return {
            "ok": False,
            "error": "AISTALK roles directory not found",
            "agents": [],
            "domains": [],
        }
    agents: List[Dict[str, Any]] = []
    for p in sorted(root.glob("*.md")):
        try:
            agents.append(_parse_agent_md(p))
        except Exception as e:
            # Parse failure still yields a stub entry so the UI can show the agent.
            agents.append(
                {
                    "id": p.stem.lower(),
                    "name": p.stem,
                    "summary": "",
                    "role": [],
                    "outputs": [],
                    "boundaries": [f"parse_error: {str(e)[:120]}"],
                    "capabilities": [],
                    "source": str(p),
                }
            )
    # High-level specialization domains for UI badges/filters.
    domains = [
        {"id": "osint", "name": "OSINT & Recon", "agents": ["tracer", "stealth", "shadow"]},
        {"id": "analysis", "name": "Threat Analysis", "agents": ["neuron", "graph", "risk"]},
        {"id": "offdef", "name": "Offense/Defense", "agents": ["redteam", "blueteam", "purpleteam", "bughunter", "devteam"]},
        {"id": "forensics", "name": "Media Forensics", "agents": ["aurora"]},
        {"id": "security", "name": "Governance & Data Safety", "agents": ["vault", "quantum"]},
        {"id": "orchestration", "name": "Command & Synthesis", "agents": ["orchestrator_synthesis"]},
    ]
    return {
        "ok": True,
        "root": str(root),
        "count": len(agents),
        "agents": agents,
        "domains": domains,
    }
# Persisted AISTALK runtime settings (limits + per-agent model map) live
# beside other console caches under the Aurora data root.
_AISTALK_RUNTIME_PATH = AURORA_DATA_DIR.parent / "sofiia-console-cache" / "aistalk_runtime.json"
# Canonical AISTALK agent roster; also fixes iteration order when building
# per-agent model maps.
_AISTALK_AGENT_ORDER = [
    "orchestrator_synthesis",
    "tracer",
    "shadow",
    "stealth",
    "neuron",
    "graph",
    "bughunter",
    "redteam",
    "blueteam",
    "purpleteam",
    "risk",
    "vault",
    "quantum",
    "devteam",
    "aurora",
]
# run_id -> start timestamp of in-flight team runs (stale entries GC'd).
_aistalk_team_active_runs: Dict[str, float] = {}
# Number of in-flight direct chat requests (bounded by runtime limits).
_aistalk_chat_active: int = 0
# Guards mutations of _aistalk_chat_active across concurrent requests.
_aistalk_state_lock = asyncio.Lock()
def _aistalk_roles_root() -> Optional[Path]:
    """Locate the AISTALK roles directory, preferring the repo-level config.

    Returns None when neither candidate directory exists.
    """
    candidates = (
        Path(__file__).resolve().parents[3] / "config" / "roles" / "aistalk",
        Path(__file__).resolve().parents[1] / "config" / "roles" / "aistalk",
    )
    for candidate in candidates:
        if candidate.exists():
            return candidate
    return None
def _aistalk_resource_snapshot() -> Dict[str, Any]:
    """Snapshot host CPU/RAM plus the configured Ollama runtime knobs."""
    total_mem_gb: Optional[float] = None
    try:
        # os.sysconf is POSIX-only; any failure simply leaves RAM unknown.
        page_bytes = os.sysconf("SC_PAGE_SIZE")
        phys_pages = os.sysconf("SC_PHYS_PAGES")
        if page_bytes > 0 and phys_pages > 0:
            total_mem_gb = round((page_bytes * phys_pages) / (1024 ** 3), 1)
    except Exception:
        total_mem_gb = None
    return {
        "cpu_count": os.cpu_count() or 8,
        "memory_gb": total_mem_gb,
        "ollama_num_ctx": SOFIIA_OLLAMA_NUM_CTX,
        "ollama_num_thread": SOFIIA_OLLAMA_NUM_THREAD,
        "ollama_num_gpu": SOFIIA_OLLAMA_NUM_GPU,
    }
def _aistalk_recommended_limits(resources: Dict[str, Any]) -> Dict[str, Any]:
cpu = int(resources.get("cpu_count") or 8)
mem = resources.get("memory_gb")
mem_gb = float(mem) if isinstance(mem, (int, float)) else 0.0
if cpu >= 12 and mem_gb >= 24:
profile = "performance"
team_max = 2
chat_max = 4
elif cpu >= 8 and mem_gb >= 16:
profile = "balanced"
team_max = 1
chat_max = 3
else:
profile = "safe"
team_max = 1
chat_max = 2
return {
"profile": profile,
"max_parallel_team_runs": team_max,
"max_parallel_chat": chat_max,
"rule": (
"Aurora/forensics jobs are GPU-heavy: keep team runs low; "
"chat parallelism may be higher but bounded by CPU/RAM."
),
}
async def _aistalk_local_models() -> List[str]:
    """Return names of models installed in the local Ollama ([] on any error)."""
    base = get_ollama_url().rstrip("/")
    try:
        async with httpx.AsyncClient(timeout=5.0) as client:
            resp = await client.get(f"{base}/api/tags")
            resp.raise_for_status()
            payload = resp.json()
            found: List[str] = []
            for entry in payload.get("models") or []:
                name = str((entry or {}).get("name", "")).strip()
                if name:
                    found.append(name)
            return found
    except Exception:
        # Treat an unreachable/odd Ollama as "no local models".
        return []
def _aistalk_default_model_map(models: List[str]) -> Dict[str, str]:
    """Choose a default Ollama model per AISTALK agent from installed models.

    Heavy reasoning agents prefer the largest candidate, analysts a medium
    one, everything else a lightweight model; each tier falls back to any
    installed model, then to the hard-coded "qwen3:14b".
    """
    installed = set(models)

    def first_available(*preferred: str) -> str:
        # Strongest preferred model that is actually installed, else any
        # installed model, else the hard-coded fallback.
        for name in preferred:
            if name in installed:
                return name
        return models[0] if models else "qwen3:14b"

    heavy = first_available("qwen3.5:35b-a3b", "qwen3:14b", "gemma3:latest")
    medium = first_available("qwen3:14b", "qwen3.5:35b-a3b", "gemma3:latest")
    light = first_available("gemma3:latest", "qwen3:14b", "qwen3.5:35b-a3b")
    heavy_agents = {"orchestrator_synthesis", "risk", "neuron", "graph"}
    medium_agents = {"tracer", "shadow", "stealth", "vault", "quantum"}
    assignment: Dict[str, str] = {}
    for agent_id in _AISTALK_AGENT_ORDER:
        if agent_id in heavy_agents:
            assignment[agent_id] = heavy
        elif agent_id in medium_agents:
            assignment[agent_id] = medium
        else:
            assignment[agent_id] = light
    return assignment
def _read_aistalk_runtime() -> Dict[str, Any]:
    """Load persisted AISTALK runtime settings; {} when missing or corrupt."""
    try:
        if not _AISTALK_RUNTIME_PATH.exists():
            return {}
        parsed = json.loads(_AISTALK_RUNTIME_PATH.read_text(encoding="utf-8"))
    except Exception:
        # Unreadable/invalid file is treated as "no saved settings".
        return {}
    return parsed if isinstance(parsed, dict) else {}
def _write_aistalk_runtime(data: Dict[str, Any]) -> None:
    """Persist AISTALK runtime settings as pretty-printed UTF-8 JSON."""
    serialized = json.dumps(data, ensure_ascii=False, indent=2)
    _AISTALK_RUNTIME_PATH.parent.mkdir(parents=True, exist_ok=True)
    _AISTALK_RUNTIME_PATH.write_text(serialized, encoding="utf-8")
async def _aistalk_runtime_state() -> Dict[str, Any]:
    """Build the normalized AISTALK runtime state.

    Merges persisted settings with host-derived recommendations, validates
    each agent's model against the locally installed Ollama models, clamps
    the parallelism limits, and re-persists the normalized shape so future
    restarts start from clean values.
    """
    resources = _aistalk_resource_snapshot()
    recommended = _aistalk_recommended_limits(resources)
    models = await _aistalk_local_models()
    stored = _read_aistalk_runtime()
    limits = stored.get("limits") if isinstance(stored.get("limits"), dict) else {}
    max_team = int(limits.get("max_parallel_team_runs") or recommended["max_parallel_team_runs"])
    max_chat = int(limits.get("max_parallel_chat") or recommended["max_parallel_chat"])
    profile = str(limits.get("profile") or recommended["profile"])
    saved_models = stored.get("agent_models") if isinstance(stored.get("agent_models"), dict) else {}
    defaults = _aistalk_default_model_map(models)
    agent_models: Dict[str, str] = {}
    for aid in _AISTALK_AGENT_ORDER:
        # Fall back to defaults when the saved model is no longer installed.
        selected = str(saved_models.get(aid) or defaults.get(aid) or "")
        if models and selected not in models:
            selected = defaults.get(aid) or models[0]
        if not selected:
            selected = "qwen3:14b"
        agent_models[aid] = selected
    state = {
        "limits": {
            "profile": profile,
            "max_parallel_team_runs": max(1, min(max_team, 4)),
            "max_parallel_chat": max(1, min(max_chat, 8)),
        },
        "recommended": recommended,
        "resources": resources,
        "available_models": models,
        "agent_models": agent_models,
        "active_team_runs": len(_aistalk_team_active_runs),
        "active_chat": _aistalk_chat_active,
    }
    # Persist normalized shape for future restarts.
    _write_aistalk_runtime({"limits": state["limits"], "agent_models": state["agent_models"]})
    return state
def _aistalk_role_prompt(agent_id: str) -> str:
    """Load the role markdown for an agent as its system-prompt prefix.

    Falls back to the orchestrator role file when the agent file is
    missing, and to a generic analyst prompt when the roles directory is
    absent or unreadable.
    """
    fallback = "You are AISTALK security analyst. Respond with findings, risk, next actions."
    root = _aistalk_roles_root()
    if root is None:
        return fallback
    candidate = root / f"{agent_id}.md"
    if not candidate.exists():
        candidate = root / "orchestrator_synthesis.md"
    try:
        # Truncate so the prompt stays within local-model context budgets.
        return candidate.read_text(encoding="utf-8", errors="ignore")[:6000]
    except Exception:
        return fallback
@app.get("/api/aistalk/runtime")
async def aistalk_runtime(_auth: str = Depends(require_auth)):
    """Return the normalized AISTALK runtime state (limits, models, activity)."""
    return await _aistalk_runtime_state()
class AISTalkModelSetBody(BaseModel):
    """Request body for pinning an Ollama model to a single AISTALK agent."""
    agent_id: str  # must be a member of _AISTALK_AGENT_ORDER
    model: str  # must be installed locally when the local model list is known
@app.post("/api/aistalk/runtime/model")
async def aistalk_set_agent_model(body: AISTalkModelSetBody, _auth: str = Depends(require_auth)):
    """Persist a per-agent model override after validating agent id and model."""
    state = await _aistalk_runtime_state()
    aid = str(body.agent_id or "").strip().lower()
    if aid not in _AISTALK_AGENT_ORDER:
        raise HTTPException(status_code=400, detail=f"Unknown agent_id: {aid}")
    model = str(body.model or "").strip()
    models = state.get("available_models") or []
    # Only enforce availability when the local model list could be fetched.
    if models and model not in models:
        raise HTTPException(status_code=400, detail=f"Model not available locally: {model}")
    stored = _read_aistalk_runtime()
    # Seed missing sections from the current normalized state before writing.
    stored.setdefault("limits", state.get("limits", {}))
    stored.setdefault("agent_models", state.get("agent_models", {}))
    stored["agent_models"][aid] = model
    _write_aistalk_runtime(stored)
    return {"ok": True, "agent_id": aid, "model": model}
class AISTalkLimitsBody(BaseModel):
    """Request body for updating AISTALK parallelism limits (partial update)."""
    profile: Optional[str] = None
    max_parallel_team_runs: Optional[int] = None  # clamped to 1..4 on write
    max_parallel_chat: Optional[int] = None  # clamped to 1..8 on write
@app.post("/api/aistalk/runtime/limits")
async def aistalk_set_limits(body: AISTalkLimitsBody, _auth: str = Depends(require_auth)):
    """Persist AISTALK parallelism limits; omitted fields keep current values."""
    state = await _aistalk_runtime_state()
    stored = _read_aistalk_runtime()
    # Start from the current normalized limits, then apply provided fields.
    limits = dict(state.get("limits", {}))
    if body.profile:
        limits["profile"] = str(body.profile)
    if body.max_parallel_team_runs is not None:
        limits["max_parallel_team_runs"] = max(1, min(int(body.max_parallel_team_runs), 4))
    if body.max_parallel_chat is not None:
        limits["max_parallel_chat"] = max(1, min(int(body.max_parallel_chat), 8))
    stored["limits"] = limits
    stored.setdefault("agent_models", state.get("agent_models", {}))
    _write_aistalk_runtime(stored)
    return {"ok": True, "limits": limits}
def _is_terminal_run_status(status: str) -> bool:
s = (status or "").strip().lower()
return s in {"succeeded", "failed", "cancelled", "canceled", "timeout", "error"}
class AISTalkChatBody(BaseModel):
    """Request body for a direct single-agent AISTALK chat message."""
    message: str
    agent_id: str = "orchestrator_synthesis"  # normalized/validated by the handler
    model: Optional[str] = None  # explicit override; else per-agent mapping applies
    session_id: Optional[str] = None
    project_id: Optional[str] = None
    user_id: Optional[str] = None
    # Mutable [] default — presumably safe because pydantic copies field
    # defaults per instance; confirm if this model is ever used outside pydantic.
    history: List[Dict[str, Any]] = []
@app.post("/api/aistalk/chat")
async def aistalk_chat(body: AISTalkChatBody, request: Request, _auth: str = Depends(require_auth)):
    """Direct single-agent AISTALK chat via the local Ollama backend.

    Flow: per-IP rate limit -> acquire a bounded concurrency slot ->
    resolve agent/model -> call Ollama /api/chat -> broadcast a
    `chat.reply` event and save the exchange to memory in the background.

    Raises:
        HTTPException 429: rate limit or chat concurrency limit reached.
        HTTPException 502: Ollama request failed.
    """
    client_ip = request.client.host if request.client else "unknown"
    if not _check_rate(f"aistalk_chat:{client_ip}", max_calls=40, window_sec=60):
        raise HTTPException(status_code=429, detail="Rate limit: 40 AISTALK chat messages/min")
    state = await _aistalk_runtime_state()
    limits = state.get("limits", {})
    max_chat = int(limits.get("max_parallel_chat") or 2)
    async with _aistalk_state_lock:
        global _aistalk_chat_active
        if _aistalk_chat_active >= max_chat:
            raise HTTPException(
                status_code=429,
                detail=f"AISTALK chat busy: active={_aistalk_chat_active}, limit={max_chat}",
            )
        _aistalk_chat_active += 1
    # Normalize the agent id and pick a model that is actually installed.
    agent_id = str(body.agent_id or "orchestrator_synthesis").strip().lower()
    if agent_id not in _AISTALK_AGENT_ORDER:
        agent_id = "orchestrator_synthesis"
    selected_model = str(body.model or "").strip() or str((state.get("agent_models") or {}).get(agent_id) or "")
    if not selected_model:
        selected_model = "qwen3:14b"
    if (state.get("available_models") or []) and selected_model not in state["available_models"]:
        selected_model = (state.get("available_models") or ["qwen3:14b"])[0]
    project_id = body.project_id or "aistalk"
    session_id = body.session_id or f"aistalk_sess_{uuid.uuid4().hex[:10]}"
    user_id = body.user_id or "aistalk_user"
    try:
        role_prompt = _aistalk_role_prompt(agent_id)
        system_prompt = (
            "Ти працюєш у складі AISTALK (крипто-детективне агентство з безпеки мережі). "
            "Формат відповіді: findings -> risk -> actions. "
            "Пиши конкретно, без вигадок, позначай невизначеність.\n\n"
            + role_prompt
        )
        # Keep only the last 10 history turns to bound the prompt size.
        messages: List[Dict[str, Any]] = [{"role": "system", "content": system_prompt}]
        messages.extend(body.history[-10:])
        messages.append({"role": "user", "content": body.message})
        t0 = time.monotonic()
        async with httpx.AsyncClient(timeout=SOFIIA_OLLAMA_TIMEOUT_SEC) as client:
            r = await client.post(
                f"{get_ollama_url().rstrip('/')}/api/chat",
                json=_make_ollama_payload(
                    selected_model,
                    messages,
                    {
                        "temperature": 0.15,
                        "repeat_penalty": 1.1,
                        "num_predict": min(1024, SOFIIA_OLLAMA_NUM_PREDICT_TEXT),
                    },
                ),
            )
            r.raise_for_status()
            data = r.json()
        reply = ((data.get("message") or {}).get("content") or "").strip() or "AISTALK: порожня відповідь"
        latency_ms = int((time.monotonic() - t0) * 1000)
        _broadcast_bg(
            _make_event(
                "chat.reply",
                {
                    "text": reply[:200],
                    "provider": "ollama",
                    "model": f"ollama:{selected_model}",
                    "agent_id": agent_id,
                    "latency_ms": latency_ms,
                },
                project_id=project_id,
                session_id=session_id,
                user_id=user_id,
            )
        )
        # Fire-and-forget memory save. asyncio.create_task() is the supported
        # way to schedule from inside a running loop; the previous
        # asyncio.get_event_loop().create_task() form is deprecated.
        asyncio.create_task(
            _do_save_memory(
                body.message,
                reply,
                session_id,
                project_id,
                user_id,
                agent_id="aistalk",
            )
        )
        return {
            "ok": True,
            "project_id": project_id,
            "session_id": session_id,
            "user_id": user_id,
            "agent_id": agent_id,
            "model": f"ollama:{selected_model}",
            "response": reply,
            "meta": {"latency_ms": latency_ms, "active_chat": _aistalk_chat_active, "limit_chat": max_chat},
        }
    except HTTPException:
        raise
    except Exception as e:
        raise HTTPException(status_code=502, detail=f"AISTALK chat error: {str(e)[:200]}")
    finally:
        # Always release the concurrency slot, clamping at zero.
        async with _aistalk_state_lock:
            _aistalk_chat_active = max(0, _aistalk_chat_active - 1)
def _aistalk_autobuild_input(
graph: str,
objective: str,
input_payload: Dict[str, Any],
) -> Dict[str, Any]:
payload = dict(input_payload or {})
if graph == "incident_triage":
payload.setdefault("service", "aurora-service")
payload.setdefault("symptom", objective or "Aurora pipeline anomaly")
payload.setdefault("env", "prod")
payload.setdefault("include_traces", False)
return payload
if graph == "release_check":
payload.setdefault("service_name", "aurora-service")
payload.setdefault("diff_text", objective or "")
payload.setdefault("run_deps", True)
payload.setdefault("run_drift", True)
payload.setdefault("run_smoke", False)
return payload
if graph == "alert_triage":
# Graph is mostly autonomous; leave room for dry_run/profile overrides.
payload.setdefault("dry_run", False)
payload.setdefault("policy_profile", "default")
return payload
if graph == "postmortem_draft":
incident_id = str(payload.get("incident_id") or "").strip()
if not incident_id and objective:
m = re.search(r"(inc_[A-Za-z0-9_\-]+)", objective)
if m:
incident_id = m.group(1)
if not incident_id:
raise HTTPException(
status_code=400,
detail="postmortem_draft requires input.incident_id (e.g. inc_123abc)",
)
payload["incident_id"] = incident_id
payload.setdefault("service", "aurora-service")
payload.setdefault("env", "prod")
payload.setdefault("include_traces", False)
return payload
# Unknown/custom graph: pass-through without mutation.
return payload
@app.post("/api/aistalk/team/run")
async def aistalk_team_run(request: Request, _auth: str = Depends(require_auth)):
    """Run AISTALK team workflow via LangGraph supervisor.

    Enforces the runtime max_parallel_team_runs limit and tracks started
    run ids locally so status polling can release slots on completion.
    """
    body = await request.json()
    graph = str(body.get("graph") or "incident_triage").strip()
    objective = str(body.get("objective") or "").strip()
    input_payload = body.get("input")
    if not isinstance(input_payload, dict):
        input_payload = {}
    input_payload = _aistalk_autobuild_input(graph, objective, input_payload)
    runtime = await _aistalk_runtime_state()
    max_team_runs = int((runtime.get("limits") or {}).get("max_parallel_team_runs") or 1)
    # GC stale local entries (12h safety window).
    # NOTE(review): _aistalk_team_active_runs is mutated without
    # _aistalk_state_lock — presumably relies on single-event-loop
    # execution; confirm if the app ever runs with multiple workers.
    now_ts = time.time()
    stale = [rid for rid, ts in _aistalk_team_active_runs.items() if (now_ts - ts) > 12 * 3600]
    for rid in stale:
        _aistalk_team_active_runs.pop(rid, None)
    if len(_aistalk_team_active_runs) >= max_team_runs:
        raise HTTPException(
            status_code=429,
            detail=f"AISTALK team busy: active_runs={len(_aistalk_team_active_runs)}, limit={max_team_runs}",
        )
    sup_payload = {
        "workspace_id": str(body.get("workspace_id") or "daarion"),
        "user_id": str(body.get("user_id") or "aistalk_user"),
        "agent_id": "aistalk",
        "input": input_payload,
    }
    status_code, payload = await _supervisor_request_json(
        "POST",
        f"/v1/graphs/{graph}/runs",
        timeout=60.0,
        json_body=sup_payload,
    )
    # Track the run id only when the supervisor accepted the run.
    if status_code in (200, 201, 202) and isinstance(payload, dict):
        rid = str(payload.get("run_id") or payload.get("id") or "").strip()
        if rid:
            _aistalk_team_active_runs[rid] = time.time()
    return JSONResponse(
        status_code=status_code,
        content={
            "ok": status_code in (200, 201, 202),
            "graph": graph,
            "objective": objective,
            "active_runs": len(_aistalk_team_active_runs),
            "limit_runs": max_team_runs,
            **payload,
        },
    )
@app.post("/api/aistalk/relay/test")
async def aistalk_relay_test(request: Request, _auth: str = Depends(require_auth)):
    """Send a synthetic event to AISTALK relay and return adapter status.

    Raises HTTP 503 when the adapter is disabled.
    """
    # Tolerate empty bodies: parse JSON only when the content type says so.
    body = await request.json() if request.headers.get("content-type", "").startswith("application/json") else {}
    event_type = str(body.get("type") or "aistalk.ping").strip()
    event = _make_event(
        event_type,
        {"message": body.get("message", "relay test"), "source": "sofiia-console"},
        project_id=str(body.get("project_id") or "aistalk"),
        session_id=str(body.get("session_id") or f"aistalk_test_{uuid.uuid4().hex[:8]}"),
        user_id="sofiia",
    )
    if _aistalk is None:
        raise HTTPException(status_code=503, detail="AISTALK adapter disabled")
    _aistalk.handle_event(event)
    return {
        "ok": True,
        "queued": True,
        "event_type": event_type,
        "adapter": _aistalk.status(),
    }
@app.get("/api/aistalk/team/run/{run_id}")
async def aistalk_team_run_status(run_id: str, _auth: str = Depends(require_auth)):
    """Proxy supervisor run status; frees the local team slot on terminal states."""
    status_code, payload = await _supervisor_request_json(
        "GET",
        f"/v1/runs/{run_id}",
        timeout=20.0,
    )
    # Terminal runs release their concurrency slot immediately.
    if isinstance(payload, dict) and _is_terminal_run_status(str(payload.get("status") or "")):
        _aistalk_team_active_runs.pop(run_id, None)
    return JSONResponse(status_code=status_code, content=payload)
# ── Evidence Pack Engine ────────────────────────────────────────────────────────
@app.post("/api/projects/{project_id}/supervisor/evidence")
async def record_evidence_pack(
    project_id: str,
    request: Request,
    _auth: str = Depends(require_auth),
):
    """Record an Evidence Pack for a completed Supervisor run.
    Links the run into the Dialog Graph and auto-creates follow-up tasks.
    Returns HTTP 201 with the stored pack.
    Body: {
        "run_id": str,              # required
        "graph_name": str,          # required
        "status": "completed|failed",  # optional
        "summary": str,             # optional
        "findings": [...],          # optional
        "recommendations": [...],   # optional
        "follow_up_tasks": [        # optional - auto-created as tasks
            {"title": ..., "description": ..., "priority": "normal|high|urgent"}
        ]
    }
    """
    body = await request.json()
    run_id = body.get("run_id")
    graph_name = body.get("graph_name")
    if not run_id or not graph_name:
        raise HTTPException(status_code=400, detail="run_id and graph_name are required")
    try:
        # The full body is stored as the pack's result data.
        pack = await _app_db.create_evidence_pack(
            project_id=project_id,
            run_id=run_id,
            graph_name=graph_name,
            result_data=body,
            created_by="sofiia",
        )
        return JSONResponse(status_code=201, content=pack)
    except Exception as e:
        logger.error("record_evidence_pack failed: %s", e)
        raise HTTPException(status_code=500, detail=str(e))
# ── Graph Integrity ─────────────────────────────────────────────────────────────
@app.get("/api/projects/{project_id}/graph/integrity")
async def graph_integrity(project_id: str, _auth: str = Depends(require_auth)):
    """Run integrity checks on the project Dialog Graph.
    Returns: {"ok": bool, "violations": [...], "stats": {...}}
    Responds 200 when clean, 422 when violations were found.
    """
    try:
        result = await _app_db.check_graph_integrity(project_id)
        # 422 lets monitors alert on the status code alone.
        status_code = 200 if result["ok"] else 422
        return JSONResponse(status_code=status_code, content=result)
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))
# ── Graph Hygiene ───────────────────────────────────────────────────────────────
@app.post("/api/projects/{project_id}/graph/hygiene/run")
async def run_graph_hygiene(
    project_id: str,
    request: Request,
    _auth: str = Depends(require_auth),
):
    """Run Graph Hygiene Engine: dedup, lifecycle normalization, importance scoring.
    Body (all optional):
    {
        "dry_run": true,          // default true — compute but don't write
        "scope": "all"|"recent",  // default "all"
        "since": "ISO8601"        // required when scope=recent
    }
    Returns: {"ok": bool, "dry_run": bool, "changes": [...], "stats": {...}}
    """
    # Tolerate empty/non-JSON bodies: fall back to all defaults.
    body = await request.json() if request.headers.get("content-type", "").startswith("application/json") else {}
    dry_run = body.get("dry_run", True)
    scope = body.get("scope", "all")
    since = body.get("since")
    try:
        result = await _app_db.run_graph_hygiene(
            project_id=project_id,
            dry_run=dry_run,
            scope=scope,
            since=since,
        )
        return JSONResponse(status_code=200, content=result)
    except Exception as e:
        logger.error("run_graph_hygiene failed: %s", e)
        raise HTTPException(status_code=500, detail=str(e))
# ── Self-Reflection Engine ──────────────────────────────────────────────────────
@app.post("/api/projects/{project_id}/supervisor/reflect")
async def supervisor_reflect(
    project_id: str,
    request: Request,
    _auth: str = Depends(require_auth),
):
    """Create a Self-Reflection artifact for a completed Supervisor run.
    Analyzes the Evidence Pack and creates a 'decision' node (reflection)
    linked to the agent_run node via 'reflects_on' edge.
    Body: {
        "run_id": str,        // required
        "evidence": {         // optional — pass evidence data for richer analysis
            "summary": ...,
            "findings": [...],
            "recommendations": [...],
            "follow_up_tasks": [...]
        }
    }
    Returns: {node_id, reflection: {...scores, risks, ...}, edge_id, task_ids}
    (HTTP 201 on success, 500 on storage errors.)
    """
    body = await request.json()
    run_id = body.get("run_id")
    if not run_id:
        raise HTTPException(status_code=400, detail="run_id is required")
    evidence_data = body.get("evidence") or {}
    try:
        result = await _app_db.create_run_reflection(
            project_id=project_id,
            run_id=run_id,
            evidence_data=evidence_data,
            created_by="sofiia",
        )
        return JSONResponse(status_code=201, content=result)
    except Exception as e:
        logger.error("supervisor_reflect failed: %s", e)
        raise HTTPException(status_code=500, detail=str(e))
# ── Strategic CTO Layer: Snapshots ───────────────────────────────────────────
@app.post("/api/projects/{project_id}/graph/snapshot")
async def compute_snapshot(
    project_id: str,
    window: str = "7d",
    _auth: str = Depends(require_auth),
):
    """Compute and store a graph analytics snapshot for the project.

    Returns HTTP 201 with the stored snapshot; window defaults to "7d".
    """
    try:
        result = await _app_db.compute_graph_snapshot(project_id=project_id, window=window)
        return JSONResponse(status_code=201, content=result)
    except Exception as e:
        logger.error("compute_snapshot failed: %s", e)
        raise HTTPException(status_code=500, detail=str(e))
@app.get("/api/projects/{project_id}/graph/snapshot")
async def get_snapshot(
    project_id: str,
    window: str = "7d",
    _auth: str = Depends(require_auth),
):
    """Get the latest snapshot for the project and window.

    Raises HTTP 404 when no snapshot has been computed yet.
    """
    snap = await _app_db.get_latest_snapshot(project_id=project_id, window=window)
    if not snap:
        raise HTTPException(status_code=404, detail="No snapshot found. Run POST first.")
    return JSONResponse(content=snap)
# ── Strategic CTO Layer: Signals ─────────────────────────────────────────────
@app.post("/api/projects/{project_id}/graph/signals/recompute")
async def recompute_signals(
    project_id: str,
    window: str = "7d",
    dry_run: bool = True,  # dry_run defaults to True: compute without writing
    _auth: str = Depends(require_auth),
):
    """Run signal detection rules and upsert graph_signals."""
    try:
        result = await _app_db.recompute_graph_signals(
            project_id=project_id,
            window=window,
            dry_run=dry_run,
        )
        return JSONResponse(status_code=200, content=result)
    except Exception as e:
        logger.error("recompute_signals failed: %s", e)
        raise HTTPException(status_code=500, detail=str(e))
@app.get("/api/projects/{project_id}/graph/signals")
async def list_signals(
    project_id: str,
    status: str = "open",
    limit: int = 50,
    _auth: str = Depends(require_auth),
):
    """List graph signals for the project.

    Returns: {"signals": [...], "count": int}; filtered by status.
    """
    signals = await _app_db.get_graph_signals(project_id=project_id, status=status, limit=limit)
    return JSONResponse(content={"signals": signals, "count": len(signals)})
@app.post("/api/projects/{project_id}/graph/signals/{signal_id}/ack")
async def ack_signal(
    project_id: str,
    signal_id: str,
    _auth: str = Depends(require_auth),
):
    """Mark a graph signal as acknowledged; 404 when the signal is unknown."""
    updated = await _app_db.update_signal_status(signal_id=signal_id, new_status="ack")
    if updated:
        return JSONResponse(content=updated)
    raise HTTPException(status_code=404, detail="Signal not found")
@app.post("/api/projects/{project_id}/graph/signals/{signal_id}/resolve")
async def resolve_signal(
    project_id: str,
    signal_id: str,
    _auth: str = Depends(require_auth),
):
    """Mark a graph signal as resolved; 404 when the signal is unknown."""
    updated = await _app_db.update_signal_status(signal_id=signal_id, new_status="resolved")
    if updated:
        return JSONResponse(content=updated)
    raise HTTPException(status_code=404, detail="Signal not found")
@app.post("/api/projects/{project_id}/graph/signals/{signal_id}/dismiss")
async def dismiss_signal(
    project_id: str,
    signal_id: str,
    _auth: str = Depends(require_auth),
):
    """Mark a graph signal as dismissed; 404 when the signal is unknown."""
    updated = await _app_db.update_signal_status(signal_id=signal_id, new_status="dismissed")
    if updated:
        return JSONResponse(content=updated)
    raise HTTPException(status_code=404, detail="Signal not found")
@app.post("/api/projects/{project_id}/graph/signals/auto-resolve")
async def auto_resolve_signals(
    project_id: str,
    dry_run: bool = True,
    _auth: str = Depends(require_auth),
):
    """Check resolution criteria for all open/ack signals and auto-resolve if met.
    ?dry_run=true — compute without writing (default)
    ?dry_run=false — apply resolutions
    Returns: {ok, dry_run, checked, resolved, diff: [{signal_id, signal_type, action, reason}]}
    """
    try:
        result = await _app_db.auto_resolve_signals(
            project_id=project_id,
            dry_run=dry_run,
        )
        return JSONResponse(content=result)
    except Exception as e:
        logger.error("auto_resolve_signals failed: %s", e)
        raise HTTPException(status_code=500, detail=str(e))
@app.post("/api/projects/{project_id}/graph/signals/{signal_id}/mitigate")
async def mitigate_signal(
    project_id: str,
    signal_id: str,
    playbook_id: str = "",
    _auth: str = Depends(require_auth),
):
    """Create a deterministic mitigation plan for a signal.
    If playbook_id is provided, creates tasks from the playbook steps instead of templates.
    Otherwise uses built-in mitigation templates.
    Returns: {plan_node_id, task_ids, task_count, signal_type}
    (HTTP 201 on success, 404 when the signal/playbook lookup raises ValueError.)
    """
    try:
        if playbook_id:
            result = await _app_db.apply_playbook_to_signal(
                project_id=project_id,
                signal_id=signal_id,
                playbook_id=playbook_id,
                created_by="sofiia",
            )
        else:
            result = await _app_db.create_mitigation_plan(
                project_id=project_id,
                signal_id=signal_id,
                created_by="sofiia",
            )
        # A signal with a mitigation plan is implicitly acknowledged.
        await _app_db.update_signal_status(signal_id=signal_id, new_status="ack")
        return JSONResponse(status_code=201, content=result)
    except ValueError as e:
        raise HTTPException(status_code=404, detail=str(e))
    except Exception as e:
        logger.error("mitigate_signal failed: %s", e)
        raise HTTPException(status_code=500, detail=str(e))
# ── CTO Portfolio (Cross-Project) ────────────────────────────────────────────
@app.get("/api/cto/portfolio/snapshots")
async def portfolio_snapshots(
    window: str = "7d",
    _auth: str = Depends(require_auth),
):
    """Get the latest snapshot for every project (cross-project portfolio view).
    Returns: {projects: [{project_id, name, metrics, snapshot_at}], window}
    Each entry also carries the latest lesson bucket, trend flags, and streaks.
    """
    db = await _app_db.get_db()
    # All projects
    async with db.execute("SELECT project_id, name FROM projects ORDER BY name") as cur:
        projects = await cur.fetchall()
    result = []
    for pid, pname in projects:
        snap = await _app_db.get_latest_snapshot(pid, window)
        # Get latest lesson bucket + trend_flags
        async with db.execute(
            "SELECT date_bucket, metrics_json FROM lessons WHERE project_id=? ORDER BY date_bucket DESC LIMIT 1",
            (pid,),
        ) as cur:
            lrow = await cur.fetchone()
        lesson_bucket = None
        lesson_trend_flags = None
        if lrow:
            lesson_bucket = lrow[0]
            try:
                # NOTE(review): json is already imported at module level;
                # this local alias import is redundant but harmless.
                import json as _json
                lm = _json.loads(lrow[1] or "{}")
                lesson_trend_flags = lm.get("trend_flags")
            except Exception:
                pass
        # Compute streaks
        try:
            lesson_streaks = await _app_db.compute_lesson_streaks(pid)
        except Exception:
            lesson_streaks = None
        result.append({
            "project_id": pid,
            "name": pname,
            "metrics": snap["metrics"] if snap else None,
            "snapshot_at": snap["created_at"] if snap else None,
            "latest_lesson_bucket": lesson_bucket,
            "latest_lesson_trend_flags": lesson_trend_flags,
            "latest_lesson_streaks": lesson_streaks,
        })
    return JSONResponse(content={"projects": result, "window": window, "count": len(result)})
@app.get("/api/cto/portfolio/signals")
async def portfolio_signals(
    status: str = "open",
    severity: str = "",
    limit: int = 50,
    _auth: str = Depends(require_auth),
):
    """Get signals across all projects, ordered by severity then created_at.
    ?status=open|ack|resolved|dismissed|all
    ?severity=high,critical (comma-separated filter, optional)
    """
    db = await _app_db.get_db()
    async with db.execute("SELECT project_id, name FROM projects") as cur:
        projects = {r[0]: r[1] for r in await cur.fetchall()}
    # NOTE(review): the SQL LIMIT runs before the Python-side severity filter
    # and re-sort below, so matching rows can be dropped when more than
    # `limit` signals exist; SQL "ORDER BY severity" is alphabetical, too —
    # consider pushing the filter and a rank mapping into SQL.
    if status == "all":
        q = "SELECT *, rowid FROM graph_signals ORDER BY severity DESC, created_at DESC LIMIT ?"
        params: tuple = (limit,)
    else:
        q = "SELECT *, rowid FROM graph_signals WHERE status=? ORDER BY severity DESC, created_at DESC LIMIT ?"
        params = (status, limit)
    async with db.execute(q, params) as cur:
        rows = await cur.fetchall()
    # Severity order for sorting
    SEV_ORDER = {"critical": 0, "high": 1, "medium": 2, "low": 3}
    sev_filter = {s.strip() for s in severity.split(",") if s.strip()} if severity else set()
    signals = []
    for row in rows:
        d = dict(row)
        if "rowid" in d:
            del d["rowid"]
        try:
            d["evidence"] = json.loads(d["evidence"])
        except Exception:
            d["evidence"] = {}
        if sev_filter and d.get("severity") not in sev_filter:
            continue
        d["project_name"] = projects.get(d["project_id"], d["project_id"])
        signals.append(d)
    signals.sort(key=lambda s: (SEV_ORDER.get(s.get("severity", "low"), 3), s.get("created_at", "")))
    return JSONResponse(content={"signals": signals[:limit], "count": len(signals), "status": status})
@app.post("/api/cto/portfolio/drift/recompute")
async def portfolio_drift_recompute(
    window: str = "7d",
    dry_run: bool = False,  # unlike project-level recompute, writes by default
    _auth: str = Depends(require_auth),
):
    """Recompute portfolio-level drift signals based on lesson streaks across all projects."""
    try:
        result = await _app_db.recompute_portfolio_signals(window=window, dry_run=dry_run)
        return JSONResponse(content=result)
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))
@app.get("/api/cto/portfolio/drift/signals")
async def portfolio_drift_signals(
    status: str = "open",
    _auth: str = Depends(require_auth),
):
    """Get portfolio-level drift signals.

    Returns: {"signals": [...], "count": int}; filtered by status.
    """
    try:
        signals = await _app_db.list_portfolio_signals(status=status)
        return JSONResponse(content={"signals": signals, "count": len(signals)})
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))
@app.post("/api/cto/portfolio/streaks")
async def portfolio_streaks(
    _auth: str = Depends(require_auth),
):
    """Get streak data for all projects.

    NOTE(review): registered as POST although the handler only reads —
    confirm whether GET was intended.
    """
    try:
        db = await _app_db.get_db()
        async with db.execute("SELECT project_id, name FROM projects") as cur:
            projects = await cur.fetchall()
        result = []
        for pid, pname in projects:
            streaks = await _app_db.compute_lesson_streaks(pid)
            result.append({"project_id": pid, "name": pname, "streaks": streaks})
        return JSONResponse(content={"projects": result, "count": len(result)})
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))
# ── Playbooks (Graph Learning Layer) ─────────────────────────────────────────
@app.get("/api/projects/{project_id}/playbooks")
async def list_playbooks(
    project_id: str,
    signal_type: str = "",
    limit: int = 10,
    _auth: str = Depends(require_auth),
):
    """List playbooks for a project, ordered by success_rate desc.

    Query params:
        signal_type: optional filter forwarded to the DB layer ("" = all).
        limit: maximum number of playbooks returned.
    """
    try:
        pbs = await _app_db.list_playbooks(
            project_id=project_id,
            signal_type=signal_type,
            limit=limit,
        )
        return JSONResponse(content={"playbooks": pbs, "count": len(pbs)})
    except Exception as e:
        # Chain the cause so the original traceback survives in server logs (B904).
        raise HTTPException(status_code=500, detail=str(e)) from e
@app.post("/api/projects/{project_id}/playbooks/from-signal/{signal_id}")
async def create_playbook_from_signal(
    project_id: str,
    signal_id: str,
    _auth: str = Depends(require_auth),
):
    """Promote current mitigation of a signal into a playbook (or update existing).
    Requires signal to have plan_node_id and mitigation_task_ids in evidence.
    Returns: {playbook_id, doc_id, version_id, context_key, created, stats}
    """
    try:
        # A resolved signal counts toward the playbook's success statistics.
        db = await _app_db.get_db()
        async with db.execute(
            "SELECT status, evidence FROM graph_signals WHERE id=? AND project_id=?",
            (signal_id, project_id),
        ) as cur:
            srow = await cur.fetchone()
        resolved = srow[0] == "resolved" if srow else False
        result = await _app_db.upsert_playbook_from_signal(
            project_id=project_id,
            signal_id=signal_id,
            resolved=resolved,
        )
        return JSONResponse(status_code=201, content=result)
    except ValueError as e:
        # Domain validation failures (e.g. missing evidence fields) map to 409.
        raise HTTPException(status_code=409, detail=str(e)) from e
    except Exception as e:
        logger.error("create_playbook_from_signal failed: %s", e)
        # Chain the cause so the original traceback survives in server logs (B904).
        raise HTTPException(status_code=500, detail=str(e)) from e
# ── Portfolio Batch Recompute ─────────────────────────────────────────────────
@app.post("/api/cto/portfolio/snapshots/recompute")
async def portfolio_snapshots_recompute(
    window: str = "7d",
    force: bool = False,
    _auth: str = Depends(require_auth),
):
    """Recompute graph snapshots for ALL projects.
    Skips projects that already have a snapshot for today (date_bucket) unless force=true.
    Returns: {computed, skipped, errors[]}
    """
    db = await _app_db.get_db()
    async with db.execute("SELECT project_id FROM projects") as cur:
        project_ids = [r[0] for r in await cur.fetchall()]
    # Timezone-aware UTC date; replaces the deprecated datetime.utcnow() and the
    # redundant in-function `import datetime as _dt2` (datetime/timezone are
    # already imported at module top).
    today = datetime.now(timezone.utc).strftime("%Y-%m-%d")
    computed, skipped, errors = 0, 0, []
    for pid in project_ids:
        try:
            if not force:
                # Idempotency guard: at most one snapshot per (project, window, day).
                async with db.execute(
                    "SELECT id FROM graph_snapshots WHERE project_id=? AND window=? AND date_bucket=?",
                    (pid, window, today),
                ) as cur:
                    exists = await cur.fetchone()
                if exists:
                    skipped += 1
                    continue
            await _app_db.compute_graph_snapshot(project_id=pid, window=window)
            computed += 1
        except Exception as e:
            # Per-project failures are collected; the batch never aborts early.
            errors.append({"project_id": pid, "error": str(e)})
    return JSONResponse(content={"computed": computed, "skipped": skipped, "errors": errors})
@app.post("/api/cto/portfolio/signals/recompute")
async def portfolio_signals_recompute(
    window: str = "7d",
    dry_run: bool = False,
    _auth: str = Depends(require_auth),
):
    """Recompute signals for ALL projects.
    Returns: {results: [{project_id, new, refreshed, total}], errors[]}
    """
    db = await _app_db.get_db()
    async with db.execute("SELECT project_id FROM projects") as cur:
        rows = await cur.fetchall()
    project_ids = [row[0] for row in rows]
    results: List[Dict] = []
    errors: List[Dict] = []
    for pid in project_ids:
        try:
            diff = await _app_db.recompute_graph_signals(
                project_id=pid, window=window, dry_run=dry_run
            )
            actions = [entry.get("action") for entry in diff]
            results.append({
                "project_id": pid,
                "new": actions.count("new"),
                "refreshed": sum(1 for a in actions if a in ("refresh", "reopen")),
                "total": len(diff),
            })
        except Exception as e:
            # Collect per-project failures; other projects still get processed.
            errors.append({"project_id": pid, "error": str(e)})
    return JSONResponse(content={"results": results, "errors": errors, "dry_run": dry_run})
# ── Lessons (Graph Learning Layer) ────────────────────────────────────────────
@app.post("/api/projects/{project_id}/lessons/generate")
async def generate_lesson(
    project_id: str,
    window: str = "7d",
    dry_run: bool = True,
    _auth: str = Depends(require_auth),
):
    """Generate a weekly Lessons Learned report for a project.
    dry_run=true (default): compute and return without writing to DB.
    dry_run=false: persist lesson node + metrics + improvement tasks.
    Returns: {dry_run, date_bucket, markdown, metrics, planned_improvement_tasks, evidence}
    """
    try:
        result = await _app_db.upsert_lesson(
            project_id=project_id,
            window=window,
            dry_run=dry_run,
            created_by="sofiia",
        )
        # 201 only when something was actually persisted (dry_run=false).
        return JSONResponse(status_code=200 if dry_run else 201, content=result)
    except Exception as e:
        logger.error("generate_lesson failed: %s", e)
        # Chain the cause so the original traceback survives in server logs (B904).
        raise HTTPException(status_code=500, detail=str(e)) from e
@app.get("/api/projects/{project_id}/lessons")
async def list_lessons_endpoint(
    project_id: str,
    window: str = "7d",
    limit: int = 8,
    _auth: str = Depends(require_auth),
):
    """List lessons for a project, ordered by date_bucket desc."""
    try:
        lessons = await _app_db.list_lessons(project_id=project_id, window=window, limit=limit)
        return JSONResponse(content={"lessons": lessons, "count": len(lessons)})
    except Exception as e:
        # Chain the cause so the original traceback survives in server logs (B904).
        raise HTTPException(status_code=500, detail=str(e)) from e
@app.get("/api/projects/{project_id}/lessons/{lesson_id}")
async def get_lesson_endpoint(
    project_id: str,
    lesson_id: str,
    _auth: str = Depends(require_auth),
):
    """Get full lesson detail including markdown and linked evidence.

    Raises 404 when the lesson does not exist; other failures map to 500.
    """
    try:
        lesson = await _app_db.get_lesson_detail(project_id=project_id, lesson_id=lesson_id)
        if not lesson:
            raise HTTPException(status_code=404, detail="Lesson not found")
        return JSONResponse(content=lesson)
    except HTTPException:
        # Let deliberate HTTP errors (the 404 above) pass through untouched.
        raise
    except Exception as e:
        # Chain the cause so the original traceback survives in server logs (B904).
        raise HTTPException(status_code=500, detail=str(e)) from e
@app.post("/api/projects/{project_id}/lessons/impact/recompute")
async def recompute_lesson_impact(
    project_id: str,
    window: str = "7d",
    dry_run: bool = False,
    force: bool = False,
    _auth: str = Depends(require_auth),
):
    """Recompute impact score for the prior-bucket lesson based on current-bucket metrics.

    dry_run=true returns a preview (force=True passed through, no write);
    dry_run=false evaluates with the caller-supplied force flag.
    """
    try:
        if dry_run:
            # Preview: just return what would be computed, no write
            result = await _app_db.evaluate_lesson_impact(
                project_id=project_id, window=window, force=True
            )
            return JSONResponse(content={"dry_run": True, "preview": result})
        result = await _app_db.evaluate_lesson_impact(
            project_id=project_id, window=window, force=force
        )
        return JSONResponse(content={"dry_run": False, "result": result})
    except Exception as e:
        # Chain the cause so the original traceback survives in server logs (B904).
        raise HTTPException(status_code=500, detail=str(e)) from e
@app.post("/api/cto/portfolio/lessons/generate")
async def portfolio_lessons_generate(
    window: str = "7d",
    dry_run: bool = False,
    force: bool = False,
    _auth: str = Depends(require_auth),
):
    """Generate lessons for ALL projects.
    Skips projects that already have a lesson for the current bucket (unless force=true).
    Returns: {generated, skipped, errors[]}
    """
    db = await _app_db.get_db()
    async with db.execute("SELECT project_id FROM projects") as cur:
        pids = [row[0] for row in await cur.fetchall()]
    bucket = _app_db.compute_lesson_bucket()
    generated = 0
    skipped = 0
    errors: List[Dict] = []
    for pid in pids:
        try:
            # The dedup check only applies to real (persisting, non-forced) runs.
            if not force and not dry_run:
                async with db.execute(
                    "SELECT lesson_id FROM lessons WHERE project_id=? AND date_bucket=? AND window=?",
                    (pid, bucket, window),
                ) as cur:
                    already = await cur.fetchone()
                if already:
                    skipped += 1
                    continue
            await _app_db.upsert_lesson(project_id=pid, window=window, dry_run=dry_run)
            generated += 1
        except Exception as e:
            errors.append({"project_id": pid, "error": str(e)})
    return JSONResponse(content={
        "generated": generated,
        "skipped": skipped,
        "errors": errors,
        "dry_run": dry_run,
        "date_bucket": bucket,
    })
# ── Level 6: Governance Gates ─────────────────────────────────────────────────
@app.get("/api/projects/{project_id}/governance/gates")
async def get_governance_gates(
    project_id: str,
    window: str = "7d",
    _auth: str = Depends(require_auth),
):
    """Return latest governance gate evaluation (dry_run, no persist)."""
    try:
        result = await _app_db.evaluate_governance_gates(
            project_id=project_id, window=window, dry_run=True
        )
        return JSONResponse(content=result)
    except Exception as e:
        # Chain the cause so the original traceback survives in server logs (B904).
        raise HTTPException(status_code=500, detail=str(e)) from e
@app.post("/api/projects/{project_id}/governance/gates/evaluate")
async def evaluate_governance_gates_endpoint(
    project_id: str,
    window: str = "7d",
    dry_run: bool = False,
    _auth: str = Depends(require_auth),
):
    """Evaluate governance gates and optionally persist decision node."""
    try:
        result = await _app_db.evaluate_governance_gates(
            project_id=project_id, window=window, dry_run=dry_run
        )
        return JSONResponse(content=result)
    except Exception as e:
        # Chain the cause so the original traceback survives in server logs (B904).
        raise HTTPException(status_code=500, detail=str(e)) from e
# ── Level 6: Portfolio Drift Auto-plan / Auto-run ────────────────────────────
@app.post("/api/cto/portfolio/drift/{signal_id}/auto-plan")
async def portfolio_drift_auto_plan(
    signal_id: str,
    _auth: str = Depends(require_auth),
):
    """Populate evidence.auto_actions.runs with planned entries (dry_run=True)."""
    try:
        result = await _app_db.auto_plan_drift_signal(signal_id=signal_id)
        # The DB layer reports "unknown signal" via an error key rather than raising.
        if "error" in result:
            raise HTTPException(status_code=404, detail=result["error"])
        return JSONResponse(content=result)
    except HTTPException:
        raise
    except Exception as e:
        # Chain the cause so the original traceback survives in server logs (B904).
        raise HTTPException(status_code=500, detail=str(e)) from e
@app.post("/api/cto/portfolio/drift/{signal_id}/auto-run")
async def portfolio_drift_auto_run(
    signal_id: str,
    dry_run: bool = False,
    force: bool = False,
    _auth: str = Depends(require_auth),
):
    """Execute planned/queued workflow runs for a portfolio drift signal."""
    try:
        # Supervisor endpoint is configurable; default targets the compose service.
        supervisor_url = os.getenv("SUPERVISOR_URL", "http://sofiia-supervisor:8080")
        result = await _app_db.auto_run_drift_signal(
            signal_id=signal_id,
            dry_run=dry_run,
            force=force,
            supervisor_url=supervisor_url,
        )
        # The DB layer reports "unknown signal" via an error key rather than raising.
        if "error" in result:
            raise HTTPException(status_code=404, detail=result["error"])
        return JSONResponse(content=result)
    except HTTPException:
        raise
    except Exception as e:
        # Chain the cause so the original traceback survives in server logs (B904).
        raise HTTPException(status_code=500, detail=str(e)) from e
# ── Level 7: Governance Audit Trail ──────────────────────────────────────────
@app.get("/api/cto/audit/events")
async def audit_events_portfolio(
    scope: Optional[str] = "portfolio",
    limit: int = 100,
    event_type: Optional[str] = None,
    status: Optional[str] = None,
    since: Optional[str] = None,
    _auth: str = Depends(require_auth),
):
    """List governance audit events for portfolio (or any scope).

    project_id is pinned to "portfolio" only when scope == "portfolio";
    otherwise the scope filter alone applies.
    """
    try:
        items = await _app_db.list_governance_events(
            scope=scope, project_id="portfolio" if scope == "portfolio" else None,
            event_type=event_type, status=status, since=since, limit=limit,
        )
        return JSONResponse(content={"items": items, "count": len(items)})
    except Exception as e:
        # Chain the cause so the original traceback survives in server logs (B904).
        raise HTTPException(status_code=500, detail=str(e)) from e
@app.get("/api/projects/{project_id}/audit/events")
async def audit_events_project(
    project_id: str,
    limit: int = 100,
    event_type: Optional[str] = None,
    status: Optional[str] = None,
    since: Optional[str] = None,
    _auth: str = Depends(require_auth),
):
    """List governance audit events for a specific project."""
    try:
        items = await _app_db.list_governance_events(
            scope="project", project_id=project_id,
            event_type=event_type, status=status, since=since, limit=limit,
        )
        return JSONResponse(content={"items": items, "count": len(items)})
    except Exception as e:
        # Chain the cause so the original traceback survives in server logs (B904).
        raise HTTPException(status_code=500, detail=str(e)) from e
# ── Level 8: Agents as Projects ───────────────────────────────────────────────
import difflib as _difflib
import time as _time
# ── Agent Ops helpers ──────────────────────────────────────────────────────────
async def _fetch_agents_from_gateway(
    node_id: str,
    gateway_url: str,
    timeout_ms: Optional[int] = None,
    get_retry: int = 1,
) -> Tuple[List[Dict], Optional[str], Optional[int]]:
    """Fetch agents list from gateway /health. Returns (agents, error_str|None, latency_ms).

    Respects per-node timeout_ms and retry policy: ``get_retry`` is the number
    of extra attempts after the first. ``latency_ms`` is measured cumulatively
    from the first attempt; it is None only when no gateway_url is configured.
    """
    if not gateway_url:
        return [], f"No gateway_url configured for {node_id}", None
    timeout_sec = (timeout_ms or 2500) / 1000.0  # default 2.5s when no policy value given
    last_err = None
    attempts = get_retry + 1
    t0 = _time.monotonic()
    for attempt in range(attempts):
        try:
            async with httpx.AsyncClient(timeout=timeout_sec) as client:
                resp = await client.get(f"{gateway_url.rstrip('/')}/health")
                latency_ms = int((_time.monotonic() - t0) * 1000)
                if resp.status_code != 200:
                    last_err = f"HTTP {resp.status_code}"
                    continue  # non-200: remember the error and retry
                data = resp.json()
                raw = data.get("agents", {})
                agents: List[Dict] = []
                if isinstance(raw, dict):
                    # Dict form {agent_id: info}: normalize each entry; health is
                    # derived from whether the agent's prompt is loaded.
                    for aid, info in raw.items():
                        agents.append({
                            "agent_id": aid,
                            "display_name": info.get("name", aid),
                            "status": "healthy" if info.get("prompt_loaded") else "degraded",
                            "telegram_token_configured": info.get("telegram_token_configured", False),
                            "prompt_loaded": info.get("prompt_loaded", False),
                            "node_id": node_id,
                            "active_prompt": info.get("active_prompt"),
                            "badges": info.get("badges", []),
                            "visibility": info.get("visibility", "public"),
                            "telegram_mode": info.get("telegram_mode", "on"),
                            "lifecycle_status": info.get("lifecycle_status", "active"),
                        })
                elif isinstance(raw, list):
                    # List form: pass entries through, stamping the node id.
                    for a in raw:
                        agents.append({**a, "node_id": node_id})
                return agents, None, latency_ms
        except Exception as e:
            last_err = str(e)[:200]  # truncated to keep error payloads small
    # All attempts exhausted: report the last error with total elapsed time.
    latency_ms = int((_time.monotonic() - t0) * 1000)
    return [], last_err, latency_ms
def _node_info(node_id: str) -> Dict:
    """Resolve per-node connection info: {gateway_url, policy}."""
    # Imported lazily, matching the original, to avoid import-time coupling.
    from .config import get_node_policy
    gateway_url = get_gateway_url(node_id)
    policy = get_node_policy(node_id)
    return {"gateway_url": gateway_url, "policy": policy}
def _agent_desired_payload(override: Dict) -> Dict:
"""Canonical desired-state payload from an override row."""
return {
"display_name": override.get("display_name"),
"domain": override.get("domain"),
"system_prompt_md": override.get("system_prompt_md"),
}
def _merge_agent_with_override(agent: Dict, override: Optional[Dict]) -> Dict:
result = dict(agent)
if not override:
result["has_override"] = False
result["drift"] = False
return result
if override.get("display_name"): result["display_name"] = override["display_name"]
if override.get("domain"): result["domain"] = override["domain"]
if override.get("system_prompt_md"): result["system_prompt_md"] = override["system_prompt_md"]
result["is_hidden"] = bool(override.get("is_hidden"))
result["has_override"] = True
result["override_updated_at"] = override.get("updated_at")
result["last_applied_hash"] = override.get("last_applied_hash")
result["last_applied_at"] = override.get("last_applied_at")
# Drift: desired hash != last applied hash
desired = _agent_desired_payload(override)
desired_hash = _app_db._agent_payload_hash(desired)
result["desired_hash"] = desired_hash
active_hash = override.get("last_applied_hash")
result["drift"] = bool(active_hash and active_hash != desired_hash)
return result
async def _check_prompt_freeze(node_id: str, agent_id: str) -> bool:
    """Return True if PROMPT_FREEZE gate is active for any related project."""
    try:
        # Portfolio-level gate evaluation (dry run, nothing persisted).
        evaluation = await _app_db.evaluate_governance_gates("portfolio", window="7d", dry_run=True)
        return any(
            gate.get("name") == "PROMPT_FREEZE" and gate.get("status") != "PASS"
            for gate in evaluation.get("gates", [])
        )
    except Exception:
        # Fail open: a broken gate evaluation must not block agent operations.
        return False
# ── Agent CRUD endpoints ───────────────────────────────────────────────────────
# Agents required on every online node — if absent, signal is raised
# (each missing id produces a per-node/day governance event; see list_agents).
_REQUIRED_PER_NODE_AGENTS: List[str] = ["monitor"]
def _normalize_agent_capabilities(agent: Dict) -> Dict:
"""Add normalized capabilities: {voice, telegram} to agent dict."""
badges = agent.get("badges", [])
telegram_mode = agent.get("telegram_mode", "on")
agent_id = agent.get("agent_id", "")
agent["capabilities"] = {
"voice": agent_id == "aistalk" or "voice" in badges,
"telegram": telegram_mode != "off",
}
return agent
async def _emit_monitor_missing_event(node_id: str, bucket: str) -> None:
    """Write a governance_event when monitor is confirmed absent on an online node.

    ``bucket`` (a date string at the call site) is folded into the idempotency
    key so at most one event is recorded per node per bucket. Best-effort:
    failures are logged at warning level and swallowed.
    """
    try:
        await _app_db.append_governance_event(
            scope="portfolio",
            project_id="portfolio",
            actor_type="system",
            actor_id=None,
            event_type="node_required_agent_missing",
            idempotency_key=f"req|missing|{node_id}|monitor|{bucket}",
            severity="high",
            status="error",
            ref_type="node",
            ref_id=node_id,
            evidence={
                "v": 1,
                "message": f"Required agent 'monitor' absent on {node_id}",
                "inputs": {"node_id": node_id, "required_agent": "monitor"},
                "outputs": {"missing": True},
                "links": {},
                "timings": {},
            },
        )
    except Exception as exc:
        logger.warning("_emit_monitor_missing_event failed: %s", exc)
@app.get("/api/agents")
async def list_agents(
    nodes: str = "NODA1",
    include_hidden: bool = False,
    _auth: str = Depends(require_auth),
):
    """Fetch agents from node gateways, merge with local overrides.

    Query params:
        nodes: comma-separated node ids (normalized to upper case).
        include_hidden: include agents hidden by a local override.

    Returns {items, node_errors, stats, required_missing_nodes, nodes_queried}.
    Partial node failure never blocks other nodes — always HTTP 200.
    """
    node_ids = [n.strip().upper() for n in nodes.split(",") if n.strip()]
    # Date bucket de-duplicates "required agent missing" events to one per day.
    # NOTE(review): datetime.utcnow() is deprecated since Python 3.12; consider
    # datetime.now(timezone.utc).
    today_bucket = datetime.utcnow().strftime("%Y-%m-%d")
    all_agents: List[Dict] = []
    node_errors: List[Dict] = []
    node_stats: List[Dict] = []
    required_missing_nodes: List[Dict] = []  # nodes where required agents absent
    overrides_list = await _app_db.list_agent_overrides()
    # Index overrides by (node_id, agent_id) for O(1) lookups in the merge loop.
    overrides_map = {(o["node_id"], o["agent_id"]): o for o in overrides_list}
    for node_id in node_ids:
        ni = _node_info(node_id)
        gw_url = ni["gateway_url"]
        policy = ni["policy"]
        agents_raw, err, latency_ms = await _fetch_agents_from_gateway(
            node_id, gw_url,
            timeout_ms=policy["gateway_timeout_ms"],
            get_retry=policy["get_retry"],
        )
        if err:
            node_errors.append({
                "node_id": node_id, "error": err,
                "gateway_url": gw_url, "latency_ms": latency_ms,
                "node_role": policy["node_role"],
            })
            node_stats.append({"node_id": node_id, "ok": False, "count": 0,
                               "latency_ms": latency_ms})
            # Node offline → skip required check (not "missing", just "unreachable")
        else:
            count = 0
            present_agent_ids: Set[str] = set()
            for agent in agents_raw:
                override = overrides_map.get((node_id, agent["agent_id"]))
                merged = _merge_agent_with_override(agent, override)
                merged = _normalize_agent_capabilities(merged)
                # Hidden agents are filtered out unless explicitly requested.
                if not include_hidden and merged.get("is_hidden"):
                    continue
                merged["latency_ms"] = latency_ms
                all_agents.append(merged)
                present_agent_ids.add(agent["agent_id"])
                count += 1
            node_stats.append({"node_id": node_id, "ok": True, "count": count,
                               "latency_ms": latency_ms,
                               "node_role": policy["node_role"]})
            # Required agent check — only for online nodes
            for req_id in _REQUIRED_PER_NODE_AGENTS:
                if req_id not in present_agent_ids:
                    required_missing_nodes.append({
                        "node_id": node_id,
                        "agent_id": req_id,
                        "reason": "absent_from_registry",
                    })
                    # Fire-and-forget audit event so the response isn't delayed.
                    # NOTE(review): the task reference is not retained; asyncio may
                    # garbage-collect a pending task — consider keeping a reference.
                    asyncio.create_task(_emit_monitor_missing_event(node_id, today_bucket))
    # Unhealthy agents sort last; within a group, alphabetical by display name.
    all_agents.sort(key=lambda a: (a.get("status") != "healthy", a.get("display_name", "").lower()))
    nodes_ok = sum(1 for s in node_stats if s["ok"])
    return JSONResponse(content={
        "items": all_agents,
        "node_errors": node_errors,
        "stats": {"nodes_ok": nodes_ok, "nodes_total": len(node_ids), "agents_total": len(all_agents)},
        "required_missing_nodes": required_missing_nodes,
        "nodes_queried": node_ids,
    })
@app.get("/api/agents/{node_id}/{agent_id}")
async def get_agent(node_id: str, agent_id: str, _auth: str = Depends(require_auth)):
    """Return a single agent merged with its local override.

    When the gateway does not report the agent but an override exists, the
    stored override is served with status "unknown" and node_offline=True.
    404 only when neither the gateway nor the override knows the agent.
    """
    node_id = node_id.upper()
    ni = _node_info(node_id)
    policy = ni["policy"]
    agents_raw, err, latency_ms = await _fetch_agents_from_gateway(
        node_id, ni["gateway_url"],
        timeout_ms=policy["gateway_timeout_ms"],
        get_retry=policy["get_retry"],
    )
    override = await _app_db.get_agent_override(node_id, agent_id)
    agent = next((a for a in agents_raw if a["agent_id"] == agent_id), None)
    if not agent:
        if override:
            # Serve the desired state from the override when the node can't confirm.
            desired = _agent_desired_payload(override)
            return JSONResponse(content={"agent": {
                **override, "status": "unknown", "node_offline": True,
                "desired_hash": _app_db._agent_payload_hash(desired), "drift": False,
                "latency_ms": latency_ms,
            }})
        raise HTTPException(status_code=404, detail=f"Agent '{agent_id}' not found on {node_id}")
    merged = _merge_agent_with_override(agent, override)
    merged["latency_ms"] = latency_ms
    # node_error is included even on success (None when the gateway call was clean).
    return JSONResponse(content={"agent": merged, "node_error": err})
@app.get("/api/agents/{node_id}/{agent_id}/versions")
async def list_agent_versions(
    node_id: str, agent_id: str,
    limit: int = 10,
    _auth: str = Depends(require_auth),
):
    """Return version history for an agent override (up to `limit` entries)."""
    node_id = node_id.upper()
    history = await _app_db.list_agent_versions(node_id, agent_id, limit=limit)
    return JSONResponse(content={"versions": history})
class AgentOverridePatch(BaseModel):
    """Partial-update payload for a local agent override; None fields are left untouched."""
    display_name: Optional[str] = None
    domain: Optional[str] = None
    system_prompt_md: Optional[str] = None
    is_hidden: Optional[bool] = None
@app.patch("/api/agents/{node_id}/{agent_id}")
async def patch_agent_override(
    node_id: str, agent_id: str,
    body: AgentOverridePatch,
    _auth: str = Depends(require_auth),
):
    """Save local override (does NOT push to node). Creates a version snapshot.

    An audit event records which (non-None) fields were supplied and the
    resulting version hash.
    """
    node_id = node_id.upper()
    override = await _app_db.upsert_agent_override(
        node_id, agent_id,
        display_name=body.display_name,
        domain=body.domain,
        system_prompt_md=body.system_prompt_md,
        is_hidden=body.is_hidden,
    )
    # Audit: agent_override_saved
    # NOTE(review): body.dict() is the pydantic v1 API; v2 renames it model_dump().
    await _app_db.append_governance_event(
        scope="project", project_id=agent_id, actor_type="user",
        event_type="agent_override_saved",
        idempotency_key=f"aos|{node_id}|{agent_id}|{override.get('version_hash','')}",
        severity="info", status="ok",
        ref_type="agent", ref_id=agent_id,
        evidence=_app_db._make_evidence(
            message=f"Override saved for {agent_id} on {node_id}",
            outputs={"version_hash": override.get("version_hash"), "fields_changed": [
                k for k, v in body.dict(exclude_none=True).items()
            ]},
        ),
    )
    return JSONResponse(content={"override": override, "saved": True})
@app.post("/api/agents/{node_id}/{agent_id}/reset")
async def reset_agent_override(node_id: str, agent_id: str, _auth: str = Depends(require_auth)):
    """Remove local override, revert to registry state."""
    normalized_node = node_id.upper()
    await _app_db.delete_agent_override(normalized_node, agent_id)
    return JSONResponse(content={"reset": True, "node_id": normalized_node, "agent_id": agent_id})
# ── Safe Apply v2 ──────────────────────────────────────────────────────────────
@app.post("/api/agents/{node_id}/{agent_id}/apply")
async def apply_agent_override(
    node_id: str, agent_id: str,
    dry_run: bool = True,
    plan_id: Optional[str] = None,
    force: bool = False,
    _auth: str = Depends(require_auth),
):
    """Safe Apply v2.

    dry_run=true → returns diff_text + will_change + plan_id (sha256 of desired state).
    dry_run=false → requires plan_id to match; applies and stores last_applied_hash.

    Raises 404 when no local override exists, 409 on plan_id mismatch;
    returns 423 when the PROMPT_FREEZE governance gate blocks a real apply.
    """
    node_id = node_id.upper()
    # Governance gate check: PROMPT_FREEZE (skipped for dry-runs and force=true)
    if not dry_run and not force:
        frozen = await _check_prompt_freeze(node_id, agent_id)
        if frozen:
            return JSONResponse(
                status_code=423,
                content={"error": "PROMPT_FREEZE gate is active. Use force=true to override (requires review).",
                         "gate": "PROMPT_FREEZE", "node_id": node_id, "agent_id": agent_id},
            )
    override = await _app_db.get_agent_override(node_id, agent_id)
    if not override:
        raise HTTPException(status_code=404, detail="No local override found. Use PATCH first.")
    desired = _agent_desired_payload(override)
    computed_plan_id = _app_db._agent_payload_hash(desired)
    # Fetch current active prompt for diff.
    # BUGFIX: _fetch_agents_from_gateway returns a 3-tuple (agents, err, latency_ms);
    # the previous 2-name unpack raised "too many values to unpack" on every call.
    gw_url = get_gateway_url(node_id)
    agents_raw, _err, _latency_ms = await _fetch_agents_from_gateway(node_id, gw_url)
    active_agent = next((a for a in agents_raw if a["agent_id"] == agent_id), None)
    active_prompt = active_agent.get("active_prompt", "") if active_agent else ""
    desired_prompt = desired.get("system_prompt_md") or ""
    # Build unified diff between the live prompt and the desired prompt.
    diff_lines = list(_difflib.unified_diff(
        (active_prompt or "").splitlines(keepends=True),
        desired_prompt.splitlines(keepends=True),
        fromfile=f"{agent_id}:active",
        tofile=f"{agent_id}:desired",
        n=3,
    ))
    diff_text = "".join(diff_lines) if diff_lines else ""
    # Domain changes don't show up in the prompt diff, so count them separately.
    will_change = bool(diff_text) or (override.get("domain") is not None)
    if dry_run:
        # Audit: agent_apply_planned
        await _app_db.append_governance_event(
            scope="project", project_id=agent_id, actor_type="user",
            event_type="agent_apply_planned",
            idempotency_key=f"aap|{node_id}|{agent_id}|{computed_plan_id}",
            severity="info", status="ok",
            ref_type="agent", ref_id=agent_id,
            evidence=_app_db._make_evidence(
                message=f"Apply planned (dry-run) for {agent_id}@{node_id}",
                outputs={"will_change": will_change, "plan_id": computed_plan_id,
                         "diff_lines": len(diff_lines)},
            ),
        )
        return JSONResponse(content={
            "dry_run": True, "will_change": will_change,
            "plan_id": computed_plan_id,
            "diff_text": diff_text,
            "desired": desired,
            "node_id": node_id, "agent_id": agent_id,
        })
    # Apply: validate plan_id (only enforced when the caller supplied one).
    if plan_id and plan_id != computed_plan_id:
        raise HTTPException(
            status_code=409,
            detail=f"plan_id mismatch: provided={plan_id} computed={computed_plan_id}. "
                   "Re-run dry_run=true to get fresh plan_id.",
        )
    applied: List[Dict] = []
    errors_apply: List[Dict] = []
    if desired_prompt and gw_url:
        try:
            async with httpx.AsyncClient(timeout=10.0) as client:
                resp = await client.post(
                    f"{gw_url.rstrip('/')}/admin/agents/{agent_id}/prompt",
                    json={"prompt": desired_prompt},
                    headers={"X-Admin-Token": os.getenv("GATEWAY_ADMIN_TOKEN", "")},
                )
                if resp.status_code in (200, 201, 204):
                    applied.append({"action": "update_system_prompt", "status": "ok"})
                else:
                    errors_apply.append({"action": "update_system_prompt",
                                         "error": f"HTTP {resp.status_code}: {resp.text[:200]}"})
        except Exception as e:
            errors_apply.append({"action": "update_system_prompt", "error": str(e)[:300]})
    success = len(applied) > 0 and len(errors_apply) == 0
    # Mark last_applied_hash if successful so drift detection has a baseline.
    if success:
        await _app_db.upsert_agent_override(
            node_id, agent_id, _mark_applied_hash=computed_plan_id,
        )
    # Audit with distinct event types for success vs failure.
    evt_type = "agent_apply_executed" if success else "agent_apply_failed"
    await _app_db.append_governance_event(
        scope="project", project_id=agent_id, actor_type="user",
        event_type=evt_type,
        idempotency_key=f"aae|{node_id}|{agent_id}|{computed_plan_id}|{'ok' if success else 'fail'}",
        severity="info" if success else "high", status="ok" if success else "error",
        ref_type="agent", ref_id=agent_id,
        evidence=_app_db._make_evidence(
            message=f"Apply {'succeeded' if success else 'failed'} for {agent_id}@{node_id}",
            outputs={"plan_id": computed_plan_id, "applied": applied, "errors": errors_apply},
        ),
    )
    return JSONResponse(content={
        "dry_run": False, "success": success,
        "plan_id": computed_plan_id,
        "applied": applied, "errors": errors_apply,
        "node_id": node_id, "agent_id": agent_id,
    })
@app.post("/api/agents/{node_id}/{agent_id}/rollback")
async def rollback_agent_override(
    node_id: str, agent_id: str,
    version_hash: str,
    _auth: str = Depends(require_auth),
):
    """Rollback agent override to a specific version by version_hash.

    Only restores the local override (display_name, domain, system_prompt_md);
    it does not push anything to the node. 404 when the version is unknown.
    """
    node_id = node_id.upper()
    version = await _app_db.get_agent_version_by_hash(node_id, agent_id, version_hash)
    if not version:
        raise HTTPException(status_code=404, detail=f"Version {version_hash} not found")
    payload = version["payload"]
    # Restore the override to this version's payload
    updated = await _app_db.upsert_agent_override(
        node_id, agent_id,
        display_name=payload.get("display_name"),
        domain=payload.get("domain"),
        system_prompt_md=payload.get("system_prompt_md"),
    )
    # Audit (idempotency key includes a timestamp: every rollback is recorded).
    await _app_db.append_governance_event(
        scope="project", project_id=agent_id, actor_type="user",
        event_type="agent_rollback_executed",
        idempotency_key=f"arb|{node_id}|{agent_id}|{version_hash}|{_app_db._now()}",
        severity="warn", status="ok",
        ref_type="agent", ref_id=agent_id,
        evidence=_app_db._make_evidence(
            message=f"Rollback to version {version_hash} for {agent_id}@{node_id}",
            outputs={"version_hash": version_hash, "created_at": version.get("created_at")},
        ),
    )
    return JSONResponse(content={
        "rolled_back": True, "version_hash": version_hash,
        "override": updated, "node_id": node_id, "agent_id": agent_id,
    })
# ── Bulk Agent Actions (multi-node + canary) ───────────────────────────────────
async def _apply_single_agent(
    node_id: str,
    override: Dict,
    agents_map: Dict,
    gw_url: str,
    apply_timeout_sec: float,
) -> Dict:
    """Apply a single agent override. Returns result dict with status field.

    Status values: "applied" (prompt pushed, applied hash recorded), "failed"
    (gateway rejected or errored), "skipped" (no prompt or no gateway URL).
    ``drift`` in the result reflects whether desired and active prompts differ.
    """
    aid = override["agent_id"]
    desired = _agent_desired_payload(override)
    plan_id = _app_db._agent_payload_hash(desired)
    active_agent = agents_map.get(aid, {})
    active_prompt = active_agent.get("active_prompt", "") or ""
    desired_prompt = desired.get("system_prompt_md") or ""
    will_change = desired_prompt != active_prompt
    if not desired_prompt or not gw_url:
        # Nothing to push (or nowhere to push it) — report skipped, not failed.
        return {"node_id": node_id, "agent_id": aid, "status": "skipped",
                "plan_id": plan_id, "drift": will_change,
                "error": "no prompt or no gateway_url"}
    applied_ok = False
    err_msg = None
    try:
        async with httpx.AsyncClient(timeout=apply_timeout_sec) as client:
            resp = await client.post(
                f"{gw_url.rstrip('/')}/admin/agents/{aid}/prompt",
                json={"prompt": desired_prompt},
                headers={"X-Admin-Token": os.getenv("GATEWAY_ADMIN_TOKEN", "")},
            )
            applied_ok = resp.status_code in (200, 201, 204)
            if not applied_ok:
                err_msg = f"HTTP {resp.status_code}: {resp.text[:100]}"
    except Exception as e:
        err_msg = str(e)[:200]  # truncated to keep result payloads small
    if applied_ok:
        # Record the applied hash so subsequent drift checks use it as baseline.
        await _app_db.upsert_agent_override(node_id, aid, _mark_applied_hash=plan_id)
    return {
        "node_id": node_id, "agent_id": aid,
        "status": "applied" if applied_ok else "failed",
        "plan_id": plan_id, "drift": will_change,
        "error": err_msg,
    }
@app.post("/api/agents/bulk/apply")
async def bulk_apply_agents(
    nodes: str = "NODA1",
    node: Optional[str] = None,  # legacy single-node param
    dry_run: bool = True,
    mode: str = "all",  # "all" | "canary"
    limit: int = 2,  # canary: max N agents
    _auth: str = Depends(require_auth),
):
    """Apply local overrides across one or many nodes.
    mode=canary: apply first `limit` agents with drift=True, stop on first failure.
    Returns {results, node_errors, summary}.

    Every stage (plan, canary start/stop, completion) is written to the
    governance audit trail under a shared bulk_run_id.
    """
    # Support legacy ?node= param
    raw_nodes = node.upper() if node else nodes
    node_ids = [n.strip().upper() for n in raw_nodes.split(",") if n.strip()]
    all_results: List[Dict] = []
    node_errors: List[Dict] = []
    # Short random id correlating all audit events of this bulk run.
    bulk_run_id = str(uuid.uuid4())[:8]
    # Audit: bulk plan created
    await _app_db.append_governance_event(
        scope="portfolio", project_id="portfolio", actor_type="user",
        event_type="agent_bulk_plan_created",
        idempotency_key=f"abpc|{bulk_run_id}|{raw_nodes}|{mode}",
        severity="info", status="ok",
        evidence=_app_db._make_evidence(
            message=f"Bulk {'canary' if mode=='canary' else 'apply'} planned: nodes={raw_nodes} dry_run={dry_run}",
            outputs={"mode": mode, "limit": limit, "nodes": node_ids, "dry_run": dry_run},
        ),
    )
    for node_id in node_ids:
        ni = _node_info(node_id)
        policy = ni["policy"]
        gw_url = ni["gateway_url"]
        apply_timeout_sec = policy["apply_timeout_ms"] / 1000.0
        overrides = await _app_db.list_agent_overrides(node_id)
        agents_raw, err, latency_ms = await _fetch_agents_from_gateway(
            node_id, gw_url,
            timeout_ms=policy["gateway_timeout_ms"],
            get_retry=policy["get_retry"],
        )
        # Unreachable node with no data at all: record the error, move on.
        if err and not agents_raw:
            node_errors.append({"node_id": node_id, "error": err, "latency_ms": latency_ms})
            continue
        agents_map = {a["agent_id"]: a for a in agents_raw}
        # Select candidates: non-hidden, sorted deterministically by agent_id
        candidates = sorted(
            [o for o in overrides if not o.get("is_hidden")],
            key=lambda o: o["agent_id"],
        )
        if mode == "canary":
            # For canary: only agents with drift
            drift_candidates = []
            for o in candidates:
                desired = _agent_desired_payload(o)
                plan_id = _app_db._agent_payload_hash(desired)
                is_drift = bool(o.get("last_applied_hash") and o["last_applied_hash"] != plan_id)
                if is_drift:
                    drift_candidates.append(o)
            candidates = drift_candidates[:limit]
        if dry_run:
            # Dry run: report what would be pushed; no gateway calls, no writes.
            for override in candidates:
                aid = override["agent_id"]
                desired = _agent_desired_payload(override)
                plan_id = _app_db._agent_payload_hash(desired)
                active_agent = agents_map.get(aid, {})
                active_prompt = active_agent.get("active_prompt", "") or ""
                desired_prompt = desired.get("system_prompt_md") or ""
                all_results.append({
                    "node_id": node_id, "agent_id": aid, "status": "planned",
                    "plan_id": plan_id, "drift": desired_prompt != active_prompt, "error": None,
                })
            continue
        # Canary: log start
        if mode == "canary" and candidates:
            await _app_db.append_governance_event(
                scope="portfolio", project_id="portfolio", actor_type="user",
                event_type="agent_bulk_canary_started",
                idempotency_key=f"abcs|{bulk_run_id}|{node_id}",
                severity="info", status="ok",
                evidence=_app_db._make_evidence(
                    message=f"Canary apply started: {len(candidates)} agents on {node_id}",
                    outputs={"agents": [o["agent_id"] for o in candidates], "limit": limit},
                ),
            )
        canary_stopped = False
        for override in candidates:
            # Check governance gate per agent
            frozen = await _check_prompt_freeze(node_id, override["agent_id"])
            if frozen:
                all_results.append({
                    "node_id": node_id, "agent_id": override["agent_id"],
                    "status": "blocked", "plan_id": None, "drift": True,
                    "error": "PROMPT_FREEZE gate active",
                })
                continue
            result = await _apply_single_agent(
                node_id, override, agents_map, gw_url, apply_timeout_sec,
            )
            all_results.append(result)
            # Canary stop-on-failure
            if mode == "canary" and result["status"] == "failed":
                canary_stopped = True
                # Mark remaining as skipped (candidates not yet in this node's results)
                remaining_ids = {o["agent_id"] for o in candidates} - {r["agent_id"] for r in all_results if r["node_id"] == node_id}
                for rid in sorted(remaining_ids):
                    all_results.append({
                        "node_id": node_id, "agent_id": rid, "status": "skipped",
                        "plan_id": None, "drift": True,
                        "error": f"canary stopped after failure of {result['agent_id']}",
                    })
                await _app_db.append_governance_event(
                    scope="portfolio", project_id="portfolio", actor_type="user",
                    event_type="agent_bulk_canary_stopped",
                    idempotency_key=f"abcstop|{bulk_run_id}|{node_id}|{result['agent_id']}",
                    severity="high", status="error",
                    evidence=_app_db._make_evidence(
                        message=f"Canary stopped on {result['agent_id']}@{node_id}: {result['error']}",
                        outputs={"failed_agent": result["agent_id"], "error": result["error"]},
                    ),
                )
                break
        # Canary completed without a failure: record success for this node.
        if mode == "canary" and not canary_stopped and candidates:
            await _app_db.append_governance_event(
                scope="portfolio", project_id="portfolio", actor_type="user",
                event_type="agent_bulk_apply_completed",
                idempotency_key=f"abac|{bulk_run_id}|{node_id}",
                severity="info", status="ok",
                evidence=_app_db._make_evidence(
                    message=f"Canary apply completed on {node_id}: {len(candidates)} agents",
                    outputs={"agents_applied": [r["agent_id"] for r in all_results
                                                if r["node_id"] == node_id and r["status"] == "applied"]},
                ),
            )
    # Build summary: counts per result status (planned/applied/failed/skipped/blocked).
    status_counts: Dict[str, int] = {}
    for r in all_results:
        status_counts[r["status"]] = status_counts.get(r["status"], 0) + 1
    return JSONResponse(content={
        "results": all_results,
        "node_errors": node_errors,
        "summary": status_counts,
        "dry_run": dry_run,
        "mode": mode,
        "bulk_run_id": bulk_run_id,
    })
@app.post("/api/agents/bulk/diff")
async def bulk_diff_agents(
    nodes: str = "NODA1",
    node: Optional[str] = None,
    _auth: str = Depends(require_auth),
):
    """Summarise prompt drift for every agent override across one or more nodes.

    `node` (single id) takes precedence over `nodes` (comma-separated list).
    Gateway fetch failures are recorded in `node_errors`; diffs for that node
    are then computed against an empty active prompt.
    """
    selector = node.upper() if node else nodes
    node_ids = [part.strip().upper() for part in selector.split(",") if part.strip()]
    report: List[Dict] = []
    node_errors: List[Dict] = []
    for nid in node_ids:
        info = _node_info(nid)
        pol = info["policy"]
        overrides = await _app_db.list_agent_overrides(nid)
        agents_raw, err, latency_ms = await _fetch_agents_from_gateway(
            nid, info["gateway_url"],
            timeout_ms=pol["gateway_timeout_ms"],
            get_retry=pol["get_retry"],
        )
        if err:
            node_errors.append({"node_id": nid, "error": err, "latency_ms": latency_ms})
        agents_by_id = {agent["agent_id"]: agent for agent in agents_raw}
        for ov in overrides:
            aid = ov["agent_id"]
            desired = _agent_desired_payload(ov)
            plan_id = _app_db._agent_payload_hash(desired)
            active_prompt = agents_by_id.get(aid, {}).get("active_prompt") or ""
            desired_prompt = desired.get("system_prompt_md") or ""
            # n=2 keeps the context window small; the UI only shows a preview.
            diff_lines = list(_difflib.unified_diff(
                active_prompt.splitlines(keepends=True),
                desired_prompt.splitlines(keepends=True),
                fromfile=f"{aid}:active", tofile=f"{aid}:desired", n=2,
            ))
            applied_hash = ov.get("last_applied_hash")
            report.append({
                "node_id": nid, "agent_id": aid,
                "plan_id": plan_id,
                "last_applied_hash": applied_hash,
                # Drift only means something once a hash has been applied.
                "drift": bool(applied_hash and applied_hash != plan_id),
                "diff_lines": len(diff_lines),
                "diff_text": "".join(diff_lines[:60]),
            })
    return JSONResponse(content={"report": report, "node_errors": node_errors,
                                 "nodes_queried": node_ids})
@app.get("/api/agents/export/prompts")
async def export_agent_prompts(
    nodes: str = "NODA1",
    node: Optional[str] = None,
    _auth: str = Depends(require_auth),
):
    """Export every agent's override-merged system prompt as one JSON bundle.

    `node` (single id) takes precedence over `nodes` (comma-separated list).
    Nodes whose gateway cannot be reached are reported in `node_errors` but do
    not abort the export.
    """
    selector = node.upper() if node else nodes
    node_ids = [part.strip().upper() for part in selector.split(",") if part.strip()]
    bundle: List[Dict] = []
    node_errors: List[Dict] = []
    for nid in node_ids:
        info = _node_info(nid)
        pol = info["policy"]
        overrides = await _app_db.list_agent_overrides(nid)
        agents_raw, err, latency_ms = await _fetch_agents_from_gateway(
            nid, info["gateway_url"],
            timeout_ms=pol["gateway_timeout_ms"],
            get_retry=pol["get_retry"],
        )
        if err:
            node_errors.append({"node_id": nid, "error": err, "latency_ms": latency_ms})
        agents_by_id = {agent["agent_id"]: agent for agent in agents_raw}
        overrides_by_id = {ov["agent_id"]: ov for ov in overrides}
        for aid, agent in agents_by_id.items():
            merged = _merge_agent_with_override(agent, overrides_by_id.get(aid))
            bundle.append({
                "agent_id": aid, "node_id": nid,
                "display_name": merged.get("display_name", aid),
                "domain": merged.get("domain"),
                "system_prompt_md": merged.get("system_prompt_md"),
                "has_override": merged.get("has_override", False),
            })
    # Deterministic ordering so repeated exports diff cleanly.
    bundle.sort(key=lambda entry: (entry["node_id"], entry["agent_id"]))
    return JSONResponse(content={
        "nodes_queried": node_ids,
        "exported_at": _app_db._now(),
        "count": len(bundle),
        "agents": bundle,
        "node_errors": node_errors,
    })
# ── Kling AI proxy ────────────────────────────────────────────────────────────
@app.get("/api/aurora/kling/health")
async def console_kling_health() -> Dict[str, Any]:
    """Proxy the Aurora Kling health check; transport failures become payload."""
    try:
        result = await _aurora_request_json(
            "GET", "/api/aurora/kling/health", timeout=12.0, retries=1,
        )
    except Exception as exc:  # never raise from a health probe
        result = {"ok": False, "error": str(exc)}
    return result
@app.post("/api/aurora/kling/enhance")
async def console_kling_enhance_plain(
    job_id: str = Form(...),
    prompt: str = Form("enhance video quality, improve sharpness and clarity"),
    negative_prompt: str = Form("noise, blur, artifacts, distortion"),
    mode: str = Form("pro"),
    duration: str = Form("5"),
    cfg_scale: float = Form(0.5),
) -> Dict[str, Any]:
    """Forward a form-encoded Kling enhance request to the Aurora service."""
    form_fields = {
        "job_id": job_id,
        "prompt": prompt,
        "negative_prompt": negative_prompt,
        "mode": mode,
        "duration": duration,
        # Form payloads are string-valued; serialise the float explicitly.
        "cfg_scale": str(cfg_scale),
    }
    return await _aurora_request_json(
        "POST", "/api/aurora/kling/enhance",
        data=form_fields, timeout=120.0, retries=1,
    )
@app.post("/api/aurora/kling/enhance/{job_id}")
async def console_kling_enhance(
    job_id: str,
    prompt: str = Form("enhance video quality, improve sharpness and clarity"),
    negative_prompt: str = Form("noise, blur, artifacts, distortion"),
    mode: str = Form("pro"),
    duration: str = Form("5"),
    cfg_scale: float = Form(0.5),
) -> Dict[str, Any]:
    """Path-parameter variant of the enhance endpoint; delegates to the
    form-only handler so both routes share one implementation."""
    return await console_kling_enhance_plain(
        job_id=job_id,
        prompt=prompt,
        negative_prompt=negative_prompt,
        mode=mode,
        duration=duration,
        cfg_scale=cfg_scale,
    )
@app.get("/api/aurora/kling/status/{job_id}")
async def console_kling_status(job_id: str) -> Dict[str, Any]:
    """Proxy a Kling job-status lookup to Aurora."""
    status_path = f"/api/aurora/kling/status/{job_id}"
    return await _aurora_request_json("GET", status_path, timeout=20.0, retries=2)
@app.get("/api/aurora/kling/task/{task_id}")
async def console_kling_task(task_id: str, endpoint: str = Query("video2video")) -> Dict[str, Any]:
    """Proxy a Kling task lookup to Aurora.

    Fix: `task_id` and `endpoint` were interpolated into the forwarded URL
    verbatim, so reserved characters (``&``, ``#``, ``?``, spaces) could
    corrupt the proxied query string. Both values are now percent-encoded
    with `urllib.parse.quote` (already imported at module level).
    """
    path = (
        f"/api/aurora/kling/task/{quote(task_id, safe='')}"
        f"?endpoint={quote(endpoint, safe='')}"
    )
    return await _aurora_request_json("GET", path, timeout=20.0, retries=2)
@app.get("/api/aurora/plates/{job_id}")
async def console_plates(job_id: str) -> Dict[str, Any]:
    """Proxy a plates lookup for the given job to Aurora."""
    plates_path = f"/api/aurora/plates/{job_id}"
    return await _aurora_request_json("GET", plates_path, timeout=15.0, retries=2)
# ── Sofiia Auto-Router & Budget Dashboard proxy ────────────────────────────────
async def _router_request_json(method: str, path: str, json_body: Optional[Dict] = None, timeout: float = 20.0) -> Dict[str, Any]:
    """Forward a JSON request to the Router service of the first registered node.

    Args:
        method: HTTP method; anything other than "GET" is sent as POST,
            matching the original two-branch behavior.
        path: router-relative path, e.g. "/v1/sofiia/budget".
        json_body: optional JSON payload for non-GET requests.
        timeout: total request timeout in seconds.

    Returns:
        The parsed JSON response, or ``{"error": "..."}`` on any failure —
        callers rely on this helper never raising.

    Fix: previously did a function-local ``import aiohttp`` although the
    module's HTTP client is ``httpx`` (imported at file top); this removes
    the extra third-party dependency and unifies the client stack.
    """
    # Use the first configured node's router URL (falls back to "noda1").
    nodes_reg = load_nodes_registry()
    nodes = (nodes_reg.get("nodes") or {}) if isinstance(nodes_reg, dict) else {}
    node_id = next(iter(nodes), "noda1")
    url = f"{get_router_url(node_id).rstrip('/')}{path}"
    try:
        async with httpx.AsyncClient(timeout=timeout) as client:
            if method.upper() == "GET":
                resp = await client.get(url)
            else:
                resp = await client.post(url, json=json_body)
            # Parse the body regardless of Content-Type, mirroring the old
            # aiohttp ``resp.json(content_type=None)`` behavior.
            return resp.json()
    except Exception as e:
        return {"error": str(e)}
@app.post("/api/sofiia/auto-route")
async def console_auto_route(body: Dict[str, Any]) -> Dict[str, Any]:
    """Proxy: classify a prompt and return the recommended model."""
    route_path = "/v1/sofiia/auto-route"
    return await _router_request_json("POST", route_path, json_body=body)
@app.get("/api/sofiia/budget")
async def console_budget_dashboard() -> Dict[str, Any]:
    """Proxy: fetch the budget dashboard data from the router."""
    dashboard_path = "/v1/sofiia/budget"
    return await _router_request_json("GET", dashboard_path)
@app.post("/api/sofiia/budget/limits")
async def console_set_budget_limits(body: Dict[str, Any]) -> Dict[str, Any]:
    """Proxy: set a provider budget limit on the router."""
    limits_path = "/v1/sofiia/budget/limits"
    return await _router_request_json("POST", limits_path, json_body=body)
@app.get("/api/sofiia/budget/stats")
async def console_budget_stats(window_hours: int = 24) -> Dict[str, Any]:
    """Proxy: fetch budget stats for the given time window (hours)."""
    stats_path = f"/v1/sofiia/budget/stats?window_hours={window_hours}"
    return await _router_request_json("GET", stats_path)
@app.get("/api/sofiia/catalog")
async def console_model_catalog(refresh_ollama: bool = False) -> Dict[str, Any]:
    """Proxy: fetch the full model catalog with availability flags."""
    # Router expects a lowercase boolean literal in the query string.
    refresh_flag = "true" if refresh_ollama else "false"
    catalog_path = f"/v1/sofiia/catalog?refresh_ollama={refresh_flag}"
    return await _router_request_json("GET", catalog_path)