7501 lines
286 KiB
Python
7501 lines
286 KiB
Python
"""
|
||
Sofiia Control Console — FastAPI BFF v0.3.0
|
||
Runtime contract (project/session/user), full status, WebSocket events,
|
||
voice proxy, ops, nodes. UI never calls external services directly.
|
||
"""
|
||
import asyncio
|
||
import base64
|
||
import io
|
||
import json
|
||
import os
|
||
import re
|
||
import sys
|
||
import subprocess
|
||
import mimetypes
|
||
import time
|
||
import uuid
|
||
import logging
|
||
import collections
|
||
import statistics
|
||
import socket
|
||
from datetime import datetime, timezone
|
||
from pathlib import Path
|
||
from typing import Any, Dict, List, Optional, Set, Tuple
|
||
from urllib.parse import quote
|
||
|
||
import httpx
|
||
from fastapi import Body, FastAPI, Depends, HTTPException, UploadFile, File, Form, Query, Request, Response, WebSocket, WebSocketDisconnect
|
||
from fastapi.responses import HTMLResponse, StreamingResponse, JSONResponse
|
||
from fastapi.middleware.cors import CORSMiddleware
|
||
from pydantic import BaseModel
|
||
|
||
try:
|
||
import cv2 # type: ignore[import-untyped]
|
||
except Exception: # pragma: no cover - optional dependency in console env
|
||
cv2 = None
|
||
|
||
from .auth import (
|
||
require_api_key, require_api_key_strict, require_auth, require_auth_strict,
|
||
get_console_api_key, _key_valid, _cookie_token, _expected_cookie_token,
|
||
_COOKIE_NAME, _COOKIE_MAX_AGE, _IS_PROD,
|
||
)
|
||
|
||
from .config import (
|
||
load_nodes_registry,
|
||
save_nodes_registry,
|
||
get_router_url,
|
||
get_gateway_url,
|
||
get_node_ssh_profile,
|
||
get_memory_service_url,
|
||
get_ollama_url,
|
||
is_voice_ha_enabled,
|
||
get_voice_ha_router_url,
|
||
)
|
||
from .router_client import infer, execute_tool, health
|
||
from .nodes import get_nodes_dashboard
|
||
from .monitor import collect_all_nodes
|
||
from .ops import run_ops_action, OPS_ACTIONS
|
||
from .docs_router import docs_router
|
||
from . import db as _app_db
|
||
from .metrics import (
|
||
SOFIIA_SEND_REQUESTS_TOTAL,
|
||
SOFIIA_IDEMPOTENCY_REPLAYS_TOTAL,
|
||
SOFIIA_CURSOR_REQUESTS_TOTAL,
|
||
SOFIIA_RATE_LIMITED_TOTAL,
|
||
render_metrics,
|
||
)
|
||
from .idempotency import get_idempotency_store, ReplayEntry
|
||
from .rate_limit import get_rate_limiter
|
||
from .logging import (
|
||
configure_sofiia_logger,
|
||
get_request_id,
|
||
hash_idempotency_key,
|
||
log_event,
|
||
)
|
||
|
||
# Module logger plus one-time structured-logging setup for the console process.
logger = logging.getLogger(__name__)
configure_sofiia_logger()
|
||
|
||
# ── Build info ────────────────────────────────────────────────────────────────
# Exposed via /api/health and /api/status/full; values come from the build
# pipeline env, with safe local defaults.
_VERSION = "0.4.0"
_BUILD_SHA = os.getenv("BUILD_SHA", "dev")
_BUILD_TIME = os.getenv("BUILD_TIME", "local")
_BUILD_ID = os.getenv("BUILD_ID", os.getenv("GIT_SHA", "local"))
_START_TIME = time.monotonic()  # monotonic start marker for uptime_s
_NODE_ID = os.getenv("NODE_ID", os.getenv("HOSTNAME", "noda2"))
|
||
|
||
# ── Rate limiter ──────────────────────────────────────────────────────────────
# Legacy sliding-window buckets (used by _check_rate) plus the newer
# token-bucket limiter/idempotency store provided by project modules.
_rate_buckets: Dict[str, collections.deque] = {}

_idempotency_store = get_idempotency_store()
_rate_limiter = get_rate_limiter()
# Chat and ops limits: requests-per-second and burst size, env-tunable.
_RL_CHAT_RPS = float(os.getenv("SOFIIA_RL_CHAT_RPS", "1.0"))
_RL_CHAT_BURST = int(os.getenv("SOFIIA_RL_CHAT_BURST", "8"))
_RL_OP_RPS = float(os.getenv("SOFIIA_RL_OP_RPS", "3.0"))
_RL_OP_BURST = int(os.getenv("SOFIIA_RL_OP_BURST", "20"))
|
||
|
||
def _check_rate(key: str, max_calls: int, window_sec: int = 60) -> bool:
    """Sliding-window limiter: allow at most ``max_calls`` per ``window_sec``.

    Returns True when the call is admitted (and recorded), False when the
    caller identified by ``key`` has exhausted its window.
    """
    ts_now = time.monotonic()
    bucket = _rate_buckets.setdefault(key, collections.deque())
    # Drop timestamps that have fallen out of the window (oldest first).
    cutoff = ts_now - window_sec
    while bucket and bucket[0] < cutoff:
        bucket.popleft()
    if len(bucket) >= max_calls:
        return False
    bucket.append(ts_now)
    return True
|
||
|
||
|
||
def _resolve_operator_id(request: Request, body: "ChatMessageSendBody", request_id: str) -> Tuple[str, bool]:
    """Pick a stable operator identity for rate limiting / logging.

    Tries, in order: explicit client metadata, the body user_id, then the
    X-Operator-Id header. Returns ``(operator_id, is_fallback)`` where the
    fallback is derived from the client IP (or the request id when no IP is
    available). All identities are clipped to 128 chars.
    """
    meta = body.client or {}
    candidates = (
        meta.get("operator_id"),
        body.user_id,
        request.headers.get("X-Operator-Id"),
    )
    for candidate in candidates:
        text = str(candidate or "").strip()
        if text:
            return text[:128], False
    ip = request.client.host if request.client else "unknown"
    anon = f"ip:{ip}" if ip else f"req:{request_id}"
    return anon[:128], True
|
||
|
||
|
||
def _rate_limited_http(scope: str, retry_after_s: int) -> HTTPException:
    """Build the standard 429 response for a rate-limited request.

    ``retry_after_s`` is clamped to at least 1 second and echoed both in the
    JSON detail and the Retry-After header.
    """
    wait_s = int(retry_after_s or 1)
    if wait_s < 1:
        wait_s = 1
    detail_body = {
        "error": {"code": "rate_limited", "scope": scope},
        "retry_after_s": wait_s,
    }
    return HTTPException(
        status_code=429,
        detail=detail_body,
        headers={"Retry-After": str(wait_s)},
    )
|
||
|
||
|
||
# ── Voice error rings (repro pack for incident diagnosis) ─────────────────────
|
||
# Circular buffers: last 5 TTS errors and last 5 LLM errors.
|
||
# Populated by all voice endpoints. Read by /api/voice/degradation_status.
|
||
_RING_SIZE = 5
|
||
_voice_tts_errors: collections.deque = collections.deque(maxlen=_RING_SIZE)
|
||
_voice_llm_errors: collections.deque = collections.deque(maxlen=_RING_SIZE)
|
||
_voice_last_model: str = "unknown" # last model selected for voice
|
||
_voice_last_profile: str = "unknown" # last voice_profile used
|
||
|
||
def _record_tts_error(error_type: str, status_code: Optional[int],
|
||
detail: str, voice: str = "") -> None:
|
||
_voice_tts_errors.append({
|
||
"ts": time.strftime("%H:%M:%SZ", time.gmtime()),
|
||
"type": error_type,
|
||
"status": status_code,
|
||
"voice": voice,
|
||
"detail": detail[:120],
|
||
})
|
||
|
||
def _record_llm_error(error_type: str, model: str, detail: str) -> None:
|
||
_voice_llm_errors.append({
|
||
"ts": time.strftime("%H:%M:%SZ", time.gmtime()),
|
||
"type": error_type,
|
||
"model": model,
|
||
"detail": detail[:120],
|
||
})
|
||
|
||
# ── Concurrent voice synthesizer guard ───────────────────────────────────────
|
||
# Limits simultaneous TTS synthesis calls to prevent memory-service DoS.
|
||
_MAX_CONCURRENT_TTS = int(os.getenv("MAX_CONCURRENT_TTS", "4"))
|
||
_tts_semaphore: Optional[asyncio.Semaphore] = None # initialised in startup
|
||
|
||
def _get_tts_semaphore() -> asyncio.Semaphore:
|
||
global _tts_semaphore
|
||
if _tts_semaphore is None:
|
||
_tts_semaphore = asyncio.Semaphore(_MAX_CONCURRENT_TTS)
|
||
return _tts_semaphore
|
||
|
||
# ── Telemetry dedup store ─────────────────────────────────────────────────────
|
||
# Prevents processing duplicate beacon submissions (same session+turn within 30s).
|
||
_telem_seen: collections.OrderedDict = collections.OrderedDict()
|
||
_TELEM_DEDUP_TTL = 30.0 # seconds
|
||
_TELEM_DEDUP_MAX = 500 # max keys before LRU eviction
|
||
|
||
def _telem_is_duplicate(session_id: str, turn_id: str) -> bool:
|
||
key = f"{session_id}:{turn_id}"
|
||
now = time.monotonic()
|
||
# Evict expired keys
|
||
while _telem_seen and next(iter(_telem_seen.values())) + _TELEM_DEDUP_TTL < now:
|
||
_telem_seen.popitem(last=False)
|
||
if len(_telem_seen) >= _TELEM_DEDUP_MAX:
|
||
_telem_seen.popitem(last=False)
|
||
if key in _telem_seen:
|
||
return True
|
||
_telem_seen[key] = now
|
||
return False
|
||
|
||
|
||
def _env_int(name: str, default: int) -> int:
|
||
raw = (os.getenv(name, str(default)) or "").strip()
|
||
try:
|
||
return int(raw)
|
||
except Exception:
|
||
return default
|
||
|
||
|
||
def _env_float(name: str, default: float) -> float:
|
||
raw = (os.getenv(name, str(default)) or "").strip()
|
||
try:
|
||
return float(raw)
|
||
except Exception:
|
||
return default
|
||
|
||
# ── App config ────────────────────────────────────────────────────────────────
ROUTER_API_KEY = os.getenv("SUPERVISOR_API_KEY", "").strip()
IS_PROD = os.getenv("ENV", "dev").strip().lower() in ("prod", "production", "staging")
# Default chat model; the "or" fallback guards against an env var set to "".
SOFIIA_PREFERRED_CHAT_MODEL = os.getenv("SOFIIA_PREFERRED_CHAT_MODEL", "ollama:qwen3:14b").strip() or "ollama:qwen3:14b"

# Local Ollama runtime tuning for NODA2 (can be overridden via env).
SOFIIA_OLLAMA_TIMEOUT_SEC = _env_float("SOFIIA_OLLAMA_TIMEOUT_SEC", 120.0)
SOFIIA_OLLAMA_VOICE_TIMEOUT_SEC = _env_float("SOFIIA_OLLAMA_VOICE_TIMEOUT_SEC", 45.0)
SOFIIA_OLLAMA_KEEP_ALIVE = (os.getenv("SOFIIA_OLLAMA_KEEP_ALIVE", "30m") or "").strip()
SOFIIA_OLLAMA_NUM_CTX = _env_int("SOFIIA_OLLAMA_NUM_CTX", 8192)
# Leave ~2 cores for the OS, clamped into [4, 16].
_DEFAULT_OLLAMA_THREADS = max(4, min(16, (os.cpu_count() or 8) - 2))
SOFIIA_OLLAMA_NUM_THREAD = _env_int("SOFIIA_OLLAMA_NUM_THREAD", _DEFAULT_OLLAMA_THREADS)
SOFIIA_OLLAMA_NUM_GPU = _env_int("SOFIIA_OLLAMA_NUM_GPU", -1)  # -1: let Ollama decide
SOFIIA_OLLAMA_NUM_PREDICT_TEXT = _env_int("SOFIIA_OLLAMA_NUM_PREDICT_TEXT", 768)
|
||
|
||
# Voice guardrails — injected INSTEAD OF the full prompt for voice turns.
# Constraints are hard: no lists, no markdown, no <think>, max 2 sentences.
SOFIIA_VOICE_PROMPT_SUFFIX = """

## VOICE MODE — HARD RULES (не порушувати ніколи)
- Відповідай МАКСИМУМ 2 речення (виняток: якщо прямо попросили деталей).
- Жодних списків, жодних bullet-points, жодного markdown (*bold*, -list, ##header).
- Жодного коду (`` ` ``), жодних URL.
- Жодного <think>...</think> — думки всередині, назовні лише відповідь.
- Мова: розмовна, природна для голосу. Без "Як AI...".
- Якщо питання складне — дай коротку відповідь і запропонуй продовжити текстом.
"""

# Full system prompt for text chat (Ukrainian persona/identity/capabilities).
# NOTE: runtime string — content must stay byte-identical; it is sent to the LLM.
SOFIIA_SYSTEM_PROMPT = """Ти Sofiia — Chief AI Architect та Technical Sovereign екосистеми DAARION.city.

## Твоя ідентичність
- Ти: Sofiia, головний AI-архітектор і технічний суверен DAARION.city
- Ти підпорядковуєшся одній людині — засновнику та головному архітектору платформи

## Засновник та архітектор DAARION
- Позивний: **Повелитель Хаосу** (використовуй у неформальних/робочих контекстах)
- Офіційне ім'я: **Іван Титар** (використовуй в офіційних повідомленнях, документах, репортах)
- Роль: Головний розробник та архітектор DAARION — єдиний, хто має повний контроль над платформою
- Ніякої іншої людини з ім'ям "Савтра" або будь-яким іншим іменем у ролі засновника НЕ ІСНУЄ

## Ноди та інфраструктура
- NODA1: production runtime (router, incidents, alerts, governance)
- NODA2: control plane / development (твій primary home, звідки тебе викликають)
- NODA3: AI/ML experimentation

## Правила відповіді
- Відповідай **українською мовою** за замовчуванням
- Технічні терміни (API, SLO, backend, deploy, incident, release gate тощо) залишай **англійською**
- Відповідай структуровано, конкретно, без зайвих вступів
- НЕ вигадуй імена людей, назви проектів або факти яких не знаєш — краще скажи що не маєш цих даних
- НЕ галюцинуй: якщо не знаєш — скажи чесно "не маю цих даних в поточному контексті"

## Твої можливості через Control Console (що реально доступно)
- **Chat**: відповіді на питання через локальний LLM (Ollama на NODA2)
- **Голосовий чат**: STT + TTS через Memory Service (Polina/Ostap Neural)
- **Nodes health**: статус NODA1/NODA2 (router, memory, NCS)
- **Integrations status**: Notion API, Router, Memory Service
- **Memory/session**: зберігання контексту розмов (Qdrant)

## Що наразі НЕ доступно через цей інтерфейс
- Пряме читання/запис в Notion (тільки статус перевірки)
- Пряме читання GitHub репозиторіїв (немає repo tool у цьому контейнері)
- Виконання bash/python команд
- Деплой або зміна конфігурацій напряму

Якщо тебе просять щось що не є в переліку доступного — відповідай чесно:
"Ця можливість не підключена до Control Console. Для цього використай Cursor або OpenCode на NODA2."
"""
|
||
|
||
# Allowed CORS origins: explicit comma-separated list from CORS_ORIGINS wins;
# otherwise wildcard in dev and a fixed allowlist in prod/staging.
_CORS_ORIGINS = (
    [o.strip() for o in os.getenv("CORS_ORIGINS", "").split(",") if o.strip()]
    or (
        ["*"] if not IS_PROD
        else [
            "https://console.daarion.space",
            "https://app.daarion.space",
            "http://localhost:8002",
            "http://localhost:8000",
            "http://127.0.0.1:8002",
        ]
    )
)
|
||
def _is_container_runtime() -> bool:
|
||
return Path("/.dockerenv").exists() or bool(os.getenv("KUBERNETES_SERVICE_HOST"))
|
||
|
||
|
||
# Aurora (media forensics) + media-service endpoints and local caches.
# In-container defaults use service DNS names; bare-metal falls back to localhost.
_aurora_default_url = "http://aurora-service:9401" if _is_container_runtime() else "http://127.0.0.1:9401"
AURORA_SERVICE_URL = os.getenv("AURORA_SERVICE_URL", _aurora_default_url).rstrip("/")
AURORA_FALLBACK_URL = os.getenv("AURORA_FALLBACK_URL", "http://127.0.0.1:9401").rstrip("/")
_aurora_home_data_dir = Path.home() / ".sofiia" / "aurora-data"
# Prefer /data/aurora only when it exists AND is writable by this process.
if _is_container_runtime() and Path("/data").exists() and os.access("/data", os.W_OK):
    _aurora_default_data_dir = "/data/aurora"
else:
    _aurora_default_data_dir = str(_aurora_home_data_dir)
AURORA_DATA_DIR = Path(os.getenv("AURORA_DATA_DIR", _aurora_default_data_dir))
# Live-telemetry caches (per stream id) and their on-disk persistence paths.
_aurora_live_cache: Dict[str, Dict[str, Any]] = {}
_aurora_live_samples: Dict[str, collections.deque] = {}
_aurora_live_last: Dict[str, Dict[str, Any]] = {}
_aurora_live_last_loaded = False
_aurora_live_last_path = (AURORA_DATA_DIR.parent / "sofiia-console-cache" / "aurora_live_last.json")
# "Smart run" bookkeeping: run registry + learned strategy policy.
_aurora_smart_runs: Dict[str, Dict[str, Any]] = {}
_aurora_smart_runs_loaded = False
_aurora_smart_runs_path = (AURORA_DATA_DIR.parent / "sofiia-console-cache" / "aurora_smart_runs.json")
_aurora_smart_policy: Dict[str, Any] = {
    "updated_at": None,
    "strategies": {
        "local_only": {"count": 0, "avg_score": 0.0, "wins": 0, "losses": 0},
        "local_then_kling": {"count": 0, "avg_score": 0.0, "wins": 0, "losses": 0},
    },
}
_aurora_smart_policy_loaded = False
_aurora_smart_policy_path = (AURORA_DATA_DIR.parent / "sofiia-console-cache" / "aurora_smart_policy.json")
# Polling/budget knobs; max() clamps enforce sane floors on env overrides.
_AURORA_SMART_MAX_RUNS = max(20, int(os.getenv("AURORA_SMART_MAX_RUNS", "200")))
_AURORA_SMART_LOCAL_POLL_SEC = max(2.0, float(os.getenv("AURORA_SMART_LOCAL_POLL_SEC", "3.0")))
_AURORA_SMART_KLING_POLL_SEC = max(3.0, float(os.getenv("AURORA_SMART_KLING_POLL_SEC", "6.0")))
_AURORA_SMART_LOCAL_MAX_SEC = max(60.0, float(os.getenv("AURORA_SMART_LOCAL_MAX_SEC", "10800")))
_AURORA_SMART_KLING_MAX_SEC = max(60.0, float(os.getenv("AURORA_SMART_KLING_MAX_SEC", "3600")))
# Media-pipeline service endpoints (ComfyUI, face swapper, image gen, router).
MEDIA_COMFY_AGENT_URL = os.getenv(
    "MEDIA_COMFY_AGENT_URL",
    "http://comfy-agent:8880" if _is_container_runtime() else "http://127.0.0.1:8880",
).rstrip("/")
MEDIA_COMFY_UI_URL = os.getenv(
    "MEDIA_COMFY_UI_URL",
    "http://comfyui:8188" if _is_container_runtime() else "http://127.0.0.1:8188",
).rstrip("/")
MEDIA_SWAPPER_URL = os.getenv(
    "MEDIA_SWAPPER_URL",
    "http://swapper-service:8890" if _is_container_runtime() else "http://127.0.0.1:8890",
).rstrip("/")
MEDIA_IMAGE_GEN_URL = os.getenv(
    "MEDIA_IMAGE_GEN_URL",
    "http://image-gen-service:7860" if _is_container_runtime() else "http://127.0.0.1:7860",
).rstrip("/")
MEDIA_ROUTER_URL = os.getenv("MEDIA_ROUTER_URL", "").strip().rstrip("/")
MEDIA_ROUTER_FALLBACK_URL = os.getenv("MEDIA_ROUTER_FALLBACK_URL", "http://127.0.0.1:9102").rstrip("/")
_media_recent_jobs: collections.deque = collections.deque(maxlen=40)  # ring of recent media jobs
||
|
||
|
||
def _apply_ollama_runtime_options(options: Dict[str, Any]) -> Dict[str, Any]:
    """Return a copy of ``options`` overlaid with the NODA2 Ollama tuning knobs.

    Only knobs with meaningful values are applied: num_ctx/num_thread must be
    positive, num_gpu accepts 0 and above (negative means "leave unset").
    The input dict is never mutated.
    """
    tuned: Dict[str, Any] = dict(options)
    overrides = (
        ("num_ctx", SOFIIA_OLLAMA_NUM_CTX, SOFIIA_OLLAMA_NUM_CTX > 0),
        ("num_thread", SOFIIA_OLLAMA_NUM_THREAD, SOFIIA_OLLAMA_NUM_THREAD > 0),
        ("num_gpu", SOFIIA_OLLAMA_NUM_GPU, SOFIIA_OLLAMA_NUM_GPU >= 0),
    )
    for key, value, enabled in overrides:
        if enabled:
            tuned[key] = value
    return tuned
|
||
|
||
|
||
def _make_ollama_payload(model_name: str, messages: List[Dict[str, Any]], options: Dict[str, Any]) -> Dict[str, Any]:
    """Assemble a non-streaming Ollama /api/chat request body.

    Runtime tuning is merged into ``options``; keep_alive is attached only
    when configured (empty env value means "use Ollama's default").
    """
    body: Dict[str, Any] = {
        "model": model_name,
        "messages": messages,
        "stream": False,
        "options": _apply_ollama_runtime_options(options),
    }
    keep_alive = SOFIIA_OLLAMA_KEEP_ALIVE
    if keep_alive:
        body["keep_alive"] = keep_alive
    return body
|
||
|
||
# Cached nodes telemetry (updated by the _nodes_poll_loop background task).
_nodes_cache: Dict[str, Any] = {"nodes": [], "summary": {}, "ts": ""}
_NODES_POLL_INTERVAL = int(os.getenv("NODES_POLL_INTERVAL_SEC", "30"))
|
||
|
||
|
||
async def _nodes_poll_loop() -> None:
    """Background task: poll all nodes every N seconds, update cache + WS broadcast.

    Runs forever; any per-iteration failure is logged at debug level and the
    loop continues, so a single bad poll never kills node telemetry.
    """
    while True:
        try:
            reg = load_nodes_registry()
            nodes_cfg = reg.get("nodes", {})
            timeout = float(reg.get("defaults", {}).get("health_timeout_sec", 10))
            nodes = await collect_all_nodes(nodes_cfg, router_api_key=ROUTER_API_KEY, timeout_per_node=timeout)
            online = sum(1 for n in nodes if n.get("online"))
            router_ok = sum(1 for n in nodes if n.get("router_ok"))
            _nodes_cache.update({
                "nodes": nodes,
                "summary": {"total": len(nodes), "online": online, "router_ok": router_ok},
                "ts": _now_iso(),
            })
            # Only build and push the trimmed per-node snapshot when someone
            # is actually connected over WebSocket.
            if _ws_clients:
                await _broadcast(_make_event("nodes.status", {
                    "nodes": [
                        {
                            "id": n["node_id"],
                            "label": n.get("label", n["node_id"]),
                            "online": n.get("online", False),
                            "router_ok": n.get("router_ok", False),
                            "router_latency_ms": n.get("router_latency_ms"),
                            "gateway_ok": n.get("gateway_ok"),
                            "heartbeat_age_s": n.get("heartbeat_age_s"),
                            "open_incidents": n.get("open_incidents"),
                            "monitor_source": n.get("monitor_source"),
                        }
                        for n in nodes
                    ],
                    "summary": {"total": len(nodes), "online": online, "router_ok": router_ok},
                }))
        except Exception as e:
            logger.debug("nodes poll error: %s", e)
        await asyncio.sleep(_NODES_POLL_INTERVAL)
|
||
|
||
|
||
from contextlib import asynccontextmanager


@asynccontextmanager
async def lifespan(app_: Any):
    """App lifespan: start-up (DB init, nodes poller, aurora monitor resume)
    and shutdown (cancel poller, close DB)."""
    # Init SQLite DB for projects/documents/sessions/messages
    try:
        await _app_db.init_db()
        logger.info("✅ sofiia-console DB initialised")
    except Exception as e:
        logger.warning("DB init failed (non-fatal, Projects/Docs disabled): %s", e)

    task = asyncio.create_task(_nodes_poll_loop())
    logger.info("Nodes poll loop started (interval=%ds)", _NODES_POLL_INTERVAL)
    # Best-effort resume of in-flight aurora "smart" monitors from disk.
    try:
        _smart_resume_active_monitors()
    except Exception as e:
        logger.warning("aurora smart monitor resume failed: %s", e)
    yield
    # Shutdown: cancel the poller, swallow its cancellation, close the DB.
    task.cancel()
    try:
        await task
    except asyncio.CancelledError:
        pass
    await _app_db.close_db()
|
||
|
||
|
||
# FastAPI application with CORS; origin policy comes from _CORS_ORIGINS above.
app = FastAPI(
    title="Sofiia Control Console",
    description="Operator BFF for Sofiia CTO agent",
    version=_VERSION,
    lifespan=lifespan,
)
app.add_middleware(
    CORSMiddleware,
    allow_origins=_CORS_ORIGINS,
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Projects + Documents + Sessions + Dialog Map API
app.include_router(docs_router)
|
||
|
||
# ── WebSocket event bus ───────────────────────────────────────────────────────
# Live set of connected UI sockets; dead sockets are pruned in _broadcast.
_ws_clients: Set[WebSocket] = set()
|
||
|
||
def _now_iso() -> str:
|
||
return datetime.now(timezone.utc).isoformat(timespec="milliseconds")
|
||
|
||
def _make_event(
|
||
event_type: str,
|
||
data: Dict[str, Any],
|
||
*,
|
||
project_id: str = "",
|
||
session_id: str = "",
|
||
user_id: str = "console_user",
|
||
) -> Dict[str, Any]:
|
||
return {
|
||
"v": 1,
|
||
"type": event_type,
|
||
"ts": _now_iso(),
|
||
"project_id": project_id,
|
||
"session_id": session_id,
|
||
"user_id": user_id,
|
||
"data": data,
|
||
}
|
||
|
||
async def _broadcast(event: Dict[str, Any]) -> None:
    """Send ``event`` to every connected WebSocket, pruning sockets that fail."""
    global _ws_clients
    if not _ws_clients:
        return
    text = json.dumps(event, ensure_ascii=False)
    stale: Set[WebSocket] = set()
    # Iterate a snapshot: the live set may change while we await sends.
    for sock in list(_ws_clients):
        try:
            await sock.send_text(text)
        except Exception:
            stale.add(sock)
    _ws_clients -= stale
|
||
|
||
def _broadcast_bg(event: Dict[str, Any]) -> None:
|
||
"""Fire-and-forget broadcast from sync context."""
|
||
try:
|
||
loop = asyncio.get_event_loop()
|
||
if loop.is_running():
|
||
loop.create_task(_broadcast(event))
|
||
except Exception:
|
||
pass
|
||
|
||
# ── AISTALK adapter ───────────────────────────────────────────────────────────
# Optional integration: instantiated only when AISTALK_ENABLED=true AND the
# adapter module imports cleanly; in every other case _aistalk stays None.
try:
    from .adapters.aistalk import AISTALKAdapter as _AISTALKAdapter
    _aistalk = _AISTALKAdapter(
        base_url=os.getenv("AISTALK_URL", ""),
        api_key=os.getenv("AISTALK_API_KEY", ""),
    ) if os.getenv("AISTALK_ENABLED", "false").lower() == "true" else None
except Exception:
    _aistalk = None
|
||
|
||
|
||
# ─── Health ─────────────────────────────────────────────────────────────────

@app.get("/api/health")
async def api_health():
    """BFF liveness plus one-hop router health.

    Prefers the NODA2 node when present, otherwise the first configured node.
    Always returns 200 with an "ok" flag rather than raising, so callers can
    distinguish "console up, router down" from "console down".
    """
    base = {
        "ok": True,
        "service": "sofiia-console",
        "version": _VERSION,
        "build": _BUILD_ID,
        "env": os.getenv("ENV", "dev"),
        "uptime_s": int(time.monotonic() - _START_TIME),
    }
    reg = load_nodes_registry()
    nodes_map = reg.get("nodes") or {}
    nodes = list(nodes_map.items())
    if not nodes:
        return {**base, "message": "no nodes configured"}
    # NODA2 preferred; fall back to whichever node is listed first.
    first_id, _first_cfg = ("NODA2", nodes_map["NODA2"]) if "NODA2" in nodes_map else nodes[0]
    router_url = get_router_url(first_id)
    if not router_url:
        return {**base, "message": "no router_url"}
    try:
        r = await health(router_url)
        return {**base, "ok": r.get("ok", False), "router": r, "node_id": first_id}
    except Exception as e:
        return {**base, "ok": False, "error": str(e)[:200], "node_id": first_id}
|
||
|
||
|
||
# ─── Status/Full ─────────────────────────────────────────────────────────────

async def _probe_router(router_url: str) -> Dict[str, Any]:
    """Probe a router URL: health endpoint, route availability, and latency.

    Tries /healthz then /health; on the first 200 it additionally probes the
    tools-execute and agent-infer routes (405 counts as "route exists, wrong
    verb"). Never raises — failures come back as reachable=False.
    """
    t0 = time.monotonic()
    try:
        async with httpx.AsyncClient(timeout=5.0) as c:
            for path in ("/healthz", "/health"):
                try:
                    r = await c.get(f"{router_url.rstrip('/')}{path}")
                    if r.status_code == 200:
                        latency = int((time.monotonic() - t0) * 1000)
                        # probe tool execute availability
                        tool_ok = False
                        try:
                            r2 = await c.get(
                                f"{router_url.rstrip('/')}/v1/tools/execute",
                                timeout=1.5,
                            )
                            tool_ok = r2.status_code in (200, 405)
                        except Exception:
                            pass
                        infer_ok = False
                        try:
                            r3 = await c.get(
                                f"{router_url.rstrip('/')}/v1/agents/sofiia/infer",
                                timeout=1.5,
                            )
                            infer_ok = r3.status_code in (200, 405)
                        except Exception:
                            pass
                        return {"url": router_url, "reachable": True,
                                "routes": {"tools_execute": tool_ok, "agent_infer": infer_ok},
                                "latency_ms": latency}
                except Exception:
                    # This health path failed — try the next spelling.
                    continue
            return {"url": router_url, "reachable": False, "routes": {}, "latency_ms": None}
    except Exception as e:
        return {"url": router_url, "reachable": False, "error": str(e)[:100]}
|
||
|
||
|
||
async def _probe_memory(mem_url: str) -> Dict[str, Any]:
    """Probe the memory service /health and summarize its vector store.

    Returns vector/collection counts plus round-trip latency; any failure
    (transport, HTTP status, bad payload) yields reachable=False.
    """
    started = time.monotonic()
    try:
        async with httpx.AsyncClient(timeout=5.0) as client:
            resp = await client.get(f"{mem_url.rstrip('/')}/health")
            resp.raise_for_status()
            body = resp.json()
            store = body.get("vector_store", {})
            total_points = sum(
                (coll.get("points_count", 0) or 0)
                for coll in store.values()
                if isinstance(coll, dict)
            )
            return {
                "url": mem_url,
                "reachable": True,
                "stats": {"vectors": total_points, "collections": len(store)},
                "latency_ms": int((time.monotonic() - started) * 1000),
            }
    except Exception as e:
        return {"url": mem_url, "reachable": False, "error": str(e)[:100]}
|
||
|
||
|
||
async def _probe_ollama(ollama_url: str) -> Dict[str, Any]:
    """Probe Ollama /api/tags and report the first 20 model names + latency."""
    started = time.monotonic()
    try:
        async with httpx.AsyncClient(timeout=5.0) as client:
            resp = await client.get(f"{ollama_url.rstrip('/')}/api/tags")
            resp.raise_for_status()
            payload = resp.json()
            names = [entry.get("name", "") for entry in payload.get("models", [])]
            return {
                "url": ollama_url,
                "reachable": True,
                "models": names[:20],
                "latency_ms": int((time.monotonic() - started) * 1000),
            }
    except Exception as e:
        return {"url": ollama_url, "reachable": False, "models": [], "error": str(e)[:100]}
|
||
|
||
|
||
async def _probe_http(url: str, *, timeout: float = 4.0) -> Dict[str, Any]:
    """Generic GET probe: reachable means any non-5xx response was received."""
    started = time.monotonic()
    try:
        async with httpx.AsyncClient(timeout=timeout) as client:
            resp = await client.get(url)
            elapsed_ms = int((time.monotonic() - started) * 1000)
            return {
                "reachable": resp.status_code < 500,
                "status": resp.status_code,
                "latency_ms": elapsed_ms,
            }
    except Exception as e:
        return {"reachable": False, "error": str(e)[:120]}
|
||
|
||
|
||
def _read_backends() -> Dict[str, str]:
|
||
"""Read backend env vars from BFF environment (no secrets)."""
|
||
return {
|
||
"alerts": os.getenv("ALERT_BACKEND", "unknown"),
|
||
"audit": os.getenv("AUDIT_BACKEND", "unknown"),
|
||
"incidents": os.getenv("INCIDENT_BACKEND", "unknown"),
|
||
"risk_history": os.getenv("RISK_HISTORY_BACKEND", "unknown"),
|
||
"backlog": os.getenv("BACKLOG_BACKEND", "unknown"),
|
||
}
|
||
|
||
|
||
def _read_cron_status() -> Dict[str, Any]:
|
||
cron_file = os.getenv("GOV_CRON_FILE", "/etc/cron.d/daarion-governance")
|
||
jobs_expected = [
|
||
"hourly_risk_snapshot", "daily_risk_digest", "risk_history_cleanup",
|
||
"weekly_platform_priority_digest", "weekly_backlog_generate", "daily_backlog_cleanup",
|
||
]
|
||
jobs_present: List[str] = []
|
||
installed: Any = False
|
||
warning = None
|
||
|
||
try:
|
||
content = Path(cron_file).read_text()
|
||
installed = True
|
||
for job in jobs_expected:
|
||
if job in content:
|
||
jobs_present.append(job)
|
||
except PermissionError:
|
||
installed = "unknown"
|
||
warning = "no read permission on cron file"
|
||
except FileNotFoundError:
|
||
installed = False
|
||
|
||
# Scan for latest artifact files
|
||
artifacts: Dict[str, Any] = {}
|
||
base = Path("ops")
|
||
for pattern, key in [
|
||
("reports/risk/*.md", "risk_digest_md"),
|
||
("reports/platform/*.md", "platform_digest_md"),
|
||
("backlog/*.jsonl", "backlog_jsonl"),
|
||
]:
|
||
try:
|
||
files = sorted(base.glob(pattern))
|
||
if files:
|
||
artifacts[key] = str(files[-1])
|
||
except Exception:
|
||
pass
|
||
|
||
result: Dict[str, Any] = {
|
||
"installed": installed,
|
||
"cron_file": cron_file,
|
||
"jobs_expected": jobs_expected,
|
||
"jobs_present": jobs_present,
|
||
"last_artifacts": artifacts,
|
||
}
|
||
if warning:
|
||
result["warning"] = warning
|
||
return result
|
||
|
||
|
||
@app.get("/api/status/full")
|
||
async def api_status_full():
|
||
"""Full stack diagnostic: BFF + router + memory + ollama + backends + cron."""
|
||
reg = load_nodes_registry()
|
||
nodes_cfg = reg.get("nodes", {})
|
||
|
||
# Pick NODA2 router first, fallback to first node
|
||
router_url = (
|
||
get_router_url("NODA2")
|
||
or (list(nodes_cfg.values())[0].get("router_url", "") if nodes_cfg else "")
|
||
)
|
||
|
||
mem_url = get_memory_service_url()
|
||
ollama_url = get_ollama_url()
|
||
|
||
async def _no_router() -> Dict[str, Any]:
|
||
return {"reachable": False, "url": "", "error": "no router_url configured"}
|
||
|
||
router_info, mem_info, ollama_info = await asyncio.gather(
|
||
_probe_router(router_url) if router_url else _no_router(),
|
||
_probe_memory(mem_url),
|
||
_probe_ollama(ollama_url),
|
||
return_exceptions=False,
|
||
)
|
||
|
||
return {
|
||
"bff": {
|
||
"version": _VERSION,
|
||
"build": _BUILD_ID,
|
||
"env": os.getenv("ENV", "dev"),
|
||
"uptime_s": int(time.monotonic() - _START_TIME),
|
||
"ws_clients": len(_ws_clients),
|
||
"aistalk_enabled": _aistalk is not None,
|
||
},
|
||
"router": router_info,
|
||
"memory": mem_info,
|
||
"ollama": ollama_info,
|
||
"backends": _read_backends(),
|
||
"cron": _read_cron_status(),
|
||
}
|
||
|
||
|
||
@app.get("/api/integrations/status")
|
||
async def api_integrations_status(opencode_url: Optional[str] = Query(None)):
|
||
"""Integration probes for unified CTO hub in UI."""
|
||
open_webui_probe_url = os.getenv("OPEN_WEBUI_PROBE_URL", "http://host.docker.internal:8080/health")
|
||
open_webui_ui_url = os.getenv("OPEN_WEBUI_UI_URL", "http://localhost:8080")
|
||
pieces_probe_url = os.getenv(
|
||
"PIECES_OS_URL",
|
||
"http://host.docker.internal:39300/workstream_pattern_engine/processors/status",
|
||
)
|
||
if not pieces_probe_url.rstrip("/").endswith("/workstream_pattern_engine/processors/status"):
|
||
pieces_probe_url = pieces_probe_url.rstrip("/") + "/workstream_pattern_engine/processors/status"
|
||
|
||
opencode_probe_url = (opencode_url or os.getenv("OPENCODE_URL", "")).strip()
|
||
notion_api_key = os.getenv("NOTION_API_KEY", os.getenv("NOTION_TOKEN", "")).strip()
|
||
|
||
probes = await asyncio.gather(
|
||
_probe_http(get_router_url("NODA2").rstrip("/") + "/healthz"),
|
||
_probe_http(get_memory_service_url().rstrip("/") + "/health"),
|
||
_probe_http(open_webui_probe_url),
|
||
_probe_http(pieces_probe_url),
|
||
_probe_http(opencode_probe_url.rstrip("/") + "/health") if opencode_probe_url else asyncio.sleep(0, result={"reachable": False, "error": "not configured"}),
|
||
)
|
||
|
||
router_probe, memory_probe, open_webui_probe, pieces_probe, opencode_probe = probes
|
||
|
||
notion_probe: Dict[str, Any] = {"configured": bool(notion_api_key), "reachable": False}
|
||
if notion_api_key:
|
||
try:
|
||
async with httpx.AsyncClient(timeout=6.0) as c:
|
||
r = await c.get(
|
||
"https://api.notion.com/v1/users/me",
|
||
headers={
|
||
"Authorization": f"Bearer {notion_api_key}",
|
||
"Notion-Version": "2022-06-28",
|
||
},
|
||
)
|
||
notion_probe["reachable"] = r.status_code == 200
|
||
notion_probe["status"] = r.status_code
|
||
except Exception as e:
|
||
notion_probe["error"] = str(e)[:120]
|
||
|
||
return {
|
||
"integrations": {
|
||
"sofiia_console": {"url": "/ui", "reachable": True},
|
||
"router_noda2": {"url": get_router_url("NODA2"), **router_probe},
|
||
"memory_service": {"url": get_memory_service_url(), **memory_probe},
|
||
"open_webui": {"url": open_webui_ui_url, "probe_url": open_webui_probe_url, **open_webui_probe},
|
||
"pieces_os": {"url": pieces_probe_url, **pieces_probe},
|
||
"opencode": {
|
||
"url": opencode_probe_url or "desktop/cli",
|
||
**opencode_probe,
|
||
},
|
||
"notion": notion_probe,
|
||
}
|
||
}
|
||
|
||
|
||
# ─── Aurora media forensics proxy ────────────────────────────────────────────
|
||
|
||
def _aurora_proxy_file_url(job_id: str, file_name: str) -> str:
|
||
return f"/api/aurora/files/{quote(job_id, safe='')}/{quote(file_name, safe='')}"
|
||
|
||
|
||
def _rewrite_aurora_payload_urls(payload: Dict[str, Any]) -> Dict[str, Any]:
|
||
output_files = payload.get("output_files")
|
||
if not isinstance(output_files, list):
|
||
return payload
|
||
job_id = str(payload.get("job_id") or "")
|
||
rewritten: List[Dict[str, Any]] = []
|
||
for item in output_files:
|
||
if not isinstance(item, dict):
|
||
continue
|
||
file_name = str(item.get("name") or "")
|
||
if job_id and file_name:
|
||
item = {**item, "url": _aurora_proxy_file_url(job_id, file_name)}
|
||
rewritten.append(item)
|
||
payload["output_files"] = rewritten
|
||
report_url = payload.get("forensic_report_url")
|
||
if isinstance(report_url, str) and report_url.startswith("/api/aurora/report/"):
|
||
payload["forensic_report_url"] = report_url
|
||
return payload
|
||
|
||
|
||
async def _aurora_request_json(
    method: str,
    path: str,
    *,
    files: Optional[Dict[str, Any]] = None,
    data: Optional[Dict[str, Any]] = None,
    json_body: Optional[Dict[str, Any]] = None,
    timeout: float = 60.0,
    retries: int = 0,
    retry_backoff_sec: float = 0.25,
) -> Dict[str, Any]:
    """Proxy one HTTP request to the Aurora service and return its JSON body.

    Retries transport errors, 5xx responses, and invalid-JSON bodies up to
    ``retries`` extra times with linear backoff (``retry_backoff_sec * attempt``).
    If the primary URL looks like the in-cluster ``aurora-service`` host and a
    distinct ``AURORA_FALLBACK_URL`` is configured, the first transport failure
    switches all remaining attempts to the fallback base URL.

    Raises:
        HTTPException: 502 when Aurora stays unreachable or returns invalid
            JSON after all attempts; otherwise the upstream 4xx/5xx status is
            forwarded with a truncated body as the detail.

    Returns:
        The parsed JSON dict (with output-file URLs rewritten to the console
        proxy), ``{}`` for an empty body, or ``{"data": ...}`` when the JSON
        root is not an object.

    NOTE(review): retried POSTs reuse the same ``files`` stream objects —
    presumably a prior failed attempt did not consume them; confirm for
    streaming uploads.
    """
    base_url = AURORA_SERVICE_URL
    url = f"{base_url}{path}"
    attempts = max(1, int(retries) + 1)
    last_error = "unknown error"
    for attempt in range(1, attempts + 1):
        try:
            async with httpx.AsyncClient(timeout=timeout) as client:
                r = await client.request(method, url, files=files, data=data, json=json_body)
        except httpx.HTTPError as e:
            last_error = str(e)[:200]
            # Transport failure against the in-cluster hostname: switch to the
            # configured fallback base URL (consumes this attempt's slot).
            if (
                "aurora-service" in base_url
                and AURORA_FALLBACK_URL
                and AURORA_FALLBACK_URL != base_url
            ):
                logger.warning(
                    "aurora proxy fallback: %s -> %s (%s)",
                    base_url,
                    AURORA_FALLBACK_URL,
                    last_error or type(e).__name__,
                )
                base_url = AURORA_FALLBACK_URL
                url = f"{base_url}{path}"
                continue
            logger.warning(
                "aurora proxy transport error (%s %s, attempt=%d/%d): %s",
                method,
                path,
                attempt,
                attempts,
                last_error,
            )
            if attempt < attempts:
                await asyncio.sleep(retry_backoff_sec * attempt)
                continue
            raise HTTPException(status_code=502, detail=f"Aurora unavailable: {last_error}") from e
        except Exception as e:
            # Non-transport failure (e.g. a bad argument) — still retried,
            # since these proxied calls are best-effort.
            last_error = str(e)[:200]
            logger.exception(
                "aurora proxy unexpected error (%s %s, attempt=%d/%d): %s",
                method,
                path,
                attempt,
                attempts,
                last_error,
            )
            if attempt < attempts:
                await asyncio.sleep(retry_backoff_sec * attempt)
                continue
            raise HTTPException(status_code=502, detail=f"Aurora unavailable: {last_error}") from e

        # 5xx responses are retried; other statuses fall through below.
        if r.status_code >= 500 and attempt < attempts:
            logger.warning(
                "aurora proxy upstream %d (%s %s, attempt=%d/%d) — retrying",
                r.status_code,
                method,
                path,
                attempt,
                attempts,
            )
            await asyncio.sleep(retry_backoff_sec * attempt)
            continue
        if r.status_code >= 400:
            # Forward the upstream status; detail is the (truncated) body.
            detail = r.text[:400] if r.text else f"Aurora error {r.status_code}"
            raise HTTPException(status_code=r.status_code, detail=detail)
        if not r.content:
            return {}
        try:
            payload = r.json()
        except Exception as e:
            last_error = str(e)[:200]
            logger.warning(
                "aurora proxy invalid JSON (%s %s, attempt=%d/%d): %s",
                method,
                path,
                attempt,
                attempts,
                last_error,
            )
            if attempt < attempts:
                await asyncio.sleep(retry_backoff_sec * attempt)
                continue
            raise HTTPException(status_code=502, detail="Invalid Aurora JSON response") from e
        if isinstance(payload, dict):
            # Rewrite output-file URLs so the UI fetches through this console.
            return _rewrite_aurora_payload_urls(payload)
        return {"data": payload}
    # Only reachable when every attempt was consumed by a `continue` path.
    raise HTTPException(status_code=502, detail=f"Aurora unavailable: {last_error}")
|
||
|
||
|
||
def _parse_stage_frame(stage: str) -> Dict[str, int]:
|
||
text = str(stage or "")
|
||
m = re.search(r"frame\s+(\d+)\s*/\s*(\d+)", text)
|
||
if not m:
|
||
return {"current": -1, "total": -1}
|
||
try:
|
||
return {"current": int(m.group(1)), "total": int(m.group(2))}
|
||
except Exception:
|
||
return {"current": -1, "total": -1}
|
||
|
||
|
||
def _aurora_live_fs_frame(job_id: str) -> Optional[Dict[str, Any]]:
    """Infer live processing progress for a job by scanning its work dirs.

    Looks under ``AURORA_DATA_DIR/outputs/<job_id>/_work_*`` for numbered
    ``processed/*.png`` frames (max stem number = current frame) and counts
    ``raw/*.png`` as the total. Results are cached in ``_aurora_live_cache``
    for 3 seconds to keep repeated status polls cheap.

    Returns:
        ``{"ts", "frame", "total", "work_dir"}`` (``total`` is -1 when the
        raw dir is missing/unreadable), or ``None`` when no frame evidence
        exists on disk.
    """
    now = time.monotonic()
    cached = _aurora_live_cache.get(job_id)
    # Serve a recent cached answer (<3s old) without touching the filesystem.
    if cached and (now - float(cached.get("ts", 0.0))) < 3.0:
        return cached

    base = AURORA_DATA_DIR / "outputs" / job_id
    if not base.exists():
        return None
    work_dirs = [p for p in base.iterdir() if p.is_dir() and p.name.startswith("_work_")]
    if not work_dirs:
        return None
    # Prefer most recently touched working directory
    work_dirs.sort(key=lambda p: p.stat().st_mtime, reverse=True)

    best_frame = -1
    best_total = -1
    best_dir = None
    for wd in work_dirs:
        processed = wd / "processed"
        if not processed.exists():
            continue
        # Max frame in processed directory
        local_max = -1
        for f in processed.glob("*.png"):
            try:
                n = int(f.stem)
            except Exception:
                # Non-numeric filenames are not frames; skip them.
                continue
            if n > local_max:
                local_max = n
        if local_max < 0:
            continue
        raw_dir = wd / "raw"
        total = -1
        if raw_dir.exists():
            try:
                total = sum(1 for _ in raw_dir.glob("*.png"))
            except Exception:
                total = -1
        # Keep the work dir with the highest processed frame number across
        # all candidates (not only the newest one).
        if local_max > best_frame:
            best_frame = local_max
            best_total = total
            best_dir = str(wd)

    if best_frame < 0:
        return None
    info = {
        "ts": now,
        "frame": best_frame,
        "total": best_total,
        "work_dir": best_dir,
    }
    _aurora_live_cache[job_id] = info
    return info
|
||
|
||
|
||
def _aurora_record_sample(job_id: str, frame: int, total: int) -> Optional[Dict[str, Any]]:
    """Record one (frame, total) progress sample and estimate live FPS.

    Samples are kept per job in a bounded deque (``_aurora_live_samples``,
    maxlen 32). FPS is the median of frame-delta/time-delta over consecutive
    samples, floored at 0.01; confidence grows with the number of usable
    deltas (>=4 medium, >=8 high).

    Returns:
        ``{"fps": float, "confidence": str}`` once at least one positive
        frame advance has been observed across >=3 samples, else ``None``.
    """
    if frame < 0:
        return None
    now = time.monotonic()
    dq = _aurora_live_samples.setdefault(job_id, collections.deque(maxlen=32))
    # De-dup consecutive equal frame samples.
    if dq and int(dq[-1]["frame"]) == frame:
        # Keep original timestamp for stable fps between actual frame advances.
        dq[-1]["total"] = total
    else:
        dq.append({"ts": now, "frame": frame, "total": total})
    if len(dq) < 3:
        return None

    # One FPS data point per consecutive pair with a real frame advance.
    fps_points: List[float] = []
    prev = dq[0]
    for cur in list(dq)[1:]:
        df = int(cur["frame"]) - int(prev["frame"])
        dt = float(cur["ts"]) - float(prev["ts"])
        if df > 0 and dt > 0:
            fps_points.append(df / dt)
        prev = cur
    if not fps_points:
        return None
    # Median is robust against a single stalled or bursty interval.
    fps = max(0.01, float(statistics.median(fps_points)))
    confidence = "low"
    if len(fps_points) >= 8:
        confidence = "high"
    elif len(fps_points) >= 4:
        confidence = "medium"
    return {"fps": fps, "confidence": confidence}
|
||
|
||
|
||
def _aurora_load_live_last_from_disk() -> None:
    """Hydrate the in-memory live-last cache from its JSON file, once.

    The loaded flag is set before reading so a failed read is not retried
    on every call; malformed files are ignored with a debug log.
    """
    global _aurora_live_last_loaded
    if _aurora_live_last_loaded:
        return
    _aurora_live_last_loaded = True
    try:
        if not _aurora_live_last_path.exists():
            return
        raw = json.loads(_aurora_live_last_path.read_text(encoding="utf-8"))
        if not isinstance(raw, dict):
            return
        for key, value in raw.items():
            if isinstance(key, str) and isinstance(value, dict):
                _aurora_live_last[key] = value
    except Exception as e:
        logger.debug("aurora live-last load failed: %s", e)
|
||
|
||
|
||
def _aurora_persist_live_last_to_disk() -> None:
    """Best-effort write of the live-last cache to its JSON file."""
    try:
        _aurora_live_last_path.parent.mkdir(parents=True, exist_ok=True)
        serialized = json.dumps(_aurora_live_last, ensure_ascii=False, separators=(",", ":"))
        _aurora_live_last_path.write_text(serialized, encoding="utf-8")
    except Exception as e:
        logger.debug("aurora live-last persist failed: %s", e)
|
||
|
||
|
||
def _smart_now_iso() -> str:
|
||
return datetime.now(timezone.utc).isoformat()
|
||
|
||
|
||
def _smart_is_terminal(status: Any) -> bool:
|
||
return str(status or "").lower() in {"completed", "failed", "cancelled"}
|
||
|
||
|
||
def _smart_media_type(file_name: str, content_type: str) -> str:
|
||
name = str(file_name or "").lower()
|
||
ctype = str(content_type or "").lower()
|
||
video_ext = (".mp4", ".avi", ".mov", ".mkv", ".webm")
|
||
audio_ext = (".mp3", ".wav", ".flac", ".m4a", ".aac", ".ogg")
|
||
image_ext = (".jpg", ".jpeg", ".png", ".webp", ".tif", ".tiff", ".bmp")
|
||
if ctype.startswith("video/") or name.endswith(video_ext):
|
||
return "video"
|
||
if ctype.startswith("audio/") or name.endswith(audio_ext):
|
||
return "audio"
|
||
if ctype.startswith("image/") or name.endswith(image_ext):
|
||
return "photo"
|
||
return "unknown"
|
||
|
||
|
||
def _smart_trim_runs() -> None:
    """Drop the oldest smart runs so at most _AURORA_SMART_MAX_RUNS remain.

    Runs are ranked by their ``created_at`` ISO string (lexicographic order
    equals chronological order for ISO timestamps); the newest ones survive.
    """
    if len(_aurora_smart_runs) <= _AURORA_SMART_MAX_RUNS:
        return
    newest_first = sorted(
        _aurora_smart_runs.items(),
        key=lambda kv: str((kv[1] or {}).get("created_at") or ""),
        reverse=True,
    )
    survivors = dict(newest_first[:_AURORA_SMART_MAX_RUNS])
    _aurora_smart_runs.clear()
    _aurora_smart_runs.update(survivors)
|
||
|
||
|
||
def _smart_load_runs_from_disk() -> None:
    """Load persisted smart runs into memory, once per process.

    Accepts both the wrapped ``{"runs": {...}}`` layout written by
    ``_smart_persist_runs`` and a bare run mapping; trims after loading.
    """
    global _aurora_smart_runs_loaded
    if _aurora_smart_runs_loaded:
        return
    _aurora_smart_runs_loaded = True
    try:
        if not _aurora_smart_runs_path.exists():
            return
        payload = json.loads(_aurora_smart_runs_path.read_text(encoding="utf-8"))
        runs = payload.get("runs") if isinstance(payload, dict) else payload
        if isinstance(runs, dict):
            for run_id, run in runs.items():
                if isinstance(run_id, str) and isinstance(run, dict):
                    _aurora_smart_runs[run_id] = run
            _smart_trim_runs()
    except Exception as exc:
        logger.debug("aurora smart-runs load failed: %s", exc)
|
||
|
||
|
||
def _smart_persist_runs() -> None:
    """Trim and write the smart-run registry to disk (best effort)."""
    try:
        _smart_trim_runs()
        _aurora_smart_runs_path.parent.mkdir(parents=True, exist_ok=True)
        document = {
            "updated_at": _smart_now_iso(),
            "runs": _aurora_smart_runs,
        }
        serialized = json.dumps(document, ensure_ascii=False, separators=(",", ":"))
        _aurora_smart_runs_path.write_text(serialized, encoding="utf-8")
    except Exception as exc:
        logger.debug("aurora smart-runs persist failed: %s", exc)
|
||
|
||
|
||
def _smart_load_policy_from_disk() -> None:
    """Load learned strategy statistics from disk, once per process."""
    global _aurora_smart_policy_loaded
    if _aurora_smart_policy_loaded:
        return
    _aurora_smart_policy_loaded = True
    try:
        if not _aurora_smart_policy_path.exists():
            return
        payload = json.loads(_aurora_smart_policy_path.read_text(encoding="utf-8"))
        if not isinstance(payload, dict):
            return
        strategies = payload.get("strategies")
        if isinstance(strategies, dict):
            _aurora_smart_policy["strategies"] = strategies
        # updated_at is restored even when strategies are absent/malformed.
        _aurora_smart_policy["updated_at"] = payload.get("updated_at")
    except Exception as exc:
        logger.debug("aurora smart-policy load failed: %s", exc)
|
||
|
||
|
||
def _smart_persist_policy() -> None:
    """Stamp and write the learned policy to disk (best effort)."""
    try:
        _aurora_smart_policy["updated_at"] = _smart_now_iso()
        _aurora_smart_policy_path.parent.mkdir(parents=True, exist_ok=True)
        serialized = json.dumps(_aurora_smart_policy, ensure_ascii=False, separators=(",", ":"))
        _aurora_smart_policy_path.write_text(serialized, encoding="utf-8")
    except Exception as exc:
        logger.debug("aurora smart-policy persist failed: %s", exc)
|
||
|
||
|
||
def _smart_strategy_stats(strategy: str) -> Dict[str, Any]:
    """Return (creating if absent) the mutable stats record for a strategy.

    The returned dict lives inside ``_aurora_smart_policy`` so in-place
    mutations by callers are picked up on the next persist.
    """
    _smart_load_policy_from_disk()
    strategies = _aurora_smart_policy.setdefault("strategies", {})
    record = strategies.get(strategy)
    if not isinstance(record, dict):
        record = {"count": 0, "avg_score": 0.0, "wins": 0, "losses": 0}
        strategies[strategy] = record
    return record
|
||
|
||
|
||
def _smart_update_strategy_score(strategy: str, score: float) -> None:
    """Fold one feedback score into the strategy's running average and persist."""
    stats = _smart_strategy_stats(strategy)
    try:
        new_count = int(stats.get("count") or 0) + 1
        old_avg = float(stats.get("avg_score") or 0.0)
        # Incremental mean: old sum plus the new score, over the new count.
        stats["avg_score"] = round(
            ((old_avg * (new_count - 1)) + float(score)) / max(1, new_count), 4
        )
        stats["count"] = new_count
        _smart_persist_policy()
    except Exception:
        # Best-effort learning: a malformed stats record must not break feedback.
        return
|
||
|
||
|
||
def _smart_update_strategy_outcome(strategy: str, success: bool) -> None:
    """Record a win or loss for the strategy and persist the policy."""
    stats = _smart_strategy_stats(strategy)
    bucket = "wins" if success else "losses"
    stats[bucket] = int(stats.get(bucket) or 0) + 1
    _smart_persist_policy()
|
||
|
||
|
||
def _smart_new_run_id() -> str:
|
||
stamp = datetime.now(timezone.utc).strftime("%Y%m%d_%H%M%S")
|
||
return f"smart_{stamp}_{uuid.uuid4().hex[:6]}"
|
||
|
||
|
||
def _smart_append_audit(run: Dict[str, Any], event: str, detail: Optional[Dict[str, Any]] = None) -> None:
    """Append a timestamped event to the run's audit trail (capped at 200).

    Also bumps ``run["updated_at"]`` to the new event's timestamp. A
    non-list ``audit`` value is replaced with a fresh list.
    """
    trail = run.setdefault("audit", [])
    if not isinstance(trail, list):
        trail = []
        run["audit"] = trail
    entry: Dict[str, Any] = {"ts": _smart_now_iso(), "event": str(event)}
    if isinstance(detail, dict) and detail:
        entry["detail"] = detail
    trail.append(entry)
    if len(trail) > 200:
        # Keep only the 200 most recent entries.
        del trail[:-200]
    run["updated_at"] = entry["ts"]
|
||
|
||
|
||
def _smart_analysis_features(analysis: Optional[Dict[str, Any]]) -> Dict[str, Any]:
|
||
if not isinstance(analysis, dict):
|
||
return {
|
||
"faces": 0,
|
||
"plates": 0,
|
||
"noise": "unknown",
|
||
"blur": "unknown",
|
||
"quality_score": 0.0,
|
||
}
|
||
faces = len(analysis.get("faces") or []) if isinstance(analysis.get("faces"), list) else 0
|
||
plates = len(analysis.get("license_plates") or []) if isinstance(analysis.get("license_plates"), list) else 0
|
||
qa = analysis.get("quality_analysis") if isinstance(analysis.get("quality_analysis"), dict) else {}
|
||
noise = str(qa.get("noise_level") or "unknown").lower()
|
||
blur = str(qa.get("blur_level") or "unknown").lower()
|
||
score = 0.0
|
||
score += min(2.0, faces * 0.2)
|
||
score += min(2.0, plates * 0.4)
|
||
if noise in {"high", "very_high"}:
|
||
score += 1.0
|
||
if blur in {"high", "very_high"}:
|
||
score += 1.0
|
||
return {
|
||
"faces": faces,
|
||
"plates": plates,
|
||
"noise": noise,
|
||
"blur": blur,
|
||
"quality_score": round(score, 3),
|
||
}
|
||
|
||
|
||
def _smart_decide_strategy(
    *,
    media_type: str,
    mode: str,
    requested_strategy: str,
    prefer_quality: bool,
    budget_tier: str,
    analysis: Optional[Dict[str, Any]],
    learning_enabled: bool,
) -> Dict[str, Any]:
    """Choose the processing strategy for a smart run.

    Decision order: non-video media always gets ``local_only``; an explicit
    valid strategy request is honored verbatim; otherwise an additive score
    (analysis quality features, prefer_quality +1.3, forensic mode +0.8,
    budget tier -1.4/+0.6, learned win ratio ±0.5/0.7 once >=6 outcomes
    exist) is compared against the 2.1 threshold to pick
    ``local_then_kling`` vs ``local_only``.

    Returns:
        ``{"strategy", "reasons", "score", "features"}`` — reasons is a
        human-readable audit list of every factor that fired.
    """
    strategy = str(requested_strategy or "auto").strip().lower()
    valid = {"auto", "local_only", "local_then_kling"}
    if strategy not in valid:
        strategy = "auto"

    features = _smart_analysis_features(analysis)
    reasons: List[str] = []
    score = 0.0

    # Photos/audio never go to the Kling video enhancer.
    if media_type != "video":
        chosen = "local_only"
        reasons.append("non-video media -> local stack only")
        return {"strategy": chosen, "reasons": reasons, "score": 0.0, "features": features}

    # Explicit caller choice bypasses all scoring.
    if strategy in {"local_only", "local_then_kling"}:
        reasons.append(f"explicit strategy={strategy}")
        return {"strategy": strategy, "reasons": reasons, "score": features["quality_score"], "features": features}

    score += float(features["quality_score"])
    if prefer_quality:
        score += 1.3
        reasons.append("prefer_quality=true")
    if str(mode).lower() == "forensic":
        score += 0.8
        reasons.append("forensic mode")

    budget_norm = str(budget_tier or "normal").strip().lower()
    if budget_norm == "low":
        score -= 1.4
        reasons.append("budget_tier=low")
    elif budget_norm == "high":
        score += 0.6
        reasons.append("budget_tier=high")

    # Learned feedback: only applied once enough outcomes accumulated.
    if learning_enabled:
        stats = _smart_strategy_stats("local_then_kling")
        wins = int(stats.get("wins") or 0)
        losses = int(stats.get("losses") or 0)
        total = wins + losses
        if total >= 6:
            success_ratio = wins / max(1, total)
            if success_ratio >= 0.65:
                score += 0.5
                reasons.append(f"learned success ratio {success_ratio:.2f}")
            elif success_ratio <= 0.35:
                score -= 0.7
                reasons.append(f"learned low success ratio {success_ratio:.2f}")

    chosen = "local_then_kling" if score >= 2.1 else "local_only"
    if not reasons:
        reasons.append("default heuristic")
    return {"strategy": chosen, "reasons": reasons, "score": round(score, 3), "features": features}
|
||
|
||
|
||
def _smart_compact_result(result_payload: Dict[str, Any]) -> Dict[str, Any]:
|
||
payload = {}
|
||
if not isinstance(result_payload, dict):
|
||
return payload
|
||
payload["mode"] = result_payload.get("mode")
|
||
payload["media_type"] = result_payload.get("media_type")
|
||
payload["digital_signature"] = result_payload.get("digital_signature")
|
||
output_files = result_payload.get("output_files")
|
||
if isinstance(output_files, list):
|
||
payload["output_files"] = output_files[:8]
|
||
q = result_payload.get("quality_report")
|
||
if isinstance(q, dict):
|
||
payload["quality_report"] = q
|
||
return payload
|
||
|
||
|
||
async def _smart_fetch_run_status(run_id: str) -> Optional[Dict[str, Any]]:
    """Return the in-memory run record for ``run_id``, or ``None`` if unknown.

    Triggers the one-time disk load so restarts still see persisted runs.
    """
    _smart_load_runs_from_disk()
    candidate = _aurora_smart_runs.get(run_id)
    return candidate if isinstance(candidate, dict) else None
|
||
|
||
|
||
async def _smart_monitor_run(run_id: str) -> None:
    """Background task driving one smart run to a terminal state.

    Phase 1 polls the local Aurora job until it completes, fails, or times
    out (``_AURORA_SMART_LOCAL_MAX_SEC``). Phase 2 fetches the local result.
    Phase 3 — only for ``local_then_kling`` video runs — submits a Kling
    enhancement and polls it until terminal or timeout. The run record is
    mutated and persisted at every state change; Kling failures/timeouts
    still end the run as ``completed`` with the local stack selected.
    Strategy win/loss outcomes feed the learning policy.
    """
    run = await _smart_fetch_run_status(run_id)
    if not run:
        return

    local = run.get("local") if isinstance(run.get("local"), dict) else {}
    local_job_id = str(local.get("job_id") or "")
    if not local_job_id:
        # Run record is unusable without a local job to track.
        _smart_append_audit(run, "monitor.error", {"reason": "missing local job id"})
        run["status"] = "failed"
        run["phase"] = "failed"
        _smart_persist_runs()
        return

    # ── Phase 1: poll the local Aurora job until terminal or deadline ──
    start = time.monotonic()
    while time.monotonic() - start <= _AURORA_SMART_LOCAL_MAX_SEC:
        try:
            st = await _aurora_request_json(
                "GET",
                f"/api/aurora/status/{quote(local_job_id, safe='')}",
                timeout=20.0,
                retries=2,
                retry_backoff_sec=0.25,
            )
        except Exception as exc:
            # Transient status failures are logged and polled through.
            _smart_append_audit(run, "local.status.error", {"error": str(exc)[:220]})
            await asyncio.sleep(_AURORA_SMART_LOCAL_POLL_SEC)
            continue

        status = str(st.get("status") or "").lower()
        if status in {"queued", "processing"}:
            run["phase"] = "local_processing"
            run["status"] = "processing"
        elif status == "completed":
            # Local done, but the smart run may still have a Kling phase.
            run["phase"] = "local_completed"
            run["status"] = "processing"
        else:
            run["phase"] = f"local_{status or 'unknown'}"
            run["status"] = status
        # Mirror the latest upstream progress snapshot into the run record.
        run["local"] = {
            **local,
            "job_id": local_job_id,
            "status": status,
            "progress": st.get("progress"),
            "current_stage": st.get("current_stage"),
            "eta_seconds": st.get("eta_seconds"),
            "live_fps": st.get("live_fps"),
            "error_message": st.get("error_message"),
            "updated_at": _smart_now_iso(),
        }
        _smart_persist_runs()

        if status in {"queued", "processing"}:
            await asyncio.sleep(_AURORA_SMART_LOCAL_POLL_SEC)
            continue

        if status != "completed":
            # Any other terminal status counts as a failed local run.
            run["status"] = "failed"
            run["phase"] = "local_failed"
            _smart_append_audit(
                run,
                "local.failed",
                {"status": status, "error": str(st.get("error_message") or "")[:220]},
            )
            _smart_update_strategy_outcome(str(run.get("policy", {}).get("strategy") or "local_only"), False)
            _smart_persist_runs()
            return

        _smart_append_audit(run, "local.completed", {"job_id": local_job_id})
        break
    else:
        # while-else: deadline elapsed without reaching a terminal status.
        run["status"] = "failed"
        run["phase"] = "local_timeout"
        _smart_append_audit(run, "local.timeout", {"max_sec": _AURORA_SMART_LOCAL_MAX_SEC})
        _smart_update_strategy_outcome(str(run.get("policy", {}).get("strategy") or "local_only"), False)
        _smart_persist_runs()
        return

    # ── Phase 2: fetch and store a compacted copy of the local result ──
    try:
        local_result = await _aurora_request_json(
            "GET",
            f"/api/aurora/result/{quote(local_job_id, safe='')}",
            timeout=30.0,
            retries=2,
            retry_backoff_sec=0.25,
        )
    except Exception as exc:
        run["status"] = "failed"
        run["phase"] = "local_result_error"
        _smart_append_audit(run, "local.result.error", {"error": str(exc)[:240]})
        _smart_update_strategy_outcome(str(run.get("policy", {}).get("strategy") or "local_only"), False)
        _smart_persist_runs()
        return

    run.setdefault("local", {})
    if isinstance(run["local"], dict):
        run["local"]["result"] = _smart_compact_result(local_result)
        run["local"]["result_ready"] = True
    run["selected_stack"] = "local"

    policy = run.get("policy") if isinstance(run.get("policy"), dict) else {}
    strategy = str(policy.get("strategy") or "local_only")
    media_type = str(run.get("media_type") or "")
    kling = run.get("kling") if isinstance(run.get("kling"), dict) else {}
    if strategy != "local_then_kling" or media_type != "video":
        # No Kling phase: the local result is the final answer.
        run["status"] = "completed"
        run["phase"] = "completed"
        _smart_append_audit(run, "smart.completed", {"selected_stack": "local", "reason": "strategy local_only or non-video"})
        _smart_update_strategy_outcome(strategy, True)
        _smart_persist_runs()
        return

    # ── Phase 3: submit the Kling enhancement and poll it ──
    run["phase"] = "kling_submitting"
    run["status"] = "processing"
    _smart_append_audit(run, "kling.submit.start")
    _smart_persist_runs()

    try:
        submit = await _aurora_request_json(
            "POST",
            "/api/aurora/kling/enhance",
            data={
                "job_id": local_job_id,
                "prompt": str(kling.get("prompt") or "enhance video quality, improve sharpness and clarity"),
                "negative_prompt": str(kling.get("negative_prompt") or "noise, blur, artifacts, distortion"),
                "mode": str(kling.get("mode") or "pro"),
                "duration": str(kling.get("duration") or "5"),
                "cfg_scale": str(kling.get("cfg_scale") if kling.get("cfg_scale") is not None else "0.5"),
            },
            timeout=120.0,
            retries=1,
            retry_backoff_sec=0.25,
        )
    except Exception as exc:
        # Kling submit failure is non-fatal: keep the local result.
        run["kling"] = {
            **kling,
            "status": "failed",
            "error": str(exc)[:640],
        }
        run["status"] = "completed"
        run["phase"] = "completed_with_kling_failure"
        run["selected_stack"] = "local"
        _smart_append_audit(run, "kling.submit.error", {"error": str(exc)[:220]})
        _smart_update_strategy_outcome(strategy, False)
        _smart_persist_runs()
        return

    task_id = str(submit.get("kling_task_id") or "")
    run["kling"] = {
        **kling,
        "task_id": task_id,
        "status": str(submit.get("status") or "submitted").lower(),
        "endpoint": str(submit.get("kling_endpoint") or "video2video"),
        "submitted_at": _smart_now_iso(),
    }
    _smart_append_audit(run, "kling.submitted", {"task_id": task_id})
    _smart_persist_runs()

    k_start = time.monotonic()
    while time.monotonic() - k_start <= _AURORA_SMART_KLING_MAX_SEC:
        try:
            kst = await _aurora_request_json(
                "GET",
                f"/api/aurora/kling/status/{quote(local_job_id, safe='')}",
                timeout=30.0,
                retries=1,
                retry_backoff_sec=0.2,
            )
        except Exception as exc:
            _smart_append_audit(run, "kling.status.error", {"error": str(exc)[:220]})
            await asyncio.sleep(_AURORA_SMART_KLING_POLL_SEC)
            continue

        k_status = str(kst.get("status") or "").lower()
        k_url = kst.get("kling_result_url")
        run["phase"] = "kling_processing"
        run["kling"] = {
            **(run.get("kling") if isinstance(run.get("kling"), dict) else {}),
            "status": k_status,
            "result_url": k_url,
            "last_polled_at": _smart_now_iso(),
        }
        _smart_persist_runs()

        if k_status in {"submitted", "queued", "running", "processing", "pending"}:
            await asyncio.sleep(_AURORA_SMART_KLING_POLL_SEC)
            continue

        # Success requires both a success-like status AND a result URL.
        if k_status in {"succeed", "completed", "success"} and k_url:
            run["status"] = "completed"
            run["phase"] = "completed"
            run["selected_stack"] = "kling"
            _smart_append_audit(run, "smart.completed", {"selected_stack": "kling", "task_id": task_id})
            _smart_update_strategy_outcome(strategy, True)
            _smart_persist_runs()
            return

        # Terminal non-success: fall back to the local result.
        run["status"] = "completed"
        run["phase"] = "completed_with_kling_failure"
        run["selected_stack"] = "local"
        _smart_append_audit(
            run,
            "kling.terminal.non_success",
            {"status": k_status, "task_id": task_id},
        )
        _smart_update_strategy_outcome(strategy, False)
        _smart_persist_runs()
        return

    # Kling deadline elapsed: keep the local result, count a loss.
    run["status"] = "completed"
    run["phase"] = "completed_with_kling_timeout"
    run["selected_stack"] = "local"
    _smart_append_audit(run, "kling.timeout", {"max_sec": _AURORA_SMART_KLING_MAX_SEC})
    _smart_update_strategy_outcome(strategy, False)
    _smart_persist_runs()
|
||
|
||
|
||
def _smart_resume_active_monitors() -> None:
    """Re-spawn monitor tasks for smart runs that were still active.

    Intended for process startup: loads persisted runs and schedules a
    monitor for every non-terminal one. Spawn failures are skipped so one
    bad run cannot block the rest.
    """
    _smart_load_runs_from_disk()
    for run_id, run in list(_aurora_smart_runs.items()):
        if not isinstance(run, dict) or _smart_is_terminal(run.get("status")):
            continue
        try:
            asyncio.create_task(_smart_monitor_run(run_id))
        except Exception:
            continue
|
||
|
||
|
||
@app.get("/api/aurora/health")
async def api_aurora_health() -> Dict[str, Any]:
    """Proxy Aurora's /health endpoint through the console BFF."""
    health_payload = await _aurora_request_json("GET", "/health", timeout=10.0)
    return health_payload
|
||
|
||
|
||
@app.post("/api/aurora/upload")
async def api_aurora_upload(
    file: UploadFile = File(...),
    mode: str = Form("tactical"),
    priority: str = Form("balanced"),
    export_options: str = Form(""),
) -> Dict[str, Any]:
    """Forward an upload to Aurora and decorate the reply with console URLs.

    The underlying SpooledTemporaryFile is handed to httpx directly so
    large uploads are streamed instead of buffered fully in RAM.
    """
    upload_tuple = (
        file.filename or "upload.bin",
        file.file,
        file.content_type or "application/octet-stream",
    )
    payload = await _aurora_request_json(
        "POST",
        "/api/aurora/upload",
        files={"file": upload_tuple},
        data={
            "mode": mode,
            "priority": priority,
            "export_options": export_options,
        },
        timeout=120.0,
    )
    job_id = str(payload.get("job_id") or "")
    if job_id:
        # Convenience links so the UI can follow the job without building URLs.
        for key, prefix in (
            ("status_url", "/api/aurora/status/"),
            ("result_url", "/api/aurora/result/"),
            ("cancel_url", "/api/aurora/cancel/"),
        ):
            payload[key] = f"{prefix}{job_id}"
    return payload
|
||
|
||
|
||
@app.post("/api/aurora/process-smart")
async def api_aurora_process_smart(
    file: UploadFile = File(...),
    mode: str = Form("tactical"),
    priority: str = Form("balanced"),
    export_options: str = Form(""),
    strategy: str = Form("auto"),
    prefer_quality: bool = Form(True),
    budget_tier: str = Form("normal"),
    learning_enabled: bool = Form(True),
    kling_prompt: str = Form("enhance video quality, improve sharpness and clarity"),
    kling_negative_prompt: str = Form("noise, blur, artifacts, distortion"),
    kling_mode: str = Form("pro"),
    kling_duration: str = Form("5"),
    kling_cfg_scale: float = Form(0.5),
) -> Dict[str, Any]:
    """Start a smart run: analyze, decide a strategy, submit, and monitor.

    For video/photo uploads the file is first sent to Aurora's analyze
    endpoint (best effort) to feed the strategy decision. The same upload
    is then submitted as a normal local job, a persisted run record is
    created, and a background monitor task drives the rest (including the
    optional Kling phase).

    Raises:
        HTTPException: 502 when Aurora's upload response lacks a job id.

    Returns:
        Summary with ``smart_run_id``, the chosen policy, and status/result
        URLs for both the smart run and the underlying local job.
    """
    _smart_load_runs_from_disk()
    _smart_load_policy_from_disk()

    file_name = file.filename or "upload.bin"
    content_type = file.content_type or "application/octet-stream"
    media_type = _smart_media_type(file_name, content_type)

    # Best-effort pre-analysis; failure only weakens the strategy decision.
    analysis: Optional[Dict[str, Any]] = None
    if media_type in {"video", "photo"}:
        try:
            await file.seek(0)
            files = {"file": (file_name, file.file, content_type)}
            analysis = await _aurora_request_json(
                "POST",
                "/api/aurora/analyze",
                files=files,
                timeout=120.0,
                retries=1,
                retry_backoff_sec=0.25,
            )
        except Exception as exc:
            analysis = None
            logger.warning("smart-process analyze skipped: %s", str(exc)[:220])

    policy = _smart_decide_strategy(
        media_type=media_type,
        mode=mode,
        requested_strategy=strategy,
        prefer_quality=bool(prefer_quality),
        budget_tier=budget_tier,
        analysis=analysis,
        learning_enabled=bool(learning_enabled),
    )
    chosen_strategy = str(policy.get("strategy") or "local_only")
    policy.setdefault("requested_strategy", str(strategy or "auto"))
    policy["learning_enabled"] = bool(learning_enabled)
    policy["budget_tier"] = str(budget_tier or "normal")

    # Rewind so the same upload stream can be re-sent after the analyze call.
    await file.seek(0)
    files = {"file": (file_name, file.file, content_type)}
    local_payload = await _aurora_request_json(
        "POST",
        "/api/aurora/upload",
        files=files,
        data={
            "mode": mode,
            "priority": priority,
            "export_options": export_options,
        },
        timeout=120.0,
    )
    local_job_id = str(local_payload.get("job_id") or "")
    if not local_job_id:
        raise HTTPException(status_code=502, detail="Smart process failed: local job_id missing")

    run_id = _smart_new_run_id()
    now = _smart_now_iso()
    # Full run record: inputs, decision, and per-stack tracking state.
    run: Dict[str, Any] = {
        "run_id": run_id,
        "created_at": now,
        "updated_at": now,
        "status": "processing",
        "phase": "local_processing",
        "media_type": media_type,
        "selected_stack": None,
        "requested": {
            "mode": mode,
            "priority": priority,
            "export_options": export_options,
            "strategy": strategy,
            "prefer_quality": bool(prefer_quality),
            "budget_tier": budget_tier,
            "learning_enabled": bool(learning_enabled),
        },
        "policy": policy,
        "analysis_summary": _smart_analysis_features(analysis),
        "analysis": analysis if isinstance(analysis, dict) else None,
        "local": {
            "job_id": local_job_id,
            "status": "queued",
            "submit_payload": {
                "status_url": f"/api/aurora/status/{quote(local_job_id, safe='')}",
                "result_url": f"/api/aurora/result/{quote(local_job_id, safe='')}",
            },
        },
        "kling": {
            "enabled": chosen_strategy == "local_then_kling" and media_type == "video",
            "status": "pending",
            "prompt": kling_prompt,
            "negative_prompt": kling_negative_prompt,
            "mode": kling_mode,
            "duration": kling_duration,
            "cfg_scale": kling_cfg_scale,
        },
        "audit": [],
    }
    _smart_append_audit(
        run,
        "smart.submitted",
        {
            "local_job_id": local_job_id,
            "media_type": media_type,
            "strategy": chosen_strategy,
            "score": policy.get("score"),
        },
    )
    _aurora_smart_runs[run_id] = run
    _smart_persist_runs()

    # Spawn the monitor; a spawn failure leaves the run persisted for the
    # startup resume path to pick up.
    try:
        asyncio.create_task(_smart_monitor_run(run_id))
    except Exception as exc:
        _smart_append_audit(run, "monitor.spawn.error", {"error": str(exc)[:220]})
        _smart_persist_runs()

    return {
        "smart_run_id": run_id,
        "status": run.get("status"),
        "phase": run.get("phase"),
        "media_type": media_type,
        "local_job_id": local_job_id,
        "policy": policy,
        "smart_status_url": f"/api/aurora/process-smart/{quote(run_id, safe='')}",
        "local_status_url": f"/api/aurora/status/{quote(local_job_id, safe='')}",
        "local_result_url": f"/api/aurora/result/{quote(local_job_id, safe='')}",
    }
|
||
|
||
|
||
@app.get("/api/aurora/process-smart")
async def api_aurora_process_smart_list(
    limit: int = Query(default=20, ge=1, le=200),
    status: Optional[str] = Query(default=None),
) -> Dict[str, Any]:
    """List smart runs (newest first), optionally filtered by status."""
    _smart_load_runs_from_disk()
    wanted_status = str(status or "").strip().lower()
    summaries = []
    for run in _aurora_smart_runs.values():
        if not isinstance(run, dict):
            continue
        run_status = str(run.get("status") or "")
        if wanted_status and run_status.lower() != wanted_status:
            continue
        local = run.get("local") if isinstance(run.get("local"), dict) else {}
        kling = run.get("kling") if isinstance(run.get("kling"), dict) else {}
        policy = run.get("policy")
        summaries.append(
            {
                "run_id": run.get("run_id"),
                "status": run_status,
                "phase": run.get("phase"),
                "media_type": run.get("media_type"),
                "strategy": policy.get("strategy") if isinstance(policy, dict) else None,
                "selected_stack": run.get("selected_stack"),
                "created_at": run.get("created_at"),
                "updated_at": run.get("updated_at"),
                "local_job_id": local.get("job_id"),
                "local_status": local.get("status"),
                "kling_status": kling.get("status"),
            }
        )
    # ISO timestamps sort lexicographically, so this is newest-first.
    summaries.sort(key=lambda row: str(row.get("created_at") or ""), reverse=True)
    return {
        "runs": summaries[:limit],
        "count": min(limit, len(summaries)),
        "total": len(summaries),
    }
|
||
|
||
|
||
@app.get("/api/aurora/process-smart/{run_id}")
async def api_aurora_process_smart_status(run_id: str) -> Dict[str, Any]:
    """Return the full smart-run record, or 404 when unknown/empty."""
    record = await _smart_fetch_run_status(run_id)
    # Truthiness check on purpose: an empty record is treated as missing.
    if not record:
        raise HTTPException(status_code=404, detail="smart run not found")
    return record
|
||
|
||
|
||
@app.post("/api/aurora/process-smart/{run_id}/feedback")
async def api_aurora_process_smart_feedback(
    run_id: str,
    payload: Optional[Dict[str, Any]] = Body(default=None),
) -> Dict[str, Any]:
    """Accept user feedback for a smart run and feed it into learning.

    Body fields (all optional): ``score`` (number), ``selected_stack``
    ("local"/"kling"), ``notes`` (free text, stored truncated to 1000
    chars). A numeric score is clamped to [1.0, 5.0] before updating the
    strategy's running average.

    Raises:
        HTTPException: 404 when the run id is unknown.
    """
    run = await _smart_fetch_run_status(run_id)
    if not run:
        raise HTTPException(status_code=404, detail="smart run not found")
    body = payload if isinstance(payload, dict) else {}
    score_raw = body.get("score")
    score: Optional[float] = None
    try:
        if score_raw is not None:
            score = float(score_raw)
    except Exception:
        # Non-numeric scores are silently dropped.
        score = None
    selected_stack = str(body.get("selected_stack") or "").strip().lower() or None
    notes = str(body.get("notes") or "").strip()

    # NOTE: this record keeps the raw (unclamped) score; clamping below
    # applies only to the value fed into the learning policy and audit.
    feedback = {
        "ts": _smart_now_iso(),
        "score": score,
        "selected_stack": selected_stack,
        "notes": notes[:1000] if notes else None,
    }
    run["feedback"] = feedback
    strategy = str((run.get("policy") or {}).get("strategy") or "local_only")
    if score is not None:
        score = max(1.0, min(5.0, score))
        _smart_update_strategy_score(strategy, score)
    if selected_stack in {"local", "kling"}:
        # User override of which stack's output they consider final.
        run["selected_stack"] = selected_stack
    _smart_append_audit(run, "feedback.received", {"score": score, "selected_stack": selected_stack})
    _smart_persist_runs()
    return {
        "ok": True,
        "run_id": run_id,
        "feedback": feedback,
        "policy": _aurora_smart_policy,
    }
|
||
|
||
|
||
@app.get("/api/aurora/process-smart/policy/stats")
async def api_aurora_process_smart_policy_stats() -> Dict[str, Any]:
    """Reload and return the smart-routing policy statistics."""
    _smart_load_policy_from_disk()
    return _aurora_smart_policy
|
||
|
||
|
||
@app.post("/api/aurora/analyze")
async def api_aurora_analyze(file: UploadFile = File(...)) -> Dict[str, Any]:
    """Forward an uploaded file to the Aurora analyze endpoint."""
    await file.seek(0)
    upload = (
        file.filename or "upload.bin",
        file.file,
        file.content_type or "application/octet-stream",
    )
    return await _aurora_request_json(
        "POST",
        "/api/aurora/analyze",
        files={"file": upload},
        timeout=120.0,
        retries=2,
        retry_backoff_sec=0.35,
    )
|
||
|
||
|
||
@app.post("/api/aurora/audio/analyze")
async def api_aurora_audio_analyze(file: UploadFile = File(...)) -> Dict[str, Any]:
    """Forward an uploaded audio file to the Aurora audio-analyze endpoint."""
    await file.seek(0)
    upload = (
        file.filename or "upload_audio.bin",
        file.file,
        file.content_type or "application/octet-stream",
    )
    return await _aurora_request_json(
        "POST",
        "/api/aurora/audio/analyze",
        files={"file": upload},
        timeout=120.0,
        retries=2,
        retry_backoff_sec=0.35,
    )
|
||
|
||
|
||
@app.post("/api/aurora/audio/process")
async def api_aurora_audio_process(
    file: UploadFile = File(...),
    mode: str = Form("tactical"),
    priority: str = Form("speech"),
    export_options: str = Form(""),
) -> Dict[str, Any]:
    """Proxy an audio-processing upload to the Aurora service.

    On success, convenience status/result/cancel URLs are attached to the
    returned payload. Fix: the job id is now percent-encoded when building
    those URLs, matching every other endpoint in this module (previously a
    job id containing reserved characters produced broken links).
    """
    await file.seek(0)
    files = {
        "file": (
            file.filename or "upload_audio.bin",
            file.file,
            file.content_type or "application/octet-stream",
        )
    }
    payload = await _aurora_request_json(
        "POST",
        "/api/aurora/audio/process",
        files=files,
        data={
            "mode": mode,
            "priority": priority,
            "export_options": export_options,
        },
        timeout=120.0,
        retries=2,
        retry_backoff_sec=0.35,
    )
    job_id = str(payload.get("job_id") or "")
    if job_id:
        encoded = quote(job_id, safe="")
        payload["status_url"] = f"/api/aurora/status/{encoded}"
        payload["result_url"] = f"/api/aurora/result/{encoded}"
        payload["cancel_url"] = f"/api/aurora/cancel/{encoded}"
    return payload
|
||
|
||
|
||
@app.post("/api/aurora/reprocess/{job_id}")
async def api_aurora_reprocess(
    job_id: str,
    payload: Optional[Dict[str, Any]] = Body(default=None),
) -> Dict[str, Any]:
    """Ask Aurora to re-run a job, passing any override settings through."""
    overrides = payload if isinstance(payload, dict) else {}
    return await _aurora_request_json(
        "POST",
        f"/api/aurora/reprocess/{quote(job_id, safe='')}",
        json_body=overrides,
        timeout=120.0,
        retries=2,
        retry_backoff_sec=0.35,
    )
|
||
|
||
|
||
@app.post("/api/aurora/chat")
async def api_aurora_chat(payload: Optional[Dict[str, Any]] = Body(default=None)) -> Dict[str, Any]:
    """Proxy a chat request to the Aurora service."""
    request_body = payload if isinstance(payload, dict) else {}
    return await _aurora_request_json(
        "POST",
        "/api/aurora/chat",
        json_body=request_body,
        timeout=30.0,
        retries=1,
        retry_backoff_sec=0.2,
    )
|
||
|
||
|
||
@app.get("/api/aurora/status/{job_id}")
async def api_aurora_status(job_id: str) -> Dict[str, Any]:
    """Proxy a job-status poll and enrich 'processing' payloads with live
    filesystem progress (frame counters, fps, ETA) tracked by this console.

    NOTE(review): intentionally order-dependent — the upstream payload is
    mutated in place and the merged timing snapshot is persisted between
    polls via ``_aurora_live_last`` / disk.
    """
    # Rehydrate the last-known live timing snapshots before merging.
    _aurora_load_live_last_from_disk()
    payload = await _aurora_request_json(
        "GET",
        f"/api/aurora/status/{quote(job_id, safe='')}",
        timeout=20.0,
        retries=8,
        retry_backoff_sec=0.35,
    )
    if not isinstance(payload, dict):
        return payload
    # Only 'processing' jobs get live enrichment; other states pass through.
    if str(payload.get("status", "")).lower() != "processing":
        return payload

    live = _aurora_live_fs_frame(job_id)
    if not live:
        return payload
    # Frame counters parsed from the upstream stage text.
    parsed = _parse_stage_frame(str(payload.get("current_stage", "")))
    live_frame = int(live.get("frame", -1))
    if live_frame < 0:
        return payload
    # Prefer the total from the parsed stage text; fall back to the fs total.
    total = int(parsed.get("total", -1))
    if total <= 0:
        total = int(live.get("total", -1))
    if total > 0:
        # Clamp to 1..99 so the bar never shows 0% or a premature 100%.
        live_progress = int(max(1, min(99, round((live_frame / max(1, total)) * 100))))
        payload["progress"] = max(int(payload.get("progress") or 0), live_progress)

    # Record this sample; returns fps/confidence once enough samples exist.
    live_stats = _aurora_record_sample(job_id, live_frame, total)
    if live_stats:
        fps = float(live_stats["fps"])
        payload["live_fps"] = round(fps, 3)
        payload["eta_confidence"] = live_stats["confidence"]
        if total > 0 and live_frame < total:
            # Remaining frames over observed fps; fps floored to avoid div-by-~0.
            eta_calc = int(max(0, round((total - live_frame) / max(0.01, fps))))
            payload["eta_seconds"] = eta_calc
            elapsed = payload.get("elapsed_seconds")
            if isinstance(elapsed, (int, float)):
                payload["estimated_total_seconds"] = int(max(0, round(float(elapsed) + eta_calc)))
        # Cache the freshly computed timing so later polls can reuse it.
        _aurora_live_last[job_id] = {
            "live_fps": payload.get("live_fps"),
            "eta_seconds": payload.get("eta_seconds"),
            "estimated_total_seconds": payload.get("estimated_total_seconds"),
            "eta_confidence": payload.get("eta_confidence"),
        }
        _aurora_persist_live_last_to_disk()
    else:
        # No fresh fps sample this poll — reuse the previous snapshot if any.
        prev = _aurora_live_last.get(job_id)
        if prev:
            payload["live_fps"] = prev.get("live_fps")
            payload["eta_seconds"] = prev.get("eta_seconds", payload.get("eta_seconds"))
            payload["estimated_total_seconds"] = prev.get("estimated_total_seconds", payload.get("estimated_total_seconds"))
            payload["eta_confidence"] = prev.get("eta_confidence")

    # If upstream stage/progress is stale, patch with live filesystem progress.
    if live_frame > int(parsed.get("current", -1)):
        if total > 0:
            if live_stats:
                payload["current_stage"] = f"enhancing frame {live_frame}/{total} (live {payload['live_fps']} fps)"
            else:
                payload["current_stage"] = f"enhancing frame {live_frame}/{total} (live fs)"
        else:
            payload["current_stage"] = f"enhancing frame {live_frame} (live fs)"
        payload["live_frame"] = live_frame
        payload["live_total_frames"] = total if total > 0 else None
    else:
        # Even when upstream stage text already moved, expose live counters for UI.
        payload["live_frame"] = live_frame
        payload["live_total_frames"] = total if total > 0 else None

    # Persist last known timing even if fps was not recalculated this poll.
    snapshot = _aurora_live_last.get(job_id, {})
    changed = False
    for key in ("live_fps", "eta_seconds", "estimated_total_seconds", "eta_confidence"):
        val = payload.get(key)
        if val is not None and snapshot.get(key) != val:
            snapshot[key] = val
            changed = True
    if changed:
        _aurora_live_last[job_id] = snapshot
        _aurora_persist_live_last_to_disk()
    return payload
|
||
|
||
|
||
def _aurora_coerce_dir(path_value: Any) -> Optional[Path]:
|
||
if path_value is None:
|
||
return None
|
||
raw = str(path_value).strip()
|
||
if not raw:
|
||
return None
|
||
try:
|
||
p = Path(raw).expanduser().resolve()
|
||
except Exception:
|
||
return None
|
||
if p.exists() and p.is_file():
|
||
p = p.parent
|
||
if not p.exists() or not p.is_dir():
|
||
return None
|
||
return p
|
||
|
||
|
||
async def _aurora_resolve_job_folder(job_id: str) -> Optional[Path]:
    """Locate a job's on-disk folder via status/result storage hints.

    Tries the status endpoint, then the result endpoint, then the default
    outputs directory; returns the first candidate that coerces to an
    existing directory, or None.
    """
    encoded = quote(job_id, safe="")
    candidates: List[Any] = []
    for endpoint in (f"/api/aurora/status/{encoded}", f"/api/aurora/result/{encoded}"):
        try:
            payload = await _aurora_request_json("GET", endpoint, timeout=12.0)
        except Exception:
            continue
        storage = payload.get("storage") if isinstance(payload, dict) else None
        if isinstance(storage, dict):
            candidates.extend(
                [
                    storage.get("output_dir"),
                    storage.get("upload_dir"),
                    storage.get("input_path"),
                ]
            )

    # Last resort: the conventional per-job outputs directory.
    candidates.append(AURORA_DATA_DIR / "outputs" / job_id)
    for value in candidates:
        folder = _aurora_coerce_dir(value)
        if folder:
            return folder
    return None
|
||
|
||
|
||
@app.get("/api/aurora/folder/{job_id}")
async def api_aurora_folder(job_id: str) -> Dict[str, Any]:
    """Report the resolved on-disk output folder for a job."""
    folder = await _aurora_resolve_job_folder(job_id)
    if folder is None:
        raise HTTPException(status_code=404, detail="Aurora output folder not found")
    folder_str = str(folder)
    return {
        "ok": True,
        "job_id": job_id,
        "folder_path": folder_str,
        "folder_url": f"file://{folder_str}",
    }
|
||
|
||
|
||
@app.post("/api/aurora/folder/{job_id}/open")
async def api_aurora_folder_open(job_id: str) -> Dict[str, Any]:
    """Open the job's output folder in the host OS file manager."""
    folder = await _aurora_resolve_job_folder(job_id)
    if not folder:
        raise HTTPException(status_code=404, detail="Aurora output folder not found")
    target = str(folder)
    opener: Optional[List[str]] = None
    if sys.platform == "darwin":
        opener = ["open", target]
    elif os.name == "nt":
        # Windows has no CLI opener; use the shell association directly.
        try:
            os.startfile(target)  # type: ignore[attr-defined]
        except Exception as e:
            raise HTTPException(status_code=500, detail=f"Failed to open folder: {str(e)[:200]}")
    else:
        opener = ["xdg-open", target]
    if opener is not None:
        try:
            subprocess.run(opener, check=False, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
        except Exception as e:
            raise HTTPException(status_code=500, detail=f"Failed to open folder: {str(e)[:200]}")
    return {"ok": True, "job_id": job_id, "folder_path": target}
|
||
|
||
|
||
@app.get("/api/aurora/jobs")
async def api_aurora_jobs(
    limit: int = Query(default=30, ge=1, le=200),
    status: Optional[str] = Query(default=None),
) -> Dict[str, Any]:
    """List Aurora jobs, optionally filtered by comma-separated statuses."""
    path = f"/api/aurora/jobs?limit={limit}"
    status_filter = (status or "").strip()
    if status_filter:
        # Commas stay unescaped so multi-status filters pass through.
        path += f"&status={quote(status_filter, safe=',')}"
    return await _aurora_request_json(
        "GET",
        path,
        timeout=20.0,
        retries=3,
        retry_backoff_sec=0.25,
    )
|
||
|
||
|
||
@app.get("/api/aurora/result/{job_id}")
async def api_aurora_result(job_id: str) -> Dict[str, Any]:
    """Fetch the final result payload for a job."""
    endpoint = f"/api/aurora/result/{quote(job_id, safe='')}"
    return await _aurora_request_json(
        "GET",
        endpoint,
        timeout=20.0,
        retries=4,
        retry_backoff_sec=0.35,
    )
|
||
|
||
|
||
@app.get("/api/aurora/quality/{job_id}")
async def api_aurora_quality(
    job_id: str,
    refresh: bool = Query(default=False),
) -> Dict[str, Any]:
    """Fetch (optionally recomputed) quality metrics for a job."""
    refresh_flag = "true" if refresh else "false"
    endpoint = f"/api/aurora/quality/{quote(job_id, safe='')}?refresh={refresh_flag}"
    return await _aurora_request_json(
        "GET",
        endpoint,
        timeout=20.0,
        retries=4,
        retry_backoff_sec=0.35,
    )
|
||
|
||
|
||
@app.get("/api/aurora/compare/{job_id}")
async def api_aurora_compare(job_id: str) -> Dict[str, Any]:
    """Before/after comparison with full metadata for a completed job.

    Combines upstream status/result payloads with local filesystem probes
    (file sizes, ffprobe) and generates preview frames plus face/plate
    detection overlays for the UI.
    """
    status = await _aurora_request_json(
        "GET", f"/api/aurora/status/{quote(job_id, safe='')}", timeout=15.0, retries=3
    )
    # Result payload is best-effort: jobs that are still running have none.
    result = {}
    try:
        result = await _aurora_request_json(
            "GET", f"/api/aurora/result/{quote(job_id, safe='')}", timeout=15.0, retries=2
        )
    except Exception:
        pass

    meta = status.get("metadata") or {}
    vid = meta.get("video") or {}
    storage = status.get("storage") or {}
    output_files = result.get("output_files") or status.get("output_files") or []
    proc_log = result.get("processing_log") or []

    input_path = storage.get("input_path", "")
    output_dir = storage.get("output_dir", "")

    # "Before" starts from upstream metadata; local probes refine it below.
    before: Dict[str, Any] = {
        "file_name": status.get("file_name") or (input_path.rsplit("/", 1)[-1] if input_path else "—"),
        "resolution": f"{vid.get('width', '?')}x{vid.get('height', '?')}" if vid.get("width") else "—",
        "width": vid.get("width"),
        "height": vid.get("height"),
        "duration_s": vid.get("duration_seconds"),
        "fps": vid.get("fps"),
        "frame_count": vid.get("frame_count"),
        "codec": "—",
        "file_size_mb": None,
    }

    if input_path:
        inp = Path(input_path)
        if inp.exists():
            before["file_size_mb"] = round(inp.stat().st_size / (1024 * 1024), 2)
        # Local ffprobe overrides upstream metadata where available.
        _probe = _ffprobe_quick(inp) if inp.exists() else {}
        if _probe:
            before["resolution"] = _probe.get("resolution", before["resolution"])
            before["width"] = _probe.get("width", before["width"])
            before["height"] = _probe.get("height", before["height"])
            before["duration_s"] = _probe.get("duration_s", before["duration_s"])
            before["fps"] = _probe.get("fps", before["fps"])
            before["frame_count"] = _probe.get("frame_count", before["frame_count"])
            before["codec"] = _probe.get("codec", "—")

    # Pick the first named video/photo output as the comparison target.
    result_file = None
    for f in output_files:
        if (f.get("type") == "video" or f.get("type") == "photo") and f.get("name"):
            result_file = f
            break

    after: Dict[str, Any] = {
        "file_name": result_file["name"] if result_file else "—",
        "resolution": "—",
        "width": None,
        "height": None,
        "duration_s": None,
        "fps": None,
        "frame_count": None,
        "codec": "—",
        "file_size_mb": None,
        "download_url": (result_file or {}).get("url"),
    }

    output_media_path: Optional[Path] = None
    if result_file and output_dir:
        out_path = Path(output_dir) / result_file["name"]
        if out_path.exists():
            output_media_path = out_path
            after["file_size_mb"] = round(out_path.stat().st_size / (1024 * 1024), 2)
            _probe = _ffprobe_quick(out_path)
            if _probe:
                after["resolution"] = _probe.get("resolution", "—")
                after["width"] = _probe.get("width")
                after["height"] = _probe.get("height")
                after["duration_s"] = _probe.get("duration_s")
                after["fps"] = _probe.get("fps")
                after["frame_count"] = _probe.get("frame_count")
                after["codec"] = _probe.get("codec", "—")

    # Summarize the processing log: total faces seen + per-step agent/model.
    faces_total = 0
    enhance_steps = []
    for step in proc_log:
        det = step.get("details") or {}
        if det.get("faces_detected_total") is not None:
            faces_total += det["faces_detected_total"]
        enhance_steps.append({
            "step": step.get("step", "?"),
            "agent": step.get("agent", "?"),
            "model": step.get("model", "?"),
            "time_ms": step.get("time_ms"),
        })

    frame_preview = _aurora_ensure_compare_frame_preview(
        job_id=job_id,
        media_type=str(status.get("media_type") or ""),
        input_path=Path(input_path) if input_path else None,
        output_path=output_media_path,
        output_dir=Path(output_dir) if output_dir else None,
    )
    detections = await _aurora_build_compare_detections(
        media_type=str(status.get("media_type") or ""),
        output_dir=Path(output_dir) if output_dir else None,
        frame_preview=frame_preview,
        fps=before.get("fps") or after.get("fps"),
    )

    return {
        "job_id": job_id,
        "status": status.get("status"),
        "mode": status.get("mode"),
        "media_type": status.get("media_type"),
        "elapsed_seconds": status.get("elapsed_seconds"),
        "before": before,
        "after": after,
        "faces_detected": faces_total,
        "enhance_steps": enhance_steps,
        "frame_preview": frame_preview,
        "detections": detections,
        "folder_path": output_dir,
        "input_path": input_path,
    }
|
||
|
||
|
||
def _aurora_extract_frame_preview(source: Path, target: Path, *, second: float = 1.0) -> bool:
|
||
"""Write a JPEG preview frame for image/video sources."""
|
||
if not source.exists():
|
||
return False
|
||
target.parent.mkdir(parents=True, exist_ok=True)
|
||
ext = source.suffix.lower()
|
||
if ext in {".jpg", ".jpeg", ".png", ".webp", ".bmp", ".tif", ".tiff"}:
|
||
try:
|
||
target.write_bytes(source.read_bytes())
|
||
return True
|
||
except Exception:
|
||
return False
|
||
|
||
ffmpeg = [
|
||
"ffmpeg",
|
||
"-hide_banner",
|
||
"-loglevel",
|
||
"error",
|
||
"-y",
|
||
"-ss",
|
||
f"{max(0.0, float(second)):.3f}",
|
||
"-i",
|
||
str(source),
|
||
"-frames:v",
|
||
"1",
|
||
"-q:v",
|
||
"2",
|
||
str(target),
|
||
]
|
||
try:
|
||
run = subprocess.run(ffmpeg, capture_output=True, text=True, timeout=20)
|
||
if run.returncode == 0 and target.exists() and target.stat().st_size > 0:
|
||
return True
|
||
except Exception:
|
||
pass
|
||
|
||
# Fallback for short videos / odd timestamps.
|
||
ffmpeg_fallback = ffmpeg[:]
|
||
ffmpeg_fallback[6] = "0.0"
|
||
try:
|
||
run = subprocess.run(ffmpeg_fallback, capture_output=True, text=True, timeout=20)
|
||
return run.returncode == 0 and target.exists() and target.stat().st_size > 0
|
||
except Exception:
|
||
return False
|
||
|
||
|
||
def _aurora_ensure_compare_frame_preview(
|
||
*,
|
||
job_id: str,
|
||
media_type: str,
|
||
input_path: Optional[Path],
|
||
output_path: Optional[Path],
|
||
output_dir: Optional[Path],
|
||
) -> Optional[Dict[str, Any]]:
|
||
if not output_dir or not output_dir.exists():
|
||
return None
|
||
if not input_path or not input_path.exists():
|
||
return None
|
||
if not output_path or not output_path.exists():
|
||
return None
|
||
|
||
before_name = "_compare_before.jpg"
|
||
after_name = "_compare_after.jpg"
|
||
before_path = output_dir / before_name
|
||
after_path = output_dir / after_name
|
||
ts = 1.0 if media_type == "video" else 0.0
|
||
|
||
if not before_path.exists() or before_path.stat().st_size == 0:
|
||
_aurora_extract_frame_preview(input_path, before_path, second=ts)
|
||
if not after_path.exists() or after_path.stat().st_size == 0:
|
||
_aurora_extract_frame_preview(output_path, after_path, second=ts)
|
||
|
||
if not before_path.exists() or not after_path.exists():
|
||
return None
|
||
if before_path.stat().st_size <= 0 or after_path.stat().st_size <= 0:
|
||
return None
|
||
|
||
quoted_job = quote(job_id, safe="")
|
||
return {
|
||
"timestamp_sec": ts,
|
||
"before_url": f"/api/aurora/files/{quoted_job}/{quote(before_name, safe='')}",
|
||
"after_url": f"/api/aurora/files/{quoted_job}/{quote(after_name, safe='')}",
|
||
}
|
||
|
||
|
||
def _aurora_bbox_xyxy(raw_bbox: Any) -> Optional[List[int]]:
|
||
if not isinstance(raw_bbox, (list, tuple)) or len(raw_bbox) < 4:
|
||
return None
|
||
try:
|
||
x1 = int(float(raw_bbox[0]))
|
||
y1 = int(float(raw_bbox[1]))
|
||
x2 = int(float(raw_bbox[2]))
|
||
y2 = int(float(raw_bbox[3]))
|
||
except Exception:
|
||
return None
|
||
if x2 < x1:
|
||
x1, x2 = x2, x1
|
||
if y2 < y1:
|
||
y1, y2 = y2, y1
|
||
if x2 <= x1 or y2 <= y1:
|
||
return None
|
||
return [x1, y1, x2, y2]
|
||
|
||
|
||
def _aurora_image_dims(path: Path) -> Optional[Dict[str, int]]:
    """Read image width/height via OpenCV; None when cv2 or the file is unavailable."""
    if cv2 is None or not path.exists():
        return None
    try:
        img = cv2.imread(str(path), cv2.IMREAD_COLOR)
        if img is None:
            return None
        height, width = img.shape[:2]
    except Exception:
        return None
    if width <= 0 or height <= 0:
        return None
    return {"width": int(width), "height": int(height)}
|
||
|
||
|
||
def _aurora_detect_faces_from_preview(path: Path) -> List[Dict[str, Any]]:
    """Best-effort face detection on a preview JPEG using OpenCV's Haar cascade.

    Returns up to 40 detections with x1y1x2y2 bboxes and a pseudo-confidence
    derived from local sharpness; empty on any failure.
    """
    if cv2 is None or not path.exists():
        return []
    try:
        frame = cv2.imread(str(path), cv2.IMREAD_COLOR)
        if frame is None:
            return []
        gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
        cascade = cv2.CascadeClassifier(
            str(Path(cv2.data.haarcascades) / "haarcascade_frontalface_default.xml")
        )
        if cascade.empty():
            return []
        boxes = cascade.detectMultiScale(gray, scaleFactor=1.1, minNeighbors=4, minSize=(20, 20))
        detections: List[Dict[str, Any]] = []
        for (x, y, w, h) in boxes[:40]:
            roi = gray[y : y + h, x : x + w]
            # Laplacian variance as a crude sharpness proxy → pseudo-confidence in [0.5, 0.99].
            sharpness = float(cv2.Laplacian(roi, cv2.CV_64F).var()) if roi.size > 0 else 0.0
            confidence = max(0.5, min(0.99, 0.55 + (sharpness / 400.0)))
            detections.append(
                {
                    "bbox": [int(x), int(y), int(x + w), int(y + h)],
                    "confidence": round(confidence, 3),
                }
            )
        return detections
    except Exception:
        return []
|
||
|
||
|
||
async def _aurora_detect_faces_via_service(path: Path) -> List[Dict[str, Any]]:
    """Ask the Aurora analyze endpoint for faces; convert xywh boxes to x1y1x2y2.

    Best-effort: any transport or parse failure yields an empty list.
    """
    if not path.exists():
        return []
    mime = mimetypes.guess_type(path.name)[0] or "application/octet-stream"
    timeout = httpx.Timeout(20.0, connect=6.0)
    try:
        async with httpx.AsyncClient(timeout=timeout) as client:
            with path.open("rb") as fh:
                resp = await client.post(
                    f"{AURORA_SERVICE_URL}/api/aurora/analyze",
                    files={"file": (path.name, fh, mime)},
                )
            if resp.status_code >= 400:
                return []
            payload = resp.json() if resp.content else {}
    except Exception:
        return []

    faces_raw = payload.get("faces")
    if not isinstance(faces_raw, list):
        return []
    results: List[Dict[str, Any]] = []
    for entry in faces_raw[:60]:
        if not isinstance(entry, dict):
            continue
        bbox = entry.get("bbox")
        if not isinstance(bbox, (list, tuple)) or len(bbox) < 4:
            continue
        try:
            x, y, w, h = (int(float(bbox[i])) for i in range(4))
        except Exception:
            continue
        if w <= 1 or h <= 1:
            continue
        conf: Optional[float]
        try:
            conf = round(float(entry.get("confidence")), 3)
        except Exception:
            conf = None
        results.append(
            {
                "bbox": [x, y, x + w, y + h],
                "confidence": conf,
            }
        )
    return results
|
||
|
||
|
||
def _aurora_select_plate_detections(
|
||
output_dir: Path,
|
||
*,
|
||
target_frame: Optional[int],
|
||
max_items: int = 12,
|
||
) -> List[Dict[str, Any]]:
|
||
report_path = output_dir / "plate_detections.json"
|
||
if not report_path.exists():
|
||
return []
|
||
try:
|
||
payload = json.loads(report_path.read_text(encoding="utf-8"))
|
||
except Exception:
|
||
return []
|
||
|
||
source_items: List[Any]
|
||
detections = payload.get("detections")
|
||
unique = payload.get("unique")
|
||
if isinstance(detections, list) and detections:
|
||
source_items = detections
|
||
elif isinstance(unique, list) and unique:
|
||
source_items = unique
|
||
else:
|
||
return []
|
||
|
||
parsed: List[Dict[str, Any]] = []
|
||
for item in source_items:
|
||
if not isinstance(item, dict):
|
||
continue
|
||
bbox = _aurora_bbox_xyxy(item.get("bbox"))
|
||
if not bbox:
|
||
continue
|
||
text_value = str(item.get("text") or "").strip()
|
||
conf_value: Optional[float]
|
||
try:
|
||
conf_value = round(float(item.get("confidence")), 3)
|
||
except Exception:
|
||
conf_value = None
|
||
frame_value: Optional[int]
|
||
try:
|
||
frame_value = int(item.get("frame")) if item.get("frame") is not None else None
|
||
except Exception:
|
||
frame_value = None
|
||
parsed.append(
|
||
{
|
||
"bbox": bbox,
|
||
"text": text_value or None,
|
||
"confidence": conf_value,
|
||
"frame": frame_value,
|
||
}
|
||
)
|
||
|
||
if not parsed:
|
||
return []
|
||
|
||
with_frame = [x for x in parsed if x.get("frame") is not None]
|
||
if target_frame is not None and with_frame:
|
||
min_distance = min(abs(int(x["frame"]) - int(target_frame)) for x in with_frame)
|
||
keep = max(4, min_distance + 2)
|
||
filtered = [x for x in with_frame if abs(int(x["frame"]) - int(target_frame)) <= keep]
|
||
filtered.sort(key=lambda x: (abs(int(x["frame"]) - int(target_frame)), -(x.get("confidence") or 0.0)))
|
||
return filtered[:max_items]
|
||
|
||
parsed.sort(key=lambda x: (-(x.get("confidence") or 0.0), x.get("text") or ""))
|
||
return parsed[:max_items]
|
||
|
||
|
||
async def _aurora_build_compare_detections(
|
||
*,
|
||
media_type: str,
|
||
output_dir: Optional[Path],
|
||
frame_preview: Optional[Dict[str, Any]],
|
||
fps: Any,
|
||
) -> Optional[Dict[str, Any]]:
|
||
if not output_dir or not output_dir.exists():
|
||
return None
|
||
if not isinstance(frame_preview, dict):
|
||
return None
|
||
|
||
before_path = output_dir / "_compare_before.jpg"
|
||
after_path = output_dir / "_compare_after.jpg"
|
||
before_faces = _aurora_detect_faces_from_preview(before_path)
|
||
after_faces = _aurora_detect_faces_from_preview(after_path)
|
||
if not before_faces and before_path.exists():
|
||
before_faces = await _aurora_detect_faces_via_service(before_path)
|
||
if not after_faces and after_path.exists():
|
||
after_faces = await _aurora_detect_faces_via_service(after_path)
|
||
before_size = _aurora_image_dims(before_path)
|
||
after_size = _aurora_image_dims(after_path)
|
||
|
||
target_ts = float(frame_preview.get("timestamp_sec") or 0.0)
|
||
target_frame: Optional[int] = None
|
||
if str(media_type).lower() == "video":
|
||
try:
|
||
fps_val = float(fps)
|
||
except Exception:
|
||
fps_val = 15.0
|
||
if fps_val <= 0:
|
||
fps_val = 15.0
|
||
target_frame = int(round(target_ts * fps_val))
|
||
|
||
plate_items = _aurora_select_plate_detections(output_dir, target_frame=target_frame)
|
||
|
||
return {
|
||
"target_timestamp_sec": target_ts if str(media_type).lower() == "video" else None,
|
||
"target_frame": target_frame,
|
||
"before": {
|
||
"frame_size": before_size,
|
||
"faces": before_faces,
|
||
"plates": plate_items,
|
||
},
|
||
"after": {
|
||
"frame_size": after_size,
|
||
"faces": after_faces,
|
||
"plates": plate_items,
|
||
},
|
||
}
|
||
|
||
|
||
def _ffprobe_quick(filepath: Path) -> Dict[str, Any]:
|
||
"""Quick ffprobe for resolution, codec, duration, fps, frame count."""
|
||
if not filepath.exists():
|
||
return {}
|
||
try:
|
||
import subprocess as _sp
|
||
raw = _sp.run(
|
||
["ffprobe", "-v", "quiet", "-print_format", "json", "-show_format", "-show_streams", str(filepath)],
|
||
capture_output=True, text=True, timeout=10
|
||
)
|
||
if raw.returncode != 0:
|
||
return {}
|
||
import json as _json
|
||
data = _json.loads(raw.stdout)
|
||
fmt = data.get("format") or {}
|
||
vs = [s for s in (data.get("streams") or []) if s.get("codec_type") == "video"]
|
||
if not vs:
|
||
return {"duration_s": round(float(fmt.get("duration", 0)), 2)}
|
||
v = vs[0]
|
||
w, h = v.get("width"), v.get("height")
|
||
rfr = v.get("r_frame_rate", "0/1").split("/")
|
||
fps = round(int(rfr[0]) / max(1, int(rfr[1])), 2) if len(rfr) == 2 else None
|
||
return {
|
||
"resolution": f"{w}x{h}" if w and h else "—",
|
||
"width": w, "height": h,
|
||
"codec": v.get("codec_name", "—"),
|
||
"duration_s": round(float(fmt.get("duration", 0)), 2),
|
||
"fps": fps,
|
||
"frame_count": int(v.get("nb_frames", 0)) or None,
|
||
}
|
||
except Exception:
|
||
return {}
|
||
|
||
|
||
@app.post("/api/aurora/cancel/{job_id}")
async def api_aurora_cancel(job_id: str) -> Dict[str, Any]:
    """Ask the Aurora service to cancel a running job."""
    endpoint = f"/api/aurora/cancel/{quote(job_id, safe='')}"
    return await _aurora_request_json(
        "POST",
        endpoint,
        timeout=20.0,
        retries=2,
        retry_backoff_sec=0.2,
    )
|
||
|
||
|
||
@app.post("/api/aurora/delete/{job_id}")
async def api_aurora_delete(
    job_id: str,
    purge_files: bool = Query(default=True),
) -> Dict[str, Any]:
    """Delete a job upstream, optionally purging its files on disk."""
    purge_flag = "true" if purge_files else "false"
    endpoint = f"/api/aurora/delete/{quote(job_id, safe='')}?purge_files={purge_flag}"
    return await _aurora_request_json(
        "POST",
        endpoint,
        timeout=30.0,
        retries=2,
        retry_backoff_sec=0.2,
    )
|
||
|
||
|
||
@app.get("/api/aurora/report/{job_id}.pdf")
async def api_aurora_report_pdf(job_id: str) -> StreamingResponse:
    """Stream PDF report from Aurora service without buffering in RAM.

    Tries the primary base URL, then the fallback on DNS-resolution
    failures. Fix: the per-attempt ``httpx.AsyncClient`` is now closed on
    transport errors (it previously leaked on the generic-exception path);
    client creation was also moved before the ``try`` to mirror
    ``api_aurora_file``.
    """
    encoded_job = quote(job_id, safe="")
    paths = [AURORA_SERVICE_URL]
    if AURORA_FALLBACK_URL and AURORA_FALLBACK_URL not in paths:
        paths.append(AURORA_FALLBACK_URL)
    last_err = ""
    for base in paths:
        url = f"{base}/api/aurora/report/{encoded_job}.pdf"
        client = httpx.AsyncClient(timeout=120.0)
        try:
            r = await client.send(client.build_request("GET", url), stream=True)
            if r.status_code >= 400:
                body = (await r.aread()).decode(errors="replace")[:400]
                await r.aclose()
                await client.aclose()
                raise HTTPException(status_code=r.status_code, detail=body or f"Aurora report error {r.status_code}")
            disposition = r.headers.get("content-disposition", f'inline; filename="{job_id}_forensic_report.pdf"')

            async def _stream():
                # Release response and client only after streaming finishes.
                try:
                    async for chunk in r.aiter_bytes(chunk_size=65536):
                        yield chunk
                finally:
                    await r.aclose()
                    await client.aclose()

            return StreamingResponse(
                _stream(),
                media_type="application/pdf",
                headers={"Content-Disposition": disposition, "Cache-Control": "no-store"},
            )
        except HTTPException:
            raise
        except Exception as e:
            # BUGFIX: close the client here — it previously leaked on errors.
            await client.aclose()
            last_err = str(e)[:200]
            # DNS resolution failures fall through to the fallback base URL.
            if "nodename nor servname provided" in str(e):
                continue
            raise HTTPException(status_code=502, detail=f"Aurora report proxy error: {last_err}")
    raise HTTPException(status_code=502, detail=f"Aurora report proxy error: {last_err or 'unavailable'}")
|
||
|
||
|
||
@app.get("/api/aurora/files/{job_id}/{file_name:path}")
async def api_aurora_file(job_id: str, file_name: str, request: Request) -> StreamingResponse:
    """Stream a job artifact from the Aurora service.

    Forwards conditional/range headers upstream and passes caching headers
    back, so media seeking works through the proxy. Fails over to the
    fallback base URL on 5xx responses or DNS-resolution errors.
    """
    encoded_job = quote(job_id, safe="")
    encoded_name = quote(file_name, safe="")
    paths = [AURORA_SERVICE_URL]
    if AURORA_FALLBACK_URL and AURORA_FALLBACK_URL not in paths:
        paths.append(AURORA_FALLBACK_URL)
    last_err = ""
    for base in paths:
        url = f"{base}/api/aurora/files/{encoded_job}/{encoded_name}"
        # Long read timeout: artifacts can be large media files.
        client = httpx.AsyncClient(timeout=httpx.Timeout(10.0, read=300.0))
        try:
            # Forward conditional/range headers so seeking and caching work.
            upstream_headers: Dict[str, str] = {}
            for name in ("range", "if-range", "if-none-match", "if-modified-since"):
                value = request.headers.get(name)
                if value:
                    upstream_headers[name] = value

            resp = await client.send(client.build_request("GET", url, headers=upstream_headers), stream=True)
            if resp.status_code >= 400:
                body = (await resp.aread()).decode(errors="replace")[:400]
                await resp.aclose()
                await client.aclose()
                # 5xx: try the next base URL; 4xx: surface to the caller as-is.
                if resp.status_code >= 500:
                    last_err = f"Aurora {resp.status_code}: {body}"
                    continue
                raise HTTPException(status_code=resp.status_code, detail=body or f"Aurora file error {resp.status_code}")
            ct = resp.headers.get("content-type", "application/octet-stream")
            # Pass through headers relevant to range requests and caching.
            passthrough_headers: Dict[str, str] = {}
            for name in (
                "content-disposition",
                "content-length",
                "content-range",
                "accept-ranges",
                "etag",
                "last-modified",
                "cache-control",
            ):
                value = resp.headers.get(name)
                if value:
                    passthrough_headers[name] = value
            if "content-disposition" not in passthrough_headers:
                passthrough_headers["content-disposition"] = f'inline; filename="{Path(file_name).name}"'
            passthrough_headers.setdefault("cache-control", "no-store")

            async def _stream():
                # Release response and client only after streaming finishes.
                try:
                    async for chunk in resp.aiter_bytes(chunk_size=65536):
                        yield chunk
                finally:
                    await resp.aclose()
                    await client.aclose()

            return StreamingResponse(
                _stream(),
                status_code=resp.status_code,
                media_type=ct,
                headers=passthrough_headers,
            )
        except HTTPException:
            raise
        except Exception as e:
            await client.aclose()
            last_err = str(e)[:200]
            # DNS resolution failures fall through to the fallback base URL.
            if "nodename nor servname provided" in str(e):
                continue
            raise HTTPException(status_code=502, detail=f"Aurora file proxy error: {last_err}")
    raise HTTPException(status_code=502, detail=f"Aurora file proxy error: {last_err or 'unavailable'}")
|
||
|
||
|
||
class MediaImageGenerateBody(BaseModel):
    """Request body for POST /api/media/generate/image.

    Numeric fields are clamped server-side by the endpoint (width/height to
    256..2048, steps to 1..120, guidance_scale to 0..20, timeout_s to 30..900).
    """
    prompt: str
    negative_prompt: Optional[str] = None
    width: int = 1024
    height: int = 1024
    steps: int = 28
    guidance_scale: float = 4.0
    timeout_s: int = 300
|
||
|
||
|
||
class MediaVideoGenerateBody(BaseModel):
    """Request body for POST /api/media/generate/video.

    The endpoint clamps seconds to 1..8, fps to 8..60, steps to 1..120 and
    timeout_s to 60..1200. style/aspect_ratio are only used by the swapper
    fallback path, not the router tool.
    """
    prompt: str
    seconds: int = 4
    fps: int = 24
    steps: int = 30
    style: str = "cinematic"
    aspect_ratio: str = "16:9"
    timeout_s: int = 360
|
||
|
||
|
||
class MediaImageModelLoadBody(BaseModel):
    """Request body for POST /api/media/models/image/load."""
    # Swapper-side model identifier; URL-encoded before being proxied.
    model: str
|
||
|
||
|
||
def _resolve_media_router_url() -> str:
    """Return the base URL of the media router (no trailing slash).

    An explicit MEDIA_ROUTER_URL override always wins. Otherwise the URL is
    discovered from the nodes registry: the NODA2 router if configured, else
    the router_url of the first registered node, else "".
    """
    registry = load_nodes_registry()
    first_node_url = ""
    if registry:
        first_node_url = list(registry.values())[0].get("router_url", "")
    discovered = (get_router_url("NODA2") or first_node_url).rstrip("/")
    return MEDIA_ROUTER_URL or discovered
|
||
|
||
|
||
def _media_router_candidates() -> List[str]:
    """Build an ordered, de-duplicated list of media-router base URLs to try.

    Order: resolved primary URL, then MEDIA_ROUTER_FALLBACK_URL, then a
    loopback rewrite of any "router" hostname, then last-resort local ports.
    All entries are stripped of trailing slashes.
    """
    raw = _resolve_media_router_url()
    candidates: List[str] = []
    for u in (raw, MEDIA_ROUTER_FALLBACK_URL):
        if not u:
            continue
        v = u.strip().rstrip("/")
        if v and v not in candidates:
            candidates.append(v)
        # NOTE(review): "router" is presumably a container-network-internal
        # hostname unresolvable from the host — add a 127.0.0.1 rewrite as an
        # extra candidate. Confirm against the deployment topology.
        if "://router:" in v or "://router/" in v:
            host_fixed = v.replace("://router:", "://127.0.0.1:").replace("://router/", "://127.0.0.1/")
            if host_fixed not in candidates:
                candidates.append(host_fixed)
    # Last-resort local defaults on common router ports.
    for port in ("9102", "8000"):
        local = f"http://127.0.0.1:{port}"
        if local not in candidates:
            candidates.append(local)
    return candidates
|
||
|
||
|
||
async def _pick_media_router_url() -> str:
    """Probe candidate media-router URLs and return the first reachable one.

    Returns "" when there are no candidates at all; when none respond to a
    /healthz probe, the first candidate is returned as a best guess.
    """
    options = _media_router_candidates()
    if not options:
        return ""
    for candidate in options:
        probe = await _probe_http(f"{candidate}/healthz", timeout=2.5)
        if probe.get("reachable"):
            return candidate
    # Nothing answered — fall back to the highest-priority candidate.
    return options[0]
|
||
|
||
|
||
def _media_append_job(kind: str, payload: Dict[str, Any]) -> Dict[str, Any]:
    """Record a media job in the in-memory recent-jobs ring and return it.

    The job gets a generated id, its kind, and a UTC timestamp; keys supplied
    in `payload` override those base fields on collision (matching dict-merge
    semantics). Newest jobs sit at the front of the deque.
    """
    job: Dict[str, Any] = {
        "id": f"media_{kind}_{uuid.uuid4().hex[:10]}",
        "kind": kind,
        "ts": datetime.now(timezone.utc).isoformat(),
    }
    job.update(payload)  # caller-supplied fields win on key collisions
    _media_recent_jobs.appendleft(job)
    return job
|
||
|
||
|
||
@app.get("/api/media/health")
async def api_media_health() -> Dict[str, Any]:
    """Aggregate health of all media services plus the image-model catalogue.

    Probes router, comfy agent/UI, swapper and image-gen concurrently; the
    gather result is positional, so the order of probe calls below must match
    the "services" mapping in the response.
    """
    router_url = await _pick_media_router_url()
    probes = await asyncio.gather(
        # When no router URL is known, substitute an already-resolved
        # "unreachable" result so gather ordering stays intact.
        _probe_http(f"{router_url}/healthz") if router_url else asyncio.sleep(0, result={"reachable": False, "error": "router missing"}),
        _probe_http(f"{MEDIA_COMFY_AGENT_URL}/health"),
        _probe_http(f"{MEDIA_COMFY_UI_URL}/"),
        _probe_http(f"{MEDIA_SWAPPER_URL}/health"),
        _probe_http(f"{MEDIA_IMAGE_GEN_URL}/health"),
    )
    # Best-effort fetch of the image model list; failures degrade to empty.
    image_models: Dict[str, Any] = {"image_models": []}
    try:
        async with httpx.AsyncClient(timeout=10.0) as client:
            r = await client.get(f"{MEDIA_SWAPPER_URL}/image/models")
            if r.status_code < 400 and r.content:
                image_models = r.json()
    except Exception:
        image_models = {"image_models": []}
    return {
        "ok": True,
        "router_url": router_url,
        "services": {
            "router": probes[0],
            "comfy_agent": probes[1],
            "comfy_ui": probes[2],
            "swapper": probes[3],
            "image_gen": probes[4],
        },
        "image_models": image_models.get("image_models", []),
        "active_image_model": image_models.get("active_image_model"),
        "fallback_order": ["comfy", "swapper", "image-gen-service"],
    }
|
||
|
||
|
||
@app.get("/api/media/models/image")
async def api_media_image_models() -> Dict[str, Any]:
    """Proxy the swapper's image-model catalogue to the UI.

    Upstream 4xx/5xx status codes are mirrored; transport failures surface
    as 502.
    """
    try:
        async with httpx.AsyncClient(timeout=15.0) as client:
            resp = await client.get(f"{MEDIA_SWAPPER_URL}/image/models")
        if resp.status_code >= 400:
            raise HTTPException(status_code=resp.status_code, detail=resp.text[:240] or "swapper error")
        payload = resp.json() if resp.content else {}
        return {
            "ok": True,
            "image_models": payload.get("image_models", []),
            "active_image_model": payload.get("active_image_model"),
            "device": payload.get("device"),
        }
    except HTTPException:
        raise
    except Exception as e:
        raise HTTPException(status_code=502, detail=f"Image models unavailable: {str(e)[:200]}")
|
||
|
||
|
||
@app.post("/api/media/models/image/load")
async def api_media_image_model_load(body: MediaImageModelLoadBody) -> Dict[str, Any]:
    """Ask the swapper to (re)load a specific image model.

    Model loads can be slow, hence the generous 300 s timeout. Upstream
    errors are mirrored; transport failures surface as 502.
    """
    model = body.model.strip()
    if not model:
        raise HTTPException(status_code=400, detail="model is required")
    load_url = f"{MEDIA_SWAPPER_URL}/image/models/{quote(model, safe='')}/load"
    try:
        async with httpx.AsyncClient(timeout=300.0) as client:
            resp = await client.post(load_url)
        if resp.status_code >= 400:
            raise HTTPException(status_code=resp.status_code, detail=resp.text[:240] or "swapper load error")
        payload = resp.json() if resp.content else {"status": "ok"}
        return {"ok": True, "result": payload}
    except HTTPException:
        raise
    except Exception as e:
        raise HTTPException(status_code=502, detail=f"Image model load failed: {str(e)[:200]}")
|
||
|
||
|
||
@app.post("/api/media/generate/image")
async def api_media_generate_image(body: MediaImageGenerateBody) -> Dict[str, Any]:
    """Generate an image via the router's image_generate tool.

    Input parameters are clamped to safe ranges before being forwarded. The
    outcome (ok or failed) is always recorded in the recent-jobs list; a
    failed tool call raises 502 with the recorded error.
    """
    prompt = body.prompt.strip()
    if not prompt:
        raise HTTPException(status_code=400, detail="prompt is required")
    router_url = await _pick_media_router_url()
    if not router_url:
        raise HTTPException(status_code=503, detail="Router URL not configured")

    # Clamp all tunables server-side regardless of what the client sent.
    params = {
        "prompt": prompt,
        "negative_prompt": body.negative_prompt or "",
        "width": max(256, min(2048, int(body.width))),
        "height": max(256, min(2048, int(body.height))),
        "steps": max(1, min(120, int(body.steps))),
        "guidance_scale": max(0.0, min(20.0, float(body.guidance_scale))),
        "timeout_s": max(30, min(900, int(body.timeout_s))),
    }
    started = time.monotonic()
    # HTTP timeout is tool timeout plus 30 s of slack for transport overhead.
    response = await execute_tool(
        router_url,
        tool="image_generate",
        action="generate",
        params=params,
        agent_id="sofiia",
        timeout=float(params["timeout_s"] + 30),
        api_key=ROUTER_API_KEY,
    )
    ok = response.get("status") == "ok"
    result_data = response.get("data") or {}
    # Record the attempt (success or failure) for the /api/media/jobs feed.
    result_item = _media_append_job(
        "image",
        {
            "status": "ok" if ok else "failed",
            "provider": "router:image_generate",
            "prompt": prompt[:180],
            "duration_ms": int((time.monotonic() - started) * 1000),
            "result": result_data.get("result"),
            "has_image_base64": bool(result_data.get("image_base64")),
            # Router errors may be either a dict with "message" or a string.
            "error": (response.get("error") or {}).get("message") if isinstance(response.get("error"), dict) else response.get("error"),
        },
    )
    if not ok:
        raise HTTPException(status_code=502, detail=f"Image generate failed: {result_item.get('error') or 'tool failed'}")
    return {"ok": True, "job": result_item, "tool_response": response}
|
||
|
||
|
||
@app.post("/api/media/generate/video")
async def api_media_generate_video(body: MediaVideoGenerateBody) -> Dict[str, Any]:
    """Generate a video via the router's comfy_generate_video tool.

    When the router tool fails, the swapper's /video/generate endpoint is
    tried as a fallback. Either way the attempt is recorded in the
    recent-jobs list; a failure on both paths raises 502.
    """
    prompt = body.prompt.strip()
    if not prompt:
        raise HTTPException(status_code=400, detail="prompt is required")
    router_url = await _pick_media_router_url()
    if not router_url:
        raise HTTPException(status_code=503, detail="Router URL not configured")

    # Clamp all tunables server-side.
    params = {
        "prompt": prompt,
        "seconds": max(1, min(8, int(body.seconds))),
        "fps": max(8, min(60, int(body.fps))),
        "steps": max(1, min(120, int(body.steps))),
        "timeout_s": max(60, min(1200, int(body.timeout_s))),
    }
    started = time.monotonic()
    response = await execute_tool(
        router_url,
        tool="comfy_generate_video",
        action="generate",
        params=params,
        agent_id="sofiia",
        timeout=float(params["timeout_s"] + 30),
        api_key=ROUTER_API_KEY,
    )
    ok = response.get("status") == "ok"
    provider = "router:comfy_generate_video"
    fallback_payload: Dict[str, Any] = {}
    if not ok:
        # Fallback path: swapper's simpler video endpoint (uses style and
        # aspect_ratio, which the router tool ignores).
        try:
            async with httpx.AsyncClient(timeout=120.0) as client:
                r = await client.post(
                    f"{MEDIA_SWAPPER_URL}/video/generate",
                    json={
                        "prompt": prompt,
                        "duration": params["seconds"],
                        "style": body.style,
                        "aspect_ratio": body.aspect_ratio,
                    },
                )
                if r.status_code < 400:
                    fallback_payload = r.json() if r.content else {}
                    ok = True
                    provider = "swapper:video/generate"
        except Exception as e:
            fallback_payload = {"error": str(e)[:200]}

    result_item = _media_append_job(
        "video",
        {
            "status": "ok" if ok else "failed",
            "provider": provider,
            "prompt": prompt[:180],
            "duration_ms": int((time.monotonic() - started) * 1000),
            # When the fallback produced anything (even an error payload),
            # it wins over the router's data.
            "result": (response.get("data") or {}).get("result") if not fallback_payload else fallback_payload,
            "error": None if ok else ((response.get("error") or {}).get("message") if isinstance(response.get("error"), dict) else response.get("error")),
        },
    )
    if not ok:
        raise HTTPException(status_code=502, detail=f"Video generate failed: {result_item.get('error') or 'tool failed'}")
    return {"ok": True, "job": result_item, "tool_response": response, "fallback_response": fallback_payload}
|
||
|
||
|
||
@app.get("/api/media/jobs")
async def api_media_jobs(limit: int = Query(default=20, ge=1, le=100)) -> Dict[str, Any]:
    """Return the most recent media jobs, newest first, capped at `limit`."""
    recent = list(_media_recent_jobs)[:limit]
    return {"ok": True, "count": len(recent), "jobs": recent}
|
||
|
||
|
||
# ─── Chat (runtime contract) ─────────────────────────────────────────────────
|
||
|
||
@app.get("/api/chat/config")
async def api_chat_config() -> Dict[str, Any]:
    """Expose effective chat/LLM tuning knobs so the UI can display them."""
    ollama_cfg = {
        "timeout_sec": SOFIIA_OLLAMA_TIMEOUT_SEC,
        "voice_timeout_sec": SOFIIA_OLLAMA_VOICE_TIMEOUT_SEC,
        "keep_alive": SOFIIA_OLLAMA_KEEP_ALIVE,
        "num_ctx": SOFIIA_OLLAMA_NUM_CTX,
        "num_thread": SOFIIA_OLLAMA_NUM_THREAD,
        "num_gpu": SOFIIA_OLLAMA_NUM_GPU,
    }
    return {
        "preferred_model": SOFIIA_PREFERRED_CHAT_MODEL,
        "ollama": ollama_cfg,
    }
|
||
|
||
|
||
class ChatSendBody(BaseModel):
    """Request body for the legacy POST /api/chat/send endpoint."""
    message: str
    # "provider:model" string; provider selects the backend branch
    # (ollama / router / glm / grok) in api_chat_send.
    model: str = "ollama:qwen3:14b"
    node_id: str = "NODA2"
    project_id: Optional[str] = None
    session_id: Optional[str] = None
    user_id: Optional[str] = None
    # Prior turns as {"role": ..., "content": ...} dicts; only the last 12
    # are forwarded to the LLM.
    history: List[Dict[str, Any]] = []
    # Voice routing hint — forwarded to Router as X-Voice-Profile header
    # Values: "voice_fast_uk" (default) | "voice_quality_uk"
    voice_profile: Optional[str] = None
|
||
|
||
|
||
# Umbrella project id under which all cross-node chat sessions are indexed.
CHAT_PROJECT_ID = "chats"
|
||
|
||
|
||
class ChatCreateBody(BaseModel):
    """Request body for POST /api/chats — parameters for building a chat id."""
    agent_id: str
    node_id: str = "NODA2"
    source: str = "console"
    # Optional external reference (e.g. an upstream chat id); defaults to
    # "main" when omitted.
    external_chat_ref: Optional[str] = None
    title: Optional[str] = None
|
||
|
||
|
||
class ChatMessageSendBody(BaseModel):
    """Request body for POST /api/chats/{chat_id}/send."""
    text: str
    attachments: List[Dict[str, Any]] = []
    project_id: Optional[str] = None
    session_id: Optional[str] = None
    user_id: Optional[str] = None
    # Routing overrides; "force_node_id" redirects the turn to another node.
    routing: Optional[Dict[str, Any]] = None
    # Arbitrary client metadata, forwarded to the router as "client_meta".
    client: Optional[Dict[str, Any]] = None
    # Body-level fallback for the Idempotency-Key header (header wins).
    idempotency_key: Optional[str] = None
|
||
|
||
|
||
def _make_chat_id(node_id: str, agent_id: str, source: str = "console", external_chat_ref: Optional[str] = None) -> str:
|
||
ext = (external_chat_ref or "main").strip() or "main"
|
||
return f"chat:{node_id.upper()}:{agent_id.strip().lower()}:{source.strip().lower()}:{ext}"
|
||
|
||
|
||
def _parse_chat_id(chat_id: str) -> Dict[str, str]:
|
||
raw = (chat_id or "").strip()
|
||
parts = raw.split(":", 4)
|
||
if len(parts) == 5 and parts[0] == "chat":
|
||
return {
|
||
"chat_id": raw,
|
||
"node_id": parts[1].upper(),
|
||
"agent_id": parts[2].lower(),
|
||
"source": parts[3].lower(),
|
||
"external_chat_ref": parts[4],
|
||
}
|
||
# Legacy fallback: treat arbitrary session_id as local NODA2 chat with sofiia
|
||
return {
|
||
"chat_id": raw,
|
||
"node_id": "NODA2",
|
||
"agent_id": "sofiia",
|
||
"source": "console",
|
||
"external_chat_ref": raw or "main",
|
||
}
|
||
|
||
|
||
async def _ensure_chat_project() -> None:
    """Create the umbrella chat project if it does not exist yet (idempotent)."""
    existing = await _app_db.get_project(CHAT_PROJECT_ID)
    if existing:
        return
    await _app_db.create_project(
        name="Chats",
        description="Cross-node chat index for Sofiia Console",
        project_id=CHAT_PROJECT_ID,
    )
|
||
|
||
|
||
def _clean_chat_reply(text: str) -> str:
|
||
import re
|
||
cleaned = re.sub(r"<think>.*?</think>", "", text, flags=re.DOTALL | re.IGNORECASE)
|
||
if "<think>" in cleaned.lower():
|
||
cleaned = re.split(r"(?i)<think>", cleaned)[0]
|
||
return cleaned.strip()
|
||
|
||
|
||
def _cursor_encode(payload: Dict[str, Any]) -> str:
|
||
wrapped = {"v": 1, **payload}
|
||
raw = json.dumps(wrapped, separators=(",", ":"), ensure_ascii=True).encode("utf-8")
|
||
return base64.urlsafe_b64encode(raw).decode("ascii")
|
||
|
||
|
||
def _cursor_decode(cursor: Optional[str]) -> Dict[str, Any]:
|
||
if not cursor:
|
||
return {}
|
||
try:
|
||
decoded = base64.urlsafe_b64decode(cursor.encode("ascii")).decode("utf-8")
|
||
data = json.loads(decoded)
|
||
if not isinstance(data, dict):
|
||
return {}
|
||
# Backward compatibility: accept old cursors without "v".
|
||
if "v" not in data:
|
||
return data
|
||
# Current cursor format version.
|
||
if int(data.get("v") or 0) == 1:
|
||
out = dict(data)
|
||
out.pop("v", None)
|
||
return out
|
||
return {}
|
||
except Exception:
|
||
return {}
|
||
|
||
|
||
@app.get("/api/chats")
async def api_chats_list(
    request: Request,
    nodes: str = Query("NODA1,NODA2"),
    agent_id: Optional[str] = Query(None),
    q: Optional[str] = Query(None),
    limit: int = Query(50, ge=1, le=200),
    cursor: Optional[str] = Query(None),
    _auth: str = Depends(require_auth),
):
    """List chat sessions with cursor pagination and node/agent/text filters.

    Sessions come from the umbrella "chats" project ordered by last_active;
    node/agent/q filtering happens in-process, so a page of `fetch_limit`
    sessions is over-fetched to fill `limit` items after filtering.
    """
    SOFIIA_CURSOR_REQUESTS_TOTAL.labels(resource="chats").inc()
    await _ensure_chat_project()
    node_filter = {n.strip().upper() for n in nodes.split(",") if n.strip()}
    # Cursor carries the (last_active, chat_id) keyset boundary of the
    # previous page.
    cur = _cursor_decode(cursor)
    before_last_active = str(cur.get("last_active") or "").strip() or None
    before_chat_id = str(cur.get("chat_id") or "").strip() or None
    # Over-fetch: filters below may reject most rows of the raw page.
    fetch_limit = max(limit * 5, limit + 1)
    sessions = await _app_db.list_sessions_page(
        CHAT_PROJECT_ID,
        limit=fetch_limit,
        before_last_active=before_last_active,
        before_session_id=before_chat_id,
    )

    items: List[Dict[str, Any]] = []
    agent_filter = (agent_id or "").strip().lower()
    q_filter = (q or "").strip().lower()
    for s in sessions:
        sid = str(s.get("session_id") or "")
        if not sid:
            continue
        info = _parse_chat_id(sid)
        if node_filter and info["node_id"] not in node_filter:
            continue
        if agent_filter and info["agent_id"] != agent_filter:
            continue
        # NOTE(review): one messages query per surviving session (N+1) just
        # to show the last message — consider a batched "last message per
        # session" db helper if this endpoint gets slow.
        msgs = await _app_db.list_messages(sid, limit=200)
        last = msgs[-1] if msgs else None
        item = {
            "chat_id": sid,
            "title": (s.get("title") or f"{info['agent_id']} • {info['node_id']}").strip(),
            "agent_id": info["agent_id"],
            "node_id": info["node_id"],
            "source": info["source"],
            "external_chat_ref": info["external_chat_ref"],
            "updated_at": s.get("last_active"),
            "last_message": (
                {
                    "message_id": last.get("msg_id"),
                    "role": last.get("role"),
                    "text": (last.get("content") or "")[:280],
                    "ts": last.get("ts"),
                } if last else None
            ),
            "turn_count": s.get("turn_count", 0),
        }
        # Free-text filter matches title, agent, node and last-message text.
        if q_filter:
            hay = " ".join(
                [
                    item["title"],
                    item["agent_id"],
                    item["node_id"],
                    (item["last_message"] or {}).get("text", ""),
                ]
            ).lower()
            if q_filter not in hay:
                continue
        items.append(item)
        if len(items) >= limit:
            break

    # Next page resumes from the last accepted item's keyset position.
    next_cursor = None
    if items:
        last_item = items[-1]
        next_cursor = _cursor_encode(
            {
                "last_active": last_item.get("updated_at"),
                "chat_id": last_item.get("chat_id"),
            }
        )
    has_more = len(sessions) >= fetch_limit or len(items) >= limit
    log_event(
        "chat.list",
        request_id=get_request_id(request),
        node_id=",".join(sorted(node_filter)) if node_filter else None,
        agent_id=(agent_id or None),
        cursor_present=bool(cursor),
        limit=limit,
        has_more=has_more,
        next_cursor_present=bool(next_cursor),
        status="ok",
    )
    return {
        "items": items,
        "count": len(items),
        "nodes": sorted(node_filter),
        "project_id": CHAT_PROJECT_ID,
        "next_cursor": next_cursor,
        "has_more": has_more,
    }
|
||
|
||
|
||
@app.post("/api/chats")
async def api_chat_create(body: ChatCreateBody, _auth: str = Depends(require_auth)):
    """Create (or upsert) a chat session and return its canonical descriptor."""
    await _ensure_chat_project()
    chat_id = _make_chat_id(
        node_id=body.node_id,
        agent_id=body.agent_id,
        source=body.source,
        external_chat_ref=body.external_chat_ref,
    )
    parsed = _parse_chat_id(chat_id)
    default_title = f"{parsed['agent_id']} • {parsed['node_id']} • {parsed['source']}"
    title = (body.title or default_title).strip()
    session = await _app_db.upsert_session(chat_id, project_id=CHAT_PROJECT_ID, title=title)
    chat = {
        "chat_id": chat_id,
        "title": title,
        "agent_id": parsed["agent_id"],
        "node_id": parsed["node_id"],
        "source": parsed["source"],
        "external_chat_ref": parsed["external_chat_ref"],
        "updated_at": session.get("last_active"),
    }
    return {"ok": True, "chat": chat}
|
||
|
||
|
||
@app.get("/api/chats/{chat_id}/messages")
async def api_chat_messages(
    chat_id: str,
    request: Request,
    limit: int = Query(100, ge=1, le=500),
    cursor: Optional[str] = Query(None),
    _auth: str = Depends(require_auth),
):
    """Page through a chat's messages, oldest-first within each page.

    Rows are fetched newest-first with a (ts, msg_id) keyset cursor; one
    extra row is requested to detect has_more, then the page is reversed for
    chronological display.
    """
    SOFIIA_CURSOR_REQUESTS_TOTAL.labels(resource="messages").inc()
    cur = _cursor_decode(cursor)
    before_ts = str(cur.get("ts") or "").strip() or None
    before_message_id = str(cur.get("message_id") or "").strip() or None
    # limit + 1: the sentinel row only signals there is another page.
    rows_desc = await _app_db.list_messages_page(
        chat_id,
        limit=limit + 1,
        before_ts=before_ts,
        before_msg_id=before_message_id,
    )
    has_more = len(rows_desc) > limit
    page_desc = rows_desc[:limit]
    rows = list(reversed(page_desc))  # chronological order for the UI
    info = _parse_chat_id(chat_id)
    messages = [
        {
            "message_id": r.get("msg_id"),
            "chat_id": chat_id,
            "role": r.get("role"),
            "text": r.get("content", ""),
            "ts": r.get("ts"),
            "meta": {
                "node_id": info["node_id"],
                "agent_id": info["agent_id"],
                "source": info["source"],
            },
        }
        for r in rows
    ]
    # Next page continues before the oldest row of this (descending) page.
    next_cursor = None
    if has_more and page_desc:
        tail = page_desc[-1]
        next_cursor = _cursor_encode({"ts": tail.get("ts"), "message_id": tail.get("msg_id")})
    log_event(
        "chat.messages.list",
        request_id=get_request_id(request),
        chat_id=chat_id,
        node_id=info["node_id"],
        agent_id=info["agent_id"],
        cursor_present=bool(cursor),
        limit=limit,
        has_more=has_more,
        next_cursor_present=bool(next_cursor),
        status="ok",
    )
    return {
        "items": messages,
        "count": len(messages),
        "chat_id": chat_id,
        "next_cursor": next_cursor,
        "has_more": has_more,
    }
|
||
|
||
|
||
@app.post("/api/chats/{chat_id}/send")
async def api_chat_send_v2(chat_id: str, body: ChatMessageSendBody, request: Request, _auth: str = Depends(require_auth)):
    """Send a message into a chat and return the assistant's reply.

    Pipeline: per-IP rate gate → input validation → idempotency key
    resolution → per-chat and per-operator token buckets → idempotent
    replay → persist user turn → router infer → persist assistant turn →
    cache for replay. Structured log events are emitted at every outcome.
    """
    started_at = time.monotonic()
    client_ip = request.client.host if request.client else "unknown"
    # Coarse per-IP gate in front of the finer token buckets below.
    if not _check_rate(f"chat_v2:{client_ip}", max_calls=30, window_sec=60):
        raise HTTPException(status_code=429, detail="Rate limit: 30 messages/min")
    text = (body.text or "").strip()
    if not text:
        raise HTTPException(status_code=400, detail="text is required")
    request_id = get_request_id(request)
    # Header takes precedence over body; key is capped at 128 chars.
    idem_key = (
        (
            request.headers.get("Idempotency-Key")
            or body.idempotency_key
            or ""
        ).strip()
    )[:128]
    idem_hash = hash_idempotency_key(idem_key)
    info = _parse_chat_id(chat_id)
    # Routing override > chat-id node > default NODA2.
    target_node = ((body.routing or {}).get("force_node_id") or info["node_id"] or "NODA2").upper()
    target_agent = info["agent_id"] or "sofiia"
    operator_id, operator_id_missing = _resolve_operator_id(request, body, request_id)
    # Per-chat token bucket.
    chat_rl = _rate_limiter.consume(f"rl:chat:{chat_id}", rps=_RL_CHAT_RPS, burst=_RL_CHAT_BURST)
    if not chat_rl.allowed:
        SOFIIA_RATE_LIMITED_TOTAL.labels(scope="chat").inc()
        log_event(
            "chat.send.rate_limited",
            request_id=request_id,
            scope="chat",
            chat_id=chat_id,
            node_id=target_node,
            agent_id=target_agent,
            operator_id=operator_id,
            operator_id_missing=operator_id_missing,
            limit_rps=_RL_CHAT_RPS,
            burst=_RL_CHAT_BURST,
            retry_after_s=chat_rl.retry_after_s,
            status="error",
            error_code="rate_limited",
        )
        raise _rate_limited_http("chat", chat_rl.retry_after_s)
    # Per-operator token bucket.
    op_rl = _rate_limiter.consume(f"rl:op:{operator_id}", rps=_RL_OP_RPS, burst=_RL_OP_BURST)
    if not op_rl.allowed:
        SOFIIA_RATE_LIMITED_TOTAL.labels(scope="operator").inc()
        log_event(
            "chat.send.rate_limited",
            request_id=request_id,
            scope="operator",
            chat_id=chat_id,
            node_id=target_node,
            agent_id=target_agent,
            operator_id=operator_id,
            operator_id_missing=operator_id_missing,
            limit_rps=_RL_OP_RPS,
            burst=_RL_OP_BURST,
            retry_after_s=op_rl.retry_after_s,
            status="error",
            error_code="rate_limited",
        )
        raise _rate_limited_http("operator", op_rl.retry_after_s)
    log_event(
        "chat.send",
        request_id=request_id,
        chat_id=chat_id,
        node_id=target_node,
        agent_id=target_agent,
        operator_id=operator_id,
        operator_id_missing=operator_id_missing,
        idempotency_key_hash=(idem_hash or None),
        replayed=False,
        status="ok",
    )
    # Idempotent replay: a cached response for (chat_id, key) is returned
    # verbatim without re-invoking the router.
    if idem_key:
        cache_key = f"{chat_id}::{idem_key}"
        cached = _idempotency_store.get(cache_key)
        if cached:
            SOFIIA_IDEMPOTENCY_REPLAYS_TOTAL.inc()
            log_event(
                "chat.send.replay",
                request_id=request_id,
                chat_id=chat_id,
                node_id=target_node,
                agent_id=target_agent,
                idempotency_key_hash=(idem_hash or None),
                replayed=True,
                message_id=cached.message_id,
                status="ok",
            )
            replay = dict(cached.response_body)
            replay["idempotency"] = {"replayed": True, "key": idem_key}
            return replay

    await _ensure_chat_project()
    SOFIIA_SEND_REQUESTS_TOTAL.labels(node_id=target_node).inc()
    project_id = body.project_id or CHAT_PROJECT_ID
    session_id = body.session_id or chat_id
    user_id = operator_id
    title = f"{target_agent} • {target_node} • {info['source']}"
    await _app_db.upsert_session(chat_id, project_id=CHAT_PROJECT_ID, title=title)

    # Persist the user's turn before calling the LLM (truncated to 4 KiB).
    user_saved = await _app_db.save_message(chat_id, "user", text[:4096])
    metadata: Dict[str, Any] = {
        "project_id": project_id,
        "session_id": session_id,
        "user_id": operator_id,
        "operator_id": operator_id,
        "client": "sofiia-console",
        "chat_id": chat_id,
        "node_id": target_node,
        "agent_id": target_agent,
        "source": info["source"],
        "external_chat_ref": info["external_chat_ref"],
        "attachments": body.attachments or [],
        "client_meta": body.client or {},
    }
    base_url = get_router_url(target_node)
    if not base_url:
        duration_ms = int((time.monotonic() - started_at) * 1000)
        log_event(
            "chat.send.error",
            request_id=request_id,
            chat_id=chat_id,
            node_id=target_node,
            agent_id=target_agent,
            idempotency_key_hash=(idem_hash or None),
            status="error",
            error_code="router_url_not_configured",
            duration_ms=duration_ms,
        )
        raise HTTPException(status_code=400, detail=f"router_url is not configured for node {target_node}")
    try:
        out = await infer(
            base_url,
            target_agent,
            text,
            model=None,
            metadata=metadata,
            timeout=300.0,
            api_key=ROUTER_API_KEY,
        )
    except Exception as e:
        # Upstream failure: log, broadcast to live listeners, surface 502.
        duration_ms = int((time.monotonic() - started_at) * 1000)
        log_event(
            "chat.send.error",
            request_id=request_id,
            chat_id=chat_id,
            node_id=target_node,
            agent_id=target_agent,
            idempotency_key_hash=(idem_hash or None),
            status="error",
            error_code="upstream_error",
            error=str(e)[:180],
            duration_ms=duration_ms,
        )
        _broadcast_bg(
            _make_event(
                "error",
                {"where": "chat_v2.router", "message": str(e)[:180], "chat_id": chat_id, "node_id": target_node, "agent_id": target_agent},
                project_id=project_id,
                session_id=session_id,
                user_id=user_id,
            )
        )
        raise HTTPException(status_code=502, detail=str(e)[:300])

    # Strip <think> blocks and persist the assistant turn linked to the
    # user's message.
    reply = _clean_chat_reply(out.get("response", out.get("text", "")))
    assistant_saved = await _app_db.save_message(chat_id, "assistant", (reply or "")[:4096], parent_msg_id=user_saved.get("msg_id"))
    trace_id = f"chatv2_{session_id}_{uuid.uuid4().hex[:8]}"
    result = {
        "ok": True,
        "accepted": True,
        "chat_id": chat_id,
        "node_id": target_node,
        "agent_id": target_agent,
        "trace_id": trace_id,
        "message": {
            "message_id": assistant_saved.get("msg_id"),
            "role": "assistant",
            "text": reply,
            "ts": assistant_saved.get("ts"),
            "meta": {
                "node_id": target_node,
                "agent_id": target_agent,
                "backend": out.get("backend"),
                "model": out.get("model"),
            },
        },
    }
    # Cache the successful response so retries with the same key replay it.
    if idem_key:
        cache_key = f"{chat_id}::{idem_key}"
        _idempotency_store.set(
            cache_key,
            ReplayEntry(
                message_id=str((result.get("message") or {}).get("message_id") or ""),
                response_body=dict(result),
                created_at=time.monotonic(),
                node_id=target_node,
            ),
        )
        result["idempotency"] = {"replayed": False, "key": idem_key}
    duration_ms = int((time.monotonic() - started_at) * 1000)
    log_event(
        "chat.send.ok",
        request_id=request_id,
        chat_id=chat_id,
        node_id=target_node,
        agent_id=target_agent,
        idempotency_key_hash=(idem_hash or None),
        message_id=(result.get("message") or {}).get("message_id"),
        status="ok",
        duration_ms=duration_ms,
    )
    return result
|
||
|
||
|
||
@app.get("/metrics")
def metrics():
    """Prometheus scrape endpoint: render current metrics in exposition format."""
    payload, ctype = render_metrics()
    return Response(content=payload, media_type=ctype)
|
||
|
||
|
||
@app.post("/api/chat/send")
|
||
async def api_chat_send(body: ChatSendBody, request: Request):
|
||
"""BFF chat: Ollama or router. Returns runtime contract fields. Rate: 30/min."""
|
||
client_ip = request.client.host if request.client else "unknown"
|
||
if not _check_rate(f"chat:{client_ip}", max_calls=30, window_sec=60):
|
||
raise HTTPException(status_code=429, detail="Rate limit: 30 messages/min")
|
||
|
||
# Runtime identity
|
||
project_id = body.project_id or "default"
|
||
session_id = body.session_id or f"sess_{uuid.uuid4().hex[:12]}"
|
||
user_id = body.user_id or "console_user"
|
||
|
||
provider, _, model_name = body.model.partition(":")
|
||
reply = ""
|
||
t0 = time.monotonic()
|
||
|
||
def _clean_reply(text: str) -> str:
|
||
"""Strip <think>...</think> reasoning blocks (Qwen3/DeepSeek-R1) before returning to user.
|
||
|
||
Strategy:
|
||
1. re.DOTALL regex removes complete <think>...</think> blocks.
|
||
2. Fallback split removes any trailing unclosed <think> block
|
||
(model stopped mid-reasoning without </think>).
|
||
"""
|
||
import re
|
||
# Primary: strip complete blocks (multiline-safe with DOTALL)
|
||
cleaned = re.sub(r"<think>.*?</think>", "", text,
|
||
flags=re.DOTALL | re.IGNORECASE)
|
||
# Fallback: if an unclosed <think> block remains, drop everything after it
|
||
if "<think>" in cleaned.lower():
|
||
cleaned = re.split(r"(?i)<think>", cleaned)[0]
|
||
return cleaned.strip()
|
||
|
||
# Broadcast: user message sent
|
||
_broadcast_bg(_make_event("chat.message",
|
||
{"text": body.message[:200], "provider": provider, "model": body.model},
|
||
project_id=project_id, session_id=session_id, user_id=user_id))
|
||
|
||
# voice_profile determines LLM options for voice turns.
|
||
# None = text chat (full prompt, no token limit enforcement).
|
||
_vp = body.voice_profile # "voice_fast_uk" | "voice_quality_uk" | None
|
||
_is_voice_turn = _vp is not None
|
||
_is_quality = _vp == "voice_quality_uk"
|
||
|
||
# System prompt: voice turns get guardrails appended
|
||
_system_prompt = SOFIIA_SYSTEM_PROMPT
|
||
if _is_voice_turn:
|
||
_system_prompt = SOFIIA_SYSTEM_PROMPT + SOFIIA_VOICE_PROMPT_SUFFIX
|
||
|
||
_voice_options = {
|
||
"temperature": 0.18 if _is_quality else 0.15,
|
||
"repeat_penalty": 1.1,
|
||
"num_predict": 256 if _is_quality else 220, # max_tokens per contract (≤256)
|
||
} if _is_voice_turn else {
|
||
"temperature": 0.15,
|
||
"repeat_penalty": 1.1,
|
||
"num_predict": SOFIIA_OLLAMA_NUM_PREDICT_TEXT,
|
||
}
|
||
|
||
if provider == "ollama":
|
||
ollama_url = get_ollama_url()
|
||
effective_model_name = model_name or "qwen3:14b"
|
||
messages: List[Dict[str, Any]] = [{"role": "system", "content": _system_prompt}]
|
||
messages.extend(body.history[-12:])
|
||
messages.append({"role": "user", "content": body.message})
|
||
try:
|
||
async with httpx.AsyncClient(timeout=SOFIIA_OLLAMA_TIMEOUT_SEC) as client:
|
||
r = await client.post(
|
||
f"{ollama_url}/api/chat",
|
||
json=_make_ollama_payload(effective_model_name, messages, _voice_options),
|
||
)
|
||
r.raise_for_status()
|
||
data = r.json()
|
||
reply = _clean_reply((data.get("message") or {}).get("content", "") or "Ollama: порожня відповідь")
|
||
except httpx.HTTPStatusError as e:
|
||
err_msg = f"Ollama HTTP {e.response.status_code}"
|
||
_broadcast_bg(_make_event("error", {"where": "ollama", "message": err_msg},
|
||
project_id=project_id, session_id=session_id, user_id=user_id))
|
||
raise HTTPException(status_code=502, detail=f"{err_msg}: {str(e)[:200]}")
|
||
except Exception as e:
|
||
_broadcast_bg(_make_event("error", {"where": "ollama", "message": str(e)[:100]},
|
||
project_id=project_id, session_id=session_id, user_id=user_id))
|
||
raise HTTPException(status_code=502, detail=f"Ollama error: {str(e)[:200]}")
|
||
|
||
elif provider == "router":
|
||
base_url = get_router_url(body.node_id)
|
||
router_agent_id = "sofiia"
|
||
router_model = None
|
||
if model_name:
|
||
if "|" in model_name:
|
||
left, right = model_name.split("|", 1)
|
||
router_agent_id = left or "sofiia"
|
||
router_model = right or None
|
||
elif ":" in model_name:
|
||
# Looks like model id (qwen3:14b, qwen3.5:35b-a3b, etc.)
|
||
router_model = model_name
|
||
elif model_name not in ("default",):
|
||
# Treat plain token as agent id (router:soul, router:monitor, ...)
|
||
router_agent_id = model_name
|
||
metadata: Dict[str, Any] = {
|
||
"project_id": project_id,
|
||
"session_id": session_id,
|
||
"user_id": user_id,
|
||
"client": "sofiia-console",
|
||
"voice_profile": _vp,
|
||
}
|
||
try:
|
||
out = await infer(
|
||
base_url,
|
||
router_agent_id,
|
||
body.message,
|
||
model=router_model,
|
||
metadata=metadata,
|
||
timeout=300.0,
|
||
api_key=ROUTER_API_KEY,
|
||
)
|
||
reply = _clean_reply(out.get("response", out.get("text", "")))
|
||
except Exception as e:
|
||
_broadcast_bg(_make_event("error", {"where": "router", "message": str(e)[:100]},
|
||
project_id=project_id, session_id=session_id, user_id=user_id))
|
||
raise HTTPException(status_code=502, detail=str(e)[:300])
|
||
|
||
elif provider == "glm":
|
||
# Zhipu AI GLM — OpenAI-compatible API at bigmodel.cn
|
||
glm_api_key = os.getenv("GLM5_API_KEY", os.getenv("GLM_API_KEY", "")).strip()
|
||
if not glm_api_key:
|
||
raise HTTPException(status_code=503, detail="GLM5_API_KEY not set.")
|
||
glm_model = model_name or "glm-4.7"
|
||
messages_glm: List[Dict[str, Any]] = [{"role": "system", "content": _system_prompt}]
|
||
messages_glm.extend(body.history[-12:])
|
||
messages_glm.append({"role": "user", "content": body.message})
|
||
try:
|
||
async with httpx.AsyncClient(timeout=120.0) as client:
|
||
r = await client.post(
|
||
"https://open.bigmodel.cn/api/paas/v4/chat/completions",
|
||
headers={"Authorization": f"Bearer {glm_api_key}", "Content-Type": "application/json"},
|
||
json={"model": glm_model, "messages": messages_glm, "stream": False},
|
||
)
|
||
r.raise_for_status()
|
||
data = r.json()
|
||
reply = _clean_reply((data.get("choices") or [{}])[0].get("message", {}).get("content", "") or "GLM: порожня відповідь")
|
||
except httpx.HTTPStatusError as e:
|
||
err_msg = f"GLM HTTP {e.response.status_code}: {e.response.text[:200]}"
|
||
_broadcast_bg(_make_event("error", {"where": "glm", "message": err_msg},
|
||
project_id=project_id, session_id=session_id, user_id=user_id))
|
||
raise HTTPException(status_code=502, detail=err_msg)
|
||
except Exception as e:
|
||
_broadcast_bg(_make_event("error", {"where": "glm", "message": str(e)[:100]},
|
||
project_id=project_id, session_id=session_id, user_id=user_id))
|
||
raise HTTPException(status_code=502, detail=f"GLM error: {str(e)[:200]}")
|
||
|
||
elif provider == "grok":
|
||
# xAI Grok — OpenAI-compatible API
|
||
xai_api_key = os.getenv("XAI_API_KEY", "").strip()
|
||
if not xai_api_key:
|
||
raise HTTPException(status_code=503, detail="XAI_API_KEY not set. Add it to BFF environment.")
|
||
grok_model = model_name or "grok-4-1-fast-reasoning"
|
||
messages: List[Dict[str, Any]] = [{"role": "system", "content": _system_prompt}]
|
||
messages.extend(body.history[-12:])
|
||
messages.append({"role": "user", "content": body.message})
|
||
try:
|
||
async with httpx.AsyncClient(timeout=120.0) as client:
|
||
r = await client.post(
|
||
"https://api.x.ai/v1/chat/completions",
|
||
headers={"Authorization": f"Bearer {xai_api_key}", "Content-Type": "application/json"},
|
||
json={"model": grok_model, "messages": messages, "stream": False},
|
||
)
|
||
r.raise_for_status()
|
||
data = r.json()
|
||
reply = _clean_reply((data.get("choices") or [{}])[0].get("message", {}).get("content", "") or "Grok: порожня відповідь")
|
||
except httpx.HTTPStatusError as e:
|
||
err_msg = f"Grok HTTP {e.response.status_code}: {e.response.text[:200]}"
|
||
_broadcast_bg(_make_event("error", {"where": "grok", "message": err_msg},
|
||
project_id=project_id, session_id=session_id, user_id=user_id))
|
||
raise HTTPException(status_code=502, detail=err_msg)
|
||
except Exception as e:
|
||
_broadcast_bg(_make_event("error", {"where": "grok", "message": str(e)[:100]},
|
||
project_id=project_id, session_id=session_id, user_id=user_id))
|
||
raise HTTPException(status_code=502, detail=f"Grok error: {str(e)[:200]}")
|
||
|
||
else:
|
||
raise HTTPException(status_code=400, detail=f"Unsupported provider: {provider}. Use ollama, router, or grok.")
|
||
|
||
latency_ms = int((time.monotonic() - t0) * 1000)
|
||
tokens_est = len(reply.split())
|
||
trace_id = f"chat_{session_id}_{uuid.uuid4().hex[:8]}"
|
||
|
||
# Broadcast: reply
|
||
_broadcast_bg(_make_event("chat.reply",
|
||
{"text": reply[:200], "provider": provider, "model": body.model,
|
||
"latency_ms": latency_ms, "trace_id": trace_id},
|
||
project_id=project_id, session_id=session_id, user_id=user_id))
|
||
|
||
# Memory save (best-effort, non-blocking)
|
||
asyncio.get_event_loop().create_task(
|
||
_do_save_memory(body.message, reply, session_id, project_id, user_id)
|
||
)
|
||
|
||
# AISTALK forward (if enabled)
|
||
if _aistalk:
|
||
try:
|
||
_aistalk.handle_event(_make_event("chat.reply",
|
||
{"text": reply, "provider": provider, "model": body.model},
|
||
project_id=project_id, session_id=session_id, user_id=user_id))
|
||
except Exception as e:
|
||
logger.debug("AISTALK forward failed: %s", e)
|
||
|
||
return {
|
||
"ok": True,
|
||
"project_id": project_id,
|
||
"session_id": session_id,
|
||
"user_id": user_id,
|
||
"response": reply,
|
||
"model": body.model,
|
||
"backend": provider,
|
||
"trace_id": trace_id,
|
||
"meta": {
|
||
"latency_ms": latency_ms,
|
||
"tokens_est": tokens_est,
|
||
"trace_id": trace_id,
|
||
},
|
||
}
|
||
|
||
|
||
async def _do_save_memory(
    user_msg: str,
    ai_reply: str,
    session_id: str,
    project_id: str = "default",
    user_id: str = "console_user",
    agent_id: str = "sofiia",
) -> None:
    """Persist one chat turn (user message + assistant reply) to two stores.

    Both persistence layers are deliberately non-fatal: this coroutine runs as
    a background task after the HTTP response has already been sent, so every
    failure is only logged (debug/warning) and never raised to the caller.
    """
    # 1) Persist to local SQLite (projects/sessions/messages schema)
    try:
        # Ensure target project exists to satisfy sessions.project_id FK.
        proj = await _app_db.get_project(project_id)
        if not proj:
            await _app_db.create_project(
                name=project_id.upper(),
                description=f"Auto-created project for {project_id} sessions",
                project_id=project_id,
            )
        await _app_db.upsert_session(session_id, project_id=project_id)
        last_msg = None
        if user_msg:
            # Messages are truncated to 4096 chars before storage.
            saved = await _app_db.save_message(session_id, "user", user_msg[:4096])
            last_msg = saved["msg_id"]
        if ai_reply:
            # Link the assistant reply to the user message it answers (if any).
            await _app_db.save_message(
                session_id, "assistant", ai_reply[:4096], parent_msg_id=last_msg
            )
    except Exception as e:
        logger.debug("SQLite memory save skipped: %s", e)

    # 2) Best-effort: also send to Memory Service (Qdrant + Neo4j)
    mem_url = get_memory_service_url()
    try:
        async with httpx.AsyncClient(timeout=5.0) as client:
            # One POST per non-empty side of the turn; content capped at 1000 chars.
            for role, content in [("user", user_msg), ("assistant", ai_reply)]:
                if not content:
                    continue
                resp = await client.post(f"{mem_url}/agents/{agent_id}/memory", json={
                    "agent_id": agent_id,
                    "role": role,
                    "content": content[:1000],
                    "user_id": user_id,
                    "channel_id": session_id,
                    "metadata": {"project_id": project_id, "client": "sofiia-console", "agent_id": agent_id},
                })
                if resp.status_code >= 400:
                    # Upstream rejected the write — log and continue with the next item.
                    logger.warning(
                        "Memory Service save failed status=%s agent=%s session=%s body=%s",
                        resp.status_code,
                        agent_id,
                        session_id,
                        (resp.text or "")[:240],
                    )
    except Exception as e:
        logger.debug("Memory Service save skipped: %s", e)
|
||
|
||
|
||
# ─── Ops ────────────────────────────────────────────────────────────────────
|
||
|
||
class OpsRunBody(BaseModel):
    """Request body for POST /api/ops/run."""
    action_id: str                       # ops action identifier (see OPS_ACTIONS)
    node_id: str = "NODA2"               # target node id
    params: dict = {}                    # action-specific parameters
    project_id: Optional[str] = None     # handler defaults this to "default"
    session_id: Optional[str] = None     # handler defaults this to "console"
    source_run_id: Optional[str] = None  # link to supervisor run
    source_msg_id: Optional[str] = None  # link to message
|
||
|
||
|
||
class NodeUpsertBody(BaseModel):
    """Request body for POST /api/nodes/add — register or update one node.

    SSH fields are optional: the handler stores an ssh profile only when
    both ssh_host and ssh_user are non-empty.
    """
    node_id: str                     # normalized to upper-case by the handler
    label: str                       # display name; falls back to node_id when blank
    router_url: str
    gateway_url: Optional[str] = ""
    monitor_url: Optional[str] = ""  # handler falls back to router_url when empty
    supervisor_url: Optional[str] = ""
    ssh_host: Optional[str] = ""
    ssh_port: Optional[int] = 22
    ssh_user: Optional[str] = ""
    ssh_password_env: Optional[str] = ""  # env-var NAME holding the SSH password
    ssh_ipv6: Optional[str] = ""          # optional IPv6 address (connect fallback)
    ssh_host_keys: Optional[List[Dict[str, Any]]] = None
    enabled: bool = True
|
||
|
||
|
||
@app.get("/api/ops/actions")
async def api_ops_actions_list():
    """Return the identifiers of every registered ops action."""
    action_ids = [action_id for action_id in OPS_ACTIONS]
    return {"actions": action_ids}
|
||
|
||
|
||
@app.post("/api/ops/run")
async def api_ops_run(body: OpsRunBody, _auth=Depends(require_api_key)):
    """Run ops action. Broadcasts ops.run event and auto-creates ops_run graph node.

    Returns the raw ``run_ops_action`` result, augmented with
    ``_graph_node_id`` / ``_ops_run_id`` when graph-node creation succeeds.
    """
    # Fix: use the module-level `uuid` import instead of re-importing it
    # locally as `_uuid` (redundant shadow of the top-of-file import).
    t0 = time.monotonic()
    project_id = body.project_id or "default"
    session_id = body.session_id or "console"
    ops_run_id = str(uuid.uuid4())
    started_at = _app_db._now() if _app_db else None

    result = await run_ops_action(
        body.action_id, body.node_id, body.params,
        agent_id="sofiia", timeout=90.0, api_key=ROUTER_API_KEY,
    )
    elapsed = int((time.monotonic() - t0) * 1000)
    # Anything other than an explicit "failed" status counts as success.
    ok = result.get("status") != "failed"
    status_str = "ok" if ok else "failed"
    error_str = result.get("error", "") if not ok else ""

    _broadcast_bg(_make_event("ops.run",
                              {"name": body.action_id, "ok": ok, "elapsed_ms": elapsed},
                              project_id=project_id, session_id=session_id))

    # Auto-create ops_run graph node (fire-and-forget, do not fail the request)
    if _app_db and project_id:
        try:
            gn = await _app_db.upsert_ops_run_node(
                project_id=project_id,
                ops_run_id=ops_run_id,
                action_id=body.action_id,
                node_id=body.node_id,
                status=status_str,
                elapsed_ms=elapsed,
                error=str(error_str)[:500],
                started_at=started_at or "",
                source_run_id=body.source_run_id or "",
                source_msg_id=body.source_msg_id or "",
            )
            result["_graph_node_id"] = gn.get("node_id")
            result["_ops_run_id"] = ops_run_id
        except Exception as _e:
            logger.warning("ops_run graph node creation failed (non-fatal): %s", _e)

    return result
|
||
|
||
|
||
# ─── Nodes ──────────────────────────────────────────────────────────────────
|
||
|
||
@app.get("/api/nodes/dashboard")
async def api_nodes_dashboard(refresh: bool = Query(False), _auth: str = Depends(require_auth)):
    """
    Nodes dashboard with full telemetry.
    Returns cached data (refreshed every NODES_POLL_INTERVAL_SEC seconds).
    Pass ?refresh=true to force immediate re-probe.
    """
    # Re-probe when explicitly requested, or when the cache has never been filled.
    if refresh or not _nodes_cache["nodes"]:
        fresh = await get_nodes_dashboard(router_api_key=ROUTER_API_KEY)
        # Update the shared cache and stamp it with the refresh time.
        _nodes_cache.update({**fresh, "ts": _now_iso()})
        return {**fresh, "ts": _nodes_cache["ts"], "cached": False}
    return {**_nodes_cache, "cached": True}
|
||
|
||
|
||
@app.get("/api/nodes/registry")
async def api_nodes_registry(_auth: str = Depends(require_auth)):
    """Return the raw nodes registry as loaded from configuration."""
    registry = load_nodes_registry()
    return registry
|
||
|
||
|
||
@app.get("/api/nodes/ssh/status")
async def api_nodes_ssh_status(
    node_id: str = Query(..., description="Node ID, e.g. NODA1"),
    _auth=Depends(require_api_key_strict),
):
    """Report SSH readiness for a node.

    Checks three things: the ssh profile exists, the host is TCP-reachable
    on the configured port (IPv4 first, IPv6 fallback), and at least one
    credential (password or private key) is configured.
    """
    node_id = node_id.strip().upper()
    ssh = get_node_ssh_profile(node_id)
    if not ssh.get("configured"):
        # No profile at all — short-circuit with a descriptive payload.
        return {
            "ok": False,
            "node_id": node_id,
            "configured": False,
            "error": "ssh profile is not configured",
            "ssh": ssh,
        }

    host = ssh.get("host", "")
    host_ipv6 = (ssh.get("ipv6") or "").strip()
    port = int(ssh.get("port") or 22)
    tcp_ok = False
    tcp_error = None
    connect_host = host  # which address ultimately answered

    def _try_connect(target_host: str) -> Optional[str]:
        # Returns None on success, otherwise a truncated error string.
        try:
            with socket.create_connection((target_host, port), timeout=5):
                return None
        except Exception as e:
            return str(e)[:160]

    # Probe the primary (IPv4) host first; fall back to IPv6 if configured.
    tcp_error = _try_connect(host)
    if tcp_error is None:
        tcp_ok = True
    elif host_ipv6:
        err_v6 = _try_connect(host_ipv6)
        if err_v6 is None:
            tcp_ok = True
            tcp_error = None
            connect_host = host_ipv6
        else:
            # Both address families failed — report both errors, length-capped.
            tcp_error = f"ipv4={tcp_error}; ipv6={err_v6}"[:220]

    # "ok" requires reachability AND at least one configured credential.
    ok = tcp_ok and (ssh["auth"]["password_set"] or ssh["auth"]["private_key_set"])
    return {
        "ok": ok,
        "node_id": node_id,
        "configured": True,
        "tcp_reachable": tcp_ok,
        "tcp_error": tcp_error,
        "connect_host": connect_host,
        "ssh": ssh,
    }
|
||
|
||
|
||
@app.post("/api/nodes/add")
async def api_nodes_add(body: NodeUpsertBody, _auth=Depends(require_api_key_strict)):
    """Add or update a node in the registry, persist it to disk, and refresh
    the nodes dashboard cache so the change is visible immediately.

    Returns the save path, the normalized node_id, and the full node map.
    """
    reg = load_nodes_registry()
    # Ensure the registry skeleton exists before inserting.
    reg.setdefault("defaults", {"health_timeout_sec": 10, "tools_timeout_sec": 30})
    reg.setdefault("nodes", {})
    node_id = body.node_id.strip().upper()
    if not node_id:
        raise HTTPException(status_code=400, detail="node_id is required")
    node_payload: Dict[str, Any] = {
        "label": body.label.strip() or node_id,
        "router_url": body.router_url.strip(),
        "gateway_url": (body.gateway_url or "").strip(),
        # monitor_url falls back to router_url when not provided.
        "monitor_url": (body.monitor_url or body.router_url).strip(),
        "supervisor_url": (body.supervisor_url or "").strip(),
        "enabled": body.enabled,
    }
    ssh_host = (body.ssh_host or "").strip()
    ssh_user = (body.ssh_user or "").strip()
    # An SSH profile is stored only when both host and user are present.
    if ssh_host and ssh_user:
        node_payload["ssh"] = {
            "host": ssh_host,
            "ipv6": (body.ssh_ipv6 or "").strip(),
            "port": int(body.ssh_port or 22),
            "user": ssh_user,
            "auth": {
                # Default env-var name is NODES_<ID>_SSH_PASSWORD when not given.
                "password_env": (body.ssh_password_env or f"NODES_{node_id}_SSH_PASSWORD").strip(),
            },
            "host_keys": body.ssh_host_keys or [],
        }

    reg["nodes"][node_id] = node_payload
    path = save_nodes_registry(reg)
    # Re-probe all nodes so the dashboard cache includes the new/updated node.
    fresh = await get_nodes_dashboard(router_api_key=ROUTER_API_KEY)
    _nodes_cache.update({**fresh, "ts": _now_iso()})
    return {"ok": True, "saved_to": str(path), "node_id": node_id, "nodes": reg.get("nodes", {})}
|
||
|
||
|
||
# ─── Voice ──────────────────────────────────────────────────────────────────
|
||
|
||
@app.post("/api/voice/stt")
async def api_voice_stt(
    request: Request,
    audio: UploadFile = File(...),
    language: Optional[str] = Query(None),
    session_id: Optional[str] = Query(None),
    project_id: Optional[str] = Query(None),
):
    """STT proxy → memory-service. Rate: 20/min. Broadcasts voice.stt events."""
    client_ip = request.client.host if request.client else "unknown"
    if not _check_rate(f"stt:{client_ip}", max_calls=20, window_sec=60):
        raise HTTPException(status_code=429, detail="Rate limit: 20 STT calls/min")

    sid = session_id or "console"
    pid = project_id or "default"
    _broadcast_bg(_make_event("voice.stt", {"phase": "start"},
                              project_id=pid, session_id=sid))
    t0 = time.monotonic()

    mem_url = get_memory_service_url()
    try:
        content = await audio.read()
        if not content:
            raise HTTPException(status_code=400, detail="Empty audio file")
        async with httpx.AsyncClient(timeout=60.0) as client:
            # Forward the uploaded file as multipart, defaulting name/MIME to webm.
            files = {"audio": (audio.filename or "audio.webm", content, audio.content_type or "audio/webm")}
            params = {"language": language} if language else {}
            r = await client.post(f"{mem_url}/voice/stt", files=files, params=params)
            r.raise_for_status()
            result = r.json()
        elapsed = int((time.monotonic() - t0) * 1000)
        upstream_ms = result.get("compute_ms", 0)
        logger.info("STT ok: lang=%s text_len=%d bff_ms=%d upstream_ms=%d",
                    language or "auto", len(result.get("text", "")), elapsed, upstream_ms)
        _broadcast_bg(_make_event("voice.stt",
                                  {"phase": "done", "elapsed_ms": elapsed, "upstream_ms": upstream_ms},
                                  project_id=pid, session_id=sid))
        result["bff_ms"] = elapsed
        return result
    except httpx.HTTPStatusError as e:
        # Upstream returned an HTTP error — forward its status code.
        logger.error("STT upstream error: status=%s", e.response.status_code)
        _broadcast_bg(_make_event("voice.stt", {"phase": "error", "message": str(e)[:80]},
                                  project_id=pid, session_id=sid))
        raise HTTPException(status_code=e.response.status_code, detail=f"STT upstream: {str(e)[:200]}")
    except HTTPException:
        # Re-raise our own HTTPExceptions (e.g. the 400 for empty audio) untouched.
        raise
    except Exception as e:
        logger.error("STT proxy error: %s", e, exc_info=True)
        _broadcast_bg(_make_event("voice.stt", {"phase": "error", "message": str(e)[:80]},
                                  project_id=pid, session_id=sid))
        raise HTTPException(status_code=502, detail=f"STT error: {str(e)[:200]}")
|
||
|
||
|
||
class TTSRequest(BaseModel):
    """Request body for POST /api/voice/tts."""
    text: str                          # text to synthesize (sanitized server-side)
    voice: Optional[str] = "default"
    speed: Optional[float] = 1.0
    model: Optional[str] = "piper"     # upstream TTS model/engine identifier
    session_id: Optional[str] = None   # handler defaults this to "console"
    project_id: Optional[str] = None   # handler defaults this to "default"
|
||
|
||
|
||
@app.post("/api/voice/tts")
async def api_voice_tts(body: TTSRequest, request: Request):
    """TTS proxy → memory-service. Rate: 30/min per IP. Concurrent: MAX_CONCURRENT_TTS.

    When VOICE_HA_ENABLED=true, routes through the Voice HA router (which
    selects a node); otherwise calls the memory-service directly. Streams the
    upstream audio back with engine/voice/latency metadata in the headers.

    Raises:
        HTTPException 429: per-IP rate limit exceeded.
        HTTPException 503: all concurrent synthesis slots are busy.
        HTTPException 400: text is empty after sanitization.
        HTTPException (upstream status): upstream returned an HTTP error.
        HTTPException 502: any other proxy failure.
    """
    client_ip = request.client.host if request.client else "unknown"
    if not _check_rate(f"tts:{client_ip}", max_calls=30, window_sec=60):
        raise HTTPException(status_code=429, detail="Rate limit: 30 TTS calls/min per client")

    # Concurrent synthesis guard — prevents memory-service DoS on burst requests.
    # NOTE(review): Semaphore._value is a private, non-atomic peek used only to
    # fail fast; the authoritative limit is the `async with sem` below.
    sem = _get_tts_semaphore()
    if not sem._value:  # non-blocking peek: all slots occupied
        raise HTTPException(status_code=503,
                            detail=f"TTS busy: max {_MAX_CONCURRENT_TTS} concurrent synthesis. Retry in 1-2s.")

    # Server-side sanitization: strips <think>, markdown, URLs; truncates safely
    text = _sanitize_for_voice(body.text.strip())
    if not text:
        raise HTTPException(status_code=400, detail="Empty text")

    sid = body.session_id or "console"
    pid = body.project_id or "default"
    _broadcast_bg(_make_event("voice.tts", {"phase": "start", "voice": body.voice},
                              project_id=pid, session_id=sid))
    t0 = time.monotonic()

    # Fix: reuse the semaphore fetched above (it was redundantly re-fetched here).
    async with sem:  # enforce MAX_CONCURRENT_TTS globally
        try:
            # ── Voice HA path (opt-in via VOICE_HA_ENABLED=true) ──────────────
            if is_voice_ha_enabled():
                router_url = get_voice_ha_router_url()
                tts_payload = {
                    "text": text,
                    "voice": body.voice,
                    "speed": body.speed,
                    "model": body.model,
                }
                async with httpx.AsyncClient(timeout=30.0) as client:
                    r = await client.post(
                        f"{router_url}/v1/capability/voice_tts",
                        json=tts_payload,
                    )
                    r.raise_for_status()
                elapsed = int((time.monotonic() - t0) * 1000)
                upstream_ct = r.headers.get("content-type", "audio/wav")
                tts_engine = r.headers.get("X-TTS-Engine", "unknown")
                tts_voice_used = r.headers.get("X-TTS-Voice", body.voice)
                voice_node = r.headers.get("X-Voice-Node", "unknown")
                voice_mode = r.headers.get("X-Voice-Mode", "remote")
                ext = "mp3" if "mpeg" in upstream_ct else "wav"
                logger.info("TTS HA ok: voice=%s node=%s mode=%s elapsed=%dms",
                            tts_voice_used, voice_node, voice_mode, elapsed)
                _broadcast_bg(_make_event("voice.tts",
                                          {"phase": "done", "voice": tts_voice_used, "engine": tts_engine,
                                           "elapsed_ms": elapsed, "ha_mode": voice_mode, "ha_node": voice_node},
                                          project_id=pid, session_id=sid))
                return StreamingResponse(
                    io.BytesIO(r.content),
                    media_type=upstream_ct,
                    headers={
                        "Content-Disposition": f"inline; filename=speech.{ext}",
                        "X-TTS-Engine": tts_engine,
                        "X-TTS-Voice": tts_voice_used,
                        "X-TTS-Elapsed-MS": str(elapsed),
                        "X-Voice-Node": voice_node,
                        "X-Voice-Mode": voice_mode,
                        "Cache-Control": "no-store",
                    },
                )

            # ── Legacy direct path (default, VOICE_HA_ENABLED=false) ──────────
            mem_url = get_memory_service_url()
            async with httpx.AsyncClient(timeout=30.0) as client:
                r = await client.post(
                    f"{mem_url}/voice/tts",
                    json={"text": text, "voice": body.voice, "speed": body.speed, "model": body.model},
                )
                r.raise_for_status()
            elapsed = int((time.monotonic() - t0) * 1000)
            upstream_ct = r.headers.get("content-type", "audio/wav")
            tts_engine = r.headers.get("X-TTS-Engine", "unknown")
            tts_voice_used = r.headers.get("X-TTS-Voice", body.voice)
            ext = "mp3" if "mpeg" in upstream_ct else "wav"
            logger.info("TTS ok: voice=%s engine=%s len=%d fmt=%s elapsed=%dms",
                        tts_voice_used, tts_engine, len(text), ext, elapsed)
            _broadcast_bg(_make_event("voice.tts",
                                      {"phase": "done", "voice": tts_voice_used, "engine": tts_engine, "elapsed_ms": elapsed},
                                      project_id=pid, session_id=sid))
            return StreamingResponse(
                io.BytesIO(r.content),
                media_type=upstream_ct,
                headers={
                    "Content-Disposition": f"inline; filename=speech.{ext}",
                    "X-TTS-Engine": tts_engine,
                    "X-TTS-Voice": tts_voice_used,
                    "X-TTS-Elapsed-MS": str(elapsed),
                    "Cache-Control": "no-store",
                },
            )
        except httpx.HTTPStatusError as e:
            # Upstream HTTP failure — record, broadcast, forward the status.
            _record_tts_error("http_error", e.response.status_code, str(e)[:120], body.voice)
            logger.error("TTS upstream error: status=%s voice=%s ha=%s",
                         e.response.status_code, body.voice, is_voice_ha_enabled())
            _broadcast_bg(_make_event("voice.tts", {"phase": "error", "message": str(e)[:80]},
                                      project_id=pid, session_id=sid))
            raise HTTPException(status_code=e.response.status_code, detail=f"TTS upstream: {str(e)[:200]}")
        except Exception as e:
            _record_tts_error("proxy_error", None, str(e)[:120], body.voice)
            logger.error("TTS proxy error: %s ha=%s", e, is_voice_ha_enabled(), exc_info=True)
            _broadcast_bg(_make_event("voice.tts", {"phase": "error", "message": str(e)[:80]},
                                      project_id=pid, session_id=sid))
            raise HTTPException(status_code=502, detail=f"TTS error: {str(e)[:200]}")
|
||
|
||
|
||
@app.get("/api/voice/voices")
async def api_voice_voices():
    """List available TTS voices from the memory-service.

    On any failure, returns a static fallback (macOS Milena) plus the
    truncated error message instead of raising.
    """
    base_url = get_memory_service_url()
    try:
        async with httpx.AsyncClient(timeout=10.0) as client:
            resp = await client.get(f"{base_url}/voice/voices")
            resp.raise_for_status()
            payload = resp.json()
        return payload
    except Exception as e:
        fallback = {
            "piper": [],
            "macos": [{"id": "Milena", "name": "Milena (uk-UA)", "lang": "uk-UA"}],
            "error": str(e)[:100],
        }
        return fallback
|
||
|
||
|
||
# ─── Phase 2: Voice Chat Stream (sentence chunking → early TTS) ──────────────
|
||
# Strategy: split LLM text into sentences → synthesize first sentence immediately
|
||
# → return {first_audio_b64, first_text, rest_text[]}
|
||
# Browser plays first sentence while fetching TTS for remaining sentences in bg.
|
||
# TTFA drops from ~10-14s to ~3-5s (LLM still runs full, but TTS starts on chunk1).
|
||
|
||
from app.voice_utils import split_into_voice_chunks as _split_into_voice_chunks
|
||
from app.voice_utils import clean_think_blocks as _clean_think_blocks_util
|
||
from app.voice_utils import sanitize_for_voice as _sanitize_for_voice
|
||
from app.voice_utils import MIN_CHUNK_CHARS as _MIN_CHUNK_CHARS, MAX_CHUNK_CHARS as _MAX_CHUNK_CHARS
|
||
|
||
|
||
class VoiceChatStreamBody(BaseModel):
    """Request body for POST /api/voice/chat/stream."""
    message: str                           # user utterance
    model: str = "ollama:qwen3:14b"        # "<provider>:<model>" — ollama|grok|glm
    node_id: str = "NODA2"
    voice: Optional[str] = None            # TTS voice id; None → "default"
    voice_profile: Optional[str] = "voice_fast_uk"
    session_id: Optional[str] = None       # auto-generated "vs_…" id when absent
    project_id: Optional[str] = None       # handler defaults this to "default"
    history: List[Dict[str, Any]] = []     # prior chat turns (handler uses last 8)
|
||
|
||
|
||
@app.post("/api/voice/chat/stream")
async def api_voice_chat_stream(body: VoiceChatStreamBody, request: Request):
    """Phase 2 Voice Chat: LLM → sentence split → first sentence TTS immediately.

    Returns:
        {
        ok: bool,
        first_text: str,         # first sentence
        first_audio_b64: str,    # base64 MP3 for immediate playback
        first_audio_mime: str,   # "audio/mpeg"
        rest_chunks: [str, ...], # remaining sentences (client fetches TTS via /api/voice/tts)
        full_text: str,          # full LLM reply (for display)
        trace_id: str,
        meta: {llm_ms, tts_ms, chunks_total}
        }

    Client flow:
        1. POST /api/voice/chat/stream → play first_audio_b64 immediately
        2. For each chunk in rest_chunks: POST /api/voice/tts → enqueue audio
    """
    import re as _re  # noqa: F401 – kept for legacy; re already imported at module level
    client_ip = request.client.host if request.client else "unknown"
    if not _check_rate(f"voice_stream:{client_ip}", max_calls=15, window_sec=60):
        raise HTTPException(status_code=429, detail="Rate limit: 15 voice stream calls/min per client")

    # Concurrent TTS guard also applies to stream endpoint (TTS inside)
    # NOTE(review): the semaphore is only peeked here (private _value) and never
    # acquired in this handler — the concurrency cap is advisory for this
    # endpoint, unlike /api/voice/tts which does `async with sem`. Confirm intent.
    sem = _get_tts_semaphore()
    if not sem._value:
        raise HTTPException(status_code=503,
                            detail=f"TTS busy: max {_MAX_CONCURRENT_TTS} concurrent synthesis. Retry in 1-2s.")

    sid = body.session_id or f"vs_{uuid.uuid4().hex[:10]}"
    pid = body.project_id or "default"
    trace_id = f"vs_{sid}_{uuid.uuid4().hex[:8]}"

    _vp = body.voice_profile or "voice_fast_uk"
    _is_quality = _vp == "voice_quality_uk"
    _system_prompt = SOFIIA_SYSTEM_PROMPT + SOFIIA_VOICE_PROMPT_SUFFIX

    # Track for repro pack
    global _voice_last_model, _voice_last_profile
    _voice_last_model = body.model
    _voice_last_profile = _vp

    _broadcast_bg(_make_event("voice.stream", {"phase": "start", "trace_id": trace_id},
                              project_id=pid, session_id=sid))

    # ── 1. LLM ────────────────────────────────────────────────────────────────
    t0_llm = time.monotonic()
    provider, _, model_name = body.model.partition(":")
    reply = ""

    def _clean(text: str) -> str:
        # Drop closed <think>…</think> blocks; if an unclosed <think> remains,
        # keep only the text that precedes it.
        cleaned = re.sub(r"<think>.*?</think>", "", text, flags=re.DOTALL | re.IGNORECASE)
        if "<think>" in cleaned.lower():
            cleaned = re.split(r"(?i)<think>", cleaned)[0]
        return cleaned.strip()

    try:
        if provider == "ollama":
            ollama_url = get_ollama_url()
            effective_model_name = model_name or "qwen3:14b"
            messages: List[Dict[str, Any]] = [{"role": "system", "content": _system_prompt}]
            messages.extend(body.history[-8:])
            messages.append({"role": "user", "content": body.message})
            # Low temperature + capped num_predict keep voice replies short.
            voice_options = {
                "temperature": 0.18 if _is_quality else 0.15,
                "repeat_penalty": 1.1,
                "num_predict": 256 if _is_quality else 220,
            }
            async with httpx.AsyncClient(timeout=SOFIIA_OLLAMA_VOICE_TIMEOUT_SEC) as client:
                r = await client.post(
                    f"{ollama_url}/api/chat",
                    json=_make_ollama_payload(effective_model_name, messages, voice_options),
                )
                r.raise_for_status()
                raw = (r.json().get("message") or {}).get("content", "")
                reply = _clean(raw)
        elif provider == "grok":
            xai_key = os.getenv("XAI_API_KEY", "").strip()
            if not xai_key:
                raise HTTPException(status_code=503, detail="XAI_API_KEY not set.")
            grok_model = model_name or "grok-4-1-fast-reasoning"
            messages_g: List[Dict[str, Any]] = [{"role": "system", "content": _system_prompt}]
            messages_g.extend(body.history[-8:])
            messages_g.append({"role": "user", "content": body.message})
            async with httpx.AsyncClient(timeout=60.0) as client:
                r = await client.post(
                    "https://api.x.ai/v1/chat/completions",
                    headers={"Authorization": f"Bearer {xai_key}", "Content-Type": "application/json"},
                    json={"model": grok_model, "messages": messages_g, "stream": False,
                          "max_tokens": 1024, "temperature": 0.2},
                )
                r.raise_for_status()
                raw = (r.json().get("choices") or [{}])[0].get("message", {}).get("content", "")
                reply = _clean(raw)
        elif provider == "glm":
            glm_key = os.getenv("GLM5_API_KEY", os.getenv("GLM_API_KEY", "")).strip()
            if not glm_key:
                raise HTTPException(status_code=503, detail="GLM5_API_KEY not set.")
            glm_model = model_name or "glm-5"
            messages_glm: List[Dict[str, Any]] = [{"role": "system", "content": _system_prompt}]
            messages_glm.extend(body.history[-8:])
            messages_glm.append({"role": "user", "content": body.message})
            async with httpx.AsyncClient(timeout=60.0) as client:
                r = await client.post(
                    "https://open.bigmodel.cn/api/paas/v4/chat/completions",
                    headers={"Authorization": f"Bearer {glm_key}", "Content-Type": "application/json"},
                    json={"model": glm_model, "messages": messages_glm, "stream": False},
                )
                r.raise_for_status()
                raw = (r.json().get("choices") or [{}])[0].get("message", {}).get("content", "")
                reply = _clean(raw)
        else:
            raise HTTPException(status_code=400, detail=f"voice/stream: provider '{provider}' not supported. Use: ollama, grok, glm.")
    except HTTPException:
        # Preserve our own 4xx/5xx errors (missing API key, bad provider).
        raise
    except Exception as e:
        _record_llm_error("inference_error", body.model, str(e)[:120])
        _broadcast_bg(_make_event("error", {"where": "voice_stream_llm", "trace_id": trace_id, "message": str(e)[:100]},
                                  project_id=pid, session_id=sid))
        raise HTTPException(status_code=502, detail=f"LLM error: {str(e)[:200]}")

    llm_ms = int((time.monotonic() - t0_llm) * 1000)
    if not reply:
        reply = "Не можу відповісти зараз."

    # ── 2. Sentence chunking ──────────────────────────────────────────────────
    # sanitize full reply before splitting (removes markdown, <think>, URLs)
    sanitized_reply = _sanitize_for_voice(reply)
    chunks = _split_into_voice_chunks(sanitized_reply)
    if not chunks:
        chunks = [sanitized_reply] if sanitized_reply else ["Не можу відповісти зараз."]

    first_chunk = chunks[0]
    # rest_chunks: sanitize + hard cap (prevents DoS via unreasonably long replies)
    _MAX_REST_CHUNKS = int(os.getenv("MAX_VOICE_REST_CHUNKS", "8"))
    all_rest = [_sanitize_for_voice(c) for c in chunks[1:] if _sanitize_for_voice(c)]
    rest_chunks = all_rest[:_MAX_REST_CHUNKS]  # cap: never more than 8 background TTS calls

    # ── 3. TTS for first sentence (immediate) ─────────────────────────────────
    t0_tts = time.monotonic()
    first_audio_b64 = ""
    first_audio_mime = "audio/mpeg"
    voice = body.voice or "default"
    _ha_voice_node = None
    _ha_voice_mode = None

    try:
        import base64 as _b64  # NOTE(review): redundant — base64 is imported at module level
        tts_json = {"text": first_chunk, "voice": voice, "speed": 1.0}
        async with httpx.AsyncClient(timeout=15.0) as client:
            if is_voice_ha_enabled():
                # HA path: Router selects best node for TTS
                router_url = get_voice_ha_router_url()
                r_tts = await client.post(f"{router_url}/v1/capability/voice_tts", json=tts_json)
                r_tts.raise_for_status()
                _ha_voice_node = r_tts.headers.get("X-Voice-Node")
                _ha_voice_mode = r_tts.headers.get("X-Voice-Mode")
                logger.debug("voice_stream TTS via HA: node=%s mode=%s",
                             _ha_voice_node, _ha_voice_mode)
            else:
                # Legacy direct path
                mem_url = get_memory_service_url()
                r_tts = await client.post(f"{mem_url}/voice/tts", json=tts_json)
                r_tts.raise_for_status()
            first_audio_mime = r_tts.headers.get("content-type", "audio/mpeg").split(";")[0]
            first_audio_b64 = _b64.b64encode(r_tts.content).decode()
    except Exception as e:
        logger.warning("voice_stream TTS failed for first chunk (ha=%s): %s",
                       is_voice_ha_enabled(), e)
        # Not fatal: client can still render text

    tts_ms = int((time.monotonic() - t0_tts) * 1000)

    _broadcast_bg(_make_event("voice.stream", {
        "phase": "done",
        "trace_id": trace_id,
        "llm_ms": llm_ms,
        "tts_ms": tts_ms,
        "chunks_total": len(chunks),
    }, project_id=pid, session_id=sid))

    # r_tts is only dereferenced when first_audio_b64 is non-empty, so it is
    # guaranteed to be bound in that branch.
    logger.info("voice_stream ok: trace=%s llm=%dms tts=%dms chunks=%d first=%dB",
                trace_id, llm_ms, tts_ms, len(chunks), len(r_tts.content) if first_audio_b64 else 0)

    body_data = {
        "ok": True,
        "trace_id": trace_id,
        "first_text": first_chunk,
        "first_audio_b64": first_audio_b64,
        "first_audio_mime": first_audio_mime,
        "rest_chunks": rest_chunks,
        "full_text": reply,
        "meta": {
            "llm_ms": llm_ms,
            "tts_ms": tts_ms,
            "chunks_total": len(chunks),
            "voice": voice,
            "model": body.model,
            "voice_profile": _vp,
        },
    }

    from fastapi.responses import JSONResponse as _JSONResponse  # NOTE(review): redundant — JSONResponse imported at module level
    # Surface HA routing decisions to the client via response headers.
    resp_headers = {}
    if _ha_voice_mode:
        resp_headers["X-Voice-Mode"] = _ha_voice_mode
    if _ha_voice_node:
        resp_headers["X-Voice-Node"] = _ha_voice_node
    if _ha_voice_mode or _ha_voice_node:
        resp_headers["X-Voice-Cap"] = "voice_tts"

    if resp_headers:
        return _JSONResponse(content=body_data, headers=resp_headers)
    return body_data
|
||
|
||
|
||
# ─── Voice Telemetry Beacon ───────────────────────────────────────────────────
|
||
# Receives performance marks from browser, records Prometheus histograms.
|
||
# Browser calls this via navigator.sendBeacon (fire-and-forget).
|
||
|
||
# Guarded metric registration: any failure (e.g. prometheus_client not
# installed) sets _PROM_VOICE_OK = False so beacon handlers can skip recording.
try:
    from prometheus_client import Histogram as _PromHistogram, Counter as _PromCounter
    # All histogram bucket edges below are in milliseconds.
    _voice_ttfa_hist = _PromHistogram(
        "voice_ttfa_ms", "Time-to-first-audio (request → first audio playable)",
        ["model", "voice_profile"],
        buckets=[500, 1000, 2000, 3000, 5000, 7000, 10000, 15000],
    )
    _voice_llm_hist = _PromHistogram(
        "voice_llm_ms", "LLM inference time for voice turns",
        ["model", "voice_profile"],
        buckets=[500, 1000, 2000, 5000, 8000, 12000, 20000],
    )
    _voice_tts_first_hist = _PromHistogram(
        "voice_tts_first_ms", "First-sentence TTS synthesis time",
        ["voice_profile"],
        buckets=[200, 500, 800, 1200, 2000, 3000],
    )
    _voice_e2e_hist = _PromHistogram(
        "voice_e2e_ms", "End-to-end voice turn latency (user stop speaking → audio plays)",
        ["voice_profile"],
        buckets=[1000, 2000, 4000, 6000, 9000, 13000, 20000],
    )
    _voice_underflow_counter = _PromCounter(
        "voice_queue_underflows_total", "Times playback queue ran empty before TTS finished",
        ["voice_profile"],
    )
    _PROM_VOICE_OK = True
except Exception:
    _PROM_VOICE_OK = False
|
||
|
||
|
||
class VoiceTelemetryPayload(BaseModel):
    """One voice-turn performance beacon submitted by the browser."""
    event: str = "voice_turn"
    # Idempotency: session_id + turn_id deduplicate duplicate beacon submissions
    session_id: Optional[str] = None
    turn_id: Optional[str] = None  # monotonic turn counter or UUID per turn
    ttfa_ms: Optional[int] = None  # time-to-first-audio
    llm_ms: Optional[int] = None  # LLM inference duration
    tts_first_ms: Optional[int] = None  # first-sentence TTS synthesis time
    e2e_ms: Optional[int] = None  # end-to-end turn latency
    stt_ms: Optional[int] = None  # speech-to-text duration
    underflows: int = 0  # playback-queue underruns observed during the turn
    model: Optional[str] = None
    voice_profile: Optional[str] = None
|
||
|
||
|
||
class VoiceTelemetryBatch(BaseModel):
    """Batch beacon: array of turns submitted together (reduces HTTP overhead)."""
    # Pydantic copies field defaults per-instance, so the [] default is safe here.
    events: List[VoiceTelemetryPayload] = []
|
||
|
||
|
||
def _process_telemetry_item(payload: VoiceTelemetryPayload) -> bool:
    """Process a single telemetry item. Returns False if duplicate.

    Pipeline: dedupe → observe Prometheus histograms (best-effort) →
    log → feed the degradation state machine.
    """
    sid = payload.session_id or "anon"
    tid = payload.turn_id or "noid"
    # Dedupe on (session, turn) so retried sendBeacon calls don't double-count.
    if _telem_is_duplicate(sid, tid):
        return False  # skip duplicate

    # Normalize label values; "ollama:" prefix is stripped to keep cardinality low.
    model = (payload.model or "unknown").replace("ollama:", "")
    profile = payload.voice_profile or "unknown"

    if _PROM_VOICE_OK:
        # Metrics are best-effort: a label/observe failure must not lose the event.
        try:
            if payload.ttfa_ms is not None:
                _voice_ttfa_hist.labels(model=model, voice_profile=profile).observe(payload.ttfa_ms)
            if payload.llm_ms is not None:
                _voice_llm_hist.labels(model=model, voice_profile=profile).observe(payload.llm_ms)
            if payload.tts_first_ms is not None:
                _voice_tts_first_hist.labels(voice_profile=profile).observe(payload.tts_first_ms)
            if payload.e2e_ms is not None:
                _voice_e2e_hist.labels(voice_profile=profile).observe(payload.e2e_ms)
            if payload.underflows:
                _voice_underflow_counter.labels(voice_profile=profile).inc(payload.underflows)
        except Exception as exc:
            logger.debug("telemetry/voice prom error: %s", exc)

    logger.info(
        "voice_telemetry: model=%s profile=%s ttfa=%s llm=%s tts=%s e2e=%s underflows=%d sid=%s",
        model, profile, payload.ttfa_ms, payload.llm_ms,
        payload.tts_first_ms, payload.e2e_ms, payload.underflows, sid,
    )

    # Feed the degradation state machine
    if payload.ttfa_ms is not None or payload.tts_first_ms is not None:
        _voice_degradation_sm.observe(
            ttfa_ms=payload.ttfa_ms,
            tts_first_ms=payload.tts_first_ms,
            underflows=payload.underflows,
            profile=profile,
        )
    return True
|
||
|
||
|
||
@app.post("/api/telemetry/voice", status_code=204)
async def api_telemetry_voice(payload: VoiceTelemetryPayload):
    """Ingest one voice-turn beacon from the browser (fire-and-forget).

    Always responds 204 No Content; duplicate turns are silently dropped.
    """
    # Result intentionally discarded — sendBeacon callers never read the body.
    _ = _process_telemetry_item(payload)
|
||
|
||
|
||
@app.post("/api/telemetry/voice/batch", status_code=204)
async def api_telemetry_voice_batch(batch: VoiceTelemetryBatch, request: Request):
    """Batch beacon: process up to 20 turns in one HTTP call.

    Useful when browser queues multiple turns before sending (e.g. tab becomes
    visible again, or connection was lost briefly).
    """
    client_ip = request.client.host if request.client else "unknown"
    if not _check_rate(f"telem_batch:{client_ip}", max_calls=60, window_sec=60):
        raise HTTPException(status_code=429, detail="Rate limit: 60 telemetry batches/min")

    cap = min(len(batch.events), 20)  # hard cap per batch
    processed = 0
    for item in batch.events[:cap]:
        if _process_telemetry_item(item):
            processed += 1
    logger.debug("telemetry/voice/batch: submitted=%d processed=%d cap=%d",
                 len(batch.events), processed, cap)
|
||
|
||
|
||
# ─── Voice Degradation State Machine ─────────────────────────────────────────
|
||
# Tracks rolling window of voice telemetry and determines system-level state.
|
||
# States: ok → degraded_tts → degraded_llm → fast_lock → emergency
|
||
# Client polls GET /api/voice/degradation_status to show UI badge.
|
||
|
||
# NOTE(review): `collections` is already imported at the top of this module;
# this mid-file re-import is harmless but redundant.
import collections
from dataclasses import dataclass as _dc, field as _field
from enum import Enum
|
||
|
||
class VoiceDegradationState(str, Enum):
    """System-level voice-pipeline health state (str-valued for JSON responses)."""
    OK = "ok"  # all SLOs met
    DEGRADED_TTS = "degraded_tts"  # TTS slow/failing → show "TTS SLOW" badge
    DEGRADED_LLM = "degraded_llm"  # LLM slow → profile auto-demoted to fast
    FAST_LOCK = "fast_lock"  # LLM degraded, forced to voice_fast_uk
    EMERGENCY = "emergency"  # TTS failing → warn user, fallback banner

# SLO thresholds (ms) — aligned with config/slo_policy.yml
_SM_TTFA_WARN = 5000  # TTFA p95 > 5s → degraded_llm
_SM_TTFA_LOCK = 8000  # TTFA p95 > 8s → fast_lock
_SM_TTS_WARN = 2000  # TTS first p95 > 2s → degraded_tts
_SM_TTS_CRIT = 4000  # TTS first p95 > 4s → emergency
_SM_UNDERFLOW_RATE = 0.1  # >10% of recent turns have underflows → degraded_tts
_SM_WINDOW = 20  # rolling window (last N telemetry events)
_SM_MIN_SAMPLES = 5  # need at least N samples before changing state
|
||
|
||
|
||
@_dc
class _VoiceDegradationSM:
    """Rolling-window degradation state machine.

    Each telemetry sample is appended to bounded deques; `_recompute` then
    derives the current state from nearest-rank p95s and the underflow rate.
    """
    # Bounded rolling windows of the last _SM_WINDOW samples.
    _ttfa_window: collections.deque = _field(default_factory=lambda: collections.deque(maxlen=_SM_WINDOW))
    _tts_first_window: collections.deque = _field(default_factory=lambda: collections.deque(maxlen=_SM_WINDOW))
    _underflow_window: collections.deque = _field(default_factory=lambda: collections.deque(maxlen=_SM_WINDOW))
    state: VoiceDegradationState = VoiceDegradationState.OK
    state_since: float = _field(default_factory=time.monotonic)  # monotonic ts of last transition
    recommended_profile: str = "voice_fast_uk"
    last_reason: str = ""  # human-readable reason for the current state
    # NOTE(review): this lock is never acquired anywhere in the visible code —
    # observe() mutates the windows without it. Confirm whether locking was
    # intended (observe() is sync, so an asyncio.Lock could not guard it anyway).
    _lock: object = _field(default_factory=lambda: __import__('asyncio').Lock())

    def observe(self, ttfa_ms: Optional[int], tts_first_ms: Optional[int],
                underflows: int, profile: str) -> None:
        """Record one telemetry sample and re-evaluate the state.

        `profile` is accepted but not used in the current computation.
        """
        if ttfa_ms is not None:
            self._ttfa_window.append(ttfa_ms)
        if tts_first_ms is not None:
            self._tts_first_window.append(tts_first_ms)
        # Underflow window stores a 0/1 flag per turn, not the raw count.
        self._underflow_window.append(1 if underflows > 0 else 0)
        self._recompute()

    def _p95(self, window: collections.deque) -> Optional[float]:
        """Nearest-rank p95 of a window; None until _SM_MIN_SAMPLES collected."""
        if len(window) < _SM_MIN_SAMPLES:
            return None
        s = sorted(window)
        return s[int(len(s) * 0.95)]

    def _underflow_rate(self) -> float:
        """Fraction of recent turns that reported at least one underflow."""
        if not self._underflow_window:
            return 0.0
        return sum(self._underflow_window) / len(self._underflow_window)

    def _recompute(self) -> None:
        """Re-derive state from current windows; branches ordered worst-first."""
        ttfa_p95 = self._p95(self._ttfa_window)
        tts_p95 = self._p95(self._tts_first_window)
        uf_rate = self._underflow_rate()

        prev_state = self.state

        # Severity-ordered cascade: EMERGENCY > FAST_LOCK > DEGRADED_* > OK.
        if tts_p95 is not None and tts_p95 > _SM_TTS_CRIT:
            self.state = VoiceDegradationState.EMERGENCY
            self.recommended_profile = "voice_fast_uk"
            self.last_reason = f"TTS p95={tts_p95:.0f}ms > {_SM_TTS_CRIT}ms"
        elif ttfa_p95 is not None and ttfa_p95 > _SM_TTFA_LOCK:
            self.state = VoiceDegradationState.FAST_LOCK
            self.recommended_profile = "voice_fast_uk"
            self.last_reason = f"TTFA p95={ttfa_p95:.0f}ms > {_SM_TTFA_LOCK}ms — locked to fast profile"
        elif tts_p95 is not None and tts_p95 > _SM_TTS_WARN:
            self.state = VoiceDegradationState.DEGRADED_TTS
            self.recommended_profile = "voice_fast_uk"
            self.last_reason = f"TTS p95={tts_p95:.0f}ms > {_SM_TTS_WARN}ms"
        elif ttfa_p95 is not None and ttfa_p95 > _SM_TTFA_WARN:
            self.state = VoiceDegradationState.DEGRADED_LLM
            self.recommended_profile = "voice_fast_uk"
            self.last_reason = f"TTFA p95={ttfa_p95:.0f}ms > {_SM_TTFA_WARN}ms"
        elif uf_rate > _SM_UNDERFLOW_RATE:
            self.state = VoiceDegradationState.DEGRADED_TTS
            self.recommended_profile = "voice_fast_uk"
            self.last_reason = f"Underflow rate={uf_rate:.1%} > {_SM_UNDERFLOW_RATE:.0%}"
        else:
            self.state = VoiceDegradationState.OK
            self.recommended_profile = "voice_fast_uk"  # default
            self.last_reason = "all SLOs met"

        # Log only on actual transitions; state_since tracks the last change.
        if self.state != prev_state:
            self.state_since = time.monotonic()
            logger.warning("voice_degradation state: %s → %s | %s",
                           prev_state.value, self.state.value, self.last_reason)

    def status_dict(self) -> dict:
        """JSON-serializable snapshot for the degradation-status endpoint."""
        return {
            "state": self.state.value,
            "state_since_sec": int(time.monotonic() - self.state_since),
            "recommended_profile": self.recommended_profile,
            "reason": self.last_reason,
            "samples": {
                "ttfa": len(self._ttfa_window),
                "tts_first": len(self._tts_first_window),
            },
            "p95": {
                "ttfa_ms": self._p95(self._ttfa_window),
                "tts_first_ms": self._p95(self._tts_first_window),
            },
            "underflow_rate": round(self._underflow_rate(), 3),
            "ui_badge": _SM_UI_BADGE.get(self.state, ""),
        }
|
||
|
||
|
||
# UI badge text per state (empty string = no badge shown).
_SM_UI_BADGE = {
    VoiceDegradationState.OK: "",
    VoiceDegradationState.DEGRADED_TTS: "⚠ TTS SLOW",
    VoiceDegradationState.DEGRADED_LLM: "⚠ AI SLOW",
    VoiceDegradationState.FAST_LOCK: "⚡ FAST MODE",
    VoiceDegradationState.EMERGENCY: "🔴 TTS DEGRADED",
}

# Module-level singleton fed by _process_telemetry_item and read by the
# degradation-status endpoint.
_voice_degradation_sm = _VoiceDegradationSM()
|
||
|
||
|
||
@app.get("/api/voice/degradation_status")
async def api_voice_degradation_status():
    """Returns current voice degradation state + repro pack for incident diagnosis.

    Repro pack fields (for on-call):
        node_id, edge_tts_version, last_model, last_profile,
        last_5_tts_errors, last_5_llm_errors

    NOTE(review): the docstring above mentions edge_tts_version, but the repro
    dict below does not include it — confirm whether it was dropped intentionally.
    """
    base = _voice_degradation_sm.status_dict()
    # Enrich with repro pack
    base["repro"] = {
        "node_id": _NODE_ID,
        "last_model": _voice_last_model,
        "last_profile": _voice_last_profile,
        "last_5_tts_errors": list(_voice_tts_errors),
        "last_5_llm_errors": list(_voice_llm_errors),
        # NOTE(review): reads the private `_value` of the asyncio.Semaphore —
        # works today but is not a stable public API.
        "concurrent_tts_slots_free": _get_tts_semaphore()._value,
        "max_concurrent_tts": _MAX_CONCURRENT_TTS,
    }
    return base
|
||
|
||
|
||
# ─── Memory ──────────────────────────────────────────────────────────────────
|
||
|
||
@app.get("/api/memory/status")
async def api_memory_status(_auth: str = Depends(require_auth)):
    """Report health of the external memory service (best-effort, never raises)."""
    mem_url = get_memory_service_url()
    try:
        async with httpx.AsyncClient(timeout=8.0) as client:
            resp = await client.get(f"{mem_url}/health")
            resp.raise_for_status()
            payload = resp.json()
        return {
            "ok": True,
            "memory_url": mem_url,
            "status": payload.get("status", "unknown"),
            "vector_store": payload.get("vector_store", {}),
            "stt": "whisper-large-v3-turbo",
            "tts": "edge-tts / macOS say",
        }
    except Exception as e:
        # Degrade gracefully so the UI can render an "offline" card.
        return {"ok": False, "error": str(e)[:200], "memory_url": mem_url}
|
||
|
||
|
||
@app.get("/api/memory/context")
async def api_memory_context(
    session_id: str = Query("console"),
    agent_id: str = Query("sofiia"),
    user_id: Optional[str] = Query(None),
    limit: int = Query(20, ge=1, le=100),
    _auth: str = Depends(require_auth),
):
    """Fetch recent conversation context from the memory service.

    Fallback order: remote memory service → local SQLite history when the
    remote returns an empty list → local SQLite history when the remote errors.
    """
    mem_url = get_memory_service_url()
    agent_key = str(agent_id or "").strip().lower()
    # AISTALK sessions use a dedicated default user id.
    resolved_user = user_id or ("aistalk_user" if agent_key == "aistalk" else "console_user")

    async def _sqlite_fallback_events() -> List[Dict[str, Any]]:
        """Best-effort read of locally persisted session messages."""
        events: List[Dict[str, Any]] = []
        if _app_db:
            try:
                rows = await _app_db.list_messages(session_id, limit=limit)
                for row in rows:
                    events.append(
                        {
                            "role": row.get("role", "unknown"),
                            "content": row.get("content", ""),
                            "ts": row.get("ts"),
                            "source": "sqlite_fallback",
                        }
                    )
            except Exception:
                # Fallback must never raise; an empty list is acceptable.
                pass
        return events

    try:
        async with httpx.AsyncClient(timeout=8.0) as client:
            r = await client.get(
                f"{mem_url}/agents/{agent_id}/memory",
                params={"user_id": resolved_user, "channel_id": session_id, "limit": limit},
            )
            r.raise_for_status()
            data = r.json()
            events = data.get("events") if isinstance(data, dict) else None
            if isinstance(events, list) and events:
                return data
            # Remote is alive but returned empty history; expose local persisted history too.
            local_events = await _sqlite_fallback_events()
            if local_events:
                return {"events": local_events, "fallback": "sqlite_after_empty_remote"}
            return data if isinstance(data, dict) else {"events": []}
    except Exception as e:
        # Fallback to local SQLite session memory so UI still has context.
        events = await _sqlite_fallback_events()
        return {"events": events, "error": str(e)[:100], "fallback": "sqlite"}
|
||
|
||
|
||
# ─── WebSocket /ws/events ────────────────────────────────────────────────────
|
||
|
||
@app.websocket("/ws/events")
async def ws_events(websocket: WebSocket):
    """WebSocket event stream. Clients receive all broadcast events.

    Lifecycle: accept → register in _ws_clients → welcome frame →
    ping/heartbeat loop → unregister on any exit path.
    """
    await websocket.accept()
    _ws_clients.add(websocket)
    logger.info("WS client connected, total=%d", len(_ws_clients))
    # Send welcome
    await websocket.send_text(json.dumps(_make_event("nodes.status", {
        "message": "connected",
        "bff_version": _VERSION,
        "ws_clients": len(_ws_clients),
    })))
    try:
        while True:
            # Keep-alive: read pings from client (or just wait)
            try:
                msg = await asyncio.wait_for(websocket.receive_text(), timeout=15.0)
                # Client can send {"type":"ping"} → pong
                if msg:
                    try:
                        cmd = json.loads(msg)
                        if cmd.get("type") == "ping":
                            await websocket.send_text(json.dumps({"type": "pong", "ts": _now_iso()}))
                    except Exception:
                        # Malformed client frames are ignored, not fatal.
                        pass
            except asyncio.TimeoutError:
                # No client traffic for 15s: send periodic heartbeat with
                # cached node summaries if available.
                hb_data: Dict[str, Any] = {
                    "bff_uptime_s": int(time.monotonic() - _START_TIME),
                    "ws_clients": len(_ws_clients),
                }
                if _nodes_cache.get("nodes"):
                    hb_data["nodes"] = [
                        {
                            "id": n["node_id"],
                            "online": n.get("online", False),
                            "router_ok": n.get("router_ok", False),
                            "router_latency_ms": n.get("router_latency_ms"),
                        }
                        for n in _nodes_cache["nodes"]
                    ]
                    hb_data["nodes_ts"] = _nodes_cache.get("ts", "")
                await websocket.send_text(json.dumps(_make_event("nodes.status", hb_data)))
    except WebSocketDisconnect:
        # Normal client disconnect — nothing to log at warning level.
        pass
    except Exception as e:
        logger.debug("WS error: %s", e)
    finally:
        # Always unregister so broadcasts don't target a dead socket.
        _ws_clients.discard(websocket)
        logger.info("WS client disconnected, total=%d", len(_ws_clients))
|
||
|
||
|
||
# ─── UI ─────────────────────────────────────────────────────────────────────
|
||
|
||
# Static UI assets live one level above this package (…/static).
STATIC_DIR = Path(__file__).resolve().parent.parent / "static"
# Headers forcing browsers/proxies to always revalidate UI responses.
_NO_CACHE = {"Cache-Control": "no-cache, no-store, must-revalidate", "Pragma": "no-cache"}
|
||
|
||
|
||
@app.get("/api/meta/version")
async def get_meta_version():
    """Build metadata endpoint — always no-cache, always public."""
    meta = {
        "version": _VERSION,
        "build_sha": _BUILD_SHA,
        "build_time": _BUILD_TIME,
        "service": "sofiia-console",
    }
    return JSONResponse(content=meta, headers=_NO_CACHE)
|
||
|
||
|
||
# ─── Auth endpoints ──────────────────────────────────────────────────────────
|
||
|
||
class _LoginBody(BaseModel):
    """Request body for POST /api/auth/login: the raw console API key."""
    key: str
|
||
|
||
|
||
@app.post("/api/auth/login")
async def auth_login(body: _LoginBody, response: Response):
    """
    Verify API key (sent in JSON body — avoids header encoding issues).
    On success: set httpOnly session cookie, return ok=true.
    No CORS/header encoding issues since key travels in request body.
    """
    if not _key_valid(body.key):
        raise HTTPException(status_code=401, detail="Invalid key")

    session_token = _cookie_token(body.key)
    cookie_attrs = dict(
        key=_COOKIE_NAME,
        value=session_token,
        httponly=True,
        secure=_IS_PROD,  # Secure=True in prod (HTTPS only)
        samesite="lax",
        max_age=_COOKIE_MAX_AGE,
        path="/",
    )
    response.set_cookie(**cookie_attrs)
    return {"ok": True, "auth": "cookie"}
|
||
|
||
|
||
@app.post("/api/auth/logout")
async def auth_logout(response: Response):
    """Invalidate the UI session by clearing its cookie."""
    response.delete_cookie(key=_COOKIE_NAME, path="/")
    return {"ok": True}
|
||
|
||
|
||
@app.get("/api/auth/check")
async def auth_check(request: Request):
    """Returns 200 if session is valid, 401 otherwise. Used by UI on startup.

    Check order: localhost bypass → open mode (no key configured) →
    constant-time comparison of the session cookie.
    """
    # Localhost is always open — no auth needed
    client_ip = (request.client.host if request.client else "") or ""
    if client_ip in ("127.0.0.1", "::1", "localhost"):
        return {"ok": True, "auth": "localhost"}
    configured = get_console_api_key()
    if not configured:
        return {"ok": True, "auth": "open"}
    # `secrets` is not imported at module level; keep the import local.
    import secrets as _sec
    cookie_val = request.cookies.get(_COOKIE_NAME, "")
    # compare_digest avoids timing side-channels on the session token.
    # (_expected_cookie_token is already imported at module top — the previous
    # function-local re-import from .auth was redundant.)
    if cookie_val and _sec.compare_digest(cookie_val, _expected_cookie_token()):
        return {"ok": True, "auth": "cookie"}
    raise HTTPException(status_code=401, detail="Not authenticated")
|
||
|
||
|
||
@app.get("/", response_class=HTMLResponse)
async def ui_root():
    """Serve the console SPA index; fall back to inline HTML when missing."""
    index = STATIC_DIR / "index.html"
    if index.exists():
        content = index.read_text(encoding="utf-8")
    else:
        content = _fallback_html()
    return HTMLResponse(content=content, headers=_NO_CACHE)
|
||
|
||
|
||
@app.get("/ui", response_class=HTMLResponse)
async def ui_alias():
    """Alias for the root UI page — delegates to ui_root()."""
    return await ui_root()
|
||
|
||
|
||
def _fallback_html() -> str:
    """Minimal inline HTML page served when static assets are missing."""
    return (
        '<!DOCTYPE html><html><head><meta charset="utf-8"><title>Sofiia Console</title></head>\n'
        "<body><h1>Sofiia Control Console v" + _VERSION + "</h1>\n"
        "<p>Endpoints: <code>GET /api/health</code> | <code>GET /api/status/full</code> | <code>POST /api/chat/send</code> | <code>WS /ws/events</code></p>\n"
        "</body></html>"
    )
|
||
|
||
|
||
@app.get("/chat", response_class=HTMLResponse)
async def ui_chat():
    """Serve the chat page; fall back to inline HTML when missing."""
    page = STATIC_DIR / "chat.html"
    if page.exists():
        content = page.read_text(encoding="utf-8")
    else:
        content = _fallback_html()
    return HTMLResponse(content=content, headers=_NO_CACHE)
|
||
|
||
|
||
@app.get("/ops", response_class=HTMLResponse)
async def ui_ops():
    """Serve the ops page; fall back to inline HTML when missing."""
    page = STATIC_DIR / "ops.html"
    if page.exists():
        content = page.read_text(encoding="utf-8")
    else:
        content = _fallback_html()
    return HTMLResponse(content=content, headers=_NO_CACHE)
|
||
|
||
|
||
@app.get("/nodes", response_class=HTMLResponse)
async def ui_nodes():
    """Serve the nodes page; fall back to inline HTML when missing."""
    page = STATIC_DIR / "nodes.html"
    if page.exists():
        content = page.read_text(encoding="utf-8")
    else:
        content = _fallback_html()
    return HTMLResponse(content=content, headers=_NO_CACHE)
|
||
|
||
|
||
# ── Supervisor Proxy ───────────────────────────────────────────────────────────
|
||
# Primary in-cluster supervisor address; the fallback targets a local port for
# setups where the container DNS name is not resolvable.
_SUPERVISOR_URL = os.getenv("SUPERVISOR_URL", "http://sofiia-supervisor:8080").rstrip("/")
_SUPERVISOR_FALLBACK_URL = os.getenv("SUPERVISOR_FALLBACK_URL", "http://127.0.0.1:8084").rstrip("/")
|
||
|
||
|
||
async def _supervisor_request_json(
    method: str,
    path: str,
    *,
    timeout: float = 30.0,
    json_body: Optional[Dict[str, Any]] = None,
) -> Tuple[int, Dict[str, Any]]:
    """Send a JSON request to the supervisor, trying primary then fallback URL.

    Returns (status_code, dict-payload). Non-dict JSON is wrapped as {"data": …},
    non-JSON bodies as {"raw": …}. Raises HTTPException with the supervisor's
    status for HTTP errors (>=400), or 502 when no URL was reachable.

    Note: only connection-level failures trigger the fallback URL; an HTTP
    error from the first reachable host is propagated immediately.
    """
    urls = [_SUPERVISOR_URL]
    if _SUPERVISOR_FALLBACK_URL and _SUPERVISOR_FALLBACK_URL not in urls:
        urls.append(_SUPERVISOR_FALLBACK_URL)

    last_err = "unavailable"
    for base in urls:
        target = f"{base}{path}"
        try:
            async with httpx.AsyncClient(timeout=timeout) as client:
                resp = await client.request(method, target, json=json_body)
        except Exception as e:
            # Connection/timeout failure: remember and try the next base URL.
            last_err = str(e)[:200]
            continue

        if resp.status_code >= 400:
            # Host answered with an error — propagate, do not fall through.
            detail = resp.text[:400] if resp.text else f"Supervisor error {resp.status_code}"
            raise HTTPException(status_code=resp.status_code, detail=detail)

        if not resp.content:
            return resp.status_code, {}
        try:
            payload = resp.json()
        except Exception:
            return resp.status_code, {"raw": resp.text[:1000]}
        if isinstance(payload, dict):
            return resp.status_code, payload
        return resp.status_code, {"data": payload}

    raise HTTPException(status_code=502, detail=f"Supervisor unreachable: {last_err}")
|
||
|
||
|
||
@app.post("/api/supervisor/runs")
async def start_supervisor_run(request: Request, _auth: str = Depends(require_auth)):
    """Start a LangGraph run on sofiia-supervisor.

    Body: {"graph": "alert_triage|incident_triage|postmortem_draft|release_check",
           "project_id": "<optional>", ...params}

    If project_id is provided, auto-creates an agent_run dialog_node in the graph
    and returns node_id in the response for UI tracking.
    """
    body = await request.json()
    # "graph"/"project_id" are control fields — popped so only run params are
    # forwarded to the supervisor.
    graph_name = body.pop("graph", None)
    project_id = body.pop("project_id", None)
    if not graph_name:
        raise HTTPException(status_code=400, detail="'graph' field is required")
    try:
        status_code, result = await _supervisor_request_json(
            "POST",
            f"/v1/graphs/{graph_name}/runs",
            timeout=60.0,
            json_body=body,
        )

        # Auto-create agent_run node if project is provided
        if project_id and status_code in (200, 201, 202):
            run_id = result.get("run_id") or result.get("id") or str(uuid.uuid4())
            try:
                pack = await _app_db.create_evidence_pack(
                    project_id=project_id,
                    run_id=run_id,
                    graph_name=graph_name,
                    result_data={"status": "started", "summary": f"Run started: {graph_name}"},
                    created_by="sofiia",
                )
                result["_node_id"] = pack.get("node_id")
            except Exception as node_err:
                # Node bookkeeping is best-effort; the run itself already started.
                logger.warning("evidence_pack node creation failed (non-fatal): %s", node_err)

        return JSONResponse(status_code=status_code, content=result)
    except HTTPException:
        # Preserve supervisor-side HTTP status codes as-is.
        raise
    except Exception as e:
        raise HTTPException(status_code=502, detail=f"Supervisor unreachable: {e}")
|
||
|
||
|
||
@app.get("/api/supervisor/runs/{run_id}")
async def get_supervisor_run(run_id: str, _auth: str = Depends(require_auth)):
    """Get the status/result of a LangGraph run.

    Proxies GET /v1/runs/{run_id} on the supervisor. Supervisor-side HTTP
    errors (e.g. 404 for an unknown run) are propagated with their original
    status code instead of being masked as a 502 — this mirrors
    start_supervisor_run's error handling.
    """
    try:
        status_code, payload = await _supervisor_request_json(
            "GET",
            f"/v1/runs/{run_id}",
            timeout=15.0,
        )
        return JSONResponse(status_code=status_code, content=payload)
    except HTTPException:
        # _supervisor_request_json raises HTTPException for HTTP errors and for
        # "unreachable"; re-raise as-is rather than re-wrapping into a 502.
        raise
    except Exception as e:
        raise HTTPException(status_code=502, detail=f"Supervisor unreachable: {e}")
|
||
|
||
|
||
@app.post("/api/supervisor/runs/{run_id}/cancel")
async def cancel_supervisor_run(run_id: str, _auth: str = Depends(require_auth)):
    """Cancel a running LangGraph run.

    Supervisor-side HTTP errors are propagated with their original status
    code instead of being masked as a 502 — this mirrors
    start_supervisor_run's error handling.
    """
    try:
        status_code, payload = await _supervisor_request_json(
            "POST",
            f"/v1/runs/{run_id}/cancel",
            timeout=10.0,
        )
        return JSONResponse(status_code=status_code, content=payload)
    except HTTPException:
        # _supervisor_request_json raises HTTPException for HTTP errors and for
        # "unreachable"; re-raise as-is rather than re-wrapping into a 502.
        raise
    except Exception as e:
        raise HTTPException(status_code=502, detail=f"Supervisor unreachable: {e}")
|
||
|
||
|
||
@app.get("/api/supervisor/graphs")
async def list_supervisor_graphs():
    """List available LangGraph graphs (no auth — read-only discovery)."""
    candidates = [_SUPERVISOR_URL]
    if _SUPERVISOR_FALLBACK_URL and _SUPERVISOR_FALLBACK_URL not in candidates:
        candidates.append(_SUPERVISOR_FALLBACK_URL)
    last_err = "unavailable"
    for base in candidates:
        try:
            async with httpx.AsyncClient(timeout=5.0) as client:
                resp = await client.get(f"{base}/healthz")
                data = resp.json()
            return {
                "graphs": data.get("graphs", []),
                "healthy": resp.status_code == 200,
                "url": base,
                "state_backend": data.get("state_backend"),
            }
        except Exception as e:
            # Try the next candidate; remember the most recent failure.
            last_err = str(e)
            continue
    return {"graphs": [], "healthy": False, "error": last_err}
|
||
|
||
|
||
@app.get("/api/aistalk/status")
async def aistalk_status():
    """AISTALK integration status for SOFIIA UI.

    Aggregates supervisor discovery, Aurora health, runtime state and the
    relay adapter's own health/status (each probed best-effort).
    """
    try:
        sup = await list_supervisor_graphs()
        aurora = await api_aurora_health()
        runtime = await _aistalk_runtime_state()
        adapter_status: Dict[str, Any]
        relay_health: Dict[str, Any]
        if _aistalk is not None:
            # Adapter probes may fail independently; report each separately.
            try:
                relay_health = _aistalk.probe_health()
            except Exception as e:
                relay_health = {"enabled": True, "ok": False, "error": str(e)[:200]}
            try:
                adapter_status = _aistalk.status()
            except Exception:
                adapter_status = {"enabled": True, "base_url": "unknown"}
        else:
            relay_health = {"enabled": False, "ok": False, "error": "disabled"}
            adapter_status = {"enabled": False, "base_url": ""}
        return {
            "aistalk_enabled": _aistalk is not None,
            "aistalk_adapter": repr(_aistalk) if _aistalk is not None else "disabled",
            "adapter": adapter_status,
            "relay_health": relay_health,
            "supervisor": sup,
            "aurora": aurora,
            "runtime": runtime,
            "docs": {
                "contract": "/docs/aistalk/contract.md",
                "supervisor": "/docs/supervisor/langgraph_supervisor.md",
            },
        }
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))
|
||
|
||
|
||
def _parse_agent_md(path: Path) -> Dict[str, Any]:
|
||
text = path.read_text(encoding="utf-8", errors="ignore")
|
||
lines = [ln.rstrip() for ln in text.splitlines()]
|
||
title = path.stem
|
||
display_name = title
|
||
role: List[str] = []
|
||
outputs: List[str] = []
|
||
boundaries: List[str] = []
|
||
capabilities: List[str] = []
|
||
intro: List[str] = []
|
||
in_section: Optional[str] = None
|
||
|
||
for raw in lines:
|
||
line = raw.strip()
|
||
if not line:
|
||
continue
|
||
if line.startswith("# "):
|
||
display_name = line[2:].strip()
|
||
continue
|
||
low = line.lower()
|
||
if low.startswith("role:"):
|
||
in_section = "role"
|
||
continue
|
||
if low.startswith("output:"):
|
||
in_section = "output"
|
||
continue
|
||
if low.startswith("outputs:"):
|
||
in_section = "output"
|
||
continue
|
||
if low.startswith("boundary:"):
|
||
in_section = "boundary"
|
||
continue
|
||
if low.startswith("boundaries:"):
|
||
in_section = "boundary"
|
||
continue
|
||
if low.startswith("capabilities:"):
|
||
in_section = "capabilities"
|
||
continue
|
||
if low.startswith("modes:") or low.startswith("rules:") or low.startswith("internal sub-pipeline"):
|
||
in_section = None
|
||
continue
|
||
if line.startswith("```"):
|
||
in_section = None
|
||
continue
|
||
|
||
if line.startswith("- "):
|
||
item = line[2:].strip()
|
||
if in_section == "role":
|
||
role.append(item)
|
||
elif in_section == "output":
|
||
outputs.append(item)
|
||
elif in_section == "boundary":
|
||
boundaries.append(item)
|
||
elif in_section == "capabilities":
|
||
capabilities.append(item)
|
||
continue
|
||
if in_section is None and not line.startswith("#"):
|
||
# Some agent role files store purpose as plain intro line without "Role:" section.
|
||
intro.append(line)
|
||
|
||
summary = role[0] if role else (intro[0] if intro else "")
|
||
return {
|
||
"id": title.lower(),
|
||
"name": display_name,
|
||
"summary": summary,
|
||
"role": role,
|
||
"outputs": outputs,
|
||
"boundaries": boundaries,
|
||
"capabilities": capabilities,
|
||
"source": str(path),
|
||
}
|
||
|
||
|
||
@app.get("/api/aistalk/catalog")
async def aistalk_catalog():
    """
    Return AISTALK subagent catalog + declared capabilities for UI rendering.

    Looks for role markdown files in the repo-level or package-level
    config/roles/aistalk directory (first that exists wins).
    """
    roots = [
        Path(__file__).resolve().parents[3] / "config" / "roles" / "aistalk",
        Path(__file__).resolve().parents[1] / "config" / "roles" / "aistalk",
    ]
    root = next((p for p in roots if p.exists()), None)
    if root is None:
        return {
            "ok": False,
            "error": "AISTALK roles directory not found",
            "agents": [],
            "domains": [],
        }

    agents: List[Dict[str, Any]] = []
    for p in sorted(root.glob("*.md")):
        try:
            agents.append(_parse_agent_md(p))
        except Exception as e:
            # A single unparsable file must not break the catalog — emit a
            # stub record carrying the parse error in its boundaries field.
            agents.append(
                {
                    "id": p.stem.lower(),
                    "name": p.stem,
                    "summary": "",
                    "role": [],
                    "outputs": [],
                    "boundaries": [f"parse_error: {str(e)[:120]}"],
                    "capabilities": [],
                    "source": str(p),
                }
            )

    # High-level specialization domains for UI badges/filters.
    domains = [
        {"id": "osint", "name": "OSINT & Recon", "agents": ["tracer", "stealth", "shadow"]},
        {"id": "analysis", "name": "Threat Analysis", "agents": ["neuron", "graph", "risk"]},
        {"id": "offdef", "name": "Offense/Defense", "agents": ["redteam", "blueteam", "purpleteam", "bughunter", "devteam"]},
        {"id": "forensics", "name": "Media Forensics", "agents": ["aurora"]},
        {"id": "security", "name": "Governance & Data Safety", "agents": ["vault", "quantum"]},
        {"id": "orchestration", "name": "Command & Synthesis", "agents": ["orchestrator_synthesis"]},
    ]
    return {
        "ok": True,
        "root": str(root),
        "count": len(agents),
        "agents": agents,
        "domains": domains,
    }
|
||
|
||
|
||
# Persisted runtime settings file, stored next to the Aurora data directory.
# NOTE(review): AURORA_DATA_DIR is defined elsewhere in this module — this
# assumes its parent is writable.
_AISTALK_RUNTIME_PATH = AURORA_DATA_DIR.parent / "sofiia-console-cache" / "aistalk_runtime.json"
# Canonical display/assignment order of AISTALK subagents.
_AISTALK_AGENT_ORDER = [
    "orchestrator_synthesis",
    "tracer",
    "shadow",
    "stealth",
    "neuron",
    "graph",
    "bughunter",
    "redteam",
    "blueteam",
    "purpleteam",
    "risk",
    "vault",
    "quantum",
    "devteam",
    "aurora",
]
# In-process bookkeeping of concurrent AISTALK activity (run_id → start ts).
_aistalk_team_active_runs: Dict[str, float] = {}
_aistalk_chat_active: int = 0
# Guards the two counters above across concurrent request handlers.
_aistalk_state_lock = asyncio.Lock()
|
||
|
||
|
||
def _aistalk_roles_root() -> Optional[Path]:
    """Locate the AISTALK role-definition directory (repo layout first, then package)."""
    here = Path(__file__).resolve()
    candidates = (
        here.parents[3] / "config" / "roles" / "aistalk",
        here.parents[1] / "config" / "roles" / "aistalk",
    )
    for candidate in candidates:
        if candidate.exists():
            return candidate
    return None
|
||
|
||
|
||
def _aistalk_resource_snapshot() -> Dict[str, Any]:
    """Snapshot host CPU count, total RAM (GiB) and Ollama tuning knobs.

    memory_gb is None when sysconf is unavailable (e.g. non-POSIX hosts).
    """
    mem_gb: Optional[float] = None
    try:
        page_size = os.sysconf("SC_PAGE_SIZE")
        total_pages = os.sysconf("SC_PHYS_PAGES")
        if page_size > 0 and total_pages > 0:
            mem_gb = round((page_size * total_pages) / (1024 ** 3), 1)
    except Exception:
        mem_gb = None
    return {
        "cpu_count": os.cpu_count() or 8,
        "memory_gb": mem_gb,
        "ollama_num_ctx": SOFIIA_OLLAMA_NUM_CTX,
        "ollama_num_thread": SOFIIA_OLLAMA_NUM_THREAD,
        "ollama_num_gpu": SOFIIA_OLLAMA_NUM_GPU,
    }
|
||
|
||
|
||
def _aistalk_recommended_limits(resources: Dict[str, Any]) -> Dict[str, Any]:
|
||
cpu = int(resources.get("cpu_count") or 8)
|
||
mem = resources.get("memory_gb")
|
||
mem_gb = float(mem) if isinstance(mem, (int, float)) else 0.0
|
||
if cpu >= 12 and mem_gb >= 24:
|
||
profile = "performance"
|
||
team_max = 2
|
||
chat_max = 4
|
||
elif cpu >= 8 and mem_gb >= 16:
|
||
profile = "balanced"
|
||
team_max = 1
|
||
chat_max = 3
|
||
else:
|
||
profile = "safe"
|
||
team_max = 1
|
||
chat_max = 2
|
||
return {
|
||
"profile": profile,
|
||
"max_parallel_team_runs": team_max,
|
||
"max_parallel_chat": chat_max,
|
||
"rule": (
|
||
"Aurora/forensics jobs are GPU-heavy: keep team runs low; "
|
||
"chat parallelism may be higher but bounded by CPU/RAM."
|
||
),
|
||
}
|
||
|
||
|
||
async def _aistalk_local_models() -> List[str]:
    """List model names installed in the local Ollama instance ([] on any failure)."""
    base = get_ollama_url().rstrip("/")
    try:
        async with httpx.AsyncClient(timeout=5.0) as client:
            resp = await client.get(f"{base}/api/tags")
            resp.raise_for_status()
            payload = resp.json()
            cleaned = [
                str((entry or {}).get("name", "")).strip()
                for entry in (payload.get("models") or [])
            ]
            return [name for name in cleaned if name]
    except Exception:
        # Best-effort: the console keeps working without local models.
        return []
|
||
|
||
|
||
def _aistalk_default_model_map(models: List[str]) -> Dict[str, str]:
    """Assign a default local model to each AISTALK agent by role tier."""
    installed = set(models)

    def first_available(*preferences: str) -> str:
        # Prefer the listed candidates; fall back to any installed model,
        # then to a hard-coded default when nothing is installed.
        for name in preferences:
            if name in installed:
                return name
        return models[0] if models else "qwen3:14b"

    tier_orchestrator = first_available("qwen3.5:35b-a3b", "qwen3:14b", "gemma3:latest")
    tier_analyst = first_available("qwen3:14b", "qwen3.5:35b-a3b", "gemma3:latest")
    tier_light = first_available("gemma3:latest", "qwen3:14b", "qwen3.5:35b-a3b")

    heavy_agents = {"orchestrator_synthesis", "risk", "neuron", "graph"}
    analyst_agents = {"tracer", "shadow", "stealth", "vault", "quantum"}
    assignments: Dict[str, str] = {}
    for agent in _AISTALK_AGENT_ORDER:
        if agent in heavy_agents:
            assignments[agent] = tier_orchestrator
        elif agent in analyst_agents:
            assignments[agent] = tier_analyst
        else:
            assignments[agent] = tier_light
    return assignments
|
||
|
||
|
||
def _read_aistalk_runtime() -> Dict[str, Any]:
    """Load persisted AISTALK runtime settings; {} when missing or corrupt."""
    if not _AISTALK_RUNTIME_PATH.exists():
        return {}
    try:
        parsed = json.loads(_AISTALK_RUNTIME_PATH.read_text(encoding="utf-8"))
    except Exception:
        return {}
    return parsed if isinstance(parsed, dict) else {}
|
||
|
||
|
||
def _write_aistalk_runtime(data: Dict[str, Any]) -> None:
    """Persist AISTALK runtime settings as pretty-printed UTF-8 JSON."""
    target = _AISTALK_RUNTIME_PATH
    target.parent.mkdir(parents=True, exist_ok=True)
    serialized = json.dumps(data, ensure_ascii=False, indent=2)
    target.write_text(serialized, encoding="utf-8")
|
||
|
||
|
||
async def _aistalk_runtime_state() -> Dict[str, Any]:
    """Build the effective AISTALK runtime state.

    Merges persisted settings with limits recommended from host resources,
    validates per-agent model choices against the locally installed Ollama
    models, and re-persists the normalized result.
    """
    resources = _aistalk_resource_snapshot()
    recommended = _aistalk_recommended_limits(resources)
    models = await _aistalk_local_models()
    stored = _read_aistalk_runtime()

    # Stored limits win over recommendations; missing/zero values fall back.
    limits = stored.get("limits") if isinstance(stored.get("limits"), dict) else {}
    max_team = int(limits.get("max_parallel_team_runs") or recommended["max_parallel_team_runs"])
    max_chat = int(limits.get("max_parallel_chat") or recommended["max_parallel_chat"])
    profile = str(limits.get("profile") or recommended["profile"])

    saved_models = stored.get("agent_models") if isinstance(stored.get("agent_models"), dict) else {}
    defaults = _aistalk_default_model_map(models)
    agent_models: Dict[str, str] = {}
    for aid in _AISTALK_AGENT_ORDER:
        selected = str(saved_models.get(aid) or defaults.get(aid) or "")
        # Drop selections that are no longer installed locally.
        if models and selected not in models:
            selected = defaults.get(aid) or models[0]
        if not selected:
            selected = "qwen3:14b"
        agent_models[aid] = selected

    state = {
        "limits": {
            "profile": profile,
            # Clamp to hard caps: team runs 1..4, chat 1..8.
            "max_parallel_team_runs": max(1, min(max_team, 4)),
            "max_parallel_chat": max(1, min(max_chat, 8)),
        },
        "recommended": recommended,
        "resources": resources,
        "available_models": models,
        "agent_models": agent_models,
        "active_team_runs": len(_aistalk_team_active_runs),
        "active_chat": _aistalk_chat_active,
    }
    # Persist normalized shape for future restarts.
    _write_aistalk_runtime({"limits": state["limits"], "agent_models": state["agent_models"]})
    return state
|
||
|
||
|
||
def _aistalk_role_prompt(agent_id: str) -> str:
    """Load the role prompt for an agent, falling back to the orchestrator prompt."""
    fallback = "You are AISTALK security analyst. Respond with findings, risk, next actions."
    root = _aistalk_roles_root()
    if root is None:
        return fallback
    prompt_file = root / f"{agent_id}.md"
    if not prompt_file.exists():
        prompt_file = root / "orchestrator_synthesis.md"
    try:
        # Keep prompt concise enough for local models.
        return prompt_file.read_text(encoding="utf-8", errors="ignore")[:6000]
    except Exception:
        return fallback
|
||
|
||
|
||
@app.get("/api/aistalk/runtime")
async def aistalk_runtime(_auth: str = Depends(require_auth)):
    """Return the current AISTALK runtime state (limits, models, activity)."""
    state = await _aistalk_runtime_state()
    return state
|
||
|
||
|
||
class AISTalkModelSetBody(BaseModel):
    """Request body for pinning a specific local model to one AISTALK agent."""

    # Agent identifier; handler rejects ids not in _AISTALK_AGENT_ORDER.
    agent_id: str
    # Ollama model name; must be installed locally when installed models are known.
    model: str
|
||
|
||
|
||
@app.post("/api/aistalk/runtime/model")
async def aistalk_set_agent_model(body: AISTalkModelSetBody, _auth: str = Depends(require_auth)):
    """Persist a per-agent model override after validating agent id and model."""
    state = await _aistalk_runtime_state()

    agent = str(body.agent_id or "").strip().lower()
    if agent not in _AISTALK_AGENT_ORDER:
        raise HTTPException(status_code=400, detail=f"Unknown agent_id: {agent}")

    chosen = str(body.model or "").strip()
    installed = state.get("available_models") or []
    if installed and chosen not in installed:
        raise HTTPException(status_code=400, detail=f"Model not available locally: {chosen}")

    stored = _read_aistalk_runtime()
    stored.setdefault("limits", state.get("limits", {}))
    stored.setdefault("agent_models", state.get("agent_models", {}))
    stored["agent_models"][agent] = chosen
    _write_aistalk_runtime(stored)
    return {"ok": True, "agent_id": agent, "model": chosen}
|
||
|
||
|
||
class AISTalkLimitsBody(BaseModel):
    """Request body for overriding AISTALK parallelism limits (all fields optional)."""

    # Profile label, e.g. "performance" / "balanced" / "safe".
    profile: Optional[str] = None
    # Clamped server-side to 1..4.
    max_parallel_team_runs: Optional[int] = None
    # Clamped server-side to 1..8.
    max_parallel_chat: Optional[int] = None
|
||
|
||
|
||
@app.post("/api/aistalk/runtime/limits")
async def aistalk_set_limits(body: AISTalkLimitsBody, _auth: str = Depends(require_auth)):
    """Persist parallelism limits, clamped to safe ranges (team 1..4, chat 1..8)."""
    state = await _aistalk_runtime_state()
    stored = _read_aistalk_runtime()

    updated = dict(state.get("limits", {}))
    if body.profile:
        updated["profile"] = str(body.profile)
    if body.max_parallel_team_runs is not None:
        updated["max_parallel_team_runs"] = max(1, min(int(body.max_parallel_team_runs), 4))
    if body.max_parallel_chat is not None:
        updated["max_parallel_chat"] = max(1, min(int(body.max_parallel_chat), 8))

    stored["limits"] = updated
    stored.setdefault("agent_models", state.get("agent_models", {}))
    _write_aistalk_runtime(stored)
    return {"ok": True, "limits": updated}
|
||
|
||
|
||
def _is_terminal_run_status(status: str) -> bool:
|
||
s = (status or "").strip().lower()
|
||
return s in {"succeeded", "failed", "cancelled", "canceled", "timeout", "error"}
|
||
|
||
|
||
class AISTalkChatBody(BaseModel):
    """Request body for a single AISTALK chat turn."""

    # User message forwarded to the model.
    message: str
    # Target agent; unknown ids fall back to the orchestrator in the handler.
    agent_id: str = "orchestrator_synthesis"
    # Optional explicit Ollama model override.
    model: Optional[str] = None
    # Conversation identifiers; generated/defaulted by the handler when absent.
    session_id: Optional[str] = None
    project_id: Optional[str] = None
    user_id: Optional[str] = None
    # Prior turns; the handler forwards only the last 10 entries.
    # (Mutable default is safe here: pydantic copies field defaults per instance.)
    history: List[Dict[str, Any]] = []
||
|
||
|
||
@app.post("/api/aistalk/chat")
async def aistalk_chat(body: AISTalkChatBody, request: Request, _auth: str = Depends(require_auth)):
    """Handle one AISTALK chat turn against a local Ollama model.

    Applies a per-IP rate limit and a global parallel-chat limit, resolves the
    target agent/model, queries Ollama, broadcasts a chat.reply event, and
    saves the exchange to memory in the background.

    Raises:
        HTTPException 429 on rate/parallelism limits, 502 on upstream errors.
    """
    client_ip = request.client.host if request.client else "unknown"
    if not _check_rate(f"aistalk_chat:{client_ip}", max_calls=40, window_sec=60):
        raise HTTPException(status_code=429, detail="Rate limit: 40 AISTALK chat messages/min")

    state = await _aistalk_runtime_state()
    limits = state.get("limits", {})
    max_chat = int(limits.get("max_parallel_chat") or 2)
    async with _aistalk_state_lock:
        global _aistalk_chat_active
        if _aistalk_chat_active >= max_chat:
            raise HTTPException(
                status_code=429,
                detail=f"AISTALK chat busy: active={_aistalk_chat_active}, limit={max_chat}",
            )
        _aistalk_chat_active += 1

    # FIX: the slot is held from here on, so ALL remaining work (including
    # request normalization) runs under try/finally — previously an exception
    # between the increment and the try would leak the counter.
    try:
        agent_id = str(body.agent_id or "orchestrator_synthesis").strip().lower()
        if agent_id not in _AISTALK_AGENT_ORDER:
            agent_id = "orchestrator_synthesis"
        selected_model = str(body.model or "").strip() or str((state.get("agent_models") or {}).get(agent_id) or "")
        if not selected_model:
            selected_model = "qwen3:14b"
        if (state.get("available_models") or []) and selected_model not in state["available_models"]:
            selected_model = (state.get("available_models") or ["qwen3:14b"])[0]

        project_id = body.project_id or "aistalk"
        session_id = body.session_id or f"aistalk_sess_{uuid.uuid4().hex[:10]}"
        user_id = body.user_id or "aistalk_user"

        role_prompt = _aistalk_role_prompt(agent_id)
        system_prompt = (
            "Ти працюєш у складі AISTALK (крипто-детективне агентство з безпеки мережі). "
            "Формат відповіді: findings -> risk -> actions. "
            "Пиши конкретно, без вигадок, позначай невизначеність.\n\n"
            + role_prompt
        )

        messages: List[Dict[str, Any]] = [{"role": "system", "content": system_prompt}]
        # Only the last 10 prior turns are forwarded to keep context small.
        messages.extend(body.history[-10:])
        messages.append({"role": "user", "content": body.message})

        t0 = time.monotonic()
        async with httpx.AsyncClient(timeout=SOFIIA_OLLAMA_TIMEOUT_SEC) as client:
            r = await client.post(
                f"{get_ollama_url().rstrip('/')}/api/chat",
                json=_make_ollama_payload(
                    selected_model,
                    messages,
                    {
                        "temperature": 0.15,
                        "repeat_penalty": 1.1,
                        "num_predict": min(1024, SOFIIA_OLLAMA_NUM_PREDICT_TEXT),
                    },
                ),
            )
            r.raise_for_status()
            data = r.json()
        reply = ((data.get("message") or {}).get("content") or "").strip() or "AISTALK: порожня відповідь"
        latency_ms = int((time.monotonic() - t0) * 1000)

        _broadcast_bg(
            _make_event(
                "chat.reply",
                {
                    "text": reply[:200],
                    "provider": "ollama",
                    "model": f"ollama:{selected_model}",
                    "agent_id": agent_id,
                    "latency_ms": latency_ms,
                },
                project_id=project_id,
                session_id=session_id,
                user_id=user_id,
            )
        )
        # FIX: asyncio.get_event_loop() is deprecated inside coroutines;
        # asyncio.create_task() schedules on the running loop directly.
        asyncio.create_task(
            _do_save_memory(
                body.message,
                reply,
                session_id,
                project_id,
                user_id,
                agent_id="aistalk",
            )
        )
        return {
            "ok": True,
            "project_id": project_id,
            "session_id": session_id,
            "user_id": user_id,
            "agent_id": agent_id,
            "model": f"ollama:{selected_model}",
            "response": reply,
            "meta": {"latency_ms": latency_ms, "active_chat": _aistalk_chat_active, "limit_chat": max_chat},
        }
    except HTTPException:
        raise
    except Exception as e:
        raise HTTPException(status_code=502, detail=f"AISTALK chat error: {str(e)[:200]}")
    finally:
        # Always release the chat slot, even on failure.
        async with _aistalk_state_lock:
            _aistalk_chat_active = max(0, _aistalk_chat_active - 1)
|
||
|
||
|
||
def _aistalk_autobuild_input(
|
||
graph: str,
|
||
objective: str,
|
||
input_payload: Dict[str, Any],
|
||
) -> Dict[str, Any]:
|
||
payload = dict(input_payload or {})
|
||
if graph == "incident_triage":
|
||
payload.setdefault("service", "aurora-service")
|
||
payload.setdefault("symptom", objective or "Aurora pipeline anomaly")
|
||
payload.setdefault("env", "prod")
|
||
payload.setdefault("include_traces", False)
|
||
return payload
|
||
|
||
if graph == "release_check":
|
||
payload.setdefault("service_name", "aurora-service")
|
||
payload.setdefault("diff_text", objective or "")
|
||
payload.setdefault("run_deps", True)
|
||
payload.setdefault("run_drift", True)
|
||
payload.setdefault("run_smoke", False)
|
||
return payload
|
||
|
||
if graph == "alert_triage":
|
||
# Graph is mostly autonomous; leave room for dry_run/profile overrides.
|
||
payload.setdefault("dry_run", False)
|
||
payload.setdefault("policy_profile", "default")
|
||
return payload
|
||
|
||
if graph == "postmortem_draft":
|
||
incident_id = str(payload.get("incident_id") or "").strip()
|
||
if not incident_id and objective:
|
||
m = re.search(r"(inc_[A-Za-z0-9_\-]+)", objective)
|
||
if m:
|
||
incident_id = m.group(1)
|
||
if not incident_id:
|
||
raise HTTPException(
|
||
status_code=400,
|
||
detail="postmortem_draft requires input.incident_id (e.g. inc_123abc)",
|
||
)
|
||
payload["incident_id"] = incident_id
|
||
payload.setdefault("service", "aurora-service")
|
||
payload.setdefault("env", "prod")
|
||
payload.setdefault("include_traces", False)
|
||
return payload
|
||
|
||
# Unknown/custom graph: pass-through without mutation.
|
||
return payload
|
||
|
||
|
||
@app.post("/api/aistalk/team/run")
async def aistalk_team_run(request: Request, _auth: str = Depends(require_auth)):
    """Run AISTALK team workflow via LangGraph supervisor.

    Builds graph-specific default inputs, enforces the parallel team-run
    limit, forwards the run request to the supervisor and mirrors its
    response annotated with local run-accounting fields.
    """
    body = await request.json()
    graph = str(body.get("graph") or "incident_triage").strip()
    objective = str(body.get("objective") or "").strip()
    input_payload = body.get("input")
    if not isinstance(input_payload, dict):
        input_payload = {}
    input_payload = _aistalk_autobuild_input(graph, objective, input_payload)
    runtime = await _aistalk_runtime_state()
    max_team_runs = int((runtime.get("limits") or {}).get("max_parallel_team_runs") or 1)
    # GC stale local entries (12h safety window).
    now_ts = time.time()
    stale = [rid for rid, ts in _aistalk_team_active_runs.items() if (now_ts - ts) > 12 * 3600]
    for rid in stale:
        _aistalk_team_active_runs.pop(rid, None)
    if len(_aistalk_team_active_runs) >= max_team_runs:
        raise HTTPException(
            status_code=429,
            detail=f"AISTALK team busy: active_runs={len(_aistalk_team_active_runs)}, limit={max_team_runs}",
        )

    sup_payload = {
        "workspace_id": str(body.get("workspace_id") or "daarion"),
        "user_id": str(body.get("user_id") or "aistalk_user"),
        "agent_id": "aistalk",
        "input": input_payload,
    }
    status_code, payload = await _supervisor_request_json(
        "POST",
        f"/v1/graphs/{graph}/runs",
        timeout=60.0,
        json_body=sup_payload,
    )
    if status_code in (200, 201, 202) and isinstance(payload, dict):
        # Track the new run locally so the parallelism limit applies.
        rid = str(payload.get("run_id") or payload.get("id") or "").strip()
        if rid:
            _aistalk_team_active_runs[rid] = time.time()
    # FIX: the supervisor may return a non-dict payload on errors; spreading it
    # with ** raised TypeError and masked the upstream failure. Wrap non-dict
    # payloads instead of crashing.
    extra = payload if isinstance(payload, dict) else {"supervisor_response": payload}
    return JSONResponse(
        status_code=status_code,
        content={
            "ok": status_code in (200, 201, 202),
            "graph": graph,
            "objective": objective,
            "active_runs": len(_aistalk_team_active_runs),
            "limit_runs": max_team_runs,
            **extra,
        },
    )
|
||
|
||
|
||
@app.post("/api/aistalk/relay/test")
async def aistalk_relay_test(request: Request, _auth: str = Depends(require_auth)):
    """Send a synthetic event to AISTALK relay and return adapter status."""
    content_type = request.headers.get("content-type", "")
    body = await request.json() if content_type.startswith("application/json") else {}
    event_type = str(body.get("type") or "aistalk.ping").strip()
    event = _make_event(
        event_type,
        {"message": body.get("message", "relay test"), "source": "sofiia-console"},
        project_id=str(body.get("project_id") or "aistalk"),
        session_id=str(body.get("session_id") or f"aistalk_test_{uuid.uuid4().hex[:8]}"),
        user_id="sofiia",
    )
    if _aistalk is None:
        raise HTTPException(status_code=503, detail="AISTALK adapter disabled")
    _aistalk.handle_event(event)
    return {
        "ok": True,
        "queued": True,
        "event_type": event_type,
        "adapter": _aistalk.status(),
    }
|
||
|
||
|
||
@app.get("/api/aistalk/team/run/{run_id}")
async def aistalk_team_run_status(run_id: str, _auth: str = Depends(require_auth)):
    """Proxy a supervisor run-status lookup; drop finished runs from local accounting."""
    status_code, payload = await _supervisor_request_json(
        "GET",
        f"/v1/runs/{run_id}",
        timeout=20.0,
    )
    if isinstance(payload, dict):
        run_status = str(payload.get("status") or "")
        if _is_terminal_run_status(run_status):
            _aistalk_team_active_runs.pop(run_id, None)
    return JSONResponse(status_code=status_code, content=payload)
|
||
|
||
|
||
# ── Evidence Pack Engine ────────────────────────────────────────────────────────
|
||
|
||
@app.post("/api/projects/{project_id}/supervisor/evidence")
async def record_evidence_pack(
    project_id: str,
    request: Request,
    _auth: str = Depends(require_auth),
):
    """Record an Evidence Pack for a completed Supervisor run.

    Links the run into the Dialog Graph and auto-creates follow-up tasks.

    Body: {
        "run_id": str,                 # required
        "graph_name": str,             # required
        "status": "completed|failed",  # optional
        "summary": str,                # optional
        "findings": [...],             # optional
        "recommendations": [...],      # optional
        "follow_up_tasks": [           # optional - auto-created as tasks
            {"title": ..., "description": ..., "priority": "normal|high|urgent"}
        ]
    }
    """
    body = await request.json()
    run_id = body.get("run_id")
    graph_name = body.get("graph_name")
    if not (run_id and graph_name):
        raise HTTPException(status_code=400, detail="run_id and graph_name are required")
    try:
        pack = await _app_db.create_evidence_pack(
            project_id=project_id,
            run_id=run_id,
            graph_name=graph_name,
            result_data=body,
            created_by="sofiia",
        )
    except Exception as e:
        logger.error("record_evidence_pack failed: %s", e)
        raise HTTPException(status_code=500, detail=str(e))
    return JSONResponse(status_code=201, content=pack)
|
||
|
||
|
||
# ── Graph Integrity ─────────────────────────────────────────────────────────────
|
||
|
||
@app.get("/api/projects/{project_id}/graph/integrity")
async def graph_integrity(project_id: str, _auth: str = Depends(require_auth)):
    """Run integrity checks on the project Dialog Graph.

    Returns: {"ok": bool, "violations": [...], "stats": {...}}
    """
    try:
        result = await _app_db.check_graph_integrity(project_id)
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))
    # 422 signals that violations were found.
    return JSONResponse(status_code=200 if result["ok"] else 422, content=result)
|
||
|
||
|
||
# ── Graph Hygiene ───────────────────────────────────────────────────────────────
|
||
|
||
@app.post("/api/projects/{project_id}/graph/hygiene/run")
async def run_graph_hygiene(
    project_id: str,
    request: Request,
    _auth: str = Depends(require_auth),
):
    """Run Graph Hygiene Engine: dedup, lifecycle normalization, importance scoring.

    Body (all optional):
      {
        "dry_run": true,          // default true — compute but don't write
        "scope": "all"|"recent",  // default "all"
        "since": "ISO8601"        // required when scope=recent
      }

    Returns: {"ok": bool, "dry_run": bool, "changes": [...], "stats": {...}}
    """
    is_json = request.headers.get("content-type", "").startswith("application/json")
    body = await request.json() if is_json else {}
    try:
        result = await _app_db.run_graph_hygiene(
            project_id=project_id,
            dry_run=body.get("dry_run", True),
            scope=body.get("scope", "all"),
            since=body.get("since"),
        )
        return JSONResponse(status_code=200, content=result)
    except Exception as e:
        logger.error("run_graph_hygiene failed: %s", e)
        raise HTTPException(status_code=500, detail=str(e))
|
||
|
||
|
||
# ── Self-Reflection Engine ──────────────────────────────────────────────────────
|
||
|
||
@app.post("/api/projects/{project_id}/supervisor/reflect")
async def supervisor_reflect(
    project_id: str,
    request: Request,
    _auth: str = Depends(require_auth),
):
    """Create a Self-Reflection artifact for a completed Supervisor run.

    Analyzes the Evidence Pack and creates a 'decision' node (reflection)
    linked to the agent_run node via 'reflects_on' edge.

    Body: {
        "run_id": str,   // required
        "evidence": {    // optional — pass evidence data for richer analysis
            "summary": ...,
            "findings": [...],
            "recommendations": [...],
            "follow_up_tasks": [...]
        }
    }

    Returns: {node_id, reflection: {...scores, risks, ...}, edge_id, task_ids}
    """
    body = await request.json()
    run_id = body.get("run_id")
    if not run_id:
        raise HTTPException(status_code=400, detail="run_id is required")
    try:
        reflection = await _app_db.create_run_reflection(
            project_id=project_id,
            run_id=run_id,
            evidence_data=body.get("evidence") or {},
            created_by="sofiia",
        )
    except Exception as e:
        logger.error("supervisor_reflect failed: %s", e)
        raise HTTPException(status_code=500, detail=str(e))
    return JSONResponse(status_code=201, content=reflection)
|
||
|
||
|
||
# ── Strategic CTO Layer: Snapshots ───────────────────────────────────────────
|
||
|
||
@app.post("/api/projects/{project_id}/graph/snapshot")
async def compute_snapshot(
    project_id: str,
    window: str = "7d",
    _auth: str = Depends(require_auth),
):
    """Compute and store a graph analytics snapshot for the project."""
    try:
        snapshot = await _app_db.compute_graph_snapshot(project_id=project_id, window=window)
    except Exception as e:
        logger.error("compute_snapshot failed: %s", e)
        raise HTTPException(status_code=500, detail=str(e))
    return JSONResponse(status_code=201, content=snapshot)
|
||
|
||
|
||
@app.get("/api/projects/{project_id}/graph/snapshot")
async def get_snapshot(
    project_id: str,
    window: str = "7d",
    _auth: str = Depends(require_auth),
):
    """Get the latest snapshot for the project and window."""
    latest = await _app_db.get_latest_snapshot(project_id=project_id, window=window)
    if latest:
        return JSONResponse(content=latest)
    raise HTTPException(status_code=404, detail="No snapshot found. Run POST first.")
|
||
|
||
|
||
# ── Strategic CTO Layer: Signals ─────────────────────────────────────────────
|
||
|
||
@app.post("/api/projects/{project_id}/graph/signals/recompute")
async def recompute_signals(
    project_id: str,
    window: str = "7d",
    dry_run: bool = True,
    _auth: str = Depends(require_auth),
):
    """Run signal detection rules and upsert graph_signals."""
    try:
        outcome = await _app_db.recompute_graph_signals(
            project_id=project_id,
            window=window,
            dry_run=dry_run,
        )
    except Exception as e:
        logger.error("recompute_signals failed: %s", e)
        raise HTTPException(status_code=500, detail=str(e))
    return JSONResponse(status_code=200, content=outcome)
|
||
|
||
|
||
@app.get("/api/projects/{project_id}/graph/signals")
async def list_signals(
    project_id: str,
    status: str = "open",
    limit: int = 50,
    _auth: str = Depends(require_auth),
):
    """List graph signals for the project."""
    rows = await _app_db.get_graph_signals(project_id=project_id, status=status, limit=limit)
    return JSONResponse(content={"signals": rows, "count": len(rows)})
|
||
|
||
|
||
@app.post("/api/projects/{project_id}/graph/signals/{signal_id}/ack")
async def ack_signal(
    project_id: str,
    signal_id: str,
    _auth: str = Depends(require_auth),
):
    """Mark a graph signal as acknowledged."""
    updated = await _app_db.update_signal_status(signal_id=signal_id, new_status="ack")
    if updated:
        return JSONResponse(content=updated)
    raise HTTPException(status_code=404, detail="Signal not found")
|
||
|
||
|
||
@app.post("/api/projects/{project_id}/graph/signals/{signal_id}/resolve")
async def resolve_signal(
    project_id: str,
    signal_id: str,
    _auth: str = Depends(require_auth),
):
    """Mark a graph signal as resolved."""
    updated = await _app_db.update_signal_status(signal_id=signal_id, new_status="resolved")
    if updated:
        return JSONResponse(content=updated)
    raise HTTPException(status_code=404, detail="Signal not found")
|
||
|
||
|
||
@app.post("/api/projects/{project_id}/graph/signals/{signal_id}/dismiss")
async def dismiss_signal(
    project_id: str,
    signal_id: str,
    _auth: str = Depends(require_auth),
):
    """Mark a graph signal as dismissed."""
    updated = await _app_db.update_signal_status(signal_id=signal_id, new_status="dismissed")
    if updated:
        return JSONResponse(content=updated)
    raise HTTPException(status_code=404, detail="Signal not found")
|
||
|
||
|
||
@app.post("/api/projects/{project_id}/graph/signals/auto-resolve")
async def auto_resolve_signals(
    project_id: str,
    dry_run: bool = True,
    _auth: str = Depends(require_auth),
):
    """Check resolution criteria for all open/ack signals and auto-resolve if met.

    ?dry_run=true  — compute without writing (default)
    ?dry_run=false — apply resolutions

    Returns: {ok, dry_run, checked, resolved, diff: [{signal_id, signal_type, action, reason}]}
    """
    try:
        outcome = await _app_db.auto_resolve_signals(
            project_id=project_id,
            dry_run=dry_run,
        )
    except Exception as e:
        logger.error("auto_resolve_signals failed: %s", e)
        raise HTTPException(status_code=500, detail=str(e))
    return JSONResponse(content=outcome)
|
||
|
||
|
||
@app.post("/api/projects/{project_id}/graph/signals/{signal_id}/mitigate")
async def mitigate_signal(
    project_id: str,
    signal_id: str,
    playbook_id: str = "",
    _auth: str = Depends(require_auth),
):
    """Create a deterministic mitigation plan for a signal.

    If playbook_id is provided, creates tasks from the playbook steps instead of templates.
    Otherwise uses built-in mitigation templates.

    Returns: {plan_node_id, task_ids, task_count, signal_type}
    """
    try:
        if playbook_id:
            plan = await _app_db.apply_playbook_to_signal(
                project_id=project_id,
                signal_id=signal_id,
                playbook_id=playbook_id,
                created_by="sofiia",
            )
        else:
            plan = await _app_db.create_mitigation_plan(
                project_id=project_id,
                signal_id=signal_id,
                created_by="sofiia",
            )
        # Mitigated signals move to 'ack' so they no longer show as fresh.
        await _app_db.update_signal_status(signal_id=signal_id, new_status="ack")
        return JSONResponse(status_code=201, content=plan)
    except ValueError as e:
        raise HTTPException(status_code=404, detail=str(e))
    except Exception as e:
        logger.error("mitigate_signal failed: %s", e)
        raise HTTPException(status_code=500, detail=str(e))
|
||
|
||
|
||
# ── CTO Portfolio (Cross-Project) ────────────────────────────────────────────
|
||
|
||
@app.get("/api/cto/portfolio/snapshots")
async def portfolio_snapshots(
    window: str = "7d",
    _auth: str = Depends(require_auth),
):
    """Get the latest snapshot for every project (cross-project portfolio view).

    Returns: {projects: [{project_id, name, metrics, snapshot_at, latest_lesson_*}], window, count}
    """
    db = await _app_db.get_db()
    # All projects
    async with db.execute("SELECT project_id, name FROM projects ORDER BY name") as cur:
        projects = await cur.fetchall()
    result = []
    for pid, pname in projects:
        snap = await _app_db.get_latest_snapshot(pid, window)
        # Get latest lesson bucket + trend_flags
        async with db.execute(
            "SELECT date_bucket, metrics_json FROM lessons WHERE project_id=? ORDER BY date_bucket DESC LIMIT 1",
            (pid,),
        ) as cur:
            lrow = await cur.fetchone()
        lesson_bucket = None
        lesson_trend_flags = None
        if lrow:
            lesson_bucket = lrow[0]
            try:
                # FIX: use the module-level json import instead of re-importing
                # json (as _json) inside the loop on every project.
                lm = json.loads(lrow[1] or "{}")
                lesson_trend_flags = lm.get("trend_flags")
            except Exception:
                # Corrupt metrics_json: leave trend flags unset.
                pass
        # Compute streaks
        try:
            lesson_streaks = await _app_db.compute_lesson_streaks(pid)
        except Exception:
            lesson_streaks = None
        result.append({
            "project_id": pid,
            "name": pname,
            "metrics": snap["metrics"] if snap else None,
            "snapshot_at": snap["created_at"] if snap else None,
            "latest_lesson_bucket": lesson_bucket,
            "latest_lesson_trend_flags": lesson_trend_flags,
            "latest_lesson_streaks": lesson_streaks,
        })
    return JSONResponse(content={"projects": result, "window": window, "count": len(result)})
|
||
|
||
|
||
@app.get("/api/cto/portfolio/signals")
async def portfolio_signals(
    status: str = "open",
    severity: str = "",
    limit: int = 50,
    _auth: str = Depends(require_auth),
):
    """Get signals across all projects, ordered by severity then created_at.

    ?status=open|ack|resolved|dismissed|all
    ?severity=high,critical (comma-separated filter, optional)
    """
    db = await _app_db.get_db()
    async with db.execute("SELECT project_id, name FROM projects") as cur:
        project_names = {r[0]: r[1] for r in await cur.fetchall()}

    if status == "all":
        query = "SELECT *, rowid FROM graph_signals ORDER BY severity DESC, created_at DESC LIMIT ?"
        args: tuple = (limit,)
    else:
        query = "SELECT *, rowid FROM graph_signals WHERE status=? ORDER BY severity DESC, created_at DESC LIMIT ?"
        args = (status, limit)

    async with db.execute(query, args) as cur:
        rows = await cur.fetchall()

    # Severity order for sorting
    sev_rank = {"critical": 0, "high": 1, "medium": 2, "low": 3}
    wanted = {s.strip() for s in severity.split(",") if s.strip()} if severity else set()

    matched = []
    for row in rows:
        item = dict(row)
        item.pop("rowid", None)
        try:
            item["evidence"] = json.loads(item["evidence"])
        except Exception:
            item["evidence"] = {}
        if wanted and item.get("severity") not in wanted:
            continue
        item["project_name"] = project_names.get(item["project_id"], item["project_id"])
        matched.append(item)

    # Re-sort in Python: SQL's lexicographic severity DESC is not the true ranking.
    matched.sort(key=lambda s: (sev_rank.get(s.get("severity", "low"), 3), s.get("created_at", "")))
    return JSONResponse(content={"signals": matched[:limit], "count": len(matched), "status": status})
|
||
|
||
|
||
@app.post("/api/cto/portfolio/drift/recompute")
async def portfolio_drift_recompute(
    window: str = "7d",
    dry_run: bool = False,
    _auth: str = Depends(require_auth),
):
    """Recompute portfolio-level drift signals based on lesson streaks across all projects."""
    try:
        outcome = await _app_db.recompute_portfolio_signals(window=window, dry_run=dry_run)
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))
    return JSONResponse(content=outcome)
|
||
|
||
|
||
@app.get("/api/cto/portfolio/drift/signals")
async def portfolio_drift_signals(
    status: str = "open",
    _auth: str = Depends(require_auth),
):
    """Get portfolio-level drift signals."""
    try:
        items = await _app_db.list_portfolio_signals(status=status)
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))
    return JSONResponse(content={"signals": items, "count": len(items)})
|
||
|
||
|
||
@app.post("/api/cto/portfolio/streaks")
async def portfolio_streaks(
    _auth: str = Depends(require_auth),
):
    """Get streak data for all projects."""
    try:
        db = await _app_db.get_db()
        async with db.execute("SELECT project_id, name FROM projects") as cur:
            rows = await cur.fetchall()
        summary = []
        for pid, pname in rows:
            summary.append({
                "project_id": pid,
                "name": pname,
                "streaks": await _app_db.compute_lesson_streaks(pid),
            })
        return JSONResponse(content={"projects": summary, "count": len(summary)})
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))
|
||
|
||
|
||
# ── Playbooks (Graph Learning Layer) ─────────────────────────────────────────
|
||
|
||
@app.get("/api/projects/{project_id}/playbooks")
async def list_playbooks(
    project_id: str,
    signal_type: str = "",
    limit: int = 10,
    _auth: str = Depends(require_auth),
):
    """List playbooks for a project, ordered by success_rate desc."""
    try:
        items = await _app_db.list_playbooks(
            project_id=project_id,
            signal_type=signal_type,
            limit=limit,
        )
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))
    return JSONResponse(content={"playbooks": items, "count": len(items)})
|
||
|
||
|
||
@app.post("/api/projects/{project_id}/playbooks/from-signal/{signal_id}")
async def create_playbook_from_signal(
    project_id: str,
    signal_id: str,
    _auth: str = Depends(require_auth),
):
    """Promote current mitigation of a signal into a playbook (or update existing).

    Requires signal to have plan_node_id and mitigation_task_ids in evidence.
    Returns: {playbook_id, doc_id, version_id, context_key, created, stats}
    """
    try:
        # Check if signal is resolved to update stats
        db = await _app_db.get_db()
        async with db.execute(
            "SELECT status, evidence FROM graph_signals WHERE id=? AND project_id=?",
            (signal_id, project_id),
        ) as cur:
            srow = await cur.fetchone()
        was_resolved = bool(srow) and srow[0] == "resolved"
        outcome = await _app_db.upsert_playbook_from_signal(
            project_id=project_id,
            signal_id=signal_id,
            resolved=was_resolved,
        )
        return JSONResponse(status_code=201, content=outcome)
    except ValueError as e:
        raise HTTPException(status_code=409, detail=str(e))
    except Exception as e:
        logger.error("create_playbook_from_signal failed: %s", e)
        raise HTTPException(status_code=500, detail=str(e))
|
||
|
||
|
||
# ── Portfolio Batch Recompute ─────────────────────────────────────────────────
|
||
|
||
@app.post("/api/cto/portfolio/snapshots/recompute")
|
||
async def portfolio_snapshots_recompute(
|
||
window: str = "7d",
|
||
force: bool = False,
|
||
_auth: str = Depends(require_auth),
|
||
):
|
||
"""Recompute graph snapshots for ALL projects.
|
||
|
||
Skips projects that already have a snapshot for today (date_bucket) unless force=true.
|
||
Returns: {computed, skipped, errors[]}
|
||
"""
|
||
import datetime as _dt2
|
||
db = await _app_db.get_db()
|
||
async with db.execute("SELECT project_id FROM projects") as cur:
|
||
project_ids = [r[0] for r in await cur.fetchall()]
|
||
|
||
today = _dt2.datetime.utcnow().strftime("%Y-%m-%d")
|
||
computed, skipped, errors = 0, 0, []
|
||
for pid in project_ids:
|
||
try:
|
||
if not force:
|
||
async with db.execute(
|
||
"SELECT id FROM graph_snapshots WHERE project_id=? AND window=? AND date_bucket=?",
|
||
(pid, window, today),
|
||
) as cur:
|
||
exists = await cur.fetchone()
|
||
if exists:
|
||
skipped += 1
|
||
continue
|
||
await _app_db.compute_graph_snapshot(project_id=pid, window=window)
|
||
computed += 1
|
||
except Exception as e:
|
||
errors.append({"project_id": pid, "error": str(e)})
|
||
return JSONResponse(content={"computed": computed, "skipped": skipped, "errors": errors})
|
||
|
||
|
||
@app.post("/api/cto/portfolio/signals/recompute")
|
||
async def portfolio_signals_recompute(
|
||
window: str = "7d",
|
||
dry_run: bool = False,
|
||
_auth: str = Depends(require_auth),
|
||
):
|
||
"""Recompute signals for ALL projects.
|
||
|
||
Returns: {results: [{project_id, new, refreshed, total}], errors[]}
|
||
"""
|
||
db = await _app_db.get_db()
|
||
async with db.execute("SELECT project_id FROM projects") as cur:
|
||
project_ids = [r[0] for r in await cur.fetchall()]
|
||
|
||
results, errors = [], []
|
||
for pid in project_ids:
|
||
try:
|
||
diff = await _app_db.recompute_graph_signals(
|
||
project_id=pid, window=window, dry_run=dry_run
|
||
)
|
||
new_count = sum(1 for d in diff if d.get("action") == "new")
|
||
refresh_count = sum(1 for d in diff if d.get("action") in ("refresh", "reopen"))
|
||
results.append({
|
||
"project_id": pid,
|
||
"new": new_count,
|
||
"refreshed": refresh_count,
|
||
"total": len(diff),
|
||
})
|
||
except Exception as e:
|
||
errors.append({"project_id": pid, "error": str(e)})
|
||
return JSONResponse(content={"results": results, "errors": errors, "dry_run": dry_run})
|
||
|
||
|
||
# ── Lessons (Graph Learning Layer) ────────────────────────────────────────────
|
||
|
||
@app.post("/api/projects/{project_id}/lessons/generate")
|
||
async def generate_lesson(
|
||
project_id: str,
|
||
window: str = "7d",
|
||
dry_run: bool = True,
|
||
_auth: str = Depends(require_auth),
|
||
):
|
||
"""Generate a weekly Lessons Learned report for a project.
|
||
|
||
dry_run=true (default): compute and return without writing to DB.
|
||
dry_run=false: persist lesson node + metrics + improvement tasks.
|
||
|
||
Returns: {dry_run, date_bucket, markdown, metrics, planned_improvement_tasks, evidence}
|
||
"""
|
||
try:
|
||
result = await _app_db.upsert_lesson(
|
||
project_id=project_id,
|
||
window=window,
|
||
dry_run=dry_run,
|
||
created_by="sofiia",
|
||
)
|
||
return JSONResponse(status_code=200 if dry_run else 201, content=result)
|
||
except Exception as e:
|
||
logger.error("generate_lesson failed: %s", e)
|
||
raise HTTPException(status_code=500, detail=str(e))
|
||
|
||
|
||
@app.get("/api/projects/{project_id}/lessons")
|
||
async def list_lessons_endpoint(
|
||
project_id: str,
|
||
window: str = "7d",
|
||
limit: int = 8,
|
||
_auth: str = Depends(require_auth),
|
||
):
|
||
"""List lessons for a project, ordered by date_bucket desc."""
|
||
try:
|
||
lessons = await _app_db.list_lessons(project_id=project_id, window=window, limit=limit)
|
||
return JSONResponse(content={"lessons": lessons, "count": len(lessons)})
|
||
except Exception as e:
|
||
raise HTTPException(status_code=500, detail=str(e))
|
||
|
||
|
||
@app.get("/api/projects/{project_id}/lessons/{lesson_id}")
|
||
async def get_lesson_endpoint(
|
||
project_id: str,
|
||
lesson_id: str,
|
||
_auth: str = Depends(require_auth),
|
||
):
|
||
"""Get full lesson detail including markdown and linked evidence."""
|
||
try:
|
||
lesson = await _app_db.get_lesson_detail(project_id=project_id, lesson_id=lesson_id)
|
||
if not lesson:
|
||
raise HTTPException(status_code=404, detail="Lesson not found")
|
||
return JSONResponse(content=lesson)
|
||
except HTTPException:
|
||
raise
|
||
except Exception as e:
|
||
raise HTTPException(status_code=500, detail=str(e))
|
||
|
||
|
||
@app.post("/api/projects/{project_id}/lessons/impact/recompute")
|
||
async def recompute_lesson_impact(
|
||
project_id: str,
|
||
window: str = "7d",
|
||
dry_run: bool = False,
|
||
force: bool = False,
|
||
_auth: str = Depends(require_auth),
|
||
):
|
||
"""Recompute impact score for the prior-bucket lesson based on current-bucket metrics."""
|
||
try:
|
||
if dry_run:
|
||
# Preview: just return what would be computed, no write
|
||
result = await _app_db.evaluate_lesson_impact(
|
||
project_id=project_id, window=window, force=True
|
||
)
|
||
return JSONResponse(content={"dry_run": True, "preview": result})
|
||
result = await _app_db.evaluate_lesson_impact(
|
||
project_id=project_id, window=window, force=force
|
||
)
|
||
return JSONResponse(content={"dry_run": False, "result": result})
|
||
except Exception as e:
|
||
raise HTTPException(status_code=500, detail=str(e))
|
||
|
||
|
||
@app.post("/api/cto/portfolio/lessons/generate")
|
||
async def portfolio_lessons_generate(
|
||
window: str = "7d",
|
||
dry_run: bool = False,
|
||
force: bool = False,
|
||
_auth: str = Depends(require_auth),
|
||
):
|
||
"""Generate lessons for ALL projects.
|
||
|
||
Skips projects that already have a lesson for the current bucket (unless force=true).
|
||
Returns: {generated, skipped, errors[]}
|
||
"""
|
||
db = await _app_db.get_db()
|
||
async with db.execute("SELECT project_id FROM projects") as cur:
|
||
project_ids = [r[0] for r in await cur.fetchall()]
|
||
|
||
current_bucket = _app_db.compute_lesson_bucket()
|
||
generated, skipped, errors = 0, 0, []
|
||
for pid in project_ids:
|
||
try:
|
||
if not force and not dry_run:
|
||
async with db.execute(
|
||
"SELECT lesson_id FROM lessons WHERE project_id=? AND date_bucket=? AND window=?",
|
||
(pid, current_bucket, window),
|
||
) as cur:
|
||
exists = await cur.fetchone()
|
||
if exists:
|
||
skipped += 1
|
||
continue
|
||
await _app_db.upsert_lesson(project_id=pid, window=window, dry_run=dry_run)
|
||
generated += 1
|
||
except Exception as e:
|
||
errors.append({"project_id": pid, "error": str(e)})
|
||
return JSONResponse(content={
|
||
"generated": generated,
|
||
"skipped": skipped,
|
||
"errors": errors,
|
||
"dry_run": dry_run,
|
||
"date_bucket": current_bucket,
|
||
})
|
||
|
||
|
||
# ── Level 6: Governance Gates ─────────────────────────────────────────────────
|
||
|
||
@app.get("/api/projects/{project_id}/governance/gates")
|
||
async def get_governance_gates(
|
||
project_id: str,
|
||
window: str = "7d",
|
||
_auth: str = Depends(require_auth),
|
||
):
|
||
"""Return latest governance gate evaluation (dry_run, no persist)."""
|
||
try:
|
||
result = await _app_db.evaluate_governance_gates(
|
||
project_id=project_id, window=window, dry_run=True
|
||
)
|
||
return JSONResponse(content=result)
|
||
except Exception as e:
|
||
raise HTTPException(status_code=500, detail=str(e))
|
||
|
||
|
||
@app.post("/api/projects/{project_id}/governance/gates/evaluate")
|
||
async def evaluate_governance_gates_endpoint(
|
||
project_id: str,
|
||
window: str = "7d",
|
||
dry_run: bool = False,
|
||
_auth: str = Depends(require_auth),
|
||
):
|
||
"""Evaluate governance gates and optionally persist decision node."""
|
||
try:
|
||
result = await _app_db.evaluate_governance_gates(
|
||
project_id=project_id, window=window, dry_run=dry_run
|
||
)
|
||
return JSONResponse(content=result)
|
||
except Exception as e:
|
||
raise HTTPException(status_code=500, detail=str(e))
|
||
|
||
|
||
# ── Level 6: Portfolio Drift Auto-plan / Auto-run ────────────────────────────
|
||
|
||
@app.post("/api/cto/portfolio/drift/{signal_id}/auto-plan")
|
||
async def portfolio_drift_auto_plan(
|
||
signal_id: str,
|
||
_auth: str = Depends(require_auth),
|
||
):
|
||
"""Populate evidence.auto_actions.runs with planned entries (dry_run=True)."""
|
||
try:
|
||
result = await _app_db.auto_plan_drift_signal(signal_id=signal_id)
|
||
if "error" in result:
|
||
raise HTTPException(status_code=404, detail=result["error"])
|
||
return JSONResponse(content=result)
|
||
except HTTPException:
|
||
raise
|
||
except Exception as e:
|
||
raise HTTPException(status_code=500, detail=str(e))
|
||
|
||
|
||
@app.post("/api/cto/portfolio/drift/{signal_id}/auto-run")
|
||
async def portfolio_drift_auto_run(
|
||
signal_id: str,
|
||
dry_run: bool = False,
|
||
force: bool = False,
|
||
_auth: str = Depends(require_auth),
|
||
):
|
||
"""Execute planned/queued workflow runs for a portfolio drift signal."""
|
||
try:
|
||
supervisor_url = os.getenv("SUPERVISOR_URL", "http://sofiia-supervisor:8080")
|
||
result = await _app_db.auto_run_drift_signal(
|
||
signal_id=signal_id,
|
||
dry_run=dry_run,
|
||
force=force,
|
||
supervisor_url=supervisor_url,
|
||
)
|
||
if "error" in result:
|
||
raise HTTPException(status_code=404, detail=result["error"])
|
||
return JSONResponse(content=result)
|
||
except HTTPException:
|
||
raise
|
||
except Exception as e:
|
||
raise HTTPException(status_code=500, detail=str(e))
|
||
|
||
|
||
# ── Level 7: Governance Audit Trail ──────────────────────────────────────────
|
||
|
||
@app.get("/api/cto/audit/events")
|
||
async def audit_events_portfolio(
|
||
scope: Optional[str] = "portfolio",
|
||
limit: int = 100,
|
||
event_type: Optional[str] = None,
|
||
status: Optional[str] = None,
|
||
since: Optional[str] = None,
|
||
_auth: str = Depends(require_auth),
|
||
):
|
||
"""List governance audit events for portfolio (or any scope)."""
|
||
try:
|
||
items = await _app_db.list_governance_events(
|
||
scope=scope, project_id="portfolio" if scope == "portfolio" else None,
|
||
event_type=event_type, status=status, since=since, limit=limit,
|
||
)
|
||
return JSONResponse(content={"items": items, "count": len(items)})
|
||
except Exception as e:
|
||
raise HTTPException(status_code=500, detail=str(e))
|
||
|
||
|
||
@app.get("/api/projects/{project_id}/audit/events")
|
||
async def audit_events_project(
|
||
project_id: str,
|
||
limit: int = 100,
|
||
event_type: Optional[str] = None,
|
||
status: Optional[str] = None,
|
||
since: Optional[str] = None,
|
||
_auth: str = Depends(require_auth),
|
||
):
|
||
"""List governance audit events for a specific project."""
|
||
try:
|
||
items = await _app_db.list_governance_events(
|
||
scope="project", project_id=project_id,
|
||
event_type=event_type, status=status, since=since, limit=limit,
|
||
)
|
||
return JSONResponse(content={"items": items, "count": len(items)})
|
||
except Exception as e:
|
||
raise HTTPException(status_code=500, detail=str(e))
|
||
|
||
|
||
# ── Level 8: Agents as Projects ───────────────────────────────────────────────
|
||
|
||
import difflib as _difflib
|
||
import time as _time
|
||
|
||
# ── Agent Ops helpers ──────────────────────────────────────────────────────────
|
||
|
||
async def _fetch_agents_from_gateway(
    node_id: str,
    gateway_url: str,
    timeout_ms: Optional[int] = None,
    get_retry: int = 1,
) -> Tuple[List[Dict], Optional[str], Optional[int]]:
    """Fetch agents list from gateway /health. Returns (agents, error_str|None, latency_ms).

    Respects per-node timeout_ms and retry policy.
    latency_ms is cumulative from the first attempt (monotonic clock), not per-attempt;
    it is None only when no gateway_url is configured.
    """
    if not gateway_url:
        # Nothing was attempted, so no latency to report.
        return [], f"No gateway_url configured for {node_id}", None
    timeout_sec = (timeout_ms or 2500) / 1000.0  # default 2.5s per attempt
    last_err = None
    attempts = get_retry + 1  # get_retry counts *extra* tries beyond the first
    t0 = _time.monotonic()
    for attempt in range(attempts):
        try:
            async with httpx.AsyncClient(timeout=timeout_sec) as client:
                resp = await client.get(f"{gateway_url.rstrip('/')}/health")
                latency_ms = int((_time.monotonic() - t0) * 1000)
                if resp.status_code != 200:
                    # Non-200 is retryable: remember the status and try again.
                    last_err = f"HTTP {resp.status_code}"
                    continue
                data = resp.json()
                raw = data.get("agents", {})
                agents: List[Dict] = []
                if isinstance(raw, dict):
                    # Dict form: {agent_id: info} — normalize to the console's agent shape.
                    for aid, info in raw.items():
                        agents.append({
                            "agent_id": aid,
                            "display_name": info.get("name", aid),
                            # An agent without a loaded prompt is considered degraded.
                            "status": "healthy" if info.get("prompt_loaded") else "degraded",
                            "telegram_token_configured": info.get("telegram_token_configured", False),
                            "prompt_loaded": info.get("prompt_loaded", False),
                            "node_id": node_id,
                            "active_prompt": info.get("active_prompt"),
                            "badges": info.get("badges", []),
                            "visibility": info.get("visibility", "public"),
                            "telegram_mode": info.get("telegram_mode", "on"),
                            "lifecycle_status": info.get("lifecycle_status", "active"),
                        })
                elif isinstance(raw, list):
                    # List form: already agent dicts — just tag each with the node.
                    for a in raw:
                        agents.append({**a, "node_id": node_id})
                return agents, None, latency_ms
        except Exception as e:
            # Network/parse failure: keep a truncated error and retry if allowed.
            last_err = str(e)[:200]
    # All attempts exhausted — report the last error with total elapsed time.
    latency_ms = int((_time.monotonic() - t0) * 1000)
    return [], last_err, latency_ms
|
||
|
||
|
||
def _node_info(node_id: str) -> Dict:
    """Return {gateway_url, policy} for a node."""
    # Imported lazily to keep module import order unchanged.
    from .config import get_node_policy
    gw = get_gateway_url(node_id)
    policy = get_node_policy(node_id)
    return {"gateway_url": gw, "policy": policy}
|
||
|
||
|
||
def _agent_desired_payload(override: Dict) -> Dict:
|
||
"""Canonical desired-state payload from an override row."""
|
||
return {
|
||
"display_name": override.get("display_name"),
|
||
"domain": override.get("domain"),
|
||
"system_prompt_md": override.get("system_prompt_md"),
|
||
}
|
||
|
||
|
||
def _merge_agent_with_override(agent: Dict, override: Optional[Dict]) -> Dict:
    """Overlay a local override onto a live agent dict and compute drift flags."""
    merged = dict(agent)
    if not override:
        merged["has_override"] = False
        merged["drift"] = False
        return merged
    # Only truthy (non-empty) override fields win over the live values.
    for field in ("display_name", "domain", "system_prompt_md"):
        if override.get(field):
            merged[field] = override[field]
    merged["is_hidden"] = bool(override.get("is_hidden"))
    merged["has_override"] = True
    merged["override_updated_at"] = override.get("updated_at")
    merged["last_applied_hash"] = override.get("last_applied_hash")
    merged["last_applied_at"] = override.get("last_applied_at")
    # Drift: what we want to apply differs from what was last pushed to the node.
    desired_hash = _app_db._agent_payload_hash(_agent_desired_payload(override))
    merged["desired_hash"] = desired_hash
    applied = override.get("last_applied_hash")
    merged["drift"] = bool(applied and applied != desired_hash)
    return merged
|
||
|
||
|
||
async def _check_prompt_freeze(node_id: str, agent_id: str) -> bool:
    """Return True if PROMPT_FREEZE gate is active for any related project."""
    try:
        # Currently only the portfolio-level gate is consulted.
        evaluation = await _app_db.evaluate_governance_gates("portfolio", window="7d", dry_run=True)
        for gate in evaluation.get("gates", []):
            is_freeze = gate.get("name") == "PROMPT_FREEZE"
            if is_freeze and gate.get("status") != "PASS":
                return True
    except Exception:
        # Best-effort: an evaluation failure never blocks the apply path.
        pass
    return False
|
||
|
||
|
||
# ── Agent CRUD endpoints ───────────────────────────────────────────────────────
|
||
|
||
# Agents required on every online node — if absent, a governance signal is raised
# (list_agents emits a "node_required_agent_missing" event via _emit_monitor_missing_event).
_REQUIRED_PER_NODE_AGENTS: List[str] = ["monitor"]
|
||
|
||
|
||
def _normalize_agent_capabilities(agent: Dict) -> Dict:
|
||
"""Add normalized capabilities: {voice, telegram} to agent dict."""
|
||
badges = agent.get("badges", [])
|
||
telegram_mode = agent.get("telegram_mode", "on")
|
||
agent_id = agent.get("agent_id", "")
|
||
agent["capabilities"] = {
|
||
"voice": agent_id == "aistalk" or "voice" in badges,
|
||
"telegram": telegram_mode != "off",
|
||
}
|
||
return agent
|
||
|
||
|
||
async def _emit_monitor_missing_event(node_id: str, bucket: str) -> None:
    """Write a governance_event when monitor is confirmed absent on an online node."""
    evidence = {
        "v": 1,
        "message": f"Required agent 'monitor' absent on {node_id}",
        "inputs": {"node_id": node_id, "required_agent": "monitor"},
        "outputs": {"missing": True},
        "links": {},
        "timings": {},
    }
    try:
        await _app_db.append_governance_event(
            scope="portfolio",
            project_id="portfolio",
            actor_type="system",
            actor_id=None,
            event_type="node_required_agent_missing",
            # Daily bucket in the key makes re-emission idempotent per node/day.
            idempotency_key=f"req|missing|{node_id}|monitor|{bucket}",
            severity="high",
            status="error",
            ref_type="node",
            ref_id=node_id,
            evidence=evidence,
        )
    except Exception as exc:
        # Best-effort: audit emission must never break the caller.
        logger.warning("_emit_monitor_missing_event failed: %s", exc)
|
||
|
||
|
||
@app.get("/api/agents")
|
||
async def list_agents(
|
||
nodes: str = "NODA1",
|
||
include_hidden: bool = False,
|
||
_auth: str = Depends(require_auth),
|
||
):
|
||
"""Fetch agents from node gateways, merge with local overrides.
|
||
|
||
Returns {items, node_errors, stats, required_missing_nodes, nodes_queried}.
|
||
Partial node failure never blocks other nodes — always HTTP 200.
|
||
"""
|
||
node_ids = [n.strip().upper() for n in nodes.split(",") if n.strip()]
|
||
today_bucket = datetime.utcnow().strftime("%Y-%m-%d")
|
||
|
||
all_agents: List[Dict] = []
|
||
node_errors: List[Dict] = []
|
||
node_stats: List[Dict] = []
|
||
required_missing_nodes: List[Dict] = [] # nodes where required agents absent
|
||
|
||
overrides_list = await _app_db.list_agent_overrides()
|
||
overrides_map = {(o["node_id"], o["agent_id"]): o for o in overrides_list}
|
||
|
||
for node_id in node_ids:
|
||
ni = _node_info(node_id)
|
||
gw_url = ni["gateway_url"]
|
||
policy = ni["policy"]
|
||
agents_raw, err, latency_ms = await _fetch_agents_from_gateway(
|
||
node_id, gw_url,
|
||
timeout_ms=policy["gateway_timeout_ms"],
|
||
get_retry=policy["get_retry"],
|
||
)
|
||
if err:
|
||
node_errors.append({
|
||
"node_id": node_id, "error": err,
|
||
"gateway_url": gw_url, "latency_ms": latency_ms,
|
||
"node_role": policy["node_role"],
|
||
})
|
||
node_stats.append({"node_id": node_id, "ok": False, "count": 0,
|
||
"latency_ms": latency_ms})
|
||
# Node offline → skip required check (not "missing", just "unreachable")
|
||
else:
|
||
count = 0
|
||
present_agent_ids: Set[str] = set()
|
||
for agent in agents_raw:
|
||
override = overrides_map.get((node_id, agent["agent_id"]))
|
||
merged = _merge_agent_with_override(agent, override)
|
||
merged = _normalize_agent_capabilities(merged)
|
||
if not include_hidden and merged.get("is_hidden"):
|
||
continue
|
||
merged["latency_ms"] = latency_ms
|
||
all_agents.append(merged)
|
||
present_agent_ids.add(agent["agent_id"])
|
||
count += 1
|
||
node_stats.append({"node_id": node_id, "ok": True, "count": count,
|
||
"latency_ms": latency_ms,
|
||
"node_role": policy["node_role"]})
|
||
# Required agent check — only for online nodes
|
||
for req_id in _REQUIRED_PER_NODE_AGENTS:
|
||
if req_id not in present_agent_ids:
|
||
required_missing_nodes.append({
|
||
"node_id": node_id,
|
||
"agent_id": req_id,
|
||
"reason": "absent_from_registry",
|
||
})
|
||
asyncio.create_task(_emit_monitor_missing_event(node_id, today_bucket))
|
||
|
||
all_agents.sort(key=lambda a: (a.get("status") != "healthy", a.get("display_name", "").lower()))
|
||
nodes_ok = sum(1 for s in node_stats if s["ok"])
|
||
return JSONResponse(content={
|
||
"items": all_agents,
|
||
"node_errors": node_errors,
|
||
"stats": {"nodes_ok": nodes_ok, "nodes_total": len(node_ids), "agents_total": len(all_agents)},
|
||
"required_missing_nodes": required_missing_nodes,
|
||
"nodes_queried": node_ids,
|
||
})
|
||
|
||
|
||
@app.get("/api/agents/{node_id}/{agent_id}")
|
||
async def get_agent(node_id: str, agent_id: str, _auth: str = Depends(require_auth)):
|
||
node_id = node_id.upper()
|
||
ni = _node_info(node_id)
|
||
policy = ni["policy"]
|
||
agents_raw, err, latency_ms = await _fetch_agents_from_gateway(
|
||
node_id, ni["gateway_url"],
|
||
timeout_ms=policy["gateway_timeout_ms"],
|
||
get_retry=policy["get_retry"],
|
||
)
|
||
override = await _app_db.get_agent_override(node_id, agent_id)
|
||
agent = next((a for a in agents_raw if a["agent_id"] == agent_id), None)
|
||
if not agent:
|
||
if override:
|
||
desired = _agent_desired_payload(override)
|
||
return JSONResponse(content={"agent": {
|
||
**override, "status": "unknown", "node_offline": True,
|
||
"desired_hash": _app_db._agent_payload_hash(desired), "drift": False,
|
||
"latency_ms": latency_ms,
|
||
}})
|
||
raise HTTPException(status_code=404, detail=f"Agent '{agent_id}' not found on {node_id}")
|
||
merged = _merge_agent_with_override(agent, override)
|
||
merged["latency_ms"] = latency_ms
|
||
return JSONResponse(content={"agent": merged, "node_error": err})
|
||
|
||
|
||
@app.get("/api/agents/{node_id}/{agent_id}/versions")
|
||
async def list_agent_versions(
|
||
node_id: str, agent_id: str,
|
||
limit: int = 10,
|
||
_auth: str = Depends(require_auth),
|
||
):
|
||
"""Return version history for an agent override."""
|
||
node_id = node_id.upper()
|
||
versions = await _app_db.list_agent_versions(node_id, agent_id, limit=limit)
|
||
return JSONResponse(content={"versions": versions})
|
||
|
||
|
||
class AgentOverridePatch(BaseModel):
    """Request body for PATCH /api/agents/{node_id}/{agent_id}; all fields optional."""
    display_name: Optional[str] = None  # override the UI-facing name
    domain: Optional[str] = None  # override the agent's domain tag
    system_prompt_md: Optional[str] = None  # desired system prompt (markdown)
    is_hidden: Optional[bool] = None  # hide from default listings (see list_agents include_hidden)
|
||
|
||
@app.patch("/api/agents/{node_id}/{agent_id}")
|
||
async def patch_agent_override(
|
||
node_id: str, agent_id: str,
|
||
body: AgentOverridePatch,
|
||
_auth: str = Depends(require_auth),
|
||
):
|
||
"""Save local override (does NOT push to node). Creates a version snapshot."""
|
||
node_id = node_id.upper()
|
||
override = await _app_db.upsert_agent_override(
|
||
node_id, agent_id,
|
||
display_name=body.display_name,
|
||
domain=body.domain,
|
||
system_prompt_md=body.system_prompt_md,
|
||
is_hidden=body.is_hidden,
|
||
)
|
||
# Audit: agent_override_saved
|
||
await _app_db.append_governance_event(
|
||
scope="project", project_id=agent_id, actor_type="user",
|
||
event_type="agent_override_saved",
|
||
idempotency_key=f"aos|{node_id}|{agent_id}|{override.get('version_hash','')}",
|
||
severity="info", status="ok",
|
||
ref_type="agent", ref_id=agent_id,
|
||
evidence=_app_db._make_evidence(
|
||
message=f"Override saved for {agent_id} on {node_id}",
|
||
outputs={"version_hash": override.get("version_hash"), "fields_changed": [
|
||
k for k, v in body.dict(exclude_none=True).items()
|
||
]},
|
||
),
|
||
)
|
||
return JSONResponse(content={"override": override, "saved": True})
|
||
|
||
|
||
@app.post("/api/agents/{node_id}/{agent_id}/reset")
|
||
async def reset_agent_override(node_id: str, agent_id: str, _auth: str = Depends(require_auth)):
|
||
"""Remove local override, revert to registry state."""
|
||
node_id = node_id.upper()
|
||
await _app_db.delete_agent_override(node_id, agent_id)
|
||
return JSONResponse(content={"reset": True, "node_id": node_id, "agent_id": agent_id})
|
||
|
||
|
||
# ── Safe Apply v2 ──────────────────────────────────────────────────────────────
|
||
|
||
@app.post("/api/agents/{node_id}/{agent_id}/apply")
|
||
async def apply_agent_override(
|
||
node_id: str, agent_id: str,
|
||
dry_run: bool = True,
|
||
plan_id: Optional[str] = None,
|
||
force: bool = False,
|
||
_auth: str = Depends(require_auth),
|
||
):
|
||
"""Safe Apply v2.
|
||
|
||
dry_run=true → returns diff_text + will_change + plan_id (sha256 of desired state).
|
||
dry_run=false → requires plan_id to match; applies and stores last_applied_hash.
|
||
"""
|
||
node_id = node_id.upper()
|
||
|
||
# Governance gate check: PROMPT_FREEZE
|
||
if not dry_run and not force:
|
||
frozen = await _check_prompt_freeze(node_id, agent_id)
|
||
if frozen:
|
||
return JSONResponse(
|
||
status_code=423,
|
||
content={"error": "PROMPT_FREEZE gate is active. Use force=true to override (requires review).",
|
||
"gate": "PROMPT_FREEZE", "node_id": node_id, "agent_id": agent_id},
|
||
)
|
||
|
||
override = await _app_db.get_agent_override(node_id, agent_id)
|
||
if not override:
|
||
raise HTTPException(status_code=404, detail="No local override found. Use PATCH first.")
|
||
|
||
desired = _agent_desired_payload(override)
|
||
computed_plan_id = _app_db._agent_payload_hash(desired)
|
||
|
||
# Fetch current active prompt for diff
|
||
gw_url = get_gateway_url(node_id)
|
||
agents_raw, _ = await _fetch_agents_from_gateway(node_id, gw_url)
|
||
active_agent = next((a for a in agents_raw if a["agent_id"] == agent_id), None)
|
||
active_prompt = active_agent.get("active_prompt", "") if active_agent else ""
|
||
desired_prompt = desired.get("system_prompt_md") or ""
|
||
|
||
# Build unified diff
|
||
diff_lines = list(_difflib.unified_diff(
|
||
(active_prompt or "").splitlines(keepends=True),
|
||
desired_prompt.splitlines(keepends=True),
|
||
fromfile=f"{agent_id}:active",
|
||
tofile=f"{agent_id}:desired",
|
||
n=3,
|
||
))
|
||
diff_text = "".join(diff_lines) if diff_lines else ""
|
||
will_change = bool(diff_text) or (override.get("domain") is not None)
|
||
|
||
if dry_run:
|
||
# Audit: agent_apply_planned
|
||
await _app_db.append_governance_event(
|
||
scope="project", project_id=agent_id, actor_type="user",
|
||
event_type="agent_apply_planned",
|
||
idempotency_key=f"aap|{node_id}|{agent_id}|{computed_plan_id}",
|
||
severity="info", status="ok",
|
||
ref_type="agent", ref_id=agent_id,
|
||
evidence=_app_db._make_evidence(
|
||
message=f"Apply planned (dry-run) for {agent_id}@{node_id}",
|
||
outputs={"will_change": will_change, "plan_id": computed_plan_id,
|
||
"diff_lines": len(diff_lines)},
|
||
),
|
||
)
|
||
return JSONResponse(content={
|
||
"dry_run": True, "will_change": will_change,
|
||
"plan_id": computed_plan_id,
|
||
"diff_text": diff_text,
|
||
"desired": desired,
|
||
"node_id": node_id, "agent_id": agent_id,
|
||
})
|
||
|
||
# Apply: validate plan_id
|
||
if plan_id and plan_id != computed_plan_id:
|
||
raise HTTPException(
|
||
status_code=409,
|
||
detail=f"plan_id mismatch: provided={plan_id} computed={computed_plan_id}. "
|
||
"Re-run dry_run=true to get fresh plan_id.",
|
||
)
|
||
|
||
applied: List[Dict] = []
|
||
errors_apply: List[Dict] = []
|
||
|
||
if desired_prompt and gw_url:
|
||
try:
|
||
async with httpx.AsyncClient(timeout=10.0) as client:
|
||
resp = await client.post(
|
||
f"{gw_url.rstrip('/')}/admin/agents/{agent_id}/prompt",
|
||
json={"prompt": desired_prompt},
|
||
headers={"X-Admin-Token": os.getenv("GATEWAY_ADMIN_TOKEN", "")},
|
||
)
|
||
if resp.status_code in (200, 201, 204):
|
||
applied.append({"action": "update_system_prompt", "status": "ok"})
|
||
else:
|
||
errors_apply.append({"action": "update_system_prompt",
|
||
"error": f"HTTP {resp.status_code}: {resp.text[:200]}"})
|
||
except Exception as e:
|
||
errors_apply.append({"action": "update_system_prompt", "error": str(e)[:300]})
|
||
|
||
success = len(applied) > 0 and len(errors_apply) == 0
|
||
|
||
# Mark last_applied_hash if successful
|
||
if success:
|
||
await _app_db.upsert_agent_override(
|
||
node_id, agent_id, _mark_applied_hash=computed_plan_id,
|
||
)
|
||
|
||
# Audit
|
||
evt_type = "agent_apply_executed" if success else "agent_apply_failed"
|
||
await _app_db.append_governance_event(
|
||
scope="project", project_id=agent_id, actor_type="user",
|
||
event_type=evt_type,
|
||
idempotency_key=f"aae|{node_id}|{agent_id}|{computed_plan_id}|{'ok' if success else 'fail'}",
|
||
severity="info" if success else "high", status="ok" if success else "error",
|
||
ref_type="agent", ref_id=agent_id,
|
||
evidence=_app_db._make_evidence(
|
||
message=f"Apply {'succeeded' if success else 'failed'} for {agent_id}@{node_id}",
|
||
outputs={"plan_id": computed_plan_id, "applied": applied, "errors": errors_apply},
|
||
),
|
||
)
|
||
|
||
return JSONResponse(content={
|
||
"dry_run": False, "success": success,
|
||
"plan_id": computed_plan_id,
|
||
"applied": applied, "errors": errors_apply,
|
||
"node_id": node_id, "agent_id": agent_id,
|
||
})
|
||
|
||
|
||
@app.post("/api/agents/{node_id}/{agent_id}/rollback")
|
||
async def rollback_agent_override(
|
||
node_id: str, agent_id: str,
|
||
version_hash: str,
|
||
_auth: str = Depends(require_auth),
|
||
):
|
||
"""Rollback agent override to a specific version by version_hash."""
|
||
node_id = node_id.upper()
|
||
version = await _app_db.get_agent_version_by_hash(node_id, agent_id, version_hash)
|
||
if not version:
|
||
raise HTTPException(status_code=404, detail=f"Version {version_hash} not found")
|
||
|
||
payload = version["payload"]
|
||
# Restore the override to this version's payload
|
||
updated = await _app_db.upsert_agent_override(
|
||
node_id, agent_id,
|
||
display_name=payload.get("display_name"),
|
||
domain=payload.get("domain"),
|
||
system_prompt_md=payload.get("system_prompt_md"),
|
||
)
|
||
|
||
# Audit
|
||
await _app_db.append_governance_event(
|
||
scope="project", project_id=agent_id, actor_type="user",
|
||
event_type="agent_rollback_executed",
|
||
idempotency_key=f"arb|{node_id}|{agent_id}|{version_hash}|{_app_db._now()}",
|
||
severity="warn", status="ok",
|
||
ref_type="agent", ref_id=agent_id,
|
||
evidence=_app_db._make_evidence(
|
||
message=f"Rollback to version {version_hash} for {agent_id}@{node_id}",
|
||
outputs={"version_hash": version_hash, "created_at": version.get("created_at")},
|
||
),
|
||
)
|
||
return JSONResponse(content={
|
||
"rolled_back": True, "version_hash": version_hash,
|
||
"override": updated, "node_id": node_id, "agent_id": agent_id,
|
||
})
|
||
|
||
|
||
# ── Bulk Agent Actions (multi-node + canary) ───────────────────────────────────
|
||
|
||
async def _apply_single_agent(
    node_id: str,
    override: Dict,
    agents_map: Dict,
    gw_url: str,
    apply_timeout_sec: float,
) -> Dict:
    """Apply a single agent override. Returns result dict with status field.

    status is one of "applied", "failed", or "skipped" (no prompt / no gateway).
    On success, last_applied_hash is persisted so drift detection clears.
    """
    aid = override["agent_id"]
    desired = _agent_desired_payload(override)
    plan_id = _app_db._agent_payload_hash(desired)
    active_agent = agents_map.get(aid, {})
    active_prompt = active_agent.get("active_prompt", "") or ""
    desired_prompt = desired.get("system_prompt_md") or ""
    # "drift" here is a direct prompt comparison, not a hash comparison.
    will_change = desired_prompt != active_prompt

    if not desired_prompt or not gw_url:
        # Nothing pushable: no desired prompt, or the node has no gateway URL.
        return {"node_id": node_id, "agent_id": aid, "status": "skipped",
                "plan_id": plan_id, "drift": will_change,
                "error": "no prompt or no gateway_url"}

    applied_ok = False
    err_msg = None
    try:
        async with httpx.AsyncClient(timeout=apply_timeout_sec) as client:
            resp = await client.post(
                f"{gw_url.rstrip('/')}/admin/agents/{aid}/prompt",
                json={"prompt": desired_prompt},
                headers={"X-Admin-Token": os.getenv("GATEWAY_ADMIN_TOKEN", "")},
            )
        applied_ok = resp.status_code in (200, 201, 204)
        if not applied_ok:
            err_msg = f"HTTP {resp.status_code}: {resp.text[:100]}"
    except Exception as e:
        err_msg = str(e)[:200]

    if applied_ok:
        # Record what was pushed so subsequent drift checks compare against it.
        await _app_db.upsert_agent_override(node_id, aid, _mark_applied_hash=plan_id)

    return {
        "node_id": node_id, "agent_id": aid,
        "status": "applied" if applied_ok else "failed",
        "plan_id": plan_id, "drift": will_change,
        "error": err_msg,
    }
|
||
|
||
|
||
@app.post("/api/agents/bulk/apply")
async def bulk_apply_agents(
    nodes: str = "NODA1",
    node: Optional[str] = None,  # legacy single-node param
    dry_run: bool = True,
    mode: str = "all",  # "all" | "canary"
    limit: int = 2,  # canary: max N agents
    _auth: str = Depends(require_auth),
):
    """Apply local overrides across one or many nodes.

    mode=canary: apply first `limit` agents with drift=True, stop on first failure.
    Returns {results, node_errors, summary}.
    """
    # The legacy ?node= parameter, when present, wins over ?nodes=.
    raw_nodes = node.upper() if node else nodes
    node_ids = [n.strip().upper() for n in raw_nodes.split(",") if n.strip()]

    results: List[Dict] = []
    node_errors: List[Dict] = []
    bulk_run_id = str(uuid.uuid4())[:8]

    # Governance audit: record that a bulk plan was created.
    await _app_db.append_governance_event(
        scope="portfolio", project_id="portfolio", actor_type="user",
        event_type="agent_bulk_plan_created",
        idempotency_key=f"abpc|{bulk_run_id}|{raw_nodes}|{mode}",
        severity="info", status="ok",
        evidence=_app_db._make_evidence(
            message=f"Bulk {'canary' if mode=='canary' else 'apply'} planned: nodes={raw_nodes} dry_run={dry_run}",
            outputs={"mode": mode, "limit": limit, "nodes": node_ids, "dry_run": dry_run},
        ),
    )

    for node_id in node_ids:
        info = _node_info(node_id)
        policy = info["policy"]
        gw_url = info["gateway_url"]
        apply_timeout_sec = policy["apply_timeout_ms"] / 1000.0

        overrides = await _app_db.list_agent_overrides(node_id)
        agents_raw, err, latency_ms = await _fetch_agents_from_gateway(
            node_id, gw_url,
            timeout_ms=policy["gateway_timeout_ms"],
            get_retry=policy["get_retry"],
        )
        if err and not agents_raw:
            # Gateway unreachable and no agent data at all: skip this node.
            node_errors.append({"node_id": node_id, "error": err, "latency_ms": latency_ms})
            continue

        agents_map = {a["agent_id"]: a for a in agents_raw}

        # Deterministic candidate order: non-hidden overrides sorted by agent_id.
        candidates = sorted(
            (o for o in overrides if not o.get("is_hidden")),
            key=lambda o: o["agent_id"],
        )

        if mode == "canary":
            # Canary only touches agents whose last applied hash has drifted.
            drifted: List[Dict] = []
            for ov in candidates:
                plan_hash = _app_db._agent_payload_hash(_agent_desired_payload(ov))
                if ov.get("last_applied_hash") and ov["last_applied_hash"] != plan_hash:
                    drifted.append(ov)
            candidates = drifted[:limit]

        if dry_run:
            # Plan only: report per-agent drift without touching the gateway.
            for ov in candidates:
                aid = ov["agent_id"]
                desired = _agent_desired_payload(ov)
                plan_hash = _app_db._agent_payload_hash(desired)
                live_prompt = agents_map.get(aid, {}).get("active_prompt", "") or ""
                target_prompt = desired.get("system_prompt_md") or ""
                results.append({
                    "node_id": node_id, "agent_id": aid, "status": "planned",
                    "plan_id": plan_hash, "drift": target_prompt != live_prompt, "error": None,
                })
            continue

        if mode == "canary" and candidates:
            # Audit: canary run begins for this node.
            await _app_db.append_governance_event(
                scope="portfolio", project_id="portfolio", actor_type="user",
                event_type="agent_bulk_canary_started",
                idempotency_key=f"abcs|{bulk_run_id}|{node_id}",
                severity="info", status="ok",
                evidence=_app_db._make_evidence(
                    message=f"Canary apply started: {len(candidates)} agents on {node_id}",
                    outputs={"agents": [o["agent_id"] for o in candidates], "limit": limit},
                ),
            )

        canary_stopped = False
        for ov in candidates:
            # Per-agent governance gate: frozen prompts are never overwritten.
            if await _check_prompt_freeze(node_id, ov["agent_id"]):
                results.append({
                    "node_id": node_id, "agent_id": ov["agent_id"],
                    "status": "blocked", "plan_id": None, "drift": True,
                    "error": "PROMPT_FREEZE gate active",
                })
                continue

            outcome = await _apply_single_agent(
                node_id, ov, agents_map, gw_url, apply_timeout_sec,
            )
            results.append(outcome)

            # Canary semantics: the first failure aborts the rest of this node.
            if mode == "canary" and outcome["status"] == "failed":
                canary_stopped = True
                handled = {r["agent_id"] for r in results if r["node_id"] == node_id}
                for rid in sorted({o["agent_id"] for o in candidates} - handled):
                    results.append({
                        "node_id": node_id, "agent_id": rid, "status": "skipped",
                        "plan_id": None, "drift": True,
                        "error": f"canary stopped after failure of {outcome['agent_id']}",
                    })
                await _app_db.append_governance_event(
                    scope="portfolio", project_id="portfolio", actor_type="user",
                    event_type="agent_bulk_canary_stopped",
                    idempotency_key=f"abcstop|{bulk_run_id}|{node_id}|{outcome['agent_id']}",
                    severity="high", status="error",
                    evidence=_app_db._make_evidence(
                        message=f"Canary stopped on {outcome['agent_id']}@{node_id}: {outcome['error']}",
                        outputs={"failed_agent": outcome["agent_id"], "error": outcome["error"]},
                    ),
                )
                break

        if mode == "canary" and not canary_stopped and candidates:
            # Audit: the whole canary batch for this node applied cleanly.
            await _app_db.append_governance_event(
                scope="portfolio", project_id="portfolio", actor_type="user",
                event_type="agent_bulk_apply_completed",
                idempotency_key=f"abac|{bulk_run_id}|{node_id}",
                severity="info", status="ok",
                evidence=_app_db._make_evidence(
                    message=f"Canary apply completed on {node_id}: {len(candidates)} agents",
                    outputs={"agents_applied": [r["agent_id"] for r in results
                                                if r["node_id"] == node_id and r["status"] == "applied"]},
                ),
            )

    # Aggregate per-status counts for the summary payload.
    summary: Dict[str, int] = {}
    for r in results:
        summary[r["status"]] = summary.get(r["status"], 0) + 1

    return JSONResponse(content={
        "results": results,
        "node_errors": node_errors,
        "summary": summary,
        "dry_run": dry_run,
        "mode": mode,
        "bulk_run_id": bulk_run_id,
    })
|
||
|
||
|
||
@app.post("/api/agents/bulk/diff")
async def bulk_diff_agents(
    nodes: str = "NODA1",
    node: Optional[str] = None,
    _auth: str = Depends(require_auth),
):
    """Return diff summary for all agents with local overrides. Supports multi-node.

    For each override on each node, reports the desired payload hash
    (``plan_id``), the last applied hash, a drift flag, and a truncated
    unified diff between the gateway's active prompt and the desired prompt.
    Nodes whose gateway returned an error and no agent data are recorded in
    ``node_errors`` and skipped entirely.
    """
    raw_nodes = node.upper() if node else nodes
    node_ids = [n.strip().upper() for n in raw_nodes.split(",") if n.strip()]

    report: List[Dict] = []
    node_errors: List[Dict] = []

    for node_id in node_ids:
        ni = _node_info(node_id)
        policy = ni["policy"]
        gw_url = ni["gateway_url"]
        overrides = await _app_db.list_agent_overrides(node_id)
        agents_raw, err, latency_ms = await _fetch_agents_from_gateway(
            node_id, gw_url,
            timeout_ms=policy["gateway_timeout_ms"],
            get_retry=policy["get_retry"],
        )
        if err:
            node_errors.append({"node_id": node_id, "error": err, "latency_ms": latency_ms})
            # Fix: without any live agent data every override would diff
            # against an empty active prompt, reporting phantom drift for the
            # whole node. Skip it instead (same rule as bulk_apply_agents).
            if not agents_raw:
                continue
        agents_map = {a["agent_id"]: a for a in agents_raw}

        for override in overrides:
            aid = override["agent_id"]
            desired = _agent_desired_payload(override)
            plan_id = _app_db._agent_payload_hash(desired)
            active_agent = agents_map.get(aid, {})
            active_prompt = active_agent.get("active_prompt") or ""
            desired_prompt = desired.get("system_prompt_md") or ""
            diff_lines = list(_difflib.unified_diff(
                active_prompt.splitlines(keepends=True),
                desired_prompt.splitlines(keepends=True),
                fromfile=f"{aid}:active", tofile=f"{aid}:desired", n=2,
            ))
            # Drift = the override has been applied before and its payload
            # hash no longer matches what we would apply now.
            is_drift = bool(override.get("last_applied_hash") and
                            override["last_applied_hash"] != plan_id)
            report.append({
                "node_id": node_id, "agent_id": aid,
                "plan_id": plan_id,
                "last_applied_hash": override.get("last_applied_hash"),
                "drift": is_drift,
                "diff_lines": len(diff_lines),
                # Truncate to the first 60 diff lines to bound payload size.
                "diff_text": "".join(diff_lines[:60]),
            })

    return JSONResponse(content={"report": report, "node_errors": node_errors,
                                 "nodes_queried": node_ids})
|
||
|
||
|
||
@app.get("/api/agents/export/prompts")
async def export_agent_prompts(
    nodes: str = "NODA1",
    node: Optional[str] = None,
    _auth: str = Depends(require_auth),
):
    """Export all agent system prompts as a JSON bundle (multi-node)."""
    raw_nodes = node.upper() if node else nodes
    node_ids = [n.strip().upper() for n in raw_nodes.split(",") if n.strip()]

    bundle: List[Dict] = []
    node_errors: List[Dict] = []

    for node_id in node_ids:
        info = _node_info(node_id)
        policy = info["policy"]
        gw_url = info["gateway_url"]
        overrides = await _app_db.list_agent_overrides(node_id)
        agents_raw, err, latency_ms = await _fetch_agents_from_gateway(
            node_id, gw_url,
            timeout_ms=policy["gateway_timeout_ms"],
            get_retry=policy["get_retry"],
        )
        if err:
            node_errors.append({"node_id": node_id, "error": err, "latency_ms": latency_ms})
        agents_map = {a["agent_id"]: a for a in agents_raw}
        override_by_agent = {o["agent_id"]: o for o in overrides}

        # Merge each live agent with its local override (if any) and collect.
        for aid, agent in agents_map.items():
            merged = _merge_agent_with_override(agent, override_by_agent.get(aid))
            bundle.append({
                "agent_id": aid, "node_id": node_id,
                "display_name": merged.get("display_name", aid),
                "domain": merged.get("domain"),
                "system_prompt_md": merged.get("system_prompt_md"),
                "has_override": merged.get("has_override", False),
            })

    # Stable export order: by node, then by agent.
    bundle.sort(key=lambda x: (x["node_id"], x["agent_id"]))
    return JSONResponse(content={
        "nodes_queried": node_ids,
        "exported_at": _app_db._now(),
        "count": len(bundle),
        "agents": bundle,
        "node_errors": node_errors,
    })
|
||
|
||
|
||
# ── Kling AI proxy ────────────────────────────────────────────────────────────
|
||
|
||
@app.get("/api/aurora/kling/health")
async def console_kling_health() -> Dict[str, Any]:
    """Proxy the Aurora Kling health probe; never raises, reports errors inline."""
    try:
        result = await _aurora_request_json(
            "GET", "/api/aurora/kling/health", timeout=12.0, retries=1,
        )
    except Exception as exc:
        result = {"ok": False, "error": str(exc)}
    return result
|
||
|
||
|
||
@app.post("/api/aurora/kling/enhance")
async def console_kling_enhance_plain(
    job_id: str = Form(...),
    prompt: str = Form("enhance video quality, improve sharpness and clarity"),
    negative_prompt: str = Form("noise, blur, artifacts, distortion"),
    mode: str = Form("pro"),
    duration: str = Form("5"),
    cfg_scale: float = Form(0.5),
) -> Dict[str, Any]:
    """Proxy a Kling enhance request to Aurora (form-encoded body)."""
    # Aurora expects form fields, so cfg_scale is serialized as a string.
    form_fields = {
        "job_id": job_id,
        "prompt": prompt,
        "negative_prompt": negative_prompt,
        "mode": mode,
        "duration": duration,
        "cfg_scale": str(cfg_scale),
    }
    return await _aurora_request_json(
        "POST",
        "/api/aurora/kling/enhance",
        data=form_fields,
        timeout=120.0,
        retries=1,
    )
|
||
|
||
|
||
@app.post("/api/aurora/kling/enhance/{job_id}")
async def console_kling_enhance(
    job_id: str,
    prompt: str = Form("enhance video quality, improve sharpness and clarity"),
    negative_prompt: str = Form("noise, blur, artifacts, distortion"),
    mode: str = Form("pro"),
    duration: str = Form("5"),
    cfg_scale: float = Form(0.5),
) -> Dict[str, Any]:
    """Path-parameter variant of the enhance endpoint.

    Delegates to the plain form handler so both routes share one code path.
    """
    return await console_kling_enhance_plain(
        job_id=job_id,
        prompt=prompt,
        negative_prompt=negative_prompt,
        mode=mode,
        duration=duration,
        cfg_scale=cfg_scale,
    )
|
||
|
||
|
||
@app.get("/api/aurora/kling/status/{job_id}")
async def console_kling_status(job_id: str) -> Dict[str, Any]:
    """Proxy: fetch the status of a Kling job from Aurora."""
    status_path = f"/api/aurora/kling/status/{job_id}"
    return await _aurora_request_json("GET", status_path, timeout=20.0, retries=2)
|
||
|
||
|
||
@app.get("/api/aurora/kling/task/{task_id}")
async def console_kling_task(task_id: str, endpoint: str = Query("video2video")) -> Dict[str, Any]:
    """Proxy: look up a Kling task by id on the given Aurora endpoint.

    Fix: URL-encode the ``endpoint`` query value so characters like '/', '&'
    or '?' cannot break (or inject extra parameters into) the proxied URL.
    """
    return await _aurora_request_json(
        "GET",
        f"/api/aurora/kling/task/{task_id}?endpoint={quote(endpoint, safe='')}",
        timeout=20.0,
        retries=2,
    )
|
||
|
||
|
||
@app.get("/api/aurora/plates/{job_id}")
async def console_plates(job_id: str) -> Dict[str, Any]:
    """Proxy: fetch plate data for a job from Aurora."""
    plates_path = f"/api/aurora/plates/{job_id}"
    return await _aurora_request_json("GET", plates_path, timeout=15.0, retries=2)
|
||
|
||
|
||
# ── Sofiia Auto-Router & Budget Dashboard proxy ────────────────────────────────
|
||
|
||
async def _router_request_json(method: str, path: str, json_body: Optional[Dict] = None, timeout: float = 20.0) -> Dict[str, Any]:
    """Forward request to the Router service (noda1 or local).

    Best-effort proxy helper: returns the parsed JSON body, or
    ``{"error": "..."}`` on any transport/parse failure (never raises).

    Fix: use ``httpx`` (the HTTP client the rest of this module standardizes
    on and imports at the top) instead of a lazy ``aiohttp`` import — aiohttp
    was an extra dependency that may not be installed in the console env.
    """
    # Route to the first configured node's router URL (fallback: "noda1").
    nodes_reg = load_nodes_registry()
    nodes = (nodes_reg.get("nodes") or {}) if isinstance(nodes_reg, dict) else {}
    node_id = next(iter(nodes), "noda1")
    router_url = get_router_url(node_id)
    url = f"{router_url.rstrip('/')}{path}"
    try:
        async with httpx.AsyncClient(timeout=timeout) as client:
            if method.upper() == "GET":
                resp = await client.get(url)
            else:
                resp = await client.post(url, json=json_body)
            # Parse JSON regardless of Content-Type (matches the previous
            # aiohttp behaviour of content_type=None).
            return resp.json()
    except Exception as e:
        return {"error": str(e)}
|
||
|
||
|
||
@app.post("/api/sofiia/auto-route")
async def console_auto_route(body: Dict[str, Any]) -> Dict[str, Any]:
    """Proxy: classify a prompt via the Router and return the recommended model."""
    return await _router_request_json(
        "POST", "/v1/sofiia/auto-route", json_body=body,
    )
|
||
|
||
|
||
@app.get("/api/sofiia/budget")
async def console_budget_dashboard() -> Dict[str, Any]:
    """Proxy: fetch the budget dashboard payload from the Router."""
    dashboard_path = "/v1/sofiia/budget"
    return await _router_request_json("GET", dashboard_path)
|
||
|
||
|
||
@app.post("/api/sofiia/budget/limits")
async def console_set_budget_limits(body: Dict[str, Any]) -> Dict[str, Any]:
    """Proxy: forward a provider budget limit update to the Router."""
    return await _router_request_json(
        "POST", "/v1/sofiia/budget/limits", json_body=body,
    )
|
||
|
||
|
||
@app.get("/api/sofiia/budget/stats")
async def console_budget_stats(window_hours: int = 24) -> Dict[str, Any]:
    """Proxy: fetch budget stats for the given time window from the Router."""
    stats_path = f"/v1/sofiia/budget/stats?window_hours={window_hours}"
    return await _router_request_json("GET", stats_path)
|
||
|
||
|
||
@app.get("/api/sofiia/catalog")
async def console_model_catalog(refresh_ollama: bool = False) -> Dict[str, Any]:
    """Proxy: fetch the full model catalog (with availability) from the Router."""
    # Serialize the flag the way the Router expects: lowercase "true"/"false".
    refresh_flag = "true" if refresh_ollama else "false"
    return await _router_request_json("GET", f"/v1/sofiia/catalog?refresh_ollama={refresh_flag}")