Files
microdao-daarion/services/aurora-service/app/main.py

1328 lines
51 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
from __future__ import annotations
import asyncio
import hashlib
import json
import logging
import os
import re
import shutil
import subprocess
import uuid
from datetime import datetime, timezone
from pathlib import Path
from typing import Any, Dict, List, Optional
from fastapi import Body, FastAPI, File, Form, HTTPException, Query, UploadFile
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import FileResponse
from .analysis import (
analyze_photo,
analyze_video,
estimate_processing_seconds,
probe_video_metadata,
)
from .job_store import JobStore
from .langchain_scaffold import build_subagent_registry
from .orchestrator import AuroraOrchestrator, JobCancelledError
from .reporting import generate_forensic_report_pdf
from .schemas import AuroraMode, MediaType
from .subagents import runtime_diagnostics
# Root logger config plus this module's logger.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Runtime configuration, all overridable via environment variables.
DATA_DIR = Path(os.getenv("AURORA_DATA_DIR", "/data/aurora"))
# Base URL used when building absolute links to generated artifacts.
PUBLIC_BASE_URL = os.getenv("AURORA_PUBLIC_BASE_URL", "http://localhost:9401").rstrip("/")
CORS_ORIGINS = os.getenv("AURORA_CORS_ORIGINS", "*")
# Policy for jobs interrupted by a restart (consumed by JobStore.recover_interrupted_jobs).
RECOVERY_STRATEGY = os.getenv("AURORA_RECOVERY_STRATEGY", "requeue").strip().lower()

# Extension fallbacks used when the Content-Type header is missing or generic.
VIDEO_EXTENSIONS = {".mp4", ".avi", ".mov", ".mkv", ".webm"}
AUDIO_EXTENSIONS = {".mp3", ".wav", ".flac", ".m4a", ".aac", ".ogg"}
PHOTO_EXTENSIONS = {".jpg", ".jpeg", ".png", ".tiff", ".tif", ".webp"}
MAX_CONCURRENT_JOBS = max(1, int(os.getenv("AURORA_MAX_CONCURRENT_JOBS", "1")))

# Shared singletons: persistent job store, pipeline orchestrator, and the
# semaphore that caps how many jobs run concurrently.
store = JobStore(DATA_DIR)
orchestrator = AuroraOrchestrator(store.outputs_dir, PUBLIC_BASE_URL)
RUN_SLOT = asyncio.Semaphore(MAX_CONCURRENT_JOBS)

app = FastAPI(
    title="Aurora Media Forensics Service",
    description="AURORA tactical/forensic media pipeline scaffold for AISTALK",
    version="0.1.0",
)

# CORS: "*" allows any origin; otherwise a comma-separated allow-list.
if CORS_ORIGINS.strip() == "*":
    allow_origins = ["*"]
else:
    allow_origins = [x.strip() for x in CORS_ORIGINS.split(",") if x.strip()]
app.add_middleware(
    CORSMiddleware,
    allow_origins=allow_origins,
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)
@app.on_event("startup")
async def recover_orphan_jobs() -> None:
    """Startup hook: recover interrupted jobs, reschedule queued ones, purge work dirs.

    Jobs left in a non-terminal state by a previous process are handled per
    RECOVERY_STRATEGY, still-queued jobs are put back on the scheduler, and
    leftover ``_work*`` directories from the old pipeline are deleted.
    """
    recovered = store.recover_interrupted_jobs(
        completed_at=utc_now_iso(),
        message="Interrupted by aurora-service restart",
        strategy=RECOVERY_STRATEGY,
    )
    if recovered:
        logger.warning(
            "Recovered %d interrupted Aurora jobs with strategy=%s",
            recovered,
            RECOVERY_STRATEGY,
        )
    queued = sorted(
        [job for job in store.list_jobs() if job.status == "queued"],
        key=lambda item: item.created_at,
    )
    for job in queued:
        # NOTE(review): fire-and-forget task; JobStore persistence makes a lost
        # task recoverable on the next restart, so no reference is retained.
        asyncio.create_task(run_job(job.job_id))
    if queued:
        logger.info("Rescheduled %d queued Aurora jobs on startup", len(queued))
    cleaned = _cleanup_work_dirs()
    # BUG FIX: _cleanup_work_dirs always returns a non-empty dict, so the old
    # `if cleaned:` test was always true and logged "Cleaned 0 orphaned _work
    # directories (0.0 MB freed)" on every startup. Gate on the actual count.
    if cleaned["dirs"]:
        logger.info("Cleaned %d orphaned _work directories (%.1f MB freed)", cleaned["dirs"], cleaned["mb"])
def _cleanup_work_dirs() -> Dict[str, Any]:
    """Remove leftover _work_* directories from the old PNG-based pipeline.

    Returns ``{"dirs": <number removed>, "mb": <megabytes freed>}``.
    Best-effort: missing directories and files vanishing mid-scan are tolerated.
    """
    total_freed = 0
    dirs_removed = 0
    outputs_dir = store.outputs_dir
    # Robustness fix: on a fresh deployment the outputs dir may not exist yet;
    # the old code would raise from iterdir() and abort startup cleanup.
    if not outputs_dir.is_dir():
        return {"dirs": 0, "mb": 0.0}
    for job_dir in outputs_dir.iterdir():
        if not job_dir.is_dir():
            continue
        for entry in list(job_dir.iterdir()):
            if entry.is_dir() and entry.name.startswith("_work"):
                try:
                    size = sum(f.stat().st_size for f in entry.rglob("*") if f.is_file())
                except OSError:
                    # A file disappeared between rglob() and stat(); size is best-effort.
                    size = 0
                shutil.rmtree(entry, ignore_errors=True)
                total_freed += size
                dirs_removed += 1
    return {"dirs": dirs_removed, "mb": total_freed / (1024 * 1024)}
def utc_now_iso() -> str:
    """Current UTC time as an ISO-8601 string using a trailing 'Z' suffix."""
    stamp = datetime.now(timezone.utc).isoformat()
    return stamp.replace("+00:00", "Z")
def safe_filename(file_name: str) -> str:
    """Strip path components and replace unsafe characters in an upload name.

    Falls back to a random ``upload_xxxxxxxx.bin`` name when sanitizing
    leaves nothing usable.
    """
    name_only = Path(file_name or "upload.bin").name
    cleaned = re.sub(r"[^A-Za-z0-9._-]", "_", name_only).strip("._")
    if cleaned:
        return cleaned
    return f"upload_{uuid.uuid4().hex[:8]}.bin"
def compute_sha256(path: Path) -> str:
    """Stream a file through SHA-256 and return it as 'sha256:<hexdigest>'."""
    hasher = hashlib.sha256()
    with path.open("rb") as handle:
        # Read in 1 MiB chunks so large media never lands in memory at once.
        for chunk in iter(lambda: handle.read(1024 * 1024), b""):
            hasher.update(chunk)
    return f"sha256:{hasher.hexdigest()}"
def detect_media_type(file_name: str, content_type: str) -> MediaType:
    """Classify an upload as video/audio/photo.

    The MIME type prefix wins; the file extension is only the fallback when
    the Content-Type header is missing or generic. Returns "unknown" when
    neither signal matches.
    """
    extension = Path(file_name).suffix.lower()
    if content_type.startswith("video/") or extension in VIDEO_EXTENSIONS:
        return "video"
    if content_type.startswith("audio/") or extension in AUDIO_EXTENSIONS:
        return "audio"
    if content_type.startswith("image/") or extension in PHOTO_EXTENSIONS:
        return "photo"
    return "unknown"
def _normalize_mode(raw_mode: Optional[str], fallback: AuroraMode = "tactical") -> AuroraMode:
value = (raw_mode or fallback).strip().lower()
if value not in ("tactical", "forensic"):
return fallback
return value # type: ignore[return-value]
def _normalize_priority(raw_priority: Optional[str], fallback: str = "balanced") -> str:
value = (raw_priority or fallback).strip().lower()
if value not in {"balanced", "faces", "plates", "details", "speech"}:
return fallback
return value
def _job_storage_info(job: Any) -> Dict[str, str]:
    """Absolute filesystem paths for a job's upload/output dirs and its JSON record.

    ``input_path`` is included only when the original upload still exists.
    """
    info = {
        "upload_dir": str((store.uploads_dir / job.job_id).resolve()),
        "output_dir": str((store.outputs_dir / job.job_id).resolve()),
        "job_record": str((store.jobs_dir / f"{job.job_id}.json").resolve()),
    }
    source = Path(str(job.input_path))
    if source.exists():
        info["input_path"] = str(source.resolve())
    return info
def _queued_position(job_id: str) -> Optional[int]:
    """1-based FIFO position (ordered by created_at) of a queued job, or None.

    Scans the raw job JSON records on disk rather than the in-memory store so
    the position reflects persisted state; unreadable records are skipped.
    """
    target = store.get_job(job_id)
    if not target or target.status != "queued":
        return None
    pending: List[Any] = []
    for record_path in sorted(store.jobs_dir.glob("*.json")):
        try:
            record = json.loads(record_path.read_text(encoding="utf-8"))
            if record.get("status") == "queued":
                pending.append(record)
        except Exception:
            continue
    pending.sort(key=lambda rec: str(rec.get("created_at") or ""))
    for position, record in enumerate(pending, start=1):
        if str(record.get("job_id") or "") == job_id:
            return position
    return None
def _resolve_source_media_path(job: Any, *, second_pass: bool = False) -> Path:
    """Locate the media file to feed into a reprocess run.

    Normally prefers the original upload. With ``second_pass=True`` the
    processed output file whose type matches the job's media type is preferred,
    falling back to the original upload. Raises HTTP 409 when nothing usable
    remains on disk.
    """
    original = Path(str(job.input_path))
    if not second_pass and original.exists() and original.is_file():
        return original
    result = getattr(job, "result", None)
    outputs = getattr(result, "output_files", None) if result else None
    if isinstance(outputs, list):
        wanted_type = str(job.media_type).lower()
        for entry in outputs:
            if str(getattr(entry, "type", "")).lower() != wanted_type:
                continue
            candidate = store.outputs_dir / job.job_id / str(getattr(entry, "name", ""))
            if candidate.exists() and candidate.is_file():
                return candidate
    if original.exists() and original.is_file():
        return original
    raise HTTPException(status_code=409, detail=f"Source media not available for job {job.job_id}")
def _enqueue_job_from_path(
    *,
    source_path: Path,
    file_name: str,
    mode: AuroraMode,
    media_type: MediaType,
    priority: str,
    export_options: Dict[str, Any],
    metadata_patch: Optional[Dict[str, Any]] = None,
) -> Dict[str, Any]:
    """Copy source media into a fresh per-job upload dir, persist the job, schedule it.

    Returns the API payload with the new job id and its status/result/cancel URLs.
    """
    created = datetime.now(timezone.utc)
    job_id = f"aurora_{created.strftime('%Y%m%d_%H%M%S')}_{uuid.uuid4().hex[:6]}"
    upload_dir = store.uploads_dir / job_id
    upload_dir.mkdir(parents=True, exist_ok=True)
    input_path = upload_dir / safe_filename(file_name)
    shutil.copy2(source_path, input_path)
    # Pre-analysis (duration/resolution probing) seeds the job metadata,
    # including the up-front processing-time estimate.
    metadata = _estimate_upload_metadata(
        input_path=input_path,
        media_type=media_type,
        mode=mode,
    )
    if export_options:
        metadata["export_options"] = export_options
    metadata["priority"] = priority
    if metadata_patch:
        metadata.update(metadata_patch)
    store.create_job(
        job_id=job_id,
        file_name=input_path.name,
        input_path=input_path,
        input_hash=compute_sha256(input_path),
        mode=mode,
        media_type=media_type,
        created_at=utc_now_iso(),
        metadata=metadata,
    )
    # Fire-and-forget: run_job re-validates state after the semaphore is acquired.
    asyncio.create_task(run_job(job_id))
    return {
        "job_id": job_id,
        "mode": mode,
        "media_type": media_type,
        "priority": priority,
        "export_options": export_options,
        "status_url": f"/api/aurora/status/{job_id}",
        "result_url": f"/api/aurora/result/{job_id}",
        "cancel_url": f"/api/aurora/cancel/{job_id}",
    }
def model_dump(value: Any) -> Dict[str, Any]:
    """Serialize a Pydantic model, supporting both v2 (model_dump) and v1 (dict)."""
    is_v2 = hasattr(value, "model_dump")
    return value.model_dump() if is_v2 else value.dict()
def _parse_iso_utc(value: Optional[str]) -> Optional[datetime]:
if not value:
return None
try:
return datetime.fromisoformat(value.replace("Z", "+00:00"))
except Exception:
return None
def _estimate_upload_metadata(input_path: Path, media_type: MediaType, mode: AuroraMode) -> Dict[str, Any]:
    """Probe the upload and derive per-media metadata plus a processing-time estimate.

    Video uses ffprobe-backed metadata, photo uses OpenCV (best-effort, cv2 may
    be unavailable), audio uses ffprobe duration scaled by a mode-dependent factor.
    """
    meta: Dict[str, Any] = {}
    if media_type == "video":
        video_meta = probe_video_metadata(input_path)
        if video_meta:
            meta["video"] = video_meta
            seconds = estimate_processing_seconds(
                media_type="video",
                mode=mode,
                width=int(video_meta.get("width") or 0),
                height=int(video_meta.get("height") or 0),
                frame_count=int(video_meta.get("frame_count") or 0),
            )
            if seconds:
                meta["estimated_processing_seconds"] = int(seconds)
    elif media_type == "photo":
        try:
            import cv2  # type: ignore[import-untyped]
            frame = cv2.imread(str(input_path), cv2.IMREAD_COLOR)
            if frame is not None:
                height, width = frame.shape[:2]
                meta["image"] = {"width": int(width), "height": int(height)}
                seconds = estimate_processing_seconds(
                    media_type="photo",
                    mode=mode,
                    width=int(width),
                    height=int(height),
                    frame_count=1,
                )
                if seconds:
                    meta["estimated_processing_seconds"] = int(seconds)
        except Exception:
            # cv2 may be missing or the image unreadable; sizing is best-effort.
            pass
    elif media_type == "audio":
        audio_meta = _probe_audio_metadata(input_path)
        if audio_meta:
            meta["audio"] = audio_meta
            duration_s = float(audio_meta.get("duration_seconds") or 0.0)
            if duration_s > 0:
                # Tactical passes are roughly 2.8x faster than forensic ones.
                factor = 0.45 if mode == "tactical" else 1.25
                meta["estimated_processing_seconds"] = int(max(8, min(10800, duration_s * factor)))
    return meta
def _probe_audio_metadata(input_path: Path) -> Dict[str, Any]:
try:
cmd = [
"ffprobe",
"-v",
"error",
"-show_streams",
"-show_format",
"-print_format",
"json",
str(input_path),
]
proc = subprocess.run(cmd, capture_output=True, text=True, check=False)
if proc.returncode != 0 or not proc.stdout:
return {}
payload = json.loads(proc.stdout)
streams = payload.get("streams") or []
audio_stream = next((s for s in streams if str(s.get("codec_type", "")).lower() == "audio"), None)
fmt = payload.get("format") or {}
duration_raw = (audio_stream or {}).get("duration") or fmt.get("duration")
duration = float(duration_raw) if duration_raw not in (None, "", "N/A") else 0.0
sample_rate_raw = (audio_stream or {}).get("sample_rate")
channels_raw = (audio_stream or {}).get("channels")
bitrate_raw = (audio_stream or {}).get("bit_rate") or fmt.get("bit_rate")
return {
"duration_seconds": round(duration, 3) if duration > 0 else None,
"sample_rate_hz": int(sample_rate_raw) if sample_rate_raw not in (None, "", "N/A") else None,
"channels": int(channels_raw) if channels_raw not in (None, "", "N/A") else None,
"bit_rate": int(bitrate_raw) if bitrate_raw not in (None, "", "N/A") else None,
"codec": (audio_stream or {}).get("codec_name"),
"container": fmt.get("format_name"),
}
except Exception:
return {}
def _analyze_audio(path: Path) -> Dict[str, Any]:
    """Quick pre-analysis of an audio upload: quality tiers, recommendations, ETA.

    Recommendation strings are user-facing Ukrainian text. When duration is
    unknown a 20-second fallback is assumed for the processing estimates.
    """
    meta = _probe_audio_metadata(path)
    duration = float(meta.get("duration_seconds") or 0.0)
    bitrate = float(meta.get("bit_rate") or 0.0)
    low_bitrate = bool(bitrate and bitrate < 128000)
    recommendations: List[str] = []
    if duration <= 0:
        recommendations.append("Не вдалося надійно визначити тривалість аудіо.")
    if low_bitrate:
        recommendations.append("Низький bitrate: рекомендується forensic-режим та денойз перед транскрипцією.")
    else:
        recommendations.append("Рекомендується tactical denoise + speech enhance для швидкого перегляду.")
    recommendations.append("Для доказового контуру: forensic mode + chain-of-custody + підпис результатів.")
    if duration and duration < 60:
        duration_bucket = "short"
    elif duration and duration > 600:
        duration_bucket = "long"
    else:
        duration_bucket = "medium"
    fallback_duration = duration or 20.0
    return {
        "media_type": "audio",
        "audio": meta,
        "quality_analysis": {
            "bitrate_tier": "low" if low_bitrate else "normal",
            "duration_bucket": duration_bucket,
        },
        "recommendations": recommendations,
        "suggested_priority": "speech",
        "suggested_export": {
            "format": "wav_pcm_s16le",
            "sample_rate_hz": int(meta.get("sample_rate_hz") or 16000),
            "channels": 1,
        },
        "estimated_processing_seconds": {
            "tactical": int(max(6, min(7200, fallback_duration * 0.45))),
            "forensic": int(max(12, min(14400, fallback_duration * 1.25))),
        },
    }
def _parse_export_options(raw_value: str) -> Dict[str, Any]:
if not raw_value:
return {}
try:
parsed = json.loads(raw_value)
except Exception as exc:
raise HTTPException(status_code=422, detail=f"Invalid export_options JSON: {exc}") from exc
if not isinstance(parsed, dict):
raise HTTPException(status_code=422, detail="export_options must be a JSON object")
return parsed
def _status_timing(job: Any) -> Dict[str, Optional[int]]:
    """Derive elapsed/total/ETA seconds (plus live fps and ETA confidence) for a job.

    Terminal jobs report their exact duration with eta=0. Processing jobs blend
    the upload-time estimate, progress extrapolation, and a per-stage ETA hint
    that the worker embeds in the ``current_stage`` text.
    """
    started = _parse_iso_utc(job.started_at)
    if not started:
        # Job has never started: nothing to measure yet.
        return {
            "elapsed_seconds": None,
            "estimated_total_seconds": None,
            "eta_seconds": None,
        }
    now = datetime.now(timezone.utc)
    estimated_total: Optional[int] = None
    eta: Optional[int] = None
    if job.status in ("completed", "failed", "cancelled") and job.completed_at:
        completed = _parse_iso_utc(job.completed_at)
        if completed:
            # Terminal job with a valid completion time: duration is exact.
            elapsed = max(0, int((completed - started).total_seconds()))
            estimated_total = elapsed
            eta = 0
        else:
            elapsed = max(0, int((now - started).total_seconds()))
    else:
        elapsed = max(0, int((now - started).total_seconds()))
        if job.status == "processing":
            # Prefer the pre-analysis estimate stored in metadata at upload time...
            hinted_total = None
            if isinstance(job.metadata, dict):
                hinted_total = job.metadata.get("estimated_processing_seconds")
            if isinstance(hinted_total, (int, float)) and hinted_total > 0:
                estimated_total = int(hinted_total)
            elif job.progress >= 5:
                # ...otherwise extrapolate from progress once it is meaningful.
                estimated_total = int(elapsed / max(0.05, job.progress / 100.0))
            # The stage text may carry a worker-side hint like "eta ~42s".
            stage_eta = None
            if isinstance(job.current_stage, str):
                match = re.search(r"eta ~([0-9]+)s", job.current_stage)
                if match:
                    try:
                        stage_eta = int(match.group(1))
                    except Exception:
                        stage_eta = None
            if estimated_total and estimated_total > 0:
                eta = max(0, int(estimated_total - elapsed))
            if stage_eta is not None:
                # Early-stage per-frame ETA is noisy (model warmup / cache effects).
                # Blend with metadata estimate first; trust stage ETA more after ~10%.
                if eta is None:
                    eta = stage_eta
                elif job.progress < 10:
                    eta = int((eta * 0.75) + (stage_eta * 0.25))
                elif job.progress < 30:
                    eta = int((eta * 0.50) + (stage_eta * 0.50))
                else:
                    eta = int((eta * 0.25) + (stage_eta * 0.75))
                # Keep the total consistent with the blended ETA.
                estimated_total = max(estimated_total or 0, elapsed + max(0, eta))
    live_fps: Optional[float] = None
    eta_confidence: Optional[str] = None
    if isinstance(job.current_stage, str):
        # The stage text may also carry a live throughput figure, e.g. "(12.5 fps".
        fps_match = re.search(r"\(([0-9]+(?:\.[0-9]+)?)\s*fps", job.current_stage)
        if fps_match:
            try:
                live_fps = round(float(fps_match.group(1)), 2)
            except Exception:
                pass
        # NOTE(review): "skip=NN%" appears to be the worker's frame-skip ratio;
        # heavy skipping lowers ETA trust — confirm against orchestrator output.
        skip_match = re.search(r"skip=([0-9]+)%", job.current_stage)
        skip_pct = int(skip_match.group(1)) if skip_match else 0
        if job.progress >= 30 and live_fps is not None:
            eta_confidence = "high" if skip_pct < 50 else "medium"
        elif job.progress >= 10:
            eta_confidence = "medium"
        elif job.progress >= 2:
            eta_confidence = "low"
    return {
        "elapsed_seconds": elapsed,
        "estimated_total_seconds": estimated_total,
        "eta_seconds": eta,
        "live_fps": live_fps,
        "eta_confidence": eta_confidence,
    }
async def run_job(job_id: str) -> None:
    """Execute one queued job under the global concurrency semaphore.

    Re-checks job state after the slot is acquired (it may have been cancelled
    while queued), runs the blocking orchestrator in a worker thread, and
    records completion / cancellation / failure in the store.
    """
    async with RUN_SLOT:
        job = store.get_job(job_id)
        if not job:
            return
        if job.status == "cancelled":
            return
        if job.cancel_requested:
            # Cancellation arrived while the job was still waiting in the queue.
            store.mark_cancelled(job_id, completed_at=utc_now_iso())
            return
        store.mark_processing(job_id, started_at=utc_now_iso())
        logger.info("aurora job started: %s (%s, %s)", job_id, job.media_type, job.mode)

        def on_progress(progress: int, stage: str, step: Any = None) -> None:
            # Invoked from the worker thread to persist progress updates.
            store.set_progress(job_id, progress=progress, current_stage=stage)
            if step is not None:
                store.append_processing_step(job_id, step)

        def is_cancelled() -> bool:
            # Polled by the orchestrator for cooperative cancellation.
            current = store.get_job(job_id)
            return bool(current and current.cancel_requested)

        try:
            current_job = store.get_job(job_id)
            if not current_job:
                return
            # orchestrator.run is blocking; keep it off the event loop.
            result = await asyncio.to_thread(
                orchestrator.run,
                current_job,
                on_progress,
                is_cancelled,
            )
            if is_cancelled():
                # Cancellation raced with a successful run; record it as cancelled.
                store.mark_cancelled(job_id, completed_at=utc_now_iso())
                return
            completed_at = utc_now_iso()
            store.mark_completed(job_id, result=result, completed_at=completed_at)
            final_job = store.get_job(job_id)
            if final_job and isinstance(final_job.metadata, dict):
                # Enrich metadata with the measured duration and result metadata.
                meta = dict(final_job.metadata)
                started = _parse_iso_utc(final_job.started_at)
                completed = _parse_iso_utc(completed_at)
                if started and completed:
                    meta["actual_processing_seconds"] = max(0, int((completed - started).total_seconds()))
                if isinstance(result.metadata, dict):
                    meta["result_metadata"] = result.metadata
                store.patch_job(job_id, metadata=meta)
            logger.info("aurora job completed: %s", job_id)
        except JobCancelledError:
            store.mark_cancelled(job_id, completed_at=utc_now_iso())
            logger.info("aurora job cancelled: %s", job_id)
        except Exception as exc:
            store.mark_failed(job_id, message=str(exc), completed_at=utc_now_iso())
            logger.exception("aurora job failed: %s", job_id)
def _aurora_chat_reply(
    *,
    message: str,
    job: Optional[Any],
    analysis: Optional[Dict[str, Any]],
) -> Dict[str, Any]:
    """Build a rule-based chat reply: text lines plus suggested UI actions.

    Combines job status context, pre-analysis recommendations, and keyword
    matching on the user message. Actions are deduplicated (preserving order)
    and capped at six. Reply strings are user-facing Ukrainian by design.
    """
    normalized_message = (message or "").strip()
    lower = normalized_message.lower()
    actions: List[Dict[str, Any]] = []
    context: Dict[str, Any] = {}
    lines: List[str] = []
    if job:
        timing = _status_timing(job)
        storage = _job_storage_info(job)
        context["job_id"] = job.job_id
        context["status"] = job.status
        context["stage"] = job.current_stage
        context["timing"] = timing
        context["storage"] = storage
        lines.append(f"Job `{job.job_id}`: status `{job.status}`, stage `{job.current_stage}`.")
        if job.status == "queued":
            position = _queued_position(job.job_id)
            if position:
                lines.append(f"Черга: позиція #{position}.")
            actions.append({"type": "refresh_status", "label": "Оновити статус"})
            actions.append({"type": "cancel", "label": "Скасувати job"})
        elif job.status == "processing":
            elapsed = timing.get("elapsed_seconds")
            eta = timing.get("eta_seconds")
            if isinstance(elapsed, int):
                if isinstance(eta, int):
                    lines.append(f"Минуло {elapsed}s, орієнтовно залишилось ~{eta}s.")
                else:
                    lines.append(f"Минуло {elapsed}s, ETA ще уточнюється.")
            actions.append({"type": "refresh_status", "label": "Оновити статус"})
            actions.append({"type": "cancel", "label": "Скасувати job"})
        elif job.status == "completed":
            lines.append(f"Результати збережені в `{storage.get('output_dir', 'n/a')}`.")
            actions.append({"type": "open_result", "label": "Відкрити результат"})
            actions.append({"type": "reprocess", "label": "Повторити обробку", "second_pass": False})
            actions.append({"type": "reprocess", "label": "Second pass", "second_pass": True})
        elif job.status in ("failed", "cancelled"):
            if job.error_message:
                lines.append(f"Причина: {job.error_message}")
            lines.append("Можна перезапустити обробку з тими самими або новими параметрами.")
            actions.append({"type": "reprocess", "label": "Перезапустити job", "second_pass": False})
            actions.append({"type": "reprocess", "label": "Second pass", "second_pass": True})
        # "Where are my files?"-style questions get the storage paths appended.
        if any(token in lower for token in ("де", "where", "storage", "збереж")):
            lines.append(
                "Шляхи: "
                f"input `{storage.get('input_path', 'n/a')}`, "
                f"output `{storage.get('output_dir', 'n/a')}`, "
                f"job `{storage.get('job_record', 'n/a')}`."
            )
    if analysis and isinstance(analysis, dict):
        # Surface the top pre-analysis recommendations and a reprocess shortcut.
        recs = analysis.get("recommendations")
        if isinstance(recs, list) and recs:
            top_recs = [str(x) for x in recs[:3]]
            lines.append("Рекомендації pre-analysis: " + "; ".join(top_recs))
        suggested_priority = str(analysis.get("suggested_priority") or "").strip()
        if suggested_priority:
            actions.append(
                {
                    "type": "reprocess",
                    "label": f"Reprocess ({suggested_priority})",
                    "priority": suggested_priority,
                    "second_pass": False,
                }
            )
    if not lines:
        # No job/analysis context at all: generic onboarding reply.
        lines.append("Готова допомогти з обробкою. Надішліть файл або оберіть job для контексту.")
        lines.append("Я можу пояснити ETA, місце збереження та запустити reprocess.")
        actions.append({"type": "refresh_health", "label": "Перевірити Aurora"})
    # Keyword-triggered actions derived from the user message itself.
    if any(token in lower for token in ("повтор", "reprocess", "ще раз", "second pass", "другий прохід")):
        actions.append({"type": "reprocess", "label": "Запустити reprocess", "second_pass": "second pass" in lower})
    if "скас" in lower or "cancel" in lower:
        actions.append({"type": "cancel", "label": "Скасувати job"})
    if "статус" in lower or "status" in lower:
        actions.append({"type": "refresh_status", "label": "Оновити статус"})
    # Deduplicate actions by canonical JSON key, keeping the first occurrence.
    deduped: List[Dict[str, Any]] = []
    seen = set()
    for action in actions:
        key = json.dumps(action, sort_keys=True, ensure_ascii=True)
        if key in seen:
            continue
        seen.add(key)
        deduped.append(action)
    return {
        "agent": "Aurora",
        "reply": "\n".join(lines),
        "context": context,
        "actions": deduped[:6],
    }
@app.get("/health")
async def health() -> Dict[str, Any]:
    """Liveness/diagnostics endpoint: job counts, runtime info, subagent registry."""
    registry = build_subagent_registry()
    return {
        "status": "healthy",
        "service": "aurora-service",
        "data_dir": str(DATA_DIR),
        "jobs": store.count_by_status(),
        "runtime": runtime_diagnostics(),
        "scheduler": {"max_concurrent_jobs": MAX_CONCURRENT_JOBS},
        "langchain_scaffold": {
            "enabled": True,
            "subagents": list(registry.keys()),
        },
    }
@app.post("/api/aurora/analyze")
async def analyze_media(file: UploadFile = File(...)) -> Dict[str, Any]:
    """Run pre-analysis on an upload (video/photo/audio) without creating a job.

    The upload is written to a scratch file which is always removed afterwards.
    """
    file_name = safe_filename(file.filename or "upload.bin")
    media_type = detect_media_type(file_name, file.content_type or "")
    if media_type not in ("video", "photo", "audio"):
        raise HTTPException(status_code=415, detail="Analyze supports video/photo/audio only")
    scratch_dir = store.uploads_dir / "_analyze"
    scratch_dir.mkdir(parents=True, exist_ok=True)
    scratch_path = scratch_dir / f"{uuid.uuid4().hex[:12]}_{file_name}"
    body = await file.read()
    if not body:
        raise HTTPException(status_code=400, detail="Empty upload")
    scratch_path.write_bytes(body)
    try:
        # Dispatch to the analyzer matching the detected media type.
        analyzer = {"video": analyze_video, "audio": _analyze_audio}.get(media_type, analyze_photo)
        payload = analyzer(scratch_path)
        payload["file_name"] = file_name
        payload["media_type"] = media_type
        return payload
    except HTTPException:
        raise
    except Exception as exc:
        raise HTTPException(status_code=500, detail=f"Analyze failed: {exc}") from exc
    finally:
        try:
            scratch_path.unlink(missing_ok=True)
        except Exception:
            pass
@app.post("/api/aurora/audio/analyze")
async def analyze_audio(file: UploadFile = File(...)) -> Dict[str, Any]:
    """Audio-only pre-analysis endpoint; rejects non-audio uploads with 415."""
    file_name = safe_filename(file.filename or "upload_audio.bin")
    if detect_media_type(file_name, file.content_type or "") != "audio":
        raise HTTPException(status_code=415, detail="Audio analyze supports audio files only")
    scratch_dir = store.uploads_dir / "_analyze_audio"
    scratch_dir.mkdir(parents=True, exist_ok=True)
    scratch_path = scratch_dir / f"{uuid.uuid4().hex[:12]}_{file_name}"
    body = await file.read()
    if not body:
        raise HTTPException(status_code=400, detail="Empty upload")
    scratch_path.write_bytes(body)
    try:
        payload = _analyze_audio(scratch_path)
        payload["file_name"] = file_name
        return payload
    except HTTPException:
        raise
    except Exception as exc:
        raise HTTPException(status_code=500, detail=f"Audio analyze failed: {exc}") from exc
    finally:
        scratch_path.unlink(missing_ok=True)
@app.post("/api/aurora/audio/process")
async def process_audio(
    file: UploadFile = File(...),
    mode: str = Form("tactical"),
    priority: str = Form("speech"),
    export_options: str = Form(""),
) -> Dict[str, Any]:
    """Enqueue an audio job through the audio scaffold pipeline.

    Validates/normalizes all form fields before writing anything to disk; the
    staging copy is always removed once the job's own copy has been made.
    """
    file_name = safe_filename(file.filename or "upload_audio.bin")
    if detect_media_type(file_name, file.content_type or "") != "audio":
        raise HTTPException(status_code=415, detail="Audio process supports audio files only")
    body = await file.read()
    if not body:
        raise HTTPException(status_code=400, detail="Empty upload")
    normalized_mode = _normalize_mode(mode)
    normalized_priority = _normalize_priority(priority, fallback="balanced")
    parsed_exports = _parse_export_options(export_options)
    staging_dir = store.uploads_dir / "_incoming_audio"
    staging_dir.mkdir(parents=True, exist_ok=True)
    staging_path = staging_dir / f"{uuid.uuid4().hex[:12]}_{file_name}"
    staging_path.write_bytes(body)
    try:
        response = _enqueue_job_from_path(
            source_path=staging_path,
            file_name=file_name,
            mode=normalized_mode,
            media_type="audio",
            priority=normalized_priority,
            export_options=parsed_exports,
            metadata_patch={"audio_pipeline": "scaffold_v1"},
        )
        response["pipeline"] = "audio_scaffold_v1"
        return response
    finally:
        staging_path.unlink(missing_ok=True)
@app.post("/api/aurora/upload")
async def upload_media(
    file: UploadFile = File(...),
    mode: str = Form("tactical"),
    priority: str = Form("balanced"),
    export_options: str = Form(""),
) -> Dict[str, Any]:
    """Accept a media upload and enqueue an Aurora processing job.

    Raises 422 for an invalid mode or export_options, 415 for an unsupported
    media type, 400 for an empty body. The staging copy is always removed
    after the job has taken its own copy.
    """
    raw_mode = (mode or "").strip().lower()
    if raw_mode and raw_mode not in ("tactical", "forensic"):
        raise HTTPException(status_code=422, detail="mode must be 'tactical' or 'forensic'")
    # After the explicit check above, _normalize_mode can only yield a valid
    # mode, so the old second validation of its result was unreachable dead
    # code and has been removed.
    normalized_mode = _normalize_mode(mode)
    file_name = safe_filename(file.filename or "upload.bin")
    media_type = detect_media_type(file_name, file.content_type or "")
    if media_type == "unknown":
        raise HTTPException(status_code=415, detail="Unsupported media type")
    content = await file.read()
    if not content:
        raise HTTPException(status_code=400, detail="Empty upload")
    tmp_dir = store.uploads_dir / "_incoming"
    tmp_dir.mkdir(parents=True, exist_ok=True)
    source_path = tmp_dir / f"{uuid.uuid4().hex[:12]}_{file_name}"
    source_path.write_bytes(content)
    normalized_priority = _normalize_priority(priority, fallback="balanced")
    parsed_export_options = _parse_export_options(export_options)
    try:
        return _enqueue_job_from_path(
            source_path=source_path,
            file_name=file_name,
            mode=normalized_mode,
            media_type=media_type,
            priority=normalized_priority,
            export_options=parsed_export_options,
        )
    finally:
        source_path.unlink(missing_ok=True)
@app.post("/api/aurora/reprocess/{job_id}")
async def reprocess_media(
    job_id: str,
    payload: Optional[Dict[str, Any]] = Body(default=None),
) -> Dict[str, Any]:
    """Re-run an existing job, optionally on its processed output (second pass).

    Mode/priority/export options default to the source job's values; any
    values in the request body override them (export_options replaces the
    inherited dict entirely rather than merging).
    """
    source_job = store.get_job(job_id)
    if not source_job:
        raise HTTPException(status_code=404, detail="job not found")
    body = payload if isinstance(payload, dict) else {}
    second_pass = bool(body.get("second_pass", False))
    source_path = _resolve_source_media_path(source_job, second_pass=second_pass)
    source_meta = source_job.metadata if isinstance(source_job.metadata, dict) else {}
    mode_override = body.get("mode")
    priority_override = body.get("priority")
    export_override = body.get("export_options")
    normalized_mode = _normalize_mode(
        str(mode_override) if isinstance(mode_override, str) else source_job.mode,
        fallback=source_job.mode,
    )
    normalized_priority = _normalize_priority(
        str(priority_override) if isinstance(priority_override, str) else str(source_meta.get("priority") or "balanced"),
        fallback="balanced",
    )
    export_options: Dict[str, Any] = {}
    if isinstance(source_meta.get("export_options"), dict):
        export_options.update(source_meta["export_options"])
    if isinstance(export_override, dict):
        export_options = export_override
    response = _enqueue_job_from_path(
        source_path=source_path,
        file_name=source_job.file_name,
        mode=normalized_mode,
        media_type=source_job.media_type,
        priority=normalized_priority,
        export_options=export_options,
        metadata_patch={
            "reprocess_of": source_job.job_id,
            "reprocess_second_pass": second_pass,
        },
    )
    response["source_job_id"] = source_job.job_id
    response["second_pass"] = second_pass
    return response
@app.post("/api/aurora/chat")
async def aurora_chat(payload: Optional[Dict[str, Any]] = Body(default=None)) -> Dict[str, Any]:
    """Rule-based chat endpoint around job status and pre-analysis context."""
    body = payload if isinstance(payload, dict) else {}
    message = str(body.get("message") or "").strip()
    job_id = str(body.get("job_id") or "").strip()
    raw_analysis = body.get("analysis")
    job = store.get_job(job_id) if job_id else None
    response = _aurora_chat_reply(
        message=message,
        job=job,
        analysis=raw_analysis if isinstance(raw_analysis, dict) else None,
    )
    if job_id and job is None:
        # Caller referenced a job we cannot find; surface that in the context.
        merged = dict(response.get("context") or {})
        merged["job_id"] = job_id
        merged["warning"] = "job not found"
        response["context"] = merged
    return response
@app.get("/api/aurora/status/{job_id}")
async def job_status(job_id: str) -> Dict[str, Any]:
    """Current status, progress, timing, and storage info for a single job."""
    job = store.get_job(job_id)
    if not job:
        raise HTTPException(status_code=404, detail="job not found")
    timing = _status_timing(job)
    payload: Dict[str, Any] = {
        "job_id": job.job_id,
        "status": job.status,
        "progress": job.progress,
        "current_stage": job.current_stage,
        "mode": job.mode,
        "media_type": job.media_type,
        "error_message": job.error_message,
        "created_at": job.created_at,
        "started_at": job.started_at,
        "completed_at": job.completed_at,
        "processing_log_count": len(job.processing_log),
        "elapsed_seconds": timing["elapsed_seconds"],
        "estimated_total_seconds": timing["estimated_total_seconds"],
        "eta_seconds": timing["eta_seconds"],
        "live_fps": timing.get("live_fps"),
        "eta_confidence": timing.get("eta_confidence"),
        "queue_position": _queued_position(job_id),
        "metadata": job.metadata,
        "storage": _job_storage_info(job),
    }
    if job.result:
        payload["output_files"] = [model_dump(entry) for entry in job.result.output_files]
    return payload
@app.get("/api/aurora/jobs")
async def list_jobs(
    limit: int = Query(default=30, ge=1, le=200),
    status: Optional[str] = Query(default=None),
) -> Dict[str, Any]:
    """List jobs newest-first, optionally filtered by comma-separated statuses.

    Unknown status tokens are dropped; if none remain, no filter is applied.
    """
    wanted: Optional[set[str]] = None
    if status and status.strip():
        known = {"queued", "processing", "completed", "failed", "cancelled"}
        requested = {piece.strip().lower() for piece in status.split(",") if piece.strip()}
        wanted = (requested & known) or None
    jobs = store.list_jobs()
    if wanted:
        jobs = [job for job in jobs if job.status in wanted]
    # Sort newest first; job_id breaks ties for a stable ordering.
    epoch = datetime.fromtimestamp(0, tz=timezone.utc)
    ordered = sorted(
        jobs,
        key=lambda job: (_parse_iso_utc(job.created_at) or epoch, job.job_id),
        reverse=True,
    )
    items: List[Dict[str, Any]] = []
    for job in ordered[:limit]:
        timing = _status_timing(job)
        items.append(
            {
                "job_id": job.job_id,
                "status": job.status,
                "mode": job.mode,
                "media_type": job.media_type,
                "file_name": job.file_name,
                "progress": job.progress,
                "current_stage": job.current_stage,
                "error_message": job.error_message,
                "created_at": job.created_at,
                "started_at": job.started_at,
                "completed_at": job.completed_at,
                "elapsed_seconds": timing["elapsed_seconds"],
                "eta_seconds": timing["eta_seconds"],
                "live_fps": timing.get("live_fps"),
                "metadata": job.metadata if isinstance(job.metadata, dict) else {},
                "queue_position": _queued_position(job.job_id),
                "has_result": bool(job.result),
            }
        )
    return {
        "jobs": items,
        "count": len(items),
        "total": len(ordered),
    }
@app.get("/api/aurora/result/{job_id}")
async def job_result(job_id: str) -> Dict[str, Any]:
    """Full result payload for a completed job; 409 until completion."""
    record = store.get_job(job_id)
    if not record:
        raise HTTPException(status_code=404, detail="job not found")
    if record.status != "completed" or not record.result:
        raise HTTPException(
            status_code=409,
            detail=f"job not completed (status={record.status})",
        )
    payload = model_dump(record.result)
    payload["storage"] = _job_storage_info(record)
    if record.mode == "forensic":
        # Forensic runs additionally expose a downloadable PDF report.
        payload["forensic_report_url"] = f"/api/aurora/report/{job_id}.pdf"
    return payload
@app.get("/api/aurora/report/{job_id}.pdf")
async def job_forensic_pdf(job_id: str) -> FileResponse:
    """Generate (on demand) and stream the forensic PDF report for a completed job.

    Only available for jobs run in forensic mode; regenerated on every request.
    """
    record = store.get_job(job_id)
    if not record:
        raise HTTPException(status_code=404, detail="job not found")
    if record.status != "completed" or not record.result:
        raise HTTPException(status_code=409, detail=f"job not completed (status={record.status})")
    if record.mode != "forensic":
        raise HTTPException(status_code=409, detail="forensic report is available only in forensic mode")
    report_path = store.outputs_dir / job_id / "forensic_report.pdf"
    try:
        generate_forensic_report_pdf(record, report_path)
    except Exception as exc:
        raise HTTPException(status_code=500, detail=f"Cannot generate forensic report: {exc}") from exc
    return FileResponse(
        path=report_path,
        filename=f"{job_id}_forensic_report.pdf",
        media_type="application/pdf",
    )
@app.post("/api/aurora/cancel/{job_id}")
async def cancel_job(job_id: str) -> Dict[str, Any]:
    """Request cooperative cancellation of a job (the worker polls the flag)."""
    if not store.get_job(job_id):
        raise HTTPException(status_code=404, detail="job not found")
    updated = store.request_cancel(job_id)
    return {
        "job_id": updated.job_id,
        "status": updated.status,
        "cancel_requested": updated.cancel_requested,
    }
@app.post("/api/aurora/delete/{job_id}")
async def delete_job(
job_id: str,
purge_files: bool = Query(default=True),
) -> Dict[str, Any]:
job = store.get_job(job_id)
if not job:
raise HTTPException(status_code=404, detail="job not found")
if job.status in ("queued", "processing"):
raise HTTPException(
status_code=409,
detail="job is not terminal; cancel it first",
)
deleted = store.delete_job(job_id, remove_artifacts=purge_files)
if not deleted:
raise HTTPException(status_code=404, detail="job not found")
return {
"job_id": job_id,
"deleted": True,
"purge_files": bool(purge_files),
}
@app.get("/api/aurora/storage")
async def storage_info() -> Dict[str, Any]:
"""Disk usage breakdown and per-job sizes."""
jobs = store.list_jobs()
per_job: List[Dict[str, Any]] = []
total_output = 0
total_upload = 0
total_work = 0
for job in jobs:
out_dir = store.outputs_dir / job.job_id
up_dir = store.uploads_dir / job.job_id
out_size = sum(f.stat().st_size for f in out_dir.rglob("*") if f.is_file()) if out_dir.exists() else 0
up_size = sum(f.stat().st_size for f in up_dir.rglob("*") if f.is_file()) if up_dir.exists() else 0
work_size = 0
if out_dir.exists():
for d in out_dir.iterdir():
if d.is_dir() and d.name.startswith("_work"):
work_size += sum(f.stat().st_size for f in d.rglob("*") if f.is_file())
total_output += out_size
total_upload += up_size
total_work += work_size
per_job.append({
"job_id": job.job_id,
"status": job.status,
"output_mb": round(out_size / (1024 * 1024), 1),
"upload_mb": round(up_size / (1024 * 1024), 1),
"work_mb": round(work_size / (1024 * 1024), 1),
})
models_dir = DATA_DIR / "models"
models_size = sum(f.stat().st_size for f in models_dir.rglob("*") if f.is_file()) if models_dir.exists() else 0
return {
"data_dir": str(DATA_DIR),
"total_mb": round((total_output + total_upload + total_work + models_size) / (1024 * 1024), 1),
"outputs_mb": round(total_output / (1024 * 1024), 1),
"uploads_mb": round(total_upload / (1024 * 1024), 1),
"orphan_work_mb": round(total_work / (1024 * 1024), 1),
"models_mb": round(models_size / (1024 * 1024), 1),
"jobs": sorted(per_job, key=lambda x: x["output_mb"], reverse=True),
}
@app.post("/api/aurora/cleanup")
async def cleanup_storage(
max_age_hours: int = Query(default=0, ge=0, description="Delete completed/failed/cancelled jobs older than N hours. 0 = only orphan _work dirs."),
) -> Dict[str, Any]:
"""Clean up orphaned _work directories and optionally old terminal jobs."""
result = _cleanup_work_dirs()
deleted_jobs: List[str] = []
if max_age_hours > 0:
cutoff = datetime.now(tz=timezone.utc).timestamp() - max_age_hours * 3600
for job in store.list_jobs():
if job.status not in ("completed", "failed", "cancelled"):
continue
ts = _parse_iso_utc(job.completed_at or job.created_at)
if ts and ts.timestamp() < cutoff:
store.delete_job(job.job_id, remove_artifacts=True)
deleted_jobs.append(job.job_id)
return {
"work_dirs_removed": result["dirs"],
"work_mb_freed": round(result["mb"], 1),
"jobs_deleted": deleted_jobs,
"jobs_deleted_count": len(deleted_jobs),
}
@app.get("/api/aurora/files/{job_id}/{file_name}")
async def download_output_file(job_id: str, file_name: str) -> FileResponse:
base = (store.outputs_dir / job_id).resolve()
target = (base / file_name).resolve()
if not str(target).startswith(str(base)):
raise HTTPException(status_code=403, detail="invalid file path")
if not target.exists() or not target.is_file():
raise HTTPException(status_code=404, detail="file not found")
return FileResponse(path=target, filename=target.name)
# ── Kling AI endpoints ────────────────────────────────────────────────────────
@app.get("/api/aurora/kling/health")
async def kling_health() -> Dict[str, Any]:
"""Check Kling AI connectivity."""
from .kling import kling_health_check
return kling_health_check()
@app.post("/api/aurora/kling/enhance")
async def kling_enhance_video(
job_id: str = Form(..., description="Aurora job_id whose result to enhance with Kling"),
prompt: str = Form("enhance video quality, improve sharpness and clarity", description="Enhancement guidance"),
negative_prompt: str = Form("noise, blur, artifacts, distortion", description="What to avoid"),
mode: str = Form("pro", description="'std' or 'pro'"),
duration: str = Form("5", description="'5' or '10' seconds"),
cfg_scale: float = Form(0.5, description="Prompt adherence 0.0-1.0"),
) -> Dict[str, Any]:
"""Submit Aurora job result to Kling AI for video-to-video enhancement."""
from .kling import kling_video_enhance, kling_upload_file
job = store.get_job(job_id)
if not job:
raise HTTPException(status_code=404, detail=f"Job {job_id} not found")
if job.status != "completed":
raise HTTPException(status_code=409, detail=f"Job must be completed, current status: {job.status}")
result_path = store.outputs_dir / job_id / "aurora_result.mp4"
if not result_path.exists():
for ext in [".mov", ".avi", ".mkv"]:
alt = result_path.with_suffix(ext)
if alt.exists():
result_path = alt
break
if not result_path.exists():
raise HTTPException(status_code=404, detail="Result file not found for this job")
try:
upload_resp = kling_upload_file(result_path)
except Exception as exc:
raise HTTPException(status_code=502, detail=f"Kling upload error: {str(exc)[:400]}") from exc
file_id = (upload_resp.get("data") or {}).get("resource_id") or (upload_resp.get("data") or {}).get("file_id")
if not file_id:
raise HTTPException(status_code=502, detail=f"Kling upload failed: {upload_resp}")
try:
task_resp = kling_video_enhance(
video_id=file_id,
prompt=prompt,
negative_prompt=negative_prompt,
mode=mode,
duration=duration,
cfg_scale=cfg_scale,
)
except Exception as exc:
raise HTTPException(status_code=502, detail=f"Kling task submit error: {str(exc)[:400]}") from exc
task_id = (task_resp.get("data") or {}).get("task_id") or task_resp.get("task_id")
kling_meta_dir = store.outputs_dir / job_id
kling_meta_path = kling_meta_dir / "kling_task.json"
kling_meta_path.write_text(json.dumps({
"aurora_job_id": job_id,
"kling_task_id": task_id,
"kling_file_id": file_id,
"prompt": prompt,
"mode": mode,
"duration": duration,
"submitted_at": datetime.now(timezone.utc).isoformat(),
"status": "submitted",
}, ensure_ascii=False, indent=2), encoding="utf-8")
return {
"aurora_job_id": job_id,
"kling_task_id": task_id,
"kling_file_id": file_id,
"status": "submitted",
"status_url": f"/api/aurora/kling/status/{job_id}",
}
@app.get("/api/aurora/kling/status/{job_id}")
async def kling_task_status_for_job(job_id: str) -> Dict[str, Any]:
"""Get Kling AI enhancement status for an Aurora job."""
from .kling import kling_video_task_status
kling_meta_path = store.outputs_dir / job_id / "kling_task.json"
if not kling_meta_path.exists():
raise HTTPException(status_code=404, detail=f"No Kling task for job {job_id}")
meta = json.loads(kling_meta_path.read_text(encoding="utf-8"))
task_id = meta.get("kling_task_id")
if not task_id:
raise HTTPException(status_code=404, detail="Kling task_id missing in metadata")
try:
status_resp = kling_video_task_status(task_id, endpoint="video2video")
except Exception as exc:
raise HTTPException(status_code=502, detail=f"Kling status error: {str(exc)[:400]}") from exc
task_data = status_resp.get("data") or status_resp
state = task_data.get("task_status") or task_data.get("status") or "unknown"
meta["status"] = state
meta["last_checked"] = datetime.now(timezone.utc).isoformat()
result_url = None
works = task_data.get("task_result", {}).get("videos") or []
if works:
result_url = works[0].get("url")
if result_url:
meta["kling_result_url"] = result_url
meta["completed_at"] = datetime.now(timezone.utc).isoformat()
kling_meta_path.write_text(json.dumps(meta, ensure_ascii=False, indent=2), encoding="utf-8")
return {
"aurora_job_id": job_id,
"kling_task_id": task_id,
"status": state,
"kling_result_url": result_url,
"meta": meta,
}
@app.post("/api/aurora/kling/image2video")
async def kling_image_to_video(
file: UploadFile = File(..., description="Source image (frame)"),
prompt: str = Form("smooth motion, cinematic video, high quality"),
negative_prompt: str = Form("blur, artifacts, distortion"),
model: str = Form("kling-v1-5"),
mode: str = Form("pro"),
duration: str = Form("5"),
aspect_ratio: str = Form("16:9"),
) -> Dict[str, Any]:
"""Generate video from a still image using Kling AI."""
from .kling import kling_upload_file, kling_video_generate
file_name = file.filename or "frame.jpg"
content = await file.read()
if not content:
raise HTTPException(status_code=400, detail="Empty upload")
tmp_dir = store.uploads_dir / "_kling_i2v"
tmp_dir.mkdir(parents=True, exist_ok=True)
tmp_path = tmp_dir / f"{uuid.uuid4().hex[:12]}_{file_name}"
tmp_path.write_bytes(content)
try:
try:
upload_resp = kling_upload_file(tmp_path)
except Exception as exc:
raise HTTPException(status_code=502, detail=f"Kling upload error: {str(exc)[:400]}") from exc
file_id = (upload_resp.get("data") or {}).get("resource_id") or (upload_resp.get("data") or {}).get("file_id")
if not file_id:
raise HTTPException(status_code=502, detail=f"Kling upload failed: {upload_resp}")
try:
task_resp = kling_video_generate(
image_id=file_id,
prompt=prompt,
negative_prompt=negative_prompt,
model=model,
mode=mode,
duration=duration,
aspect_ratio=aspect_ratio,
)
except Exception as exc:
raise HTTPException(status_code=502, detail=f"Kling task submit error: {str(exc)[:400]}") from exc
task_id = (task_resp.get("data") or {}).get("task_id") or task_resp.get("task_id")
return {
"kling_task_id": task_id,
"kling_file_id": file_id,
"status": "submitted",
"status_url": f"/api/aurora/kling/task/{task_id}?endpoint=image2video",
}
finally:
tmp_path.unlink(missing_ok=True)
@app.get("/api/aurora/kling/task/{task_id}")
async def kling_get_task(task_id: str, endpoint: str = Query("video2video")) -> Dict[str, Any]:
"""Get status of any Kling task by ID."""
from .kling import kling_video_task_status
try:
return kling_video_task_status(task_id, endpoint=endpoint)
except Exception as exc:
raise HTTPException(status_code=502, detail=f"Kling task status error: {str(exc)[:400]}") from exc
@app.get("/api/aurora/plates/{job_id}")
async def get_plate_detections(job_id: str) -> Dict[str, Any]:
"""Return ALPR plate detection results for a completed job."""
job = store.get_job(job_id)
if not job:
raise HTTPException(status_code=404, detail=f"Job {job_id} not found")
report_path = store.outputs_dir / job_id / "plate_detections.json"
if not report_path.exists():
return {
"job_id": job_id,
"plates_found": 0,
"unique_plates": 0,
"unique": [],
"detections": [],
"note": "No plate detection report found (job may predate ALPR support)",
}
data = json.loads(report_path.read_text(encoding="utf-8"))
return data