Files
microdao-daarion/services/aurora-service/app/main.py

1328 lines
51 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
from __future__ import annotations
import asyncio
import hashlib
import json
import logging
import os
import re
import shutil
import subprocess
import uuid
from datetime import datetime, timezone
from pathlib import Path
from typing import Any, Dict, List, Optional
from fastapi import Body, FastAPI, File, Form, HTTPException, Query, UploadFile
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import FileResponse
from .analysis import (
analyze_photo,
analyze_video,
estimate_processing_seconds,
probe_video_metadata,
)
from .job_store import JobStore
from .langchain_scaffold import build_subagent_registry
from .orchestrator import AuroraOrchestrator, JobCancelledError
from .reporting import generate_forensic_report_pdf
from .schemas import AuroraMode, MediaType
from .subagents import runtime_diagnostics
# Root logger config plus this module's logger.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Runtime configuration, all overridable via environment variables.
DATA_DIR = Path(os.getenv("AURORA_DATA_DIR", "/data/aurora"))
# Base URL used when building absolute links to generated artifacts.
PUBLIC_BASE_URL = os.getenv("AURORA_PUBLIC_BASE_URL", "http://localhost:9401").rstrip("/")
CORS_ORIGINS = os.getenv("AURORA_CORS_ORIGINS", "*")
# Policy for jobs interrupted by a restart (consumed by JobStore.recover_interrupted_jobs).
RECOVERY_STRATEGY = os.getenv("AURORA_RECOVERY_STRATEGY", "requeue").strip().lower()

# Extension fallbacks used when the Content-Type header is missing or generic.
VIDEO_EXTENSIONS = {".mp4", ".avi", ".mov", ".mkv", ".webm"}
AUDIO_EXTENSIONS = {".mp3", ".wav", ".flac", ".m4a", ".aac", ".ogg"}
PHOTO_EXTENSIONS = {".jpg", ".jpeg", ".png", ".tiff", ".tif", ".webp"}
MAX_CONCURRENT_JOBS = max(1, int(os.getenv("AURORA_MAX_CONCURRENT_JOBS", "1")))

# Shared singletons: persistent job store, pipeline orchestrator, and the
# semaphore that caps how many jobs run concurrently.
store = JobStore(DATA_DIR)
orchestrator = AuroraOrchestrator(store.outputs_dir, PUBLIC_BASE_URL)
RUN_SLOT = asyncio.Semaphore(MAX_CONCURRENT_JOBS)

app = FastAPI(
    title="Aurora Media Forensics Service",
    description="AURORA tactical/forensic media pipeline scaffold for AISTALK",
    version="0.1.0",
)

# CORS: "*" allows any origin; otherwise a comma-separated allow-list.
if CORS_ORIGINS.strip() == "*":
    allow_origins = ["*"]
else:
    allow_origins = [x.strip() for x in CORS_ORIGINS.split(",") if x.strip()]
app.add_middleware(
    CORSMiddleware,
    allow_origins=allow_origins,
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)
@app.on_event("startup")
async def recover_orphan_jobs() -> None:
    """Startup hook: recover interrupted jobs, reschedule queued ones, purge work dirs.

    Jobs left in a non-terminal state by a previous process are handled per
    RECOVERY_STRATEGY, still-queued jobs are put back on the scheduler, and
    leftover ``_work*`` directories from the old pipeline are deleted.
    """
    recovered = store.recover_interrupted_jobs(
        completed_at=utc_now_iso(),
        message="Interrupted by aurora-service restart",
        strategy=RECOVERY_STRATEGY,
    )
    if recovered:
        logger.warning(
            "Recovered %d interrupted Aurora jobs with strategy=%s",
            recovered,
            RECOVERY_STRATEGY,
        )
    queued = sorted(
        [job for job in store.list_jobs() if job.status == "queued"],
        key=lambda item: item.created_at,
    )
    for job in queued:
        # NOTE(review): fire-and-forget task; JobStore persistence makes a lost
        # task recoverable on the next restart, so no reference is retained.
        asyncio.create_task(run_job(job.job_id))
    if queued:
        logger.info("Rescheduled %d queued Aurora jobs on startup", len(queued))
    cleaned = _cleanup_work_dirs()
    # BUG FIX: _cleanup_work_dirs always returns a non-empty dict, so the old
    # `if cleaned:` test was always true and logged "Cleaned 0 orphaned _work
    # directories (0.0 MB freed)" on every startup. Gate on the actual count.
    if cleaned["dirs"]:
        logger.info("Cleaned %d orphaned _work directories (%.1f MB freed)", cleaned["dirs"], cleaned["mb"])
def _cleanup_work_dirs() -> Dict[str, Any]:
    """Remove leftover _work_* directories from the old PNG-based pipeline.

    Returns ``{"dirs": <number removed>, "mb": <megabytes freed>}``.
    Best-effort: missing directories and files vanishing mid-scan are tolerated.
    """
    total_freed = 0
    dirs_removed = 0
    outputs_dir = store.outputs_dir
    # Robustness fix: on a fresh deployment the outputs dir may not exist yet;
    # the old code would raise from iterdir() and abort startup cleanup.
    if not outputs_dir.is_dir():
        return {"dirs": 0, "mb": 0.0}
    for job_dir in outputs_dir.iterdir():
        if not job_dir.is_dir():
            continue
        for entry in list(job_dir.iterdir()):
            if entry.is_dir() and entry.name.startswith("_work"):
                try:
                    size = sum(f.stat().st_size for f in entry.rglob("*") if f.is_file())
                except OSError:
                    # A file disappeared between rglob() and stat(); size is best-effort.
                    size = 0
                shutil.rmtree(entry, ignore_errors=True)
                total_freed += size
                dirs_removed += 1
    return {"dirs": dirs_removed, "mb": total_freed / (1024 * 1024)}
def utc_now_iso() -> str:
    """Current UTC time as an ISO-8601 string using a trailing 'Z' suffix."""
    stamp = datetime.now(timezone.utc).isoformat()
    return stamp.replace("+00:00", "Z")
def safe_filename(file_name: str) -> str:
    """Strip path components and replace unsafe characters in an upload name.

    Falls back to a random ``upload_xxxxxxxx.bin`` name when sanitizing
    leaves nothing usable.
    """
    name_only = Path(file_name or "upload.bin").name
    cleaned = re.sub(r"[^A-Za-z0-9._-]", "_", name_only).strip("._")
    if cleaned:
        return cleaned
    return f"upload_{uuid.uuid4().hex[:8]}.bin"
def compute_sha256(path: Path) -> str:
    """Stream a file through SHA-256 and return it as 'sha256:<hexdigest>'."""
    hasher = hashlib.sha256()
    with path.open("rb") as handle:
        # Read in 1 MiB chunks so large media never lands in memory at once.
        for chunk in iter(lambda: handle.read(1024 * 1024), b""):
            hasher.update(chunk)
    return f"sha256:{hasher.hexdigest()}"
def detect_media_type(file_name: str, content_type: str) -> MediaType:
    """Classify an upload as video/audio/photo.

    The MIME type prefix wins; the file extension is only the fallback when
    the Content-Type header is missing or generic. Returns "unknown" when
    neither signal matches.
    """
    extension = Path(file_name).suffix.lower()
    if content_type.startswith("video/") or extension in VIDEO_EXTENSIONS:
        return "video"
    if content_type.startswith("audio/") or extension in AUDIO_EXTENSIONS:
        return "audio"
    if content_type.startswith("image/") or extension in PHOTO_EXTENSIONS:
        return "photo"
    return "unknown"
def _normalize_mode(raw_mode: Optional[str], fallback: AuroraMode = "tactical") -> AuroraMode:
value = (raw_mode or fallback).strip().lower()
if value not in ("tactical", "forensic"):
return fallback
return value # type: ignore[return-value]
def _normalize_priority(raw_priority: Optional[str], fallback: str = "balanced") -> str:
value = (raw_priority or fallback).strip().lower()
if value not in {"balanced", "faces", "plates", "details", "speech"}:
return fallback
return value
def _job_storage_info(job: Any) -> Dict[str, str]:
    """Absolute filesystem paths for a job's upload/output dirs and its JSON record.

    ``input_path`` is included only when the original upload still exists.
    """
    info = {
        "upload_dir": str((store.uploads_dir / job.job_id).resolve()),
        "output_dir": str((store.outputs_dir / job.job_id).resolve()),
        "job_record": str((store.jobs_dir / f"{job.job_id}.json").resolve()),
    }
    source = Path(str(job.input_path))
    if source.exists():
        info["input_path"] = str(source.resolve())
    return info
def _queued_position(job_id: str) -> Optional[int]:
    """1-based FIFO position (ordered by created_at) of a queued job, or None.

    Scans the raw job JSON records on disk rather than the in-memory store so
    the position reflects persisted state; unreadable records are skipped.
    """
    target = store.get_job(job_id)
    if not target or target.status != "queued":
        return None
    pending: List[Any] = []
    for record_path in sorted(store.jobs_dir.glob("*.json")):
        try:
            record = json.loads(record_path.read_text(encoding="utf-8"))
            if record.get("status") == "queued":
                pending.append(record)
        except Exception:
            continue
    pending.sort(key=lambda rec: str(rec.get("created_at") or ""))
    for position, record in enumerate(pending, start=1):
        if str(record.get("job_id") or "") == job_id:
            return position
    return None
def _resolve_source_media_path(job: Any, *, second_pass: bool = False) -> Path:
    """Locate the media file to feed into a reprocess run.

    Normally prefers the original upload. With ``second_pass=True`` the
    processed output file whose type matches the job's media type is preferred,
    falling back to the original upload. Raises HTTP 409 when nothing usable
    remains on disk.
    """
    original = Path(str(job.input_path))
    if not second_pass and original.exists() and original.is_file():
        return original
    result = getattr(job, "result", None)
    outputs = getattr(result, "output_files", None) if result else None
    if isinstance(outputs, list):
        wanted_type = str(job.media_type).lower()
        for entry in outputs:
            if str(getattr(entry, "type", "")).lower() != wanted_type:
                continue
            candidate = store.outputs_dir / job.job_id / str(getattr(entry, "name", ""))
            if candidate.exists() and candidate.is_file():
                return candidate
    if original.exists() and original.is_file():
        return original
    raise HTTPException(status_code=409, detail=f"Source media not available for job {job.job_id}")
def _enqueue_job_from_path(
    *,
    source_path: Path,
    file_name: str,
    mode: AuroraMode,
    media_type: MediaType,
    priority: str,
    export_options: Dict[str, Any],
    metadata_patch: Optional[Dict[str, Any]] = None,
) -> Dict[str, Any]:
    """Copy source media into a fresh per-job upload dir, persist the job, schedule it.

    Returns the API payload with the new job id and its status/result/cancel URLs.
    """
    created = datetime.now(timezone.utc)
    job_id = f"aurora_{created.strftime('%Y%m%d_%H%M%S')}_{uuid.uuid4().hex[:6]}"
    upload_dir = store.uploads_dir / job_id
    upload_dir.mkdir(parents=True, exist_ok=True)
    input_path = upload_dir / safe_filename(file_name)
    shutil.copy2(source_path, input_path)
    # Pre-analysis (duration/resolution probing) seeds the job metadata,
    # including the up-front processing-time estimate.
    metadata = _estimate_upload_metadata(
        input_path=input_path,
        media_type=media_type,
        mode=mode,
    )
    if export_options:
        metadata["export_options"] = export_options
    metadata["priority"] = priority
    if metadata_patch:
        metadata.update(metadata_patch)
    store.create_job(
        job_id=job_id,
        file_name=input_path.name,
        input_path=input_path,
        input_hash=compute_sha256(input_path),
        mode=mode,
        media_type=media_type,
        created_at=utc_now_iso(),
        metadata=metadata,
    )
    # Fire-and-forget: run_job re-validates state after the semaphore is acquired.
    asyncio.create_task(run_job(job_id))
    return {
        "job_id": job_id,
        "mode": mode,
        "media_type": media_type,
        "priority": priority,
        "export_options": export_options,
        "status_url": f"/api/aurora/status/{job_id}",
        "result_url": f"/api/aurora/result/{job_id}",
        "cancel_url": f"/api/aurora/cancel/{job_id}",
    }
def model_dump(value: Any) -> Dict[str, Any]:
    """Serialize a Pydantic model, supporting both v2 (model_dump) and v1 (dict)."""
    is_v2 = hasattr(value, "model_dump")
    return value.model_dump() if is_v2 else value.dict()
def _parse_iso_utc(value: Optional[str]) -> Optional[datetime]:
if not value:
return None
try:
return datetime.fromisoformat(value.replace("Z", "+00:00"))
except Exception:
return None
def _estimate_upload_metadata(input_path: Path, media_type: MediaType, mode: AuroraMode) -> Dict[str, Any]:
    """Probe the upload and derive per-media metadata plus a processing-time estimate.

    Video uses ffprobe-backed metadata, photo uses OpenCV (best-effort, cv2 may
    be unavailable), audio uses ffprobe duration scaled by a mode-dependent factor.
    """
    meta: Dict[str, Any] = {}
    if media_type == "video":
        video_meta = probe_video_metadata(input_path)
        if video_meta:
            meta["video"] = video_meta
            seconds = estimate_processing_seconds(
                media_type="video",
                mode=mode,
                width=int(video_meta.get("width") or 0),
                height=int(video_meta.get("height") or 0),
                frame_count=int(video_meta.get("frame_count") or 0),
            )
            if seconds:
                meta["estimated_processing_seconds"] = int(seconds)
    elif media_type == "photo":
        try:
            import cv2  # type: ignore[import-untyped]
            frame = cv2.imread(str(input_path), cv2.IMREAD_COLOR)
            if frame is not None:
                height, width = frame.shape[:2]
                meta["image"] = {"width": int(width), "height": int(height)}
                seconds = estimate_processing_seconds(
                    media_type="photo",
                    mode=mode,
                    width=int(width),
                    height=int(height),
                    frame_count=1,
                )
                if seconds:
                    meta["estimated_processing_seconds"] = int(seconds)
        except Exception:
            # cv2 may be missing or the image unreadable; sizing is best-effort.
            pass
    elif media_type == "audio":
        audio_meta = _probe_audio_metadata(input_path)
        if audio_meta:
            meta["audio"] = audio_meta
            duration_s = float(audio_meta.get("duration_seconds") or 0.0)
            if duration_s > 0:
                # Tactical passes are roughly 2.8x faster than forensic ones.
                factor = 0.45 if mode == "tactical" else 1.25
                meta["estimated_processing_seconds"] = int(max(8, min(10800, duration_s * factor)))
    return meta
def _probe_audio_metadata(input_path: Path) -> Dict[str, Any]:
try:
cmd = [
"ffprobe",
"-v",
"error",
"-show_streams",
"-show_format",
"-print_format",
"json",
str(input_path),
]
proc = subprocess.run(cmd, capture_output=True, text=True, check=False)
if proc.returncode != 0 or not proc.stdout:
return {}
payload = json.loads(proc.stdout)
streams = payload.get("streams") or []
audio_stream = next((s for s in streams if str(s.get("codec_type", "")).lower() == "audio"), None)
fmt = payload.get("format") or {}
duration_raw = (audio_stream or {}).get("duration") or fmt.get("duration")
duration = float(duration_raw) if duration_raw not in (None, "", "N/A") else 0.0
sample_rate_raw = (audio_stream or {}).get("sample_rate")
channels_raw = (audio_stream or {}).get("channels")
bitrate_raw = (audio_stream or {}).get("bit_rate") or fmt.get("bit_rate")
return {
"duration_seconds": round(duration, 3) if duration > 0 else None,
"sample_rate_hz": int(sample_rate_raw) if sample_rate_raw not in (None, "", "N/A") else None,
"channels": int(channels_raw) if channels_raw not in (None, "", "N/A") else None,
"bit_rate": int(bitrate_raw) if bitrate_raw not in (None, "", "N/A") else None,
"codec": (audio_stream or {}).get("codec_name"),
"container": fmt.get("format_name"),
}
except Exception:
return {}
def _analyze_audio(path: Path) -> Dict[str, Any]:
    """Quick pre-analysis of an audio upload: quality tiers, recommendations, ETA.

    Recommendation strings are user-facing Ukrainian text. When duration is
    unknown a 20-second fallback is assumed for the processing estimates.
    """
    meta = _probe_audio_metadata(path)
    duration = float(meta.get("duration_seconds") or 0.0)
    bitrate = float(meta.get("bit_rate") or 0.0)
    low_bitrate = bool(bitrate and bitrate < 128000)
    recommendations: List[str] = []
    if duration <= 0:
        recommendations.append("Не вдалося надійно визначити тривалість аудіо.")
    if low_bitrate:
        recommendations.append("Низький bitrate: рекомендується forensic-режим та денойз перед транскрипцією.")
    else:
        recommendations.append("Рекомендується tactical denoise + speech enhance для швидкого перегляду.")
    recommendations.append("Для доказового контуру: forensic mode + chain-of-custody + підпис результатів.")
    if duration and duration < 60:
        duration_bucket = "short"
    elif duration and duration > 600:
        duration_bucket = "long"
    else:
        duration_bucket = "medium"
    fallback_duration = duration or 20.0
    return {
        "media_type": "audio",
        "audio": meta,
        "quality_analysis": {
            "bitrate_tier": "low" if low_bitrate else "normal",
            "duration_bucket": duration_bucket,
        },
        "recommendations": recommendations,
        "suggested_priority": "speech",
        "suggested_export": {
            "format": "wav_pcm_s16le",
            "sample_rate_hz": int(meta.get("sample_rate_hz") or 16000),
            "channels": 1,
        },
        "estimated_processing_seconds": {
            "tactical": int(max(6, min(7200, fallback_duration * 0.45))),
            "forensic": int(max(12, min(14400, fallback_duration * 1.25))),
        },
    }
def _parse_export_options(raw_value: str) -> Dict[str, Any]:
if not raw_value:
return {}
try:
parsed = json.loads(raw_value)
except Exception as exc:
raise HTTPException(status_code=422, detail=f"Invalid export_options JSON: {exc}") from exc
if not isinstance(parsed, dict):
raise HTTPException(status_code=422, detail="export_options must be a JSON object")
return parsed
def _status_timing(job: Any) -> Dict[str, Optional[int]]:
    """Derive elapsed/total/ETA seconds (plus live fps and ETA confidence) for a job.

    Terminal jobs report their exact duration with eta=0. Processing jobs blend
    the upload-time estimate, progress extrapolation, and a per-stage ETA hint
    that the worker embeds in the ``current_stage`` text.
    """
    started = _parse_iso_utc(job.started_at)
    if not started:
        # Job has never started: nothing to measure yet.
        return {
            "elapsed_seconds": None,
            "estimated_total_seconds": None,
            "eta_seconds": None,
        }
    now = datetime.now(timezone.utc)
    estimated_total: Optional[int] = None
    eta: Optional[int] = None
    if job.status in ("completed", "failed", "cancelled") and job.completed_at:
        completed = _parse_iso_utc(job.completed_at)
        if completed:
            # Terminal job with a valid completion time: duration is exact.
            elapsed = max(0, int((completed - started).total_seconds()))
            estimated_total = elapsed
            eta = 0
        else:
            elapsed = max(0, int((now - started).total_seconds()))
    else:
        elapsed = max(0, int((now - started).total_seconds()))
        if job.status == "processing":
            # Prefer the pre-analysis estimate stored in metadata at upload time...
            hinted_total = None
            if isinstance(job.metadata, dict):
                hinted_total = job.metadata.get("estimated_processing_seconds")
            if isinstance(hinted_total, (int, float)) and hinted_total > 0:
                estimated_total = int(hinted_total)
            elif job.progress >= 5:
                # ...otherwise extrapolate from progress once it is meaningful.
                estimated_total = int(elapsed / max(0.05, job.progress / 100.0))
            # The stage text may carry a worker-side hint like "eta ~42s".
            stage_eta = None
            if isinstance(job.current_stage, str):
                match = re.search(r"eta ~([0-9]+)s", job.current_stage)
                if match:
                    try:
                        stage_eta = int(match.group(1))
                    except Exception:
                        stage_eta = None
            if estimated_total and estimated_total > 0:
                eta = max(0, int(estimated_total - elapsed))
            if stage_eta is not None:
                # Early-stage per-frame ETA is noisy (model warmup / cache effects).
                # Blend with metadata estimate first; trust stage ETA more after ~10%.
                if eta is None:
                    eta = stage_eta
                elif job.progress < 10:
                    eta = int((eta * 0.75) + (stage_eta * 0.25))
                elif job.progress < 30:
                    eta = int((eta * 0.50) + (stage_eta * 0.50))
                else:
                    eta = int((eta * 0.25) + (stage_eta * 0.75))
                # Keep the total consistent with the blended ETA.
                estimated_total = max(estimated_total or 0, elapsed + max(0, eta))
    live_fps: Optional[float] = None
    eta_confidence: Optional[str] = None
    if isinstance(job.current_stage, str):
        # The stage text may also carry a live throughput figure, e.g. "(12.5 fps".
        fps_match = re.search(r"\(([0-9]+(?:\.[0-9]+)?)\s*fps", job.current_stage)
        if fps_match:
            try:
                live_fps = round(float(fps_match.group(1)), 2)
            except Exception:
                pass
        # NOTE(review): "skip=NN%" appears to be the worker's frame-skip ratio;
        # heavy skipping lowers ETA trust — confirm against orchestrator output.
        skip_match = re.search(r"skip=([0-9]+)%", job.current_stage)
        skip_pct = int(skip_match.group(1)) if skip_match else 0
        if job.progress >= 30 and live_fps is not None:
            eta_confidence = "high" if skip_pct < 50 else "medium"
        elif job.progress >= 10:
            eta_confidence = "medium"
        elif job.progress >= 2:
            eta_confidence = "low"
    return {
        "elapsed_seconds": elapsed,
        "estimated_total_seconds": estimated_total,
        "eta_seconds": eta,
        "live_fps": live_fps,
        "eta_confidence": eta_confidence,
    }
async def run_job(job_id: str) -> None:
    """Execute one queued job under the global concurrency semaphore.

    Re-checks job state after the slot is acquired (it may have been cancelled
    while queued), runs the blocking orchestrator in a worker thread, and
    records completion / cancellation / failure in the store.
    """
    async with RUN_SLOT:
        job = store.get_job(job_id)
        if not job:
            return
        if job.status == "cancelled":
            return
        if job.cancel_requested:
            # Cancellation arrived while the job was still waiting in the queue.
            store.mark_cancelled(job_id, completed_at=utc_now_iso())
            return
        store.mark_processing(job_id, started_at=utc_now_iso())
        logger.info("aurora job started: %s (%s, %s)", job_id, job.media_type, job.mode)

        def on_progress(progress: int, stage: str, step: Any = None) -> None:
            # Invoked from the worker thread to persist progress updates.
            store.set_progress(job_id, progress=progress, current_stage=stage)
            if step is not None:
                store.append_processing_step(job_id, step)

        def is_cancelled() -> bool:
            # Polled by the orchestrator for cooperative cancellation.
            current = store.get_job(job_id)
            return bool(current and current.cancel_requested)

        try:
            current_job = store.get_job(job_id)
            if not current_job:
                return
            # orchestrator.run is blocking; keep it off the event loop.
            result = await asyncio.to_thread(
                orchestrator.run,
                current_job,
                on_progress,
                is_cancelled,
            )
            if is_cancelled():
                # Cancellation raced with a successful run; record it as cancelled.
                store.mark_cancelled(job_id, completed_at=utc_now_iso())
                return
            completed_at = utc_now_iso()
            store.mark_completed(job_id, result=result, completed_at=completed_at)
            final_job = store.get_job(job_id)
            if final_job and isinstance(final_job.metadata, dict):
                # Enrich metadata with the measured duration and result metadata.
                meta = dict(final_job.metadata)
                started = _parse_iso_utc(final_job.started_at)
                completed = _parse_iso_utc(completed_at)
                if started and completed:
                    meta["actual_processing_seconds"] = max(0, int((completed - started).total_seconds()))
                if isinstance(result.metadata, dict):
                    meta["result_metadata"] = result.metadata
                store.patch_job(job_id, metadata=meta)
            logger.info("aurora job completed: %s", job_id)
        except JobCancelledError:
            store.mark_cancelled(job_id, completed_at=utc_now_iso())
            logger.info("aurora job cancelled: %s", job_id)
        except Exception as exc:
            store.mark_failed(job_id, message=str(exc), completed_at=utc_now_iso())
            logger.exception("aurora job failed: %s", job_id)
def _aurora_chat_reply(
    *,
    message: str,
    job: Optional[Any],
    analysis: Optional[Dict[str, Any]],
) -> Dict[str, Any]:
    """Build a rule-based chat reply: text lines plus suggested UI actions.

    Combines job status context, pre-analysis recommendations, and keyword
    matching on the user message. Actions are deduplicated (preserving order)
    and capped at six. Reply strings are user-facing Ukrainian by design.
    """
    normalized_message = (message or "").strip()
    lower = normalized_message.lower()
    actions: List[Dict[str, Any]] = []
    context: Dict[str, Any] = {}
    lines: List[str] = []
    if job:
        timing = _status_timing(job)
        storage = _job_storage_info(job)
        context["job_id"] = job.job_id
        context["status"] = job.status
        context["stage"] = job.current_stage
        context["timing"] = timing
        context["storage"] = storage
        lines.append(f"Job `{job.job_id}`: status `{job.status}`, stage `{job.current_stage}`.")
        if job.status == "queued":
            position = _queued_position(job.job_id)
            if position:
                lines.append(f"Черга: позиція #{position}.")
            actions.append({"type": "refresh_status", "label": "Оновити статус"})
            actions.append({"type": "cancel", "label": "Скасувати job"})
        elif job.status == "processing":
            elapsed = timing.get("elapsed_seconds")
            eta = timing.get("eta_seconds")
            if isinstance(elapsed, int):
                if isinstance(eta, int):
                    lines.append(f"Минуло {elapsed}s, орієнтовно залишилось ~{eta}s.")
                else:
                    lines.append(f"Минуло {elapsed}s, ETA ще уточнюється.")
            actions.append({"type": "refresh_status", "label": "Оновити статус"})
            actions.append({"type": "cancel", "label": "Скасувати job"})
        elif job.status == "completed":
            lines.append(f"Результати збережені в `{storage.get('output_dir', 'n/a')}`.")
            actions.append({"type": "open_result", "label": "Відкрити результат"})
            actions.append({"type": "reprocess", "label": "Повторити обробку", "second_pass": False})
            actions.append({"type": "reprocess", "label": "Second pass", "second_pass": True})
        elif job.status in ("failed", "cancelled"):
            if job.error_message:
                lines.append(f"Причина: {job.error_message}")
            lines.append("Можна перезапустити обробку з тими самими або новими параметрами.")
            actions.append({"type": "reprocess", "label": "Перезапустити job", "second_pass": False})
            actions.append({"type": "reprocess", "label": "Second pass", "second_pass": True})
        # "Where are my files?"-style questions get the storage paths appended.
        if any(token in lower for token in ("де", "where", "storage", "збереж")):
            lines.append(
                "Шляхи: "
                f"input `{storage.get('input_path', 'n/a')}`, "
                f"output `{storage.get('output_dir', 'n/a')}`, "
                f"job `{storage.get('job_record', 'n/a')}`."
            )
    if analysis and isinstance(analysis, dict):
        # Surface the top pre-analysis recommendations and a reprocess shortcut.
        recs = analysis.get("recommendations")
        if isinstance(recs, list) and recs:
            top_recs = [str(x) for x in recs[:3]]
            lines.append("Рекомендації pre-analysis: " + "; ".join(top_recs))
        suggested_priority = str(analysis.get("suggested_priority") or "").strip()
        if suggested_priority:
            actions.append(
                {
                    "type": "reprocess",
                    "label": f"Reprocess ({suggested_priority})",
                    "priority": suggested_priority,
                    "second_pass": False,
                }
            )
    if not lines:
        # No job/analysis context at all: generic onboarding reply.
        lines.append("Готова допомогти з обробкою. Надішліть файл або оберіть job для контексту.")
        lines.append("Я можу пояснити ETA, місце збереження та запустити reprocess.")
        actions.append({"type": "refresh_health", "label": "Перевірити Aurora"})
    # Keyword-triggered actions derived from the user message itself.
    if any(token in lower for token in ("повтор", "reprocess", "ще раз", "second pass", "другий прохід")):
        actions.append({"type": "reprocess", "label": "Запустити reprocess", "second_pass": "second pass" in lower})
    if "скас" in lower or "cancel" in lower:
        actions.append({"type": "cancel", "label": "Скасувати job"})
    if "статус" in lower or "status" in lower:
        actions.append({"type": "refresh_status", "label": "Оновити статус"})
    # Deduplicate actions by canonical JSON key, keeping the first occurrence.
    deduped: List[Dict[str, Any]] = []
    seen = set()
    for action in actions:
        key = json.dumps(action, sort_keys=True, ensure_ascii=True)
        if key in seen:
            continue
        seen.add(key)
        deduped.append(action)
    return {
        "agent": "Aurora",
        "reply": "\n".join(lines),
        "context": context,
        "actions": deduped[:6],
    }
@app.get("/health")
async def health() -> Dict[str, Any]:
    """Liveness/diagnostics endpoint: job counts, runtime info, subagent registry."""
    registry = build_subagent_registry()
    return {
        "status": "healthy",
        "service": "aurora-service",
        "data_dir": str(DATA_DIR),
        "jobs": store.count_by_status(),
        "runtime": runtime_diagnostics(),
        "scheduler": {"max_concurrent_jobs": MAX_CONCURRENT_JOBS},
        "langchain_scaffold": {
            "enabled": True,
            "subagents": list(registry.keys()),
        },
    }
@app.post("/api/aurora/analyze")
async def analyze_media(file: UploadFile = File(...)) -> Dict[str, Any]:
    """Run pre-analysis on an upload (video/photo/audio) without creating a job.

    The upload is written to a scratch file which is always removed afterwards.
    """
    file_name = safe_filename(file.filename or "upload.bin")
    media_type = detect_media_type(file_name, file.content_type or "")
    if media_type not in ("video", "photo", "audio"):
        raise HTTPException(status_code=415, detail="Analyze supports video/photo/audio only")
    scratch_dir = store.uploads_dir / "_analyze"
    scratch_dir.mkdir(parents=True, exist_ok=True)
    scratch_path = scratch_dir / f"{uuid.uuid4().hex[:12]}_{file_name}"
    body = await file.read()
    if not body:
        raise HTTPException(status_code=400, detail="Empty upload")
    scratch_path.write_bytes(body)
    try:
        # Dispatch to the analyzer matching the detected media type.
        analyzer = {"video": analyze_video, "audio": _analyze_audio}.get(media_type, analyze_photo)
        payload = analyzer(scratch_path)
        payload["file_name"] = file_name
        payload["media_type"] = media_type
        return payload
    except HTTPException:
        raise
    except Exception as exc:
        raise HTTPException(status_code=500, detail=f"Analyze failed: {exc}") from exc
    finally:
        try:
            scratch_path.unlink(missing_ok=True)
        except Exception:
            pass
@app.post("/api/aurora/audio/analyze")
async def analyze_audio(file: UploadFile = File(...)) -> Dict[str, Any]:
    """Audio-only pre-analysis endpoint; rejects non-audio uploads with 415."""
    file_name = safe_filename(file.filename or "upload_audio.bin")
    if detect_media_type(file_name, file.content_type or "") != "audio":
        raise HTTPException(status_code=415, detail="Audio analyze supports audio files only")
    scratch_dir = store.uploads_dir / "_analyze_audio"
    scratch_dir.mkdir(parents=True, exist_ok=True)
    scratch_path = scratch_dir / f"{uuid.uuid4().hex[:12]}_{file_name}"
    body = await file.read()
    if not body:
        raise HTTPException(status_code=400, detail="Empty upload")
    scratch_path.write_bytes(body)
    try:
        payload = _analyze_audio(scratch_path)
        payload["file_name"] = file_name
        return payload
    except HTTPException:
        raise
    except Exception as exc:
        raise HTTPException(status_code=500, detail=f"Audio analyze failed: {exc}") from exc
    finally:
        scratch_path.unlink(missing_ok=True)
@app.post("/api/aurora/audio/process")
async def process_audio(
    file: UploadFile = File(...),
    mode: str = Form("tactical"),
    priority: str = Form("speech"),
    export_options: str = Form(""),
) -> Dict[str, Any]:
    """Enqueue an audio job through the audio scaffold pipeline.

    Validates/normalizes all form fields before writing anything to disk; the
    staging copy is always removed once the job's own copy has been made.
    """
    file_name = safe_filename(file.filename or "upload_audio.bin")
    if detect_media_type(file_name, file.content_type or "") != "audio":
        raise HTTPException(status_code=415, detail="Audio process supports audio files only")
    body = await file.read()
    if not body:
        raise HTTPException(status_code=400, detail="Empty upload")
    normalized_mode = _normalize_mode(mode)
    normalized_priority = _normalize_priority(priority, fallback="balanced")
    parsed_exports = _parse_export_options(export_options)
    staging_dir = store.uploads_dir / "_incoming_audio"
    staging_dir.mkdir(parents=True, exist_ok=True)
    staging_path = staging_dir / f"{uuid.uuid4().hex[:12]}_{file_name}"
    staging_path.write_bytes(body)
    try:
        response = _enqueue_job_from_path(
            source_path=staging_path,
            file_name=file_name,
            mode=normalized_mode,
            media_type="audio",
            priority=normalized_priority,
            export_options=parsed_exports,
            metadata_patch={"audio_pipeline": "scaffold_v1"},
        )
        response["pipeline"] = "audio_scaffold_v1"
        return response
    finally:
        staging_path.unlink(missing_ok=True)
@app.post("/api/aurora/upload")
async def upload_media(
    file: UploadFile = File(...),
    mode: str = Form("tactical"),
    priority: str = Form("balanced"),
    export_options: str = Form(""),
) -> Dict[str, Any]:
    """Accept a media upload and enqueue an Aurora processing job.

    Raises 422 for an invalid mode or export_options, 415 for an unsupported
    media type, 400 for an empty body. The staging copy is always removed
    after the job has taken its own copy.
    """
    raw_mode = (mode or "").strip().lower()
    if raw_mode and raw_mode not in ("tactical", "forensic"):
        raise HTTPException(status_code=422, detail="mode must be 'tactical' or 'forensic'")
    # After the explicit check above, _normalize_mode can only yield a valid
    # mode, so the old second validation of its result was unreachable dead
    # code and has been removed.
    normalized_mode = _normalize_mode(mode)
    file_name = safe_filename(file.filename or "upload.bin")
    media_type = detect_media_type(file_name, file.content_type or "")
    if media_type == "unknown":
        raise HTTPException(status_code=415, detail="Unsupported media type")
    content = await file.read()
    if not content:
        raise HTTPException(status_code=400, detail="Empty upload")
    tmp_dir = store.uploads_dir / "_incoming"
    tmp_dir.mkdir(parents=True, exist_ok=True)
    source_path = tmp_dir / f"{uuid.uuid4().hex[:12]}_{file_name}"
    source_path.write_bytes(content)
    normalized_priority = _normalize_priority(priority, fallback="balanced")
    parsed_export_options = _parse_export_options(export_options)
    try:
        return _enqueue_job_from_path(
            source_path=source_path,
            file_name=file_name,
            mode=normalized_mode,
            media_type=media_type,
            priority=normalized_priority,
            export_options=parsed_export_options,
        )
    finally:
        source_path.unlink(missing_ok=True)
@app.post("/api/aurora/reprocess/{job_id}")
async def reprocess_media(
    job_id: str,
    payload: Optional[Dict[str, Any]] = Body(default=None),
) -> Dict[str, Any]:
    """Re-run an existing job, optionally on its processed output (second pass).

    Mode/priority/export options default to the source job's values; any
    values in the request body override them (export_options replaces the
    inherited dict entirely rather than merging).
    """
    source_job = store.get_job(job_id)
    if not source_job:
        raise HTTPException(status_code=404, detail="job not found")
    body = payload if isinstance(payload, dict) else {}
    second_pass = bool(body.get("second_pass", False))
    source_path = _resolve_source_media_path(source_job, second_pass=second_pass)
    source_meta = source_job.metadata if isinstance(source_job.metadata, dict) else {}
    mode_override = body.get("mode")
    priority_override = body.get("priority")
    export_override = body.get("export_options")
    normalized_mode = _normalize_mode(
        str(mode_override) if isinstance(mode_override, str) else source_job.mode,
        fallback=source_job.mode,
    )
    normalized_priority = _normalize_priority(
        str(priority_override) if isinstance(priority_override, str) else str(source_meta.get("priority") or "balanced"),
        fallback="balanced",
    )
    export_options: Dict[str, Any] = {}
    if isinstance(source_meta.get("export_options"), dict):
        export_options.update(source_meta["export_options"])
    if isinstance(export_override, dict):
        export_options = export_override
    response = _enqueue_job_from_path(
        source_path=source_path,
        file_name=source_job.file_name,
        mode=normalized_mode,
        media_type=source_job.media_type,
        priority=normalized_priority,
        export_options=export_options,
        metadata_patch={
            "reprocess_of": source_job.job_id,
            "reprocess_second_pass": second_pass,
        },
    )
    response["source_job_id"] = source_job.job_id
    response["second_pass"] = second_pass
    return response
@app.post("/api/aurora/chat")
async def aurora_chat(payload: Optional[Dict[str, Any]] = Body(default=None)) -> Dict[str, Any]:
    """Rule-based chat endpoint around job status and pre-analysis context."""
    body = payload if isinstance(payload, dict) else {}
    message = str(body.get("message") or "").strip()
    job_id = str(body.get("job_id") or "").strip()
    raw_analysis = body.get("analysis")
    job = store.get_job(job_id) if job_id else None
    response = _aurora_chat_reply(
        message=message,
        job=job,
        analysis=raw_analysis if isinstance(raw_analysis, dict) else None,
    )
    if job_id and job is None:
        # Caller referenced a job we cannot find; surface that in the context.
        merged = dict(response.get("context") or {})
        merged["job_id"] = job_id
        merged["warning"] = "job not found"
        response["context"] = merged
    return response
@app.get("/api/aurora/status/{job_id}")
async def job_status(job_id: str) -> Dict[str, Any]:
    """Current status, progress, timing, and storage info for a single job."""
    job = store.get_job(job_id)
    if not job:
        raise HTTPException(status_code=404, detail="job not found")
    timing = _status_timing(job)
    payload: Dict[str, Any] = {
        "job_id": job.job_id,
        "status": job.status,
        "progress": job.progress,
        "current_stage": job.current_stage,
        "mode": job.mode,
        "media_type": job.media_type,
        "error_message": job.error_message,
        "created_at": job.created_at,
        "started_at": job.started_at,
        "completed_at": job.completed_at,
        "processing_log_count": len(job.processing_log),
        "elapsed_seconds": timing["elapsed_seconds"],
        "estimated_total_seconds": timing["estimated_total_seconds"],
        "eta_seconds": timing["eta_seconds"],
        "live_fps": timing.get("live_fps"),
        "eta_confidence": timing.get("eta_confidence"),
        "queue_position": _queued_position(job_id),
        "metadata": job.metadata,
        "storage": _job_storage_info(job),
    }
    if job.result:
        payload["output_files"] = [model_dump(entry) for entry in job.result.output_files]
    return payload
@app.get("/api/aurora/jobs")
async def list_jobs(
    limit: int = Query(default=30, ge=1, le=200),
    status: Optional[str] = Query(default=None),
) -> Dict[str, Any]:
    """List jobs newest-first, optionally filtered by comma-separated statuses.

    Unknown status tokens are dropped; if none remain, no filter is applied.
    """
    wanted: Optional[set[str]] = None
    if status and status.strip():
        known = {"queued", "processing", "completed", "failed", "cancelled"}
        requested = {piece.strip().lower() for piece in status.split(",") if piece.strip()}
        wanted = (requested & known) or None
    jobs = store.list_jobs()
    if wanted:
        jobs = [job for job in jobs if job.status in wanted]
    # Sort newest first; job_id breaks ties for a stable ordering.
    epoch = datetime.fromtimestamp(0, tz=timezone.utc)
    ordered = sorted(
        jobs,
        key=lambda job: (_parse_iso_utc(job.created_at) or epoch, job.job_id),
        reverse=True,
    )
    items: List[Dict[str, Any]] = []
    for job in ordered[:limit]:
        timing = _status_timing(job)
        items.append(
            {
                "job_id": job.job_id,
                "status": job.status,
                "mode": job.mode,
                "media_type": job.media_type,
                "file_name": job.file_name,
                "progress": job.progress,
                "current_stage": job.current_stage,
                "error_message": job.error_message,
                "created_at": job.created_at,
                "started_at": job.started_at,
                "completed_at": job.completed_at,
                "elapsed_seconds": timing["elapsed_seconds"],
                "eta_seconds": timing["eta_seconds"],
                "live_fps": timing.get("live_fps"),
                "metadata": job.metadata if isinstance(job.metadata, dict) else {},
                "queue_position": _queued_position(job.job_id),
                "has_result": bool(job.result),
            }
        )
    return {
        "jobs": items,
        "count": len(items),
        "total": len(ordered),
    }
@app.get("/api/aurora/result/{job_id}")
async def job_result(job_id: str) -> Dict[str, Any]:
    """Full result payload for a completed job; 409 until completion."""
    record = store.get_job(job_id)
    if not record:
        raise HTTPException(status_code=404, detail="job not found")
    if record.status != "completed" or not record.result:
        raise HTTPException(
            status_code=409,
            detail=f"job not completed (status={record.status})",
        )
    payload = model_dump(record.result)
    payload["storage"] = _job_storage_info(record)
    if record.mode == "forensic":
        # Forensic runs additionally expose a downloadable PDF report.
        payload["forensic_report_url"] = f"/api/aurora/report/{job_id}.pdf"
    return payload
@app.get("/api/aurora/report/{job_id}.pdf")
async def job_forensic_pdf(job_id: str) -> FileResponse:
    """Generate (on demand) and stream the forensic PDF report for a completed job.

    Only available for jobs run in forensic mode; regenerated on every request.
    """
    record = store.get_job(job_id)
    if not record:
        raise HTTPException(status_code=404, detail="job not found")
    if record.status != "completed" or not record.result:
        raise HTTPException(status_code=409, detail=f"job not completed (status={record.status})")
    if record.mode != "forensic":
        raise HTTPException(status_code=409, detail="forensic report is available only in forensic mode")
    report_path = store.outputs_dir / job_id / "forensic_report.pdf"
    try:
        generate_forensic_report_pdf(record, report_path)
    except Exception as exc:
        raise HTTPException(status_code=500, detail=f"Cannot generate forensic report: {exc}") from exc
    return FileResponse(
        path=report_path,
        filename=f"{job_id}_forensic_report.pdf",
        media_type="application/pdf",
    )
@app.post("/api/aurora/cancel/{job_id}")
async def cancel_job(job_id: str) -> Dict[str, Any]:
    """Request cooperative cancellation of a job (the worker polls the flag)."""
    if not store.get_job(job_id):
        raise HTTPException(status_code=404, detail="job not found")
    updated = store.request_cancel(job_id)
    return {
        "job_id": updated.job_id,
        "status": updated.status,
        "cancel_requested": updated.cancel_requested,
    }
@app.post("/api/aurora/delete/{job_id}")
async def delete_job(
job_id: str,
purge_files: bool = Query(default=True),
) -> Dict[str, Any]:
job = store.get_job(job_id)
if not job:
raise HTTPException(status_code=404, detail="job not found")
if job.status in ("queued", "processing"):
raise HTTPException(
status_code=409,
detail="job is not terminal; cancel it first",
)
deleted = store.delete_job(job_id, remove_artifacts=purge_files)
if not deleted:
raise HTTPException(status_code=404, detail="job not found")
return {
"job_id": job_id,
"deleted": True,
"purge_files": bool(purge_files),
}
@app.get("/api/aurora/storage")
async def storage_info() -> Dict[str, Any]:
"""Disk usage breakdown and per-job sizes."""
jobs = store.list_jobs()
per_job: List[Dict[str, Any]] = []
total_output = 0
total_upload = 0
total_work = 0
for job in jobs:
out_dir = store.outputs_dir / job.job_id
up_dir = store.uploads_dir / job.job_id
out_size = sum(f.stat().st_size for f in out_dir.rglob("*") if f.is_file()) if out_dir.exists() else 0
up_size = sum(f.stat().st_size for f in up_dir.rglob("*") if f.is_file()) if up_dir.exists() else 0
work_size = 0
if out_dir.exists():
for d in out_dir.iterdir():
if d.is_dir() and d.name.startswith("_work"):
work_size += sum(f.stat().st_size for f in d.rglob("*") if f.is_file())
total_output += out_size
total_upload += up_size
total_work += work_size
per_job.append({
"job_id": job.job_id,
"status": job.status,
"output_mb": round(out_size / (1024 * 1024), 1),
"upload_mb": round(up_size / (1024 * 1024), 1),
"work_mb": round(work_size / (1024 * 1024), 1),
})
models_dir = DATA_DIR / "models"
models_size = sum(f.stat().st_size for f in models_dir.rglob("*") if f.is_file()) if models_dir.exists() else 0
return {
"data_dir": str(DATA_DIR),
"total_mb": round((total_output + total_upload + total_work + models_size) / (1024 * 1024), 1),
"outputs_mb": round(total_output / (1024 * 1024), 1),
"uploads_mb": round(total_upload / (1024 * 1024), 1),
"orphan_work_mb": round(total_work / (1024 * 1024), 1),
"models_mb": round(models_size / (1024 * 1024), 1),
"jobs": sorted(per_job, key=lambda x: x["output_mb"], reverse=True),
}
@app.post("/api/aurora/cleanup")
async def cleanup_storage(
max_age_hours: int = Query(default=0, ge=0, description="Delete completed/failed/cancelled jobs older than N hours. 0 = only orphan _work dirs."),
) -> Dict[str, Any]:
"""Clean up orphaned _work directories and optionally old terminal jobs."""
result = _cleanup_work_dirs()
deleted_jobs: List[str] = []
if max_age_hours > 0:
cutoff = datetime.now(tz=timezone.utc).timestamp() - max_age_hours * 3600
for job in store.list_jobs():
if job.status not in ("completed", "failed", "cancelled"):
continue
ts = _parse_iso_utc(job.completed_at or job.created_at)
if ts and ts.timestamp() < cutoff:
store.delete_job(job.job_id, remove_artifacts=True)
deleted_jobs.append(job.job_id)
return {
"work_dirs_removed": result["dirs"],
"work_mb_freed": round(result["mb"], 1),
"jobs_deleted": deleted_jobs,
"jobs_deleted_count": len(deleted_jobs),
}
@app.get("/api/aurora/files/{job_id}/{file_name}")
async def download_output_file(job_id: str, file_name: str) -> FileResponse:
base = (store.outputs_dir / job_id).resolve()
target = (base / file_name).resolve()
if not str(target).startswith(str(base)):
raise HTTPException(status_code=403, detail="invalid file path")
if not target.exists() or not target.is_file():
raise HTTPException(status_code=404, detail="file not found")
return FileResponse(path=target, filename=target.name)
# ── Kling AI endpoints ────────────────────────────────────────────────────────
@app.get("/api/aurora/kling/health")
async def kling_health() -> Dict[str, Any]:
"""Check Kling AI connectivity."""
from .kling import kling_health_check
return kling_health_check()
@app.post("/api/aurora/kling/enhance")
async def kling_enhance_video(
job_id: str = Form(..., description="Aurora job_id whose result to enhance with Kling"),
prompt: str = Form("enhance video quality, improve sharpness and clarity", description="Enhancement guidance"),
negative_prompt: str = Form("noise, blur, artifacts, distortion", description="What to avoid"),
mode: str = Form("pro", description="'std' or 'pro'"),
duration: str = Form("5", description="'5' or '10' seconds"),
cfg_scale: float = Form(0.5, description="Prompt adherence 0.0-1.0"),
) -> Dict[str, Any]:
"""Submit Aurora job result to Kling AI for video-to-video enhancement."""
from .kling import kling_video_enhance, kling_upload_file
job = store.get_job(job_id)
if not job:
raise HTTPException(status_code=404, detail=f"Job {job_id} not found")
if job.status != "completed":
raise HTTPException(status_code=409, detail=f"Job must be completed, current status: {job.status}")
result_path = store.outputs_dir / job_id / "aurora_result.mp4"
if not result_path.exists():
for ext in [".mov", ".avi", ".mkv"]:
alt = result_path.with_suffix(ext)
if alt.exists():
result_path = alt
break
if not result_path.exists():
raise HTTPException(status_code=404, detail="Result file not found for this job")
try:
upload_resp = kling_upload_file(result_path)
except Exception as exc:
raise HTTPException(status_code=502, detail=f"Kling upload error: {str(exc)[:400]}") from exc
file_id = (upload_resp.get("data") or {}).get("resource_id") or (upload_resp.get("data") or {}).get("file_id")
if not file_id:
raise HTTPException(status_code=502, detail=f"Kling upload failed: {upload_resp}")
try:
task_resp = kling_video_enhance(
video_id=file_id,
prompt=prompt,
negative_prompt=negative_prompt,
mode=mode,
duration=duration,
cfg_scale=cfg_scale,
)
except Exception as exc:
raise HTTPException(status_code=502, detail=f"Kling task submit error: {str(exc)[:400]}") from exc
task_id = (task_resp.get("data") or {}).get("task_id") or task_resp.get("task_id")
kling_meta_dir = store.outputs_dir / job_id
kling_meta_path = kling_meta_dir / "kling_task.json"
kling_meta_path.write_text(json.dumps({
"aurora_job_id": job_id,
"kling_task_id": task_id,
"kling_file_id": file_id,
"prompt": prompt,
"mode": mode,
"duration": duration,
"submitted_at": datetime.now(timezone.utc).isoformat(),
"status": "submitted",
}, ensure_ascii=False, indent=2), encoding="utf-8")
return {
"aurora_job_id": job_id,
"kling_task_id": task_id,
"kling_file_id": file_id,
"status": "submitted",
"status_url": f"/api/aurora/kling/status/{job_id}",
}
@app.get("/api/aurora/kling/status/{job_id}")
async def kling_task_status_for_job(job_id: str) -> Dict[str, Any]:
"""Get Kling AI enhancement status for an Aurora job."""
from .kling import kling_video_task_status
kling_meta_path = store.outputs_dir / job_id / "kling_task.json"
if not kling_meta_path.exists():
raise HTTPException(status_code=404, detail=f"No Kling task for job {job_id}")
meta = json.loads(kling_meta_path.read_text(encoding="utf-8"))
task_id = meta.get("kling_task_id")
if not task_id:
raise HTTPException(status_code=404, detail="Kling task_id missing in metadata")
try:
status_resp = kling_video_task_status(task_id, endpoint="video2video")
except Exception as exc:
raise HTTPException(status_code=502, detail=f"Kling status error: {str(exc)[:400]}") from exc
task_data = status_resp.get("data") or status_resp
state = task_data.get("task_status") or task_data.get("status") or "unknown"
meta["status"] = state
meta["last_checked"] = datetime.now(timezone.utc).isoformat()
result_url = None
works = task_data.get("task_result", {}).get("videos") or []
if works:
result_url = works[0].get("url")
if result_url:
meta["kling_result_url"] = result_url
meta["completed_at"] = datetime.now(timezone.utc).isoformat()
kling_meta_path.write_text(json.dumps(meta, ensure_ascii=False, indent=2), encoding="utf-8")
return {
"aurora_job_id": job_id,
"kling_task_id": task_id,
"status": state,
"kling_result_url": result_url,
"meta": meta,
}
@app.post("/api/aurora/kling/image2video")
async def kling_image_to_video(
file: UploadFile = File(..., description="Source image (frame)"),
prompt: str = Form("smooth motion, cinematic video, high quality"),
negative_prompt: str = Form("blur, artifacts, distortion"),
model: str = Form("kling-v1-5"),
mode: str = Form("pro"),
duration: str = Form("5"),
aspect_ratio: str = Form("16:9"),
) -> Dict[str, Any]:
"""Generate video from a still image using Kling AI."""
from .kling import kling_upload_file, kling_video_generate
file_name = file.filename or "frame.jpg"
content = await file.read()
if not content:
raise HTTPException(status_code=400, detail="Empty upload")
tmp_dir = store.uploads_dir / "_kling_i2v"
tmp_dir.mkdir(parents=True, exist_ok=True)
tmp_path = tmp_dir / f"{uuid.uuid4().hex[:12]}_{file_name}"
tmp_path.write_bytes(content)
try:
try:
upload_resp = kling_upload_file(tmp_path)
except Exception as exc:
raise HTTPException(status_code=502, detail=f"Kling upload error: {str(exc)[:400]}") from exc
file_id = (upload_resp.get("data") or {}).get("resource_id") or (upload_resp.get("data") or {}).get("file_id")
if not file_id:
raise HTTPException(status_code=502, detail=f"Kling upload failed: {upload_resp}")
try:
task_resp = kling_video_generate(
image_id=file_id,
prompt=prompt,
negative_prompt=negative_prompt,
model=model,
mode=mode,
duration=duration,
aspect_ratio=aspect_ratio,
)
except Exception as exc:
raise HTTPException(status_code=502, detail=f"Kling task submit error: {str(exc)[:400]}") from exc
task_id = (task_resp.get("data") or {}).get("task_id") or task_resp.get("task_id")
return {
"kling_task_id": task_id,
"kling_file_id": file_id,
"status": "submitted",
"status_url": f"/api/aurora/kling/task/{task_id}?endpoint=image2video",
}
finally:
tmp_path.unlink(missing_ok=True)
@app.get("/api/aurora/kling/task/{task_id}")
async def kling_get_task(task_id: str, endpoint: str = Query("video2video")) -> Dict[str, Any]:
"""Get status of any Kling task by ID."""
from .kling import kling_video_task_status
try:
return kling_video_task_status(task_id, endpoint=endpoint)
except Exception as exc:
raise HTTPException(status_code=502, detail=f"Kling task status error: {str(exc)[:400]}") from exc
@app.get("/api/aurora/plates/{job_id}")
async def get_plate_detections(job_id: str) -> Dict[str, Any]:
"""Return ALPR plate detection results for a completed job."""
job = store.get_job(job_id)
if not job:
raise HTTPException(status_code=404, detail=f"Job {job_id} not found")
report_path = store.outputs_dir / job_id / "plate_detections.json"
if not report_path.exists():
return {
"job_id": job_id,
"plates_found": 0,
"unique_plates": 0,
"unique": [],
"detections": [],
"note": "No plate detection report found (job may predate ALPR support)",
}
data = json.loads(report_path.read_text(encoding="utf-8"))
return data