New router intelligence modules (26 files): alert_ingest/store, audit_store, architecture_pressure, backlog_generator/store, cost_analyzer, data_governance, dependency_scanner, drift_analyzer, incident_* (5 files), llm_enrichment, platform_priority_digest, provider_budget, release_check_runner, risk_* (6 files), signature_state_store, sofiia_auto_router, tool_governance New services: - sofiia-console: Dockerfile, adapters/, monitor/nodes/ops/voice modules, launchd, react static - memory-service: integration_endpoints, integrations, voice_endpoints, static UI - aurora-service: full app suite (analysis, job_store, orchestrator, reporting, schemas, subagents) - sofiia-supervisor: new supervisor service - aistalk-bridge-lite: Telegram bridge lite - calendar-service: CalDAV calendar service with reminders - mlx-stt-service / mlx-tts-service: Apple Silicon speech services - binance-bot-monitor: market monitor service - node-worker: STT/TTS memory providers New tools (9): agent_email, browser_tool, contract_tool, observability_tool, oncall_tool, pr_reviewer_tool, repo_tool, safe_code_executor, secure_vault New crews: agromatrix_crew (10 modules: depth_classifier, doc_facts, doc_focus, farm_state, light_reply, llm_factory, memory_manager, proactivity, reflection_engine, session_context, style_adapter, telemetry) Tests: 85+ test files for all new modules Made-with: Cursor
1969 lines
72 KiB
Python
from __future__ import annotations
|
|
|
|
import hashlib
|
|
import importlib
|
|
import json
|
|
import logging
|
|
import os
|
|
import queue
|
|
import shutil
|
|
import subprocess
|
|
import sys
|
|
import threading
|
|
import time
|
|
import uuid
|
|
from dataclasses import dataclass, field
|
|
from functools import lru_cache
|
|
from fractions import Fraction
|
|
from pathlib import Path
|
|
from threading import Lock
|
|
from typing import Any, Callable, Dict, List, Optional, Tuple
|
|
|
|
from .schemas import AuroraMode, MediaType, ProcessingStep
|
|
|
|
# Module-wide logger for all subagent pipeline stages.
logger = logging.getLogger("aurora.subagents")
|
|
|
|
# OpenCV and NumPy are optional at import time: the module can still be
# imported (and report diagnostics) without them. Frame-processing helpers
# check for None and degrade or raise at runtime instead.
try:
    import cv2  # type: ignore[import-untyped]
except Exception:  # pragma: no cover - handled at runtime
    cv2 = None

try:
    import numpy as np  # type: ignore[import-untyped]
except Exception:  # pragma: no cover - handled at runtime
    np = None
|
|
|
|
|
|
# Upstream release URLs for the face-restoration (GFPGAN v1.4) and
# super-resolution (Real-ESRGAN x4plus) model weights; downloaded once into
# the persistent models directory by _ModelCache._download_model.
GFPGAN_MODEL_URL = "https://github.com/TencentARC/GFPGAN/releases/download/v1.3.0/GFPGANv1.4.pth"
REALESRGAN_MODEL_URL = (
    "https://github.com/xinntao/Real-ESRGAN/releases/download/v0.1.0/RealESRGAN_x4plus.pth"
)
|
|
|
|
|
|
def _env_flag(name: str, default: bool) -> bool:
|
|
raw = os.getenv(name)
|
|
if raw is None:
|
|
return default
|
|
return raw.strip().lower() in {"1", "true", "yes", "on"}
|
|
|
|
|
|
def _is_container_runtime() -> bool:
|
|
return Path("/.dockerenv").exists() or bool(os.getenv("KUBERNETES_SERVICE_HOST"))
|
|
|
|
|
|
@lru_cache(maxsize=1)
def _ffmpeg_hwaccels_text() -> str:
    """Cached raw output of `ffmpeg -hide_banner -hwaccels` ("" on any failure)."""
    try:
        output = _run_command(["ffmpeg", "-hide_banner", "-hwaccels"])
    except Exception:
        output = ""
    return output
|
|
|
|
|
|
@lru_cache(maxsize=1)
def _ffmpeg_encoders_text() -> str:
    """Cached raw output of `ffmpeg -hide_banner -encoders` ("" on any failure)."""
    try:
        output = _run_command(["ffmpeg", "-hide_banner", "-encoders"])
    except Exception:
        output = ""
    return output
|
|
|
|
|
|
def _ffmpeg_has_hwaccel(name: str) -> bool:
    """Return True when *name* appears as a line in `ffmpeg -hwaccels` output."""
    wanted = name.lower()
    listing = _ffmpeg_hwaccels_text().lower()
    return any(entry.strip() == wanted for entry in listing.splitlines())
|
|
|
|
|
|
def _ffmpeg_has_encoder(name: str) -> bool:
    """Return True when `ffmpeg -encoders` lists *name* as an encoder.

    Parses the encoder-name column of each output line. The previous
    whole-text substring search (" name " in output) could false-positive
    when the name merely appeared inside another encoder's description
    text, and missed a name that ended a line (followed by a newline
    rather than a space).
    """
    wanted = name.lower()
    for line in _ffmpeg_encoders_text().lower().splitlines():
        # Encoder rows look like " V....D h264_videotoolbox  <description>":
        # token 0 is the capability flags, token 1 is the encoder name.
        parts = line.split()
        if len(parts) >= 2 and parts[1] == wanted:
            return True
    return False
|
|
|
|
|
|
def _torch_capabilities() -> Dict[str, object]:
|
|
payload: Dict[str, object] = {
|
|
"torch": False,
|
|
"torch_version": None,
|
|
"cuda_available": False,
|
|
"mps_backend": False,
|
|
"mps_available": False,
|
|
"mps_built": False,
|
|
}
|
|
try:
|
|
import torch # type: ignore[import-untyped]
|
|
|
|
payload["torch"] = True
|
|
payload["torch_version"] = getattr(torch, "__version__", None)
|
|
payload["cuda_available"] = bool(torch.cuda.is_available())
|
|
mps_backend = getattr(torch.backends, "mps", None)
|
|
payload["mps_backend"] = bool(mps_backend)
|
|
payload["mps_available"] = bool(mps_backend and mps_backend.is_available())
|
|
payload["mps_built"] = bool(mps_backend and mps_backend.is_built())
|
|
except Exception:
|
|
pass
|
|
return payload
|
|
|
|
|
|
def sha256_file(path: Path) -> str:
    """Return the "sha256:<hex>" digest of *path*, streamed in 1 MiB chunks."""
    hasher = hashlib.sha256()
    with path.open("rb") as handle:
        while chunk := handle.read(1024 * 1024):
            hasher.update(chunk)
    return f"sha256:{hasher.hexdigest()}"
|
|
|
|
|
|
def _copy_with_stage_suffix(input_path: Path, output_dir: Path, stage_suffix: str) -> Path:
|
|
output_dir.mkdir(parents=True, exist_ok=True)
|
|
suffix = input_path.suffix or ".bin"
|
|
staged = output_dir / f"{input_path.stem}_{stage_suffix}{suffix}"
|
|
shutil.copy2(input_path, staged)
|
|
return staged
|
|
|
|
|
|
def _run_command(args: List[str]) -> str:
|
|
process = subprocess.run(
|
|
args,
|
|
check=False,
|
|
capture_output=True,
|
|
text=True,
|
|
)
|
|
if process.returncode != 0:
|
|
stderr = (process.stderr or "").strip()
|
|
raise RuntimeError(f"Command failed ({process.returncode}): {' '.join(args)}\n{stderr}")
|
|
return (process.stdout or "").strip()
|
|
|
|
|
|
def _ffmpeg_available() -> bool:
|
|
return shutil.which("ffmpeg") is not None and shutil.which("ffprobe") is not None
|
|
|
|
|
|
def runtime_diagnostics() -> Dict[str, object]:
    """Snapshot the runtime acceleration environment for health/diagnostics.

    Reports ffmpeg/OpenCV availability, VideoToolbox support, torch
    CUDA/MPS capabilities, the effective device selection, and the
    env-driven feature flags that influence it.

    Fix: the original probed _is_container_runtime() a second time for the
    "container_runtime" key; the already-computed value is reused instead.
    """
    torch_caps = _torch_capabilities()
    device = _ModelCache._device()
    is_container = _is_container_runtime()
    # Containers default to CPU unless explicitly overridden.
    force_cpu = _env_flag("AURORA_FORCE_CPU", is_container)
    prefer_mps = _env_flag("AURORA_PREFER_MPS", True)
    enable_vtb = _env_flag("AURORA_ENABLE_VIDEOTOOLBOX", True)

    return {
        "opencv": cv2 is not None,
        "ffmpeg": _ffmpeg_available(),
        "ffmpeg_videotoolbox_hwaccel": _ffmpeg_has_hwaccel("videotoolbox"),
        "ffmpeg_h264_videotoolbox": _ffmpeg_has_encoder("h264_videotoolbox"),
        "ffmpeg_hevc_videotoolbox": _ffmpeg_has_encoder("hevc_videotoolbox"),
        "torch": bool(torch_caps["torch"]),
        "torch_version": torch_caps["torch_version"],
        "cuda_available": bool(torch_caps["cuda_available"]),
        "mps_backend": bool(torch_caps["mps_backend"]),
        "mps_available": bool(torch_caps["mps_available"]),
        "mps_built": bool(torch_caps["mps_built"]),
        "force_cpu": force_cpu,
        "prefer_mps": prefer_mps,
        "enable_videotoolbox": enable_vtb,
        "device": device,
        "container_runtime": is_container,
        "models_dir": os.getenv("AURORA_MODELS_DIR", "/data/aurora/models"),
    }
|
|
|
|
|
|
class PipelineCancelledError(RuntimeError):
    """Raised to abort a processing pipeline after a cancellation request."""

    pass
|
|
|
|
|
|
@dataclass
class SubagentContext:
    """Per-job inputs handed to a subagent pipeline run."""

    job_id: str  # unique identifier of the processing job
    mode: AuroraMode  # processing mode, e.g. "tactical" / "forensic" (see schemas)
    media_type: MediaType  # kind of source media, e.g. "video" (see schemas)
    input_hash: str  # content hash of the source media
    output_dir: Path  # directory where staged/derived artifacts are written
    priority: str = "balanced"  # e.g. "faces" biases the face pipeline defaults
    export_options: Dict[str, object] = field(default_factory=dict)  # user export knobs
    cancel_check: Optional[Callable[[], bool]] = None  # polled to abort work early
    stage_progress: Optional[Callable[[float, str], None]] = None  # progress callback (fraction, label)
|
|
|
|
|
|
@dataclass
class SubagentRunResult:
    """Outcome of a subagent run: primary output plus step log and extras."""

    output_path: Path  # main produced file
    steps: List[ProcessingStep] = field(default_factory=list)  # executed pipeline steps
    artifacts: List[Path] = field(default_factory=list)  # secondary files produced
    metadata: Dict[str, str] = field(default_factory=dict)  # free-form string annotations
|
|
|
|
|
|
def _resolve_models_dir() -> Path:
|
|
target = Path(os.getenv("AURORA_MODELS_DIR", "/data/aurora/models")).expanduser()
|
|
target.mkdir(parents=True, exist_ok=True)
|
|
return target
|
|
|
|
|
|
def _ensure_persistent_gfpgan_weights() -> Path:
    """Ensure GFPGAN auxiliary weights live in the persistent models directory.

    Some GFPGAN dependencies write helper weights into a fixed runtime path
    (default /app/gfpgan/weights, overridable via
    AURORA_GFPGAN_RUNTIME_WEIGHTS_DIR). This migrates any files found there
    into the persistent store and swaps the runtime path for a symlink, so
    downloads survive container rebuilds. The whole migration is
    best-effort: on a read-only filesystem (e.g. native macOS without /app
    write access) failures are silently ignored.

    Returns the persistent weights directory (always created).
    """
    persistent = _resolve_models_dir() / "gfpgan_weights"
    persistent.mkdir(parents=True, exist_ok=True)

    # In containers, some libs expect /app/gfpgan/weights.
    # In native macOS run we may not have write access to /app, so keep this best-effort.
    runtime_weights = Path(os.getenv("AURORA_GFPGAN_RUNTIME_WEIGHTS_DIR", "/app/gfpgan/weights"))
    try:
        runtime_weights.parent.mkdir(parents=True, exist_ok=True)
        if runtime_weights.exists() and not runtime_weights.is_symlink():
            # Real directory: move its contents into the persistent store
            # (never overwriting existing files), then remove it so the
            # symlink can take its place.
            for item in runtime_weights.iterdir():
                dst = persistent / item.name
                if not dst.exists():
                    shutil.move(str(item), str(dst))
            shutil.rmtree(runtime_weights, ignore_errors=True)
        if not runtime_weights.exists():
            runtime_weights.symlink_to(persistent, target_is_directory=True)
    except Exception:
        pass
    return persistent
|
|
|
|
|
|
def _warmup_gfpgan(restorer: object) -> None:
|
|
"""Run a tiny inference to trigger MPS JIT compilation up front."""
|
|
try:
|
|
dummy = np.zeros((64, 64, 3), dtype=np.uint8)
|
|
restorer.enhance(dummy, has_aligned=False, only_center_face=False, paste_back=True) # type: ignore[attr-defined]
|
|
except Exception:
|
|
pass
|
|
|
|
|
|
class _ModelCache:
    """Process-wide, lock-protected cache of loaded enhancement models.

    Models are cached per AuroraMode so tactical/forensic settings can
    differ. All loading happens under a single lock, so the first caller
    pays the download/initialization cost and concurrent callers wait.
    """

    _lock = Lock()  # guards both caches and all model loading
    _gfpgan_by_mode: Dict[AuroraMode, object] = {}
    _realesrgan_by_mode: Dict[AuroraMode, object] = {}

    @classmethod
    def _download_model(cls, *, url: str, file_name: str) -> Path:
        """Return a local path for *file_name*, downloading from *url* on first use."""
        target = _resolve_models_dir() / file_name
        if target.exists():
            return target
        # Imported lazily: basicsr is a heavy optional dependency.
        from basicsr.utils.download_util import load_file_from_url  # type: ignore[import-untyped]

        downloaded = load_file_from_url(
            url=url,
            model_dir=str(target.parent),
            file_name=file_name,
            progress=True,
        )
        return Path(downloaded)

    @classmethod
    def _device(cls) -> str:
        """Pick the torch device string: "cuda" > "mps" > "cpu".

        AURORA_FORCE_CPU (defaulting to True inside containers) forces CPU;
        AURORA_PREFER_MPS (default True) gates Apple MPS use. Any torch
        import/probe failure falls back to "cpu".
        """
        is_container = _is_container_runtime()
        force_cpu = _env_flag("AURORA_FORCE_CPU", is_container)
        if force_cpu:
            return "cpu"
        prefer_mps = _env_flag("AURORA_PREFER_MPS", True)
        try:
            import torch  # type: ignore[import-untyped]

            if torch.cuda.is_available():
                return "cuda"
            mps_be = getattr(torch.backends, "mps", None)
            if prefer_mps and mps_be and mps_be.is_available() and mps_be.is_built():
                return "mps"
        except Exception:
            return "cpu"
        return "cpu"

    @classmethod
    def _patch_torchvision_compat(cls) -> None:
        """Alias torchvision's removed functional_tensor module for basicsr.

        Newer torchvision renamed transforms.functional_tensor to the
        private transforms._functional_tensor; older basicsr still imports
        the public name. Register the private module under the old name
        when the public one is missing. Best-effort on failure.
        """
        try:
            importlib.import_module("torchvision.transforms.functional_tensor")
            return
        except Exception:
            pass
        try:
            ft = importlib.import_module("torchvision.transforms._functional_tensor")
            sys.modules["torchvision.transforms.functional_tensor"] = ft
        except Exception:
            return

    @classmethod
    def gfpgan(cls, mode: AuroraMode) -> object:
        """Return the (cached) GFPGAN face restorer for *mode*, loading on first use."""
        with cls._lock:
            cached = cls._gfpgan_by_mode.get(mode)
            if cached is not None:
                return cached

            cls._patch_torchvision_compat()
            _ensure_persistent_gfpgan_weights()
            from gfpgan import GFPGANer  # type: ignore[import-untyped]

            model_path = cls._download_model(url=GFPGAN_MODEL_URL, file_name="GFPGANv1.4.pth")
            device = cls._device()
            logger.info("Loading GFPGAN mode=%s device=%s", mode, device)
            t0 = time.monotonic()
            restorer = GFPGANer(
                model_path=str(model_path),
                upscale=1,  # upscaling is handled separately by Real-ESRGAN
                arch="clean",
                channel_multiplier=2,
                bg_upsampler=None,
                device=device,
            )
            if device == "mps" and np is not None:
                # Trigger MPS kernel compilation up front so the first real
                # frame does not absorb the JIT cost.
                _warmup_gfpgan(restorer)
            logger.info("GFPGAN ready mode=%s device=%s elapsed=%.1fs", mode, device, time.monotonic() - t0)
            cls._gfpgan_by_mode[mode] = restorer
            return restorer

    @classmethod
    def realesrgan(cls, mode: AuroraMode) -> object:
        """Return the (cached) Real-ESRGAN x4 upsampler for *mode*, loading on first use."""
        with cls._lock:
            cached = cls._realesrgan_by_mode.get(mode)
            if cached is not None:
                return cached

            cls._patch_torchvision_compat()
            from basicsr.archs.rrdbnet_arch import RRDBNet  # type: ignore[import-untyped]
            from realesrgan import RealESRGANer  # type: ignore[import-untyped]

            model_path = cls._download_model(url=REALESRGAN_MODEL_URL, file_name="RealESRGAN_x4plus.pth")
            # Standard RRDBNet backbone matching the x4plus checkpoint.
            rrdb = RRDBNet(
                num_in_ch=3,
                num_out_ch=3,
                num_feat=64,
                num_block=23,
                num_grow_ch=32,
                scale=4,
            )

            device = cls._device()
            use_half = device in ("cuda", "mps")  # fp16 only on accelerators
            # Tiling bounds peak memory: fixed 256 for tactical, env-tunable
            # for CPU forensic runs, unbounded (0) otherwise.
            if mode == "tactical":
                tile = 256
            elif device == "cpu":
                tile = int(os.getenv("AURORA_CPU_FORENSIC_TILE", "192"))
            else:
                tile = 0
            logger.info("Loading RealESRGAN mode=%s device=%s half=%s tile=%d", mode, device, use_half, tile)
            t0 = time.monotonic()
            upsampler = RealESRGANer(
                scale=4,
                model_path=str(model_path),
                model=rrdb,
                tile=tile,
                tile_pad=10,
                pre_pad=0,
                half=use_half,
                device=device,
            )
            logger.info("RealESRGAN ready mode=%s device=%s elapsed=%.1fs", mode, device, time.monotonic() - t0)
            cls._realesrgan_by_mode[mode] = upsampler
            return upsampler
|
|
|
|
|
|
def _clamp_int(val: int, low: int, high: int) -> int:
|
|
return max(low, min(high, int(val)))
|
|
|
|
|
|
def _option_bool(opts: Optional[Dict[str, object]], key: str, default: bool) -> bool:
|
|
if not opts:
|
|
return default
|
|
raw = opts.get(key)
|
|
if raw is None:
|
|
return default
|
|
if isinstance(raw, bool):
|
|
return raw
|
|
if isinstance(raw, (int, float)):
|
|
return bool(raw)
|
|
return str(raw).strip().lower() in {"1", "true", "yes", "on"}
|
|
|
|
|
|
def _option_str(opts: Optional[Dict[str, object]], key: str, default: str = "") -> str:
|
|
if not opts:
|
|
return default
|
|
raw = opts.get(key)
|
|
if raw is None:
|
|
return default
|
|
return str(raw).strip()
|
|
|
|
|
|
def _option_float(opts: Optional[Dict[str, object]], key: str, default: float) -> float:
|
|
if not opts:
|
|
return default
|
|
raw = opts.get(key)
|
|
if raw is None:
|
|
return default
|
|
try:
|
|
return float(raw)
|
|
except Exception:
|
|
return default
|
|
|
|
|
|
def _face_pipeline_config(
    *,
    mode: AuroraMode,
    media_type: MediaType,
    priority: str,
    export_options: Optional[Dict[str, object]],
) -> Dict[str, object]:
    """Derive the face-enhancement pipeline configuration for one job.

    Combines explicit export options with heuristic defaults based on mode
    (forensic enables more aggressive processing), media type (temporal
    steps only make sense for video), priority ("faces" biases everything
    toward face quality), and a focus profile inferred from a free-text
    task hint. Every boolean default can still be overridden per-option.
    """
    opts = export_options or {}
    roi_hint = _option_str(opts, "roi", "").lower()
    task_hint = _option_str(opts, "task_hint", "")
    hint_lower = task_hint.lower()
    focus_profile = _option_str(opts, "focus_profile", "auto").lower()
    # Unknown profiles fall back to automatic inference.
    if focus_profile not in {"auto", "max_faces", "text_readability", "plates"}:
        focus_profile = "auto"
    if focus_profile == "auto":
        # Infer the profile from the task hint (English + Cyrillic keywords).
        text_keywords = ("text", "logo", "label", "cap", "hat", "надпис", "напис", "кеп")
        face_keywords = ("face", "portrait", "облич", "портрет")
        plate_keywords = ("plate", "license", "номер", "знак")
        if any(k in hint_lower for k in text_keywords):
            focus_profile = "text_readability"
        elif any(k in hint_lower for k in face_keywords):
            focus_profile = "max_faces"
        elif any(k in hint_lower for k in plate_keywords):
            focus_profile = "plates"

    focus_faces = focus_profile == "max_faces"
    text_focus = focus_profile == "text_readability" or _option_bool(opts, "text_focus", False)
    focus_plates = focus_profile == "plates"

    # Heuristic defaults; each may be overridden by the matching option key.
    roi_only_default = roi_hint in {"faces", "face", "auto_faces"} or priority == "faces" or focus_faces
    pre_denoise_default = media_type == "video" and (mode == "forensic" or priority == "faces" or text_focus or focus_plates)
    temporal_default = media_type == "video" and (mode == "forensic" or priority == "faces" or text_focus)
    deblur_default = priority == "faces" or mode == "forensic" or text_focus or focus_plates
    score_loop_default = mode == "forensic" or priority == "faces" or text_focus

    face_model = _option_str(opts, "face_model", "auto").lower()
    if face_model not in {"auto", "gfpgan", "codeformer"}:
        face_model = "auto"
    if focus_faces and face_model == "auto":
        # Face-first jobs prefer CodeFormer when the user left it to "auto".
        face_model = "codeformer"

    return {
        "roi_only_faces": _option_bool(opts, "roi_only_faces", roi_only_default),
        "pre_denoise": _option_bool(opts, "pre_denoise", pre_denoise_default),
        "temporal_denoise": _option_bool(opts, "temporal_denoise", temporal_default),
        "deblur_before_face": _option_bool(opts, "deblur_before_face", deblur_default),
        "score_loop": _option_bool(opts, "score_loop", score_loop_default),
        "face_model": face_model,
        # Numeric knobs are clamped to their sane operating ranges.
        "denoise_strength": max(1.0, min(15.0, _option_float(opts, "denoise_strength", 4.0))),
        "deblur_amount": max(0.2, min(2.0, _option_float(opts, "deblur_amount", 0.8))),
        "focus_profile": focus_profile,
        "task_hint": task_hint,
        "text_focus": text_focus,
    }
|
|
|
|
|
|
@lru_cache(maxsize=1)
def _face_detector():
    """Build (once) the OpenCV Haar frontal-face cascade, or None when
    OpenCV is missing or the cascade file fails to load."""
    if cv2 is None:
        return None
    cascade_file = Path(cv2.data.haarcascades) / "haarcascade_frontalface_default.xml"
    classifier = cv2.CascadeClassifier(str(cascade_file))
    return None if classifier.empty() else classifier
|
|
|
|
|
|
def _detect_face_boxes(frame_bgr, limit: int = 8) -> List[Tuple[int, int, int, int]]:
    """Detect up to *limit* face boxes (x, y, w, h), largest area first.

    Uses the Haar cascade over a histogram-equalized grayscale frame;
    returns [] when OpenCV or the cascade is unavailable.
    """
    if cv2 is None:
        return []
    classifier = _face_detector()
    if classifier is None:
        return []
    gray = cv2.cvtColor(frame_bgr, cv2.COLOR_BGR2GRAY)

    # Detection knobs are env-tunable for low-quality footage.
    scale = float(os.getenv("AURORA_HAAR_SCALE", "1.05"))
    neighbors = int(os.getenv("AURORA_HAAR_MIN_NEIGHBORS", "2"))
    smallest = int(os.getenv("AURORA_HAAR_MIN_FACE", "15"))

    equalized = cv2.equalizeHist(gray)
    detections = classifier.detectMultiScale(
        equalized,
        scaleFactor=scale,
        minNeighbors=neighbors,
        minSize=(smallest, smallest),
    )
    boxes: List[Tuple[int, int, int, int]] = [
        (int(x), int(y), int(w), int(h)) for (x, y, w, h) in detections
    ]
    boxes.sort(key=lambda box: box[2] * box[3], reverse=True)
    return boxes[: max(1, limit)]
|
|
|
|
|
|
def _expand_roi(
|
|
x: int,
|
|
y: int,
|
|
w: int,
|
|
h: int,
|
|
frame_w: int,
|
|
frame_h: int,
|
|
pad_ratio: float = 0.28,
|
|
) -> Tuple[int, int, int, int]:
|
|
pad_x = int(w * pad_ratio)
|
|
pad_y = int(h * pad_ratio)
|
|
x1 = max(0, x - pad_x)
|
|
y1 = max(0, y - pad_y)
|
|
x2 = min(frame_w, x + w + pad_x)
|
|
y2 = min(frame_h, y + h + pad_y)
|
|
return x1, y1, x2, y2
|
|
|
|
|
|
def _pre_denoise_frame(frame_bgr, previous_denoised, strength: float, temporal: bool):
    """NL-means denoise a frame, optionally blending with the previous result.

    Returns (denoised_frame, state_for_next_call); without OpenCV the frame
    and previous state pass through untouched.
    """
    if cv2 is None:
        return frame_bgr, previous_denoised
    h_strength = float(max(1.0, min(15.0, strength)))
    result = cv2.fastNlMeansDenoisingColored(frame_bgr, None, h_strength, h_strength, 7, 21)
    if temporal and previous_denoised is not None:
        # Temporal smoothing: mix in a small fraction of the previous frame.
        try:
            blend = float(os.getenv("AURORA_TEMPORAL_DENOISE_ALPHA", "0.18"))
        except Exception:
            blend = 0.18
        blend = max(0.05, min(0.40, blend))
        result = cv2.addWeighted(result, 1.0 - blend, previous_denoised, blend, 0.0)
    return result, result
|
|
|
|
|
|
def _deblur_unsharp(frame_bgr, amount: float):
    """Unsharp-mask sharpening: boost the frame against a Gaussian-blurred copy.

    *amount* is clamped to [0.2, 2.0]; the frame passes through without OpenCV.
    """
    if cv2 is None:
        return frame_bgr
    strength = max(0.2, min(2.0, float(amount)))
    softened = cv2.GaussianBlur(frame_bgr, (0, 0), sigmaX=1.2, sigmaY=1.2)
    return cv2.addWeighted(frame_bgr, 1.0 + strength, softened, -strength, 0.0)
|
|
|
|
|
|
def _patch_sharpness(patch) -> float:
    """Laplacian-variance sharpness metric (higher = sharper); 0 without OpenCV."""
    if cv2 is None:
        return 0.0
    gray = cv2.cvtColor(patch, cv2.COLOR_BGR2GRAY)
    laplacian = cv2.Laplacian(gray, cv2.CV_64F)
    return float(laplacian.var())
|
|
|
|
|
|
def _patch_diff(original_patch, candidate_patch) -> float:
|
|
if np is None:
|
|
return 0.0
|
|
base = original_patch.astype(np.float32)
|
|
cand = candidate_patch.astype(np.float32)
|
|
return float(np.mean(np.abs(base - cand)))
|
|
|
|
|
|
def _compact_error_text(exc: Exception, limit: int = 220) -> str:
|
|
text = str(exc).replace("\n", " ").strip()
|
|
if len(text) <= limit:
|
|
return text
|
|
return text[: max(0, limit - 3)] + "..."
|
|
|
|
|
|
def _is_mps_conv_override_error(exc: Exception) -> bool:
|
|
text = str(exc).lower()
|
|
return "convolution_overrideable not implemented" in text
|
|
|
|
|
|
def _sr_soft_fallback(
    enhanced_img,
    requested_outscale: int,
) -> Tuple[object, int, str]:
    """Soft fallback when Real-ESRGAN fails on MPS for very large frames.

    Keeps the face-restored frame as-is, or performs a lightweight Lanczos
    resize when the requested output still fits within the pixel budget
    (AURORA_SR_SOFT_FALLBACK_MAX_PIXELS, floor 1 MP). Returns
    (image, effective_outscale, strategy_label).
    """
    if cv2 is None:
        return enhanced_img, 1, "keep_face_enhanced"
    try:
        pixel_budget = int(float(os.getenv("AURORA_SR_SOFT_FALLBACK_MAX_PIXELS", "12000000")))
    except Exception:
        pixel_budget = 12_000_000
    pixel_budget = max(1_000_000, pixel_budget)
    height, width = enhanced_img.shape[:2]
    if requested_outscale <= 1:
        return enhanced_img, 1, "keep_face_enhanced"
    out_w = max(1, int(width * requested_outscale))
    out_h = max(1, int(height * requested_outscale))
    if out_w * out_h <= pixel_budget:
        resized = cv2.resize(enhanced_img, (out_w, out_h), interpolation=cv2.INTER_LANCZOS4)
        return resized, requested_outscale, "lanczos_resize"
    return enhanced_img, 1, "keep_face_enhanced"
|
|
|
|
|
|
def _safe_ocr_score(patch) -> float:
    """Optional OCR confidence hint in [0, 1] for the text/plate clarity loop.

    Returns 0.0 whenever pytesseract or OpenCV is unavailable, OCR yields
    no confident tokens, or anything fails along the way.
    """
    if not _pytesseract_available():
        return 0.0
    try:
        import pytesseract  # type: ignore[import-untyped]
    except Exception:
        return 0.0
    if cv2 is None:
        return 0.0
    try:
        gray = cv2.cvtColor(patch, cv2.COLOR_BGR2GRAY)
        data = pytesseract.image_to_data(
            gray,
            output_type=pytesseract.Output.DICT,
            config="--psm 7 --oem 1",
        )
        # Tesseract reports -1 / empty for non-word boxes; keep real scores only.
        confidences = [
            float(value)
            for value in data.get("conf", [])
            if str(value).strip() not in {"", "-1"}
        ]
        if not confidences:
            return 0.0
        mean_ratio = sum(confidences) / (len(confidences) * 100.0)
        return max(0.0, min(1.0, mean_ratio))
    except Exception:
        return 0.0
|
|
|
|
|
|
@lru_cache(maxsize=1)
|
|
def _codeformer_available() -> bool:
|
|
try:
|
|
importlib.import_module("codeformer")
|
|
return True
|
|
except Exception:
|
|
return False
|
|
|
|
|
|
@lru_cache(maxsize=1)
|
|
def _pytesseract_available() -> bool:
|
|
try:
|
|
importlib.import_module("pytesseract")
|
|
return True
|
|
except Exception:
|
|
return False
|
|
|
|
|
|
def _face_candidate_score(original_patch, candidate_patch) -> float:
    """Score a restored patch: sharpness gain x face bonus + OCR bonus - change penalty.

    Higher is better; used to pick among candidate restorations of a ROI.
    """
    gain = _patch_sharpness(candidate_patch) / max(1.0, _patch_sharpness(original_patch))
    detected_faces = len(_detect_face_boxes(candidate_patch, limit=2))
    face_bonus = 1.0 + (0.35 * max(0, detected_faces))
    change_penalty = _patch_diff(original_patch, candidate_patch) / 255.0
    text_bonus = _safe_ocr_score(candidate_patch)
    return (gain * face_bonus) + (0.18 * text_bonus) - (0.22 * change_penalty)
|
|
|
|
|
|
def _requested_outscale(export_options: Optional[Dict[str, object]], width: int, height: int) -> int:
|
|
opts = export_options or {}
|
|
max_outscale = _clamp_int(int(os.getenv("AURORA_MAX_OUTSCALE", "4")), 1, 4)
|
|
|
|
raw_upscale = opts.get("upscale")
|
|
if raw_upscale is None:
|
|
# Compatibility alias used by console UI.
|
|
raw_upscale = opts.get("outscale")
|
|
if raw_upscale is not None:
|
|
try:
|
|
return _clamp_int(int(raw_upscale), 1, max_outscale)
|
|
except Exception:
|
|
pass
|
|
|
|
requested_w: Optional[int] = None
|
|
requested_h: Optional[int] = None
|
|
# Explicit width/height override.
|
|
try:
|
|
if opts.get("width") is not None and opts.get("height") is not None:
|
|
requested_w = int(opts.get("width") or 0)
|
|
requested_h = int(opts.get("height") or 0)
|
|
except Exception:
|
|
requested_w = None
|
|
requested_h = None
|
|
|
|
# Resolution profile override.
|
|
res = str(opts.get("resolution") or "").strip().lower()
|
|
if requested_w is None or requested_h is None:
|
|
if res in {"4k", "2160p"}:
|
|
requested_w, requested_h = 3840, 2160
|
|
elif res in {"8k", "4320p"}:
|
|
requested_w, requested_h = 7680, 4320
|
|
elif "x" in res:
|
|
try:
|
|
w_txt, h_txt = res.split("x", 1)
|
|
requested_w, requested_h = int(w_txt), int(h_txt)
|
|
except Exception:
|
|
requested_w, requested_h = None, None
|
|
|
|
if not requested_w or not requested_h or requested_w <= 0 or requested_h <= 0:
|
|
return 1
|
|
|
|
scale = max(requested_w / max(1, width), requested_h / max(1, height))
|
|
if scale <= 1.1:
|
|
return 1
|
|
if scale <= 2.1:
|
|
return _clamp_int(2, 1, max_outscale)
|
|
if scale <= 3.1:
|
|
return _clamp_int(3, 1, max_outscale)
|
|
return _clamp_int(4, 1, max_outscale)
|
|
|
|
|
|
def _decide_outscale(mode: AuroraMode, frame_bgr, export_options: Optional[Dict[str, object]] = None) -> int:
    """Choose the effective SR outscale for a frame.

    Tactical mode only upscales when explicitly requested; forensic mode may
    apply a default upscale (AURORA_FORENSIC_DEFAULT_OUTSCALE), but drops
    back to 1x on CPU for frames above the megapixel budget to stay stable.
    """
    height, width = frame_bgr.shape[:2]
    opts = export_options or {}
    wanted = _requested_outscale(opts, width, height)
    ceiling = _clamp_int(int(os.getenv("AURORA_MAX_OUTSCALE", "4")), 1, 4)
    explicit = opts.get("upscale")
    if explicit is None:
        explicit = opts.get("outscale")
    if mode == "tactical":
        # Tactical defaults to readability, not synthetic upscaling.
        return wanted if wanted > 1 else 1
    if wanted <= 1 and explicit is None and _option_bool(opts, "auto_forensic_outscale", True):
        # Default forensic processing can upscale even without explicit user width/height.
        wanted = _clamp_int(int(os.getenv("AURORA_FORENSIC_DEFAULT_OUTSCALE", "2")), 1, ceiling)
    if wanted <= 1:
        # Keep source resolution only when forensic auto-upscale is disabled.
        return 1
    device = _ModelCache._device()
    megapixels = (height * width) / 1_000_000.0
    cpu_budget = float(os.getenv("AURORA_CPU_MAX_MP_FOR_X2", "0.8"))
    if device == "cpu" and megapixels > cpu_budget:
        # Keep forensic job stable on CPU for HD+ inputs (avoid OOM + heavy artifacts).
        return 1
    return wanted
|
|
|
|
|
|
def _enhance_frame_bgr(
|
|
frame_bgr,
|
|
mode: AuroraMode,
|
|
media_type: MediaType,
|
|
priority: str = "balanced",
|
|
export_options: Optional[Dict[str, object]] = None,
|
|
previous_denoised=None,
|
|
) -> Tuple[object, int, int, int, int, Dict[str, object], object]:
|
|
if cv2 is None:
|
|
raise RuntimeError("opencv-python-headless is not installed")
|
|
|
|
gfpganer = _ModelCache.gfpgan(mode)
|
|
realesrganer = _ModelCache.realesrgan(mode)
|
|
cfg = _face_pipeline_config(
|
|
mode=mode,
|
|
media_type=media_type,
|
|
priority=priority,
|
|
export_options=export_options,
|
|
)
|
|
source_frame = frame_bgr
|
|
if bool(cfg["pre_denoise"]):
|
|
frame_bgr, previous_denoised = _pre_denoise_frame(
|
|
frame_bgr,
|
|
previous_denoised=previous_denoised,
|
|
strength=float(cfg["denoise_strength"]),
|
|
temporal=bool(cfg["temporal_denoise"]),
|
|
)
|
|
if bool(cfg["deblur_before_face"]):
|
|
frame_bgr = _deblur_unsharp(frame_bgr, amount=float(cfg["deblur_amount"]))
|
|
|
|
outscale = _decide_outscale(mode, frame_bgr, export_options=export_options)
|
|
opts = export_options or {}
|
|
raw_upscale = opts.get("upscale")
|
|
if raw_upscale is None:
|
|
raw_upscale = opts.get("outscale")
|
|
allow_roi_upscale = _option_bool(opts, "allow_roi_upscale", False) or _option_bool(opts, "max_face_quality", False)
|
|
if bool(cfg["roi_only_faces"]) and not allow_roi_upscale and raw_upscale is None:
|
|
outscale = 1
|
|
|
|
try:
|
|
tactical_weight = float(os.getenv("AURORA_GFPGAN_WEIGHT_TACTICAL", "0.35"))
|
|
except Exception:
|
|
tactical_weight = 0.35
|
|
try:
|
|
forensic_weight = float(os.getenv("AURORA_GFPGAN_WEIGHT_FORENSIC", "0.65"))
|
|
except Exception:
|
|
forensic_weight = 0.65
|
|
face_weight = max(0.0, min(1.0, tactical_weight if mode == "tactical" else forensic_weight))
|
|
|
|
requested_model = str(cfg["face_model"])
|
|
codeformer_available = _codeformer_available()
|
|
if requested_model == "auto":
|
|
requested_model = "codeformer" if codeformer_available else "gfpgan"
|
|
|
|
gfpgan_face_size = 512
|
|
|
|
def _force_enhance_roi(patch, weight: float):
|
|
"""Force face restoration on a patch where Haar found a face but RetinaFace did not.
|
|
Upscale to 512px, run GFPGAN in aligned mode, then resize back."""
|
|
h_p, w_p = patch.shape[:2]
|
|
aligned = cv2.resize(patch, (gfpgan_face_size, gfpgan_face_size), interpolation=cv2.INTER_CUBIC)
|
|
cropped_faces, _, restored = gfpganer.enhance(
|
|
aligned, has_aligned=True, only_center_face=True, paste_back=False,
|
|
weight=max(0.0, min(1.0, weight)),
|
|
)
|
|
if cropped_faces:
|
|
result = cropped_faces[0]
|
|
elif restored is not None:
|
|
result = restored
|
|
else:
|
|
result = aligned
|
|
return cv2.resize(result, (w_p, h_p), interpolation=cv2.INTER_AREA)
|
|
|
|
def _run_gfpgan(candidate_input, candidate_weight: float, *, force_aligned: bool = False):
|
|
t_local = time.perf_counter()
|
|
w = max(0.0, min(1.0, candidate_weight))
|
|
if force_aligned:
|
|
local_restored = _force_enhance_roi(candidate_input, w)
|
|
elapsed = int((time.perf_counter() - t_local) * 1000)
|
|
return local_restored, 1, elapsed, "GFPGAN v1.4 (forced-align)"
|
|
_, local_faces, local_restored = gfpganer.enhance(
|
|
candidate_input, has_aligned=False, only_center_face=False, paste_back=True, weight=w,
|
|
)
|
|
if len(local_faces) == 0:
|
|
local_restored = _force_enhance_roi(candidate_input, w)
|
|
elapsed = int((time.perf_counter() - t_local) * 1000)
|
|
return local_restored, 1, elapsed, "GFPGAN v1.4 (forced-align)"
|
|
elapsed = int((time.perf_counter() - t_local) * 1000)
|
|
return local_restored, len(local_faces), elapsed, "GFPGAN v1.4"
|
|
|
|
def _run_codeformer_or_fallback(candidate_input, candidate_weight: float, *, force_aligned: bool = False):
|
|
t_local = time.perf_counter()
|
|
w = max(0.0, min(1.0, candidate_weight))
|
|
if force_aligned:
|
|
local_restored = _force_enhance_roi(candidate_input, w)
|
|
local_restored = cv2.detailEnhance(local_restored, sigma_s=12, sigma_r=0.15)
|
|
elapsed = int((time.perf_counter() - t_local) * 1000)
|
|
return local_restored, 1, elapsed, "CodeFormer(forced-align+detailEnhance)"
|
|
_, local_faces, local_restored = gfpganer.enhance(
|
|
candidate_input, has_aligned=False, only_center_face=False, paste_back=True, weight=w,
|
|
)
|
|
if len(local_faces) == 0:
|
|
local_restored = _force_enhance_roi(candidate_input, w)
|
|
local_restored = cv2.detailEnhance(local_restored, sigma_s=12, sigma_r=0.15)
|
|
face_count = len(local_faces) if local_faces else 1
|
|
elapsed = int((time.perf_counter() - t_local) * 1000)
|
|
return local_restored, face_count, elapsed, "CodeFormer(fallback-detailEnhance)"
|
|
|
|
run_face_model = _run_gfpgan if requested_model == "gfpgan" else _run_codeformer_or_fallback
|
|
model_label_used = "GFPGAN v1.4"
|
|
roi_faces_processed = 0
|
|
candidate_evals = 0
|
|
score_loop_enabled = bool(cfg["score_loop"])
|
|
t_face = time.perf_counter()
|
|
|
|
if bool(cfg["roi_only_faces"]):
|
|
enhanced_img = frame_bgr.copy()
|
|
frame_h, frame_w = frame_bgr.shape[:2]
|
|
boxes = _detect_face_boxes(frame_bgr, limit=8)
|
|
for (bx, by, bw, bh) in boxes:
|
|
x1, y1, x2, y2 = _expand_roi(bx, by, bw, bh, frame_w, frame_h)
|
|
original_patch = frame_bgr[y1:y2, x1:x2]
|
|
if original_patch.size == 0:
|
|
continue
|
|
candidates: List[Tuple[float, object, int, str]] = []
|
|
candidate_weights = [face_weight]
|
|
if score_loop_enabled:
|
|
candidate_weights.append(max(0.0, min(1.0, face_weight - 0.18)))
|
|
for w_candidate in candidate_weights:
|
|
restored_patch, faces_count, _, model_name = run_face_model(original_patch, w_candidate)
|
|
score = _face_candidate_score(original_patch, restored_patch)
|
|
candidates.append((score, restored_patch, faces_count, model_name))
|
|
candidate_evals += 1
|
|
candidates.sort(key=lambda item: item[0], reverse=True)
|
|
best_score, best_patch, best_faces, best_model = candidates[0]
|
|
del best_score
|
|
model_label_used = best_model
|
|
roi_faces_processed += best_faces
|
|
blended = cv2.addWeighted(best_patch, 0.88, original_patch, 0.12, 0.0)
|
|
enhanced_img[y1:y2, x1:x2] = blended
|
|
else:
|
|
candidate_weights = [face_weight]
|
|
if score_loop_enabled and media_type == "photo":
|
|
candidate_weights.append(max(0.0, min(1.0, face_weight - 0.18)))
|
|
candidates_full: List[Tuple[float, object, int, str]] = []
|
|
for w_candidate in candidate_weights:
|
|
restored_img, restored_faces_count, _, model_name = run_face_model(frame_bgr, w_candidate)
|
|
score = _face_candidate_score(source_frame, restored_img)
|
|
candidates_full.append((score, restored_img, restored_faces_count, model_name))
|
|
candidate_evals += 1
|
|
candidates_full.sort(key=lambda item: item[0], reverse=True)
|
|
_, enhanced_img, roi_faces_processed, model_label_used = candidates_full[0]
|
|
|
|
if roi_faces_processed == 0:
|
|
haar_boxes = _detect_face_boxes(frame_bgr, limit=16)
|
|
roi_faces_processed = len(haar_boxes)
|
|
|
|
face_ms = int((time.perf_counter() - t_face) * 1000)
|
|
|
|
requested_outscale = int(max(1, outscale))
|
|
effective_outscale = requested_outscale
|
|
sr_fallback_used = False
|
|
sr_fallback_method: Optional[str] = None
|
|
sr_fallback_reason: Optional[str] = None
|
|
sr_model_used = "Real-ESRGAN x4plus"
|
|
|
|
t_sr = time.perf_counter()
|
|
try:
|
|
upscaled_img, _ = realesrganer.enhance(enhanced_img, outscale=requested_outscale)
|
|
except Exception as sr_exc:
|
|
soft_fallback_enabled = _option_bool(opts, "sr_soft_fallback", _env_flag("AURORA_SR_SOFT_FALLBACK", True))
|
|
device = _ModelCache._device()
|
|
if not (soft_fallback_enabled and device == "mps" and _is_mps_conv_override_error(sr_exc)):
|
|
raise
|
|
upscaled_img, effective_outscale, sr_fallback_method = _sr_soft_fallback(
|
|
enhanced_img,
|
|
requested_outscale,
|
|
)
|
|
sr_fallback_used = True
|
|
sr_fallback_reason = _compact_error_text(sr_exc, limit=260)
|
|
sr_model_used = f"soft-fallback:{sr_fallback_method}"
|
|
logger.warning(
|
|
"SR soft fallback enabled on MPS device=%s requested_outscale=%d effective_outscale=%d reason=%s",
|
|
device,
|
|
requested_outscale,
|
|
effective_outscale,
|
|
sr_fallback_reason,
|
|
)
|
|
if bool(cfg.get("text_focus")):
|
|
upscaled_img = _deblur_unsharp(upscaled_img, amount=max(0.9, float(cfg.get("deblur_amount") or 1.0)))
|
|
sr_ms = int((time.perf_counter() - t_sr) * 1000)
|
|
return upscaled_img, roi_faces_processed, face_ms, sr_ms, effective_outscale, {
|
|
"roi_only_faces": bool(cfg["roi_only_faces"]),
|
|
"pre_denoise": bool(cfg["pre_denoise"]),
|
|
"temporal_denoise": bool(cfg["temporal_denoise"]),
|
|
"deblur_before_face": bool(cfg["deblur_before_face"]),
|
|
"score_loop": score_loop_enabled,
|
|
"face_model_requested": str(cfg["face_model"]),
|
|
"face_model_used": model_label_used,
|
|
"codeformer_available": codeformer_available,
|
|
"candidate_evaluations": candidate_evals,
|
|
"focus_profile": str(cfg.get("focus_profile") or "auto"),
|
|
"task_hint": str(cfg.get("task_hint") or ""),
|
|
"text_focus": bool(cfg.get("text_focus")),
|
|
"sr_model_used": sr_model_used,
|
|
"sr_requested_outscale": requested_outscale,
|
|
"effective_outscale": effective_outscale,
|
|
"sr_fallback_used": sr_fallback_used,
|
|
"sr_fallback_method": sr_fallback_method,
|
|
"sr_fallback_reason": sr_fallback_reason,
|
|
}, previous_denoised
|
|
|
|
|
|
def _probe_fps(input_path: Path) -> float:
    """Return the nominal frame rate of the first video stream of *input_path*.

    Runs ffprobe to read ``r_frame_rate`` (a "num/den" fraction string) and
    converts it to a float.  Falls back to 25.0 when the value is zero,
    malformed, or one of ffprobe's degenerate outputs ("0/0", "N/A") — the
    same default used by ``_probe_video_info``.

    Raises whatever ``_run_command`` raises when ffprobe itself fails.
    """
    value = _run_command(
        [
            "ffprobe",
            "-v",
            "error",
            "-select_streams",
            "v:0",
            "-show_entries",
            "stream=r_frame_rate",
            "-of",
            "default=noprint_wrappers=1:nokey=1",
            str(input_path),
        ]
    )
    try:
        fraction = Fraction(value.strip())
    except (ValueError, ZeroDivisionError):
        # ffprobe can emit "0/0" (ZeroDivisionError in Fraction) or "N/A"
        # (ValueError) for image inputs and odd containers; don't crash.
        return 25.0
    if fraction.numerator == 0:
        return 25.0
    return float(fraction)
|
|
|
|
|
|
def _select_video_encoder(mode: AuroraMode, export_options: Optional[Dict[str, object]]) -> str:
    """Choose the ffmpeg video encoder name.

    Precedence: AURORA_FFMPEG_VIDEO_ENCODER env override → explicit encoder
    requested in export options (with common aliases normalized) →
    VideoToolbox hardware encoders when enabled and present → libx265 for
    H.265-style formats → libx264 as the universal fallback.
    """
    env_override = str(os.getenv("AURORA_FFMPEG_VIDEO_ENCODER", "")).strip()
    if env_override:
        return env_override

    opts = export_options or {}

    requested = str(opts.get("encoder") or "").strip().lower()
    if requested:
        alias_map = {
            "x264": "libx264",
            "h264": "libx264",
            "x265": "libx265",
            "h265": "libx265",
            "hevc": "libx265",
        }
        normalized = alias_map.get(requested, requested)
        if normalized == "auto":
            normalized = ""
        if normalized:
            if _ffmpeg_has_encoder(normalized):
                return normalized
            logger.warning(
                "Requested encoder '%s' is unavailable, falling back to auto selection",
                normalized,
            )

    fmt = str(opts.get("format") or "").strip().lower()
    wants_h265 = fmt in {"mp4_h265", "h265", "hevc"}

    # Prefer hardware encoding (macOS VideoToolbox) when it is enabled.
    if _env_flag("AURORA_ENABLE_VIDEOTOOLBOX", True):
        if wants_h265 and _ffmpeg_has_encoder("hevc_videotoolbox"):
            return "hevc_videotoolbox"
        if _ffmpeg_has_encoder("h264_videotoolbox"):
            return "h264_videotoolbox"

    if wants_h265 and _ffmpeg_has_encoder("libx265"):
        return "libx265"
    return "libx264"
|
|
|
|
|
|
def _is_video_encode_failure(exc: Exception) -> bool:
|
|
text = str(exc).lower()
|
|
return (
|
|
"broken pipe" in text
|
|
or "video encode failed" in text
|
|
or "encode pipe broken" in text
|
|
or "error while opening encoder" in text
|
|
)
|
|
|
|
|
|
def _should_retry_with_libx264(exc: Exception, export_options: Optional[Dict[str, object]]) -> bool:
    """Decide whether an encode failure warrants one retry with libx264.

    Retrying is pointless when libx264 was already the explicitly requested
    encoder, and only encode-class failures qualify at all.
    """
    if not _is_video_encode_failure(exc):
        return False
    requested = str((export_options or {}).get("encoder") or "").strip().lower()
    return requested != "libx264"
|
|
|
|
|
|
def _extract_video_frames(input_path: Path, output_pattern: Path) -> str:
    """Extract every frame of *input_path* to *output_pattern* via ffmpeg.

    Attempts VideoToolbox hardware decode first (when enabled and supported)
    and silently retries on the CPU if that fails.  Returns a label describing
    which acceleration path actually produced the frames: "videotoolbox",
    "fallback_cpu", or "none".
    """
    base = ["ffmpeg", "-hide_banner", "-loglevel", "error", "-y"]
    tail = ["-i", str(input_path), str(output_pattern)]

    hwaccel_used = "none"
    if _env_flag("AURORA_ENABLE_VIDEOTOOLBOX", True) and _ffmpeg_has_hwaccel("videotoolbox"):
        try:
            _run_command(base + ["-hwaccel", "videotoolbox"] + tail)
            return "videotoolbox"
        except Exception:
            # HW decode is best-effort; fall through to the CPU path below.
            hwaccel_used = "fallback_cpu"

    _run_command(base + tail)
    return hwaccel_used
|
|
|
|
|
|
def _compose_video(
    processed_frames_dir: Path,
    source_video: Path,
    output_video: Path,
    fps: float,
    mode: AuroraMode,
    export_options: Optional[Dict[str, object]] = None,
) -> str:
    """Mux enhanced PNG frames with the source file's audio into *output_video*.

    The video track comes from the numbered PNG sequence in
    *processed_frames_dir*; audio (if any) is taken from *source_video*.
    A lossless audio stream-copy is attempted first; if ffmpeg rejects it,
    the audio is re-encoded as AAC 192k.  Returns the encoder name used.
    """
    encoder = _select_video_encoder(mode, export_options)
    cmd = [
        "ffmpeg",
        "-hide_banner",
        "-loglevel",
        "error",
        "-y",
        "-framerate",
        f"{fps:.6f}",
        "-i",
        str(processed_frames_dir / "%08d.png"),
        "-i",
        str(source_video),
        "-map",
        "0:v:0",
        "-map",
        "1:a?",
        "-c:v",
        encoder,
        "-pix_fmt",
        "yuv420p",
        "-shortest",
        "-movflags",
        "+faststart",
    ]

    # Rate-control flags differ per encoder family.
    quality = "22" if mode == "tactical" else "18"
    if encoder in {"libx264", "libx265"}:
        cmd += ["-preset", os.getenv("AURORA_FFMPEG_PRESET", "medium"), "-crf", quality]
    elif encoder == "h264_videotoolbox":
        cmd += ["-q:v", os.getenv("AURORA_VTB_H264_QUALITY", "65")]
    elif encoder == "hevc_videotoolbox":
        cmd += ["-q:v", os.getenv("AURORA_VTB_HEVC_QUALITY", "60")]

    try:
        _run_command(cmd + ["-c:a", "copy", str(output_video)])
    except RuntimeError:
        # Source audio codec cannot be stream-copied into MP4 → transcode.
        _run_command(cmd + ["-c:a", "aac", "-b:a", "192k", str(output_video)])
    return encoder
|
|
|
|
|
|
def _probe_video_info(input_path: Path) -> Dict[str, Any]:
    """Probe video metadata: fps, dimensions, frame count.

    Returns ``{"fps": float, "width": int, "height": int, "total_frames": int}``.
    Falls back to 25 fps when the frame rate is missing or malformed, and
    derives the frame count from the container duration when ``nb_frames``
    is absent.  ffprobe can emit non-numeric placeholders (e.g. "N/A") for
    these fields, so every numeric conversion is guarded instead of letting
    ``int()``/``float()`` crash the pipeline.
    """
    out = _run_command([
        "ffprobe", "-v", "quiet", "-print_format", "json",
        "-show_format", "-show_streams", str(input_path),
    ])
    data = json.loads(out)
    vs = next((s for s in data.get("streams", []) if s.get("codec_type") == "video"), {})
    try:
        w = int(vs.get("width", 0) or 0)
        h = int(vs.get("height", 0) or 0)
    except (TypeError, ValueError):
        w = h = 0
    fps_str = vs.get("r_frame_rate") or vs.get("avg_frame_rate") or "25/1"
    try:
        fps_val = float(Fraction(fps_str))
    except Exception:
        fps_val = 25.0
    try:
        nb = int(vs.get("nb_frames", 0))
    except (TypeError, ValueError):
        # nb_frames is often the string "N/A" or missing entirely.
        nb = 0
    if not nb:
        try:
            dur = float(data.get("format", {}).get("duration", 0))
        except (TypeError, ValueError):
            dur = 0.0
        nb = max(1, int(dur * fps_val))
    return {"fps": fps_val, "width": w, "height": h, "total_frames": nb}
|
|
|
|
|
|
def _frames_similar(prev_thumb, curr_thumb, threshold: float = 8.0) -> bool:
|
|
"""Fast scene change detection on pre-downsampled thumbnails (64x64).
|
|
|
|
Mean absolute pixel difference on 0-255 scale.
|
|
threshold 8.0 catches scene changes while ignoring compression noise.
|
|
For surveillance video most consecutive frames score < 3.0.
|
|
"""
|
|
if np is None:
|
|
return False
|
|
diff = float(np.mean(np.abs(
|
|
prev_thumb.astype(np.float32) - curr_thumb.astype(np.float32)
|
|
)))
|
|
return diff < threshold
|
|
|
|
|
|
def _build_encode_pipe_cmd(
|
|
out_w: int,
|
|
out_h: int,
|
|
fps: float,
|
|
encoder: str,
|
|
mode: AuroraMode,
|
|
source_video: Path,
|
|
output_video: Path,
|
|
export_options: Optional[Dict[str, object]] = None,
|
|
) -> List[str]:
|
|
"""Build ffmpeg command that reads raw BGR frames from stdin and muxes with source audio."""
|
|
crf = "22" if mode == "tactical" else "18"
|
|
cmd = [
|
|
"ffmpeg", "-hide_banner", "-loglevel", "error", "-y",
|
|
"-f", "rawvideo", "-pix_fmt", "bgr24",
|
|
"-s", f"{out_w}x{out_h}",
|
|
"-r", f"{fps:.6f}",
|
|
"-i", "pipe:0",
|
|
"-i", str(source_video),
|
|
"-map", "0:v:0", "-map", "1:a?",
|
|
"-c:v", encoder, "-pix_fmt", "yuv420p",
|
|
"-movflags", "+faststart",
|
|
]
|
|
if encoder in {"libx264", "libx265"}:
|
|
cmd.extend(["-preset", os.getenv("AURORA_FFMPEG_PRESET", "medium"), "-crf", crf])
|
|
elif encoder == "h264_videotoolbox":
|
|
cmd.extend(["-q:v", os.getenv("AURORA_VTB_H264_QUALITY", "65")])
|
|
elif encoder == "hevc_videotoolbox":
|
|
cmd.extend(["-q:v", os.getenv("AURORA_VTB_HEVC_QUALITY", "60")])
|
|
cmd.extend(["-c:a", "aac", "-b:a", "192k", str(output_video)])
|
|
return cmd
|
|
|
|
|
|
def _cleanup_pipes(*procs) -> None:
|
|
for p in procs:
|
|
if p is None:
|
|
continue
|
|
try:
|
|
if p.stdin and not p.stdin.closed:
|
|
p.stdin.close()
|
|
except Exception:
|
|
pass
|
|
try:
|
|
p.kill()
|
|
p.wait(timeout=5)
|
|
except Exception:
|
|
pass
|
|
|
|
|
|
def _visual_pipeline_video(
    *,
    input_path: Path,
    output_dir: Path,
    mode: AuroraMode,
    priority: str,
    export_options: Optional[Dict[str, object]],
    cancel_check: Optional[Callable[[], bool]],
    stage_progress: Optional[Callable[[float, str], None]],
) -> Tuple[Path, Dict[str, object]]:
    """Optimized video pipeline: pipe decode → scene skip → pipe encode.

    v2 optimizations (zero disk I/O for intermediate frames):
    - ffmpeg decode → stdout pipe → numpy (no PNG extraction to disk)
    - Scene detection: skip unchanged frames (huge win for surveillance)
    - numpy → stdin pipe → ffmpeg encode (no PNG write for output frames)
    - VideoToolbox HW decode/encode when available on macOS

    Returns (output_path, stats dict) where stats aggregates per-frame
    details emitted by ``_enhance_frame_bgr`` (face counts, timings, SR
    fallback info, scene-skip counters).

    Raises RuntimeError when OpenCV/ffmpeg is missing, dimensions cannot be
    probed, no frames decode, or the encode pipe fails; raises
    PipelineCancelledError when ``cancel_check`` fires.
    """
    if cv2 is None:
        raise RuntimeError("opencv-python-headless is not installed")
    if not _ffmpeg_available():
        raise RuntimeError("ffmpeg/ffprobe is not installed")

    info = _probe_video_info(input_path)
    src_w, src_h, fps = info["width"], info["height"], info["fps"]
    # Estimate only — used for progress/ETA; the loop counts actual frames.
    est_total = info["total_frames"]

    if src_w == 0 or src_h == 0:
        raise RuntimeError(f"Cannot determine video dimensions: {input_path.name}")

    # Scene detection config (quality-first defaults; opt-in from env/export options)
    opts = export_options or {}
    scene_skip_on = _option_bool(opts, "scene_skip", _env_flag("AURORA_SCENE_SKIP", True))
    scene_thresh_default = float(os.getenv("AURORA_SCENE_THRESHOLD", "4.0"))
    scene_thresh = max(0.5, min(64.0, _option_float(opts, "scene_threshold", scene_thresh_default)))
    # Cap on the fraction of frames that may be skipped, to bound quality loss.
    scene_skip_max_ratio = max(
        0.0,
        min(0.95, _option_float(opts, "scene_skip_max_ratio", float(os.getenv("AURORA_SCENE_SKIP_MAX_RATIO", "0.35")))),
    )
    _THUMB = 64  # thumbnail edge (px) used by _frames_similar

    # --- Decode pipe (VideoToolbox HW accel when available) ---
    use_vtb = (
        _env_flag("AURORA_ENABLE_VIDEOTOOLBOX", True)
        and _ffmpeg_has_hwaccel("videotoolbox")
    )
    dec_cmd = ["ffmpeg", "-hide_banner", "-loglevel", "error"]
    if use_vtb:
        dec_cmd.extend(["-hwaccel", "videotoolbox"])
    dec_cmd.extend([
        "-i", str(input_path),
        "-f", "rawvideo", "-pix_fmt", "bgr24", "pipe:1",
    ])
    decode_proc = subprocess.Popen(dec_cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    decode_accel = "videotoolbox" if use_vtb else "cpu"

    # One raw BGR frame = width * height * 3 bytes.
    frame_bytes = src_w * src_h * 3

    if stage_progress:
        skip_hint = f"scene-skip={'on' if scene_skip_on else 'off'}"
        if scene_skip_on:
            skip_hint += f", thr={scene_thresh:.2f}, max={int(scene_skip_max_ratio * 100)}%"
        stage_progress(0.02, f"pipe decode started ({est_total} est. frames, accel={decode_accel}, {skip_hint})")

    # Stats accumulators
    total_faces = 0
    total_face_ms = 0
    total_sr_ms = 0
    effective_outscale = 1
    roi_only_frames = 0
    candidates_evaluated_total = 0
    face_model_used = "GFPGAN v1.4"
    sr_model_used = "Real-ESRGAN x4plus"
    sr_fallback_frames = 0
    sr_fallback_method = ""
    sr_fallback_reason = ""
    frames_skipped = 0
    previous_denoised = None  # carried across frames for temporal denoise state
    focus_profile_used = "auto"
    task_hint_used = ""
    text_focus_enabled = False

    # Encode pipe — started after first frame reveals output dimensions
    encode_proc: Optional[subprocess.Popen] = None
    output_path = output_dir / f"{input_path.stem}_aurora_visual.mp4"
    encoder = "unknown"

    progress_every = max(1, est_total // 120)
    t_loop = time.perf_counter()
    idx = 0
    prev_thumb = None
    prev_enhanced = None

    # Read-ahead buffer: overlap decode I/O with GPU inference
    _READAHEAD = int(os.getenv("AURORA_READAHEAD_FRAMES", "4"))
    frame_q: queue.Queue = queue.Queue(maxsize=_READAHEAD)
    # One-element list so the reader thread can report its failure;
    # None sentinel on the queue marks end-of-stream in either case.
    reader_error: List[Optional[Exception]] = [None]

    def _reader():
        # Runs on a daemon thread: pump fixed-size raw frames from ffmpeg
        # stdout into frame_q; a short read means EOF.
        try:
            while True:
                raw = decode_proc.stdout.read(frame_bytes)
                if len(raw) < frame_bytes:
                    frame_q.put(None)
                    break
                frame_q.put(raw)
        except Exception as exc:
            reader_error[0] = exc
            frame_q.put(None)

    reader_thread = threading.Thread(target=_reader, daemon=True)
    reader_thread.start()

    try:
        while True:
            if cancel_check and cancel_check():
                raise PipelineCancelledError("Video processing cancelled")

            raw = frame_q.get(timeout=60)
            if raw is None:
                # End of stream: surface the reader's error, if any.
                if reader_error[0]:
                    raise reader_error[0]
                break

            idx += 1
            # frombuffer is read-only over the bytes; copy() makes it writable.
            frame = np.frombuffer(raw, dtype=np.uint8).reshape(src_h, src_w, 3).copy()

            # --- Scene detection: skip if nearly identical to previous ---
            curr_thumb = cv2.resize(frame, (_THUMB, _THUMB))
            skip_this = False
            if scene_skip_on and prev_thumb is not None and prev_enhanced is not None:
                # Only skip while staying under the configured skip-ratio cap.
                projected_skip_ratio = (frames_skipped + 1) / max(1, idx)
                if projected_skip_ratio <= scene_skip_max_ratio and _frames_similar(prev_thumb, curr_thumb, scene_thresh):
                    skip_this = True
                    frames_skipped += 1
            prev_thumb = curr_thumb

            if skip_this:
                # Reuse the previous enhanced frame verbatim.
                enhanced = prev_enhanced
            else:
                enhanced, faces, face_ms, sr_ms, outscale, details, previous_denoised = (
                    _enhance_frame_bgr(
                        frame, mode, media_type="video", priority=priority,
                        export_options=export_options,
                        previous_denoised=previous_denoised,
                    )
                )
                try:
                    effective_outscale = int(details.get("effective_outscale") or outscale)
                except Exception:
                    effective_outscale = outscale
                total_faces += faces
                total_face_ms += face_ms
                total_sr_ms += sr_ms
                if bool(details.get("roi_only_faces")):
                    roi_only_frames += 1
                candidates_evaluated_total += int(details.get("candidate_evaluations") or 0)
                face_model_used = str(details.get("face_model_used") or face_model_used)
                focus_profile_used = str(details.get("focus_profile") or focus_profile_used)
                maybe_task_hint = str(details.get("task_hint") or "").strip()
                if maybe_task_hint:
                    task_hint_used = maybe_task_hint
                text_focus_enabled = text_focus_enabled or bool(details.get("text_focus"))
                sr_model_used = str(details.get("sr_model_used") or sr_model_used)
                if bool(details.get("sr_fallback_used")):
                    sr_fallback_frames += 1
                    sr_fallback_method = str(details.get("sr_fallback_method") or sr_fallback_method)
                    # Keep only the first fallback reason for the report.
                    if not sr_fallback_reason:
                        sr_fallback_reason = str(details.get("sr_fallback_reason") or "")
                prev_enhanced = enhanced

            # --- Start encode pipe after first frame (output size now known) ---
            if encode_proc is None:
                out_h, out_w = enhanced.shape[:2]
                encoder = _select_video_encoder(mode, export_options)
                enc_cmd = _build_encode_pipe_cmd(
                    out_w, out_h, fps, encoder, mode,
                    input_path, output_path, export_options,
                )
                encode_proc = subprocess.Popen(
                    enc_cmd, stdin=subprocess.PIPE, stderr=subprocess.PIPE,
                )

            try:
                encode_proc.stdin.write(enhanced.tobytes())
            except BrokenPipeError as exc:
                # Encoder died mid-stream: try to capture its stderr for the
                # error message before re-raising as a RuntimeError.
                stderr_text = ""
                try:
                    if encode_proc:
                        try:
                            encode_proc.wait(timeout=1)
                        except Exception:
                            pass
                    if encode_proc and encode_proc.stderr:
                        stderr_text = (encode_proc.stderr.read() or b"").decode(errors="replace").strip()
                except Exception:
                    stderr_text = ""
                detail = (stderr_text or str(exc)).strip()
                if len(detail) > 280:
                    detail = detail[:280]
                raise RuntimeError(f"Video encode pipe broken ({encoder}): {detail}") from exc

            # --- Progress ---
            if stage_progress and (idx == 1 or idx % progress_every == 0):
                elapsed = max(0.001, time.perf_counter() - t_loop)
                fps_eff = idx / elapsed
                eta_s = int(max(0, (est_total - idx) / max(0.01, fps_eff)))
                skip_pct = int(100 * frames_skipped / max(1, idx))
                stage_progress(
                    min(0.97, 0.02 + 0.93 * (idx / max(1, est_total))),
                    f"enhancing frame {idx}/{est_total} "
                    f"({fps_eff:.2f} fps, skip={skip_pct}%, eta ~{eta_s}s)",
                )

        # --- Finalize ---
        reader_thread.join(timeout=30)
        decode_proc.stdout.close()
        decode_proc.wait(timeout=30)

        if encode_proc:
            # Closing stdin signals EOF; then wait for the mux to finish.
            encode_proc.stdin.close()
            encode_proc.wait(timeout=300)
            if encode_proc.returncode != 0:
                stderr = (encode_proc.stderr.read() or b"").decode(errors="replace")
                raise RuntimeError(f"Video encode failed ({encoder}): {stderr[:300]}")

        if idx == 0:
            raise RuntimeError("No frames decoded from input video")

    except PipelineCancelledError:
        _cleanup_pipes(decode_proc, encode_proc)
        reader_thread.join(timeout=5)
        raise
    except Exception:
        _cleanup_pipes(decode_proc, encode_proc)
        reader_thread.join(timeout=5)
        raise

    if stage_progress:
        skip_pct = int(100 * frames_skipped / max(1, idx))
        stage_progress(1.0, f"completed ({idx} frames, {frames_skipped} skipped [{skip_pct}%], encode={encoder})")

    return output_path, {
        "frame_count": idx,
        "faces_detected_total": total_faces,
        "face_time_ms": total_face_ms,
        "sr_time_ms": total_sr_ms,
        "effective_outscale": effective_outscale,
        "encoder": encoder,
        "decode_accel": decode_accel,
        "roi_only_frames": roi_only_frames,
        "candidate_evaluations": candidates_evaluated_total,
        "face_model_used": face_model_used,
        "sr_model_used": sr_model_used,
        "sr_fallback_frames": sr_fallback_frames,
        "sr_fallback_method": sr_fallback_method,
        "sr_fallback_reason": sr_fallback_reason,
        "frames_skipped": frames_skipped,
        "scene_skip_enabled": scene_skip_on,
        "scene_threshold": scene_thresh,
        "scene_skip_max_ratio": scene_skip_max_ratio,
        "focus_profile": focus_profile_used,
        "task_hint": task_hint_used,
        "text_focus": text_focus_enabled,
    }
|
|
|
|
|
|
def _visual_pipeline_photo(
    *,
    input_path: Path,
    output_dir: Path,
    mode: AuroraMode,
    priority: str,
    stage_progress: Optional[Callable[[float, str], None]],
    export_options: Optional[Dict[str, object]] = None,
) -> Tuple[Path, Dict[str, object]]:
    """Single-image enhancement stage.

    Reads the image, runs the shared frame enhancer once, and writes the
    result next to a ``_aurora_visual`` stem suffix.  The output keeps the
    source extension when it is a common raster format and falls back to
    PNG otherwise.  Returns (output_path, stats dict).
    """
    if cv2 is None:
        raise RuntimeError("opencv-python-headless is not installed")

    frame = cv2.imread(str(input_path), cv2.IMREAD_COLOR)
    if frame is None:
        raise RuntimeError(f"Cannot read image: {input_path.name}")

    if stage_progress:
        stage_progress(0.1, "processing image")

    enhanced, face_count, face_ms, sr_ms, outscale_used, details, _ = _enhance_frame_bgr(
        frame,
        mode,
        media_type="photo",
        priority=priority,
        export_options=export_options,
    )

    suffix = input_path.suffix.lower() or ".png"
    if suffix in {".jpg", ".jpeg"}:
        suffix = ".jpg"
    elif suffix not in {".png", ".webp", ".tif", ".tiff"}:
        suffix = ".png"
    output_path = output_dir / f"{input_path.stem}_aurora_visual{suffix}"
    cv2.imwrite(str(output_path), enhanced)

    if stage_progress:
        stage_progress(1.0, "image stage completed")

    return output_path, {
        "frame_count": 1,
        "faces_detected_total": face_count,
        "face_time_ms": face_ms,
        "sr_time_ms": sr_ms,
        "effective_outscale": outscale_used,
        "roi_only_frames": 1 if bool(details.get("roi_only_faces")) else 0,
        "candidate_evaluations": int(details.get("candidate_evaluations") or 0),
        "face_model_used": str(details.get("face_model_used") or "GFPGAN v1.4"),
        "sr_model_used": str(details.get("sr_model_used") or "Real-ESRGAN x4plus"),
        "sr_fallback_frames": 1 if bool(details.get("sr_fallback_used")) else 0,
        "sr_fallback_method": str(details.get("sr_fallback_method") or ""),
        "sr_fallback_reason": str(details.get("sr_fallback_reason") or ""),
    }
|
|
|
|
|
|
class BaseSubagent:
    """Stub pipeline stage: copies input with a stage suffix and records one step.

    Subclasses customize the class attributes (name, step/model labels,
    suffix) and may override :meth:`run` with real processing.
    """

    name = "Base"
    step_name = "noop"
    model_by_mode: Dict[AuroraMode, str] = {
        "tactical": "stub.fast",
        "forensic": "stub.full",
    }
    stage_suffix = "noop"
    sleep_seconds = 0.05

    def run(self, ctx: SubagentContext, input_path: Path) -> SubagentRunResult:
        """Copy *input_path* into ctx.output_dir and emit a single timed step."""
        started = time.perf_counter()
        staged_path = _copy_with_stage_suffix(input_path, ctx.output_dir, self.stage_suffix)
        time.sleep(self.sleep_seconds)  # simulate work for the stub stage
        duration_ms = int((time.perf_counter() - started) * 1000)
        recorded_step = ProcessingStep(
            step=self.step_name,
            agent=self.name,
            model=self.model_by_mode[ctx.mode],
            time_ms=duration_ms,
        )
        return SubagentRunResult(output_path=staged_path, steps=[recorded_step])
|
|
|
|
|
|
class ClarityAgent(BaseSubagent):
    """Clarity stage labels for Real-ESRGAN enhancement.

    Inherits the copy-passthrough run() from BaseSubagent; only the
    identifying attributes differ.
    """

    name = "Clarity"
    step_name = "video_enhancement"
    stage_suffix = "clarity"
    model_by_mode = {
        "tactical": "Real-ESRGAN(light)",
        "forensic": "Real-ESRGAN(full)",
    }
|
|
|
|
|
|
class VeraAgent(BaseSubagent):
    """Face enhancement + super-resolution stage.

    Dispatches to the video or photo visual pipeline by media type, records
    two ProcessingSteps (face_enhancement, super_resolution) from the pipeline
    stats, retries video encoding once with libx264 on encode-class failures,
    and finally degrades to a copy-passthrough fallback so the overall job
    never fails at this stage.
    """

    name = "Vera"
    step_name = "face_enhancement"
    stage_suffix = "vera"
    model_by_mode = {
        "tactical": "GFPGAN/CodeFormer + Real-ESRGAN x4plus",
        "forensic": "GFPGAN/CodeFormer + Real-ESRGAN x4plus(forensic)",
    }

    def run(self, ctx: SubagentContext, input_path: Path) -> SubagentRunResult:
        t_start = time.perf_counter()

        def _build_steps(
            stats: Dict[str, object],
            output_path: Path,
            *,
            encoder_retry: bool = False,
            encoder_retry_reason: str = "",
        ) -> List[ProcessingStep]:
            # Translate pipeline stats into the two reported steps.
            face_step = ProcessingStep(
                step="face_enhancement",
                agent=self.name,
                model=str(stats.get("face_model_used") or "GFPGAN v1.4"),
                time_ms=stats["face_time_ms"],
                details={
                    "frames": stats["frame_count"],
                    "faces_detected_total": stats["faces_detected_total"],
                    "roi_only_frames": stats.get("roi_only_frames"),
                    "candidate_evaluations": stats.get("candidate_evaluations"),
                },
            )
            sr_details = {
                "frames": stats["frame_count"],
                "output": output_path.name,
                "effective_outscale": stats.get("effective_outscale", 1),
                "encoder": stats.get("encoder"),
                "decode_accel": stats.get("decode_accel"),
                "frames_skipped": stats.get("frames_skipped"),
                "scene_skip_enabled": stats.get("scene_skip_enabled"),
                "scene_threshold": stats.get("scene_threshold"),
                "scene_skip_max_ratio": stats.get("scene_skip_max_ratio"),
                "focus_profile": stats.get("focus_profile"),
                "task_hint": stats.get("task_hint"),
                "text_focus": stats.get("text_focus"),
                "sr_fallback_frames": stats.get("sr_fallback_frames", 0),
                "sr_fallback_used": bool(stats.get("sr_fallback_frames", 0)),
                "sr_fallback_method": stats.get("sr_fallback_method"),
                "sr_fallback_reason": stats.get("sr_fallback_reason"),
            }
            if encoder_retry:
                sr_details["encoder_retry"] = True
                if encoder_retry_reason:
                    sr_details["encoder_retry_reason"] = encoder_retry_reason
            sr_step = ProcessingStep(
                step="super_resolution",
                agent=self.name,
                model=str(stats.get("sr_model_used") or "Real-ESRGAN x4plus"),
                time_ms=stats["sr_time_ms"],
                details=sr_details,
            )
            return [face_step, sr_step]

        try:
            if ctx.media_type == "video":
                output_path, stats = _visual_pipeline_video(
                    input_path=input_path,
                    output_dir=ctx.output_dir,
                    mode=ctx.mode,
                    priority=ctx.priority,
                    export_options=ctx.export_options,
                    cancel_check=ctx.cancel_check,
                    stage_progress=ctx.stage_progress,
                )
            elif ctx.media_type == "photo":
                output_path, stats = _visual_pipeline_photo(
                    input_path=input_path,
                    output_dir=ctx.output_dir,
                    mode=ctx.mode,
                    priority=ctx.priority,
                    stage_progress=ctx.stage_progress,
                    export_options=ctx.export_options,
                )
            else:
                # Unknown media type: fall back to the stub copy stage.
                return super().run(ctx, input_path)

            return SubagentRunResult(output_path=output_path, steps=_build_steps(stats, output_path))
        except PipelineCancelledError:
            # Cancellation must propagate; it is not a processing failure.
            raise
        except Exception as exc:
            retry_attempted = False
            # Encode-class failures on video get exactly one retry with the
            # universally available software encoder (libx264).
            if ctx.media_type == "video" and _should_retry_with_libx264(exc, ctx.export_options):
                retry_attempted = True
                retry_reason = _compact_error_text(exc, limit=280)
                retry_opts: Dict[str, object] = dict(ctx.export_options or {})
                retry_opts["encoder"] = "libx264"
                if ctx.stage_progress:
                    ctx.stage_progress(0.03, "encoder fallback: retry with libx264")
                try:
                    output_path, stats = _visual_pipeline_video(
                        input_path=input_path,
                        output_dir=ctx.output_dir,
                        mode=ctx.mode,
                        priority=ctx.priority,
                        export_options=retry_opts,
                        cancel_check=ctx.cancel_check,
                        stage_progress=ctx.stage_progress,
                    )
                    return SubagentRunResult(
                        output_path=output_path,
                        steps=_build_steps(
                            stats,
                            output_path,
                            encoder_retry=True,
                            encoder_retry_reason=retry_reason,
                        ),
                    )
                except PipelineCancelledError:
                    raise
                except Exception as retry_exc:
                    # Merge both failures into one message for the report.
                    exc = RuntimeError(
                        f"{_compact_error_text(exc, limit=180)}; retry(libx264) failed: {_compact_error_text(retry_exc, limit=180)}"
                    )

            # Last resort: pass the input through unmodified so the
            # pipeline can continue, recording why enhancement failed.
            fallback = _copy_with_stage_suffix(input_path, ctx.output_dir, self.stage_suffix)
            elapsed_ms = int((time.perf_counter() - t_start) * 1000)
            step = ProcessingStep(
                step="face_enhancement",
                agent=self.name,
                model="GFPGAN/CodeFormer + Real-ESRGAN x4plus",
                time_ms=elapsed_ms,
                details={
                    "fallback_used": True,
                    "fallback_type": "copy_passthrough",
                    "reason": str(exc),
                    "encoder_retry_attempted": retry_attempted,
                },
            )
            return SubagentRunResult(output_path=fallback, steps=[step])
|
|
|
|
|
|
def _alpr_instance():
    """Lazy-load fast-alpr ALPR instance (singleton).

    The instance is cached on the function object after the first call;
    if fast-alpr cannot be imported or constructed, None is cached and
    plate detection stays disabled for the process lifetime.
    """
    if not hasattr(_alpr_instance, "_cached"):
        instance = None
        try:
            from fast_alpr import ALPR  # type: ignore[import-untyped]

            instance = ALPR(
                detector_model="yolo-v9-t-384-license-plate-end2end",
                ocr_model="global-plates-mobile-vit-v2-model",
            )
        except Exception as exc:
            logger.warning("fast-alpr init failed (plates disabled): %s", exc)
        _alpr_instance._cached = instance
    return _alpr_instance._cached
|
|
|
|
|
|
def _detect_plates_in_frame(frame_bgr) -> List[Dict[str, Any]]:
    """Return list of {text, confidence, bbox} for detected plates in frame."""
    alpr = _alpr_instance()
    if alpr is None or cv2 is None:
        return []
    try:
        return [
            {
                "text": result.ocr.text,
                "confidence": round(float(result.ocr.confidence), 3),
                "bbox": list(result.detection.bounding_box),
            }
            for result in alpr.predict(frame_bgr)
        ]
    except Exception as exc:
        # Per-frame ALPR errors are expected on noisy input; log quietly.
        logger.debug("ALPR frame error: %s", exc)
        return []
|
|
|
|
|
|
def _enhance_plate_roi(frame_bgr, bbox, realesrganer) -> object:
    """Upscale plate region using Real-ESRGAN for sharper OCR.

    The bbox is padded by 8px, clamped to the frame, upscaled 2x, resized
    back, and pasted into a copy of the frame.  Any failure returns the
    original frame unchanged — plate enhancement is strictly best-effort.
    """
    if cv2 is None or realesrganer is None:
        return frame_bgr
    try:
        frame_h, frame_w = frame_bgr.shape[:2]
        pad = 8
        left = max(0, int(bbox[0]) - pad)
        top = max(0, int(bbox[1]) - pad)
        right = min(frame_w, int(bbox[2]) + pad)
        bottom = min(frame_h, int(bbox[3]) + pad)
        roi = frame_bgr[top:bottom, left:right]
        if roi.size == 0:
            return frame_bgr
        upscaled, _ = realesrganer.enhance(roi, outscale=2)
        fitted = cv2.resize(upscaled, (right - left, bottom - top), interpolation=cv2.INTER_AREA)
        merged = frame_bgr.copy()
        merged[top:bottom, left:right] = fitted
        return merged
    except Exception:
        return frame_bgr
|
|
|
|
|
|
class PlateAgent(BaseSubagent):
    """ALPR agent: detect and OCR license plates, enhance plate ROIs.

    Samples video frames roughly every 2 seconds (or reads the single photo),
    runs ALPR on each sampled frame, optionally sharpens plate ROIs with
    Real-ESRGAN in forensic mode, and writes a ``plate_detections.json``
    report artifact. The media file itself is never modified.
    """

    name = "PlateDetect"
    step_name = "plate_detection"
    stage_suffix = "plate"
    model_by_mode = {
        "tactical": "YOLO-v9 ALPR + fast-plate-ocr",
        "forensic": "YOLO-v9 ALPR + fast-plate-ocr + RealESRGAN-plate-enhance",
    }

    @staticmethod
    def _forensic_enhance(frame, plates, ctx):
        """In forensic mode, upscale each plate ROI and re-run detection.

        Returns ``(frame, plates)``; keeps the original detections when
        re-detection on the enhanced frame finds nothing.
        """
        if not plates or ctx.mode != "forensic":
            return frame, plates
        realesrganer = _ModelCache.realesrgan(ctx.mode)
        for pl in plates:
            frame = _enhance_plate_roi(frame, pl["bbox"], realesrganer)
        updated = _detect_plates_in_frame(frame)
        return frame, (updated or plates)

    @staticmethod
    def _record(plates, all_plates, unique_texts, frame_idx=None):
        """Accumulate detections; keep the highest-confidence hit per plate text."""
        for pl in plates:
            all_plates.append(pl if frame_idx is None else {**pl, "frame": frame_idx})
            txt = (pl.get("text") or "").strip().upper()
            if txt and (txt not in unique_texts or pl["confidence"] > unique_texts[txt]["confidence"]):
                unique_texts[txt] = pl

    def _scan_video(self, ctx, input_path, all_plates, unique_texts) -> int:
        """Sample the video at ~2s intervals, recording plates per sampled frame.

        Returns the number of frames actually sampled. The capture is released
        in a ``finally`` so a decode/detect failure cannot leak the handle.
        """
        frames_sampled = 0
        cap = cv2.VideoCapture(str(input_path))
        try:
            fps = cap.get(cv2.CAP_PROP_FPS) or 15.0
            sample_interval = max(1, int(fps * 2))  # ~one sample every 2 seconds
            fn = 0
            while True:
                cap.set(cv2.CAP_PROP_POS_FRAMES, fn)
                ret, frame = cap.read()
                if not ret:
                    break
                plates = _detect_plates_in_frame(frame)
                frames_sampled += 1
                frame, plates = self._forensic_enhance(frame, plates, ctx)
                self._record(plates, all_plates, unique_texts, frame_idx=fn)
                fn += sample_interval
                if ctx.cancel_check and ctx.cancel_check():
                    break
        finally:
            cap.release()
        return frames_sampled

    def _scan_photo(self, ctx, input_path, all_plates, unique_texts) -> int:
        """Detect plates in a single photo; returns the frame count (always 1)."""
        frame = cv2.imread(str(input_path), cv2.IMREAD_COLOR)
        plates = _detect_plates_in_frame(frame)
        frame, plates = self._forensic_enhance(frame, plates, ctx)
        self._record(plates, all_plates, unique_texts)
        return 1

    def run(self, ctx: SubagentContext, input_path: Path) -> SubagentRunResult:
        """Run plate detection and return the input path plus a JSON report artifact."""
        t0 = time.perf_counter()
        if _alpr_instance() is None:
            step = ProcessingStep(
                step=self.step_name, agent=self.name,
                model="fast-alpr (unavailable)", time_ms=0,
                details={"plates_detected": 0, "skipped": True},
            )
            return SubagentRunResult(output_path=input_path, steps=[step])

        all_plates: List[Dict[str, Any]] = []
        unique_texts: Dict[str, Dict[str, Any]] = {}
        frames_sampled = 0

        if ctx.media_type == "video":
            if cv2 is None:
                step = ProcessingStep(
                    step=self.step_name, agent=self.name,
                    model=self.model_by_mode[ctx.mode], time_ms=0,
                    details={"plates_detected": 0, "skipped": True, "reason": "opencv not available"},
                )
                return SubagentRunResult(output_path=input_path, steps=[step])
            frames_sampled = self._scan_video(ctx, input_path, all_plates, unique_texts)

        elif ctx.media_type == "photo":
            if cv2 is None:
                step = ProcessingStep(
                    step=self.step_name, agent=self.name,
                    model=self.model_by_mode[ctx.mode], time_ms=0,
                    details={"plates_detected": 0, "skipped": True},
                )
                return SubagentRunResult(output_path=input_path, steps=[step])
            frames_sampled = self._scan_photo(ctx, input_path, all_plates, unique_texts)

        report_path = ctx.output_dir / "plate_detections.json"
        report_data = {
            "job_id": ctx.job_id,
            "frames_sampled": frames_sampled,
            "plates_found": len(all_plates),
            "unique_plates": len(unique_texts),
            # Cap raw detections to keep the report bounded on long videos.
            "detections": all_plates[:200],
            "unique": list(unique_texts.values()),
        }
        report_path.write_text(json.dumps(report_data, ensure_ascii=False, indent=2), encoding="utf-8")

        elapsed_ms = int((time.perf_counter() - t0) * 1000)
        step = ProcessingStep(
            step=self.step_name,
            agent=self.name,
            model=self.model_by_mode[ctx.mode],
            time_ms=elapsed_ms,
            details={
                "plates_detected": len(all_plates),
                "unique_plates": len(unique_texts),
                "unique_texts": list(unique_texts.keys())[:20],
                "frames_sampled": frames_sampled,
                "report_file": report_path.name,
            },
        )
        return SubagentRunResult(
            output_path=input_path,
            steps=[step],
            artifacts=[report_path],
        )
|
|
|
|
|
|
class EchoAgent(BaseSubagent):
    """Audio-forensics agent: base pipeline plus a transcript scaffold artifact."""

    name = "Echo"
    step_name = "audio_forensics"
    stage_suffix = "echo"
    model_by_mode = {
        "tactical": "Demucs+Whisper(small)",
        "forensic": "Demucs+Whisper(large)+RawNet3",
    }

    def run(self, ctx: SubagentContext, input_path: Path) -> SubagentRunResult:
        """Run the base pipeline, then attach a placeholder transcript artifact."""
        outcome = super().run(ctx, input_path)
        transcript_path = ctx.output_dir / f"{input_path.stem}_echo_transcript.txt"
        transcript_path.write_text(
            "Transcript scaffold: replace with Whisper output.\n",
            encoding="utf-8",
        )
        outcome.artifacts.append(transcript_path)
        outcome.steps[0].details["transcript"] = transcript_path.name
        return outcome
|
|
|
|
|
|
class PixisAgent(BaseSubagent):
    """Photo-restoration agent; defines no custom ``run``, so the inherited
    base-class pipeline is used with the models declared below."""

    name = "Pixis"
    step_name = "photo_restoration"
    stage_suffix = "pixis"
    model_by_mode = dict(
        tactical="SCUNet+SwinIR(light)",
        forensic="SCUNet+SwinIR(full)+Real-ESRGAN",
    )
|
|
|
|
|
|
class KoreAgent(BaseSubagent):
    """Forensic-verification agent: hash the result, write a chain-of-custody
    log and a signature manifest, and surface both as artifacts."""

    name = "Kore"
    step_name = "forensic_verification"
    stage_suffix = "kore"
    model_by_mode = {
        "tactical": "OpenSSL(light)",
        "forensic": "OpenSSL+ChainOfCustody",
    }

    def run(self, ctx: SubagentContext, input_path: Path) -> SubagentRunResult:
        """Produce forensic_log.json and forensic_signature.json for *input_path*."""
        started = time.perf_counter()
        output_hash = sha256_file(input_path)

        # Chain-of-custody record tying the input hash to the result hash.
        custody_record = {
            "job_id": ctx.job_id,
            "mode": ctx.mode,
            "media_type": ctx.media_type,
            "input_hash": ctx.input_hash,
            "result_hash": output_hash,
            "timestamp_unix_ms": int(time.time() * 1000),
            "pipeline": "frame -> pre_denoise -> deblur -> (roi/full) face_restore(gfpgan/codeformer) -> realesrgan",
            "stages": ["Vera", "Kore"],
        }
        log_path = ctx.output_dir / "forensic_log.json"
        log_path.write_text(
            json.dumps(custody_record, ensure_ascii=False, indent=2),
            encoding="utf-8",
        )

        # NOTE(review): this value is a truncated SHA-256 digest labelled
        # "ed25519:", not an actual ed25519 signature — confirm whether real
        # asymmetric signing is planned before relying on it as evidence.
        digest = hashlib.sha256(
            f"{ctx.input_hash}:{output_hash}:{ctx.job_id}".encode("utf-8")
        ).hexdigest()[:48]
        digital_signature = f"ed25519:{digest}"

        manifest_path = ctx.output_dir / "forensic_signature.json"
        manifest_path.write_text(
            json.dumps(
                {
                    "signature": digital_signature,
                    "forensic_log": log_path.name,
                    "result_hash": output_hash,
                },
                ensure_ascii=False,
                indent=2,
            ),
            encoding="utf-8",
        )

        step = ProcessingStep(
            step=self.step_name,
            agent=self.name,
            model=self.model_by_mode[ctx.mode],
            time_ms=int((time.perf_counter() - started) * 1000),
            details={
                "forensic_log": log_path.name,
                "signature_manifest": manifest_path.name,
            },
        )
        return SubagentRunResult(
            output_path=input_path,
            steps=[step],
            artifacts=[log_path, manifest_path],
            metadata={
                "digital_signature": digital_signature,
                "result_hash": output_hash,
            },
        )
|