feat(aurora): add detection overlays with face/plate boxes in compare UI

2026-03-01 05:00:29 -08:00
parent 79f26ab683
commit 5b4c4f92ba
2 changed files with 416 additions and 0 deletions
--- a/services/sofiia-console/app/main.py
+++ b/services/sofiia-console/app/main.py
@@ -10,6 +10,7 @@ import os
 import re
 import sys
 import subprocess
+import mimetypes
 import time
 import uuid
 import logging
@@ -27,6 +28,11 @@ from fastapi.responses import HTMLResponse, StreamingResponse, JSONResponse
 from fastapi.middleware.cors import CORSMiddleware
 from pydantic import BaseModel

+try:
+    import cv2  # type: ignore[import-untyped]
+except Exception:  # pragma: no cover - optional dependency in console env
+    cv2 = None
+
 from .auth import (
    require_api_key, require_api_key_strict, require_auth, require_auth_strict,
    get_console_api_key, _key_valid, _cookie_token, _expected_cookie_token,
@@ -1371,6 +1377,12 @@ async def api_aurora_compare(job_id: str) -> Dict[str, Any]:
            before["file_size_mb"] = round(inp.stat().st_size / (1024 * 1024), 2)
        _probe = _ffprobe_quick(inp) if inp.exists() else {}
        if _probe:
+            before["resolution"] = _probe.get("resolution", before["resolution"])
+            before["width"] = _probe.get("width", before["width"])
+            before["height"] = _probe.get("height", before["height"])
+            before["duration_s"] = _probe.get("duration_s", before["duration_s"])
+            before["fps"] = _probe.get("fps", before["fps"])
+            before["frame_count"] = _probe.get("frame_count", before["frame_count"])
            before["codec"] = _probe.get("codec", "—")

    result_file = None
@@ -1428,6 +1440,12 @@ async def api_aurora_compare(job_id: str) -> Dict[str, Any]:
        output_path=output_media_path,
        output_dir=Path(output_dir) if output_dir else None,
    )
+    detections = await _aurora_build_compare_detections(
+        media_type=str(status.get("media_type") or ""),
+        output_dir=Path(output_dir) if output_dir else None,
+        frame_preview=frame_preview,
+        fps=before.get("fps") or after.get("fps"),
+    )

    return {
        "job_id": job_id,
@@ -1440,6 +1458,7 @@ async def api_aurora_compare(job_id: str) -> Dict[str, Any]:
        "faces_detected": faces_total,
        "enhance_steps": enhance_steps,
        "frame_preview": frame_preview,
+        "detections": detections,
        "folder_path": output_dir,
        "input_path": input_path,
    }
@@ -1530,6 +1549,236 @@ def _aurora_ensure_compare_frame_preview(
    }


+def _aurora_bbox_xyxy(raw_bbox: Any) -> Optional[List[int]]:
+    """Normalize a raw box into integer ``[x1, y1, x2, y2]`` corner order.
+
+    Only the first four entries of ``raw_bbox`` are read. Each one is passed
+    through ``float`` and then ``int``, so fractional values are truncated
+    toward zero. Corners supplied in reverse order are swapped per axis.
+
+    Returns:
+        The normalized box, or ``None`` when ``raw_bbox`` is not a list/tuple
+        with at least four items, when a coordinate cannot be converted, or
+        when the box has zero width or height.
+    """
+    if not isinstance(raw_bbox, (list, tuple)):
+        return None
+    if len(raw_bbox) < 4:
+        return None
+    try:
+        ax, ay, bx, by = (int(float(value)) for value in raw_bbox[:4])
+    except Exception:
+        return None
+    left, right = min(ax, bx), max(ax, bx)
+    top, bottom = min(ay, by), max(ay, by)
+    # After ordering, a degenerate box is one whose edges coincide.
+    if left == right or top == bottom:
+        return None
+    return [left, top, right, bottom]
+
+
+def _aurora_image_dims(path: Path) -> Optional[Dict[str, int]]:
+    """Return the pixel size of the image at ``path`` as ``{"width", "height"}``.
+
+    Best-effort helper: the result is ``None`` when OpenCV could not be
+    imported, the file does not exist, the image cannot be decoded, either
+    dimension is not positive, or any exception is raised while reading.
+    """
+    if cv2 is None:
+        return None
+    if not path.exists():
+        return None
+    try:
+        decoded = cv2.imread(str(path), cv2.IMREAD_COLOR)
+        if decoded is None:
+            return None
+        # ``shape`` starts with (rows, cols), i.e. height before width.
+        height, width = decoded.shape[:2]
+        if min(width, height) <= 0:
+            return None
+        return {"width": int(width), "height": int(height)}
+    except Exception:
+        return None
+
+
+def _aurora_detect_faces_from_preview(path: Path) -> List[Dict[str, Any]]:
+    """Detect faces in a preview image with OpenCV's frontal-face Haar cascade.
+
+    At most the first 40 detections are reported. Each entry holds a ``bbox``
+    in ``[x1, y1, x2, y2]`` form and a ``confidence`` value. The confidence is
+    a heuristic computed here from the Laplacian variance of the grayscale
+    face region, clamped to ``[0.5, 0.99]`` and rounded to three decimals;
+    it is not a score produced by the cascade.
+
+    Returns:
+        The detections, or an empty list when OpenCV is unavailable, the file
+        is missing or undecodable, the cascade fails to load, or any
+        exception is raised during detection.
+    """
+    if cv2 is None:
+        return []
+    if not path.exists():
+        return []
+    try:
+        image = cv2.imread(str(path), cv2.IMREAD_COLOR)
+        if image is None:
+            return []
+        gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
+        classifier = cv2.CascadeClassifier(
+            str(Path(cv2.data.haarcascades) / "haarcascade_frontalface_default.xml")
+        )
+        if classifier.empty():
+            return []
+        hits = classifier.detectMultiScale(gray, scaleFactor=1.1, minNeighbors=4, minSize=(20, 20))
+        results: List[Dict[str, Any]] = []
+        for left, top, width, height in hits[:40]:
+            patch = gray[top : top + height, left : left + width]
+            sharpness = 0.0
+            if patch.size > 0:
+                sharpness = float(cv2.Laplacian(patch, cv2.CV_64F).var())
+            # Sharper regions get a higher pseudo-confidence, bounded on both ends.
+            score = max(0.5, min(0.99, 0.55 + (sharpness / 400.0)))
+            results.append(
+                {
+                    "bbox": [int(left), int(top), int(left + width), int(top + height)],
+                    "confidence": round(score, 3),
+                }
+            )
+        return results
+    except Exception:
+        return []
+
+
+async def _aurora_detect_faces_via_service(path: Path) -> List[Dict[str, Any]]:
+    """Ask the Aurora service to analyze an image and return its face boxes.
+
+    The file is uploaded as the multipart field ``file`` to
+    ``{AURORA_SERVICE_URL}/api/aurora/analyze``. Only the ``faces`` list of
+    the JSON reply is used, capped at its first 60 entries.
+
+    Each reply ``bbox`` is read as ``[x, y, w, h]`` and converted to
+    ``[x1, y1, x2, y2]``; boxes with width or height <= 1 are dropped.
+    NOTE(review): the ``[x, y, w, h]`` layout is what this code assumes;
+    confirm it against the analyze endpoint's contract.
+
+    Returns:
+        A list of ``{"bbox": [x1, y1, x2, y2], "confidence": float | None}``
+        dicts. The list is empty when the file is missing, the request fails
+        or answers with a status >= 400, or the reply is not a JSON object
+        containing a ``faces`` list.
+    """
+    if not path.exists():
+        return []
+    mime = mimetypes.guess_type(path.name)[0] or "application/octet-stream"
+    timeout = httpx.Timeout(20.0, connect=6.0)
+    try:
+        async with httpx.AsyncClient(timeout=timeout) as client:
+            with path.open("rb") as fh:
+                files = {"file": (path.name, fh, mime)}
+                resp = await client.post(f"{AURORA_SERVICE_URL}/api/aurora/analyze", files=files)
+        if resp.status_code >= 400:
+            return []
+        payload = resp.json() if resp.content else {}
+    except Exception:
+        return []
+
+    # A valid JSON reply is not necessarily an object (it may be a list,
+    # string, number or null). Treat anything else as "no faces" instead of
+    # letting ``.get`` raise outside the best-effort ``try`` above.
+    if not isinstance(payload, dict):
+        return []
+    faces_raw = payload.get("faces")
+    if not isinstance(faces_raw, list):
+        return []
+
+    out: List[Dict[str, Any]] = []
+    for item in faces_raw[:60]:
+        if not isinstance(item, dict):
+            continue
+        bbox = item.get("bbox")
+        if not isinstance(bbox, (list, tuple)) or len(bbox) < 4:
+            continue
+        try:
+            x = int(float(bbox[0]))
+            y = int(float(bbox[1]))
+            w = int(float(bbox[2]))
+            h = int(float(bbox[3]))
+        except Exception:
+            continue
+        if w <= 1 or h <= 1:
+            continue
+        conf: Optional[float]
+        try:
+            conf = round(float(item.get("confidence")), 3)
+        except Exception:
+            # Missing or non-numeric confidence is reported as unknown.
+            conf = None
+        out.append(
+            {
+                "bbox": [x, y, x + w, y + h],
+                "confidence": conf,
+            }
+        )
+    return out
+
+
+def _aurora_select_plate_detections(
+    output_dir: Path,
+    *,
+    target_frame: Optional[int],
+    max_items: int = 12,
+) -> List[Dict[str, Any]]:
+    """Pick the plate detections to overlay from ``plate_detections.json``.
+
+    The report is read from ``output_dir``. Its ``detections`` list is used
+    when non-empty, otherwise its ``unique`` list. Entries without a usable
+    bounding box are skipped.
+
+    Selection:
+        * With a ``target_frame`` and at least one entry carrying a frame
+          number, keep entries within ``max(4, nearest_distance + 2)`` frames
+          of the target, ordered by frame distance, then confidence
+          (highest first).
+        * Otherwise order all entries by confidence (highest first), then
+          text.
+
+    Args:
+        output_dir: Directory expected to contain ``plate_detections.json``.
+        target_frame: Frame index the preview was taken at, or ``None``.
+        max_items: Maximum number of entries returned.
+
+    Returns:
+        Up to ``max_items`` dicts with ``bbox``, ``text``, ``confidence`` and
+        ``frame`` keys. The list is empty when the report is missing,
+        unreadable, not a JSON object, or has no usable entries.
+    """
+    report_path = output_dir / "plate_detections.json"
+    if not report_path.exists():
+        return []
+    try:
+        payload = json.loads(report_path.read_text(encoding="utf-8"))
+    except Exception:
+        return []
+    # A syntactically valid report may still have a non-object top level
+    # (list, null, ...); without this guard ``payload.get`` would raise.
+    if not isinstance(payload, dict):
+        return []
+
+    source_items: List[Any]
+    detections = payload.get("detections")
+    unique = payload.get("unique")
+    if isinstance(detections, list) and detections:
+        source_items = detections
+    elif isinstance(unique, list) and unique:
+        source_items = unique
+    else:
+        return []
+
+    parsed: List[Dict[str, Any]] = []
+    for item in source_items:
+        if not isinstance(item, dict):
+            continue
+        bbox = _aurora_bbox_xyxy(item.get("bbox"))
+        if not bbox:
+            continue
+        text_value = str(item.get("text") or "").strip()
+        conf_value: Optional[float]
+        try:
+            conf_value = round(float(item.get("confidence")), 3)
+        except Exception:
+            conf_value = None
+        frame_value: Optional[int]
+        try:
+            frame_value = int(item.get("frame")) if item.get("frame") is not None else None
+        except Exception:
+            frame_value = None
+        parsed.append(
+            {
+                "bbox": bbox,
+                "text": text_value or None,
+                "confidence": conf_value,
+                "frame": frame_value,
+            }
+        )
+
+    if not parsed:
+        return []
+
+    with_frame = [x for x in parsed if x.get("frame") is not None]
+    if target_frame is not None and with_frame:
+        target = int(target_frame)
+
+        def frame_distance(entry: Dict[str, Any]) -> int:
+            return abs(int(entry["frame"]) - target)
+
+        # Widen the window past the nearest hit so neighbours of the closest
+        # detection are kept even when nothing lands exactly on the target.
+        keep = max(4, min(frame_distance(x) for x in with_frame) + 2)
+        filtered = [x for x in with_frame if frame_distance(x) <= keep]
+        filtered.sort(key=lambda x: (frame_distance(x), -(x.get("confidence") or 0.0)))
+        return filtered[:max_items]
+
+    parsed.sort(key=lambda x: (-(x.get("confidence") or 0.0), x.get("text") or ""))
+    return parsed[:max_items]
+
+
+async def _aurora_build_compare_detections(
+    *,
+    media_type: str,
+    output_dir: Optional[Path],
+    frame_preview: Optional[Dict[str, Any]],
+    fps: Any,
+) -> Optional[Dict[str, Any]]:
+    """Assemble face and plate overlays for the before/after compare frames.
+
+    Faces are detected on ``_compare_before.jpg`` and ``_compare_after.jpg``
+    inside ``output_dir``: locally first, then through the Aurora service for
+    any existing image where the local pass found nothing. Plates come from
+    the job's plate report and are selected around the preview frame when
+    the media is a video.
+
+    Args:
+        media_type: Job media type; only ``"video"`` (case-insensitive)
+            enables frame-based plate selection.
+        output_dir: Job output directory holding the preview images.
+        frame_preview: Preview metadata; ``timestamp_sec`` is read from it.
+        fps: Frames per second used to turn the timestamp into a frame index.
+            Unparseable, non-finite or non-positive values fall back to 15.
+
+    Returns:
+        ``None`` when ``output_dir`` is missing or ``frame_preview`` is not a
+        dict. Otherwise a dict with ``target_timestamp_sec``,
+        ``target_frame`` and per-side ``frame_size`` / ``faces`` / ``plates``.
+        The same plate list is attached to both sides.
+
+    NOTE(review): the local detection and image decoding calls run
+    synchronously inside this coroutine; consider offloading them if they
+    prove slow enough to stall the event loop.
+    """
+    import math  # function-scope import keeps this block self-contained
+
+    if not output_dir or not output_dir.exists():
+        return None
+    if not isinstance(frame_preview, dict):
+        return None
+
+    before_path = output_dir / "_compare_before.jpg"
+    after_path = output_dir / "_compare_after.jpg"
+    before_faces = _aurora_detect_faces_from_preview(before_path)
+    after_faces = _aurora_detect_faces_from_preview(after_path)
+    if not before_faces and before_path.exists():
+        before_faces = await _aurora_detect_faces_via_service(before_path)
+    if not after_faces and after_path.exists():
+        after_faces = await _aurora_detect_faces_via_service(after_path)
+    before_size = _aurora_image_dims(before_path)
+    after_size = _aurora_image_dims(after_path)
+
+    # The preview metadata is untyped; a malformed timestamp must not abort
+    # the whole compare response, so fall back to the start of the media.
+    try:
+        target_ts = float(frame_preview.get("timestamp_sec") or 0.0)
+    except (TypeError, ValueError):
+        target_ts = 0.0
+    if not math.isfinite(target_ts):
+        target_ts = 0.0
+
+    is_video = str(media_type).lower() == "video"
+    target_frame: Optional[int] = None
+    if is_video:
+        try:
+            fps_val = float(fps)
+        except Exception:
+            fps_val = 15.0
+        # NaN/inf pass a plain ``<= 0`` check but make int(round(...)) raise.
+        if not math.isfinite(fps_val) or fps_val <= 0:
+            fps_val = 15.0
+        target_frame = int(round(target_ts * fps_val))
+
+    plate_items = _aurora_select_plate_detections(output_dir, target_frame=target_frame)
+
+    return {
+        "target_timestamp_sec": target_ts if is_video else None,
+        "target_frame": target_frame,
+        "before": {
+            "frame_size": before_size,
+            "faces": before_faces,
+            "plates": plate_items,
+        },
+        "after": {
+            "frame_size": after_size,
+            "faces": after_faces,
+            "plates": plate_items,
+        },
+    }
+
+
 def _ffprobe_quick(filepath: Path) -> Dict[str, Any]:
    """Quick ffprobe for resolution, codec, duration, fps, frame count."""
    if not filepath.exists():