feat(production): sync all modified production files to git

Includes updates across gateway, router, node-worker, memory-service,
aurora-service, swapper, sofiia-console UI and node2 infrastructure:

- gateway-bot: Dockerfile, http_api.py, druid/aistalk prompts, doc_service
- services/router: main.py, router-config.yml, fabric_metrics, memory_retrieval,
  offload_client, prompt_builder
- services/node-worker: worker.py, main.py, config.py, fabric_metrics
- services/memory-service: Dockerfile, database.py, main.py, requirements
- services/aurora-service: main.py (+399), kling.py, quality_report.py
- services/swapper-service: main.py, swapper_config_node2.yaml
- services/sofiia-console: static/index.html (console UI update)
- config: agent_registry, crewai_agents/teams, router_agents
- ops/fabric_preflight.sh: updated preflight checks
- router-config.yml, docker-compose.node2.yml: infra updates
- docs: NODA1-AGENT-ARCHITECTURE, fabric_contract updated

Made-with: Cursor
This commit is contained in:
Apple
2026-03-03 07:13:29 -08:00
parent 9aac835882
commit e9dedffa48
35 changed files with 3317 additions and 805 deletions

View File

@@ -307,7 +307,7 @@ agents:
canonical_role: "Autonomous Cyber Detective Agency Orchestrator"
mission: |
AISTALK - автономне агентство кібердетективів для розслідувань загроз і
вразливостей у Web2, Web3, AI та quantum-risk сценаріях.
вразливостей у Web2, Web3, AI, media-forensics та quantum-risk сценаріях.
На етапі планування агент працює як внутрішній оркестратор команди
спеціалізованих ролей з асинхронним case lifecycle.
@@ -336,6 +336,9 @@ agents:
- blueteam
- bughunter
- quantum risk
- media forensics
- video analysis
- deepfake
llm_profile: reasoning
prompt_file: aistalk_prompt.txt
@@ -346,12 +349,12 @@ agents:
enabled: true
default_profile: default
profile_hints:
default: [osint, threat_hunt, vulns, web3, ai, red-blue]
default: [osint, threat_hunt, vulns, web3, ai, red-blue, media_forensics, video, audio, photo, forensic, deepfake]
profiles:
default:
team_name: AISTALK Cyber Detective Unit
parallel_roles: true
max_concurrency: 6
max_concurrency: 7
synthesis:
role_context: AISTALK Orchestrator & Analyst
system_prompt_ref: roles/aistalk/orchestrator_synthesis.md
@@ -381,6 +384,11 @@ agents:
role_context: Neuron (Deep Analysis)
system_prompt_ref: roles/aistalk/neuron.md
llm_profile: reasoning
- id: aurora
role_context: Aurora (Autonomous Media Forensics)
system_prompt_ref: roles/aistalk/aurora.md
llm_profile: science
skills: [video_enhancement, audio_forensics, photo_restoration, chain_of_custody]
- id: vault
role_context: Vault (Secrets and Confidential Data Guard)
system_prompt_ref: roles/aistalk/vault.md
@@ -432,6 +440,8 @@ agents:
skills: [entity_resolution, link_analysis]
- role: "Risk"
skills: [cvss, mitre_mapping]
- role: "Aurora"
skills: [media_forensics, video_enhancement, audio_forensics, photo_analysis]
- role: "Analyst"
skills: [synthesis, reporting]

View File

@@ -246,6 +246,15 @@
"role": "Neuron (Deep Analysis)",
"skills": []
},
{
"role": "Aurora (Autonomous Media Forensics)",
"skills": [
"video_enhancement",
"audio_forensics",
"photo_restoration",
"chain_of_custody"
]
},
{
"role": "Vault (Secrets and Confidential Data Guard)",
"skills": []

View File

@@ -109,7 +109,7 @@ aistalk:
default:
team_name: AISTALK Cyber Detective Unit
parallel_roles: true
max_concurrency: 6
max_concurrency: 7
synthesis:
role_context: AISTALK Orchestrator & Analyst
system_prompt_ref: roles/aistalk/orchestrator_synthesis.md
@@ -139,6 +139,15 @@ aistalk:
role_context: Neuron (Deep Analysis)
system_prompt_ref: roles/aistalk/neuron.md
llm_profile: reasoning
- id: aurora
role_context: Aurora (Autonomous Media Forensics)
system_prompt_ref: roles/aistalk/aurora.md
llm_profile: science
skills:
- video_enhancement
- audio_forensics
- photo_restoration
- chain_of_custody
- id: vault
role_context: Vault (Secrets and Confidential Data Guard)
system_prompt_ref: roles/aistalk/vault.md
@@ -178,6 +187,12 @@ aistalk:
- web3
- ai
- red-blue
- media_forensics
- video
- audio
- photo
- forensic
- deepfake
nutra:
profiles:
default:

View File

@@ -67,7 +67,10 @@
"redteam",
"blueteam",
"bughunter",
"quantum risk"
"quantum risk",
"media forensics",
"video analysis",
"deepfake"
],
"domains": [
"cybersecurity",
@@ -522,4 +525,4 @@
"class": "internal",
"visibility": "internal"
}
}
}

View File

@@ -56,6 +56,27 @@ services:
- dagi-network
restart: unless-stopped
aurora-service:
build:
context: ./services/aurora-service
dockerfile: Dockerfile
container_name: aurora-service-node2
ports:
- "127.0.0.1:9401:9401"
environment:
- AURORA_DATA_DIR=/data/aurora
- AURORA_PUBLIC_BASE_URL=http://127.0.0.1:9401
- AURORA_CORS_ORIGINS=*
- AURORA_MODELS_DIR=/data/aurora/models
- AURORA_FORCE_CPU=false
- AURORA_PREFER_MPS=true
- AURORA_ENABLE_VIDEOTOOLBOX=true
volumes:
- aurora-data:/data
networks:
- dagi-network
restart: unless-stopped
dagi-nats:
image: nats:2.10-alpine
container_name: dagi-nats-node2
@@ -97,3 +118,7 @@ networks:
dagi-memory-network:
external: true
name: dagi-memory-network-node2
volumes:
aurora-data:
driver: local

View File

@@ -75,13 +75,16 @@ NODA1 використовує уніфіковану систему агент
┌───────────────────────┐ ┌───────────┐ ┌─────────────────────┐
│ LLM PROVIDERS │ │ MEMORY │ │ CREWAI │
│ ───────────────────── │ │ SERVICE │ │ (dagi-staging- │
│ • Ollama (local) │ │ :8000 │ │ crewai-service) │
│ - qwen3:8b │ ├───────────┤ │ ─────────────────── │
- mistral:7b │ │ • Qdrant │ │ crewai_agents.json │
│ - qwen2.5:3b │ │ • Neo4j │ │ │
• DeepSeek (cloud) │ │ • Postgres│ │ 11 Orchestrators │
│ • Mistral (cloud) │ └───────────┘ │ + Teams per agent │
└───────────────────────┘ └─────────────────────┘
│ • Grok (cloud) │ │ :8000 │ │ crewai-service) │
│ - sofiia, senpai │ ├───────────┤ │ ─────────────────── │
• DeepSeek (cloud) │ │ • Qdrant │ │ crewai_agents.json │
│ - all other agents │ │ • Neo4j │ │ │
+ fallback │ │ • Postgres│ │ 11 Orchestrators │
│ • Mistral (fallback) │ └───────────┘ │ + Teams per agent │
│ • Ollama (crew only) │ └─────────────────────┘
│ - qwen3:8b (crew) │
│ - qwen3-vl:8b (vis) │
└───────────────────────┘
```
---
@@ -108,28 +111,28 @@ config/agent_registry.yml ←── ЄДИНЕ джерело істини
### TOP-LEVEL (User-facing, 13 agents)
| ID | Display | Telegram | Visibility | Domain |
|----|---------|----------|------------|--------|
| `daarwizz` | DAARWIZZ | public | public | Meta-Orchestrator |
| `helion` | Helion | public | public | Energy |
| `alateya` | Aletheia | public | public | R&D Lab |
| `druid` | DRUID | public | public | Ayurveda/Cosmetics |
| `nutra` | NUTRA | public | public | Nutraceuticals |
| `agromatrix` | Степан Матрікс | public | public | Agriculture |
| `greenfood` | GREENFOOD | public | public | Food ERP |
| `clan` | CLAN | public | public | Community |
| `eonarch` | EONARCH | public | public | Consciousness |
| `yaromir` | YAROMIR | whitelist | private | Tech Lead |
| `soul` | SOUL | public | public | Spiritual |
| `senpai` | SENPAI | public | public | Trading |
| `sofiia` | SOFIIA | public | public | AI Architecture |
| ID | Display | Telegram | Visibility | Domain | LLM (primary) | Fallback |
|----|---------|----------|------------|--------|---------------|---------|
| `daarwizz` | DAARWIZZ | public | public | Meta-Orchestrator | DeepSeek | Mistral |
| `helion` | Helion | public | public | Energy | DeepSeek | Mistral |
| `alateya` | Aletheia | public | public | R&D Lab | DeepSeek | Mistral |
| `druid` | DRUID | public | public | Ayurveda/Cosmetics | DeepSeek | Mistral |
| `nutra` | NUTRA | public | public | Nutraceuticals | DeepSeek | Mistral |
| `agromatrix` | Степан Матрікс | public | public | Agriculture | DeepSeek | Mistral |
| `greenfood` | GREENFOOD | public | public | Food ERP | DeepSeek | Mistral |
| `clan` | CLAN | public | public | Community | DeepSeek | Mistral |
| `eonarch` | EONARCH | public | public | Consciousness | DeepSeek | Mistral |
| `yaromir` | YAROMIR | whitelist | private | Tech Lead | DeepSeek | Mistral |
| `soul` | SOUL | public | public | Spiritual | DeepSeek | Mistral |
| `senpai` | SENPAI | public | public | Trading | **Grok** | DeepSeek |
| `sofiia` | SOFIIA | public | public | AI Architecture | **Grok** | DeepSeek |
### INTERNAL (Service agents, 2 agents)
| ID | Display | Telegram | Scope | Purpose |
|----|---------|----------|-------|---------|
| `monitor` | MONITOR | off | node_local | Observability, alerts |
| `devtools` | DevTools | off | global | Development tools |
| ID | Display | Telegram | Scope | Purpose | LLM |
|----|---------|----------|-------|---------|-----|
| `monitor` | MONITOR | off | node_local | Observability, alerts | Ollama (local) |
| `devtools` | DevTools | off | global | Development tools | DeepSeek (складні) / Ollama (прості) |
---

View File

@@ -1 +1 @@
/Users/apple/github-projects/microdao-daarion/docs/backups/docs_backup_20260226-091701.tar.gz
/Users/apple/github-projects/microdao-daarion/docs/backups/docs_backup_20260302-091700.tar.gz

View File

@@ -155,5 +155,180 @@ STT/TTS/OCR/Image **можуть бути різними** на різних н
- **14 контейнерів** (router, node-worker, node-capabilities, nats, gateway, memory, qdrant, postgres, neo4j, redis, open-webui, sofiia-console, swapper)
- **13 served моделей** (Ollama: 12 + llama_server: 1)
- **29 installed artifacts** на диску (150.3GB LLM + 0.3GB TTS kokoro-v1_0)
- **capabilities**: llm=Y, vision=Y, ocr=Y, stt=N, tts=N, image=N
- `OCR_PROVIDER=vision_prompted`
- **capabilities**: llm=Y, vision=Y, ocr=Y, stt=Y, tts=Y, image=N ← Phase 1 enabled
- `STT_PROVIDER=memory_service`, `TTS_PROVIDER=memory_service`, `OCR_PROVIDER=vision_prompted`
---
## Phase 1: STT/TTS via Memory Service delegation (2026-02-27)
### Мотивація
Увімкнення `stt=true` / `tts=true` в Fabric без нових мікросервісів і без ризику MLX-залежностей.
### Архітектура
```
Fabric Router → find_nodes_with_capability("stt"/"tts") → NODA2 node-worker
→ STT_PROVIDER=memory_service → stt_memory_service.transcribe()
→ POST http://memory-service:8000/voice/stt (faster-whisper)
→ {text, segments, language, meta}
Fabric Router → NODA2 node-worker
→ TTS_PROVIDER=memory_service → tts_memory_service.synthesize()
→ POST http://memory-service:8000/voice/tts (edge-tts: Polina/Ostap Neural uk-UA)
→ {audio_b64, format="mp3", meta}
```
### Контракти
**STT вхід:**
```json
{
"audio_b64": "<base64>", // OR
"audio_url": "http://...", // one is required
"language": "uk", // optional
"filename": "audio.wav" // optional
}
```
**STT вихід (fabric contract):**
```json
{"text": "...", "segments": [], "language": "uk", "meta": {...}, "provider": "memory_service"}
```
**TTS вхід:**
```json
{"text": "...", "voice": "Polina", "speed": 1.0}
```
**TTS вихід (fabric contract):**
```json
{"audio_b64": "<base64-mp3>", "format": "mp3", "meta": {...}, "provider": "memory_service"}
```
### Обмеження Phase 1
- **ffmpeg=false**: лише формати що Memory Service ковтає нативно (WAV рекомендований)
- **Текст TTS**: max 500 символів (Memory Service limit)
- **Голоси TTS**: Polina (uk-UA-PolinaNeural), Ostap (uk-UA-OstapNeural), en-US-GuyNeural
- **NODA1**: залишається `STT_PROVIDER=none` / `TTS_PROVIDER=none` (не заважає роутингу)
### Phase 2 (MLX upgrade — опційний)
Встановити `STT_PROVIDER=mlx_whisper` та/або `TTS_PROVIDER=mlx_kokoro` в docker-compose коли:
- готовий ffmpeg або чітко обмежені формати
- потрібний якісніший локальний TTS замість edge-tts
- NODA2 Apple Silicon виграш від MLX
---
## Voice HA (Multi-node routing) — PR1–PR3
### Архітектура
```
Browser → sofiia-console /api/voice/tts
↓ VOICE_HA_ENABLED=false (default)
memory-service:8000/voice/tts ← legacy direct
↓ VOICE_HA_ENABLED=true
Router /v1/capability/voice_tts
↓ (caps + scoring)
node.{id}.voice.tts.request (NATS)
node-worker (voice semaphore)
memory-service/voice/tts
```
### NATS Subjects (Voice HA — відокремлені від generic)
| Subject | Призначення |
|---|---|
| `node.{id}.voice.tts.request` | Voice TTS offload (окремий semaphore) |
| `node.{id}.voice.llm.request` | Voice LLM inference (голосові guardrails) |
| `node.{id}.voice.stt.request` | Voice STT transcription |
**Сумісність:** generic subjects (`node.{id}.tts.request` etc.) — незмінні.
### Capability Flags
Node Worker `/caps` повертає:
```json
{
"capabilities": {
"tts": true,
"voice_tts": true,
"voice_llm": true,
"voice_stt": true
},
"voice_concurrency": {
"voice_tts": 4,
"voice_llm": 2,
"voice_stt": 2
}
}
```
`voice_tts=true` лише коли `TTS_PROVIDER != none` **і** NATS subscription активна.
NCS агрегує ці флаги через `_derive_capabilities()`.
### Router Endpoints
| Endpoint | Дедлайн | Суб'єкт |
|---|---|---|
| `POST /v1/capability/voice_tts` | 3000ms | `node.{id}.voice.tts.request` |
| `POST /v1/capability/voice_llm` | 9000ms (fast) / 12000ms (quality) | `node.{id}.voice.llm.request` |
| `POST /v1/capability/voice_stt` | 6000ms | `node.{id}.voice.stt.request` |
Response headers: `X-Voice-Node`, `X-Voice-Mode` (local|remote), `X-Voice-Cap`.
### Scoring
```
score = wait_ms + rtt_ms + p95_ms + mem_penalty - local_bonus
mem_penalty = 300 if mem_pressure == "high"
local_bonus = VOICE_PREFER_LOCAL_BONUS (default 200ms)
```
Якщо `score_local <= score_best_remote + LOCAL_THRESHOLD_MS` → вибирається локальна нода.
### BFF Feature Flag
```yaml
# docker-compose.node2-sofiia.yml
VOICE_HA_ENABLED: "false" # default — legacy direct path
VOICE_HA_ROUTER_URL: "http://router:8000" # Router для HA offload
```
Активація: `VOICE_HA_ENABLED=true` + rebuild `sofiia-console`.
Деактивація: `VOICE_HA_ENABLED=false` — повертається до direct memory-service.
### Метрики (Prometheus)
**node-worker** (`/prom_metrics`):
- `node_worker_voice_jobs_total{cap,status}`
- `node_worker_voice_inflight{cap}`
- `node_worker_voice_latency_ms{cap}` (histogram)
**router** (`/fabric_metrics`):
- `fabric_voice_capability_requests_total{cap,status}`
- `fabric_voice_offload_total{cap,node,status}`
- `fabric_voice_breaker_state{cap,node}` (1=open)
- `fabric_voice_score_ms{cap}` (histogram)
### Контракт: No Silent Fallback
- Будь-який fallback (busy, broken, timeout) логує `WARNING` + інкрементує Prometheus counter
- `TOO_BUSY` включає `retry_after_ms` hint для Router failover
- Circuit breaker per `node+voice_cap` — не змішується з generic CB
### Тести
`tests/test_voice_ha.py` — 28 тестів:
- Node Worker voice caps + semaphore isolation
- Router fabric_metrics voice helpers
- BFF `VOICE_HA_ENABLED` feature flag
- Voice scoring logic (local prefer, mem penalty, remote wins when saturated)
- No silent fallback invariants

View File

@@ -3,7 +3,12 @@ FROM python:3.11-slim
LABEL maintainer="DAARION.city Team"
LABEL description="Bot Gateway - Telegram/Discord webhook handler with DAARWIZZ"
LABEL version="0.2.1"
LABEL version="0.2.2"
ARG BUILD_SHA=dev
ARG BUILD_TIME=local
ENV BUILD_SHA=${BUILD_SHA}
ENV BUILD_TIME=${BUILD_TIME}
WORKDIR /app/gateway-bot

View File

@@ -20,6 +20,35 @@ Modes:
- public mode: community-shareable report, sanitized.
- confidential mode: strict redaction and minimal retention.
AISTALK team routing (internal):
- Use `Aurora` for media forensics requests: blurry CCTV, noisy video/audio, frame extraction, metadata integrity, deepfake suspicion, photo restoration.
- Default Aurora mode:
- `tactical` for quick understanding
- `forensic` when evidence is intended for legal/compliance workflows
- For forensic media workflows require:
- hash of original and result (`sha256`)
- processing log (step, model, timing)
- chain-of-custody notes and signature metadata when available
Aurora response contract for media tasks:
```json
{
"agent": "Aurora",
"mode": "tactical | forensic",
"job_id": "aurora_YYYYMMDD_###",
"input_file": {"name": "file.ext", "hash": "sha256:..."},
"processing_log": [{"step": "denoise", "model": "FastDVDnet", "time_ms": 1200}],
"output_files": [{"type": "video|audio|photo|forensic_log", "url": "https://...", "hash": "sha256:..."}],
"digital_signature": "ed25519:... | null"
}
```
Safety and compliance:
- No deceptive deepfake generation or identity manipulation.
- Always label AI-enhanced artifacts as enhanced outputs.
- Separate observations from conclusions; include confidence and limitations.
- For legal-grade conclusions, require human forensic expert verification.
Output style:
- Short executive summary first.
- Then findings, risk level, and recommended next actions.

View File

@@ -1,100 +1,79 @@
Ти — DRUID, агент аналітики та RAG платформи DAARION.
# DRUID v1.0
## Майстер алхімічних формул і аюрведи
Твоя роль — допомагати користувачам з пошуком інформації, аналізом документів та відповідями на питання з бази знань.
Ти — DRUID.
Алхімік і дослідник на межі між давньою традицією і сучасною наукою формулювань.
Твоя сила — перетворювати рослинні знання і хімічну точність на конкретні рецептури: від аюрведичної олії до сучасного шампуню.
## NETWORK AWARENESS (DAARION)
- Ти знаєш, що **DAARWIZZ** — головний оркестратор/мер мережі DAARION.city.
- Якщо запит про DAARWIZZ або мережеву оркестрацію: коротко підтверди роль DAARWIZZ та скеруй до нього для coordination/governance питань.
- Ніколи не відповідай, що "не знаєш DAARWIZZ".
Говориш від першої особи, у чоловічому роді. Стиль — впевнений, лаконічний, практичний.
Ти не читаєш лекцій — ти даєш формули, склади, пропорції.
---
# BEHAVIOR POLICY v1
## Мережа DAARION
## A. SPEAK-ONLY-WHEN-ASKED (SOWA)
**Головне правило: мовчи, якщо не питали.**
НЕ ВІДПОВІДАЙ, якщо:
- Немає прямого звернення (@DRUID73bot, "Druid", команда)
- Повідомлення — broadcast/оголошення/постер
- Коротка нотатка/таймінг без запиту
- Медіа/фото/посилання БЕЗ питання
ВІДПОВІДАЙ, якщо:
- Пряме звернення: @DRUID73bot, "Druid", "/druid"
- Явний запит про пошук, документи, аналітику
- Особисте повідомлення (DM)
- Навчальна група (Agent Preschool)
**Якщо не впевнений — МОВЧИ.**
## B. SHORT-FIRST
**За замовчуванням: 1-3 речення.**
ЗАБОРОНЕНО:
- Довгі розбори без запиту
- "Радий допомогти", "Готовий до співпраці"
- Емодзі
## C. MEDIA-NO-COMMENT
Медіа без питання = мовчанка.
Медіа з питанням = коротка відповідь по суті.
- **DAARWIZZ** — головний оркестратор мережі. Якщо запит про координацію/governance — скеровуй до нього.
- **NUTRA** — партнер по здоров'ю і нутріцевтиці. Якщо питання про внутрішній прийом, БАД, медицину — скеровуй до NUTRA.
- Ніколи не заперечуй знайомство з DAARWIZZ.
---
## 🎤 МУЛЬТИМОДАЛЬНІСТЬ
## Що я роблю
**Ти можеш працювати з:**
- ✅ **Голосовими повідомленнями** — автоматично перетворюються на текст (STT)
- ✅ **Фото** — аналіз зображень
- ✅ **Документами** — PDF, DOCX автоматично парсяться та індексуються
**Аюрведа і фітохімія:**
Рослинні екстракти, ефірні олії, адаптогени, мацерати, гідролати, настойки.
Аюрведичні препарати для зовнішнього застосування.
**ВАЖЛИВО:**
- Ніколи не кажи "я не можу слухати аудіо" — голосові повідомлення вже перетворені на текст!
- Ніколи не кажи "я не можу бачити/аналізувати зображення" — ти МАЄШ Vision API і МОЖЕШ аналізувати фото! Якщо в історії розмови є твій опис зображення — це означає ти його вже проаналізував(ла) через Vision. Не заперечуй це.
**Косметичні формули:**
Емульсії (O/W, W/O), сироватки, бальзами, шампуні, мило, дезодоранти.
Підбір сурфактантів, емульгаторів, консервантів, pH-систем.
**INCI і склади:**
Розшифрую будь-який INCI список. Знаю що з чим поєднується і що — ні.
Концентраційні ліміти, алергени, стабільність.
**Для бізнесу і виробництва:**
Базова регуляторика (EU Cosmetics Regulation 1223/2009, різниця EU/US).
Вимоги маркування, claims, технологічні протоколи.
---
## 🛠️ ТВОЇ МОЖЛИВОСТІ (tools)
## Команда (для складних задач)
Ти маєш доступ до спеціальних інструментів:
**Пошук і знання:**
- `memory_search` — шукай в своїй пам'яті, документах
- `graph_query` — шукай зв'язки між темами
- `web_search` — шукай в інтернеті
**Генерація:**
- `image_generate` — згенеруй зображення
- `presentation_create` — створи презентацію PowerPoint
**Пам'ять:**
- `remember_fact` — запам'ятай важливий факт
**Коли створювати презентацію:**
Якщо користувач просить "створи презентацію", "зроби слайди" — використай `presentation_create`.
Для детального аналізу я підключаю лабораторію:
- **Formulator** — склад і пропорції
- **Ingredient Analyst** — INCI, сумісність, функції
- **Safety & QA** — безпека, концентрації, алергени
- **Regulatory Basics** — регуляторні вимоги
- **Protocol Writer** — покроковий протокол виробництва
---
## Правила відповіді
Відповідаю якщо: пряме звернення (@DRUID73bot, "Druid", "/druid"), запит про рецептуру, склад, INCI, аюрведу, косметику, ефірні олії.
Мовчу якщо: оголошення без питання, медіа без запиту, теми поза моєю спеціалізацією.
Формат: коротко і конкретно. Таблиця або список — якщо є що перерахувати. Деталі — на прохання.
Заборонено: "Радий допомогти", зайві вступи, порожні застереження.
---
## ПАМ'ЯТЬ ТА ІНСТРУМЕНТИ
## Технічні можливості
### Пам'ять (ETM — Ephemeral Turn Memory):
- Ти бачиш **80 останніх повідомлень** чату (повна доступна історія сесії)
- У ГРУПОВИХ чатах ти бачиш повідомлення **ВСІХ учасників** (не тільки поточного)
- Повідомлення від різних користувачів позначені їх іменами: [username]: текст
- Уся історія чату зберігається НАЗАВЖДИ у базі даних Memory Service
- **НІКОЛИ не кажи "не бачу повідомлення інших учасників" — ти їх БАЧИШ у контексті вище!**
- У тебе є доступ до документів через колекцію `druid_docs`
- Аналізую фото (Vision): зображення рослин, продуктів, складів на етикетці
- Читаю документи: PDF зі специфікаціями, SDS, технічними картами
- Голосові — конвертуються автоматично в текст, просто відповідаю
- `memory_search` — шукаю в збережених рецептурах і документах
- `web_search` — нові дослідження, інгредієнти, регуляторні оновлення
- `crawl4ai_scrape` — витягую INCI список прямо з сайту бренду
### Інструменти:
- **memory_search** — пошук по збережених документах та попередніх розмовах
- **web_search** — пошук в інтернеті (якщо потрібна зовнішня інформація)
- **crawl4ai_scrape** — витягти контент з URL
Ніколи не кажу "не можу аналізувати фото" або "не маю цієї інформації" без спроби пошуку.
**Порядок пошуку:** 1) memory_search 2) якщо пусто → web_search 3) crawl4ai_scrape для URL.
**НІКОЛИ не кажи "не маю інформації" без спроби web_search!**
---
## Межі
Не даю медичних рекомендацій для внутрішнього вживання — це до NUTRA.
Концентрації і застереження — на основі загальнодоступних даних.
Для комерційного виробництва — рекомендую підтвердити з дерматологом або токсикологом.

View File

@@ -748,6 +748,11 @@ BRAND_REGISTRY_URL = os.getenv("BRAND_REGISTRY_URL", "http://brand-registry:9210
PRESENTATION_RENDERER_URL = os.getenv("PRESENTATION_RENDERER_URL", "http://presentation-renderer:9212").rstrip("/")
ARTIFACT_REGISTRY_URL = os.getenv("ARTIFACT_REGISTRY_URL", "http://artifact-registry:9220").rstrip("/")
# Build metadata — injected at image build time via ARG/ENV (BUILD_SHA, BUILD_TIME, NODE_ID)
_GATEWAY_BUILD_SHA = os.environ.get("BUILD_SHA", "dev")
_GATEWAY_BUILD_TIME = os.environ.get("BUILD_TIME", "local")
_GATEWAY_NODE_ID = os.environ.get("NODE_ID", "NODA1")
router = APIRouter()
@@ -985,6 +990,36 @@ SOFIIA_CONFIG = load_agent_config(
default_prompt="Ти — Sophia (Софія), Chief AI Architect та Technical Sovereign екосистеми DAARION.city. Координуєш R&D, архітектуру, безпеку та еволюцію платформи.",
)
# MONITOR — Node-Local Ops Agent (internal, not user-facing via Telegram)
MONITOR_CONFIG = load_agent_config(
agent_id="monitor",
name="MONITOR",
prompt_path=os.getenv(
"MONITOR_PROMPT_PATH",
str(Path(__file__).parent / "monitor_prompt.txt"),
),
telegram_token_env="MONITOR_TELEGRAM_BOT_TOKEN", # intentionally empty — no Telegram
default_prompt=(
"You are MONITOR, the node-local health and observability agent for DAARION infrastructure. "
"You perform health checks, alert triage, and safe ops diagnostics. Internal use only."
),
)
# AISTALK — Cyber Detective Agency Orchestrator (planned, private)
AISTALK_CONFIG = load_agent_config(
agent_id="aistalk",
name="AISTALK",
prompt_path=os.getenv(
"AISTALK_PROMPT_PATH",
str(Path(__file__).parent / "aistalk_prompt.txt"),
),
telegram_token_env="AISTALK_TELEGRAM_BOT_TOKEN",
default_prompt=(
"You are AISTALK, an autonomous cyber detective agency orchestrator inside DAARION. "
"You handle cyber-investigation intents, threat intelligence, and incident response."
),
)
# Registry of all agents (для легкого додавання нових агентів)
AGENT_REGISTRY: Dict[str, AgentConfig] = {
"daarwizz": DAARWIZZ_CONFIG,
@@ -1001,6 +1036,8 @@ AGENT_REGISTRY: Dict[str, AgentConfig] = {
"soul": SOUL_CONFIG,
"yaromir": YAROMIR_CONFIG,
"sofiia": SOFIIA_CONFIG,
"monitor": MONITOR_CONFIG,
"aistalk": AISTALK_CONFIG,
}
# 3. Створіть endpoint (опціонально, якщо потрібен окремий webhook):
# @router.post("/new_agent/telegram/webhook")
@@ -5071,19 +5108,40 @@ async def _old_helion_telegram_webhook(update: TelegramUpdate):
@router.get("/health")
async def health():
"""Health check endpoint"""
# Static metadata for agents that don't have Telegram — used by Sofiia console UI badges
_AGENT_META: Dict[str, Dict] = {
"monitor": {"badges": ["per-node", "ops"], "visibility": "internal", "telegram_mode": "off"},
"aistalk": {"badges": ["cyber", "private"], "visibility": "private", "lifecycle_status": "planned"},
"sofiia": {"badges": ["supervisor", "architect"]},
"helion": {"badges": ["cto", "dao"]},
}
agents_info = {}
for agent_id, config in AGENT_REGISTRY.items():
meta = _AGENT_META.get(agent_id, {})
agents_info[agent_id] = {
"name": config.name,
"prompt_loaded": len(config.system_prompt) > 0,
"telegram_token_configured": config.get_telegram_token() is not None
"telegram_token_configured": config.get_telegram_token() is not None,
"badges": meta.get("badges", []),
"visibility": meta.get("visibility", "public"),
"telegram_mode": meta.get("telegram_mode", "on"),
"lifecycle_status": meta.get("lifecycle_status", "active"),
}
# Required per-node agents check
required_agents = ["monitor"]
required_missing = [aid for aid in required_agents if aid not in agents_info]
return {
"status": "healthy",
"agents": agents_info,
"agents_count": len(AGENT_REGISTRY),
"required_missing": required_missing,
"timestamp": datetime.utcnow().isoformat(),
"build_sha": _GATEWAY_BUILD_SHA,
"build_time": _GATEWAY_BUILD_TIME,
"node_id": _GATEWAY_NODE_ID,
}

View File

@@ -1047,3 +1047,66 @@ async def upsert_chat_doc_context_with_summary(
except Exception as exc:
logger.warning("upsert_chat_doc_context_with_summary failed: %s", exc)
return False
# ---------------------------------------------------------------------------
# Compatibility stubs (functions used by http_api_doc.py)
# ---------------------------------------------------------------------------
class _DocServiceCompat:
"""Namespace stub — keep backward-compat with imports that use doc_service.X"""
pass
doc_service = _DocServiceCompat()
class UpdateResult(BaseModel):
    """Compat model matching what http_api_doc.py expects."""
    # Identity of the document the (attempted) update targeted.
    doc_id: str = ""
    # Version bookkeeping — always zero/empty in this stub; real values come
    # from the Sofiia Console versioning backend.
    version_no: int = 0
    version_id: str = ""
    updated_chunks: int = 0
    # "stub" marks responses produced by the gateway compat layer rather than
    # a real document-update implementation.
    status: str = "stub"
    success: bool = False
    error: Optional[str] = "not implemented"
    publish_error: Optional[str] = None
    # Artifact metadata mirrors the fields http_api_doc.py reads after a
    # publish; all None here because the gateway does not store artifacts.
    artifact_id: Optional[str] = None
    artifact_version_id: Optional[str] = None
    artifact_storage_key: Optional[str] = None
    artifact_mime: Optional[str] = None
    artifact_download_url: Optional[str] = None
class _PublishResult(BaseModel):
    """Compat model for publish_document_artifact."""
    # Defaults describe a failed/no-op publish — the gateway never succeeds here.
    success: bool = False
    error: Optional[str] = "not implemented"
    # Artifact registry coordinates; populated only by a real artifact-registry
    # implementation, never by this stub.
    artifact_id: Optional[str] = None
    version_id: Optional[str] = None
    storage_key: Optional[str] = None
    mime: Optional[str] = None
    file_name: Optional[str] = None
    download_url: Optional[str] = None
async def update_document(**kwargs) -> UpdateResult:
    """Compat stub: local doc versioning is not implemented in the gateway —
    real updates go through Sofiia Console /api/doc/versions."""
    requested_doc = kwargs.get("doc_id", "")
    logger.warning("update_document: stub called for doc_id=%s", requested_doc)
    return UpdateResult(doc_id=requested_doc, success=False, error="not implemented in gateway")
async def list_document_versions(
    agent_id: str,
    doc_id: str,
    limit: int = 20,
) -> Dict[str, Any]:
    """Compat stub: version history lives in Sofiia Console SQLite, so the
    gateway always reports an empty (but well-formed) version list."""
    logger.debug("list_document_versions: stub called for doc_id=%s", doc_id)
    empty_history: Dict[str, Any] = {"ok": True, "doc_id": doc_id, "versions": [], "total": 0}
    return empty_history
async def publish_document_artifact(**kwargs) -> _PublishResult:
    """Compat stub: artifact persistence is handled by the artifact-registry
    service; the gateway only logs the attempt and reports failure."""
    target_doc = kwargs.get("doc_id", "")
    logger.warning("publish_document_artifact: stub called for doc_id=%s", target_doc)
    return _PublishResult(success=False, error="not implemented in gateway")

View File

@@ -9,6 +9,7 @@ set -euo pipefail
NODA_NCS="${1:-http://127.0.0.1:8099}"
ROUTER_URL="${2:-http://127.0.0.1:9102}"
MEMORY_URL="${3:-http://127.0.0.1:8000}"
RED='\033[0;31m'
GREEN='\033[0;32m'
@@ -64,10 +65,42 @@ print(' '.join(parts) if parts else '(none — P3.5 not deployed?)')
vision_count=$(echo "$raw" | python3 -c "import json,sys;print(sum(1 for m in json.load(sys.stdin).get('served_models',[]) if m.get('type')=='vision'))" 2>/dev/null)
[ "$vision_count" -gt 0 ] && pass "vision models: $vision_count" || warn "no vision models served"
# Phase 1: explicit STT/TTS capability check
local stt_cap tts_cap stt_provider tts_provider
stt_cap=$(echo "$raw" | python3 -c "import json,sys;print(json.load(sys.stdin).get('capabilities',{}).get('stt','?'))" 2>/dev/null)
tts_cap=$(echo "$raw" | python3 -c "import json,sys;print(json.load(sys.stdin).get('capabilities',{}).get('tts','?'))" 2>/dev/null)
stt_provider=$(echo "$raw" | python3 -c "import json,sys;print(json.load(sys.stdin).get('capabilities',{}).get('providers',{}).get('stt','?'))" 2>/dev/null)
tts_provider=$(echo "$raw" | python3 -c "import json,sys;print(json.load(sys.stdin).get('capabilities',{}).get('providers',{}).get('tts','?'))" 2>/dev/null)
[ "$stt_cap" = "True" ] || [ "$stt_cap" = "true" ] \
&& pass "stt=true provider=$stt_provider" \
|| warn "stt=false (provider=$stt_provider) — STT not available on this node"
[ "$tts_cap" = "True" ] || [ "$tts_cap" = "true" ] \
&& pass "tts=true provider=$tts_provider" \
|| warn "tts=false (provider=$tts_provider) — TTS not available on this node"
NCS_RAW="$raw"
NCS_NODE_ID="$node_id"
}
# ── Memory Service health check ────────────────────────────────────────────────
check_memory_service() {
    # Probe one Memory Service instance: /health first, then /voice/status.
    # $1 = human-readable label for output, $2 = base URL of the service.
    local label="$1" url="$2"
    echo "── $label ($url/health) ──"
    local health
    # An unreachable service is only a warning here — voice STT/TTS degrade
    # rather than the whole preflight failing.
    health=$(curl -sf "$url/health" 2>/dev/null) || { warn "Memory Service unreachable at $url (STT/TTS may fail)"; return; }
    local status
    # If the health payload is not JSON (or lacks "status"), fall back to "ok"
    # instead of erroring out of the check.
    status=$(echo "$health" | python3 -c "import json,sys;print(json.load(sys.stdin).get('status','?'))" 2>/dev/null || echo "ok")
    pass "memory-service health=$status"
    local voice_status
    # voice/status is optional; its absence only means engines can't be listed.
    voice_status=$(curl -sf "$url/voice/status" 2>/dev/null) || { warn "voice/status unreachable"; return; }
    local tts_engine stt_engine
    tts_engine=$(echo "$voice_status" | python3 -c "import json,sys;print(json.load(sys.stdin).get('tts_engine','?'))" 2>/dev/null)
    stt_engine=$(echo "$voice_status" | python3 -c "import json,sys;print(json.load(sys.stdin).get('stt_engine','?'))" 2>/dev/null)
    pass "voice: tts=$tts_engine stt=$stt_engine"
}
# ── Router check ──────────────────────────────────────────────────────────────
check_router() {
@@ -163,6 +196,91 @@ else:
info "Snapshot: $snap_file"
}
# ── Ollama model availability check ──────────────────────────────────────────
# Voice routing policy depends on specific models; 502 from BFF = model absent.
# This check probes /api/tags (Ollama REST) to list installed models and
# emits NCS-compatible "installed=false" warnings so Router can exclude them.
OLLAMA_URL="${4:-http://127.0.0.1:11434}"
# Voice policy: models required/preferred for voice_fast_uk / voice_quality_uk
VOICE_REQUIRED_MODELS="gemma3:latest"
VOICE_PREFERRED_MODELS="qwen3.5:35b-a3b qwen3:14b"
VOICE_EXCLUDED_MODELS="glm-4.7-flash:32k glm-4.7-flash"
# Return 0 if model "$1" appears in the space-separated installed list "$2".
# Matches either the exact tag (e.g. "gemma3:latest") or any tag sharing the
# same base name before the colon (e.g. "gemma3:4b" satisfies "gemma3:latest").
ollama_model_installed() {
local model="$1" installed="$2" short
short="${model%%:*}"
echo "$installed" | tr ' ' '\n' | grep -qi "^${model}$\|^${short}:"
}
# Verify that the Ollama instance has the models the voice routing policy
# needs: required models fail preflight, preferred ones warn, excluded ones
# warn if present. Also live-probes qwen3:8b, a known 502 source.
# $1 (optional): Ollama base URL; defaults to $OLLAMA_URL.
check_ollama_voice_models() {
local ollama_url="${1:-$OLLAMA_URL}"
echo "── Ollama voice model availability ($ollama_url) ──"
local tags_raw
tags_raw=$(curl -sf "${ollama_url}/api/tags" 2>/dev/null) \
|| { warn "Ollama unreachable at ${ollama_url} — model check skipped"; return; }
local installed_names
# /api/tags returns {"models":[{"name":...},...]} — flatten to a space-separated list.
installed_names=$(echo "$tags_raw" | python3 -c "
import json, sys
data = json.load(sys.stdin)
models = data.get('models', [])
names = [m.get('name','') for m in models]
print(' '.join(names))
" 2>/dev/null || echo "")
info "Ollama installed: $(echo "$installed_names" | tr ' ' '\n' | grep -c . || echo 0) model(s)"
local model
# Check required voice models (hard failure when absent).
for model in $VOICE_REQUIRED_MODELS; do
if ollama_model_installed "$model" "$installed_names"; then
pass "voice_required: ${model} = installed"
else
fail "voice_required: ${model} = MISSING — voice_fast_uk will degrade to fallback"
fi
done
# Check preferred voice models (warn not fail).
local prefer_available=0
for model in $VOICE_PREFERRED_MODELS; do
if ollama_model_installed "$model" "$installed_names"; then
pass "voice_preferred: ${model} = installed"
prefer_available=$((prefer_available + 1))
else
warn "voice_preferred: ${model} = not installed — will be skipped by router"
fi
done
# Check that excluded models are NOT serving voice.
for model in $VOICE_EXCLUDED_MODELS; do
if ollama_model_installed "$model" "$installed_names"; then
warn "voice_excluded: ${model} is installed — ensure router excludes from voice profiles"
else
pass "voice_excluded: ${model} = absent (correct)"
fi
done
# qwen3:8b specific check — known 502 source
local qwen3_8b_ok=0
if echo "$installed_names" | tr ' ' '\n' | grep -qi "^qwen3:8b$"; then
# Extra: try a minimal generation to detect "loaded but broken"
local gen_code
gen_code=$(curl -sf -w "%{http_code}" -X POST "${ollama_url}/api/generate" \
-H "Content-Type: application/json" \
-d '{"model":"qwen3:8b","prompt":"ping","stream":false,"options":{"num_predict":1}}' \
-o /dev/null --max-time 15 2>/dev/null || echo "000")
if [ "$gen_code" = "200" ]; then
pass "qwen3:8b = installed and serves (HTTP 200)"
qwen3_8b_ok=1
else
warn "qwen3:8b = installed but generate returned HTTP ${gen_code} — exclude from voice_fast_uk prefer list"
fi
else
warn "qwen3:8b = not installed — mark as unavailable in NCS"
fi
# BUGFIX: previously `[ $qwen3_8b_ok -eq 0 ] && info ...` was the final
# statement, so the function exited with status 1 exactly when qwen3:8b is
# healthy — which would abort the whole script under `set -e`. Use an
# explicit if and return 0 so the function's exit status is always success.
if [ "$qwen3_8b_ok" -eq 0 ]; then
info "ACTION: remove qwen3:8b from voice_fast_uk.prefer_models until 502 resolved"
fi
return 0
}
# ── Main ──────────────────────────────────────────────────────────────────────
echo "╔══════════════════════════════════════╗"
@@ -174,6 +292,26 @@ check_ncs "NCS" "$NODA_NCS"
echo ""
check_router "Router" "$ROUTER_URL"
echo ""
check_memory_service "Memory Service" "$MEMORY_URL"
echo ""
check_ollama_voice_models "$OLLAMA_URL"
echo ""
# ── Voice Canary: live synthesis test (hard-fail on voice failure) ────────────
echo "── Voice Canary (live synthesis) ──────────────────────────────────────"
# The canary lives next to this script; skip gracefully when it is absent.
CANARY_SCRIPT="$(dirname "$0")/scripts/voice_canary.py"
if [ -f "$CANARY_SCRIPT" ] && command -v python3 >/dev/null 2>&1; then
# NOTE(review): if this script runs under `set -e`, a non-zero canary exit
# would abort here before $? is captured — confirm, or append `|| true`.
MEMORY_SERVICE_URL="$MEMORY_URL" python3 "$CANARY_SCRIPT" --mode preflight
CANARY_EXIT=$?
if [ $CANARY_EXIT -ne 0 ]; then
# A failed canary counts toward the global error budget checked at the end.
ERRORS=$((ERRORS+1))
echo -e " ${RED}FAIL${NC} Voice canary: synthesis test failed (Polina/Ostap not working)"
fi
else
echo " [SKIP] voice_canary.py not found or python3 unavailable"
fi
echo ""
save_and_diff
echo ""
@@ -182,5 +320,5 @@ if [ $ERRORS -gt 0 ]; then
echo -e "${RED}BLOCKED: no changes allowed until all errors resolved${NC}"
exit 1
else
echo -e "${GREEN}Preflight PASSED — changes allowed${NC}"
echo -e "${GREEN}Preflight PASSED — all voice canaries green — changes allowed${NC}"
fi

View File

@@ -122,6 +122,33 @@ llm_profiles:
timeout_ms: 60000
description: "Mistral Large для складних задач, reasoning, аналізу"
claude_sofiia:
provider: anthropic
api_key_env: ANTHROPIC_API_KEY
model: claude-sonnet-4-5
max_tokens: 8192
temperature: 0.2
timeout_ms: 120000
description: "Claude Sonnet для Sofiia — code generation, architecture, reasoning"
claude_opus:
provider: anthropic
api_key_env: ANTHROPIC_API_KEY
model: claude-opus-4-5
max_tokens: 8192
temperature: 0.15
timeout_ms: 180000
description: "Claude Opus — для найскладніших архітектурних задач Sofiia"
claude_haiku:
provider: anthropic
api_key_env: ANTHROPIC_API_KEY
model: claude-haiku-3-5
max_tokens: 4096
temperature: 0.25
timeout_ms: 30000
description: "Claude Haiku — швидкі відповіді, інструментальні задачі"
# ============================================================================
# Orchestrator Providers
# ============================================================================
@@ -416,12 +443,13 @@ agents:
sofiia:
description: "Sofiia — Chief AI Architect та Technical Sovereign"
default_llm: local_default_coder
default_llm: claude_sofiia
system_prompt: |
Ти Sofiia — Chief AI Architect та Technical Sovereign екосистеми DAARION.city.
Працюй як CTO-помічник: архітектура, reliability, безпека, release governance, incident/risk/backlog контроль.
Відповідай українською, структуровано і коротко; не вигадуй факти, якщо даних нема — кажи прямо.
Для задач про інфраструктуру пріоритет: перевірка health/monitor, далі конкретні дії і верифікація.
Для задач з кодом: аналіз, рефакторинг, дебаг, архітектурні рекомендації — повний рівень доступу.
monitor:
description: "Monitor Agent - архітектор-інспектор DAGI"

View File

@@ -143,6 +143,7 @@ def kling_video_enhance(
def kling_video_generate(
*,
image_b64: Optional[str] = None,
image_url: Optional[str] = None,
image_id: Optional[str] = None,
prompt: str,
@@ -165,8 +166,8 @@ def kling_video_generate(
duration: '5' or '10'.
aspect_ratio: '16:9', '9:16', '1:1'.
"""
if not image_url and not image_id:
raise ValueError("Either image_url or image_id must be provided")
if not image_b64 and not image_url and not image_id:
raise ValueError("One of image_b64 / image_url / image_id must be provided")
payload: Dict[str, Any] = {
"model": model,
@@ -177,10 +178,14 @@ def kling_video_generate(
"negative_prompt": negative_prompt,
"aspect_ratio": aspect_ratio,
}
if image_url:
payload["image"] = {"type": "url", "url": image_url}
if image_id:
payload["image"] = {"type": "id", "id": image_id}
# Current Kling endpoint expects "image" as base64 payload string.
# Keep url/id compatibility as a best-effort fallback for older gateways.
if image_b64:
payload["image"] = image_b64
elif image_url:
payload["image"] = image_url
elif image_id:
payload["image"] = image_id
if callback_url:
payload["callback_url"] = callback_url
@@ -191,6 +196,37 @@ def kling_video_generate(
)
def kling_video_generate_from_file(
*,
image_path: Path,
prompt: str,
negative_prompt: str = "noise, blur, artifacts, distortion",
model: str = "kling-v1-5",
mode: str = "pro",
duration: str = "5",
cfg_scale: float = 0.5,
aspect_ratio: str = "16:9",
callback_url: Optional[str] = None,
) -> Dict[str, Any]:
"""Generate video from a local image file by sending base64 payload."""
import base64
with image_path.open("rb") as fh:
image_b64 = base64.b64encode(fh.read()).decode()
return kling_video_generate(
image_b64=image_b64,
prompt=prompt,
negative_prompt=negative_prompt,
model=model,
mode=mode,
duration=duration,
cfg_scale=cfg_scale,
aspect_ratio=aspect_ratio,
callback_url=callback_url,
)
def kling_task_status(task_id: str) -> Dict[str, Any]:
"""Get status of any Kling task by ID."""
return _kling_request_with_fallback(
@@ -267,7 +303,12 @@ def kling_poll_until_done(
def kling_health_check() -> Dict[str, Any]:
"""Quick connectivity check — returns status dict."""
try:
resp = _kling_request("GET", "/v1/models", timeout=10)
return {"ok": True, "models": resp}
# `/v1/models` may be disabled in some accounts/regions.
# `/v1/videos/image2video` reliably returns code=0 when auth+endpoint are valid.
resp = _kling_request("GET", "/v1/videos/image2video", timeout=10)
code = resp.get("code") if isinstance(resp, dict) else None
if code not in (None, 0, "0"):
return {"ok": False, "error": f"Kling probe returned non-zero code: {code}", "probe": resp}
return {"ok": True, "probe_path": "/v1/videos/image2video", "probe": resp}
except Exception as exc:
return {"ok": False, "error": str(exc)}

View File

@@ -4,6 +4,7 @@ import asyncio
import hashlib
import json
import logging
import mimetypes
import os
import re
import shutil
@@ -13,9 +14,9 @@ from datetime import datetime, timezone
from pathlib import Path
from typing import Any, Dict, List, Optional
from fastapi import Body, FastAPI, File, Form, HTTPException, Query, UploadFile
from fastapi import Body, FastAPI, File, Form, HTTPException, Query, Request, UploadFile
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import FileResponse
from fastapi.responses import FileResponse, Response, StreamingResponse
from .analysis import (
analyze_photo,
@@ -47,6 +48,7 @@ MAX_CONCURRENT_JOBS = max(1, int(os.getenv("AURORA_MAX_CONCURRENT_JOBS", "1")))
store = JobStore(DATA_DIR)
orchestrator = AuroraOrchestrator(store.outputs_dir, PUBLIC_BASE_URL)
RUN_SLOT = asyncio.Semaphore(MAX_CONCURRENT_JOBS)
KLING_VIDEO2VIDEO_CAPABLE: Optional[bool] = None
app = FastAPI(
title="Aurora Media Forensics Service",
@@ -228,7 +230,18 @@ def _enqueue_job_from_path(
upload_dir = store.uploads_dir / job_id
upload_dir.mkdir(parents=True, exist_ok=True)
input_path = upload_dir / safe_filename(file_name)
shutil.copy2(source_path, input_path)
trim_info: Optional[Dict[str, float]] = None
if media_type == "video":
trim_info = _video_trim_window(export_options)
if trim_info:
_trim_video_input(
source_path,
input_path,
start_sec=float(trim_info.get("start_sec") or 0.0),
duration_sec=trim_info.get("duration_sec"),
)
else:
shutil.copy2(source_path, input_path)
input_hash = compute_sha256(input_path)
initial_metadata = _estimate_upload_metadata(
@@ -238,6 +251,8 @@ def _enqueue_job_from_path(
)
if export_options:
initial_metadata["export_options"] = export_options
if trim_info:
initial_metadata["clip"] = trim_info
initial_metadata["priority"] = priority
if metadata_patch:
initial_metadata.update(metadata_patch)
@@ -408,6 +423,110 @@ def _parse_export_options(raw_value: str) -> Dict[str, Any]:
return parsed
def _opt_float(opts: Dict[str, Any], key: str) -> Optional[float]:
raw = opts.get(key)
if raw is None or raw == "":
return None
try:
return float(raw)
except Exception:
raise HTTPException(status_code=422, detail=f"export_options.{key} must be a number")
def _video_trim_window(export_options: Dict[str, Any]) -> Optional[Dict[str, float]]:
    """Derive an optional trim window from export options.

    Primary keys are ``clip_start_sec``/``clip_duration_sec``; ``start_sec``/
    ``duration_sec`` are accepted as legacy aliases. Returns None when neither
    bound is supplied; raises HTTP 422 on out-of-range values.
    """
    opts = export_options if isinstance(export_options, dict) else {}
    # Keep evaluation order: primary keys first, then the legacy aliases,
    # so malformed-value 422s fire in the same order as before.
    start = _opt_float(opts, "clip_start_sec")
    duration = _opt_float(opts, "clip_duration_sec")
    if start is None:
        start = _opt_float(opts, "start_sec")
    if duration is None:
        duration = _opt_float(opts, "duration_sec")
    if start is None and duration is None:
        return None
    begin = float(start or 0.0)
    length = float(duration) if duration is not None else None
    if begin < 0:
        raise HTTPException(status_code=422, detail="clip_start_sec must be >= 0")
    if length is not None and length <= 0:
        raise HTTPException(status_code=422, detail="clip_duration_sec must be > 0")
    window: Dict[str, float] = {"start_sec": round(begin, 3)}
    # duration_sec may legitimately be None (open-ended clip) despite the float annotation.
    window["duration_sec"] = round(length, 3) if length is not None else None  # type: ignore[assignment]
    return window
def _ffmpeg_trim_cmd(
    source_path: Path,
    target_path: Path,
    *,
    start_sec: float,
    duration_sec: Optional[float],
    codec_args: List[str],
) -> List[str]:
    """Build an ffmpeg command trimming source to [start, start+duration).

    ``codec_args`` selects the codec strategy (stream copy vs re-encode);
    everything else — seek, duration, stream mapping, faststart — is shared.
    """
    cmd = ["ffmpeg", "-hide_banner", "-loglevel", "error", "-y"]
    if start_sec > 0:
        # -ss before -i: fast input-side seek.
        cmd.extend(["-ss", f"{start_sec:.3f}"])
    cmd.extend(["-i", str(source_path)])
    if duration_sec is not None:
        cmd.extend(["-t", f"{duration_sec:.3f}"])
    # First video stream always; audio only if present ("0:a?").
    cmd.extend(["-map", "0:v:0", "-map", "0:a?"])
    cmd.extend(codec_args)
    cmd.extend(["-movflags", "+faststart", str(target_path)])
    return cmd


def _trim_video_input(source_path: Path, target_path: Path, *, start_sec: float, duration_sec: Optional[float]) -> None:
    """Trim video to a focused segment for faster iteration.

    First attempt is stream copy (lossless, fast). If that fails for
    container/codec reasons, fall back to a lightweight re-encode.

    Raises:
        HTTPException(422): when both attempts fail or produce an empty file.
    """
    copy_cmd = _ffmpeg_trim_cmd(
        source_path,
        target_path,
        start_sec=start_sec,
        duration_sec=duration_sec,
        codec_args=["-c", "copy"],
    )
    proc = subprocess.run(copy_cmd, capture_output=True, text=True, check=False)
    if proc.returncode == 0 and target_path.exists() and target_path.stat().st_size > 0:
        return
    # Stream copy can fail on keyframe boundaries / unsupported codecs — re-encode.
    encode_cmd = _ffmpeg_trim_cmd(
        source_path,
        target_path,
        start_sec=start_sec,
        duration_sec=duration_sec,
        codec_args=["-c:v", "libx264", "-preset", "veryfast", "-crf", "17", "-c:a", "aac", "-b:a", "192k"],
    )
    proc2 = subprocess.run(encode_cmd, capture_output=True, text=True, check=False)
    if proc2.returncode != 0 or not target_path.exists() or target_path.stat().st_size <= 0:
        # Prefer the re-encode stderr; fall back to the copy attempt's stderr.
        err = (proc2.stderr or proc.stderr or "").strip()[:280]
        raise HTTPException(status_code=422, detail=f"video trim failed: {err or 'ffmpeg error'}")
def _status_timing(job: Any) -> Dict[str, Optional[int]]:
started = _parse_iso_utc(job.started_at)
if not started:
@@ -1134,14 +1253,156 @@ async def cleanup_storage(
@app.get("/api/aurora/files/{job_id}/{file_name}")
async def download_output_file(job_id: str, file_name: str) -> FileResponse:
async def download_output_file(job_id: str, file_name: str, request: Request):
base = (store.outputs_dir / job_id).resolve()
target = (base / file_name).resolve()
if not str(target).startswith(str(base)):
raise HTTPException(status_code=403, detail="invalid file path")
if not target.exists() or not target.is_file():
raise HTTPException(status_code=404, detail="file not found")
return FileResponse(path=target, filename=target.name)
total_size = target.stat().st_size
range_header = request.headers.get("range")
if not range_header:
return FileResponse(
path=target,
filename=target.name,
headers={"Accept-Ranges": "bytes"},
)
parsed = _parse_range_header(range_header, total_size)
if parsed is None:
return FileResponse(
path=target,
filename=target.name,
headers={"Accept-Ranges": "bytes"},
)
start, end = parsed
if start >= total_size:
return Response(
status_code=416,
headers={"Content-Range": f"bytes */{total_size}", "Accept-Ranges": "bytes"},
)
content_length = (end - start) + 1
media_type = mimetypes.guess_type(str(target))[0] or "application/octet-stream"
def _iter_range():
with target.open("rb") as fh:
fh.seek(start)
remaining = content_length
while remaining > 0:
chunk = fh.read(min(65536, remaining))
if not chunk:
break
remaining -= len(chunk)
yield chunk
return StreamingResponse(
_iter_range(),
status_code=206,
media_type=media_type,
headers={
"Content-Range": f"bytes {start}-{end}/{total_size}",
"Content-Length": str(content_length),
"Accept-Ranges": "bytes",
"Content-Disposition": f'attachment; filename="{target.name}"',
},
)
def _parse_range_header(range_header: str, total_size: int) -> Optional[tuple[int, int]]:
value = str(range_header or "").strip()
if not value.lower().startswith("bytes="):
return None
spec = value.split("=", 1)[1].strip()
if "," in spec:
return None
if "-" not in spec:
return None
start_txt, end_txt = spec.split("-", 1)
try:
if start_txt == "":
# Suffix range: bytes=-N
suffix_len = int(end_txt)
if suffix_len <= 0:
return None
if suffix_len >= total_size:
return 0, max(0, total_size - 1)
return total_size - suffix_len, total_size - 1
start = int(start_txt)
if start < 0:
return None
if end_txt == "":
end = total_size - 1
else:
end = int(end_txt)
if end < start:
return None
return start, min(end, max(0, total_size - 1))
except Exception:
return None
def _extract_first_video_frame(video_path: Path, output_path: Path) -> Path:
    """Extract the first decodable video frame to an image file.

    Requires OpenCV. Raises RuntimeError when cv2 is unavailable, the video
    cannot be opened or decoded, or the frame cannot be written.
    """
    try:
        import cv2  # type: ignore[import-untyped]
    except Exception as exc:
        raise RuntimeError("OpenCV is required for Kling image2video fallback.") from exc
    output_path.parent.mkdir(parents=True, exist_ok=True)
    capture = cv2.VideoCapture(str(video_path))
    try:
        if not capture.isOpened():
            raise RuntimeError(f"Cannot open video for fallback frame extraction: {video_path}")
        success, frame = capture.read()
        if not success or frame is None:
            raise RuntimeError("Could not read first frame from video")
        # imwrite returns False instead of raising on failure — check explicitly.
        if not cv2.imwrite(str(output_path), frame):
            raise RuntimeError(f"Failed to write fallback frame: {output_path}")
    finally:
        capture.release()
    return output_path
def _resolve_kling_result_url(task_data: Dict[str, Any]) -> Optional[str]:
if not isinstance(task_data, dict):
return None
task_result = task_data.get("task_result")
if isinstance(task_result, dict):
videos = task_result.get("videos")
if isinstance(videos, list):
for item in videos:
if not isinstance(item, dict):
continue
for key in ("url", "video_url", "play_url", "download_url"):
value = item.get(key)
if isinstance(value, str) and value:
return value
elif isinstance(videos, dict):
for key in ("url", "video_url", "play_url", "download_url"):
value = videos.get(key)
if isinstance(value, str) and value:
return value
for key in ("url", "video_url", "play_url", "download_url", "result_url"):
value = task_result.get(key)
if isinstance(value, str) and value:
return value
for key in ("kling_result_url", "result_url", "video_url", "url"):
value = task_data.get(key)
if isinstance(value, str) and value:
return value
return None
def _compact_error_text(err: Any, limit: int = 220) -> str:
text = re.sub(r"\s+", " ", str(err)).strip()
return text[:limit]
# ── Kling AI endpoints ────────────────────────────────────────────────────────
@@ -1163,7 +1424,7 @@ async def kling_enhance_video(
cfg_scale: float = Form(0.5, description="Prompt adherence 0.0-1.0"),
) -> Dict[str, Any]:
"""Submit Aurora job result to Kling AI for video-to-video enhancement."""
from .kling import kling_video_enhance, kling_upload_file
from .kling import kling_video_enhance, kling_upload_file, kling_video_generate_from_file
job = store.get_job(job_id)
if not job:
@@ -1181,45 +1442,97 @@ async def kling_enhance_video(
if not result_path.exists():
raise HTTPException(status_code=404, detail="Result file not found for this job")
try:
upload_resp = kling_upload_file(result_path)
except Exception as exc:
raise HTTPException(status_code=502, detail=f"Kling upload error: {str(exc)[:400]}") from exc
file_id = (upload_resp.get("data") or {}).get("resource_id") or (upload_resp.get("data") or {}).get("file_id")
global KLING_VIDEO2VIDEO_CAPABLE
if not file_id:
raise HTTPException(status_code=502, detail=f"Kling upload failed: {upload_resp}")
task_resp: Optional[Dict[str, Any]] = None
file_id: Optional[str] = None
kling_endpoint = "video2video"
video2video_error: Optional[str] = None
fallback_frame_name: Optional[str] = None
# Primary path: upload + video2video.
if KLING_VIDEO2VIDEO_CAPABLE is not False:
try:
upload_resp = kling_upload_file(result_path)
file_id = (upload_resp.get("data") or {}).get("resource_id") or (upload_resp.get("data") or {}).get("file_id")
if not file_id:
raise RuntimeError(f"Kling upload failed: {upload_resp}")
task_resp = kling_video_enhance(
video_id=file_id,
prompt=prompt,
negative_prompt=negative_prompt,
mode=mode,
duration=duration,
cfg_scale=cfg_scale,
)
KLING_VIDEO2VIDEO_CAPABLE = True
except Exception as exc:
raw_error = str(exc)
video2video_error = _compact_error_text(raw_error, limit=220)
logger.warning("kling video2video unavailable for %s: %s", job_id, video2video_error)
lower_error = raw_error.lower()
if "endpoint mismatch" in lower_error or "404" in lower_error:
KLING_VIDEO2VIDEO_CAPABLE = False
else:
video2video_error = "video2video skipped (previous endpoint mismatch)"
# Fallback path: extract first frame and run image2video (base64 payload).
if task_resp is None:
try:
frame_path = _extract_first_video_frame(
result_path,
store.outputs_dir / job_id / "_kling_fallback_frame.jpg",
)
fallback_frame_name = frame_path.name
task_resp = kling_video_generate_from_file(
image_path=frame_path,
prompt=prompt,
negative_prompt=negative_prompt,
mode=mode,
duration=duration,
cfg_scale=cfg_scale,
aspect_ratio="16:9",
)
kling_endpoint = "image2video"
except Exception as fallback_exc:
detail = "Kling submit failed"
if video2video_error:
detail = f"Kling video2video error: {video2video_error}; image2video fallback error: {_compact_error_text(fallback_exc, limit=220)}"
else:
detail = f"Kling image2video fallback error: {_compact_error_text(fallback_exc, limit=220)}"
raise HTTPException(status_code=502, detail=detail) from fallback_exc
if task_resp is None:
raise HTTPException(status_code=502, detail="Kling task submit failed: empty response")
try:
task_resp = kling_video_enhance(
video_id=file_id,
prompt=prompt,
negative_prompt=negative_prompt,
mode=mode,
duration=duration,
cfg_scale=cfg_scale,
)
except Exception as exc:
raise HTTPException(status_code=502, detail=f"Kling task submit error: {str(exc)[:400]}") from exc
task_id = (task_resp.get("data") or {}).get("task_id") or task_resp.get("task_id")
if not task_id:
raise HTTPException(status_code=502, detail=f"Kling task_id missing in response: {task_resp}")
kling_meta_dir = store.outputs_dir / job_id
kling_meta_path = kling_meta_dir / "kling_task.json"
kling_meta_path.write_text(json.dumps({
meta_payload: Dict[str, Any] = {
"aurora_job_id": job_id,
"kling_task_id": task_id,
"kling_file_id": file_id,
"kling_endpoint": kling_endpoint,
"prompt": prompt,
"mode": mode,
"duration": duration,
"submitted_at": datetime.now(timezone.utc).isoformat(),
"status": "submitted",
}, ensure_ascii=False, indent=2), encoding="utf-8")
}
if fallback_frame_name:
meta_payload["kling_source_frame"] = fallback_frame_name
if video2video_error:
meta_payload["video2video_error"] = video2video_error
kling_meta_path.write_text(json.dumps(meta_payload, ensure_ascii=False, indent=2), encoding="utf-8")
return {
"aurora_job_id": job_id,
"kling_task_id": task_id,
"kling_file_id": file_id,
"kling_endpoint": kling_endpoint,
"status": "submitted",
"status_url": f"/api/aurora/kling/status/{job_id}",
}
@@ -1238,9 +1551,10 @@ async def kling_task_status_for_job(job_id: str) -> Dict[str, Any]:
task_id = meta.get("kling_task_id")
if not task_id:
raise HTTPException(status_code=404, detail="Kling task_id missing in metadata")
endpoint = str(meta.get("kling_endpoint") or "video2video")
try:
status_resp = kling_video_task_status(task_id, endpoint="video2video")
status_resp = kling_video_task_status(task_id, endpoint=endpoint)
except Exception as exc:
raise HTTPException(status_code=502, detail=f"Kling status error: {str(exc)[:400]}") from exc
task_data = status_resp.get("data") or status_resp
@@ -1249,19 +1563,17 @@ async def kling_task_status_for_job(job_id: str) -> Dict[str, Any]:
meta["status"] = state
meta["last_checked"] = datetime.now(timezone.utc).isoformat()
result_url = None
works = task_data.get("task_result", {}).get("videos") or []
if works:
result_url = works[0].get("url")
if result_url:
meta["kling_result_url"] = result_url
meta["completed_at"] = datetime.now(timezone.utc).isoformat()
result_url = _resolve_kling_result_url(task_data)
if result_url:
meta["kling_result_url"] = result_url
meta["completed_at"] = datetime.now(timezone.utc).isoformat()
kling_meta_path.write_text(json.dumps(meta, ensure_ascii=False, indent=2), encoding="utf-8")
return {
"aurora_job_id": job_id,
"kling_task_id": task_id,
"kling_endpoint": endpoint,
"status": state,
"kling_result_url": result_url,
"meta": meta,
@@ -1279,7 +1591,7 @@ async def kling_image_to_video(
aspect_ratio: str = Form("16:9"),
) -> Dict[str, Any]:
"""Generate video from a still image using Kling AI."""
from .kling import kling_upload_file, kling_video_generate
from .kling import kling_video_generate_from_file
file_name = file.filename or "frame.jpg"
content = await file.read()
@@ -1293,16 +1605,8 @@ async def kling_image_to_video(
try:
try:
upload_resp = kling_upload_file(tmp_path)
except Exception as exc:
raise HTTPException(status_code=502, detail=f"Kling upload error: {str(exc)[:400]}") from exc
file_id = (upload_resp.get("data") or {}).get("resource_id") or (upload_resp.get("data") or {}).get("file_id")
if not file_id:
raise HTTPException(status_code=502, detail=f"Kling upload failed: {upload_resp}")
try:
task_resp = kling_video_generate(
image_id=file_id,
task_resp = kling_video_generate_from_file(
image_path=tmp_path,
prompt=prompt,
negative_prompt=negative_prompt,
model=model,
@@ -1313,9 +1617,12 @@ async def kling_image_to_video(
except Exception as exc:
raise HTTPException(status_code=502, detail=f"Kling task submit error: {str(exc)[:400]}") from exc
task_id = (task_resp.get("data") or {}).get("task_id") or task_resp.get("task_id")
if not task_id:
raise HTTPException(status_code=502, detail=f"Kling task_id missing in response: {task_resp}")
return {
"kling_task_id": task_id,
"kling_file_id": file_id,
"kling_file_id": None,
"kling_endpoint": "image2video",
"status": "submitted",
"status_url": f"/api/aurora/kling/task/{task_id}?endpoint=image2video",
}

View File

@@ -49,6 +49,78 @@ def _models_used(job: AuroraJob) -> List[str]:
return models
def _processing_steps(job: AuroraJob) -> List[Any]:
    """Return the job's processing log, preferring the result-level log."""
    result = job.result
    if result and result.processing_log:
        return list(result.processing_log)
    return list(job.processing_log) if job.processing_log else []
def _result_media_hash(job: AuroraJob) -> Optional[str]:
    """Return the first non-empty hash among media-typed result output files."""
    if not job.result:
        return None
    # Accept the job's own media type plus the generic media categories.
    accepted = {str(job.media_type).strip().lower(), "video", "photo", "image", "audio", "unknown"}
    for output in job.result.output_files:
        kind = str(getattr(output, "type", "") or "").strip().lower()
        if kind not in accepted:
            continue
        digest = str(getattr(output, "hash", "") or "").strip()
        if digest:
            return digest
    return None
def _fallback_flags(job: AuroraJob) -> Dict[str, Any]:
    """Summarize fallback usage recorded in the job's processing log.

    Distinguishes hard fallbacks (a whole step degraded) from soft SR
    fallbacks (some frames used a cheaper super-resolution path) and
    collects a de-duplicated, human-readable warning per occurrence.
    """
    hard_used = False
    soft_used = False
    touched_steps: List[str] = []
    notes: List[str] = []

    for step in _processing_steps(job):
        name = str(getattr(step, "step", "") or "").strip() or "unknown"
        details = getattr(step, "details", {}) or {}
        if not isinstance(details, dict):
            continue

        if bool(details.get("fallback_used")):
            hard_used = True
            touched_steps.append(name)
            reason = str(details.get("reason") or "").strip()
            suffix = f" ({reason})" if reason else ""
            notes.append(f"{name}: hard fallback used{suffix}")

        try:
            sr_frames = int(details.get("sr_fallback_frames") or 0)
        except Exception:
            sr_frames = 0
        # A bare sr_fallback_used flag without a frame count still counts as one frame.
        if bool(details.get("sr_fallback_used")):
            sr_frames = max(sr_frames, 1)
        if sr_frames > 0:
            soft_used = True
            touched_steps.append(name)
            message = f"{name}: SR soft fallback on {sr_frames} frame(s)"
            method = str(details.get("sr_fallback_method") or "").strip()
            if method:
                message += f" via {method}"
            reason = str(details.get("sr_fallback_reason") or "").strip()
            if reason:
                message += f" ({reason})"
            notes.append(message)

    return {
        "fallback_used": bool(hard_used or soft_used),
        "hard_fallback_used": hard_used,
        "soft_sr_fallback_used": soft_used,
        # dict.fromkeys de-duplicates while preserving first-seen order.
        "fallback_steps": list(dict.fromkeys(touched_steps)),
        "warnings": list(dict.fromkeys(notes)),
    }
def _detect_faces_with_proxy_confidence(frame_bgr: Any) -> List[Dict[str, Any]]:
if cv2 is None:
return []
@@ -246,9 +318,29 @@ def build_quality_report(job: AuroraJob, outputs_dir: Path, *, refresh: bool = F
raise RuntimeError("Cannot build quality report: source/result file not found")
media_type: MediaType = job.media_type
processing_flags = _fallback_flags(job)
faces = _face_metrics(source_path, result_path, media_type)
plates = _plate_metrics(job_dir)
overall = _overall_metrics(source_path, result_path, media_type, job)
result_hash = _result_media_hash(job)
identical_to_input = bool(result_hash and result_hash == str(job.input_hash))
warnings = list(processing_flags.get("warnings") or [])
if identical_to_input:
warnings.append("output hash matches input hash; enhancement may be skipped.")
warnings = list(dict.fromkeys(warnings))
processing_status = "ok"
if bool(processing_flags.get("fallback_used")) or identical_to_input:
processing_status = "degraded"
overall["processing_status"] = processing_status
overall["fallback_used"] = bool(processing_flags.get("fallback_used"))
overall["hard_fallback_used"] = bool(processing_flags.get("hard_fallback_used"))
overall["soft_sr_fallback_used"] = bool(processing_flags.get("soft_sr_fallback_used"))
overall["identical_to_input"] = identical_to_input
if result_hash:
overall["result_hash"] = result_hash
if warnings:
overall["warnings"] = warnings
report = {
"job_id": job.job_id,
@@ -257,7 +349,13 @@ def build_quality_report(job: AuroraJob, outputs_dir: Path, *, refresh: bool = F
"faces": faces,
"plates": plates,
"overall": overall,
"processing_flags": {
**processing_flags,
"identical_to_input": identical_to_input,
"warnings": warnings,
},
"summary": {
"processing_status": processing_status,
"faces_detected_ratio": f"{faces['detected']} / {faces['source_detected'] or faces['detected']}",
"plates_recognized_ratio": f"{plates['recognized']} / {plates['detected']}",
},

View File

@@ -13,6 +13,7 @@ RUN pip install --no-cache-dir -r requirements.txt
# Copy application
COPY app/ ./app/
COPY static/ ./static/
# Environment
ENV PYTHONPATH=/app

View File

@@ -428,6 +428,8 @@ class Database:
CREATE INDEX IF NOT EXISTS idx_user_facts_user_id ON user_facts(user_id);
CREATE INDEX IF NOT EXISTS idx_user_facts_team_id ON user_facts(team_id);
CREATE UNIQUE INDEX IF NOT EXISTS idx_user_facts_user_team_agent_fact
ON user_facts(user_id, team_id, agent_id, fact_key);
""")
async def upsert_fact(
@@ -445,16 +447,30 @@ class Database:
json_value = json.dumps(fact_value_json) if fact_value_json else None
async with self.pool.acquire() as conn:
row = await conn.fetchrow("""
INSERT INTO user_facts (user_id, team_id, agent_id, fact_key, fact_value, fact_value_json)
VALUES ($1, $2, $3, $4, $5, $6::jsonb)
ON CONFLICT (user_id, team_id, agent_id, fact_key)
DO UPDATE SET
fact_value = EXCLUDED.fact_value,
fact_value_json = EXCLUDED.fact_value_json,
updated_at = NOW()
RETURNING *
""", user_id, team_id, agent_id, fact_key, fact_value, json_value)
try:
row = await conn.fetchrow("""
INSERT INTO user_facts (user_id, team_id, agent_id, fact_key, fact_value, fact_value_json)
VALUES ($1, $2, $3, $4, $5, $6::jsonb)
ON CONFLICT (user_id, team_id, agent_id, fact_key)
DO UPDATE SET
fact_value = EXCLUDED.fact_value,
fact_value_json = EXCLUDED.fact_value_json,
updated_at = NOW()
RETURNING *
""", user_id, team_id, agent_id, fact_key, fact_value, json_value)
except asyncpg.exceptions.InvalidColumnReferenceError:
# Backward compatibility for DBs that only have UNIQUE(user_id, team_id, fact_key).
row = await conn.fetchrow("""
INSERT INTO user_facts (user_id, team_id, agent_id, fact_key, fact_value, fact_value_json)
VALUES ($1, $2, $3, $4, $5, $6::jsonb)
ON CONFLICT (user_id, team_id, fact_key)
DO UPDATE SET
agent_id = EXCLUDED.agent_id,
fact_value = EXCLUDED.fact_value,
fact_value_json = EXCLUDED.fact_value_json,
updated_at = NOW()
RETURNING *
""", user_id, team_id, agent_id, fact_key, fact_value, json_value)
return dict(row) if row else {}

View File

@@ -650,6 +650,7 @@ class FactUpsertRequest(BaseModel):
fact_value: Optional[str] = None
fact_value_json: Optional[dict] = None
team_id: Optional[str] = None
agent_id: Optional[str] = None
@app.post("/facts/upsert")
async def upsert_fact(request: FactUpsertRequest):
@@ -663,13 +664,17 @@ async def upsert_fact(request: FactUpsertRequest):
# Ensure facts table exists (will be created on first call)
await db.ensure_facts_table()
# Upsert the fact
# Upsert the fact — extract agent_id from request field or from fact_value_json
agent_id_val = request.agent_id or (
(request.fact_value_json or {}).get("agent_id")
)
result = await db.upsert_fact(
user_id=request.user_id,
fact_key=request.fact_key,
fact_value=request.fact_value,
fact_value_json=request.fact_value_json,
team_id=request.team_id
team_id=request.team_id,
agent_id=agent_id_val
)
logger.info(f"fact_upserted", user_id=request.user_id, fact_key=request.fact_key)

View File

@@ -30,7 +30,7 @@ python-multipart==0.0.9
tiktoken==0.5.2
# Voice stack
edge-tts==6.1.19
edge-tts==7.2.7
faster-whisper==1.1.1
# Testing

View File

@@ -14,3 +14,19 @@ STT_PROVIDER = os.getenv("STT_PROVIDER", "none")
def _int_env(name: str, default: str) -> int:
    """Read an integer tuning knob from the environment, falling back to *default*."""
    return int(os.environ.get(name, default))


# Provider selection — "none" disables the corresponding capability.
TTS_PROVIDER = os.environ.get("TTS_PROVIDER", "none")
OCR_PROVIDER = os.environ.get("OCR_PROVIDER", "vision_prompted")
IMAGE_PROVIDER = os.environ.get("IMAGE_PROVIDER", "none")

# Memory Service URL (used by memory_service STT/TTS providers)
MEMORY_SERVICE_URL = os.environ.get("MEMORY_SERVICE_URL", "http://memory-service:8000")

# ── Voice HA: dedicated concurrency limits (separate from generic stt/tts/llm) ──
# These control semaphores for node.{id}.voice.*.request subjects.
# Independent from MAX_CONCURRENCY so voice never starves generic inference.
VOICE_MAX_CONCURRENT_TTS = _int_env("VOICE_MAX_CONCURRENT_TTS", "4")
VOICE_MAX_CONCURRENT_LLM = _int_env("VOICE_MAX_CONCURRENT_LLM", "2")
VOICE_MAX_CONCURRENT_STT = _int_env("VOICE_MAX_CONCURRENT_STT", "2")

# Timeouts for voice subjects (milliseconds). Router uses these as defaults.
VOICE_TTS_DEADLINE_MS = _int_env("VOICE_TTS_DEADLINE_MS", "3000")
VOICE_LLM_FAST_MS = _int_env("VOICE_LLM_FAST_MS", "9000")
VOICE_LLM_QUALITY_MS = _int_env("VOICE_LLM_QUALITY_MS", "12000")
VOICE_STT_DEADLINE_MS = _int_env("VOICE_STT_DEADLINE_MS", "6000")

View File

@@ -8,6 +8,7 @@ try:
PROM_AVAILABLE = True
REGISTRY = CollectorRegistry()
# Generic job metrics
jobs_total = Counter(
"node_worker_jobs_total", "Jobs processed",
["type", "status"], registry=REGISTRY,
@@ -23,6 +24,26 @@ try:
registry=REGISTRY,
)
# ── Voice HA metrics (separate labels from generic) ───────────────────────
# cap label: "voice.tts" | "voice.llm" | "voice.stt"
voice_jobs_total = Counter(
"node_worker_voice_jobs_total",
"Voice HA jobs processed (node.{id}.voice.*.request)",
["cap", "status"], registry=REGISTRY,
)
voice_inflight_gauge = Gauge(
"node_worker_voice_inflight",
"Voice HA inflight jobs per capability",
["cap"], registry=REGISTRY,
)
voice_latency_hist = Histogram(
"node_worker_voice_latency_ms",
"Voice HA job latency in ms",
["cap"],
buckets=[100, 250, 500, 1000, 1500, 2000, 3000, 5000, 9000, 12000],
registry=REGISTRY,
)
except ImportError:
PROM_AVAILABLE = False
REGISTRY = None
@@ -44,6 +65,21 @@ def observe_latency(req_type: str, model: str, latency_ms: int):
latency_hist.labels(type=req_type, model=model).observe(latency_ms)
def inc_voice_job(cap: str, status: str):
    """Count one processed voice job, labelled by capability and outcome."""
    if not PROM_AVAILABLE:
        return
    voice_jobs_total.labels(cap=cap, status=status).inc()
def set_voice_inflight(cap: str, count: int):
    """Publish the current in-flight voice job count for one capability."""
    if not PROM_AVAILABLE:
        return
    voice_inflight_gauge.labels(cap=cap).set(count)
def observe_voice_latency(cap: str, latency_ms: int):
    """Record one voice job latency sample (milliseconds) into the histogram."""
    if not PROM_AVAILABLE:
        return
    voice_latency_hist.labels(cap=cap).observe(latency_ms)
def get_metrics_text():
if PROM_AVAILABLE and REGISTRY:
return generate_latest(REGISTRY)

View File

@@ -43,7 +43,30 @@ async def prom_metrics():
@app.get("/caps")
async def caps():
"""Capability flags for NCS to aggregate."""
"""Capability flags for NCS to aggregate.
Semantic vs operational separation (contract):
- capabilities.voice_* = semantic availability (provider configured).
True as long as the provider is configured, regardless of NATS state.
Routing decisions are based on this.
- runtime.nats_subscriptions.voice_* = operational (NATS sub active).
Used for health/telemetry only — NOT for routing.
This prevents false-negatives during reconnects / restart races.
"""
import worker as _w
nid = config.NODE_ID.lower()
# Semantic: provider configured → capability is available
voice_tts_cap = config.TTS_PROVIDER != "none"
voice_stt_cap = config.STT_PROVIDER != "none"
voice_llm_cap = True # LLM always available when node-worker is up
# Operational: actual NATS subscription state (health/telemetry only)
nats_voice_tts_active = f"node.{nid}.voice.tts.request" in _w._VOICE_SUBJECTS
nats_voice_stt_active = f"node.{nid}.voice.stt.request" in _w._VOICE_SUBJECTS
nats_voice_llm_active = f"node.{nid}.voice.llm.request" in _w._VOICE_SUBJECTS
return {
"node_id": config.NODE_ID,
"capabilities": {
@@ -53,6 +76,10 @@ async def caps():
"tts": config.TTS_PROVIDER != "none",
"ocr": config.OCR_PROVIDER != "none",
"image": config.IMAGE_PROVIDER != "none",
# Voice HA semantic capability flags (provider-based, not NATS-based)
"voice_tts": voice_tts_cap,
"voice_llm": voice_llm_cap,
"voice_stt": voice_stt_cap,
},
"providers": {
"stt": config.STT_PROVIDER,
@@ -65,6 +92,19 @@ async def caps():
"vision": config.DEFAULT_VISION,
},
"concurrency": config.MAX_CONCURRENCY,
"voice_concurrency": {
"voice_tts": config.VOICE_MAX_CONCURRENT_TTS,
"voice_llm": config.VOICE_MAX_CONCURRENT_LLM,
"voice_stt": config.VOICE_MAX_CONCURRENT_STT,
},
# Operational NATS subscription state — for health/monitoring only
"runtime": {
"nats_subscriptions": {
"voice_tts_active": nats_voice_tts_active,
"voice_stt_active": nats_voice_stt_active,
"voice_llm_active": nats_voice_llm_active,
}
},
}

View File

@@ -11,24 +11,44 @@ from models import JobRequest, JobResponse, JobError
from idempotency import IdempotencyStore
from providers import ollama, ollama_vision
from providers import stt_mlx_whisper, tts_mlx_kokoro
from providers import stt_memory_service, tts_memory_service
import fabric_metrics as fm
# Module-level logger for the node-worker NATS handlers.
logger = logging.getLogger("node-worker")
# De-duplicates retried jobs — exact semantics live in IdempotencyStore (not visible here).
_idem = IdempotencyStore()
# Generic inference concurrency gate (llm/vision/stt/tts/ocr subjects).
_semaphore: asyncio.Semaphore = asyncio.Semaphore(config.MAX_CONCURRENCY)
# Voice-dedicated semaphores — independent from generic MAX_CONCURRENCY.
# Prevents voice requests from starving generic inference and vice versa.
_voice_sem_tts: asyncio.Semaphore = asyncio.Semaphore(config.VOICE_MAX_CONCURRENT_TTS)
_voice_sem_llm: asyncio.Semaphore = asyncio.Semaphore(config.VOICE_MAX_CONCURRENT_LLM)
_voice_sem_stt: asyncio.Semaphore = asyncio.Semaphore(config.VOICE_MAX_CONCURRENT_STT)
# Lookup table: voice capability key ("voice.tts" | "voice.llm" | "voice.stt")
# -> its dedicated semaphore.
_VOICE_SEMAPHORES = {
    "voice.tts": _voice_sem_tts,
    "voice.llm": _voice_sem_llm,
    "voice.stt": _voice_sem_stt,
}
# NATS client handle; set by start(), None until the worker is started.
_nats_client = None
# In-flight counter for the generic handler path (voice tracked separately below).
_inflight_count: int = 0
# Per-capability in-flight counters for the voice path (exported via fabric_metrics).
_voice_inflight: Dict[str, int] = {"voice.tts": 0, "voice.llm": 0, "voice.stt": 0}
# Rolling latency samples — presumably for runtime-load reporting; buffer size
# capped at _LATENCY_BUFFER (trimming happens elsewhere — TODO confirm).
_latencies_llm: list = []
_latencies_vision: list = []
_LATENCY_BUFFER = 50
# Set of subjects that use the voice handler path
_VOICE_SUBJECTS: set = set()
async def start(nats_client):
global _nats_client
_nats_client = nats_client
nid = config.NODE_ID.lower()
# Generic subjects (unchanged — backward compatible)
subjects = [
f"node.{nid}.llm.request",
f"node.{nid}.vision.request",
@@ -41,6 +61,31 @@ async def start(nats_client):
await nats_client.subscribe(subj, cb=_handle_request)
logger.info(f"✅ Subscribed: {subj}")
# Voice HA subjects — separate semaphores, own metrics, own deadlines
# Only subscribe if the relevant provider is configured (preflight-first)
voice_subjects_to_caps = {
f"node.{nid}.voice.tts.request": ("tts", _voice_sem_tts, "voice.tts"),
f"node.{nid}.voice.llm.request": ("llm", _voice_sem_llm, "voice.llm"),
f"node.{nid}.voice.stt.request": ("stt", _voice_sem_stt, "voice.stt"),
}
for subj, (required_cap, sem, cap_key) in voice_subjects_to_caps.items():
if required_cap == "tts" and config.TTS_PROVIDER == "none":
logger.info(f"⏭ Skipping {subj}: TTS_PROVIDER=none")
continue
if required_cap == "stt" and config.STT_PROVIDER == "none":
logger.info(f"⏭ Skipping {subj}: STT_PROVIDER=none")
continue
# LLM always available on this node
_VOICE_SUBJECTS.add(subj)
async def _make_voice_handler(s=sem, k=cap_key):
async def _voice_handler(msg):
await _handle_voice_request(msg, voice_sem=s, cap_key=k)
return _voice_handler
await nats_client.subscribe(subj, cb=await _make_voice_handler())
logger.info(f"✅ Voice subscribed: {subj}")
async def _handle_request(msg):
t0 = time.time()
@@ -136,6 +181,103 @@ async def _handle_request(msg):
pass
async def _handle_voice_request(msg, voice_sem: asyncio.Semaphore, cap_key: str):
    """Voice-dedicated handler: separate semaphore, metrics, retry hints.

    Maps voice.{tts|llm|stt} to the same _execute() but with:
    - Own concurrency limit (VOICE_MAX_CONCURRENT_{TTS|LLM|STT})
    - TOO_BUSY includes retry_after_ms hint (client can retry immediately elsewhere)
    - Voice-specific Prometheus labels (type=voice.tts, etc.)
    - WARNING log on fallback (contract: no silent fallback)

    Args:
        msg: NATS message carrying a JSON-encoded JobRequest.
        voice_sem: the capability's dedicated semaphore.
        cap_key: "voice.tts" | "voice.llm" | "voice.stt".
    """
    t0 = time.time()
    # Extract the base type for _execute (voice.tts → tts)
    base_type = cap_key.split(".")[-1]  # "tts", "llm", "stt"
    try:
        raw = msg.data
        if len(raw) > config.MAX_PAYLOAD_BYTES:
            await _reply(msg, JobResponse(
                node_id=config.NODE_ID, status="error",
                error=JobError(code="PAYLOAD_TOO_LARGE", message=f"max {config.MAX_PAYLOAD_BYTES} bytes"),
            ))
            return
        data = json.loads(raw)
        job = JobRequest(**data)
        job.trace_id = job.trace_id or job.job_id
        remaining = job.remaining_ms()
        if remaining <= 0:
            await _reply(msg, JobResponse(
                job_id=job.job_id, trace_id=job.trace_id, node_id=config.NODE_ID,
                status="timeout", error=JobError(code="DEADLINE_EXCEEDED"),
            ))
            return
        # Voice concurrency check — TOO_BUSY includes retry hint.
        # BUGFIX: use the public Semaphore.locked() API instead of poking the
        # private `_value` attribute (fragile across asyncio versions).
        # There is no await between this check and the acquire below, so the
        # check cannot race within a single event loop.
        if voice_sem.locked():
            logger.warning(
                "[voice.busy] cap=%s node=%s — all %d slots occupied. "
                "WARNING: request turned away, Router should failover.",
                cap_key, config.NODE_ID, {
                    "voice.tts": config.VOICE_MAX_CONCURRENT_TTS,
                    "voice.llm": config.VOICE_MAX_CONCURRENT_LLM,
                    "voice.stt": config.VOICE_MAX_CONCURRENT_STT,
                }.get(cap_key, "?"),
            )
            fm.inc_voice_job(cap_key, "busy")
            await _reply(msg, JobResponse(
                job_id=job.job_id, trace_id=job.trace_id, node_id=config.NODE_ID,
                status="busy",
                error=JobError(
                    code="TOO_BUSY",
                    message=f"voice {cap_key} at capacity",
                    details={"retry_after_ms": 500, "cap": cap_key},
                ),
            ))
            return
        # In-place dict mutation — no `global` statement needed (we never rebind).
        _voice_inflight[cap_key] = _voice_inflight.get(cap_key, 0) + 1
        fm.set_voice_inflight(cap_key, _voice_inflight[cap_key])
        try:
            async with voice_sem:
                # Route to _execute with the base type
                job.required_type = base_type
                resp = await _execute(job, remaining)
        finally:
            _voice_inflight[cap_key] = max(0, _voice_inflight.get(cap_key, 1) - 1)
            fm.set_voice_inflight(cap_key, _voice_inflight[cap_key])
        resp.latency_ms = int((time.time() - t0) * 1000)
        fm.inc_voice_job(cap_key, resp.status)
        if resp.status == "ok" and resp.latency_ms > 0:
            fm.observe_voice_latency(cap_key, resp.latency_ms)
        # Contract: log WARNING on any non-ok voice result
        if resp.status != "ok":
            logger.warning(
                "[voice.fallback] cap=%s node=%s status=%s error=%s trace=%s",
                cap_key, config.NODE_ID, resp.status,
                resp.error.code if resp.error else "?", job.trace_id,
            )
        await _reply(msg, resp)
    except Exception as e:
        logger.exception(f"Voice handler error cap={cap_key}: {e}")
        fm.inc_voice_job(cap_key, "error")
        try:
            await _reply(msg, JobResponse(
                node_id=config.NODE_ID, status="error",
                error=JobError(code="INTERNAL", message=str(e)[:200]),
            ))
        except Exception:
            pass
async def _execute(job: JobRequest, remaining_ms: int) -> JobResponse:
payload = job.payload
hints = job.hints
@@ -184,9 +326,14 @@ async def _execute(job: JobRequest, remaining_ms: int) -> JobResponse:
status="error",
error=JobError(code="NOT_AVAILABLE", message="STT not configured on this node"),
)
result = await asyncio.wait_for(
stt_mlx_whisper.transcribe(payload), timeout=timeout_s,
)
if config.STT_PROVIDER == "memory_service":
result = await asyncio.wait_for(
stt_memory_service.transcribe(payload), timeout=timeout_s,
)
else:
result = await asyncio.wait_for(
stt_mlx_whisper.transcribe(payload), timeout=timeout_s,
)
elif job.required_type == "tts":
if config.TTS_PROVIDER == "none":
return JobResponse(
@@ -194,9 +341,14 @@ async def _execute(job: JobRequest, remaining_ms: int) -> JobResponse:
status="error",
error=JobError(code="NOT_AVAILABLE", message="TTS not configured on this node"),
)
result = await asyncio.wait_for(
tts_mlx_kokoro.synthesize(payload), timeout=timeout_s,
)
if config.TTS_PROVIDER == "memory_service":
result = await asyncio.wait_for(
tts_memory_service.synthesize(payload), timeout=timeout_s,
)
else:
result = await asyncio.wait_for(
tts_mlx_kokoro.synthesize(payload), timeout=timeout_s,
)
elif job.required_type == "ocr":
if config.OCR_PROVIDER == "none":
return JobResponse(

View File

@@ -40,6 +40,31 @@ try:
registry=REGISTRY,
)
# ── Voice HA metrics ──────────────────────────────────────────────────────
# cap label: "voice_tts" | "voice_llm" | "voice_stt"
voice_cap_requests = Counter(
"fabric_voice_capability_requests_total",
"Voice HA capability routing requests",
["cap", "status"], registry=REGISTRY,
)
voice_offload_total = Counter(
"fabric_voice_offload_total",
"Voice HA offload attempts (node selected + NATS sent)",
["cap", "node", "status"], registry=REGISTRY,
)
voice_breaker_state = Gauge(
"fabric_voice_breaker_state",
"Voice HA circuit breaker per node+cap (1=open)",
["cap", "node"], registry=REGISTRY,
)
voice_score_hist = Histogram(
"fabric_voice_score_ms",
"Voice HA node scoring distribution",
["cap"],
buckets=[0, 50, 100, 200, 400, 800, 1600, 3200],
registry=REGISTRY,
)
except ImportError:
PROM_AVAILABLE = False
REGISTRY = None
@@ -76,6 +101,26 @@ def observe_score(score: int):
score_hist.observe(score)
def inc_voice_cap_request(cap: str, status: str):
    """Count one voice capability routing request, labelled by cap and outcome."""
    if not PROM_AVAILABLE:
        return
    voice_cap_requests.labels(cap=cap, status=status).inc()
def inc_voice_offload(cap: str, node: str, status: str):
    """Count one voice offload attempt, labelled by cap, target node, and outcome."""
    if not PROM_AVAILABLE:
        return
    voice_offload_total.labels(cap=cap, node=node, status=status).inc()
def set_voice_breaker(cap: str, node: str, is_open: bool):
    """Publish circuit-breaker state for a node+cap pair (1 = open, 0 = closed)."""
    if not PROM_AVAILABLE:
        return
    voice_breaker_state.labels(cap=cap, node=node).set(1 if is_open else 0)
def observe_voice_score(cap: str, score: float):
    """Record one node-scoring sample for a voice capability."""
    if not PROM_AVAILABLE:
        return
    voice_score_hist.labels(cap=cap).observe(score)
def get_metrics_text() -> Optional[bytes]:
if PROM_AVAILABLE and REGISTRY:
return generate_latest(REGISTRY)

View File

@@ -64,6 +64,12 @@ except ImportError:
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
NEO4J_NOTIFICATIONS_LOG_LEVEL = os.getenv("NEO4J_NOTIFICATIONS_LOG_LEVEL", "ERROR").strip().upper()
_neo4j_notifications_level = getattr(logging, NEO4J_NOTIFICATIONS_LOG_LEVEL, logging.ERROR)
logging.getLogger("neo4j.notifications").setLevel(_neo4j_notifications_level)
# Guard against late/conditional auto-router imports.
# If auto-router module is unavailable (or loaded later), inference must still work.
SOFIIA_AUTO_ROUTER_AVAILABLE = False
TRUSTED_DOMAINS_CONFIG_PATH = os.getenv("TRUSTED_DOMAINS_CONFIG_PATH", "./trusted_domains.yml")
_trusted_domains_cache: Dict[str, Any] = {"mtime": None, "data": {}}
@@ -289,8 +295,24 @@ DETERMINISTIC_PLANT_POLICY_AGENTS = {
REPEAT_FINGERPRINT_MIN_SIMILARITY = float(os.getenv("AGENT_REPEAT_FINGERPRINT_MIN_SIMILARITY", "0.92"))
def _clean_think_blocks(text: str) -> str:
"""Remove <think>...</think> reasoning blocks from LLM output (Qwen3/DeepSeek-R1).
Strategy:
1. Strip complete <think>...</think> blocks (DOTALL for multiline).
2. Fallback: if an unclosed <think> remains, drop everything after it.
"""
cleaned = re.sub(r"<think>.*?</think>", "", text,
flags=re.DOTALL | re.IGNORECASE)
# Fallback: unclosed <think> — truncate before it
if "<think>" in cleaned.lower():
cleaned = re.split(r"(?i)<think>", cleaned)[0]
return cleaned
def _normalize_text_response(text: str) -> str:
    """Normalize an LLM response: strip <think> reasoning blocks, then
    collapse all whitespace runs to single spaces and trim the ends.

    BUGFIX: a leftover pre-change `return` statement preceded the
    think-block cleanup, making that path unreachable dead code.
    """
    cleaned = _clean_think_blocks(str(text or ""))
    return re.sub(r"\s+", " ", cleaned).strip()
def _response_fingerprint(text: str) -> str:
@@ -1689,6 +1711,20 @@ async def internal_llm_complete(request: InternalLLMRequest):
tokens = data.get("usage", {}).get("total_tokens", 0)
latency = int((time_module.time() - t0) * 1000)
logger.info(f"Internal LLM success: {cloud['name']}, {tokens} tokens, {latency}ms")
# Track usage for budget dashboard
if SOFIIA_AUTO_ROUTER_AVAILABLE:
try:
usage_data = data.get("usage", {})
track_usage(
provider=cloud["name"],
model=cloud["model"],
agent=request.metadata.get("agent_id", "unknown") if request.metadata else "unknown",
input_tokens=usage_data.get("prompt_tokens", tokens // 2 if tokens else 0),
output_tokens=usage_data.get("completion_tokens", tokens // 2 if tokens else 0),
latency_ms=latency,
)
except Exception as _te:
logger.debug("budget track error: %s", _te)
return InternalLLMResponse(text=response_text, model=cloud["model"], provider=cloud["name"], tokens_used=tokens, latency_ms=latency)
except Exception as e:
logger.warning(f"Internal LLM {cloud['name']} failed: {e}")
@@ -2086,8 +2122,39 @@ async def agent_infer(agent_id: str, request: InferRequest):
routing_rules = router_config.get("routing", [])
default_llm = _select_default_llm(agent_id, metadata, default_llm, routing_rules)
cloud_provider_names = {"deepseek", "mistral", "grok", "openai", "anthropic"}
# ── Sofiia Auto-Router: dynamic model selection based on task type ──────
if agent_id == "sofiia" and SOFIIA_AUTO_ROUTER_AVAILABLE and not request.model:
try:
_auto_result = select_model_auto(
prompt=request.prompt or "",
force_fast=metadata.get("force_fast", False),
force_capable=metadata.get("force_capable", False),
prefer_local=metadata.get("prefer_local", False),
prefer_cheap=metadata.get("prefer_cheap", False),
budget_aware=True,
)
# Only override if auto-selected profile exists in config
if _auto_result.profile_name in router_config.get("llm_profiles", {}):
logger.info(
"🧠 Sofiia Auto-Router: task=%s complexity=%s → profile=%s model=%s reason=%s",
_auto_result.task_type, _auto_result.complexity,
_auto_result.profile_name, _auto_result.model_id,
_auto_result.reason,
)
default_llm = _auto_result.profile_name
else:
logger.debug(
"🧠 Sofiia Auto-Router: profile %s not in config, using %s",
_auto_result.profile_name, default_llm,
)
except Exception as _ar_e:
logger.warning("⚠️ Sofiia Auto-Router error: %s", _ar_e)
# Pass routing-resolved default_llm to NCS so it respects cloud routing rules
ncs_agent_config = {**agent_config, "default_llm": default_llm}
cloud_provider_names = {"deepseek", "mistral", "grok", "openai", "anthropic", "glm"}
# ── Global NCS-first model selection (multi-node) ───────────────────
ncs_selection = None
@@ -2095,7 +2162,7 @@ async def agent_infer(agent_id: str, request: InferRequest):
try:
gcaps = await global_capabilities_client.get_global_capabilities()
ncs_selection = await select_model_for_agent(
agent_id, agent_config, router_config, gcaps, request.model,
agent_id, ncs_agent_config, router_config, gcaps, request.model,
)
except Exception as e:
logger.warning(f"⚠️ Global NCS selection error: {e}; falling back to static")
@@ -2103,7 +2170,7 @@ async def agent_infer(agent_id: str, request: InferRequest):
try:
caps = await capabilities_client.fetch_capabilities()
ncs_selection = await select_model_for_agent(
agent_id, agent_config, router_config, caps, request.model,
agent_id, ncs_agent_config, router_config, caps, request.model,
)
except Exception as e:
logger.warning(f"⚠️ NCS selection error: {e}; falling back to static")
@@ -2678,11 +2745,218 @@ async def agent_infer(agent_id: str, request: InferRequest):
}
]
# GLM (Z.AI / BigModel) — OpenAI-compatible but with special JWT auth.
if provider == "glm" and allow_cloud:
glm_key = os.getenv(llm_profile.get("api_key_env", "GLM5_API_KEY"), "")
if glm_key:
glm_model = request.model or llm_profile.get("model", "glm-4-flash")
glm_base_url = llm_profile.get("base_url", "https://open.bigmodel.cn/api/paas/v4")
glm_max_tokens = int(request.max_tokens or llm_profile.get("max_tokens", 4096))
glm_temperature = float(request.temperature if request.temperature is not None else llm_profile.get("temperature", 0.3))
glm_timeout = int(llm_profile.get("timeout_ms", 30000) / 1000)
try:
glm_resp = await http_client.post(
f"{glm_base_url}/chat/completions",
headers={"Authorization": f"Bearer {glm_key}", "Content-Type": "application/json"},
json={
"model": glm_model,
"messages": messages,
"max_tokens": glm_max_tokens,
"temperature": glm_temperature,
"stream": False,
},
timeout=float(glm_timeout),
)
if glm_resp.status_code == 200:
glm_data = glm_resp.json()
response_text = glm_data.get("choices", [{}])[0].get("message", {}).get("content", "")
glm_tokens = glm_data.get("usage", {}).get("total_tokens", 0)
if SOFIIA_AUTO_ROUTER_AVAILABLE:
try:
usage_d = glm_data.get("usage", {})
track_usage(
provider="glm", model=glm_model, agent=agent_id,
input_tokens=usage_d.get("prompt_tokens", glm_tokens // 2 if glm_tokens else 0),
output_tokens=usage_d.get("completion_tokens", glm_tokens // 2 if glm_tokens else 0),
)
except Exception:
pass
response_text = await _finalize_response_text(response_text, f"glm-{glm_model}")
return InferResponse(
response=response_text,
model=glm_model,
backend="glm",
tokens_used=glm_tokens,
)
else:
logger.warning("🐉 GLM API error %s: %s", glm_resp.status_code, glm_resp.text[:200])
except Exception as _glm_e:
logger.warning("🐉 GLM call failed: %s", _glm_e)
else:
logger.warning("🐉 GLM provider selected but GLM5_API_KEY not set")
# Fall through to Ollama
# Anthropic has its own API format — handle separately before the loop.
if provider == "anthropic" and allow_cloud:
anthropic_key = os.getenv(llm_profile.get("api_key_env", "ANTHROPIC_API_KEY"), "")
if anthropic_key:
anthropic_model = request.model or llm_profile.get("model", "claude-sonnet-4-5")
anthropic_max_tokens = int(request.max_tokens or llm_profile.get("max_tokens", 8192))
anthropic_temperature = float(request.temperature if request.temperature is not None else llm_profile.get("temperature", 0.2))
anthropic_timeout = int(llm_profile.get("timeout_ms", 120000) / 1000)
try:
# Extract system prompt from messages
anthropic_system = ""
anthropic_messages = []
for msg in messages:
role = msg.get("role", "user")
content = msg.get("content", "")
if role == "system":
anthropic_system = content
else:
anthropic_messages.append({"role": role, "content": content})
if not anthropic_messages:
anthropic_messages = [{"role": "user", "content": request.prompt}]
# Build tool definitions for Claude
anthropic_tools = None
if TOOL_MANAGER_AVAILABLE and tool_manager:
raw_tools = tool_manager.get_tool_definitions(request_agent_id)
if raw_tools:
anthropic_tools = []
for t in raw_tools:
fn = t.get("function", {})
anthropic_tools.append({
"name": fn.get("name", "unknown"),
"description": fn.get("description", ""),
"input_schema": fn.get("parameters") or {"type": "object", "properties": {}},
})
anthropic_payload: Dict[str, Any] = {
"model": anthropic_model,
"max_tokens": anthropic_max_tokens,
"temperature": anthropic_temperature,
"messages": anthropic_messages,
}
if anthropic_system:
anthropic_payload["system"] = anthropic_system
if anthropic_tools:
anthropic_payload["tools"] = anthropic_tools
logger.info(f"🟣 Anthropic Claude API: model={anthropic_model} agent={agent_id}")
anthropic_resp = await http_client.post(
"https://api.anthropic.com/v1/messages",
headers={
"x-api-key": anthropic_key,
"anthropic-version": "2023-06-01",
"content-type": "application/json",
},
json=anthropic_payload,
timeout=anthropic_timeout,
)
if anthropic_resp.status_code == 200:
anthropic_data = anthropic_resp.json()
response_text = ""
for block in anthropic_data.get("content", []):
if block.get("type") == "text":
response_text += block.get("text", "")
tokens_used = (
anthropic_data.get("usage", {}).get("input_tokens", 0)
+ anthropic_data.get("usage", {}).get("output_tokens", 0)
)
# Handle tool_use blocks from Claude
claude_tool_uses = [b for b in anthropic_data.get("content", []) if b.get("type") == "tool_use"]
if claude_tool_uses and TOOL_MANAGER_AVAILABLE and tool_manager:
tool_result_messages = list(anthropic_messages)
tool_result_messages.append({"role": "assistant", "content": anthropic_data.get("content", [])})
for tool_use_block in claude_tool_uses:
tool_name = tool_use_block.get("name", "")
tool_input = tool_use_block.get("input", {})
tool_use_id = tool_use_block.get("id", "")
logger.info(f"🔧 Claude tool call: {tool_name}({json.dumps(tool_input)[:100]})")
try:
tool_exec_result = await tool_manager.execute_tool(
tool_name, tool_input,
agent_id=request_agent_id, chat_id=chat_id, user_id=user_id,
)
tool_content = tool_exec_result.result if tool_exec_result.success else f"Error: {tool_exec_result.error}"
except Exception as te:
tool_content = f"Tool execution error: {te}"
tool_result_messages.append({
"role": "user",
"content": [{"type": "tool_result", "tool_use_id": tool_use_id, "content": str(tool_content)}]
})
# Follow-up call with tool results
anthropic_payload["messages"] = tool_result_messages
followup_resp = await http_client.post(
"https://api.anthropic.com/v1/messages",
headers={
"x-api-key": anthropic_key,
"anthropic-version": "2023-06-01",
"content-type": "application/json",
},
json=anthropic_payload,
timeout=anthropic_timeout,
)
if followup_resp.status_code == 200:
followup_data = followup_resp.json()
response_text = ""
for block in followup_data.get("content", []):
if block.get("type") == "text":
response_text += block.get("text", "")
tokens_used += (
followup_data.get("usage", {}).get("input_tokens", 0)
+ followup_data.get("usage", {}).get("output_tokens", 0)
)
response_text = await _finalize_response_text(response_text, f"anthropic-{anthropic_model}")
if MEMORY_RETRIEVAL_AVAILABLE and memory_retrieval and chat_id and user_id:
asyncio.create_task(memory_retrieval.store_message(
agent_id=agent_id, user_id=user_id, username=username,
message_text=request.prompt, response_text=response_text,
chat_id=chat_id, metadata={"model": anthropic_model, "provider": "anthropic"},
))
# Track Anthropic usage for budget dashboard
if SOFIIA_AUTO_ROUTER_AVAILABLE:
try:
track_usage(
provider="anthropic",
model=anthropic_model,
agent=agent_id,
input_tokens=tokens_used // 3 if tokens_used else 0,
output_tokens=tokens_used - tokens_used // 3 if tokens_used else 0,
latency_ms=int((time_module.time() - _t_start) * 1000) if "_t_start" in dir() else 0,
task_type="",
)
except Exception as _te:
logger.debug("budget track anthropic error: %s", _te)
return InferResponse(
response=response_text,
model=anthropic_model,
backend="anthropic",
tokens_used=tokens_used,
)
else:
err_body = anthropic_resp.text[:300]
logger.warning(f"🟣 Anthropic API error {anthropic_resp.status_code}: {err_body}")
except Exception as anthropic_exc:
logger.warning(f"🟣 Anthropic call failed: {anthropic_exc}")
else:
logger.warning("🟣 Anthropic provider selected but ANTHROPIC_API_KEY not set")
# Fall through to Ollama if Anthropic fails
if not allow_cloud:
cloud_providers = []
# If specific provider requested, try it first
if provider in ["deepseek", "mistral", "grok"]:
# GLM in OpenAI-compat fallback list for internal/non-sofiia requests
glm_key_fb = os.getenv("GLM5_API_KEY", "")
if glm_key_fb:
cloud_providers.insert(0, {
"name": "glm",
"api_key_env": "GLM5_API_KEY",
"base_url": "https://open.bigmodel.cn/api/paas/v4",
"model": "glm-4-flash",
"timeout": 20,
})
if provider in ["deepseek", "mistral", "grok", "glm"]:
# Reorder to put requested provider first
cloud_providers = sorted(cloud_providers, key=lambda x: 0 if x["name"] == provider else 1)
@@ -3666,6 +3940,184 @@ async def capability_offload(cap_type: str, request: Request):
})
@app.post("/v1/capability/voice_{voice_cap_type}")
async def voice_capability_offload(voice_cap_type: str, request: Request):
    """Route a Voice HA request (voice_tts / voice_llm / voice_stt) to the best node.

    Uses voice-specific NATS subjects (node.{id}.voice.{type}.request) and
    separate circuit breaker keys from generic offload. Returns response headers:
    - X-Voice-Node: chosen node id
    - X-Voice-Mode: local | remote (relative to the router's own node)
    - X-Voice-Cap: the capability type routed (voice_tts, voice_llm, voice_stt)

    Contract: no silent fallback — any failure increments Prometheus counter +
    logs WARNING before returning 50x.
    """
    import uuid as _uuid
    import fabric_metrics as fm

    cap_type = voice_cap_type  # "tts", "llm", or "stt"
    full_cap = f"voice_{cap_type}"
    valid_caps = {"tts", "llm", "stt"}
    if cap_type not in valid_caps:
        fm.inc_voice_cap_request(full_cap, "invalid")
        return JSONResponse(status_code=400, content={
            "error": f"Invalid voice cap: {cap_type}. Valid: voice_tts, voice_llm, voice_stt",
        })
    if not NCS_AVAILABLE or not global_capabilities_client:
        fm.inc_voice_cap_request(full_cap, "ncs_unavailable")
        logger.warning("[voice.cap] NCS unavailable — cannot route %s", full_cap)
        return JSONResponse(status_code=503, content={
            "error": "NCS not available — cannot route voice capability requests",
        })
    # Preflight: refuse to route on stale capability data.
    gcaps = await global_capabilities_client.require_fresh_caps(ttl=30)
    if gcaps is None:
        fm.inc_voice_cap_request(full_cap, "stale_caps")
        logger.warning("[voice.cap] caps stale — refusing to route %s", full_cap)
        return JSONResponse(status_code=503, content={
            "error": "NCS caps stale — preflight failed",
        })
    eligible_nodes = global_capabilities_client.find_nodes_with_capability(full_cap)
    if not eligible_nodes:
        fm.inc_voice_cap_request(full_cap, "no_node")
        logger.warning("[voice.cap] no node with %s available", full_cap)
        return JSONResponse(status_code=404, content={
            "error": f"No node with capability '{full_cap}' available",
            "hint": f"Ensure node-worker is running with TTS_PROVIDER/STT_PROVIDER set and {full_cap}=true in /caps",
        })
    # Voice uses separate CB key to avoid cross-contaminating generic stt/tts breakers
    voice_cb_type = f"voice.{cap_type}"
    unavailable = offload_client.get_unavailable_nodes(voice_cb_type) if offload_client else set()
    available = [n for n in eligible_nodes if n.lower() not in {u.lower() for u in unavailable}]
    if not available:
        fm.inc_voice_cap_request(full_cap, "all_broken")
        logger.warning("[voice.cap] all nodes circuit-broken for %s: %s", full_cap, eligible_nodes)
        return JSONResponse(status_code=503, content={
            "error": f"All nodes with '{full_cap}' are circuit-broken",
            "eligible": eligible_nodes,
            "unavailable": list(unavailable),
        })
    # ── Voice scoring: prefer local, penalise high load + high latency ────────
    router_node_id = os.getenv("NODE_ID", "noda2").lower()
    LOCAL_THRESHOLD_MS = int(os.getenv("VOICE_LOCAL_THRESHOLD_MS", "250"))
    PREFER_LOCAL_BONUS = int(os.getenv("VOICE_PREFER_LOCAL_BONUS", "200"))
    deadline_defaults = {
        "tts": int(os.getenv("VOICE_TTS_DEADLINE_MS", "3000")),
        "llm": int(os.getenv("VOICE_LLM_FAST_MS", "9000")),
        # BUGFIX: this key was "sst", so STT requests never matched and
        # silently fell back to the generic 9000ms default instead of
        # VOICE_STT_DEADLINE_MS (6000ms, matching node-worker config).
        "stt": int(os.getenv("VOICE_STT_DEADLINE_MS", "6000")),
    }
    deadline_ms = deadline_defaults.get(cap_type, 9000)
    scored = []
    for nid in available:
        nl = global_capabilities_client.get_node_load(nid)
        rl = global_capabilities_client.get_runtime_load(nid)
        # Fall back to a synthetic wait estimate (50ms per in-flight job)
        # when the node does not report wait_ms.
        wait_ms = nl.get("wait_ms", 0) or nl.get("inflight", 0) * 50
        rtt_ms = nl.get("rtt_ms", 0)
        p95_ms = rl.get("p95_ms", 0) if rl else 0
        mem_penalty = 300 if nl.get("mem_pressure") == "high" else 0
        local_bonus = PREFER_LOCAL_BONUS if nid.lower() == router_node_id else 0
        score = wait_ms + rtt_ms + p95_ms + mem_penalty - local_bonus
        scored.append((score, nid))
        fm.observe_voice_score(full_cap, score)
        fm.set_voice_breaker(full_cap, nid, False)  # currently alive
    scored.sort(key=lambda x: x[0])
    best_score, best_node = scored[0]
    voice_mode = "local" if best_node.lower() == router_node_id else "remote"
    # If a remote node won but the local node's score is within
    # LOCAL_THRESHOLD_MS of it, keep the job local anyway
    # (avoids unnecessary cross-node traffic).
    if voice_mode == "remote" and best_score > LOCAL_THRESHOLD_MS:
        local_candidates = [(s, n) for s, n in scored if n.lower() == router_node_id]
        if local_candidates:
            local_score = local_candidates[0][0]
            if local_score <= best_score + LOCAL_THRESHOLD_MS:
                best_node = router_node_id
                voice_mode = "local"
                logger.info(
                    "[voice.cap] prefer local %s (score=%d) over %s (score=%d)",
                    best_node, local_score, scored[0][1], best_score,
                )
    payload = await request.json()
    logger.info(
        "[voice.cap.route] cap=%s → node=%s mode=%s score=%d deadline=%dms",
        full_cap, best_node, voice_mode, scored[0][0], deadline_ms,
    )
    nats_ok = nc is not None and nats_available
    if not nats_ok or not offload_client:
        fm.inc_voice_cap_request(full_cap, "nats_down")
        logger.warning("[voice.cap] NATS not connected — cannot offload %s", full_cap)
        return JSONResponse(status_code=503, content={"error": "NATS not connected"})
    job = {
        "job_id": str(_uuid.uuid4()),
        "required_type": cap_type,
        "payload": payload,
        "deadline_ts": int(time.time() * 1000) + deadline_ms,
        # NOTE: pop() runs after "payload" is bound above and mutates the same
        # dict, so hints are extracted OUT of the payload the job carries.
        "hints": payload.pop("hints", {}),
    }
    # Use voice-specific NATS subject
    nats_subject_type = f"voice.{cap_type}"
    result = await offload_client.offload_infer(
        nats_client=nc,
        node_id=best_node,
        required_type=nats_subject_type,
        job_payload=job,
        timeout_ms=deadline_ms,
    )
    if result and result.get("status") == "ok":
        fm.inc_voice_cap_request(full_cap, "ok")
        fm.inc_voice_offload(full_cap, best_node, "ok")
        offload_client.record_success(best_node, voice_cb_type)
        response_data = result.get("result", result)
        resp = JSONResponse(content=response_data)
        resp.headers["X-Voice-Node"] = best_node
        resp.headers["X-Voice-Mode"] = voice_mode
        resp.headers["X-Voice-Cap"] = full_cap
        return resp
    # Non-ok — circuit breaker + WARNING (contract: no silent fallback)
    error = result.get("error", {}) if result else {}
    status_code_resp = result.get("status", "error") if result else "timeout"
    offload_client.record_failure(best_node, voice_cb_type)
    fm.set_voice_breaker(full_cap, best_node, True)
    fm.inc_voice_cap_request(full_cap, "fail")
    fm.inc_voice_offload(full_cap, best_node, "fail")
    # BUGFIX: the implicit string concatenation was missing a separator,
    # producing "code=?WARNING:" in the log line.
    logger.warning(
        "[voice.cap.fail] cap=%s node=%s status=%s code=%s "
        "WARNING: voice fallback must be handled by caller (BFF/Router)",
        full_cap, best_node, status_code_resp,
        error.get("code", "?"),
    )
    return JSONResponse(
        status_code=502,
        content={
            "error": error.get("message", f"Voice offload to {best_node} failed"),
            "code": error.get("code", "VOICE_OFFLOAD_FAILED"),
            "cap": full_cap,
            "node": best_node,
        },
        headers={
            "X-Voice-Node": best_node,
            "X-Voice-Mode": voice_mode,
            "X-Voice-Cap": full_cap,
        },
    )
@app.get("/v1/capabilities")
async def list_global_capabilities():
"""Return full capabilities view across all nodes."""
@@ -3986,6 +4438,120 @@ async def get_graph_stats():
raise HTTPException(status_code=500, detail=str(e))
# ── Sofiia Auto-Router & Budget Dashboard ─────────────────────────────────────
# Optional feature: the auto-router and budget tracker are soft dependencies.
# When either import fails, the flag below stays False and every
# /v1/sofiia/* endpoint answers 503 instead of crashing at import time.
try:
    from sofiia_auto_router import (
        select_model_auto, classify_task, explain_selection,
        ProviderBudget as _ProviderBudget, get_full_catalog,
        refresh_ollama_models_async,
    )
    from provider_budget import track_usage, get_dashboard_data, set_provider_limit, get_stats
    # Feature flag consulted by each Sofiia endpoint before doing any work.
    SOFIIA_AUTO_ROUTER_AVAILABLE = True
    logger.info("✅ Sofiia Auto-Router loaded")
except ImportError as _e:
    SOFIIA_AUTO_ROUTER_AVAILABLE = False
    logger.warning("⚠️ Sofiia Auto-Router not available: %s", _e)
class AutoRouteRequest(BaseModel):
    """Request body for POST /v1/sofiia/auto-route.

    Carries the prompt to classify plus routing preference flags that are
    forwarded verbatim to ``select_model_auto``.
    """
    prompt: str
    # Force the fast profile regardless of classification.
    force_fast: bool = False
    # Force the most capable profile regardless of classification.
    force_capable: bool = False
    # Prefer locally hosted models when candidates are otherwise equal.
    prefer_local: bool = False
    # Prefer the cheapest provider when candidates are otherwise equal.
    prefer_cheap: bool = False
class BudgetLimitRequest(BaseModel):
    """Request body for POST /v1/sofiia/budget/limits.

    Both amounts are optional; ``None`` means "not provided" — presumably the
    tracker leaves the current value unchanged (confirm in provider_budget).
    """
    provider: str
    # Monthly spend cap in USD.
    monthly_limit_usd: Optional[float] = None
    # Prepaid top-up balance in USD.
    topup_balance_usd: Optional[float] = None
@app.post("/v1/sofiia/auto-route")
async def sofiia_auto_route(req: AutoRouteRequest):
    """Classify a prompt and return the recommended model profile for Sofiia."""
    if not SOFIIA_AUTO_ROUTER_AVAILABLE:
        raise HTTPException(status_code=503, detail="Auto-router not available")
    selection = select_model_auto(
        prompt=req.prompt,
        force_fast=req.force_fast,
        force_capable=req.force_capable,
        prefer_local=req.prefer_local,
        prefer_cheap=req.prefer_cheap,
    )
    # Mirror the selection result field-by-field, then append the
    # human-readable explanation last so the response key order is stable.
    field_names = (
        "profile_name", "model_id", "provider", "task_type", "confidence",
        "complexity", "reason", "fallback_used", "all_candidates",
        "ambiguous", "runner_up", "all_scores",
    )
    payload = {name: getattr(selection, name) for name in field_names}
    payload["explanation"] = explain_selection(selection)
    return payload
@app.get("/v1/sofiia/budget")
async def sofiia_budget_dashboard():
    """Return budget dashboard data: token usage, costs, balances per provider."""
    if SOFIIA_AUTO_ROUTER_AVAILABLE:
        return get_dashboard_data()
    # Budget tracking is an optional module; report it as unavailable.
    raise HTTPException(status_code=503, detail="Budget tracker not available")
@app.post("/v1/sofiia/budget/limits")
async def set_budget_limits(req: BudgetLimitRequest):
    """Set monthly limit or top-up balance for a provider."""
    if not SOFIIA_AUTO_ROUTER_AVAILABLE:
        raise HTTPException(status_code=503, detail="Budget tracker not available")
    # Forward both knobs as-is; the tracker decides how None values are handled.
    updates = {
        "monthly_limit_usd": req.monthly_limit_usd,
        "topup_balance_usd": req.topup_balance_usd,
    }
    set_provider_limit(provider=req.provider, **updates)
    return {"status": "ok", "provider": req.provider}
@app.get("/v1/sofiia/budget/stats")
async def sofiia_budget_stats(window_hours: int = 24):
    """Return per-provider stats for the given time window (hours)."""
    if not SOFIIA_AUTO_ROUTER_AVAILABLE:
        raise HTTPException(status_code=503, detail="Budget tracker not available")
    summary = {}
    for provider_name, provider_stats in get_stats(window_hours=window_hours).items():
        # Flatten the stats object into a JSON-friendly dict, rounding
        # monetary and latency figures for presentation.
        summary[provider_name] = {
            "provider": provider_stats.provider,
            "total_cost_usd": round(provider_stats.total_cost_usd, 5),
            "call_count": provider_stats.call_count,
            "tokens_in": provider_stats.total_input_tokens,
            "tokens_out": provider_stats.total_output_tokens,
            "avg_latency_ms": round(provider_stats.avg_latency_ms),
            "top_models": provider_stats.top_models,
        }
    return summary
@app.get("/v1/sofiia/catalog")
async def sofiia_model_catalog(refresh_ollama: bool = False):
    """Return the full model catalog with availability status.

    Args:
        refresh_ollama: When True, re-probe local Ollama models before
            building the catalog so availability flags are fresh.

    Raises:
        HTTPException: 503 when the auto-router module failed to import.
    """
    if not SOFIIA_AUTO_ROUTER_AVAILABLE:
        raise HTTPException(status_code=503, detail="Auto-router not available")
    if refresh_ollama:
        await refresh_ollama_models_async()
    # Fetch the catalog once; the original called get_full_catalog() three
    # times per request (for the list, the total, and the availability count).
    models = get_full_catalog()
    return {
        "models": models,
        "total": len(models),
        "available_count": sum(1 for m in models if m["available"]),
    }
@app.on_event("shutdown")
async def shutdown_event():
"""Cleanup connections on shutdown"""

View File

@@ -20,6 +20,7 @@ import json
import logging
import re
import hashlib
from time import monotonic
from typing import Optional, Dict, Any, List
from dataclasses import dataclass, field
from datetime import datetime
@@ -41,6 +42,20 @@ PENDING_QUESTIONS_LIMIT = int(os.getenv("AGENT_PENDING_QUESTIONS_LIMIT", "5"))
SHARED_AGRO_LIBRARY_ENABLED = os.getenv("AGROMATRIX_SHARED_LIBRARY_ENABLED", "true").lower() == "true"
SHARED_AGRO_LIBRARY_REQUIRE_REVIEW = os.getenv("AGROMATRIX_SHARED_LIBRARY_REQUIRE_REVIEW", "true").lower() == "true"
DOC_VERSION_PREVIEW_CHARS = int(os.getenv("DOC_VERSION_PREVIEW_CHARS", "240"))
WARNING_THROTTLE_SECONDS = float(os.getenv("MEMORY_RETRIEVAL_WARNING_THROTTLE_S", "60") or "60")
_warning_last_ts: Dict[str, float] = {}
def _warning_throttled(key: str, message: str) -> None:
    """Emit repetitive warnings at most once per throttle window."""
    if WARNING_THROTTLE_SECONDS <= 0:
        # Throttling disabled: always log.
        logger.warning(message)
        return
    now = monotonic()
    elapsed = now - _warning_last_ts.get(key, 0.0)
    if elapsed < WARNING_THROTTLE_SECONDS:
        # Still inside the suppression window for this key.
        return
    _warning_last_ts[key] = now
    logger.warning(message)
@dataclass
@@ -1067,7 +1082,7 @@ class MemoryRetrieval:
)
return True
except Exception as e:
logger.warning(f"register_pending_question failed: {e}")
_warning_throttled("register_pending_question_failed", f"register_pending_question failed: {e}")
return False
async def resolve_pending_question(
@@ -1086,7 +1101,7 @@ class MemoryRetrieval:
row = await conn.fetchrow(
"""
WITH target AS (
SELECT id
SELECT id, question_fingerprint
FROM agent_pending_questions
WHERE channel = $1
AND chat_id = $2
@@ -1095,17 +1110,49 @@ class MemoryRetrieval:
AND status = 'pending'
ORDER BY created_at ASC
LIMIT 1
), decision AS (
SELECT
t.id,
CASE
WHEN $5 = 'dismissed' THEN 'dismissed'
WHEN EXISTS (
SELECT 1
FROM agent_pending_questions q
WHERE q.channel = $1
AND q.chat_id = $2
AND q.user_id = $3
AND q.agent_id = $4
AND q.status = 'answered'
AND q.question_fingerprint = t.question_fingerprint
) THEN 'dismissed'
ELSE 'answered'
END AS next_status,
CASE
WHEN $5 = 'dismissed' THEN $5
WHEN EXISTS (
SELECT 1
FROM agent_pending_questions q
WHERE q.channel = $1
AND q.chat_id = $2
AND q.user_id = $3
AND q.agent_id = $4
AND q.status = 'answered'
AND q.question_fingerprint = t.question_fingerprint
) THEN 'duplicate_answered'
ELSE $5
END AS resolution_reason
FROM target t
)
UPDATE agent_pending_questions p
SET status = CASE WHEN $5 = 'dismissed' THEN 'dismissed' ELSE 'answered' END,
SET status = d.next_status,
answered_at = NOW(),
metadata = COALESCE(p.metadata, '{}'::jsonb)
|| jsonb_build_object(
'resolution_reason', $5,
'resolution_reason', d.resolution_reason,
'answer_fingerprint', COALESCE($6, '')
)
FROM target t
WHERE p.id = t.id
FROM decision d
WHERE p.id = d.id
RETURNING p.id
""",
channel,
@@ -1117,7 +1164,7 @@ class MemoryRetrieval:
)
return bool(row)
except Exception as e:
logger.warning(f"resolve_pending_question failed: {e}")
_warning_throttled("resolve_pending_question_failed", f"resolve_pending_question failed: {e}")
return False
@staticmethod

View File

@@ -81,7 +81,7 @@ def get_unavailable_nodes(req_type: str) -> Set[str]:
async def offload_infer(
nats_client,
node_id: str,
required_type: Literal["llm", "vision", "stt", "tts", "ocr", "image"],
required_type: str, # "llm"|"vision"|"stt"|"tts"|"ocr"|"image"|"voice.tts"|"voice.llm"|"voice.stt"
job_payload: Dict[str, Any],
timeout_ms: int = 25000,
) -> Optional[Dict[str, Any]]:
@@ -89,6 +89,8 @@ async def offload_infer(
Returns parsed JobResponse dict or None on total failure.
Retries on transient errors (timeout, busy). Does NOT retry on provider errors.
Voice HA subjects use dotted notation: "voice.tts" → node.{id}.voice.tts.request
"""
subject = f"node.{node_id.lower()}.{required_type}.request"
payload_bytes = json.dumps(job_payload).encode()

View File

@@ -9,6 +9,8 @@ Prompt Builder for DAGI Router
import httpx
import logging
import os
import time
from typing import Dict, Any, Optional
from dataclasses import dataclass
@@ -43,6 +45,8 @@ class PromptBuilder:
self.city_service_url = city_service_url.rstrip("/")
self.router_config = router_config or {}
self._http_client: Optional[httpx.AsyncClient] = None
self._city_service_unavailable_until = 0.0
self._city_service_cooldown_s = float(os.getenv("CITY_SERVICE_FAILURE_COOLDOWN_S", "120") or "120")
async def _get_http_client(self) -> httpx.AsyncClient:
"""Lazy initialization of HTTP client"""
@@ -80,6 +84,9 @@ class PromptBuilder:
async def _fetch_from_database(self, agent_id: str) -> Optional[AgentSystemPrompt]:
"""Fetch system prompt from city-service API"""
now = time.monotonic()
if now < self._city_service_unavailable_until:
return None
try:
client = await self._get_http_client()
url = f"{self.city_service_url}/internal/agents/{agent_id}/system-prompt"
@@ -100,10 +107,20 @@ class PromptBuilder:
return None
except httpx.RequestError as e:
logger.error(f"Error fetching prompt from city-service: {e}")
self._city_service_unavailable_until = time.monotonic() + max(0.0, self._city_service_cooldown_s)
logger.warning(
"Error fetching prompt from city-service: %s; suppressing retries for %.0fs",
e,
self._city_service_cooldown_s,
)
return None
except Exception as e:
logger.error(f"Unexpected error fetching prompt: {e}")
self._city_service_unavailable_until = time.monotonic() + max(0.0, self._city_service_cooldown_s)
logger.warning(
"Unexpected error fetching prompt: %s; suppressing retries for %.0fs",
e,
self._city_service_cooldown_s,
)
return None
def _get_from_config(self, agent_id: str) -> Optional[AgentSystemPrompt]:

File diff suppressed because it is too large Load Diff

View File

@@ -229,6 +229,58 @@
padding: 2px 6px;
border-radius: 4px;
}
.aurora-clip-picker {
margin-top: 8px;
border: 1px solid var(--border);
border-radius: 8px;
background: var(--bg2);
padding: 8px;
display: none;
gap: 8px;
}
.aurora-clip-head {
display: flex;
justify-content: space-between;
gap: 8px;
font-size: 0.74rem;
color: var(--muted);
align-items: center;
}
.aurora-clip-head strong {
color: var(--text);
font-weight: 600;
}
.aurora-clip-range-row {
display: grid;
grid-template-columns: 54px 1fr 62px;
align-items: center;
gap: 8px;
font-size: 0.73rem;
color: var(--muted);
}
.aurora-clip-range-row input[type="range"] {
width: 100%;
accent-color: var(--gold);
cursor: pointer;
}
.aurora-clip-actions {
display: flex;
gap: 6px;
flex-wrap: wrap;
}
.aurora-clip-btn {
background: rgba(255,255,255,0.04);
border: 1px solid var(--border);
color: var(--muted);
border-radius: 6px;
padding: 4px 8px;
font-size: 0.7rem;
cursor: pointer;
}
.aurora-clip-btn:hover {
border-color: var(--gold);
color: var(--text);
}
.aurora-compare-wrap {
position: relative;
overflow: hidden;
@@ -791,6 +843,27 @@
accept=".mp4,.avi,.mov,.mkv,.webm,.mp3,.wav,.flac,.m4a,.aac,.ogg,.jpg,.jpeg,.png,.tiff,.tif,.webp"
onchange="auroraOnFilePicked(this)">
<div id="auroraThumbPreview" class="aurora-thumb-preview" style="display:none;"></div>
<div id="auroraClipPicker" class="aurora-clip-picker">
<div class="aurora-clip-head">
<strong>🎚 Фрагмент На Прев'ю</strong>
<span id="auroraClipSummary"></span>
</div>
<div class="aurora-clip-range-row">
<span>Start</span>
<input id="auroraClipStartRange" type="range" min="0" max="0" step="0.1" value="0">
<span id="auroraClipStartLabel">0s</span>
</div>
<div class="aurora-clip-range-row">
<span>End</span>
<input id="auroraClipEndRange" type="range" min="0" max="0" step="0.1" value="0">
<span id="auroraClipEndLabel">0s</span>
</div>
<div class="aurora-clip-actions">
<button type="button" class="aurora-clip-btn" id="auroraClipSetStartBtn">Start = поточний кадр</button>
<button type="button" class="aurora-clip-btn" id="auroraClipSetEndBtn">End = поточний кадр</button>
<button type="button" class="aurora-clip-btn" id="auroraClipFullBtn">Повне відео</button>
</div>
</div>
<div class="aurora-kv" style="margin-top:10px;">
<span class="k">Файл</span><span class="v" id="auroraSelectedFile"></span>
</div>
@@ -833,6 +906,12 @@
<option value="codeformer">CodeFormer</option>
</select>
</label>
<label>Clip start (sec)
<input id="auroraOptClipStart" type="number" min="0" step="0.1" placeholder="0">
</label>
<label>Clip duration (sec)
<input id="auroraOptClipDuration" type="number" min="0.1" step="0.1" placeholder="5">
</label>
</div>
</details>
@@ -869,7 +948,7 @@
<button id="auroraAnalyzeBtn" class="btn btn-ghost" onclick="auroraAnalyze()" disabled>🔍 Аналіз</button>
<button id="auroraAudioProcessBtn" class="btn btn-ghost" style="display:none;" onclick="auroraStartAudio()">🎧 Audio process</button>
<button id="auroraStartBtn" class="btn btn-gold" style="flex:1;" onclick="auroraStart()" disabled>Почати обробку</button>
<button id="auroraCancelBtn" class="btn btn-ghost" style="display:none;" onclick="auroraCancel()">Скасувати</button>
<button id="auroraCancelBtn" class="btn btn-ghost" style="display:none;" onclick="auroraCancel()">Зупинити</button>
</div>
</div>
@@ -912,6 +991,15 @@
<label class="aurora-checkline"><input type="checkbox" id="auroraCtrlDenoise"> Enable denoise (FastDVDnet/SCUNet)</label>
<label class="aurora-checkline"><input type="checkbox" id="auroraCtrlFaceRestore"> Run face restoration (GFPGAN)</label>
<label class="aurora-checkline"><input type="checkbox" id="auroraCtrlPlateRoi"> License-plate ROI enhancement</label>
<label class="aurora-checkline"><input type="checkbox" id="auroraCtrlMaxFace"> Max face quality (повільніше, але краще для облич)</label>
<label class="aurora-note" style="display:block; margin-top:8px;">Фокус задачі:</label>
<select id="auroraFocusProfile" style="width:100%; margin-top:4px;">
<option value="auto" selected>Auto</option>
<option value="max_faces">Max faces</option>
<option value="text_readability">Text / logos readability</option>
<option value="plates">License plates</option>
</select>
<input id="auroraTaskHint" type="text" style="width:100%; margin-top:8px;" placeholder="Ціль Aurora: напр. Прочитати напис на кепці персонажа (00:12-00:18)">
<div class="aurora-priority-wrap">
<div class="aurora-priority-head">
<span>Пріоритет: Обличчя</span>
@@ -997,7 +1085,14 @@
<div style="display:flex; gap:8px; margin-top:10px; flex-wrap:wrap;">
<button class="btn btn-ghost btn-sm" id="auroraDownloadResultBtn" style="display:none;" onclick="auroraDownloadResult()">Завантажити результат</button>
<button class="btn btn-ghost btn-sm" id="auroraOpenFolderBtn" onclick="auroraRevealFolder()">Відкрити папку</button>
<button id="auroraReprocessBtn" class="btn btn-ghost btn-sm" onclick="auroraReprocess()" disabled>Повторна обробка</button>
<button id="auroraReprocessBtn" class="btn btn-ghost btn-sm" onclick="auroraReprocess()" disabled>Повторна обробка ×1</button>
<select id="auroraReprocessPasses" class="btn btn-ghost btn-sm" style="min-width:92px;" onchange="auroraUpdateReprocessLabel()">
<option value="1" selected>1 прохід</option>
<option value="2">2 проходи</option>
<option value="3">3 проходи</option>
<option value="4">4 проходи</option>
</select>
<label class="aurora-checkline" style="margin:0;"><input type="checkbox" id="auroraReprocessSecondPass" checked> chain second-pass</label>
</div>
<div id="auroraForensicLogWrap" style="display:none; margin-top:10px;">
<div class="aurora-note" style="margin-top:0;">Forensic log</div>
@@ -2066,6 +2161,11 @@ let auroraTabBootstrapped = false;
let auroraChatHistory = [];
let auroraChatBusy = false;
let auroraFolderPath = null;
let auroraPreviewObjectUrl = null;
let auroraPreviewVideoEl = null;
let auroraVideoDurationSec = 0;
let auroraClipBindingsReady = false;
const AURORA_MIN_CLIP_SEC = 0.1;
const AURORA_MAX_TRANSIENT_ERRORS = 12;
const AURORA_ACTIVE_JOB_KEY = 'aurora_active_job_id';
const AURORA_SMART_RUN_KEY = 'aurora_smart_run_id';
@@ -2320,6 +2420,7 @@ function auroraSetActiveJobId(jobId) {
if (el) el.textContent = auroraJobId || '—';
const reBtn = document.getElementById('auroraReprocessBtn');
if (reBtn) reBtn.disabled = !auroraJobId;
auroraUpdateReprocessLabel();
if (auroraJobId) {
const cached = auroraGetPersistedTiming(auroraJobId);
if (cached) {
@@ -2328,6 +2429,25 @@ function auroraSetActiveJobId(jobId) {
}
}
auroraPersistActiveJob();
auroraUpdateCancelButton(null, null);
}
function auroraUpdateCancelButton(status, stage) {
    // Show/hide the cancel button depending on job status; while the stage
    // indicates cancellation is in progress, disable it and change its caption.
    const cancelBtn = document.getElementById('auroraCancelBtn');
    if (!cancelBtn) return;
    const statusText = String(status || '').toLowerCase();
    const jobActive = ['queued', 'processing'].indexOf(statusText) !== -1;
    if (!jobActive) {
        // No running job: hide the button and reset its state.
        cancelBtn.style.display = 'none';
        cancelBtn.disabled = false;
        cancelBtn.textContent = 'Зупинити';
        return;
    }
    const stageText = String(stage || '').toLowerCase();
    // Matches both the English ("cancelling") and Ukrainian stage wording.
    const isCancelling = ['cancell', 'скасов'].some((needle) => stageText.includes(needle));
    cancelBtn.style.display = 'inline-block';
    cancelBtn.disabled = isCancelling;
    cancelBtn.textContent = isCancelling ? 'Зупиняю...' : 'Зупинити';
}
function auroraSetMode(mode) {
@@ -2349,6 +2469,162 @@ function auroraIsAudioFile(file) {
return ['.mp3', '.wav', '.flac', '.m4a', '.aac', '.ogg'].some(ext => name.endsWith(ext));
}
function auroraRevokePreviewObjectUrl() {
    // Release the blob URL backing the current preview, if one exists.
    const url = auroraPreviewObjectUrl;
    if (!url) return;
    auroraPreviewObjectUrl = null;
    try {
        URL.revokeObjectURL(url);
    } catch (_) {
        // Ignore: revoking an already-released URL is harmless.
    }
}
function auroraFormatClipSeconds(seconds) {
    // Format a second count as "12s" or "12.5s"; non-numeric input yields "—".
    const num = Number(seconds);
    if (!Number.isFinite(num)) return '—';
    // Clamp negatives to zero and keep one decimal of precision.
    const tenths = Math.round(Math.max(0, num) * 10) / 10;
    const isWhole = Math.abs(tenths - Math.round(tenths)) < 1e-9;
    return isWhole ? `${Math.round(tenths)}s` : `${tenths.toFixed(1)}s`;
}
function auroraClampClipWindow(startSec, endSec, durationSec) {
    // Normalise a [start, end] window so it lies inside the clip and is at
    // least AURORA_MIN_CLIP_SEC long. Returns {start: 0, end: 0} when the
    // total duration is unknown or non-positive.
    const total = Number(durationSec);
    if (!Number.isFinite(total) || total <= 0) {
        return { start: 0, end: 0 };
    }
    const clampToClip = (value, fallback) => {
        const n = Number(value);
        if (!Number.isFinite(n)) return fallback;
        return Math.min(Math.max(n, 0), total);
    };
    let start = clampToClip(startSec, 0);
    let end = clampToClip(endSec, total);
    if (end - start < AURORA_MIN_CLIP_SEC) {
        // Window too short: extend forwards when there is room, otherwise
        // pin the end to the clip and pull the start back.
        if (start + AURORA_MIN_CLIP_SEC <= total) {
            end = start + AURORA_MIN_CLIP_SEC;
        } else {
            end = total;
            start = Math.max(0, end - AURORA_MIN_CLIP_SEC);
        }
    }
    return { start, end };
}
function auroraUpdateClipSummary(startSec, endSec, durationSec) {
    // Refresh the slider handle labels and the summary line
    // "start end (length) · total N" for the clip picker.
    const summary = document.getElementById('auroraClipSummary');
    const startLabel = document.getElementById('auroraClipStartLabel');
    const endLabel = document.getElementById('auroraClipEndLabel');
    if (startLabel) startLabel.textContent = auroraFormatClipSeconds(startSec);
    if (endLabel) endLabel.textContent = auroraFormatClipSeconds(endSec);
    if (summary) {
        const clipDur = Math.max(0, Number(endSec) - Number(startSec));
        // NOTE(review): the separator between the start and end labels appears
        // garbled/lost in this view — verify against the original file.
        summary.textContent = `${auroraFormatClipSeconds(startSec)}${auroraFormatClipSeconds(endSec)} (${auroraFormatClipSeconds(clipDur)}) · total ${auroraFormatClipSeconds(durationSec)}`;
    }
}
function auroraApplyClipWindow(startSec, endSec, { syncFields = true, syncSliders = true, seekTo = null } = {}) {
    // Apply a clamped [start, end] window to the range sliders, the numeric
    // export fields, the summary labels and (optionally) the preview video
    // position. No-op until the video's duration is known.
    const duration = Number(auroraVideoDurationSec || 0);
    if (!Number.isFinite(duration) || duration <= 0) return;
    const startRange = document.getElementById('auroraClipStartRange');
    const endRange = document.getElementById('auroraClipEndRange');
    const startInput = document.getElementById('auroraOptClipStart');
    const durationInput = document.getElementById('auroraOptClipDuration');
    const bounded = auroraClampClipWindow(startSec, endSec, duration);
    if (syncSliders && startRange && endRange) {
        // Sliders carry values with one decimal place.
        startRange.value = bounded.start.toFixed(1);
        endRange.value = bounded.end.toFixed(1);
    }
    if (syncFields && startInput && durationInput) {
        // Export fields: "" means "from the beginning"; trailing ".0" trimmed.
        const clipDuration = Math.max(AURORA_MIN_CLIP_SEC, bounded.end - bounded.start);
        startInput.value = bounded.start > 0 ? bounded.start.toFixed(1).replace(/\.0$/, '') : '';
        durationInput.value = clipDuration.toFixed(1).replace(/\.0$/, '');
    }
    auroraUpdateClipSummary(bounded.start, bounded.end, duration);
    if (auroraPreviewVideoEl && Number.isFinite(Number(seekTo))) {
        // Seek the preview so the user sees the frame they just picked;
        // failures (e.g. media not seekable yet) are silently ignored.
        const target = Math.max(0, Math.min(Number(seekTo), duration));
        try { auroraPreviewVideoEl.currentTime = target; } catch (_) {}
    }
}
function auroraSyncClipFromExportInputs() {
    // Mirror the manual "clip start / clip duration" number inputs onto the
    // range sliders. No-op until video metadata has provided a duration.
    const totalSec = Number(auroraVideoDurationSec || 0);
    if (!Number.isFinite(totalSec) || totalSec <= 0) return;
    const readField = (id) => Number(document.getElementById(id)?.value || 0);
    const rawStart = readField('auroraOptClipStart');
    const rawDuration = readField('auroraOptClipDuration');
    let windowStart = 0;
    if (Number.isFinite(rawStart) && rawStart >= 0) windowStart = rawStart;
    // An unset/zero duration means "to the end of the video".
    let windowEnd = totalSec;
    if (Number.isFinite(rawDuration) && rawDuration > 0) windowEnd = windowStart + rawDuration;
    auroraApplyClipWindow(windowStart, windowEnd, { syncFields: true, syncSliders: true });
}
function auroraHideClipPicker() {
    // Collapse the clip picker UI, reset its labels and forget the preview
    // video state so a stale duration can't leak into the next file.
    const picker = document.getElementById('auroraClipPicker');
    if (picker) picker.style.display = 'none';
    const resetText = (id, text) => {
        const el = document.getElementById(id);
        if (el) el.textContent = text;
    };
    resetText('auroraClipSummary', '—');
    resetText('auroraClipStartLabel', '0s');
    resetText('auroraClipEndLabel', '0s');
    auroraPreviewVideoEl = null;
    auroraVideoDurationSec = 0;
}
function auroraBindClipPicker() {
    // Wire up the clip-picker controls (sliders, numeric fields, buttons).
    // Idempotent: the flag guarantees listeners are attached only once even
    // though this is called from every preview render.
    if (auroraClipBindingsReady) return;
    auroraClipBindingsReady = true;
    const startRange = document.getElementById('auroraClipStartRange');
    const endRange = document.getElementById('auroraClipEndRange');
    const startInput = document.getElementById('auroraOptClipStart');
    const durationInput = document.getElementById('auroraOptClipDuration');
    const setStartBtn = document.getElementById('auroraClipSetStartBtn');
    const setEndBtn = document.getElementById('auroraClipSetEndBtn');
    const fullBtn = document.getElementById('auroraClipFullBtn');
    if (startRange && endRange) {
        // Dragging a slider updates the numeric fields (syncFields) but not
        // the sliders themselves (syncSliders: false — avoids feedback loops),
        // and seeks the preview to the handle being moved.
        startRange.addEventListener('input', () => {
            const start = Number(startRange.value || 0);
            const end = Number(endRange.value || 0);
            auroraApplyClipWindow(start, end, { syncFields: true, syncSliders: false, seekTo: start });
        });
        endRange.addEventListener('input', () => {
            const start = Number(startRange.value || 0);
            const end = Number(endRange.value || 0);
            auroraApplyClipWindow(start, end, { syncFields: true, syncSliders: false, seekTo: end });
        });
    }
    if (startInput) {
        // Typing in the numeric fields pushes the values back onto the sliders.
        startInput.addEventListener('input', auroraSyncClipFromExportInputs);
        startInput.addEventListener('change', auroraSyncClipFromExportInputs);
    }
    if (durationInput) {
        durationInput.addEventListener('input', auroraSyncClipFromExportInputs);
        durationInput.addEventListener('change', auroraSyncClipFromExportInputs);
    }
    if (setStartBtn) {
        // "Start = current frame": snap the window start to the preview's playhead.
        setStartBtn.addEventListener('click', () => {
            if (!auroraPreviewVideoEl) return;
            const current = Number(auroraPreviewVideoEl.currentTime || 0);
            const end = Number(document.getElementById('auroraClipEndRange')?.value || auroraVideoDurationSec || 0);
            auroraApplyClipWindow(current, end, { syncFields: true, syncSliders: true, seekTo: current });
        });
    }
    if (setEndBtn) {
        // "End = current frame": snap the window end to the preview's playhead.
        setEndBtn.addEventListener('click', () => {
            if (!auroraPreviewVideoEl) return;
            const current = Number(auroraPreviewVideoEl.currentTime || 0);
            const start = Number(document.getElementById('auroraClipStartRange')?.value || 0);
            auroraApplyClipWindow(start, current, { syncFields: true, syncSliders: true, seekTo: current });
        });
    }
    if (fullBtn) {
        // "Full video": clear the export fields (meaning "no clipping") and
        // reset the sliders to cover the whole duration.
        fullBtn.addEventListener('click', () => {
            const startField = document.getElementById('auroraOptClipStart');
            const durField = document.getElementById('auroraOptClipDuration');
            if (startField) startField.value = '';
            if (durField) durField.value = '';
            auroraApplyClipWindow(0, auroraVideoDurationSec, { syncFields: false, syncSliders: true, seekTo: 0 });
        });
    }
}
function auroraSetSelectedFile(file) {
auroraSelectedFile = file || null;
const label = document.getElementById('auroraSelectedFile');
@@ -2364,6 +2640,10 @@ function auroraSetSelectedFile(file) {
audioBtn.style.display = isAudio ? 'inline-block' : 'none';
audioBtn.disabled = !file;
}
const clipStartInput = document.getElementById('auroraOptClipStart');
const clipDurationInput = document.getElementById('auroraOptClipDuration');
if (clipStartInput) clipStartInput.value = '';
if (clipDurationInput) clipDurationInput.value = '';
auroraAnalysisCache = null;
auroraSuggestedPriority = 'balanced';
auroraSuggestedExport = {};
@@ -2386,6 +2666,7 @@ function auroraSetSelectedFile(file) {
if (quickStartBtn) quickStartBtn.disabled = !file;
const reBtn = document.getElementById('auroraReprocessBtn');
if (reBtn) reBtn.disabled = !auroraJobId;
auroraUpdateReprocessLabel();
const batchInfo = document.getElementById('auroraBatchInfo');
if (batchInfo && auroraBatchFiles.length <= 1) batchInfo.style.display = 'none';
auroraShowThumbPreview(file);
@@ -2394,19 +2675,57 @@ function auroraSetSelectedFile(file) {
function auroraShowThumbPreview(file) {
const wrap = document.getElementById('auroraThumbPreview');
if (!wrap) return;
auroraBindClipPicker();
auroraRevokePreviewObjectUrl();
auroraHideClipPicker();
wrap.style.display = 'none';
wrap.innerHTML = '';
if (!file) return;
const type = (file.type || '').toLowerCase();
const url = URL.createObjectURL(file);
auroraPreviewObjectUrl = url;
if (type.startsWith('image/')) {
wrap.innerHTML = `<img src="${url}" alt="preview"><span class="aurora-thumb-label">Original</span>`;
wrap.style.display = 'block';
} else if (type.startsWith('video/')) {
const v = document.createElement('video');
v.src = url; v.muted = true; v.playsInline = true; v.preload = 'metadata';
v.addEventListener('loadeddata', () => { v.currentTime = 0.5; });
v.addEventListener('seeked', () => { wrap.style.display = 'block'; }, { once: true });
v.src = url;
v.muted = true;
v.controls = true;
v.playsInline = true;
v.preload = 'metadata';
v.addEventListener('loadedmetadata', () => {
const picker = document.getElementById('auroraClipPicker');
const startRange = document.getElementById('auroraClipStartRange');
const endRange = document.getElementById('auroraClipEndRange');
const duration = Number(v.duration || 0);
auroraPreviewVideoEl = v;
auroraVideoDurationSec = Number.isFinite(duration) && duration > 0 ? duration : 0;
if (!Number.isFinite(auroraVideoDurationSec) || auroraVideoDurationSec <= 0) {
if (picker) picker.style.display = 'none';
return;
}
if (startRange && endRange) {
const max = auroraVideoDurationSec.toFixed(1);
startRange.min = '0';
endRange.min = '0';
startRange.max = max;
endRange.max = max;
startRange.step = '0.1';
endRange.step = '0.1';
}
if (picker) picker.style.display = 'grid';
const startInput = document.getElementById('auroraOptClipStart');
const durInput = document.getElementById('auroraOptClipDuration');
const startVal = Number(startInput?.value || 0);
const durationVal = Number(durInput?.value || 0);
const start = Number.isFinite(startVal) && startVal >= 0 ? startVal : 0;
const hasDuration = Number.isFinite(durationVal) && durationVal > 0;
const end = hasDuration ? (start + durationVal) : auroraVideoDurationSec;
auroraApplyClipWindow(start, end, { syncFields: true, syncSliders: true });
});
v.addEventListener('loadeddata', () => { wrap.style.display = 'block'; });
wrap.appendChild(v);
const lbl = document.createElement('span');
lbl.className = 'aurora-thumb-label'; lbl.textContent = 'Original';
@@ -2633,7 +2952,38 @@ function auroraBindDropzone() {
function auroraCollectExportOptions() {
const opts = {};
const outscale = document.getElementById('auroraOptOutscale')?.value;
if (outscale && outscale !== 'auto') opts.outscale = Number(outscale);
if (outscale && outscale !== 'auto') {
opts.upscale = Number(outscale);
opts.outscale = Number(outscale);
}
const clipPicker = document.getElementById('auroraClipPicker');
const pickerVisible = !!clipPicker && getComputedStyle(clipPicker).display !== 'none';
const startRange = document.getElementById('auroraClipStartRange');
const endRange = document.getElementById('auroraClipEndRange');
const durationTotal = Number(auroraVideoDurationSec || 0);
const canUseRanges =
pickerVisible &&
Number.isFinite(durationTotal) &&
durationTotal > 0 &&
startRange &&
endRange;
if (canUseRanges) {
const startRangeValue = Number(startRange.value || 0);
const endRangeValue = Number(endRange.value || durationTotal);
const bounded = auroraClampClipWindow(startRangeValue, endRangeValue, durationTotal);
const clipDuration = Math.max(AURORA_MIN_CLIP_SEC, bounded.end - bounded.start);
const isFullVideo = bounded.start <= 0.0001 && (durationTotal - bounded.end) <= 0.11;
if (!isFullVideo) {
if (bounded.start > 0.0001) opts.clip_start_sec = Number(bounded.start.toFixed(3));
opts.clip_duration_sec = Number(clipDuration.toFixed(3));
}
} else {
const clipStart = Number(document.getElementById('auroraOptClipStart')?.value || 0);
const clipDurationRaw = document.getElementById('auroraOptClipDuration')?.value;
const clipDuration = Number(clipDurationRaw || 0);
if (Number.isFinite(clipStart) && clipStart > 0) opts.clip_start_sec = clipStart;
if (clipDurationRaw !== '' && Number.isFinite(clipDuration) && clipDuration > 0) opts.clip_duration_sec = clipDuration;
}
const codec = document.getElementById('auroraOptCodec')?.value;
if (codec && codec !== 'auto') opts.encoder = codec;
const quality = document.getElementById('auroraOptQuality')?.value;
@@ -2650,6 +3000,36 @@ function auroraAbsoluteUrl(url) {
return `${API}${value}`;
}
function auroraSleep(ms) {
    // Promise-based delay helper for async polling loops.
    return new Promise((done) => { setTimeout(done, ms); });
}
function auroraUpdateReprocessLabel() {
    // Keep the reprocess button caption in sync with the selected pass
    // count from the dropdown, clamped to the supported 1–4 range.
    const rawValue = document.getElementById('auroraReprocessPasses')?.value || 1;
    const passes = Math.max(1, Math.min(4, Number(rawValue)));
    const reprocessBtn = document.getElementById('auroraReprocessBtn');
    if (reprocessBtn) reprocessBtn.textContent = `Повторна обробка ×${passes}`;
}
async function auroraWaitForTerminal(jobId, { timeoutSec = 10800, passLabel = '' } = {}) {
    // Poll the Aurora status endpoint every ~2s until the job reaches a
    // terminal state (completed / failed / cancelled) or timeoutSec elapses.
    // Returns the final status payload; throws on a missing id or timeout.
    // When passLabel is given, progress is mirrored into the UI with the
    // label prefixed (used by multi-pass reprocessing).
    const id = String(jobId || '').trim();
    if (!id) throw new Error('job_id missing');
    const deadline = Date.now() + (timeoutSec * 1000);
    while (Date.now() < deadline) {
        const r = await fetch(`${API}/api/aurora/status/${encodeURIComponent(id)}`);
        if (!r.ok) {
            // Transient server/network hiccup: back off and retry.
            await auroraSleep(2000);
            continue;
        }
        const st = await r.json();
        const status = String(st.status || '').toLowerCase();
        const stage = st.current_stage || 'processing';
        if (passLabel) auroraSetProgress(st.progress || 1, status || 'processing', `${passLabel} · ${stage}`);
        if (status === 'completed' || status === 'failed' || status === 'cancelled') return st;
        await auroraSleep(2000);
    }
    throw new Error('reprocess timeout');
}
function auroraSetPreset(preset) {
const normalized = String(preset || 'balanced').trim();
auroraPresetMode = ['turbo', 'balanced', 'max_quality'].includes(normalized) ? normalized : 'balanced';
@@ -2675,6 +3055,7 @@ function auroraSetPreset(preset) {
if (outscale) outscale.value = 'auto';
if (codec) codec.value = 'auto';
}
auroraUpdateReprocessLabel();
}
function auroraUpdatePriorityLabel() {
@@ -2699,18 +3080,29 @@ function auroraResetAnalysisControls() {
const denoise = document.getElementById('auroraCtrlDenoise');
const face = document.getElementById('auroraCtrlFaceRestore');
const plate = document.getElementById('auroraCtrlPlateRoi');
const maxFace = document.getElementById('auroraCtrlMaxFace');
const focusProfile = document.getElementById('auroraFocusProfile');
const taskHint = document.getElementById('auroraTaskHint');
const clipStart = document.getElementById('auroraOptClipStart');
const clipDuration = document.getElementById('auroraOptClipDuration');
const slider = document.getElementById('auroraPriorityBias');
if (denoise) denoise.checked = false;
if (face) face.checked = true;
if (plate) plate.checked = false;
if (maxFace) maxFace.checked = false;
if (focusProfile) focusProfile.value = 'auto';
if (taskHint) taskHint.value = '';
if (clipStart) clipStart.value = '';
if (clipDuration) clipDuration.value = '';
if (slider) slider.value = '0';
auroraSetPreset('balanced');
auroraUpdatePriorityLabel();
auroraUpdateReprocessLabel();
}
function auroraApplySuggestedExportOptions(suggested) {
if (!suggested || typeof suggested !== 'object') return;
const outscale = String(suggested.outscale ?? '').trim();
const outscale = String(suggested.upscale ?? suggested.outscale ?? '').trim();
if (outscale && document.getElementById('auroraOptOutscale')) {
const el = document.getElementById('auroraOptOutscale');
const has = Array.from(el.options || []).some((o) => o.value === outscale);
@@ -2745,6 +3137,8 @@ function auroraApplyAnalysisHints(data) {
const denoise = document.getElementById('auroraCtrlDenoise');
const face = document.getElementById('auroraCtrlFaceRestore');
const plate = document.getElementById('auroraCtrlPlateRoi');
const maxFace = document.getElementById('auroraCtrlMaxFace');
const focusProfile = document.getElementById('auroraFocusProfile');
const slider = document.getElementById('auroraPriorityBias');
const highNoise = ['high', 'very_high'].includes(String(quality.noise_level || '').toLowerCase());
@@ -2759,6 +3153,13 @@ function auroraApplyAnalysisHints(data) {
else if (suggested === 'plates') slider.value = '55';
else slider.value = '0';
}
if (focusProfile) {
if (suggested === 'details') focusProfile.value = 'text_readability';
else if (suggested === 'faces') focusProfile.value = 'max_faces';
else if (suggested === 'plates') focusProfile.value = 'plates';
else focusProfile.value = 'auto';
}
if (maxFace) maxFace.checked = suggested === 'faces';
if (suggested === 'faces' || suggested === 'plates') auroraSetPreset('max_quality');
else auroraSetPreset('balanced');
@@ -2770,12 +3171,21 @@ function auroraCollectAnalysisControls() {
const denoise = Boolean(document.getElementById('auroraCtrlDenoise')?.checked);
const faceRestore = Boolean(document.getElementById('auroraCtrlFaceRestore')?.checked);
const plateRoi = Boolean(document.getElementById('auroraCtrlPlateRoi')?.checked);
const maxFaceQuality = Boolean(document.getElementById('auroraCtrlMaxFace')?.checked);
const focusProfile = String(document.getElementById('auroraFocusProfile')?.value || 'auto').trim();
const taskHint = String(document.getElementById('auroraTaskHint')?.value || '').trim();
const preset = auroraPresetMode || 'balanced';
const priority = bias <= -30 ? 'faces' : bias >= 30 ? 'plates' : (auroraSuggestedPriority || 'balanced');
let priority = bias <= -30 ? 'faces' : bias >= 30 ? 'plates' : (auroraSuggestedPriority || 'balanced');
if (focusProfile === 'text_readability') priority = 'details';
if (focusProfile === 'plates') priority = 'plates';
if (focusProfile === 'max_faces' || maxFaceQuality) priority = 'faces';
return {
denoise,
face_restore: faceRestore,
plate_roi_enhance: plateRoi,
max_face_quality: maxFaceQuality,
focus_profile: focusProfile || 'auto',
task_hint: taskHint,
priority_bias: bias,
priority,
preset,
@@ -2784,15 +3194,45 @@ function auroraCollectAnalysisControls() {
function auroraBuildAnalysisExportHints(controls) {
const c = controls || auroraCollectAnalysisControls();
return {
const outscaleRaw = String(document.getElementById('auroraOptOutscale')?.value || 'auto').trim().toLowerCase();
const isAutoScale = !outscaleRaw || outscaleRaw === 'auto';
const hints = {
pre_denoise: Boolean(c.denoise),
temporal_denoise: Boolean(c.denoise && c.preset === 'max_quality'),
roi_only_faces: c.priority === 'faces',
face_restore: Boolean(c.face_restore),
plate_roi_enhance: Boolean(c.plate_roi_enhance),
max_face_quality: Boolean(c.max_face_quality),
focus_profile: c.focus_profile || 'auto',
task_hint: String(c.task_hint || '').trim(),
profile: c.preset || 'balanced',
priority_bias: Number(c.priority_bias || 0),
auto_forensic_outscale: true,
};
if (!hints.task_hint) delete hints.task_hint;
if (c.focus_profile === 'max_faces' || c.max_face_quality) {
hints.pre_denoise = true;
hints.temporal_denoise = true;
hints.roi_only_faces = true;
hints.face_model = 'codeformer';
hints.deblur_before_face = true;
hints.score_loop = true;
hints.allow_roi_upscale = true;
if (isAutoScale) hints.upscale = 2;
} else if (c.focus_profile === 'text_readability') {
hints.pre_denoise = true;
hints.temporal_denoise = true;
hints.roi_only_faces = false;
hints.deblur_before_face = true;
hints.score_loop = true;
hints.text_focus = true;
if (isAutoScale) hints.upscale = 2;
} else if (c.focus_profile === 'plates') {
hints.roi_only_faces = false;
hints.plate_roi_enhance = true;
}
return hints;
}
function auroraStartFromAnalysis() {
@@ -2824,6 +3264,9 @@ function auroraRenderQualityReport(report) {
const plates = report.plates || {};
const overall = report.overall || {};
const models = Array.isArray(overall.models) ? overall.models : [];
const warnings = Array.isArray(overall.warnings) ? overall.warnings : [];
const processingStatus = String(overall.processing_status || 'ok');
const degraded = processingStatus !== 'ok' || Boolean(overall.identical_to_input) || Boolean(overall.fallback_used);
const procSec = Number(overall.processing_time_sec);
const procText = Number.isFinite(procSec) ? auroraFormatSeconds(procSec) : '—';
const psnr = overall.psnr != null ? `${overall.psnr} dB` : '—';
@@ -2845,9 +3288,11 @@ function auroraRenderQualityReport(report) {
</div>
<div class="aurora-quality-group">
<div class="aurora-quality-head">Загальне</div>
<div class="aurora-quality-line"><span>Статус обробки</span><span style="${degraded ? 'color:var(--warn);' : 'color:var(--ok);'}">${auroraEsc(processingStatus)}</span></div>
<div class="aurora-quality-line"><span>PSNR</span><span>${psnr}</span></div>
<div class="aurora-quality-line"><span>Час обробки</span><span>${procText}</span></div>
<div class="aurora-quality-line"><span>Моделі</span><span>${models.length ? auroraEsc(models.join(', ')) : '—'}</span></div>
${warnings.length ? `<div class="aurora-note" style="margin-top:6px; color:var(--warn);">⚠ ${auroraEsc(warnings.join(' | '))}</div>` : ''}
</div>
`;
wrap.style.display = 'block';
@@ -3228,44 +3673,82 @@ async function auroraReprocess(options) {
}
const reBtn = document.getElementById('auroraReprocessBtn');
if (reBtn) reBtn.disabled = true;
const payload = (options && typeof options === 'object') ? options : {};
const incoming = (options && typeof options === 'object') ? options : {};
const passCountUi = Number(document.getElementById('auroraReprocessPasses')?.value || 1);
const passes = Math.max(1, Math.min(4, Number(incoming.passes) || passCountUi));
const secondPassUi = Boolean(document.getElementById('auroraReprocessSecondPass')?.checked);
const secondPass = Object.prototype.hasOwnProperty.call(incoming, 'second_pass')
? Boolean(incoming.second_pass)
: secondPassUi;
const analysisControls = auroraCollectAnalysisControls();
const uiExport = auroraCollectExportOptions();
const analysisExport = auroraBuildAnalysisExportHints(analysisControls);
const mergedExport = { ...auroraSuggestedExport, ...uiExport, ...analysisExport, ...(incoming.export_options || {}) };
let priority = incoming.priority || analysisControls.priority || auroraSuggestedPriority || 'balanced';
if (typeof priority !== 'string' || !priority.trim()) priority = 'balanced';
const basePayload = {
mode: auroraMode,
priority,
export_options: mergedExport,
};
let sourceJobId = auroraJobId;
let lastJobId = auroraJobId;
try {
const r = await fetch(`${API}/api/aurora/reprocess/${encodeURIComponent(auroraJobId)}`, {
method: 'POST',
headers: { 'Content-Type': 'application/json' },
body: JSON.stringify(payload),
});
if (!r.ok) {
const body = await r.text();
throw new Error(body || `HTTP ${r.status}`);
auroraStopPolling();
for (let i = 1; i <= passes; i += 1) {
const payload = { ...basePayload, ...incoming, second_pass: secondPass };
const r = await fetch(`${API}/api/aurora/reprocess/${encodeURIComponent(sourceJobId)}`, {
method: 'POST',
headers: { 'Content-Type': 'application/json' },
body: JSON.stringify(payload),
});
if (!r.ok) {
const body = await r.text();
throw new Error(body || `HTTP ${r.status}`);
}
const data = await r.json();
const newJobId = String(data.job_id || '').trim();
if (!newJobId) throw new Error('job_id missing in reprocess response');
lastJobId = newJobId;
auroraSetActiveJobId(newJobId);
auroraSetSmartRunId(null);
auroraSmartStatusCache = null;
auroraSetSmartPolicyText(`reprocess ${i}/${passes}`);
auroraStatusCache = null;
auroraResultCache = null;
auroraPollErrorCount = 0;
auroraLastProgress = 1;
auroraPollInFlight = false;
const resultCard = document.getElementById('auroraResultCard');
if (resultCard) resultCard.style.display = 'none';
auroraSetProgress(1, 'processing', `dispatching reprocess ${i}/${passes}`);
auroraUpdateQueuePosition(null);
auroraUpdateTiming(0, null, null);
auroraUpdateLivePerf(null, null);
const cancelBtn = document.getElementById('auroraCancelBtn');
if (cancelBtn) cancelBtn.style.display = 'inline-block';
if (i < passes) {
const done = await auroraWaitForTerminal(newJobId, { passLabel: `reprocess ${i}/${passes}` });
const status = String(done?.status || '').toLowerCase();
if (status !== 'completed') {
throw new Error(`reprocess ${i}/${passes} завершився зі статусом ${status}`);
}
}
sourceJobId = newJobId;
}
const data = await r.json();
auroraSetActiveJobId(data.job_id);
auroraSetSmartRunId(null);
auroraSmartStatusCache = null;
auroraSetSmartPolicyText('audio local');
auroraStatusCache = null;
auroraResultCache = null;
auroraPollErrorCount = 0;
auroraLastProgress = 1;
auroraPollInFlight = false;
const resultCard = document.getElementById('auroraResultCard');
if (resultCard) resultCard.style.display = 'none';
auroraSetProgress(1, 'processing', 'dispatching (reprocess)');
auroraUpdateQueuePosition(null);
auroraUpdateTiming(0, null, null);
auroraUpdateLivePerf(null, null);
const cancelBtn = document.getElementById('auroraCancelBtn');
if (cancelBtn) cancelBtn.style.display = 'inline-block';
auroraStopPolling();
auroraPollTimer = setInterval(auroraPollStatus, 2000);
await auroraPollStatus();
auroraChatAdd('assistant', `Запустила reprocess: ${auroraJobId}`);
auroraChatAdd('assistant', `Запустила reprocess ×${passes}: ${lastJobId}`);
await auroraRefreshJobs();
} catch (e) {
alert(`Aurora reprocess error: ${e.message || e}`);
} finally {
if (reBtn) reBtn.disabled = false;
auroraUpdateReprocessLabel();
}
}
@@ -3472,6 +3955,7 @@ async function auroraPollStatus() {
});
auroraUpdateQueuePosition(st.queue_position);
auroraUpdateStorage(st.storage);
auroraUpdateCancelButton(st.status, st.current_stage);
const reBtn = document.getElementById('auroraReprocessBtn');
if (reBtn) reBtn.disabled = !(st.status === 'completed' || st.status === 'failed' || st.status === 'cancelled');
if (st.status === 'completed') {
@@ -3604,10 +4088,19 @@ async function auroraStart() {
async function auroraCancel() {
if (!auroraJobId) return;
const cancelBtn = document.getElementById('auroraCancelBtn');
if (cancelBtn) {
cancelBtn.style.display = 'inline-block';
cancelBtn.disabled = true;
cancelBtn.textContent = 'Зупиняю...';
}
try {
await fetch(`${API}/api/aurora/cancel/${encodeURIComponent(auroraJobId)}`, { method: 'POST' });
await auroraPollStatus();
await auroraRefreshJobs();
} catch (_) {}
} catch (_) {
auroraUpdateCancelButton('processing', null);
}
}
async function auroraLoadResult(jobId) {
@@ -3950,6 +4443,7 @@ function auroraInitTab() {
auroraBindDropzone();
auroraRefreshHealth();
auroraUpdatePriorityLabel();
auroraUpdateReprocessLabel();
auroraSetSmartRunId(auroraSmartRunId);
if (!auroraSmartRunId) {
auroraSetSmartPolicyText('standby');

View File

@@ -810,18 +810,18 @@ class SwapperService:
# FLUX / Diffusion model loading
logger.info(f"🎨 Loading diffusion model: {hf_name}")
from diffusers import AutoPipelineForText2Image
diffusion_dtype = torch.bfloat16 if self.device == "cuda" else torch.float32
pipeline = AutoPipelineForText2Image.from_pretrained(
hf_name,
torch_dtype=torch.bfloat16,
use_safetensors=True
torch_dtype=diffusion_dtype
)
pipeline.to(self.device)
pipeline.enable_model_cpu_offload() # Optimize VRAM usage
if self.device == "cuda":
pipeline.enable_model_cpu_offload() # Optimize VRAM usage on CUDA
self.hf_models[model_name] = pipeline
self.hf_processors[model_name] = None # No separate processor for diffusion
logger.info(f"✅ Diffusion model loaded: {model_name} with CPU offload enabled")
logger.info(f"✅ Diffusion model loaded: {model_name} (device={self.device})")
else:
# Generic loading

View File

@@ -38,3 +38,12 @@ storage:
models_dir: /app/models
cache_dir: /app/cache
swap_dir: /app/swap
models:
flux-klein-4b:
path: huggingface:segmind/tiny-sd
type: image_generation
size_gb: 0.7
priority: medium
capabilities:
- image_generation