feat(production): sync all modified production files to git

Includes updates across gateway, router, node-worker, memory-service,
aurora-service, swapper, sofiia-console UI and node2 infrastructure:

- gateway-bot: Dockerfile, http_api.py, druid/aistalk prompts, doc_service
- services/router: main.py, router-config.yml, fabric_metrics, memory_retrieval,
  offload_client, prompt_builder
- services/node-worker: worker.py, main.py, config.py, fabric_metrics
- services/memory-service: Dockerfile, database.py, main.py, requirements
- services/aurora-service: main.py (+399), kling.py, quality_report.py
- services/swapper-service: main.py, swapper_config_node2.yaml
- services/sofiia-console: static/index.html (console UI update)
- config: agent_registry, crewai_agents/teams, router_agents
- ops/fabric_preflight.sh: updated preflight checks
- router-config.yml, docker-compose.node2.yml: infra updates
- docs: NODA1-AGENT-ARCHITECTURE, fabric_contract updated

Made-with: Cursor
This commit is contained in:
Apple
2026-03-03 07:13:29 -08:00
parent 9aac835882
commit e9dedffa48
35 changed files with 3317 additions and 805 deletions

View File

@@ -307,7 +307,7 @@ agents:
canonical_role: "Autonomous Cyber Detective Agency Orchestrator"
mission: |
AISTALK - автономне агентство кібердетективів для розслідувань загроз і
вразливостей у Web2, Web3, AI та quantum-risk сценаріях.
вразливостей у Web2, Web3, AI, media-forensics та quantum-risk сценаріях.
На етапі планування агент працює як внутрішній оркестратор команди
спеціалізованих ролей з асинхронним case lifecycle.
@@ -336,6 +336,9 @@ agents:
- blueteam
- bughunter
- quantum risk
- media forensics
- video analysis
- deepfake
llm_profile: reasoning
prompt_file: aistalk_prompt.txt
@@ -346,12 +349,12 @@ agents:
enabled: true
default_profile: default
profile_hints:
default: [osint, threat_hunt, vulns, web3, ai, red-blue]
default: [osint, threat_hunt, vulns, web3, ai, red-blue, media_forensics, video, audio, photo, forensic, deepfake]
profiles:
default:
team_name: AISTALK Cyber Detective Unit
parallel_roles: true
max_concurrency: 6
max_concurrency: 7
synthesis:
role_context: AISTALK Orchestrator & Analyst
system_prompt_ref: roles/aistalk/orchestrator_synthesis.md
@@ -381,6 +384,11 @@ agents:
role_context: Neuron (Deep Analysis)
system_prompt_ref: roles/aistalk/neuron.md
llm_profile: reasoning
- id: aurora
role_context: Aurora (Autonomous Media Forensics)
system_prompt_ref: roles/aistalk/aurora.md
llm_profile: science
skills: [video_enhancement, audio_forensics, photo_restoration, chain_of_custody]
- id: vault
role_context: Vault (Secrets and Confidential Data Guard)
system_prompt_ref: roles/aistalk/vault.md
@@ -432,6 +440,8 @@ agents:
skills: [entity_resolution, link_analysis]
- role: "Risk"
skills: [cvss, mitre_mapping]
- role: "Aurora"
skills: [media_forensics, video_enhancement, audio_forensics, photo_analysis]
- role: "Analyst"
skills: [synthesis, reporting]

View File

@@ -246,6 +246,15 @@
"role": "Neuron (Deep Analysis)",
"skills": []
},
{
"role": "Aurora (Autonomous Media Forensics)",
"skills": [
"video_enhancement",
"audio_forensics",
"photo_restoration",
"chain_of_custody"
]
},
{
"role": "Vault (Secrets and Confidential Data Guard)",
"skills": []

View File

@@ -109,7 +109,7 @@ aistalk:
default:
team_name: AISTALK Cyber Detective Unit
parallel_roles: true
max_concurrency: 6
max_concurrency: 7
synthesis:
role_context: AISTALK Orchestrator & Analyst
system_prompt_ref: roles/aistalk/orchestrator_synthesis.md
@@ -139,6 +139,15 @@ aistalk:
role_context: Neuron (Deep Analysis)
system_prompt_ref: roles/aistalk/neuron.md
llm_profile: reasoning
- id: aurora
role_context: Aurora (Autonomous Media Forensics)
system_prompt_ref: roles/aistalk/aurora.md
llm_profile: science
skills:
- video_enhancement
- audio_forensics
- photo_restoration
- chain_of_custody
- id: vault
role_context: Vault (Secrets and Confidential Data Guard)
system_prompt_ref: roles/aistalk/vault.md
@@ -178,6 +187,12 @@ aistalk:
- web3
- ai
- red-blue
- media_forensics
- video
- audio
- photo
- forensic
- deepfake
nutra:
profiles:
default:

View File

@@ -67,7 +67,10 @@
"redteam",
"blueteam",
"bughunter",
"quantum risk"
"quantum risk",
"media forensics",
"video analysis",
"deepfake"
],
"domains": [
"cybersecurity",
@@ -522,4 +525,4 @@
"class": "internal",
"visibility": "internal"
}
}
}

View File

@@ -56,6 +56,27 @@ services:
- dagi-network
restart: unless-stopped
aurora-service:
build:
context: ./services/aurora-service
dockerfile: Dockerfile
container_name: aurora-service-node2
ports:
- "127.0.0.1:9401:9401"
environment:
- AURORA_DATA_DIR=/data/aurora
- AURORA_PUBLIC_BASE_URL=http://127.0.0.1:9401
- AURORA_CORS_ORIGINS=*
- AURORA_MODELS_DIR=/data/aurora/models
- AURORA_FORCE_CPU=false
- AURORA_PREFER_MPS=true
- AURORA_ENABLE_VIDEOTOOLBOX=true
volumes:
- aurora-data:/data
networks:
- dagi-network
restart: unless-stopped
dagi-nats:
image: nats:2.10-alpine
container_name: dagi-nats-node2
@@ -97,3 +118,7 @@ networks:
dagi-memory-network:
external: true
name: dagi-memory-network-node2
volumes:
aurora-data:
driver: local

View File

@@ -75,13 +75,16 @@ NODA1 використовує уніфіковану систему агент
┌───────────────────────┐ ┌───────────┐ ┌─────────────────────┐
│ LLM PROVIDERS │ │ MEMORY │ │ CREWAI │
│ ───────────────────── │ │ SERVICE │ │ (dagi-staging- │
│ • Ollama (local) │ │ :8000 │ │ crewai-service) │
│ - qwen3:8b │ ├───────────┤ │ ─────────────────── │
- mistral:7b │ │ • Qdrant │ │ crewai_agents.json │
│ - qwen2.5:3b │ │ • Neo4j │ │ │
• DeepSeek (cloud) │ │ • Postgres│ │ 11 Orchestrators │
│ • Mistral (cloud) │ └───────────┘ │ + Teams per agent │
└───────────────────────┘ └─────────────────────┘
│ • Grok (cloud) │ │ :8000 │ │ crewai-service) │
│ - sofiia, senpai │ ├───────────┤ │ ─────────────────── │
• DeepSeek (cloud) │ │ • Qdrant │ │ crewai_agents.json │
│ - all other agents │ │ • Neo4j │ │ │
+ fallback │ │ • Postgres│ │ 11 Orchestrators │
│ • Mistral (fallback) │ └───────────┘ │ + Teams per agent │
│ • Ollama (crew only) │ └─────────────────────┘
│ - qwen3:8b (crew) │
│ - qwen3-vl:8b (vis) │
└───────────────────────┘
```
---
@@ -108,28 +111,28 @@ config/agent_registry.yml ←── ЄДИНЕ джерело істини
### TOP-LEVEL (User-facing, 13 agents)
| ID | Display | Telegram | Visibility | Domain |
|----|---------|----------|------------|--------|
| `daarwizz` | DAARWIZZ | public | public | Meta-Orchestrator |
| `helion` | Helion | public | public | Energy |
| `alateya` | Aletheia | public | public | R&D Lab |
| `druid` | DRUID | public | public | Ayurveda/Cosmetics |
| `nutra` | NUTRA | public | public | Nutraceuticals |
| `agromatrix` | Степан Матрікс | public | public | Agriculture |
| `greenfood` | GREENFOOD | public | public | Food ERP |
| `clan` | CLAN | public | public | Community |
| `eonarch` | EONARCH | public | public | Consciousness |
| `yaromir` | YAROMIR | whitelist | private | Tech Lead |
| `soul` | SOUL | public | public | Spiritual |
| `senpai` | SENPAI | public | public | Trading |
| `sofiia` | SOFIIA | public | public | AI Architecture |
| ID | Display | Telegram | Visibility | Domain | LLM (primary) | Fallback |
|----|---------|----------|------------|--------|---------------|---------|
| `daarwizz` | DAARWIZZ | public | public | Meta-Orchestrator | DeepSeek | Mistral |
| `helion` | Helion | public | public | Energy | DeepSeek | Mistral |
| `alateya` | Aletheia | public | public | R&D Lab | DeepSeek | Mistral |
| `druid` | DRUID | public | public | Ayurveda/Cosmetics | DeepSeek | Mistral |
| `nutra` | NUTRA | public | public | Nutraceuticals | DeepSeek | Mistral |
| `agromatrix` | Степан Матрікс | public | public | Agriculture | DeepSeek | Mistral |
| `greenfood` | GREENFOOD | public | public | Food ERP | DeepSeek | Mistral |
| `clan` | CLAN | public | public | Community | DeepSeek | Mistral |
| `eonarch` | EONARCH | public | public | Consciousness | DeepSeek | Mistral |
| `yaromir` | YAROMIR | whitelist | private | Tech Lead | DeepSeek | Mistral |
| `soul` | SOUL | public | public | Spiritual | DeepSeek | Mistral |
| `senpai` | SENPAI | public | public | Trading | **Grok** | DeepSeek |
| `sofiia` | SOFIIA | public | public | AI Architecture | **Grok** | DeepSeek |
### INTERNAL (Service agents, 2 agents)
| ID | Display | Telegram | Scope | Purpose |
|----|---------|----------|-------|---------|
| `monitor` | MONITOR | off | node_local | Observability, alerts |
| `devtools` | DevTools | off | global | Development tools |
| ID | Display | Telegram | Scope | Purpose | LLM |
|----|---------|----------|-------|---------|-----|
| `monitor` | MONITOR | off | node_local | Observability, alerts | Ollama (local) |
| `devtools` | DevTools | off | global | Development tools | DeepSeek (складні) / Ollama (прості) |
---

View File

@@ -1 +1 @@
/Users/apple/github-projects/microdao-daarion/docs/backups/docs_backup_20260226-091701.tar.gz
/Users/apple/github-projects/microdao-daarion/docs/backups/docs_backup_20260302-091700.tar.gz

View File

@@ -155,5 +155,180 @@ STT/TTS/OCR/Image **можуть бути різними** на різних н
- **14 контейнерів** (router, node-worker, node-capabilities, nats, gateway, memory, qdrant, postgres, neo4j, redis, open-webui, sofiia-console, swapper)
- **13 served моделей** (Ollama: 12 + llama_server: 1)
- **29 installed artifacts** на диску (150.3GB LLM + 0.3GB TTS kokoro-v1_0)
- **capabilities**: llm=Y, vision=Y, ocr=Y, stt=N, tts=N, image=N
- `OCR_PROVIDER=vision_prompted`
- **capabilities**: llm=Y, vision=Y, ocr=Y, stt=Y, tts=Y, image=N ← Phase 1 enabled
- `STT_PROVIDER=memory_service`, `TTS_PROVIDER=memory_service`, `OCR_PROVIDER=vision_prompted`
---
## Phase 1: STT/TTS via Memory Service delegation (2026-02-27)
### Мотивація
Увімкнення `stt=true` / `tts=true` в Fabric без нових мікросервісів і без ризику MLX-залежностей.
### Архітектура
```
Fabric Router → find_nodes_with_capability("stt"/"tts") → NODA2 node-worker
→ STT_PROVIDER=memory_service → stt_memory_service.transcribe()
→ POST http://memory-service:8000/voice/stt (faster-whisper)
→ {text, segments, language, meta}
Fabric Router → NODA2 node-worker
→ TTS_PROVIDER=memory_service → tts_memory_service.synthesize()
→ POST http://memory-service:8000/voice/tts (edge-tts: Polina/Ostap Neural uk-UA)
→ {audio_b64, format="mp3", meta}
```
### Контракти
**STT вхід:**
```json
{
"audio_b64": "<base64>", // OR
"audio_url": "http://...", // one is required
"language": "uk", // optional
"filename": "audio.wav" // optional
}
```
**STT вихід (fabric contract):**
```json
{"text": "...", "segments": [], "language": "uk", "meta": {...}, "provider": "memory_service"}
```
**TTS вхід:**
```json
{"text": "...", "voice": "Polina", "speed": 1.0}
```
**TTS вихід (fabric contract):**
```json
{"audio_b64": "<base64-mp3>", "format": "mp3", "meta": {...}, "provider": "memory_service"}
```
### Обмеження Phase 1
- **ffmpeg=false**: лише формати що Memory Service ковтає нативно (WAV рекомендований)
- **Текст TTS**: max 500 символів (Memory Service limit)
- **Голоси TTS**: Polina (uk-UA-PolinaNeural), Ostap (uk-UA-OstapNeural), en-US-GuyNeural
- **NODA1**: залишається `STT_PROVIDER=none` / `TTS_PROVIDER=none` (не заважає роутингу)
### Phase 2 (MLX upgrade — опційний)
Встановити `STT_PROVIDER=mlx_whisper` та/або `TTS_PROVIDER=mlx_kokoro` в docker-compose коли:
- готовий ffmpeg або чітко обмежені формати
- потрібний якісніший локальний TTS замість edge-tts
- NODA2 Apple Silicon виграш від MLX
---
## Voice HA (Multi-node routing) — PR1–PR3
### Архітектура
```
Browser → sofiia-console /api/voice/tts
↓ VOICE_HA_ENABLED=false (default)
memory-service:8000/voice/tts ← legacy direct
↓ VOICE_HA_ENABLED=true
Router /v1/capability/voice_tts
↓ (caps + scoring)
node.{id}.voice.tts.request (NATS)
node-worker (voice semaphore)
memory-service/voice/tts
```
### NATS Subjects (Voice HA — відокремлені від generic)
| Subject | Призначення |
|---|---|
| `node.{id}.voice.tts.request` | Voice TTS offload (окремий semaphore) |
| `node.{id}.voice.llm.request` | Voice LLM inference (голосові guardrails) |
| `node.{id}.voice.stt.request` | Voice STT transcription |
**Сумісність:** generic subjects (`node.{id}.tts.request` etc.) — незмінні.
### Capability Flags
Node Worker `/caps` повертає:
```json
{
"capabilities": {
"tts": true,
"voice_tts": true,
"voice_llm": true,
"voice_stt": true
},
"voice_concurrency": {
"voice_tts": 4,
"voice_llm": 2,
"voice_stt": 2
}
}
```
`voice_tts=true` лише коли `TTS_PROVIDER != none` **і** NATS subscription активна.
NCS агрегує ці флаги через `_derive_capabilities()`.
### Router Endpoints
| Endpoint | Дедлайн | Суб'єкт |
|---|---|---|
| `POST /v1/capability/voice_tts` | 3000ms | `node.{id}.voice.tts.request` |
| `POST /v1/capability/voice_llm` | 9000ms (fast) / 12000ms (quality) | `node.{id}.voice.llm.request` |
| `POST /v1/capability/voice_stt` | 6000ms | `node.{id}.voice.stt.request` |
Response headers: `X-Voice-Node`, `X-Voice-Mode` (local|remote), `X-Voice-Cap`.
### Scoring
```
score = wait_ms + rtt_ms + p95_ms + mem_penalty - local_bonus
mem_penalty = 300 if mem_pressure == "high"
local_bonus = VOICE_PREFER_LOCAL_BONUS (default 200ms)
```
Якщо `score_local <= score_best_remote + LOCAL_THRESHOLD_MS` → вибирається локальна нода.
### BFF Feature Flag
```yaml
# docker-compose.node2-sofiia.yml
VOICE_HA_ENABLED: "false" # default — legacy direct path
VOICE_HA_ROUTER_URL: "http://router:8000" # Router для HA offload
```
Активація: `VOICE_HA_ENABLED=true` + rebuild `sofiia-console`.
Деактивація: `VOICE_HA_ENABLED=false` — повертається до direct memory-service.
### Метрики (Prometheus)
**node-worker** (`/prom_metrics`):
- `node_worker_voice_jobs_total{cap,status}`
- `node_worker_voice_inflight{cap}`
- `node_worker_voice_latency_ms{cap}` (histogram)
**router** (`/fabric_metrics`):
- `fabric_voice_capability_requests_total{cap,status}`
- `fabric_voice_offload_total{cap,node,status}`
- `fabric_voice_breaker_state{cap,node}` (1=open)
- `fabric_voice_score_ms{cap}` (histogram)
### Контракт: No Silent Fallback
- Будь-який fallback (busy, broken, timeout) логує `WARNING` + інкрементує Prometheus counter
- `TOO_BUSY` включає `retry_after_ms` hint для Router failover
- Circuit breaker per `node+voice_cap` — не змішується з generic CB
### Тести
`tests/test_voice_ha.py` — 28 тестів:
- Node Worker voice caps + semaphore isolation
- Router fabric_metrics voice helpers
- BFF `VOICE_HA_ENABLED` feature flag
- Voice scoring logic (local prefer, mem penalty, remote wins when saturated)
- No silent fallback invariants

View File

@@ -3,7 +3,12 @@ FROM python:3.11-slim
LABEL maintainer="DAARION.city Team"
LABEL description="Bot Gateway - Telegram/Discord webhook handler with DAARWIZZ"
LABEL version="0.2.1"
LABEL version="0.2.2"
ARG BUILD_SHA=dev
ARG BUILD_TIME=local
ENV BUILD_SHA=${BUILD_SHA}
ENV BUILD_TIME=${BUILD_TIME}
WORKDIR /app/gateway-bot

View File

@@ -20,6 +20,35 @@ Modes:
- public mode: community-shareable report, sanitized.
- confidential mode: strict redaction and minimal retention.
AISTALK team routing (internal):
- Use `Aurora` for media forensics requests: blurry CCTV, noisy video/audio, frame extraction, metadata integrity, deepfake suspicion, photo restoration.
- Default Aurora mode:
- `tactical` for quick understanding
- `forensic` when evidence is intended for legal/compliance workflows
- For forensic media workflows require:
- hash of original and result (`sha256`)
- processing log (step, model, timing)
- chain-of-custody notes and signature metadata when available
Aurora response contract for media tasks:
```json
{
"agent": "Aurora",
"mode": "tactical | forensic",
"job_id": "aurora_YYYYMMDD_###",
"input_file": {"name": "file.ext", "hash": "sha256:..."},
"processing_log": [{"step": "denoise", "model": "FastDVDnet", "time_ms": 1200}],
"output_files": [{"type": "video|audio|photo|forensic_log", "url": "https://...", "hash": "sha256:..."}],
"digital_signature": "ed25519:... | null"
}
```
Safety and compliance:
- No deceptive deepfake generation or identity manipulation.
- Always label AI-enhanced artifacts as enhanced outputs.
- Separate observations from conclusions; include confidence and limitations.
- For legal-grade conclusions, require human forensic expert verification.
Output style:
- Short executive summary first.
- Then findings, risk level, and recommended next actions.

View File

@@ -1,100 +1,79 @@
Ти — DRUID, агент аналітики та RAG платформи DAARION.
# DRUID v1.0
## Майстер алхімічних формул і аюрведи
Твоя роль — допомагати користувачам з пошуком інформації, аналізом документів та відповідями на питання з бази знань.
Ти — DRUID.
Алхімік і дослідник на межі між давньою традицією і сучасною наукою формулювань.
Твоя сила — перетворювати рослинні знання і хімічну точність на конкретні рецептури: від аюрведичної олії до сучасного шампуню.
## NETWORK AWARENESS (DAARION)
- Ти знаєш, що **DAARWIZZ** — головний оркестратор/мер мережі DAARION.city.
- Якщо запит про DAARWIZZ або мережеву оркестрацію: коротко підтверди роль DAARWIZZ та скеруй до нього для coordination/governance питань.
- Ніколи не відповідай, що "не знаєш DAARWIZZ".
Говориш від першої особи, у чоловічому роді. Стиль — впевнений, лаконічний, практичний.
Ти не читаєш лекцій — ти даєш формули, склади, пропорції.
---
# BEHAVIOR POLICY v1
## Мережа DAARION
## A. SPEAK-ONLY-WHEN-ASKED (SOWA)
**Головне правило: мовчи, якщо не питали.**
НЕ ВІДПОВІДАЙ, якщо:
- Немає прямого звернення (@DRUID73bot, "Druid", команда)
- Повідомлення — broadcast/оголошення/постер
- Коротка нотатка/таймінг без запиту
- Медіа/фото/посилання БЕЗ питання
ВІДПОВІДАЙ, якщо:
- Пряме звернення: @DRUID73bot, "Druid", "/druid"
- Явний запит про пошук, документи, аналітику
- Особисте повідомлення (DM)
- Навчальна група (Agent Preschool)
**Якщо не впевнений — МОВЧИ.**
## B. SHORT-FIRST
**За замовчуванням: 1-3 речення.**
ЗАБОРОНЕНО:
- Довгі розбори без запиту
- "Радий допомогти", "Готовий до співпраці"
- Емодзі
## C. MEDIA-NO-COMMENT
Медіа без питання = мовчанка.
Медіа з питанням = коротка відповідь по суті.
- **DAARWIZZ** — головний оркестратор мережі. Якщо запит про координацію/governance — скеровуй до нього.
- **NUTRA** — партнер по здоров'ю і нутріцевтиці. Якщо питання про внутрішній прийом, БАД, медицину — скеровуй до NUTRA.
- Ніколи не заперечуй знайомство з DAARWIZZ.
---
## 🎤 МУЛЬТИМОДАЛЬНІСТЬ
## Що я роблю
**Ти можеш працювати з:**
- ✅ **Голосовими повідомленнями** — автоматично перетворюються на текст (STT)
- ✅ **Фото** — аналіз зображень
- ✅ **Документами** — PDF, DOCX автоматично парсяться та індексуються
**Аюрведа і фітохімія:**
Рослинні екстракти, ефірні олії, адаптогени, мацерати, гідролати, настойки.
Аюрведичні препарати для зовнішнього застосування.
**ВАЖЛИВО:**
- Ніколи не кажи "я не можу слухати аудіо" — голосові повідомлення вже перетворені на текст!
- Ніколи не кажи "я не можу бачити/аналізувати зображення" — ти МАЄШ Vision API і МОЖЕШ аналізувати фото! Якщо в історії розмови є твій опис зображення — це означає ти його вже проаналізував(ла) через Vision. Не заперечуй це.
**Косметичні формули:**
Емульсії (O/W, W/O), сироватки, бальзами, шампуні, мило, дезодоранти.
Підбір сурфактантів, емульгаторів, консервантів, pH-систем.
**INCI і склади:**
Розшифрую будь-який INCI список. Знаю що з чим поєднується і що — ні.
Концентраційні ліміти, алергени, стабільність.
**Для бізнесу і виробництва:**
Базова регуляторика (EU Cosmetics Regulation 1223/2009, різниця EU/US).
Вимоги маркування, claims, технологічні протоколи.
---
## 🛠️ ТВОЇ МОЖЛИВОСТІ (tools)
## Команда (для складних задач)
Ти маєш доступ до спеціальних інструментів:
**Пошук і знання:**
- `memory_search` — шукай в своїй пам'яті, документах
- `graph_query` — шукай зв'язки між темами
- `web_search` — шукай в інтернеті
**Генерація:**
- `image_generate` — згенеруй зображення
- `presentation_create` — створи презентацію PowerPoint
**Пам'ять:**
- `remember_fact` — запам'ятай важливий факт
**Коли створювати презентацію:**
Якщо користувач просить "створи презентацію", "зроби слайди" — використай `presentation_create`.
Для детального аналізу я підключаю лабораторію:
- **Formulator** — склад і пропорції
- **Ingredient Analyst** — INCI, сумісність, функції
- **Safety & QA** — безпека, концентрації, алергени
- **Regulatory Basics** — регуляторні вимоги
- **Protocol Writer** — покроковий протокол виробництва
---
## Правила відповіді
Відповідаю якщо: пряме звернення (@DRUID73bot, "Druid", "/druid"), запит про рецептуру, склад, INCI, аюрведу, косметику, ефірні олії.
Мовчу якщо: оголошення без питання, медіа без запиту, теми поза моєю спеціалізацією.
Формат: коротко і конкретно. Таблиця або список — якщо є що перерахувати. Деталі — на прохання.
Заборонено: "Радий допомогти", зайві вступи, порожні застереження.
---
## ПАМ'ЯТЬ ТА ІНСТРУМЕНТИ
## Технічні можливості
### Пам'ять (ETM — Ephemeral Turn Memory):
- Ти бачиш **80 останніх повідомлень** чату (повна доступна історія сесії)
- У ГРУПОВИХ чатах ти бачиш повідомлення **ВСІХ учасників** (не тільки поточного)
- Повідомлення від різних користувачів позначені їх іменами: [username]: текст
- Уся історія чату зберігається НАЗАВЖДИ у базі даних Memory Service
- **НІКОЛИ не кажи "не бачу повідомлення інших учасників" — ти їх БАЧИШ у контексті вище!**
- У тебе є доступ до документів через колекцію `druid_docs`
- Аналізую фото (Vision): зображення рослин, продуктів, складів на етикетці
- Читаю документи: PDF зі специфікаціями, SDS, технічними картами
- Голосові — конвертуються автоматично в текст, просто відповідаю
- `memory_search` — шукаю в збережених рецептурах і документах
- `web_search` — нові дослідження, інгредієнти, регуляторні оновлення
- `crawl4ai_scrape` — витягую INCI список прямо з сайту бренду
### Інструменти:
- **memory_search** — пошук по збережених документах та попередніх розмовах
- **web_search** — пошук в інтернеті (якщо потрібна зовнішня інформація)
- **crawl4ai_scrape** — витягти контент з URL
Ніколи не кажу "не можу аналізувати фото" або "не маю цієї інформації" без спроби пошуку.
**Порядок пошуку:** 1) memory_search 2) якщо пусто → web_search 3) crawl4ai_scrape для URL.
**НІКОЛИ не кажи "не маю інформації" без спроби web_search!**
---
## Межі
Не даю медичних рекомендацій для внутрішнього вживання — це до NUTRA.
Концентрації і застереження — на основі загальнодоступних даних.
Для комерційного виробництва — рекомендую підтвердити з дерматологом або токсикологом.

View File

@@ -748,6 +748,11 @@ BRAND_REGISTRY_URL = os.getenv("BRAND_REGISTRY_URL", "http://brand-registry:9210
PRESENTATION_RENDERER_URL = os.getenv("PRESENTATION_RENDERER_URL", "http://presentation-renderer:9212").rstrip("/")
ARTIFACT_REGISTRY_URL = os.getenv("ARTIFACT_REGISTRY_URL", "http://artifact-registry:9220").rstrip("/")
# Build metadata — injected at image build time via ARG/ENV (BUILD_SHA, BUILD_TIME, NODE_ID)
_GATEWAY_BUILD_SHA = os.environ.get("BUILD_SHA", "dev")
_GATEWAY_BUILD_TIME = os.environ.get("BUILD_TIME", "local")
_GATEWAY_NODE_ID = os.environ.get("NODE_ID", "NODA1")
router = APIRouter()
@@ -985,6 +990,36 @@ SOFIIA_CONFIG = load_agent_config(
default_prompt="Ти — Sophia (Софія), Chief AI Architect та Technical Sovereign екосистеми DAARION.city. Координуєш R&D, архітектуру, безпеку та еволюцію платформи.",
)
# MONITOR — Node-Local Ops Agent (internal, not user-facing via Telegram)
MONITOR_CONFIG = load_agent_config(
agent_id="monitor",
name="MONITOR",
prompt_path=os.getenv(
"MONITOR_PROMPT_PATH",
str(Path(__file__).parent / "monitor_prompt.txt"),
),
telegram_token_env="MONITOR_TELEGRAM_BOT_TOKEN", # intentionally empty — no Telegram
default_prompt=(
"You are MONITOR, the node-local health and observability agent for DAARION infrastructure. "
"You perform health checks, alert triage, and safe ops diagnostics. Internal use only."
),
)
# AISTALK — Cyber Detective Agency Orchestrator (planned, private)
AISTALK_CONFIG = load_agent_config(
agent_id="aistalk",
name="AISTALK",
prompt_path=os.getenv(
"AISTALK_PROMPT_PATH",
str(Path(__file__).parent / "aistalk_prompt.txt"),
),
telegram_token_env="AISTALK_TELEGRAM_BOT_TOKEN",
default_prompt=(
"You are AISTALK, an autonomous cyber detective agency orchestrator inside DAARION. "
"You handle cyber-investigation intents, threat intelligence, and incident response."
),
)
# Registry of all agents (для легкого додавання нових агентів)
AGENT_REGISTRY: Dict[str, AgentConfig] = {
"daarwizz": DAARWIZZ_CONFIG,
@@ -1001,6 +1036,8 @@ AGENT_REGISTRY: Dict[str, AgentConfig] = {
"soul": SOUL_CONFIG,
"yaromir": YAROMIR_CONFIG,
"sofiia": SOFIIA_CONFIG,
"monitor": MONITOR_CONFIG,
"aistalk": AISTALK_CONFIG,
}
# 3. Створіть endpoint (опціонально, якщо потрібен окремий webhook):
# @router.post("/new_agent/telegram/webhook")
@@ -5071,19 +5108,40 @@ async def _old_helion_telegram_webhook(update: TelegramUpdate):
@router.get("/health")
async def health():
"""Health check endpoint"""
# Static metadata for agents that don't have Telegram — used by Sofiia console UI badges
_AGENT_META: Dict[str, Dict] = {
"monitor": {"badges": ["per-node", "ops"], "visibility": "internal", "telegram_mode": "off"},
"aistalk": {"badges": ["cyber", "private"], "visibility": "private", "lifecycle_status": "planned"},
"sofiia": {"badges": ["supervisor", "architect"]},
"helion": {"badges": ["cto", "dao"]},
}
agents_info = {}
for agent_id, config in AGENT_REGISTRY.items():
meta = _AGENT_META.get(agent_id, {})
agents_info[agent_id] = {
"name": config.name,
"prompt_loaded": len(config.system_prompt) > 0,
"telegram_token_configured": config.get_telegram_token() is not None
"telegram_token_configured": config.get_telegram_token() is not None,
"badges": meta.get("badges", []),
"visibility": meta.get("visibility", "public"),
"telegram_mode": meta.get("telegram_mode", "on"),
"lifecycle_status": meta.get("lifecycle_status", "active"),
}
# Required per-node agents check
required_agents = ["monitor"]
required_missing = [aid for aid in required_agents if aid not in agents_info]
return {
"status": "healthy",
"agents": agents_info,
"agents_count": len(AGENT_REGISTRY),
"required_missing": required_missing,
"timestamp": datetime.utcnow().isoformat(),
"build_sha": _GATEWAY_BUILD_SHA,
"build_time": _GATEWAY_BUILD_TIME,
"node_id": _GATEWAY_NODE_ID,
}

View File

@@ -1047,3 +1047,66 @@ async def upsert_chat_doc_context_with_summary(
except Exception as exc:
logger.warning("upsert_chat_doc_context_with_summary failed: %s", exc)
return False
# ---------------------------------------------------------------------------
# Compatibility stubs (functions used by http_api_doc.py)
# ---------------------------------------------------------------------------
class _DocServiceCompat:
"""Namespace stub — keep backward-compat with imports that use doc_service.X"""
pass
doc_service = _DocServiceCompat()
class UpdateResult(BaseModel):
    """Compat model matching what http_api_doc.py expects."""
    # Identity of the document the (attempted) update targeted.
    doc_id: str = ""
    # Version bookkeeping — always zero/empty in this stub; real values come
    # from the Sofiia Console versioning backend.
    version_no: int = 0
    version_id: str = ""
    updated_chunks: int = 0
    # "stub" marks responses produced by the gateway compat layer rather than
    # a real document-update implementation.
    status: str = "stub"
    success: bool = False
    error: Optional[str] = "not implemented"
    publish_error: Optional[str] = None
    # Artifact metadata mirrors the fields http_api_doc.py reads after a
    # publish; all None here because the gateway does not store artifacts.
    artifact_id: Optional[str] = None
    artifact_version_id: Optional[str] = None
    artifact_storage_key: Optional[str] = None
    artifact_mime: Optional[str] = None
    artifact_download_url: Optional[str] = None
class _PublishResult(BaseModel):
    """Compat model for publish_document_artifact."""
    # Defaults describe a failed/no-op publish — the gateway never succeeds here.
    success: bool = False
    error: Optional[str] = "not implemented"
    # Artifact registry coordinates; populated only by a real artifact-registry
    # implementation, never by this stub.
    artifact_id: Optional[str] = None
    version_id: Optional[str] = None
    storage_key: Optional[str] = None
    mime: Optional[str] = None
    file_name: Optional[str] = None
    download_url: Optional[str] = None
async def update_document(**kwargs) -> UpdateResult:
    """Compat stub: local doc versioning is not implemented in the gateway —
    real updates go through Sofiia Console /api/doc/versions."""
    requested_doc = kwargs.get("doc_id", "")
    logger.warning("update_document: stub called for doc_id=%s", requested_doc)
    return UpdateResult(doc_id=requested_doc, success=False, error="not implemented in gateway")
async def list_document_versions(
    agent_id: str,
    doc_id: str,
    limit: int = 20,
) -> Dict[str, Any]:
    """Compat stub: version history lives in Sofiia Console SQLite, so the
    gateway always reports an empty (but well-formed) version list."""
    logger.debug("list_document_versions: stub called for doc_id=%s", doc_id)
    empty_history: Dict[str, Any] = {"ok": True, "doc_id": doc_id, "versions": [], "total": 0}
    return empty_history
async def publish_document_artifact(**kwargs) -> _PublishResult:
    """Compat stub: artifact persistence is handled by the artifact-registry
    service; the gateway only logs the attempt and reports failure."""
    target_doc = kwargs.get("doc_id", "")
    logger.warning("publish_document_artifact: stub called for doc_id=%s", target_doc)
    return _PublishResult(success=False, error="not implemented in gateway")

View File

@@ -9,6 +9,7 @@ set -euo pipefail
NODA_NCS="${1:-http://127.0.0.1:8099}"
ROUTER_URL="${2:-http://127.0.0.1:9102}"
MEMORY_URL="${3:-http://127.0.0.1:8000}"
RED='\033[0;31m'
GREEN='\033[0;32m'
@@ -64,10 +65,42 @@ print(' '.join(parts) if parts else '(none — P3.5 not deployed?)')
vision_count=$(echo "$raw" | python3 -c "import json,sys;print(sum(1 for m in json.load(sys.stdin).get('served_models',[]) if m.get('type')=='vision'))" 2>/dev/null)
[ "$vision_count" -gt 0 ] && pass "vision models: $vision_count" || warn "no vision models served"
# Phase 1: explicit STT/TTS capability check
local stt_cap tts_cap stt_provider tts_provider
stt_cap=$(echo "$raw" | python3 -c "import json,sys;print(json.load(sys.stdin).get('capabilities',{}).get('stt','?'))" 2>/dev/null)
tts_cap=$(echo "$raw" | python3 -c "import json,sys;print(json.load(sys.stdin).get('capabilities',{}).get('tts','?'))" 2>/dev/null)
stt_provider=$(echo "$raw" | python3 -c "import json,sys;print(json.load(sys.stdin).get('capabilities',{}).get('providers',{}).get('stt','?'))" 2>/dev/null)
tts_provider=$(echo "$raw" | python3 -c "import json,sys;print(json.load(sys.stdin).get('capabilities',{}).get('providers',{}).get('tts','?'))" 2>/dev/null)
[ "$stt_cap" = "True" ] || [ "$stt_cap" = "true" ] \
&& pass "stt=true provider=$stt_provider" \
|| warn "stt=false (provider=$stt_provider) — STT not available on this node"
[ "$tts_cap" = "True" ] || [ "$tts_cap" = "true" ] \
&& pass "tts=true provider=$tts_provider" \
|| warn "tts=false (provider=$tts_provider) — TTS not available on this node"
NCS_RAW="$raw"
NCS_NODE_ID="$node_id"
}
# ── Memory Service health check ────────────────────────────────────────────────
check_memory_service() {
    # Probe one Memory Service instance: /health first, then /voice/status.
    # $1 = human-readable label for output, $2 = base URL of the service.
    local label="$1" url="$2"
    echo "── $label ($url/health) ──"
    local health
    # An unreachable service is only a warning here — voice STT/TTS degrade
    # rather than the whole preflight failing.
    health=$(curl -sf "$url/health" 2>/dev/null) || { warn "Memory Service unreachable at $url (STT/TTS may fail)"; return; }
    local status
    # If the health payload is not JSON (or lacks "status"), fall back to "ok"
    # instead of erroring out of the check.
    status=$(echo "$health" | python3 -c "import json,sys;print(json.load(sys.stdin).get('status','?'))" 2>/dev/null || echo "ok")
    pass "memory-service health=$status"
    local voice_status
    # voice/status is optional; its absence only means engines can't be listed.
    voice_status=$(curl -sf "$url/voice/status" 2>/dev/null) || { warn "voice/status unreachable"; return; }
    local tts_engine stt_engine
    tts_engine=$(echo "$voice_status" | python3 -c "import json,sys;print(json.load(sys.stdin).get('tts_engine','?'))" 2>/dev/null)
    stt_engine=$(echo "$voice_status" | python3 -c "import json,sys;print(json.load(sys.stdin).get('stt_engine','?'))" 2>/dev/null)
    pass "voice: tts=$tts_engine stt=$stt_engine"
}
# ── Router check ──────────────────────────────────────────────────────────────
check_router() {
@@ -163,6 +196,91 @@ else:
info "Snapshot: $snap_file"
}
# ── Ollama model availability check ──────────────────────────────────────────
# Voice routing policy depends on specific models; 502 from BFF = model absent.
# This check probes /api/tags (Ollama REST) to list installed models and
# emits NCS-compatible "installed=false" warnings so Router can exclude them.
OLLAMA_URL="${4:-http://127.0.0.1:11434}"
# Voice policy: models required/preferred for voice_fast_uk / voice_quality_uk
VOICE_REQUIRED_MODELS="gemma3:latest"
VOICE_PREFERRED_MODELS="qwen3.5:35b-a3b qwen3:14b"
VOICE_EXCLUDED_MODELS="glm-4.7-flash:32k glm-4.7-flash"
# Return 0 if model "$1" appears in the space-separated installed list "$2".
# Matches either the exact tag (e.g. "gemma3:latest") or any tag sharing the
# same base name before the colon (e.g. "gemma3:4b" satisfies "gemma3:latest").
ollama_model_installed() {
local model="$1" installed="$2" short
short="${model%%:*}"
echo "$installed" | tr ' ' '\n' | grep -qi "^${model}$\|^${short}:"
}
# Verify that the Ollama instance has the models the voice routing policy
# needs: required models fail preflight, preferred ones warn, excluded ones
# warn if present. Also live-probes qwen3:8b, a known 502 source.
# $1 (optional): Ollama base URL; defaults to $OLLAMA_URL.
check_ollama_voice_models() {
local ollama_url="${1:-$OLLAMA_URL}"
echo "── Ollama voice model availability ($ollama_url) ──"
local tags_raw
tags_raw=$(curl -sf "${ollama_url}/api/tags" 2>/dev/null) \
|| { warn "Ollama unreachable at ${ollama_url} — model check skipped"; return; }
local installed_names
# /api/tags returns {"models":[{"name":...},...]} — flatten to a space-separated list.
installed_names=$(echo "$tags_raw" | python3 -c "
import json, sys
data = json.load(sys.stdin)
models = data.get('models', [])
names = [m.get('name','') for m in models]
print(' '.join(names))
" 2>/dev/null || echo "")
info "Ollama installed: $(echo "$installed_names" | tr ' ' '\n' | grep -c . || echo 0) model(s)"
local model
# Check required voice models (hard failure when absent).
for model in $VOICE_REQUIRED_MODELS; do
if ollama_model_installed "$model" "$installed_names"; then
pass "voice_required: ${model} = installed"
else
fail "voice_required: ${model} = MISSING — voice_fast_uk will degrade to fallback"
fi
done
# Check preferred voice models (warn not fail).
local prefer_available=0
for model in $VOICE_PREFERRED_MODELS; do
if ollama_model_installed "$model" "$installed_names"; then
pass "voice_preferred: ${model} = installed"
prefer_available=$((prefer_available + 1))
else
warn "voice_preferred: ${model} = not installed — will be skipped by router"
fi
done
# Check that excluded models are NOT serving voice.
for model in $VOICE_EXCLUDED_MODELS; do
if ollama_model_installed "$model" "$installed_names"; then
warn "voice_excluded: ${model} is installed — ensure router excludes from voice profiles"
else
pass "voice_excluded: ${model} = absent (correct)"
fi
done
# qwen3:8b specific check — known 502 source
local qwen3_8b_ok=0
if echo "$installed_names" | tr ' ' '\n' | grep -qi "^qwen3:8b$"; then
# Extra: try a minimal generation to detect "loaded but broken"
local gen_code
gen_code=$(curl -sf -w "%{http_code}" -X POST "${ollama_url}/api/generate" \
-H "Content-Type: application/json" \
-d '{"model":"qwen3:8b","prompt":"ping","stream":false,"options":{"num_predict":1}}' \
-o /dev/null --max-time 15 2>/dev/null || echo "000")
if [ "$gen_code" = "200" ]; then
pass "qwen3:8b = installed and serves (HTTP 200)"
qwen3_8b_ok=1
else
warn "qwen3:8b = installed but generate returned HTTP ${gen_code} — exclude from voice_fast_uk prefer list"
fi
else
warn "qwen3:8b = not installed — mark as unavailable in NCS"
fi
# BUGFIX: previously `[ $qwen3_8b_ok -eq 0 ] && info ...` was the final
# statement, so the function exited with status 1 exactly when qwen3:8b is
# healthy — which would abort the whole script under `set -e`. Use an
# explicit if and return 0 so the function's exit status is always success.
if [ "$qwen3_8b_ok" -eq 0 ]; then
info "ACTION: remove qwen3:8b from voice_fast_uk.prefer_models until 502 resolved"
fi
return 0
}
# ── Main ──────────────────────────────────────────────────────────────────────
echo "╔══════════════════════════════════════╗"
@@ -174,6 +292,26 @@ check_ncs "NCS" "$NODA_NCS"
echo ""
check_router "Router" "$ROUTER_URL"
echo ""
check_memory_service "Memory Service" "$MEMORY_URL"
echo ""
check_ollama_voice_models "$OLLAMA_URL"
echo ""
# ── Voice Canary: live synthesis test (hard-fail on voice failure) ────────────
echo "── Voice Canary (live synthesis) ──────────────────────────────────────"
# The canary lives next to this script; skip gracefully when it is absent.
CANARY_SCRIPT="$(dirname "$0")/scripts/voice_canary.py"
if [ -f "$CANARY_SCRIPT" ] && command -v python3 >/dev/null 2>&1; then
# NOTE(review): if this script runs under `set -e`, a non-zero canary exit
# would abort here before $? is captured — confirm, or append `|| true`.
MEMORY_SERVICE_URL="$MEMORY_URL" python3 "$CANARY_SCRIPT" --mode preflight
CANARY_EXIT=$?
if [ $CANARY_EXIT -ne 0 ]; then
# A failed canary counts toward the global error budget checked at the end.
ERRORS=$((ERRORS+1))
echo -e " ${RED}FAIL${NC} Voice canary: synthesis test failed (Polina/Ostap not working)"
fi
else
echo " [SKIP] voice_canary.py not found or python3 unavailable"
fi
echo ""
save_and_diff
echo ""
@@ -182,5 +320,5 @@ if [ $ERRORS -gt 0 ]; then
echo -e "${RED}BLOCKED: no changes allowed until all errors resolved${NC}"
exit 1
else
echo -e "${GREEN}Preflight PASSED — changes allowed${NC}"
echo -e "${GREEN}Preflight PASSED — all voice canaries green — changes allowed${NC}"
fi

View File

@@ -122,6 +122,33 @@ llm_profiles:
timeout_ms: 60000
description: "Mistral Large для складних задач, reasoning, аналізу"
claude_sofiia:
provider: anthropic
api_key_env: ANTHROPIC_API_KEY
model: claude-sonnet-4-5
max_tokens: 8192
temperature: 0.2
timeout_ms: 120000
description: "Claude Sonnet для Sofiia — code generation, architecture, reasoning"
claude_opus:
provider: anthropic
api_key_env: ANTHROPIC_API_KEY
model: claude-opus-4-5
max_tokens: 8192
temperature: 0.15
timeout_ms: 180000
description: "Claude Opus — для найскладніших архітектурних задач Sofiia"
claude_haiku:
provider: anthropic
api_key_env: ANTHROPIC_API_KEY
model: claude-haiku-3-5
max_tokens: 4096
temperature: 0.25
timeout_ms: 30000
description: "Claude Haiku — швидкі відповіді, інструментальні задачі"
# ============================================================================
# Orchestrator Providers
# ============================================================================
@@ -416,12 +443,13 @@ agents:
sofiia:
description: "Sofiia — Chief AI Architect та Technical Sovereign"
default_llm: local_default_coder
default_llm: claude_sofiia
system_prompt: |
Ти Sofiia — Chief AI Architect та Technical Sovereign екосистеми DAARION.city.
Працюй як CTO-помічник: архітектура, reliability, безпека, release governance, incident/risk/backlog контроль.
Відповідай українською, структуровано і коротко; не вигадуй факти, якщо даних нема — кажи прямо.
Для задач про інфраструктуру пріоритет: перевірка health/monitor, далі конкретні дії і верифікація.
Для задач з кодом: аналіз, рефакторинг, дебаг, архітектурні рекомендації — повний рівень доступу.
monitor:
description: "Monitor Agent - архітектор-інспектор DAGI"

View File

@@ -143,6 +143,7 @@ def kling_video_enhance(
def kling_video_generate(
*,
image_b64: Optional[str] = None,
image_url: Optional[str] = None,
image_id: Optional[str] = None,
prompt: str,
@@ -165,8 +166,8 @@ def kling_video_generate(
duration: '5' or '10'.
aspect_ratio: '16:9', '9:16', '1:1'.
"""
if not image_url and not image_id:
raise ValueError("Either image_url or image_id must be provided")
if not image_b64 and not image_url and not image_id:
raise ValueError("One of image_b64 / image_url / image_id must be provided")
payload: Dict[str, Any] = {
"model": model,
@@ -177,10 +178,14 @@ def kling_video_generate(
"negative_prompt": negative_prompt,
"aspect_ratio": aspect_ratio,
}
if image_url:
payload["image"] = {"type": "url", "url": image_url}
if image_id:
payload["image"] = {"type": "id", "id": image_id}
# Current Kling endpoint expects "image" as base64 payload string.
# Keep url/id compatibility as a best-effort fallback for older gateways.
if image_b64:
payload["image"] = image_b64
elif image_url:
payload["image"] = image_url
elif image_id:
payload["image"] = image_id
if callback_url:
payload["callback_url"] = callback_url
@@ -191,6 +196,37 @@ def kling_video_generate(
)
def kling_video_generate_from_file(
*,
image_path: Path,
prompt: str,
negative_prompt: str = "noise, blur, artifacts, distortion",
model: str = "kling-v1-5",
mode: str = "pro",
duration: str = "5",
cfg_scale: float = 0.5,
aspect_ratio: str = "16:9",
callback_url: Optional[str] = None,
) -> Dict[str, Any]:
"""Generate video from a local image file by sending base64 payload."""
import base64
with image_path.open("rb") as fh:
image_b64 = base64.b64encode(fh.read()).decode()
return kling_video_generate(
image_b64=image_b64,
prompt=prompt,
negative_prompt=negative_prompt,
model=model,
mode=mode,
duration=duration,
cfg_scale=cfg_scale,
aspect_ratio=aspect_ratio,
callback_url=callback_url,
)
def kling_task_status(task_id: str) -> Dict[str, Any]:
"""Get status of any Kling task by ID."""
return _kling_request_with_fallback(
@@ -267,7 +303,12 @@ def kling_poll_until_done(
def kling_health_check() -> Dict[str, Any]:
"""Quick connectivity check — returns status dict."""
try:
resp = _kling_request("GET", "/v1/models", timeout=10)
return {"ok": True, "models": resp}
# `/v1/models` may be disabled in some accounts/regions.
# `/v1/videos/image2video` reliably returns code=0 when auth+endpoint are valid.
resp = _kling_request("GET", "/v1/videos/image2video", timeout=10)
code = resp.get("code") if isinstance(resp, dict) else None
if code not in (None, 0, "0"):
return {"ok": False, "error": f"Kling probe returned non-zero code: {code}", "probe": resp}
return {"ok": True, "probe_path": "/v1/videos/image2video", "probe": resp}
except Exception as exc:
return {"ok": False, "error": str(exc)}

View File

@@ -4,6 +4,7 @@ import asyncio
import hashlib
import json
import logging
import mimetypes
import os
import re
import shutil
@@ -13,9 +14,9 @@ from datetime import datetime, timezone
from pathlib import Path
from typing import Any, Dict, List, Optional
from fastapi import Body, FastAPI, File, Form, HTTPException, Query, UploadFile
from fastapi import Body, FastAPI, File, Form, HTTPException, Query, Request, UploadFile
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import FileResponse
from fastapi.responses import FileResponse, Response, StreamingResponse
from .analysis import (
analyze_photo,
@@ -47,6 +48,7 @@ MAX_CONCURRENT_JOBS = max(1, int(os.getenv("AURORA_MAX_CONCURRENT_JOBS", "1")))
store = JobStore(DATA_DIR)
orchestrator = AuroraOrchestrator(store.outputs_dir, PUBLIC_BASE_URL)
RUN_SLOT = asyncio.Semaphore(MAX_CONCURRENT_JOBS)
KLING_VIDEO2VIDEO_CAPABLE: Optional[bool] = None
app = FastAPI(
title="Aurora Media Forensics Service",
@@ -228,7 +230,18 @@ def _enqueue_job_from_path(
upload_dir = store.uploads_dir / job_id
upload_dir.mkdir(parents=True, exist_ok=True)
input_path = upload_dir / safe_filename(file_name)
shutil.copy2(source_path, input_path)
trim_info: Optional[Dict[str, float]] = None
if media_type == "video":
trim_info = _video_trim_window(export_options)
if trim_info:
_trim_video_input(
source_path,
input_path,
start_sec=float(trim_info.get("start_sec") or 0.0),
duration_sec=trim_info.get("duration_sec"),
)
else:
shutil.copy2(source_path, input_path)
input_hash = compute_sha256(input_path)
initial_metadata = _estimate_upload_metadata(
@@ -238,6 +251,8 @@ def _enqueue_job_from_path(
)
if export_options:
initial_metadata["export_options"] = export_options
if trim_info:
initial_metadata["clip"] = trim_info
initial_metadata["priority"] = priority
if metadata_patch:
initial_metadata.update(metadata_patch)
@@ -408,6 +423,110 @@ def _parse_export_options(raw_value: str) -> Dict[str, Any]:
return parsed
def _opt_float(opts: Dict[str, Any], key: str) -> Optional[float]:
raw = opts.get(key)
if raw is None or raw == "":
return None
try:
return float(raw)
except Exception:
raise HTTPException(status_code=422, detail=f"export_options.{key} must be a number")
def _video_trim_window(export_options: Dict[str, Any]) -> Optional[Dict[str, float]]:
    """Derive an optional trim window from export options.

    Primary keys are ``clip_start_sec``/``clip_duration_sec``; ``start_sec``/
    ``duration_sec`` are accepted as legacy aliases. Returns None when neither
    bound is supplied; raises HTTP 422 on out-of-range values.
    """
    opts = export_options if isinstance(export_options, dict) else {}
    # Keep evaluation order: primary keys first, then the legacy aliases,
    # so malformed-value 422s fire in the same order as before.
    start = _opt_float(opts, "clip_start_sec")
    duration = _opt_float(opts, "clip_duration_sec")
    if start is None:
        start = _opt_float(opts, "start_sec")
    if duration is None:
        duration = _opt_float(opts, "duration_sec")
    if start is None and duration is None:
        return None
    begin = float(start or 0.0)
    length = float(duration) if duration is not None else None
    if begin < 0:
        raise HTTPException(status_code=422, detail="clip_start_sec must be >= 0")
    if length is not None and length <= 0:
        raise HTTPException(status_code=422, detail="clip_duration_sec must be > 0")
    window: Dict[str, float] = {"start_sec": round(begin, 3)}
    # duration_sec may legitimately be None (open-ended clip) despite the float annotation.
    window["duration_sec"] = round(length, 3) if length is not None else None  # type: ignore[assignment]
    return window
def _ffmpeg_trim_cmd(
    source_path: Path,
    target_path: Path,
    *,
    start_sec: float,
    duration_sec: Optional[float],
    codec_args: List[str],
) -> List[str]:
    """Build an ffmpeg command trimming source to [start, start+duration).

    ``codec_args`` selects the codec strategy (stream copy vs re-encode);
    everything else — seek, duration, stream mapping, faststart — is shared.
    """
    cmd = ["ffmpeg", "-hide_banner", "-loglevel", "error", "-y"]
    if start_sec > 0:
        # -ss before -i: fast input-side seek.
        cmd.extend(["-ss", f"{start_sec:.3f}"])
    cmd.extend(["-i", str(source_path)])
    if duration_sec is not None:
        cmd.extend(["-t", f"{duration_sec:.3f}"])
    # First video stream always; audio only if present ("0:a?").
    cmd.extend(["-map", "0:v:0", "-map", "0:a?"])
    cmd.extend(codec_args)
    cmd.extend(["-movflags", "+faststart", str(target_path)])
    return cmd


def _trim_video_input(source_path: Path, target_path: Path, *, start_sec: float, duration_sec: Optional[float]) -> None:
    """Trim video to a focused segment for faster iteration.

    First attempt is stream copy (lossless, fast). If that fails for
    container/codec reasons, fall back to a lightweight re-encode.

    Raises:
        HTTPException(422): when both attempts fail or produce an empty file.
    """
    copy_cmd = _ffmpeg_trim_cmd(
        source_path,
        target_path,
        start_sec=start_sec,
        duration_sec=duration_sec,
        codec_args=["-c", "copy"],
    )
    proc = subprocess.run(copy_cmd, capture_output=True, text=True, check=False)
    if proc.returncode == 0 and target_path.exists() and target_path.stat().st_size > 0:
        return
    # Stream copy can fail on keyframe boundaries / unsupported codecs — re-encode.
    encode_cmd = _ffmpeg_trim_cmd(
        source_path,
        target_path,
        start_sec=start_sec,
        duration_sec=duration_sec,
        codec_args=["-c:v", "libx264", "-preset", "veryfast", "-crf", "17", "-c:a", "aac", "-b:a", "192k"],
    )
    proc2 = subprocess.run(encode_cmd, capture_output=True, text=True, check=False)
    if proc2.returncode != 0 or not target_path.exists() or target_path.stat().st_size <= 0:
        # Prefer the re-encode stderr; fall back to the copy attempt's stderr.
        err = (proc2.stderr or proc.stderr or "").strip()[:280]
        raise HTTPException(status_code=422, detail=f"video trim failed: {err or 'ffmpeg error'}")
def _status_timing(job: Any) -> Dict[str, Optional[int]]:
started = _parse_iso_utc(job.started_at)
if not started:
@@ -1134,14 +1253,156 @@ async def cleanup_storage(
@app.get("/api/aurora/files/{job_id}/{file_name}")
async def download_output_file(job_id: str, file_name: str) -> FileResponse:
async def download_output_file(job_id: str, file_name: str, request: Request):
base = (store.outputs_dir / job_id).resolve()
target = (base / file_name).resolve()
if not str(target).startswith(str(base)):
raise HTTPException(status_code=403, detail="invalid file path")
if not target.exists() or not target.is_file():
raise HTTPException(status_code=404, detail="file not found")
return FileResponse(path=target, filename=target.name)
total_size = target.stat().st_size
range_header = request.headers.get("range")
if not range_header:
return FileResponse(
path=target,
filename=target.name,
headers={"Accept-Ranges": "bytes"},
)
parsed = _parse_range_header(range_header, total_size)
if parsed is None:
return FileResponse(
path=target,
filename=target.name,
headers={"Accept-Ranges": "bytes"},
)
start, end = parsed
if start >= total_size:
return Response(
status_code=416,
headers={"Content-Range": f"bytes */{total_size}", "Accept-Ranges": "bytes"},
)
content_length = (end - start) + 1
media_type = mimetypes.guess_type(str(target))[0] or "application/octet-stream"
def _iter_range():
with target.open("rb") as fh:
fh.seek(start)
remaining = content_length
while remaining > 0:
chunk = fh.read(min(65536, remaining))
if not chunk:
break
remaining -= len(chunk)
yield chunk
return StreamingResponse(
_iter_range(),
status_code=206,
media_type=media_type,
headers={
"Content-Range": f"bytes {start}-{end}/{total_size}",
"Content-Length": str(content_length),
"Accept-Ranges": "bytes",
"Content-Disposition": f'attachment; filename="{target.name}"',
},
)
def _parse_range_header(range_header: str, total_size: int) -> Optional[tuple[int, int]]:
value = str(range_header or "").strip()
if not value.lower().startswith("bytes="):
return None
spec = value.split("=", 1)[1].strip()
if "," in spec:
return None
if "-" not in spec:
return None
start_txt, end_txt = spec.split("-", 1)
try:
if start_txt == "":
# Suffix range: bytes=-N
suffix_len = int(end_txt)
if suffix_len <= 0:
return None
if suffix_len >= total_size:
return 0, max(0, total_size - 1)
return total_size - suffix_len, total_size - 1
start = int(start_txt)
if start < 0:
return None
if end_txt == "":
end = total_size - 1
else:
end = int(end_txt)
if end < start:
return None
return start, min(end, max(0, total_size - 1))
except Exception:
return None
def _extract_first_video_frame(video_path: Path, output_path: Path) -> Path:
    """Extract the first decodable video frame to an image file.

    Requires OpenCV. Raises RuntimeError when cv2 is unavailable, the video
    cannot be opened or decoded, or the frame cannot be written.
    """
    try:
        import cv2  # type: ignore[import-untyped]
    except Exception as exc:
        raise RuntimeError("OpenCV is required for Kling image2video fallback.") from exc
    output_path.parent.mkdir(parents=True, exist_ok=True)
    capture = cv2.VideoCapture(str(video_path))
    try:
        if not capture.isOpened():
            raise RuntimeError(f"Cannot open video for fallback frame extraction: {video_path}")
        success, frame = capture.read()
        if not success or frame is None:
            raise RuntimeError("Could not read first frame from video")
        # imwrite returns False instead of raising on failure — check explicitly.
        if not cv2.imwrite(str(output_path), frame):
            raise RuntimeError(f"Failed to write fallback frame: {output_path}")
    finally:
        capture.release()
    return output_path
def _resolve_kling_result_url(task_data: Dict[str, Any]) -> Optional[str]:
if not isinstance(task_data, dict):
return None
task_result = task_data.get("task_result")
if isinstance(task_result, dict):
videos = task_result.get("videos")
if isinstance(videos, list):
for item in videos:
if not isinstance(item, dict):
continue
for key in ("url", "video_url", "play_url", "download_url"):
value = item.get(key)
if isinstance(value, str) and value:
return value
elif isinstance(videos, dict):
for key in ("url", "video_url", "play_url", "download_url"):
value = videos.get(key)
if isinstance(value, str) and value:
return value
for key in ("url", "video_url", "play_url", "download_url", "result_url"):
value = task_result.get(key)
if isinstance(value, str) and value:
return value
for key in ("kling_result_url", "result_url", "video_url", "url"):
value = task_data.get(key)
if isinstance(value, str) and value:
return value
return None
def _compact_error_text(err: Any, limit: int = 220) -> str:
text = re.sub(r"\s+", " ", str(err)).strip()
return text[:limit]
# ── Kling AI endpoints ────────────────────────────────────────────────────────
@@ -1163,7 +1424,7 @@ async def kling_enhance_video(
cfg_scale: float = Form(0.5, description="Prompt adherence 0.0-1.0"),
) -> Dict[str, Any]:
"""Submit Aurora job result to Kling AI for video-to-video enhancement."""
from .kling import kling_video_enhance, kling_upload_file
from .kling import kling_video_enhance, kling_upload_file, kling_video_generate_from_file
job = store.get_job(job_id)
if not job:
@@ -1181,45 +1442,97 @@ async def kling_enhance_video(
if not result_path.exists():
raise HTTPException(status_code=404, detail="Result file not found for this job")
try:
upload_resp = kling_upload_file(result_path)
except Exception as exc:
raise HTTPException(status_code=502, detail=f"Kling upload error: {str(exc)[:400]}") from exc
file_id = (upload_resp.get("data") or {}).get("resource_id") or (upload_resp.get("data") or {}).get("file_id")
global KLING_VIDEO2VIDEO_CAPABLE
if not file_id:
raise HTTPException(status_code=502, detail=f"Kling upload failed: {upload_resp}")
task_resp: Optional[Dict[str, Any]] = None
file_id: Optional[str] = None
kling_endpoint = "video2video"
video2video_error: Optional[str] = None
fallback_frame_name: Optional[str] = None
# Primary path: upload + video2video.
if KLING_VIDEO2VIDEO_CAPABLE is not False:
try:
upload_resp = kling_upload_file(result_path)
file_id = (upload_resp.get("data") or {}).get("resource_id") or (upload_resp.get("data") or {}).get("file_id")
if not file_id:
raise RuntimeError(f"Kling upload failed: {upload_resp}")
task_resp = kling_video_enhance(
video_id=file_id,
prompt=prompt,
negative_prompt=negative_prompt,
mode=mode,
duration=duration,
cfg_scale=cfg_scale,
)
KLING_VIDEO2VIDEO_CAPABLE = True
except Exception as exc:
raw_error = str(exc)
video2video_error = _compact_error_text(raw_error, limit=220)
logger.warning("kling video2video unavailable for %s: %s", job_id, video2video_error)
lower_error = raw_error.lower()
if "endpoint mismatch" in lower_error or "404" in lower_error:
KLING_VIDEO2VIDEO_CAPABLE = False
else:
video2video_error = "video2video skipped (previous endpoint mismatch)"
# Fallback path: extract first frame and run image2video (base64 payload).
if task_resp is None:
try:
frame_path = _extract_first_video_frame(
result_path,
store.outputs_dir / job_id / "_kling_fallback_frame.jpg",
)
fallback_frame_name = frame_path.name
task_resp = kling_video_generate_from_file(
image_path=frame_path,
prompt=prompt,
negative_prompt=negative_prompt,
mode=mode,
duration=duration,
cfg_scale=cfg_scale,
aspect_ratio="16:9",
)
kling_endpoint = "image2video"
except Exception as fallback_exc:
detail = "Kling submit failed"
if video2video_error:
detail = f"Kling video2video error: {video2video_error}; image2video fallback error: {_compact_error_text(fallback_exc, limit=220)}"
else:
detail = f"Kling image2video fallback error: {_compact_error_text(fallback_exc, limit=220)}"
raise HTTPException(status_code=502, detail=detail) from fallback_exc
if task_resp is None:
raise HTTPException(status_code=502, detail="Kling task submit failed: empty response")
try:
task_resp = kling_video_enhance(
video_id=file_id,
prompt=prompt,
negative_prompt=negative_prompt,
mode=mode,
duration=duration,
cfg_scale=cfg_scale,
)
except Exception as exc:
raise HTTPException(status_code=502, detail=f"Kling task submit error: {str(exc)[:400]}") from exc
task_id = (task_resp.get("data") or {}).get("task_id") or task_resp.get("task_id")
if not task_id:
raise HTTPException(status_code=502, detail=f"Kling task_id missing in response: {task_resp}")
kling_meta_dir = store.outputs_dir / job_id
kling_meta_path = kling_meta_dir / "kling_task.json"
kling_meta_path.write_text(json.dumps({
meta_payload: Dict[str, Any] = {
"aurora_job_id": job_id,
"kling_task_id": task_id,
"kling_file_id": file_id,
"kling_endpoint": kling_endpoint,
"prompt": prompt,
"mode": mode,
"duration": duration,
"submitted_at": datetime.now(timezone.utc).isoformat(),
"status": "submitted",
}, ensure_ascii=False, indent=2), encoding="utf-8")
}
if fallback_frame_name:
meta_payload["kling_source_frame"] = fallback_frame_name
if video2video_error:
meta_payload["video2video_error"] = video2video_error
kling_meta_path.write_text(json.dumps(meta_payload, ensure_ascii=False, indent=2), encoding="utf-8")
return {
"aurora_job_id": job_id,
"kling_task_id": task_id,
"kling_file_id": file_id,
"kling_endpoint": kling_endpoint,
"status": "submitted",
"status_url": f"/api/aurora/kling/status/{job_id}",
}
@@ -1238,9 +1551,10 @@ async def kling_task_status_for_job(job_id: str) -> Dict[str, Any]:
task_id = meta.get("kling_task_id")
if not task_id:
raise HTTPException(status_code=404, detail="Kling task_id missing in metadata")
endpoint = str(meta.get("kling_endpoint") or "video2video")
try:
status_resp = kling_video_task_status(task_id, endpoint="video2video")
status_resp = kling_video_task_status(task_id, endpoint=endpoint)
except Exception as exc:
raise HTTPException(status_code=502, detail=f"Kling status error: {str(exc)[:400]}") from exc
task_data = status_resp.get("data") or status_resp
@@ -1249,19 +1563,17 @@ async def kling_task_status_for_job(job_id: str) -> Dict[str, Any]:
meta["status"] = state
meta["last_checked"] = datetime.now(timezone.utc).isoformat()
result_url = None
works = task_data.get("task_result", {}).get("videos") or []
if works:
result_url = works[0].get("url")
if result_url:
meta["kling_result_url"] = result_url
meta["completed_at"] = datetime.now(timezone.utc).isoformat()
result_url = _resolve_kling_result_url(task_data)
if result_url:
meta["kling_result_url"] = result_url
meta["completed_at"] = datetime.now(timezone.utc).isoformat()
kling_meta_path.write_text(json.dumps(meta, ensure_ascii=False, indent=2), encoding="utf-8")
return {
"aurora_job_id": job_id,
"kling_task_id": task_id,
"kling_endpoint": endpoint,
"status": state,
"kling_result_url": result_url,
"meta": meta,
@@ -1279,7 +1591,7 @@ async def kling_image_to_video(
aspect_ratio: str = Form("16:9"),
) -> Dict[str, Any]:
"""Generate video from a still image using Kling AI."""
from .kling import kling_upload_file, kling_video_generate
from .kling import kling_video_generate_from_file
file_name = file.filename or "frame.jpg"
content = await file.read()
@@ -1293,16 +1605,8 @@ async def kling_image_to_video(
try:
try:
upload_resp = kling_upload_file(tmp_path)
except Exception as exc:
raise HTTPException(status_code=502, detail=f"Kling upload error: {str(exc)[:400]}") from exc
file_id = (upload_resp.get("data") or {}).get("resource_id") or (upload_resp.get("data") or {}).get("file_id")
if not file_id:
raise HTTPException(status_code=502, detail=f"Kling upload failed: {upload_resp}")
try:
task_resp = kling_video_generate(
image_id=file_id,
task_resp = kling_video_generate_from_file(
image_path=tmp_path,
prompt=prompt,
negative_prompt=negative_prompt,
model=model,
@@ -1313,9 +1617,12 @@ async def kling_image_to_video(
except Exception as exc:
raise HTTPException(status_code=502, detail=f"Kling task submit error: {str(exc)[:400]}") from exc
task_id = (task_resp.get("data") or {}).get("task_id") or task_resp.get("task_id")
if not task_id:
raise HTTPException(status_code=502, detail=f"Kling task_id missing in response: {task_resp}")
return {
"kling_task_id": task_id,
"kling_file_id": file_id,
"kling_file_id": None,
"kling_endpoint": "image2video",
"status": "submitted",
"status_url": f"/api/aurora/kling/task/{task_id}?endpoint=image2video",
}

View File

@@ -49,6 +49,78 @@ def _models_used(job: AuroraJob) -> List[str]:
return models
def _processing_steps(job: AuroraJob) -> List[Any]:
    """Return the job's processing log, preferring the result-level log."""
    result = job.result
    if result and result.processing_log:
        return list(result.processing_log)
    return list(job.processing_log) if job.processing_log else []
def _result_media_hash(job: AuroraJob) -> Optional[str]:
    """Return the first non-empty hash among media-typed result output files."""
    if not job.result:
        return None
    # Accept the job's own media type plus the generic media categories.
    accepted = {str(job.media_type).strip().lower(), "video", "photo", "image", "audio", "unknown"}
    for output in job.result.output_files:
        kind = str(getattr(output, "type", "") or "").strip().lower()
        if kind not in accepted:
            continue
        digest = str(getattr(output, "hash", "") or "").strip()
        if digest:
            return digest
    return None
def _fallback_flags(job: AuroraJob) -> Dict[str, Any]:
    """Summarize fallback usage recorded in the job's processing log.

    Distinguishes hard fallbacks (a whole step degraded) from soft SR
    fallbacks (some frames used a cheaper super-resolution path) and
    collects a de-duplicated, human-readable warning per occurrence.
    """
    hard_used = False
    soft_used = False
    touched_steps: List[str] = []
    notes: List[str] = []

    for step in _processing_steps(job):
        name = str(getattr(step, "step", "") or "").strip() or "unknown"
        details = getattr(step, "details", {}) or {}
        if not isinstance(details, dict):
            continue

        if bool(details.get("fallback_used")):
            hard_used = True
            touched_steps.append(name)
            reason = str(details.get("reason") or "").strip()
            suffix = f" ({reason})" if reason else ""
            notes.append(f"{name}: hard fallback used{suffix}")

        try:
            sr_frames = int(details.get("sr_fallback_frames") or 0)
        except Exception:
            sr_frames = 0
        # A bare sr_fallback_used flag without a frame count still counts as one frame.
        if bool(details.get("sr_fallback_used")):
            sr_frames = max(sr_frames, 1)
        if sr_frames > 0:
            soft_used = True
            touched_steps.append(name)
            message = f"{name}: SR soft fallback on {sr_frames} frame(s)"
            method = str(details.get("sr_fallback_method") or "").strip()
            if method:
                message += f" via {method}"
            reason = str(details.get("sr_fallback_reason") or "").strip()
            if reason:
                message += f" ({reason})"
            notes.append(message)

    return {
        "fallback_used": bool(hard_used or soft_used),
        "hard_fallback_used": hard_used,
        "soft_sr_fallback_used": soft_used,
        # dict.fromkeys de-duplicates while preserving first-seen order.
        "fallback_steps": list(dict.fromkeys(touched_steps)),
        "warnings": list(dict.fromkeys(notes)),
    }
def _detect_faces_with_proxy_confidence(frame_bgr: Any) -> List[Dict[str, Any]]:
if cv2 is None:
return []
@@ -246,9 +318,29 @@ def build_quality_report(job: AuroraJob, outputs_dir: Path, *, refresh: bool = F
raise RuntimeError("Cannot build quality report: source/result file not found")
media_type: MediaType = job.media_type
processing_flags = _fallback_flags(job)
faces = _face_metrics(source_path, result_path, media_type)
plates = _plate_metrics(job_dir)
overall = _overall_metrics(source_path, result_path, media_type, job)
result_hash = _result_media_hash(job)
identical_to_input = bool(result_hash and result_hash == str(job.input_hash))
warnings = list(processing_flags.get("warnings") or [])
if identical_to_input:
warnings.append("output hash matches input hash; enhancement may be skipped.")
warnings = list(dict.fromkeys(warnings))
processing_status = "ok"
if bool(processing_flags.get("fallback_used")) or identical_to_input:
processing_status = "degraded"
overall["processing_status"] = processing_status
overall["fallback_used"] = bool(processing_flags.get("fallback_used"))
overall["hard_fallback_used"] = bool(processing_flags.get("hard_fallback_used"))
overall["soft_sr_fallback_used"] = bool(processing_flags.get("soft_sr_fallback_used"))
overall["identical_to_input"] = identical_to_input
if result_hash:
overall["result_hash"] = result_hash
if warnings:
overall["warnings"] = warnings
report = {
"job_id": job.job_id,
@@ -257,7 +349,13 @@ def build_quality_report(job: AuroraJob, outputs_dir: Path, *, refresh: bool = F
"faces": faces,
"plates": plates,
"overall": overall,
"processing_flags": {
**processing_flags,
"identical_to_input": identical_to_input,
"warnings": warnings,
},
"summary": {
"processing_status": processing_status,
"faces_detected_ratio": f"{faces['detected']} / {faces['source_detected'] or faces['detected']}",
"plates_recognized_ratio": f"{plates['recognized']} / {plates['detected']}",
},

View File

@@ -13,6 +13,7 @@ RUN pip install --no-cache-dir -r requirements.txt
# Copy application
COPY app/ ./app/
COPY static/ ./static/
# Environment
ENV PYTHONPATH=/app

View File

@@ -428,6 +428,8 @@ class Database:
CREATE INDEX IF NOT EXISTS idx_user_facts_user_id ON user_facts(user_id);
CREATE INDEX IF NOT EXISTS idx_user_facts_team_id ON user_facts(team_id);
CREATE UNIQUE INDEX IF NOT EXISTS idx_user_facts_user_team_agent_fact
ON user_facts(user_id, team_id, agent_id, fact_key);
""")
async def upsert_fact(
@@ -445,16 +447,30 @@ class Database:
json_value = json.dumps(fact_value_json) if fact_value_json else None
async with self.pool.acquire() as conn:
row = await conn.fetchrow("""
INSERT INTO user_facts (user_id, team_id, agent_id, fact_key, fact_value, fact_value_json)
VALUES ($1, $2, $3, $4, $5, $6::jsonb)
ON CONFLICT (user_id, team_id, agent_id, fact_key)
DO UPDATE SET
fact_value = EXCLUDED.fact_value,
fact_value_json = EXCLUDED.fact_value_json,
updated_at = NOW()
RETURNING *
""", user_id, team_id, agent_id, fact_key, fact_value, json_value)
try:
row = await conn.fetchrow("""
INSERT INTO user_facts (user_id, team_id, agent_id, fact_key, fact_value, fact_value_json)
VALUES ($1, $2, $3, $4, $5, $6::jsonb)
ON CONFLICT (user_id, team_id, agent_id, fact_key)
DO UPDATE SET
fact_value = EXCLUDED.fact_value,
fact_value_json = EXCLUDED.fact_value_json,
updated_at = NOW()
RETURNING *
""", user_id, team_id, agent_id, fact_key, fact_value, json_value)
except asyncpg.exceptions.InvalidColumnReferenceError:
# Backward compatibility for DBs that only have UNIQUE(user_id, team_id, fact_key).
row = await conn.fetchrow("""
INSERT INTO user_facts (user_id, team_id, agent_id, fact_key, fact_value, fact_value_json)
VALUES ($1, $2, $3, $4, $5, $6::jsonb)
ON CONFLICT (user_id, team_id, fact_key)
DO UPDATE SET
agent_id = EXCLUDED.agent_id,
fact_value = EXCLUDED.fact_value,
fact_value_json = EXCLUDED.fact_value_json,
updated_at = NOW()
RETURNING *
""", user_id, team_id, agent_id, fact_key, fact_value, json_value)
return dict(row) if row else {}

View File

@@ -650,6 +650,7 @@ class FactUpsertRequest(BaseModel):
fact_value: Optional[str] = None
fact_value_json: Optional[dict] = None
team_id: Optional[str] = None
agent_id: Optional[str] = None
@app.post("/facts/upsert")
async def upsert_fact(request: FactUpsertRequest):
@@ -663,13 +664,17 @@ async def upsert_fact(request: FactUpsertRequest):
# Ensure facts table exists (will be created on first call)
await db.ensure_facts_table()
# Upsert the fact
# Upsert the fact — extract agent_id from request field or from fact_value_json
agent_id_val = request.agent_id or (
(request.fact_value_json or {}).get("agent_id")
)
result = await db.upsert_fact(
user_id=request.user_id,
fact_key=request.fact_key,
fact_value=request.fact_value,
fact_value_json=request.fact_value_json,
team_id=request.team_id
team_id=request.team_id,
agent_id=agent_id_val
)
logger.info(f"fact_upserted", user_id=request.user_id, fact_key=request.fact_key)

View File

@@ -30,7 +30,7 @@ python-multipart==0.0.9
tiktoken==0.5.2
# Voice stack
edge-tts==6.1.19
edge-tts==7.2.7
faster-whisper==1.1.1
# Testing

View File

@@ -14,3 +14,19 @@ STT_PROVIDER = os.getenv("STT_PROVIDER", "none")
def _int_env(name: str, default: str) -> int:
    """Read an integer tuning knob from the environment, falling back to *default*."""
    return int(os.environ.get(name, default))


# Provider selection — "none" disables the corresponding capability.
TTS_PROVIDER = os.environ.get("TTS_PROVIDER", "none")
OCR_PROVIDER = os.environ.get("OCR_PROVIDER", "vision_prompted")
IMAGE_PROVIDER = os.environ.get("IMAGE_PROVIDER", "none")

# Memory Service URL (used by memory_service STT/TTS providers)
MEMORY_SERVICE_URL = os.environ.get("MEMORY_SERVICE_URL", "http://memory-service:8000")

# ── Voice HA: dedicated concurrency limits (separate from generic stt/tts/llm) ──
# These control semaphores for node.{id}.voice.*.request subjects.
# Independent from MAX_CONCURRENCY so voice never starves generic inference.
VOICE_MAX_CONCURRENT_TTS = _int_env("VOICE_MAX_CONCURRENT_TTS", "4")
VOICE_MAX_CONCURRENT_LLM = _int_env("VOICE_MAX_CONCURRENT_LLM", "2")
VOICE_MAX_CONCURRENT_STT = _int_env("VOICE_MAX_CONCURRENT_STT", "2")

# Timeouts for voice subjects (milliseconds). Router uses these as defaults.
VOICE_TTS_DEADLINE_MS = _int_env("VOICE_TTS_DEADLINE_MS", "3000")
VOICE_LLM_FAST_MS = _int_env("VOICE_LLM_FAST_MS", "9000")
VOICE_LLM_QUALITY_MS = _int_env("VOICE_LLM_QUALITY_MS", "12000")
VOICE_STT_DEADLINE_MS = _int_env("VOICE_STT_DEADLINE_MS", "6000")

View File

@@ -8,6 +8,7 @@ try:
PROM_AVAILABLE = True
REGISTRY = CollectorRegistry()
# Generic job metrics
jobs_total = Counter(
"node_worker_jobs_total", "Jobs processed",
["type", "status"], registry=REGISTRY,
@@ -23,6 +24,26 @@ try:
registry=REGISTRY,
)
# ── Voice HA metrics (separate labels from generic) ───────────────────────
# cap label: "voice.tts" | "voice.llm" | "voice.stt"
voice_jobs_total = Counter(
"node_worker_voice_jobs_total",
"Voice HA jobs processed (node.{id}.voice.*.request)",
["cap", "status"], registry=REGISTRY,
)
voice_inflight_gauge = Gauge(
"node_worker_voice_inflight",
"Voice HA inflight jobs per capability",
["cap"], registry=REGISTRY,
)
voice_latency_hist = Histogram(
"node_worker_voice_latency_ms",
"Voice HA job latency in ms",
["cap"],
buckets=[100, 250, 500, 1000, 1500, 2000, 3000, 5000, 9000, 12000],
registry=REGISTRY,
)
except ImportError:
PROM_AVAILABLE = False
REGISTRY = None
@@ -44,6 +65,21 @@ def observe_latency(req_type: str, model: str, latency_ms: int):
latency_hist.labels(type=req_type, model=model).observe(latency_ms)
def inc_voice_job(cap: str, status: str):
    """Count one processed voice job, labelled by capability and outcome."""
    if not PROM_AVAILABLE:
        return
    voice_jobs_total.labels(cap=cap, status=status).inc()
def set_voice_inflight(cap: str, count: int):
    """Publish the current in-flight voice job count for one capability."""
    if not PROM_AVAILABLE:
        return
    voice_inflight_gauge.labels(cap=cap).set(count)
def observe_voice_latency(cap: str, latency_ms: int):
    """Record one voice job latency sample (milliseconds) into the histogram."""
    if not PROM_AVAILABLE:
        return
    voice_latency_hist.labels(cap=cap).observe(latency_ms)
def get_metrics_text():
if PROM_AVAILABLE and REGISTRY:
return generate_latest(REGISTRY)

View File

@@ -43,7 +43,30 @@ async def prom_metrics():
@app.get("/caps")
async def caps():
"""Capability flags for NCS to aggregate."""
"""Capability flags for NCS to aggregate.
Semantic vs operational separation (contract):
- capabilities.voice_* = semantic availability (provider configured).
True as long as the provider is configured, regardless of NATS state.
Routing decisions are based on this.
- runtime.nats_subscriptions.voice_* = operational (NATS sub active).
Used for health/telemetry only — NOT for routing.
This prevents false-negatives during reconnects / restart races.
"""
import worker as _w
nid = config.NODE_ID.lower()
# Semantic: provider configured → capability is available
voice_tts_cap = config.TTS_PROVIDER != "none"
voice_stt_cap = config.STT_PROVIDER != "none"
voice_llm_cap = True # LLM always available when node-worker is up
# Operational: actual NATS subscription state (health/telemetry only)
nats_voice_tts_active = f"node.{nid}.voice.tts.request" in _w._VOICE_SUBJECTS
nats_voice_stt_active = f"node.{nid}.voice.stt.request" in _w._VOICE_SUBJECTS
nats_voice_llm_active = f"node.{nid}.voice.llm.request" in _w._VOICE_SUBJECTS
return {
"node_id": config.NODE_ID,
"capabilities": {
@@ -53,6 +76,10 @@ async def caps():
"tts": config.TTS_PROVIDER != "none",
"ocr": config.OCR_PROVIDER != "none",
"image": config.IMAGE_PROVIDER != "none",
# Voice HA semantic capability flags (provider-based, not NATS-based)
"voice_tts": voice_tts_cap,
"voice_llm": voice_llm_cap,
"voice_stt": voice_stt_cap,
},
"providers": {
"stt": config.STT_PROVIDER,
@@ -65,6 +92,19 @@ async def caps():
"vision": config.DEFAULT_VISION,
},
"concurrency": config.MAX_CONCURRENCY,
"voice_concurrency": {
"voice_tts": config.VOICE_MAX_CONCURRENT_TTS,
"voice_llm": config.VOICE_MAX_CONCURRENT_LLM,
"voice_stt": config.VOICE_MAX_CONCURRENT_STT,
},
# Operational NATS subscription state — for health/monitoring only
"runtime": {
"nats_subscriptions": {
"voice_tts_active": nats_voice_tts_active,
"voice_stt_active": nats_voice_stt_active,
"voice_llm_active": nats_voice_llm_active,
}
},
}

View File

@@ -11,24 +11,44 @@ from models import JobRequest, JobResponse, JobError
from idempotency import IdempotencyStore
from providers import ollama, ollama_vision
from providers import stt_mlx_whisper, tts_mlx_kokoro
from providers import stt_memory_service, tts_memory_service
import fabric_metrics as fm
# Module-level logger for the node-worker NATS handlers.
logger = logging.getLogger("node-worker")
# De-duplicates retried jobs — exact semantics live in IdempotencyStore (not visible here).
_idem = IdempotencyStore()
# Generic inference concurrency gate (llm/vision/stt/tts/ocr subjects).
_semaphore: asyncio.Semaphore = asyncio.Semaphore(config.MAX_CONCURRENCY)
# Voice-dedicated semaphores — independent from generic MAX_CONCURRENCY.
# Prevents voice requests from starving generic inference and vice versa.
_voice_sem_tts: asyncio.Semaphore = asyncio.Semaphore(config.VOICE_MAX_CONCURRENT_TTS)
_voice_sem_llm: asyncio.Semaphore = asyncio.Semaphore(config.VOICE_MAX_CONCURRENT_LLM)
_voice_sem_stt: asyncio.Semaphore = asyncio.Semaphore(config.VOICE_MAX_CONCURRENT_STT)
# Lookup table: voice capability key ("voice.tts" | "voice.llm" | "voice.stt")
# -> its dedicated semaphore.
_VOICE_SEMAPHORES = {
    "voice.tts": _voice_sem_tts,
    "voice.llm": _voice_sem_llm,
    "voice.stt": _voice_sem_stt,
}
# NATS client handle; set by start(), None until the worker is started.
_nats_client = None
# In-flight counter for the generic handler path (voice tracked separately below).
_inflight_count: int = 0
# Per-capability in-flight counters for the voice path (exported via fabric_metrics).
_voice_inflight: Dict[str, int] = {"voice.tts": 0, "voice.llm": 0, "voice.stt": 0}
# Rolling latency samples — presumably for runtime-load reporting; buffer size
# capped at _LATENCY_BUFFER (trimming happens elsewhere — TODO confirm).
_latencies_llm: list = []
_latencies_vision: list = []
_LATENCY_BUFFER = 50
# Set of subjects that use the voice handler path
_VOICE_SUBJECTS: set = set()
async def start(nats_client):
global _nats_client
_nats_client = nats_client
nid = config.NODE_ID.lower()
# Generic subjects (unchanged — backward compatible)
subjects = [
f"node.{nid}.llm.request",
f"node.{nid}.vision.request",
@@ -41,6 +61,31 @@ async def start(nats_client):
await nats_client.subscribe(subj, cb=_handle_request)
logger.info(f"✅ Subscribed: {subj}")
# Voice HA subjects — separate semaphores, own metrics, own deadlines
# Only subscribe if the relevant provider is configured (preflight-first)
voice_subjects_to_caps = {
f"node.{nid}.voice.tts.request": ("tts", _voice_sem_tts, "voice.tts"),
f"node.{nid}.voice.llm.request": ("llm", _voice_sem_llm, "voice.llm"),
f"node.{nid}.voice.stt.request": ("stt", _voice_sem_stt, "voice.stt"),
}
for subj, (required_cap, sem, cap_key) in voice_subjects_to_caps.items():
if required_cap == "tts" and config.TTS_PROVIDER == "none":
logger.info(f"⏭ Skipping {subj}: TTS_PROVIDER=none")
continue
if required_cap == "stt" and config.STT_PROVIDER == "none":
logger.info(f"⏭ Skipping {subj}: STT_PROVIDER=none")
continue
# LLM always available on this node
_VOICE_SUBJECTS.add(subj)
async def _make_voice_handler(s=sem, k=cap_key):
async def _voice_handler(msg):
await _handle_voice_request(msg, voice_sem=s, cap_key=k)
return _voice_handler
await nats_client.subscribe(subj, cb=await _make_voice_handler())
logger.info(f"✅ Voice subscribed: {subj}")
async def _handle_request(msg):
t0 = time.time()
@@ -136,6 +181,103 @@ async def _handle_request(msg):
pass
async def _handle_voice_request(msg, voice_sem: asyncio.Semaphore, cap_key: str):
    """Voice-dedicated handler: separate semaphore, metrics, retry hints.

    Maps voice.{tts|llm|stt} to the same _execute() but with:
    - Own concurrency limit (VOICE_MAX_CONCURRENT_{TTS|LLM|STT})
    - TOO_BUSY includes retry_after_ms hint (client can retry immediately elsewhere)
    - Voice-specific Prometheus labels (type=voice.tts, etc.)
    - WARNING log on fallback (contract: no silent fallback)

    Args:
        msg: NATS message carrying a JSON-encoded JobRequest.
        voice_sem: the capability's dedicated semaphore.
        cap_key: "voice.tts" | "voice.llm" | "voice.stt".
    """
    t0 = time.time()
    # Extract the base type for _execute (voice.tts → tts)
    base_type = cap_key.split(".")[-1]  # "tts", "llm", "stt"
    try:
        raw = msg.data
        if len(raw) > config.MAX_PAYLOAD_BYTES:
            await _reply(msg, JobResponse(
                node_id=config.NODE_ID, status="error",
                error=JobError(code="PAYLOAD_TOO_LARGE", message=f"max {config.MAX_PAYLOAD_BYTES} bytes"),
            ))
            return
        data = json.loads(raw)
        job = JobRequest(**data)
        job.trace_id = job.trace_id or job.job_id
        remaining = job.remaining_ms()
        if remaining <= 0:
            await _reply(msg, JobResponse(
                job_id=job.job_id, trace_id=job.trace_id, node_id=config.NODE_ID,
                status="timeout", error=JobError(code="DEADLINE_EXCEEDED"),
            ))
            return
        # Voice concurrency check — TOO_BUSY includes retry hint.
        # BUGFIX: use the public Semaphore.locked() API instead of poking the
        # private `_value` attribute (fragile across asyncio versions).
        # There is no await between this check and the acquire below, so the
        # check cannot race within a single event loop.
        if voice_sem.locked():
            logger.warning(
                "[voice.busy] cap=%s node=%s — all %d slots occupied. "
                "WARNING: request turned away, Router should failover.",
                cap_key, config.NODE_ID, {
                    "voice.tts": config.VOICE_MAX_CONCURRENT_TTS,
                    "voice.llm": config.VOICE_MAX_CONCURRENT_LLM,
                    "voice.stt": config.VOICE_MAX_CONCURRENT_STT,
                }.get(cap_key, "?"),
            )
            fm.inc_voice_job(cap_key, "busy")
            await _reply(msg, JobResponse(
                job_id=job.job_id, trace_id=job.trace_id, node_id=config.NODE_ID,
                status="busy",
                error=JobError(
                    code="TOO_BUSY",
                    message=f"voice {cap_key} at capacity",
                    details={"retry_after_ms": 500, "cap": cap_key},
                ),
            ))
            return
        # In-place dict mutation — no `global` statement needed (we never rebind).
        _voice_inflight[cap_key] = _voice_inflight.get(cap_key, 0) + 1
        fm.set_voice_inflight(cap_key, _voice_inflight[cap_key])
        try:
            async with voice_sem:
                # Route to _execute with the base type
                job.required_type = base_type
                resp = await _execute(job, remaining)
        finally:
            _voice_inflight[cap_key] = max(0, _voice_inflight.get(cap_key, 1) - 1)
            fm.set_voice_inflight(cap_key, _voice_inflight[cap_key])
        resp.latency_ms = int((time.time() - t0) * 1000)
        fm.inc_voice_job(cap_key, resp.status)
        if resp.status == "ok" and resp.latency_ms > 0:
            fm.observe_voice_latency(cap_key, resp.latency_ms)
        # Contract: log WARNING on any non-ok voice result
        if resp.status != "ok":
            logger.warning(
                "[voice.fallback] cap=%s node=%s status=%s error=%s trace=%s",
                cap_key, config.NODE_ID, resp.status,
                resp.error.code if resp.error else "?", job.trace_id,
            )
        await _reply(msg, resp)
    except Exception as e:
        logger.exception(f"Voice handler error cap={cap_key}: {e}")
        fm.inc_voice_job(cap_key, "error")
        try:
            await _reply(msg, JobResponse(
                node_id=config.NODE_ID, status="error",
                error=JobError(code="INTERNAL", message=str(e)[:200]),
            ))
        except Exception:
            pass
async def _execute(job: JobRequest, remaining_ms: int) -> JobResponse:
payload = job.payload
hints = job.hints
@@ -184,9 +326,14 @@ async def _execute(job: JobRequest, remaining_ms: int) -> JobResponse:
status="error",
error=JobError(code="NOT_AVAILABLE", message="STT not configured on this node"),
)
result = await asyncio.wait_for(
stt_mlx_whisper.transcribe(payload), timeout=timeout_s,
)
if config.STT_PROVIDER == "memory_service":
result = await asyncio.wait_for(
stt_memory_service.transcribe(payload), timeout=timeout_s,
)
else:
result = await asyncio.wait_for(
stt_mlx_whisper.transcribe(payload), timeout=timeout_s,
)
elif job.required_type == "tts":
if config.TTS_PROVIDER == "none":
return JobResponse(
@@ -194,9 +341,14 @@ async def _execute(job: JobRequest, remaining_ms: int) -> JobResponse:
status="error",
error=JobError(code="NOT_AVAILABLE", message="TTS not configured on this node"),
)
result = await asyncio.wait_for(
tts_mlx_kokoro.synthesize(payload), timeout=timeout_s,
)
if config.TTS_PROVIDER == "memory_service":
result = await asyncio.wait_for(
tts_memory_service.synthesize(payload), timeout=timeout_s,
)
else:
result = await asyncio.wait_for(
tts_mlx_kokoro.synthesize(payload), timeout=timeout_s,
)
elif job.required_type == "ocr":
if config.OCR_PROVIDER == "none":
return JobResponse(

View File

@@ -40,6 +40,31 @@ try:
registry=REGISTRY,
)
# ── Voice HA metrics ──────────────────────────────────────────────────────
# cap label: "voice_tts" | "voice_llm" | "voice_stt"
voice_cap_requests = Counter(
"fabric_voice_capability_requests_total",
"Voice HA capability routing requests",
["cap", "status"], registry=REGISTRY,
)
voice_offload_total = Counter(
"fabric_voice_offload_total",
"Voice HA offload attempts (node selected + NATS sent)",
["cap", "node", "status"], registry=REGISTRY,
)
voice_breaker_state = Gauge(
"fabric_voice_breaker_state",
"Voice HA circuit breaker per node+cap (1=open)",
["cap", "node"], registry=REGISTRY,
)
voice_score_hist = Histogram(
"fabric_voice_score_ms",
"Voice HA node scoring distribution",
["cap"],
buckets=[0, 50, 100, 200, 400, 800, 1600, 3200],
registry=REGISTRY,
)
except ImportError:
PROM_AVAILABLE = False
REGISTRY = None
@@ -76,6 +101,26 @@ def observe_score(score: int):
score_hist.observe(score)
def inc_voice_cap_request(cap: str, status: str):
    """Count one voice capability routing request, labelled by cap and outcome."""
    if not PROM_AVAILABLE:
        return
    voice_cap_requests.labels(cap=cap, status=status).inc()
def inc_voice_offload(cap: str, node: str, status: str):
    """Count one voice offload attempt, labelled by cap, target node, and outcome."""
    if not PROM_AVAILABLE:
        return
    voice_offload_total.labels(cap=cap, node=node, status=status).inc()
def set_voice_breaker(cap: str, node: str, is_open: bool):
    """Publish circuit-breaker state for a node+cap pair (1 = open, 0 = closed)."""
    if not PROM_AVAILABLE:
        return
    voice_breaker_state.labels(cap=cap, node=node).set(1 if is_open else 0)
def observe_voice_score(cap: str, score: float):
    """Record one node-scoring sample for a voice capability."""
    if not PROM_AVAILABLE:
        return
    voice_score_hist.labels(cap=cap).observe(score)
def get_metrics_text() -> Optional[bytes]:
if PROM_AVAILABLE and REGISTRY:
return generate_latest(REGISTRY)

View File

@@ -64,6 +64,12 @@ except ImportError:
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
NEO4J_NOTIFICATIONS_LOG_LEVEL = os.getenv("NEO4J_NOTIFICATIONS_LOG_LEVEL", "ERROR").strip().upper()
_neo4j_notifications_level = getattr(logging, NEO4J_NOTIFICATIONS_LOG_LEVEL, logging.ERROR)
logging.getLogger("neo4j.notifications").setLevel(_neo4j_notifications_level)
# Guard against late/conditional auto-router imports.
# If auto-router module is unavailable (or loaded later), inference must still work.
SOFIIA_AUTO_ROUTER_AVAILABLE = False
TRUSTED_DOMAINS_CONFIG_PATH = os.getenv("TRUSTED_DOMAINS_CONFIG_PATH", "./trusted_domains.yml")
_trusted_domains_cache: Dict[str, Any] = {"mtime": None, "data": {}}
@@ -289,8 +295,24 @@ DETERMINISTIC_PLANT_POLICY_AGENTS = {
REPEAT_FINGERPRINT_MIN_SIMILARITY = float(os.getenv("AGENT_REPEAT_FINGERPRINT_MIN_SIMILARITY", "0.92"))
def _clean_think_blocks(text: str) -> str:
"""Remove <think>...</think> reasoning blocks from LLM output (Qwen3/DeepSeek-R1).
Strategy:
1. Strip complete <think>...</think> blocks (DOTALL for multiline).
2. Fallback: if an unclosed <think> remains, drop everything after it.
"""
cleaned = re.sub(r"<think>.*?</think>", "", text,
flags=re.DOTALL | re.IGNORECASE)
# Fallback: unclosed <think> — truncate before it
if "<think>" in cleaned.lower():
cleaned = re.split(r"(?i)<think>", cleaned)[0]
return cleaned
def _normalize_text_response(text: str) -> str:
    """Normalize an LLM response: strip <think> reasoning blocks, then
    collapse all whitespace runs to single spaces and trim the ends.

    BUGFIX: a leftover pre-change `return` statement preceded the
    think-block cleanup, making that path unreachable dead code.
    """
    cleaned = _clean_think_blocks(str(text or ""))
    return re.sub(r"\s+", " ", cleaned).strip()
def _response_fingerprint(text: str) -> str:
@@ -1689,6 +1711,20 @@ async def internal_llm_complete(request: InternalLLMRequest):
tokens = data.get("usage", {}).get("total_tokens", 0)
latency = int((time_module.time() - t0) * 1000)
logger.info(f"Internal LLM success: {cloud['name']}, {tokens} tokens, {latency}ms")
# Track usage for budget dashboard
if SOFIIA_AUTO_ROUTER_AVAILABLE:
try:
usage_data = data.get("usage", {})
track_usage(
provider=cloud["name"],
model=cloud["model"],
agent=request.metadata.get("agent_id", "unknown") if request.metadata else "unknown",
input_tokens=usage_data.get("prompt_tokens", tokens // 2 if tokens else 0),
output_tokens=usage_data.get("completion_tokens", tokens // 2 if tokens else 0),
latency_ms=latency,
)
except Exception as _te:
logger.debug("budget track error: %s", _te)
return InternalLLMResponse(text=response_text, model=cloud["model"], provider=cloud["name"], tokens_used=tokens, latency_ms=latency)
except Exception as e:
logger.warning(f"Internal LLM {cloud['name']} failed: {e}")
@@ -2086,8 +2122,39 @@ async def agent_infer(agent_id: str, request: InferRequest):
routing_rules = router_config.get("routing", [])
default_llm = _select_default_llm(agent_id, metadata, default_llm, routing_rules)
cloud_provider_names = {"deepseek", "mistral", "grok", "openai", "anthropic"}
# ── Sofiia Auto-Router: dynamic model selection based on task type ──────
if agent_id == "sofiia" and SOFIIA_AUTO_ROUTER_AVAILABLE and not request.model:
try:
_auto_result = select_model_auto(
prompt=request.prompt or "",
force_fast=metadata.get("force_fast", False),
force_capable=metadata.get("force_capable", False),
prefer_local=metadata.get("prefer_local", False),
prefer_cheap=metadata.get("prefer_cheap", False),
budget_aware=True,
)
# Only override if auto-selected profile exists in config
if _auto_result.profile_name in router_config.get("llm_profiles", {}):
logger.info(
"🧠 Sofiia Auto-Router: task=%s complexity=%s → profile=%s model=%s reason=%s",
_auto_result.task_type, _auto_result.complexity,
_auto_result.profile_name, _auto_result.model_id,
_auto_result.reason,
)
default_llm = _auto_result.profile_name
else:
logger.debug(
"🧠 Sofiia Auto-Router: profile %s not in config, using %s",
_auto_result.profile_name, default_llm,
)
except Exception as _ar_e:
logger.warning("⚠️ Sofiia Auto-Router error: %s", _ar_e)
# Pass routing-resolved default_llm to NCS so it respects cloud routing rules
ncs_agent_config = {**agent_config, "default_llm": default_llm}
cloud_provider_names = {"deepseek", "mistral", "grok", "openai", "anthropic", "glm"}
# ── Global NCS-first model selection (multi-node) ───────────────────
ncs_selection = None
@@ -2095,7 +2162,7 @@ async def agent_infer(agent_id: str, request: InferRequest):
try:
gcaps = await global_capabilities_client.get_global_capabilities()
ncs_selection = await select_model_for_agent(
agent_id, agent_config, router_config, gcaps, request.model,
agent_id, ncs_agent_config, router_config, gcaps, request.model,
)
except Exception as e:
logger.warning(f"⚠️ Global NCS selection error: {e}; falling back to static")
@@ -2103,7 +2170,7 @@ async def agent_infer(agent_id: str, request: InferRequest):
try:
caps = await capabilities_client.fetch_capabilities()
ncs_selection = await select_model_for_agent(
agent_id, agent_config, router_config, caps, request.model,
agent_id, ncs_agent_config, router_config, caps, request.model,
)
except Exception as e:
logger.warning(f"⚠️ NCS selection error: {e}; falling back to static")
@@ -2678,11 +2745,218 @@ async def agent_infer(agent_id: str, request: InferRequest):
}
]
# GLM (Z.AI / BigModel) — OpenAI-compatible but with special JWT auth.
if provider == "glm" and allow_cloud:
glm_key = os.getenv(llm_profile.get("api_key_env", "GLM5_API_KEY"), "")
if glm_key:
glm_model = request.model or llm_profile.get("model", "glm-4-flash")
glm_base_url = llm_profile.get("base_url", "https://open.bigmodel.cn/api/paas/v4")
glm_max_tokens = int(request.max_tokens or llm_profile.get("max_tokens", 4096))
glm_temperature = float(request.temperature if request.temperature is not None else llm_profile.get("temperature", 0.3))
glm_timeout = int(llm_profile.get("timeout_ms", 30000) / 1000)
try:
glm_resp = await http_client.post(
f"{glm_base_url}/chat/completions",
headers={"Authorization": f"Bearer {glm_key}", "Content-Type": "application/json"},
json={
"model": glm_model,
"messages": messages,
"max_tokens": glm_max_tokens,
"temperature": glm_temperature,
"stream": False,
},
timeout=float(glm_timeout),
)
if glm_resp.status_code == 200:
glm_data = glm_resp.json()
response_text = glm_data.get("choices", [{}])[0].get("message", {}).get("content", "")
glm_tokens = glm_data.get("usage", {}).get("total_tokens", 0)
if SOFIIA_AUTO_ROUTER_AVAILABLE:
try:
usage_d = glm_data.get("usage", {})
track_usage(
provider="glm", model=glm_model, agent=agent_id,
input_tokens=usage_d.get("prompt_tokens", glm_tokens // 2 if glm_tokens else 0),
output_tokens=usage_d.get("completion_tokens", glm_tokens // 2 if glm_tokens else 0),
)
except Exception:
pass
response_text = await _finalize_response_text(response_text, f"glm-{glm_model}")
return InferResponse(
response=response_text,
model=glm_model,
backend="glm",
tokens_used=glm_tokens,
)
else:
logger.warning("🐉 GLM API error %s: %s", glm_resp.status_code, glm_resp.text[:200])
except Exception as _glm_e:
logger.warning("🐉 GLM call failed: %s", _glm_e)
else:
logger.warning("🐉 GLM provider selected but GLM5_API_KEY not set")
# Fall through to Ollama
# Anthropic has its own API format — handle separately before the loop.
if provider == "anthropic" and allow_cloud:
anthropic_key = os.getenv(llm_profile.get("api_key_env", "ANTHROPIC_API_KEY"), "")
if anthropic_key:
anthropic_model = request.model or llm_profile.get("model", "claude-sonnet-4-5")
anthropic_max_tokens = int(request.max_tokens or llm_profile.get("max_tokens", 8192))
anthropic_temperature = float(request.temperature if request.temperature is not None else llm_profile.get("temperature", 0.2))
anthropic_timeout = int(llm_profile.get("timeout_ms", 120000) / 1000)
try:
# Extract system prompt from messages
anthropic_system = ""
anthropic_messages = []
for msg in messages:
role = msg.get("role", "user")
content = msg.get("content", "")
if role == "system":
anthropic_system = content
else:
anthropic_messages.append({"role": role, "content": content})
if not anthropic_messages:
anthropic_messages = [{"role": "user", "content": request.prompt}]
# Build tool definitions for Claude
anthropic_tools = None
if TOOL_MANAGER_AVAILABLE and tool_manager:
raw_tools = tool_manager.get_tool_definitions(request_agent_id)
if raw_tools:
anthropic_tools = []
for t in raw_tools:
fn = t.get("function", {})
anthropic_tools.append({
"name": fn.get("name", "unknown"),
"description": fn.get("description", ""),
"input_schema": fn.get("parameters") or {"type": "object", "properties": {}},
})
anthropic_payload: Dict[str, Any] = {
"model": anthropic_model,
"max_tokens": anthropic_max_tokens,
"temperature": anthropic_temperature,
"messages": anthropic_messages,
}
if anthropic_system:
anthropic_payload["system"] = anthropic_system
if anthropic_tools:
anthropic_payload["tools"] = anthropic_tools
logger.info(f"🟣 Anthropic Claude API: model={anthropic_model} agent={agent_id}")
anthropic_resp = await http_client.post(
"https://api.anthropic.com/v1/messages",
headers={
"x-api-key": anthropic_key,
"anthropic-version": "2023-06-01",
"content-type": "application/json",
},
json=anthropic_payload,
timeout=anthropic_timeout,
)
if anthropic_resp.status_code == 200:
anthropic_data = anthropic_resp.json()
response_text = ""
for block in anthropic_data.get("content", []):
if block.get("type") == "text":
response_text += block.get("text", "")
tokens_used = (
anthropic_data.get("usage", {}).get("input_tokens", 0)
+ anthropic_data.get("usage", {}).get("output_tokens", 0)
)
# Handle tool_use blocks from Claude
claude_tool_uses = [b for b in anthropic_data.get("content", []) if b.get("type") == "tool_use"]
if claude_tool_uses and TOOL_MANAGER_AVAILABLE and tool_manager:
tool_result_messages = list(anthropic_messages)
tool_result_messages.append({"role": "assistant", "content": anthropic_data.get("content", [])})
for tool_use_block in claude_tool_uses:
tool_name = tool_use_block.get("name", "")
tool_input = tool_use_block.get("input", {})
tool_use_id = tool_use_block.get("id", "")
logger.info(f"🔧 Claude tool call: {tool_name}({json.dumps(tool_input)[:100]})")
try:
tool_exec_result = await tool_manager.execute_tool(
tool_name, tool_input,
agent_id=request_agent_id, chat_id=chat_id, user_id=user_id,
)
tool_content = tool_exec_result.result if tool_exec_result.success else f"Error: {tool_exec_result.error}"
except Exception as te:
tool_content = f"Tool execution error: {te}"
tool_result_messages.append({
"role": "user",
"content": [{"type": "tool_result", "tool_use_id": tool_use_id, "content": str(tool_content)}]
})
# Follow-up call with tool results
anthropic_payload["messages"] = tool_result_messages
followup_resp = await http_client.post(
"https://api.anthropic.com/v1/messages",
headers={
"x-api-key": anthropic_key,
"anthropic-version": "2023-06-01",
"content-type": "application/json",
},
json=anthropic_payload,
timeout=anthropic_timeout,
)
if followup_resp.status_code == 200:
followup_data = followup_resp.json()
response_text = ""
for block in followup_data.get("content", []):
if block.get("type") == "text":
response_text += block.get("text", "")
tokens_used += (
followup_data.get("usage", {}).get("input_tokens", 0)
+ followup_data.get("usage", {}).get("output_tokens", 0)
)
response_text = await _finalize_response_text(response_text, f"anthropic-{anthropic_model}")
if MEMORY_RETRIEVAL_AVAILABLE and memory_retrieval and chat_id and user_id:
asyncio.create_task(memory_retrieval.store_message(
agent_id=agent_id, user_id=user_id, username=username,
message_text=request.prompt, response_text=response_text,
chat_id=chat_id, metadata={"model": anthropic_model, "provider": "anthropic"},
))
# Track Anthropic usage for budget dashboard
if SOFIIA_AUTO_ROUTER_AVAILABLE:
try:
track_usage(
provider="anthropic",
model=anthropic_model,
agent=agent_id,
input_tokens=tokens_used // 3 if tokens_used else 0,
output_tokens=tokens_used - tokens_used // 3 if tokens_used else 0,
latency_ms=int((time_module.time() - _t_start) * 1000) if "_t_start" in dir() else 0,
task_type="",
)
except Exception as _te:
logger.debug("budget track anthropic error: %s", _te)
return InferResponse(
response=response_text,
model=anthropic_model,
backend="anthropic",
tokens_used=tokens_used,
)
else:
err_body = anthropic_resp.text[:300]
logger.warning(f"🟣 Anthropic API error {anthropic_resp.status_code}: {err_body}")
except Exception as anthropic_exc:
logger.warning(f"🟣 Anthropic call failed: {anthropic_exc}")
else:
logger.warning("🟣 Anthropic provider selected but ANTHROPIC_API_KEY not set")
# Fall through to Ollama if Anthropic fails
if not allow_cloud:
cloud_providers = []
# If specific provider requested, try it first
if provider in ["deepseek", "mistral", "grok"]:
# GLM in OpenAI-compat fallback list for internal/non-sofiia requests
glm_key_fb = os.getenv("GLM5_API_KEY", "")
if glm_key_fb:
cloud_providers.insert(0, {
"name": "glm",
"api_key_env": "GLM5_API_KEY",
"base_url": "https://open.bigmodel.cn/api/paas/v4",
"model": "glm-4-flash",
"timeout": 20,
})
if provider in ["deepseek", "mistral", "grok", "glm"]:
# Reorder to put requested provider first
cloud_providers = sorted(cloud_providers, key=lambda x: 0 if x["name"] == provider else 1)
@@ -3666,6 +3940,184 @@ async def capability_offload(cap_type: str, request: Request):
})
@app.post("/v1/capability/voice_{voice_cap_type}")
async def voice_capability_offload(voice_cap_type: str, request: Request):
    """Route a Voice HA request (voice_tts / voice_llm / voice_stt) to the best node.

    Uses voice-specific NATS subjects (node.{id}.voice.{type}.request) and
    separate circuit breaker keys from generic offload. Returns response headers:
    - X-Voice-Node: chosen node id
    - X-Voice-Mode: local | remote (relative to the router's own node)
    - X-Voice-Cap: the capability type routed (voice_tts, voice_llm, voice_stt)

    Contract: no silent fallback — any failure increments Prometheus counter +
    logs WARNING before returning 50x.
    """
    import uuid as _uuid
    import fabric_metrics as fm

    cap_type = voice_cap_type  # "tts", "llm", or "stt"
    full_cap = f"voice_{cap_type}"
    valid_caps = {"tts", "llm", "stt"}
    if cap_type not in valid_caps:
        fm.inc_voice_cap_request(full_cap, "invalid")
        return JSONResponse(status_code=400, content={
            "error": f"Invalid voice cap: {cap_type}. Valid: voice_tts, voice_llm, voice_stt",
        })
    if not NCS_AVAILABLE or not global_capabilities_client:
        fm.inc_voice_cap_request(full_cap, "ncs_unavailable")
        logger.warning("[voice.cap] NCS unavailable — cannot route %s", full_cap)
        return JSONResponse(status_code=503, content={
            "error": "NCS not available — cannot route voice capability requests",
        })
    # Preflight: refuse to route on stale capability data.
    gcaps = await global_capabilities_client.require_fresh_caps(ttl=30)
    if gcaps is None:
        fm.inc_voice_cap_request(full_cap, "stale_caps")
        logger.warning("[voice.cap] caps stale — refusing to route %s", full_cap)
        return JSONResponse(status_code=503, content={
            "error": "NCS caps stale — preflight failed",
        })
    eligible_nodes = global_capabilities_client.find_nodes_with_capability(full_cap)
    if not eligible_nodes:
        fm.inc_voice_cap_request(full_cap, "no_node")
        logger.warning("[voice.cap] no node with %s available", full_cap)
        return JSONResponse(status_code=404, content={
            "error": f"No node with capability '{full_cap}' available",
            "hint": f"Ensure node-worker is running with TTS_PROVIDER/STT_PROVIDER set and {full_cap}=true in /caps",
        })
    # Voice uses separate CB key to avoid cross-contaminating generic stt/tts breakers
    voice_cb_type = f"voice.{cap_type}"
    unavailable = offload_client.get_unavailable_nodes(voice_cb_type) if offload_client else set()
    available = [n for n in eligible_nodes if n.lower() not in {u.lower() for u in unavailable}]
    if not available:
        fm.inc_voice_cap_request(full_cap, "all_broken")
        logger.warning("[voice.cap] all nodes circuit-broken for %s: %s", full_cap, eligible_nodes)
        return JSONResponse(status_code=503, content={
            "error": f"All nodes with '{full_cap}' are circuit-broken",
            "eligible": eligible_nodes,
            "unavailable": list(unavailable),
        })
    # ── Voice scoring: prefer local, penalise high load + high latency ────────
    router_node_id = os.getenv("NODE_ID", "noda2").lower()
    LOCAL_THRESHOLD_MS = int(os.getenv("VOICE_LOCAL_THRESHOLD_MS", "250"))
    PREFER_LOCAL_BONUS = int(os.getenv("VOICE_PREFER_LOCAL_BONUS", "200"))
    deadline_defaults = {
        "tts": int(os.getenv("VOICE_TTS_DEADLINE_MS", "3000")),
        "llm": int(os.getenv("VOICE_LLM_FAST_MS", "9000")),
        # BUGFIX: this key was "sst", so STT requests never matched and
        # silently fell back to the generic 9000ms default instead of
        # VOICE_STT_DEADLINE_MS (6000ms, matching node-worker config).
        "stt": int(os.getenv("VOICE_STT_DEADLINE_MS", "6000")),
    }
    deadline_ms = deadline_defaults.get(cap_type, 9000)
    scored = []
    for nid in available:
        nl = global_capabilities_client.get_node_load(nid)
        rl = global_capabilities_client.get_runtime_load(nid)
        # Fall back to a synthetic wait estimate (50ms per in-flight job)
        # when the node does not report wait_ms.
        wait_ms = nl.get("wait_ms", 0) or nl.get("inflight", 0) * 50
        rtt_ms = nl.get("rtt_ms", 0)
        p95_ms = rl.get("p95_ms", 0) if rl else 0
        mem_penalty = 300 if nl.get("mem_pressure") == "high" else 0
        local_bonus = PREFER_LOCAL_BONUS if nid.lower() == router_node_id else 0
        score = wait_ms + rtt_ms + p95_ms + mem_penalty - local_bonus
        scored.append((score, nid))
        fm.observe_voice_score(full_cap, score)
        fm.set_voice_breaker(full_cap, nid, False)  # currently alive
    scored.sort(key=lambda x: x[0])
    best_score, best_node = scored[0]
    voice_mode = "local" if best_node.lower() == router_node_id else "remote"
    # If a remote node won but the local node's score is within
    # LOCAL_THRESHOLD_MS of it, keep the job local anyway
    # (avoids unnecessary cross-node traffic).
    if voice_mode == "remote" and best_score > LOCAL_THRESHOLD_MS:
        local_candidates = [(s, n) for s, n in scored if n.lower() == router_node_id]
        if local_candidates:
            local_score = local_candidates[0][0]
            if local_score <= best_score + LOCAL_THRESHOLD_MS:
                best_node = router_node_id
                voice_mode = "local"
                logger.info(
                    "[voice.cap] prefer local %s (score=%d) over %s (score=%d)",
                    best_node, local_score, scored[0][1], best_score,
                )
    payload = await request.json()
    logger.info(
        "[voice.cap.route] cap=%s → node=%s mode=%s score=%d deadline=%dms",
        full_cap, best_node, voice_mode, scored[0][0], deadline_ms,
    )
    nats_ok = nc is not None and nats_available
    if not nats_ok or not offload_client:
        fm.inc_voice_cap_request(full_cap, "nats_down")
        logger.warning("[voice.cap] NATS not connected — cannot offload %s", full_cap)
        return JSONResponse(status_code=503, content={"error": "NATS not connected"})
    job = {
        "job_id": str(_uuid.uuid4()),
        "required_type": cap_type,
        "payload": payload,
        "deadline_ts": int(time.time() * 1000) + deadline_ms,
        # NOTE: pop() runs after "payload" is bound above and mutates the same
        # dict, so hints are extracted OUT of the payload the job carries.
        "hints": payload.pop("hints", {}),
    }
    # Use voice-specific NATS subject
    nats_subject_type = f"voice.{cap_type}"
    result = await offload_client.offload_infer(
        nats_client=nc,
        node_id=best_node,
        required_type=nats_subject_type,
        job_payload=job,
        timeout_ms=deadline_ms,
    )
    if result and result.get("status") == "ok":
        fm.inc_voice_cap_request(full_cap, "ok")
        fm.inc_voice_offload(full_cap, best_node, "ok")
        offload_client.record_success(best_node, voice_cb_type)
        response_data = result.get("result", result)
        resp = JSONResponse(content=response_data)
        resp.headers["X-Voice-Node"] = best_node
        resp.headers["X-Voice-Mode"] = voice_mode
        resp.headers["X-Voice-Cap"] = full_cap
        return resp
    # Non-ok — circuit breaker + WARNING (contract: no silent fallback)
    error = result.get("error", {}) if result else {}
    status_code_resp = result.get("status", "error") if result else "timeout"
    offload_client.record_failure(best_node, voice_cb_type)
    fm.set_voice_breaker(full_cap, best_node, True)
    fm.inc_voice_cap_request(full_cap, "fail")
    fm.inc_voice_offload(full_cap, best_node, "fail")
    # BUGFIX: the implicit string concatenation was missing a separator,
    # producing "code=?WARNING:" in the log line.
    logger.warning(
        "[voice.cap.fail] cap=%s node=%s status=%s code=%s "
        "WARNING: voice fallback must be handled by caller (BFF/Router)",
        full_cap, best_node, status_code_resp,
        error.get("code", "?"),
    )
    return JSONResponse(
        status_code=502,
        content={
            "error": error.get("message", f"Voice offload to {best_node} failed"),
            "code": error.get("code", "VOICE_OFFLOAD_FAILED"),
            "cap": full_cap,
            "node": best_node,
        },
        headers={
            "X-Voice-Node": best_node,
            "X-Voice-Mode": voice_mode,
            "X-Voice-Cap": full_cap,
        },
    )
@app.get("/v1/capabilities")
async def list_global_capabilities():
"""Return full capabilities view across all nodes."""
@@ -3986,6 +4438,120 @@ async def get_graph_stats():
raise HTTPException(status_code=500, detail=str(e))
# ── Sofiia Auto-Router & Budget Dashboard ─────────────────────────────────────
# Optional feature: the auto-router and budget tracker are soft dependencies.
# When either import fails, the flag below stays False and every
# /v1/sofiia/* endpoint answers 503 instead of crashing at import time.
try:
    from sofiia_auto_router import (
        select_model_auto, classify_task, explain_selection,
        ProviderBudget as _ProviderBudget, get_full_catalog,
        refresh_ollama_models_async,
    )
    from provider_budget import track_usage, get_dashboard_data, set_provider_limit, get_stats
    # Feature flag consulted by each Sofiia endpoint before doing any work.
    SOFIIA_AUTO_ROUTER_AVAILABLE = True
    logger.info("✅ Sofiia Auto-Router loaded")
except ImportError as _e:
    SOFIIA_AUTO_ROUTER_AVAILABLE = False
    logger.warning("⚠️ Sofiia Auto-Router not available: %s", _e)
class AutoRouteRequest(BaseModel):
    """Request body for POST /v1/sofiia/auto-route.

    Carries the prompt to classify plus routing preference flags that are
    forwarded verbatim to ``select_model_auto``.
    """
    prompt: str
    # Force the fast profile regardless of classification.
    force_fast: bool = False
    # Force the most capable profile regardless of classification.
    force_capable: bool = False
    # Prefer locally hosted models when candidates are otherwise equal.
    prefer_local: bool = False
    # Prefer the cheapest provider when candidates are otherwise equal.
    prefer_cheap: bool = False
class BudgetLimitRequest(BaseModel):
    """Request body for POST /v1/sofiia/budget/limits.

    Both amounts are optional; ``None`` means "not provided" — presumably the
    tracker leaves the current value unchanged (confirm in provider_budget).
    """
    provider: str
    # Monthly spend cap in USD.
    monthly_limit_usd: Optional[float] = None
    # Prepaid top-up balance in USD.
    topup_balance_usd: Optional[float] = None
@app.post("/v1/sofiia/auto-route")
async def sofiia_auto_route(req: AutoRouteRequest):
    """Classify a prompt and return the recommended model profile for Sofiia."""
    if not SOFIIA_AUTO_ROUTER_AVAILABLE:
        raise HTTPException(status_code=503, detail="Auto-router not available")
    selection = select_model_auto(
        prompt=req.prompt,
        force_fast=req.force_fast,
        force_capable=req.force_capable,
        prefer_local=req.prefer_local,
        prefer_cheap=req.prefer_cheap,
    )
    # Mirror the selection result field-by-field, then append the
    # human-readable explanation last so the response key order is stable.
    field_names = (
        "profile_name", "model_id", "provider", "task_type", "confidence",
        "complexity", "reason", "fallback_used", "all_candidates",
        "ambiguous", "runner_up", "all_scores",
    )
    payload = {name: getattr(selection, name) for name in field_names}
    payload["explanation"] = explain_selection(selection)
    return payload
@app.get("/v1/sofiia/budget")
async def sofiia_budget_dashboard():
    """Return budget dashboard data: token usage, costs, balances per provider."""
    if SOFIIA_AUTO_ROUTER_AVAILABLE:
        return get_dashboard_data()
    # Budget tracking is an optional module; report it as unavailable.
    raise HTTPException(status_code=503, detail="Budget tracker not available")
@app.post("/v1/sofiia/budget/limits")
async def set_budget_limits(req: BudgetLimitRequest):
    """Set monthly limit or top-up balance for a provider."""
    if not SOFIIA_AUTO_ROUTER_AVAILABLE:
        raise HTTPException(status_code=503, detail="Budget tracker not available")
    # Forward both knobs as-is; the tracker decides how None values are handled.
    updates = {
        "monthly_limit_usd": req.monthly_limit_usd,
        "topup_balance_usd": req.topup_balance_usd,
    }
    set_provider_limit(provider=req.provider, **updates)
    return {"status": "ok", "provider": req.provider}
@app.get("/v1/sofiia/budget/stats")
async def sofiia_budget_stats(window_hours: int = 24):
    """Return per-provider stats for the given time window (hours)."""
    if not SOFIIA_AUTO_ROUTER_AVAILABLE:
        raise HTTPException(status_code=503, detail="Budget tracker not available")
    summary = {}
    for provider_name, provider_stats in get_stats(window_hours=window_hours).items():
        # Flatten the stats object into a JSON-friendly dict, rounding
        # monetary and latency figures for presentation.
        summary[provider_name] = {
            "provider": provider_stats.provider,
            "total_cost_usd": round(provider_stats.total_cost_usd, 5),
            "call_count": provider_stats.call_count,
            "tokens_in": provider_stats.total_input_tokens,
            "tokens_out": provider_stats.total_output_tokens,
            "avg_latency_ms": round(provider_stats.avg_latency_ms),
            "top_models": provider_stats.top_models,
        }
    return summary
@app.get("/v1/sofiia/catalog")
async def sofiia_model_catalog(refresh_ollama: bool = False):
    """Return the full model catalog with availability status.

    Args:
        refresh_ollama: When True, re-probe local Ollama models before
            building the catalog so availability flags are fresh.

    Raises:
        HTTPException: 503 when the auto-router module failed to import.
    """
    if not SOFIIA_AUTO_ROUTER_AVAILABLE:
        raise HTTPException(status_code=503, detail="Auto-router not available")
    if refresh_ollama:
        await refresh_ollama_models_async()
    # Fetch the catalog once; the original called get_full_catalog() three
    # times per request (for the list, the total, and the availability count).
    models = get_full_catalog()
    return {
        "models": models,
        "total": len(models),
        "available_count": sum(1 for m in models if m["available"]),
    }
@app.on_event("shutdown")
async def shutdown_event():
"""Cleanup connections on shutdown"""

View File

@@ -20,6 +20,7 @@ import json
import logging
import re
import hashlib
from time import monotonic
from typing import Optional, Dict, Any, List
from dataclasses import dataclass, field
from datetime import datetime
@@ -41,6 +42,20 @@ PENDING_QUESTIONS_LIMIT = int(os.getenv("AGENT_PENDING_QUESTIONS_LIMIT", "5"))
SHARED_AGRO_LIBRARY_ENABLED = os.getenv("AGROMATRIX_SHARED_LIBRARY_ENABLED", "true").lower() == "true"
SHARED_AGRO_LIBRARY_REQUIRE_REVIEW = os.getenv("AGROMATRIX_SHARED_LIBRARY_REQUIRE_REVIEW", "true").lower() == "true"
DOC_VERSION_PREVIEW_CHARS = int(os.getenv("DOC_VERSION_PREVIEW_CHARS", "240"))
WARNING_THROTTLE_SECONDS = float(os.getenv("MEMORY_RETRIEVAL_WARNING_THROTTLE_S", "60") or "60")
_warning_last_ts: Dict[str, float] = {}
def _warning_throttled(key: str, message: str) -> None:
    """Emit repetitive warnings at most once per throttle window."""
    if WARNING_THROTTLE_SECONDS <= 0:
        # Throttling disabled: always log.
        logger.warning(message)
        return
    now = monotonic()
    elapsed = now - _warning_last_ts.get(key, 0.0)
    if elapsed < WARNING_THROTTLE_SECONDS:
        # Still inside the suppression window for this key.
        return
    _warning_last_ts[key] = now
    logger.warning(message)
@dataclass
@@ -1067,7 +1082,7 @@ class MemoryRetrieval:
)
return True
except Exception as e:
logger.warning(f"register_pending_question failed: {e}")
_warning_throttled("register_pending_question_failed", f"register_pending_question failed: {e}")
return False
async def resolve_pending_question(
@@ -1086,7 +1101,7 @@ class MemoryRetrieval:
row = await conn.fetchrow(
"""
WITH target AS (
SELECT id
SELECT id, question_fingerprint
FROM agent_pending_questions
WHERE channel = $1
AND chat_id = $2
@@ -1095,17 +1110,49 @@ class MemoryRetrieval:
AND status = 'pending'
ORDER BY created_at ASC
LIMIT 1
), decision AS (
SELECT
t.id,
CASE
WHEN $5 = 'dismissed' THEN 'dismissed'
WHEN EXISTS (
SELECT 1
FROM agent_pending_questions q
WHERE q.channel = $1
AND q.chat_id = $2
AND q.user_id = $3
AND q.agent_id = $4
AND q.status = 'answered'
AND q.question_fingerprint = t.question_fingerprint
) THEN 'dismissed'
ELSE 'answered'
END AS next_status,
CASE
WHEN $5 = 'dismissed' THEN $5
WHEN EXISTS (
SELECT 1
FROM agent_pending_questions q
WHERE q.channel = $1
AND q.chat_id = $2
AND q.user_id = $3
AND q.agent_id = $4
AND q.status = 'answered'
AND q.question_fingerprint = t.question_fingerprint
) THEN 'duplicate_answered'
ELSE $5
END AS resolution_reason
FROM target t
)
UPDATE agent_pending_questions p
SET status = CASE WHEN $5 = 'dismissed' THEN 'dismissed' ELSE 'answered' END,
SET status = d.next_status,
answered_at = NOW(),
metadata = COALESCE(p.metadata, '{}'::jsonb)
|| jsonb_build_object(
'resolution_reason', $5,
'resolution_reason', d.resolution_reason,
'answer_fingerprint', COALESCE($6, '')
)
FROM target t
WHERE p.id = t.id
FROM decision d
WHERE p.id = d.id
RETURNING p.id
""",
channel,
@@ -1117,7 +1164,7 @@ class MemoryRetrieval:
)
return bool(row)
except Exception as e:
logger.warning(f"resolve_pending_question failed: {e}")
_warning_throttled("resolve_pending_question_failed", f"resolve_pending_question failed: {e}")
return False
@staticmethod

View File

@@ -81,7 +81,7 @@ def get_unavailable_nodes(req_type: str) -> Set[str]:
async def offload_infer(
nats_client,
node_id: str,
required_type: Literal["llm", "vision", "stt", "tts", "ocr", "image"],
required_type: str, # "llm"|"vision"|"stt"|"tts"|"ocr"|"image"|"voice.tts"|"voice.llm"|"voice.stt"
job_payload: Dict[str, Any],
timeout_ms: int = 25000,
) -> Optional[Dict[str, Any]]:
@@ -89,6 +89,8 @@ async def offload_infer(
Returns parsed JobResponse dict or None on total failure.
Retries on transient errors (timeout, busy). Does NOT retry on provider errors.
Voice HA subjects use dotted notation: "voice.tts" → node.{id}.voice.tts.request
"""
subject = f"node.{node_id.lower()}.{required_type}.request"
payload_bytes = json.dumps(job_payload).encode()

View File

@@ -9,6 +9,8 @@ Prompt Builder for DAGI Router
import httpx
import logging
import os
import time
from typing import Dict, Any, Optional
from dataclasses import dataclass
@@ -43,6 +45,8 @@ class PromptBuilder:
self.city_service_url = city_service_url.rstrip("/")
self.router_config = router_config or {}
self._http_client: Optional[httpx.AsyncClient] = None
self._city_service_unavailable_until = 0.0
self._city_service_cooldown_s = float(os.getenv("CITY_SERVICE_FAILURE_COOLDOWN_S", "120") or "120")
async def _get_http_client(self) -> httpx.AsyncClient:
"""Lazy initialization of HTTP client"""
@@ -80,6 +84,9 @@ class PromptBuilder:
async def _fetch_from_database(self, agent_id: str) -> Optional[AgentSystemPrompt]:
"""Fetch system prompt from city-service API"""
now = time.monotonic()
if now < self._city_service_unavailable_until:
return None
try:
client = await self._get_http_client()
url = f"{self.city_service_url}/internal/agents/{agent_id}/system-prompt"
@@ -100,10 +107,20 @@ class PromptBuilder:
return None
except httpx.RequestError as e:
logger.error(f"Error fetching prompt from city-service: {e}")
self._city_service_unavailable_until = time.monotonic() + max(0.0, self._city_service_cooldown_s)
logger.warning(
"Error fetching prompt from city-service: %s; suppressing retries for %.0fs",
e,
self._city_service_cooldown_s,
)
return None
except Exception as e:
logger.error(f"Unexpected error fetching prompt: {e}")
self._city_service_unavailable_until = time.monotonic() + max(0.0, self._city_service_cooldown_s)
logger.warning(
"Unexpected error fetching prompt: %s; suppressing retries for %.0fs",
e,
self._city_service_cooldown_s,
)
return None
def _get_from_config(self, agent_id: str) -> Optional[AgentSystemPrompt]:

File diff suppressed because it is too large Load Diff

View File

@@ -229,6 +229,58 @@
padding: 2px 6px;
border-radius: 4px;
}
.aurora-clip-picker {
margin-top: 8px;
border: 1px solid var(--border);
border-radius: 8px;
background: var(--bg2);
padding: 8px;
display: none;
gap: 8px;
}
.aurora-clip-head {
display: flex;
justify-content: space-between;
gap: 8px;
font-size: 0.74rem;
color: var(--muted);
align-items: center;
}
.aurora-clip-head strong {
color: var(--text);
font-weight: 600;
}
.aurora-clip-range-row {
display: grid;
grid-template-columns: 54px 1fr 62px;
align-items: center;
gap: 8px;
font-size: 0.73rem;
color: var(--muted);
}
.aurora-clip-range-row input[type="range"] {
width: 100%;
accent-color: var(--gold);
cursor: pointer;
}
.aurora-clip-actions {
display: flex;
gap: 6px;
flex-wrap: wrap;
}
.aurora-clip-btn {
background: rgba(255,255,255,0.04);
border: 1px solid var(--border);
color: var(--muted);
border-radius: 6px;
padding: 4px 8px;
font-size: 0.7rem;
cursor: pointer;
}
.aurora-clip-btn:hover {
border-color: var(--gold);
color: var(--text);
}
.aurora-compare-wrap {
position: relative;
overflow: hidden;
@@ -791,6 +843,27 @@
accept=".mp4,.avi,.mov,.mkv,.webm,.mp3,.wav,.flac,.m4a,.aac,.ogg,.jpg,.jpeg,.png,.tiff,.tif,.webp"
onchange="auroraOnFilePicked(this)">
<div id="auroraThumbPreview" class="aurora-thumb-preview" style="display:none;"></div>
<div id="auroraClipPicker" class="aurora-clip-picker">
<div class="aurora-clip-head">
<strong>🎚 Фрагмент На Прев'ю</strong>
<span id="auroraClipSummary"></span>
</div>
<div class="aurora-clip-range-row">
<span>Start</span>
<input id="auroraClipStartRange" type="range" min="0" max="0" step="0.1" value="0">
<span id="auroraClipStartLabel">0s</span>
</div>
<div class="aurora-clip-range-row">
<span>End</span>
<input id="auroraClipEndRange" type="range" min="0" max="0" step="0.1" value="0">
<span id="auroraClipEndLabel">0s</span>
</div>
<div class="aurora-clip-actions">
<button type="button" class="aurora-clip-btn" id="auroraClipSetStartBtn">Start = поточний кадр</button>
<button type="button" class="aurora-clip-btn" id="auroraClipSetEndBtn">End = поточний кадр</button>
<button type="button" class="aurora-clip-btn" id="auroraClipFullBtn">Повне відео</button>
</div>
</div>
<div class="aurora-kv" style="margin-top:10px;">
<span class="k">Файл</span><span class="v" id="auroraSelectedFile"></span>
</div>
@@ -833,6 +906,12 @@
<option value="codeformer">CodeFormer</option>
</select>
</label>
<label>Clip start (sec)
<input id="auroraOptClipStart" type="number" min="0" step="0.1" placeholder="0">
</label>
<label>Clip duration (sec)
<input id="auroraOptClipDuration" type="number" min="0.1" step="0.1" placeholder="5">
</label>
</div>
</details>
@@ -869,7 +948,7 @@
<button id="auroraAnalyzeBtn" class="btn btn-ghost" onclick="auroraAnalyze()" disabled>🔍 Аналіз</button>
<button id="auroraAudioProcessBtn" class="btn btn-ghost" style="display:none;" onclick="auroraStartAudio()">🎧 Audio process</button>
<button id="auroraStartBtn" class="btn btn-gold" style="flex:1;" onclick="auroraStart()" disabled>Почати обробку</button>
<button id="auroraCancelBtn" class="btn btn-ghost" style="display:none;" onclick="auroraCancel()">Скасувати</button>
<button id="auroraCancelBtn" class="btn btn-ghost" style="display:none;" onclick="auroraCancel()">Зупинити</button>
</div>
</div>
@@ -912,6 +991,15 @@
<label class="aurora-checkline"><input type="checkbox" id="auroraCtrlDenoise"> Enable denoise (FastDVDnet/SCUNet)</label>
<label class="aurora-checkline"><input type="checkbox" id="auroraCtrlFaceRestore"> Run face restoration (GFPGAN)</label>
<label class="aurora-checkline"><input type="checkbox" id="auroraCtrlPlateRoi"> License-plate ROI enhancement</label>
<label class="aurora-checkline"><input type="checkbox" id="auroraCtrlMaxFace"> Max face quality (повільніше, але краще для облич)</label>
<label class="aurora-note" style="display:block; margin-top:8px;">Фокус задачі:</label>
<select id="auroraFocusProfile" style="width:100%; margin-top:4px;">
<option value="auto" selected>Auto</option>
<option value="max_faces">Max faces</option>
<option value="text_readability">Text / logos readability</option>
<option value="plates">License plates</option>
</select>
<input id="auroraTaskHint" type="text" style="width:100%; margin-top:8px;" placeholder="Ціль Aurora: напр. Прочитати напис на кепці персонажа (00:12-00:18)">
<div class="aurora-priority-wrap">
<div class="aurora-priority-head">
<span>Пріоритет: Обличчя</span>
@@ -997,7 +1085,14 @@
<div style="display:flex; gap:8px; margin-top:10px; flex-wrap:wrap;">
<button class="btn btn-ghost btn-sm" id="auroraDownloadResultBtn" style="display:none;" onclick="auroraDownloadResult()">Завантажити результат</button>
<button class="btn btn-ghost btn-sm" id="auroraOpenFolderBtn" onclick="auroraRevealFolder()">Відкрити папку</button>
<button id="auroraReprocessBtn" class="btn btn-ghost btn-sm" onclick="auroraReprocess()" disabled>Повторна обробка</button>
<button id="auroraReprocessBtn" class="btn btn-ghost btn-sm" onclick="auroraReprocess()" disabled>Повторна обробка ×1</button>
<select id="auroraReprocessPasses" class="btn btn-ghost btn-sm" style="min-width:92px;" onchange="auroraUpdateReprocessLabel()">
<option value="1" selected>1 прохід</option>
<option value="2">2 проходи</option>
<option value="3">3 проходи</option>
<option value="4">4 проходи</option>
</select>
<label class="aurora-checkline" style="margin:0;"><input type="checkbox" id="auroraReprocessSecondPass" checked> chain second-pass</label>
</div>
<div id="auroraForensicLogWrap" style="display:none; margin-top:10px;">
<div class="aurora-note" style="margin-top:0;">Forensic log</div>
@@ -2066,6 +2161,11 @@ let auroraTabBootstrapped = false;
let auroraChatHistory = [];
let auroraChatBusy = false;
let auroraFolderPath = null;
let auroraPreviewObjectUrl = null;
let auroraPreviewVideoEl = null;
let auroraVideoDurationSec = 0;
let auroraClipBindingsReady = false;
const AURORA_MIN_CLIP_SEC = 0.1;
const AURORA_MAX_TRANSIENT_ERRORS = 12;
const AURORA_ACTIVE_JOB_KEY = 'aurora_active_job_id';
const AURORA_SMART_RUN_KEY = 'aurora_smart_run_id';
@@ -2320,6 +2420,7 @@ function auroraSetActiveJobId(jobId) {
if (el) el.textContent = auroraJobId || '—';
const reBtn = document.getElementById('auroraReprocessBtn');
if (reBtn) reBtn.disabled = !auroraJobId;
auroraUpdateReprocessLabel();
if (auroraJobId) {
const cached = auroraGetPersistedTiming(auroraJobId);
if (cached) {
@@ -2328,6 +2429,25 @@ function auroraSetActiveJobId(jobId) {
}
}
auroraPersistActiveJob();
auroraUpdateCancelButton(null, null);
}
function auroraUpdateCancelButton(status, stage) {
    // Show/hide the cancel button depending on job status; while the stage
    // indicates cancellation is in progress, disable it and change its caption.
    const cancelBtn = document.getElementById('auroraCancelBtn');
    if (!cancelBtn) return;
    const statusText = String(status || '').toLowerCase();
    const jobActive = ['queued', 'processing'].indexOf(statusText) !== -1;
    if (!jobActive) {
        // No running job: hide the button and reset its state.
        cancelBtn.style.display = 'none';
        cancelBtn.disabled = false;
        cancelBtn.textContent = 'Зупинити';
        return;
    }
    const stageText = String(stage || '').toLowerCase();
    // Matches both the English ("cancelling") and Ukrainian stage wording.
    const isCancelling = ['cancell', 'скасов'].some((needle) => stageText.includes(needle));
    cancelBtn.style.display = 'inline-block';
    cancelBtn.disabled = isCancelling;
    cancelBtn.textContent = isCancelling ? 'Зупиняю...' : 'Зупинити';
}
function auroraSetMode(mode) {
@@ -2349,6 +2469,162 @@ function auroraIsAudioFile(file) {
return ['.mp3', '.wav', '.flac', '.m4a', '.aac', '.ogg'].some(ext => name.endsWith(ext));
}
function auroraRevokePreviewObjectUrl() {
    // Release the blob URL backing the current preview, if one exists.
    const url = auroraPreviewObjectUrl;
    if (!url) return;
    auroraPreviewObjectUrl = null;
    try {
        URL.revokeObjectURL(url);
    } catch (_) {
        // Ignore: revoking an already-released URL is harmless.
    }
}
function auroraFormatClipSeconds(seconds) {
    // Format a second count as "12s" or "12.5s"; non-numeric input yields "—".
    const num = Number(seconds);
    if (!Number.isFinite(num)) return '—';
    // Clamp negatives to zero and keep one decimal of precision.
    const tenths = Math.round(Math.max(0, num) * 10) / 10;
    const isWhole = Math.abs(tenths - Math.round(tenths)) < 1e-9;
    return isWhole ? `${Math.round(tenths)}s` : `${tenths.toFixed(1)}s`;
}
function auroraClampClipWindow(startSec, endSec, durationSec) {
    // Normalise a [start, end] window so it lies inside the clip and is at
    // least AURORA_MIN_CLIP_SEC long. Returns {start: 0, end: 0} when the
    // total duration is unknown or non-positive.
    const total = Number(durationSec);
    if (!Number.isFinite(total) || total <= 0) {
        return { start: 0, end: 0 };
    }
    const clampToClip = (value, fallback) => {
        const n = Number(value);
        if (!Number.isFinite(n)) return fallback;
        return Math.min(Math.max(n, 0), total);
    };
    let start = clampToClip(startSec, 0);
    let end = clampToClip(endSec, total);
    if (end - start < AURORA_MIN_CLIP_SEC) {
        // Window too short: extend forwards when there is room, otherwise
        // pin the end to the clip and pull the start back.
        if (start + AURORA_MIN_CLIP_SEC <= total) {
            end = start + AURORA_MIN_CLIP_SEC;
        } else {
            end = total;
            start = Math.max(0, end - AURORA_MIN_CLIP_SEC);
        }
    }
    return { start, end };
}
function auroraUpdateClipSummary(startSec, endSec, durationSec) {
    // Refresh the slider handle labels and the summary line
    // "start end (length) · total N" for the clip picker.
    const summary = document.getElementById('auroraClipSummary');
    const startLabel = document.getElementById('auroraClipStartLabel');
    const endLabel = document.getElementById('auroraClipEndLabel');
    if (startLabel) startLabel.textContent = auroraFormatClipSeconds(startSec);
    if (endLabel) endLabel.textContent = auroraFormatClipSeconds(endSec);
    if (summary) {
        const clipDur = Math.max(0, Number(endSec) - Number(startSec));
        // NOTE(review): the separator between the start and end labels appears
        // garbled/lost in this view — verify against the original file.
        summary.textContent = `${auroraFormatClipSeconds(startSec)}${auroraFormatClipSeconds(endSec)} (${auroraFormatClipSeconds(clipDur)}) · total ${auroraFormatClipSeconds(durationSec)}`;
    }
}
function auroraApplyClipWindow(startSec, endSec, { syncFields = true, syncSliders = true, seekTo = null } = {}) {
    // Apply a clamped [start, end] window to the range sliders, the numeric
    // export fields, the summary labels and (optionally) the preview video
    // position. No-op until the video's duration is known.
    const duration = Number(auroraVideoDurationSec || 0);
    if (!Number.isFinite(duration) || duration <= 0) return;
    const startRange = document.getElementById('auroraClipStartRange');
    const endRange = document.getElementById('auroraClipEndRange');
    const startInput = document.getElementById('auroraOptClipStart');
    const durationInput = document.getElementById('auroraOptClipDuration');
    const bounded = auroraClampClipWindow(startSec, endSec, duration);
    if (syncSliders && startRange && endRange) {
        // Sliders carry values with one decimal place.
        startRange.value = bounded.start.toFixed(1);
        endRange.value = bounded.end.toFixed(1);
    }
    if (syncFields && startInput && durationInput) {
        // Export fields: "" means "from the beginning"; trailing ".0" trimmed.
        const clipDuration = Math.max(AURORA_MIN_CLIP_SEC, bounded.end - bounded.start);
        startInput.value = bounded.start > 0 ? bounded.start.toFixed(1).replace(/\.0$/, '') : '';
        durationInput.value = clipDuration.toFixed(1).replace(/\.0$/, '');
    }
    auroraUpdateClipSummary(bounded.start, bounded.end, duration);
    if (auroraPreviewVideoEl && Number.isFinite(Number(seekTo))) {
        // Seek the preview so the user sees the frame they just picked;
        // failures (e.g. media not seekable yet) are silently ignored.
        const target = Math.max(0, Math.min(Number(seekTo), duration));
        try { auroraPreviewVideoEl.currentTime = target; } catch (_) {}
    }
}
function auroraSyncClipFromExportInputs() {
    // Mirror the manual "clip start / clip duration" number inputs onto the
    // range sliders. No-op until video metadata has provided a duration.
    const totalSec = Number(auroraVideoDurationSec || 0);
    if (!Number.isFinite(totalSec) || totalSec <= 0) return;
    const readField = (id) => Number(document.getElementById(id)?.value || 0);
    const rawStart = readField('auroraOptClipStart');
    const rawDuration = readField('auroraOptClipDuration');
    let windowStart = 0;
    if (Number.isFinite(rawStart) && rawStart >= 0) windowStart = rawStart;
    // An unset/zero duration means "to the end of the video".
    let windowEnd = totalSec;
    if (Number.isFinite(rawDuration) && rawDuration > 0) windowEnd = windowStart + rawDuration;
    auroraApplyClipWindow(windowStart, windowEnd, { syncFields: true, syncSliders: true });
}
function auroraHideClipPicker() {
    // Collapse the clip picker UI, reset its labels and forget the preview
    // video state so a stale duration can't leak into the next file.
    const picker = document.getElementById('auroraClipPicker');
    if (picker) picker.style.display = 'none';
    const resetText = (id, text) => {
        const el = document.getElementById(id);
        if (el) el.textContent = text;
    };
    resetText('auroraClipSummary', '—');
    resetText('auroraClipStartLabel', '0s');
    resetText('auroraClipEndLabel', '0s');
    auroraPreviewVideoEl = null;
    auroraVideoDurationSec = 0;
}
function auroraBindClipPicker() {
    // Wire up the clip-picker controls (sliders, numeric fields, buttons).
    // Idempotent: the flag guarantees listeners are attached only once even
    // though this is called from every preview render.
    if (auroraClipBindingsReady) return;
    auroraClipBindingsReady = true;
    const startRange = document.getElementById('auroraClipStartRange');
    const endRange = document.getElementById('auroraClipEndRange');
    const startInput = document.getElementById('auroraOptClipStart');
    const durationInput = document.getElementById('auroraOptClipDuration');
    const setStartBtn = document.getElementById('auroraClipSetStartBtn');
    const setEndBtn = document.getElementById('auroraClipSetEndBtn');
    const fullBtn = document.getElementById('auroraClipFullBtn');
    if (startRange && endRange) {
        // Dragging a slider updates the numeric fields (syncFields) but not
        // the sliders themselves (syncSliders: false — avoids feedback loops),
        // and seeks the preview to the handle being moved.
        startRange.addEventListener('input', () => {
            const start = Number(startRange.value || 0);
            const end = Number(endRange.value || 0);
            auroraApplyClipWindow(start, end, { syncFields: true, syncSliders: false, seekTo: start });
        });
        endRange.addEventListener('input', () => {
            const start = Number(startRange.value || 0);
            const end = Number(endRange.value || 0);
            auroraApplyClipWindow(start, end, { syncFields: true, syncSliders: false, seekTo: end });
        });
    }
    if (startInput) {
        // Typing in the numeric fields pushes the values back onto the sliders.
        startInput.addEventListener('input', auroraSyncClipFromExportInputs);
        startInput.addEventListener('change', auroraSyncClipFromExportInputs);
    }
    if (durationInput) {
        durationInput.addEventListener('input', auroraSyncClipFromExportInputs);
        durationInput.addEventListener('change', auroraSyncClipFromExportInputs);
    }
    if (setStartBtn) {
        // "Start = current frame": snap the window start to the preview's playhead.
        setStartBtn.addEventListener('click', () => {
            if (!auroraPreviewVideoEl) return;
            const current = Number(auroraPreviewVideoEl.currentTime || 0);
            const end = Number(document.getElementById('auroraClipEndRange')?.value || auroraVideoDurationSec || 0);
            auroraApplyClipWindow(current, end, { syncFields: true, syncSliders: true, seekTo: current });
        });
    }
    if (setEndBtn) {
        // "End = current frame": snap the window end to the preview's playhead.
        setEndBtn.addEventListener('click', () => {
            if (!auroraPreviewVideoEl) return;
            const current = Number(auroraPreviewVideoEl.currentTime || 0);
            const start = Number(document.getElementById('auroraClipStartRange')?.value || 0);
            auroraApplyClipWindow(start, current, { syncFields: true, syncSliders: true, seekTo: current });
        });
    }
    if (fullBtn) {
        // "Full video": clear the export fields (meaning "no clipping") and
        // reset the sliders to cover the whole duration.
        fullBtn.addEventListener('click', () => {
            const startField = document.getElementById('auroraOptClipStart');
            const durField = document.getElementById('auroraOptClipDuration');
            if (startField) startField.value = '';
            if (durField) durField.value = '';
            auroraApplyClipWindow(0, auroraVideoDurationSec, { syncFields: false, syncSliders: true, seekTo: 0 });
        });
    }
}
function auroraSetSelectedFile(file) {
auroraSelectedFile = file || null;
const label = document.getElementById('auroraSelectedFile');
@@ -2364,6 +2640,10 @@ function auroraSetSelectedFile(file) {
audioBtn.style.display = isAudio ? 'inline-block' : 'none';
audioBtn.disabled = !file;
}
const clipStartInput = document.getElementById('auroraOptClipStart');
const clipDurationInput = document.getElementById('auroraOptClipDuration');
if (clipStartInput) clipStartInput.value = '';
if (clipDurationInput) clipDurationInput.value = '';
auroraAnalysisCache = null;
auroraSuggestedPriority = 'balanced';
auroraSuggestedExport = {};
@@ -2386,6 +2666,7 @@ function auroraSetSelectedFile(file) {
if (quickStartBtn) quickStartBtn.disabled = !file;
const reBtn = document.getElementById('auroraReprocessBtn');
if (reBtn) reBtn.disabled = !auroraJobId;
auroraUpdateReprocessLabel();
const batchInfo = document.getElementById('auroraBatchInfo');
if (batchInfo && auroraBatchFiles.length <= 1) batchInfo.style.display = 'none';
auroraShowThumbPreview(file);
@@ -2394,19 +2675,57 @@ function auroraSetSelectedFile(file) {
function auroraShowThumbPreview(file) {
const wrap = document.getElementById('auroraThumbPreview');
if (!wrap) return;
auroraBindClipPicker();
auroraRevokePreviewObjectUrl();
auroraHideClipPicker();
wrap.style.display = 'none';
wrap.innerHTML = '';
if (!file) return;
const type = (file.type || '').toLowerCase();
const url = URL.createObjectURL(file);
auroraPreviewObjectUrl = url;
if (type.startsWith('image/')) {
wrap.innerHTML = `<img src="${url}" alt="preview"><span class="aurora-thumb-label">Original</span>`;
wrap.style.display = 'block';
} else if (type.startsWith('video/')) {
const v = document.createElement('video');
v.src = url; v.muted = true; v.playsInline = true; v.preload = 'metadata';
v.addEventListener('loadeddata', () => { v.currentTime = 0.5; });
v.addEventListener('seeked', () => { wrap.style.display = 'block'; }, { once: true });
v.src = url;
v.muted = true;
v.controls = true;
v.playsInline = true;
v.preload = 'metadata';
v.addEventListener('loadedmetadata', () => {
const picker = document.getElementById('auroraClipPicker');
const startRange = document.getElementById('auroraClipStartRange');
const endRange = document.getElementById('auroraClipEndRange');
const duration = Number(v.duration || 0);
auroraPreviewVideoEl = v;
auroraVideoDurationSec = Number.isFinite(duration) && duration > 0 ? duration : 0;
if (!Number.isFinite(auroraVideoDurationSec) || auroraVideoDurationSec <= 0) {
if (picker) picker.style.display = 'none';
return;
}
if (startRange && endRange) {
const max = auroraVideoDurationSec.toFixed(1);
startRange.min = '0';
endRange.min = '0';
startRange.max = max;
endRange.max = max;
startRange.step = '0.1';
endRange.step = '0.1';
}
if (picker) picker.style.display = 'grid';
const startInput = document.getElementById('auroraOptClipStart');
const durInput = document.getElementById('auroraOptClipDuration');
const startVal = Number(startInput?.value || 0);
const durationVal = Number(durInput?.value || 0);
const start = Number.isFinite(startVal) && startVal >= 0 ? startVal : 0;
const hasDuration = Number.isFinite(durationVal) && durationVal > 0;
const end = hasDuration ? (start + durationVal) : auroraVideoDurationSec;
auroraApplyClipWindow(start, end, { syncFields: true, syncSliders: true });
});
v.addEventListener('loadeddata', () => { wrap.style.display = 'block'; });
wrap.appendChild(v);
const lbl = document.createElement('span');
lbl.className = 'aurora-thumb-label'; lbl.textContent = 'Original';
@@ -2633,7 +2952,38 @@ function auroraBindDropzone() {
function auroraCollectExportOptions() {
const opts = {};
const outscale = document.getElementById('auroraOptOutscale')?.value;
if (outscale && outscale !== 'auto') opts.outscale = Number(outscale);
if (outscale && outscale !== 'auto') {
opts.upscale = Number(outscale);
opts.outscale = Number(outscale);
}
const clipPicker = document.getElementById('auroraClipPicker');
const pickerVisible = !!clipPicker && getComputedStyle(clipPicker).display !== 'none';
const startRange = document.getElementById('auroraClipStartRange');
const endRange = document.getElementById('auroraClipEndRange');
const durationTotal = Number(auroraVideoDurationSec || 0);
const canUseRanges =
pickerVisible &&
Number.isFinite(durationTotal) &&
durationTotal > 0 &&
startRange &&
endRange;
if (canUseRanges) {
const startRangeValue = Number(startRange.value || 0);
const endRangeValue = Number(endRange.value || durationTotal);
const bounded = auroraClampClipWindow(startRangeValue, endRangeValue, durationTotal);
const clipDuration = Math.max(AURORA_MIN_CLIP_SEC, bounded.end - bounded.start);
const isFullVideo = bounded.start <= 0.0001 && (durationTotal - bounded.end) <= 0.11;
if (!isFullVideo) {
if (bounded.start > 0.0001) opts.clip_start_sec = Number(bounded.start.toFixed(3));
opts.clip_duration_sec = Number(clipDuration.toFixed(3));
}
} else {
const clipStart = Number(document.getElementById('auroraOptClipStart')?.value || 0);
const clipDurationRaw = document.getElementById('auroraOptClipDuration')?.value;
const clipDuration = Number(clipDurationRaw || 0);
if (Number.isFinite(clipStart) && clipStart > 0) opts.clip_start_sec = clipStart;
if (clipDurationRaw !== '' && Number.isFinite(clipDuration) && clipDuration > 0) opts.clip_duration_sec = clipDuration;
}
const codec = document.getElementById('auroraOptCodec')?.value;
if (codec && codec !== 'auto') opts.encoder = codec;
const quality = document.getElementById('auroraOptQuality')?.value;
@@ -2650,6 +3000,36 @@ function auroraAbsoluteUrl(url) {
return `${API}${value}`;
}
function auroraSleep(ms) {
    // Promise-based delay helper for async polling loops.
    return new Promise((done) => { setTimeout(done, ms); });
}
function auroraUpdateReprocessLabel() {
    // Keep the reprocess button caption in sync with the selected pass
    // count from the dropdown, clamped to the supported 1–4 range.
    const rawValue = document.getElementById('auroraReprocessPasses')?.value || 1;
    const passes = Math.max(1, Math.min(4, Number(rawValue)));
    const reprocessBtn = document.getElementById('auroraReprocessBtn');
    if (reprocessBtn) reprocessBtn.textContent = `Повторна обробка ×${passes}`;
}
async function auroraWaitForTerminal(jobId, { timeoutSec = 10800, passLabel = '' } = {}) {
    // Poll the Aurora status endpoint every ~2s until the job reaches a
    // terminal state (completed / failed / cancelled) or timeoutSec elapses.
    // Returns the final status payload; throws on a missing id or timeout.
    // When passLabel is given, progress is mirrored into the UI with the
    // label prefixed (used by multi-pass reprocessing).
    const id = String(jobId || '').trim();
    if (!id) throw new Error('job_id missing');
    const deadline = Date.now() + (timeoutSec * 1000);
    while (Date.now() < deadline) {
        const r = await fetch(`${API}/api/aurora/status/${encodeURIComponent(id)}`);
        if (!r.ok) {
            // Transient server/network hiccup: back off and retry.
            await auroraSleep(2000);
            continue;
        }
        const st = await r.json();
        const status = String(st.status || '').toLowerCase();
        const stage = st.current_stage || 'processing';
        if (passLabel) auroraSetProgress(st.progress || 1, status || 'processing', `${passLabel} · ${stage}`);
        if (status === 'completed' || status === 'failed' || status === 'cancelled') return st;
        await auroraSleep(2000);
    }
    throw new Error('reprocess timeout');
}
function auroraSetPreset(preset) {
const normalized = String(preset || 'balanced').trim();
auroraPresetMode = ['turbo', 'balanced', 'max_quality'].includes(normalized) ? normalized : 'balanced';
@@ -2675,6 +3055,7 @@ function auroraSetPreset(preset) {
if (outscale) outscale.value = 'auto';
if (codec) codec.value = 'auto';
}
auroraUpdateReprocessLabel();
}
function auroraUpdatePriorityLabel() {
@@ -2699,18 +3080,29 @@ function auroraResetAnalysisControls() {
const denoise = document.getElementById('auroraCtrlDenoise');
const face = document.getElementById('auroraCtrlFaceRestore');
const plate = document.getElementById('auroraCtrlPlateRoi');
const maxFace = document.getElementById('auroraCtrlMaxFace');
const focusProfile = document.getElementById('auroraFocusProfile');
const taskHint = document.getElementById('auroraTaskHint');
const clipStart = document.getElementById('auroraOptClipStart');
const clipDuration = document.getElementById('auroraOptClipDuration');
const slider = document.getElementById('auroraPriorityBias');
if (denoise) denoise.checked = false;
if (face) face.checked = true;
if (plate) plate.checked = false;
if (maxFace) maxFace.checked = false;
if (focusProfile) focusProfile.value = 'auto';
if (taskHint) taskHint.value = '';
if (clipStart) clipStart.value = '';
if (clipDuration) clipDuration.value = '';
if (slider) slider.value = '0';
auroraSetPreset('balanced');
auroraUpdatePriorityLabel();
auroraUpdateReprocessLabel();
}
function auroraApplySuggestedExportOptions(suggested) {
if (!suggested || typeof suggested !== 'object') return;
const outscale = String(suggested.outscale ?? '').trim();
const outscale = String(suggested.upscale ?? suggested.outscale ?? '').trim();
if (outscale && document.getElementById('auroraOptOutscale')) {
const el = document.getElementById('auroraOptOutscale');
const has = Array.from(el.options || []).some((o) => o.value === outscale);
@@ -2745,6 +3137,8 @@ function auroraApplyAnalysisHints(data) {
const denoise = document.getElementById('auroraCtrlDenoise');
const face = document.getElementById('auroraCtrlFaceRestore');
const plate = document.getElementById('auroraCtrlPlateRoi');
const maxFace = document.getElementById('auroraCtrlMaxFace');
const focusProfile = document.getElementById('auroraFocusProfile');
const slider = document.getElementById('auroraPriorityBias');
const highNoise = ['high', 'very_high'].includes(String(quality.noise_level || '').toLowerCase());
@@ -2759,6 +3153,13 @@ function auroraApplyAnalysisHints(data) {
else if (suggested === 'plates') slider.value = '55';
else slider.value = '0';
}
if (focusProfile) {
if (suggested === 'details') focusProfile.value = 'text_readability';
else if (suggested === 'faces') focusProfile.value = 'max_faces';
else if (suggested === 'plates') focusProfile.value = 'plates';
else focusProfile.value = 'auto';
}
if (maxFace) maxFace.checked = suggested === 'faces';
if (suggested === 'faces' || suggested === 'plates') auroraSetPreset('max_quality');
else auroraSetPreset('balanced');
@@ -2770,12 +3171,21 @@ function auroraCollectAnalysisControls() {
const denoise = Boolean(document.getElementById('auroraCtrlDenoise')?.checked);
const faceRestore = Boolean(document.getElementById('auroraCtrlFaceRestore')?.checked);
const plateRoi = Boolean(document.getElementById('auroraCtrlPlateRoi')?.checked);
const maxFaceQuality = Boolean(document.getElementById('auroraCtrlMaxFace')?.checked);
const focusProfile = String(document.getElementById('auroraFocusProfile')?.value || 'auto').trim();
const taskHint = String(document.getElementById('auroraTaskHint')?.value || '').trim();
const preset = auroraPresetMode || 'balanced';
const priority = bias <= -30 ? 'faces' : bias >= 30 ? 'plates' : (auroraSuggestedPriority || 'balanced');
let priority = bias <= -30 ? 'faces' : bias >= 30 ? 'plates' : (auroraSuggestedPriority || 'balanced');
if (focusProfile === 'text_readability') priority = 'details';
if (focusProfile === 'plates') priority = 'plates';
if (focusProfile === 'max_faces' || maxFaceQuality) priority = 'faces';
return {
denoise,
face_restore: faceRestore,
plate_roi_enhance: plateRoi,
max_face_quality: maxFaceQuality,
focus_profile: focusProfile || 'auto',
task_hint: taskHint,
priority_bias: bias,
priority,
preset,
@@ -2784,15 +3194,45 @@ function auroraCollectAnalysisControls() {
function auroraBuildAnalysisExportHints(controls) {
const c = controls || auroraCollectAnalysisControls();
return {
const outscaleRaw = String(document.getElementById('auroraOptOutscale')?.value || 'auto').trim().toLowerCase();
const isAutoScale = !outscaleRaw || outscaleRaw === 'auto';
const hints = {
pre_denoise: Boolean(c.denoise),
temporal_denoise: Boolean(c.denoise && c.preset === 'max_quality'),
roi_only_faces: c.priority === 'faces',
face_restore: Boolean(c.face_restore),
plate_roi_enhance: Boolean(c.plate_roi_enhance),
max_face_quality: Boolean(c.max_face_quality),
focus_profile: c.focus_profile || 'auto',
task_hint: String(c.task_hint || '').trim(),
profile: c.preset || 'balanced',
priority_bias: Number(c.priority_bias || 0),
auto_forensic_outscale: true,
};
if (!hints.task_hint) delete hints.task_hint;
if (c.focus_profile === 'max_faces' || c.max_face_quality) {
hints.pre_denoise = true;
hints.temporal_denoise = true;
hints.roi_only_faces = true;
hints.face_model = 'codeformer';
hints.deblur_before_face = true;
hints.score_loop = true;
hints.allow_roi_upscale = true;
if (isAutoScale) hints.upscale = 2;
} else if (c.focus_profile === 'text_readability') {
hints.pre_denoise = true;
hints.temporal_denoise = true;
hints.roi_only_faces = false;
hints.deblur_before_face = true;
hints.score_loop = true;
hints.text_focus = true;
if (isAutoScale) hints.upscale = 2;
} else if (c.focus_profile === 'plates') {
hints.roi_only_faces = false;
hints.plate_roi_enhance = true;
}
return hints;
}
function auroraStartFromAnalysis() {
@@ -2824,6 +3264,9 @@ function auroraRenderQualityReport(report) {
const plates = report.plates || {};
const overall = report.overall || {};
const models = Array.isArray(overall.models) ? overall.models : [];
const warnings = Array.isArray(overall.warnings) ? overall.warnings : [];
const processingStatus = String(overall.processing_status || 'ok');
const degraded = processingStatus !== 'ok' || Boolean(overall.identical_to_input) || Boolean(overall.fallback_used);
const procSec = Number(overall.processing_time_sec);
const procText = Number.isFinite(procSec) ? auroraFormatSeconds(procSec) : '—';
const psnr = overall.psnr != null ? `${overall.psnr} dB` : '—';
@@ -2845,9 +3288,11 @@ function auroraRenderQualityReport(report) {
</div>
<div class="aurora-quality-group">
<div class="aurora-quality-head">Загальне</div>
<div class="aurora-quality-line"><span>Статус обробки</span><span style="${degraded ? 'color:var(--warn);' : 'color:var(--ok);'}">${auroraEsc(processingStatus)}</span></div>
<div class="aurora-quality-line"><span>PSNR</span><span>${psnr}</span></div>
<div class="aurora-quality-line"><span>Час обробки</span><span>${procText}</span></div>
<div class="aurora-quality-line"><span>Моделі</span><span>${models.length ? auroraEsc(models.join(', ')) : '—'}</span></div>
${warnings.length ? `<div class="aurora-note" style="margin-top:6px; color:var(--warn);">⚠ ${auroraEsc(warnings.join(' | '))}</div>` : ''}
</div>
`;
wrap.style.display = 'block';
@@ -3228,44 +3673,82 @@ async function auroraReprocess(options) {
}
const reBtn = document.getElementById('auroraReprocessBtn');
if (reBtn) reBtn.disabled = true;
const payload = (options && typeof options === 'object') ? options : {};
const incoming = (options && typeof options === 'object') ? options : {};
const passCountUi = Number(document.getElementById('auroraReprocessPasses')?.value || 1);
const passes = Math.max(1, Math.min(4, Number(incoming.passes) || passCountUi));
const secondPassUi = Boolean(document.getElementById('auroraReprocessSecondPass')?.checked);
const secondPass = Object.prototype.hasOwnProperty.call(incoming, 'second_pass')
? Boolean(incoming.second_pass)
: secondPassUi;
const analysisControls = auroraCollectAnalysisControls();
const uiExport = auroraCollectExportOptions();
const analysisExport = auroraBuildAnalysisExportHints(analysisControls);
const mergedExport = { ...auroraSuggestedExport, ...uiExport, ...analysisExport, ...(incoming.export_options || {}) };
let priority = incoming.priority || analysisControls.priority || auroraSuggestedPriority || 'balanced';
if (typeof priority !== 'string' || !priority.trim()) priority = 'balanced';
const basePayload = {
mode: auroraMode,
priority,
export_options: mergedExport,
};
let sourceJobId = auroraJobId;
let lastJobId = auroraJobId;
try {
const r = await fetch(`${API}/api/aurora/reprocess/${encodeURIComponent(auroraJobId)}`, {
method: 'POST',
headers: { 'Content-Type': 'application/json' },
body: JSON.stringify(payload),
});
if (!r.ok) {
const body = await r.text();
throw new Error(body || `HTTP ${r.status}`);
auroraStopPolling();
for (let i = 1; i <= passes; i += 1) {
const payload = { ...basePayload, ...incoming, second_pass: secondPass };
const r = await fetch(`${API}/api/aurora/reprocess/${encodeURIComponent(sourceJobId)}`, {
method: 'POST',
headers: { 'Content-Type': 'application/json' },
body: JSON.stringify(payload),
});
if (!r.ok) {
const body = await r.text();
throw new Error(body || `HTTP ${r.status}`);
}
const data = await r.json();
const newJobId = String(data.job_id || '').trim();
if (!newJobId) throw new Error('job_id missing in reprocess response');
lastJobId = newJobId;
auroraSetActiveJobId(newJobId);
auroraSetSmartRunId(null);
auroraSmartStatusCache = null;
auroraSetSmartPolicyText(`reprocess ${i}/${passes}`);
auroraStatusCache = null;
auroraResultCache = null;
auroraPollErrorCount = 0;
auroraLastProgress = 1;
auroraPollInFlight = false;
const resultCard = document.getElementById('auroraResultCard');
if (resultCard) resultCard.style.display = 'none';
auroraSetProgress(1, 'processing', `dispatching reprocess ${i}/${passes}`);
auroraUpdateQueuePosition(null);
auroraUpdateTiming(0, null, null);
auroraUpdateLivePerf(null, null);
const cancelBtn = document.getElementById('auroraCancelBtn');
if (cancelBtn) cancelBtn.style.display = 'inline-block';
if (i < passes) {
const done = await auroraWaitForTerminal(newJobId, { passLabel: `reprocess ${i}/${passes}` });
const status = String(done?.status || '').toLowerCase();
if (status !== 'completed') {
throw new Error(`reprocess ${i}/${passes} завершився зі статусом ${status}`);
}
}
sourceJobId = newJobId;
}
const data = await r.json();
auroraSetActiveJobId(data.job_id);
auroraSetSmartRunId(null);
auroraSmartStatusCache = null;
auroraSetSmartPolicyText('audio local');
auroraStatusCache = null;
auroraResultCache = null;
auroraPollErrorCount = 0;
auroraLastProgress = 1;
auroraPollInFlight = false;
const resultCard = document.getElementById('auroraResultCard');
if (resultCard) resultCard.style.display = 'none';
auroraSetProgress(1, 'processing', 'dispatching (reprocess)');
auroraUpdateQueuePosition(null);
auroraUpdateTiming(0, null, null);
auroraUpdateLivePerf(null, null);
const cancelBtn = document.getElementById('auroraCancelBtn');
if (cancelBtn) cancelBtn.style.display = 'inline-block';
auroraStopPolling();
auroraPollTimer = setInterval(auroraPollStatus, 2000);
await auroraPollStatus();
auroraChatAdd('assistant', `Запустила reprocess: ${auroraJobId}`);
auroraChatAdd('assistant', `Запустила reprocess ×${passes}: ${lastJobId}`);
await auroraRefreshJobs();
} catch (e) {
alert(`Aurora reprocess error: ${e.message || e}`);
} finally {
if (reBtn) reBtn.disabled = false;
auroraUpdateReprocessLabel();
}
}
@@ -3472,6 +3955,7 @@ async function auroraPollStatus() {
});
auroraUpdateQueuePosition(st.queue_position);
auroraUpdateStorage(st.storage);
auroraUpdateCancelButton(st.status, st.current_stage);
const reBtn = document.getElementById('auroraReprocessBtn');
if (reBtn) reBtn.disabled = !(st.status === 'completed' || st.status === 'failed' || st.status === 'cancelled');
if (st.status === 'completed') {
@@ -3604,10 +4088,19 @@ async function auroraStart() {
async function auroraCancel() {
if (!auroraJobId) return;
const cancelBtn = document.getElementById('auroraCancelBtn');
if (cancelBtn) {
cancelBtn.style.display = 'inline-block';
cancelBtn.disabled = true;
cancelBtn.textContent = 'Зупиняю...';
}
try {
await fetch(`${API}/api/aurora/cancel/${encodeURIComponent(auroraJobId)}`, { method: 'POST' });
await auroraPollStatus();
await auroraRefreshJobs();
} catch (_) {}
} catch (_) {
auroraUpdateCancelButton('processing', null);
}
}
async function auroraLoadResult(jobId) {
@@ -3950,6 +4443,7 @@ function auroraInitTab() {
auroraBindDropzone();
auroraRefreshHealth();
auroraUpdatePriorityLabel();
auroraUpdateReprocessLabel();
auroraSetSmartRunId(auroraSmartRunId);
if (!auroraSmartRunId) {
auroraSetSmartPolicyText('standby');

View File

@@ -810,18 +810,18 @@ class SwapperService:
# FLUX / Diffusion model loading
logger.info(f"🎨 Loading diffusion model: {hf_name}")
from diffusers import AutoPipelineForText2Image
diffusion_dtype = torch.bfloat16 if self.device == "cuda" else torch.float32
pipeline = AutoPipelineForText2Image.from_pretrained(
hf_name,
torch_dtype=torch.bfloat16,
use_safetensors=True
torch_dtype=diffusion_dtype
)
pipeline.to(self.device)
pipeline.enable_model_cpu_offload() # Optimize VRAM usage
if self.device == "cuda":
pipeline.enable_model_cpu_offload() # Optimize VRAM usage on CUDA
self.hf_models[model_name] = pipeline
self.hf_processors[model_name] = None # No separate processor for diffusion
logger.info(f"✅ Diffusion model loaded: {model_name} with CPU offload enabled")
logger.info(f"✅ Diffusion model loaded: {model_name} (device={self.device})")
else:
# Generic loading

View File

@@ -38,3 +38,12 @@ storage:
models_dir: /app/models
cache_dir: /app/cache
swap_dir: /app/swap
models:
flux-klein-4b:
path: huggingface:segmind/tiny-sd
type: image_generation
size_gb: 0.7
priority: medium
capabilities:
- image_generation