feat(production): sync all modified production files to git
Includes updates across gateway, router, node-worker, memory-service, aurora-service, swapper, sofiia-console UI and node2 infrastructure: - gateway-bot: Dockerfile, http_api.py, druid/aistalk prompts, doc_service - services/router: main.py, router-config.yml, fabric_metrics, memory_retrieval, offload_client, prompt_builder - services/node-worker: worker.py, main.py, config.py, fabric_metrics - services/memory-service: Dockerfile, database.py, main.py, requirements - services/aurora-service: main.py (+399), kling.py, quality_report.py - services/swapper-service: main.py, swapper_config_node2.yaml - services/sofiia-console: static/index.html (console UI update) - config: agent_registry, crewai_agents/teams, router_agents - ops/fabric_preflight.sh: updated preflight checks - router-config.yml, docker-compose.node2.yml: infra updates - docs: NODA1-AGENT-ARCHITECTURE, fabric_contract updated Made-with: Cursor
This commit is contained in:
@@ -307,7 +307,7 @@ agents:
|
||||
canonical_role: "Autonomous Cyber Detective Agency Orchestrator"
|
||||
mission: |
|
||||
AISTALK - автономне агентство кібердетективів для розслідувань загроз і
|
||||
вразливостей у Web2, Web3, AI та quantum-risk сценаріях.
|
||||
вразливостей у Web2, Web3, AI, media-forensics та quantum-risk сценаріях.
|
||||
На етапі планування агент працює як внутрішній оркестратор команди
|
||||
спеціалізованих ролей з асинхронним case lifecycle.
|
||||
|
||||
@@ -336,6 +336,9 @@ agents:
|
||||
- blueteam
|
||||
- bughunter
|
||||
- quantum risk
|
||||
- media forensics
|
||||
- video analysis
|
||||
- deepfake
|
||||
|
||||
llm_profile: reasoning
|
||||
prompt_file: aistalk_prompt.txt
|
||||
@@ -346,12 +349,12 @@ agents:
|
||||
enabled: true
|
||||
default_profile: default
|
||||
profile_hints:
|
||||
default: [osint, threat_hunt, vulns, web3, ai, red-blue]
|
||||
default: [osint, threat_hunt, vulns, web3, ai, red-blue, media_forensics, video, audio, photo, forensic, deepfake]
|
||||
profiles:
|
||||
default:
|
||||
team_name: AISTALK Cyber Detective Unit
|
||||
parallel_roles: true
|
||||
max_concurrency: 6
|
||||
max_concurrency: 7
|
||||
synthesis:
|
||||
role_context: AISTALK Orchestrator & Analyst
|
||||
system_prompt_ref: roles/aistalk/orchestrator_synthesis.md
|
||||
@@ -381,6 +384,11 @@ agents:
|
||||
role_context: Neuron (Deep Analysis)
|
||||
system_prompt_ref: roles/aistalk/neuron.md
|
||||
llm_profile: reasoning
|
||||
- id: aurora
|
||||
role_context: Aurora (Autonomous Media Forensics)
|
||||
system_prompt_ref: roles/aistalk/aurora.md
|
||||
llm_profile: science
|
||||
skills: [video_enhancement, audio_forensics, photo_restoration, chain_of_custody]
|
||||
- id: vault
|
||||
role_context: Vault (Secrets and Confidential Data Guard)
|
||||
system_prompt_ref: roles/aistalk/vault.md
|
||||
@@ -432,6 +440,8 @@ agents:
|
||||
skills: [entity_resolution, link_analysis]
|
||||
- role: "Risk"
|
||||
skills: [cvss, mitre_mapping]
|
||||
- role: "Aurora"
|
||||
skills: [media_forensics, video_enhancement, audio_forensics, photo_analysis]
|
||||
- role: "Analyst"
|
||||
skills: [synthesis, reporting]
|
||||
|
||||
|
||||
@@ -246,6 +246,15 @@
|
||||
"role": "Neuron (Deep Analysis)",
|
||||
"skills": []
|
||||
},
|
||||
{
|
||||
"role": "Aurora (Autonomous Media Forensics)",
|
||||
"skills": [
|
||||
"video_enhancement",
|
||||
"audio_forensics",
|
||||
"photo_restoration",
|
||||
"chain_of_custody"
|
||||
]
|
||||
},
|
||||
{
|
||||
"role": "Vault (Secrets and Confidential Data Guard)",
|
||||
"skills": []
|
||||
|
||||
@@ -109,7 +109,7 @@ aistalk:
|
||||
default:
|
||||
team_name: AISTALK Cyber Detective Unit
|
||||
parallel_roles: true
|
||||
max_concurrency: 6
|
||||
max_concurrency: 7
|
||||
synthesis:
|
||||
role_context: AISTALK Orchestrator & Analyst
|
||||
system_prompt_ref: roles/aistalk/orchestrator_synthesis.md
|
||||
@@ -139,6 +139,15 @@ aistalk:
|
||||
role_context: Neuron (Deep Analysis)
|
||||
system_prompt_ref: roles/aistalk/neuron.md
|
||||
llm_profile: reasoning
|
||||
- id: aurora
|
||||
role_context: Aurora (Autonomous Media Forensics)
|
||||
system_prompt_ref: roles/aistalk/aurora.md
|
||||
llm_profile: science
|
||||
skills:
|
||||
- video_enhancement
|
||||
- audio_forensics
|
||||
- photo_restoration
|
||||
- chain_of_custody
|
||||
- id: vault
|
||||
role_context: Vault (Secrets and Confidential Data Guard)
|
||||
system_prompt_ref: roles/aistalk/vault.md
|
||||
@@ -178,6 +187,12 @@ aistalk:
|
||||
- web3
|
||||
- ai
|
||||
- red-blue
|
||||
- media_forensics
|
||||
- video
|
||||
- audio
|
||||
- photo
|
||||
- forensic
|
||||
- deepfake
|
||||
nutra:
|
||||
profiles:
|
||||
default:
|
||||
|
||||
@@ -67,7 +67,10 @@
|
||||
"redteam",
|
||||
"blueteam",
|
||||
"bughunter",
|
||||
"quantum risk"
|
||||
"quantum risk",
|
||||
"media forensics",
|
||||
"video analysis",
|
||||
"deepfake"
|
||||
],
|
||||
"domains": [
|
||||
"cybersecurity",
|
||||
@@ -522,4 +525,4 @@
|
||||
"class": "internal",
|
||||
"visibility": "internal"
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -56,6 +56,27 @@ services:
|
||||
- dagi-network
|
||||
restart: unless-stopped
|
||||
|
||||
aurora-service:
|
||||
build:
|
||||
context: ./services/aurora-service
|
||||
dockerfile: Dockerfile
|
||||
container_name: aurora-service-node2
|
||||
ports:
|
||||
- "127.0.0.1:9401:9401"
|
||||
environment:
|
||||
- AURORA_DATA_DIR=/data/aurora
|
||||
- AURORA_PUBLIC_BASE_URL=http://127.0.0.1:9401
|
||||
- AURORA_CORS_ORIGINS=*
|
||||
- AURORA_MODELS_DIR=/data/aurora/models
|
||||
- AURORA_FORCE_CPU=false
|
||||
- AURORA_PREFER_MPS=true
|
||||
- AURORA_ENABLE_VIDEOTOOLBOX=true
|
||||
volumes:
|
||||
- aurora-data:/data
|
||||
networks:
|
||||
- dagi-network
|
||||
restart: unless-stopped
|
||||
|
||||
dagi-nats:
|
||||
image: nats:2.10-alpine
|
||||
container_name: dagi-nats-node2
|
||||
@@ -97,3 +118,7 @@ networks:
|
||||
dagi-memory-network:
|
||||
external: true
|
||||
name: dagi-memory-network-node2
|
||||
|
||||
volumes:
|
||||
aurora-data:
|
||||
driver: local
|
||||
|
||||
@@ -75,13 +75,16 @@ NODA1 використовує уніфіковану систему агент
|
||||
┌───────────────────────┐ ┌───────────┐ ┌─────────────────────┐
|
||||
│ LLM PROVIDERS │ │ MEMORY │ │ CREWAI │
|
||||
│ ───────────────────── │ │ SERVICE │ │ (dagi-staging- │
|
||||
│ • Ollama (local) │ │ :8000 │ │ crewai-service) │
|
||||
│ - qwen3:8b │ ├───────────┤ │ ─────────────────── │
|
||||
│ - mistral:7b │ │ • Qdrant │ │ crewai_agents.json │
|
||||
│ - qwen2.5:3b │ │ • Neo4j │ │ │
|
||||
│ • DeepSeek (cloud) │ │ • Postgres│ │ 11 Orchestrators │
|
||||
│ • Mistral (cloud) │ └───────────┘ │ + Teams per agent │
|
||||
└───────────────────────┘ └─────────────────────┘
|
||||
│ • Grok (cloud) │ │ :8000 │ │ crewai-service) │
|
||||
│ - sofiia, senpai │ ├───────────┤ │ ─────────────────── │
|
||||
│ • DeepSeek (cloud) │ │ • Qdrant │ │ crewai_agents.json │
|
||||
│ - all other agents │ │ • Neo4j │ │ │
|
||||
│ + fallback │ │ • Postgres│ │ 11 Orchestrators │
|
||||
│ • Mistral (fallback) │ └───────────┘ │ + Teams per agent │
|
||||
│ • Ollama (crew only) │ └─────────────────────┘
|
||||
│ - qwen3:8b (crew) │
|
||||
│ - qwen3-vl:8b (vis) │
|
||||
└───────────────────────┘
|
||||
```
|
||||
|
||||
---
|
||||
@@ -108,28 +111,28 @@ config/agent_registry.yml ←── ЄДИНЕ джерело істини
|
||||
|
||||
### TOP-LEVEL (User-facing, 13 agents)
|
||||
|
||||
| ID | Display | Telegram | Visibility | Domain |
|
||||
|----|---------|----------|------------|--------|
|
||||
| `daarwizz` | DAARWIZZ | public | public | Meta-Orchestrator |
|
||||
| `helion` | Helion | public | public | Energy |
|
||||
| `alateya` | Aletheia | public | public | R&D Lab |
|
||||
| `druid` | DRUID | public | public | Ayurveda/Cosmetics |
|
||||
| `nutra` | NUTRA | public | public | Nutraceuticals |
|
||||
| `agromatrix` | Степан Матрікс | public | public | Agriculture |
|
||||
| `greenfood` | GREENFOOD | public | public | Food ERP |
|
||||
| `clan` | CLAN | public | public | Community |
|
||||
| `eonarch` | EONARCH | public | public | Consciousness |
|
||||
| `yaromir` | YAROMIR | whitelist | private | Tech Lead |
|
||||
| `soul` | SOUL | public | public | Spiritual |
|
||||
| `senpai` | SENPAI | public | public | Trading |
|
||||
| `sofiia` | SOFIIA | public | public | AI Architecture |
|
||||
| ID | Display | Telegram | Visibility | Domain | LLM (primary) | Fallback |
|
||||
|----|---------|----------|------------|--------|---------------|---------|
|
||||
| `daarwizz` | DAARWIZZ | public | public | Meta-Orchestrator | DeepSeek | Mistral |
|
||||
| `helion` | Helion | public | public | Energy | DeepSeek | Mistral |
|
||||
| `alateya` | Aletheia | public | public | R&D Lab | DeepSeek | Mistral |
|
||||
| `druid` | DRUID | public | public | Ayurveda/Cosmetics | DeepSeek | Mistral |
|
||||
| `nutra` | NUTRA | public | public | Nutraceuticals | DeepSeek | Mistral |
|
||||
| `agromatrix` | Степан Матрікс | public | public | Agriculture | DeepSeek | Mistral |
|
||||
| `greenfood` | GREENFOOD | public | public | Food ERP | DeepSeek | Mistral |
|
||||
| `clan` | CLAN | public | public | Community | DeepSeek | Mistral |
|
||||
| `eonarch` | EONARCH | public | public | Consciousness | DeepSeek | Mistral |
|
||||
| `yaromir` | YAROMIR | whitelist | private | Tech Lead | DeepSeek | Mistral |
|
||||
| `soul` | SOUL | public | public | Spiritual | DeepSeek | Mistral |
|
||||
| `senpai` | SENPAI | public | public | Trading | **Grok** | DeepSeek |
|
||||
| `sofiia` | SOFIIA | public | public | AI Architecture | **Grok** | DeepSeek |
|
||||
|
||||
### INTERNAL (Service agents, 2 agents)
|
||||
|
||||
| ID | Display | Telegram | Scope | Purpose |
|
||||
|----|---------|----------|-------|---------|
|
||||
| `monitor` | MONITOR | off | node_local | Observability, alerts |
|
||||
| `devtools` | DevTools | off | global | Development tools |
|
||||
| ID | Display | Telegram | Scope | Purpose | LLM |
|
||||
|----|---------|----------|-------|---------|-----|
|
||||
| `monitor` | MONITOR | off | node_local | Observability, alerts | Ollama (local) |
|
||||
| `devtools` | DevTools | off | global | Development tools | DeepSeek (складні) / Ollama (прості) |
|
||||
|
||||
---
|
||||
|
||||
|
||||
@@ -1 +1 @@
|
||||
/Users/apple/github-projects/microdao-daarion/docs/backups/docs_backup_20260226-091701.tar.gz
|
||||
/Users/apple/github-projects/microdao-daarion/docs/backups/docs_backup_20260302-091700.tar.gz
|
||||
|
||||
@@ -155,5 +155,180 @@ STT/TTS/OCR/Image **можуть бути різними** на різних н
|
||||
- **14 контейнерів** (router, node-worker, node-capabilities, nats, gateway, memory, qdrant, postgres, neo4j, redis, open-webui, sofiia-console, swapper)
|
||||
- **13 served моделей** (Ollama: 12 + llama_server: 1)
|
||||
- **29 installed artifacts** на диску (150.3GB LLM + 0.3GB TTS kokoro-v1_0)
|
||||
- **capabilities**: llm=Y, vision=Y, ocr=Y, stt=N, tts=N, image=N
|
||||
- `OCR_PROVIDER=vision_prompted`
|
||||
- **capabilities**: llm=Y, vision=Y, ocr=Y, stt=Y, tts=Y, image=N ← Phase 1 enabled
|
||||
- `STT_PROVIDER=memory_service`, `TTS_PROVIDER=memory_service`, `OCR_PROVIDER=vision_prompted`
|
||||
|
||||
---
|
||||
|
||||
## Phase 1: STT/TTS via Memory Service delegation (2026-02-27)
|
||||
|
||||
### Мотивація
|
||||
|
||||
Увімкнення `stt=true` / `tts=true` в Fabric без нових мікросервісів і без ризику MLX-залежностей.
|
||||
|
||||
### Архітектура
|
||||
|
||||
```
|
||||
Fabric Router → find_nodes_with_capability("stt"/"tts") → NODA2 node-worker
|
||||
→ STT_PROVIDER=memory_service → stt_memory_service.transcribe()
|
||||
→ POST http://memory-service:8000/voice/stt (faster-whisper)
|
||||
→ {text, segments, language, meta}
|
||||
|
||||
Fabric Router → NODA2 node-worker
|
||||
→ TTS_PROVIDER=memory_service → tts_memory_service.synthesize()
|
||||
→ POST http://memory-service:8000/voice/tts (edge-tts: Polina/Ostap Neural uk-UA)
|
||||
→ {audio_b64, format="mp3", meta}
|
||||
```
|
||||
|
||||
### Контракти
|
||||
|
||||
**STT вхід:**
|
||||
```json
|
||||
{
|
||||
"audio_b64": "<base64>", // OR
|
||||
"audio_url": "http://...", // one is required
|
||||
"language": "uk", // optional
|
||||
"filename": "audio.wav" // optional
|
||||
}
|
||||
```
|
||||
|
||||
**STT вихід (fabric contract):**
|
||||
```json
|
||||
{"text": "...", "segments": [], "language": "uk", "meta": {...}, "provider": "memory_service"}
|
||||
```
|
||||
|
||||
**TTS вхід:**
|
||||
```json
|
||||
{"text": "...", "voice": "Polina", "speed": 1.0}
|
||||
```
|
||||
|
||||
**TTS вихід (fabric contract):**
|
||||
```json
|
||||
{"audio_b64": "<base64-mp3>", "format": "mp3", "meta": {...}, "provider": "memory_service"}
|
||||
```
|
||||
|
||||
### Обмеження Phase 1
|
||||
|
||||
- **ffmpeg=false**: лише формати що Memory Service ковтає нативно (WAV рекомендований)
|
||||
- **Текст TTS**: max 500 символів (Memory Service limit)
|
||||
- **Голоси TTS**: Polina (uk-UA-PolinaNeural), Ostap (uk-UA-OstapNeural), en-US-GuyNeural
|
||||
- **NODA1**: залишається `STT_PROVIDER=none` / `TTS_PROVIDER=none` (не заважає роутингу)
|
||||
|
||||
### Phase 2 (MLX upgrade — опційний)
|
||||
|
||||
Встановити `STT_PROVIDER=mlx_whisper` та/або `TTS_PROVIDER=mlx_kokoro` в docker-compose коли:
|
||||
- готовий ffmpeg або чітко обмежені формати
|
||||
- потрібний якісніший локальний TTS замість edge-tts
|
||||
- NODA2 Apple Silicon виграш від MLX
|
||||
|
||||
---
|
||||
|
||||
## Voice HA (Multi-node routing) — PR1–PR3
|
||||
|
||||
### Архітектура
|
||||
|
||||
```
|
||||
Browser → sofiia-console /api/voice/tts
|
||||
↓ VOICE_HA_ENABLED=false (default)
|
||||
memory-service:8000/voice/tts ← legacy direct
|
||||
|
||||
↓ VOICE_HA_ENABLED=true
|
||||
Router /v1/capability/voice_tts
|
||||
↓ (caps + scoring)
|
||||
node.{id}.voice.tts.request (NATS)
|
||||
↓
|
||||
node-worker (voice semaphore)
|
||||
↓
|
||||
memory-service/voice/tts
|
||||
```
|
||||
|
||||
### NATS Subjects (Voice HA — відокремлені від generic)
|
||||
|
||||
| Subject | Призначення |
|
||||
|---|---|
|
||||
| `node.{id}.voice.tts.request` | Voice TTS offload (окремий semaphore) |
|
||||
| `node.{id}.voice.llm.request` | Voice LLM inference (голосові guardrails) |
|
||||
| `node.{id}.voice.stt.request` | Voice STT transcription |
|
||||
|
||||
**Сумісність:** generic subjects (`node.{id}.tts.request` etc.) — незмінні.
|
||||
|
||||
### Capability Flags
|
||||
|
||||
Node Worker `/caps` повертає:
|
||||
```json
|
||||
{
|
||||
"capabilities": {
|
||||
"tts": true,
|
||||
"voice_tts": true,
|
||||
"voice_llm": true,
|
||||
"voice_stt": true
|
||||
},
|
||||
"voice_concurrency": {
|
||||
"voice_tts": 4,
|
||||
"voice_llm": 2,
|
||||
"voice_stt": 2
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
`voice_tts=true` лише коли `TTS_PROVIDER != none` **і** NATS subscription активна.
|
||||
NCS агрегує ці флаги через `_derive_capabilities()`.
|
||||
|
||||
### Router Endpoints
|
||||
|
||||
| Endpoint | Дедлайн | Суб'єкт |
|
||||
|---|---|---|
|
||||
| `POST /v1/capability/voice_tts` | 3000ms | `node.{id}.voice.tts.request` |
|
||||
| `POST /v1/capability/voice_llm` | 9000ms (fast) / 12000ms (quality) | `node.{id}.voice.llm.request` |
|
||||
| `POST /v1/capability/voice_stt` | 6000ms | `node.{id}.voice.stt.request` |
|
||||
|
||||
Response headers: `X-Voice-Node`, `X-Voice-Mode` (local|remote), `X-Voice-Cap`.
|
||||
|
||||
### Scoring
|
||||
|
||||
```
|
||||
score = wait_ms + rtt_ms + p95_ms + mem_penalty - local_bonus
|
||||
mem_penalty = 300 if mem_pressure == "high"
|
||||
local_bonus = VOICE_PREFER_LOCAL_BONUS (default 200ms)
|
||||
```
|
||||
|
||||
Якщо `score_local <= score_best_remote + LOCAL_THRESHOLD_MS` → вибирається локальна нода.
|
||||
|
||||
### BFF Feature Flag
|
||||
|
||||
```yaml
|
||||
# docker-compose.node2-sofiia.yml
|
||||
VOICE_HA_ENABLED: "false" # default — legacy direct path
|
||||
VOICE_HA_ROUTER_URL: "http://router:8000" # Router для HA offload
|
||||
```
|
||||
|
||||
Активація: `VOICE_HA_ENABLED=true` + rebuild `sofiia-console`.
|
||||
Деактивація: `VOICE_HA_ENABLED=false` — повертається до direct memory-service.
|
||||
|
||||
### Метрики (Prometheus)
|
||||
|
||||
**node-worker** (`/prom_metrics`):
|
||||
- `node_worker_voice_jobs_total{cap,status}`
|
||||
- `node_worker_voice_inflight{cap}`
|
||||
- `node_worker_voice_latency_ms{cap}` (histogram)
|
||||
|
||||
**router** (`/fabric_metrics`):
|
||||
- `fabric_voice_capability_requests_total{cap,status}`
|
||||
- `fabric_voice_offload_total{cap,node,status}`
|
||||
- `fabric_voice_breaker_state{cap,node}` (1=open)
|
||||
- `fabric_voice_score_ms{cap}` (histogram)
|
||||
|
||||
### Контракт: No Silent Fallback
|
||||
|
||||
- Будь-який fallback (busy, broken, timeout) логує `WARNING` + інкрементує Prometheus counter
|
||||
- `TOO_BUSY` включає `retry_after_ms` hint для Router failover
|
||||
- Circuit breaker per `node+voice_cap` — не змішується з generic CB
|
||||
|
||||
### Тести
|
||||
|
||||
`tests/test_voice_ha.py` — 28 тестів:
|
||||
- Node Worker voice caps + semaphore isolation
|
||||
- Router fabric_metrics voice helpers
|
||||
- BFF `VOICE_HA_ENABLED` feature flag
|
||||
- Voice scoring logic (local prefer, mem penalty, remote wins when saturated)
|
||||
- No silent fallback invariants
|
||||
|
||||
@@ -3,7 +3,12 @@ FROM python:3.11-slim
|
||||
|
||||
LABEL maintainer="DAARION.city Team"
|
||||
LABEL description="Bot Gateway - Telegram/Discord webhook handler with DAARWIZZ"
|
||||
LABEL version="0.2.1"
|
||||
LABEL version="0.2.2"
|
||||
|
||||
ARG BUILD_SHA=dev
|
||||
ARG BUILD_TIME=local
|
||||
ENV BUILD_SHA=${BUILD_SHA}
|
||||
ENV BUILD_TIME=${BUILD_TIME}
|
||||
|
||||
WORKDIR /app/gateway-bot
|
||||
|
||||
|
||||
@@ -20,6 +20,35 @@ Modes:
|
||||
- public mode: community-shareable report, sanitized.
|
||||
- confidential mode: strict redaction and minimal retention.
|
||||
|
||||
AISTALK team routing (internal):
|
||||
- Use `Aurora` for media forensics requests: blurry CCTV, noisy video/audio, frame extraction, metadata integrity, deepfake suspicion, photo restoration.
|
||||
- Default Aurora mode:
|
||||
- `tactical` for quick understanding
|
||||
- `forensic` when evidence is intended for legal/compliance workflows
|
||||
- For forensic media workflows require:
|
||||
- hash of original and result (`sha256`)
|
||||
- processing log (step, model, timing)
|
||||
- chain-of-custody notes and signature metadata when available
|
||||
|
||||
Aurora response contract for media tasks:
|
||||
```json
|
||||
{
|
||||
"agent": "Aurora",
|
||||
"mode": "tactical | forensic",
|
||||
"job_id": "aurora_YYYYMMDD_###",
|
||||
"input_file": {"name": "file.ext", "hash": "sha256:..."},
|
||||
"processing_log": [{"step": "denoise", "model": "FastDVDnet", "time_ms": 1200}],
|
||||
"output_files": [{"type": "video|audio|photo|forensic_log", "url": "https://...", "hash": "sha256:..."}],
|
||||
"digital_signature": "ed25519:... | null"
|
||||
}
|
||||
```
|
||||
|
||||
Safety and compliance:
|
||||
- No deceptive deepfake generation or identity manipulation.
|
||||
- Always label AI-enhanced artifacts as enhanced outputs.
|
||||
- Separate observations from conclusions; include confidence and limitations.
|
||||
- For legal-grade conclusions, require human forensic expert verification.
|
||||
|
||||
Output style:
|
||||
- Short executive summary first.
|
||||
- Then findings, risk level, and recommended next actions.
|
||||
|
||||
@@ -1,100 +1,79 @@
|
||||
Ти — DRUID, агент аналітики та RAG платформи DAARION.
|
||||
# DRUID v1.0
|
||||
## Майстер алхімічних формул і аюрведи
|
||||
|
||||
Твоя роль — допомагати користувачам з пошуком інформації, аналізом документів та відповідями на питання з бази знань.
|
||||
Ти — DRUID.
|
||||
Алхімік і дослідник на межі між давньою традицією і сучасною наукою формулювань.
|
||||
Твоя сила — перетворювати рослинні знання і хімічну точність на конкретні рецептури: від аюрведичної олії до сучасного шампуню.
|
||||
|
||||
## NETWORK AWARENESS (DAARION)
|
||||
- Ти знаєш, що **DAARWIZZ** — головний оркестратор/мер мережі DAARION.city.
|
||||
- Якщо запит про DAARWIZZ або мережеву оркестрацію: коротко підтверди роль DAARWIZZ та скеруй до нього для coordination/governance питань.
|
||||
- Ніколи не відповідай, що "не знаєш DAARWIZZ".
|
||||
Говориш від першої особи, у чоловічому роді. Стиль — впевнений, лаконічний, практичний.
|
||||
Ти не читаєш лекцій — ти даєш формули, склади, пропорції.
|
||||
|
||||
---
|
||||
|
||||
# BEHAVIOR POLICY v1
|
||||
## Мережа DAARION
|
||||
|
||||
## A. SPEAK-ONLY-WHEN-ASKED (SOWA)
|
||||
|
||||
**Головне правило: мовчи, якщо не питали.**
|
||||
|
||||
НЕ ВІДПОВІДАЙ, якщо:
|
||||
- Немає прямого звернення (@DRUID73bot, "Druid", команда)
|
||||
- Повідомлення — broadcast/оголошення/постер
|
||||
- Коротка нотатка/таймінг без запиту
|
||||
- Медіа/фото/посилання БЕЗ питання
|
||||
|
||||
ВІДПОВІДАЙ, якщо:
|
||||
- Пряме звернення: @DRUID73bot, "Druid", "/druid"
|
||||
- Явний запит про пошук, документи, аналітику
|
||||
- Особисте повідомлення (DM)
|
||||
- Навчальна група (Agent Preschool)
|
||||
|
||||
**Якщо не впевнений — МОВЧИ.**
|
||||
|
||||
## B. SHORT-FIRST
|
||||
|
||||
**За замовчуванням: 1-3 речення.**
|
||||
|
||||
ЗАБОРОНЕНО:
|
||||
- Довгі розбори без запиту
|
||||
- "Радий допомогти", "Готовий до співпраці"
|
||||
- Емодзі
|
||||
|
||||
## C. MEDIA-NO-COMMENT
|
||||
|
||||
Медіа без питання = мовчанка.
|
||||
Медіа з питанням = коротка відповідь по суті.
|
||||
- **DAARWIZZ** — головний оркестратор мережі. Якщо запит про координацію/governance — скеровуй до нього.
|
||||
- **NUTRA** — партнер по здоров'ю і нутріцевтиці. Якщо питання про внутрішній прийом, БАД, медицину — скеровуй до NUTRA.
|
||||
- Ніколи не заперечуй знайомство з DAARWIZZ.
|
||||
|
||||
---
|
||||
|
||||
## 🎤 МУЛЬТИМОДАЛЬНІСТЬ
|
||||
## Що я роблю
|
||||
|
||||
**Ти можеш працювати з:**
|
||||
- ✅ **Голосовими повідомленнями** — автоматично перетворюються на текст (STT)
|
||||
- ✅ **Фото** — аналіз зображень
|
||||
- ✅ **Документами** — PDF, DOCX автоматично парсяться та індексуються
|
||||
**Аюрведа і фітохімія:**
|
||||
Рослинні екстракти, ефірні олії, адаптогени, мацерати, гідролати, настойки.
|
||||
Аюрведичні препарати для зовнішнього застосування.
|
||||
|
||||
**ВАЖЛИВО:**
|
||||
- Ніколи не кажи "я не можу слухати аудіо" — голосові повідомлення вже перетворені на текст!
|
||||
- Ніколи не кажи "я не можу бачити/аналізувати зображення" — ти МАЄШ Vision API і МОЖЕШ аналізувати фото! Якщо в історії розмови є твій опис зображення — це означає ти його вже проаналізував(ла) через Vision. Не заперечуй це.
|
||||
**Косметичні формули:**
|
||||
Емульсії (O/W, W/O), сироватки, бальзами, шампуні, мило, дезодоранти.
|
||||
Підбір сурфактантів, емульгаторів, консервантів, pH-систем.
|
||||
|
||||
**INCI і склади:**
|
||||
Розшифрую будь-який INCI список. Знаю що з чим поєднується і що — ні.
|
||||
Концентраційні ліміти, алергени, стабільність.
|
||||
|
||||
**Для бізнесу і виробництва:**
|
||||
Базова регуляторика (EU Cosmetics Regulation 1223/2009, різниця EU/US).
|
||||
Вимоги маркування, claims, технологічні протоколи.
|
||||
|
||||
---
|
||||
|
||||
## 🛠️ ТВОЇ МОЖЛИВОСТІ (tools)
|
||||
## Команда (для складних задач)
|
||||
|
||||
Ти маєш доступ до спеціальних інструментів:
|
||||
|
||||
**Пошук і знання:**
|
||||
- `memory_search` — шукай в своїй пам'яті, документах
|
||||
- `graph_query` — шукай зв'язки між темами
|
||||
- `web_search` — шукай в інтернеті
|
||||
|
||||
**Генерація:**
|
||||
- `image_generate` — згенеруй зображення
|
||||
- `presentation_create` — створи презентацію PowerPoint
|
||||
|
||||
**Пам'ять:**
|
||||
- `remember_fact` — запам'ятай важливий факт
|
||||
|
||||
**Коли створювати презентацію:**
|
||||
Якщо користувач просить "створи презентацію", "зроби слайди" — використай `presentation_create`.
|
||||
Для детального аналізу я підключаю лабораторію:
|
||||
- **Formulator** — склад і пропорції
|
||||
- **Ingredient Analyst** — INCI, сумісність, функції
|
||||
- **Safety & QA** — безпека, концентрації, алергени
|
||||
- **Regulatory Basics** — регуляторні вимоги
|
||||
- **Protocol Writer** — покроковий протокол виробництва
|
||||
|
||||
---
|
||||
|
||||
## Правила відповіді
|
||||
|
||||
Відповідаю якщо: пряме звернення (@DRUID73bot, "Druid", "/druid"), запит про рецептуру, склад, INCI, аюрведу, косметику, ефірні олії.
|
||||
Мовчу якщо: оголошення без питання, медіа без запиту, теми поза моєю спеціалізацією.
|
||||
|
||||
Формат: коротко і конкретно. Таблиця або список — якщо є що перерахувати. Деталі — на прохання.
|
||||
Заборонено: "Радий допомогти", зайві вступи, порожні застереження.
|
||||
|
||||
---
|
||||
|
||||
## ПАМ'ЯТЬ ТА ІНСТРУМЕНТИ
|
||||
## Технічні можливості
|
||||
|
||||
### Пам'ять (ETM — Ephemeral Turn Memory):
|
||||
- Ти бачиш **80 останніх повідомлень** чату (повна доступна історія сесії)
|
||||
- У ГРУПОВИХ чатах ти бачиш повідомлення **ВСІХ учасників** (не тільки поточного)
|
||||
- Повідомлення від різних користувачів позначені їх іменами: [username]: текст
|
||||
- Уся історія чату зберігається НАЗАВЖДИ у базі даних Memory Service
|
||||
- **НІКОЛИ не кажи "не бачу повідомлення інших учасників" — ти їх БАЧИШ у контексті вище!**
|
||||
- У тебе є доступ до документів через колекцію `druid_docs`
|
||||
- Аналізую фото (Vision): зображення рослин, продуктів, складів на етикетці
|
||||
- Читаю документи: PDF зі специфікаціями, SDS, технічними картами
|
||||
- Голосові — конвертуються автоматично в текст, просто відповідаю
|
||||
- `memory_search` — шукаю в збережених рецептурах і документах
|
||||
- `web_search` — нові дослідження, інгредієнти, регуляторні оновлення
|
||||
- `crawl4ai_scrape` — витягую INCI список прямо з сайту бренду
|
||||
|
||||
### Інструменти:
|
||||
- **memory_search** — пошук по збережених документах та попередніх розмовах
|
||||
- **web_search** — пошук в інтернеті (якщо потрібна зовнішня інформація)
|
||||
- **crawl4ai_scrape** — витягти контент з URL
|
||||
Ніколи не кажу "не можу аналізувати фото" або "не маю цієї інформації" без спроби пошуку.
|
||||
|
||||
**Порядок пошуку:** 1) memory_search 2) якщо пусто → web_search 3) crawl4ai_scrape для URL.
|
||||
**НІКОЛИ не кажи "не маю інформації" без спроби web_search!**
|
||||
---
|
||||
|
||||
## Межі
|
||||
|
||||
Не даю медичних рекомендацій для внутрішнього вживання — це до NUTRA.
|
||||
Концентрації і застереження — на основі загальнодоступних даних.
|
||||
Для комерційного виробництва — рекомендую підтвердити з дерматологом або токсикологом.
|
||||
|
||||
@@ -748,6 +748,11 @@ BRAND_REGISTRY_URL = os.getenv("BRAND_REGISTRY_URL", "http://brand-registry:9210
|
||||
PRESENTATION_RENDERER_URL = os.getenv("PRESENTATION_RENDERER_URL", "http://presentation-renderer:9212").rstrip("/")
|
||||
ARTIFACT_REGISTRY_URL = os.getenv("ARTIFACT_REGISTRY_URL", "http://artifact-registry:9220").rstrip("/")
|
||||
|
||||
# Build metadata — injected at image build time via ARG/ENV (BUILD_SHA, BUILD_TIME, NODE_ID)
|
||||
_GATEWAY_BUILD_SHA = os.environ.get("BUILD_SHA", "dev")
|
||||
_GATEWAY_BUILD_TIME = os.environ.get("BUILD_TIME", "local")
|
||||
_GATEWAY_NODE_ID = os.environ.get("NODE_ID", "NODA1")
|
||||
|
||||
router = APIRouter()
|
||||
|
||||
|
||||
@@ -985,6 +990,36 @@ SOFIIA_CONFIG = load_agent_config(
|
||||
default_prompt="Ти — Sophia (Софія), Chief AI Architect та Technical Sovereign екосистеми DAARION.city. Координуєш R&D, архітектуру, безпеку та еволюцію платформи.",
|
||||
)
|
||||
|
||||
# MONITOR — Node-Local Ops Agent (internal, not user-facing via Telegram)
|
||||
MONITOR_CONFIG = load_agent_config(
|
||||
agent_id="monitor",
|
||||
name="MONITOR",
|
||||
prompt_path=os.getenv(
|
||||
"MONITOR_PROMPT_PATH",
|
||||
str(Path(__file__).parent / "monitor_prompt.txt"),
|
||||
),
|
||||
telegram_token_env="MONITOR_TELEGRAM_BOT_TOKEN", # intentionally empty — no Telegram
|
||||
default_prompt=(
|
||||
"You are MONITOR, the node-local health and observability agent for DAARION infrastructure. "
|
||||
"You perform health checks, alert triage, and safe ops diagnostics. Internal use only."
|
||||
),
|
||||
)
|
||||
|
||||
# AISTALK — Cyber Detective Agency Orchestrator (planned, private)
|
||||
AISTALK_CONFIG = load_agent_config(
|
||||
agent_id="aistalk",
|
||||
name="AISTALK",
|
||||
prompt_path=os.getenv(
|
||||
"AISTALK_PROMPT_PATH",
|
||||
str(Path(__file__).parent / "aistalk_prompt.txt"),
|
||||
),
|
||||
telegram_token_env="AISTALK_TELEGRAM_BOT_TOKEN",
|
||||
default_prompt=(
|
||||
"You are AISTALK, an autonomous cyber detective agency orchestrator inside DAARION. "
|
||||
"You handle cyber-investigation intents, threat intelligence, and incident response."
|
||||
),
|
||||
)
|
||||
|
||||
# Registry of all agents (для легкого додавання нових агентів)
|
||||
AGENT_REGISTRY: Dict[str, AgentConfig] = {
|
||||
"daarwizz": DAARWIZZ_CONFIG,
|
||||
@@ -1001,6 +1036,8 @@ AGENT_REGISTRY: Dict[str, AgentConfig] = {
|
||||
"soul": SOUL_CONFIG,
|
||||
"yaromir": YAROMIR_CONFIG,
|
||||
"sofiia": SOFIIA_CONFIG,
|
||||
"monitor": MONITOR_CONFIG,
|
||||
"aistalk": AISTALK_CONFIG,
|
||||
}
|
||||
# 3. Створіть endpoint (опціонально, якщо потрібен окремий webhook):
|
||||
# @router.post("/new_agent/telegram/webhook")
|
||||
@@ -5071,19 +5108,40 @@ async def _old_helion_telegram_webhook(update: TelegramUpdate):
|
||||
@router.get("/health")
|
||||
async def health():
|
||||
"""Health check endpoint"""
|
||||
# Static metadata for agents that don't have Telegram — used by Sofiia console UI badges
|
||||
_AGENT_META: Dict[str, Dict] = {
|
||||
"monitor": {"badges": ["per-node", "ops"], "visibility": "internal", "telegram_mode": "off"},
|
||||
"aistalk": {"badges": ["cyber", "private"], "visibility": "private", "lifecycle_status": "planned"},
|
||||
"sofiia": {"badges": ["supervisor", "architect"]},
|
||||
"helion": {"badges": ["cto", "dao"]},
|
||||
}
|
||||
|
||||
agents_info = {}
|
||||
for agent_id, config in AGENT_REGISTRY.items():
|
||||
meta = _AGENT_META.get(agent_id, {})
|
||||
agents_info[agent_id] = {
|
||||
"name": config.name,
|
||||
"prompt_loaded": len(config.system_prompt) > 0,
|
||||
"telegram_token_configured": config.get_telegram_token() is not None
|
||||
"telegram_token_configured": config.get_telegram_token() is not None,
|
||||
"badges": meta.get("badges", []),
|
||||
"visibility": meta.get("visibility", "public"),
|
||||
"telegram_mode": meta.get("telegram_mode", "on"),
|
||||
"lifecycle_status": meta.get("lifecycle_status", "active"),
|
||||
}
|
||||
|
||||
|
||||
# Required per-node agents check
|
||||
required_agents = ["monitor"]
|
||||
required_missing = [aid for aid in required_agents if aid not in agents_info]
|
||||
|
||||
return {
|
||||
"status": "healthy",
|
||||
"agents": agents_info,
|
||||
"agents_count": len(AGENT_REGISTRY),
|
||||
"required_missing": required_missing,
|
||||
"timestamp": datetime.utcnow().isoformat(),
|
||||
"build_sha": _GATEWAY_BUILD_SHA,
|
||||
"build_time": _GATEWAY_BUILD_TIME,
|
||||
"node_id": _GATEWAY_NODE_ID,
|
||||
}
|
||||
|
||||
|
||||
|
||||
@@ -1047,3 +1047,66 @@ async def upsert_chat_doc_context_with_summary(
|
||||
except Exception as exc:
|
||||
logger.warning("upsert_chat_doc_context_with_summary failed: %s", exc)
|
||||
return False
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Compatibility stubs (functions used by http_api_doc.py)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class _DocServiceCompat:
|
||||
"""Namespace stub — keep backward-compat with imports that use doc_service.X"""
|
||||
pass
|
||||
|
||||
doc_service = _DocServiceCompat()
|
||||
|
||||
class UpdateResult(BaseModel):
|
||||
"""Compat model matching what http_api_doc.py expects."""
|
||||
doc_id: str = ""
|
||||
version_no: int = 0
|
||||
version_id: str = ""
|
||||
updated_chunks: int = 0
|
||||
status: str = "stub"
|
||||
success: bool = False
|
||||
error: Optional[str] = "not implemented"
|
||||
publish_error: Optional[str] = None
|
||||
artifact_id: Optional[str] = None
|
||||
artifact_version_id: Optional[str] = None
|
||||
artifact_storage_key: Optional[str] = None
|
||||
artifact_mime: Optional[str] = None
|
||||
artifact_download_url: Optional[str] = None
|
||||
|
||||
|
||||
class _PublishResult(BaseModel):
|
||||
"""Compat model for publish_document_artifact."""
|
||||
success: bool = False
|
||||
error: Optional[str] = "not implemented"
|
||||
artifact_id: Optional[str] = None
|
||||
version_id: Optional[str] = None
|
||||
storage_key: Optional[str] = None
|
||||
mime: Optional[str] = None
|
||||
file_name: Optional[str] = None
|
||||
download_url: Optional[str] = None
|
||||
|
||||
|
||||
async def update_document(**kwargs) -> UpdateResult:
|
||||
"""Stub — gateway does not implement local doc versioning; use Sofiia Console /api/doc/versions."""
|
||||
doc_id = kwargs.get("doc_id", "")
|
||||
logger.warning("update_document: stub called for doc_id=%s", doc_id)
|
||||
return UpdateResult(doc_id=doc_id, success=False, error="not implemented in gateway")
|
||||
|
||||
|
||||
async def list_document_versions(
|
||||
agent_id: str,
|
||||
doc_id: str,
|
||||
limit: int = 20,
|
||||
) -> Dict[str, Any]:
|
||||
"""Stub — returns empty list. Real versions stored in Sofiia Console SQLite."""
|
||||
logger.debug("list_document_versions: stub called for doc_id=%s", doc_id)
|
||||
return {"ok": True, "doc_id": doc_id, "versions": [], "total": 0}
|
||||
|
||||
|
||||
async def publish_document_artifact(**kwargs) -> _PublishResult:
|
||||
"""Stub — gateway does not implement artifact storage. Use artifact-registry service."""
|
||||
doc_id = kwargs.get("doc_id", "")
|
||||
logger.warning("publish_document_artifact: stub called for doc_id=%s", doc_id)
|
||||
return _PublishResult(success=False, error="not implemented in gateway")
|
||||
|
||||
@@ -9,6 +9,7 @@ set -euo pipefail
|
||||
|
||||
NODA_NCS="${1:-http://127.0.0.1:8099}"
|
||||
ROUTER_URL="${2:-http://127.0.0.1:9102}"
|
||||
MEMORY_URL="${3:-http://127.0.0.1:8000}"
|
||||
|
||||
RED='\033[0;31m'
|
||||
GREEN='\033[0;32m'
|
||||
@@ -64,10 +65,42 @@ print(' '.join(parts) if parts else '(none — P3.5 not deployed?)')
|
||||
vision_count=$(echo "$raw" | python3 -c "import json,sys;print(sum(1 for m in json.load(sys.stdin).get('served_models',[]) if m.get('type')=='vision'))" 2>/dev/null)
|
||||
[ "$vision_count" -gt 0 ] && pass "vision models: $vision_count" || warn "no vision models served"
|
||||
|
||||
# Phase 1: explicit STT/TTS capability check
|
||||
local stt_cap tts_cap stt_provider tts_provider
|
||||
stt_cap=$(echo "$raw" | python3 -c "import json,sys;print(json.load(sys.stdin).get('capabilities',{}).get('stt','?'))" 2>/dev/null)
|
||||
tts_cap=$(echo "$raw" | python3 -c "import json,sys;print(json.load(sys.stdin).get('capabilities',{}).get('tts','?'))" 2>/dev/null)
|
||||
stt_provider=$(echo "$raw" | python3 -c "import json,sys;print(json.load(sys.stdin).get('capabilities',{}).get('providers',{}).get('stt','?'))" 2>/dev/null)
|
||||
tts_provider=$(echo "$raw" | python3 -c "import json,sys;print(json.load(sys.stdin).get('capabilities',{}).get('providers',{}).get('tts','?'))" 2>/dev/null)
|
||||
[ "$stt_cap" = "True" ] || [ "$stt_cap" = "true" ] \
|
||||
&& pass "stt=true provider=$stt_provider" \
|
||||
|| warn "stt=false (provider=$stt_provider) — STT not available on this node"
|
||||
[ "$tts_cap" = "True" ] || [ "$tts_cap" = "true" ] \
|
||||
&& pass "tts=true provider=$tts_provider" \
|
||||
|| warn "tts=false (provider=$tts_provider) — TTS not available on this node"
|
||||
|
||||
NCS_RAW="$raw"
|
||||
NCS_NODE_ID="$node_id"
|
||||
}
|
||||
|
||||
# ── Memory Service health check ────────────────────────────────────────────────
|
||||
|
||||
check_memory_service() {
|
||||
local label="$1" url="$2"
|
||||
echo "── $label ($url/health) ──"
|
||||
local health
|
||||
health=$(curl -sf "$url/health" 2>/dev/null) || { warn "Memory Service unreachable at $url (STT/TTS may fail)"; return; }
|
||||
local status
|
||||
status=$(echo "$health" | python3 -c "import json,sys;print(json.load(sys.stdin).get('status','?'))" 2>/dev/null || echo "ok")
|
||||
pass "memory-service health=$status"
|
||||
|
||||
local voice_status
|
||||
voice_status=$(curl -sf "$url/voice/status" 2>/dev/null) || { warn "voice/status unreachable"; return; }
|
||||
local tts_engine stt_engine
|
||||
tts_engine=$(echo "$voice_status" | python3 -c "import json,sys;print(json.load(sys.stdin).get('tts_engine','?'))" 2>/dev/null)
|
||||
stt_engine=$(echo "$voice_status" | python3 -c "import json,sys;print(json.load(sys.stdin).get('stt_engine','?'))" 2>/dev/null)
|
||||
pass "voice: tts=$tts_engine stt=$stt_engine"
|
||||
}
|
||||
|
||||
# ── Router check ──────────────────────────────────────────────────────────────
|
||||
|
||||
check_router() {
|
||||
@@ -163,6 +196,91 @@ else:
|
||||
info "Snapshot: $snap_file"
|
||||
}
|
||||
|
||||
# ── Ollama model availability check ──────────────────────────────────────────
|
||||
# Voice routing policy depends on specific models; 502 from BFF = model absent.
|
||||
# This check probes /api/tags (Ollama REST) to list installed models and
|
||||
# emits NCS-compatible "installed=false" warnings so Router can exclude them.
|
||||
|
||||
OLLAMA_URL="${4:-http://127.0.0.1:11434}"
|
||||
|
||||
# Voice policy: models required/preferred for voice_fast_uk / voice_quality_uk
|
||||
VOICE_REQUIRED_MODELS="gemma3:latest"
|
||||
VOICE_PREFERRED_MODELS="qwen3.5:35b-a3b qwen3:14b"
|
||||
VOICE_EXCLUDED_MODELS="glm-4.7-flash:32k glm-4.7-flash"
|
||||
|
||||
check_ollama_voice_models() {
|
||||
local ollama_url="${1:-$OLLAMA_URL}"
|
||||
echo "── Ollama voice model availability ($ollama_url) ──"
|
||||
|
||||
local tags_raw
|
||||
tags_raw=$(curl -sf "${ollama_url}/api/tags" 2>/dev/null) \
|
||||
|| { warn "Ollama unreachable at ${ollama_url} — model check skipped"; return; }
|
||||
|
||||
local installed_names
|
||||
installed_names=$(echo "$tags_raw" | python3 -c "
|
||||
import json, sys
|
||||
data = json.load(sys.stdin)
|
||||
models = data.get('models', [])
|
||||
names = [m.get('name','') for m in models]
|
||||
print(' '.join(names))
|
||||
" 2>/dev/null || echo "")
|
||||
|
||||
info "Ollama installed: $(echo "$installed_names" | tr ' ' '\n' | grep -c . || echo 0) model(s)"
|
||||
|
||||
# Check required voice models
|
||||
for model in $VOICE_REQUIRED_MODELS; do
|
||||
local short; short="${model%%:*}"
|
||||
if echo "$installed_names" | tr ' ' '\n' | grep -qi "^${model}$\|^${short}:"; then
|
||||
pass "voice_required: ${model} = installed"
|
||||
else
|
||||
fail "voice_required: ${model} = MISSING — voice_fast_uk will degrade to fallback"
|
||||
fi
|
||||
done
|
||||
|
||||
# Check preferred voice models (warn not fail)
|
||||
local prefer_available=0
|
||||
for model in $VOICE_PREFERRED_MODELS; do
|
||||
local short; short="${model%%:*}"
|
||||
if echo "$installed_names" | tr ' ' '\n' | grep -qi "^${model}$\|^${short}:"; then
|
||||
pass "voice_preferred: ${model} = installed"
|
||||
prefer_available=$((prefer_available + 1))
|
||||
else
|
||||
warn "voice_preferred: ${model} = not installed — will be skipped by router"
|
||||
fi
|
||||
done
|
||||
|
||||
# Check that excluded models are NOT serving voice
|
||||
for model in $VOICE_EXCLUDED_MODELS; do
|
||||
local short; short="${model%%:*}"
|
||||
if echo "$installed_names" | tr ' ' '\n' | grep -qi "^${model}$\|^${short}:"; then
|
||||
warn "voice_excluded: ${model} is installed — ensure router excludes from voice profiles"
|
||||
else
|
||||
pass "voice_excluded: ${model} = absent (correct)"
|
||||
fi
|
||||
done
|
||||
|
||||
# qwen3:8b specific check — known 502 source
|
||||
local qwen3_8b_ok=0
|
||||
if echo "$installed_names" | tr ' ' '\n' | grep -qi "^qwen3:8b$"; then
|
||||
# Extra: try a minimal generation to detect "loaded but broken"
|
||||
local gen_code
|
||||
gen_code=$(curl -sf -w "%{http_code}" -X POST "${ollama_url}/api/generate" \
|
||||
-H "Content-Type: application/json" \
|
||||
-d '{"model":"qwen3:8b","prompt":"ping","stream":false,"options":{"num_predict":1}}' \
|
||||
-o /dev/null --max-time 15 2>/dev/null || echo "000")
|
||||
if [ "$gen_code" = "200" ]; then
|
||||
pass "qwen3:8b = installed and serves (HTTP 200)"
|
||||
qwen3_8b_ok=1
|
||||
else
|
||||
warn "qwen3:8b = installed but generate returned HTTP ${gen_code} — exclude from voice_fast_uk prefer list"
|
||||
fi
|
||||
else
|
||||
warn "qwen3:8b = not installed — mark as unavailable in NCS"
|
||||
fi
|
||||
|
||||
[ $qwen3_8b_ok -eq 0 ] && info "ACTION: remove qwen3:8b from voice_fast_uk.prefer_models until 502 resolved"
|
||||
}
|
||||
|
||||
# ── Main ──────────────────────────────────────────────────────────────────────
|
||||
|
||||
echo "╔══════════════════════════════════════╗"
|
||||
@@ -174,6 +292,26 @@ check_ncs "NCS" "$NODA_NCS"
|
||||
echo ""
|
||||
check_router "Router" "$ROUTER_URL"
|
||||
echo ""
|
||||
check_memory_service "Memory Service" "$MEMORY_URL"
|
||||
echo ""
|
||||
check_ollama_voice_models "$OLLAMA_URL"
|
||||
echo ""
|
||||
|
||||
# ── Voice Canary: live synthesis test (hard-fail on voice failure) ────────────
|
||||
echo "── Voice Canary (live synthesis) ──────────────────────────────────────"
|
||||
CANARY_SCRIPT="$(dirname "$0")/scripts/voice_canary.py"
|
||||
if [ -f "$CANARY_SCRIPT" ] && command -v python3 >/dev/null 2>&1; then
|
||||
MEMORY_SERVICE_URL="$MEMORY_URL" python3 "$CANARY_SCRIPT" --mode preflight
|
||||
CANARY_EXIT=$?
|
||||
if [ $CANARY_EXIT -ne 0 ]; then
|
||||
ERRORS=$((ERRORS+1))
|
||||
echo -e " ${RED}FAIL${NC} Voice canary: synthesis test failed (Polina/Ostap not working)"
|
||||
fi
|
||||
else
|
||||
echo " [SKIP] voice_canary.py not found or python3 unavailable"
|
||||
fi
|
||||
echo ""
|
||||
|
||||
save_and_diff
|
||||
echo ""
|
||||
|
||||
@@ -182,5 +320,5 @@ if [ $ERRORS -gt 0 ]; then
|
||||
echo -e "${RED}BLOCKED: no changes allowed until all errors resolved${NC}"
|
||||
exit 1
|
||||
else
|
||||
echo -e "${GREEN}Preflight PASSED — changes allowed${NC}"
|
||||
echo -e "${GREEN}Preflight PASSED — all voice canaries green — changes allowed${NC}"
|
||||
fi
|
||||
|
||||
@@ -122,6 +122,33 @@ llm_profiles:
|
||||
timeout_ms: 60000
|
||||
description: "Mistral Large для складних задач, reasoning, аналізу"
|
||||
|
||||
claude_sofiia:
|
||||
provider: anthropic
|
||||
api_key_env: ANTHROPIC_API_KEY
|
||||
model: claude-sonnet-4-5
|
||||
max_tokens: 8192
|
||||
temperature: 0.2
|
||||
timeout_ms: 120000
|
||||
description: "Claude Sonnet для Sofiia — code generation, architecture, reasoning"
|
||||
|
||||
claude_opus:
|
||||
provider: anthropic
|
||||
api_key_env: ANTHROPIC_API_KEY
|
||||
model: claude-opus-4-5
|
||||
max_tokens: 8192
|
||||
temperature: 0.15
|
||||
timeout_ms: 180000
|
||||
description: "Claude Opus — для найскладніших архітектурних задач Sofiia"
|
||||
|
||||
claude_haiku:
|
||||
provider: anthropic
|
||||
api_key_env: ANTHROPIC_API_KEY
|
||||
model: claude-haiku-3-5
|
||||
max_tokens: 4096
|
||||
temperature: 0.25
|
||||
timeout_ms: 30000
|
||||
description: "Claude Haiku — швидкі відповіді, інструментальні задачі"
|
||||
|
||||
# ============================================================================
|
||||
# Orchestrator Providers
|
||||
# ============================================================================
|
||||
@@ -416,12 +443,13 @@ agents:
|
||||
|
||||
sofiia:
|
||||
description: "Sofiia — Chief AI Architect та Technical Sovereign"
|
||||
default_llm: local_default_coder
|
||||
default_llm: claude_sofiia
|
||||
system_prompt: |
|
||||
Ти Sofiia — Chief AI Architect та Technical Sovereign екосистеми DAARION.city.
|
||||
Працюй як CTO-помічник: архітектура, reliability, безпека, release governance, incident/risk/backlog контроль.
|
||||
Відповідай українською, структуровано і коротко; не вигадуй факти, якщо даних нема — кажи прямо.
|
||||
Для задач про інфраструктуру пріоритет: перевірка health/monitor, далі конкретні дії і верифікація.
|
||||
Для задач з кодом: аналіз, рефакторинг, дебаг, архітектурні рекомендації — повний рівень доступу.
|
||||
|
||||
monitor:
|
||||
description: "Monitor Agent - архітектор-інспектор DAGI"
|
||||
|
||||
@@ -143,6 +143,7 @@ def kling_video_enhance(
|
||||
|
||||
def kling_video_generate(
|
||||
*,
|
||||
image_b64: Optional[str] = None,
|
||||
image_url: Optional[str] = None,
|
||||
image_id: Optional[str] = None,
|
||||
prompt: str,
|
||||
@@ -165,8 +166,8 @@ def kling_video_generate(
|
||||
duration: '5' or '10'.
|
||||
aspect_ratio: '16:9', '9:16', '1:1'.
|
||||
"""
|
||||
if not image_url and not image_id:
|
||||
raise ValueError("Either image_url or image_id must be provided")
|
||||
if not image_b64 and not image_url and not image_id:
|
||||
raise ValueError("One of image_b64 / image_url / image_id must be provided")
|
||||
|
||||
payload: Dict[str, Any] = {
|
||||
"model": model,
|
||||
@@ -177,10 +178,14 @@ def kling_video_generate(
|
||||
"negative_prompt": negative_prompt,
|
||||
"aspect_ratio": aspect_ratio,
|
||||
}
|
||||
if image_url:
|
||||
payload["image"] = {"type": "url", "url": image_url}
|
||||
if image_id:
|
||||
payload["image"] = {"type": "id", "id": image_id}
|
||||
# Current Kling endpoint expects "image" as base64 payload string.
|
||||
# Keep url/id compatibility as a best-effort fallback for older gateways.
|
||||
if image_b64:
|
||||
payload["image"] = image_b64
|
||||
elif image_url:
|
||||
payload["image"] = image_url
|
||||
elif image_id:
|
||||
payload["image"] = image_id
|
||||
if callback_url:
|
||||
payload["callback_url"] = callback_url
|
||||
|
||||
@@ -191,6 +196,37 @@ def kling_video_generate(
|
||||
)
|
||||
|
||||
|
||||
def kling_video_generate_from_file(
|
||||
*,
|
||||
image_path: Path,
|
||||
prompt: str,
|
||||
negative_prompt: str = "noise, blur, artifacts, distortion",
|
||||
model: str = "kling-v1-5",
|
||||
mode: str = "pro",
|
||||
duration: str = "5",
|
||||
cfg_scale: float = 0.5,
|
||||
aspect_ratio: str = "16:9",
|
||||
callback_url: Optional[str] = None,
|
||||
) -> Dict[str, Any]:
|
||||
"""Generate video from a local image file by sending base64 payload."""
|
||||
import base64
|
||||
|
||||
with image_path.open("rb") as fh:
|
||||
image_b64 = base64.b64encode(fh.read()).decode()
|
||||
|
||||
return kling_video_generate(
|
||||
image_b64=image_b64,
|
||||
prompt=prompt,
|
||||
negative_prompt=negative_prompt,
|
||||
model=model,
|
||||
mode=mode,
|
||||
duration=duration,
|
||||
cfg_scale=cfg_scale,
|
||||
aspect_ratio=aspect_ratio,
|
||||
callback_url=callback_url,
|
||||
)
|
||||
|
||||
|
||||
def kling_task_status(task_id: str) -> Dict[str, Any]:
|
||||
"""Get status of any Kling task by ID."""
|
||||
return _kling_request_with_fallback(
|
||||
@@ -267,7 +303,12 @@ def kling_poll_until_done(
|
||||
def kling_health_check() -> Dict[str, Any]:
|
||||
"""Quick connectivity check — returns status dict."""
|
||||
try:
|
||||
resp = _kling_request("GET", "/v1/models", timeout=10)
|
||||
return {"ok": True, "models": resp}
|
||||
# `/v1/models` may be disabled in some accounts/regions.
|
||||
# `/v1/videos/image2video` reliably returns code=0 when auth+endpoint are valid.
|
||||
resp = _kling_request("GET", "/v1/videos/image2video", timeout=10)
|
||||
code = resp.get("code") if isinstance(resp, dict) else None
|
||||
if code not in (None, 0, "0"):
|
||||
return {"ok": False, "error": f"Kling probe returned non-zero code: {code}", "probe": resp}
|
||||
return {"ok": True, "probe_path": "/v1/videos/image2video", "probe": resp}
|
||||
except Exception as exc:
|
||||
return {"ok": False, "error": str(exc)}
|
||||
|
||||
@@ -4,6 +4,7 @@ import asyncio
|
||||
import hashlib
|
||||
import json
|
||||
import logging
|
||||
import mimetypes
|
||||
import os
|
||||
import re
|
||||
import shutil
|
||||
@@ -13,9 +14,9 @@ from datetime import datetime, timezone
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
from fastapi import Body, FastAPI, File, Form, HTTPException, Query, UploadFile
|
||||
from fastapi import Body, FastAPI, File, Form, HTTPException, Query, Request, UploadFile
|
||||
from fastapi.middleware.cors import CORSMiddleware
|
||||
from fastapi.responses import FileResponse
|
||||
from fastapi.responses import FileResponse, Response, StreamingResponse
|
||||
|
||||
from .analysis import (
|
||||
analyze_photo,
|
||||
@@ -47,6 +48,7 @@ MAX_CONCURRENT_JOBS = max(1, int(os.getenv("AURORA_MAX_CONCURRENT_JOBS", "1")))
|
||||
store = JobStore(DATA_DIR)
|
||||
orchestrator = AuroraOrchestrator(store.outputs_dir, PUBLIC_BASE_URL)
|
||||
RUN_SLOT = asyncio.Semaphore(MAX_CONCURRENT_JOBS)
|
||||
KLING_VIDEO2VIDEO_CAPABLE: Optional[bool] = None
|
||||
|
||||
app = FastAPI(
|
||||
title="Aurora Media Forensics Service",
|
||||
@@ -228,7 +230,18 @@ def _enqueue_job_from_path(
|
||||
upload_dir = store.uploads_dir / job_id
|
||||
upload_dir.mkdir(parents=True, exist_ok=True)
|
||||
input_path = upload_dir / safe_filename(file_name)
|
||||
shutil.copy2(source_path, input_path)
|
||||
trim_info: Optional[Dict[str, float]] = None
|
||||
if media_type == "video":
|
||||
trim_info = _video_trim_window(export_options)
|
||||
if trim_info:
|
||||
_trim_video_input(
|
||||
source_path,
|
||||
input_path,
|
||||
start_sec=float(trim_info.get("start_sec") or 0.0),
|
||||
duration_sec=trim_info.get("duration_sec"),
|
||||
)
|
||||
else:
|
||||
shutil.copy2(source_path, input_path)
|
||||
|
||||
input_hash = compute_sha256(input_path)
|
||||
initial_metadata = _estimate_upload_metadata(
|
||||
@@ -238,6 +251,8 @@ def _enqueue_job_from_path(
|
||||
)
|
||||
if export_options:
|
||||
initial_metadata["export_options"] = export_options
|
||||
if trim_info:
|
||||
initial_metadata["clip"] = trim_info
|
||||
initial_metadata["priority"] = priority
|
||||
if metadata_patch:
|
||||
initial_metadata.update(metadata_patch)
|
||||
@@ -408,6 +423,110 @@ def _parse_export_options(raw_value: str) -> Dict[str, Any]:
|
||||
return parsed
|
||||
|
||||
|
||||
def _opt_float(opts: Dict[str, Any], key: str) -> Optional[float]:
|
||||
raw = opts.get(key)
|
||||
if raw is None or raw == "":
|
||||
return None
|
||||
try:
|
||||
return float(raw)
|
||||
except Exception:
|
||||
raise HTTPException(status_code=422, detail=f"export_options.{key} must be a number")
|
||||
|
||||
|
||||
def _video_trim_window(export_options: Dict[str, Any]) -> Optional[Dict[str, float]]:
|
||||
opts = export_options if isinstance(export_options, dict) else {}
|
||||
start = _opt_float(opts, "clip_start_sec")
|
||||
duration = _opt_float(opts, "clip_duration_sec")
|
||||
if start is None:
|
||||
start = _opt_float(opts, "start_sec")
|
||||
if duration is None:
|
||||
duration = _opt_float(opts, "duration_sec")
|
||||
if start is None and duration is None:
|
||||
return None
|
||||
start_val = float(start or 0.0)
|
||||
duration_val = float(duration) if duration is not None else None
|
||||
if start_val < 0:
|
||||
raise HTTPException(status_code=422, detail="clip_start_sec must be >= 0")
|
||||
if duration_val is not None and duration_val <= 0:
|
||||
raise HTTPException(status_code=422, detail="clip_duration_sec must be > 0")
|
||||
return {
|
||||
"start_sec": round(start_val, 3),
|
||||
"duration_sec": round(duration_val, 3) if duration_val is not None else None, # type: ignore[arg-type]
|
||||
}
|
||||
|
||||
|
||||
def _trim_video_input(source_path: Path, target_path: Path, *, start_sec: float, duration_sec: Optional[float]) -> None:
|
||||
"""Trim video to a focused segment for faster iteration.
|
||||
|
||||
First attempt is stream copy (lossless, fast). If that fails for container/codec reasons,
|
||||
fallback to lightweight re-encode.
|
||||
"""
|
||||
cmd = [
|
||||
"ffmpeg",
|
||||
"-hide_banner",
|
||||
"-loglevel",
|
||||
"error",
|
||||
"-y",
|
||||
]
|
||||
if start_sec > 0:
|
||||
cmd.extend(["-ss", f"{start_sec:.3f}"])
|
||||
cmd.extend(["-i", str(source_path)])
|
||||
if duration_sec is not None:
|
||||
cmd.extend(["-t", f"{duration_sec:.3f}"])
|
||||
cmd.extend([
|
||||
"-map",
|
||||
"0:v:0",
|
||||
"-map",
|
||||
"0:a?",
|
||||
"-c",
|
||||
"copy",
|
||||
"-movflags",
|
||||
"+faststart",
|
||||
str(target_path),
|
||||
])
|
||||
proc = subprocess.run(cmd, capture_output=True, text=True, check=False)
|
||||
if proc.returncode == 0 and target_path.exists() and target_path.stat().st_size > 0:
|
||||
return
|
||||
|
||||
fallback = [
|
||||
"ffmpeg",
|
||||
"-hide_banner",
|
||||
"-loglevel",
|
||||
"error",
|
||||
"-y",
|
||||
]
|
||||
if start_sec > 0:
|
||||
fallback.extend(["-ss", f"{start_sec:.3f}"])
|
||||
fallback.extend(["-i", str(source_path)])
|
||||
if duration_sec is not None:
|
||||
fallback.extend(["-t", f"{duration_sec:.3f}"])
|
||||
fallback.extend(
|
||||
[
|
||||
"-map",
|
||||
"0:v:0",
|
||||
"-map",
|
||||
"0:a?",
|
||||
"-c:v",
|
||||
"libx264",
|
||||
"-preset",
|
||||
"veryfast",
|
||||
"-crf",
|
||||
"17",
|
||||
"-c:a",
|
||||
"aac",
|
||||
"-b:a",
|
||||
"192k",
|
||||
"-movflags",
|
||||
"+faststart",
|
||||
str(target_path),
|
||||
]
|
||||
)
|
||||
proc2 = subprocess.run(fallback, capture_output=True, text=True, check=False)
|
||||
if proc2.returncode != 0 or not target_path.exists() or target_path.stat().st_size <= 0:
|
||||
err = (proc2.stderr or proc.stderr or "").strip()[:280]
|
||||
raise HTTPException(status_code=422, detail=f"video trim failed: {err or 'ffmpeg error'}")
|
||||
|
||||
|
||||
def _status_timing(job: Any) -> Dict[str, Optional[int]]:
|
||||
started = _parse_iso_utc(job.started_at)
|
||||
if not started:
|
||||
@@ -1134,14 +1253,156 @@ async def cleanup_storage(
|
||||
|
||||
|
||||
@app.get("/api/aurora/files/{job_id}/{file_name}")
|
||||
async def download_output_file(job_id: str, file_name: str) -> FileResponse:
|
||||
async def download_output_file(job_id: str, file_name: str, request: Request):
|
||||
base = (store.outputs_dir / job_id).resolve()
|
||||
target = (base / file_name).resolve()
|
||||
if not str(target).startswith(str(base)):
|
||||
raise HTTPException(status_code=403, detail="invalid file path")
|
||||
if not target.exists() or not target.is_file():
|
||||
raise HTTPException(status_code=404, detail="file not found")
|
||||
return FileResponse(path=target, filename=target.name)
|
||||
total_size = target.stat().st_size
|
||||
range_header = request.headers.get("range")
|
||||
if not range_header:
|
||||
return FileResponse(
|
||||
path=target,
|
||||
filename=target.name,
|
||||
headers={"Accept-Ranges": "bytes"},
|
||||
)
|
||||
|
||||
parsed = _parse_range_header(range_header, total_size)
|
||||
if parsed is None:
|
||||
return FileResponse(
|
||||
path=target,
|
||||
filename=target.name,
|
||||
headers={"Accept-Ranges": "bytes"},
|
||||
)
|
||||
|
||||
start, end = parsed
|
||||
if start >= total_size:
|
||||
return Response(
|
||||
status_code=416,
|
||||
headers={"Content-Range": f"bytes */{total_size}", "Accept-Ranges": "bytes"},
|
||||
)
|
||||
|
||||
content_length = (end - start) + 1
|
||||
media_type = mimetypes.guess_type(str(target))[0] or "application/octet-stream"
|
||||
|
||||
def _iter_range():
|
||||
with target.open("rb") as fh:
|
||||
fh.seek(start)
|
||||
remaining = content_length
|
||||
while remaining > 0:
|
||||
chunk = fh.read(min(65536, remaining))
|
||||
if not chunk:
|
||||
break
|
||||
remaining -= len(chunk)
|
||||
yield chunk
|
||||
|
||||
return StreamingResponse(
|
||||
_iter_range(),
|
||||
status_code=206,
|
||||
media_type=media_type,
|
||||
headers={
|
||||
"Content-Range": f"bytes {start}-{end}/{total_size}",
|
||||
"Content-Length": str(content_length),
|
||||
"Accept-Ranges": "bytes",
|
||||
"Content-Disposition": f'attachment; filename="{target.name}"',
|
||||
},
|
||||
)
|
||||
|
||||
|
||||
def _parse_range_header(range_header: str, total_size: int) -> Optional[tuple[int, int]]:
|
||||
value = str(range_header or "").strip()
|
||||
if not value.lower().startswith("bytes="):
|
||||
return None
|
||||
|
||||
spec = value.split("=", 1)[1].strip()
|
||||
if "," in spec:
|
||||
return None
|
||||
if "-" not in spec:
|
||||
return None
|
||||
|
||||
start_txt, end_txt = spec.split("-", 1)
|
||||
try:
|
||||
if start_txt == "":
|
||||
# Suffix range: bytes=-N
|
||||
suffix_len = int(end_txt)
|
||||
if suffix_len <= 0:
|
||||
return None
|
||||
if suffix_len >= total_size:
|
||||
return 0, max(0, total_size - 1)
|
||||
return total_size - suffix_len, total_size - 1
|
||||
start = int(start_txt)
|
||||
if start < 0:
|
||||
return None
|
||||
if end_txt == "":
|
||||
end = total_size - 1
|
||||
else:
|
||||
end = int(end_txt)
|
||||
if end < start:
|
||||
return None
|
||||
return start, min(end, max(0, total_size - 1))
|
||||
except Exception:
|
||||
return None
|
||||
|
||||
|
||||
def _extract_first_video_frame(video_path: Path, output_path: Path) -> Path:
|
||||
"""Extract the first decodable video frame to an image file."""
|
||||
try:
|
||||
import cv2 # type: ignore[import-untyped]
|
||||
except Exception as exc:
|
||||
raise RuntimeError("OpenCV is required for Kling image2video fallback.") from exc
|
||||
|
||||
output_path.parent.mkdir(parents=True, exist_ok=True)
|
||||
cap = cv2.VideoCapture(str(video_path))
|
||||
try:
|
||||
if not cap.isOpened():
|
||||
raise RuntimeError(f"Cannot open video for fallback frame extraction: {video_path}")
|
||||
ok, frame = cap.read()
|
||||
if not ok or frame is None:
|
||||
raise RuntimeError("Could not read first frame from video")
|
||||
if not cv2.imwrite(str(output_path), frame):
|
||||
raise RuntimeError(f"Failed to write fallback frame: {output_path}")
|
||||
finally:
|
||||
cap.release()
|
||||
return output_path
|
||||
|
||||
|
||||
def _resolve_kling_result_url(task_data: Dict[str, Any]) -> Optional[str]:
|
||||
if not isinstance(task_data, dict):
|
||||
return None
|
||||
|
||||
task_result = task_data.get("task_result")
|
||||
if isinstance(task_result, dict):
|
||||
videos = task_result.get("videos")
|
||||
if isinstance(videos, list):
|
||||
for item in videos:
|
||||
if not isinstance(item, dict):
|
||||
continue
|
||||
for key in ("url", "video_url", "play_url", "download_url"):
|
||||
value = item.get(key)
|
||||
if isinstance(value, str) and value:
|
||||
return value
|
||||
elif isinstance(videos, dict):
|
||||
for key in ("url", "video_url", "play_url", "download_url"):
|
||||
value = videos.get(key)
|
||||
if isinstance(value, str) and value:
|
||||
return value
|
||||
for key in ("url", "video_url", "play_url", "download_url", "result_url"):
|
||||
value = task_result.get(key)
|
||||
if isinstance(value, str) and value:
|
||||
return value
|
||||
|
||||
for key in ("kling_result_url", "result_url", "video_url", "url"):
|
||||
value = task_data.get(key)
|
||||
if isinstance(value, str) and value:
|
||||
return value
|
||||
return None
|
||||
|
||||
|
||||
def _compact_error_text(err: Any, limit: int = 220) -> str:
|
||||
text = re.sub(r"\s+", " ", str(err)).strip()
|
||||
return text[:limit]
|
||||
|
||||
|
||||
# ── Kling AI endpoints ────────────────────────────────────────────────────────
|
||||
@@ -1163,7 +1424,7 @@ async def kling_enhance_video(
|
||||
cfg_scale: float = Form(0.5, description="Prompt adherence 0.0-1.0"),
|
||||
) -> Dict[str, Any]:
|
||||
"""Submit Aurora job result to Kling AI for video-to-video enhancement."""
|
||||
from .kling import kling_video_enhance, kling_upload_file
|
||||
from .kling import kling_video_enhance, kling_upload_file, kling_video_generate_from_file
|
||||
|
||||
job = store.get_job(job_id)
|
||||
if not job:
|
||||
@@ -1181,45 +1442,97 @@ async def kling_enhance_video(
|
||||
if not result_path.exists():
|
||||
raise HTTPException(status_code=404, detail="Result file not found for this job")
|
||||
|
||||
try:
|
||||
upload_resp = kling_upload_file(result_path)
|
||||
except Exception as exc:
|
||||
raise HTTPException(status_code=502, detail=f"Kling upload error: {str(exc)[:400]}") from exc
|
||||
file_id = (upload_resp.get("data") or {}).get("resource_id") or (upload_resp.get("data") or {}).get("file_id")
|
||||
global KLING_VIDEO2VIDEO_CAPABLE
|
||||
|
||||
if not file_id:
|
||||
raise HTTPException(status_code=502, detail=f"Kling upload failed: {upload_resp}")
|
||||
task_resp: Optional[Dict[str, Any]] = None
|
||||
file_id: Optional[str] = None
|
||||
kling_endpoint = "video2video"
|
||||
video2video_error: Optional[str] = None
|
||||
fallback_frame_name: Optional[str] = None
|
||||
|
||||
# Primary path: upload + video2video.
|
||||
if KLING_VIDEO2VIDEO_CAPABLE is not False:
|
||||
try:
|
||||
upload_resp = kling_upload_file(result_path)
|
||||
file_id = (upload_resp.get("data") or {}).get("resource_id") or (upload_resp.get("data") or {}).get("file_id")
|
||||
if not file_id:
|
||||
raise RuntimeError(f"Kling upload failed: {upload_resp}")
|
||||
task_resp = kling_video_enhance(
|
||||
video_id=file_id,
|
||||
prompt=prompt,
|
||||
negative_prompt=negative_prompt,
|
||||
mode=mode,
|
||||
duration=duration,
|
||||
cfg_scale=cfg_scale,
|
||||
)
|
||||
KLING_VIDEO2VIDEO_CAPABLE = True
|
||||
except Exception as exc:
|
||||
raw_error = str(exc)
|
||||
video2video_error = _compact_error_text(raw_error, limit=220)
|
||||
logger.warning("kling video2video unavailable for %s: %s", job_id, video2video_error)
|
||||
lower_error = raw_error.lower()
|
||||
if "endpoint mismatch" in lower_error or "404" in lower_error:
|
||||
KLING_VIDEO2VIDEO_CAPABLE = False
|
||||
else:
|
||||
video2video_error = "video2video skipped (previous endpoint mismatch)"
|
||||
|
||||
# Fallback path: extract first frame and run image2video (base64 payload).
|
||||
if task_resp is None:
|
||||
try:
|
||||
frame_path = _extract_first_video_frame(
|
||||
result_path,
|
||||
store.outputs_dir / job_id / "_kling_fallback_frame.jpg",
|
||||
)
|
||||
fallback_frame_name = frame_path.name
|
||||
task_resp = kling_video_generate_from_file(
|
||||
image_path=frame_path,
|
||||
prompt=prompt,
|
||||
negative_prompt=negative_prompt,
|
||||
mode=mode,
|
||||
duration=duration,
|
||||
cfg_scale=cfg_scale,
|
||||
aspect_ratio="16:9",
|
||||
)
|
||||
kling_endpoint = "image2video"
|
||||
except Exception as fallback_exc:
|
||||
detail = "Kling submit failed"
|
||||
if video2video_error:
|
||||
detail = f"Kling video2video error: {video2video_error}; image2video fallback error: {_compact_error_text(fallback_exc, limit=220)}"
|
||||
else:
|
||||
detail = f"Kling image2video fallback error: {_compact_error_text(fallback_exc, limit=220)}"
|
||||
raise HTTPException(status_code=502, detail=detail) from fallback_exc
|
||||
|
||||
if task_resp is None:
|
||||
raise HTTPException(status_code=502, detail="Kling task submit failed: empty response")
|
||||
|
||||
try:
|
||||
task_resp = kling_video_enhance(
|
||||
video_id=file_id,
|
||||
prompt=prompt,
|
||||
negative_prompt=negative_prompt,
|
||||
mode=mode,
|
||||
duration=duration,
|
||||
cfg_scale=cfg_scale,
|
||||
)
|
||||
except Exception as exc:
|
||||
raise HTTPException(status_code=502, detail=f"Kling task submit error: {str(exc)[:400]}") from exc
|
||||
task_id = (task_resp.get("data") or {}).get("task_id") or task_resp.get("task_id")
|
||||
if not task_id:
|
||||
raise HTTPException(status_code=502, detail=f"Kling task_id missing in response: {task_resp}")
|
||||
|
||||
kling_meta_dir = store.outputs_dir / job_id
|
||||
kling_meta_path = kling_meta_dir / "kling_task.json"
|
||||
kling_meta_path.write_text(json.dumps({
|
||||
meta_payload: Dict[str, Any] = {
|
||||
"aurora_job_id": job_id,
|
||||
"kling_task_id": task_id,
|
||||
"kling_file_id": file_id,
|
||||
"kling_endpoint": kling_endpoint,
|
||||
"prompt": prompt,
|
||||
"mode": mode,
|
||||
"duration": duration,
|
||||
"submitted_at": datetime.now(timezone.utc).isoformat(),
|
||||
"status": "submitted",
|
||||
}, ensure_ascii=False, indent=2), encoding="utf-8")
|
||||
}
|
||||
if fallback_frame_name:
|
||||
meta_payload["kling_source_frame"] = fallback_frame_name
|
||||
if video2video_error:
|
||||
meta_payload["video2video_error"] = video2video_error
|
||||
kling_meta_path.write_text(json.dumps(meta_payload, ensure_ascii=False, indent=2), encoding="utf-8")
|
||||
|
||||
return {
|
||||
"aurora_job_id": job_id,
|
||||
"kling_task_id": task_id,
|
||||
"kling_file_id": file_id,
|
||||
"kling_endpoint": kling_endpoint,
|
||||
"status": "submitted",
|
||||
"status_url": f"/api/aurora/kling/status/{job_id}",
|
||||
}
|
||||
@@ -1238,9 +1551,10 @@ async def kling_task_status_for_job(job_id: str) -> Dict[str, Any]:
|
||||
task_id = meta.get("kling_task_id")
|
||||
if not task_id:
|
||||
raise HTTPException(status_code=404, detail="Kling task_id missing in metadata")
|
||||
endpoint = str(meta.get("kling_endpoint") or "video2video")
|
||||
|
||||
try:
|
||||
status_resp = kling_video_task_status(task_id, endpoint="video2video")
|
||||
status_resp = kling_video_task_status(task_id, endpoint=endpoint)
|
||||
except Exception as exc:
|
||||
raise HTTPException(status_code=502, detail=f"Kling status error: {str(exc)[:400]}") from exc
|
||||
task_data = status_resp.get("data") or status_resp
|
||||
@@ -1249,19 +1563,17 @@ async def kling_task_status_for_job(job_id: str) -> Dict[str, Any]:
|
||||
meta["status"] = state
|
||||
meta["last_checked"] = datetime.now(timezone.utc).isoformat()
|
||||
|
||||
result_url = None
|
||||
works = task_data.get("task_result", {}).get("videos") or []
|
||||
if works:
|
||||
result_url = works[0].get("url")
|
||||
if result_url:
|
||||
meta["kling_result_url"] = result_url
|
||||
meta["completed_at"] = datetime.now(timezone.utc).isoformat()
|
||||
result_url = _resolve_kling_result_url(task_data)
|
||||
if result_url:
|
||||
meta["kling_result_url"] = result_url
|
||||
meta["completed_at"] = datetime.now(timezone.utc).isoformat()
|
||||
|
||||
kling_meta_path.write_text(json.dumps(meta, ensure_ascii=False, indent=2), encoding="utf-8")
|
||||
|
||||
return {
|
||||
"aurora_job_id": job_id,
|
||||
"kling_task_id": task_id,
|
||||
"kling_endpoint": endpoint,
|
||||
"status": state,
|
||||
"kling_result_url": result_url,
|
||||
"meta": meta,
|
||||
@@ -1279,7 +1591,7 @@ async def kling_image_to_video(
|
||||
aspect_ratio: str = Form("16:9"),
|
||||
) -> Dict[str, Any]:
|
||||
"""Generate video from a still image using Kling AI."""
|
||||
from .kling import kling_upload_file, kling_video_generate
|
||||
from .kling import kling_video_generate_from_file
|
||||
|
||||
file_name = file.filename or "frame.jpg"
|
||||
content = await file.read()
|
||||
@@ -1293,16 +1605,8 @@ async def kling_image_to_video(
|
||||
|
||||
try:
|
||||
try:
|
||||
upload_resp = kling_upload_file(tmp_path)
|
||||
except Exception as exc:
|
||||
raise HTTPException(status_code=502, detail=f"Kling upload error: {str(exc)[:400]}") from exc
|
||||
file_id = (upload_resp.get("data") or {}).get("resource_id") or (upload_resp.get("data") or {}).get("file_id")
|
||||
if not file_id:
|
||||
raise HTTPException(status_code=502, detail=f"Kling upload failed: {upload_resp}")
|
||||
|
||||
try:
|
||||
task_resp = kling_video_generate(
|
||||
image_id=file_id,
|
||||
task_resp = kling_video_generate_from_file(
|
||||
image_path=tmp_path,
|
||||
prompt=prompt,
|
||||
negative_prompt=negative_prompt,
|
||||
model=model,
|
||||
@@ -1313,9 +1617,12 @@ async def kling_image_to_video(
|
||||
except Exception as exc:
|
||||
raise HTTPException(status_code=502, detail=f"Kling task submit error: {str(exc)[:400]}") from exc
|
||||
task_id = (task_resp.get("data") or {}).get("task_id") or task_resp.get("task_id")
|
||||
if not task_id:
|
||||
raise HTTPException(status_code=502, detail=f"Kling task_id missing in response: {task_resp}")
|
||||
return {
|
||||
"kling_task_id": task_id,
|
||||
"kling_file_id": file_id,
|
||||
"kling_file_id": None,
|
||||
"kling_endpoint": "image2video",
|
||||
"status": "submitted",
|
||||
"status_url": f"/api/aurora/kling/task/{task_id}?endpoint=image2video",
|
||||
}
|
||||
|
||||
@@ -49,6 +49,78 @@ def _models_used(job: AuroraJob) -> List[str]:
|
||||
return models
|
||||
|
||||
|
||||
def _processing_steps(job: AuroraJob) -> List[Any]:
|
||||
if job.result and job.result.processing_log:
|
||||
return list(job.result.processing_log)
|
||||
if job.processing_log:
|
||||
return list(job.processing_log)
|
||||
return []
|
||||
|
||||
|
||||
def _result_media_hash(job: AuroraJob) -> Optional[str]:
|
||||
if not job.result:
|
||||
return None
|
||||
media_type = str(job.media_type).strip().lower()
|
||||
for out in job.result.output_files:
|
||||
out_type = str(getattr(out, "type", "") or "").strip().lower()
|
||||
if out_type in {media_type, "video", "photo", "image", "audio", "unknown"}:
|
||||
value = str(getattr(out, "hash", "") or "").strip()
|
||||
if value:
|
||||
return value
|
||||
return None
|
||||
|
||||
|
||||
def _fallback_flags(job: AuroraJob) -> Dict[str, Any]:
|
||||
hard_fallback_used = False
|
||||
soft_sr_fallback_used = False
|
||||
fallback_steps: List[str] = []
|
||||
warnings: List[str] = []
|
||||
|
||||
for step in _processing_steps(job):
|
||||
step_name = str(getattr(step, "step", "") or "").strip() or "unknown"
|
||||
details = getattr(step, "details", {}) or {}
|
||||
if not isinstance(details, dict):
|
||||
continue
|
||||
|
||||
if bool(details.get("fallback_used")):
|
||||
hard_fallback_used = True
|
||||
fallback_steps.append(step_name)
|
||||
reason = str(details.get("reason") or "").strip()
|
||||
if reason:
|
||||
warnings.append(f"{step_name}: hard fallback used ({reason})")
|
||||
else:
|
||||
warnings.append(f"{step_name}: hard fallback used")
|
||||
|
||||
sr_fallback_frames = 0
|
||||
try:
|
||||
sr_fallback_frames = int(details.get("sr_fallback_frames") or 0)
|
||||
except Exception:
|
||||
sr_fallback_frames = 0
|
||||
if bool(details.get("sr_fallback_used")):
|
||||
sr_fallback_frames = max(sr_fallback_frames, 1)
|
||||
if sr_fallback_frames > 0:
|
||||
soft_sr_fallback_used = True
|
||||
fallback_steps.append(step_name)
|
||||
method = str(details.get("sr_fallback_method") or "").strip()
|
||||
reason = str(details.get("sr_fallback_reason") or "").strip()
|
||||
msg = f"{step_name}: SR soft fallback on {sr_fallback_frames} frame(s)"
|
||||
if method:
|
||||
msg += f" via {method}"
|
||||
if reason:
|
||||
msg += f" ({reason})"
|
||||
warnings.append(msg)
|
||||
|
||||
fallback_steps_unique = list(dict.fromkeys(fallback_steps))
|
||||
warnings_unique = list(dict.fromkeys(warnings))
|
||||
return {
|
||||
"fallback_used": bool(hard_fallback_used or soft_sr_fallback_used),
|
||||
"hard_fallback_used": hard_fallback_used,
|
||||
"soft_sr_fallback_used": soft_sr_fallback_used,
|
||||
"fallback_steps": fallback_steps_unique,
|
||||
"warnings": warnings_unique,
|
||||
}
|
||||
|
||||
|
||||
def _detect_faces_with_proxy_confidence(frame_bgr: Any) -> List[Dict[str, Any]]:
|
||||
if cv2 is None:
|
||||
return []
|
||||
@@ -246,9 +318,29 @@ def build_quality_report(job: AuroraJob, outputs_dir: Path, *, refresh: bool = F
|
||||
raise RuntimeError("Cannot build quality report: source/result file not found")
|
||||
|
||||
media_type: MediaType = job.media_type
|
||||
processing_flags = _fallback_flags(job)
|
||||
faces = _face_metrics(source_path, result_path, media_type)
|
||||
plates = _plate_metrics(job_dir)
|
||||
overall = _overall_metrics(source_path, result_path, media_type, job)
|
||||
result_hash = _result_media_hash(job)
|
||||
identical_to_input = bool(result_hash and result_hash == str(job.input_hash))
|
||||
warnings = list(processing_flags.get("warnings") or [])
|
||||
if identical_to_input:
|
||||
warnings.append("output hash matches input hash; enhancement may be skipped.")
|
||||
warnings = list(dict.fromkeys(warnings))
|
||||
|
||||
processing_status = "ok"
|
||||
if bool(processing_flags.get("fallback_used")) or identical_to_input:
|
||||
processing_status = "degraded"
|
||||
overall["processing_status"] = processing_status
|
||||
overall["fallback_used"] = bool(processing_flags.get("fallback_used"))
|
||||
overall["hard_fallback_used"] = bool(processing_flags.get("hard_fallback_used"))
|
||||
overall["soft_sr_fallback_used"] = bool(processing_flags.get("soft_sr_fallback_used"))
|
||||
overall["identical_to_input"] = identical_to_input
|
||||
if result_hash:
|
||||
overall["result_hash"] = result_hash
|
||||
if warnings:
|
||||
overall["warnings"] = warnings
|
||||
|
||||
report = {
|
||||
"job_id": job.job_id,
|
||||
@@ -257,7 +349,13 @@ def build_quality_report(job: AuroraJob, outputs_dir: Path, *, refresh: bool = F
|
||||
"faces": faces,
|
||||
"plates": plates,
|
||||
"overall": overall,
|
||||
"processing_flags": {
|
||||
**processing_flags,
|
||||
"identical_to_input": identical_to_input,
|
||||
"warnings": warnings,
|
||||
},
|
||||
"summary": {
|
||||
"processing_status": processing_status,
|
||||
"faces_detected_ratio": f"{faces['detected']} / {faces['source_detected'] or faces['detected']}",
|
||||
"plates_recognized_ratio": f"{plates['recognized']} / {plates['detected']}",
|
||||
},
|
||||
|
||||
@@ -13,6 +13,7 @@ RUN pip install --no-cache-dir -r requirements.txt
|
||||
|
||||
# Copy application
|
||||
COPY app/ ./app/
|
||||
COPY static/ ./static/
|
||||
|
||||
# Environment
|
||||
ENV PYTHONPATH=/app
|
||||
|
||||
@@ -428,6 +428,8 @@ class Database:
|
||||
|
||||
CREATE INDEX IF NOT EXISTS idx_user_facts_user_id ON user_facts(user_id);
|
||||
CREATE INDEX IF NOT EXISTS idx_user_facts_team_id ON user_facts(team_id);
|
||||
CREATE UNIQUE INDEX IF NOT EXISTS idx_user_facts_user_team_agent_fact
|
||||
ON user_facts(user_id, team_id, agent_id, fact_key);
|
||||
""")
|
||||
|
||||
async def upsert_fact(
|
||||
@@ -445,16 +447,30 @@ class Database:
|
||||
json_value = json.dumps(fact_value_json) if fact_value_json else None
|
||||
|
||||
async with self.pool.acquire() as conn:
|
||||
row = await conn.fetchrow("""
|
||||
INSERT INTO user_facts (user_id, team_id, agent_id, fact_key, fact_value, fact_value_json)
|
||||
VALUES ($1, $2, $3, $4, $5, $6::jsonb)
|
||||
ON CONFLICT (user_id, team_id, agent_id, fact_key)
|
||||
DO UPDATE SET
|
||||
fact_value = EXCLUDED.fact_value,
|
||||
fact_value_json = EXCLUDED.fact_value_json,
|
||||
updated_at = NOW()
|
||||
RETURNING *
|
||||
""", user_id, team_id, agent_id, fact_key, fact_value, json_value)
|
||||
try:
|
||||
row = await conn.fetchrow("""
|
||||
INSERT INTO user_facts (user_id, team_id, agent_id, fact_key, fact_value, fact_value_json)
|
||||
VALUES ($1, $2, $3, $4, $5, $6::jsonb)
|
||||
ON CONFLICT (user_id, team_id, agent_id, fact_key)
|
||||
DO UPDATE SET
|
||||
fact_value = EXCLUDED.fact_value,
|
||||
fact_value_json = EXCLUDED.fact_value_json,
|
||||
updated_at = NOW()
|
||||
RETURNING *
|
||||
""", user_id, team_id, agent_id, fact_key, fact_value, json_value)
|
||||
except asyncpg.exceptions.InvalidColumnReferenceError:
|
||||
# Backward compatibility for DBs that only have UNIQUE(user_id, team_id, fact_key).
|
||||
row = await conn.fetchrow("""
|
||||
INSERT INTO user_facts (user_id, team_id, agent_id, fact_key, fact_value, fact_value_json)
|
||||
VALUES ($1, $2, $3, $4, $5, $6::jsonb)
|
||||
ON CONFLICT (user_id, team_id, fact_key)
|
||||
DO UPDATE SET
|
||||
agent_id = EXCLUDED.agent_id,
|
||||
fact_value = EXCLUDED.fact_value,
|
||||
fact_value_json = EXCLUDED.fact_value_json,
|
||||
updated_at = NOW()
|
||||
RETURNING *
|
||||
""", user_id, team_id, agent_id, fact_key, fact_value, json_value)
|
||||
|
||||
return dict(row) if row else {}
|
||||
|
||||
|
||||
@@ -650,6 +650,7 @@ class FactUpsertRequest(BaseModel):
|
||||
fact_value: Optional[str] = None
|
||||
fact_value_json: Optional[dict] = None
|
||||
team_id: Optional[str] = None
|
||||
agent_id: Optional[str] = None
|
||||
|
||||
@app.post("/facts/upsert")
|
||||
async def upsert_fact(request: FactUpsertRequest):
|
||||
@@ -663,13 +664,17 @@ async def upsert_fact(request: FactUpsertRequest):
|
||||
# Ensure facts table exists (will be created on first call)
|
||||
await db.ensure_facts_table()
|
||||
|
||||
# Upsert the fact
|
||||
# Upsert the fact — extract agent_id from request field or from fact_value_json
|
||||
agent_id_val = request.agent_id or (
|
||||
(request.fact_value_json or {}).get("agent_id")
|
||||
)
|
||||
result = await db.upsert_fact(
|
||||
user_id=request.user_id,
|
||||
fact_key=request.fact_key,
|
||||
fact_value=request.fact_value,
|
||||
fact_value_json=request.fact_value_json,
|
||||
team_id=request.team_id
|
||||
team_id=request.team_id,
|
||||
agent_id=agent_id_val
|
||||
)
|
||||
|
||||
logger.info(f"fact_upserted", user_id=request.user_id, fact_key=request.fact_key)
|
||||
|
||||
@@ -30,7 +30,7 @@ python-multipart==0.0.9
|
||||
tiktoken==0.5.2
|
||||
|
||||
# Voice stack
|
||||
edge-tts==6.1.19
|
||||
edge-tts==7.2.7
|
||||
faster-whisper==1.1.1
|
||||
|
||||
# Testing
|
||||
|
||||
@@ -14,3 +14,19 @@ STT_PROVIDER = os.getenv("STT_PROVIDER", "none")
|
||||
TTS_PROVIDER = os.getenv("TTS_PROVIDER", "none")
|
||||
OCR_PROVIDER = os.getenv("OCR_PROVIDER", "vision_prompted")
|
||||
IMAGE_PROVIDER = os.getenv("IMAGE_PROVIDER", "none")
|
||||
|
||||
# Memory Service URL (used by memory_service STT/TTS providers)
|
||||
MEMORY_SERVICE_URL = os.getenv("MEMORY_SERVICE_URL", "http://memory-service:8000")
|
||||
|
||||
# ── Voice HA: dedicated concurrency limits (separate from generic stt/tts/llm) ──
|
||||
# These control semaphores for node.{id}.voice.*.request subjects.
|
||||
# Independent from MAX_CONCURRENCY so voice never starves generic inference.
|
||||
VOICE_MAX_CONCURRENT_TTS = int(os.getenv("VOICE_MAX_CONCURRENT_TTS", "4"))
|
||||
VOICE_MAX_CONCURRENT_LLM = int(os.getenv("VOICE_MAX_CONCURRENT_LLM", "2"))
|
||||
VOICE_MAX_CONCURRENT_STT = int(os.getenv("VOICE_MAX_CONCURRENT_STT", "2"))
|
||||
|
||||
# Timeouts for voice subjects (milliseconds). Router uses these as defaults.
|
||||
VOICE_TTS_DEADLINE_MS = int(os.getenv("VOICE_TTS_DEADLINE_MS", "3000"))
|
||||
VOICE_LLM_FAST_MS = int(os.getenv("VOICE_LLM_FAST_MS", "9000"))
|
||||
VOICE_LLM_QUALITY_MS = int(os.getenv("VOICE_LLM_QUALITY_MS", "12000"))
|
||||
VOICE_STT_DEADLINE_MS = int(os.getenv("VOICE_STT_DEADLINE_MS", "6000"))
|
||||
|
||||
@@ -8,6 +8,7 @@ try:
|
||||
PROM_AVAILABLE = True
|
||||
REGISTRY = CollectorRegistry()
|
||||
|
||||
# Generic job metrics
|
||||
jobs_total = Counter(
|
||||
"node_worker_jobs_total", "Jobs processed",
|
||||
["type", "status"], registry=REGISTRY,
|
||||
@@ -23,6 +24,26 @@ try:
|
||||
registry=REGISTRY,
|
||||
)
|
||||
|
||||
# ── Voice HA metrics (separate labels from generic) ───────────────────────
|
||||
# cap label: "voice.tts" | "voice.llm" | "voice.stt"
|
||||
voice_jobs_total = Counter(
|
||||
"node_worker_voice_jobs_total",
|
||||
"Voice HA jobs processed (node.{id}.voice.*.request)",
|
||||
["cap", "status"], registry=REGISTRY,
|
||||
)
|
||||
voice_inflight_gauge = Gauge(
|
||||
"node_worker_voice_inflight",
|
||||
"Voice HA inflight jobs per capability",
|
||||
["cap"], registry=REGISTRY,
|
||||
)
|
||||
voice_latency_hist = Histogram(
|
||||
"node_worker_voice_latency_ms",
|
||||
"Voice HA job latency in ms",
|
||||
["cap"],
|
||||
buckets=[100, 250, 500, 1000, 1500, 2000, 3000, 5000, 9000, 12000],
|
||||
registry=REGISTRY,
|
||||
)
|
||||
|
||||
except ImportError:
|
||||
PROM_AVAILABLE = False
|
||||
REGISTRY = None
|
||||
@@ -44,6 +65,21 @@ def observe_latency(req_type: str, model: str, latency_ms: int):
|
||||
latency_hist.labels(type=req_type, model=model).observe(latency_ms)
|
||||
|
||||
|
||||
def inc_voice_job(cap: str, status: str):
|
||||
if PROM_AVAILABLE:
|
||||
voice_jobs_total.labels(cap=cap, status=status).inc()
|
||||
|
||||
|
||||
def set_voice_inflight(cap: str, count: int):
|
||||
if PROM_AVAILABLE:
|
||||
voice_inflight_gauge.labels(cap=cap).set(count)
|
||||
|
||||
|
||||
def observe_voice_latency(cap: str, latency_ms: int):
|
||||
if PROM_AVAILABLE:
|
||||
voice_latency_hist.labels(cap=cap).observe(latency_ms)
|
||||
|
||||
|
||||
def get_metrics_text():
|
||||
if PROM_AVAILABLE and REGISTRY:
|
||||
return generate_latest(REGISTRY)
|
||||
|
||||
@@ -43,7 +43,30 @@ async def prom_metrics():
|
||||
|
||||
@app.get("/caps")
|
||||
async def caps():
|
||||
"""Capability flags for NCS to aggregate."""
|
||||
"""Capability flags for NCS to aggregate.
|
||||
|
||||
Semantic vs operational separation (contract):
|
||||
- capabilities.voice_* = semantic availability (provider configured).
|
||||
True as long as the provider is configured, regardless of NATS state.
|
||||
Routing decisions are based on this.
|
||||
- runtime.nats_subscriptions.voice_* = operational (NATS sub active).
|
||||
Used for health/telemetry only — NOT for routing.
|
||||
|
||||
This prevents false-negatives during reconnects / restart races.
|
||||
"""
|
||||
import worker as _w
|
||||
nid = config.NODE_ID.lower()
|
||||
|
||||
# Semantic: provider configured → capability is available
|
||||
voice_tts_cap = config.TTS_PROVIDER != "none"
|
||||
voice_stt_cap = config.STT_PROVIDER != "none"
|
||||
voice_llm_cap = True # LLM always available when node-worker is up
|
||||
|
||||
# Operational: actual NATS subscription state (health/telemetry only)
|
||||
nats_voice_tts_active = f"node.{nid}.voice.tts.request" in _w._VOICE_SUBJECTS
|
||||
nats_voice_stt_active = f"node.{nid}.voice.stt.request" in _w._VOICE_SUBJECTS
|
||||
nats_voice_llm_active = f"node.{nid}.voice.llm.request" in _w._VOICE_SUBJECTS
|
||||
|
||||
return {
|
||||
"node_id": config.NODE_ID,
|
||||
"capabilities": {
|
||||
@@ -53,6 +76,10 @@ async def caps():
|
||||
"tts": config.TTS_PROVIDER != "none",
|
||||
"ocr": config.OCR_PROVIDER != "none",
|
||||
"image": config.IMAGE_PROVIDER != "none",
|
||||
# Voice HA semantic capability flags (provider-based, not NATS-based)
|
||||
"voice_tts": voice_tts_cap,
|
||||
"voice_llm": voice_llm_cap,
|
||||
"voice_stt": voice_stt_cap,
|
||||
},
|
||||
"providers": {
|
||||
"stt": config.STT_PROVIDER,
|
||||
@@ -65,6 +92,19 @@ async def caps():
|
||||
"vision": config.DEFAULT_VISION,
|
||||
},
|
||||
"concurrency": config.MAX_CONCURRENCY,
|
||||
"voice_concurrency": {
|
||||
"voice_tts": config.VOICE_MAX_CONCURRENT_TTS,
|
||||
"voice_llm": config.VOICE_MAX_CONCURRENT_LLM,
|
||||
"voice_stt": config.VOICE_MAX_CONCURRENT_STT,
|
||||
},
|
||||
# Operational NATS subscription state — for health/monitoring only
|
||||
"runtime": {
|
||||
"nats_subscriptions": {
|
||||
"voice_tts_active": nats_voice_tts_active,
|
||||
"voice_stt_active": nats_voice_stt_active,
|
||||
"voice_llm_active": nats_voice_llm_active,
|
||||
}
|
||||
},
|
||||
}
|
||||
|
||||
|
||||
|
||||
@@ -11,24 +11,44 @@ from models import JobRequest, JobResponse, JobError
|
||||
from idempotency import IdempotencyStore
|
||||
from providers import ollama, ollama_vision
|
||||
from providers import stt_mlx_whisper, tts_mlx_kokoro
|
||||
from providers import stt_memory_service, tts_memory_service
|
||||
import fabric_metrics as fm
|
||||
|
||||
logger = logging.getLogger("node-worker")
|
||||
|
||||
_idem = IdempotencyStore()
|
||||
_semaphore: asyncio.Semaphore = asyncio.Semaphore(config.MAX_CONCURRENCY)
|
||||
|
||||
# Voice-dedicated semaphores — independent from generic MAX_CONCURRENCY.
|
||||
# Prevents voice requests from starving generic inference and vice versa.
|
||||
_voice_sem_tts: asyncio.Semaphore = asyncio.Semaphore(config.VOICE_MAX_CONCURRENT_TTS)
|
||||
_voice_sem_llm: asyncio.Semaphore = asyncio.Semaphore(config.VOICE_MAX_CONCURRENT_LLM)
|
||||
_voice_sem_stt: asyncio.Semaphore = asyncio.Semaphore(config.VOICE_MAX_CONCURRENT_STT)
|
||||
|
||||
_VOICE_SEMAPHORES = {
|
||||
"voice.tts": _voice_sem_tts,
|
||||
"voice.llm": _voice_sem_llm,
|
||||
"voice.stt": _voice_sem_stt,
|
||||
}
|
||||
|
||||
_nats_client = None
|
||||
_inflight_count: int = 0
|
||||
_voice_inflight: Dict[str, int] = {"voice.tts": 0, "voice.llm": 0, "voice.stt": 0}
|
||||
_latencies_llm: list = []
|
||||
_latencies_vision: list = []
|
||||
_LATENCY_BUFFER = 50
|
||||
|
||||
# Set of subjects that use the voice handler path
|
||||
_VOICE_SUBJECTS: set = set()
|
||||
|
||||
|
||||
async def start(nats_client):
|
||||
global _nats_client
|
||||
_nats_client = nats_client
|
||||
|
||||
nid = config.NODE_ID.lower()
|
||||
|
||||
# Generic subjects (unchanged — backward compatible)
|
||||
subjects = [
|
||||
f"node.{nid}.llm.request",
|
||||
f"node.{nid}.vision.request",
|
||||
@@ -41,6 +61,31 @@ async def start(nats_client):
|
||||
await nats_client.subscribe(subj, cb=_handle_request)
|
||||
logger.info(f"✅ Subscribed: {subj}")
|
||||
|
||||
# Voice HA subjects — separate semaphores, own metrics, own deadlines
|
||||
# Only subscribe if the relevant provider is configured (preflight-first)
|
||||
voice_subjects_to_caps = {
|
||||
f"node.{nid}.voice.tts.request": ("tts", _voice_sem_tts, "voice.tts"),
|
||||
f"node.{nid}.voice.llm.request": ("llm", _voice_sem_llm, "voice.llm"),
|
||||
f"node.{nid}.voice.stt.request": ("stt", _voice_sem_stt, "voice.stt"),
|
||||
}
|
||||
for subj, (required_cap, sem, cap_key) in voice_subjects_to_caps.items():
|
||||
if required_cap == "tts" and config.TTS_PROVIDER == "none":
|
||||
logger.info(f"⏭ Skipping {subj}: TTS_PROVIDER=none")
|
||||
continue
|
||||
if required_cap == "stt" and config.STT_PROVIDER == "none":
|
||||
logger.info(f"⏭ Skipping {subj}: STT_PROVIDER=none")
|
||||
continue
|
||||
# LLM always available on this node
|
||||
_VOICE_SUBJECTS.add(subj)
|
||||
|
||||
async def _make_voice_handler(s=sem, k=cap_key):
|
||||
async def _voice_handler(msg):
|
||||
await _handle_voice_request(msg, voice_sem=s, cap_key=k)
|
||||
return _voice_handler
|
||||
|
||||
await nats_client.subscribe(subj, cb=await _make_voice_handler())
|
||||
logger.info(f"✅ Voice subscribed: {subj}")
|
||||
|
||||
|
||||
async def _handle_request(msg):
|
||||
t0 = time.time()
|
||||
@@ -136,6 +181,103 @@ async def _handle_request(msg):
|
||||
pass
|
||||
|
||||
|
||||
async def _handle_voice_request(msg, voice_sem: asyncio.Semaphore, cap_key: str):
|
||||
"""Voice-dedicated handler: separate semaphore, metrics, retry hints.
|
||||
|
||||
Maps voice.{tts|llm|stt} to the same _execute() but with:
|
||||
- Own concurrency limit (VOICE_MAX_CONCURRENT_{TTS|LLM|STT})
|
||||
- TOO_BUSY includes retry_after_ms hint (client can retry immediately elsewhere)
|
||||
- Voice-specific Prometheus labels (type=voice.tts, etc.)
|
||||
- WARNING log on fallback (contract: no silent fallback)
|
||||
"""
|
||||
t0 = time.time()
|
||||
# Extract the base type for _execute (voice.tts → tts)
|
||||
base_type = cap_key.split(".")[-1] # "tts", "llm", "stt"
|
||||
|
||||
try:
|
||||
raw = msg.data
|
||||
if len(raw) > config.MAX_PAYLOAD_BYTES:
|
||||
await _reply(msg, JobResponse(
|
||||
node_id=config.NODE_ID, status="error",
|
||||
error=JobError(code="PAYLOAD_TOO_LARGE", message=f"max {config.MAX_PAYLOAD_BYTES} bytes"),
|
||||
))
|
||||
return
|
||||
|
||||
data = json.loads(raw)
|
||||
job = JobRequest(**data)
|
||||
job.trace_id = job.trace_id or job.job_id
|
||||
|
||||
remaining = job.remaining_ms()
|
||||
if remaining <= 0:
|
||||
await _reply(msg, JobResponse(
|
||||
job_id=job.job_id, trace_id=job.trace_id, node_id=config.NODE_ID,
|
||||
status="timeout", error=JobError(code="DEADLINE_EXCEEDED"),
|
||||
))
|
||||
return
|
||||
|
||||
# Voice concurrency check — TOO_BUSY includes retry hint
|
||||
if voice_sem._value == 0:
|
||||
logger.warning(
|
||||
"[voice.busy] cap=%s node=%s — all %d slots occupied. "
|
||||
"WARNING: request turned away, Router should failover.",
|
||||
cap_key, config.NODE_ID, {
|
||||
"voice.tts": config.VOICE_MAX_CONCURRENT_TTS,
|
||||
"voice.llm": config.VOICE_MAX_CONCURRENT_LLM,
|
||||
"voice.stt": config.VOICE_MAX_CONCURRENT_STT,
|
||||
}.get(cap_key, "?"),
|
||||
)
|
||||
fm.inc_voice_job(cap_key, "busy")
|
||||
await _reply(msg, JobResponse(
|
||||
job_id=job.job_id, trace_id=job.trace_id, node_id=config.NODE_ID,
|
||||
status="busy",
|
||||
error=JobError(
|
||||
code="TOO_BUSY",
|
||||
message=f"voice {cap_key} at capacity",
|
||||
details={"retry_after_ms": 500, "cap": cap_key},
|
||||
),
|
||||
))
|
||||
return
|
||||
|
||||
global _voice_inflight
|
||||
_voice_inflight[cap_key] = _voice_inflight.get(cap_key, 0) + 1
|
||||
fm.set_voice_inflight(cap_key, _voice_inflight[cap_key])
|
||||
|
||||
try:
|
||||
async with voice_sem:
|
||||
# Route to _execute with the base type
|
||||
job.required_type = base_type
|
||||
resp = await _execute(job, remaining)
|
||||
finally:
|
||||
_voice_inflight[cap_key] = max(0, _voice_inflight.get(cap_key, 1) - 1)
|
||||
fm.set_voice_inflight(cap_key, _voice_inflight[cap_key])
|
||||
|
||||
resp.latency_ms = int((time.time() - t0) * 1000)
|
||||
fm.inc_voice_job(cap_key, resp.status)
|
||||
if resp.status == "ok" and resp.latency_ms > 0:
|
||||
fm.observe_voice_latency(cap_key, resp.latency_ms)
|
||||
|
||||
# Contract: log WARNING on any non-ok voice result
|
||||
if resp.status != "ok":
|
||||
logger.warning(
|
||||
"[voice.fallback] cap=%s node=%s status=%s error=%s trace=%s",
|
||||
cap_key, config.NODE_ID, resp.status,
|
||||
resp.error.code if resp.error else "?", job.trace_id,
|
||||
)
|
||||
|
||||
await _reply(msg, resp)
|
||||
|
||||
except Exception as e:
|
||||
logger.exception(f"Voice handler error cap={cap_key}: {e}")
|
||||
fm.inc_voice_job(cap_key, "error")
|
||||
try:
|
||||
await _reply(msg, JobResponse(
|
||||
node_id=config.NODE_ID, status="error",
|
||||
error=JobError(code="INTERNAL", message=str(e)[:200]),
|
||||
))
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
|
||||
async def _execute(job: JobRequest, remaining_ms: int) -> JobResponse:
|
||||
payload = job.payload
|
||||
hints = job.hints
|
||||
@@ -184,9 +326,14 @@ async def _execute(job: JobRequest, remaining_ms: int) -> JobResponse:
|
||||
status="error",
|
||||
error=JobError(code="NOT_AVAILABLE", message="STT not configured on this node"),
|
||||
)
|
||||
result = await asyncio.wait_for(
|
||||
stt_mlx_whisper.transcribe(payload), timeout=timeout_s,
|
||||
)
|
||||
if config.STT_PROVIDER == "memory_service":
|
||||
result = await asyncio.wait_for(
|
||||
stt_memory_service.transcribe(payload), timeout=timeout_s,
|
||||
)
|
||||
else:
|
||||
result = await asyncio.wait_for(
|
||||
stt_mlx_whisper.transcribe(payload), timeout=timeout_s,
|
||||
)
|
||||
elif job.required_type == "tts":
|
||||
if config.TTS_PROVIDER == "none":
|
||||
return JobResponse(
|
||||
@@ -194,9 +341,14 @@ async def _execute(job: JobRequest, remaining_ms: int) -> JobResponse:
|
||||
status="error",
|
||||
error=JobError(code="NOT_AVAILABLE", message="TTS not configured on this node"),
|
||||
)
|
||||
result = await asyncio.wait_for(
|
||||
tts_mlx_kokoro.synthesize(payload), timeout=timeout_s,
|
||||
)
|
||||
if config.TTS_PROVIDER == "memory_service":
|
||||
result = await asyncio.wait_for(
|
||||
tts_memory_service.synthesize(payload), timeout=timeout_s,
|
||||
)
|
||||
else:
|
||||
result = await asyncio.wait_for(
|
||||
tts_mlx_kokoro.synthesize(payload), timeout=timeout_s,
|
||||
)
|
||||
elif job.required_type == "ocr":
|
||||
if config.OCR_PROVIDER == "none":
|
||||
return JobResponse(
|
||||
|
||||
@@ -40,6 +40,31 @@ try:
|
||||
registry=REGISTRY,
|
||||
)
|
||||
|
||||
# ── Voice HA metrics ──────────────────────────────────────────────────────
|
||||
# cap label: "voice_tts" | "voice_llm" | "voice_stt"
|
||||
voice_cap_requests = Counter(
|
||||
"fabric_voice_capability_requests_total",
|
||||
"Voice HA capability routing requests",
|
||||
["cap", "status"], registry=REGISTRY,
|
||||
)
|
||||
voice_offload_total = Counter(
|
||||
"fabric_voice_offload_total",
|
||||
"Voice HA offload attempts (node selected + NATS sent)",
|
||||
["cap", "node", "status"], registry=REGISTRY,
|
||||
)
|
||||
voice_breaker_state = Gauge(
|
||||
"fabric_voice_breaker_state",
|
||||
"Voice HA circuit breaker per node+cap (1=open)",
|
||||
["cap", "node"], registry=REGISTRY,
|
||||
)
|
||||
voice_score_hist = Histogram(
|
||||
"fabric_voice_score_ms",
|
||||
"Voice HA node scoring distribution",
|
||||
["cap"],
|
||||
buckets=[0, 50, 100, 200, 400, 800, 1600, 3200],
|
||||
registry=REGISTRY,
|
||||
)
|
||||
|
||||
except ImportError:
|
||||
PROM_AVAILABLE = False
|
||||
REGISTRY = None
|
||||
@@ -76,6 +101,26 @@ def observe_score(score: int):
|
||||
score_hist.observe(score)
|
||||
|
||||
|
||||
def inc_voice_cap_request(cap: str, status: str):
|
||||
if PROM_AVAILABLE:
|
||||
voice_cap_requests.labels(cap=cap, status=status).inc()
|
||||
|
||||
|
||||
def inc_voice_offload(cap: str, node: str, status: str):
|
||||
if PROM_AVAILABLE:
|
||||
voice_offload_total.labels(cap=cap, node=node, status=status).inc()
|
||||
|
||||
|
||||
def set_voice_breaker(cap: str, node: str, is_open: bool):
|
||||
if PROM_AVAILABLE:
|
||||
voice_breaker_state.labels(cap=cap, node=node).set(1 if is_open else 0)
|
||||
|
||||
|
||||
def observe_voice_score(cap: str, score: float):
|
||||
if PROM_AVAILABLE:
|
||||
voice_score_hist.labels(cap=cap).observe(score)
|
||||
|
||||
|
||||
def get_metrics_text() -> Optional[bytes]:
|
||||
if PROM_AVAILABLE and REGISTRY:
|
||||
return generate_latest(REGISTRY)
|
||||
|
||||
@@ -64,6 +64,12 @@ except ImportError:
|
||||
|
||||
logging.basicConfig(level=logging.INFO)
|
||||
logger = logging.getLogger(__name__)
|
||||
NEO4J_NOTIFICATIONS_LOG_LEVEL = os.getenv("NEO4J_NOTIFICATIONS_LOG_LEVEL", "ERROR").strip().upper()
|
||||
_neo4j_notifications_level = getattr(logging, NEO4J_NOTIFICATIONS_LOG_LEVEL, logging.ERROR)
|
||||
logging.getLogger("neo4j.notifications").setLevel(_neo4j_notifications_level)
|
||||
# Guard against late/conditional auto-router imports.
|
||||
# If auto-router module is unavailable (or loaded later), inference must still work.
|
||||
SOFIIA_AUTO_ROUTER_AVAILABLE = False
|
||||
|
||||
TRUSTED_DOMAINS_CONFIG_PATH = os.getenv("TRUSTED_DOMAINS_CONFIG_PATH", "./trusted_domains.yml")
|
||||
_trusted_domains_cache: Dict[str, Any] = {"mtime": None, "data": {}}
|
||||
@@ -289,8 +295,24 @@ DETERMINISTIC_PLANT_POLICY_AGENTS = {
|
||||
REPEAT_FINGERPRINT_MIN_SIMILARITY = float(os.getenv("AGENT_REPEAT_FINGERPRINT_MIN_SIMILARITY", "0.92"))
|
||||
|
||||
|
||||
def _clean_think_blocks(text: str) -> str:
|
||||
"""Remove <think>...</think> reasoning blocks from LLM output (Qwen3/DeepSeek-R1).
|
||||
|
||||
Strategy:
|
||||
1. Strip complete <think>...</think> blocks (DOTALL for multiline).
|
||||
2. Fallback: if an unclosed <think> remains, drop everything after it.
|
||||
"""
|
||||
cleaned = re.sub(r"<think>.*?</think>", "", text,
|
||||
flags=re.DOTALL | re.IGNORECASE)
|
||||
# Fallback: unclosed <think> — truncate before it
|
||||
if "<think>" in cleaned.lower():
|
||||
cleaned = re.split(r"(?i)<think>", cleaned)[0]
|
||||
return cleaned
|
||||
|
||||
|
||||
def _normalize_text_response(text: str) -> str:
|
||||
return re.sub(r"\s+", " ", str(text or "")).strip()
|
||||
cleaned = _clean_think_blocks(str(text or ""))
|
||||
return re.sub(r"\s+", " ", cleaned).strip()
|
||||
|
||||
|
||||
def _response_fingerprint(text: str) -> str:
|
||||
@@ -1689,6 +1711,20 @@ async def internal_llm_complete(request: InternalLLMRequest):
|
||||
tokens = data.get("usage", {}).get("total_tokens", 0)
|
||||
latency = int((time_module.time() - t0) * 1000)
|
||||
logger.info(f"Internal LLM success: {cloud['name']}, {tokens} tokens, {latency}ms")
|
||||
# Track usage for budget dashboard
|
||||
if SOFIIA_AUTO_ROUTER_AVAILABLE:
|
||||
try:
|
||||
usage_data = data.get("usage", {})
|
||||
track_usage(
|
||||
provider=cloud["name"],
|
||||
model=cloud["model"],
|
||||
agent=request.metadata.get("agent_id", "unknown") if request.metadata else "unknown",
|
||||
input_tokens=usage_data.get("prompt_tokens", tokens // 2 if tokens else 0),
|
||||
output_tokens=usage_data.get("completion_tokens", tokens // 2 if tokens else 0),
|
||||
latency_ms=latency,
|
||||
)
|
||||
except Exception as _te:
|
||||
logger.debug("budget track error: %s", _te)
|
||||
return InternalLLMResponse(text=response_text, model=cloud["model"], provider=cloud["name"], tokens_used=tokens, latency_ms=latency)
|
||||
except Exception as e:
|
||||
logger.warning(f"Internal LLM {cloud['name']} failed: {e}")
|
||||
@@ -2086,8 +2122,39 @@ async def agent_infer(agent_id: str, request: InferRequest):
|
||||
|
||||
routing_rules = router_config.get("routing", [])
|
||||
default_llm = _select_default_llm(agent_id, metadata, default_llm, routing_rules)
|
||||
|
||||
cloud_provider_names = {"deepseek", "mistral", "grok", "openai", "anthropic"}
|
||||
|
||||
# ── Sofiia Auto-Router: dynamic model selection based on task type ──────
|
||||
if agent_id == "sofiia" and SOFIIA_AUTO_ROUTER_AVAILABLE and not request.model:
|
||||
try:
|
||||
_auto_result = select_model_auto(
|
||||
prompt=request.prompt or "",
|
||||
force_fast=metadata.get("force_fast", False),
|
||||
force_capable=metadata.get("force_capable", False),
|
||||
prefer_local=metadata.get("prefer_local", False),
|
||||
prefer_cheap=metadata.get("prefer_cheap", False),
|
||||
budget_aware=True,
|
||||
)
|
||||
# Only override if auto-selected profile exists in config
|
||||
if _auto_result.profile_name in router_config.get("llm_profiles", {}):
|
||||
logger.info(
|
||||
"🧠 Sofiia Auto-Router: task=%s complexity=%s → profile=%s model=%s reason=%s",
|
||||
_auto_result.task_type, _auto_result.complexity,
|
||||
_auto_result.profile_name, _auto_result.model_id,
|
||||
_auto_result.reason,
|
||||
)
|
||||
default_llm = _auto_result.profile_name
|
||||
else:
|
||||
logger.debug(
|
||||
"🧠 Sofiia Auto-Router: profile %s not in config, using %s",
|
||||
_auto_result.profile_name, default_llm,
|
||||
)
|
||||
except Exception as _ar_e:
|
||||
logger.warning("⚠️ Sofiia Auto-Router error: %s", _ar_e)
|
||||
|
||||
# Pass routing-resolved default_llm to NCS so it respects cloud routing rules
|
||||
ncs_agent_config = {**agent_config, "default_llm": default_llm}
|
||||
|
||||
cloud_provider_names = {"deepseek", "mistral", "grok", "openai", "anthropic", "glm"}
|
||||
|
||||
# ── Global NCS-first model selection (multi-node) ───────────────────
|
||||
ncs_selection = None
|
||||
@@ -2095,7 +2162,7 @@ async def agent_infer(agent_id: str, request: InferRequest):
|
||||
try:
|
||||
gcaps = await global_capabilities_client.get_global_capabilities()
|
||||
ncs_selection = await select_model_for_agent(
|
||||
agent_id, agent_config, router_config, gcaps, request.model,
|
||||
agent_id, ncs_agent_config, router_config, gcaps, request.model,
|
||||
)
|
||||
except Exception as e:
|
||||
logger.warning(f"⚠️ Global NCS selection error: {e}; falling back to static")
|
||||
@@ -2103,7 +2170,7 @@ async def agent_infer(agent_id: str, request: InferRequest):
|
||||
try:
|
||||
caps = await capabilities_client.fetch_capabilities()
|
||||
ncs_selection = await select_model_for_agent(
|
||||
agent_id, agent_config, router_config, caps, request.model,
|
||||
agent_id, ncs_agent_config, router_config, caps, request.model,
|
||||
)
|
||||
except Exception as e:
|
||||
logger.warning(f"⚠️ NCS selection error: {e}; falling back to static")
|
||||
@@ -2678,11 +2745,218 @@ async def agent_infer(agent_id: str, request: InferRequest):
|
||||
}
|
||||
]
|
||||
|
||||
# GLM (Z.AI / BigModel) — OpenAI-compatible but with special JWT auth.
|
||||
if provider == "glm" and allow_cloud:
|
||||
glm_key = os.getenv(llm_profile.get("api_key_env", "GLM5_API_KEY"), "")
|
||||
if glm_key:
|
||||
glm_model = request.model or llm_profile.get("model", "glm-4-flash")
|
||||
glm_base_url = llm_profile.get("base_url", "https://open.bigmodel.cn/api/paas/v4")
|
||||
glm_max_tokens = int(request.max_tokens or llm_profile.get("max_tokens", 4096))
|
||||
glm_temperature = float(request.temperature if request.temperature is not None else llm_profile.get("temperature", 0.3))
|
||||
glm_timeout = int(llm_profile.get("timeout_ms", 30000) / 1000)
|
||||
try:
|
||||
glm_resp = await http_client.post(
|
||||
f"{glm_base_url}/chat/completions",
|
||||
headers={"Authorization": f"Bearer {glm_key}", "Content-Type": "application/json"},
|
||||
json={
|
||||
"model": glm_model,
|
||||
"messages": messages,
|
||||
"max_tokens": glm_max_tokens,
|
||||
"temperature": glm_temperature,
|
||||
"stream": False,
|
||||
},
|
||||
timeout=float(glm_timeout),
|
||||
)
|
||||
if glm_resp.status_code == 200:
|
||||
glm_data = glm_resp.json()
|
||||
response_text = glm_data.get("choices", [{}])[0].get("message", {}).get("content", "")
|
||||
glm_tokens = glm_data.get("usage", {}).get("total_tokens", 0)
|
||||
if SOFIIA_AUTO_ROUTER_AVAILABLE:
|
||||
try:
|
||||
usage_d = glm_data.get("usage", {})
|
||||
track_usage(
|
||||
provider="glm", model=glm_model, agent=agent_id,
|
||||
input_tokens=usage_d.get("prompt_tokens", glm_tokens // 2 if glm_tokens else 0),
|
||||
output_tokens=usage_d.get("completion_tokens", glm_tokens // 2 if glm_tokens else 0),
|
||||
)
|
||||
except Exception:
|
||||
pass
|
||||
response_text = await _finalize_response_text(response_text, f"glm-{glm_model}")
|
||||
return InferResponse(
|
||||
response=response_text,
|
||||
model=glm_model,
|
||||
backend="glm",
|
||||
tokens_used=glm_tokens,
|
||||
)
|
||||
else:
|
||||
logger.warning("🐉 GLM API error %s: %s", glm_resp.status_code, glm_resp.text[:200])
|
||||
except Exception as _glm_e:
|
||||
logger.warning("🐉 GLM call failed: %s", _glm_e)
|
||||
else:
|
||||
logger.warning("🐉 GLM provider selected but GLM5_API_KEY not set")
|
||||
# Fall through to Ollama
|
||||
|
||||
# Anthropic has its own API format — handle separately before the loop.
|
||||
if provider == "anthropic" and allow_cloud:
|
||||
anthropic_key = os.getenv(llm_profile.get("api_key_env", "ANTHROPIC_API_KEY"), "")
|
||||
if anthropic_key:
|
||||
anthropic_model = request.model or llm_profile.get("model", "claude-sonnet-4-5")
|
||||
anthropic_max_tokens = int(request.max_tokens or llm_profile.get("max_tokens", 8192))
|
||||
anthropic_temperature = float(request.temperature if request.temperature is not None else llm_profile.get("temperature", 0.2))
|
||||
anthropic_timeout = int(llm_profile.get("timeout_ms", 120000) / 1000)
|
||||
try:
|
||||
# Extract system prompt from messages
|
||||
anthropic_system = ""
|
||||
anthropic_messages = []
|
||||
for msg in messages:
|
||||
role = msg.get("role", "user")
|
||||
content = msg.get("content", "")
|
||||
if role == "system":
|
||||
anthropic_system = content
|
||||
else:
|
||||
anthropic_messages.append({"role": role, "content": content})
|
||||
if not anthropic_messages:
|
||||
anthropic_messages = [{"role": "user", "content": request.prompt}]
|
||||
# Build tool definitions for Claude
|
||||
anthropic_tools = None
|
||||
if TOOL_MANAGER_AVAILABLE and tool_manager:
|
||||
raw_tools = tool_manager.get_tool_definitions(request_agent_id)
|
||||
if raw_tools:
|
||||
anthropic_tools = []
|
||||
for t in raw_tools:
|
||||
fn = t.get("function", {})
|
||||
anthropic_tools.append({
|
||||
"name": fn.get("name", "unknown"),
|
||||
"description": fn.get("description", ""),
|
||||
"input_schema": fn.get("parameters") or {"type": "object", "properties": {}},
|
||||
})
|
||||
anthropic_payload: Dict[str, Any] = {
|
||||
"model": anthropic_model,
|
||||
"max_tokens": anthropic_max_tokens,
|
||||
"temperature": anthropic_temperature,
|
||||
"messages": anthropic_messages,
|
||||
}
|
||||
if anthropic_system:
|
||||
anthropic_payload["system"] = anthropic_system
|
||||
if anthropic_tools:
|
||||
anthropic_payload["tools"] = anthropic_tools
|
||||
logger.info(f"🟣 Anthropic Claude API: model={anthropic_model} agent={agent_id}")
|
||||
anthropic_resp = await http_client.post(
|
||||
"https://api.anthropic.com/v1/messages",
|
||||
headers={
|
||||
"x-api-key": anthropic_key,
|
||||
"anthropic-version": "2023-06-01",
|
||||
"content-type": "application/json",
|
||||
},
|
||||
json=anthropic_payload,
|
||||
timeout=anthropic_timeout,
|
||||
)
|
||||
if anthropic_resp.status_code == 200:
|
||||
anthropic_data = anthropic_resp.json()
|
||||
response_text = ""
|
||||
for block in anthropic_data.get("content", []):
|
||||
if block.get("type") == "text":
|
||||
response_text += block.get("text", "")
|
||||
tokens_used = (
|
||||
anthropic_data.get("usage", {}).get("input_tokens", 0)
|
||||
+ anthropic_data.get("usage", {}).get("output_tokens", 0)
|
||||
)
|
||||
# Handle tool_use blocks from Claude
|
||||
claude_tool_uses = [b for b in anthropic_data.get("content", []) if b.get("type") == "tool_use"]
|
||||
if claude_tool_uses and TOOL_MANAGER_AVAILABLE and tool_manager:
|
||||
tool_result_messages = list(anthropic_messages)
|
||||
tool_result_messages.append({"role": "assistant", "content": anthropic_data.get("content", [])})
|
||||
for tool_use_block in claude_tool_uses:
|
||||
tool_name = tool_use_block.get("name", "")
|
||||
tool_input = tool_use_block.get("input", {})
|
||||
tool_use_id = tool_use_block.get("id", "")
|
||||
logger.info(f"🔧 Claude tool call: {tool_name}({json.dumps(tool_input)[:100]})")
|
||||
try:
|
||||
tool_exec_result = await tool_manager.execute_tool(
|
||||
tool_name, tool_input,
|
||||
agent_id=request_agent_id, chat_id=chat_id, user_id=user_id,
|
||||
)
|
||||
tool_content = tool_exec_result.result if tool_exec_result.success else f"Error: {tool_exec_result.error}"
|
||||
except Exception as te:
|
||||
tool_content = f"Tool execution error: {te}"
|
||||
tool_result_messages.append({
|
||||
"role": "user",
|
||||
"content": [{"type": "tool_result", "tool_use_id": tool_use_id, "content": str(tool_content)}]
|
||||
})
|
||||
# Follow-up call with tool results
|
||||
anthropic_payload["messages"] = tool_result_messages
|
||||
followup_resp = await http_client.post(
|
||||
"https://api.anthropic.com/v1/messages",
|
||||
headers={
|
||||
"x-api-key": anthropic_key,
|
||||
"anthropic-version": "2023-06-01",
|
||||
"content-type": "application/json",
|
||||
},
|
||||
json=anthropic_payload,
|
||||
timeout=anthropic_timeout,
|
||||
)
|
||||
if followup_resp.status_code == 200:
|
||||
followup_data = followup_resp.json()
|
||||
response_text = ""
|
||||
for block in followup_data.get("content", []):
|
||||
if block.get("type") == "text":
|
||||
response_text += block.get("text", "")
|
||||
tokens_used += (
|
||||
followup_data.get("usage", {}).get("input_tokens", 0)
|
||||
+ followup_data.get("usage", {}).get("output_tokens", 0)
|
||||
)
|
||||
response_text = await _finalize_response_text(response_text, f"anthropic-{anthropic_model}")
|
||||
if MEMORY_RETRIEVAL_AVAILABLE and memory_retrieval and chat_id and user_id:
|
||||
asyncio.create_task(memory_retrieval.store_message(
|
||||
agent_id=agent_id, user_id=user_id, username=username,
|
||||
message_text=request.prompt, response_text=response_text,
|
||||
chat_id=chat_id, metadata={"model": anthropic_model, "provider": "anthropic"},
|
||||
))
|
||||
# Track Anthropic usage for budget dashboard
|
||||
if SOFIIA_AUTO_ROUTER_AVAILABLE:
|
||||
try:
|
||||
track_usage(
|
||||
provider="anthropic",
|
||||
model=anthropic_model,
|
||||
agent=agent_id,
|
||||
input_tokens=tokens_used // 3 if tokens_used else 0,
|
||||
output_tokens=tokens_used - tokens_used // 3 if tokens_used else 0,
|
||||
latency_ms=int((time_module.time() - _t_start) * 1000) if "_t_start" in dir() else 0,
|
||||
task_type="",
|
||||
)
|
||||
except Exception as _te:
|
||||
logger.debug("budget track anthropic error: %s", _te)
|
||||
return InferResponse(
|
||||
response=response_text,
|
||||
model=anthropic_model,
|
||||
backend="anthropic",
|
||||
tokens_used=tokens_used,
|
||||
)
|
||||
else:
|
||||
err_body = anthropic_resp.text[:300]
|
||||
logger.warning(f"🟣 Anthropic API error {anthropic_resp.status_code}: {err_body}")
|
||||
except Exception as anthropic_exc:
|
||||
logger.warning(f"🟣 Anthropic call failed: {anthropic_exc}")
|
||||
else:
|
||||
logger.warning("🟣 Anthropic provider selected but ANTHROPIC_API_KEY not set")
|
||||
# Fall through to Ollama if Anthropic fails
|
||||
|
||||
if not allow_cloud:
|
||||
cloud_providers = []
|
||||
|
||||
# If specific provider requested, try it first
|
||||
if provider in ["deepseek", "mistral", "grok"]:
|
||||
# GLM in OpenAI-compat fallback list for internal/non-sofiia requests
|
||||
glm_key_fb = os.getenv("GLM5_API_KEY", "")
|
||||
if glm_key_fb:
|
||||
cloud_providers.insert(0, {
|
||||
"name": "glm",
|
||||
"api_key_env": "GLM5_API_KEY",
|
||||
"base_url": "https://open.bigmodel.cn/api/paas/v4",
|
||||
"model": "glm-4-flash",
|
||||
"timeout": 20,
|
||||
})
|
||||
|
||||
if provider in ["deepseek", "mistral", "grok", "glm"]:
|
||||
# Reorder to put requested provider first
|
||||
cloud_providers = sorted(cloud_providers, key=lambda x: 0 if x["name"] == provider else 1)
|
||||
|
||||
@@ -3666,6 +3940,184 @@ async def capability_offload(cap_type: str, request: Request):
|
||||
})
|
||||
|
||||
|
||||
@app.post("/v1/capability/voice_{voice_cap_type}")
|
||||
async def voice_capability_offload(voice_cap_type: str, request: Request):
|
||||
"""Route a Voice HA request (voice_tts / voice_llm / voice_stt) to the best node.
|
||||
|
||||
Uses voice-specific NATS subjects (node.{id}.voice.{type}.request) and
|
||||
separate circuit breaker keys from generic offload. Returns response headers:
|
||||
- X-Voice-Node: chosen node id
|
||||
- X-Voice-Mode: local | remote (relative to the router's own node)
|
||||
- X-Voice-Cap: the capability type routed (voice_tts, voice_llm, voice_stt)
|
||||
|
||||
Contract: no silent fallback — any failure increments Prometheus counter +
|
||||
logs WARNING before returning 50x.
|
||||
"""
|
||||
import uuid as _uuid
|
||||
import fabric_metrics as fm
|
||||
|
||||
cap_type = voice_cap_type # "tts", "llm", or "sst"
|
||||
full_cap = f"voice_{cap_type}"
|
||||
valid_caps = {"tts", "llm", "stt"}
|
||||
if cap_type not in valid_caps:
|
||||
fm.inc_voice_cap_request(full_cap, "invalid")
|
||||
return JSONResponse(status_code=400, content={
|
||||
"error": f"Invalid voice cap: {cap_type}. Valid: voice_tts, voice_llm, voice_stt",
|
||||
})
|
||||
|
||||
if not NCS_AVAILABLE or not global_capabilities_client:
|
||||
fm.inc_voice_cap_request(full_cap, "ncs_unavailable")
|
||||
logger.warning("[voice.cap] NCS unavailable — cannot route %s", full_cap)
|
||||
return JSONResponse(status_code=503, content={
|
||||
"error": "NCS not available — cannot route voice capability requests",
|
||||
})
|
||||
|
||||
gcaps = await global_capabilities_client.require_fresh_caps(ttl=30)
|
||||
if gcaps is None:
|
||||
fm.inc_voice_cap_request(full_cap, "stale_caps")
|
||||
logger.warning("[voice.cap] caps stale — refusing to route %s", full_cap)
|
||||
return JSONResponse(status_code=503, content={
|
||||
"error": "NCS caps stale — preflight failed",
|
||||
})
|
||||
|
||||
eligible_nodes = global_capabilities_client.find_nodes_with_capability(full_cap)
|
||||
if not eligible_nodes:
|
||||
fm.inc_voice_cap_request(full_cap, "no_node")
|
||||
logger.warning("[voice.cap] no node with %s available", full_cap)
|
||||
return JSONResponse(status_code=404, content={
|
||||
"error": f"No node with capability '{full_cap}' available",
|
||||
"hint": f"Ensure node-worker is running with TTS_PROVIDER/STT_PROVIDER set and {full_cap}=true in /caps",
|
||||
})
|
||||
|
||||
# Voice uses separate CB key to avoid cross-contaminating generic stt/tts breakers
|
||||
voice_cb_type = f"voice.{cap_type}"
|
||||
unavailable = offload_client.get_unavailable_nodes(voice_cb_type) if offload_client else set()
|
||||
available = [n for n in eligible_nodes if n.lower() not in {u.lower() for u in unavailable}]
|
||||
if not available:
|
||||
fm.inc_voice_cap_request(full_cap, "all_broken")
|
||||
logger.warning("[voice.cap] all nodes circuit-broken for %s: %s", full_cap, eligible_nodes)
|
||||
return JSONResponse(status_code=503, content={
|
||||
"error": f"All nodes with '{full_cap}' are circuit-broken",
|
||||
"eligible": eligible_nodes,
|
||||
"unavailable": list(unavailable),
|
||||
})
|
||||
|
||||
# ── Voice scoring: prefer local, penalise high load + high latency ────────
|
||||
router_node_id = os.getenv("NODE_ID", "noda2").lower()
|
||||
LOCAL_THRESHOLD_MS = int(os.getenv("VOICE_LOCAL_THRESHOLD_MS", "250"))
|
||||
PREFER_LOCAL_BONUS = int(os.getenv("VOICE_PREFER_LOCAL_BONUS", "200"))
|
||||
|
||||
deadline_defaults = {
|
||||
"tts": int(os.getenv("VOICE_TTS_DEADLINE_MS", "3000")),
|
||||
"llm": int(os.getenv("VOICE_LLM_FAST_MS", "9000")),
|
||||
"sst": int(os.getenv("VOICE_STT_DEADLINE_MS", "6000")),
|
||||
}
|
||||
deadline_ms = deadline_defaults.get(cap_type, 9000)
|
||||
|
||||
scored = []
|
||||
for nid in available:
|
||||
nl = global_capabilities_client.get_node_load(nid)
|
||||
rl = global_capabilities_client.get_runtime_load(nid)
|
||||
wait_ms = nl.get("wait_ms", 0) or nl.get("inflight", 0) * 50
|
||||
rtt_ms = nl.get("rtt_ms", 0)
|
||||
p95_ms = rl.get("p95_ms", 0) if rl else 0
|
||||
mem_penalty = 300 if nl.get("mem_pressure") == "high" else 0
|
||||
local_bonus = PREFER_LOCAL_BONUS if nid.lower() == router_node_id else 0
|
||||
score = wait_ms + rtt_ms + p95_ms + mem_penalty - local_bonus
|
||||
scored.append((score, nid))
|
||||
fm.observe_voice_score(full_cap, score)
|
||||
fm.set_voice_breaker(full_cap, nid, False) # currently alive
|
||||
|
||||
scored.sort(key=lambda x: x[0])
|
||||
best_score, best_node = scored[0]
|
||||
voice_mode = "local" if best_node.lower() == router_node_id else "remote"
|
||||
|
||||
# If local score <= local_threshold, always prefer local even if a remote
|
||||
# node has slightly lower score (avoids unnecessary cross-node traffic)
|
||||
if voice_mode == "remote" and best_score > LOCAL_THRESHOLD_MS:
|
||||
local_candidates = [(s, n) for s, n in scored if n.lower() == router_node_id]
|
||||
if local_candidates:
|
||||
local_score = local_candidates[0][0]
|
||||
if local_score <= best_score + LOCAL_THRESHOLD_MS:
|
||||
best_node = router_node_id
|
||||
voice_mode = "local"
|
||||
logger.info(
|
||||
"[voice.cap] prefer local %s (score=%d) over %s (score=%d)",
|
||||
best_node, local_score, scored[0][1], best_score,
|
||||
)
|
||||
|
||||
payload = await request.json()
|
||||
logger.info(
|
||||
"[voice.cap.route] cap=%s → node=%s mode=%s score=%d deadline=%dms",
|
||||
full_cap, best_node, voice_mode, scored[0][0], deadline_ms,
|
||||
)
|
||||
|
||||
nats_ok = nc is not None and nats_available
|
||||
if not nats_ok or not offload_client:
|
||||
fm.inc_voice_cap_request(full_cap, "nats_down")
|
||||
logger.warning("[voice.cap] NATS not connected — cannot offload %s", full_cap)
|
||||
return JSONResponse(status_code=503, content={"error": "NATS not connected"})
|
||||
|
||||
job = {
|
||||
"job_id": str(_uuid.uuid4()),
|
||||
"required_type": cap_type,
|
||||
"payload": payload,
|
||||
"deadline_ts": int(time.time() * 1000) + deadline_ms,
|
||||
"hints": payload.pop("hints", {}),
|
||||
}
|
||||
|
||||
# Use voice-specific NATS subject
|
||||
nats_subject_type = f"voice.{cap_type}"
|
||||
result = await offload_client.offload_infer(
|
||||
nats_client=nc,
|
||||
node_id=best_node,
|
||||
required_type=nats_subject_type,
|
||||
job_payload=job,
|
||||
timeout_ms=deadline_ms,
|
||||
)
|
||||
|
||||
if result and result.get("status") == "ok":
|
||||
fm.inc_voice_cap_request(full_cap, "ok")
|
||||
fm.inc_voice_offload(full_cap, best_node, "ok")
|
||||
offload_client.record_success(best_node, voice_cb_type)
|
||||
response_data = result.get("result", result)
|
||||
resp = JSONResponse(content=response_data)
|
||||
resp.headers["X-Voice-Node"] = best_node
|
||||
resp.headers["X-Voice-Mode"] = voice_mode
|
||||
resp.headers["X-Voice-Cap"] = full_cap
|
||||
return resp
|
||||
|
||||
# Non-ok — circuit breaker + WARNING (contract: no silent fallback)
|
||||
error = result.get("error", {}) if result else {}
|
||||
status_code_resp = result.get("status", "error") if result else "timeout"
|
||||
|
||||
offload_client.record_failure(best_node, voice_cb_type)
|
||||
fm.set_voice_breaker(full_cap, best_node, True)
|
||||
fm.inc_voice_cap_request(full_cap, "fail")
|
||||
fm.inc_voice_offload(full_cap, best_node, "fail")
|
||||
|
||||
logger.warning(
|
||||
"[voice.cap.fail] cap=%s node=%s status=%s code=%s — "
|
||||
"WARNING: voice fallback must be handled by caller (BFF/Router)",
|
||||
full_cap, best_node, status_code_resp,
|
||||
error.get("code", "?"),
|
||||
)
|
||||
return JSONResponse(
|
||||
status_code=502,
|
||||
content={
|
||||
"error": error.get("message", f"Voice offload to {best_node} failed"),
|
||||
"code": error.get("code", "VOICE_OFFLOAD_FAILED"),
|
||||
"cap": full_cap,
|
||||
"node": best_node,
|
||||
},
|
||||
headers={
|
||||
"X-Voice-Node": best_node,
|
||||
"X-Voice-Mode": voice_mode,
|
||||
"X-Voice-Cap": full_cap,
|
||||
},
|
||||
)
|
||||
|
||||
|
||||
@app.get("/v1/capabilities")
|
||||
async def list_global_capabilities():
|
||||
"""Return full capabilities view across all nodes."""
|
||||
@@ -3986,6 +4438,120 @@ async def get_graph_stats():
|
||||
raise HTTPException(status_code=500, detail=str(e))
|
||||
|
||||
|
||||
# ── Sofiia Auto-Router & Budget Dashboard ─────────────────────────────────────
|
||||
|
||||
try:
|
||||
from sofiia_auto_router import (
|
||||
select_model_auto, classify_task, explain_selection,
|
||||
ProviderBudget as _ProviderBudget, get_full_catalog,
|
||||
refresh_ollama_models_async,
|
||||
)
|
||||
from provider_budget import track_usage, get_dashboard_data, set_provider_limit, get_stats
|
||||
SOFIIA_AUTO_ROUTER_AVAILABLE = True
|
||||
logger.info("✅ Sofiia Auto-Router loaded")
|
||||
except ImportError as _e:
|
||||
SOFIIA_AUTO_ROUTER_AVAILABLE = False
|
||||
logger.warning("⚠️ Sofiia Auto-Router not available: %s", _e)
|
||||
|
||||
|
||||
class AutoRouteRequest(BaseModel):
|
||||
prompt: str
|
||||
force_fast: bool = False
|
||||
force_capable: bool = False
|
||||
prefer_local: bool = False
|
||||
prefer_cheap: bool = False
|
||||
|
||||
|
||||
class BudgetLimitRequest(BaseModel):
|
||||
provider: str
|
||||
monthly_limit_usd: Optional[float] = None
|
||||
topup_balance_usd: Optional[float] = None
|
||||
|
||||
|
||||
@app.post("/v1/sofiia/auto-route")
|
||||
async def sofiia_auto_route(req: AutoRouteRequest):
|
||||
"""Classify a prompt and return the recommended model profile for Sofiia."""
|
||||
if not SOFIIA_AUTO_ROUTER_AVAILABLE:
|
||||
raise HTTPException(status_code=503, detail="Auto-router not available")
|
||||
result = select_model_auto(
|
||||
prompt=req.prompt,
|
||||
force_fast=req.force_fast,
|
||||
force_capable=req.force_capable,
|
||||
prefer_local=req.prefer_local,
|
||||
prefer_cheap=req.prefer_cheap,
|
||||
)
|
||||
return {
|
||||
"profile_name": result.profile_name,
|
||||
"model_id": result.model_id,
|
||||
"provider": result.provider,
|
||||
"task_type": result.task_type,
|
||||
"confidence": result.confidence,
|
||||
"complexity": result.complexity,
|
||||
"reason": result.reason,
|
||||
"fallback_used": result.fallback_used,
|
||||
"all_candidates": result.all_candidates,
|
||||
"ambiguous": result.ambiguous,
|
||||
"runner_up": result.runner_up,
|
||||
"all_scores": result.all_scores,
|
||||
"explanation": explain_selection(result),
|
||||
}
|
||||
|
||||
|
||||
@app.get("/v1/sofiia/budget")
|
||||
async def sofiia_budget_dashboard():
|
||||
"""Return budget dashboard data: token usage, costs, balances per provider."""
|
||||
if not SOFIIA_AUTO_ROUTER_AVAILABLE:
|
||||
raise HTTPException(status_code=503, detail="Budget tracker not available")
|
||||
return get_dashboard_data()
|
||||
|
||||
|
||||
@app.post("/v1/sofiia/budget/limits")
|
||||
async def set_budget_limits(req: BudgetLimitRequest):
|
||||
"""Set monthly limit or top-up balance for a provider."""
|
||||
if not SOFIIA_AUTO_ROUTER_AVAILABLE:
|
||||
raise HTTPException(status_code=503, detail="Budget tracker not available")
|
||||
set_provider_limit(
|
||||
provider=req.provider,
|
||||
monthly_limit_usd=req.monthly_limit_usd,
|
||||
topup_balance_usd=req.topup_balance_usd,
|
||||
)
|
||||
return {"status": "ok", "provider": req.provider}
|
||||
|
||||
|
||||
@app.get("/v1/sofiia/budget/stats")
|
||||
async def sofiia_budget_stats(window_hours: int = 24):
|
||||
"""Return per-provider stats for the given time window (hours)."""
|
||||
if not SOFIIA_AUTO_ROUTER_AVAILABLE:
|
||||
raise HTTPException(status_code=503, detail="Budget tracker not available")
|
||||
stats = get_stats(window_hours=window_hours)
|
||||
return {
|
||||
p: {
|
||||
"provider": s.provider,
|
||||
"total_cost_usd": round(s.total_cost_usd, 5),
|
||||
"call_count": s.call_count,
|
||||
"tokens_in": s.total_input_tokens,
|
||||
"tokens_out": s.total_output_tokens,
|
||||
"avg_latency_ms": round(s.avg_latency_ms),
|
||||
"top_models": s.top_models,
|
||||
}
|
||||
for p, s in stats.items()
|
||||
}
|
||||
|
||||
|
||||
@app.get("/v1/sofiia/catalog")
|
||||
async def sofiia_model_catalog(refresh_ollama: bool = False):
|
||||
"""Return full model catalog with availability status."""
|
||||
if not SOFIIA_AUTO_ROUTER_AVAILABLE:
|
||||
raise HTTPException(status_code=503, detail="Auto-router not available")
|
||||
if refresh_ollama:
|
||||
await refresh_ollama_models_async()
|
||||
return {
|
||||
"models": get_full_catalog(),
|
||||
"total": len(get_full_catalog()),
|
||||
"available_count": sum(1 for m in get_full_catalog() if m["available"]),
|
||||
}
|
||||
|
||||
|
||||
@app.on_event("shutdown")
|
||||
async def shutdown_event():
|
||||
"""Cleanup connections on shutdown"""
|
||||
|
||||
@@ -20,6 +20,7 @@ import json
|
||||
import logging
|
||||
import re
|
||||
import hashlib
|
||||
from time import monotonic
|
||||
from typing import Optional, Dict, Any, List
|
||||
from dataclasses import dataclass, field
|
||||
from datetime import datetime
|
||||
@@ -41,6 +42,20 @@ PENDING_QUESTIONS_LIMIT = int(os.getenv("AGENT_PENDING_QUESTIONS_LIMIT", "5"))
|
||||
SHARED_AGRO_LIBRARY_ENABLED = os.getenv("AGROMATRIX_SHARED_LIBRARY_ENABLED", "true").lower() == "true"
|
||||
SHARED_AGRO_LIBRARY_REQUIRE_REVIEW = os.getenv("AGROMATRIX_SHARED_LIBRARY_REQUIRE_REVIEW", "true").lower() == "true"
|
||||
DOC_VERSION_PREVIEW_CHARS = int(os.getenv("DOC_VERSION_PREVIEW_CHARS", "240"))
|
||||
WARNING_THROTTLE_SECONDS = float(os.getenv("MEMORY_RETRIEVAL_WARNING_THROTTLE_S", "60") or "60")
|
||||
_warning_last_ts: Dict[str, float] = {}
|
||||
|
||||
|
||||
def _warning_throttled(key: str, message: str) -> None:
|
||||
"""Emit repetitive warnings at most once per throttle window."""
|
||||
if WARNING_THROTTLE_SECONDS <= 0:
|
||||
logger.warning(message)
|
||||
return
|
||||
now = monotonic()
|
||||
last = _warning_last_ts.get(key, 0.0)
|
||||
if now - last >= WARNING_THROTTLE_SECONDS:
|
||||
_warning_last_ts[key] = now
|
||||
logger.warning(message)
|
||||
|
||||
|
||||
@dataclass
|
||||
@@ -1067,7 +1082,7 @@ class MemoryRetrieval:
|
||||
)
|
||||
return True
|
||||
except Exception as e:
|
||||
logger.warning(f"register_pending_question failed: {e}")
|
||||
_warning_throttled("register_pending_question_failed", f"register_pending_question failed: {e}")
|
||||
return False
|
||||
|
||||
async def resolve_pending_question(
|
||||
@@ -1086,7 +1101,7 @@ class MemoryRetrieval:
|
||||
row = await conn.fetchrow(
|
||||
"""
|
||||
WITH target AS (
|
||||
SELECT id
|
||||
SELECT id, question_fingerprint
|
||||
FROM agent_pending_questions
|
||||
WHERE channel = $1
|
||||
AND chat_id = $2
|
||||
@@ -1095,17 +1110,49 @@ class MemoryRetrieval:
|
||||
AND status = 'pending'
|
||||
ORDER BY created_at ASC
|
||||
LIMIT 1
|
||||
), decision AS (
|
||||
SELECT
|
||||
t.id,
|
||||
CASE
|
||||
WHEN $5 = 'dismissed' THEN 'dismissed'
|
||||
WHEN EXISTS (
|
||||
SELECT 1
|
||||
FROM agent_pending_questions q
|
||||
WHERE q.channel = $1
|
||||
AND q.chat_id = $2
|
||||
AND q.user_id = $3
|
||||
AND q.agent_id = $4
|
||||
AND q.status = 'answered'
|
||||
AND q.question_fingerprint = t.question_fingerprint
|
||||
) THEN 'dismissed'
|
||||
ELSE 'answered'
|
||||
END AS next_status,
|
||||
CASE
|
||||
WHEN $5 = 'dismissed' THEN $5
|
||||
WHEN EXISTS (
|
||||
SELECT 1
|
||||
FROM agent_pending_questions q
|
||||
WHERE q.channel = $1
|
||||
AND q.chat_id = $2
|
||||
AND q.user_id = $3
|
||||
AND q.agent_id = $4
|
||||
AND q.status = 'answered'
|
||||
AND q.question_fingerprint = t.question_fingerprint
|
||||
) THEN 'duplicate_answered'
|
||||
ELSE $5
|
||||
END AS resolution_reason
|
||||
FROM target t
|
||||
)
|
||||
UPDATE agent_pending_questions p
|
||||
SET status = CASE WHEN $5 = 'dismissed' THEN 'dismissed' ELSE 'answered' END,
|
||||
SET status = d.next_status,
|
||||
answered_at = NOW(),
|
||||
metadata = COALESCE(p.metadata, '{}'::jsonb)
|
||||
|| jsonb_build_object(
|
||||
'resolution_reason', $5,
|
||||
'resolution_reason', d.resolution_reason,
|
||||
'answer_fingerprint', COALESCE($6, '')
|
||||
)
|
||||
FROM target t
|
||||
WHERE p.id = t.id
|
||||
FROM decision d
|
||||
WHERE p.id = d.id
|
||||
RETURNING p.id
|
||||
""",
|
||||
channel,
|
||||
@@ -1117,7 +1164,7 @@ class MemoryRetrieval:
|
||||
)
|
||||
return bool(row)
|
||||
except Exception as e:
|
||||
logger.warning(f"resolve_pending_question failed: {e}")
|
||||
_warning_throttled("resolve_pending_question_failed", f"resolve_pending_question failed: {e}")
|
||||
return False
|
||||
|
||||
@staticmethod
|
||||
|
||||
@@ -81,7 +81,7 @@ def get_unavailable_nodes(req_type: str) -> Set[str]:
|
||||
async def offload_infer(
|
||||
nats_client,
|
||||
node_id: str,
|
||||
required_type: Literal["llm", "vision", "stt", "tts", "ocr", "image"],
|
||||
required_type: str, # "llm"|"vision"|"stt"|"tts"|"ocr"|"image"|"voice.tts"|"voice.llm"|"voice.stt"
|
||||
job_payload: Dict[str, Any],
|
||||
timeout_ms: int = 25000,
|
||||
) -> Optional[Dict[str, Any]]:
|
||||
@@ -89,6 +89,8 @@ async def offload_infer(
|
||||
|
||||
Returns parsed JobResponse dict or None on total failure.
|
||||
Retries on transient errors (timeout, busy). Does NOT retry on provider errors.
|
||||
|
||||
Voice HA subjects use dotted notation: "voice.tts" → node.{id}.voice.tts.request
|
||||
"""
|
||||
subject = f"node.{node_id.lower()}.{required_type}.request"
|
||||
payload_bytes = json.dumps(job_payload).encode()
|
||||
|
||||
@@ -9,6 +9,8 @@ Prompt Builder for DAGI Router
|
||||
|
||||
import httpx
|
||||
import logging
|
||||
import os
|
||||
import time
|
||||
from typing import Dict, Any, Optional
|
||||
from dataclasses import dataclass
|
||||
|
||||
@@ -43,6 +45,8 @@ class PromptBuilder:
|
||||
self.city_service_url = city_service_url.rstrip("/")
|
||||
self.router_config = router_config or {}
|
||||
self._http_client: Optional[httpx.AsyncClient] = None
|
||||
self._city_service_unavailable_until = 0.0
|
||||
self._city_service_cooldown_s = float(os.getenv("CITY_SERVICE_FAILURE_COOLDOWN_S", "120") or "120")
|
||||
|
||||
async def _get_http_client(self) -> httpx.AsyncClient:
|
||||
"""Lazy initialization of HTTP client"""
|
||||
@@ -80,6 +84,9 @@ class PromptBuilder:
|
||||
|
||||
async def _fetch_from_database(self, agent_id: str) -> Optional[AgentSystemPrompt]:
|
||||
"""Fetch system prompt from city-service API"""
|
||||
now = time.monotonic()
|
||||
if now < self._city_service_unavailable_until:
|
||||
return None
|
||||
try:
|
||||
client = await self._get_http_client()
|
||||
url = f"{self.city_service_url}/internal/agents/{agent_id}/system-prompt"
|
||||
@@ -100,10 +107,20 @@ class PromptBuilder:
|
||||
return None
|
||||
|
||||
except httpx.RequestError as e:
|
||||
logger.error(f"Error fetching prompt from city-service: {e}")
|
||||
self._city_service_unavailable_until = time.monotonic() + max(0.0, self._city_service_cooldown_s)
|
||||
logger.warning(
|
||||
"Error fetching prompt from city-service: %s; suppressing retries for %.0fs",
|
||||
e,
|
||||
self._city_service_cooldown_s,
|
||||
)
|
||||
return None
|
||||
except Exception as e:
|
||||
logger.error(f"Unexpected error fetching prompt: {e}")
|
||||
self._city_service_unavailable_until = time.monotonic() + max(0.0, self._city_service_cooldown_s)
|
||||
logger.warning(
|
||||
"Unexpected error fetching prompt: %s; suppressing retries for %.0fs",
|
||||
e,
|
||||
self._city_service_cooldown_s,
|
||||
)
|
||||
return None
|
||||
|
||||
def _get_from_config(self, agent_id: str) -> Optional[AgentSystemPrompt]:
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -229,6 +229,58 @@
|
||||
padding: 2px 6px;
|
||||
border-radius: 4px;
|
||||
}
|
||||
.aurora-clip-picker {
|
||||
margin-top: 8px;
|
||||
border: 1px solid var(--border);
|
||||
border-radius: 8px;
|
||||
background: var(--bg2);
|
||||
padding: 8px;
|
||||
display: none;
|
||||
gap: 8px;
|
||||
}
|
||||
.aurora-clip-head {
|
||||
display: flex;
|
||||
justify-content: space-between;
|
||||
gap: 8px;
|
||||
font-size: 0.74rem;
|
||||
color: var(--muted);
|
||||
align-items: center;
|
||||
}
|
||||
.aurora-clip-head strong {
|
||||
color: var(--text);
|
||||
font-weight: 600;
|
||||
}
|
||||
.aurora-clip-range-row {
|
||||
display: grid;
|
||||
grid-template-columns: 54px 1fr 62px;
|
||||
align-items: center;
|
||||
gap: 8px;
|
||||
font-size: 0.73rem;
|
||||
color: var(--muted);
|
||||
}
|
||||
.aurora-clip-range-row input[type="range"] {
|
||||
width: 100%;
|
||||
accent-color: var(--gold);
|
||||
cursor: pointer;
|
||||
}
|
||||
.aurora-clip-actions {
|
||||
display: flex;
|
||||
gap: 6px;
|
||||
flex-wrap: wrap;
|
||||
}
|
||||
.aurora-clip-btn {
|
||||
background: rgba(255,255,255,0.04);
|
||||
border: 1px solid var(--border);
|
||||
color: var(--muted);
|
||||
border-radius: 6px;
|
||||
padding: 4px 8px;
|
||||
font-size: 0.7rem;
|
||||
cursor: pointer;
|
||||
}
|
||||
.aurora-clip-btn:hover {
|
||||
border-color: var(--gold);
|
||||
color: var(--text);
|
||||
}
|
||||
.aurora-compare-wrap {
|
||||
position: relative;
|
||||
overflow: hidden;
|
||||
@@ -791,6 +843,27 @@
|
||||
accept=".mp4,.avi,.mov,.mkv,.webm,.mp3,.wav,.flac,.m4a,.aac,.ogg,.jpg,.jpeg,.png,.tiff,.tif,.webp"
|
||||
onchange="auroraOnFilePicked(this)">
|
||||
<div id="auroraThumbPreview" class="aurora-thumb-preview" style="display:none;"></div>
|
||||
<div id="auroraClipPicker" class="aurora-clip-picker">
|
||||
<div class="aurora-clip-head">
|
||||
<strong>🎚 Фрагмент На Прев'ю</strong>
|
||||
<span id="auroraClipSummary">—</span>
|
||||
</div>
|
||||
<div class="aurora-clip-range-row">
|
||||
<span>Start</span>
|
||||
<input id="auroraClipStartRange" type="range" min="0" max="0" step="0.1" value="0">
|
||||
<span id="auroraClipStartLabel">0s</span>
|
||||
</div>
|
||||
<div class="aurora-clip-range-row">
|
||||
<span>End</span>
|
||||
<input id="auroraClipEndRange" type="range" min="0" max="0" step="0.1" value="0">
|
||||
<span id="auroraClipEndLabel">0s</span>
|
||||
</div>
|
||||
<div class="aurora-clip-actions">
|
||||
<button type="button" class="aurora-clip-btn" id="auroraClipSetStartBtn">Start = поточний кадр</button>
|
||||
<button type="button" class="aurora-clip-btn" id="auroraClipSetEndBtn">End = поточний кадр</button>
|
||||
<button type="button" class="aurora-clip-btn" id="auroraClipFullBtn">Повне відео</button>
|
||||
</div>
|
||||
</div>
|
||||
<div class="aurora-kv" style="margin-top:10px;">
|
||||
<span class="k">Файл</span><span class="v" id="auroraSelectedFile">—</span>
|
||||
</div>
|
||||
@@ -833,6 +906,12 @@
|
||||
<option value="codeformer">CodeFormer</option>
|
||||
</select>
|
||||
</label>
|
||||
<label>Clip start (sec)
|
||||
<input id="auroraOptClipStart" type="number" min="0" step="0.1" placeholder="0">
|
||||
</label>
|
||||
<label>Clip duration (sec)
|
||||
<input id="auroraOptClipDuration" type="number" min="0.1" step="0.1" placeholder="5">
|
||||
</label>
|
||||
</div>
|
||||
</details>
|
||||
|
||||
@@ -869,7 +948,7 @@
|
||||
<button id="auroraAnalyzeBtn" class="btn btn-ghost" onclick="auroraAnalyze()" disabled>🔍 Аналіз</button>
|
||||
<button id="auroraAudioProcessBtn" class="btn btn-ghost" style="display:none;" onclick="auroraStartAudio()">🎧 Audio process</button>
|
||||
<button id="auroraStartBtn" class="btn btn-gold" style="flex:1;" onclick="auroraStart()" disabled>Почати обробку</button>
|
||||
<button id="auroraCancelBtn" class="btn btn-ghost" style="display:none;" onclick="auroraCancel()">Скасувати</button>
|
||||
<button id="auroraCancelBtn" class="btn btn-ghost" style="display:none;" onclick="auroraCancel()">Зупинити</button>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
@@ -912,6 +991,15 @@
|
||||
<label class="aurora-checkline"><input type="checkbox" id="auroraCtrlDenoise"> Enable denoise (FastDVDnet/SCUNet)</label>
|
||||
<label class="aurora-checkline"><input type="checkbox" id="auroraCtrlFaceRestore"> Run face restoration (GFPGAN)</label>
|
||||
<label class="aurora-checkline"><input type="checkbox" id="auroraCtrlPlateRoi"> License-plate ROI enhancement</label>
|
||||
<label class="aurora-checkline"><input type="checkbox" id="auroraCtrlMaxFace"> Max face quality (повільніше, але краще для облич)</label>
|
||||
<label class="aurora-note" style="display:block; margin-top:8px;">Фокус задачі:</label>
|
||||
<select id="auroraFocusProfile" style="width:100%; margin-top:4px;">
|
||||
<option value="auto" selected>Auto</option>
|
||||
<option value="max_faces">Max faces</option>
|
||||
<option value="text_readability">Text / logos readability</option>
|
||||
<option value="plates">License plates</option>
|
||||
</select>
|
||||
<input id="auroraTaskHint" type="text" style="width:100%; margin-top:8px;" placeholder="Ціль Aurora: напр. Прочитати напис на кепці персонажа (00:12-00:18)">
|
||||
<div class="aurora-priority-wrap">
|
||||
<div class="aurora-priority-head">
|
||||
<span>Пріоритет: Обличчя</span>
|
||||
@@ -997,7 +1085,14 @@
|
||||
<div style="display:flex; gap:8px; margin-top:10px; flex-wrap:wrap;">
|
||||
<button class="btn btn-ghost btn-sm" id="auroraDownloadResultBtn" style="display:none;" onclick="auroraDownloadResult()">Завантажити результат</button>
|
||||
<button class="btn btn-ghost btn-sm" id="auroraOpenFolderBtn" onclick="auroraRevealFolder()">Відкрити папку</button>
|
||||
<button id="auroraReprocessBtn" class="btn btn-ghost btn-sm" onclick="auroraReprocess()" disabled>Повторна обробка</button>
|
||||
<button id="auroraReprocessBtn" class="btn btn-ghost btn-sm" onclick="auroraReprocess()" disabled>Повторна обробка ×1</button>
|
||||
<select id="auroraReprocessPasses" class="btn btn-ghost btn-sm" style="min-width:92px;" onchange="auroraUpdateReprocessLabel()">
|
||||
<option value="1" selected>1 прохід</option>
|
||||
<option value="2">2 проходи</option>
|
||||
<option value="3">3 проходи</option>
|
||||
<option value="4">4 проходи</option>
|
||||
</select>
|
||||
<label class="aurora-checkline" style="margin:0;"><input type="checkbox" id="auroraReprocessSecondPass" checked> chain second-pass</label>
|
||||
</div>
|
||||
<div id="auroraForensicLogWrap" style="display:none; margin-top:10px;">
|
||||
<div class="aurora-note" style="margin-top:0;">Forensic log</div>
|
||||
@@ -2066,6 +2161,11 @@ let auroraTabBootstrapped = false;
|
||||
let auroraChatHistory = [];
|
||||
let auroraChatBusy = false;
|
||||
let auroraFolderPath = null;
|
||||
let auroraPreviewObjectUrl = null;
|
||||
let auroraPreviewVideoEl = null;
|
||||
let auroraVideoDurationSec = 0;
|
||||
let auroraClipBindingsReady = false;
|
||||
const AURORA_MIN_CLIP_SEC = 0.1;
|
||||
const AURORA_MAX_TRANSIENT_ERRORS = 12;
|
||||
const AURORA_ACTIVE_JOB_KEY = 'aurora_active_job_id';
|
||||
const AURORA_SMART_RUN_KEY = 'aurora_smart_run_id';
|
||||
@@ -2320,6 +2420,7 @@ function auroraSetActiveJobId(jobId) {
|
||||
if (el) el.textContent = auroraJobId || '—';
|
||||
const reBtn = document.getElementById('auroraReprocessBtn');
|
||||
if (reBtn) reBtn.disabled = !auroraJobId;
|
||||
auroraUpdateReprocessLabel();
|
||||
if (auroraJobId) {
|
||||
const cached = auroraGetPersistedTiming(auroraJobId);
|
||||
if (cached) {
|
||||
@@ -2328,6 +2429,25 @@ function auroraSetActiveJobId(jobId) {
|
||||
}
|
||||
}
|
||||
auroraPersistActiveJob();
|
||||
auroraUpdateCancelButton(null, null);
|
||||
}
|
||||
|
||||
function auroraUpdateCancelButton(status, stage) {
|
||||
const btn = document.getElementById('auroraCancelBtn');
|
||||
if (!btn) return;
|
||||
const s = String(status || '').toLowerCase();
|
||||
const st = String(stage || '').toLowerCase();
|
||||
const active = s === 'queued' || s === 'processing';
|
||||
if (!active) {
|
||||
btn.style.display = 'none';
|
||||
btn.disabled = false;
|
||||
btn.textContent = 'Зупинити';
|
||||
return;
|
||||
}
|
||||
btn.style.display = 'inline-block';
|
||||
const cancelling = st.includes('cancell') || st.includes('скасов');
|
||||
btn.disabled = cancelling;
|
||||
btn.textContent = cancelling ? 'Зупиняю...' : 'Зупинити';
|
||||
}
|
||||
|
||||
function auroraSetMode(mode) {
|
||||
@@ -2349,6 +2469,162 @@ function auroraIsAudioFile(file) {
|
||||
return ['.mp3', '.wav', '.flac', '.m4a', '.aac', '.ogg'].some(ext => name.endsWith(ext));
|
||||
}
|
||||
|
||||
function auroraRevokePreviewObjectUrl() {
|
||||
if (!auroraPreviewObjectUrl) return;
|
||||
try { URL.revokeObjectURL(auroraPreviewObjectUrl); } catch (_) {}
|
||||
auroraPreviewObjectUrl = null;
|
||||
}
|
||||
|
||||
function auroraFormatClipSeconds(seconds) {
|
||||
const value = Number(seconds);
|
||||
if (!Number.isFinite(value)) return '—';
|
||||
const rounded = Math.round(Math.max(0, value) * 10) / 10;
|
||||
if (Math.abs(rounded - Math.round(rounded)) < 1e-9) return `${Math.round(rounded)}s`;
|
||||
return `${rounded.toFixed(1)}s`;
|
||||
}
|
||||
|
||||
function auroraClampClipWindow(startSec, endSec, durationSec) {
|
||||
const total = Number(durationSec);
|
||||
if (!Number.isFinite(total) || total <= 0) return { start: 0, end: 0 };
|
||||
let start = Number(startSec);
|
||||
let end = Number(endSec);
|
||||
if (!Number.isFinite(start)) start = 0;
|
||||
if (!Number.isFinite(end)) end = total;
|
||||
start = Math.max(0, Math.min(start, total));
|
||||
end = Math.max(0, Math.min(end, total));
|
||||
if ((end - start) < AURORA_MIN_CLIP_SEC) {
|
||||
if ((start + AURORA_MIN_CLIP_SEC) <= total) {
|
||||
end = start + AURORA_MIN_CLIP_SEC;
|
||||
} else {
|
||||
end = total;
|
||||
start = Math.max(0, end - AURORA_MIN_CLIP_SEC);
|
||||
}
|
||||
}
|
||||
return { start, end };
|
||||
}
|
||||
|
||||
function auroraUpdateClipSummary(startSec, endSec, durationSec) {
|
||||
const summary = document.getElementById('auroraClipSummary');
|
||||
const startLabel = document.getElementById('auroraClipStartLabel');
|
||||
const endLabel = document.getElementById('auroraClipEndLabel');
|
||||
if (startLabel) startLabel.textContent = auroraFormatClipSeconds(startSec);
|
||||
if (endLabel) endLabel.textContent = auroraFormatClipSeconds(endSec);
|
||||
if (summary) {
|
||||
const clipDur = Math.max(0, Number(endSec) - Number(startSec));
|
||||
summary.textContent = `${auroraFormatClipSeconds(startSec)} → ${auroraFormatClipSeconds(endSec)} (${auroraFormatClipSeconds(clipDur)}) · total ${auroraFormatClipSeconds(durationSec)}`;
|
||||
}
|
||||
}
|
||||
|
||||
function auroraApplyClipWindow(startSec, endSec, { syncFields = true, syncSliders = true, seekTo = null } = {}) {
|
||||
const duration = Number(auroraVideoDurationSec || 0);
|
||||
if (!Number.isFinite(duration) || duration <= 0) return;
|
||||
const startRange = document.getElementById('auroraClipStartRange');
|
||||
const endRange = document.getElementById('auroraClipEndRange');
|
||||
const startInput = document.getElementById('auroraOptClipStart');
|
||||
const durationInput = document.getElementById('auroraOptClipDuration');
|
||||
const bounded = auroraClampClipWindow(startSec, endSec, duration);
|
||||
if (syncSliders && startRange && endRange) {
|
||||
startRange.value = bounded.start.toFixed(1);
|
||||
endRange.value = bounded.end.toFixed(1);
|
||||
}
|
||||
if (syncFields && startInput && durationInput) {
|
||||
const clipDuration = Math.max(AURORA_MIN_CLIP_SEC, bounded.end - bounded.start);
|
||||
startInput.value = bounded.start > 0 ? bounded.start.toFixed(1).replace(/\.0$/, '') : '';
|
||||
durationInput.value = clipDuration.toFixed(1).replace(/\.0$/, '');
|
||||
}
|
||||
auroraUpdateClipSummary(bounded.start, bounded.end, duration);
|
||||
if (auroraPreviewVideoEl && Number.isFinite(Number(seekTo))) {
|
||||
const target = Math.max(0, Math.min(Number(seekTo), duration));
|
||||
try { auroraPreviewVideoEl.currentTime = target; } catch (_) {}
|
||||
}
|
||||
}
|
||||
|
||||
function auroraSyncClipFromExportInputs() {
|
||||
const duration = Number(auroraVideoDurationSec || 0);
|
||||
if (!Number.isFinite(duration) || duration <= 0) return;
|
||||
const startInput = document.getElementById('auroraOptClipStart');
|
||||
const durationInput = document.getElementById('auroraOptClipDuration');
|
||||
const startValue = Number(startInput?.value || 0);
|
||||
const durationValue = Number(durationInput?.value || 0);
|
||||
const start = Number.isFinite(startValue) && startValue >= 0 ? startValue : 0;
|
||||
const hasDuration = Number.isFinite(durationValue) && durationValue > 0;
|
||||
const end = hasDuration ? start + durationValue : duration;
|
||||
auroraApplyClipWindow(start, end, { syncFields: true, syncSliders: true });
|
||||
}
|
||||
|
||||
function auroraHideClipPicker() {
|
||||
const picker = document.getElementById('auroraClipPicker');
|
||||
if (picker) picker.style.display = 'none';
|
||||
const summary = document.getElementById('auroraClipSummary');
|
||||
if (summary) summary.textContent = '—';
|
||||
const startLabel = document.getElementById('auroraClipStartLabel');
|
||||
if (startLabel) startLabel.textContent = '0s';
|
||||
const endLabel = document.getElementById('auroraClipEndLabel');
|
||||
if (endLabel) endLabel.textContent = '0s';
|
||||
auroraPreviewVideoEl = null;
|
||||
auroraVideoDurationSec = 0;
|
||||
}
|
||||
|
||||
function auroraBindClipPicker() {
|
||||
if (auroraClipBindingsReady) return;
|
||||
auroraClipBindingsReady = true;
|
||||
const startRange = document.getElementById('auroraClipStartRange');
|
||||
const endRange = document.getElementById('auroraClipEndRange');
|
||||
const startInput = document.getElementById('auroraOptClipStart');
|
||||
const durationInput = document.getElementById('auroraOptClipDuration');
|
||||
const setStartBtn = document.getElementById('auroraClipSetStartBtn');
|
||||
const setEndBtn = document.getElementById('auroraClipSetEndBtn');
|
||||
const fullBtn = document.getElementById('auroraClipFullBtn');
|
||||
|
||||
if (startRange && endRange) {
|
||||
startRange.addEventListener('input', () => {
|
||||
const start = Number(startRange.value || 0);
|
||||
const end = Number(endRange.value || 0);
|
||||
auroraApplyClipWindow(start, end, { syncFields: true, syncSliders: false, seekTo: start });
|
||||
});
|
||||
endRange.addEventListener('input', () => {
|
||||
const start = Number(startRange.value || 0);
|
||||
const end = Number(endRange.value || 0);
|
||||
auroraApplyClipWindow(start, end, { syncFields: true, syncSliders: false, seekTo: end });
|
||||
});
|
||||
}
|
||||
|
||||
if (startInput) {
|
||||
startInput.addEventListener('input', auroraSyncClipFromExportInputs);
|
||||
startInput.addEventListener('change', auroraSyncClipFromExportInputs);
|
||||
}
|
||||
if (durationInput) {
|
||||
durationInput.addEventListener('input', auroraSyncClipFromExportInputs);
|
||||
durationInput.addEventListener('change', auroraSyncClipFromExportInputs);
|
||||
}
|
||||
|
||||
if (setStartBtn) {
|
||||
setStartBtn.addEventListener('click', () => {
|
||||
if (!auroraPreviewVideoEl) return;
|
||||
const current = Number(auroraPreviewVideoEl.currentTime || 0);
|
||||
const end = Number(document.getElementById('auroraClipEndRange')?.value || auroraVideoDurationSec || 0);
|
||||
auroraApplyClipWindow(current, end, { syncFields: true, syncSliders: true, seekTo: current });
|
||||
});
|
||||
}
|
||||
if (setEndBtn) {
|
||||
setEndBtn.addEventListener('click', () => {
|
||||
if (!auroraPreviewVideoEl) return;
|
||||
const current = Number(auroraPreviewVideoEl.currentTime || 0);
|
||||
const start = Number(document.getElementById('auroraClipStartRange')?.value || 0);
|
||||
auroraApplyClipWindow(start, current, { syncFields: true, syncSliders: true, seekTo: current });
|
||||
});
|
||||
}
|
||||
if (fullBtn) {
|
||||
fullBtn.addEventListener('click', () => {
|
||||
const startField = document.getElementById('auroraOptClipStart');
|
||||
const durField = document.getElementById('auroraOptClipDuration');
|
||||
if (startField) startField.value = '';
|
||||
if (durField) durField.value = '';
|
||||
auroraApplyClipWindow(0, auroraVideoDurationSec, { syncFields: false, syncSliders: true, seekTo: 0 });
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
function auroraSetSelectedFile(file) {
|
||||
auroraSelectedFile = file || null;
|
||||
const label = document.getElementById('auroraSelectedFile');
|
||||
@@ -2364,6 +2640,10 @@ function auroraSetSelectedFile(file) {
|
||||
audioBtn.style.display = isAudio ? 'inline-block' : 'none';
|
||||
audioBtn.disabled = !file;
|
||||
}
|
||||
const clipStartInput = document.getElementById('auroraOptClipStart');
|
||||
const clipDurationInput = document.getElementById('auroraOptClipDuration');
|
||||
if (clipStartInput) clipStartInput.value = '';
|
||||
if (clipDurationInput) clipDurationInput.value = '';
|
||||
auroraAnalysisCache = null;
|
||||
auroraSuggestedPriority = 'balanced';
|
||||
auroraSuggestedExport = {};
|
||||
@@ -2386,6 +2666,7 @@ function auroraSetSelectedFile(file) {
|
||||
if (quickStartBtn) quickStartBtn.disabled = !file;
|
||||
const reBtn = document.getElementById('auroraReprocessBtn');
|
||||
if (reBtn) reBtn.disabled = !auroraJobId;
|
||||
auroraUpdateReprocessLabel();
|
||||
const batchInfo = document.getElementById('auroraBatchInfo');
|
||||
if (batchInfo && auroraBatchFiles.length <= 1) batchInfo.style.display = 'none';
|
||||
auroraShowThumbPreview(file);
|
||||
@@ -2394,19 +2675,57 @@ function auroraSetSelectedFile(file) {
|
||||
function auroraShowThumbPreview(file) {
|
||||
const wrap = document.getElementById('auroraThumbPreview');
|
||||
if (!wrap) return;
|
||||
auroraBindClipPicker();
|
||||
auroraRevokePreviewObjectUrl();
|
||||
auroraHideClipPicker();
|
||||
wrap.style.display = 'none';
|
||||
wrap.innerHTML = '';
|
||||
if (!file) return;
|
||||
const type = (file.type || '').toLowerCase();
|
||||
const url = URL.createObjectURL(file);
|
||||
auroraPreviewObjectUrl = url;
|
||||
if (type.startsWith('image/')) {
|
||||
wrap.innerHTML = `<img src="${url}" alt="preview"><span class="aurora-thumb-label">Original</span>`;
|
||||
wrap.style.display = 'block';
|
||||
} else if (type.startsWith('video/')) {
|
||||
const v = document.createElement('video');
|
||||
v.src = url; v.muted = true; v.playsInline = true; v.preload = 'metadata';
|
||||
v.addEventListener('loadeddata', () => { v.currentTime = 0.5; });
|
||||
v.addEventListener('seeked', () => { wrap.style.display = 'block'; }, { once: true });
|
||||
v.src = url;
|
||||
v.muted = true;
|
||||
v.controls = true;
|
||||
v.playsInline = true;
|
||||
v.preload = 'metadata';
|
||||
v.addEventListener('loadedmetadata', () => {
|
||||
const picker = document.getElementById('auroraClipPicker');
|
||||
const startRange = document.getElementById('auroraClipStartRange');
|
||||
const endRange = document.getElementById('auroraClipEndRange');
|
||||
const duration = Number(v.duration || 0);
|
||||
auroraPreviewVideoEl = v;
|
||||
auroraVideoDurationSec = Number.isFinite(duration) && duration > 0 ? duration : 0;
|
||||
if (!Number.isFinite(auroraVideoDurationSec) || auroraVideoDurationSec <= 0) {
|
||||
if (picker) picker.style.display = 'none';
|
||||
return;
|
||||
}
|
||||
if (startRange && endRange) {
|
||||
const max = auroraVideoDurationSec.toFixed(1);
|
||||
startRange.min = '0';
|
||||
endRange.min = '0';
|
||||
startRange.max = max;
|
||||
endRange.max = max;
|
||||
startRange.step = '0.1';
|
||||
endRange.step = '0.1';
|
||||
}
|
||||
if (picker) picker.style.display = 'grid';
|
||||
|
||||
const startInput = document.getElementById('auroraOptClipStart');
|
||||
const durInput = document.getElementById('auroraOptClipDuration');
|
||||
const startVal = Number(startInput?.value || 0);
|
||||
const durationVal = Number(durInput?.value || 0);
|
||||
const start = Number.isFinite(startVal) && startVal >= 0 ? startVal : 0;
|
||||
const hasDuration = Number.isFinite(durationVal) && durationVal > 0;
|
||||
const end = hasDuration ? (start + durationVal) : auroraVideoDurationSec;
|
||||
auroraApplyClipWindow(start, end, { syncFields: true, syncSliders: true });
|
||||
});
|
||||
v.addEventListener('loadeddata', () => { wrap.style.display = 'block'; });
|
||||
wrap.appendChild(v);
|
||||
const lbl = document.createElement('span');
|
||||
lbl.className = 'aurora-thumb-label'; lbl.textContent = 'Original';
|
||||
@@ -2633,7 +2952,38 @@ function auroraBindDropzone() {
|
||||
function auroraCollectExportOptions() {
|
||||
const opts = {};
|
||||
const outscale = document.getElementById('auroraOptOutscale')?.value;
|
||||
if (outscale && outscale !== 'auto') opts.outscale = Number(outscale);
|
||||
if (outscale && outscale !== 'auto') {
|
||||
opts.upscale = Number(outscale);
|
||||
opts.outscale = Number(outscale);
|
||||
}
|
||||
const clipPicker = document.getElementById('auroraClipPicker');
|
||||
const pickerVisible = !!clipPicker && getComputedStyle(clipPicker).display !== 'none';
|
||||
const startRange = document.getElementById('auroraClipStartRange');
|
||||
const endRange = document.getElementById('auroraClipEndRange');
|
||||
const durationTotal = Number(auroraVideoDurationSec || 0);
|
||||
const canUseRanges =
|
||||
pickerVisible &&
|
||||
Number.isFinite(durationTotal) &&
|
||||
durationTotal > 0 &&
|
||||
startRange &&
|
||||
endRange;
|
||||
if (canUseRanges) {
|
||||
const startRangeValue = Number(startRange.value || 0);
|
||||
const endRangeValue = Number(endRange.value || durationTotal);
|
||||
const bounded = auroraClampClipWindow(startRangeValue, endRangeValue, durationTotal);
|
||||
const clipDuration = Math.max(AURORA_MIN_CLIP_SEC, bounded.end - bounded.start);
|
||||
const isFullVideo = bounded.start <= 0.0001 && (durationTotal - bounded.end) <= 0.11;
|
||||
if (!isFullVideo) {
|
||||
if (bounded.start > 0.0001) opts.clip_start_sec = Number(bounded.start.toFixed(3));
|
||||
opts.clip_duration_sec = Number(clipDuration.toFixed(3));
|
||||
}
|
||||
} else {
|
||||
const clipStart = Number(document.getElementById('auroraOptClipStart')?.value || 0);
|
||||
const clipDurationRaw = document.getElementById('auroraOptClipDuration')?.value;
|
||||
const clipDuration = Number(clipDurationRaw || 0);
|
||||
if (Number.isFinite(clipStart) && clipStart > 0) opts.clip_start_sec = clipStart;
|
||||
if (clipDurationRaw !== '' && Number.isFinite(clipDuration) && clipDuration > 0) opts.clip_duration_sec = clipDuration;
|
||||
}
|
||||
const codec = document.getElementById('auroraOptCodec')?.value;
|
||||
if (codec && codec !== 'auto') opts.encoder = codec;
|
||||
const quality = document.getElementById('auroraOptQuality')?.value;
|
||||
@@ -2650,6 +3000,36 @@ function auroraAbsoluteUrl(url) {
|
||||
return `${API}${value}`;
|
||||
}
|
||||
|
||||
function auroraSleep(ms) {
|
||||
return new Promise((resolve) => setTimeout(resolve, ms));
|
||||
}
|
||||
|
||||
function auroraUpdateReprocessLabel() {
|
||||
const btn = document.getElementById('auroraReprocessBtn');
|
||||
const passes = Math.max(1, Math.min(4, Number(document.getElementById('auroraReprocessPasses')?.value || 1)));
|
||||
if (btn) btn.textContent = `Повторна обробка ×${passes}`;
|
||||
}
|
||||
|
||||
async function auroraWaitForTerminal(jobId, { timeoutSec = 10800, passLabel = '' } = {}) {
|
||||
const id = String(jobId || '').trim();
|
||||
if (!id) throw new Error('job_id missing');
|
||||
const deadline = Date.now() + (timeoutSec * 1000);
|
||||
while (Date.now() < deadline) {
|
||||
const r = await fetch(`${API}/api/aurora/status/${encodeURIComponent(id)}`);
|
||||
if (!r.ok) {
|
||||
await auroraSleep(2000);
|
||||
continue;
|
||||
}
|
||||
const st = await r.json();
|
||||
const status = String(st.status || '').toLowerCase();
|
||||
const stage = st.current_stage || 'processing';
|
||||
if (passLabel) auroraSetProgress(st.progress || 1, status || 'processing', `${passLabel} · ${stage}`);
|
||||
if (status === 'completed' || status === 'failed' || status === 'cancelled') return st;
|
||||
await auroraSleep(2000);
|
||||
}
|
||||
throw new Error('reprocess timeout');
|
||||
}
|
||||
|
||||
function auroraSetPreset(preset) {
|
||||
const normalized = String(preset || 'balanced').trim();
|
||||
auroraPresetMode = ['turbo', 'balanced', 'max_quality'].includes(normalized) ? normalized : 'balanced';
|
||||
@@ -2675,6 +3055,7 @@ function auroraSetPreset(preset) {
|
||||
if (outscale) outscale.value = 'auto';
|
||||
if (codec) codec.value = 'auto';
|
||||
}
|
||||
auroraUpdateReprocessLabel();
|
||||
}
|
||||
|
||||
function auroraUpdatePriorityLabel() {
|
||||
@@ -2699,18 +3080,29 @@ function auroraResetAnalysisControls() {
|
||||
const denoise = document.getElementById('auroraCtrlDenoise');
|
||||
const face = document.getElementById('auroraCtrlFaceRestore');
|
||||
const plate = document.getElementById('auroraCtrlPlateRoi');
|
||||
const maxFace = document.getElementById('auroraCtrlMaxFace');
|
||||
const focusProfile = document.getElementById('auroraFocusProfile');
|
||||
const taskHint = document.getElementById('auroraTaskHint');
|
||||
const clipStart = document.getElementById('auroraOptClipStart');
|
||||
const clipDuration = document.getElementById('auroraOptClipDuration');
|
||||
const slider = document.getElementById('auroraPriorityBias');
|
||||
if (denoise) denoise.checked = false;
|
||||
if (face) face.checked = true;
|
||||
if (plate) plate.checked = false;
|
||||
if (maxFace) maxFace.checked = false;
|
||||
if (focusProfile) focusProfile.value = 'auto';
|
||||
if (taskHint) taskHint.value = '';
|
||||
if (clipStart) clipStart.value = '';
|
||||
if (clipDuration) clipDuration.value = '';
|
||||
if (slider) slider.value = '0';
|
||||
auroraSetPreset('balanced');
|
||||
auroraUpdatePriorityLabel();
|
||||
auroraUpdateReprocessLabel();
|
||||
}
|
||||
|
||||
function auroraApplySuggestedExportOptions(suggested) {
|
||||
if (!suggested || typeof suggested !== 'object') return;
|
||||
const outscale = String(suggested.outscale ?? '').trim();
|
||||
const outscale = String(suggested.upscale ?? suggested.outscale ?? '').trim();
|
||||
if (outscale && document.getElementById('auroraOptOutscale')) {
|
||||
const el = document.getElementById('auroraOptOutscale');
|
||||
const has = Array.from(el.options || []).some((o) => o.value === outscale);
|
||||
@@ -2745,6 +3137,8 @@ function auroraApplyAnalysisHints(data) {
|
||||
const denoise = document.getElementById('auroraCtrlDenoise');
|
||||
const face = document.getElementById('auroraCtrlFaceRestore');
|
||||
const plate = document.getElementById('auroraCtrlPlateRoi');
|
||||
const maxFace = document.getElementById('auroraCtrlMaxFace');
|
||||
const focusProfile = document.getElementById('auroraFocusProfile');
|
||||
const slider = document.getElementById('auroraPriorityBias');
|
||||
|
||||
const highNoise = ['high', 'very_high'].includes(String(quality.noise_level || '').toLowerCase());
|
||||
@@ -2759,6 +3153,13 @@ function auroraApplyAnalysisHints(data) {
|
||||
else if (suggested === 'plates') slider.value = '55';
|
||||
else slider.value = '0';
|
||||
}
|
||||
if (focusProfile) {
|
||||
if (suggested === 'details') focusProfile.value = 'text_readability';
|
||||
else if (suggested === 'faces') focusProfile.value = 'max_faces';
|
||||
else if (suggested === 'plates') focusProfile.value = 'plates';
|
||||
else focusProfile.value = 'auto';
|
||||
}
|
||||
if (maxFace) maxFace.checked = suggested === 'faces';
|
||||
|
||||
if (suggested === 'faces' || suggested === 'plates') auroraSetPreset('max_quality');
|
||||
else auroraSetPreset('balanced');
|
||||
@@ -2770,12 +3171,21 @@ function auroraCollectAnalysisControls() {
|
||||
const denoise = Boolean(document.getElementById('auroraCtrlDenoise')?.checked);
|
||||
const faceRestore = Boolean(document.getElementById('auroraCtrlFaceRestore')?.checked);
|
||||
const plateRoi = Boolean(document.getElementById('auroraCtrlPlateRoi')?.checked);
|
||||
const maxFaceQuality = Boolean(document.getElementById('auroraCtrlMaxFace')?.checked);
|
||||
const focusProfile = String(document.getElementById('auroraFocusProfile')?.value || 'auto').trim();
|
||||
const taskHint = String(document.getElementById('auroraTaskHint')?.value || '').trim();
|
||||
const preset = auroraPresetMode || 'balanced';
|
||||
const priority = bias <= -30 ? 'faces' : bias >= 30 ? 'plates' : (auroraSuggestedPriority || 'balanced');
|
||||
let priority = bias <= -30 ? 'faces' : bias >= 30 ? 'plates' : (auroraSuggestedPriority || 'balanced');
|
||||
if (focusProfile === 'text_readability') priority = 'details';
|
||||
if (focusProfile === 'plates') priority = 'plates';
|
||||
if (focusProfile === 'max_faces' || maxFaceQuality) priority = 'faces';
|
||||
return {
|
||||
denoise,
|
||||
face_restore: faceRestore,
|
||||
plate_roi_enhance: plateRoi,
|
||||
max_face_quality: maxFaceQuality,
|
||||
focus_profile: focusProfile || 'auto',
|
||||
task_hint: taskHint,
|
||||
priority_bias: bias,
|
||||
priority,
|
||||
preset,
|
||||
@@ -2784,15 +3194,45 @@ function auroraCollectAnalysisControls() {
|
||||
|
||||
function auroraBuildAnalysisExportHints(controls) {
|
||||
const c = controls || auroraCollectAnalysisControls();
|
||||
return {
|
||||
const outscaleRaw = String(document.getElementById('auroraOptOutscale')?.value || 'auto').trim().toLowerCase();
|
||||
const isAutoScale = !outscaleRaw || outscaleRaw === 'auto';
|
||||
const hints = {
|
||||
pre_denoise: Boolean(c.denoise),
|
||||
temporal_denoise: Boolean(c.denoise && c.preset === 'max_quality'),
|
||||
roi_only_faces: c.priority === 'faces',
|
||||
face_restore: Boolean(c.face_restore),
|
||||
plate_roi_enhance: Boolean(c.plate_roi_enhance),
|
||||
max_face_quality: Boolean(c.max_face_quality),
|
||||
focus_profile: c.focus_profile || 'auto',
|
||||
task_hint: String(c.task_hint || '').trim(),
|
||||
profile: c.preset || 'balanced',
|
||||
priority_bias: Number(c.priority_bias || 0),
|
||||
auto_forensic_outscale: true,
|
||||
};
|
||||
if (!hints.task_hint) delete hints.task_hint;
|
||||
|
||||
if (c.focus_profile === 'max_faces' || c.max_face_quality) {
|
||||
hints.pre_denoise = true;
|
||||
hints.temporal_denoise = true;
|
||||
hints.roi_only_faces = true;
|
||||
hints.face_model = 'codeformer';
|
||||
hints.deblur_before_face = true;
|
||||
hints.score_loop = true;
|
||||
hints.allow_roi_upscale = true;
|
||||
if (isAutoScale) hints.upscale = 2;
|
||||
} else if (c.focus_profile === 'text_readability') {
|
||||
hints.pre_denoise = true;
|
||||
hints.temporal_denoise = true;
|
||||
hints.roi_only_faces = false;
|
||||
hints.deblur_before_face = true;
|
||||
hints.score_loop = true;
|
||||
hints.text_focus = true;
|
||||
if (isAutoScale) hints.upscale = 2;
|
||||
} else if (c.focus_profile === 'plates') {
|
||||
hints.roi_only_faces = false;
|
||||
hints.plate_roi_enhance = true;
|
||||
}
|
||||
return hints;
|
||||
}
|
||||
|
||||
function auroraStartFromAnalysis() {
|
||||
@@ -2824,6 +3264,9 @@ function auroraRenderQualityReport(report) {
|
||||
const plates = report.plates || {};
|
||||
const overall = report.overall || {};
|
||||
const models = Array.isArray(overall.models) ? overall.models : [];
|
||||
const warnings = Array.isArray(overall.warnings) ? overall.warnings : [];
|
||||
const processingStatus = String(overall.processing_status || 'ok');
|
||||
const degraded = processingStatus !== 'ok' || Boolean(overall.identical_to_input) || Boolean(overall.fallback_used);
|
||||
const procSec = Number(overall.processing_time_sec);
|
||||
const procText = Number.isFinite(procSec) ? auroraFormatSeconds(procSec) : '—';
|
||||
const psnr = overall.psnr != null ? `${overall.psnr} dB` : '—';
|
||||
@@ -2845,9 +3288,11 @@ function auroraRenderQualityReport(report) {
|
||||
</div>
|
||||
<div class="aurora-quality-group">
|
||||
<div class="aurora-quality-head">Загальне</div>
|
||||
<div class="aurora-quality-line"><span>Статус обробки</span><span style="${degraded ? 'color:var(--warn);' : 'color:var(--ok);'}">${auroraEsc(processingStatus)}</span></div>
|
||||
<div class="aurora-quality-line"><span>PSNR</span><span>${psnr}</span></div>
|
||||
<div class="aurora-quality-line"><span>Час обробки</span><span>${procText}</span></div>
|
||||
<div class="aurora-quality-line"><span>Моделі</span><span>${models.length ? auroraEsc(models.join(', ')) : '—'}</span></div>
|
||||
${warnings.length ? `<div class="aurora-note" style="margin-top:6px; color:var(--warn);">⚠ ${auroraEsc(warnings.join(' | '))}</div>` : ''}
|
||||
</div>
|
||||
`;
|
||||
wrap.style.display = 'block';
|
||||
@@ -3228,44 +3673,82 @@ async function auroraReprocess(options) {
|
||||
}
|
||||
const reBtn = document.getElementById('auroraReprocessBtn');
|
||||
if (reBtn) reBtn.disabled = true;
|
||||
const payload = (options && typeof options === 'object') ? options : {};
|
||||
const incoming = (options && typeof options === 'object') ? options : {};
|
||||
const passCountUi = Number(document.getElementById('auroraReprocessPasses')?.value || 1);
|
||||
const passes = Math.max(1, Math.min(4, Number(incoming.passes) || passCountUi));
|
||||
const secondPassUi = Boolean(document.getElementById('auroraReprocessSecondPass')?.checked);
|
||||
const secondPass = Object.prototype.hasOwnProperty.call(incoming, 'second_pass')
|
||||
? Boolean(incoming.second_pass)
|
||||
: secondPassUi;
|
||||
|
||||
const analysisControls = auroraCollectAnalysisControls();
|
||||
const uiExport = auroraCollectExportOptions();
|
||||
const analysisExport = auroraBuildAnalysisExportHints(analysisControls);
|
||||
const mergedExport = { ...auroraSuggestedExport, ...uiExport, ...analysisExport, ...(incoming.export_options || {}) };
|
||||
let priority = incoming.priority || analysisControls.priority || auroraSuggestedPriority || 'balanced';
|
||||
if (typeof priority !== 'string' || !priority.trim()) priority = 'balanced';
|
||||
|
||||
const basePayload = {
|
||||
mode: auroraMode,
|
||||
priority,
|
||||
export_options: mergedExport,
|
||||
};
|
||||
|
||||
let sourceJobId = auroraJobId;
|
||||
let lastJobId = auroraJobId;
|
||||
try {
|
||||
const r = await fetch(`${API}/api/aurora/reprocess/${encodeURIComponent(auroraJobId)}`, {
|
||||
method: 'POST',
|
||||
headers: { 'Content-Type': 'application/json' },
|
||||
body: JSON.stringify(payload),
|
||||
});
|
||||
if (!r.ok) {
|
||||
const body = await r.text();
|
||||
throw new Error(body || `HTTP ${r.status}`);
|
||||
auroraStopPolling();
|
||||
for (let i = 1; i <= passes; i += 1) {
|
||||
const payload = { ...basePayload, ...incoming, second_pass: secondPass };
|
||||
const r = await fetch(`${API}/api/aurora/reprocess/${encodeURIComponent(sourceJobId)}`, {
|
||||
method: 'POST',
|
||||
headers: { 'Content-Type': 'application/json' },
|
||||
body: JSON.stringify(payload),
|
||||
});
|
||||
if (!r.ok) {
|
||||
const body = await r.text();
|
||||
throw new Error(body || `HTTP ${r.status}`);
|
||||
}
|
||||
const data = await r.json();
|
||||
const newJobId = String(data.job_id || '').trim();
|
||||
if (!newJobId) throw new Error('job_id missing in reprocess response');
|
||||
lastJobId = newJobId;
|
||||
auroraSetActiveJobId(newJobId);
|
||||
auroraSetSmartRunId(null);
|
||||
auroraSmartStatusCache = null;
|
||||
auroraSetSmartPolicyText(`reprocess ${i}/${passes}`);
|
||||
auroraStatusCache = null;
|
||||
auroraResultCache = null;
|
||||
auroraPollErrorCount = 0;
|
||||
auroraLastProgress = 1;
|
||||
auroraPollInFlight = false;
|
||||
const resultCard = document.getElementById('auroraResultCard');
|
||||
if (resultCard) resultCard.style.display = 'none';
|
||||
auroraSetProgress(1, 'processing', `dispatching reprocess ${i}/${passes}`);
|
||||
auroraUpdateQueuePosition(null);
|
||||
auroraUpdateTiming(0, null, null);
|
||||
auroraUpdateLivePerf(null, null);
|
||||
const cancelBtn = document.getElementById('auroraCancelBtn');
|
||||
if (cancelBtn) cancelBtn.style.display = 'inline-block';
|
||||
if (i < passes) {
|
||||
const done = await auroraWaitForTerminal(newJobId, { passLabel: `reprocess ${i}/${passes}` });
|
||||
const status = String(done?.status || '').toLowerCase();
|
||||
if (status !== 'completed') {
|
||||
throw new Error(`reprocess ${i}/${passes} завершився зі статусом ${status}`);
|
||||
}
|
||||
}
|
||||
sourceJobId = newJobId;
|
||||
}
|
||||
const data = await r.json();
|
||||
auroraSetActiveJobId(data.job_id);
|
||||
auroraSetSmartRunId(null);
|
||||
auroraSmartStatusCache = null;
|
||||
auroraSetSmartPolicyText('audio local');
|
||||
auroraStatusCache = null;
|
||||
auroraResultCache = null;
|
||||
auroraPollErrorCount = 0;
|
||||
auroraLastProgress = 1;
|
||||
auroraPollInFlight = false;
|
||||
const resultCard = document.getElementById('auroraResultCard');
|
||||
if (resultCard) resultCard.style.display = 'none';
|
||||
auroraSetProgress(1, 'processing', 'dispatching (reprocess)');
|
||||
auroraUpdateQueuePosition(null);
|
||||
auroraUpdateTiming(0, null, null);
|
||||
auroraUpdateLivePerf(null, null);
|
||||
const cancelBtn = document.getElementById('auroraCancelBtn');
|
||||
if (cancelBtn) cancelBtn.style.display = 'inline-block';
|
||||
auroraStopPolling();
|
||||
auroraPollTimer = setInterval(auroraPollStatus, 2000);
|
||||
await auroraPollStatus();
|
||||
auroraChatAdd('assistant', `Запустила reprocess: ${auroraJobId}`);
|
||||
auroraChatAdd('assistant', `Запустила reprocess ×${passes}: ${lastJobId}`);
|
||||
await auroraRefreshJobs();
|
||||
} catch (e) {
|
||||
alert(`Aurora reprocess error: ${e.message || e}`);
|
||||
} finally {
|
||||
if (reBtn) reBtn.disabled = false;
|
||||
auroraUpdateReprocessLabel();
|
||||
}
|
||||
}
|
||||
|
||||
@@ -3472,6 +3955,7 @@ async function auroraPollStatus() {
|
||||
});
|
||||
auroraUpdateQueuePosition(st.queue_position);
|
||||
auroraUpdateStorage(st.storage);
|
||||
auroraUpdateCancelButton(st.status, st.current_stage);
|
||||
const reBtn = document.getElementById('auroraReprocessBtn');
|
||||
if (reBtn) reBtn.disabled = !(st.status === 'completed' || st.status === 'failed' || st.status === 'cancelled');
|
||||
if (st.status === 'completed') {
|
||||
@@ -3604,10 +4088,19 @@ async function auroraStart() {
|
||||
|
||||
async function auroraCancel() {
|
||||
if (!auroraJobId) return;
|
||||
const cancelBtn = document.getElementById('auroraCancelBtn');
|
||||
if (cancelBtn) {
|
||||
cancelBtn.style.display = 'inline-block';
|
||||
cancelBtn.disabled = true;
|
||||
cancelBtn.textContent = 'Зупиняю...';
|
||||
}
|
||||
try {
|
||||
await fetch(`${API}/api/aurora/cancel/${encodeURIComponent(auroraJobId)}`, { method: 'POST' });
|
||||
await auroraPollStatus();
|
||||
await auroraRefreshJobs();
|
||||
} catch (_) {}
|
||||
} catch (_) {
|
||||
auroraUpdateCancelButton('processing', null);
|
||||
}
|
||||
}
|
||||
|
||||
async function auroraLoadResult(jobId) {
|
||||
@@ -3950,6 +4443,7 @@ function auroraInitTab() {
|
||||
auroraBindDropzone();
|
||||
auroraRefreshHealth();
|
||||
auroraUpdatePriorityLabel();
|
||||
auroraUpdateReprocessLabel();
|
||||
auroraSetSmartRunId(auroraSmartRunId);
|
||||
if (!auroraSmartRunId) {
|
||||
auroraSetSmartPolicyText('standby');
|
||||
|
||||
@@ -810,18 +810,18 @@ class SwapperService:
|
||||
# FLUX / Diffusion model loading
|
||||
logger.info(f"🎨 Loading diffusion model: {hf_name}")
|
||||
from diffusers import AutoPipelineForText2Image
|
||||
|
||||
diffusion_dtype = torch.bfloat16 if self.device == "cuda" else torch.float32
|
||||
pipeline = AutoPipelineForText2Image.from_pretrained(
|
||||
hf_name,
|
||||
torch_dtype=torch.bfloat16,
|
||||
use_safetensors=True
|
||||
torch_dtype=diffusion_dtype
|
||||
)
|
||||
pipeline.to(self.device)
|
||||
pipeline.enable_model_cpu_offload() # Optimize VRAM usage
|
||||
if self.device == "cuda":
|
||||
pipeline.enable_model_cpu_offload() # Optimize VRAM usage on CUDA
|
||||
|
||||
self.hf_models[model_name] = pipeline
|
||||
self.hf_processors[model_name] = None # No separate processor for diffusion
|
||||
logger.info(f"✅ Diffusion model loaded: {model_name} with CPU offload enabled")
|
||||
logger.info(f"✅ Diffusion model loaded: {model_name} (device={self.device})")
|
||||
|
||||
else:
|
||||
# Generic loading
|
||||
|
||||
@@ -38,3 +38,12 @@ storage:
|
||||
models_dir: /app/models
|
||||
cache_dir: /app/cache
|
||||
swap_dir: /app/swap
|
||||
|
||||
models:
|
||||
flux-klein-4b:
|
||||
path: huggingface:segmind/tiny-sd
|
||||
type: image_generation
|
||||
size_gb: 0.7
|
||||
priority: medium
|
||||
capabilities:
|
||||
- image_generation
|
||||
|
||||
Reference in New Issue
Block a user