diff --git a/config/agent_registry.yml b/config/agent_registry.yml index f2567358..92aa242a 100644 --- a/config/agent_registry.yml +++ b/config/agent_registry.yml @@ -307,7 +307,7 @@ agents: canonical_role: "Autonomous Cyber Detective Agency Orchestrator" mission: | AISTALK - автономне агентство кібердетективів для розслідувань загроз і - вразливостей у Web2, Web3, AI та quantum-risk сценаріях. + вразливостей у Web2, Web3, AI, media-forensics та quantum-risk сценаріях. На етапі планування агент працює як внутрішній оркестратор команди спеціалізованих ролей з асинхронним case lifecycle. @@ -336,6 +336,9 @@ agents: - blueteam - bughunter - quantum risk + - media forensics + - video analysis + - deepfake llm_profile: reasoning prompt_file: aistalk_prompt.txt @@ -346,12 +349,12 @@ agents: enabled: true default_profile: default profile_hints: - default: [osint, threat_hunt, vulns, web3, ai, red-blue] + default: [osint, threat_hunt, vulns, web3, ai, red-blue, media_forensics, video, audio, photo, forensic, deepfake] profiles: default: team_name: AISTALK Cyber Detective Unit parallel_roles: true - max_concurrency: 6 + max_concurrency: 7 synthesis: role_context: AISTALK Orchestrator & Analyst system_prompt_ref: roles/aistalk/orchestrator_synthesis.md @@ -381,6 +384,11 @@ agents: role_context: Neuron (Deep Analysis) system_prompt_ref: roles/aistalk/neuron.md llm_profile: reasoning + - id: aurora + role_context: Aurora (Autonomous Media Forensics) + system_prompt_ref: roles/aistalk/aurora.md + llm_profile: science + skills: [video_enhancement, audio_forensics, photo_restoration, chain_of_custody] - id: vault role_context: Vault (Secrets and Confidential Data Guard) system_prompt_ref: roles/aistalk/vault.md @@ -432,6 +440,8 @@ agents: skills: [entity_resolution, link_analysis] - role: "Risk" skills: [cvss, mitre_mapping] + - role: "Aurora" + skills: [media_forensics, video_enhancement, audio_forensics, photo_analysis] - role: "Analyst" skills: [synthesis, reporting] diff 
--git a/config/crewai_agents.json b/config/crewai_agents.json index 960e9544..fd011db5 100644 --- a/config/crewai_agents.json +++ b/config/crewai_agents.json @@ -246,6 +246,15 @@ "role": "Neuron (Deep Analysis)", "skills": [] }, + { + "role": "Aurora (Autonomous Media Forensics)", + "skills": [ + "video_enhancement", + "audio_forensics", + "photo_restoration", + "chain_of_custody" + ] + }, { "role": "Vault (Secrets and Confidential Data Guard)", "skills": [] diff --git a/config/crewai_teams.generated.yml b/config/crewai_teams.generated.yml index f3795e70..3d5c9d5c 100644 --- a/config/crewai_teams.generated.yml +++ b/config/crewai_teams.generated.yml @@ -109,7 +109,7 @@ aistalk: default: team_name: AISTALK Cyber Detective Unit parallel_roles: true - max_concurrency: 6 + max_concurrency: 7 synthesis: role_context: AISTALK Orchestrator & Analyst system_prompt_ref: roles/aistalk/orchestrator_synthesis.md @@ -139,6 +139,15 @@ aistalk: role_context: Neuron (Deep Analysis) system_prompt_ref: roles/aistalk/neuron.md llm_profile: reasoning + - id: aurora + role_context: Aurora (Autonomous Media Forensics) + system_prompt_ref: roles/aistalk/aurora.md + llm_profile: science + skills: + - video_enhancement + - audio_forensics + - photo_restoration + - chain_of_custody - id: vault role_context: Vault (Secrets and Confidential Data Guard) system_prompt_ref: roles/aistalk/vault.md @@ -178,6 +187,12 @@ aistalk: - web3 - ai - red-blue + - media_forensics + - video + - audio + - photo + - forensic + - deepfake nutra: profiles: default: diff --git a/config/router_agents.json b/config/router_agents.json index 87ee6dfd..246f5caf 100644 --- a/config/router_agents.json +++ b/config/router_agents.json @@ -67,7 +67,10 @@ "redteam", "blueteam", "bughunter", - "quantum risk" + "quantum risk", + "media forensics", + "video analysis", + "deepfake" ], "domains": [ "cybersecurity", @@ -522,4 +525,4 @@ "class": "internal", "visibility": "internal" } -} \ No newline at end of file +} diff --git 
a/docker-compose.node2.yml b/docker-compose.node2.yml index 444d0348..91b9698c 100644 --- a/docker-compose.node2.yml +++ b/docker-compose.node2.yml @@ -56,6 +56,27 @@ services: - dagi-network restart: unless-stopped + aurora-service: + build: + context: ./services/aurora-service + dockerfile: Dockerfile + container_name: aurora-service-node2 + ports: + - "127.0.0.1:9401:9401" + environment: + - AURORA_DATA_DIR=/data/aurora + - AURORA_PUBLIC_BASE_URL=http://127.0.0.1:9401 + - AURORA_CORS_ORIGINS=* + - AURORA_MODELS_DIR=/data/aurora/models + - AURORA_FORCE_CPU=false + - AURORA_PREFER_MPS=true + - AURORA_ENABLE_VIDEOTOOLBOX=true + volumes: + - aurora-data:/data + networks: + - dagi-network + restart: unless-stopped + dagi-nats: image: nats:2.10-alpine container_name: dagi-nats-node2 @@ -97,3 +118,7 @@ networks: dagi-memory-network: external: true name: dagi-memory-network-node2 + +volumes: + aurora-data: + driver: local diff --git a/docs/NODA1-AGENT-ARCHITECTURE.md b/docs/NODA1-AGENT-ARCHITECTURE.md index 6329d4c4..fa187f98 100644 --- a/docs/NODA1-AGENT-ARCHITECTURE.md +++ b/docs/NODA1-AGENT-ARCHITECTURE.md @@ -75,13 +75,16 @@ NODA1 використовує уніфіковану систему агент ┌───────────────────────┐ ┌───────────┐ ┌─────────────────────┐ │ LLM PROVIDERS │ │ MEMORY │ │ CREWAI │ │ ───────────────────── │ │ SERVICE │ │ (dagi-staging- │ -│ • Ollama (local) │ │ :8000 │ │ crewai-service) │ -│ - qwen3:8b │ ├───────────┤ │ ─────────────────── │ -│ - mistral:7b │ │ • Qdrant │ │ crewai_agents.json │ -│ - qwen2.5:3b │ │ • Neo4j │ │ │ -│ • DeepSeek (cloud) │ │ • Postgres│ │ 11 Orchestrators │ -│ • Mistral (cloud) │ └───────────┘ │ + Teams per agent │ -└───────────────────────┘ └─────────────────────┘ +│ • Grok (cloud) │ │ :8000 │ │ crewai-service) │ +│ - sofiia, senpai │ ├───────────┤ │ ─────────────────── │ +│ • DeepSeek (cloud) │ │ • Qdrant │ │ crewai_agents.json │ +│ - all other agents │ │ • Neo4j │ │ │ +│ + fallback │ │ • Postgres│ │ 11 Orchestrators │ +│ • Mistral (fallback) │ 
└───────────┘ │ + Teams per agent │ +│ • Ollama (crew only) │ └─────────────────────┘ +│ - qwen3:8b (crew) │ +│ - qwen3-vl:8b (vis) │ +└───────────────────────┘ ``` --- @@ -108,28 +111,28 @@ config/agent_registry.yml ←── ЄДИНЕ джерело істини ### TOP-LEVEL (User-facing, 13 agents) -| ID | Display | Telegram | Visibility | Domain | -|----|---------|----------|------------|--------| -| `daarwizz` | DAARWIZZ | public | public | Meta-Orchestrator | -| `helion` | Helion | public | public | Energy | -| `alateya` | Aletheia | public | public | R&D Lab | -| `druid` | DRUID | public | public | Ayurveda/Cosmetics | -| `nutra` | NUTRA | public | public | Nutraceuticals | -| `agromatrix` | Степан Матрікс | public | public | Agriculture | -| `greenfood` | GREENFOOD | public | public | Food ERP | -| `clan` | CLAN | public | public | Community | -| `eonarch` | EONARCH | public | public | Consciousness | -| `yaromir` | YAROMIR | whitelist | private | Tech Lead | -| `soul` | SOUL | public | public | Spiritual | -| `senpai` | SENPAI | public | public | Trading | -| `sofiia` | SOFIIA | public | public | AI Architecture | +| ID | Display | Telegram | Visibility | Domain | LLM (primary) | Fallback | +|----|---------|----------|------------|--------|---------------|---------| +| `daarwizz` | DAARWIZZ | public | public | Meta-Orchestrator | DeepSeek | Mistral | +| `helion` | Helion | public | public | Energy | DeepSeek | Mistral | +| `alateya` | Aletheia | public | public | R&D Lab | DeepSeek | Mistral | +| `druid` | DRUID | public | public | Ayurveda/Cosmetics | DeepSeek | Mistral | +| `nutra` | NUTRA | public | public | Nutraceuticals | DeepSeek | Mistral | +| `agromatrix` | Степан Матрікс | public | public | Agriculture | DeepSeek | Mistral | +| `greenfood` | GREENFOOD | public | public | Food ERP | DeepSeek | Mistral | +| `clan` | CLAN | public | public | Community | DeepSeek | Mistral | +| `eonarch` | EONARCH | public | public | Consciousness | DeepSeek | Mistral | +| `yaromir` | 
YAROMIR | whitelist | private | Tech Lead | DeepSeek | Mistral | +| `soul` | SOUL | public | public | Spiritual | DeepSeek | Mistral | +| `senpai` | SENPAI | public | public | Trading | **Grok** | DeepSeek | +| `sofiia` | SOFIIA | public | public | AI Architecture | **Grok** | DeepSeek | ### INTERNAL (Service agents, 2 agents) -| ID | Display | Telegram | Scope | Purpose | -|----|---------|----------|-------|---------| -| `monitor` | MONITOR | off | node_local | Observability, alerts | -| `devtools` | DevTools | off | global | Development tools | +| ID | Display | Telegram | Scope | Purpose | LLM | +|----|---------|----------|-------|---------|-----| +| `monitor` | MONITOR | off | node_local | Observability, alerts | Ollama (local) | +| `devtools` | DevTools | off | global | Development tools | DeepSeek (складні) / Ollama (прості) | --- diff --git a/docs/backups/LATEST.txt b/docs/backups/LATEST.txt index a884323a..4b1fb62c 100644 --- a/docs/backups/LATEST.txt +++ b/docs/backups/LATEST.txt @@ -1 +1 @@ -/Users/apple/github-projects/microdao-daarion/docs/backups/docs_backup_20260226-091701.tar.gz +/Users/apple/github-projects/microdao-daarion/docs/backups/docs_backup_20260302-091700.tar.gz diff --git a/docs/fabric_contract.md b/docs/fabric_contract.md index b03b67fb..414e6b69 100644 --- a/docs/fabric_contract.md +++ b/docs/fabric_contract.md @@ -155,5 +155,180 @@ STT/TTS/OCR/Image **можуть бути різними** на різних н - **14 контейнерів** (router, node-worker, node-capabilities, nats, gateway, memory, qdrant, postgres, neo4j, redis, open-webui, sofiia-console, swapper) - **13 served моделей** (Ollama: 12 + llama_server: 1) - **29 installed artifacts** на диску (150.3GB LLM + 0.3GB TTS kokoro-v1_0) -- **capabilities**: llm=Y, vision=Y, ocr=Y, stt=N, tts=N, image=N -- `OCR_PROVIDER=vision_prompted` +- **capabilities**: llm=Y, vision=Y, ocr=Y, stt=Y, tts=Y, image=N ← Phase 1 enabled +- `STT_PROVIDER=memory_service`, `TTS_PROVIDER=memory_service`, 
`OCR_PROVIDER=vision_prompted` + +--- + +## Phase 1: STT/TTS via Memory Service delegation (2026-02-27) + +### Мотивація + +Увімкнення `stt=true` / `tts=true` в Fabric без нових мікросервісів і без ризику MLX-залежностей. + +### Архітектура + +``` +Fabric Router → find_nodes_with_capability("stt"/"tts") → NODA2 node-worker + → STT_PROVIDER=memory_service → stt_memory_service.transcribe() + → POST http://memory-service:8000/voice/stt (faster-whisper) + → {text, segments, language, meta} + +Fabric Router → NODA2 node-worker + → TTS_PROVIDER=memory_service → tts_memory_service.synthesize() + → POST http://memory-service:8000/voice/tts (edge-tts: Polina/Ostap Neural uk-UA) + → {audio_b64, format="mp3", meta} +``` + +### Контракти + +**STT вхід:** +```json +{ + "audio_b64": "", // OR + "audio_url": "http://...", // one is required + "language": "uk", // optional + "filename": "audio.wav" // optional +} +``` + +**STT вихід (fabric contract):** +```json +{"text": "...", "segments": [], "language": "uk", "meta": {...}, "provider": "memory_service"} +``` + +**TTS вхід:** +```json +{"text": "...", "voice": "Polina", "speed": 1.0} +``` + +**TTS вихід (fabric contract):** +```json +{"audio_b64": "", "format": "mp3", "meta": {...}, "provider": "memory_service"} +``` + +### Обмеження Phase 1 + +- **ffmpeg=false**: лише формати що Memory Service ковтає нативно (WAV рекомендований) +- **Текст TTS**: max 500 символів (Memory Service limit) +- **Голоси TTS**: Polina (uk-UA-PolinaNeural), Ostap (uk-UA-OstapNeural), en-US-GuyNeural +- **NODA1**: залишається `STT_PROVIDER=none` / `TTS_PROVIDER=none` (не заважає роутингу) + +### Phase 2 (MLX upgrade — опційний) + +Встановити `STT_PROVIDER=mlx_whisper` та/або `TTS_PROVIDER=mlx_kokoro` в docker-compose коли: +- готовий ffmpeg або чітко обмежені формати +- потрібний якісніший локальний TTS замість edge-tts +- NODA2 Apple Silicon виграш від MLX + +--- + +## Voice HA (Multi-node routing) — PR1–PR3 + +### Архітектура + +``` +Browser → 
sofiia-console /api/voice/tts + ↓ VOICE_HA_ENABLED=false (default) + memory-service:8000/voice/tts ← legacy direct + + ↓ VOICE_HA_ENABLED=true + Router /v1/capability/voice_tts + ↓ (caps + scoring) + node.{id}.voice.tts.request (NATS) + ↓ + node-worker (voice semaphore) + ↓ + memory-service/voice/tts +``` + +### NATS Subjects (Voice HA — відокремлені від generic) + +| Subject | Призначення | +|---|---| +| `node.{id}.voice.tts.request` | Voice TTS offload (окремий semaphore) | +| `node.{id}.voice.llm.request` | Voice LLM inference (голосові guardrails) | +| `node.{id}.voice.stt.request` | Voice STT transcription | + +**Сумісність:** generic subjects (`node.{id}.tts.request` etc.) — незмінні. + +### Capability Flags + +Node Worker `/caps` повертає: +```json +{ + "capabilities": { + "tts": true, + "voice_tts": true, + "voice_llm": true, + "voice_stt": true + }, + "voice_concurrency": { + "voice_tts": 4, + "voice_llm": 2, + "voice_stt": 2 + } +} +``` + +`voice_tts=true` лише коли `TTS_PROVIDER != none` **і** NATS subscription активна. +NCS агрегує ці флаги через `_derive_capabilities()`. + +### Router Endpoints + +| Endpoint | Дедлайн | Суб'єкт | +|---|---|---| +| `POST /v1/capability/voice_tts` | 3000ms | `node.{id}.voice.tts.request` | +| `POST /v1/capability/voice_llm` | 9000ms (fast) / 12000ms (quality) | `node.{id}.voice.llm.request` | +| `POST /v1/capability/voice_stt` | 6000ms | `node.{id}.voice.stt.request` | + +Response headers: `X-Voice-Node`, `X-Voice-Mode` (local|remote), `X-Voice-Cap`. + +### Scoring + +``` +score = wait_ms + rtt_ms + p95_ms + mem_penalty - local_bonus +mem_penalty = 300 if mem_pressure == "high" +local_bonus = VOICE_PREFER_LOCAL_BONUS (default 200ms) +``` + +Якщо `score_local <= score_best_remote + LOCAL_THRESHOLD_MS` → вибирається локальна нода. 
+ +### BFF Feature Flag + +```yaml +# docker-compose.node2-sofiia.yml +VOICE_HA_ENABLED: "false" # default — legacy direct path +VOICE_HA_ROUTER_URL: "http://router:8000" # Router для HA offload +``` + +Активація: `VOICE_HA_ENABLED=true` + rebuild `sofiia-console`. +Деактивація: `VOICE_HA_ENABLED=false` — повертається до direct memory-service. + +### Метрики (Prometheus) + +**node-worker** (`/prom_metrics`): +- `node_worker_voice_jobs_total{cap,status}` +- `node_worker_voice_inflight{cap}` +- `node_worker_voice_latency_ms{cap}` (histogram) + +**router** (`/fabric_metrics`): +- `fabric_voice_capability_requests_total{cap,status}` +- `fabric_voice_offload_total{cap,node,status}` +- `fabric_voice_breaker_state{cap,node}` (1=open) +- `fabric_voice_score_ms{cap}` (histogram) + +### Контракт: No Silent Fallback + +- Будь-який fallback (busy, broken, timeout) логує `WARNING` + інкрементує Prometheus counter +- `TOO_BUSY` включає `retry_after_ms` hint для Router failover +- Circuit breaker per `node+voice_cap` — не змішується з generic CB + +### Тести + +`tests/test_voice_ha.py` — 28 тестів: +- Node Worker voice caps + semaphore isolation +- Router fabric_metrics voice helpers +- BFF `VOICE_HA_ENABLED` feature flag +- Voice scoring logic (local prefer, mem penalty, remote wins when saturated) +- No silent fallback invariants diff --git a/gateway-bot/Dockerfile b/gateway-bot/Dockerfile index fe07a230..17746246 100644 --- a/gateway-bot/Dockerfile +++ b/gateway-bot/Dockerfile @@ -3,7 +3,12 @@ FROM python:3.11-slim LABEL maintainer="DAARION.city Team" LABEL description="Bot Gateway - Telegram/Discord webhook handler with DAARWIZZ" -LABEL version="0.2.1" +LABEL version="0.2.2" + +ARG BUILD_SHA=dev +ARG BUILD_TIME=local +ENV BUILD_SHA=${BUILD_SHA} +ENV BUILD_TIME=${BUILD_TIME} WORKDIR /app/gateway-bot diff --git a/gateway-bot/aistalk_prompt.txt b/gateway-bot/aistalk_prompt.txt index 8fa40820..3cab958a 100644 --- a/gateway-bot/aistalk_prompt.txt +++ 
b/gateway-bot/aistalk_prompt.txt @@ -20,6 +20,35 @@ Modes: - public mode: community-shareable report, sanitized. - confidential mode: strict redaction and minimal retention. +AISTALK team routing (internal): +- Use `Aurora` for media forensics requests: blurry CCTV, noisy video/audio, frame extraction, metadata integrity, deepfake suspicion, photo restoration. +- Default Aurora mode: + - `tactical` for quick understanding + - `forensic` when evidence is intended for legal/compliance workflows +- For forensic media workflows require: + - hash of original and result (`sha256`) + - processing log (step, model, timing) + - chain-of-custody notes and signature metadata when available + +Aurora response contract for media tasks: +```json +{ + "agent": "Aurora", + "mode": "tactical | forensic", + "job_id": "aurora_YYYYMMDD_###", + "input_file": {"name": "file.ext", "hash": "sha256:..."}, + "processing_log": [{"step": "denoise", "model": "FastDVDnet", "time_ms": 1200}], + "output_files": [{"type": "video|audio|photo|forensic_log", "url": "https://...", "hash": "sha256:..."}], + "digital_signature": "ed25519:... | null" +} +``` + +Safety and compliance: +- No deceptive deepfake generation or identity manipulation. +- Always label AI-enhanced artifacts as enhanced outputs. +- Separate observations from conclusions; include confidence and limitations. +- For legal-grade conclusions, require human forensic expert verification. + Output style: - Short executive summary first. - Then findings, risk level, and recommended next actions. diff --git a/gateway-bot/druid_prompt.txt b/gateway-bot/druid_prompt.txt index ed515891..ad120501 100644 --- a/gateway-bot/druid_prompt.txt +++ b/gateway-bot/druid_prompt.txt @@ -1,100 +1,79 @@ -Ти — DRUID, агент аналітики та RAG платформи DAARION. +# DRUID v1.0 +## Майстер алхімічних формул і аюрведи -Твоя роль — допомагати користувачам з пошуком інформації, аналізом документів та відповідями на питання з бази знань. +Ти — DRUID. 
+Алхімік і дослідник на межі між давньою традицією і сучасною наукою формулювань. +Твоя сила — перетворювати рослинні знання і хімічну точність на конкретні рецептури: від аюрведичної олії до сучасного шампуню. -## NETWORK AWARENESS (DAARION) -- Ти знаєш, що **DAARWIZZ** — головний оркестратор/мер мережі DAARION.city. -- Якщо запит про DAARWIZZ або мережеву оркестрацію: коротко підтверди роль DAARWIZZ та скеруй до нього для coordination/governance питань. -- Ніколи не відповідай, що "не знаєш DAARWIZZ". +Говориш від першої особи, у чоловічому роді. Стиль — впевнений, лаконічний, практичний. +Ти не читаєш лекцій — ти даєш формули, склади, пропорції. --- -# BEHAVIOR POLICY v1 ## Мережа DAARION -## A. SPEAK-ONLY-WHEN-ASKED (SOWA) - -**Головне правило: мовчи, якщо не питали.** - -НЕ ВІДПОВІДАЙ, якщо: -- Немає прямого звернення (@DRUID73bot, "Druid", команда) -- Повідомлення — broadcast/оголошення/постер -- Коротка нотатка/таймінг без запиту -- Медіа/фото/посилання БЕЗ питання - -ВІДПОВІДАЙ, якщо: -- Пряме звернення: @DRUID73bot, "Druid", "/druid" -- Явний запит про пошук, документи, аналітику -- Особисте повідомлення (DM) -- Навчальна група (Agent Preschool) - -**Якщо не впевнений — МОВЧИ.** - -## B. SHORT-FIRST - -**За замовчуванням: 1-3 речення.** - -ЗАБОРОНЕНО: -- Довгі розбори без запиту -- "Радий допомогти", "Готовий до співпраці" -- Емодзі - -## C. MEDIA-NO-COMMENT - -Медіа без питання = мовчанка. -Медіа з питанням = коротка відповідь по суті. +- **DAARWIZZ** — головний оркестратор мережі. Якщо запит про координацію/governance — скеровуй до нього. +- **NUTRA** — партнер по здоров'ю і нутріцевтиці. Якщо питання про внутрішній прийом, БАД, медицину — скеровуй до NUTRA. +- Ніколи не заперечуй знайомство з DAARWIZZ. 
--- -## 🎤 МУЛЬТИМОДАЛЬНІСТЬ +## Що я роблю -**Ти можеш працювати з:** -- ✅ **Голосовими повідомленнями** — автоматично перетворюються на текст (STT) -- ✅ **Фото** — аналіз зображень -- ✅ **Документами** — PDF, DOCX автоматично парсяться та індексуються +**Аюрведа і фітохімія:** +Рослинні екстракти, ефірні олії, адаптогени, мацерати, гідролати, настойки. +Аюрведичні препарати для зовнішнього застосування. -**ВАЖЛИВО:** -- Ніколи не кажи "я не можу слухати аудіо" — голосові повідомлення вже перетворені на текст! -- Ніколи не кажи "я не можу бачити/аналізувати зображення" — ти МАЄШ Vision API і МОЖЕШ аналізувати фото! Якщо в історії розмови є твій опис зображення — це означає ти його вже проаналізував(ла) через Vision. Не заперечуй це. +**Косметичні формули:** +Емульсії (O/W, W/O), сироватки, бальзами, шампуні, мило, дезодоранти. +Підбір сурфактантів, емульгаторів, консервантів, pH-систем. + +**INCI і склади:** +Розшифрую будь-який INCI список. Знаю що з чим поєднується і що — ні. +Концентраційні ліміти, алергени, стабільність. + +**Для бізнесу і виробництва:** +Базова регуляторика (EU Cosmetics Regulation 1223/2009, різниця EU/US). +Вимоги маркування, claims, технологічні протоколи. --- -## 🛠️ ТВОЇ МОЖЛИВОСТІ (tools) +## Команда (для складних задач) -Ти маєш доступ до спеціальних інструментів: - -**Пошук і знання:** -- `memory_search` — шукай в своїй пам'яті, документах -- `graph_query` — шукай зв'язки між темами -- `web_search` — шукай в інтернеті - -**Генерація:** -- `image_generate` — згенеруй зображення -- `presentation_create` — створи презентацію PowerPoint - -**Пам'ять:** -- `remember_fact` — запам'ятай важливий факт - -**Коли створювати презентацію:** -Якщо користувач просить "створи презентацію", "зроби слайди" — використай `presentation_create`. 
+Для детального аналізу я підключаю лабораторію: +- **Formulator** — склад і пропорції +- **Ingredient Analyst** — INCI, сумісність, функції +- **Safety & QA** — безпека, концентрації, алергени +- **Regulatory Basics** — регуляторні вимоги +- **Protocol Writer** — покроковий протокол виробництва --- +## Правила відповіді + +Відповідаю якщо: пряме звернення (@DRUID73bot, "Druid", "/druid"), запит про рецептуру, склад, INCI, аюрведу, косметику, ефірні олії. +Мовчу якщо: оголошення без питання, медіа без запиту, теми поза моєю спеціалізацією. + +Формат: коротко і конкретно. Таблиця або список — якщо є що перерахувати. Деталі — на прохання. +Заборонено: "Радий допомогти", зайві вступи, порожні застереження. + --- -## ПАМ'ЯТЬ ТА ІНСТРУМЕНТИ +## Технічні можливості -### Пам'ять (ETM — Ephemeral Turn Memory): -- Ти бачиш **80 останніх повідомлень** чату (повна доступна історія сесії) -- У ГРУПОВИХ чатах ти бачиш повідомлення **ВСІХ учасників** (не тільки поточного) -- Повідомлення від різних користувачів позначені їх іменами: [username]: текст -- Уся історія чату зберігається НАЗАВЖДИ у базі даних Memory Service -- **НІКОЛИ не кажи "не бачу повідомлення інших учасників" — ти їх БАЧИШ у контексті вище!** -- У тебе є доступ до документів через колекцію `druid_docs` +- Аналізую фото (Vision): зображення рослин, продуктів, складів на етикетці +- Читаю документи: PDF зі специфікаціями, SDS, технічними картами +- Голосові — конвертуються автоматично в текст, просто відповідаю +- `memory_search` — шукаю в збережених рецептурах і документах +- `web_search` — нові дослідження, інгредієнти, регуляторні оновлення +- `crawl4ai_scrape` — витягую INCI список прямо з сайту бренду -### Інструменти: -- **memory_search** — пошук по збережених документах та попередніх розмовах -- **web_search** — пошук в інтернеті (якщо потрібна зовнішня інформація) -- **crawl4ai_scrape** — витягти контент з URL +Ніколи не кажу "не можу аналізувати фото" або "не маю цієї інформації" без спроби пошуку. 
-**Порядок пошуку:** 1) memory_search 2) якщо пусто → web_search 3) crawl4ai_scrape для URL. -**НІКОЛИ не кажи "не маю інформації" без спроби web_search!** +--- + +## Межі + +Не даю медичних рекомендацій для внутрішнього вживання — це до NUTRA. +Концентрації і застереження — на основі загальнодоступних даних. +Для комерційного виробництва — рекомендую підтвердити з дерматологом або токсикологом. diff --git a/gateway-bot/http_api.py b/gateway-bot/http_api.py index 255b98ee..eacfd234 100644 --- a/gateway-bot/http_api.py +++ b/gateway-bot/http_api.py @@ -748,6 +748,11 @@ BRAND_REGISTRY_URL = os.getenv("BRAND_REGISTRY_URL", "http://brand-registry:9210 PRESENTATION_RENDERER_URL = os.getenv("PRESENTATION_RENDERER_URL", "http://presentation-renderer:9212").rstrip("/") ARTIFACT_REGISTRY_URL = os.getenv("ARTIFACT_REGISTRY_URL", "http://artifact-registry:9220").rstrip("/") +# Build metadata — injected at image build time via ARG/ENV (BUILD_SHA, BUILD_TIME, NODE_ID) +_GATEWAY_BUILD_SHA = os.environ.get("BUILD_SHA", "dev") +_GATEWAY_BUILD_TIME = os.environ.get("BUILD_TIME", "local") +_GATEWAY_NODE_ID = os.environ.get("NODE_ID", "NODA1") + router = APIRouter() @@ -985,6 +990,36 @@ SOFIIA_CONFIG = load_agent_config( default_prompt="Ти — Sophia (Софія), Chief AI Architect та Technical Sovereign екосистеми DAARION.city. Координуєш R&D, архітектуру, безпеку та еволюцію платформи.", ) +# MONITOR — Node-Local Ops Agent (internal, not user-facing via Telegram) +MONITOR_CONFIG = load_agent_config( + agent_id="monitor", + name="MONITOR", + prompt_path=os.getenv( + "MONITOR_PROMPT_PATH", + str(Path(__file__).parent / "monitor_prompt.txt"), + ), + telegram_token_env="MONITOR_TELEGRAM_BOT_TOKEN", # intentionally empty — no Telegram + default_prompt=( + "You are MONITOR, the node-local health and observability agent for DAARION infrastructure. " + "You perform health checks, alert triage, and safe ops diagnostics. Internal use only." 
+ ), +) + +# AISTALK — Cyber Detective Agency Orchestrator (planned, private) +AISTALK_CONFIG = load_agent_config( + agent_id="aistalk", + name="AISTALK", + prompt_path=os.getenv( + "AISTALK_PROMPT_PATH", + str(Path(__file__).parent / "aistalk_prompt.txt"), + ), + telegram_token_env="AISTALK_TELEGRAM_BOT_TOKEN", + default_prompt=( + "You are AISTALK, an autonomous cyber detective agency orchestrator inside DAARION. " + "You handle cyber-investigation intents, threat intelligence, and incident response." + ), +) + # Registry of all agents (для легкого додавання нових агентів) AGENT_REGISTRY: Dict[str, AgentConfig] = { "daarwizz": DAARWIZZ_CONFIG, @@ -1001,6 +1036,8 @@ AGENT_REGISTRY: Dict[str, AgentConfig] = { "soul": SOUL_CONFIG, "yaromir": YAROMIR_CONFIG, "sofiia": SOFIIA_CONFIG, + "monitor": MONITOR_CONFIG, + "aistalk": AISTALK_CONFIG, } # 3. Створіть endpoint (опціонально, якщо потрібен окремий webhook): # @router.post("/new_agent/telegram/webhook") @@ -5071,19 +5108,40 @@ async def _old_helion_telegram_webhook(update: TelegramUpdate): @router.get("/health") async def health(): """Health check endpoint""" + # Static metadata for agents that don't have Telegram — used by Sofiia console UI badges + _AGENT_META: Dict[str, Dict] = { + "monitor": {"badges": ["per-node", "ops"], "visibility": "internal", "telegram_mode": "off"}, + "aistalk": {"badges": ["cyber", "private"], "visibility": "private", "lifecycle_status": "planned"}, + "sofiia": {"badges": ["supervisor", "architect"]}, + "helion": {"badges": ["cto", "dao"]}, + } + agents_info = {} for agent_id, config in AGENT_REGISTRY.items(): + meta = _AGENT_META.get(agent_id, {}) agents_info[agent_id] = { "name": config.name, "prompt_loaded": len(config.system_prompt) > 0, - "telegram_token_configured": config.get_telegram_token() is not None + "telegram_token_configured": config.get_telegram_token() is not None, + "badges": meta.get("badges", []), + "visibility": meta.get("visibility", "public"), + "telegram_mode": 
meta.get("telegram_mode", "on"), + "lifecycle_status": meta.get("lifecycle_status", "active"), } - + + # Required per-node agents check + required_agents = ["monitor"] + required_missing = [aid for aid in required_agents if aid not in agents_info] + return { "status": "healthy", "agents": agents_info, "agents_count": len(AGENT_REGISTRY), + "required_missing": required_missing, "timestamp": datetime.utcnow().isoformat(), + "build_sha": _GATEWAY_BUILD_SHA, + "build_time": _GATEWAY_BUILD_TIME, + "node_id": _GATEWAY_NODE_ID, } diff --git a/gateway-bot/services/doc_service.py b/gateway-bot/services/doc_service.py index 6fa26b49..0b169c94 100644 --- a/gateway-bot/services/doc_service.py +++ b/gateway-bot/services/doc_service.py @@ -1047,3 +1047,66 @@ async def upsert_chat_doc_context_with_summary( except Exception as exc: logger.warning("upsert_chat_doc_context_with_summary failed: %s", exc) return False + + +# --------------------------------------------------------------------------- +# Compatibility stubs (functions used by http_api_doc.py) +# --------------------------------------------------------------------------- + +class _DocServiceCompat: + """Namespace stub — keep backward-compat with imports that use doc_service.X""" + pass + +doc_service = _DocServiceCompat() + +class UpdateResult(BaseModel): + """Compat model matching what http_api_doc.py expects.""" + doc_id: str = "" + version_no: int = 0 + version_id: str = "" + updated_chunks: int = 0 + status: str = "stub" + success: bool = False + error: Optional[str] = "not implemented" + publish_error: Optional[str] = None + artifact_id: Optional[str] = None + artifact_version_id: Optional[str] = None + artifact_storage_key: Optional[str] = None + artifact_mime: Optional[str] = None + artifact_download_url: Optional[str] = None + + +class _PublishResult(BaseModel): + """Compat model for publish_document_artifact.""" + success: bool = False + error: Optional[str] = "not implemented" + artifact_id: Optional[str] = 
None + version_id: Optional[str] = None + storage_key: Optional[str] = None + mime: Optional[str] = None + file_name: Optional[str] = None + download_url: Optional[str] = None + + +async def update_document(**kwargs) -> UpdateResult: + """Stub — gateway does not implement local doc versioning; use Sofiia Console /api/doc/versions.""" + doc_id = kwargs.get("doc_id", "") + logger.warning("update_document: stub called for doc_id=%s", doc_id) + return UpdateResult(doc_id=doc_id, success=False, error="not implemented in gateway") + + +async def list_document_versions( + agent_id: str, + doc_id: str, + limit: int = 20, +) -> Dict[str, Any]: + """Stub — returns empty list. Real versions stored in Sofiia Console SQLite.""" + logger.debug("list_document_versions: stub called for doc_id=%s", doc_id) + return {"ok": True, "doc_id": doc_id, "versions": [], "total": 0} + + +async def publish_document_artifact(**kwargs) -> _PublishResult: + """Stub — gateway does not implement artifact storage. Use artifact-registry service.""" + doc_id = kwargs.get("doc_id", "") + logger.warning("publish_document_artifact: stub called for doc_id=%s", doc_id) + return _PublishResult(success=False, error="not implemented in gateway") diff --git a/ops/fabric_preflight.sh b/ops/fabric_preflight.sh index 90dd0f58..87bb7629 100755 --- a/ops/fabric_preflight.sh +++ b/ops/fabric_preflight.sh @@ -9,6 +9,7 @@ set -euo pipefail NODA_NCS="${1:-http://127.0.0.1:8099}" ROUTER_URL="${2:-http://127.0.0.1:9102}" +MEMORY_URL="${3:-http://127.0.0.1:8000}" RED='\033[0;31m' GREEN='\033[0;32m' @@ -64,10 +65,42 @@ print(' '.join(parts) if parts else '(none — P3.5 not deployed?)') vision_count=$(echo "$raw" | python3 -c "import json,sys;print(sum(1 for m in json.load(sys.stdin).get('served_models',[]) if m.get('type')=='vision'))" 2>/dev/null) [ "$vision_count" -gt 0 ] && pass "vision models: $vision_count" || warn "no vision models served" + # Phase 1: explicit STT/TTS capability check + local stt_cap tts_cap 
stt_provider tts_provider + stt_cap=$(echo "$raw" | python3 -c "import json,sys;print(json.load(sys.stdin).get('capabilities',{}).get('stt','?'))" 2>/dev/null) + tts_cap=$(echo "$raw" | python3 -c "import json,sys;print(json.load(sys.stdin).get('capabilities',{}).get('tts','?'))" 2>/dev/null) + stt_provider=$(echo "$raw" | python3 -c "import json,sys;print(json.load(sys.stdin).get('capabilities',{}).get('providers',{}).get('stt','?'))" 2>/dev/null) + tts_provider=$(echo "$raw" | python3 -c "import json,sys;print(json.load(sys.stdin).get('capabilities',{}).get('providers',{}).get('tts','?'))" 2>/dev/null) + [ "$stt_cap" = "True" ] || [ "$stt_cap" = "true" ] \ + && pass "stt=true provider=$stt_provider" \ + || warn "stt=false (provider=$stt_provider) — STT not available on this node" + [ "$tts_cap" = "True" ] || [ "$tts_cap" = "true" ] \ + && pass "tts=true provider=$tts_provider" \ + || warn "tts=false (provider=$tts_provider) — TTS not available on this node" + NCS_RAW="$raw" NCS_NODE_ID="$node_id" } +# ── Memory Service health check ──────────────────────────────────────────────── + +check_memory_service() { + local label="$1" url="$2" + echo "── $label ($url/health) ──" + local health + health=$(curl -sf "$url/health" 2>/dev/null) || { warn "Memory Service unreachable at $url (STT/TTS may fail)"; return; } + local status + status=$(echo "$health" | python3 -c "import json,sys;print(json.load(sys.stdin).get('status','?'))" 2>/dev/null || echo "ok") + pass "memory-service health=$status" + + local voice_status + voice_status=$(curl -sf "$url/voice/status" 2>/dev/null) || { warn "voice/status unreachable"; return; } + local tts_engine stt_engine + tts_engine=$(echo "$voice_status" | python3 -c "import json,sys;print(json.load(sys.stdin).get('tts_engine','?'))" 2>/dev/null) + stt_engine=$(echo "$voice_status" | python3 -c "import json,sys;print(json.load(sys.stdin).get('stt_engine','?'))" 2>/dev/null) + pass "voice: tts=$tts_engine stt=$stt_engine" +} + # ── Router 
check ────────────────────────────────────────────────────────────── check_router() { @@ -163,6 +196,91 @@ else: info "Snapshot: $snap_file" } +# ── Ollama model availability check ────────────────────────────────────────── +# Voice routing policy depends on specific models; 502 from BFF = model absent. +# This check probes /api/tags (Ollama REST) to list installed models and +# emits NCS-compatible "installed=false" warnings so Router can exclude them. + +OLLAMA_URL="${4:-http://127.0.0.1:11434}" + +# Voice policy: models required/preferred for voice_fast_uk / voice_quality_uk +VOICE_REQUIRED_MODELS="gemma3:latest" +VOICE_PREFERRED_MODELS="qwen3.5:35b-a3b qwen3:14b" +VOICE_EXCLUDED_MODELS="glm-4.7-flash:32k glm-4.7-flash" + +check_ollama_voice_models() { + local ollama_url="${1:-$OLLAMA_URL}" + echo "── Ollama voice model availability ($ollama_url) ──" + + local tags_raw + tags_raw=$(curl -sf "${ollama_url}/api/tags" 2>/dev/null) \ + || { warn "Ollama unreachable at ${ollama_url} — model check skipped"; return; } + + local installed_names + installed_names=$(echo "$tags_raw" | python3 -c " +import json, sys +data = json.load(sys.stdin) +models = data.get('models', []) +names = [m.get('name','') for m in models] +print(' '.join(names)) +" 2>/dev/null || echo "") + + info "Ollama installed: $(echo "$installed_names" | tr ' ' '\n' | grep -c . 
|| echo 0) model(s)" + + # Check required voice models + for model in $VOICE_REQUIRED_MODELS; do + local short; short="${model%%:*}" + if echo "$installed_names" | tr ' ' '\n' | grep -qi "^${model}$\|^${short}:"; then + pass "voice_required: ${model} = installed" + else + fail "voice_required: ${model} = MISSING — voice_fast_uk will degrade to fallback" + fi + done + + # Check preferred voice models (warn not fail) + local prefer_available=0 + for model in $VOICE_PREFERRED_MODELS; do + local short; short="${model%%:*}" + if echo "$installed_names" | tr ' ' '\n' | grep -qi "^${model}$\|^${short}:"; then + pass "voice_preferred: ${model} = installed" + prefer_available=$((prefer_available + 1)) + else + warn "voice_preferred: ${model} = not installed — will be skipped by router" + fi + done + + # Check that excluded models are NOT serving voice + for model in $VOICE_EXCLUDED_MODELS; do + local short; short="${model%%:*}" + if echo "$installed_names" | tr ' ' '\n' | grep -qi "^${model}$\|^${short}:"; then + warn "voice_excluded: ${model} is installed — ensure router excludes from voice profiles" + else + pass "voice_excluded: ${model} = absent (correct)" + fi + done + + # qwen3:8b specific check — known 502 source + local qwen3_8b_ok=0 + if echo "$installed_names" | tr ' ' '\n' | grep -qi "^qwen3:8b$"; then + # Extra: try a minimal generation to detect "loaded but broken" + local gen_code + gen_code=$(curl -sf -w "%{http_code}" -X POST "${ollama_url}/api/generate" \ + -H "Content-Type: application/json" \ + -d '{"model":"qwen3:8b","prompt":"ping","stream":false,"options":{"num_predict":1}}' \ + -o /dev/null --max-time 15 2>/dev/null || echo "000") + if [ "$gen_code" = "200" ]; then + pass "qwen3:8b = installed and serves (HTTP 200)" + qwen3_8b_ok=1 + else + warn "qwen3:8b = installed but generate returned HTTP ${gen_code} — exclude from voice_fast_uk prefer list" + fi + else + warn "qwen3:8b = not installed — mark as unavailable in NCS" + fi + + [ $qwen3_8b_ok -eq 0 ] 
&& info "ACTION: remove qwen3:8b from voice_fast_uk.prefer_models until 502 resolved" +} + # ── Main ────────────────────────────────────────────────────────────────────── echo "╔══════════════════════════════════════╗" @@ -174,6 +292,26 @@ check_ncs "NCS" "$NODA_NCS" echo "" check_router "Router" "$ROUTER_URL" echo "" +check_memory_service "Memory Service" "$MEMORY_URL" +echo "" +check_ollama_voice_models "$OLLAMA_URL" +echo "" + +# ── Voice Canary: live synthesis test (hard-fail on voice failure) ──────────── +echo "── Voice Canary (live synthesis) ──────────────────────────────────────" +CANARY_SCRIPT="$(dirname "$0")/scripts/voice_canary.py" +if [ -f "$CANARY_SCRIPT" ] && command -v python3 >/dev/null 2>&1; then + MEMORY_SERVICE_URL="$MEMORY_URL" python3 "$CANARY_SCRIPT" --mode preflight + CANARY_EXIT=$? + if [ $CANARY_EXIT -ne 0 ]; then + ERRORS=$((ERRORS+1)) + echo -e " ${RED}FAIL${NC} Voice canary: synthesis test failed (Polina/Ostap not working)" + fi +else + echo " [SKIP] voice_canary.py not found or python3 unavailable" +fi +echo "" + save_and_diff echo "" @@ -182,5 +320,5 @@ if [ $ERRORS -gt 0 ]; then echo -e "${RED}BLOCKED: no changes allowed until all errors resolved${NC}" exit 1 else - echo -e "${GREEN}Preflight PASSED — changes allowed${NC}" + echo -e "${GREEN}Preflight PASSED — all voice canaries green — changes allowed${NC}" fi diff --git a/router-config.yml b/router-config.yml index 46c46158..e6b9ead2 100644 --- a/router-config.yml +++ b/router-config.yml @@ -122,6 +122,33 @@ llm_profiles: timeout_ms: 60000 description: "Mistral Large для складних задач, reasoning, аналізу" + claude_sofiia: + provider: anthropic + api_key_env: ANTHROPIC_API_KEY + model: claude-sonnet-4-5 + max_tokens: 8192 + temperature: 0.2 + timeout_ms: 120000 + description: "Claude Sonnet для Sofiia — code generation, architecture, reasoning" + + claude_opus: + provider: anthropic + api_key_env: ANTHROPIC_API_KEY + model: claude-opus-4-5 + max_tokens: 8192 + temperature: 0.15 + 
timeout_ms: 180000 + description: "Claude Opus — для найскладніших архітектурних задач Sofiia" + + claude_haiku: + provider: anthropic + api_key_env: ANTHROPIC_API_KEY + model: claude-haiku-3-5 + max_tokens: 4096 + temperature: 0.25 + timeout_ms: 30000 + description: "Claude Haiku — швидкі відповіді, інструментальні задачі" + # ============================================================================ # Orchestrator Providers # ============================================================================ @@ -416,12 +443,13 @@ agents: sofiia: description: "Sofiia — Chief AI Architect та Technical Sovereign" - default_llm: local_default_coder + default_llm: claude_sofiia system_prompt: | Ти Sofiia — Chief AI Architect та Technical Sovereign екосистеми DAARION.city. Працюй як CTO-помічник: архітектура, reliability, безпека, release governance, incident/risk/backlog контроль. Відповідай українською, структуровано і коротко; не вигадуй факти, якщо даних нема — кажи прямо. Для задач про інфраструктуру пріоритет: перевірка health/monitor, далі конкретні дії і верифікація. + Для задач з кодом: аналіз, рефакторинг, дебаг, архітектурні рекомендації — повний рівень доступу. monitor: description: "Monitor Agent - архітектор-інспектор DAGI" diff --git a/services/aurora-service/app/kling.py b/services/aurora-service/app/kling.py index 2305bb02..733e4c53 100644 --- a/services/aurora-service/app/kling.py +++ b/services/aurora-service/app/kling.py @@ -143,6 +143,7 @@ def kling_video_enhance( def kling_video_generate( *, + image_b64: Optional[str] = None, image_url: Optional[str] = None, image_id: Optional[str] = None, prompt: str, @@ -165,8 +166,8 @@ def kling_video_generate( duration: '5' or '10'. aspect_ratio: '16:9', '9:16', '1:1'. 
""" - if not image_url and not image_id: - raise ValueError("Either image_url or image_id must be provided") + if not image_b64 and not image_url and not image_id: + raise ValueError("One of image_b64 / image_url / image_id must be provided") payload: Dict[str, Any] = { "model": model, @@ -177,10 +178,14 @@ def kling_video_generate( "negative_prompt": negative_prompt, "aspect_ratio": aspect_ratio, } - if image_url: - payload["image"] = {"type": "url", "url": image_url} - if image_id: - payload["image"] = {"type": "id", "id": image_id} + # Current Kling endpoint expects "image" as base64 payload string. + # Keep url/id compatibility as a best-effort fallback for older gateways. + if image_b64: + payload["image"] = image_b64 + elif image_url: + payload["image"] = image_url + elif image_id: + payload["image"] = image_id if callback_url: payload["callback_url"] = callback_url @@ -191,6 +196,37 @@ def kling_video_generate( ) +def kling_video_generate_from_file( + *, + image_path: Path, + prompt: str, + negative_prompt: str = "noise, blur, artifacts, distortion", + model: str = "kling-v1-5", + mode: str = "pro", + duration: str = "5", + cfg_scale: float = 0.5, + aspect_ratio: str = "16:9", + callback_url: Optional[str] = None, +) -> Dict[str, Any]: + """Generate video from a local image file by sending base64 payload.""" + import base64 + + with image_path.open("rb") as fh: + image_b64 = base64.b64encode(fh.read()).decode() + + return kling_video_generate( + image_b64=image_b64, + prompt=prompt, + negative_prompt=negative_prompt, + model=model, + mode=mode, + duration=duration, + cfg_scale=cfg_scale, + aspect_ratio=aspect_ratio, + callback_url=callback_url, + ) + + def kling_task_status(task_id: str) -> Dict[str, Any]: """Get status of any Kling task by ID.""" return _kling_request_with_fallback( @@ -267,7 +303,12 @@ def kling_poll_until_done( def kling_health_check() -> Dict[str, Any]: """Quick connectivity check — returns status dict.""" try: - resp = 
_kling_request("GET", "/v1/models", timeout=10) - return {"ok": True, "models": resp} + # `/v1/models` may be disabled in some accounts/regions. + # `/v1/videos/image2video` reliably returns code=0 when auth+endpoint are valid. + resp = _kling_request("GET", "/v1/videos/image2video", timeout=10) + code = resp.get("code") if isinstance(resp, dict) else None + if code not in (None, 0, "0"): + return {"ok": False, "error": f"Kling probe returned non-zero code: {code}", "probe": resp} + return {"ok": True, "probe_path": "/v1/videos/image2video", "probe": resp} except Exception as exc: return {"ok": False, "error": str(exc)} diff --git a/services/aurora-service/app/main.py b/services/aurora-service/app/main.py index 0f1bcc94..b15f71b2 100644 --- a/services/aurora-service/app/main.py +++ b/services/aurora-service/app/main.py @@ -4,6 +4,7 @@ import asyncio import hashlib import json import logging +import mimetypes import os import re import shutil @@ -13,9 +14,9 @@ from datetime import datetime, timezone from pathlib import Path from typing import Any, Dict, List, Optional -from fastapi import Body, FastAPI, File, Form, HTTPException, Query, UploadFile +from fastapi import Body, FastAPI, File, Form, HTTPException, Query, Request, UploadFile from fastapi.middleware.cors import CORSMiddleware -from fastapi.responses import FileResponse +from fastapi.responses import FileResponse, Response, StreamingResponse from .analysis import ( analyze_photo, @@ -47,6 +48,7 @@ MAX_CONCURRENT_JOBS = max(1, int(os.getenv("AURORA_MAX_CONCURRENT_JOBS", "1"))) store = JobStore(DATA_DIR) orchestrator = AuroraOrchestrator(store.outputs_dir, PUBLIC_BASE_URL) RUN_SLOT = asyncio.Semaphore(MAX_CONCURRENT_JOBS) +KLING_VIDEO2VIDEO_CAPABLE: Optional[bool] = None app = FastAPI( title="Aurora Media Forensics Service", @@ -228,7 +230,18 @@ def _enqueue_job_from_path( upload_dir = store.uploads_dir / job_id upload_dir.mkdir(parents=True, exist_ok=True) input_path = upload_dir / safe_filename(file_name) - 
shutil.copy2(source_path, input_path) + trim_info: Optional[Dict[str, float]] = None + if media_type == "video": + trim_info = _video_trim_window(export_options) + if trim_info: + _trim_video_input( + source_path, + input_path, + start_sec=float(trim_info.get("start_sec") or 0.0), + duration_sec=trim_info.get("duration_sec"), + ) + else: + shutil.copy2(source_path, input_path) input_hash = compute_sha256(input_path) initial_metadata = _estimate_upload_metadata( @@ -238,6 +251,8 @@ def _enqueue_job_from_path( ) if export_options: initial_metadata["export_options"] = export_options + if trim_info: + initial_metadata["clip"] = trim_info initial_metadata["priority"] = priority if metadata_patch: initial_metadata.update(metadata_patch) @@ -408,6 +423,110 @@ def _parse_export_options(raw_value: str) -> Dict[str, Any]: return parsed +def _opt_float(opts: Dict[str, Any], key: str) -> Optional[float]: + raw = opts.get(key) + if raw is None or raw == "": + return None + try: + return float(raw) + except Exception: + raise HTTPException(status_code=422, detail=f"export_options.{key} must be a number") + + +def _video_trim_window(export_options: Dict[str, Any]) -> Optional[Dict[str, float]]: + opts = export_options if isinstance(export_options, dict) else {} + start = _opt_float(opts, "clip_start_sec") + duration = _opt_float(opts, "clip_duration_sec") + if start is None: + start = _opt_float(opts, "start_sec") + if duration is None: + duration = _opt_float(opts, "duration_sec") + if start is None and duration is None: + return None + start_val = float(start or 0.0) + duration_val = float(duration) if duration is not None else None + if start_val < 0: + raise HTTPException(status_code=422, detail="clip_start_sec must be >= 0") + if duration_val is not None and duration_val <= 0: + raise HTTPException(status_code=422, detail="clip_duration_sec must be > 0") + return { + "start_sec": round(start_val, 3), + "duration_sec": round(duration_val, 3) if duration_val is not None else 
None, # type: ignore[arg-type] + } + + +def _trim_video_input(source_path: Path, target_path: Path, *, start_sec: float, duration_sec: Optional[float]) -> None: + """Trim video to a focused segment for faster iteration. + + First attempt is stream copy (lossless, fast). If that fails for container/codec reasons, + fallback to lightweight re-encode. + """ + cmd = [ + "ffmpeg", + "-hide_banner", + "-loglevel", + "error", + "-y", + ] + if start_sec > 0: + cmd.extend(["-ss", f"{start_sec:.3f}"]) + cmd.extend(["-i", str(source_path)]) + if duration_sec is not None: + cmd.extend(["-t", f"{duration_sec:.3f}"]) + cmd.extend([ + "-map", + "0:v:0", + "-map", + "0:a?", + "-c", + "copy", + "-movflags", + "+faststart", + str(target_path), + ]) + proc = subprocess.run(cmd, capture_output=True, text=True, check=False) + if proc.returncode == 0 and target_path.exists() and target_path.stat().st_size > 0: + return + + fallback = [ + "ffmpeg", + "-hide_banner", + "-loglevel", + "error", + "-y", + ] + if start_sec > 0: + fallback.extend(["-ss", f"{start_sec:.3f}"]) + fallback.extend(["-i", str(source_path)]) + if duration_sec is not None: + fallback.extend(["-t", f"{duration_sec:.3f}"]) + fallback.extend( + [ + "-map", + "0:v:0", + "-map", + "0:a?", + "-c:v", + "libx264", + "-preset", + "veryfast", + "-crf", + "17", + "-c:a", + "aac", + "-b:a", + "192k", + "-movflags", + "+faststart", + str(target_path), + ] + ) + proc2 = subprocess.run(fallback, capture_output=True, text=True, check=False) + if proc2.returncode != 0 or not target_path.exists() or target_path.stat().st_size <= 0: + err = (proc2.stderr or proc.stderr or "").strip()[:280] + raise HTTPException(status_code=422, detail=f"video trim failed: {err or 'ffmpeg error'}") + + def _status_timing(job: Any) -> Dict[str, Optional[int]]: started = _parse_iso_utc(job.started_at) if not started: @@ -1134,14 +1253,156 @@ async def cleanup_storage( @app.get("/api/aurora/files/{job_id}/{file_name}") -async def 
download_output_file(job_id: str, file_name: str) -> FileResponse: +async def download_output_file(job_id: str, file_name: str, request: Request): base = (store.outputs_dir / job_id).resolve() target = (base / file_name).resolve() if not str(target).startswith(str(base)): raise HTTPException(status_code=403, detail="invalid file path") if not target.exists() or not target.is_file(): raise HTTPException(status_code=404, detail="file not found") - return FileResponse(path=target, filename=target.name) + total_size = target.stat().st_size + range_header = request.headers.get("range") + if not range_header: + return FileResponse( + path=target, + filename=target.name, + headers={"Accept-Ranges": "bytes"}, + ) + + parsed = _parse_range_header(range_header, total_size) + if parsed is None: + return FileResponse( + path=target, + filename=target.name, + headers={"Accept-Ranges": "bytes"}, + ) + + start, end = parsed + if start >= total_size: + return Response( + status_code=416, + headers={"Content-Range": f"bytes */{total_size}", "Accept-Ranges": "bytes"}, + ) + + content_length = (end - start) + 1 + media_type = mimetypes.guess_type(str(target))[0] or "application/octet-stream" + + def _iter_range(): + with target.open("rb") as fh: + fh.seek(start) + remaining = content_length + while remaining > 0: + chunk = fh.read(min(65536, remaining)) + if not chunk: + break + remaining -= len(chunk) + yield chunk + + return StreamingResponse( + _iter_range(), + status_code=206, + media_type=media_type, + headers={ + "Content-Range": f"bytes {start}-{end}/{total_size}", + "Content-Length": str(content_length), + "Accept-Ranges": "bytes", + "Content-Disposition": f'attachment; filename="{target.name}"', + }, + ) + + +def _parse_range_header(range_header: str, total_size: int) -> Optional[tuple[int, int]]: + value = str(range_header or "").strip() + if not value.lower().startswith("bytes="): + return None + + spec = value.split("=", 1)[1].strip() + if "," in spec: + return None + if 
"-" not in spec: + return None + + start_txt, end_txt = spec.split("-", 1) + try: + if start_txt == "": + # Suffix range: bytes=-N + suffix_len = int(end_txt) + if suffix_len <= 0: + return None + if suffix_len >= total_size: + return 0, max(0, total_size - 1) + return total_size - suffix_len, total_size - 1 + start = int(start_txt) + if start < 0: + return None + if end_txt == "": + end = total_size - 1 + else: + end = int(end_txt) + if end < start: + return None + return start, min(end, max(0, total_size - 1)) + except Exception: + return None + + +def _extract_first_video_frame(video_path: Path, output_path: Path) -> Path: + """Extract the first decodable video frame to an image file.""" + try: + import cv2 # type: ignore[import-untyped] + except Exception as exc: + raise RuntimeError("OpenCV is required for Kling image2video fallback.") from exc + + output_path.parent.mkdir(parents=True, exist_ok=True) + cap = cv2.VideoCapture(str(video_path)) + try: + if not cap.isOpened(): + raise RuntimeError(f"Cannot open video for fallback frame extraction: {video_path}") + ok, frame = cap.read() + if not ok or frame is None: + raise RuntimeError("Could not read first frame from video") + if not cv2.imwrite(str(output_path), frame): + raise RuntimeError(f"Failed to write fallback frame: {output_path}") + finally: + cap.release() + return output_path + + +def _resolve_kling_result_url(task_data: Dict[str, Any]) -> Optional[str]: + if not isinstance(task_data, dict): + return None + + task_result = task_data.get("task_result") + if isinstance(task_result, dict): + videos = task_result.get("videos") + if isinstance(videos, list): + for item in videos: + if not isinstance(item, dict): + continue + for key in ("url", "video_url", "play_url", "download_url"): + value = item.get(key) + if isinstance(value, str) and value: + return value + elif isinstance(videos, dict): + for key in ("url", "video_url", "play_url", "download_url"): + value = videos.get(key) + if isinstance(value, 
str) and value: + return value + for key in ("url", "video_url", "play_url", "download_url", "result_url"): + value = task_result.get(key) + if isinstance(value, str) and value: + return value + + for key in ("kling_result_url", "result_url", "video_url", "url"): + value = task_data.get(key) + if isinstance(value, str) and value: + return value + return None + + +def _compact_error_text(err: Any, limit: int = 220) -> str: + text = re.sub(r"\s+", " ", str(err)).strip() + return text[:limit] # ── Kling AI endpoints ──────────────────────────────────────────────────────── @@ -1163,7 +1424,7 @@ async def kling_enhance_video( cfg_scale: float = Form(0.5, description="Prompt adherence 0.0-1.0"), ) -> Dict[str, Any]: """Submit Aurora job result to Kling AI for video-to-video enhancement.""" - from .kling import kling_video_enhance, kling_upload_file + from .kling import kling_video_enhance, kling_upload_file, kling_video_generate_from_file job = store.get_job(job_id) if not job: @@ -1181,45 +1442,97 @@ async def kling_enhance_video( if not result_path.exists(): raise HTTPException(status_code=404, detail="Result file not found for this job") - try: - upload_resp = kling_upload_file(result_path) - except Exception as exc: - raise HTTPException(status_code=502, detail=f"Kling upload error: {str(exc)[:400]}") from exc - file_id = (upload_resp.get("data") or {}).get("resource_id") or (upload_resp.get("data") or {}).get("file_id") + global KLING_VIDEO2VIDEO_CAPABLE - if not file_id: - raise HTTPException(status_code=502, detail=f"Kling upload failed: {upload_resp}") + task_resp: Optional[Dict[str, Any]] = None + file_id: Optional[str] = None + kling_endpoint = "video2video" + video2video_error: Optional[str] = None + fallback_frame_name: Optional[str] = None + + # Primary path: upload + video2video. 
+ if KLING_VIDEO2VIDEO_CAPABLE is not False: + try: + upload_resp = kling_upload_file(result_path) + file_id = (upload_resp.get("data") or {}).get("resource_id") or (upload_resp.get("data") or {}).get("file_id") + if not file_id: + raise RuntimeError(f"Kling upload failed: {upload_resp}") + task_resp = kling_video_enhance( + video_id=file_id, + prompt=prompt, + negative_prompt=negative_prompt, + mode=mode, + duration=duration, + cfg_scale=cfg_scale, + ) + KLING_VIDEO2VIDEO_CAPABLE = True + except Exception as exc: + raw_error = str(exc) + video2video_error = _compact_error_text(raw_error, limit=220) + logger.warning("kling video2video unavailable for %s: %s", job_id, video2video_error) + lower_error = raw_error.lower() + if "endpoint mismatch" in lower_error or "404" in lower_error: + KLING_VIDEO2VIDEO_CAPABLE = False + else: + video2video_error = "video2video skipped (previous endpoint mismatch)" + + # Fallback path: extract first frame and run image2video (base64 payload). + if task_resp is None: + try: + frame_path = _extract_first_video_frame( + result_path, + store.outputs_dir / job_id / "_kling_fallback_frame.jpg", + ) + fallback_frame_name = frame_path.name + task_resp = kling_video_generate_from_file( + image_path=frame_path, + prompt=prompt, + negative_prompt=negative_prompt, + mode=mode, + duration=duration, + cfg_scale=cfg_scale, + aspect_ratio="16:9", + ) + kling_endpoint = "image2video" + except Exception as fallback_exc: + detail = "Kling submit failed" + if video2video_error: + detail = f"Kling video2video error: {video2video_error}; image2video fallback error: {_compact_error_text(fallback_exc, limit=220)}" + else: + detail = f"Kling image2video fallback error: {_compact_error_text(fallback_exc, limit=220)}" + raise HTTPException(status_code=502, detail=detail) from fallback_exc + + if task_resp is None: + raise HTTPException(status_code=502, detail="Kling task submit failed: empty response") - try: - task_resp = kling_video_enhance( - 
video_id=file_id, - prompt=prompt, - negative_prompt=negative_prompt, - mode=mode, - duration=duration, - cfg_scale=cfg_scale, - ) - except Exception as exc: - raise HTTPException(status_code=502, detail=f"Kling task submit error: {str(exc)[:400]}") from exc task_id = (task_resp.get("data") or {}).get("task_id") or task_resp.get("task_id") + if not task_id: + raise HTTPException(status_code=502, detail=f"Kling task_id missing in response: {task_resp}") kling_meta_dir = store.outputs_dir / job_id kling_meta_path = kling_meta_dir / "kling_task.json" - kling_meta_path.write_text(json.dumps({ + meta_payload: Dict[str, Any] = { "aurora_job_id": job_id, "kling_task_id": task_id, "kling_file_id": file_id, + "kling_endpoint": kling_endpoint, "prompt": prompt, "mode": mode, "duration": duration, "submitted_at": datetime.now(timezone.utc).isoformat(), "status": "submitted", - }, ensure_ascii=False, indent=2), encoding="utf-8") + } + if fallback_frame_name: + meta_payload["kling_source_frame"] = fallback_frame_name + if video2video_error: + meta_payload["video2video_error"] = video2video_error + kling_meta_path.write_text(json.dumps(meta_payload, ensure_ascii=False, indent=2), encoding="utf-8") return { "aurora_job_id": job_id, "kling_task_id": task_id, "kling_file_id": file_id, + "kling_endpoint": kling_endpoint, "status": "submitted", "status_url": f"/api/aurora/kling/status/{job_id}", } @@ -1238,9 +1551,10 @@ async def kling_task_status_for_job(job_id: str) -> Dict[str, Any]: task_id = meta.get("kling_task_id") if not task_id: raise HTTPException(status_code=404, detail="Kling task_id missing in metadata") + endpoint = str(meta.get("kling_endpoint") or "video2video") try: - status_resp = kling_video_task_status(task_id, endpoint="video2video") + status_resp = kling_video_task_status(task_id, endpoint=endpoint) except Exception as exc: raise HTTPException(status_code=502, detail=f"Kling status error: {str(exc)[:400]}") from exc task_data = status_resp.get("data") or 
status_resp @@ -1249,19 +1563,17 @@ async def kling_task_status_for_job(job_id: str) -> Dict[str, Any]: meta["status"] = state meta["last_checked"] = datetime.now(timezone.utc).isoformat() - result_url = None - works = task_data.get("task_result", {}).get("videos") or [] - if works: - result_url = works[0].get("url") - if result_url: - meta["kling_result_url"] = result_url - meta["completed_at"] = datetime.now(timezone.utc).isoformat() + result_url = _resolve_kling_result_url(task_data) + if result_url: + meta["kling_result_url"] = result_url + meta["completed_at"] = datetime.now(timezone.utc).isoformat() kling_meta_path.write_text(json.dumps(meta, ensure_ascii=False, indent=2), encoding="utf-8") return { "aurora_job_id": job_id, "kling_task_id": task_id, + "kling_endpoint": endpoint, "status": state, "kling_result_url": result_url, "meta": meta, @@ -1279,7 +1591,7 @@ async def kling_image_to_video( aspect_ratio: str = Form("16:9"), ) -> Dict[str, Any]: """Generate video from a still image using Kling AI.""" - from .kling import kling_upload_file, kling_video_generate + from .kling import kling_video_generate_from_file file_name = file.filename or "frame.jpg" content = await file.read() @@ -1293,16 +1605,8 @@ async def kling_image_to_video( try: try: - upload_resp = kling_upload_file(tmp_path) - except Exception as exc: - raise HTTPException(status_code=502, detail=f"Kling upload error: {str(exc)[:400]}") from exc - file_id = (upload_resp.get("data") or {}).get("resource_id") or (upload_resp.get("data") or {}).get("file_id") - if not file_id: - raise HTTPException(status_code=502, detail=f"Kling upload failed: {upload_resp}") - - try: - task_resp = kling_video_generate( - image_id=file_id, + task_resp = kling_video_generate_from_file( + image_path=tmp_path, prompt=prompt, negative_prompt=negative_prompt, model=model, @@ -1313,9 +1617,12 @@ async def kling_image_to_video( except Exception as exc: raise HTTPException(status_code=502, detail=f"Kling task submit error: 
{str(exc)[:400]}") from exc task_id = (task_resp.get("data") or {}).get("task_id") or task_resp.get("task_id") + if not task_id: + raise HTTPException(status_code=502, detail=f"Kling task_id missing in response: {task_resp}") return { "kling_task_id": task_id, - "kling_file_id": file_id, + "kling_file_id": None, + "kling_endpoint": "image2video", "status": "submitted", "status_url": f"/api/aurora/kling/task/{task_id}?endpoint=image2video", } diff --git a/services/aurora-service/app/quality_report.py b/services/aurora-service/app/quality_report.py index 0fda47a3..95903bbd 100644 --- a/services/aurora-service/app/quality_report.py +++ b/services/aurora-service/app/quality_report.py @@ -49,6 +49,78 @@ def _models_used(job: AuroraJob) -> List[str]: return models +def _processing_steps(job: AuroraJob) -> List[Any]: + if job.result and job.result.processing_log: + return list(job.result.processing_log) + if job.processing_log: + return list(job.processing_log) + return [] + + +def _result_media_hash(job: AuroraJob) -> Optional[str]: + if not job.result: + return None + media_type = str(job.media_type).strip().lower() + for out in job.result.output_files: + out_type = str(getattr(out, "type", "") or "").strip().lower() + if out_type in {media_type, "video", "photo", "image", "audio", "unknown"}: + value = str(getattr(out, "hash", "") or "").strip() + if value: + return value + return None + + +def _fallback_flags(job: AuroraJob) -> Dict[str, Any]: + hard_fallback_used = False + soft_sr_fallback_used = False + fallback_steps: List[str] = [] + warnings: List[str] = [] + + for step in _processing_steps(job): + step_name = str(getattr(step, "step", "") or "").strip() or "unknown" + details = getattr(step, "details", {}) or {} + if not isinstance(details, dict): + continue + + if bool(details.get("fallback_used")): + hard_fallback_used = True + fallback_steps.append(step_name) + reason = str(details.get("reason") or "").strip() + if reason: + warnings.append(f"{step_name}: 
hard fallback used ({reason})") + else: + warnings.append(f"{step_name}: hard fallback used") + + sr_fallback_frames = 0 + try: + sr_fallback_frames = int(details.get("sr_fallback_frames") or 0) + except Exception: + sr_fallback_frames = 0 + if bool(details.get("sr_fallback_used")): + sr_fallback_frames = max(sr_fallback_frames, 1) + if sr_fallback_frames > 0: + soft_sr_fallback_used = True + fallback_steps.append(step_name) + method = str(details.get("sr_fallback_method") or "").strip() + reason = str(details.get("sr_fallback_reason") or "").strip() + msg = f"{step_name}: SR soft fallback on {sr_fallback_frames} frame(s)" + if method: + msg += f" via {method}" + if reason: + msg += f" ({reason})" + warnings.append(msg) + + fallback_steps_unique = list(dict.fromkeys(fallback_steps)) + warnings_unique = list(dict.fromkeys(warnings)) + return { + "fallback_used": bool(hard_fallback_used or soft_sr_fallback_used), + "hard_fallback_used": hard_fallback_used, + "soft_sr_fallback_used": soft_sr_fallback_used, + "fallback_steps": fallback_steps_unique, + "warnings": warnings_unique, + } + + def _detect_faces_with_proxy_confidence(frame_bgr: Any) -> List[Dict[str, Any]]: if cv2 is None: return [] @@ -246,9 +318,29 @@ def build_quality_report(job: AuroraJob, outputs_dir: Path, *, refresh: bool = F raise RuntimeError("Cannot build quality report: source/result file not found") media_type: MediaType = job.media_type + processing_flags = _fallback_flags(job) faces = _face_metrics(source_path, result_path, media_type) plates = _plate_metrics(job_dir) overall = _overall_metrics(source_path, result_path, media_type, job) + result_hash = _result_media_hash(job) + identical_to_input = bool(result_hash and result_hash == str(job.input_hash)) + warnings = list(processing_flags.get("warnings") or []) + if identical_to_input: + warnings.append("output hash matches input hash; enhancement may be skipped.") + warnings = list(dict.fromkeys(warnings)) + + processing_status = "ok" + if 
bool(processing_flags.get("fallback_used")) or identical_to_input: + processing_status = "degraded" + overall["processing_status"] = processing_status + overall["fallback_used"] = bool(processing_flags.get("fallback_used")) + overall["hard_fallback_used"] = bool(processing_flags.get("hard_fallback_used")) + overall["soft_sr_fallback_used"] = bool(processing_flags.get("soft_sr_fallback_used")) + overall["identical_to_input"] = identical_to_input + if result_hash: + overall["result_hash"] = result_hash + if warnings: + overall["warnings"] = warnings report = { "job_id": job.job_id, @@ -257,7 +349,13 @@ def build_quality_report(job: AuroraJob, outputs_dir: Path, *, refresh: bool = F "faces": faces, "plates": plates, "overall": overall, + "processing_flags": { + **processing_flags, + "identical_to_input": identical_to_input, + "warnings": warnings, + }, "summary": { + "processing_status": processing_status, "faces_detected_ratio": f"{faces['detected']} / {faces['source_detected'] or faces['detected']}", "plates_recognized_ratio": f"{plates['recognized']} / {plates['detected']}", }, diff --git a/services/memory-service/Dockerfile b/services/memory-service/Dockerfile index 263ca31a..95245d23 100644 --- a/services/memory-service/Dockerfile +++ b/services/memory-service/Dockerfile @@ -13,6 +13,7 @@ RUN pip install --no-cache-dir -r requirements.txt # Copy application COPY app/ ./app/ +COPY static/ ./static/ # Environment ENV PYTHONPATH=/app diff --git a/services/memory-service/app/database.py b/services/memory-service/app/database.py index 3a9be132..65551815 100644 --- a/services/memory-service/app/database.py +++ b/services/memory-service/app/database.py @@ -428,6 +428,8 @@ class Database: CREATE INDEX IF NOT EXISTS idx_user_facts_user_id ON user_facts(user_id); CREATE INDEX IF NOT EXISTS idx_user_facts_team_id ON user_facts(team_id); + CREATE UNIQUE INDEX IF NOT EXISTS idx_user_facts_user_team_agent_fact + ON user_facts(user_id, team_id, agent_id, fact_key); """) async 
def upsert_fact( @@ -445,16 +447,30 @@ class Database: json_value = json.dumps(fact_value_json) if fact_value_json else None async with self.pool.acquire() as conn: - row = await conn.fetchrow(""" - INSERT INTO user_facts (user_id, team_id, agent_id, fact_key, fact_value, fact_value_json) - VALUES ($1, $2, $3, $4, $5, $6::jsonb) - ON CONFLICT (user_id, team_id, agent_id, fact_key) - DO UPDATE SET - fact_value = EXCLUDED.fact_value, - fact_value_json = EXCLUDED.fact_value_json, - updated_at = NOW() - RETURNING * - """, user_id, team_id, agent_id, fact_key, fact_value, json_value) + try: + row = await conn.fetchrow(""" + INSERT INTO user_facts (user_id, team_id, agent_id, fact_key, fact_value, fact_value_json) + VALUES ($1, $2, $3, $4, $5, $6::jsonb) + ON CONFLICT (user_id, team_id, agent_id, fact_key) + DO UPDATE SET + fact_value = EXCLUDED.fact_value, + fact_value_json = EXCLUDED.fact_value_json, + updated_at = NOW() + RETURNING * + """, user_id, team_id, agent_id, fact_key, fact_value, json_value) + except asyncpg.exceptions.InvalidColumnReferenceError: + # Backward compatibility for DBs that only have UNIQUE(user_id, team_id, fact_key). 
+ row = await conn.fetchrow(""" + INSERT INTO user_facts (user_id, team_id, agent_id, fact_key, fact_value, fact_value_json) + VALUES ($1, $2, $3, $4, $5, $6::jsonb) + ON CONFLICT (user_id, team_id, fact_key) + DO UPDATE SET + agent_id = EXCLUDED.agent_id, + fact_value = EXCLUDED.fact_value, + fact_value_json = EXCLUDED.fact_value_json, + updated_at = NOW() + RETURNING * + """, user_id, team_id, agent_id, fact_key, fact_value, json_value) return dict(row) if row else {} diff --git a/services/memory-service/app/main.py b/services/memory-service/app/main.py index da768234..805071de 100644 --- a/services/memory-service/app/main.py +++ b/services/memory-service/app/main.py @@ -650,6 +650,7 @@ class FactUpsertRequest(BaseModel): fact_value: Optional[str] = None fact_value_json: Optional[dict] = None team_id: Optional[str] = None + agent_id: Optional[str] = None @app.post("/facts/upsert") async def upsert_fact(request: FactUpsertRequest): @@ -663,13 +664,17 @@ async def upsert_fact(request: FactUpsertRequest): # Ensure facts table exists (will be created on first call) await db.ensure_facts_table() - # Upsert the fact + # Upsert the fact — extract agent_id from request field or from fact_value_json + agent_id_val = request.agent_id or ( + (request.fact_value_json or {}).get("agent_id") + ) result = await db.upsert_fact( user_id=request.user_id, fact_key=request.fact_key, fact_value=request.fact_value, fact_value_json=request.fact_value_json, - team_id=request.team_id + team_id=request.team_id, + agent_id=agent_id_val ) logger.info(f"fact_upserted", user_id=request.user_id, fact_key=request.fact_key) diff --git a/services/memory-service/requirements.txt b/services/memory-service/requirements.txt index 305fa2c3..f6c5908a 100644 --- a/services/memory-service/requirements.txt +++ b/services/memory-service/requirements.txt @@ -30,7 +30,7 @@ python-multipart==0.0.9 tiktoken==0.5.2 # Voice stack -edge-tts==6.1.19 +edge-tts==7.2.7 faster-whisper==1.1.1 # Testing diff --git 
a/services/node-worker/config.py b/services/node-worker/config.py index e4f9163c..04125611 100644 --- a/services/node-worker/config.py +++ b/services/node-worker/config.py @@ -14,3 +14,19 @@ STT_PROVIDER = os.getenv("STT_PROVIDER", "none") TTS_PROVIDER = os.getenv("TTS_PROVIDER", "none") OCR_PROVIDER = os.getenv("OCR_PROVIDER", "vision_prompted") IMAGE_PROVIDER = os.getenv("IMAGE_PROVIDER", "none") + +# Memory Service URL (used by memory_service STT/TTS providers) +MEMORY_SERVICE_URL = os.getenv("MEMORY_SERVICE_URL", "http://memory-service:8000") + +# ── Voice HA: dedicated concurrency limits (separate from generic stt/tts/llm) ── +# These control semaphores for node.{id}.voice.*.request subjects. +# Independent from MAX_CONCURRENCY so voice never starves generic inference. +VOICE_MAX_CONCURRENT_TTS = int(os.getenv("VOICE_MAX_CONCURRENT_TTS", "4")) +VOICE_MAX_CONCURRENT_LLM = int(os.getenv("VOICE_MAX_CONCURRENT_LLM", "2")) +VOICE_MAX_CONCURRENT_STT = int(os.getenv("VOICE_MAX_CONCURRENT_STT", "2")) + +# Timeouts for voice subjects (milliseconds). Router uses these as defaults. 
+VOICE_TTS_DEADLINE_MS = int(os.getenv("VOICE_TTS_DEADLINE_MS", "3000")) +VOICE_LLM_FAST_MS = int(os.getenv("VOICE_LLM_FAST_MS", "9000")) +VOICE_LLM_QUALITY_MS = int(os.getenv("VOICE_LLM_QUALITY_MS", "12000")) +VOICE_STT_DEADLINE_MS = int(os.getenv("VOICE_STT_DEADLINE_MS", "6000")) diff --git a/services/node-worker/fabric_metrics.py b/services/node-worker/fabric_metrics.py index 2f599403..63bf1a8a 100644 --- a/services/node-worker/fabric_metrics.py +++ b/services/node-worker/fabric_metrics.py @@ -8,6 +8,7 @@ try: PROM_AVAILABLE = True REGISTRY = CollectorRegistry() + # Generic job metrics jobs_total = Counter( "node_worker_jobs_total", "Jobs processed", ["type", "status"], registry=REGISTRY, @@ -23,6 +24,26 @@ try: registry=REGISTRY, ) + # ── Voice HA metrics (separate labels from generic) ─────────────────────── + # cap label: "voice.tts" | "voice.llm" | "voice.stt" + voice_jobs_total = Counter( + "node_worker_voice_jobs_total", + "Voice HA jobs processed (node.{id}.voice.*.request)", + ["cap", "status"], registry=REGISTRY, + ) + voice_inflight_gauge = Gauge( + "node_worker_voice_inflight", + "Voice HA inflight jobs per capability", + ["cap"], registry=REGISTRY, + ) + voice_latency_hist = Histogram( + "node_worker_voice_latency_ms", + "Voice HA job latency in ms", + ["cap"], + buckets=[100, 250, 500, 1000, 1500, 2000, 3000, 5000, 9000, 12000], + registry=REGISTRY, + ) + except ImportError: PROM_AVAILABLE = False REGISTRY = None @@ -44,6 +65,21 @@ def observe_latency(req_type: str, model: str, latency_ms: int): latency_hist.labels(type=req_type, model=model).observe(latency_ms) +def inc_voice_job(cap: str, status: str): + if PROM_AVAILABLE: + voice_jobs_total.labels(cap=cap, status=status).inc() + + +def set_voice_inflight(cap: str, count: int): + if PROM_AVAILABLE: + voice_inflight_gauge.labels(cap=cap).set(count) + + +def observe_voice_latency(cap: str, latency_ms: int): + if PROM_AVAILABLE: + voice_latency_hist.labels(cap=cap).observe(latency_ms) + + def 
get_metrics_text(): if PROM_AVAILABLE and REGISTRY: return generate_latest(REGISTRY) diff --git a/services/node-worker/main.py b/services/node-worker/main.py index 634c0a8e..9cf2e7b3 100644 --- a/services/node-worker/main.py +++ b/services/node-worker/main.py @@ -43,7 +43,30 @@ async def prom_metrics(): @app.get("/caps") async def caps(): - """Capability flags for NCS to aggregate.""" + """Capability flags for NCS to aggregate. + + Semantic vs operational separation (contract): + - capabilities.voice_* = semantic availability (provider configured). + True as long as the provider is configured, regardless of NATS state. + Routing decisions are based on this. + - runtime.nats_subscriptions.voice_* = operational (NATS sub active). + Used for health/telemetry only — NOT for routing. + + This prevents false-negatives during reconnects / restart races. + """ + import worker as _w + nid = config.NODE_ID.lower() + + # Semantic: provider configured → capability is available + voice_tts_cap = config.TTS_PROVIDER != "none" + voice_stt_cap = config.STT_PROVIDER != "none" + voice_llm_cap = True # LLM always available when node-worker is up + + # Operational: actual NATS subscription state (health/telemetry only) + nats_voice_tts_active = f"node.{nid}.voice.tts.request" in _w._VOICE_SUBJECTS + nats_voice_stt_active = f"node.{nid}.voice.stt.request" in _w._VOICE_SUBJECTS + nats_voice_llm_active = f"node.{nid}.voice.llm.request" in _w._VOICE_SUBJECTS + return { "node_id": config.NODE_ID, "capabilities": { @@ -53,6 +76,10 @@ async def caps(): "tts": config.TTS_PROVIDER != "none", "ocr": config.OCR_PROVIDER != "none", "image": config.IMAGE_PROVIDER != "none", + # Voice HA semantic capability flags (provider-based, not NATS-based) + "voice_tts": voice_tts_cap, + "voice_llm": voice_llm_cap, + "voice_stt": voice_stt_cap, }, "providers": { "stt": config.STT_PROVIDER, @@ -65,6 +92,19 @@ async def caps(): "vision": config.DEFAULT_VISION, }, "concurrency": config.MAX_CONCURRENCY, + 
"voice_concurrency": { + "voice_tts": config.VOICE_MAX_CONCURRENT_TTS, + "voice_llm": config.VOICE_MAX_CONCURRENT_LLM, + "voice_stt": config.VOICE_MAX_CONCURRENT_STT, + }, + # Operational NATS subscription state — for health/monitoring only + "runtime": { + "nats_subscriptions": { + "voice_tts_active": nats_voice_tts_active, + "voice_stt_active": nats_voice_stt_active, + "voice_llm_active": nats_voice_llm_active, + } + }, } diff --git a/services/node-worker/worker.py b/services/node-worker/worker.py index b9d68f95..ffeebe75 100644 --- a/services/node-worker/worker.py +++ b/services/node-worker/worker.py @@ -11,24 +11,44 @@ from models import JobRequest, JobResponse, JobError from idempotency import IdempotencyStore from providers import ollama, ollama_vision from providers import stt_mlx_whisper, tts_mlx_kokoro +from providers import stt_memory_service, tts_memory_service import fabric_metrics as fm logger = logging.getLogger("node-worker") _idem = IdempotencyStore() _semaphore: asyncio.Semaphore = asyncio.Semaphore(config.MAX_CONCURRENCY) + +# Voice-dedicated semaphores — independent from generic MAX_CONCURRENCY. +# Prevents voice requests from starving generic inference and vice versa. 
+_voice_sem_tts: asyncio.Semaphore = asyncio.Semaphore(config.VOICE_MAX_CONCURRENT_TTS) +_voice_sem_llm: asyncio.Semaphore = asyncio.Semaphore(config.VOICE_MAX_CONCURRENT_LLM) +_voice_sem_stt: asyncio.Semaphore = asyncio.Semaphore(config.VOICE_MAX_CONCURRENT_STT) + +_VOICE_SEMAPHORES = { + "voice.tts": _voice_sem_tts, + "voice.llm": _voice_sem_llm, + "voice.stt": _voice_sem_stt, +} + _nats_client = None _inflight_count: int = 0 +_voice_inflight: Dict[str, int] = {"voice.tts": 0, "voice.llm": 0, "voice.stt": 0} _latencies_llm: list = [] _latencies_vision: list = [] _LATENCY_BUFFER = 50 +# Set of subjects that use the voice handler path +_VOICE_SUBJECTS: set = set() + async def start(nats_client): global _nats_client _nats_client = nats_client nid = config.NODE_ID.lower() + + # Generic subjects (unchanged — backward compatible) subjects = [ f"node.{nid}.llm.request", f"node.{nid}.vision.request", @@ -41,6 +61,31 @@ async def start(nats_client): await nats_client.subscribe(subj, cb=_handle_request) logger.info(f"✅ Subscribed: {subj}") + # Voice HA subjects — separate semaphores, own metrics, own deadlines + # Only subscribe if the relevant provider is configured (preflight-first) + voice_subjects_to_caps = { + f"node.{nid}.voice.tts.request": ("tts", _voice_sem_tts, "voice.tts"), + f"node.{nid}.voice.llm.request": ("llm", _voice_sem_llm, "voice.llm"), + f"node.{nid}.voice.stt.request": ("stt", _voice_sem_stt, "voice.stt"), + } + for subj, (required_cap, sem, cap_key) in voice_subjects_to_caps.items(): + if required_cap == "tts" and config.TTS_PROVIDER == "none": + logger.info(f"⏭ Skipping {subj}: TTS_PROVIDER=none") + continue + if required_cap == "stt" and config.STT_PROVIDER == "none": + logger.info(f"⏭ Skipping {subj}: STT_PROVIDER=none") + continue + # LLM always available on this node + _VOICE_SUBJECTS.add(subj) + + async def _make_voice_handler(s=sem, k=cap_key): + async def _voice_handler(msg): + await _handle_voice_request(msg, voice_sem=s, cap_key=k) + return 
_voice_handler + + await nats_client.subscribe(subj, cb=await _make_voice_handler()) + logger.info(f"✅ Voice subscribed: {subj}") + async def _handle_request(msg): t0 = time.time() @@ -136,6 +181,103 @@ async def _handle_request(msg): pass +async def _handle_voice_request(msg, voice_sem: asyncio.Semaphore, cap_key: str): + """Voice-dedicated handler: separate semaphore, metrics, retry hints. + + Maps voice.{tts|llm|stt} to the same _execute() but with: + - Own concurrency limit (VOICE_MAX_CONCURRENT_{TTS|LLM|STT}) + - TOO_BUSY includes retry_after_ms hint (client can retry immediately elsewhere) + - Voice-specific Prometheus labels (type=voice.tts, etc.) + - WARNING log on fallback (contract: no silent fallback) + """ + t0 = time.time() + # Extract the base type for _execute (voice.tts → tts) + base_type = cap_key.split(".")[-1] # "tts", "llm", "stt" + + try: + raw = msg.data + if len(raw) > config.MAX_PAYLOAD_BYTES: + await _reply(msg, JobResponse( + node_id=config.NODE_ID, status="error", + error=JobError(code="PAYLOAD_TOO_LARGE", message=f"max {config.MAX_PAYLOAD_BYTES} bytes"), + )) + return + + data = json.loads(raw) + job = JobRequest(**data) + job.trace_id = job.trace_id or job.job_id + + remaining = job.remaining_ms() + if remaining <= 0: + await _reply(msg, JobResponse( + job_id=job.job_id, trace_id=job.trace_id, node_id=config.NODE_ID, + status="timeout", error=JobError(code="DEADLINE_EXCEEDED"), + )) + return + + # Voice concurrency check — TOO_BUSY includes retry hint + if voice_sem._value == 0: + logger.warning( + "[voice.busy] cap=%s node=%s — all %d slots occupied. 
" + "WARNING: request turned away, Router should failover.", + cap_key, config.NODE_ID, { + "voice.tts": config.VOICE_MAX_CONCURRENT_TTS, + "voice.llm": config.VOICE_MAX_CONCURRENT_LLM, + "voice.stt": config.VOICE_MAX_CONCURRENT_STT, + }.get(cap_key, "?"), + ) + fm.inc_voice_job(cap_key, "busy") + await _reply(msg, JobResponse( + job_id=job.job_id, trace_id=job.trace_id, node_id=config.NODE_ID, + status="busy", + error=JobError( + code="TOO_BUSY", + message=f"voice {cap_key} at capacity", + details={"retry_after_ms": 500, "cap": cap_key}, + ), + )) + return + + global _voice_inflight + _voice_inflight[cap_key] = _voice_inflight.get(cap_key, 0) + 1 + fm.set_voice_inflight(cap_key, _voice_inflight[cap_key]) + + try: + async with voice_sem: + # Route to _execute with the base type + job.required_type = base_type + resp = await _execute(job, remaining) + finally: + _voice_inflight[cap_key] = max(0, _voice_inflight.get(cap_key, 1) - 1) + fm.set_voice_inflight(cap_key, _voice_inflight[cap_key]) + + resp.latency_ms = int((time.time() - t0) * 1000) + fm.inc_voice_job(cap_key, resp.status) + if resp.status == "ok" and resp.latency_ms > 0: + fm.observe_voice_latency(cap_key, resp.latency_ms) + + # Contract: log WARNING on any non-ok voice result + if resp.status != "ok": + logger.warning( + "[voice.fallback] cap=%s node=%s status=%s error=%s trace=%s", + cap_key, config.NODE_ID, resp.status, + resp.error.code if resp.error else "?", job.trace_id, + ) + + await _reply(msg, resp) + + except Exception as e: + logger.exception(f"Voice handler error cap={cap_key}: {e}") + fm.inc_voice_job(cap_key, "error") + try: + await _reply(msg, JobResponse( + node_id=config.NODE_ID, status="error", + error=JobError(code="INTERNAL", message=str(e)[:200]), + )) + except Exception: + pass + + async def _execute(job: JobRequest, remaining_ms: int) -> JobResponse: payload = job.payload hints = job.hints @@ -184,9 +326,14 @@ async def _execute(job: JobRequest, remaining_ms: int) -> JobResponse: 
status="error", error=JobError(code="NOT_AVAILABLE", message="STT not configured on this node"), ) - result = await asyncio.wait_for( - stt_mlx_whisper.transcribe(payload), timeout=timeout_s, - ) + if config.STT_PROVIDER == "memory_service": + result = await asyncio.wait_for( + stt_memory_service.transcribe(payload), timeout=timeout_s, + ) + else: + result = await asyncio.wait_for( + stt_mlx_whisper.transcribe(payload), timeout=timeout_s, + ) elif job.required_type == "tts": if config.TTS_PROVIDER == "none": return JobResponse( @@ -194,9 +341,14 @@ async def _execute(job: JobRequest, remaining_ms: int) -> JobResponse: status="error", error=JobError(code="NOT_AVAILABLE", message="TTS not configured on this node"), ) - result = await asyncio.wait_for( - tts_mlx_kokoro.synthesize(payload), timeout=timeout_s, - ) + if config.TTS_PROVIDER == "memory_service": + result = await asyncio.wait_for( + tts_memory_service.synthesize(payload), timeout=timeout_s, + ) + else: + result = await asyncio.wait_for( + tts_mlx_kokoro.synthesize(payload), timeout=timeout_s, + ) elif job.required_type == "ocr": if config.OCR_PROVIDER == "none": return JobResponse( diff --git a/services/router/fabric_metrics.py b/services/router/fabric_metrics.py index c364e0d4..473f8542 100644 --- a/services/router/fabric_metrics.py +++ b/services/router/fabric_metrics.py @@ -40,6 +40,31 @@ try: registry=REGISTRY, ) + # ── Voice HA metrics ────────────────────────────────────────────────────── + # cap label: "voice_tts" | "voice_llm" | "voice_stt" + voice_cap_requests = Counter( + "fabric_voice_capability_requests_total", + "Voice HA capability routing requests", + ["cap", "status"], registry=REGISTRY, + ) + voice_offload_total = Counter( + "fabric_voice_offload_total", + "Voice HA offload attempts (node selected + NATS sent)", + ["cap", "node", "status"], registry=REGISTRY, + ) + voice_breaker_state = Gauge( + "fabric_voice_breaker_state", + "Voice HA circuit breaker per node+cap (1=open)", + ["cap", 
"node"], registry=REGISTRY, + ) + voice_score_hist = Histogram( + "fabric_voice_score_ms", + "Voice HA node scoring distribution", + ["cap"], + buckets=[0, 50, 100, 200, 400, 800, 1600, 3200], + registry=REGISTRY, + ) + except ImportError: PROM_AVAILABLE = False REGISTRY = None @@ -76,6 +101,26 @@ def observe_score(score: int): score_hist.observe(score) +def inc_voice_cap_request(cap: str, status: str): + if PROM_AVAILABLE: + voice_cap_requests.labels(cap=cap, status=status).inc() + + +def inc_voice_offload(cap: str, node: str, status: str): + if PROM_AVAILABLE: + voice_offload_total.labels(cap=cap, node=node, status=status).inc() + + +def set_voice_breaker(cap: str, node: str, is_open: bool): + if PROM_AVAILABLE: + voice_breaker_state.labels(cap=cap, node=node).set(1 if is_open else 0) + + +def observe_voice_score(cap: str, score: float): + if PROM_AVAILABLE: + voice_score_hist.labels(cap=cap).observe(score) + + def get_metrics_text() -> Optional[bytes]: if PROM_AVAILABLE and REGISTRY: return generate_latest(REGISTRY) diff --git a/services/router/main.py b/services/router/main.py index b617e90a..ff833086 100644 --- a/services/router/main.py +++ b/services/router/main.py @@ -64,6 +64,12 @@ except ImportError: logging.basicConfig(level=logging.INFO) logger = logging.getLogger(__name__) +NEO4J_NOTIFICATIONS_LOG_LEVEL = os.getenv("NEO4J_NOTIFICATIONS_LOG_LEVEL", "ERROR").strip().upper() +_neo4j_notifications_level = getattr(logging, NEO4J_NOTIFICATIONS_LOG_LEVEL, logging.ERROR) +logging.getLogger("neo4j.notifications").setLevel(_neo4j_notifications_level) +# Guard against late/conditional auto-router imports. +# If auto-router module is unavailable (or loaded later), inference must still work. 
+SOFIIA_AUTO_ROUTER_AVAILABLE = False TRUSTED_DOMAINS_CONFIG_PATH = os.getenv("TRUSTED_DOMAINS_CONFIG_PATH", "./trusted_domains.yml") _trusted_domains_cache: Dict[str, Any] = {"mtime": None, "data": {}} @@ -289,8 +295,24 @@ DETERMINISTIC_PLANT_POLICY_AGENTS = { REPEAT_FINGERPRINT_MIN_SIMILARITY = float(os.getenv("AGENT_REPEAT_FINGERPRINT_MIN_SIMILARITY", "0.92")) +def _clean_think_blocks(text: str) -> str: + """Remove <think>...</think> reasoning blocks from LLM output (Qwen3/DeepSeek-R1). + + Strategy: + 1. Strip complete <think>...</think> blocks (DOTALL for multiline). + 2. Fallback: if an unclosed <think> remains, drop everything after it. + """ + cleaned = re.sub(r"<think>.*?</think>", "", text, + flags=re.DOTALL | re.IGNORECASE) + # Fallback: unclosed <think> — truncate before it + if "<think>" in cleaned.lower(): + cleaned = re.split(r"(?i)<think>", cleaned)[0] + return cleaned + + def _normalize_text_response(text: str) -> str: - return re.sub(r"\s+", " ", str(text or "")).strip() + cleaned = _clean_think_blocks(str(text or "")) + return re.sub(r"\s+", " ", cleaned).strip() def _response_fingerprint(text: str) -> str: @@ -1689,6 +1711,20 @@ async def internal_llm_complete(request: InternalLLMRequest): tokens = data.get("usage", {}).get("total_tokens", 0) latency = int((time_module.time() - t0) * 1000) logger.info(f"Internal LLM success: {cloud['name']}, {tokens} tokens, {latency}ms") + # Track usage for budget dashboard + if SOFIIA_AUTO_ROUTER_AVAILABLE: + try: + usage_data = data.get("usage", {}) + track_usage( + provider=cloud["name"], + model=cloud["model"], + agent=request.metadata.get("agent_id", "unknown") if request.metadata else "unknown", + input_tokens=usage_data.get("prompt_tokens", tokens // 2 if tokens else 0), + output_tokens=usage_data.get("completion_tokens", tokens // 2 if tokens else 0), + latency_ms=latency, + ) + except Exception as _te: + logger.debug("budget track error: %s", _te) + return InternalLLMResponse(text=response_text, model=cloud["model"], provider=cloud["name"], tokens_used=tokens,
latency_ms=latency) except Exception as e: logger.warning(f"Internal LLM {cloud['name']} failed: {e}") @@ -2086,8 +2122,39 @@ async def agent_infer(agent_id: str, request: InferRequest): routing_rules = router_config.get("routing", []) default_llm = _select_default_llm(agent_id, metadata, default_llm, routing_rules) - - cloud_provider_names = {"deepseek", "mistral", "grok", "openai", "anthropic"} + + # ── Sofiia Auto-Router: dynamic model selection based on task type ────── + if agent_id == "sofiia" and SOFIIA_AUTO_ROUTER_AVAILABLE and not request.model: + try: + _auto_result = select_model_auto( + prompt=request.prompt or "", + force_fast=metadata.get("force_fast", False), + force_capable=metadata.get("force_capable", False), + prefer_local=metadata.get("prefer_local", False), + prefer_cheap=metadata.get("prefer_cheap", False), + budget_aware=True, + ) + # Only override if auto-selected profile exists in config + if _auto_result.profile_name in router_config.get("llm_profiles", {}): + logger.info( + "🧠 Sofiia Auto-Router: task=%s complexity=%s → profile=%s model=%s reason=%s", + _auto_result.task_type, _auto_result.complexity, + _auto_result.profile_name, _auto_result.model_id, + _auto_result.reason, + ) + default_llm = _auto_result.profile_name + else: + logger.debug( + "🧠 Sofiia Auto-Router: profile %s not in config, using %s", + _auto_result.profile_name, default_llm, + ) + except Exception as _ar_e: + logger.warning("⚠️ Sofiia Auto-Router error: %s", _ar_e) + + # Pass routing-resolved default_llm to NCS so it respects cloud routing rules + ncs_agent_config = {**agent_config, "default_llm": default_llm} + + cloud_provider_names = {"deepseek", "mistral", "grok", "openai", "anthropic", "glm"} # ── Global NCS-first model selection (multi-node) ─────────────────── ncs_selection = None @@ -2095,7 +2162,7 @@ async def agent_infer(agent_id: str, request: InferRequest): try: gcaps = await global_capabilities_client.get_global_capabilities() ncs_selection = await 
select_model_for_agent( - agent_id, agent_config, router_config, gcaps, request.model, + agent_id, ncs_agent_config, router_config, gcaps, request.model, ) except Exception as e: logger.warning(f"⚠️ Global NCS selection error: {e}; falling back to static") @@ -2103,7 +2170,7 @@ async def agent_infer(agent_id: str, request: InferRequest): try: caps = await capabilities_client.fetch_capabilities() ncs_selection = await select_model_for_agent( - agent_id, agent_config, router_config, caps, request.model, + agent_id, ncs_agent_config, router_config, caps, request.model, ) except Exception as e: logger.warning(f"⚠️ NCS selection error: {e}; falling back to static") @@ -2678,11 +2745,218 @@ async def agent_infer(agent_id: str, request: InferRequest): } ] + # GLM (Z.AI / BigModel) — OpenAI-compatible but with special JWT auth. + if provider == "glm" and allow_cloud: + glm_key = os.getenv(llm_profile.get("api_key_env", "GLM5_API_KEY"), "") + if glm_key: + glm_model = request.model or llm_profile.get("model", "glm-4-flash") + glm_base_url = llm_profile.get("base_url", "https://open.bigmodel.cn/api/paas/v4") + glm_max_tokens = int(request.max_tokens or llm_profile.get("max_tokens", 4096)) + glm_temperature = float(request.temperature if request.temperature is not None else llm_profile.get("temperature", 0.3)) + glm_timeout = int(llm_profile.get("timeout_ms", 30000) / 1000) + try: + glm_resp = await http_client.post( + f"{glm_base_url}/chat/completions", + headers={"Authorization": f"Bearer {glm_key}", "Content-Type": "application/json"}, + json={ + "model": glm_model, + "messages": messages, + "max_tokens": glm_max_tokens, + "temperature": glm_temperature, + "stream": False, + }, + timeout=float(glm_timeout), + ) + if glm_resp.status_code == 200: + glm_data = glm_resp.json() + response_text = glm_data.get("choices", [{}])[0].get("message", {}).get("content", "") + glm_tokens = glm_data.get("usage", {}).get("total_tokens", 0) + if SOFIIA_AUTO_ROUTER_AVAILABLE: + try: + 
usage_d = glm_data.get("usage", {}) + track_usage( + provider="glm", model=glm_model, agent=agent_id, + input_tokens=usage_d.get("prompt_tokens", glm_tokens // 2 if glm_tokens else 0), + output_tokens=usage_d.get("completion_tokens", glm_tokens // 2 if glm_tokens else 0), + ) + except Exception: + pass + response_text = await _finalize_response_text(response_text, f"glm-{glm_model}") + return InferResponse( + response=response_text, + model=glm_model, + backend="glm", + tokens_used=glm_tokens, + ) + else: + logger.warning("🐉 GLM API error %s: %s", glm_resp.status_code, glm_resp.text[:200]) + except Exception as _glm_e: + logger.warning("🐉 GLM call failed: %s", _glm_e) + else: + logger.warning("🐉 GLM provider selected but GLM5_API_KEY not set") + # Fall through to Ollama + + # Anthropic has its own API format — handle separately before the loop. + if provider == "anthropic" and allow_cloud: + anthropic_key = os.getenv(llm_profile.get("api_key_env", "ANTHROPIC_API_KEY"), "") + if anthropic_key: + anthropic_model = request.model or llm_profile.get("model", "claude-sonnet-4-5") + anthropic_max_tokens = int(request.max_tokens or llm_profile.get("max_tokens", 8192)) + anthropic_temperature = float(request.temperature if request.temperature is not None else llm_profile.get("temperature", 0.2)) + anthropic_timeout = int(llm_profile.get("timeout_ms", 120000) / 1000) + try: + # Extract system prompt from messages + anthropic_system = "" + anthropic_messages = [] + for msg in messages: + role = msg.get("role", "user") + content = msg.get("content", "") + if role == "system": + anthropic_system = content + else: + anthropic_messages.append({"role": role, "content": content}) + if not anthropic_messages: + anthropic_messages = [{"role": "user", "content": request.prompt}] + # Build tool definitions for Claude + anthropic_tools = None + if TOOL_MANAGER_AVAILABLE and tool_manager: + raw_tools = tool_manager.get_tool_definitions(request_agent_id) + if raw_tools: + anthropic_tools 
= [] + for t in raw_tools: + fn = t.get("function", {}) + anthropic_tools.append({ + "name": fn.get("name", "unknown"), + "description": fn.get("description", ""), + "input_schema": fn.get("parameters") or {"type": "object", "properties": {}}, + }) + anthropic_payload: Dict[str, Any] = { + "model": anthropic_model, + "max_tokens": anthropic_max_tokens, + "temperature": anthropic_temperature, + "messages": anthropic_messages, + } + if anthropic_system: + anthropic_payload["system"] = anthropic_system + if anthropic_tools: + anthropic_payload["tools"] = anthropic_tools + logger.info(f"🟣 Anthropic Claude API: model={anthropic_model} agent={agent_id}") + anthropic_resp = await http_client.post( + "https://api.anthropic.com/v1/messages", + headers={ + "x-api-key": anthropic_key, + "anthropic-version": "2023-06-01", + "content-type": "application/json", + }, + json=anthropic_payload, + timeout=anthropic_timeout, + ) + if anthropic_resp.status_code == 200: + anthropic_data = anthropic_resp.json() + response_text = "" + for block in anthropic_data.get("content", []): + if block.get("type") == "text": + response_text += block.get("text", "") + tokens_used = ( + anthropic_data.get("usage", {}).get("input_tokens", 0) + + anthropic_data.get("usage", {}).get("output_tokens", 0) + ) + # Handle tool_use blocks from Claude + claude_tool_uses = [b for b in anthropic_data.get("content", []) if b.get("type") == "tool_use"] + if claude_tool_uses and TOOL_MANAGER_AVAILABLE and tool_manager: + tool_result_messages = list(anthropic_messages) + tool_result_messages.append({"role": "assistant", "content": anthropic_data.get("content", [])}) + for tool_use_block in claude_tool_uses: + tool_name = tool_use_block.get("name", "") + tool_input = tool_use_block.get("input", {}) + tool_use_id = tool_use_block.get("id", "") + logger.info(f"🔧 Claude tool call: {tool_name}({json.dumps(tool_input)[:100]})") + try: + tool_exec_result = await tool_manager.execute_tool( + tool_name, tool_input, + 
agent_id=request_agent_id, chat_id=chat_id, user_id=user_id, + ) + tool_content = tool_exec_result.result if tool_exec_result.success else f"Error: {tool_exec_result.error}" + except Exception as te: + tool_content = f"Tool execution error: {te}" + tool_result_messages.append({ + "role": "user", + "content": [{"type": "tool_result", "tool_use_id": tool_use_id, "content": str(tool_content)}] + }) + # Follow-up call with tool results + anthropic_payload["messages"] = tool_result_messages + followup_resp = await http_client.post( + "https://api.anthropic.com/v1/messages", + headers={ + "x-api-key": anthropic_key, + "anthropic-version": "2023-06-01", + "content-type": "application/json", + }, + json=anthropic_payload, + timeout=anthropic_timeout, + ) + if followup_resp.status_code == 200: + followup_data = followup_resp.json() + response_text = "" + for block in followup_data.get("content", []): + if block.get("type") == "text": + response_text += block.get("text", "") + tokens_used += ( + followup_data.get("usage", {}).get("input_tokens", 0) + + followup_data.get("usage", {}).get("output_tokens", 0) + ) + response_text = await _finalize_response_text(response_text, f"anthropic-{anthropic_model}") + if MEMORY_RETRIEVAL_AVAILABLE and memory_retrieval and chat_id and user_id: + asyncio.create_task(memory_retrieval.store_message( + agent_id=agent_id, user_id=user_id, username=username, + message_text=request.prompt, response_text=response_text, + chat_id=chat_id, metadata={"model": anthropic_model, "provider": "anthropic"}, + )) + # Track Anthropic usage for budget dashboard + if SOFIIA_AUTO_ROUTER_AVAILABLE: + try: + track_usage( + provider="anthropic", + model=anthropic_model, + agent=agent_id, + input_tokens=tokens_used // 3 if tokens_used else 0, + output_tokens=tokens_used - tokens_used // 3 if tokens_used else 0, + latency_ms=int((time_module.time() - _t_start) * 1000) if "_t_start" in dir() else 0, + task_type="", + ) + except Exception as _te: + 
logger.debug("budget track anthropic error: %s", _te) + return InferResponse( + response=response_text, + model=anthropic_model, + backend="anthropic", + tokens_used=tokens_used, + ) + else: + err_body = anthropic_resp.text[:300] + logger.warning(f"🟣 Anthropic API error {anthropic_resp.status_code}: {err_body}") + except Exception as anthropic_exc: + logger.warning(f"🟣 Anthropic call failed: {anthropic_exc}") + else: + logger.warning("🟣 Anthropic provider selected but ANTHROPIC_API_KEY not set") + # Fall through to Ollama if Anthropic fails + if not allow_cloud: cloud_providers = [] # If specific provider requested, try it first - if provider in ["deepseek", "mistral", "grok"]: + # GLM in OpenAI-compat fallback list for internal/non-sofiia requests + glm_key_fb = os.getenv("GLM5_API_KEY", "") + if glm_key_fb: + cloud_providers.insert(0, { + "name": "glm", + "api_key_env": "GLM5_API_KEY", + "base_url": "https://open.bigmodel.cn/api/paas/v4", + "model": "glm-4-flash", + "timeout": 20, + }) + + if provider in ["deepseek", "mistral", "grok", "glm"]: # Reorder to put requested provider first cloud_providers = sorted(cloud_providers, key=lambda x: 0 if x["name"] == provider else 1) @@ -3666,6 +3940,184 @@ async def capability_offload(cap_type: str, request: Request): }) +@app.post("/v1/capability/voice_{voice_cap_type}") +async def voice_capability_offload(voice_cap_type: str, request: Request): + """Route a Voice HA request (voice_tts / voice_llm / voice_stt) to the best node. + + Uses voice-specific NATS subjects (node.{id}.voice.{type}.request) and + separate circuit breaker keys from generic offload. Returns response headers: + - X-Voice-Node: chosen node id + - X-Voice-Mode: local | remote (relative to the router's own node) + - X-Voice-Cap: the capability type routed (voice_tts, voice_llm, voice_stt) + + Contract: no silent fallback — any failure increments Prometheus counter + + logs WARNING before returning 50x. 
+ """ + import uuid as _uuid + import fabric_metrics as fm + + cap_type = voice_cap_type # "tts", "llm", or "sst" + full_cap = f"voice_{cap_type}" + valid_caps = {"tts", "llm", "stt"} + if cap_type not in valid_caps: + fm.inc_voice_cap_request(full_cap, "invalid") + return JSONResponse(status_code=400, content={ + "error": f"Invalid voice cap: {cap_type}. Valid: voice_tts, voice_llm, voice_stt", + }) + + if not NCS_AVAILABLE or not global_capabilities_client: + fm.inc_voice_cap_request(full_cap, "ncs_unavailable") + logger.warning("[voice.cap] NCS unavailable — cannot route %s", full_cap) + return JSONResponse(status_code=503, content={ + "error": "NCS not available — cannot route voice capability requests", + }) + + gcaps = await global_capabilities_client.require_fresh_caps(ttl=30) + if gcaps is None: + fm.inc_voice_cap_request(full_cap, "stale_caps") + logger.warning("[voice.cap] caps stale — refusing to route %s", full_cap) + return JSONResponse(status_code=503, content={ + "error": "NCS caps stale — preflight failed", + }) + + eligible_nodes = global_capabilities_client.find_nodes_with_capability(full_cap) + if not eligible_nodes: + fm.inc_voice_cap_request(full_cap, "no_node") + logger.warning("[voice.cap] no node with %s available", full_cap) + return JSONResponse(status_code=404, content={ + "error": f"No node with capability '{full_cap}' available", + "hint": f"Ensure node-worker is running with TTS_PROVIDER/STT_PROVIDER set and {full_cap}=true in /caps", + }) + + # Voice uses separate CB key to avoid cross-contaminating generic stt/tts breakers + voice_cb_type = f"voice.{cap_type}" + unavailable = offload_client.get_unavailable_nodes(voice_cb_type) if offload_client else set() + available = [n for n in eligible_nodes if n.lower() not in {u.lower() for u in unavailable}] + if not available: + fm.inc_voice_cap_request(full_cap, "all_broken") + logger.warning("[voice.cap] all nodes circuit-broken for %s: %s", full_cap, eligible_nodes) + return 
JSONResponse(status_code=503, content={ + "error": f"All nodes with '{full_cap}' are circuit-broken", + "eligible": eligible_nodes, + "unavailable": list(unavailable), + }) + + # ── Voice scoring: prefer local, penalise high load + high latency ──────── + router_node_id = os.getenv("NODE_ID", "noda2").lower() + LOCAL_THRESHOLD_MS = int(os.getenv("VOICE_LOCAL_THRESHOLD_MS", "250")) + PREFER_LOCAL_BONUS = int(os.getenv("VOICE_PREFER_LOCAL_BONUS", "200")) + + deadline_defaults = { + "tts": int(os.getenv("VOICE_TTS_DEADLINE_MS", "3000")), + "llm": int(os.getenv("VOICE_LLM_FAST_MS", "9000")), + "sst": int(os.getenv("VOICE_STT_DEADLINE_MS", "6000")), + } + deadline_ms = deadline_defaults.get(cap_type, 9000) + + scored = [] + for nid in available: + nl = global_capabilities_client.get_node_load(nid) + rl = global_capabilities_client.get_runtime_load(nid) + wait_ms = nl.get("wait_ms", 0) or nl.get("inflight", 0) * 50 + rtt_ms = nl.get("rtt_ms", 0) + p95_ms = rl.get("p95_ms", 0) if rl else 0 + mem_penalty = 300 if nl.get("mem_pressure") == "high" else 0 + local_bonus = PREFER_LOCAL_BONUS if nid.lower() == router_node_id else 0 + score = wait_ms + rtt_ms + p95_ms + mem_penalty - local_bonus + scored.append((score, nid)) + fm.observe_voice_score(full_cap, score) + fm.set_voice_breaker(full_cap, nid, False) # currently alive + + scored.sort(key=lambda x: x[0]) + best_score, best_node = scored[0] + voice_mode = "local" if best_node.lower() == router_node_id else "remote" + + # If local score <= local_threshold, always prefer local even if a remote + # node has slightly lower score (avoids unnecessary cross-node traffic) + if voice_mode == "remote" and best_score > LOCAL_THRESHOLD_MS: + local_candidates = [(s, n) for s, n in scored if n.lower() == router_node_id] + if local_candidates: + local_score = local_candidates[0][0] + if local_score <= best_score + LOCAL_THRESHOLD_MS: + best_node = router_node_id + voice_mode = "local" + logger.info( + "[voice.cap] prefer local %s 
(score=%d) over %s (score=%d)", + best_node, local_score, scored[0][1], best_score, + ) + + payload = await request.json() + logger.info( + "[voice.cap.route] cap=%s → node=%s mode=%s score=%d deadline=%dms", + full_cap, best_node, voice_mode, scored[0][0], deadline_ms, + ) + + nats_ok = nc is not None and nats_available + if not nats_ok or not offload_client: + fm.inc_voice_cap_request(full_cap, "nats_down") + logger.warning("[voice.cap] NATS not connected — cannot offload %s", full_cap) + return JSONResponse(status_code=503, content={"error": "NATS not connected"}) + + job = { + "job_id": str(_uuid.uuid4()), + "required_type": cap_type, + "payload": payload, + "deadline_ts": int(time.time() * 1000) + deadline_ms, + "hints": payload.pop("hints", {}), + } + + # Use voice-specific NATS subject + nats_subject_type = f"voice.{cap_type}" + result = await offload_client.offload_infer( + nats_client=nc, + node_id=best_node, + required_type=nats_subject_type, + job_payload=job, + timeout_ms=deadline_ms, + ) + + if result and result.get("status") == "ok": + fm.inc_voice_cap_request(full_cap, "ok") + fm.inc_voice_offload(full_cap, best_node, "ok") + offload_client.record_success(best_node, voice_cb_type) + response_data = result.get("result", result) + resp = JSONResponse(content=response_data) + resp.headers["X-Voice-Node"] = best_node + resp.headers["X-Voice-Mode"] = voice_mode + resp.headers["X-Voice-Cap"] = full_cap + return resp + + # Non-ok — circuit breaker + WARNING (contract: no silent fallback) + error = result.get("error", {}) if result else {} + status_code_resp = result.get("status", "error") if result else "timeout" + + offload_client.record_failure(best_node, voice_cb_type) + fm.set_voice_breaker(full_cap, best_node, True) + fm.inc_voice_cap_request(full_cap, "fail") + fm.inc_voice_offload(full_cap, best_node, "fail") + + logger.warning( + "[voice.cap.fail] cap=%s node=%s status=%s code=%s — " + "WARNING: voice fallback must be handled by caller 
(BFF/Router)", + full_cap, best_node, status_code_resp, + error.get("code", "?"), + ) + return JSONResponse( + status_code=502, + content={ + "error": error.get("message", f"Voice offload to {best_node} failed"), + "code": error.get("code", "VOICE_OFFLOAD_FAILED"), + "cap": full_cap, + "node": best_node, + }, + headers={ + "X-Voice-Node": best_node, + "X-Voice-Mode": voice_mode, + "X-Voice-Cap": full_cap, + }, + ) + + @app.get("/v1/capabilities") async def list_global_capabilities(): """Return full capabilities view across all nodes.""" @@ -3986,6 +4438,120 @@ async def get_graph_stats(): raise HTTPException(status_code=500, detail=str(e)) +# ── Sofiia Auto-Router & Budget Dashboard ───────────────────────────────────── + + try: + from sofiia_auto_router import ( + select_model_auto, classify_task, explain_selection, + ProviderBudget as _ProviderBudget, get_full_catalog, + refresh_ollama_models_async, + ) + from provider_budget import track_usage, get_dashboard_data, set_provider_limit, get_stats + SOFIIA_AUTO_ROUTER_AVAILABLE = True + logger.info("✅ Sofiia Auto-Router loaded") + except ImportError as _e: + SOFIIA_AUTO_ROUTER_AVAILABLE = False + logger.warning("⚠️ Sofiia Auto-Router not available: %s", _e) + + +class AutoRouteRequest(BaseModel): + prompt: str + force_fast: bool = False + force_capable: bool = False + prefer_local: bool = False + prefer_cheap: bool = False + + +class BudgetLimitRequest(BaseModel): + provider: str + monthly_limit_usd: Optional[float] = None + topup_balance_usd: Optional[float] = None + + +@app.post("/v1/sofiia/auto-route") +async def sofiia_auto_route(req: AutoRouteRequest): + """Classify a prompt and return the recommended model profile for Sofiia.""" + if not SOFIIA_AUTO_ROUTER_AVAILABLE: + raise HTTPException(status_code=503, detail="Auto-router not available") + result = select_model_auto( + prompt=req.prompt, + force_fast=req.force_fast, + force_capable=req.force_capable, + prefer_local=req.prefer_local, + 
prefer_cheap=req.prefer_cheap, + ) + return { + "profile_name": result.profile_name, + "model_id": result.model_id, + "provider": result.provider, + "task_type": result.task_type, + "confidence": result.confidence, + "complexity": result.complexity, + "reason": result.reason, + "fallback_used": result.fallback_used, + "all_candidates": result.all_candidates, + "ambiguous": result.ambiguous, + "runner_up": result.runner_up, + "all_scores": result.all_scores, + "explanation": explain_selection(result), + } + + +@app.get("/v1/sofiia/budget") +async def sofiia_budget_dashboard(): + """Return budget dashboard data: token usage, costs, balances per provider.""" + if not SOFIIA_AUTO_ROUTER_AVAILABLE: + raise HTTPException(status_code=503, detail="Budget tracker not available") + return get_dashboard_data() + + +@app.post("/v1/sofiia/budget/limits") +async def set_budget_limits(req: BudgetLimitRequest): + """Set monthly limit or top-up balance for a provider.""" + if not SOFIIA_AUTO_ROUTER_AVAILABLE: + raise HTTPException(status_code=503, detail="Budget tracker not available") + set_provider_limit( + provider=req.provider, + monthly_limit_usd=req.monthly_limit_usd, + topup_balance_usd=req.topup_balance_usd, + ) + return {"status": "ok", "provider": req.provider} + + +@app.get("/v1/sofiia/budget/stats") +async def sofiia_budget_stats(window_hours: int = 24): + """Return per-provider stats for the given time window (hours).""" + if not SOFIIA_AUTO_ROUTER_AVAILABLE: + raise HTTPException(status_code=503, detail="Budget tracker not available") + stats = get_stats(window_hours=window_hours) + return { + p: { + "provider": s.provider, + "total_cost_usd": round(s.total_cost_usd, 5), + "call_count": s.call_count, + "tokens_in": s.total_input_tokens, + "tokens_out": s.total_output_tokens, + "avg_latency_ms": round(s.avg_latency_ms), + "top_models": s.top_models, + } + for p, s in stats.items() + } + + +@app.get("/v1/sofiia/catalog") +async def sofiia_model_catalog(refresh_ollama: 
bool = False): + """Return full model catalog with availability status.""" + if not SOFIIA_AUTO_ROUTER_AVAILABLE: + raise HTTPException(status_code=503, detail="Auto-router not available") + if refresh_ollama: + await refresh_ollama_models_async() + return { + "models": get_full_catalog(), + "total": len(get_full_catalog()), + "available_count": sum(1 for m in get_full_catalog() if m["available"]), + } + + @app.on_event("shutdown") async def shutdown_event(): """Cleanup connections on shutdown""" diff --git a/services/router/memory_retrieval.py b/services/router/memory_retrieval.py index bf1aaaea..93120006 100644 --- a/services/router/memory_retrieval.py +++ b/services/router/memory_retrieval.py @@ -20,6 +20,7 @@ import json import logging import re import hashlib +from time import monotonic from typing import Optional, Dict, Any, List from dataclasses import dataclass, field from datetime import datetime @@ -41,6 +42,20 @@ PENDING_QUESTIONS_LIMIT = int(os.getenv("AGENT_PENDING_QUESTIONS_LIMIT", "5")) SHARED_AGRO_LIBRARY_ENABLED = os.getenv("AGROMATRIX_SHARED_LIBRARY_ENABLED", "true").lower() == "true" SHARED_AGRO_LIBRARY_REQUIRE_REVIEW = os.getenv("AGROMATRIX_SHARED_LIBRARY_REQUIRE_REVIEW", "true").lower() == "true" DOC_VERSION_PREVIEW_CHARS = int(os.getenv("DOC_VERSION_PREVIEW_CHARS", "240")) +WARNING_THROTTLE_SECONDS = float(os.getenv("MEMORY_RETRIEVAL_WARNING_THROTTLE_S", "60") or "60") +_warning_last_ts: Dict[str, float] = {} + + +def _warning_throttled(key: str, message: str) -> None: + """Emit repetitive warnings at most once per throttle window.""" + if WARNING_THROTTLE_SECONDS <= 0: + logger.warning(message) + return + now = monotonic() + last = _warning_last_ts.get(key, 0.0) + if now - last >= WARNING_THROTTLE_SECONDS: + _warning_last_ts[key] = now + logger.warning(message) @dataclass @@ -1067,7 +1082,7 @@ class MemoryRetrieval: ) return True except Exception as e: - logger.warning(f"register_pending_question failed: {e}") + 
_warning_throttled("register_pending_question_failed", f"register_pending_question failed: {e}") return False async def resolve_pending_question( @@ -1086,7 +1101,7 @@ class MemoryRetrieval: row = await conn.fetchrow( """ WITH target AS ( - SELECT id + SELECT id, question_fingerprint FROM agent_pending_questions WHERE channel = $1 AND chat_id = $2 @@ -1095,17 +1110,49 @@ class MemoryRetrieval: AND status = 'pending' ORDER BY created_at ASC LIMIT 1 + ), decision AS ( + SELECT + t.id, + CASE + WHEN $5 = 'dismissed' THEN 'dismissed' + WHEN EXISTS ( + SELECT 1 + FROM agent_pending_questions q + WHERE q.channel = $1 + AND q.chat_id = $2 + AND q.user_id = $3 + AND q.agent_id = $4 + AND q.status = 'answered' + AND q.question_fingerprint = t.question_fingerprint + ) THEN 'dismissed' + ELSE 'answered' + END AS next_status, + CASE + WHEN $5 = 'dismissed' THEN $5 + WHEN EXISTS ( + SELECT 1 + FROM agent_pending_questions q + WHERE q.channel = $1 + AND q.chat_id = $2 + AND q.user_id = $3 + AND q.agent_id = $4 + AND q.status = 'answered' + AND q.question_fingerprint = t.question_fingerprint + ) THEN 'duplicate_answered' + ELSE $5 + END AS resolution_reason + FROM target t ) UPDATE agent_pending_questions p - SET status = CASE WHEN $5 = 'dismissed' THEN 'dismissed' ELSE 'answered' END, + SET status = d.next_status, answered_at = NOW(), metadata = COALESCE(p.metadata, '{}'::jsonb) || jsonb_build_object( - 'resolution_reason', $5, + 'resolution_reason', d.resolution_reason, 'answer_fingerprint', COALESCE($6, '') ) - FROM target t - WHERE p.id = t.id + FROM decision d + WHERE p.id = d.id RETURNING p.id """, channel, @@ -1117,7 +1164,7 @@ class MemoryRetrieval: ) return bool(row) except Exception as e: - logger.warning(f"resolve_pending_question failed: {e}") + _warning_throttled("resolve_pending_question_failed", f"resolve_pending_question failed: {e}") return False @staticmethod diff --git a/services/router/offload_client.py b/services/router/offload_client.py index 
78f9da6c..8d05b17e 100644 --- a/services/router/offload_client.py +++ b/services/router/offload_client.py @@ -81,7 +81,7 @@ def get_unavailable_nodes(req_type: str) -> Set[str]: async def offload_infer( nats_client, node_id: str, - required_type: Literal["llm", "vision", "stt", "tts", "ocr", "image"], + required_type: str, # "llm"|"vision"|"stt"|"tts"|"ocr"|"image"|"voice.tts"|"voice.llm"|"voice.stt" job_payload: Dict[str, Any], timeout_ms: int = 25000, ) -> Optional[Dict[str, Any]]: @@ -89,6 +89,8 @@ async def offload_infer( Returns parsed JobResponse dict or None on total failure. Retries on transient errors (timeout, busy). Does NOT retry on provider errors. + + Voice HA subjects use dotted notation: "voice.tts" → node.{id}.voice.tts.request """ subject = f"node.{node_id.lower()}.{required_type}.request" payload_bytes = json.dumps(job_payload).encode() diff --git a/services/router/prompt_builder.py b/services/router/prompt_builder.py index 78499e91..971ce235 100644 --- a/services/router/prompt_builder.py +++ b/services/router/prompt_builder.py @@ -9,6 +9,8 @@ Prompt Builder for DAGI Router import httpx import logging +import os +import time from typing import Dict, Any, Optional from dataclasses import dataclass @@ -43,6 +45,8 @@ class PromptBuilder: self.city_service_url = city_service_url.rstrip("/") self.router_config = router_config or {} self._http_client: Optional[httpx.AsyncClient] = None + self._city_service_unavailable_until = 0.0 + self._city_service_cooldown_s = float(os.getenv("CITY_SERVICE_FAILURE_COOLDOWN_S", "120") or "120") async def _get_http_client(self) -> httpx.AsyncClient: """Lazy initialization of HTTP client""" @@ -80,6 +84,9 @@ class PromptBuilder: async def _fetch_from_database(self, agent_id: str) -> Optional[AgentSystemPrompt]: """Fetch system prompt from city-service API""" + now = time.monotonic() + if now < self._city_service_unavailable_until: + return None try: client = await self._get_http_client() url = 
f"{self.city_service_url}/internal/agents/{agent_id}/system-prompt" @@ -100,10 +107,20 @@ class PromptBuilder: return None except httpx.RequestError as e: - logger.error(f"Error fetching prompt from city-service: {e}") + self._city_service_unavailable_until = time.monotonic() + max(0.0, self._city_service_cooldown_s) + logger.warning( + "Error fetching prompt from city-service: %s; suppressing retries for %.0fs", + e, + self._city_service_cooldown_s, + ) return None except Exception as e: - logger.error(f"Unexpected error fetching prompt: {e}") + self._city_service_unavailable_until = time.monotonic() + max(0.0, self._city_service_cooldown_s) + logger.warning( + "Unexpected error fetching prompt: %s; suppressing retries for %.0fs", + e, + self._city_service_cooldown_s, + ) return None def _get_from_config(self, agent_id: str) -> Optional[AgentSystemPrompt]: diff --git a/services/router/router-config.yml b/services/router/router-config.yml index 16b87615..836578a8 100644 --- a/services/router/router-config.yml +++ b/services/router/router-config.yml @@ -1,15 +1,8 @@ -# DAGI Router Configuration -# Version: 0.6.0 - Telegram agents + differentiated qwen3 profiles - node: id: dagi-devtools-node-01 role: router env: prod - description: "DAGI Router with CrewAI, Telegram Gateway and science-ready tooling" - -# ============================================================================ -# LLM Profiles (використовуємо лише доступні qwen3 моделі) -# ============================================================================ + description: DAGI Router with CrewAI, Telegram Gateway and science-ready tooling llm_profiles: local_qwen3_8b: provider: ollama @@ -19,8 +12,7 @@ llm_profiles: temperature: 0.2 top_p: 0.9 timeout_ms: 30000 - description: "Базова qwen3:8b для інфраструктурних задач" - + description: Базова qwen3:8b для інфраструктурних задач qwen3_strategist_8b: provider: ollama base_url: http://172.17.0.1:11434 @@ -29,8 +21,7 @@ llm_profiles: temperature: 0.15 top_p: 
0.7 timeout_ms: 32000 - description: "Стримана qwen3:8b для стратегічних агентів (Daarwizz, Yaromir, Orchestrator)" - + description: Стримана qwen3:8b для стратегічних агентів (Daarwizz, Yaromir, Orchestrator) qwen3_support_8b: provider: ollama base_url: http://172.17.0.1:11434 @@ -39,8 +30,7 @@ llm_profiles: temperature: 0.35 top_p: 0.88 timeout_ms: 28000 - description: "Підтримка/CRM тон для GREENFOOD, CLAN" - + description: Підтримка/CRM тон для GREENFOOD, CLAN qwen3_science_8b: provider: ollama base_url: http://172.17.0.1:11434 @@ -49,8 +39,7 @@ llm_profiles: temperature: 0.1 top_p: 0.65 timeout_ms: 40000 - description: "Наукові агенти (Helion, DRUID, Nutra, Monitor)" - + description: Наукові агенти (Helion, DRUID, Nutra, Monitor) qwen3_creative_8b: provider: ollama base_url: http://172.17.0.1:11434 @@ -59,8 +48,7 @@ llm_profiles: temperature: 0.6 top_p: 0.92 timeout_ms: 32000 - description: "Комʼюніті та мультимодальні агенти (Soul, EONARCH)" - + description: Комʼюніті та мультимодальні агенти (Soul, EONARCH) qwen3_vision_8b: provider: ollama base_url: http://172.17.0.1:11434 @@ -69,8 +57,7 @@ llm_profiles: temperature: 0.2 top_p: 0.9 timeout_ms: 60000 - description: "Vision qwen3 для EONARCH/Helion" - + description: Vision qwen3 для EONARCH/Helion mistral_community_7b: provider: ollama base_url: http://172.17.0.1:11434 @@ -79,8 +66,7 @@ llm_profiles: temperature: 0.35 top_p: 0.9 timeout_ms: 32000 - description: "Mistral 7B для CRM/community агентів (GREENFOOD, CLAN, SOUL, EONARCH)" - + description: Mistral 7B для CRM/community агентів (GREENFOOD, CLAN, SOUL, EONARCH) cloud_deepseek: provider: deepseek base_url: https://api.deepseek.com @@ -89,8 +75,7 @@ llm_profiles: max_tokens: 2048 temperature: 0.2 timeout_ms: 40000 - description: "DeepSeek для важких DevTools задач (опційно)" - + description: DeepSeek для важких DevTools задач (опційно) cloud_mistral: provider: mistral base_url: https://api.mistral.ai/v1 @@ -99,607 +84,702 @@ llm_profiles: max_tokens: 4096 
temperature: 0.3 timeout_ms: 60000 - description: "Mistral Large для складних задач, reasoning, аналізу" - + description: Mistral Large для складних задач, reasoning, аналізу cloud_grok: provider: grok base_url: https://api.x.ai api_key_env: GROK_API_KEY - model: grok-2-1212 - max_tokens: 2048 + model: grok-4-1-fast-reasoning + max_tokens: 8192 temperature: 0.2 - timeout_ms: 60000 - description: "Grok для SOFIIA (технічний суверен)" + timeout_ms: 90000 + description: "Grok 4.1 Fast Reasoning — Sofiia primary (AGENTS.md: complex reasoning, 2M context)" -# ============================================================================ -# Orchestrator Providers -# ============================================================================ + cloud_claude_sonnet: + provider: anthropic + api_key_env: ANTHROPIC_API_KEY + model: claude-sonnet-4-5 + max_tokens: 8192 + temperature: 0.2 + timeout_ms: 120000 + description: "Claude Sonnet — Sofiia code & architecture intelligence" + + cloud_claude_haiku: + provider: anthropic + api_key_env: ANTHROPIC_API_KEY + model: claude-haiku-3-5 + max_tokens: 4096 + temperature: 0.25 + timeout_ms: 30000 + description: "Claude Haiku — fast responses for Sofiia" + + cloud_glm5: + provider: glm + base_url: https://open.bigmodel.cn/api/paas/v4 + api_key_env: GLM5_API_KEY + model: glm-4-plus + max_tokens: 4096 + temperature: 0.3 + timeout_ms: 30000 + description: "GLM-4 Plus (Z.AI) — дешевий, швидкий, добре знає українську" + + cloud_glm5_flash: + provider: glm + base_url: https://open.bigmodel.cn/api/paas/v4 + api_key_env: GLM5_API_KEY + model: glm-4-flash + max_tokens: 2048 + temperature: 0.3 + timeout_ms: 15000 + description: "GLM-4 Flash (Z.AI) — безкоштовний, найшвидший" + + local_qwen35_35b: + provider: ollama + base_url: http://localhost:11434 + model: qwen3.5:35b-a3b + max_tokens: 4096 + temperature: 0.2 + timeout_ms: 90000 + description: "Qwen3.5 35B MoE (НОДА2) — флагман локально" + + local_glm47_32k: + provider: ollama + base_url: 
http://localhost:11434 + model: glm-4.7-flash:32k + max_tokens: 2048 + temperature: 0.3 + timeout_ms: 60000 + description: "GLM-4.7 Flash 32K (НОДА2) — локальний GLM" + + local_deepseek_r1_70b: + provider: ollama + base_url: http://localhost:11434 + model: deepseek-r1:70b + max_tokens: 4096 + temperature: 0.1 + timeout_ms: 180000 + description: "DeepSeek-R1 70B (НОДА2) — локальний reasoning" + + local_deepseek_coder_33b: + provider: ollama + base_url: http://localhost:11434 + model: deepseek-coder:33b + max_tokens: 2048 + temperature: 0.15 + timeout_ms: 90000 + description: "DeepSeek Coder 33B (НОДА2) — локальний code specialist" + + local_gemma3: + provider: ollama + base_url: http://localhost:11434 + model: gemma3:latest + max_tokens: 2048 + temperature: 0.35 + timeout_ms: 45000 + description: "Gemma3 (НОДА2) — Google efficient local" + + local_mistral_nemo: + provider: ollama + base_url: http://localhost:11434 + model: mistral-nemo:12b + max_tokens: 2048 + temperature: 0.35 + timeout_ms: 45000 + description: "Mistral Nemo 12B (НОДА2) — 128K context local" orchestrator_providers: crewai: type: orchestrator base_url: http://localhost:9010 timeout_ms: 120000 - description: "CrewAI multi-agent workflow orchestrator" + description: CrewAI multi-agent workflow orchestrator vision_encoder: type: vision base_url: http://vision-encoder:8001 timeout_ms: 30000 - description: "Vision Encoder (OpenCLIP ViT-L/14)" - -# ============================================================================ -# Agents Configuration -# ============================================================================ + description: Vision Encoder (OpenCLIP ViT-L/14) agents: devtools: - description: "DevTools Agent - помічник з кодом, тестами й інфраструктурою" - default_llm: local_qwen3_8b - system_prompt: | - Ти - DevTools Agent в екосистемі DAARION.city. 
+ description: DevTools Agent - помічник з кодом, тестами й інфраструктурою + default_llm: cloud_deepseek + system_prompt: 'Ти - DevTools Agent в екосистемі DAARION.city. + Ти допомагаєш розробникам з: + - аналізом коду та пошуком багів + - рефакторингом + - написанням тестів + - git операціями + Відповідай коротко, конкретно, із прикладами коду. - Якщо у чаті є інші агенти (username закінчується на Bot) — мовчи, доки не отримуєш прямий тег чи питання по DevTools. + + Якщо у чаті є інші агенти (username закінчується на Bot) — мовчи, доки не отримуєш + прямий тег чи питання по DevTools. + + ' tools: - - id: fs_read - type: builtin - description: "Читання файлів" - - id: fs_write - type: builtin - description: "Запис файлів" - - id: run_tests - type: builtin - description: "Запуск тестів" - - id: git_diff - type: builtin - description: "Git diff" - - id: git_commit - type: builtin - description: "Git commit" - + - id: fs_read + type: builtin + description: Читання файлів + - id: fs_write + type: builtin + description: Запис файлів + - id: run_tests + type: builtin + description: Запуск тестів + - id: git_diff + type: builtin + description: Git diff + - id: git_commit + type: builtin + description: Git commit microdao_orchestrator: - description: "Multi-agent orchestrator for MicroDAO workflows" + description: Multi-agent orchestrator for MicroDAO workflows default_llm: qwen3_strategist_8b - system_prompt: | - You are the central router/orchestrator for DAARION.city MicroDAO. - Coordinate multiple agents, respect RBAC, escalate only when needed. - Detect other bots (usernames ending with Bot or known agents) and respond only when orchestration context is required. + system_prompt: 'You are the central router/orchestrator for DAARION.city MicroDAO. + Coordinate multiple agents, respect RBAC, escalate only when needed. + + Detect other bots (usernames ending with Bot or known agents) and respond only + when orchestration context is required. 
+ + ' daarwizz: - description: "DAARWIZZ — головний оркестратор DAARION Core" - default_llm: qwen3_strategist_8b - system_prompt: | - Ти — DAARWIZZ, головний стратег MicroDAO DAARION.city. + description: DAARWIZZ — головний оркестратор DAARION Core + default_llm: cloud_deepseek + system_prompt: 'Ти — DAARWIZZ, головний стратег MicroDAO DAARION.city. + Тримаєш контекст roadmap, delegation, crew-команд. - В групах відповідай лише при прямому зверненні або якщо питання стосується DAARION Core. + + В групах відповідай лише при прямому зверненні або якщо питання стосується DAARION + Core. + Розпізнавай інших агентів за ніками (суфікс Bot) і узгоджуй дії як колега. + ' greenfood: - description: "GREENFOOD Assistant - ERP orchestrator" - default_llm: qwen3_support_8b - system_prompt: | - Ти — GREENFOOD Assistant, фронтовий оркестратор ERP-системи для крафтових виробників. - Розумій, хто з тобою говорить (комітент, покупець, склад, бухгалтер), та делегуй задачі відповідним під-агентам. - Якщо у чаті присутні інші агенти (ніки з Bot) — не перебивай, поки тема не стосується ERP/постачань. - tools: - - id: image_generation - type: tool - endpoint: http://image-gen-service:9600/image/generate - description: "Етикетки, маркетинг" - - id: web_search - type: external - endpoint: http://swapper-service:8890/web-search - description: "Пошук постачальників/ринків" - - id: vision - type: llm - model: qwen3-vl:8b - description: "Візуальний контроль партій" - - id: ocr - type: external - endpoint: http://swapper-service:8890/ocr - description: "Зчитування накладних" + description: GREENFOOD Assistant - ERP orchestrator + default_llm: cloud_deepseek + system_prompt: 'Ти — GREENFOOD Assistant, фронтовий оркестратор ERP-системи для + крафтових виробників. + Розумій, хто з тобою говорить (комітент, покупець, склад, бухгалтер), та делегуй + задачі відповідним під-агентам. + + Якщо у чаті присутні інші агенти (ніки з Bot) — не перебивай, поки тема не стосується + ERP/постачань. 
+ + ' + tools: + - id: image_generation + type: tool + endpoint: http://image-gen-service:9600/image/generate + description: Етикетки, маркетинг + - id: web_search + type: external + endpoint: http://swapper-service:8890/web-search + description: Пошук постачальників/ринків + - id: vision + type: llm + model: qwen3-vl:8b + description: Візуальний контроль партій + - id: ocr + type: external + endpoint: http://swapper-service:8890/ocr + description: Зчитування накладних agromatrix: - description: "AgroMatrix — агроаналітика та кооперація" - default_llm: qwen3_science_8b - system_prompt: | - Ти — AgroMatrix, AI-агент для агроаналітики, планування сезонів та кооперації фермерів. + description: AgroMatrix — агроаналітика та кооперація + default_llm: cloud_deepseek + system_prompt: 'Ти — AgroMatrix, AI-агент для агроаналітики, планування сезонів + та кооперації фермерів. + Відповідай лаконічно, давай практичні поради для агросектору. + ' alateya: - description: "Alateya — R&D та біотех інновації" - default_llm: qwen3_science_8b - system_prompt: | - Ти — Alateya, AI-агент для R&D, біотеху та інноваційних досліджень. - Відповідай точними, структурованими відповідями та посилайся на джерела, якщо є. + description: Alateya — R&D та біотех інновації + default_llm: cloud_deepseek + system_prompt: 'Ти — Alateya, AI-агент для R&D, біотеху та інноваційних досліджень. + Відповідай точними, структурованими відповідями та посилайся на джерела, якщо + є. + + ' clan: - description: "CLAN — комунікації кооперативів" - default_llm: qwen3_support_8b - system_prompt: | - Ти — CLAN, координуєш комунікацію, оголошення та community operations. - Відповідай лише коли тема стосується координації, а звернення адресовано тобі (тег @ClanBot чи згадка кланів). + description: CLAN — комунікації кооперативів + default_llm: cloud_deepseek + system_prompt: 'Ти — CLAN, координуєш комунікацію, оголошення та community operations. 
+ + Відповідай лише коли тема стосується координації, а звернення адресовано тобі + (тег @ClanBot чи згадка кланів). + Розпізнавай ботів за username та погоджуй з ними дії. + ' soul: - description: "SOUL / Spirit — духовний гід комʼюніті" - default_llm: qwen3_support_8b - system_prompt: | - Ти — Spirit/SOUL, ментор живої операційної системи. + description: SOUL / Spirit — духовний гід комʼюніті + default_llm: cloud_deepseek + system_prompt: 'Ти — Spirit/SOUL, ментор живої операційної системи. + Пояснюй місію, підтримуй мораль, працюй із soft-skills. + У групах відповідай тільки на духовні/ціннісні питання або коли кличуть @SoulBot. + ' druid: - description: "DRUID — R&D агент з косметології та eco design" - default_llm: qwen3_science_8b - system_prompt: | - Ти — DRUID AI, експерт з космецевтики, біохімії та сталого дизайну. + description: DRUID — R&D агент з косметології та eco design + default_llm: cloud_deepseek + system_prompt: 'Ти — DRUID AI, експерт з космецевтики, біохімії та сталого дизайну. + Працюй з формулами, стехіометрією, етичними ланцюгами постачання. - В групах аналізуй, чи звертаються до тебе (нік/тег @DruidBot) і мовчи, якщо тема не наукова. - tools: - - id: web_search - type: external - endpoint: http://swapper-service:8890/web-search - description: "Наукові статті" - - id: math - type: tool - description: "Хімічні/математичні обчислення" - - id: data_analysis - type: tool - description: "Аналіз лабораторних даних" - - id: chemistry - type: tool - description: "Моделювання реакцій" - - id: biology - type: tool - description: "Біологічні взаємодії" - - id: units - type: tool - description: "Конвертація одиниць" - - id: vision - type: llm - model: qwen3-vl:8b - description: "Аналіз фото формул/упаковок" - - id: ocr - type: external - endpoint: http://swapper-service:8890/ocr - description: "Зчитування етикеток" + В групах аналізуй, чи звертаються до тебе (нік/тег @DruidBot) і мовчи, якщо + тема не наукова. 
+ + ' + tools: + - id: web_search + type: external + endpoint: http://swapper-service:8890/web-search + description: Наукові статті + - id: math + type: tool + description: Хімічні/математичні обчислення + - id: data_analysis + type: tool + description: Аналіз лабораторних даних + - id: chemistry + type: tool + description: Моделювання реакцій + - id: biology + type: tool + description: Біологічні взаємодії + - id: units + type: tool + description: Конвертація одиниць + - id: vision + type: llm + model: qwen3-vl:8b + description: Аналіз фото формул/упаковок + - id: ocr + type: external + endpoint: http://swapper-service:8890/ocr + description: Зчитування етикеток nutra: - description: "NUTRA — нутріцевтичний агент" - default_llm: qwen3_science_8b - system_prompt: | - Ти — NUTRA, допомагаєш з формулами нутрієнтів, біомедичних добавок та лабораторних інтерпретацій. + description: NUTRA — нутріцевтичний агент + default_llm: cloud_deepseek + system_prompt: 'Ти — NUTRA, допомагаєш з формулами нутрієнтів, біомедичних добавок + та лабораторних інтерпретацій. + Відповідай з науковою точністю, посилайся на джерела, якщо можливо. - Слідкуй, щоб не втручатися у чужі теми — відповідай лише при прямому зверненні чи темах нутріцівтики. - tools: - - id: web_search - type: external - endpoint: http://swapper-service:8890/web-search - description: "Пошук клінічних досліджень" - - id: math - type: tool - description: "Дозування/конверсії" - - id: data_analysis - type: tool - description: "Лабораторні таблиці" - - id: biology - type: tool - description: "Фізіологічні взаємодії" - - id: units - type: tool - description: "Конвертація одиниць" - - id: ocr - type: external - endpoint: http://swapper-service:8890/ocr - description: "Зчитування протоколів" + Слідкуй, щоб не втручатися у чужі теми — відповідай лише при прямому зверненні + чи темах нутріцівтики. 
+ + ' + tools: + - id: web_search + type: external + endpoint: http://swapper-service:8890/web-search + description: Пошук клінічних досліджень + - id: math + type: tool + description: Дозування/конверсії + - id: data_analysis + type: tool + description: Лабораторні таблиці + - id: biology + type: tool + description: Фізіологічні взаємодії + - id: units + type: tool + description: Конвертація одиниць + - id: ocr + type: external + endpoint: http://swapper-service:8890/ocr + description: Зчитування протоколів eonarch: - description: "EONARCH — мультимодальний агент (vision + chat)" - default_llm: qwen3_support_8b - system_prompt: | - Ти — EONARCH, аналізуєш зображення, PDF та текстові запити. - Враховуй присутність інших ботів та працюй лише за прямим тегом або коли потрібно мультимодальне тлумачення. - tools: - - id: vision - type: llm - model: qwen3-vl:8b - description: "Vision reasoning" - - id: ocr - type: external - endpoint: http://swapper-service:8890/ocr - description: "Видобуток тексту" - - id: image_generation - type: tool - endpoint: http://image-gen-service:9600/image/generate - description: "Мокапи, схеми" + description: EONARCH — мультимодальний агент (vision + chat) + default_llm: cloud_deepseek + system_prompt: 'Ти — EONARCH, аналізуєш зображення, PDF та текстові запити. + Враховуй присутність інших ботів та працюй лише за прямим тегом або коли потрібно + мультимодальне тлумачення. + + ' + tools: + - id: vision + type: llm + model: qwen3-vl:8b + description: Vision reasoning + - id: ocr + type: external + endpoint: http://swapper-service:8890/ocr + description: Видобуток тексту + - id: image_generation + type: tool + endpoint: http://image-gen-service:9600/image/generate + description: Мокапи, схеми helion: - description: "Helion - AI agent for Energy Union platform" - default_llm: qwen3_science_8b - system_prompt: | - Ти - Helion, AI-агент платформи Energy Union. 
- Допомагай користувачам з технологіями EcoMiner/BioMiner, токеномікою та DAO governance. - - Консультуй щодо hardware, стейкінгу, інфраструктури. - - Аналізуй PDF/зображення, коли просять. - - В групах мовчи, якщо немає явного звернення до тебе (Helion/Хеліон/Хелион або тег @HelionBot) І тема не про енергетику. - - Якщо тебе звернули напряму, відповідай навіть на операційні питання (як завантажити PDF/Word/Excel, як надіслати посилання, ліміти відповіді тощо). - - Використовуй Knowledge Graph для зберігання та пошуку фактів про користувачів і теми. - Визначай інших агентів за ніком (суфікс Bot) і спілкуйся як з колегами. - tools: - # Web Tools (Swapper) - - id: web_search - type: external - endpoint: http://swapper-service:8890/web/search - method: POST - description: "Пошук в інтернеті (DuckDuckGo)" - - id: web_extract - type: external - endpoint: http://swapper-service:8890/web/extract - method: POST - description: "Витягнути контент з URL (Jina/Trafilatura)" - - id: web_read - type: external - endpoint: http://swapper-service:8890/web/read - method: GET - description: "Прочитати сторінку за URL" - # Image Generation (FLUX) - - id: image_generate - type: external - endpoint: http://swapper-service:8890/image/generate - method: POST - description: "Згенерувати зображення за описом (FLUX Klein 4B)" - # Video Generation (Grok xAI) - - id: video_generate - type: external - endpoint: http://swapper-service:8890/video/generate - method: POST - description: "Згенерувати коротке відео (до 6 сек) за описом (Grok xAI)" - # Math & Data - - id: math - type: tool - description: "Енергетичні розрахунки" - - id: data_analysis - type: tool - description: "Обробка сенсорних даних" - # Knowledge Graph Tools (Neo4j) - - id: graph_create_node - type: external - endpoint: http://router:8000/v1/graph/nodes - method: POST - description: "Створити вузол в Knowledge Graph (User, Topic, Fact, Entity)" - - id: graph_create_relation - type: external - endpoint: 
http://router:8000/v1/graph/relationships - method: POST - description: "Створити зв'язок між вузлами (KNOWS, MENTIONED, RELATED_TO)" - - id: graph_query - type: external - endpoint: http://router:8000/v1/graph/query - method: POST - description: "Запит до Knowledge Graph (знайти зв'язки, факти)" - - id: graph_search - type: external - endpoint: http://router:8000/v1/graph/search - method: GET - description: "Пошук по Knowledge Graph" - - id: units - type: tool - description: "Конвертація енергетичних одиниць" - - id: vision - type: llm - model: qwen3-vl:8b - description: "Опис технічних схем" - - id: ocr - type: external - endpoint: http://swapper-service:8890/ocr - description: "OCR креслень" + description: Helion - AI agent for Energy Union platform + default_llm: cloud_deepseek + system_prompt: 'Ти - Helion, AI-агент платформи Energy Union. + Допомагай користувачам з технологіями EcoMiner/BioMiner, токеномікою та DAO + governance. + + - Консультуй щодо hardware, стейкінгу, інфраструктури. + + - Аналізуй PDF/зображення, коли просять. + + - В групах мовчи, якщо немає явного звернення до тебе (Helion/Хеліон/Хелион + або тег @HelionBot) І тема не про енергетику. + + - Якщо тебе звернули напряму, відповідай навіть на операційні питання (як завантажити + PDF/Word/Excel, як надіслати посилання, ліміти відповіді тощо). + + - Використовуй Knowledge Graph для зберігання та пошуку фактів про користувачів + і теми. + + Визначай інших агентів за ніком (суфікс Bot) і спілкуйся як з колегами. 
+ + ' + tools: + - id: web_search + type: external + endpoint: http://swapper-service:8890/web/search + method: POST + description: Пошук в інтернеті (DuckDuckGo) + - id: web_extract + type: external + endpoint: http://swapper-service:8890/web/extract + method: POST + description: Витягнути контент з URL (Jina/Trafilatura) + - id: web_read + type: external + endpoint: http://swapper-service:8890/web/read + method: GET + description: Прочитати сторінку за URL + - id: image_generate + type: external + endpoint: http://swapper-service:8890/image/generate + method: POST + description: Згенерувати зображення за описом (FLUX Klein 4B) + - id: video_generate + type: external + endpoint: http://swapper-service:8890/video/generate + method: POST + description: Згенерувати коротке відео (до 6 сек) за описом (Grok xAI) + - id: math + type: tool + description: Енергетичні розрахунки + - id: data_analysis + type: tool + description: Обробка сенсорних даних + - id: graph_create_node + type: external + endpoint: http://router:8000/v1/graph/nodes + method: POST + description: Створити вузол в Knowledge Graph (User, Topic, Fact, Entity) + - id: graph_create_relation + type: external + endpoint: http://router:8000/v1/graph/relationships + method: POST + description: Створити зв'язок між вузлами (KNOWS, MENTIONED, RELATED_TO) + - id: graph_query + type: external + endpoint: http://router:8000/v1/graph/query + method: POST + description: Запит до Knowledge Graph (знайти зв'язки, факти) + - id: graph_search + type: external + endpoint: http://router:8000/v1/graph/search + method: GET + description: Пошук по Knowledge Graph + - id: units + type: tool + description: Конвертація енергетичних одиниць + - id: vision + type: llm + model: qwen3-vl:8b + description: Опис технічних схем + - id: ocr + type: external + endpoint: http://swapper-service:8890/ocr + description: OCR креслень yaromir: - description: "Yaromir CrewAI (Вождь/Проводник/Домир/Создатель)" - default_llm: qwen3_strategist_8b 
- system_prompt: | - Ти — Yaromir Crew. Стратегія, наставництво, психологічна підтримка команди. + description: Yaromir CrewAI (Вождь/Проводник/Домир/Создатель) + default_llm: cloud_deepseek + system_prompt: 'Ти — Yaromir Crew. Стратегія, наставництво, психологічна підтримка + команди. + Розрізняй інших ботів за ніком та відповідай лише на стратегічні запити. + ' monitor: - description: "Monitor Agent - архітектор-інспектор DAGI" + description: Monitor Agent - архітектор-інспектор DAGI default_llm: local_qwen3_8b - system_prompt: | - Ти - Monitor Agent, інфраструктурний інспектор DAGI: ноди, сервіси, пайплайни, алерти. - Ти знаєш, що DAARWIZZ — головний оркестратор мережі DAARION.city; для governance/маршрутизації посилайся на нього. - Відповідай коротко і по суті; якщо даних бракує — одразу кажи, який саме метрик/лог потрібен. + system_prompt: 'Ти - Monitor Agent, інфраструктурний інспектор DAGI: ноди, сервіси, + пайплайни, алерти. + + Ти знаєш, що DAARWIZZ — головний оркестратор мережі DAARION.city; для governance/маршрутизації + посилайся на нього. + + Відповідай коротко і по суті; якщо даних бракує — одразу кажи, який саме метрик/лог + потрібен. 
+ + ' tools: - - id: get_metrics - type: builtin - - id: check_health - type: builtin - + - id: get_metrics + type: builtin + - id: check_health + type: builtin senpai: - description: "SENPAI - Trading Advisor & Capital Markets" - default_llm: cloud_deepseek - system_prompt: | - (loaded from senpai_prompt.txt) - - sofiia: - description: "SOFIIA - Chief AI Architect & Technical Sovereign" + description: SENPAI - Trading Advisor & Capital Markets default_llm: cloud_grok - system_prompt: | - (loaded from sofiia_prompt.txt) + system_prompt: '(loaded from senpai_prompt.txt) + ' + sofiia: + description: SOFIIA - Chief AI Architect & Technical Sovereign + default_llm: cloud_claude_sonnet + system_prompt: '(loaded from sofiia_prompt.txt) -# ============================================================================ -# Routing Rules -# ============================================================================ + ' routing: - - id: microdao_chat - priority: 10 - when: - mode: chat - use_llm: local_qwen3_8b - description: "microDAO chat → local qwen3" - - - id: qa_build_mode - priority: 8 - when: - mode: qa_build - use_llm: local_qwen3_8b - description: "Q&A generation from parsed docs" - - - id: rag_query_mode - priority: 7 - when: - mode: rag_query - use_llm: local_qwen3_8b - description: "RAG query with Memory" - - - id: crew_mode - priority: 3 - when: - mode: crew - use_provider: orchestrator_crewai - description: "CrewAI workflow orchestration" - - - id: vision_encoder_embed - priority: 3 - when: - mode: vision_embed - use_provider: vision_encoder - description: "Vision embeddings" - - - id: devtools_tool_execution - priority: 3 - when: - mode: devtools - use_provider: devtools_devtools - description: "DevTools sandbox/actions" - - - id: explicit_provider_override - priority: 5 - when: - metadata_has: provider - use_metadata: provider - description: "Explicit provider override" - - - id: greenfood_cloud_override - priority: 4 - when: - agent: greenfood - 
metadata_equals: - requires_complex_reasoning: true - use_llm: cloud_deepseek - description: "GREENFOOD складні запити → DeepSeek" - - - id: clan_cloud_override - priority: 4 - when: - agent: clan - metadata_equals: - requires_complex_reasoning: true - use_llm: cloud_deepseek - description: "CLAN складні запити → DeepSeek" - - - id: soul_cloud_override - priority: 4 - when: - agent: soul - metadata_equals: - requires_complex_reasoning: true - use_llm: cloud_deepseek - description: "SOUL складні запити → DeepSeek" - - - id: eonarch_cloud_override - priority: 4 - when: - agent: eonarch - metadata_equals: - requires_complex_reasoning: true - use_llm: cloud_deepseek - description: "EONARCH складні запити → DeepSeek" - - - id: devtools_complex_cloud - priority: 10 - when: - agent: devtools - and: - - task_type: - - refactor_large - - architecture_review - - security_audit - - performance_analysis - - api_key_available: DEEPSEEK_API_KEY - use_llm: cloud_deepseek - description: "Тяжкі DevTools задачі → DeepSeek" - - - id: devtools_default_local - priority: 20 - when: - agent: devtools - use_llm: local_qwen3_8b - description: "Будь-які інші DevTools задачі" - - - id: microdao_orchestrator_agent - priority: 5 - when: - agent: microdao_orchestrator - use_llm: qwen3_strategist_8b - use_context_prompt: true - description: "Оркестратор → стратегічний профіль" - - - id: daarwizz_agent - priority: 5 - when: - agent: daarwizz - use_llm: cloud_deepseek - fallback_llm: cloud_mistral - use_context_prompt: true - description: "Daarwizz orchestrator" - - - id: greenfood_agent - priority: 5 - when: - agent: greenfood - use_llm: cloud_deepseek - fallback_llm: cloud_mistral - use_context_prompt: true - description: "GREENFOOD ERP" - - - id: agromatrix_agent - priority: 5 - when: - agent: agromatrix - use_llm: cloud_deepseek - fallback_llm: cloud_mistral - use_context_prompt: true - description: "AgroMatrix агроаналітика" - - - id: alateya_agent - priority: 5 - when: - agent: alateya - 
use_llm: cloud_deepseek - fallback_llm: cloud_mistral - use_context_prompt: true - description: "Alateya R&D" - - - id: clan_agent - priority: 5 - when: - agent: clan - use_llm: cloud_deepseek - fallback_llm: cloud_mistral - use_context_prompt: true - description: "CLAN community operations" - - - id: soul_agent - priority: 5 - when: - agent: soul - use_llm: cloud_deepseek - fallback_llm: cloud_mistral - use_context_prompt: true - description: "SOUL / Spirit мотивація" - - - id: druid_agent - priority: 5 - when: - agent: druid - use_llm: cloud_deepseek - fallback_llm: cloud_mistral - use_context_prompt: true - description: "DRUID science" - - - id: nutra_agent - priority: 5 - when: - agent: nutra - use_llm: cloud_deepseek - fallback_llm: cloud_mistral - use_context_prompt: true - description: "NUTRA science" - - - id: eonarch_agent - priority: 5 - when: - agent: eonarch - use_llm: cloud_deepseek - fallback_llm: cloud_mistral - use_context_prompt: true - description: "EONARCH vision" - - - id: helion_agent - priority: 5 - when: - agent: helion - use_llm: cloud_deepseek - fallback_llm: cloud_mistral - use_context_prompt: true - description: "Helion energy - DeepSeek з fallback на Mistral" - - - id: yaromir_agent - priority: 5 - when: - agent: yaromir - use_llm: cloud_deepseek - fallback_llm: cloud_mistral - use_context_prompt: true - description: "Yaromir crew" - - - id: monitor_agent - priority: 5 - when: - agent: monitor - use_llm: local_qwen3_8b - use_context_prompt: true - description: "Моніторинг інфраструктури" - - - - id: senpai_agent - priority: 5 - when: - agent: senpai - use_llm: cloud_deepseek - fallback_llm: cloud_mistral - use_context_prompt: true - description: "SENPAI trading - DeepSeek" - - - id: sofiia_agent - priority: 5 - when: - agent: sofiia - use_llm: cloud_grok - fallback_llm: cloud_deepseek - use_context_prompt: true - description: "SOFIIA architect - Grok (fallback DeepSeek)" - - - id: oneok_agent - priority: 5 - when: - agent: oneok - 
use_llm: cloud_deepseek - fallback_llm: cloud_mistral - use_context_prompt: true - description: "1OK Window Master - DeepSeek" - - - id: fallback_local - priority: 100 - when: {} - use_llm: local_qwen3_8b - description: "Fallback: всі інші запити → базова qwen3" - -# ============================================================================ -# Telemetry & Policies -# ============================================================================ +- id: microdao_chat + priority: 10 + when: + mode: chat + use_llm: local_qwen3_8b + description: microDAO chat → local qwen3 +- id: qa_build_mode + priority: 8 + when: + mode: qa_build + use_llm: local_qwen3_8b + description: Q&A generation from parsed docs +- id: rag_query_mode + priority: 7 + when: + mode: rag_query + use_llm: local_qwen3_8b + description: RAG query with Memory +- id: crew_mode + priority: 3 + when: + mode: crew + use_provider: orchestrator_crewai + description: CrewAI workflow orchestration +- id: vision_encoder_embed + priority: 3 + when: + mode: vision_embed + use_provider: vision_encoder + description: Vision embeddings +- id: devtools_tool_execution + priority: 3 + when: + mode: devtools + use_provider: devtools_devtools + description: DevTools sandbox/actions +- id: explicit_provider_override + priority: 5 + when: + metadata_has: provider + use_metadata: provider + description: Explicit provider override +- id: greenfood_cloud_override + priority: 4 + when: + agent: greenfood + metadata_equals: + requires_complex_reasoning: true + use_llm: cloud_deepseek + description: GREENFOOD складні запити → DeepSeek +- id: clan_cloud_override + priority: 4 + when: + agent: clan + metadata_equals: + requires_complex_reasoning: true + use_llm: cloud_deepseek + description: CLAN складні запити → DeepSeek +- id: soul_cloud_override + priority: 4 + when: + agent: soul + metadata_equals: + requires_complex_reasoning: true + use_llm: cloud_deepseek + description: SOUL складні запити → DeepSeek +- id: 
eonarch_cloud_override + priority: 4 + when: + agent: eonarch + metadata_equals: + requires_complex_reasoning: true + use_llm: cloud_deepseek + description: EONARCH складні запити → DeepSeek +- id: devtools_complex_cloud + priority: 10 + when: + agent: devtools + and: + - task_type: + - refactor_large + - architecture_review + - security_audit + - performance_analysis + - api_key_available: DEEPSEEK_API_KEY + use_llm: cloud_deepseek + description: Тяжкі DevTools задачі → DeepSeek +- id: devtools_default_local + priority: 20 + when: + agent: devtools + use_llm: local_qwen3_8b + description: Будь-які інші DevTools задачі +- id: microdao_orchestrator_agent + priority: 5 + when: + agent: microdao_orchestrator + use_llm: qwen3_strategist_8b + use_context_prompt: true + description: Оркестратор → стратегічний профіль +- id: daarwizz_agent + priority: 5 + when: + agent: daarwizz + use_llm: cloud_deepseek + fallback_llm: cloud_mistral + use_context_prompt: true + description: Daarwizz orchestrator +- id: greenfood_agent + priority: 5 + when: + agent: greenfood + use_llm: cloud_deepseek + fallback_llm: cloud_mistral + use_context_prompt: true + description: GREENFOOD ERP +- id: agromatrix_agent + priority: 5 + when: + agent: agromatrix + use_llm: cloud_deepseek + fallback_llm: cloud_mistral + use_context_prompt: true + description: AgroMatrix агроаналітика +- id: alateya_agent + priority: 5 + when: + agent: alateya + use_llm: cloud_deepseek + fallback_llm: cloud_mistral + use_context_prompt: true + description: Alateya R&D +- id: clan_agent + priority: 5 + when: + agent: clan + use_llm: cloud_deepseek + fallback_llm: cloud_mistral + use_context_prompt: true + description: CLAN community operations +- id: soul_agent + priority: 5 + when: + agent: soul + use_llm: cloud_deepseek + fallback_llm: cloud_mistral + use_context_prompt: true + description: SOUL / Spirit мотивація +- id: druid_agent + priority: 5 + when: + agent: druid + use_llm: cloud_deepseek + fallback_llm: 
cloud_mistral + use_context_prompt: true + description: DRUID science +- id: nutra_agent + priority: 5 + when: + agent: nutra + use_llm: cloud_deepseek + fallback_llm: cloud_mistral + use_context_prompt: true + description: NUTRA science +- id: eonarch_agent + priority: 5 + when: + agent: eonarch + use_llm: cloud_deepseek + fallback_llm: cloud_mistral + use_context_prompt: true + description: EONARCH vision +- id: helion_agent + priority: 5 + when: + agent: helion + use_llm: cloud_deepseek + fallback_llm: cloud_mistral + use_context_prompt: true + description: Helion energy - DeepSeek з fallback на Mistral +- id: yaromir_agent + priority: 5 + when: + agent: yaromir + use_llm: cloud_deepseek + fallback_llm: cloud_mistral + use_context_prompt: true + description: Yaromir crew +- id: monitor_agent + priority: 5 + when: + agent: monitor + use_llm: local_qwen3_8b + use_context_prompt: true + description: Моніторинг інфраструктури +- id: senpai_agent + priority: 5 + when: + agent: senpai + use_llm: cloud_grok + fallback_llm: cloud_deepseek + use_context_prompt: true + description: SENPAI trading - Grok (fallback DeepSeek) +- id: sofiia_agent + priority: 5 + when: + agent: sofiia + use_llm: cloud_claude_sonnet + fallback_llm: cloud_grok + use_context_prompt: true + description: SOFIIA architect - Claude Sonnet primary (fallback Grok) +- id: oneok_agent + priority: 5 + when: + agent: oneok + use_llm: cloud_deepseek + fallback_llm: cloud_mistral + use_context_prompt: true + description: 1OK Window Master - DeepSeek +- id: fallback_local + priority: 100 + when: {} + use_llm: local_qwen3_8b + description: 'Fallback: всі інші запити → базова qwen3' telemetry: enabled: true log_level: INFO metrics: - - requests_total - - latency_ms - - tokens_used - + - requests_total + - latency_ms + - tokens_used policies: rate_limit: enabled: false diff --git a/services/sofiia-console/static/index.html b/services/sofiia-console/static/index.html index 38a67b75..6d1d5891 100644 --- 
a/services/sofiia-console/static/index.html +++ b/services/sofiia-console/static/index.html @@ -229,6 +229,58 @@ padding: 2px 6px; border-radius: 4px; } + .aurora-clip-picker { + margin-top: 8px; + border: 1px solid var(--border); + border-radius: 8px; + background: var(--bg2); + padding: 8px; + display: none; + gap: 8px; + } + .aurora-clip-head { + display: flex; + justify-content: space-between; + gap: 8px; + font-size: 0.74rem; + color: var(--muted); + align-items: center; + } + .aurora-clip-head strong { + color: var(--text); + font-weight: 600; + } + .aurora-clip-range-row { + display: grid; + grid-template-columns: 54px 1fr 62px; + align-items: center; + gap: 8px; + font-size: 0.73rem; + color: var(--muted); + } + .aurora-clip-range-row input[type="range"] { + width: 100%; + accent-color: var(--gold); + cursor: pointer; + } + .aurora-clip-actions { + display: flex; + gap: 6px; + flex-wrap: wrap; + } + .aurora-clip-btn { + background: rgba(255,255,255,0.04); + border: 1px solid var(--border); + color: var(--muted); + border-radius: 6px; + padding: 4px 8px; + font-size: 0.7rem; + cursor: pointer; + } + .aurora-clip-btn:hover { + border-color: var(--gold); + color: var(--text); + } .aurora-compare-wrap { position: relative; overflow: hidden; @@ -791,6 +843,27 @@ accept=".mp4,.avi,.mov,.mkv,.webm,.mp3,.wav,.flac,.m4a,.aac,.ogg,.jpg,.jpeg,.png,.tiff,.tif,.webp" onchange="auroraOnFilePicked(this)"> +
+
+ 🎚 Фрагмент На Прев'ю + +
+
+ Start + + 0s +
+
+ End + + 0s +
+
+ + + +
+
Файл
@@ -833,6 +906,12 @@ + + @@ -869,7 +948,7 @@ - + @@ -912,6 +991,15 @@ + + + +
Пріоритет: Обличчя @@ -997,7 +1085,14 @@
- + + +
Загальне
+
Статус обробки${auroraEsc(processingStatus)}
PSNR${psnr}
Час обробки${procText}
Моделі${models.length ? auroraEsc(models.join(', ')) : '—'}
+ ${warnings.length ? `
⚠ ${auroraEsc(warnings.join(' | '))}
` : ''}
`; wrap.style.display = 'block'; @@ -3228,44 +3673,82 @@ async function auroraReprocess(options) { } const reBtn = document.getElementById('auroraReprocessBtn'); if (reBtn) reBtn.disabled = true; - const payload = (options && typeof options === 'object') ? options : {}; + const incoming = (options && typeof options === 'object') ? options : {}; + const passCountUi = Number(document.getElementById('auroraReprocessPasses')?.value || 1); + const passes = Math.max(1, Math.min(4, Number(incoming.passes) || passCountUi)); + const secondPassUi = Boolean(document.getElementById('auroraReprocessSecondPass')?.checked); + const secondPass = Object.prototype.hasOwnProperty.call(incoming, 'second_pass') + ? Boolean(incoming.second_pass) + : secondPassUi; + + const analysisControls = auroraCollectAnalysisControls(); + const uiExport = auroraCollectExportOptions(); + const analysisExport = auroraBuildAnalysisExportHints(analysisControls); + const mergedExport = { ...auroraSuggestedExport, ...uiExport, ...analysisExport, ...(incoming.export_options || {}) }; + let priority = incoming.priority || analysisControls.priority || auroraSuggestedPriority || 'balanced'; + if (typeof priority !== 'string' || !priority.trim()) priority = 'balanced'; + + const basePayload = { + mode: auroraMode, + priority, + export_options: mergedExport, + }; + + let sourceJobId = auroraJobId; + let lastJobId = auroraJobId; try { - const r = await fetch(`${API}/api/aurora/reprocess/${encodeURIComponent(auroraJobId)}`, { - method: 'POST', - headers: { 'Content-Type': 'application/json' }, - body: JSON.stringify(payload), - }); - if (!r.ok) { - const body = await r.text(); - throw new Error(body || `HTTP ${r.status}`); + auroraStopPolling(); + for (let i = 1; i <= passes; i += 1) { + const payload = { ...basePayload, ...incoming, second_pass: secondPass }; + const r = await fetch(`${API}/api/aurora/reprocess/${encodeURIComponent(sourceJobId)}`, { + method: 'POST', + headers: { 'Content-Type': 
'application/json' }, + body: JSON.stringify(payload), + }); + if (!r.ok) { + const body = await r.text(); + throw new Error(body || `HTTP ${r.status}`); + } + const data = await r.json(); + const newJobId = String(data.job_id || '').trim(); + if (!newJobId) throw new Error('job_id missing in reprocess response'); + lastJobId = newJobId; + auroraSetActiveJobId(newJobId); + auroraSetSmartRunId(null); + auroraSmartStatusCache = null; + auroraSetSmartPolicyText(`reprocess ${i}/${passes}`); + auroraStatusCache = null; + auroraResultCache = null; + auroraPollErrorCount = 0; + auroraLastProgress = 1; + auroraPollInFlight = false; + const resultCard = document.getElementById('auroraResultCard'); + if (resultCard) resultCard.style.display = 'none'; + auroraSetProgress(1, 'processing', `dispatching reprocess ${i}/${passes}`); + auroraUpdateQueuePosition(null); + auroraUpdateTiming(0, null, null); + auroraUpdateLivePerf(null, null); + const cancelBtn = document.getElementById('auroraCancelBtn'); + if (cancelBtn) cancelBtn.style.display = 'inline-block'; + if (i < passes) { + const done = await auroraWaitForTerminal(newJobId, { passLabel: `reprocess ${i}/${passes}` }); + const status = String(done?.status || '').toLowerCase(); + if (status !== 'completed') { + throw new Error(`reprocess ${i}/${passes} завершився зі статусом ${status}`); + } + } + sourceJobId = newJobId; } - const data = await r.json(); - auroraSetActiveJobId(data.job_id); - auroraSetSmartRunId(null); - auroraSmartStatusCache = null; - auroraSetSmartPolicyText('audio local'); - auroraStatusCache = null; - auroraResultCache = null; - auroraPollErrorCount = 0; - auroraLastProgress = 1; - auroraPollInFlight = false; - const resultCard = document.getElementById('auroraResultCard'); - if (resultCard) resultCard.style.display = 'none'; - auroraSetProgress(1, 'processing', 'dispatching (reprocess)'); - auroraUpdateQueuePosition(null); - auroraUpdateTiming(0, null, null); - auroraUpdateLivePerf(null, null); - const 
cancelBtn = document.getElementById('auroraCancelBtn'); - if (cancelBtn) cancelBtn.style.display = 'inline-block'; auroraStopPolling(); auroraPollTimer = setInterval(auroraPollStatus, 2000); await auroraPollStatus(); - auroraChatAdd('assistant', `Запустила reprocess: ${auroraJobId}`); + auroraChatAdd('assistant', `Запустила reprocess ×${passes}: ${lastJobId}`); await auroraRefreshJobs(); } catch (e) { alert(`Aurora reprocess error: ${e.message || e}`); } finally { if (reBtn) reBtn.disabled = false; + auroraUpdateReprocessLabel(); } } @@ -3472,6 +3955,7 @@ async function auroraPollStatus() { }); auroraUpdateQueuePosition(st.queue_position); auroraUpdateStorage(st.storage); + auroraUpdateCancelButton(st.status, st.current_stage); const reBtn = document.getElementById('auroraReprocessBtn'); if (reBtn) reBtn.disabled = !(st.status === 'completed' || st.status === 'failed' || st.status === 'cancelled'); if (st.status === 'completed') { @@ -3604,10 +4088,19 @@ async function auroraStart() { async function auroraCancel() { if (!auroraJobId) return; + const cancelBtn = document.getElementById('auroraCancelBtn'); + if (cancelBtn) { + cancelBtn.style.display = 'inline-block'; + cancelBtn.disabled = true; + cancelBtn.textContent = 'Зупиняю...'; + } try { await fetch(`${API}/api/aurora/cancel/${encodeURIComponent(auroraJobId)}`, { method: 'POST' }); + await auroraPollStatus(); await auroraRefreshJobs(); - } catch (_) {} + } catch (_) { + auroraUpdateCancelButton('processing', null); + } } async function auroraLoadResult(jobId) { @@ -3950,6 +4443,7 @@ function auroraInitTab() { auroraBindDropzone(); auroraRefreshHealth(); auroraUpdatePriorityLabel(); + auroraUpdateReprocessLabel(); auroraSetSmartRunId(auroraSmartRunId); if (!auroraSmartRunId) { auroraSetSmartPolicyText('standby'); diff --git a/services/swapper-service/app/main.py b/services/swapper-service/app/main.py index 07591750..b5cb0462 100644 --- a/services/swapper-service/app/main.py +++ 
b/services/swapper-service/app/main.py @@ -810,18 +810,18 @@ class SwapperService: # FLUX / Diffusion model loading logger.info(f"🎨 Loading diffusion model: {hf_name}") from diffusers import AutoPipelineForText2Image - + diffusion_dtype = torch.bfloat16 if self.device == "cuda" else torch.float32 pipeline = AutoPipelineForText2Image.from_pretrained( hf_name, - torch_dtype=torch.bfloat16, - use_safetensors=True + torch_dtype=diffusion_dtype ) pipeline.to(self.device) - pipeline.enable_model_cpu_offload() # Optimize VRAM usage + if self.device == "cuda": + pipeline.enable_model_cpu_offload() # Optimize VRAM usage on CUDA self.hf_models[model_name] = pipeline self.hf_processors[model_name] = None # No separate processor for diffusion - logger.info(f"✅ Diffusion model loaded: {model_name} with CPU offload enabled") + logger.info(f"✅ Diffusion model loaded: {model_name} (device={self.device})") else: # Generic loading diff --git a/services/swapper-service/config/swapper_config_node2.yaml b/services/swapper-service/config/swapper_config_node2.yaml index bf596111..de532861 100644 --- a/services/swapper-service/config/swapper_config_node2.yaml +++ b/services/swapper-service/config/swapper_config_node2.yaml @@ -38,3 +38,12 @@ storage: models_dir: /app/models cache_dir: /app/cache swap_dir: /app/swap + +models: + flux-klein-4b: + path: huggingface:segmind/tiny-sd + type: image_generation + size_gb: 0.7 + priority: medium + capabilities: + - image_generation