feat(production): sync all modified production files to git

Includes updates across gateway, router, node-worker, memory-service,
aurora-service, swapper, sofiia-console UI and node2 infrastructure:

- gateway-bot: Dockerfile, http_api.py, druid/aistalk prompts, doc_service
- services/router: main.py, router-config.yml, fabric_metrics, memory_retrieval, offload_client, prompt_builder
- services/node-worker: worker.py, main.py, config.py, fabric_metrics
- services/memory-service: Dockerfile, database.py, main.py, requirements
- services/aurora-service: main.py (+399), kling.py, quality_report.py
- services/swapper-service: main.py, swapper_config_node2.yaml
- services/sofiia-console: static/index.html (console UI update)
- config: agent_registry, crewai_agents/teams, router_agents
- ops/fabric_preflight.sh: updated preflight checks
- router-config.yml, docker-compose.node2.yml: infra updates
- docs: NODA1-AGENT-ARCHITECTURE, fabric_contract updated

Made-with: Cursor
2026-03-03 07:13:29 -08:00
parent 9aac835882
commit e9dedffa48
35 changed files with 3317 additions and 805 deletions
--- a/services/node-worker/config.py
+++ b/services/node-worker/config.py
@@ -14,3 +14,19 @@ STT_PROVIDER = os.getenv("STT_PROVIDER", "none")
 TTS_PROVIDER = os.getenv("TTS_PROVIDER", "none")
 OCR_PROVIDER = os.getenv("OCR_PROVIDER", "vision_prompted")
 IMAGE_PROVIDER = os.getenv("IMAGE_PROVIDER", "none")
+
+# Memory Service URL (used by memory_service STT/TTS providers); override via the MEMORY_SERVICE_URL env var.
+MEMORY_SERVICE_URL = os.getenv("MEMORY_SERVICE_URL", "http://memory-service:8000")
+
+# ── Voice HA: dedicated concurrency limits (separate from generic stt/tts/llm) ──
+# These control semaphores for node.{id}.voice.*.request subjects.
+# Independent from MAX_CONCURRENCY so voice never starves generic inference.
+VOICE_MAX_CONCURRENT_TTS = int(os.getenv("VOICE_MAX_CONCURRENT_TTS", "4"))
+VOICE_MAX_CONCURRENT_LLM = int(os.getenv("VOICE_MAX_CONCURRENT_LLM", "2"))
+VOICE_MAX_CONCURRENT_STT = int(os.getenv("VOICE_MAX_CONCURRENT_STT", "2"))
+
+# Timeouts for voice subjects (milliseconds), parsed as integers from the environment. Router uses these as defaults.
+VOICE_TTS_DEADLINE_MS  = int(os.getenv("VOICE_TTS_DEADLINE_MS",  "3000"))
+VOICE_LLM_FAST_MS      = int(os.getenv("VOICE_LLM_FAST_MS",      "9000"))
+VOICE_LLM_QUALITY_MS   = int(os.getenv("VOICE_LLM_QUALITY_MS",  "12000"))
+VOICE_STT_DEADLINE_MS  = int(os.getenv("VOICE_STT_DEADLINE_MS",  "6000"))