"""Node-worker configuration from environment.""" import os NODE_ID = os.getenv("NODE_ID", "noda2") NATS_URL = os.getenv("NATS_URL", "nats://dagi-nats:4222") OLLAMA_BASE_URL = os.getenv("OLLAMA_BASE_URL", "http://host.docker.internal:11434") DEFAULT_LLM = os.getenv("NODE_DEFAULT_LLM", "qwen3:14b") DEFAULT_VISION = os.getenv("NODE_DEFAULT_VISION", "llava:13b") MAX_CONCURRENCY = int(os.getenv("NODE_WORKER_MAX_CONCURRENCY", "2")) MAX_PAYLOAD_BYTES = int(os.getenv("NODE_WORKER_MAX_PAYLOAD_BYTES", str(1024 * 1024))) PORT = int(os.getenv("PORT", "8109")) STT_PROVIDER = os.getenv("STT_PROVIDER", "none") TTS_PROVIDER = os.getenv("TTS_PROVIDER", "none") OCR_PROVIDER = os.getenv("OCR_PROVIDER", "vision_prompted") IMAGE_PROVIDER = os.getenv("IMAGE_PROVIDER", "none") # Memory Service URL (used by memory_service STT/TTS providers) MEMORY_SERVICE_URL = os.getenv("MEMORY_SERVICE_URL", "http://memory-service:8000") # ── Voice HA: dedicated concurrency limits (separate from generic stt/tts/llm) ── # These control semaphores for node.{id}.voice.*.request subjects. # Independent from MAX_CONCURRENCY so voice never starves generic inference. VOICE_MAX_CONCURRENT_TTS = int(os.getenv("VOICE_MAX_CONCURRENT_TTS", "4")) VOICE_MAX_CONCURRENT_LLM = int(os.getenv("VOICE_MAX_CONCURRENT_LLM", "2")) VOICE_MAX_CONCURRENT_STT = int(os.getenv("VOICE_MAX_CONCURRENT_STT", "2")) # Timeouts for voice subjects (milliseconds). Router uses these as defaults. VOICE_TTS_DEADLINE_MS = int(os.getenv("VOICE_TTS_DEADLINE_MS", "3000")) VOICE_LLM_FAST_MS = int(os.getenv("VOICE_LLM_FAST_MS", "9000")) VOICE_LLM_QUALITY_MS = int(os.getenv("VOICE_LLM_QUALITY_MS", "12000")) VOICE_STT_DEADLINE_MS = int(os.getenv("VOICE_STT_DEADLINE_MS", "6000"))