# Swapper Configuration for Node #1 (Production Server)
# Hetzner GEX44 - NVIDIA RTX 4000 SFF Ada (20GB VRAM)
#
# NOTE: Swapper is now a runtime gateway / executor only.
# Source of truth for models is NCS (Node Capabilities Service).
# No hardcoded model lists — Swapper queries NCS or Ollama /api/tags at startup.

# Identifier of the node this configuration belongs to.
# NOTE(review): the file header calls this "Node #1" while the value is
# spelled `noda1` — confirm the spelling matches what NCS and other services
# expect before "correcting" it.
node_id: noda1

# Inference runtimes Swapper talks to. Only `ollama` is active in this file;
# the `comfyui` entry below is commented out.
runtimes:
  ollama:
    # Base URL of the Ollama HTTP API.
    # NOTE(review): 172.18.0.1 looks like a Docker bridge gateway address
    # (host reached from inside a container) — confirm for this deployment.
    url: http://172.18.0.1:11434
    # NOTE(review): unit is not stated here; presumably seconds (the keys
    # under `timeouts` carry an explicit `_ms` suffix, this one does not) —
    # confirm against the consumer.
    timeout: 300
  # Disabled runtime, kept for reference.
  # comfyui:
  #   url: http://127.0.0.1:8188

# Concurrency and model-residency caps. How each cap is enforced is decided
# by the Swapper code, not by this file.
limits:
  # Presumably the maximum number of LLM requests served in parallel —
  # TODO confirm.
  llm_concurrency: 2
  # Presumably the maximum number of vision requests served in parallel —
  # TODO confirm.
  vision_concurrency: 1
  # NOTE(review): presumably the number of models allowed to stay loaded at
  # once. The header describes a 20GB VRAM card — confirm that four models
  # are expected to fit within `gpu.vram_threshold_gb`.
  max_concurrent_models: 4
  # NOTE(review): unit is not stated; presumably seconds — confirm.
  model_swap_timeout: 300

# Per-task-type timeouts. The `_ms` suffix indicates milliseconds; the
# equivalent in seconds is noted on each line.
# NOTE(review): if `runtimes.ollama.timeout` (300) is in seconds, `llm_ms`
# (120 s) is shorter than it — confirm which limit is meant to fire first.
timeouts:
  llm_ms: 120000  # 120 s
  vision_ms: 180000  # 180 s
  stt_ms: 60000  # 60 s
  tts_ms: 60000  # 60 s
  image_gen_ms: 300000  # 300 s

# GPU settings for this node (the header lists an NVIDIA RTX 4000 SFF Ada
# with 20GB VRAM).
gpu:
  enabled: true
  # Metal is Apple's GPU API; `false` is consistent with the NVIDIA card
  # named in the header.
  metal_acceleration: false
  # Presumably unloads models when an out-of-memory condition is hit —
  # exact trigger and eviction order are defined by the consumer; TODO confirm.
  auto_unload_on_oom: true
  # NOTE(review): presumably a VRAM usage ceiling in GB. With the 20GB card
  # from the header this would leave 2GB of headroom — confirm the semantics
  # (ceiling vs. free-memory floor).
  vram_threshold_gb: 18

# Filesystem locations used by Swapper. All paths are absolute.
# NOTE(review): the `/app/...` prefix suggests in-container paths — confirm
# that the corresponding volumes are mounted so data survives restarts.
storage:
  models_dir: /app/models
  cache_dir: /app/cache
  swap_dir: /app/swap
  # NOTE(review): located under /root, so the process presumably runs as
  # root or has this path mounted in — confirm.
  huggingface_cache: /root/.cache/huggingface

# Hugging Face model-loading options. The key names mirror arguments of the
# Transformers `from_pretrained` API; presumably they are forwarded to it —
# TODO confirm in the consumer.
huggingface:
  # Run on the NVIDIA GPU.
  device: cuda
  # Half-precision weights.
  torch_dtype: float16
  # SECURITY NOTE(review): in Hugging Face Transformers, trust_remote_code
  # allows Python code shipped inside a model repository to execute on this
  # host. Left unchanged because some models require it — confirm that only
  # vetted/pinned model repositories can be loaded on this production node.
  trust_remote_code: true
  low_cpu_mem_usage: true