---
# Swapper Configuration for Node #1 (Production Server)
# Single-active LLM scheduler
# Hetzner GEX44 - NVIDIA RTX 4000 SFF Ada (20GB VRAM)

swapper:
  mode: single-active
  max_concurrent_models: 1
  model_swap_timeout: 300  # seconds to wait for a model swap before failing
  gpu_enabled: true
  metal_acceleration: false  # NVIDIA GPU, not Apple Silicon

  # Model to load automatically at startup.
  # qwen3-8b - primary model (4.87 GB), fast response to the first request.
  default_model: qwen3-8b

models:
  # Primary LLM - Qwen3 8B (High Priority) - Main model from INFRASTRUCTURE.md
  qwen3-8b:
    path: "ollama:qwen3:8b"  # quoted: plain scalar contains colons
    type: llm
    size_gb: 4.87
    priority: high
    description: "Primary LLM for general tasks and conversations"

  # Vision Model - Qwen3-VL 8B (High Priority) - For image processing
  qwen3-vl-8b:
    path: "ollama:qwen3-vl:8b"
    type: vision
    size_gb: 5.72
    priority: high
    description: "Vision model for image understanding and processing"

  # Qwen2.5 7B Instruct (High Priority)
  qwen2.5-7b-instruct:
    path: "ollama:qwen2.5:7b-instruct-q4_K_M"
    type: llm
    size_gb: 4.36
    priority: high
    description: "Qwen2.5 7B Instruct model"

  # Lightweight LLM - Qwen2.5 3B Instruct (Medium Priority)
  qwen2.5-3b-instruct:
    path: "ollama:qwen2.5:3b-instruct-q4_K_M"
    type: llm
    size_gb: 1.80
    priority: medium
    description: "Lightweight LLM for faster responses"

  # Math Specialist - Qwen2 Math 7B (High Priority)
  qwen2-math-7b:
    path: "ollama:qwen2-math:7b"
    type: math
    size_gb: 4.13
    priority: high
    description: "Specialized model for mathematical tasks"

storage:
  models_dir: /app/models
  cache_dir: /app/cache
  swap_dir: /app/swap

ollama:
  url: "http://ollama:11434"  # From Docker container to Ollama service
  timeout: 300  # seconds