---
# Swapper Configuration for Node #3 (AI/ML Workstation)
# Single-active LLM scheduler
# Threadripper PRO + RTX 3090 24GB - GPU-intensive workloads
#
# NOTE(review): the original file had been flattened onto one line (making it
# one giant comment); block structure reconstructed below. Verify that
# models/storage/ollama are top-level keys, as the consumer expects.

swapper:
  mode: single-active
  max_concurrent_models: 1
  # Seconds allowed for unloading one model and loading the next.
  model_swap_timeout: 300
  gpu_enabled: true
  metal_acceleration: false  # NVIDIA GPU, not Apple Silicon

  # Model to auto-load at startup.
  # qwen3-8b - primary model (4.87 GB), fast response to the first request.
  default_model: qwen3-8b

models:
  # Primary LLM - Qwen3 8B (High Priority) - Main model from INFRASTRUCTURE.md
  qwen3-8b:
    # Quoted: plain scalars with multiple colons are legal but fragile.
    path: "ollama:qwen3:8b"
    type: llm
    size_gb: 4.87
    priority: high
    description: "Primary LLM for general tasks and conversations"

  # Vision Model - Qwen3-VL 8B (High Priority) - For image processing
  qwen3-vl-8b:
    path: "ollama:qwen3-vl:8b"
    type: vision
    size_gb: 5.72
    priority: high
    description: "Vision model for image understanding and processing"

  # Qwen2.5 7B Instruct (High Priority)
  qwen2.5-7b-instruct:
    path: "ollama:qwen2.5:7b-instruct-q4_K_M"
    type: llm
    size_gb: 4.36
    priority: high
    description: "Qwen2.5 7B Instruct model"

  # Lightweight LLM - Qwen2.5 3B Instruct (Medium Priority)
  qwen2.5-3b-instruct:
    path: "ollama:qwen2.5:3b-instruct-q4_K_M"
    type: llm
    size_gb: 1.80
    priority: medium
    description: "Lightweight LLM for faster responses"

  # Math Specialist - Qwen2 Math 7B (High Priority)
  qwen2-math-7b:
    path: "ollama:qwen2-math:7b"
    type: math
    size_gb: 4.13
    priority: high
    description: "Specialized model for mathematical tasks"

storage:
  models_dir: /app/models
  cache_dir: /app/cache
  swap_dir: /app/swap

ollama:
  # From Docker container to Ollama service.
  url: "http://ollama:11434"
  timeout: 300