---
# Swapper Configuration for Node #2 (Development Node)
# Single-active LLM scheduler
# MacBook Pro M4 Max - Apple Silicon (40-core GPU, 64GB RAM)
# Auto-generated configuration with available Ollama models
#
# NOTE(review): this file was recovered from a whitespace-flattened copy;
# section nesting (e.g. whether `models` lives under `swapper`) was
# reconstructed — confirm against the consuming scheduler's schema.

swapper:
  mode: single-active
  max_concurrent_models: 1
  model_swap_timeout: 300  # seconds
  gpu_enabled: true
  metal_acceleration: true  # Apple Silicon GPU acceleration

  # Model to load automatically at startup (optional).
  # If not set, models are loaded only on demand.
  # Recommended: gpt-oss:latest (fast model) or phi3:latest (lightweight model).
  # The startup model must actually be installed in Ollama on NODA2.
  default_model: "qwen3:14b"  # activated automatically at startup

models:
  # Fast LLM - GPT-OSS 20B (High Priority) - Main model for general tasks
  gpt-oss-latest:
    path: "ollama:gpt-oss:latest"
    type: llm
    size_gb: 13.0
    priority: high
    description: "Fast LLM for general tasks and conversations (20.9B params)"

  # Lightweight LLM - Phi3 3.8B (High Priority) - Fast responses
  phi3-latest:
    path: "ollama:phi3:latest"
    type: llm
    size_gb: 2.2
    priority: high
    description: "Lightweight LLM for fast responses (3.8B params)"

  # General Reasoning - Qwen3 14B (High Priority)
  qwen3-14b:
    path: "ollama:qwen3:14b"
    type: llm
    size_gb: 9.3
    priority: high
    description: "Balanced local model for Sofiia and router fallback"

  # Reasoning Model - Qwen3.5 35B A3B (High Priority)
  qwen3.5-35b-a3b:
    path: "ollama:qwen3.5:35b-a3b"
    type: llm
    size_gb: 22.0
    priority: high
    description: "Large reasoning model for complex Sofiia requests"

  # Reasoning Model - GLM 4.7 Flash (High Priority) - Fast general model
  glm-4.7-flash:
    path: "ollama:glm-4.7-flash:32k"
    type: llm
    size_gb: 19.0
    priority: high
    description: "Multi-purpose reasoning model (fast context)"

  # Reasoning Model - Gemma2 27B (Medium Priority) - Strategic reasoning
  gemma2-27b:
    path: "ollama:gemma2:27b"
    type: llm
    size_gb: 15.0
    priority: medium
    description: "Reasoning model for strategic tasks (27.2B params)"

  # Code Specialist - DeepSeek Coder 33B (High Priority) - Advanced code tasks
  deepseek-coder-33b:
    path: "ollama:deepseek-coder:33b"
    type: code
    size_gb: 18.0
    priority: high
    description: "Advanced code specialist model (33B params)"

  # Code Specialist - Qwen2.5 Coder 32B (High Priority) - Advanced code tasks
  qwen2.5-coder-32b:
    path: "ollama:qwen2.5-coder:32b"
    type: code
    size_gb: 19.0
    priority: high
    description: "Advanced code specialist model (32.8B params)"

  # Reasoning Model - DeepSeek R1 70B (High Priority) - Strategic reasoning
  # (large model)
  deepseek-r1-70b:
    path: "ollama:deepseek-r1:70b"
    type: llm
    size_gb: 42.0
    priority: high
    description: "Strategic reasoning model (70.6B params, quantized)"

  # Vision Model - LLaVA 13B (P0 Fix: NODA2 fallback vision)
  # Available in Ollama on NODA2 — used until qwen3-vl:8b is installed
  llava-13b:
    path: "ollama:llava:13b"
    type: vision
    size_gb: 8.0
    priority: high
    description: "LLaVA 13B vision model (multimodal CLIP+LLM). P0 fallback until qwen3-vl:8b."
    vision: true
    ollama_model: "llava:13b"

  # Vision Model - Qwen3-VL 8B (RECOMMENDED: install with: ollama pull qwen3-vl:8b)
  # Better quality than llava:13b. Enable once installed.
  # qwen3-vl-8b:
  #   path: "ollama:qwen3-vl:8b"
  #   type: vision
  #   size_gb: 5.5
  #   priority: high
  #   description: "Qwen3-VL 8B — modern vision-language model (recommended)"
  #   vision: true
  #   ollama_model: "qwen3-vl:8b"

storage:
  models_dir: /app/models
  cache_dir: /app/cache
  swap_dir: /app/swap

ollama:
  # host.docker.internal → native Ollama on MacBook (NODA2 P1 fix)
  url: "http://host.docker.internal:11434"
  timeout: 300  # seconds

# Vision endpoint configuration
# /vision/models returns all models where vision: true
vision:
  default_model: llava-13b
  ollama_base_url: "http://host.docker.internal:11434"