---
# Swapper Configuration for Node #2 (Development Node)
# Single-active LLM scheduler
# MacBook Pro M4 Max - Apple Silicon (40-core GPU, 64GB RAM)
# Auto-generated configuration with available Ollama models
#
# NOTE(review): this file was recovered from a whitespace-flattened copy;
# section nesting (e.g. whether `models` lives under `swapper`) was
# reconstructed — confirm against the consuming scheduler's schema.

swapper:
  mode: single-active
  max_concurrent_models: 1
  model_swap_timeout: 300  # seconds
  gpu_enabled: true
  metal_acceleration: true  # Apple Silicon GPU acceleration

  # Model to load automatically at startup (optional).
  # If not set, models are loaded only on demand.
  # Recommended: gpt-oss:latest (fast model) or phi3:latest (lightweight model).
  # The startup model must actually be installed in Ollama on NODA2.
  default_model: "qwen3:14b"  # activated automatically at startup

models:
  # Fast LLM - GPT-OSS 20B (High Priority) - Main model for general tasks
  gpt-oss-latest:
    path: "ollama:gpt-oss:latest"
    type: llm
    size_gb: 13.0
    priority: high
    description: "Fast LLM for general tasks and conversations (20.9B params)"

  # Lightweight LLM - Phi3 3.8B (High Priority) - Fast responses
  phi3-latest:
    path: "ollama:phi3:latest"
    type: llm
    size_gb: 2.2
    priority: high
    description: "Lightweight LLM for fast responses (3.8B params)"

  # General Reasoning - Qwen3 14B (High Priority)
  qwen3-14b:
    path: "ollama:qwen3:14b"
    type: llm
    size_gb: 9.3
    priority: high
    description: "Balanced local model for Sofiia and router fallback"

  # Reasoning Model - Qwen3.5 35B A3B (High Priority)
  qwen3.5-35b-a3b:
    path: "ollama:qwen3.5:35b-a3b"
    type: llm
    size_gb: 22.0
    priority: high
    description: "Large reasoning model for complex Sofiia requests"

  # Reasoning Model - GLM 4.7 Flash (High Priority) - Fast general model
  glm-4.7-flash:
    path: "ollama:glm-4.7-flash:32k"
    type: llm
    size_gb: 19.0
    priority: high
    description: "Multi-purpose reasoning model (fast context)"

  # Reasoning Model - Gemma2 27B (Medium Priority) - Strategic reasoning
  gemma2-27b:
    path: "ollama:gemma2:27b"
    type: llm
    size_gb: 15.0
    priority: medium
    description: "Reasoning model for strategic tasks (27.2B params)"

  # Code Specialist - DeepSeek Coder 33B (High Priority) - Advanced code tasks
  deepseek-coder-33b:
    path: "ollama:deepseek-coder:33b"
    type: code
    size_gb: 18.0
    priority: high
    description: "Advanced code specialist model (33B params)"

  # Code Specialist - Qwen2.5 Coder 32B (High Priority) - Advanced code tasks
  qwen2.5-coder-32b:
    path: "ollama:qwen2.5-coder:32b"
    type: code
    size_gb: 19.0
    priority: high
    description: "Advanced code specialist model (32.8B params)"

  # Reasoning Model - DeepSeek R1 70B (High Priority) - Strategic reasoning
  # (large model)
  deepseek-r1-70b:
    path: "ollama:deepseek-r1:70b"
    type: llm
    size_gb: 42.0
    priority: high
    description: "Strategic reasoning model (70.6B params, quantized)"

  # Vision Model - LLaVA 13B (P0 Fix: NODA2 fallback vision)
  # Available in Ollama on NODA2 — used until qwen3-vl:8b is installed
  llava-13b:
    path: "ollama:llava:13b"
    type: vision
    size_gb: 8.0
    priority: high
    description: "LLaVA 13B vision model (multimodal CLIP+LLM). P0 fallback until qwen3-vl:8b."
    vision: true
    ollama_model: "llava:13b"

  # Vision Model - Qwen3-VL 8B (RECOMMENDED: install with: ollama pull qwen3-vl:8b)
  # Better quality than llava:13b. Enable once installed.
  # qwen3-vl-8b:
  #   path: "ollama:qwen3-vl:8b"
  #   type: vision
  #   size_gb: 5.5
  #   priority: high
  #   description: "Qwen3-VL 8B — modern vision-language model (recommended)"
  #   vision: true
  #   ollama_model: "qwen3-vl:8b"

storage:
  models_dir: /app/models
  cache_dir: /app/cache
  swap_dir: /app/swap

ollama:
  # host.docker.internal → native Ollama on MacBook (NODA2 P1 fix)
  url: "http://host.docker.internal:11434"
  timeout: 300  # seconds

# Vision endpoint configuration
# /vision/models returns all models where vision: true
vision:
  default_model: llava-13b
  ollama_base_url: "http://host.docker.internal:11434"