---
# Swapper Configuration for Node #3 (AI/ML Workstation)
# Single-active LLM scheduler
# Threadripper PRO + RTX 3090 24GB - GPU-intensive workloads
#
# NOTE(review): the original file had been flattened onto one line (making it
# one giant comment); block structure reconstructed below. Verify that
# models/storage/ollama are top-level keys, as the consumer expects.

swapper:
  mode: single-active
  max_concurrent_models: 1
  # Seconds allowed for unloading one model and loading the next.
  model_swap_timeout: 300
  gpu_enabled: true
  metal_acceleration: false  # NVIDIA GPU, not Apple Silicon

  # Model to auto-load at startup.
  # qwen3-8b - primary model (4.87 GB), fast response to the first request.
  default_model: qwen3-8b

models:
  # Primary LLM - Qwen3 8B (High Priority) - Main model from INFRASTRUCTURE.md
  qwen3-8b:
    # Quoted: plain scalars with multiple colons are legal but fragile.
    path: "ollama:qwen3:8b"
    type: llm
    size_gb: 4.87
    priority: high
    description: "Primary LLM for general tasks and conversations"

  # Vision Model - Qwen3-VL 8B (High Priority) - For image processing
  qwen3-vl-8b:
    path: "ollama:qwen3-vl:8b"
    type: vision
    size_gb: 5.72
    priority: high
    description: "Vision model for image understanding and processing"

  # Qwen2.5 7B Instruct (High Priority)
  qwen2.5-7b-instruct:
    path: "ollama:qwen2.5:7b-instruct-q4_K_M"
    type: llm
    size_gb: 4.36
    priority: high
    description: "Qwen2.5 7B Instruct model"

  # Lightweight LLM - Qwen2.5 3B Instruct (Medium Priority)
  qwen2.5-3b-instruct:
    path: "ollama:qwen2.5:3b-instruct-q4_K_M"
    type: llm
    size_gb: 1.80
    priority: medium
    description: "Lightweight LLM for faster responses"

  # Math Specialist - Qwen2 Math 7B (High Priority)
  qwen2-math-7b:
    path: "ollama:qwen2-math:7b"
    type: math
    size_gb: 4.13
    priority: high
    description: "Specialized model for mathematical tasks"

storage:
  models_dir: /app/models
  cache_dir: /app/cache
  swap_dir: /app/swap

ollama:
  # From Docker container to Ollama service.
  url: "http://ollama:11434"
  timeout: 300