- TTS: xtts-v2 integration with voice cloning support
- Document: docling integration for PDF/DOCX/PPTX processing
- Memory Service: added /facts/upsert, /facts/{key}, /facts endpoints
- Added required dependencies (TTS, docling)
64 lines
1.8 KiB
YAML
---
# Swapper Configuration for Node #3 (AI/ML Workstation)
# Single-active LLM scheduler
# Threadripper PRO + RTX 3090 24GB - GPU-intensive workloads

swapper:
  mode: single-active
  max_concurrent_models: 1
  model_swap_timeout: 300  # presumably seconds — confirm against the swapper's docs
  gpu_enabled: true
  metal_acceleration: false  # NVIDIA GPU, not Apple Silicon

  # Model to load automatically at startup.
  # qwen3-8b - primary model (4.87 GB), fast response to the first request.
  default_model: qwen3-8b

models:
  # Primary LLM - Qwen3 8B (High Priority) - Main model from INFRASTRUCTURE.md
  qwen3-8b:
    path: "ollama:qwen3:8b"
    type: llm
    size_gb: 4.87
    priority: high
    description: "Primary LLM for general tasks and conversations"

  # Vision Model - Qwen3-VL 8B (High Priority) - For image processing
  qwen3-vl-8b:
    path: "ollama:qwen3-vl:8b"
    type: vision
    size_gb: 5.72
    priority: high
    description: "Vision model for image understanding and processing"

  # Qwen2.5 7B Instruct (High Priority)
  qwen2.5-7b-instruct:
    path: "ollama:qwen2.5:7b-instruct-q4_K_M"
    type: llm
    size_gb: 4.36
    priority: high
    description: "Qwen2.5 7B Instruct model"

  # Lightweight LLM - Qwen2.5 3B Instruct (Medium Priority)
  qwen2.5-3b-instruct:
    path: "ollama:qwen2.5:3b-instruct-q4_K_M"
    type: llm
    size_gb: 1.80
    priority: medium
    description: "Lightweight LLM for faster responses"

  # Math Specialist - Qwen2 Math 7B (High Priority)
  qwen2-math-7b:
    path: "ollama:qwen2-math:7b"
    type: math
    size_gb: 4.13
    priority: high
    description: "Specialized model for mathematical tasks"

storage:
  models_dir: /app/models
  cache_dir: /app/cache
  swap_dir: /app/swap

ollama:
  url: "http://ollama:11434"  # From Docker container to Ollama service
  timeout: 300  # presumably seconds — confirm against the Ollama client config