Files
microdao-daarion/services/swapper-service/config/swapper_config.yaml
Apple 3de3c8cb36 feat: Add presence heartbeat for Matrix online status
- matrix-gateway: POST /internal/matrix/presence/online endpoint
- usePresenceHeartbeat hook with activity tracking
- Auto away after 5 min inactivity
- Offline on page close/visibility change
- Integrated in MatrixChatRoom component
2025-11-27 00:19:40 -08:00

82 lines
2.6 KiB
YAML
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
---
# Swapper Configuration for Node #1 (Production Server)
# Single-active LLM scheduler
# Hetzner GEX44 - NVIDIA RTX 4000 SFF Ada (20GB VRAM)
# Auto-generated configuration with all available Ollama models

swapper:
  mode: single-active
  max_concurrent_models: 1
  model_swap_timeout: 300
  gpu_enabled: true
  metal_acceleration: false  # NVIDIA GPU, not Apple Silicon

  # Model to load automatically on startup (optional).
  # If not specified, models are loaded only on request.
  # Recommended: qwen3-8b (primary model) or qwen2.5-3b-instruct (lightweight model)
  default_model: qwen3-8b  # Model is activated automatically on startup

models:
  # Primary LLM - Qwen3 8B (High Priority) - Main model from INFRASTRUCTURE.md
  qwen3-8b:
    path: ollama:qwen3:8b
    type: llm
    size_gb: 4.87
    priority: high
    description: "Primary LLM for general tasks and conversations"

  # Vision Model - Qwen3-VL 8B (High Priority) - For image processing
  qwen3-vl-8b:
    path: ollama:qwen3-vl:8b
    type: vision
    size_gb: 5.72
    priority: high
    description: "Vision model for image understanding and processing"

  # Qwen2.5 7B Instruct (High Priority)
  qwen2.5-7b-instruct:
    path: ollama:qwen2.5:7b-instruct-q4_K_M
    type: llm
    size_gb: 4.36
    priority: high
    description: "Qwen2.5 7B Instruct model"

  # Lightweight LLM - Qwen2.5 3B Instruct (Medium Priority)
  qwen2.5-3b-instruct:
    path: ollama:qwen2.5:3b-instruct-q4_K_M
    type: llm
    size_gb: 1.80
    priority: medium
    description: "Lightweight LLM for faster responses"

  # Math Specialist - Qwen2 Math 7B (High Priority)
  qwen2-math-7b:
    path: ollama:qwen2-math:7b
    type: math
    size_gb: 4.13
    priority: high
    description: "Specialized model for mathematical tasks"

  # Lightweight conversational LLM - Mistral Nemo 2.3B (Medium Priority)
  mistral-nemo-2_3b:
    path: ollama:mistral-nemo:2.3b-instruct
    type: llm
    size_gb: 1.60
    priority: medium
    description: "Fast low-cost replies for monitor/service agents"

  # Compact Math Specialist - Qwen2.5 Math 1.5B (Medium Priority)
  qwen2_5-math-1_5b:
    path: ollama:qwen2.5-math:1.5b
    type: math
    size_gb: 1.20
    priority: medium
    description: "Lightweight math model for DRUID/Nutra micro-calculations"

storage:
  models_dir: /app/models
  cache_dir: /app/cache
  swap_dir: /app/swap

ollama:
  url: http://ollama:11434  # From Docker container to Ollama service
  timeout: 300