# Changelog (NODA2 P0/P1 hardening):
# P0 — Vision:
#   - swapper_config_node2.yaml: add llava-13b as vision model (vision: true);
#     /vision/models now returns a non-empty list; inference verified at ~3.5 s
#   - ollama.url fixed to host.docker.internal:11434 (was localhost, which is broken inside Docker)
# P1 — Security:
#   - Removed NODES_NODA1_SSH_PASSWORD from .env and docker-compose.node2-sofiia.yml
#   - ED25519 SSH key generated, authorized on NODA1, mounted as /run/secrets/noda1_ssh_key
#   - sofiia-console reads the key via the NODES_NODA1_SSH_PRIVATE_KEY env var
#   - secrets/noda1_id_ed25519 added to .gitignore
# P1 — Router:
#   - services/router/router-config.node2.yml: new node2-specific config replaces
#     all 172.17.0.1:11434 endpoints with host.docker.internal:11434
#   - docker-compose.node2-sofiia.yml: mounts router-config.node2.yml (not the root config)
# P1 — Ports:
#   - router (9102), swapper (8890), sofiia-console (8002): bound to 127.0.0.1
#   - gateway (9300): kept on 0.0.0.0 (the Telegram webhook requires public access)
# Artifacts:
#   - ops/patch_node2_P0P1_20260227.md — change log
#   - ops/validation_node2_P0P1_20260227.md — all checks PASS
#   - ops/node2.env.example — safe env template (no secrets)
#   - ops/security_hardening_node2.md — SSH key migration guide + firewall notes
#   - ops/node2_models_pull.sh — model pull script for P0/P1
# Made-with: Cursor
# File: 127 lines, 4.2 KiB, YAML
---
# Swapper Configuration for Node #2 (Development Node)
# Single-active LLM scheduler
# MacBook Pro M4 Max - Apple Silicon (40-core GPU, 64GB RAM)
# Auto-generated configuration with available Ollama models

swapper:
  mode: single-active
  max_concurrent_models: 1
  model_swap_timeout: 300  # seconds to wait for a model swap before failing
  gpu_enabled: true
  metal_acceleration: true  # Apple Silicon GPU acceleration
  # Model to auto-load at startup (optional).
  # If not set, models are loaded only on request.
  # Recommended: gpt-oss:latest (fast model) or phi3:latest (lightweight model).
  # The startup model must actually be installed in Ollama on NODA2.
  default_model: qwen3:14b  # activated automatically at startup

models:
  # Fast LLM - GPT-OSS 20B (High Priority) - Main model for general tasks
  gpt-oss-latest:
    path: ollama:gpt-oss:latest
    type: llm
    size_gb: 13.0
    priority: high
    description: "Fast LLM for general tasks and conversations (20.9B params)"

  # Lightweight LLM - Phi3 3.8B (High Priority) - Fast responses
  phi3-latest:
    path: ollama:phi3:latest
    type: llm
    size_gb: 2.2
    priority: high
    description: "Lightweight LLM for fast responses (3.8B params)"

  # General Reasoning - Qwen3 14B (High Priority)
  qwen3-14b:
    path: ollama:qwen3:14b
    type: llm
    size_gb: 9.3
    priority: high
    description: "Balanced local model for Sofiia and router fallback"

  # Reasoning Model - Qwen3.5 35B A3B (High Priority)
  qwen3.5-35b-a3b:
    path: ollama:qwen3.5:35b-a3b
    type: llm
    size_gb: 22.0
    priority: high
    description: "Large reasoning model for complex Sofiia requests"

  # Reasoning Model - GLM 4.7 Flash (High Priority) - Fast general model
  glm-4.7-flash:
    path: ollama:glm-4.7-flash:32k
    type: llm
    size_gb: 19.0
    priority: high
    description: "Multi-purpose reasoning model (fast context)"

  # Reasoning Model - Gemma2 27B (Medium Priority) - Strategic reasoning
  gemma2-27b:
    path: ollama:gemma2:27b
    type: llm
    size_gb: 15.0
    priority: medium
    description: "Reasoning model for strategic tasks (27.2B params)"

  # Code Specialist - DeepSeek Coder 33B (High Priority) - Advanced code tasks
  deepseek-coder-33b:
    path: ollama:deepseek-coder:33b
    type: code
    size_gb: 18.0
    priority: high
    description: "Advanced code specialist model (33B params)"

  # Code Specialist - Qwen2.5 Coder 32B (High Priority) - Advanced code tasks
  qwen2.5-coder-32b:
    path: ollama:qwen2.5-coder:32b
    type: code
    size_gb: 19.0
    priority: high
    description: "Advanced code specialist model (32.8B params)"

  # Reasoning Model - DeepSeek R1 70B (High Priority) - Strategic reasoning (large model)
  deepseek-r1-70b:
    path: ollama:deepseek-r1:70b
    type: llm
    size_gb: 42.0
    priority: high
    description: "Strategic reasoning model (70.6B params, quantized)"

  # Vision Model - LLaVA 13B (P0 Fix: NODA2 fallback vision)
  # Available in Ollama on NODA2 — used until qwen3-vl:8b is installed
  llava-13b:
    path: ollama:llava:13b
    type: vision
    size_gb: 8.0
    priority: high
    description: "LLaVA 13B vision model (multimodal CLIP+LLM). P0 fallback until qwen3-vl:8b."
    vision: true
    ollama_model: "llava:13b"

  # Vision Model - Qwen3-VL 8B (RECOMMENDED: install with: ollama pull qwen3-vl:8b)
  # Better quality than llava:13b. Enable once installed.
  # qwen3-vl-8b:
  #   path: ollama:qwen3-vl:8b
  #   type: vision
  #   size_gb: 5.5
  #   priority: high
  #   description: "Qwen3-VL 8B — modern vision-language model (recommended)"
  #   vision: true
  #   ollama_model: "qwen3-vl:8b"

storage:
  models_dir: /app/models
  cache_dir: /app/cache
  swap_dir: /app/swap

ollama:
  url: http://host.docker.internal:11434  # host.docker.internal → native Ollama on MacBook (NODA2 P1 fix)
  timeout: 300  # request timeout, seconds

# Vision endpoint configuration
# /vision/models returns all models where vision: true
vision:
  default_model: llava-13b
  ollama_base_url: http://host.docker.internal:11434