node2: P0 vision restore + P1 security hardening + node-specific router config
P0 — Vision:
- swapper_config_node2.yaml: add llava-13b as vision model (vision: true);
  /vision/models now returns a non-empty list; inference verified at ~3.5s
- ollama.url fixed to host.docker.internal:11434 (was localhost, which is broken inside Docker)

P1 — Security:
- Remove NODES_NODA1_SSH_PASSWORD from .env and docker-compose.node2-sofiia.yml
- SSH ED25519 key generated, authorized on NODA1, mounted as /run/secrets/noda1_ssh_key
- sofiia-console reads the key via the NODES_NODA1_SSH_PRIVATE_KEY env var
- secrets/noda1_id_ed25519 added to .gitignore

P1 — Router:
- services/router/router-config.node2.yml: new node2-specific config;
  replaces all 172.17.0.1:11434 → host.docker.internal:11434
- docker-compose.node2-sofiia.yml: mount router-config.node2.yml (not the root config)

P1 — Ports:
- router (9102), swapper (8890), sofiia-console (8002): bind to 127.0.0.1
- gateway (9300): keep 0.0.0.0 (Telegram webhook requires public access)

Artifacts:
- ops/patch_node2_P0P1_20260227.md — change log
- ops/validation_node2_P0P1_20260227.md — all checks PASS
- ops/node2.env.example — safe env template (no secrets)
- ops/security_hardening_node2.md — SSH key migration guide + firewall
- ops/node2_models_pull.sh — model pull script for P0/P1

Made-with: Cursor
This commit is contained in:
@@ -12,7 +12,8 @@ swapper:
|
||||
# Модель для автоматичного завантаження при старті (опціонально)
|
||||
# Якщо не вказано - моделі завантажуються тільки за запитом
|
||||
# Рекомендовано: gpt-oss:latest (швидка модель) або phi3:latest (легка модель)
|
||||
default_model: gpt-oss:latest # Модель активується автоматично при старті
|
||||
# Стартова модель має бути реально встановлена в Ollama на NODA2
|
||||
default_model: qwen3:14b # Модель активується автоматично при старті
|
||||
|
||||
models:
|
||||
# Fast LLM - GPT-OSS 20B (High Priority) - Main model for general tasks
|
||||
@@ -31,21 +32,29 @@ models:
|
||||
priority: high
|
||||
description: "Lightweight LLM for fast responses (3.8B params)"
|
||||
|
||||
# Code Specialist - StarCoder2 3B (Medium Priority) - Code engineering
|
||||
starcoder2-3b:
|
||||
path: ollama:starcoder2:3b
|
||||
type: code
|
||||
size_gb: 1.7
|
||||
priority: medium
|
||||
description: "Code specialist model for code engineering (3B params)"
|
||||
|
||||
# Reasoning Model - Mistral Nemo 12.2B (High Priority) - Advanced reasoning
|
||||
mistral-nemo-12b:
|
||||
path: ollama:mistral-nemo:12b
|
||||
# General Reasoning - Qwen3 14B (High Priority)
|
||||
qwen3-14b:
|
||||
path: ollama:qwen3:14b
|
||||
type: llm
|
||||
size_gb: 7.1
|
||||
size_gb: 9.3
|
||||
priority: high
|
||||
description: "Advanced reasoning model for complex tasks (12.2B params)"
|
||||
description: "Balanced local model for Sofiia and router fallback"
|
||||
|
||||
# Reasoning Model - Qwen3.5 35B A3B (High Priority)
|
||||
qwen3.5-35b-a3b:
|
||||
path: ollama:qwen3.5:35b-a3b
|
||||
type: llm
|
||||
size_gb: 22.0
|
||||
priority: high
|
||||
description: "Large reasoning model for complex Sofiia requests"
|
||||
|
||||
# Reasoning Model - GLM 4.7 Flash (High Priority) - Fast general model
|
||||
glm-4.7-flash:
|
||||
path: ollama:glm-4.7-flash:32k
|
||||
type: llm
|
||||
size_gb: 19.0
|
||||
priority: high
|
||||
description: "Multi-purpose reasoning model (fast context)"
|
||||
|
||||
# Reasoning Model - Gemma2 27B (Medium Priority) - Strategic reasoning
|
||||
gemma2-27b:
|
||||
@@ -79,12 +88,39 @@ models:
|
||||
priority: high
|
||||
description: "Strategic reasoning model (70.6B params, quantized)"
|
||||
|
||||
# Vision Model - LLaVA 13B (P0 Fix: NODA2 fallback vision)
|
||||
# Available in Ollama on NODA2 — used until qwen3-vl:8b is installed
|
||||
llava-13b:
|
||||
path: ollama:llava:13b
|
||||
type: vision
|
||||
size_gb: 8.0
|
||||
priority: high
|
||||
description: "LLaVA 13B vision model (multimodal CLIP+LLM). P0 fallback until qwen3-vl:8b."
|
||||
vision: true
|
||||
ollama_model: "llava:13b"
|
||||
|
||||
# Vision Model - Qwen3-VL 8B (RECOMMENDED; install with `ollama pull qwen3-vl:8b`)
|
||||
# Better quality than llava:13b. Enable once installed.
|
||||
# qwen3-vl-8b:
|
||||
# path: ollama:qwen3-vl:8b
|
||||
# type: vision
|
||||
# size_gb: 5.5
|
||||
# priority: high
|
||||
# description: "Qwen3-VL 8B — modern vision-language model (recommended)"
|
||||
# vision: true
|
||||
# ollama_model: "qwen3-vl:8b"
|
||||
|
||||
storage:
|
||||
models_dir: /app/models
|
||||
cache_dir: /app/cache
|
||||
swap_dir: /app/swap
|
||||
|
||||
ollama:
|
||||
url: http://localhost:11434 # Native Ollama on MacBook (via Pieces OS or brew)
|
||||
url: http://host.docker.internal:11434 # host.docker.internal → native Ollama on MacBook (NODA2 P1 fix)
|
||||
timeout: 300
|
||||
|
||||
# Vision endpoint configuration
|
||||
# /vision/models returns all models where vision: true
|
||||
vision:
|
||||
default_model: llava-13b
|
||||
ollama_base_url: http://host.docker.internal:11434
|
||||
|
||||
Reference in New Issue
Block a user