node2: P0 vision restore + P1 security hardening + node-specific router config

P0 — Vision:
- swapper_config_node2.yaml: add llava-13b as vision model (vision:true)
  /vision/models now returns non-empty list; inference verified ~3.5s
- ollama.url fixed to host.docker.internal:11434 (was localhost, broken in Docker)

P1 — Security:
- Remove NODES_NODA1_SSH_PASSWORD from .env and docker-compose.node2-sofiia.yml
- SSH ED25519 key generated, authorized on NODA1, mounted as /run/secrets/noda1_ssh_key
- sofiia-console reads key via NODES_NODA1_SSH_PRIVATE_KEY env var
- secrets/noda1_id_ed25519 added to .gitignore

P1 — Router:
- services/router/router-config.node2.yml: new node2-specific config
  replaces all 172.17.0.1:11434 → host.docker.internal:11434
- docker-compose.node2-sofiia.yml: mount router-config.node2.yml (not root config)

P1 — Ports:
- router (9102), swapper (8890), sofiia-console (8002): bind to 127.0.0.1
- gateway (9300): keep 0.0.0.0 (Telegram webhook requires public access)

Artifacts:
- ops/patch_node2_P0P1_20260227.md — change log
- ops/validation_node2_P0P1_20260227.md — all checks PASS
- ops/node2.env.example — safe env template (no secrets)
- ops/security_hardening_node2.md — SSH key migration guide + firewall
- ops/node2_models_pull.sh — model pull script for P0/P1

Made-with: Cursor
2026-02-27 01:27:38 -08:00
parent 46d7dea88a
commit 7b8499dd8a
10 changed files with 1485 additions and 15 deletions
--- a/services/swapper-service/config/swapper_config_node2.yaml
+++ b/services/swapper-service/config/swapper_config_node2.yaml
@@ -12,7 +12,8 @@ swapper:
  # Модель для автоматичного завантаження при старті (опціонально)
  # Якщо не вказано - моделі завантажуються тільки за запитом
  # Рекомендовано: gpt-oss:latest (швидка модель) або phi3:latest (легка модель)
-  default_model: gpt-oss:latest  # Модель активується автоматично при старті
+  # Стартова модель має бути реально встановлена в Ollama на NODA2
+  default_model: qwen3:14b  # Модель активується автоматично при старті

 models:
  # Fast LLM - GPT-OSS 20B (High Priority) - Main model for general tasks
@@ -31,21 +32,29 @@ models:
    priority: high
    description: "Lightweight LLM for fast responses (3.8B params)"
    
-  # Code Specialist - StarCoder2 3B (Medium Priority) - Code engineering
-  starcoder2-3b:
-    path: ollama:starcoder2:3b
-    type: code
-    size_gb: 1.7
-    priority: medium
-    description: "Code specialist model for code engineering (3B params)"
-    
-  # Reasoning Model - Mistral Nemo 12.2B (High Priority) - Advanced reasoning
-  mistral-nemo-12b:
-    path: ollama:mistral-nemo:12b
+  # General Reasoning - Qwen3 14B (High Priority)
+  qwen3-14b:
+    path: ollama:qwen3:14b
    type: llm
-    size_gb: 7.1
+    size_gb: 9.3
    priority: high
-    description: "Advanced reasoning model for complex tasks (12.2B params)"
+    description: "Balanced local model for Sofiia and router fallback"
+
+  # Reasoning Model - Qwen3.5 35B A3B (High Priority)
+  qwen3.5-35b-a3b:
+    path: ollama:qwen3.5:35b-a3b
+    type: llm
+    size_gb: 22.0
+    priority: high
+    description: "Large reasoning model for complex Sofiia requests"
+    
+  # Reasoning Model - GLM 4.7 Flash (High Priority) - Fast general model
+  glm-4.7-flash:
+    path: ollama:glm-4.7-flash:32k
+    type: llm
+    size_gb: 19.0
+    priority: high
+    description: "Multi-purpose reasoning model (fast context)"
    
  # Reasoning Model - Gemma2 27B (Medium Priority) - Strategic reasoning
  gemma2-27b:
@@ -79,12 +88,39 @@ models:
    priority: high
    description: "Strategic reasoning model (70.6B params, quantized)"

+  # Vision Model - LLaVA 13B (P0 Fix: NODA2 fallback vision)
+  # Available in Ollama on NODA2 — used until qwen3-vl:8b is installed
+  llava-13b:
+    path: ollama:llava:13b
+    type: vision
+    size_gb: 8.0
+    priority: high
+    description: "LLaVA 13B vision model (multimodal CLIP+LLM). P0 fallback until qwen3-vl:8b."
+    vision: true
+    ollama_model: "llava:13b"
+
+  # Vision Model - Qwen3-VL 8B (RECOMMENDED: install with: ollama pull qwen3-vl:8b)
+  # Better quality than llava:13b. Enable once installed.
+  # qwen3-vl-8b:
+  #   path: ollama:qwen3-vl:8b
+  #   type: vision
+  #   size_gb: 5.5
+  #   priority: high
+  #   description: "Qwen3-VL 8B — modern vision-language model (recommended)"
+  #   vision: true
+  #   ollama_model: "qwen3-vl:8b"
+
 storage:
  models_dir: /app/models
  cache_dir: /app/cache
  swap_dir: /app/swap

 ollama:
-  url: http://localhost:11434  # Native Ollama on MacBook (via Pieces OS or brew)
+  url: http://host.docker.internal:11434  # host.docker.internal → native Ollama on MacBook (NODA2 P1 fix)
  timeout: 300

+# Vision endpoint configuration
+# /vision/models returns all models where vision: true
+vision:
+  default_model: llava-13b
+  ollama_base_url: http://host.docker.internal:11434