docs(audit): NODA2 full audit 2026-02-27
- ops/audit_node2_20260227.md: readable report (hardware, containers, models, Sofiia, findings)
- ops/audit_node2_20260227.json: structured machine-readable inventory
- ops/audit_node2_findings.yml: 10 PASS + 5 PARTIAL + 3 FAIL + 3 SECURITY gaps
- ops/node2_capabilities.yml: router-ready capabilities (vision/text/code/stt/tts models)

Key findings:
- P0: vision pipeline broken (/vision/models=empty, qwen3-vl:8b not installed)
- P1: node-ops-worker missing; SSH root password exposed in sofiia-console env
- P1: router-config.yml uses 172.17.0.1 (Linux bridge) instead of host.docker.internal

Made-with: Cursor
This commit is contained in:
145
ops/node2_capabilities.yml
Normal file
145
ops/node2_capabilities.yml
Normal file
@@ -0,0 +1,145 @@
|
||||
# NODA2 Capabilities — Machine-readable for NODA1 Router
|
||||
# Generated: 2026-02-27
|
||||
# Node: MacBook Pro M4 Max, 64GB RAM, macOS 26.3
|
||||
|
||||
node_id: noda2
|
||||
hostname: MacBook-Pro.local
|
||||
nats_endpoint: "nats://145.224.111.147:4222" # leafnode spoke, via NODA1 hub
|
||||
router_url: "http://145.224.111.147:9102" # NODA2 router (external, if firewall allows)
|
||||
swapper_url: "http://145.224.111.147:8890" # swapper (external, if firewall allows)
|
||||
ollama_url: "http://145.224.111.147:11434" # Ollama GPU (external, if firewall allows)
|
||||
|
||||
# ── Vision Models ──────────────────────────────────────────────────────────
|
||||
vision_models:
|
||||
- name: llava:13b
|
||||
backend: ollama
|
||||
base_url: http://localhost:11434
|
||||
type: vision
|
||||
size_gb: 8.0
|
||||
quality: medium
|
||||
note: "LLaVA 1.6 + CLIP, 13B. Available now. Legacy quality."
|
||||
capability_hint: "image_understanding"
|
||||
|
||||
# RECOMMENDED - not yet installed, install with: ollama pull qwen3-vl:8b
|
||||
- name: qwen3-vl:8b
|
||||
backend: ollama
|
||||
base_url: http://localhost:11434
|
||||
type: vision
|
||||
size_gb: "~8.0"  # quoted: a bare leading "~" is the YAML null sigil; use plain 8.0 if consumers expect a number
|
||||
quality: high
|
||||
installed: false
|
||||
note: "Install: ollama pull qwen3-vl:8b. Superior OCR, document, chart understanding."
|
||||
|
||||
# ── Text/LLM Models ────────────────────────────────────────────────────────
|
||||
text_models:
|
||||
- name: qwen3.5:35b-a3b
|
||||
backend: ollama
|
||||
base_url: http://localhost:11434
|
||||
type: llm
|
||||
size_gb: 9.3
|
||||
params: "14.8B active (MoE 35B total)"
|
||||
quality: high
|
||||
speed: fast
|
||||
note: "Primary recommendation: MoE architecture, fast inference on M4 Max"
|
||||
|
||||
- name: qwen3:14b
|
||||
backend: ollama
|
||||
base_url: http://localhost:11434
|
||||
type: llm
|
||||
size_gb: 9.3
|
||||
params: "14B"
|
||||
quality: high
|
||||
speed: medium
|
||||
|
||||
- name: glm-4.7-flash:32k
|
||||
backend: ollama
|
||||
base_url: http://localhost:11434
|
||||
type: llm
|
||||
size_gb: 19.0
|
||||
params: "32B"
|
||||
quality: high
|
||||
speed: slow
|
||||
note: "Long context (32k). Use for document-heavy tasks."
|
||||
|
||||
- name: deepseek-r1:70b
|
||||
backend: ollama
|
||||
base_url: http://localhost:11434
|
||||
type: llm
|
||||
size_gb: 42.0
|
||||
params: "70B"
|
||||
quality: very_high
|
||||
speed: very_slow
|
||||
note: "Reasoning model. Use for complex multi-step planning only."
|
||||
|
||||
- name: Qwen3.5-35B-A3B-Q4_K_M
|
||||
backend: llama-server
|
||||
base_url: http://localhost:11435
|
||||
api_type: openai_compatible
|
||||
type: llm
|
||||
note: "Same model as qwen3.5:35b-a3b via llama.cpp. Redundant with Ollama."
|
||||
|
||||
# ── Code Models ────────────────────────────────────────────────────────────
|
||||
code_models:
|
||||
- name: deepseek-coder:33b
|
||||
backend: ollama
|
||||
base_url: http://localhost:11434
|
||||
type: code
|
||||
size_gb: 18.0
|
||||
|
||||
- name: starcoder2:3b
|
||||
backend: ollama
|
||||
base_url: http://localhost:11434
|
||||
type: code
|
||||
size_gb: 1.7
|
||||
speed: very_fast
|
||||
|
||||
# ── Embedding Models ───────────────────────────────────────────────────────
|
||||
embedding_models:
|
||||
# Not explicitly found in Ollama list
|
||||
# memory-service uses its own embedding (check dagi-memory-service-node2)
|
||||
- name: unknown
|
||||
note: "Audit needed: check memory-service embedding model config"
|
||||
|
||||
# ── STT Models ─────────────────────────────────────────────────────────────
|
||||
stt_models:
|
||||
- name: "whisper (swapper)"
|
||||
backend: swapper
|
||||
base_url: http://localhost:8890
|
||||
endpoint: /stt
|
||||
note: "Swapper exposes /stt/models (HTTP 200). Model details need swapper config check."
|
||||
|
||||
# ── TTS Models ─────────────────────────────────────────────────────────────
|
||||
tts_models:
|
||||
- name: "tts (swapper)"
|
||||
backend: swapper
|
||||
base_url: http://localhost:8890
|
||||
endpoint: /tts
|
||||
note: "Swapper exposes /tts/models (HTTP 200). Model details need swapper config check."
|
||||
|
||||
# ── Routing Policy ─────────────────────────────────────────────────────────
|
||||
routing_policy:
|
||||
vision_request:
|
||||
capability: "vision"
|
||||
selection_logic: |
|
||||
if quality_tier == "best":
|
||||
prefer: qwen3-vl:8b (when installed)
|
||||
fallback: llava:13b
|
||||
if quality_tier == "fast":
|
||||
prefer: llava:13b
|
||||
return_model_used: true
|
||||
nats_subject: "node.noda2.vision.request"
|
||||
|
||||
text_request:
|
||||
capability: "text"
|
||||
selection_logic: |
|
||||
if quality_tier == "best": deepseek-r1:70b
|
||||
if quality_tier == "high": qwen3.5:35b-a3b (default)
|
||||
if quality_tier == "fast": qwen3:14b or gemma3:latest
|
||||
nats_subject: "node.noda2.llm.request"
|
||||
|
||||
code_request:
|
||||
capability: "code"
|
||||
selection_logic: |
|
||||
prefer: deepseek-coder:33b
|
||||
fast fallback: starcoder2:3b
|
||||
nats_subject: "node.noda2.code.request"
|
||||
Reference in New Issue
Block a user