docs(audit): NODA2 full audit 2026-02-27
- ops/audit_node2_20260227.md: readable report (hardware, containers, models, Sofiia, findings)
- ops/audit_node2_20260227.json: structured machine-readable inventory
- ops/audit_node2_findings.yml: 10 PASS + 5 PARTIAL + 3 FAIL + 3 SECURITY gaps
- ops/node2_capabilities.yml: router-ready capabilities (vision/text/code/stt/tts models)

Key findings:
- P0: vision pipeline broken (/vision/models=empty, qwen3-vl:8b not installed)
- P1: node-ops-worker missing; SSH root password exposed in sofiia-console env
- P1: router-config.yml uses 172.17.0.1 (Linux bridge) instead of host.docker.internal

Made-with: Cursor
This commit is contained in:
145
ops/node2_capabilities.yml
Normal file
145
ops/node2_capabilities.yml
Normal file
@@ -0,0 +1,145 @@
|
||||
# NODA2 Capabilities — Machine-readable for NODA1 Router
|
||||
# Generated: 2026-02-27
|
||||
# Node: MacBook Pro M4 Max, 64GB RAM, macOS 26.3
|
||||
|
||||
node_id: noda2
|
||||
hostname: MacBook-Pro.local
|
||||
nats_endpoint: "nats://145.224.111.147:4222" # leafnode spoke, via NODA1 hub
|
||||
router_url: "http://145.224.111.147:9102" # NODA2 router (external, if firewall allows)
|
||||
swapper_url: "http://145.224.111.147:8890" # swapper (external, if firewall allows)
|
||||
ollama_url: "http://145.224.111.147:11434" # Ollama GPU (external, if firewall allows)
|
||||
|
||||
# ── Vision Models ──────────────────────────────────────────────────────────
|
||||
vision_models:
|
||||
- name: llava:13b
|
||||
backend: ollama
|
||||
base_url: http://localhost:11434
|
||||
type: vision
|
||||
size_gb: 8.0
|
||||
quality: medium
|
||||
note: "LLaVA 1.6 + CLIP, 13B. Available now. Legacy quality."
|
||||
capability_hint: "image_understanding"
|
||||
|
||||
# RECOMMENDED - not yet installed, install with: ollama pull qwen3-vl:8b
|
||||
- name: qwen3-vl:8b
|
||||
backend: ollama
|
||||
base_url: http://localhost:11434
|
||||
type: vision
|
||||
size_gb: "~8.0"  # quoted: a bare leading "~" is the YAML null sigil; use plain 8.0 if consumers expect a number
|
||||
quality: high
|
||||
installed: false
|
||||
note: "Install: ollama pull qwen3-vl:8b. Superior OCR, document, chart understanding."
|
||||
|
||||
# ── Text/LLM Models ────────────────────────────────────────────────────────
|
||||
text_models:
|
||||
- name: qwen3.5:35b-a3b
|
||||
backend: ollama
|
||||
base_url: http://localhost:11434
|
||||
type: llm
|
||||
size_gb: 9.3
|
||||
params: "14.8B active (MoE 35B total)"
|
||||
quality: high
|
||||
speed: fast
|
||||
note: "Primary recommendation: MoE architecture, fast inference on M4 Max"
|
||||
|
||||
- name: qwen3:14b
|
||||
backend: ollama
|
||||
base_url: http://localhost:11434
|
||||
type: llm
|
||||
size_gb: 9.3
|
||||
params: "14B"
|
||||
quality: high
|
||||
speed: medium
|
||||
|
||||
- name: glm-4.7-flash:32k
|
||||
backend: ollama
|
||||
base_url: http://localhost:11434
|
||||
type: llm
|
||||
size_gb: 19.0
|
||||
params: "32B"
|
||||
quality: high
|
||||
speed: slow
|
||||
note: "Long context (32k). Use for document-heavy tasks."
|
||||
|
||||
- name: deepseek-r1:70b
|
||||
backend: ollama
|
||||
base_url: http://localhost:11434
|
||||
type: llm
|
||||
size_gb: 42.0
|
||||
params: "70B"
|
||||
quality: very_high
|
||||
speed: very_slow
|
||||
note: "Reasoning model. Use for complex multi-step planning only."
|
||||
|
||||
- name: Qwen3.5-35B-A3B-Q4_K_M
|
||||
backend: llama-server
|
||||
base_url: http://localhost:11435
|
||||
api_type: openai_compatible
|
||||
type: llm
|
||||
note: "Same model as qwen3.5:35b-a3b via llama.cpp. Redundant with Ollama."
|
||||
|
||||
# ── Code Models ────────────────────────────────────────────────────────────
|
||||
code_models:
|
||||
- name: deepseek-coder:33b
|
||||
backend: ollama
|
||||
base_url: http://localhost:11434
|
||||
type: code
|
||||
size_gb: 18.0
|
||||
|
||||
- name: starcoder2:3b
|
||||
backend: ollama
|
||||
base_url: http://localhost:11434
|
||||
type: code
|
||||
size_gb: 1.7
|
||||
speed: very_fast
|
||||
|
||||
# ── Embedding Models ───────────────────────────────────────────────────────
|
||||
embedding_models:
|
||||
# Not explicitly found in Ollama list
|
||||
# memory-service uses its own embedding (check dagi-memory-service-node2)
|
||||
- name: unknown
|
||||
note: "Audit needed: check memory-service embedding model config"
|
||||
|
||||
# ── STT Models ─────────────────────────────────────────────────────────────
|
||||
stt_models:
|
||||
- name: "whisper (swapper)"
|
||||
backend: swapper
|
||||
base_url: http://localhost:8890
|
||||
endpoint: /stt
|
||||
note: "Swapper exposes /stt/models (HTTP 200). Model details need swapper config check."
|
||||
|
||||
# ── TTS Models ─────────────────────────────────────────────────────────────
|
||||
tts_models:
|
||||
- name: "tts (swapper)"
|
||||
backend: swapper
|
||||
base_url: http://localhost:8890
|
||||
endpoint: /tts
|
||||
note: "Swapper exposes /tts/models (HTTP 200). Model details need swapper config check."
|
||||
|
||||
# ── Routing Policy ─────────────────────────────────────────────────────────
|
||||
routing_policy:
|
||||
vision_request:
|
||||
capability: "vision"
|
||||
selection_logic: |
|
||||
if quality_tier == "best":
|
||||
prefer: qwen3-vl:8b (when installed)
|
||||
fallback: llava:13b
|
||||
if quality_tier == "fast":
|
||||
prefer: llava:13b
|
||||
return_model_used: true
|
||||
nats_subject: "node.noda2.vision.request"
|
||||
|
||||
text_request:
|
||||
capability: "text"
|
||||
selection_logic: |
|
||||
if quality_tier == "best": deepseek-r1:70b
|
||||
if quality_tier == "high": qwen3.5:35b-a3b (default)
|
||||
if quality_tier == "fast": qwen3:14b or gemma3:latest
|
||||
nats_subject: "node.noda2.llm.request"
|
||||
|
||||
code_request:
|
||||
capability: "code"
|
||||
selection_logic: |
|
||||
prefer: deepseek-coder:33b
|
||||
fast fallback: starcoder2:3b
|
||||
nats_subject: "node.noda2.code.request"
|
||||
Reference in New Issue
Block a user