Files
microdao-daarion/ops/node2_capabilities.yml
Apple 46d7dea88a docs(audit): NODA2 full audit 2026-02-27
- ops/audit_node2_20260227.md: readable report (hardware, containers, models, Sofiia, findings)
- ops/audit_node2_20260227.json: structured machine-readable inventory
- ops/audit_node2_findings.yml: 10 PASS + 5 PARTIAL + 3 FAIL + 3 SECURITY gaps
- ops/node2_capabilities.yml: router-ready capabilities (vision/text/code/stt/tts models)

Key findings:
  P0: vision pipeline broken (/vision/models=empty, qwen3-vl:8b not installed)
  P1: node-ops-worker missing; SSH root password exposed in sofiia-console environment
  P1: router-config.yml uses 172.17.0.1 (the Linux Docker bridge IP) instead of host.docker.internal

Made-with: Cursor
2026-02-27 01:14:38 -08:00

146 lines
5.2 KiB
YAML

# NODA2 Capabilities — Machine-readable for NODA1 Router
# Generated: 2026-02-27
# Node: MacBook Pro M4 Max, 64GB RAM, macOS 26.3
---
node_id: noda2
hostname: MacBook-Pro.local
nats_endpoint: "nats://145.224.111.147:4222"  # leafnode spoke, via NODA1 hub
router_url: "http://145.224.111.147:9102"  # NODA2 router (external, if firewall allows)
swapper_url: "http://145.224.111.147:8890"  # swapper (external, if firewall allows)
ollama_url: "http://145.224.111.147:11434"  # Ollama GPU (external, if firewall allows)
# ── Vision Models ──────────────────────────────────────────────────────────
# Sequence-item keys must be indented under their "- " entry; at column 0 they
# parse as new top-level keys and break the document.
vision_models:
  - name: "llava:13b"  # quoted: contains ':' — safer as an explicit string
    backend: ollama
    base_url: "http://localhost:11434"
    type: vision
    size_gb: 8.0
    quality: medium
    note: "LLaVA 1.6 + CLIP, 13B. Available now. Legacy quality."
    capability_hint: "image_understanding"
  # RECOMMENDED - not yet installed, install with: ollama pull qwen3-vl:8b
  - name: "qwen3-vl:8b"
    backend: ollama
    base_url: "http://localhost:11434"
    type: vision
    # Approximate; the original plain scalar "~8.0" parsed as a STRING, not a
    # float, and would break any numeric consumer of size_gb.
    size_gb: 8.0
    quality: high
    installed: false
    note: "Install: ollama pull qwen3-vl:8b. Superior OCR, document, chart understanding."
# ── Text/LLM Models ────────────────────────────────────────────────────────
# Sequence-item keys indented under their "- " entry (flat layout was invalid).
text_models:
  - name: "qwen3.5:35b-a3b"
    backend: ollama
    base_url: "http://localhost:11434"
    type: llm
    size_gb: 9.3
    params: "14.8B active (MoE 35B total)"
    quality: high
    speed: fast
    note: "Primary recommendation: MoE architecture, fast inference on M4 Max"
  - name: "qwen3:14b"
    backend: ollama
    base_url: "http://localhost:11434"
    type: llm
    size_gb: 9.3  # NOTE(review): identical to qwen3.5:35b-a3b above — verify, possible copy-paste
    params: "14B"
    quality: high
    speed: medium
  - name: "glm-4.7-flash:32k"
    backend: ollama
    base_url: "http://localhost:11434"
    type: llm
    size_gb: 19.0
    params: "32B"
    quality: high
    speed: slow
    note: "Long context (32k). Use for document-heavy tasks."
  - name: "deepseek-r1:70b"
    backend: ollama
    base_url: "http://localhost:11434"
    type: llm
    size_gb: 42.0
    params: "70B"
    quality: very_high
    speed: very_slow
    note: "Reasoning model. Use for complex multi-step planning only."
  - name: Qwen3.5-35B-A3B-Q4_K_M
    backend: llama-server
    base_url: "http://localhost:11435"
    api_type: openai_compatible
    type: llm
    note: "Same model as qwen3.5:35b-a3b via llama.cpp. Redundant with Ollama."
# ── Code Models ────────────────────────────────────────────────────────────
# Sequence-item keys indented under their "- " entry (flat layout was invalid).
code_models:
  - name: "deepseek-coder:33b"
    backend: ollama
    base_url: "http://localhost:11434"
    type: code
    size_gb: 18.0
  - name: "starcoder2:3b"
    backend: ollama
    base_url: "http://localhost:11434"
    type: code
    size_gb: 1.7
    speed: very_fast
# ── Embedding Models ───────────────────────────────────────────────────────
embedding_models:
  # Not explicitly found in Ollama list
  # memory-service uses its own embedding (check dagi-memory-service-node2)
  - name: unknown  # placeholder entry, not a real model id
    note: "Audit needed: check memory-service embedding model config"
# ── STT Models ─────────────────────────────────────────────────────────────
stt_models:
  - name: "whisper (swapper)"  # quoted: value contains spaces and parentheses
    backend: swapper
    base_url: "http://localhost:8890"
    endpoint: /stt
    note: "Swapper exposes /stt/models (HTTP 200). Model details need swapper config check."
# ── TTS Models ─────────────────────────────────────────────────────────────
tts_models:
  - name: "tts (swapper)"  # quoted: value contains spaces and parentheses
    backend: swapper
    base_url: "http://localhost:8890"
    endpoint: /tts
    note: "Swapper exposes /tts/models (HTTP 200). Model details need swapper config check."
# ── Routing Policy ─────────────────────────────────────────────────────────
# selection_logic values are free-form pseudo-code for human/LLM routers, kept
# as block scalars. Their bodies MUST be indented deeper than the key — at
# column 0 (as before) the block scalar was empty and the lines parsed as
# stray top-level content.
routing_policy:
  vision_request:
    capability: "vision"
    selection_logic: |-
      if quality_tier == "best":
        prefer: qwen3-vl:8b (when installed)
        fallback: llava:13b
      if quality_tier == "fast":
        prefer: llava:13b
    return_model_used: true
    nats_subject: "node.noda2.vision.request"
  text_request:
    capability: "text"
    # NOTE(review): gemma3:latest is referenced below but is not listed in
    # text_models above — confirm it is installed on NODA2.
    selection_logic: |-
      if quality_tier == "best": deepseek-r1:70b
      if quality_tier == "high": qwen3.5:35b-a3b (default)
      if quality_tier == "fast": qwen3:14b or gemma3:latest
    nats_subject: "node.noda2.llm.request"
  code_request:
    capability: "code"
    selection_logic: |-
      prefer: deepseek-coder:33b
      fast fallback: starcoder2:3b
    nats_subject: "node.noda2.code.request"