---
# NODA2 Capabilities — Machine-readable for NODA1 Router
# Generated: 2026-02-27
# Node: MacBook Pro M4 Max, 64GB RAM, macOS 26.3

node_id: noda2
hostname: MacBook-Pro.local
nats_endpoint: "nats://145.224.111.147:4222"  # leafnode spoke, via NODA1 hub
router_url: "http://145.224.111.147:9102"     # NODA2 router (external, if firewall allows)
swapper_url: "http://145.224.111.147:8890"    # swapper (external, if firewall allows)
ollama_url: "http://145.224.111.147:11434"    # Ollama GPU (external, if firewall allows)

# ── Vision Models ──────────────────────────────────────────────────────────
vision_models:
  - name: "llava:13b"
    backend: ollama
    base_url: "http://localhost:11434"
    type: vision
    size_gb: 8.0
    quality: medium
    note: "LLaVA 1.6 + CLIP, 13B. Available now. Legacy quality."
    capability_hint: "image_understanding"

  # RECOMMENDED - not yet installed, install with: ollama pull qwen3-vl:8b
  - name: "qwen3-vl:8b"
    backend: ollama
    base_url: "http://localhost:11434"
    type: vision
    # Quoted: a bare `~8.0` is a plain scalar starting with the YAML null
    # indicator `~` — keep the approximate value as an explicit string.
    size_gb: "~8.0"
    quality: high
    installed: false
    note: "Install: ollama pull qwen3-vl:8b. Superior OCR, document, chart understanding."

# ── Text/LLM Models ────────────────────────────────────────────────────────
text_models:
  - name: "qwen3.5:35b-a3b"
    backend: ollama
    base_url: "http://localhost:11434"
    type: llm
    size_gb: 9.3
    params: "14.8B active (MoE 35B total)"
    quality: high
    speed: fast
    note: "Primary recommendation: MoE architecture, fast inference on M4 Max"

  - name: "qwen3:14b"
    backend: ollama
    base_url: "http://localhost:11434"
    type: llm
    size_gb: 9.3  # NOTE(review): same size as qwen3.5:35b-a3b above — confirm not a copy-paste slip
    params: "14B"
    quality: high
    speed: medium

  - name: "glm-4.7-flash:32k"
    backend: ollama
    base_url: "http://localhost:11434"
    type: llm
    size_gb: 19.0
    params: "32B"
    quality: high
    speed: slow
    note: "Long context (32k). Use for document-heavy tasks."

  - name: "deepseek-r1:70b"
    backend: ollama
    base_url: "http://localhost:11434"
    type: llm
    size_gb: 42.0
    params: "70B"
    quality: very_high
    speed: very_slow
    note: "Reasoning model. Use for complex multi-step planning only."

  - name: Qwen3.5-35B-A3B-Q4_K_M
    backend: llama-server
    base_url: "http://localhost:11435"
    api_type: openai_compatible
    type: llm
    note: "Same model as qwen3.5:35b-a3b via llama.cpp. Redundant with Ollama."

# ── Code Models ──────────────────────────────────────────────────────────────
code_models:
  - name: "deepseek-coder:33b"
    backend: ollama
    base_url: "http://localhost:11434"
    type: code
    size_gb: 18.0

  - name: "starcoder2:3b"
    backend: ollama
    base_url: "http://localhost:11434"
    type: code
    size_gb: 1.7
    speed: very_fast

# ── Embedding Models ─────────────────────────────────────────────────────────
embedding_models:
  # Not explicitly found in Ollama list
  # memory-service uses its own embedding (check dagi-memory-service-node2)
  - name: unknown
    note: "Audit needed: check memory-service embedding model config"

# ── STT Models ───────────────────────────────────────────────────────────────
stt_models:
  - name: "whisper (swapper)"
    backend: swapper
    base_url: "http://localhost:8890"
    endpoint: "/stt"
    note: "Swapper exposes /stt/models (HTTP 200). Model details need swapper config check."

# ── TTS Models ───────────────────────────────────────────────────────────────
tts_models:
  - name: "tts (swapper)"
    backend: swapper
    base_url: "http://localhost:8890"
    endpoint: "/tts"
    note: "Swapper exposes /tts/models (HTTP 200). Model details need swapper config check."

# ── Routing Policy ───────────────────────────────────────────────────────────
routing_policy:
  vision_request:
    capability: "vision"
    # Pseudo-logic consumed by the NODA1 router; kept as an opaque block scalar.
    selection_logic: |
      if quality_tier == "best":
        prefer: qwen3-vl:8b (when installed)
        fallback: llava:13b
      if quality_tier == "fast":
        prefer: llava:13b
    return_model_used: true
    nats_subject: "node.noda2.vision.request"

  text_request:
    capability: "text"
    # NOTE(review): gemma3:latest is referenced here but is not listed in
    # text_models above — confirm it is installed or drop the fallback.
    selection_logic: |
      if quality_tier == "best": deepseek-r1:70b
      if quality_tier == "high": qwen3.5:35b-a3b (default)
      if quality_tier == "fast": qwen3:14b or gemma3:latest
    nats_subject: "node.noda2.llm.request"

  code_request:
    capability: "code"
    selection_logic: |
      prefer: deepseek-coder:33b
      fast fallback: starcoder2:3b
    nats_subject: "node.noda2.code.request"