# NODA2 Model Capabilities — Machine-readable
# Node: MacBook Pro M4 Max, 64GB unified memory
# Date: 2026-02-27
# Usage: router offload decisions, swapper config, capability queries

node_id: noda2
hostname: MacBook-Pro.local

hardware:
  cpu: "Apple M4 Max"
  ram_gb: 64
  unified_memory: true
  gpu: "Apple Silicon (MPS/Metal)"

# ─── LLM HEAVY ────────────────────────────────────────────────────────────────

llm_heavy:
  - name: qwen3.5:35b-a3b
    backend: ollama
    base_url: http://localhost:11434
    size_gb: 9.3
    params: "14.8B MoE"
    quality: high
    speed: fast_for_size
    recommended: true
    note: "Primary reasoning model. MoE architecture — fast despite 14.8B active params."

  - name: glm-4.7-flash:32k
    backend: ollama
    base_url: http://localhost:11434
    size_gb: 19.0
    params: "~32B"
    quality: high
    speed: medium
    recommended: false
    note: "Duplicate of q4_K_M variant. Keep only one."

  - name: deepseek-r1:70b
    backend: ollama
    base_url: http://localhost:11434
    size_gb: 42.0
    params: "70B"
    quality: very_high
    speed: slow
    recommended: false
    note: "Rarely needed. Only for max-quality reasoning tasks."

  - name: Qwen3.5-35B-A3B-Q4_K_M.gguf
    backend: llama-server
    base_url: http://localhost:11435
    size_gb: 20.0
    params: "34.6B"
    quality: high
    speed: fast
    recommended: false
    note: "DUPLICATE of qwen3.5:35b-a3b in Ollama. Same model, different backend. Remove or unify."

# ─── LLM SMALL/MEDIUM ─────────────────────────────────────────────────────────

llm_small:
  - name: qwen3:14b
    backend: ollama
    base_url: http://localhost:11434
    size_gb: 9.3
    params: "14B"
    quality: medium_high
    speed: fast
    note: "Good all-rounder. Used by swapper as default."

  - name: gemma3:latest
    backend: ollama
    base_url: http://localhost:11434
    size_gb: 3.3
    params: "4B"
    quality: medium
    speed: very_fast
    recommended: true
    note: "Best small model for routing, classification, quick Q&A."

  - name: phi3:latest
    backend: ollama
    base_url: http://localhost:11434
    size_gb: 2.2
    params: "3.8B"
    quality: medium
    speed: very_fast
    note: "Lightweight general model. Can replace with gemma3."

  - name: gpt-oss:latest
    backend: ollama
    base_url: http://localhost:11434
    size_gb: 13.0
    params: "20.9B"
    quality: medium_high
    speed: medium
    note: "Old model. Not recommended for new workloads."

  - name: mistral-nemo:12b
    backend: ollama
    base_url: http://localhost:11434
    size_gb: 7.1
    params: "12B"
    quality: medium
    speed: medium
    note: "Old. Not recommended for new workloads."

# ─── CODE ─────────────────────────────────────────────────────────────────────

code:
  - name: deepseek-coder:33b
    backend: ollama
    base_url: http://localhost:11434
    size_gb: 18.0
    params: "33B"
    quality: high
    speed: medium
    note: "Good for code review. Heavy, cold-start slow."

  - name: starcoder2:3b
    backend: ollama
    base_url: http://localhost:11434
    size_gb: 1.7
    params: "3B"
    quality: medium
    speed: very_fast
    note: "Fast code completion. Limited context."

# ─── VISION ───────────────────────────────────────────────────────────────────

vision_fast:
  - name: llava:13b
    backend: ollama
    base_url: http://localhost:11434
    swapper_url: http://localhost:8890/vision
    size_gb: 8.0
    params: "13B"
    quality: medium
    speed: medium
    installed: true
    active_in_swapper: true
    note: "P0 fallback. CLIP-based. Outdated architecture but functional."

vision_hq:
  - name: MiniCPM-V-4_5
    backend: hf_transformers
    size_gb: 16.0
    params: "8B"
    quality: high
    speed: medium
    installed: true
    active_in_swapper: false
    note: "RECOMMENDED for P2. Better than llava:13b. Needs integration into Swapper."
  - name: Qwen3-VL-32B-Instruct
    backend: hf
    size_gb: 0
    installed: false
    note: "Only refs/ placeholder — NOT downloaded. Would require 65+ GB."

# ─── EMBEDDING ────────────────────────────────────────────────────────────────

embedding:
  - name: sentence-transformers/all-MiniLM-L6-v2
    backend: open-webui
    size_gb: 0.1
    installed: true
    active: true
    note: "Used by open-webui for RAG. Not exposed as standalone endpoint."

  - name: TaylorAI/bge-micro-v2
    backend: open-webui
    size_gb: 0.05
    installed: true
    active: true
    note: "Auxiliary embedding in open-webui."

  - name: Qwen/Qwen2.5-7B-Instruct
    backend: hf_transformers
    size_gb: 14.0
    installed: true
    active: false
    note: "HF weights only. Can be used for embedding but not configured."

# ─── STT ──────────────────────────────────────────────────────────────────────

stt:
  - name: mlx-community/whisper-large-v3-turbo-asr-fp16
    backend: mlx
    size_gb: 1.5
    device: mps_metal
    quality: high
    speed: fast
    installed: true
    active: false
    note: "READY — installed, MLX-accelerated. NOT integrated into Swapper or memory-service. P2 task."

  - name: whisper-base
    backend: open-webui
    size_gb: 0.15
    device: cpu
    quality: low
    installed: true
    active: true
    note: "Active in open-webui only. Low quality."

# ─── TTS ──────────────────────────────────────────────────────────────────────

tts:
  - name: mlx-community/Kokoro-82M-bf16
    backend: mlx
    size_gb: 0.35
    device: mps_metal
    quality: high
    speed: fast
    installed: true
    active: false
    note: "READY — installed, MLX-accelerated. NOT integrated into any service. P2 task."

# ─── IMAGE GENERATION ─────────────────────────────────────────────────────────

image_gen:
  - name: flux2-dev-Q8_0.gguf
    backend: comfyui
    path: ~/ComfyUI/models/checkpoints/
    size_gb: 33.0
    quality: very_high
    installed: true
    active: false
    note: "ComfyUI + llama.cpp GGUF format. FLUX.2-dev — state-of-the-art image gen."

  - name: ltx-2-19b-distilled-fp8.safetensors
    backend: comfyui
    path: ~/Documents/ComfyUI/models/checkpoints/
    size_gb: 25.0
    type: video_gen
    quality: very_high
    installed: true
    active: false
    note: "LTX-2 19B — video generation. Very large."

  - name: z_image_turbo_bf16.safetensors
    backend: comfyui
    size_gb: 11.0
    quality: high
    installed: true
    active: false
    note: "Fast image gen via ComfyUI."

  - name: "SDXL-base-1.0 + refiner"
    backend: comfyui_symlink
    size_gb: 72.0
    quality: medium
    installed: true
    active: false
    note: "Legacy SDXL. Very large. Consider cleanup."

  - name: sdxl_sofia_lora_v1.safetensors
    backend: comfyui_lora
    size_gb: 0.08
    installed: true
    active: false
    note: "Custom Sofiia appearance LoRA for SDXL."

# ─── OCR ──────────────────────────────────────────────────────────────────────

ocr:
  - name: "(none configured)"
    note: "Swapper /ocr returns 405. No dedicated OCR model. Could use llava:13b or MiniCPM-V as OCR."

# ─── ROUTING POLICY (suggestions) ─────────────────────────────────────────────

routing_policy:
  nats_subjects:
    vision_request: "node.noda2.vision.request"
    vision_reply: "node.noda2.vision.reply.*"
    stt_request: "node.noda2.stt.request"
    llm_request: "node.noda2.llm.request"
  default_vision_model: "llava:13b"
  recommended_vision_model: "MiniCPM-V-4_5 (needs integration)"
  default_llm: "qwen3.5:35b-a3b"
  default_small_llm: "gemma3:latest"
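
# ─── EXAMPLE: CAPABILITY QUERY (illustrative) ─────────────────────────────────
# A minimal sketch of how a consumer (router or swapper) might query this file
# for a model in a given category, per the "capability queries" usage above.
# Kept entirely in comments so the file stays valid YAML. The category names and
# the `recommended`/`installed` keys match this file; the function name and the
# file path are hypothetical, not an existing API.
#
#   import yaml  # PyYAML
#
#   def pick_model(caps_path: str, category: str) -> dict:
#       """Return the recommended entry in a category, else the first installed one."""
#       with open(caps_path) as f:
#           caps = yaml.safe_load(f)
#       entries = caps.get(category, [])
#       for entry in entries:
#           if entry.get("recommended"):
#               return entry  # e.g. qwen3.5:35b-a3b for category "llm_heavy"
#       for entry in entries:
#           if entry.get("installed", True):  # llm entries omit `installed`
#               return entry
#       raise LookupError(f"no usable model in category {category!r}")
#
#   # Usage (hypothetical path):
#   #   pick_model("noda2_capabilities.yaml", "vision_fast")  # -> llava:13b entry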
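
# ─── EXAMPLE: NATS OFFLOAD REQUEST (illustrative) ─────────────────────────────
# A sketch of publishing an offload request on the subjects declared under
# routing_policy, using nats-py request/reply. Only the subject and model names
# come from this file; the broker URL, JSON payload schema, and timeout are
# assumptions and must match whatever the node-side worker expects. The wildcard
# vision_reply subject suggests the worker may publish replies on explicit
# subjects; this sketch uses the built-in request/reply inbox for brevity.
#
#   import asyncio, json
#   import nats  # nats-py
#
#   async def vision_request(image_b64: str) -> dict:
#       nc = await nats.connect("nats://localhost:4222")  # assumed broker URL
#       try:
#           reply = await nc.request(
#               "node.noda2.vision.request",                # from nats_subjects
#               json.dumps({"model": "llava:13b",           # default_vision_model
#                           "image_b64": image_b64}).encode(),
#               timeout=30,
#           )
#           return json.loads(reply.data)
#       finally:
#           await nc.close()
#
#   # asyncio.run(vision_request(b64_string))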