microdao-daarion/ops/node2_model_capabilities.yml
Apple 3965f68fac node2: full model inventory audit 2026-02-27
Read-only audit of all installed models on NODA2 (MacBook M4 Max):
- 12 Ollama models, 1 llama-server duplicate, 16 HF cache models
- ComfyUI stack (200+ GB): FLUX.2-dev, LTX-2 video, SDXL
- Whisper-large-v3-turbo (MLX, 1.5GB) + Kokoro TTS (MLX, 0.35GB) installed but unused
- MiniCPM-V-4_5 (16GB) installed but not in Swapper (better than llava:13b)
- Key finding: 149GB cleanup potential; llama-server duplicates Ollama (P1, 20GB)

Artifacts:
- ops/node2_models_inventory_20260227.json
- ops/node2_models_inventory_20260227.md
- ops/node2_model_capabilities.yml
- ops/node2_model_gaps.yml

Made-with: Cursor
2026-02-27 01:44:26 -08:00


# NODA2 Model Capabilities — Machine-readable
# Node: MacBook Pro M4 Max, 64GB unified memory
# Date: 2026-02-27
# Usage: router offload decisions, swapper config, capability queries
node_id: noda2
hostname: MacBook-Pro.local
hardware:
cpu: "Apple M4 Max"
ram_gb: 64
unified_memory: true
gpu: "Apple Silicon (MPS/Metal)"
# ─── LLM HEAVY ────────────────────────────────────────────────────────────────
llm_heavy:
  - name: qwen3.5:35b-a3b
    backend: ollama
    base_url: http://localhost:11434
    size_gb: 9.3
    params: "14.8B MoE"
    quality: high
    speed: fast_for_size
    recommended: true
    note: "Primary reasoning model. MoE architecture: only a small fraction of params is active per token, so it is fast for its size."
  - name: glm-4.7-flash:32k
    backend: ollama
    base_url: http://localhost:11434
    size_gb: 19.0
    params: "~32B"
    quality: high
    speed: medium
    recommended: false
    note: "Duplicate of the q4_K_M variant. Keep only one."
  - name: deepseek-r1:70b
    backend: ollama
    base_url: http://localhost:11434
    size_gb: 42.0
    params: "70B"
    quality: very_high
    speed: slow
    recommended: false
    note: "Rarely needed. Only for max-quality reasoning tasks."
  - name: Qwen3.5-35B-A3B-Q4_K_M.gguf
    backend: llama-server
    base_url: http://localhost:11435
    size_gb: 20.0
    params: "34.6B"
    quality: high
    speed: fast
    recommended: false
    note: "DUPLICATE of qwen3.5:35b-a3b in Ollama. Same model, different backend. Remove or unify."
# ─── LLM SMALL/MEDIUM ─────────────────────────────────────────────────────────
llm_small:
  - name: qwen3:14b
    backend: ollama
    base_url: http://localhost:11434
    size_gb: 9.3
    params: "14B"
    quality: medium_high
    speed: fast
    note: "Good all-rounder. Used by swapper as default."
  - name: gemma3:latest
    backend: ollama
    base_url: http://localhost:11434
    size_gb: 3.3
    params: "4B"
    quality: medium
    speed: very_fast
    recommended: true
    note: "Best small model for routing, classification, quick Q&A."
  - name: phi3:latest
    backend: ollama
    base_url: http://localhost:11434
    size_gb: 2.2
    params: "3.8B"
    quality: medium
    speed: very_fast
    note: "Lightweight general model. Can be replaced by gemma3."
  - name: gpt-oss:latest
    backend: ollama
    base_url: http://localhost:11434
    size_gb: 13.0
    params: "20.9B"
    quality: medium_high
    speed: medium
    note: "Old model. Not recommended for new workloads."
  - name: mistral-nemo:12b
    backend: ollama
    base_url: http://localhost:11434
    size_gb: 7.1
    params: "12B"
    quality: medium
    speed: medium
    note: "Old. Not recommended for new workloads."
# ─── CODE ─────────────────────────────────────────────────────────────────────
code:
  - name: deepseek-coder:33b
    backend: ollama
    base_url: http://localhost:11434
    size_gb: 18.0
    params: "33B"
    quality: high
    speed: medium
    note: "Good for code review. Heavy, cold-start slow."
  - name: starcoder2:3b
    backend: ollama
    base_url: http://localhost:11434
    size_gb: 1.7
    params: "3B"
    quality: medium
    speed: very_fast
    note: "Fast code completion. Limited context."
# ─── VISION ───────────────────────────────────────────────────────────────────
vision_fast:
  - name: llava:13b
    backend: ollama
    base_url: http://localhost:11434
    swapper_url: http://localhost:8890/vision
    size_gb: 8.0
    params: "13B"
    quality: medium
    speed: medium
    installed: true
    active_in_swapper: true
    note: "P0 fallback. CLIP-based. Outdated architecture but functional."
vision_hq:
  - name: MiniCPM-V-4_5
    backend: hf_transformers
    size_gb: 16.0
    params: "8B"
    quality: high
    speed: medium
    installed: true
    active_in_swapper: false
    note: "RECOMMENDED for P2. Better than llava:13b. Needs integration into Swapper (see the sketch after this section)."
  - name: Qwen3-VL-32B-Instruct
    backend: hf
    size_gb: 0
    installed: false
    note: "Only a refs/ placeholder; NOT downloaded. Would require 65+ GB."
# ─── EMBEDDING ────────────────────────────────────────────────────────────────
embedding:
  - name: sentence-transformers/all-MiniLM-L6-v2
    backend: open-webui
    size_gb: 0.1
    installed: true
    active: true
    note: "Used by open-webui for RAG. Not exposed as a standalone endpoint."
  - name: TaylorAI/bge-micro-v2
    backend: open-webui
    size_gb: 0.05
    installed: true
    active: true
    note: "Auxiliary embedding in open-webui."
  - name: Qwen/Qwen2.5-7B-Instruct
    backend: hf_transformers
    size_gb: 14.0
    installed: true
    active: false
    note: "HF weights only. Could serve embeddings but is not configured."
# ─── STT ──────────────────────────────────────────────────────────────────────
stt:
  - name: mlx-community/whisper-large-v3-turbo-asr-fp16
    backend: mlx
    size_gb: 1.5
    device: mps_metal
    quality: high
    speed: fast
    installed: true
    active: false
    note: "READY: installed, MLX-accelerated. NOT integrated into Swapper or memory-service. P2 task (see the sketch after this section)."
  - name: whisper-base
    backend: open-webui
    size_gb: 0.15
    device: cpu
    quality: low
    installed: true
    active: true
    note: "Active in open-webui only. Low quality."
# ─── TTS ──────────────────────────────────────────────────────────────────────
tts:
  - name: mlx-community/Kokoro-82M-bf16
    backend: mlx
    size_gb: 0.35
    device: mps_metal
    quality: high
    speed: fast
    installed: true
    active: false
    note: "READY: installed, MLX-accelerated. NOT integrated into any service. P2 task."
# ─── IMAGE GENERATION ─────────────────────────────────────────────────────────
image_gen:
  - name: flux2-dev-Q8_0.gguf
    backend: comfyui
    path: ~/ComfyUI/models/checkpoints/
    size_gb: 33.0
    quality: very_high
    installed: true
    active: false
    note: "ComfyUI + llama.cpp GGUF format. FLUX.2-dev, state-of-the-art image gen."
  - name: ltx-2-19b-distilled-fp8.safetensors
    backend: comfyui
    path: ~/Documents/ComfyUI/models/checkpoints/
    size_gb: 25.0
    type: video_gen
    quality: very_high
    installed: true
    active: false
    note: "LTX-2 19B video generation. Very large."
  - name: z_image_turbo_bf16.safetensors
    backend: comfyui
    size_gb: 11.0
    quality: high
    installed: true
    active: false
    note: "Fast image gen via ComfyUI."
  - name: SDXL-base-1.0 + refiner
    backend: comfyui_symlink
    size_gb: 72.0
    quality: medium
    installed: true
    active: false
    note: "Legacy SDXL. Very large. Consider cleanup."
  - name: sdxl_sofia_lora_v1.safetensors
    backend: comfyui_lora
    size_gb: 0.08
    installed: true
    active: false
    note: "Custom Sofiia appearance LoRA for SDXL."
# ─── OCR ──────────────────────────────────────────────────────────────────────
ocr:
- name: "(none configured)"
note: "Swapper /ocr returns 405. No dedicated OCR model. Could use llava:13b or MiniCPM-V as OCR."
# ─── ROUTING POLICY (suggestions) ────────────────────────────────────────────
routing_policy:
  nats_subjects:
    vision_request: "node.noda2.vision.request"
    vision_reply: "node.noda2.vision.reply.*"
    stt_request: "node.noda2.stt.request"
    llm_request: "node.noda2.llm.request"
  default_vision_model: "llava:13b"
  recommended_vision_model: "MiniCPM-V-4_5 (needs integration)"
  default_llm: "qwen3.5:35b-a3b"
  default_small_llm: "gemma3:latest"
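
# Example (sketch): issuing a vision request over NATS with nats-py, using
# the subjects above. The broker URL and payload shape are assumptions; the
# actual schema is defined by the Swapper/router, not by this file.
#
#   import asyncio, json, nats
#
#   async def main():
#       nc = await nats.connect("nats://localhost:4222")  # broker URL assumed
#       payload = {"image_path": "/tmp/frame.png", "task": "describe"}  # schema assumed
#       reply = await nc.request("node.noda2.vision.request",
#                                json.dumps(payload).encode(), timeout=60)
#       print(reply.data.decode())
#       await nc.drain()
#
#   asyncio.run(main())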