# Read-only audit of all installed models on NODA2 (MacBook M4 Max):
# - 12 Ollama models, 1 llama-server duplicate, 16 HF cache models
# - ComfyUI stack (200+ GB): FLUX.2-dev, LTX-2 video, SDXL
# - Whisper-large-v3-turbo (MLX, 1.5GB) + Kokoro TTS (MLX, 0.35GB) installed but unused
# - MiniCPM-V-4_5 (16GB) installed but not in Swapper (better than llava:13b)
# - Key finding: 149GB cleanup potential; llama-server duplicates Ollama (P1, 20GB)
# Artifacts:
# - ops/node2_models_inventory_20260227.json
# - ops/node2_models_inventory_20260227.md
# - ops/node2_model_capabilities.yml
# - ops/node2_model_gaps.yml
# Made-with: Cursor
# NODA2 Model Capabilities — Machine-readable
# Node: MacBook Pro M4 Max, 64GB unified memory
# Date: 2026-02-27
# Usage: router offload decisions, swapper config, capability queries
---
node_id: noda2
hostname: MacBook-Pro.local
hardware:
  cpu: "Apple M4 Max"
  ram_gb: 64
  unified_memory: true
  gpu: "Apple Silicon (MPS/Metal)"
|
# ─── LLM HEAVY ────────────────────────────────────────────────────────────────
llm_heavy:
  # Model names containing ':' and URLs are quoted defensively.
  - name: "qwen3.5:35b-a3b"
    backend: ollama
    base_url: "http://localhost:11434"
    size_gb: 9.3
    params: "14.8B MoE"
    quality: high
    speed: fast_for_size
    recommended: true
    note: "Primary reasoning model. MoE architecture — fast despite 14.8B active params."

  - name: "glm-4.7-flash:32k"
    backend: ollama
    base_url: "http://localhost:11434"
    size_gb: 19.0
    params: "~32B"
    quality: high
    speed: medium
    recommended: false
    note: "Duplicate of q4_K_M variant. Keep only one."

  - name: "deepseek-r1:70b"
    backend: ollama
    base_url: "http://localhost:11434"
    size_gb: 42.0
    params: "70B"
    quality: very_high
    speed: slow
    recommended: false
    note: "Rarely needed. Only for max-quality reasoning tasks."

  - name: "Qwen3.5-35B-A3B-Q4_K_M.gguf"
    backend: llama-server
    base_url: "http://localhost:11435"
    size_gb: 20.0
    params: "34.6B"
    quality: high
    speed: fast
    recommended: false
    note: "DUPLICATE of qwen3.5:35b-a3b in Ollama. Same model, different backend. Remove or unify."
|
# ─── LLM SMALL/MEDIUM ─────────────────────────────────────────────────────────
llm_small:
  - name: "qwen3:14b"
    backend: ollama
    base_url: "http://localhost:11434"
    size_gb: 9.3
    params: "14B"
    quality: medium_high
    speed: fast
    note: "Good all-rounder. Used by swapper as default."

  - name: "gemma3:latest"
    backend: ollama
    base_url: "http://localhost:11434"
    size_gb: 3.3
    params: "4B"
    quality: medium
    speed: very_fast
    recommended: true
    note: "Best small model for routing, classification, quick Q&A."

  - name: "phi3:latest"
    backend: ollama
    base_url: "http://localhost:11434"
    size_gb: 2.2
    params: "3.8B"
    quality: medium
    speed: very_fast
    note: "Lightweight general model. Can replace with gemma3."

  - name: "gpt-oss:latest"
    backend: ollama
    base_url: "http://localhost:11434"
    size_gb: 13.0
    params: "20.9B"
    quality: medium_high
    speed: medium
    note: "Old model. Not recommended for new workloads."

  - name: "mistral-nemo:12b"
    backend: ollama
    base_url: "http://localhost:11434"
    size_gb: 7.1
    params: "12B"
    quality: medium
    speed: medium
    note: "Old. Not recommended for new workloads."
|
# ─── CODE ─────────────────────────────────────────────────────────────────────
code:
  - name: "deepseek-coder:33b"
    backend: ollama
    base_url: "http://localhost:11434"
    size_gb: 18.0
    params: "33B"
    quality: high
    speed: medium
    note: "Good for code review. Heavy, cold-start slow."

  - name: "starcoder2:3b"
    backend: ollama
    base_url: "http://localhost:11434"
    size_gb: 1.7
    params: "3B"
    quality: medium
    speed: very_fast
    note: "Fast code completion. Limited context."
|
# ─── VISION ───────────────────────────────────────────────────────────────────
vision_fast:
  - name: "llava:13b"
    backend: ollama
    base_url: "http://localhost:11434"
    swapper_url: "http://localhost:8890/vision"
    size_gb: 8.0
    params: "13B"
    quality: medium
    speed: medium
    installed: true
    active_in_swapper: true
    note: "P0 fallback. CLIP-based. Outdated architecture but functional."

vision_hq:
  - name: "MiniCPM-V-4_5"
    backend: hf_transformers
    size_gb: 16.0
    params: "8B"
    quality: high
    speed: medium
    installed: true
    active_in_swapper: false
    note: "RECOMMENDED for P2. Better than llava:13b. Needs integration into Swapper."

  # Placeholder entry — weights not present on disk (size_gb: 0).
  - name: "Qwen3-VL-32B-Instruct"
    backend: hf
    size_gb: 0
    installed: false
    note: "Only refs/ placeholder — NOT downloaded. Would require 65+ GB."
|
# ─── EMBEDDING ────────────────────────────────────────────────────────────────
embedding:
  - name: "sentence-transformers/all-MiniLM-L6-v2"
    backend: open-webui
    size_gb: 0.1
    installed: true
    active: true
    note: "Used by open-webui for RAG. Not exposed as standalone endpoint."

  - name: "TaylorAI/bge-micro-v2"
    backend: open-webui
    size_gb: 0.05
    installed: true
    active: true
    note: "Auxiliary embedding in open-webui."

  - name: "Qwen/Qwen2.5-7B-Instruct"
    backend: hf_transformers
    size_gb: 14.0
    installed: true
    active: false
    note: "HF weights only. Can be used for embedding but not configured."
|
# ─── STT ──────────────────────────────────────────────────────────────────────
stt:
  - name: "mlx-community/whisper-large-v3-turbo-asr-fp16"
    backend: mlx
    size_gb: 1.5
    device: mps_metal
    quality: high
    speed: fast
    installed: true
    active: false
    note: "READY — installed, MLX-accelerated. NOT integrated into Swapper or memory-service. P2 task."

  - name: "whisper-base"
    backend: open-webui
    size_gb: 0.15
    device: cpu
    quality: low
    installed: true
    active: true
    note: "Active in open-webui only. Low quality."
|
# ─── TTS ──────────────────────────────────────────────────────────────────────
tts:
  - name: "mlx-community/Kokoro-82M-bf16"
    backend: mlx
    size_gb: 0.35
    device: mps_metal
    quality: high
    speed: fast
    installed: true
    active: false
    note: "READY — installed, MLX-accelerated. NOT integrated into any service. P2 task."
|
# ─── IMAGE GENERATION ─────────────────────────────────────────────────────────
image_gen:
  # Paths are quoted: a leading '~' (and trailing '/') in a plain scalar is
  # legal YAML but easy to mis-handle downstream.
  - name: "flux2-dev-Q8_0.gguf"
    backend: comfyui
    path: "~/ComfyUI/models/checkpoints/"
    size_gb: 33.0
    quality: very_high
    installed: true
    active: false
    note: "ComfyUI + llama.cpp GGUF format. FLUX.2-dev — state-of-art image gen."

  - name: "ltx-2-19b-distilled-fp8.safetensors"
    backend: comfyui
    path: "~/Documents/ComfyUI/models/checkpoints/"
    size_gb: 25.0
    type: video_gen
    quality: very_high
    installed: true
    active: false
    note: "LTX-2 19B — video generation. Very large."

  - name: "z_image_turbo_bf16.safetensors"
    backend: comfyui
    size_gb: 11.0
    quality: high
    installed: true
    active: false
    note: "Fast image gen via ComfyUI."

  - name: "SDXL-base-1.0 + refiner"
    backend: comfyui_symlink
    size_gb: 72.0
    quality: medium
    installed: true
    active: false
    note: "Legacy SDXL. Very large. Consider cleanup."

  - name: "sdxl_sofia_lora_v1.safetensors"
    backend: comfyui_lora
    size_gb: 0.08
    installed: true
    active: false
    note: "Custom Sofiia appearance LoRA for SDXL."
|
# ─── OCR ──────────────────────────────────────────────────────────────────────
ocr:
  - name: "(none configured)"
    note: "Swapper /ocr returns 405. No dedicated OCR model. Could use llava:13b or MiniCPM-V as OCR."
|
# ─── ROUTING POLICY (suggestions) ────────────────────────────────────────────
routing_policy:
  nats_subjects:
    vision_request: "node.noda2.vision.request"
    vision_reply: "node.noda2.vision.reply.*"
    stt_request: "node.noda2.stt.request"
    llm_request: "node.noda2.llm.request"
  default_vision_model: "llava:13b"
  recommended_vision_model: "MiniCPM-V-4_5 (needs integration)"
  default_llm: "qwen3.5:35b-a3b"
  default_small_llm: "gemma3:latest"