Read-only audit of all installed models on NODA2 (MacBook M4 Max): - 12 Ollama models, 1 llama-server duplicate, 16 HF cache models - ComfyUI stack (200+ GB): FLUX.2-dev, LTX-2 video, SDXL - Whisper-large-v3-turbo (MLX, 1.5GB) + Kokoro TTS (MLX, 0.35GB) installed but unused - MiniCPM-V-4_5 (16GB) installed but not in Swapper (better than llava:13b) - Key finding: 149GB cleanup potential; llama-server duplicates Ollama (P1, 20GB) Artifacts: - ops/node2_models_inventory_20260227.json - ops/node2_models_inventory_20260227.md - ops/node2_model_capabilities.yml - ops/node2_model_gaps.yml Made-with: Cursor
142 lines
9.3 KiB
JSON
{
  "node_id": "noda2",
  "hostname": "MacBook-Pro.local",
  "timestamp": "2026-02-27T10:00:00Z",
  "hardware": {
    "cpu": "Apple M4 Max",
    "ram_gb": 64,
    "unified_memory": true,
    "storage_total_tb": 1.8,
    "storage_free_gb": 634,
    "os": "macOS 26.3 (Darwin arm64 25.3.0)"
  },
  "backends": [
    {
      "id": "ollama-main",
      "type": "ollama",
      "version": "0.17.1",
      "port": 11434,
      "base_url": "http://localhost:11434",
      "gpu_mode": "Apple Silicon MPS/Metal (unified memory)",
      "currently_loaded": null,
      "models_count": 12,
      "models": [
        {"name": "qwen3.5:35b-a3b", "type": "llm", "size_gb": 9.3, "params": "14.8B MoE", "running": false, "modified": "2026-02-26", "tags": ["reasoning", "primary"]},
        {"name": "qwen3:14b", "type": "llm", "size_gb": 9.3, "params": "14B", "running": false, "modified": "2026-02-26", "tags": ["general"]},
        {"name": "gemma3:latest", "type": "llm", "size_gb": 3.3, "params": "4B", "running": false, "modified": "2026-02-25", "tags": ["general", "fast"]},
        {"name": "glm-4.7-flash:32k", "type": "llm", "size_gb": 19.0, "params": "~32B", "running": false, "modified": "2026-02-09", "tags": ["long-context"]},
        {"name": "glm-4.7-flash:q4_K_M", "type": "llm", "size_gb": 19.0, "params": "~32B", "running": false, "modified": "2026-02-09", "tags": ["duplicate-of-32k"]},
        {"name": "llava:13b", "type": "vision", "size_gb": 8.0, "params": "13B", "running": false, "modified": "2025-11-27", "tags": ["vision", "multimodal", "p0-fallback"], "vision_capable": true},
        {"name": "mistral-nemo:12b", "type": "llm", "size_gb": 7.1, "params": "12B", "running": false, "modified": "2025-11-21", "tags": ["general", "old"]},
        {"name": "deepseek-coder:33b", "type": "code", "size_gb": 18.0, "params": "33B", "running": false, "modified": "2025-11-21", "tags": ["code", "heavy"]},
        {"name": "deepseek-r1:70b", "type": "llm", "size_gb": 42.0, "params": "70B", "running": false, "modified": "2025-11-21", "tags": ["reasoning", "very-heavy"]},
        {"name": "starcoder2:3b", "type": "code", "size_gb": 1.7, "params": "3B", "running": false, "modified": "2025-11-21", "tags": ["code", "small"]},
        {"name": "phi3:latest", "type": "llm", "size_gb": 2.2, "params": "3.8B", "running": false, "modified": "2025-11-21", "tags": ["small", "fast"]},
        {"name": "gpt-oss:latest", "type": "llm", "size_gb": 13.0, "params": "20.9B", "running": false, "modified": "2025-11-21", "tags": ["general", "old"]}
      ]
    },
    {
      "id": "llama-server-cpu",
      "type": "llama.cpp",
      "port": 11435,
      "base_url": "http://localhost:11435",
      "gpu_mode": "Apple Silicon Metal (via llama.cpp)",
      "model_file": "/Users/apple/Library/Application Support/llama.cpp/models/Qwen3.5-35B-A3B-Q4_K_M.gguf",
      "model_name": "Qwen3.5-35B-A3B-Q4_K_M",
      "size_gb": 20.0,
      "currently_loaded": true,
      "note": "DUPLICATE — same model as qwen3.5:35b-a3b in Ollama. Two instances of identical model consume 2x disk."
    },
    {
      "id": "swapper-service",
      "type": "swapper",
      "port": 8890,
      "base_url": "http://localhost:8890",
      "status": "healthy",
      "active_model": null,
      "mode": "single-active",
      "ollama_url": "http://host.docker.internal:11434",
      "gpu_enabled": true,
      "metal_acceleration": true,
      "llm_models_configured": 9,
      "vision_models_configured": 1,
      "stt_models_configured": 0,
      "tts_models_configured": 0,
      "vision_models": [
        {"name": "llava-13b", "ollama_model": "llava:13b", "size_gb": 8.0, "status": "unloaded"}
      ]
    },
    {
      "id": "open-webui",
      "type": "open-webui",
      "port": 8080,
      "base_url": "http://localhost:8080",
      "ollama_base_url": "http://host.docker.internal:11434",
      "whisper_model": "base",
      "whisper_device": "cpu",
      "embedding_model": "sentence-transformers/all-MiniLM-L6-v2",
      "note": "Internal WebUI for Ollama. Has Whisper-base STT (CPU) built-in."
    }
  ],
  "hf_cache_models": [
    {"name": "mlx-community/whisper-large-v3-turbo-asr-fp16", "type": "stt", "size_gb": 1.5, "backend": "mlx", "installed": true, "active": false, "note": "MLX Whisper — ready to use, NOT integrated into Swapper"},
    {"name": "mlx-community/Kokoro-82M-bf16", "type": "tts", "size_gb": 0.35, "backend": "mlx", "installed": true, "active": false, "note": "Kokoro TTS — ready to use, NOT integrated"},
    {"name": "openbmb/MiniCPM-V-4_5", "type": "vision", "size_gb": 16.0, "backend": "hf", "installed": true, "active": false, "note": "MiniCPM-V 4.5 — high-quality vision model, NOT in Swapper"},
    {"name": "Qwen/Qwen2.5-7B-Instruct", "type": "llm", "size_gb": 14.0, "backend": "hf", "installed": true, "active": false, "note": "HF weights only — not serving"},
    {"name": "Qwen/Qwen2.5-1.5B-Instruct", "type": "llm", "size_gb": 2.9, "backend": "hf", "installed": true, "active": false, "note": "HF weights only — not serving"},
    {"name": "Qwen/Qwen3-VL-32B-Instruct", "type": "vision", "size_gb": 0.004, "backend": "hf", "installed": false, "active": false, "note": "Only refs/ directory — model NOT downloaded (4KB, just placeholder)"},
    {"name": "Aquiles-ai/FLUX.2-dev", "type": "image_gen", "size_gb": 105, "backend": "hf/comfyui", "installed": true, "active": false, "note": "FLUX.2-dev (105GB) — VERY large, used by ComfyUI"},
    {"name": "google/gemma-3-12b-it-qat-q4_0-unquantized", "type": "llm", "size_gb": 0.004, "backend": "hf", "installed": false, "active": false, "note": "Only refs/ — NOT downloaded"}
  ],
  "comfyui_models": {
    "location": "~/Documents/ComfyUI/models + ~/ComfyUI/models",
    "total_size_gb": 101,
    "checkpoints": [
      {"name": "flux2-dev-Q8_0.gguf", "size_gb": 33.0, "type": "image_gen", "path": "~/ComfyUI/models/checkpoints/"},
      {"name": "ltx-2-19b-distilled-fp8.safetensors", "size_gb": 25.0, "type": "video_gen", "path": "~/Documents/ComfyUI/models/checkpoints/"}
    ],
    "diffusion_models": [
      {"name": "z_image_turbo_bf16.safetensors", "size_gb": 11.0, "type": "image_gen"}
    ],
    "loras": [
      {"name": "ltx-2-19b-distilled-lora-384.safetensors", "size_gb": 7.1, "type": "video_gen"},
      {"name": "sdxl_sofia_lora_v1.safetensors", "size_gb": 0.08, "type": "image_gen", "note": "Sofiia custom LoRA"},
      {"name": "ip-adapter-faceid-plusv2_sdxl_lora.safetensors", "size_gb": 0.35, "type": "image_gen"},
      {"name": "sdxl_agent_lora_smoke.safetensors", "size_gb": 0.02, "type": "image_gen"}
    ],
    "hf_linked": [
      {"name": "FLUX.1-dev", "size_gb": 40, "location": "~/hf_models/FLUX.1-dev"},
      {"name": "FLUX.1-schnell", "size_gb": 12, "location": "~/hf_models/FLUX.1-schnell"},
      {"name": "SDXL-base-1.0", "size_gb": 72, "location": "~/hf_models/stabilityai_sdxl_base_1.0"}
    ]
  },
  "docker_containers": [
    {"name": "dagi-router-node2", "status": "Up 13m (healthy)", "port": "127.0.0.1:9102→8000"},
    {"name": "dagi-gateway-node2", "status": "Up 19h (healthy)", "port": "0.0.0.0:9300"},
    {"name": "swapper-service-node2", "status": "Up 14m (healthy)", "port": "0.0.0.0:8890", "note": "P1: should be 127.0.0.1:8890 (not yet applied via --remove-orphans)"},
    {"name": "dagi-memory-service-node2", "status": "Up 19h (healthy)", "port": "0.0.0.0:8000"},
    {"name": "dagi-qdrant-node2", "status": "Up 19h (healthy)", "port": "0.0.0.0:6333-6334"},
    {"name": "dagi-postgres-node2", "status": "Up 19h (healthy)", "port": "0.0.0.0:5433"},
    {"name": "dagi-neo4j-node2", "status": "Up 19h (healthy)", "port": "0.0.0.0:7474,7687"},
    {"name": "dagi-nats-node2", "status": "Up 2h", "port": "0.0.0.0:4222,8222"},
    {"name": "open-webui", "status": "Up 19h (healthy)", "port": "0.0.0.0:8080"},
    {"name": "dagi-postgres", "status": "Up 19h (healthy)", "port": "0.0.0.0:5432"},
    {"name": "dagi-redis", "status": "Up 19h (healthy)", "port": "0.0.0.0:6379"},
    {"name": "sofiia-console", "status": "Up 13m", "port": "127.0.0.1:8002"}
  ],
  "non_docker_processes": [
    {"name": "ollama", "port": 11434, "type": "native", "model_serving": "all ollama models"},
    {"name": "llama-server", "port": 11435, "type": "llama.cpp", "model": "Qwen3.5-35B-A3B-Q4_K_M.gguf", "note": "DUPLICATE — same model as in Ollama"},
    {"name": "spacebot", "port": 19898, "type": "telegram-bot", "note": "Sofiia Telegram bot"},
    {"name": "gitea", "port": 3000, "type": "git-server", "note": "Self-hosted Git"},
    {"name": "opencode", "port": 3456, "type": "ai-coding", "note": "AI coding tool"},
    {"name": "Pieces OS", "port": 39300, "type": "ai-assistant", "note": "Pieces OS — dev AI tools, not LLM serving"},
    {"name": "memory-service", "port": 8000, "type": "uvicorn", "note": "Running outside Docker (dev mode)"}
  ]
}