Files
microdao-daarion/ops/node2_models_inventory_20260227.json
Apple 3965f68fac node2: full model inventory audit 2026-02-27
Read-only audit of all models installed on NODA2 (MacBook Pro, M4 Max):
- 12 Ollama models, 1 llama-server duplicate, 16 HF cache models
- ComfyUI stack (200+ GB): FLUX.2-dev, LTX-2 video, SDXL
- Whisper-large-v3-turbo (MLX, 1.5GB) + Kokoro TTS (MLX, 0.35GB) installed but unused (usage sketch below)
- MiniCPM-V-4_5 (16GB) installed but not registered in Swapper, although it is a stronger vision model than llava:13b
- Key finding: 149GB of cleanup potential; llama-server duplicates a model already in Ollama (P1, 20GB)
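
Before acting on the P1, it is worth confirming what kind of duplicate this is: the Ollama tag reports 9.3GB while the GGUF is 20GB, so the two copies may be the same model at different quantizations rather than byte-identical files. A minimal verification sketch, assuming a default Ollama blob store at ~/.ollama/models/blobs (blobs named sha256-<digest>) and the llama.cpp path from the inventory below:

    import hashlib
    from pathlib import Path

    # llama-server copy, path as reported in the inventory below.
    GGUF = Path.home() / "Library/Application Support/llama.cpp/models/Qwen3.5-35B-A3B-Q4_K_M.gguf"
    # Default Ollama blob store on macOS (assumption: standard install).
    BLOBS = Path.home() / ".ollama/models/blobs"

    def sha256_of(path: Path, chunk: int = 1 << 20) -> str:
        h = hashlib.sha256()
        with path.open("rb") as f:
            while data := f.read(chunk):
                h.update(data)
        return h.hexdigest()

    digest = sha256_of(GGUF)
    # Ollama stores each layer as a file named "sha256-<digest>";
    # a hit means the weights are byte-identical and safe to dedupe.
    dup = any(p.name.endswith(digest) for p in BLOBS.iterdir())
    print("byte-identical Ollama blob found" if dup else
          "no identical blob: same model, different quantization/copy")

Either way, deleting the GGUF and standardizing on the Ollama tag recovers the 20GB named in the P1 above.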

Artifacts:
- ops/node2_models_inventory_20260227.json
- ops/node2_models_inventory_20260227.md
- ops/node2_model_capabilities.yml
- ops/node2_model_gaps.yml
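
The Ollama section of these artifacts can be regenerated at any time from the local API; /api/tags is Ollama's standard model-listing endpoint. A sketch of the collection step (the output field names mirror this audit's JSON schema, which is a project convention, not an Ollama one):

    import json
    import urllib.request

    # Ollama's model-listing endpoint; returns {"models": [...]}.
    with urllib.request.urlopen("http://localhost:11434/api/tags") as resp:
        tags = json.load(resp)

    models = [{
        "name": m["name"],
        "size_gb": round(m["size"] / 1e9, 1),
        "modified": m["modified_at"][:10],
    } for m in tags["models"]]

    print(json.dumps({"models_count": len(models), "models": models}, indent=2))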

Made-with: Cursor
2026-02-27 01:44:26 -08:00
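
On the unused STT finding above: the MLX Whisper weights need no further setup to try. A minimal usage sketch with the mlx-whisper package (transcribe() with path_or_hf_repo is the API documented in mlx-examples; the repo name is taken from the HF cache entry below, and meeting.wav is a hypothetical input file):

    import mlx_whisper

    # Repo name as listed in hf_cache_models below; resolved from the
    # local HF cache, so no re-download is needed.
    result = mlx_whisper.transcribe(
        "meeting.wav",  # hypothetical input
        path_or_hf_repo="mlx-community/whisper-large-v3-turbo-asr-fp16",
    )
    print(result["text"])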

{
"node_id": "noda2",
"hostname": "MacBook-Pro.local",
"timestamp": "2026-02-27T10:00:00Z",
"hardware": {
"cpu": "Apple M4 Max",
"ram_gb": 64,
"unified_memory": true,
"storage_total_tb": 1.8,
"storage_free_gb": 634,
"os": "macOS 26.3 (Darwin arm64 25.3.0)"
},
"backends": [
{
"id": "ollama-main",
"type": "ollama",
"version": "0.17.1",
"port": 11434,
"base_url": "http://localhost:11434",
"gpu_mode": "Apple Silicon MPS/Metal (unified memory)",
"currently_loaded": null,
"models_count": 12,
"models": [
{"name": "qwen3.5:35b-a3b", "type": "llm", "size_gb": 9.3, "params": "14.8B MoE", "running": false, "modified": "2026-02-26", "tags": ["reasoning", "primary"]},
{"name": "qwen3:14b", "type": "llm", "size_gb": 9.3, "params": "14B", "running": false, "modified": "2026-02-26", "tags": ["general"]},
{"name": "gemma3:latest", "type": "llm", "size_gb": 3.3, "params": "4B", "running": false, "modified": "2026-02-25", "tags": ["general", "fast"]},
{"name": "glm-4.7-flash:32k", "type": "llm", "size_gb": 19.0, "params": "~32B", "running": false, "modified": "2026-02-09", "tags": ["long-context"]},
{"name": "glm-4.7-flash:q4_K_M","type": "llm", "size_gb": 19.0, "params": "~32B", "running": false, "modified": "2026-02-09", "tags": ["duplicate-of-32k"]},
{"name": "llava:13b", "type": "vision", "size_gb": 8.0, "params": "13B", "running": false, "modified": "2025-11-27", "tags": ["vision", "multimodal", "p0-fallback"], "vision_capable": true},
{"name": "mistral-nemo:12b", "type": "llm", "size_gb": 7.1, "params": "12B", "running": false, "modified": "2025-11-21", "tags": ["general", "old"]},
{"name": "deepseek-coder:33b", "type": "code", "size_gb": 18.0, "params": "33B", "running": false, "modified": "2025-11-21", "tags": ["code", "heavy"]},
{"name": "deepseek-r1:70b", "type": "llm", "size_gb": 42.0, "params": "70B", "running": false, "modified": "2025-11-21", "tags": ["reasoning", "very-heavy"]},
{"name": "starcoder2:3b", "type": "code", "size_gb": 1.7, "params": "3B", "running": false, "modified": "2025-11-21", "tags": ["code", "small"]},
{"name": "phi3:latest", "type": "llm", "size_gb": 2.2, "params": "3.8B", "running": false, "modified": "2025-11-21", "tags": ["small", "fast"]},
{"name": "gpt-oss:latest", "type": "llm", "size_gb": 13.0, "params": "20.9B", "running": false, "modified": "2025-11-21", "tags": ["general", "old"]}
]
},
{
"id": "llama-server-cpu",
"type": "llama.cpp",
"port": 11435,
"base_url": "http://localhost:11435",
"gpu_mode": "Apple Silicon Metal (via llama.cpp)",
"model_file": "/Users/apple/Library/Application Support/llama.cpp/models/Qwen3.5-35B-A3B-Q4_K_M.gguf",
"model_name": "Qwen3.5-35B-A3B-Q4_K_M",
"size_gb": 20.0,
"currently_loaded": true,
"note": "DUPLICATE — same model as qwen3.5:35b-a3b in Ollama. Two instances of identical model consume 2x disk."
},
{
"id": "swapper-service",
"type": "swapper",
"port": 8890,
"base_url": "http://localhost:8890",
"status": "healthy",
"active_model": null,
"mode": "single-active",
"ollama_url": "http://host.docker.internal:11434",
"gpu_enabled": true,
"metal_acceleration": true,
"llm_models_configured": 9,
"vision_models_configured": 1,
"stt_models_configured": 0,
"tts_models_configured": 0,
"vision_models": [
{"name": "llava-13b", "ollama_model": "llava:13b", "size_gb": 8.0, "status": "unloaded"}
]
},
{
"id": "open-webui",
"type": "open-webui",
"port": 8080,
"base_url": "http://localhost:8080",
"ollama_base_url": "http://host.docker.internal:11434",
"whisper_model": "base",
"whisper_device": "cpu",
"embedding_model": "sentence-transformers/all-MiniLM-L6-v2",
"note": "Internal WebUI for Ollama. Has Whisper-base STT (CPU) built-in."
}
],
"hf_cache_models": [
{"name": "mlx-community/whisper-large-v3-turbo-asr-fp16", "type": "stt", "size_gb": 1.5, "backend": "mlx", "installed": true, "active": false, "note": "MLX Whisper — ready to use, NOT integrated into Swapper"},
{"name": "mlx-community/Kokoro-82M-bf16", "type": "tts", "size_gb": 0.35, "backend": "mlx", "installed": true, "active": false, "note": "Kokoro TTS — ready to use, NOT integrated"},
{"name": "openbmb/MiniCPM-V-4_5", "type": "vision", "size_gb": 16.0, "backend": "hf", "installed": true, "active": false, "note": "MiniCPM-V 4.5 — high-quality vision model, NOT in Swapper"},
{"name": "Qwen/Qwen2.5-7B-Instruct", "type": "llm", "size_gb": 14.0, "backend": "hf", "installed": true, "active": false, "note": "HF weights only — not serving"},
{"name": "Qwen/Qwen2.5-1.5B-Instruct", "type": "llm", "size_gb": 2.9, "backend": "hf", "installed": true, "active": false, "note": "HF weights only — not serving"},
{"name": "Qwen/Qwen3-VL-32B-Instruct", "type": "vision", "size_gb": 0.004,"backend": "hf", "installed": false, "active": false, "note": "Only refs/ directory — model NOT downloaded (4KB, just placeholder)"},
{"name": "Aquiles-ai/FLUX.2-dev", "type": "image_gen", "size_gb": 105, "backend": "hf/comfyui", "installed": true, "active": false, "note": "FLUX.2-dev (105GB) — VERY large, used by ComfyUI"},
{"name": "google/gemma-3-12b-it-qat-q4_0-unquantized", "type": "llm", "size_gb": 0.004,"backend": "hf", "installed": false, "active": false, "note": "Only refs/ — NOT downloaded"}
],
"comfyui_models": {
"location": "~/Documents/ComfyUI/models + ~/ComfyUI/models",
"total_size_gb": 101,
"checkpoints": [
{"name": "flux2-dev-Q8_0.gguf", "size_gb": 33.0, "type": "image_gen", "path": "~/ComfyUI/models/checkpoints/"},
{"name": "ltx-2-19b-distilled-fp8.safetensors","size_gb": 25.0, "type": "video_gen", "path": "~/Documents/ComfyUI/models/checkpoints/"}
],
"diffusion_models": [
{"name": "z_image_turbo_bf16.safetensors", "size_gb": 11.0, "type": "image_gen"}
],
"loras": [
{"name": "ltx-2-19b-distilled-lora-384.safetensors", "size_gb": 7.1, "type": "video_gen"},
{"name": "sdxl_sofia_lora_v1.safetensors", "size_gb": 0.08, "type": "image_gen", "note": "Sofiia custom LoRA"},
{"name": "ip-adapter-faceid-plusv2_sdxl_lora.safetensors", "size_gb": 0.35, "type": "image_gen"},
{"name": "sdxl_agent_lora_smoke.safetensors", "size_gb": 0.02, "type": "image_gen"}
],
"hf_linked": [
{"name": "FLUX.1-dev", "size_gb": 40, "location": "~/hf_models/FLUX.1-dev"},
{"name": "FLUX.1-schnell", "size_gb": 12, "location": "~/hf_models/FLUX.1-schnell"},
{"name": "SDXL-base-1.0", "size_gb": 72, "location": "~/hf_models/stabilityai_sdxl_base_1.0"}
]
},
"docker_containers": [
{"name": "dagi-router-node2", "status": "Up 13m (healthy)", "port": "127.0.0.1:9102→8000"},
{"name": "dagi-gateway-node2", "status": "Up 19h (healthy)", "port": "0.0.0.0:9300"},
{"name": "swapper-service-node2", "status": "Up 14m (healthy)", "port": "0.0.0.0:8890", "note": "P1: should be 127.0.0.1:8890 (not yet applied via --remove-orphans)"},
{"name": "dagi-memory-service-node2", "status": "Up 19h (healthy)", "port": "0.0.0.0:8000"},
{"name": "dagi-qdrant-node2", "status": "Up 19h (healthy)", "port": "0.0.0.0:6333-6334"},
{"name": "dagi-postgres-node2", "status": "Up 19h (healthy)", "port": "0.0.0.0:5433"},
{"name": "dagi-neo4j-node2", "status": "Up 19h (healthy)", "port": "0.0.0.0:7474,7687"},
{"name": "dagi-nats-node2", "status": "Up 2h", "port": "0.0.0.0:4222,8222"},
{"name": "open-webui", "status": "Up 19h (healthy)", "port": "0.0.0.0:8080"},
{"name": "dagi-postgres", "status": "Up 19h (healthy)", "port": "0.0.0.0:5432"},
{"name": "dagi-redis", "status": "Up 19h (healthy)", "port": "0.0.0.0:6379"},
{"name": "sofiia-console", "status": "Up 13m", "port": "127.0.0.1:8002"}
],
"non_docker_processes": [
{"name": "ollama", "port": 11434, "type": "native", "model_serving": "all ollama models"},
{"name": "llama-server", "port": 11435, "type": "llama.cpp", "model": "Qwen3.5-35B-A3B-Q4_K_M.gguf", "note": "DUPLICATE — same model as in Ollama"},
{"name": "spacebot", "port": 19898, "type": "telegram-bot","note": "Sofiia Telegram bot"},
{"name": "gitea", "port": 3000, "type": "git-server", "note": "Self-hosted Git"},
{"name": "opencode", "port": 3456, "type": "ai-coding", "note": "AI coding tool"},
{"name": "Pieces OS", "port": 39300, "type": "ai-assistant","note": "Pieces OS — dev AI tools, not LLM serving"},
{"name": "memory-service", "port": 8000, "type": "uvicorn", "note": "Running outside Docker (dev mode)"}
]
}
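
Because the inventory is machine-readable, cleanup candidates behind the 149GB figure can be pulled straight from it. A sketch of one plausible selection, flagging explicit duplicates and plain HF weights that are installed but serving nothing; the heuristic is an assumption, not the audit's official cleanup list:

    import json
    from pathlib import Path

    inv = json.loads(Path("ops/node2_models_inventory_20260227.json").read_text())
    candidates = []

    for b in inv["backends"]:
        # llama.cpp backend explicitly flagged as duplicating an Ollama model.
        if b["type"] == "llama.cpp" and "DUPLICATE" in b.get("note", ""):
            candidates.append((b["model_name"], b["size_gb"]))
        # Ollama tags marked as duplicates of another tag.
        for m in b.get("models", []):
            if any(t.startswith("duplicate") for t in m.get("tags", [])):
                candidates.append((m["name"], m["size_gb"]))

    # Plain HF weights (not MLX, not ComfyUI-linked) that serve nothing.
    for m in inv["hf_cache_models"]:
        if m["backend"] == "hf" and m["installed"] and not m["active"]:
            candidates.append((m["name"], m["size_gb"]))

    for name, size in sorted(candidates, key=lambda c: -c[1]):
        print(f"{size:7.1f} GB  {name}")
    print(f"{sum(s for _, s in candidates):7.1f} GB  total candidates")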