Bug fixes:
- Bug A: GROK_API_KEY env mismatch — router expected GROK_API_KEY but only
XAI_API_KEY was present. Added GROK_API_KEY=${XAI_API_KEY} alias in compose.
- Bug B: 'grok' profile missing in router-config.node2.yml — added cloud_grok
profile (provider: grok, model: grok-2-1212). Sofiia now has
default_llm=cloud_grok with fallback_llm=local_default_coder.
- Bug C: Router silently defaulted to cloud DeepSeek when the profile was unknown.
It now falls back to agent.fallback_llm or local_default_coder and logs a WARNING.
The hardcoded Ollama URL (172.18.0.1) was replaced with a config-driven base_url.
New service: Node Capabilities Service (NCS)
- services/node-capabilities/ — FastAPI microservice exposing live model
inventory from Ollama, Swapper, and llama-server.
- GET /capabilities — canonical JSON with served_models[] and inventory_only[]
- GET /capabilities/models — flat list of served models
- POST /capabilities/refresh — force cache refresh
- Cache TTL 15s, bound to 127.0.0.1:8099
- services/router/capabilities_client.py — async client with TTL cache
Artifacts:
- ops/node2_models_audit.md — 3-layer model view (served/disk/cloud)
- ops/node2_models_audit.yml — machine-readable audit
- ops/node2_capabilities_example.json — sample NCS output (14 served models)
Made-with: Cursor
77 lines
4.1 KiB
YAML
77 lines
4.1 KiB
YAML
# NODA2 Model Audit — Three-layer view
# Date: 2026-02-27
# Source: Node Capabilities Service + manual disk scan

# ─── LAYER 1: SERVED BY RUNTIME (routing-eligible) ───────────────────────────
# One entry per serving runtime; each records the base_url it is reached at
# and the models it exposes.
served_by_runtime:
  ollama:
    base_url: http://host.docker.internal:11434
    # Quoted so the version stays a string.
    version: "0.17.1"
    models:
      # NOTE(review): size_gb here is identical to the qwen3:14b row below,
      # while the same model's GGUF is listed at 20.0 GB under
      # installed_on_disk.llama_cpp_models — confirm these figures.
      - {name: "qwen3.5:35b-a3b", type: llm, size_gb: 9.3, params: "14.8B MoE"}
      - {name: "qwen3:14b", type: llm, size_gb: 9.3, params: "14B"}
      - {name: "gemma3:latest", type: llm, size_gb: 3.3, params: "4B"}
      - {name: "glm-4.7-flash:32k", type: llm, size_gb: 19.0, params: "~32B"}
      - {name: "glm-4.7-flash:q4_K_M", type: llm, size_gb: 19.0, note: "DUPLICATE of :32k"}
      - {name: "llava:13b", type: vision, size_gb: 8.0, params: "13B"}
      - {name: "mistral-nemo:12b", type: llm, size_gb: 7.1, note: "old"}
      - {name: "deepseek-coder:33b", type: code, size_gb: 18.8, params: "33B"}
      - {name: "deepseek-r1:70b", type: llm, size_gb: 42.5, params: "70B"}
      - {name: "starcoder2:3b", type: code, size_gb: 1.7}
      - {name: "phi3:latest", type: llm, size_gb: 2.2}
      - {name: "gpt-oss:latest", type: llm, size_gb: 13.8, note: "old"}

  swapper:
    base_url: http://swapper-service:8890
    active_model: null
    vision_models:
      - {name: "llava-13b", type: vision, size_gb: 8.0, status: unloaded}
    # Count only; the individual LLM entries are not itemised in this section.
    llm_models_count: 9

  llama_server:
    base_url: http://host.docker.internal:11435
    models:
      - {name: "Qwen3.5-35B-A3B-Q4_K_M.gguf", type: llm, note: "DUPLICATE of ollama qwen3.5:35b-a3b"}

# ─── LAYER 2: INSTALLED ON DISK (not served, not for routing) ────────────────
# Grouped by the on-disk location in which each model was found.
installed_on_disk:
  hf_cache:
    - {name: "whisper-large-v3-turbo-asr-fp16", type: stt, size_gb: 1.5, backend: mlx, ready: true}
    - {name: "Kokoro-82M-bf16", type: tts, size_gb: 0.35, backend: mlx, ready: true}
    - {name: "MiniCPM-V-4_5", type: vision, size_gb: 16.0, backend: hf, ready: false}
    # NOTE(review): the remaining hf_cache rows carry no `ready` flag — confirm
    # whether consumers should treat a missing flag as false.
    - {name: "Qwen2.5-7B-Instruct", type: llm, size_gb: 14.0, backend: hf}
    - {name: "Qwen2.5-1.5B-Instruct", type: llm, size_gb: 2.9, backend: hf}
    - {name: "FLUX.2-dev (Aquiles)", type: image_gen, size_gb: 105.0, backend: comfyui}

  cursor_worktree:
    - {name: "Qwen3-VL-32B-Instruct", type: vision, size_gb: 123.0, path: "~/.cursor/worktrees/.../models/"}

  jan_ai:
    - {name: "Jan-v2-VL-med-Q8_0", type: vision, size_gb: 9.2, path: "~/Library/Application Support/Jan/"}

  llama_cpp_models:
    - {name: "Qwen3.5-35B-A3B-Q4_K_M.gguf", type: llm, size_gb: 20.0, note: "DUPLICATE, served by llama-server"}

  comfyui:
    - {name: "flux2-dev-Q8_0.gguf", type: image_gen, size_gb: 33.0}
    - {name: "ltx-2-19b-distilled-fp8.safetensors", type: video_gen, size_gb: 25.0}
    - {name: "z_image_turbo_bf16.safetensors", type: image_gen, size_gb: 11.0}
    - {name: "SDXL-base-1.0", type: image_gen, size_gb: 72.0, note: "legacy"}

  hf_models_dir:
    # Same 72.0 GB size as the SDXL-base-1.0 row under comfyui; both are
    # marked legacy.
    - {name: "stabilityai_sdxl_base_1.0", type: image_gen, size_gb: 72.0, note: "legacy"}

# ─── LAYER 3: CLOUD / EXTERNAL APIs ──────────────────────────────────────────
# api_key_env: presumably the environment variable the provider key is read
# from — confirm against the router. GROK_API_KEY is aliased from XAI_API_KEY
# (see sofiia_routing.env_mapping).
cloud_apis:
  - {name: "grok-2-1212", provider: grok, api_key_env: "GROK_API_KEY", active: true}
  - {name: "deepseek-chat", provider: deepseek, api_key_env: "DEEPSEEK_API_KEY", active: true}
  - {name: "mistral-large-latest", provider: mistral, api_key_env: "MISTRAL_API_KEY", active: false}

# ─── SOFIIA ROUTING CHAIN (after fix) ────────────────────────────────────────
# Human-readable summary of the sofiia routing chain. Every value except
# `deterministic` is a quoted descriptive string, not structured config.
sofiia_routing:
  agent_registry: "llm_profile: grok"
  router_config: "agents.sofiia.default_llm: cloud_grok → provider=grok, model=grok-2-1212"
  fallback: "fallback_llm: local_default_coder → qwen3:14b (Ollama)"
  env_mapping: "XAI_API_KEY → GROK_API_KEY (aliased in compose)"
  deterministic: true