docs(audit): NODA2 full audit 2026-02-27

- ops/audit_node2_20260227.md: readable report (hardware, containers, models, Sofiia, findings)
- ops/audit_node2_20260227.json: structured machine-readable inventory
- ops/audit_node2_findings.yml: 10 PASS + 5 PARTIAL + 3 FAIL + 3 SECURITY gaps
- ops/node2_capabilities.yml: router-ready capabilities (vision/text/code/stt/tts models)

Key findings:
  P0: vision pipeline broken (/vision/models=empty, qwen3-vl:8b not installed)
  P1: node-ops-worker missing, SSH root password in sofiia-console env
  P1: router-config.yml uses 172.17.0.1 (Linux bridge) not host.docker.internal

Made-with: Cursor
This commit is contained in:
Apple
2026-02-27 01:14:38 -08:00
parent 974522f12b
commit 46d7dea88a
4 changed files with 642 additions and 0 deletions

View File

@@ -0,0 +1,129 @@
{
"node_id": "noda2",
"hostname": "MacBook-Pro.local",
"timestamp": "2026-02-27T08:00:00Z",
"hardware": {
"cpu": "Apple M4 Max",
"ram_gb": 64,
"storage_free_gb": 634,
"os": "macOS 26.3 (Darwin arm64)"
},
"backends": [
{
"name": "ollama-main",
"base_url": "http://localhost:11434",
"pid_owner": "ollama (system daemon)",
"version": "0.17.1",
"gpu_mode": "Apple Silicon (MPS/Metal, unified memory)",
"models": [
{"name": "qwen3.5:35b-a3b", "type": "llm", "size_gb": 9.3, "family": "qwen3", "params": "14.8B (MoE)", "modified": "19h ago"},
{"name": "qwen3:14b", "type": "llm", "size_gb": 9.3, "family": "qwen3", "params": "14B", "modified": "20h ago"},
{"name": "gemma3:latest", "type": "llm", "size_gb": 3.3, "family": "gemma3","params": "~4B", "modified": "25h ago"},
{"name": "glm-4.7-flash:32k", "type": "llm", "size_gb": 19.0, "family": "glm", "params": "32B", "modified": "2w ago"},
{"name": "glm-4.7-flash:q4_K_M", "type": "llm", "size_gb": 19.0, "family": "glm", "params": "32B", "modified": "2w ago"},
{"name": "llava:13b", "type": "vision", "size_gb": 8.0, "family": "llava+clip", "params": "13B", "modified": "3mo ago", "vision_capable": true},
{"name": "mistral-nemo:12b","type": "llm", "size_gb": 7.1, "family": "mistral","params": "12B", "modified": "3mo ago"},
{"name": "deepseek-coder:33b","type":"code","size_gb": 18.0,"family":"deepseek","params":"33B", "modified": "3mo ago"},
{"name": "deepseek-r1:70b", "type": "llm", "size_gb": 42.0,"family":"deepseek","params":"70B", "modified": "3mo ago"},
{"name": "starcoder2:3b", "type": "code","size_gb": 1.7, "family":"starcoder","params":"3B", "modified": "3mo ago"},
{"name": "phi3:latest", "type": "llm", "size_gb": 2.2, "family":"phi3", "params": "~4B", "modified": "3mo ago"},
{"name": "gpt-oss:latest", "type": "llm", "size_gb": 13.0,"family":"unknown","params":"~13B", "modified": "3mo ago"}
],
"running": [],
"notes": "llava:13b is only vision-capable model in Ollama (CLIP multimodal). qwen3-vl NOT installed."
},
{
"name": "llama-server-cpu",
"base_url": "http://localhost:11435",
"pid_owner": "llama-server process (user: apple)",
"binary": "llama-server",
"model_path": "/Users/apple/Library/Application Support/llama.cpp/models/Qwen3.5-35B-A3B-Q4_K_M.gguf",
"model_name": "Qwen3.5-35B-A3B (Q4_K_M, llama.cpp)",
"api_type": "OpenAI-compatible (/v1/models, /v1/chat/completions)",
"health_url": "http://localhost:11435/health",
"health_status": "ok",
"gpu_mode": "Apple Silicon (Metal via llama.cpp)",
"notes": "Separate llama.cpp server instance running same Qwen3.5-35B-A3B model. Duplicates Ollama coverage."
},
{
"name": "swapper-service",
"base_url": "http://localhost:8890",
"container": "swapper-service-node2",
"health_url": "http://localhost:8890/health",
"health_status": "healthy",
"active_model": "qwen3-14b",
"mode": "single-active",
"ollama_base_url": "http://host.docker.internal:11434",
"endpoints": {
"/models": "200",
"/vision/models": "200 (empty list - no vision models configured!)",
"/stt/models": "200",
"/tts/models": "200",
"/ocr": "405 (method)"
},
"swapper_models_configured": [
"gpt-oss:latest", "phi3:latest", "qwen3:14b (loaded)",
"qwen3.5:35b-a3b", "glm-4.7-flash:32k", "gemma2:27b (not installed)",
"deepseek-coder:33b", "qwen2.5-coder:32b (not installed)", "deepseek-r1:70b"
],
"gap": "vision/models returns empty - llava:13b not in swapper_config_node2.yaml"
}
],
"containers": [
{"name": "dagi-router-node2", "port": "9102->8000", "status": "healthy", "nats_connected": true, "node_id": "NODA2"},
{"name": "dagi-gateway-node2", "port": "9300", "status": "healthy", "agents": 14},
{"name": "dagi-nats-node2", "port": "4222,8222", "status": "running", "leafnode": "spoke->144.76.224.179:7422", "rtt_ms": 58},
{"name": "dagi-memory-service-node2","port": "8000", "status": "healthy", "collections": 6},
{"name": "dagi-qdrant-node2", "port": "6333-6334", "status": "healthy"},
{"name": "swapper-service-node2", "port": "8890", "status": "healthy"},
{"name": "dagi-postgres-node2", "port": "5433->5432", "status": "healthy"},
{"name": "dagi-neo4j-node2", "port": "7474,7687", "status": "healthy"},
{"name": "sofiia-console", "port": "8002", "status": "running (Python process, not Docker healthcheck)"},
{"name": "open-webui", "port": "8080", "status": "healthy", "version": "0.7.2"}
],
"non_docker_services": [
{"name": "ollama", "port": 11434, "type": "system daemon", "binary": "ollama"},
{"name": "llama-server", "port": 11435, "type": "user process", "model": "Qwen3.5-35B-A3B-Q4_K_M.gguf"},
{"name": "gitea", "port": 3000, "type": "git server", "version": "1.25.3"},
{"name": "spacebot", "port": 19898, "type": "Telegram bot", "config": "uses sofiia-console BFF"},
{"name": "opencode", "port": 3456, "type": "AI coding tool", "note": "OpenCode.app"},
{"name": "stable (Warp)", "port": 9277, "type": "terminal helper","note": "Warp.app stable process, not SD"}
],
"sofiia_agent": {
"agent_id": "sofiia",
"display_name": "Sophia",
"class": "top_level",
"canonical_role": "Chief AI Architect & Technical Sovereign",
"telegram": "@SofiiaDAARION_bot",
"prompt_file": "gateway-bot/sofiia_prompt.txt",
"prompt_lines": 1579,
"llm_profile": "NODA2 router -> Ollama 11434",
"gateway": "dagi-gateway-node2:9300",
"control_plane": {
"console_ui": "http://localhost:8002 (sofiia-console)",
"spacebot": "spacebot process -> sofiia-console BFF (http://localhost:8002/api)",
"nats_subjects": "not yet configured for node-ops",
"ssh_access": "NODES_NODA1_SSH_PASSWORD in env (present, SECURITY RISK)",
"node_ops_worker": "NOT IMPLEMENTED"
}
},
"nats_leafnode": {
"noda2_role": "spoke",
"noda1_hub": "144.76.224.179:7422",
"rtt_ms": 58,
"connection_status": "connected",
"cross_node_subjects_tested": ["node.test.hello"],
"cross_node_pubsub": "PASS"
},
"qdrant_collections": {
"sofiia_messages": {"points": 2},
"sofiia_docs": {"points": 0},
"sofiia_memory_items":{"points": 0},
"sofiia_user_context":{"points": 0},
"memories": {"points": 0},
"messages": {"points": 0}
},
"recommended_default_vision_model": "llava:13b (Ollama, port 11434) — only available, but outdated. Install qwen3-vl:8b for better quality.",
"recommended_default_text_model": "qwen3.5:35b-a3b (Ollama, port 11434) — fastest large model via MoE architecture",
"recommended_default_code_model": "deepseek-coder:33b (Ollama) or qwen3.5:35b-a3b"
}

217
ops/audit_node2_20260227.md Normal file
View File

@@ -0,0 +1,217 @@
# NODA2 Audit Report
**Дата:** 2026-02-27
**Нода:** MacBook Pro M4 Max (Apple Silicon)
**Аудитор:** Sofiia (Cursor session)
---
## Executive Summary
NODA2 — це MacBook Pro M4 Max з 64GB RAM, на якому розгорнутий повний dev-стек DAARION.city. NATS leafnode успішно підключений до NODA1 (rtt=58ms). Основний стек (router, gateway, memory, swapper, qdrant) здоровий. Критичний gap: vision pipeline зламаний (`/vision/models` порожній, qwen3-vl:8b не встановлена). Sofiia керує NODA1 через SSH root password — SECURITY risk. node-ops-worker не реалізований.
---
## Part A — Runtime Inventory
### Hardware
| Параметр | Значення |
|----------|---------|
| CPU | Apple M4 Max |
| RAM | 64 GB (unified) |
| Storage free | 634 GB / 1.8 TB |
| OS | macOS 26.3 (Darwin arm64) |
### Docker Containers (12)
| Container | Port | Status |
|-----------|------|--------|
| `dagi-router-node2` | 9102→8000 | ✅ healthy |
| `dagi-gateway-node2` | 9300 | ✅ healthy (14 agents) |
| `dagi-nats-node2` | 4222, 8222 | ✅ running (leafnode→NODA1) |
| `dagi-memory-service-node2` | 8000 | ✅ healthy |
| `dagi-qdrant-node2` | 6333-6334 | ✅ healthy |
| `swapper-service-node2` | 8890 | ✅ healthy |
| `dagi-postgres-node2` | 5433→5432 | ✅ healthy |
| `dagi-neo4j-node2` | 7474, 7687 | ✅ healthy |
| `sofiia-console` | 8002 | ⚠️ running (no healthcheck) |
| `open-webui` | 8080 | ✅ healthy (v0.7.2) |
| `dagi-postgres` | 5432 | ✅ healthy |
| `dagi-redis` | 6379 | ✅ healthy |
### Non-Docker Services
| Process | Port | Description |
|---------|------|-------------|
| `ollama` | 11434 | System daemon, 11 models |
| `llama-server` | 11435 | llama.cpp server, Qwen3.5-35B-A3B |
| `gitea` | 3000 | Self-hosted Git (v1.25.3) |
| `spacebot` | 19898 | Telegram bot → sofiia-console BFF |
| `opencode` | 3456 | OpenCode.app AI coding tool |
### NATS Leafnode Status
```
NODA2 (spoke) ──58ms──> NODA1 144.76.224.179:7422 (hub)
Cross-node pub/sub: PASS (node.test.hello confirmed)
```
### Qdrant Collections
| Collection | Points |
|-----------|--------|
| sofiia_messages | 2 |
| sofiia_docs | 0 |
| sofiia_memory_items | 0 |
| sofiia_user_context | 0 |
| memories | 0 |
| messages | 0 |
---
## Part B — Sofiia Agent Inventory
### Registry Entry
```yaml
agent_id: sofiia
display_name: Sophia
class: top_level
canonical_role: Chief AI Architect & Technical Sovereign
visibility: private
telegram: enabled (whitelist)
prompt_file: gateway-bot/sofiia_prompt.txt (1579 lines)
```
### Runtime
- **Gateway**: `dagi-gateway-node2:9300` — зареєстрована з 13 іншими агентами
- **Router**: `dagi-router-node2:9102` — `NODE_ID=NODA2`, `nats_connected=true`
- **Console UI**: `http://localhost:8002` — Python process, HTML UI
- `NODES_NODA1_ROUTER_URL=http://144.76.224.179:9102`
- `NODES_NODA2_ROUTER_URL=http://router:8000`
- `ROUTER_URL=http://router:8000`
- ⚠️ `NODES_NODA1_SSH_PASSWORD=[secret present]` — SECURITY RISK
- **Spacebot**: Telegram → `sofiia-console BFF (http://localhost:8002/api)`
### Node Control (current state)
- **Механізм**: SSH root з паролем (в env sofiia-console)
- **Що є**: `NODES_NODA1_SSH_PASSWORD` в Docker env
- **Чого немає**: NATS node-ops-worker, allowlist команд, audit log
---
## Part C — Models Audit
### Ollama (port 11434)
| Model | Type | Size | Status |
|-------|------|------|--------|
| `qwen3.5:35b-a3b` | LLM (MoE) | 9.3 GB | available (14.8B active) |
| `qwen3:14b` | LLM | 9.3 GB | available |
| `gemma3:latest` | LLM | 3.3 GB | available |
| `glm-4.7-flash:32k` | LLM | 19 GB | available (32k context) |
| `glm-4.7-flash:q4_K_M` | LLM | 19 GB | available |
| **`llava:13b`** | **Vision** | **8.0 GB** | **available (LLaVA+CLIP)** |
| `mistral-nemo:12b` | LLM | 7.1 GB | available |
| `deepseek-coder:33b` | Code | 18 GB | available |
| `deepseek-r1:70b` | LLM (Reasoning) | 42 GB | available |
| `starcoder2:3b` | Code | 1.7 GB | available |
| `phi3:latest` | LLM | 2.2 GB | available |
| `gpt-oss:latest` | LLM | 13 GB | available |
| ~~qwen3-vl:8b~~ | Vision | ~8 GB | **NOT INSTALLED** |
### llama-server (port 11435, llama.cpp)
| Model | Type | Note |
|-------|------|------|
| `Qwen3.5-35B-A3B-Q4_K_M.gguf` | LLM | Same as Ollama qwen3.5:35b-a3b — DUPLICATE |
### Swapper (port 8890)
| Endpoint | Status |
|----------|--------|
| `/health` | `{"status":"healthy","active_model":"qwen3-14b"}` |
| `/models` | 200 (9 configured, 1 loaded) |
| `/vision/models` | ⚠️ 200 but **empty list** |
| `/stt/models` | 200 |
| `/tts/models` | 200 |
**Swapper models configured** (з swapper_config_node2.yaml):
- Loaded: `qwen3:14b`
- Unloaded: gpt-oss, phi3, qwen3.5:35b-a3b, glm-4.7-flash, deepseek-coder:33b, deepseek-r1:70b
- ⚠️ NOT installed: `gemma2:27b`, `qwen2.5-coder:32b`
### Capabilities Summary
```yaml
vision_models: [llava:13b] # legacy, available; qwen3-vl:8b recommended
text_models: [qwen3.5:35b-a3b, qwen3:14b, glm-4.7-flash, gemma3, mistral-nemo, deepseek-r1:70b]
code_models: [deepseek-coder:33b, starcoder2:3b]
embedding_models: [unknown - check memory-service]
stt_models: [whisper via swapper - details TBD]
tts_models: [xtts/coqui via swapper - details TBD]
```
---
## Part D — Findings
### P0 — Негайно
| ID | Issue |
|----|-------|
| FAIL-01 | **Vision pipeline broken**: `/vision/models=[]`, qwen3-vl:8b not installed, llava:13b not in swapper config |
### P1 — Цього тижня
| ID | Issue |
|----|-------|
| FAIL-02 | **node-ops-worker не реалізований** — Sofiia керує NODA1 через SSH root password |
| FAIL-03 | **router-config.yml**: `172.17.0.1:11434` (Linux bridge) — потрібно `host.docker.internal:11434` |
| SEC-01 | **SSH password в Docker env** sofiia-console |
| SEC-03 | **NODA2 порти на 0.0.0.0** без firewall |
### P2 — Наступний спринт
| ID | Issue |
|----|-------|
| PART-03 | `llama-server:11435` дублює Ollama — waste of memory |
| PART-04 | Qdrant memory collections empty — memory/RAG не використовується |
| - | Swapper config: gemma2:27b, qwen2.5-coder:32b — не встановлені (no ID in findings.yml) |
| - | Cross-node vision routing NODA1→NODA2 через NATS не реалізований |
---
## Рекомендований Action Plan
### Крок 1 (P0): Виправити vision
```bash
# На NODA2:
ollama pull qwen3-vl:8b
# Додати до swapper_config_node2.yaml секцію vision:
# vision_models:
# - name: qwen3-vl:8b
# type: vision
# priority: high
```
### Крок 2 (P1): Виправити router-config.yml
```bash
sed -i '' 's|http://172.17.0.1:11434|http://host.docker.internal:11434|g' router-config.yml
docker restart dagi-router-node2
```
### Крок 3 (P1): Реалізувати node-ops-worker
```
services/node_ops_worker/
main.py — NATS subscriber
allowlist.py — whitelist команд
metrics.py — ops_requests_total, ops_errors_total
Subjects:
node.noda1.ops.request / node.noda2.ops.request
```
### Крок 4 (P1): Видалити SSH password з env
```bash
# Видалити NODES_NODA1_SSH_PASSWORD з docker-compose.node2.yml
# Додати SSH key-based auth або перейти на NATS node-ops
```
---
## Канонічні Endpoints NODA2 (для NODA1 routing)
| Service | Internal | Via NATS |
|---------|----------|----------|
| Ollama LLM | `http://host.docker.internal:11434` | `node.noda2.llm.request` (TBD) |
| Ollama Vision | `http://host.docker.internal:11434` | `node.noda2.vision.request` (TBD) |
| Swapper | `http://host.docker.internal:8890` | `node.noda2.swapper.request` (TBD) |
| Router | `http://host.docker.internal:9102` | via NATS messaging |

View File

@@ -0,0 +1,151 @@
# NODA2 Audit Findings
# Date: 2026-02-27
# Auditor: Sofiia (Cursor session)
# ── PASS (що працює) ──────────────────────────────────────────────────────
pass:
- id: PASS-01
title: "NATS leafnode NODA2→NODA1 connected"
detail: "spoke=true, rtt=58ms, cross-node pub/sub tested and working"
- id: PASS-02
title: "Docker stack healthy"
detail: "All 12 containers running. 10 have healthcheck PASS. NATS no healthcheck docker-level (compensated by leafnode check)."
- id: PASS-03
title: "Router NODA2 operational"
detail: "nats_connected=true, NODE_ID=NODA2, 14 agents registered, mounts local router-config.yml"
- id: PASS-04
title: "Ollama 11 models available"
detail: "qwen3.5:35b-a3b (primary), qwen3:14b, llava:13b (vision), deepseek-r1:70b, deepseek-coder:33b, glm-4.7-flash, gemma3, mistral-nemo, starcoder2, phi3, gpt-oss"
- id: PASS-05
title: "Swapper healthy, active_model=qwen3-14b"
detail: "/health=healthy, /models=200, /vision/models=200, /stt/models=200, /tts/models=200"
- id: PASS-06
title: "Sofiia agent registered in gateway (14 agents)"
detail: "sofiia present in agent_registry.yml, 1579-line prompt, class=top_level, telegram enabled"
- id: PASS-07
title: "Memory + Qdrant operational"
detail: "6 collections present (sofiia_messages:2 points, others empty). sofiia_docs ready for indexing."
- id: PASS-08
title: "Sofiia Console UI running"
detail: "http://localhost:8002, Python process, connects to NODA1 router (http://144.76.224.179:9102) and NODA2 router"
- id: PASS-09
title: "Gitea (self-hosted Git) running"
detail: "http://localhost:3000, version 1.25.3. Internal code repo."
- id: PASS-10
title: "Spacebot + OpenCode running"
detail: "Spacebot uses sofiia-console BFF. OpenCode.app active. Both integrated with NODA2 stack."
# ── PARTIAL (частково) ────────────────────────────────────────────────────
partial:
- id: PART-01
title: "Vision capability: llava:13b available but outdated"
detail: "llava:13b in Ollama (vision_capable=true via CLIP). But swapper /vision/models returns empty - llava not added to swapper_config_node2.yaml. qwen3-vl:8b not installed."
action: "Install qwen3-vl:8b OR add llava:13b to swapper vision config"
- id: PART-02
title: "Router LLM profiles use 172.17.0.1 (Docker bridge IP, Linux-style)"
    detail: "router-config.yml has base_url: http://172.17.0.1:11434 for all profiles. 172.17.0.1 is the Linux Docker bridge gateway; on macOS Docker Desktop the host is reached via host.docker.internal, so 172.17.0.1 may or may not resolve depending on the Docker version and network configuration."
action: "Replace 172.17.0.1:11434 with host.docker.internal:11434 in router-config.yml for NODA2"
- id: PART-03
title: "llama-server:11435 duplicates Ollama (same Qwen3.5-35B model)"
detail: "Wastes memory. Two instances of same model. No routing config points to 11435."
action: "Either remove llama-server or add it as dedicated profile in router-config.yml with specific purpose"
- id: PART-04
title: "Qdrant memory collections mostly empty"
detail: "sofiia_messages:2, all others 0. Memory not being used/indexed actively."
action: "Enable memory indexing in memory-service config or start ingesting docs"
- id: PART-05
title: "Sofiia Console not fully integrated"
detail: "UI serves HTML on :8002. Has NODES_NODA1_ROUTER_URL configured. But no node-ops-worker for NATS-based control. SSH password in env."
action: "Implement node-ops-worker (see FAIL-02)"
# ── FAIL (зламано) ────────────────────────────────────────────────────────
fail:
- id: FAIL-01
title: "Vision pipeline broken end-to-end"
detail: "Swapper /vision/models=[] (empty). No vision model configured in swapper_config_node2.yaml. Router has no vision profile pointing to NODA2. Photo requests would fail silently or go to NODA1."
priority: P0
action: |
1. Add llava:13b to swapper_config_node2.yaml vision section
2. OR install: ollama pull qwen3-vl:8b and add to swapper config
3. Add NODA2 vision profile to router-config.yml
- id: FAIL-02
title: "Node-ops-worker not implemented"
detail: "Sofiia has no NATS-based node control. Only mechanism is SSH root (password in env = SECURITY RISK). No subjects: node.noda1.ops.request, node.noda2.ops.request"
priority: P1
action: |
1. Implement services/node_ops_worker/ (Python NATS subscriber)
2. Allowlist: docker ps/logs/restart, health curl, tail logs
3. Deploy to both NODA1 and NODA2
4. Remove NODES_NODA1_SSH_PASSWORD from env
- id: FAIL-03
title: "NODA2 router-config.yml profiles use 172.17.0.1 (Linux Docker bridge)"
detail: "On macOS, Docker containers use host.docker.internal for host access. 172.17.0.1 may or may not resolve depending on Docker version."
priority: P1
action: "Replace 172.17.0.1:11434 → host.docker.internal:11434 in router-config.yml"
# ── SECURITY risks ────────────────────────────────────────────────────────
security:
- id: SEC-01
title: "SSH root password in sofiia-console container env"
detail: "NODES_NODA1_SSH_PASSWORD=[secret present] in sofiia-console Docker env. Any process inside the container can read it."
severity: HIGH
priority: P1
action: |
1. Remove NODES_NODA1_SSH_PASSWORD from docker env
2. Use SSH key-based auth instead (mount private key as secret)
3. Better: implement NATS node-ops-worker (FAIL-02) to eliminate SSH dependency
- id: SEC-02
title: "DATABASE_URL with plaintext password in router env"
detail: "DATABASE_URL=postgresql://daarion:XXXXX@... in router container. Acceptable for local dev, risk if container is compromised."
severity: MEDIUM
priority: P2
action: "Use Docker secrets or env file with restricted permissions"
- id: SEC-03
title: "NODA2 external IP (145.224.111.147) — ports exposed"
detail: "All Docker ports bound to 0.0.0.0. If NODA2 has external IP, ports 9300 (gateway), 9102 (router), 8890 (swapper), 8000 (memory) are publicly accessible."
severity: HIGH
priority: P1
action: "Add firewall rules (macOS pfctl or router-level) to restrict inbound to trusted IPs only"
# ── Summary ───────────────────────────────────────────────────────────────
summary:
current_state:
- "NODA2 = MacBook Pro M4 Max, 64GB RAM, macOS 26.3"
- "12 Docker containers running, 10 healthy"
- "NATS leafnode connected to NODA1 (rtt=58ms, cross-node pub/sub PASS)"
- "Ollama: 11 models, primary=qwen3.5:35b-a3b + qwen3:14b. Vision=llava:13b (legacy)"
- "llama-server:11435 running Qwen3.5-35B-A3B via llama.cpp (duplicates Ollama)"
- "Swapper active_model=qwen3-14b, vision/models=EMPTY (gap)"
- "Sofiia: agent_registry entry, 1579-line prompt, gateway registered"
- "Sofiia Console UI on :8002, connects to NODA1+NODA2 routers"
- "Spacebot (Telegram) → sofiia-console BFF → NODA2 stack"
- "Memory: 6 Qdrant collections (mostly empty), Neo4j running"
gaps:
- "P0: Vision pipeline broken — swapper /vision/models=empty, qwen3-vl:8b not installed"
- "P1: node-ops-worker not implemented — Sofiia controls NODA1 via SSH root password (SECURITY)"
- "P1: router-config.yml on NODA2 uses 172.17.0.1 (should be host.docker.internal)"
- "P1: NODA2 ports exposed on 0.0.0.0 without firewall — security risk"
- "P1: SSH root password in sofiia-console env"
- "P2: llama-server:11435 duplicates Ollama — memory waste"
- "P2: Qdrant memory collections empty — memory/RAG not being used"
- "P2: No cross-node vision routing (NODA1 can't send vision tasks to NODA2 via NATS yet)"
- "P2: Swapper config missing llava:13b in vision section"
- "P2: swapper_config_node2.yaml references gemma2:27b and qwen2.5-coder:32b — NOT installed"

145
ops/node2_capabilities.yml Normal file
View File

@@ -0,0 +1,145 @@
# NODA2 Capabilities — Machine-readable for NODA1 Router
# Generated: 2026-02-27
# Node: MacBook Pro M4 Max, 64GB RAM, macOS 26.3
node_id: noda2
hostname: MacBook-Pro.local
nats_endpoint: "nats://145.224.111.147:4222" # leafnode spoke, via NODA1 hub
router_url: "http://145.224.111.147:9102" # NODA2 router (external, if firewall allows)
swapper_url: "http://145.224.111.147:8890" # swapper (external, if firewall allows)
ollama_url: "http://145.224.111.147:11434" # Ollama GPU (external, if firewall allows)
# ── Vision Models ──────────────────────────────────────────────────────────
vision_models:
- name: llava:13b
backend: ollama
base_url: http://localhost:11434
type: vision
size_gb: 8.0
quality: medium
note: "LLaVA 1.6 + CLIP, 13B. Available now. Legacy quality."
capability_hint: "image_understanding"
# RECOMMENDED - not yet installed, install with: ollama pull qwen3-vl:8b
- name: qwen3-vl:8b
backend: ollama
base_url: http://localhost:11434
type: vision
    size_gb: "~8.0"  # estimate — quoted so the leading '~' is not mistaken for YAML null
quality: high
installed: false
note: "Install: ollama pull qwen3-vl:8b. Superior OCR, document, chart understanding."
# ── Text/LLM Models ────────────────────────────────────────────────────────
text_models:
- name: qwen3.5:35b-a3b
backend: ollama
base_url: http://localhost:11434
type: llm
size_gb: 9.3
params: "14.8B active (MoE 35B total)"
quality: high
speed: fast
note: "Primary recommendation: MoE architecture, fast inference on M4 Max"
- name: qwen3:14b
backend: ollama
base_url: http://localhost:11434
type: llm
size_gb: 9.3
params: "14B"
quality: high
speed: medium
- name: glm-4.7-flash:32k
backend: ollama
base_url: http://localhost:11434
type: llm
size_gb: 19.0
params: "32B"
quality: high
speed: slow
note: "Long context (32k). Use for document-heavy tasks."
- name: deepseek-r1:70b
backend: ollama
base_url: http://localhost:11434
type: llm
size_gb: 42.0
params: "70B"
quality: very_high
speed: very_slow
note: "Reasoning model. Use for complex multi-step planning only."
- name: Qwen3.5-35B-A3B-Q4_K_M
backend: llama-server
base_url: http://localhost:11435
api_type: openai_compatible
type: llm
note: "Same model as qwen3.5:35b-a3b via llama.cpp. Redundant with Ollama."
# ── Code Models ────────────────────────────────────────────────────────────
code_models:
- name: deepseek-coder:33b
backend: ollama
base_url: http://localhost:11434
type: code
size_gb: 18.0
- name: starcoder2:3b
backend: ollama
base_url: http://localhost:11434
type: code
size_gb: 1.7
speed: very_fast
# ── Embedding Models ───────────────────────────────────────────────────────
embedding_models:
# Not explicitly found in Ollama list
# memory-service uses its own embedding (check dagi-memory-service-node2)
- name: unknown
note: "Audit needed: check memory-service embedding model config"
# ── STT Models ─────────────────────────────────────────────────────────────
stt_models:
- name: whisper (swapper)
backend: swapper
base_url: http://localhost:8890
endpoint: /stt
note: "Swapper exposes /stt/models (HTTP 200). Model details need swapper config check."
# ── TTS Models ─────────────────────────────────────────────────────────────
tts_models:
- name: tts (swapper)
backend: swapper
base_url: http://localhost:8890
endpoint: /tts
note: "Swapper exposes /tts/models (HTTP 200). Model details need swapper config check."
# ── Routing Policy ─────────────────────────────────────────────────────────
routing_policy:
vision_request:
capability: "vision"
selection_logic: |
if quality_tier == "best":
prefer: qwen3-vl:8b (when installed)
fallback: llava:13b
if quality_tier == "fast":
prefer: llava:13b
return_model_used: true
nats_subject: "node.noda2.vision.request"
text_request:
capability: "text"
selection_logic: |
if quality_tier == "best": deepseek-r1:70b
if quality_tier == "high": qwen3.5:35b-a3b (default)
if quality_tier == "fast": qwen3:14b or gemma3:latest
nats_subject: "node.noda2.llm.request"
code_request:
capability: "code"
selection_logic: |
prefer: deepseek-coder:33b
fast fallback: starcoder2:3b
nats_subject: "node.noda2.code.request"