diff --git a/ops/audit_node2_20260227.json b/ops/audit_node2_20260227.json new file mode 100644 index 00000000..0a615235 --- /dev/null +++ b/ops/audit_node2_20260227.json @@ -0,0 +1,129 @@ +{ + "node_id": "noda2", + "hostname": "MacBook-Pro.local", + "timestamp": "2026-02-27T08:00:00Z", + "hardware": { + "cpu": "Apple M4 Max", + "ram_gb": 64, + "storage_free_gb": 634, + "os": "macOS 26.3 (Darwin arm64)" + }, + "backends": [ + { + "name": "ollama-main", + "base_url": "http://localhost:11434", + "pid_owner": "ollama (system daemon)", + "version": "0.17.1", + "gpu_mode": "Apple Silicon (MPS/Metal, unified memory)", + "models": [ + {"name": "qwen3.5:35b-a3b", "type": "llm", "size_gb": 9.3, "family": "qwen3", "params": "14.8B (MoE)", "modified": "19h ago"}, + {"name": "qwen3:14b", "type": "llm", "size_gb": 9.3, "family": "qwen3", "params": "14B", "modified": "20h ago"}, + {"name": "gemma3:latest", "type": "llm", "size_gb": 3.3, "family": "gemma3","params": "~4B", "modified": "25h ago"}, + {"name": "glm-4.7-flash:32k", "type": "llm", "size_gb": 19.0, "family": "glm", "params": "32B", "modified": "2w ago"}, + {"name": "glm-4.7-flash:q4_K_M", "type": "llm", "size_gb": 19.0, "family": "glm", "params": "32B", "modified": "2w ago"}, + {"name": "llava:13b", "type": "vision", "size_gb": 8.0, "family": "llava+clip", "params": "13B", "modified": "3mo ago", "vision_capable": true}, + {"name": "mistral-nemo:12b","type": "llm", "size_gb": 7.1, "family": "mistral","params": "12B", "modified": "3mo ago"}, + {"name": "deepseek-coder:33b","type":"code","size_gb": 18.0,"family":"deepseek","params":"33B", "modified": "3mo ago"}, + {"name": "deepseek-r1:70b", "type": "llm", "size_gb": 42.0,"family":"deepseek","params":"70B", "modified": "3mo ago"}, + {"name": "starcoder2:3b", "type": "code","size_gb": 1.7, "family":"starcoder","params":"3B", "modified": "3mo ago"}, + {"name": "phi3:latest", "type": "llm", "size_gb": 2.2, "family":"phi3", "params": "~4B", "modified": "3mo ago"}, + {"name": 
"gpt-oss:latest", "type": "llm", "size_gb": 13.0,"family":"unknown","params":"~13B", "modified": "3mo ago"} + ], + "running": [], + "notes": "llava:13b is only vision-capable model in Ollama (CLIP multimodal). qwen3-vl NOT installed." + }, + { + "name": "llama-server-cpu", + "base_url": "http://localhost:11435", + "pid_owner": "llama-server process (user: apple)", + "binary": "llama-server", + "model_path": "/Users/apple/Library/Application Support/llama.cpp/models/Qwen3.5-35B-A3B-Q4_K_M.gguf", + "model_name": "Qwen3.5-35B-A3B (Q4_K_M, llama.cpp)", + "api_type": "OpenAI-compatible (/v1/models, /v1/chat/completions)", + "health_url": "http://localhost:11435/health", + "health_status": "ok", + "gpu_mode": "Apple Silicon (Metal via llama.cpp)", + "notes": "Separate llama.cpp server instance running same Qwen3.5-35B-A3B model. Duplicates Ollama coverage." + }, + { + "name": "swapper-service", + "base_url": "http://localhost:8890", + "container": "swapper-service-node2", + "health_url": "http://localhost:8890/health", + "health_status": "healthy", + "active_model": "qwen3-14b", + "mode": "single-active", + "ollama_base_url": "http://host.docker.internal:11434", + "endpoints": { + "/models": "200", + "/vision/models": "200 (empty list - no vision models configured!)", + "/stt/models": "200", + "/tts/models": "200", + "/ocr": "405 (method)" + }, + "swapper_models_configured": [ + "gpt-oss:latest", "phi3:latest", "qwen3:14b (loaded)", + "qwen3.5:35b-a3b", "glm-4.7-flash:32k", "gemma2:27b (not installed)", + "deepseek-coder:33b", "qwen2.5-coder:32b (not installed)", "deepseek-r1:70b" + ], + "gap": "vision/models returns empty - llava:13b not in swapper_config_node2.yaml" + } + ], + "containers": [ + {"name": "dagi-router-node2", "port": "9102->8000", "status": "healthy", "nats_connected": true, "node_id": "NODA2"}, + {"name": "dagi-gateway-node2", "port": "9300", "status": "healthy", "agents": 14}, + {"name": "dagi-nats-node2", "port": "4222,8222", "status": "running", 
"leafnode": "spoke->144.76.224.179:7422", "rtt_ms": 58}, + {"name": "dagi-memory-service-node2","port": "8000", "status": "healthy", "collections": 6}, + {"name": "dagi-qdrant-node2", "port": "6333-6334", "status": "healthy"}, + {"name": "swapper-service-node2", "port": "8890", "status": "healthy"}, + {"name": "dagi-postgres-node2", "port": "5433->5432", "status": "healthy"}, + {"name": "dagi-neo4j-node2", "port": "7474,7687", "status": "healthy"}, + {"name": "sofiia-console", "port": "8002", "status": "running (Python process, not Docker healthcheck)"}, + {"name": "open-webui", "port": "8080", "status": "healthy", "version": "0.7.2"} + ], + "non_docker_services": [ + {"name": "ollama", "port": 11434, "type": "system daemon", "binary": "ollama"}, + {"name": "llama-server", "port": 11435, "type": "user process", "model": "Qwen3.5-35B-A3B-Q4_K_M.gguf"}, + {"name": "gitea", "port": 3000, "type": "git server", "version": "1.25.3"}, + {"name": "spacebot", "port": 19898, "type": "Telegram bot", "config": "uses sofiia-console BFF"}, + {"name": "opencode", "port": 3456, "type": "AI coding tool", "note": "OpenCode.app"}, + {"name": "stable (Warp)", "port": 9277, "type": "terminal helper","note": "Warp.app stable process, not SD"} + ], + "sofiia_agent": { + "agent_id": "sofiia", + "display_name": "Sophia", + "class": "top_level", + "canonical_role": "Chief AI Architect & Technical Sovereign", + "telegram": "@SofiiaDAARION_bot", + "prompt_file": "gateway-bot/sofiia_prompt.txt", + "prompt_lines": 1579, + "llm_profile": "NODA2 router -> Ollama 11434", + "gateway": "dagi-gateway-node2:9300", + "control_plane": { + "console_ui": "http://localhost:8002 (sofiia-console)", + "spacebot": "spacebot process -> sofiia-console BFF (http://localhost:8002/api)", + "nats_subjects": "not yet configured for node-ops", + "ssh_access": "NODES_NODA1_SSH_PASSWORD in env (present, SECURITY RISK)", + "node_ops_worker": "NOT IMPLEMENTED" + } + }, + "nats_leafnode": { + "noda2_role": "spoke", + 
"noda1_hub": "144.76.224.179:7422", + "rtt_ms": 58, + "connection_status": "connected", + "cross_node_subjects_tested": ["node.test.hello"], + "cross_node_pubsub": "PASS" + }, + "qdrant_collections": { + "sofiia_messages": {"points": 2}, + "sofiia_docs": {"points": 0}, + "sofiia_memory_items":{"points": 0}, + "sofiia_user_context":{"points": 0}, + "memories": {"points": 0}, + "messages": {"points": 0} + }, + "recommended_default_vision_model": "llava:13b (Ollama, port 11434) — only available, but outdated. Install qwen3-vl:8b for better quality.", + "recommended_default_text_model": "qwen3.5:35b-a3b (Ollama, port 11434) — fastest large model via MoE architecture", + "recommended_default_code_model": "deepseek-coder:33b (Ollama) or qwen3.5:35b-a3b" +} diff --git a/ops/audit_node2_20260227.md b/ops/audit_node2_20260227.md new file mode 100644 index 00000000..954f9a1e --- /dev/null +++ b/ops/audit_node2_20260227.md @@ -0,0 +1,217 @@ +# NODA2 Audit Report +**Дата:** 2026-02-27 +**Нода:** MacBook Pro M4 Max (Apple Silicon) +**Аудитор:** Sofiia (Cursor session) + +--- + +## Executive Summary + +NODA2 — це MacBook Pro M4 Max з 64GB RAM, на якому розгорнутий повний dev-стек DAARION.city. NATS leafnode успішно підключений до NODA1 (rtt=58ms). Основний стек (router, gateway, memory, swapper, qdrant) здоровий. Критичний gap: vision pipeline зламаний (`/vision/models` порожній, qwen3-vl:8b не встановлена). Sofiia керує NODA1 через SSH root password — SECURITY risk. node-ops-worker не реалізований. 
+ +--- + +## Part A — Runtime Inventory + +### Hardware +| Параметр | Значення | +|----------|---------| +| CPU | Apple M4 Max | +| RAM | 64 GB (unified) | +| Storage free | 634 GB / 1.8 TB | +| OS | macOS 26.3 (Darwin arm64) | + +### Docker Containers (12) +| Container | Port | Status | +|-----------|------|--------| +| `dagi-router-node2` | 9102→8000 | ✅ healthy | +| `dagi-gateway-node2` | 9300 | ✅ healthy (14 agents) | +| `dagi-nats-node2` | 4222, 8222 | ✅ running (leafnode→NODA1) | +| `dagi-memory-service-node2` | 8000 | ✅ healthy | +| `dagi-qdrant-node2` | 6333-6334 | ✅ healthy | +| `swapper-service-node2` | 8890 | ✅ healthy | +| `dagi-postgres-node2` | 5433→5432 | ✅ healthy | +| `dagi-neo4j-node2` | 7474, 7687 | ✅ healthy | +| `sofiia-console` | 8002 | ⚠️ running (no healthcheck) | +| `open-webui` | 8080 | ✅ healthy (v0.7.2) | +| `dagi-postgres` | 5432 | ✅ healthy | +| `dagi-redis` | 6379 | ✅ healthy | + +### Non-Docker Services +| Process | Port | Description | +|---------|------|-------------| +| `ollama` | 11434 | System daemon, 11 models | +| `llama-server` | 11435 | llama.cpp server, Qwen3.5-35B-A3B | +| `gitea` | 3000 | Self-hosted Git (v1.25.3) | +| `spacebot` | 19898 | Telegram bot → sofiia-console BFF | +| `opencode` | 3456 | OpenCode.app AI coding tool | + +### NATS Leafnode Status +``` +NODA2 (spoke) ──58ms──> NODA1 144.76.224.179:7422 (hub) +Cross-node pub/sub: PASS (node.test.hello confirmed) +``` + +### Qdrant Collections +| Collection | Points | +|-----------|--------| +| sofiia_messages | 2 | +| sofiia_docs | 0 | +| sofiia_memory_items | 0 | +| sofiia_user_context | 0 | +| memories | 0 | +| messages | 0 | + +--- + +## Part B — Sofiia Agent Inventory + +### Registry Entry +```yaml +agent_id: sofiia +display_name: Sophia +class: top_level +canonical_role: Chief AI Architect & Technical Sovereign +visibility: private +telegram: enabled (whitelist) +prompt_file: gateway-bot/sofiia_prompt.txt (1579 lines) +``` + +### Runtime +- **Gateway**: 
`dagi-gateway-node2:9300` — зареєстрована з 13 іншими агентами +- **Router**: `dagi-router-node2:9102` — `NODE_ID=NODA2`, `nats_connected=true` +- **Console UI**: `http://localhost:8002` — Python process, HTML UI + - `NODES_NODA1_ROUTER_URL=http://144.76.224.179:9102` + - `NODES_NODA2_ROUTER_URL=http://router:8000` + - `ROUTER_URL=http://router:8000` + - ⚠️ `NODES_NODA1_SSH_PASSWORD=[secret present]` — SECURITY RISK +- **Spacebot**: Telegram → `sofiia-console BFF (http://localhost:8002/api)` + +### Node Control (current state) +- **Механізм**: SSH root з паролем (в env sofiia-console) +- **Що є**: `NODES_NODA1_SSH_PASSWORD` в Docker env +- **Чого немає**: NATS node-ops-worker, allowlist команд, audit log + +--- + +## Part C — Models Audit + +### Ollama (port 11434) +| Model | Type | Size | Status | +|-------|------|------|--------| +| `qwen3.5:35b-a3b` | LLM (MoE) | 9.3 GB | available (14.8B active) | +| `qwen3:14b` | LLM | 9.3 GB | available | +| `gemma3:latest` | LLM | 3.3 GB | available | +| `glm-4.7-flash:32k` | LLM | 19 GB | available (32k context) | +| `glm-4.7-flash:q4_K_M` | LLM | 19 GB | available | +| **`llava:13b`** | **Vision** | **8.0 GB** | **available (LLaVA+CLIP)** | +| `mistral-nemo:12b` | LLM | 7.1 GB | available | +| `deepseek-coder:33b` | Code | 18 GB | available | +| `deepseek-r1:70b` | LLM (Reasoning) | 42 GB | available | +| `starcoder2:3b` | Code | 1.7 GB | available | +| `phi3:latest` | LLM | 2.2 GB | available | +| `gpt-oss:latest` | LLM | 13 GB | available | +| ~~qwen3-vl:8b~~ | Vision | ~8 GB | **NOT INSTALLED** | + +### llama-server (port 11435, llama.cpp) +| Model | Type | Note | +|-------|------|------| +| `Qwen3.5-35B-A3B-Q4_K_M.gguf` | LLM | Same as Ollama qwen3.5:35b-a3b — DUPLICATE | + +### Swapper (port 8890) +| Endpoint | Status | +|----------|--------| +| `/health` | `{"status":"healthy","active_model":"qwen3-14b"}` | +| `/models` | 200 (9 configured, 1 loaded) | +| `/vision/models` | ⚠️ 200 but **empty list** | +| 
`/stt/models` | 200 | +| `/tts/models` | 200 | + +**Swapper models configured** (з swapper_config_node2.yaml): +- Loaded: `qwen3:14b` +- Unloaded: gpt-oss, phi3, qwen3.5:35b-a3b, glm-4.7-flash, deepseek-coder:33b, deepseek-r1:70b +- ⚠️ NOT installed: `gemma2:27b`, `qwen2.5-coder:32b` + +### Capabilities Summary +```yaml +vision_models: [llava:13b] # legacy, available; qwen3-vl:8b recommended +text_models: [qwen3.5:35b-a3b, qwen3:14b, glm-4.7-flash, gemma3, mistral-nemo, deepseek-r1:70b] +code_models: [deepseek-coder:33b, starcoder2:3b] +embedding_models: [unknown - check memory-service] +stt_models: [whisper via swapper - details TBD] +tts_models: [xtts/coqui via swapper - details TBD] +``` + +--- + +## Part D — Findings + +### P0 — Негайно +| ID | Issue | +|----|-------| +| FAIL-01 | **Vision pipeline broken**: `/vision/models=[]`, qwen3-vl:8b not installed, llava:13b not in swapper config | + +### P1 — Цього тижня +| ID | Issue | +|----|-------| +| FAIL-02 | **node-ops-worker не реалізований** — Sofiia керує NODA1 через SSH root password | +| FAIL-03 | **router-config.yml**: `172.17.0.1:11434` (Linux bridge) — потрібно `host.docker.internal:11434` | +| SEC-01 | **SSH password в Docker env** sofiia-console | +| SEC-03 | **NODA2 порти на 0.0.0.0** без firewall | + +### P2 — Наступний спринт +| ID | Issue | +|----|-------| +| PART-03 | `llama-server:11435` дублює Ollama — waste of memory | +| PART-04 | Qdrant memory collections empty — memory/RAG не використовується | +| PART-02 | Swapper config: gemma2:27b, qwen2.5-coder:32b — не встановлені | +| - | Cross-node vision routing NODA1→NODA2 через NATS не реалізований | + +--- + +## Рекомендований Action Plan + +### Крок 1 (P0): Виправити vision +```bash +# На NODA2: +ollama pull qwen3-vl:8b + +# Додати до swapper_config_node2.yaml секцію vision: +# vision_models: +# - name: qwen3-vl:8b +# type: vision +# priority: high +``` + +### Крок 2 (P1): Виправити router-config.yml +```bash +sed -i '' 
's|http://172.17.0.1:11434|http://host.docker.internal:11434|g' router-config.yml +docker restart dagi-router-node2 +``` + +### Крок 3 (P1): Реалізувати node-ops-worker +``` +services/node_ops_worker/ + main.py — NATS subscriber + allowlist.py — whitelist команд + metrics.py — ops_requests_total, ops_errors_total +Subjects: + node.noda1.ops.request / node.noda2.ops.request +``` + +### Крок 4 (P1): Видалити SSH password з env +```bash +# Видалити NODES_NODA1_SSH_PASSWORD з docker-compose.node2.yml +# Додати SSH key-based auth або перейти на NATS node-ops +``` + +--- + +## Канонічні Endpoints NODA2 (для NODA1 routing) + +| Service | Internal | Via NATS | +|---------|----------|----------| +| Ollama LLM | `http://host.docker.internal:11434` | `node.noda2.llm.request` (TBD) | +| Ollama Vision | `http://host.docker.internal:11434` | `node.noda2.vision.request` (TBD) | +| Swapper | `http://host.docker.internal:8890` | `node.noda2.swapper.request` (TBD) | +| Router | `http://host.docker.internal:9102` | via NATS messaging | diff --git a/ops/audit_node2_findings.yml b/ops/audit_node2_findings.yml new file mode 100644 index 00000000..f4ade1b0 --- /dev/null +++ b/ops/audit_node2_findings.yml @@ -0,0 +1,151 @@ +# NODA2 Audit Findings +# Date: 2026-02-27 +# Auditor: Sofiia (Cursor session) + +# ── PASS (що працює) ────────────────────────────────────────────────────── +pass: + - id: PASS-01 + title: "NATS leafnode NODA2→NODA1 connected" + detail: "spoke=true, rtt=58ms, cross-node pub/sub tested and working" + + - id: PASS-02 + title: "Docker stack healthy" + detail: "All 12 containers running. 10 have healthcheck PASS. NATS no healthcheck docker-level (compensated by leafnode check)." 
+ + - id: PASS-03 + title: "Router NODA2 operational" + detail: "nats_connected=true, NODE_ID=NODA2, 14 agents registered, mounts local router-config.yml" + + - id: PASS-04 + title: "Ollama 11 models available" + detail: "qwen3.5:35b-a3b (primary), qwen3:14b, llava:13b (vision), deepseek-r1:70b, deepseek-coder:33b, glm-4.7-flash, gemma3, mistral-nemo, starcoder2, phi3, gpt-oss" + + - id: PASS-05 + title: "Swapper healthy, active_model=qwen3-14b" + detail: "/health=healthy, /models=200, /vision/models=200, /stt/models=200, /tts/models=200" + + - id: PASS-06 + title: "Sofiia agent registered in gateway (14 agents)" + detail: "sofiia present in agent_registry.yml, 1579-line prompt, class=top_level, telegram enabled" + + - id: PASS-07 + title: "Memory + Qdrant operational" + detail: "6 collections present (sofiia_messages:2 points, others empty). sofiia_docs ready for indexing." + + - id: PASS-08 + title: "Sofiia Console UI running" + detail: "http://localhost:8002, Python process, connects to NODA1 router (http://144.76.224.179:9102) and NODA2 router" + + - id: PASS-09 + title: "Gitea (self-hosted Git) running" + detail: "http://localhost:3000, version 1.25.3. Internal code repo." + + - id: PASS-10 + title: "Spacebot + OpenCode running" + detail: "Spacebot uses sofiia-console BFF. OpenCode.app active. Both integrated with NODA2 stack." + +# ── PARTIAL (частково) ──────────────────────────────────────────────────── +partial: + - id: PART-01 + title: "Vision capability: llava:13b available but outdated" + detail: "llava:13b in Ollama (vision_capable=true via CLIP). But swapper /vision/models returns empty - llava not added to swapper_config_node2.yaml. qwen3-vl:8b not installed." + action: "Install qwen3-vl:8b OR add llava:13b to swapper vision config" + + - id: PART-02 + title: "Router LLM profiles use 172.17.0.1 (Docker bridge IP, Linux-style)" + detail: "router-config.yml has base_url: http://172.17.0.1:11434 for all profiles. 
On macOS Docker Desktop, containers reach the host via host.docker.internal, not the Linux bridge IP. Ollama binds 127.0.0.1 on the host, so 172.17.0.1 is unreliable from inside containers and may not work in all network configurations." + action: "Replace 172.17.0.1:11434 with host.docker.internal:11434 in router-config.yml for NODA2 (same fix as FAIL-03)" + + - id: PART-03 + title: "llama-server:11435 duplicates Ollama (same Qwen3.5-35B model)" + detail: "Wastes memory. Two instances of same model. No routing config points to 11435." + action: "Either remove llama-server or add it as dedicated profile in router-config.yml with specific purpose" + + - id: PART-04 + title: "Qdrant memory collections mostly empty" + detail: "sofiia_messages:2, all others 0. Memory not being used/indexed actively." + action: "Enable memory indexing in memory-service config or start ingesting docs" + + - id: PART-05 + title: "Sofiia Console not fully integrated" + detail: "UI serves HTML on :8002. Has NODES_NODA1_ROUTER_URL configured. But no node-ops-worker for NATS-based control. SSH password in env." + action: "Implement node-ops-worker (see FAIL-02)" + +# ── FAIL (зламано) ──────────────────────────────────────────────────────── +fail: + - id: FAIL-01 + title: "Vision pipeline broken end-to-end" + detail: "Swapper /vision/models=[] (empty). No vision model configured in swapper_config_node2.yaml. Router has no vision profile pointing to NODA2. Photo requests would fail silently or go to NODA1." + priority: P0 + action: | + 1. Add llava:13b to swapper_config_node2.yaml vision section + 2. OR install: ollama pull qwen3-vl:8b and add to swapper config + 3. Add NODA2 vision profile to router-config.yml + + - id: FAIL-02 + title: "Node-ops-worker not implemented" + detail: "Sofiia has no NATS-based node control. Only mechanism is SSH root (password in env = SECURITY RISK). No subjects: node.noda1.ops.request, node.noda2.ops.request" + priority: P1 + action: | + 1. Implement services/node_ops_worker/ (Python NATS subscriber) + 2. 
Allowlist: docker ps/logs/restart, health curl, tail logs + 3. Deploy to both NODA1 and NODA2 + 4. Remove NODES_NODA1_SSH_PASSWORD from env + + - id: FAIL-03 + title: "NODA2 router-config.yml profiles use 172.17.0.1 (Linux Docker bridge)" + detail: "On macOS, Docker containers use host.docker.internal for host access. 172.17.0.1 may or may not resolve depending on Docker version." + priority: P1 + action: "Replace 172.17.0.1:11434 → host.docker.internal:11434 in router-config.yml" + +# ── SECURITY risks ──────────────────────────────────────────────────────── +security: + - id: SEC-01 + title: "SSH root password in sofiia-console container env" + detail: "NODES_NODA1_SSH_PASSWORD=[secret present] in sofiia-console Docker env. Any process inside the container can read it." + severity: HIGH + priority: P1 + action: | + 1. Remove NODES_NODA1_SSH_PASSWORD from docker env + 2. Use SSH key-based auth instead (mount private key as secret) + 3. Better: implement NATS node-ops-worker (FAIL-02) to eliminate SSH dependency + + - id: SEC-02 + title: "DATABASE_URL with plaintext password in router env" + detail: "DATABASE_URL=postgresql://daarion:XXXXX@... in router container. Acceptable for local dev, risk if container is compromised." + severity: MEDIUM + priority: P2 + action: "Use Docker secrets or env file with restricted permissions" + + - id: SEC-03 + title: "NODA2 external IP (145.224.111.147) — ports exposed" + detail: "All Docker ports bound to 0.0.0.0. If NODA2 has external IP, ports 9300 (gateway), 9102 (router), 8890 (swapper), 8000 (memory) are publicly accessible." 
+ severity: HIGH + priority: P1 + action: "Add firewall rules (macOS pfctl or router-level) to restrict inbound to trusted IPs only" + +# ── Summary ─────────────────────────────────────────────────────────────── +summary: + current_state: + - "NODA2 = MacBook Pro M4 Max, 64GB RAM, macOS 26.3" + - "12 Docker containers running, 10 healthy" + - "NATS leafnode connected to NODA1 (rtt=58ms, cross-node pub/sub PASS)" + - "Ollama: 11 models, primary=qwen3.5:35b-a3b + qwen3:14b. Vision=llava:13b (legacy)" + - "llama-server:11435 running Qwen3.5-35B-A3B via llama.cpp (duplicates Ollama)" + - "Swapper active_model=qwen3-14b, vision/models=EMPTY (gap)" + - "Sofiia: agent_registry entry, 1579-line prompt, gateway registered" + - "Sofiia Console UI on :8002, connects to NODA1+NODA2 routers" + - "Spacebot (Telegram) → sofiia-console BFF → NODA2 stack" + - "Memory: 6 Qdrant collections (mostly empty), Neo4j running" + + gaps: + - "P0: Vision pipeline broken — swapper /vision/models=empty, qwen3-vl:8b not installed" + - "P1: node-ops-worker not implemented — Sofiia controls NODA1 via SSH root password (SECURITY)" + - "P1: router-config.yml on NODA2 uses 172.17.0.1 (should be host.docker.internal)" + - "P1: NODA2 ports exposed on 0.0.0.0 without firewall — security risk" + - "P1: SSH root password in sofiia-console env" + - "P2: llama-server:11435 duplicates Ollama — memory waste" + - "P2: Qdrant memory collections empty — memory/RAG not being used" + - "P2: No cross-node vision routing (NODA1 can't send vision tasks to NODA2 via NATS yet)" + - "P2: Swapper config missing llava:13b in vision section" + - "P2: swapper_config_node2.yaml references gemma2:27b and qwen2.5-coder:32b — NOT installed" diff --git a/ops/node2_capabilities.yml b/ops/node2_capabilities.yml new file mode 100644 index 00000000..b1d80133 --- /dev/null +++ b/ops/node2_capabilities.yml @@ -0,0 +1,145 @@ +# NODA2 Capabilities — Machine-readable for NODA1 Router +# Generated: 2026-02-27 +# Node: MacBook Pro M4 
Max, 64GB RAM, macOS 26.3 + +node_id: noda2 +hostname: MacBook-Pro.local +nats_endpoint: "nats://145.224.111.147:4222" # leafnode spoke, via NODA1 hub +router_url: "http://145.224.111.147:9102" # NODA2 router (external, if firewall allows) +swapper_url: "http://145.224.111.147:8890" # swapper (external, if firewall allows) +ollama_url: "http://145.224.111.147:11434" # Ollama GPU (external, if firewall allows) + +# ── Vision Models ────────────────────────────────────────────────────────── +vision_models: + - name: llava:13b + backend: ollama + base_url: http://localhost:11434 + type: vision + size_gb: 8.0 + quality: medium + note: "LLaVA 1.6 + CLIP, 13B. Available now. Legacy quality." + capability_hint: "image_understanding" + + # RECOMMENDED - not yet installed, install with: ollama pull qwen3-vl:8b + - name: qwen3-vl:8b + backend: ollama + base_url: http://localhost:11434 + type: vision + size_gb: 8.0 # approximate; numeric on purpose (unquoted ~8.0 parses as a string in YAML) + quality: high + installed: false + note: "Install: ollama pull qwen3-vl:8b. Superior OCR, document, chart understanding." + +# ── Text/LLM Models ──────────────────────────────────────────────────────── +text_models: + - name: qwen3.5:35b-a3b + backend: ollama + base_url: http://localhost:11434 + type: llm + size_gb: 9.3 + params: "14.8B active (MoE 35B total)" + quality: high + speed: fast + note: "Primary recommendation: MoE architecture, fast inference on M4 Max" + + - name: qwen3:14b + backend: ollama + base_url: http://localhost:11434 + type: llm + size_gb: 9.3 + params: "14B" + quality: high + speed: medium + + - name: glm-4.7-flash:32k + backend: ollama + base_url: http://localhost:11434 + type: llm + size_gb: 19.0 + params: "32B" + quality: high + speed: slow + note: "Long context (32k). Use for document-heavy tasks." + + - name: deepseek-r1:70b + backend: ollama + base_url: http://localhost:11434 + type: llm + size_gb: 42.0 + params: "70B" + quality: very_high + speed: very_slow + note: "Reasoning model. Use for complex multi-step planning only."
+ + - name: Qwen3.5-35B-A3B-Q4_K_M + backend: llama-server + base_url: http://localhost:11435 + api_type: openai_compatible + type: llm + note: "Same model as qwen3.5:35b-a3b via llama.cpp. Redundant with Ollama." + +# ── Code Models ──────────────────────────────────────────────────────────── +code_models: + - name: deepseek-coder:33b + backend: ollama + base_url: http://localhost:11434 + type: code + size_gb: 18.0 + + - name: starcoder2:3b + backend: ollama + base_url: http://localhost:11434 + type: code + size_gb: 1.7 + speed: very_fast + +# ── Embedding Models ─────────────────────────────────────────────────────── +embedding_models: + # Not explicitly found in Ollama list + # memory-service uses its own embedding (check dagi-memory-service-node2) + - name: unknown + note: "Audit needed: check memory-service embedding model config" + +# ── STT Models ───────────────────────────────────────────────────────────── +stt_models: + - name: whisper (swapper) + backend: swapper + base_url: http://localhost:8890 + endpoint: /stt + note: "Swapper exposes /stt/models (HTTP 200). Model details need swapper config check." + +# ── TTS Models ───────────────────────────────────────────────────────────── +tts_models: + - name: tts (swapper) + backend: swapper + base_url: http://localhost:8890 + endpoint: /tts + note: "Swapper exposes /tts/models (HTTP 200). Model details need swapper config check." 
+ +# ── Routing Policy ───────────────────────────────────────────────────────── +routing_policy: + vision_request: + capability: "vision" + selection_logic: | + if quality_tier == "best": + prefer: qwen3-vl:8b (when installed) + fallback: llava:13b + if quality_tier == "fast": + prefer: llava:13b + return_model_used: true + nats_subject: "node.noda2.vision.request" + + text_request: + capability: "text" + selection_logic: | + if quality_tier == "best": deepseek-r1:70b + if quality_tier == "high": qwen3.5:35b-a3b (default) + if quality_tier == "fast": qwen3:14b or gemma3:latest + nats_subject: "node.noda2.llm.request" + + code_request: + capability: "code" + selection_logic: | + prefer: deepseek-coder:33b + fast fallback: starcoder2:3b + nats_subject: "node.noda2.code.request"