# NCS (services/node-capabilities/metrics.py):
# - NodeLoad: inflight_jobs, queue_depth, concurrency_limit, estimated_wait_ms,
#   cpu_load_1m, mem_pressure (macOS + Linux), rtt_ms_to_hub
# - RuntimeLoad: per-runtime healthy, p50_ms, p95_ms from rolling 50-sample window
# - POST /capabilities/report_latency for node-worker → NCS reporting
# - NCS fetches worker metrics via NODE_WORKER_URL
#
# Node Worker:
# - GET /metrics endpoint (inflight, concurrency, latency buffers)
# - Latency tracking per job type (llm/vision) with rolling buffer
# - Fire-and-forget latency reporting to NCS after each successful job
#
# Router (model_select v3):
# - score_candidate(): wait + model_latency + cross_node_penalty + prefer_bonus
# - LOCAL_THRESHOLD_MS=250: prefer local if within threshold of remote
# - ModelSelection.score field for observability
# - Structured [score] logs with chosen node, model, and score breakdown
#
# Tests: 19 new (12 scoring + 7 NCS metrics), 36 total pass
# Docs: ops/runbook_p3_1.md, ops/CHANGELOG_FABRIC.md
#
# No breaking changes to JobRequest/JobResponse or capabilities schema.
#
# Made-with: Cursor
204 lines
6.2 KiB
YAML
204 lines
6.2 KiB
YAML
# Docker Compose stack for node NODA2.
#
# Services defined here: router, gateway, dagi-nats, swapper-service,
# node-capabilities, node-worker and sofiia-console.
#
# Hostnames such as memory-service, qdrant-node2, postgres-node2 and
# neo4j-node2 are referenced below but are NOT defined in this file.
# NOTE(review): presumably they are reachable through the external
# dagi-memory-network declared at the bottom — confirm against the stack that
# creates that network.
version: "3.8"

services:
  # Request router. Container port 8000 is published on loopback 9102 only.
  router:
    build:
      context: ./services/router
      dockerfile: Dockerfile
    container_name: dagi-router-node2
    ports:
      - "127.0.0.1:9102:8000"
    environment:
      - NODE_ID=NODA2
      - DAGI_ROUTER_CONFIG=/app/router-config.yml
      - MEMORY_SERVICE_URL=http://memory-service:8000
      - NATS_URL=nats://dagi-nats:4222
      - QDRANT_HOST=qdrant-node2
      - QDRANT_PORT=6333
      # NOTE(review): database and Neo4j credentials are inlined here; consider
      # sourcing them from .env or a secrets mount instead of the compose file.
      - DATABASE_URL=postgresql://daarion:daarion_secret_node2@postgres-node2:5432/daarion_memory
      - NEO4J_BOLT_URL=bolt://neo4j-node2:7687
      - NEO4J_USER=neo4j
      - NEO4J_PASSWORD=daarion_node2_secret
      - CITY_SERVICE_URL=http://city-service:7001
      - PIECES_OS_URL=http://host.docker.internal:39300
      - NOTION_API_KEY=${NOTION_API_KEY:-}
      # XAI_API_KEY and GROK_API_KEY are both filled from the host's XAI_API_KEY.
      - XAI_API_KEY=${XAI_API_KEY}
      - GROK_API_KEY=${XAI_API_KEY}
      - DEEPSEEK_API_KEY=${DEEPSEEK_API_KEY:-}
      # ── Node Capabilities (multi-node model selection) ────────────────────
      - NODE_CAPABILITIES_URL=http://node-capabilities:8099/capabilities
      - ENABLE_GLOBAL_CAPS_NATS=true
      # ── Persistence backends ──────────────────────────────────────────────
      - ALERT_BACKEND=postgres
      # Compose interpolates ${DATABASE_URL} from the shell / .env file, not
      # from the DATABASE_URL entry above. If neither ALERT_DATABASE_URL nor
      # DATABASE_URL is set there, this expands to an empty string.
      - ALERT_DATABASE_URL=${ALERT_DATABASE_URL:-${DATABASE_URL}}
      - RISK_HISTORY_BACKEND=auto
      - BACKLOG_BACKEND=auto
      - INCIDENT_BACKEND=auto
      - AUDIT_BACKEND=auto
    volumes:
      - ./services/router/router-config.node2.yml:/app/router-config.yml:ro
      - ./logs:/app/logs
    # city-service and daarion-city-service are mapped to the Docker host, so
    # CITY_SERVICE_URL above resolves to a process listening on the host.
    extra_hosts:
      - "host.docker.internal:host-gateway"
      - "city-service:host-gateway"
      - "daarion-city-service:host-gateway"
    depends_on:
      - dagi-nats
      - node-capabilities
    networks:
      - dagi-network
      - dagi-memory-network
    restart: unless-stopped

  # Bot gateway. Unlike the other services, its port is published on all host
  # interfaces (0.0.0.0), not just loopback.
  gateway:
    build:
      context: ./gateway-bot
      dockerfile: Dockerfile
    container_name: dagi-gateway-node2
    ports:
      - "0.0.0.0:9300:9300"
    environment:
      - ROUTER_URL=http://router:8000
      - DAARWIZZ_NAME=DAARWIZZ
      - DAARWIZZ_PROMPT_PATH=/app/gateway-bot/daarwizz_prompt.txt
      - MEMORY_SERVICE_URL=http://memory-service:8000
      - SOFIIA_NAME=SOFIIA
      - SOFIIA_PROMPT_PATH=/app/gateway-bot/sofiia_prompt.txt
      - SOFIIA_TELEGRAM_BOT_TOKEN=${SOFIIA_TELEGRAM_BOT_TOKEN}
    volumes:
      # Source tree is mounted read-only; the prompt paths above point into it.
      - ./gateway-bot:/app/gateway-bot:ro
      - ./logs:/app/logs
    depends_on:
      - router
    networks:
      - dagi-network
      - dagi-memory-network
    restart: unless-stopped

  # NATS server. Both ports are published without a host IP, i.e. on all host
  # interfaces.
  # NOTE(review): confirm that exposing 4222/8222 beyond loopback is intended.
  dagi-nats:
    image: nats:2.10-alpine
    container_name: dagi-nats-node2
    ports:
      - "4222:4222"
      - "8222:8222"
    command: -c /etc/nats/nats-server.conf
    volumes:
      - ./nats-server.conf:/etc/nats/nats-server.conf:ro
    networks:
      - dagi-network
    restart: unless-stopped

  # Model swapper. Talks to an Ollama endpoint on the Docker host.
  swapper-service:
    build:
      context: ./services/swapper-service
      dockerfile: Dockerfile
    container_name: swapper-service-node2
    ports:
      - "127.0.0.1:8890:8890"
    extra_hosts:
      - "host.docker.internal:host-gateway"
    environment:
      - OLLAMA_BASE_URL=http://host.docker.internal:11434
      - SWAPPER_CONFIG_PATH=/app/config/swapper_config_node2.yaml
      - SWAPPER_MODE=single-active
      - MODEL_SWAP_TIMEOUT=300
    volumes:
      - ./services/swapper-service/config:/app/config:ro
      - ./logs:/app/logs
    networks:
      - dagi-network
    restart: unless-stopped

  # Node Capabilities Service (NCS). The router reads it through
  # NODE_CAPABILITIES_URL; node-worker posts to it through NCS_REPORT_URL.
  node-capabilities:
    build:
      context: ./services/node-capabilities
      dockerfile: Dockerfile
    container_name: node-capabilities-node2
    ports:
      - "127.0.0.1:8099:8099"
    extra_hosts:
      - "host.docker.internal:host-gateway"
    environment:
      - NODE_ID=NODA2
      - OLLAMA_BASE_URL=http://host.docker.internal:11434
      - SWAPPER_URL=http://swapper-service:8890
      - LLAMA_SERVER_URL=http://host.docker.internal:11435
      - CACHE_TTL_SEC=15
      - ENABLE_NATS_CAPS=true
      - NATS_URL=nats://dagi-nats:4222
      # node-worker is deliberately not listed in depends_on below:
      # node-worker points back at this service, and Compose rejects
      # dependency cycles.
      - NODE_WORKER_URL=http://node-worker:8109
    depends_on:
      - swapper-service
      - dagi-nats
    networks:
      - dagi-network
    restart: unless-stopped

  # Job worker for this node.
  node-worker:
    build:
      context: ./services/node-worker
      dockerfile: Dockerfile
    container_name: node-worker-node2
    ports:
      - "127.0.0.1:8109:8109"
    extra_hosts:
      - "host.docker.internal:host-gateway"
    environment:
      # NOTE(review): lower-case "noda2" here, while router, node-capabilities
      # and sofiia-console use "NODA2" — confirm whether node IDs are compared
      # case-sensitively before normalising.
      - NODE_ID=noda2
      - NATS_URL=nats://dagi-nats:4222
      - OLLAMA_BASE_URL=http://host.docker.internal:11434
      - SWAPPER_URL=http://swapper-service:8890
      - NODE_DEFAULT_LLM=qwen3:14b
      - NODE_DEFAULT_VISION=llava:13b
      - NODE_WORKER_MAX_CONCURRENCY=2
      - NCS_REPORT_URL=http://node-capabilities:8099
    depends_on:
      - dagi-nats
      - swapper-service
    networks:
      - dagi-network
    restart: unless-stopped

  # Sofiia operator console, published on loopback 8002.
  sofiia-console:
    build:
      context: ./services/sofiia-console
      dockerfile: Dockerfile
    container_name: sofiia-console
    ports:
      - "127.0.0.1:8002:8002"
    # The default OLLAMA_URL below targets host.docker.internal. Every other
    # service in this file that uses that hostname maps it to host-gateway;
    # without the mapping the name does not resolve on a plain Linux engine.
    extra_hosts:
      - "host.docker.internal:host-gateway"
    environment:
      - PORT=8002
      - ENV=${ENV:-prod}
      - NODE_ID=NODA2
      - ROUTER_URL=http://router:8000
      - CONFIG_DIR=/app/config
      - NODES_NODA2_ROUTER_URL=http://router:8000
      - NODES_NODA1_ROUTER_URL=http://144.76.224.179:9102
      - MEMORY_SERVICE_URL=http://memory-service:8000
      - OLLAMA_URL=${OLLAMA_URL:-http://host.docker.internal:11434}
      - NOTION_API_KEY=${NOTION_API_KEY:-}
      - OPENCODE_URL=${OPENCODE_URL:-}
      # P1 SECURITY: SSH_PASSWORD removed — use key file instead
      # NODES_NODA1_SSH_PASSWORD is NO LONGER passed; sofiia-console reads from key file
      - NODES_NODA1_SSH_PRIVATE_KEY=/run/secrets/noda1_ssh_key
      - SUPERVISOR_API_KEY=${SUPERVISOR_API_KEY}
      - SOFIIA_CONSOLE_API_KEY=${SOFIIA_CONSOLE_API_KEY}
      - CORS_ORIGINS=${CORS_ORIGINS:-}
    volumes:
      - ./config:/app/config
      # Private key is bind-mounted read-only at the path named by
      # NODES_NODA1_SSH_PRIVATE_KEY above.
      - ./secrets/noda1_id_ed25519:/run/secrets/noda1_ssh_key:ro
    depends_on:
      - router
    networks:
      - dagi-network
      - dagi-memory-network
    restart: unless-stopped

networks:
  # Bridge network created by this compose project.
  dagi-network:
    driver: bridge
    name: dagi-network-node2
  # Pre-existing network; it must already exist before `up`, because Compose
  # does not create networks marked external.
  dagi-memory-network:
    external: true
    name: dagi-memory-network-node2