# Matrix Bridge DAGI — Phase M2.1 (multi-room + mixed routing)
#
# Compose definition for the Matrix bridge service on node NODA1.
# Include into the main NODA1 stack or run standalone:
#   docker compose -f docker-compose.node1.yml \
#     -f docker-compose.matrix-bridge-node1.yml up -d matrix-bridge-dagi
#
# Values written as ${NAME:-default} are interpolated by Compose from the
# shell environment / .env file; the text after ":-" is used when NAME is
# unset or empty.

version: "3.9"

services:
  matrix-bridge-dagi:
    build:
      context: ./services/matrix-bridge-dagi
      args:
        BUILD_SHA: "${BUILD_SHA:-dev}"
        BUILD_TIME: "${BUILD_TIME:-local}"
    container_name: matrix-bridge-dagi-node1
    ports:
      # Published on the host loopback interface only — not exposed publicly.
      - "127.0.0.1:7030:7030"
    volumes:
      # BRIDGE_DATA_DIR is declared below alongside PERSISTENT_DEDUPE. Without
      # a volume that directory lives in the container's writable layer and is
      # discarded whenever the container is re-created (e.g. `up -d --build`).
      # The target follows BRIDGE_DATA_DIR so an override keeps pointing at
      # the mounted volume.
      # NOTE(review): presumably the M8.2 HA snapshots are stored here too, and
      # the image's runtime user must be able to write to this path — confirm.
      - "matrix-bridge-data:${BRIDGE_DATA_DIR:-/app/data}"
    environment:
      - PORT=7030
      - ENV=prod
      - NODE_ID=NODA1
      - BUILD_SHA=${BUILD_SHA:-dev}
      - BUILD_TIME=${BUILD_TIME:-local}

      # ── Matrix homeserver ────────────────────────────────────────────────
      # Required: set in .env on NODA1 before first launch.
      # These default to an empty string, so Compose itself will not complain
      # when they are missing.
      - MATRIX_HOMESERVER_URL=${MATRIX_HOMESERVER_URL:-}
      - MATRIX_ACCESS_TOKEN=${MATRIX_ACCESS_TOKEN:-}
      - MATRIX_USER_ID=${MATRIX_USER_ID:-}

      # ── Room → Agent mapping (M1: single room for Sofiia) ────────────────
      # Create the room manually, then paste the room_id here
      - SOFIIA_ROOM_ID=${SOFIIA_ROOM_ID:-}

      # ── DAGI backend — Router for /v1/agents/{id}/infer ─────────────────
      # Router internal port 8000 on dagi-network (ext port 9102 on host)
      - DAGI_GATEWAY_URL=http://dagi-router-node1:8000
      - DEFAULT_NODE_ID=NODA1

      # ── Sofiia Console (audit write) ─────────────────────────────────────
      - SOFIIA_CONSOLE_URL=http://dagi-sofiia-console-node1:8002
      - SOFIIA_INTERNAL_TOKEN=${SOFIIA_INTERNAL_TOKEN:-}

      # ── H2: Backpressure queue ───────────────────────────────────────────
      - QUEUE_MAX_EVENTS=100
      - WORKER_CONCURRENCY=2
      - QUEUE_DRAIN_TIMEOUT_S=5

      # ── Policy ───────────────────────────────────────────────────────────
      # M2.0+: multiple agents separated by comma
      - BRIDGE_ALLOWED_AGENTS=${BRIDGE_ALLOWED_AGENTS:-sofiia}
      # M2.0: "sofiia:!room1:server,helion:!room2:server" (1 room → 1 agent)
      - BRIDGE_ROOM_MAP=${BRIDGE_ROOM_MAP:-}
      - RATE_LIMIT_ROOM_RPM=20
      - RATE_LIMIT_SENDER_RPM=10

      # ── M2.1: Mixed rooms (1 room → N agents) ───────────────────────────
      # Format: "!roomX:server=sofiia,helion;!roomY:server=druid"
      - BRIDGE_MIXED_ROOM_MAP=${BRIDGE_MIXED_ROOM_MAP:-}
      # Override default agent per mixed room (optional):
      # "!roomX:server=helion;!roomY:server=druid"
      - BRIDGE_MIXED_DEFAULTS=${BRIDGE_MIXED_DEFAULTS:-}

      # ── M3.0: Operator control channel ──────────────────────────────────
      # Comma-separated Matrix user IDs allowed to issue !commands
      - BRIDGE_OPERATOR_ALLOWLIST=${BRIDGE_OPERATOR_ALLOWLIST:-}
      # Comma-separated room IDs designated as ops control channels
      - BRIDGE_CONTROL_ROOMS=${BRIDGE_CONTROL_ROOMS:-}
      # "ignore" (silent) | "reply_error" (⛔ reply to unauthorised attempts)
      - CONTROL_UNAUTHORIZED_BEHAVIOR=${CONTROL_UNAUTHORIZED_BEHAVIOR:-ignore}

      # ── M3.1: Runbook runner token ───────────────────────────────────────
      # X-Control-Token for POST /api/runbooks/internal/runs (sofiia-console)
      - SOFIIA_CONTROL_TOKEN=${SOFIIA_CONTROL_TOKEN:-}

      # M3.4: Control channel safety — rate limiting + cooldown
      - CONTROL_ROOM_RPM=${CONTROL_ROOM_RPM:-60}
      - CONTROL_OPERATOR_RPM=${CONTROL_OPERATOR_RPM:-30}
      - CONTROL_RUN_NEXT_RPM=${CONTROL_RUN_NEXT_RPM:-20}
      - CONTROL_COOLDOWN_S=${CONTROL_COOLDOWN_S:-2.0}

      # M2.3: Persistent event deduplication
      # BRIDGE_DATA_DIR is also the mount target of the matrix-bridge-data
      # volume declared under `volumes:` for this service.
      - PERSISTENT_DEDUPE=${PERSISTENT_DEDUPE:-1}
      - BRIDGE_DATA_DIR=${BRIDGE_DATA_DIR:-/app/data}
      - PROCESSED_EVENTS_TTL_H=${PROCESSED_EVENTS_TTL_H:-48}
      - PROCESSED_EVENTS_PRUNE_BATCH=${PROCESSED_EVENTS_PRUNE_BATCH:-5000}
      - PROCESSED_EVENTS_PRUNE_INTERVAL_S=${PROCESSED_EVENTS_PRUNE_INTERVAL_S:-3600}

      # M4.0: agent discovery
      - DISCOVERY_RPM=${DISCOVERY_RPM:-20}

      # M5.0: node-aware routing
      - BRIDGE_ALLOWED_NODES=${BRIDGE_ALLOWED_NODES:-NODA1}
      - BRIDGE_DEFAULT_NODE=${BRIDGE_DEFAULT_NODE:-NODA1}
      - BRIDGE_ROOM_NODE_MAP=${BRIDGE_ROOM_NODE_MAP:-}

      # M8.0: Node health + soft-failover thresholds
      - NODE_FAIL_CONSEC=${NODE_FAIL_CONSEC:-3}
      - NODE_LAT_EWMA_S=${NODE_LAT_EWMA_S:-12.0}
      - NODE_EWMA_ALPHA=${NODE_EWMA_ALPHA:-0.3}

      # M8.1: Sticky failover TTL (0 = disabled)
      - FAILOVER_STICKY_TTL_S=${FAILOVER_STICKY_TTL_S:-300}

      # M8.2: HA state persistence
      - HA_HEALTH_SNAPSHOT_INTERVAL_S=${HA_HEALTH_SNAPSHOT_INTERVAL_S:-60}
      - HA_HEALTH_MAX_AGE_S=${HA_HEALTH_MAX_AGE_S:-600}

      # M9.0: Two-step confirmation TTL for dangerous commands (0 = disabled)
      - CONFIRM_TTL_S=${CONFIRM_TTL_S:-120}
      - POLICY_EXPORT_RETENTION_DAYS=${POLICY_EXPORT_RETENTION_DAYS:-30}
      - POLICY_HISTORY_LIMIT=${POLICY_HISTORY_LIMIT:-100}

      # M11 soak: NEVER set to true in production
      - DEBUG_INJECT_ENABLED=${DEBUG_INJECT_ENABLED:-false}

      # ── M2.2: Mixed room guard rails ────────────────────────────────────
      # Fail-fast if any room defines more agents than this
      - MAX_AGENTS_PER_MIXED_ROOM=${MAX_AGENTS_PER_MIXED_ROOM:-5}
      # Reject slash commands longer than this (anti-garbage / injection guard)
      - MAX_SLASH_LEN=${MAX_SLASH_LEN:-32}
      # What to do when unknown /slash is used:
      # "ignore" (silent) | "reply_error" (inform user)
      - UNKNOWN_AGENT_BEHAVIOR=${UNKNOWN_AGENT_BEHAVIOR:-ignore}
      # Max concurrent Router invocations per (room, agent) pair; 0 = unlimited
      - MIXED_CONCURRENCY_CAP=${MIXED_CONCURRENCY_CAP:-1}
    healthcheck:
      # Exec-form probe run inside the container: python3 requests /health on
      # port 7030. urlopen raises (non-zero exit) on connection failure or an
      # HTTP error status, which marks the probe as failed.
      test:
        - "CMD"
        - "python3"
        - "-c"
        - "import urllib.request; urllib.request.urlopen('http://localhost:7030/health', timeout=5)"
      interval: 30s
      timeout: 10s
      retries: 3
      start_period: 15s
    networks:
      - dagi-network
    restart: unless-stopped

volumes:
  # Named volume backing BRIDGE_DATA_DIR for matrix-bridge-dagi.
  matrix-bridge-data: {}

networks:
  # Declared external: Compose will not create it, so the network must already
  # exist before this file is brought up.
  dagi-network:
    external: true