Guard rails (mixed_routing.py):
- MAX_AGENTS_PER_MIXED_ROOM (default 5): fail-fast at parse time
- MAX_SLASH_LEN (default 32): reject garbage/injection slash tokens
- Unified rejection reasons: unknown_agent, slash_too_long, no_mapping
- REASON_REJECTED_* constants (separate from success REASON_*)
Ingress (ingress.py):
- per-room-agent concurrency semaphore (MIXED_CONCURRENCY_CAP, default 1)
- active_lock_count property for /health + prometheus
- UNKNOWN_AGENT_BEHAVIOR: "ignore" (silent) | "reply_error" (inform user)
- on_routed(agent_id, reason) callback for routing metrics
- on_route_rejected(room_id, reason) callback for rejection metrics
- matrix.route.rejected audit event on every rejection
Config + main:
- max_agents_per_mixed_room, max_slash_len, unknown_agent_behavior, mixed_concurrency_cap
- matrix_bridge_routed_total{agent_id, reason} counter
- matrix_bridge_route_rejected_total{room_id, reason} counter
- matrix_bridge_active_room_agent_locks gauge
- /health: mixed_guard_rails section + total_agents_in_mixed_rooms
- docker-compose: all 4 new guard rail env vars
Runbook: section 9 — mixed room debug guide (6 acceptance tests, routing metrics, session isolation, lock hang, config guard)
Tests: 108 pass (94 → 108, +14 new tests for guard rails + callbacks + concurrency)
Made-with: Cursor
92 lines
4.1 KiB
YAML
92 lines
4.1 KiB
YAML
# Matrix Bridge DAGI — Phase M2.1–M2.2 (multi-room + mixed routing + guard rails)
# Include into the main NODA1 stack or run standalone:
#   docker compose -f docker-compose.node1.yml -f docker-compose.matrix-bridge-node1.yml up -d matrix-bridge-dagi

# NOTE(review): Compose v2 treats the top-level `version` key as obsolete and
# ignores it; kept here for compatibility with older docker-compose tooling.
version: "3.9"

services:
  matrix-bridge-dagi:
    build:
      context: ./services/matrix-bridge-dagi
      args:
        BUILD_SHA: "${BUILD_SHA:-dev}"
        BUILD_TIME: "${BUILD_TIME:-local}"
    container_name: matrix-bridge-dagi-node1
    ports:
      # Bound to the host loopback interface only.
      - "127.0.0.1:7030:7030"  # internal only — not exposed publicly
    environment:
      - PORT=7030
      - ENV=prod
      - NODE_ID=NODA1
      - BUILD_SHA=${BUILD_SHA:-dev}
      - BUILD_TIME=${BUILD_TIME:-local}

      # ── Matrix homeserver ────────────────────────────────────────────────
      # Required: set in .env on NODA1 before first launch
      # (each of these falls back to an empty string when unset)
      - MATRIX_HOMESERVER_URL=${MATRIX_HOMESERVER_URL:-}
      - MATRIX_ACCESS_TOKEN=${MATRIX_ACCESS_TOKEN:-}
      - MATRIX_USER_ID=${MATRIX_USER_ID:-}

      # ── Room → Agent mapping (M1: single room for Sofiia) ────────────────
      # Create the room manually, then paste the room_id here
      - SOFIIA_ROOM_ID=${SOFIIA_ROOM_ID:-}

      # ── DAGI backend — Router for /v1/agents/{id}/infer ─────────────────
      # Router internal port 8000 on dagi-network (ext port 9102 on host)
      - DAGI_GATEWAY_URL=http://dagi-router-node1:8000
      - DEFAULT_NODE_ID=NODA1

      # ── Sofiia Console (audit write) ─────────────────────────────────────
      - SOFIIA_CONSOLE_URL=http://dagi-sofiia-console-node1:8002
      - SOFIIA_INTERNAL_TOKEN=${SOFIIA_INTERNAL_TOKEN:-}

      # ── H2: Backpressure queue ───────────────────────────────────────────
      - QUEUE_MAX_EVENTS=100
      - WORKER_CONCURRENCY=2
      - QUEUE_DRAIN_TIMEOUT_S=5

      # ── Policy ───────────────────────────────────────────────────────────
      # M2.0+: multiple agents separated by comma
      - BRIDGE_ALLOWED_AGENTS=${BRIDGE_ALLOWED_AGENTS:-sofiia}
      # M2.0: "sofiia:!room1:server,helion:!room2:server" (1 room → 1 agent)
      - BRIDGE_ROOM_MAP=${BRIDGE_ROOM_MAP:-}
      - RATE_LIMIT_ROOM_RPM=20
      - RATE_LIMIT_SENDER_RPM=10

      # ── M2.1: Mixed rooms (1 room → N agents) ───────────────────────────
      # Format: "!roomX:server=sofiia,helion;!roomY:server=druid"
      - BRIDGE_MIXED_ROOM_MAP=${BRIDGE_MIXED_ROOM_MAP:-}
      # Override default agent per mixed room (optional):
      # "!roomX:server=helion;!roomY:server=druid"
      - BRIDGE_MIXED_DEFAULTS=${BRIDGE_MIXED_DEFAULTS:-}

      # ── M2.2: Mixed room guard rails ────────────────────────────────────
      # Fail-fast if any room defines more agents than this
      - MAX_AGENTS_PER_MIXED_ROOM=${MAX_AGENTS_PER_MIXED_ROOM:-5}
      # Reject slash commands longer than this (anti-garbage / injection guard)
      - MAX_SLASH_LEN=${MAX_SLASH_LEN:-32}
      # What to do when unknown /slash is used: "ignore" (silent) | "reply_error" (inform user)
      - UNKNOWN_AGENT_BEHAVIOR=${UNKNOWN_AGENT_BEHAVIOR:-ignore}
      # Max concurrent Router invocations per (room, agent) pair; 0 = unlimited
      - MIXED_CONCURRENCY_CAP=${MIXED_CONCURRENCY_CAP:-1}

    # Probes the service's own /health endpoint from inside the container,
    # using python3 + urllib with a 5-second request timeout.
    healthcheck:
      test:
        - "CMD"
        - "python3"
        - "-c"
        - "import urllib.request; urllib.request.urlopen('http://localhost:7030/health', timeout=5)"
      interval: 30s
      timeout: 10s
      retries: 3
      start_period: 15s

    networks:
      - dagi-network

    restart: unless-stopped

# `external: true` — this file does not create dagi-network; it must already
# exist before the service is started.
networks:
  dagi-network:
    external: true