feat(matrix-bridge-dagi): harden mixed rooms with safe defaults and ops visibility (M2.2)

Guard rails (mixed_routing.py):
  - MAX_AGENTS_PER_MIXED_ROOM (default 5): fail-fast at parse time
  - MAX_SLASH_LEN (default 32): reject garbage/injection slash tokens
  - Unified rejection reasons: unknown_agent, slash_too_long, no_mapping
  - REASON_REJECTED_* constants (separate from success REASON_*)

Ingress (ingress.py):
  - per-room-agent concurrency semaphore (MIXED_CONCURRENCY_CAP, default 1)
  - active_lock_count property for /health + prometheus
  - UNKNOWN_AGENT_BEHAVIOR: "ignore" (silent) | "reply_error" (inform user)
  - on_routed(agent_id, reason) callback for routing metrics
  - on_route_rejected(room_id, reason) callback for rejection metrics
  - matrix.route.rejected audit event on every rejection

Config + main:
  - max_agents_per_mixed_room, max_slash_len, unknown_agent_behavior, mixed_concurrency_cap
  - matrix_bridge_routed_total{agent_id, reason} counter
  - matrix_bridge_route_rejected_total{room_id, reason} counter
  - matrix_bridge_active_room_agent_locks gauge
  - /health: mixed_guard_rails section + total_agents_in_mixed_rooms
  - docker-compose: all 4 new guard rail env vars

Runbook: section 9 — mixed room debug guide (6 acceptance tests, routing metrics, session isolation, lock hang, config guard)

Tests: 108 pass (94 → 108, +14 new tests for guard rails + callbacks + concurrency)
Made-with: Cursor
This commit is contained in:
Apple
2026-03-05 01:41:20 -08:00
parent a85a11984b
commit d40b1e87c6
8 changed files with 576 additions and 21 deletions

View File

@@ -26,6 +26,9 @@ from app.mixed_routing import ( # noqa: E402
REASON_AT_MENTION,
REASON_COLON_MENTION,
REASON_DEFAULT,
REASON_REJECTED_UNKNOWN_AGENT,
REASON_REJECTED_SLASH_TOO_LONG,
REASON_REJECTED_NO_MAPPING,
)
# Matrix room ID that the tests below pass as the mixed room under test.
ROOM_X = "!roomX:daarion.space"
@@ -146,7 +149,7 @@ def test_slash_unknown_agent_returns_none():
cfg = _make_cfg()
agent, reason, _ = route_message("/druid hello", ROOM_X, cfg, frozenset({"sofiia", "helion"}))
assert agent is None
assert "unknown_slash_druid" in reason
assert reason == REASON_REJECTED_UNKNOWN_AGENT
# ── Routing — @mention ────────────────────────────────────────────────────────
@@ -225,3 +228,70 @@ def test_reply_prefix_single_room_empty():
def test_reply_prefix_capitalises_first_letter():
    """Mixed-room reply prefixes show the agent name with only its first letter upper-cased."""
    from_lower = reply_prefix("druid", is_mixed=True)
    assert from_lower == "Druid: "

    # An all-caps name is expected back as "Nutra: " — capitalize() normalises case.
    from_upper = reply_prefix("NUTRA", is_mixed=True)
    assert from_upper == "Nutra: "
# ── M2.2: Guard rails ─────────────────────────────────────────────────────────
def test_max_agents_per_room_raises():
    """Six agents mapped to one room with a cap of five must raise ValueError while parsing."""
    six_allowed = frozenset({"sofiia", "helion", "druid", "nutra", "alateya", "yaromir"})
    mapping = f"{ROOM_X}=sofiia,helion,druid,nutra,alateya,yaromir"  # one agent over the cap

    # The error message is expected to name the limit that was exceeded.
    with pytest.raises(ValueError, match="MAX_AGENTS_PER_MIXED_ROOM"):
        parse_mixed_room_map(mapping, "", six_allowed, max_agents_per_room=5)
def test_max_agents_per_room_exactly_at_limit_ok():
    """A room mapped to exactly as many agents as the cap allows parses cleanly."""
    five_allowed = frozenset({"sofiia", "helion", "druid", "nutra", "alateya"})
    mapping = f"{ROOM_X}=sofiia,helion,druid,nutra,alateya"  # agent count equals the cap

    parsed = parse_mixed_room_map(mapping, "", five_allowed, max_agents_per_room=5)

    room_agents = parsed.agents_for_room(ROOM_X)
    assert len(room_agents) == 5
def test_slash_too_long_returns_rejected_reason():
    """A slash token longer than max_slash_len is rejected instead of being routed."""
    room_cfg = _make_cfg()
    oversized = "a" * 33  # one character past the limit of 32 passed below
    message = f"/{oversized} hello"
    known_agents = frozenset({"sofiia", "helion"})

    routed_agent, why, _ = route_message(
        message, ROOM_X, room_cfg, known_agents, max_slash_len=32
    )

    assert routed_agent is None
    assert why == REASON_REJECTED_SLASH_TOO_LONG
def test_slash_exactly_at_max_len_ok():
    """A slash command naming a configured agent routes normally with max_slash_len=32.

    NOTE(review): the token used here ("sofiia") is much shorter than the
    limit, so the exact-boundary case named in the test title is not actually
    exercised — TODO confirm whether a boundary-length token (or a tighter
    max_slash_len) should be used instead.
    """
    known_agents = frozenset({"sofiia", "helion"})
    room_cfg = parse_mixed_room_map(
        f"{ROOM_X}=sofiia,helion", "", known_agents, max_agents_per_room=5
    )

    routed_agent, why, _ = route_message(
        "/sofiia hi", ROOM_X, room_cfg, known_agents, max_slash_len=32
    )

    assert routed_agent == "sofiia"
    assert why == REASON_SLASH
def test_unknown_slash_returns_rejected_unknown_agent():
    """A within-limit slash token absent from the allowed set yields the unknown-agent rejection."""
    room_cfg = _make_cfg()
    known_agents = frozenset({"sofiia", "helion"})  # "druid" is deliberately missing

    routed_agent, why, _ = route_message(
        "/druid hello", ROOM_X, room_cfg, known_agents, max_slash_len=32
    )

    assert routed_agent is None
    assert why == REASON_REJECTED_UNKNOWN_AGENT
def test_no_mapping_returns_rejected_no_mapping():
    """Routing for ROOM_Y against a config built for ROOM_X yields the no-mapping rejection."""
    cfg_for_room_x = _make_cfg(room_id=ROOM_X)

    # The message targets ROOM_Y, which the config above was not built for.
    routed_agent, why, _ = route_message(
        "hello", ROOM_Y, cfg_for_room_x, ALLOWED, max_slash_len=32
    )

    assert routed_agent is None
    assert why == REASON_REJECTED_NO_MAPPING
def test_rejection_reasons_are_distinct_constants():
    """No rejection reason may share its value with any success reason."""
    accepted_reasons = {
        REASON_SLASH,
        REASON_AT_MENTION,
        REASON_COLON_MENTION,
        REASON_DEFAULT,
    }
    rejection_reasons = {
        REASON_REJECTED_UNKNOWN_AGENT,
        REASON_REJECTED_SLASH_TOO_LONG,
        REASON_REJECTED_NO_MAPPING,
    }

    shared = accepted_reasons & rejection_reasons
    assert not shared, "Rejection reasons must not overlap with success reasons"