feat(matrix-bridge-dagi): harden mixed rooms with safe defaults and ops visibility (M2.2)
Guard rails (mixed_routing.py):
- MAX_AGENTS_PER_MIXED_ROOM (default 5): fail-fast at parse time
- MAX_SLASH_LEN (default 32): reject garbage/injection slash tokens
- Unified rejection reasons: unknown_agent, slash_too_long, no_mapping
- REASON_REJECTED_* constants (separate from success REASON_*)
Ingress (ingress.py):
- per-room-agent concurrency semaphore (MIXED_CONCURRENCY_CAP, default 1)
- active_lock_count property for /health + prometheus
- UNKNOWN_AGENT_BEHAVIOR: "ignore" (silent) | "reply_error" (inform user)
- on_routed(agent_id, reason) callback for routing metrics
- on_route_rejected(room_id, reason) callback for rejection metrics
- matrix.route.rejected audit event on every rejection
Config + main:
- max_agents_per_mixed_room, max_slash_len, unknown_agent_behavior, mixed_concurrency_cap
- matrix_bridge_routed_total{agent_id, reason} counter
- matrix_bridge_route_rejected_total{room_id, reason} counter
- matrix_bridge_active_room_agent_locks gauge
- /health: mixed_guard_rails section + total_agents_in_mixed_rooms
- docker-compose: all 4 new guard rail env vars
Runbook: section 9 — mixed room debug guide (6 acceptance tests, routing metrics, session isolation, lock hang, config guard)
Tests: 108 pass (94 → 108, +14 new tests for guard rails + callbacks + concurrency)
Made-with: Cursor
This commit is contained in:
@@ -308,6 +308,158 @@ def test_rate_limited_mixed_room_event_dropped():
|
||||
assert len(dropped) == 2 # two dropped by rate limiter
|
||||
|
||||
|
||||
def test_on_route_rejected_callback_fires():
|
||||
"""on_route_rejected fires when /unknown slash is used in mixed room."""
|
||||
ingress = _make_ingress(mixed_raw=f"{ROOM_MIXED}=sofiia,helion")
|
||||
rejected_calls: List[tuple] = []
|
||||
ingress._on_route_rejected = lambda room, reason: rejected_calls.append((room, reason))
|
||||
|
||||
client = _fake_client({ROOM_MIXED: [_make_event("/unknownbot hello", event_id="r1")]})
|
||||
queue: asyncio.Queue = asyncio.Queue(maxsize=50)
|
||||
ingress._queue = queue
|
||||
|
||||
async def _run():
|
||||
with patch("app.ingress._write_audit", new=AsyncMock()):
|
||||
await ingress._enqueue_from_sync(client, queue, AsyncMock(), {})
|
||||
|
||||
run(_run())
|
||||
|
||||
assert queue.qsize() == 0
|
||||
assert len(rejected_calls) == 1
|
||||
room, reason = rejected_calls[0]
|
||||
assert room == ROOM_MIXED
|
||||
assert "unknown" in reason
|
||||
|
||||
|
||||
def test_on_routed_callback_fires_with_reason():
|
||||
"""on_routed fires with correct agent_id and routing_reason on successful route."""
|
||||
ingress = _make_ingress(mixed_raw=f"{ROOM_MIXED}=sofiia,helion")
|
||||
routed_calls: List[tuple] = []
|
||||
ingress._on_routed = lambda agent, reason: routed_calls.append((agent, reason))
|
||||
|
||||
client = _fake_client({ROOM_MIXED: [_make_event("/helion hello", event_id="rt1")]})
|
||||
queue: asyncio.Queue = asyncio.Queue(maxsize=50)
|
||||
ingress._queue = queue
|
||||
|
||||
async def _run():
|
||||
with patch("app.ingress._write_audit", new=AsyncMock()):
|
||||
await ingress._enqueue_from_sync(client, queue, AsyncMock(), {})
|
||||
|
||||
run(_run())
|
||||
|
||||
assert len(routed_calls) == 1
|
||||
agent, reason = routed_calls[0]
|
||||
assert agent == "helion"
|
||||
assert reason == "slash_command"
|
||||
|
||||
|
||||
def test_unknown_agent_reply_error_sends_message():
|
||||
"""UNKNOWN_AGENT_BEHAVIOR=reply_error → error message sent to room."""
|
||||
ingress = _make_ingress(mixed_raw=f"{ROOM_MIXED}=sofiia,helion")
|
||||
ingress._unknown_agent_behavior = "reply_error"
|
||||
|
||||
sent_texts: List[str] = []
|
||||
mock_client = _fake_client({ROOM_MIXED: [_make_event("/druid hello", event_id="ue1")]})
|
||||
mock_client.send_text = AsyncMock(side_effect=lambda room, text, txn_id=None: sent_texts.append(text))
|
||||
|
||||
queue: asyncio.Queue = asyncio.Queue(maxsize=50)
|
||||
ingress._queue = queue
|
||||
|
||||
async def _run():
|
||||
with patch("app.ingress._write_audit", new=AsyncMock()):
|
||||
await ingress._enqueue_from_sync(mock_client, queue, AsyncMock(), {})
|
||||
|
||||
run(_run())
|
||||
|
||||
assert queue.qsize() == 0 # not enqueued
|
||||
assert len(sent_texts) == 1
|
||||
assert "Unknown agent" in sent_texts[0]
|
||||
assert "sofiia" in sent_texts[0] or "helion" in sent_texts[0]
|
||||
|
||||
|
||||
def test_unknown_agent_ignore_sends_nothing():
|
||||
"""UNKNOWN_AGENT_BEHAVIOR=ignore (default) → no reply sent."""
|
||||
ingress = _make_ingress(mixed_raw=f"{ROOM_MIXED}=sofiia,helion")
|
||||
ingress._unknown_agent_behavior = "ignore"
|
||||
|
||||
sent_texts: List[str] = []
|
||||
mock_client = _fake_client({ROOM_MIXED: [_make_event("/druid hello", event_id="ui1")]})
|
||||
mock_client.send_text = AsyncMock(side_effect=lambda room, text, txn_id=None: sent_texts.append(text))
|
||||
|
||||
queue: asyncio.Queue = asyncio.Queue(maxsize=50)
|
||||
ingress._queue = queue
|
||||
|
||||
async def _run():
|
||||
with patch("app.ingress._write_audit", new=AsyncMock()):
|
||||
await ingress._enqueue_from_sync(mock_client, queue, AsyncMock(), {})
|
||||
|
||||
run(_run())
|
||||
|
||||
assert queue.qsize() == 0
|
||||
assert len(sent_texts) == 0 # silent
|
||||
|
||||
|
||||
def test_concurrency_cap_active_lock_count():
|
||||
"""active_lock_count returns 1 while semaphore is held."""
|
||||
ingress = _make_ingress(mixed_raw=f"{ROOM_MIXED}=sofiia,helion")
|
||||
ingress._mixed_concurrency_cap = 1
|
||||
|
||||
async def _run():
|
||||
lock = ingress._get_concurrency_lock(ROOM_MIXED, "sofiia")
|
||||
assert ingress.active_lock_count == 0
|
||||
await lock.acquire()
|
||||
assert ingress.active_lock_count == 1
|
||||
lock.release()
|
||||
assert ingress.active_lock_count == 0
|
||||
|
||||
run(_run())
|
||||
|
||||
|
||||
def test_slash_too_long_rejected_and_not_enqueued():
|
||||
"""Slash token longer than max_slash_len → rejected, not enqueued."""
|
||||
ingress = _make_ingress(mixed_raw=f"{ROOM_MIXED}=sofiia,helion")
|
||||
ingress._max_slash_len = 4 # very short for test
|
||||
|
||||
client = _fake_client({ROOM_MIXED: [_make_event("/toolongtoken hello", event_id="tl1")]})
|
||||
queue: asyncio.Queue = asyncio.Queue(maxsize=50)
|
||||
ingress._queue = queue
|
||||
|
||||
rejected_calls: List[str] = []
|
||||
ingress._on_route_rejected = lambda room, reason: rejected_calls.append(reason)
|
||||
|
||||
async def _run():
|
||||
with patch("app.ingress._write_audit", new=AsyncMock()):
|
||||
await ingress._enqueue_from_sync(client, queue, AsyncMock(), {})
|
||||
|
||||
run(_run())
|
||||
|
||||
assert queue.qsize() == 0
|
||||
assert len(rejected_calls) == 1
|
||||
assert rejected_calls[0] == "slash_too_long"
|
||||
|
||||
|
||||
def test_route_rejected_audit_event_written():
|
||||
"""On routing rejection, matrix.route.rejected audit event must be written."""
|
||||
ingress = _make_ingress(mixed_raw=f"{ROOM_MIXED}=sofiia,helion")
|
||||
|
||||
audit_events: List[str] = []
|
||||
|
||||
async def fake_audit(*args, **kwargs):
|
||||
audit_events.append(kwargs.get("event", ""))
|
||||
|
||||
client = _fake_client({ROOM_MIXED: [_make_event("/unknownbot test", event_id="ra1")]})
|
||||
queue: asyncio.Queue = asyncio.Queue(maxsize=50)
|
||||
ingress._queue = queue
|
||||
|
||||
async def _run():
|
||||
with patch("app.ingress._write_audit", side_effect=fake_audit):
|
||||
await ingress._enqueue_from_sync(client, queue, AsyncMock(), {})
|
||||
|
||||
run(_run())
|
||||
|
||||
assert "matrix.route.rejected" in audit_events
|
||||
|
||||
|
||||
def test_direct_room_reply_has_no_prefix():
|
||||
"""Reply in single-agent (direct) room must NOT have a prefix."""
|
||||
ingress = _make_ingress(direct_raw=f"druid:{ROOM_DIRECT}", allowed=frozenset({"druid"}))
|
||||
|
||||
@@ -26,6 +26,9 @@ from app.mixed_routing import ( # noqa: E402
|
||||
REASON_AT_MENTION,
|
||||
REASON_COLON_MENTION,
|
||||
REASON_DEFAULT,
|
||||
REASON_REJECTED_UNKNOWN_AGENT,
|
||||
REASON_REJECTED_SLASH_TOO_LONG,
|
||||
REASON_REJECTED_NO_MAPPING,
|
||||
)
|
||||
|
||||
ROOM_X = "!roomX:daarion.space"
|
||||
@@ -146,7 +149,7 @@ def test_slash_unknown_agent_returns_none():
|
||||
cfg = _make_cfg()
|
||||
agent, reason, _ = route_message("/druid hello", ROOM_X, cfg, frozenset({"sofiia", "helion"}))
|
||||
assert agent is None
|
||||
assert "unknown_slash_druid" in reason
|
||||
assert reason == REASON_REJECTED_UNKNOWN_AGENT
|
||||
|
||||
|
||||
# ── Routing — @mention ────────────────────────────────────────────────────────
|
||||
@@ -225,3 +228,70 @@ def test_reply_prefix_single_room_empty():
|
||||
def test_reply_prefix_capitalises_first_letter():
|
||||
assert reply_prefix("druid", is_mixed=True) == "Druid: "
|
||||
assert reply_prefix("NUTRA", is_mixed=True) == "Nutra: " # capitalize() normalises case
|
||||
|
||||
|
||||
# ── M2.2: Guard rails ─────────────────────────────────────────────────────────
|
||||
|
||||
def test_max_agents_per_room_raises():
|
||||
"""More agents than max → ValueError at parse time."""
|
||||
raw = f"{ROOM_X}=sofiia,helion,druid,nutra,alateya,yaromir" # 6 agents
|
||||
allowed_6 = frozenset({"sofiia", "helion", "druid", "nutra", "alateya", "yaromir"})
|
||||
with pytest.raises(ValueError, match="MAX_AGENTS_PER_MIXED_ROOM"):
|
||||
parse_mixed_room_map(raw, "", allowed_6, max_agents_per_room=5)
|
||||
|
||||
|
||||
def test_max_agents_per_room_exactly_at_limit_ok():
|
||||
"""Exactly at limit should succeed."""
|
||||
raw = f"{ROOM_X}=sofiia,helion,druid,nutra,alateya" # 5 = default limit
|
||||
allowed_5 = frozenset({"sofiia", "helion", "druid", "nutra", "alateya"})
|
||||
cfg = parse_mixed_room_map(raw, "", allowed_5, max_agents_per_room=5)
|
||||
assert len(cfg.agents_for_room(ROOM_X)) == 5
|
||||
|
||||
|
||||
def test_slash_too_long_returns_rejected_reason():
|
||||
"""Slash command token longer than max_slash_len → rejection, no fallthrough."""
|
||||
cfg = _make_cfg()
|
||||
long_token = "a" * 33 # > default 32
|
||||
agent, reason, _ = route_message(
|
||||
f"/{long_token} hello", ROOM_X, cfg, frozenset({"sofiia", "helion"}),
|
||||
max_slash_len=32,
|
||||
)
|
||||
assert agent is None
|
||||
assert reason == REASON_REJECTED_SLASH_TOO_LONG
|
||||
|
||||
|
||||
def test_slash_exactly_at_max_len_ok():
|
||||
"""Slash token exactly at max_slash_len should NOT be rejected."""
|
||||
allowed = frozenset({"sofiia", "helion"})
|
||||
raw = f"{ROOM_X}=sofiia,helion"
|
||||
# Create a 10-char agent name (within limit) — use a mock allowed set
|
||||
cfg = parse_mixed_room_map(raw, "", allowed, max_agents_per_room=5)
|
||||
agent, reason, _ = route_message("/sofiia hi", ROOM_X, cfg, allowed, max_slash_len=32)
|
||||
assert agent == "sofiia"
|
||||
assert reason == REASON_SLASH
|
||||
|
||||
|
||||
def test_unknown_slash_returns_rejected_unknown_agent():
|
||||
"""Slash with valid-length token but unknown agent → REASON_REJECTED_UNKNOWN_AGENT."""
|
||||
cfg = _make_cfg()
|
||||
agent, reason, _ = route_message(
|
||||
"/druid hello", ROOM_X, cfg, frozenset({"sofiia", "helion"}),
|
||||
max_slash_len=32,
|
||||
)
|
||||
assert agent is None
|
||||
assert reason == REASON_REJECTED_UNKNOWN_AGENT
|
||||
|
||||
|
||||
def test_no_mapping_returns_rejected_no_mapping():
|
||||
"""Room not in config → REASON_REJECTED_NO_MAPPING."""
|
||||
cfg = _make_cfg(room_id=ROOM_X)
|
||||
agent, reason, _ = route_message("hello", ROOM_Y, cfg, ALLOWED, max_slash_len=32)
|
||||
assert agent is None
|
||||
assert reason == REASON_REJECTED_NO_MAPPING
|
||||
|
||||
|
||||
def test_rejection_reasons_are_distinct_constants():
|
||||
"""All rejection reason strings must differ from success reasons."""
|
||||
success = {REASON_SLASH, REASON_AT_MENTION, REASON_COLON_MENTION, REASON_DEFAULT}
|
||||
rejected = {REASON_REJECTED_UNKNOWN_AGENT, REASON_REJECTED_SLASH_TOO_LONG, REASON_REJECTED_NO_MAPPING}
|
||||
assert not success.intersection(rejected), "Rejection reasons must not overlap with success reasons"
|
||||
|
||||
Reference in New Issue
Block a user