feat(matrix-bridge-dagi): add rate limiting (H1) and metrics (H3)

H1 — InMemoryRateLimiter (sliding window, no Redis):
  - Per-room: RATE_LIMIT_ROOM_RPM (default 20/min)
  - Per-sender: RATE_LIMIT_SENDER_RPM (default 10/min)
  - Room limit is checked before the sender limit, so a message blocked by the room limit does not consume sender quota
  - Blocked messages: audit matrix.rate_limited + on_rate_limited callback
  - reset() for ops/test, stats() exposed in /health

H3 — Extended Prometheus metrics:
  - matrix_bridge_rate_limited_total{room_id,agent_id,limit_type}
  - matrix_bridge_send_duration_seconds histogram (new; the invoke-duration histogram already existed)
  - matrix_bridge_invoke_duration_seconds buckets tuned for LLM latency
  - matrix_bridge_rate_limiter_active_rooms/senders gauges
  - on_invoke_latency + on_send_latency callbacks wired in ingress loop

16 new tests: rate limiter unit (13) + ingress integration (3)
Total: 65 passed

Made-with: Cursor
This commit is contained in:
Apple
2026-03-05 00:54:14 -08:00
parent 313d777c84
commit a4e95482bc
5 changed files with 607 additions and 40 deletions

View File

@@ -50,6 +50,14 @@ MSG_EVENT = {
}
import sys
_BRIDGE = Path(__file__).parent.parent / "services" / "matrix-bridge-dagi"
if str(_BRIDGE) not in sys.path:
sys.path.insert(0, str(_BRIDGE))
from app.rate_limit import InMemoryRateLimiter # noqa: E402
def _make_loop(**kwargs) -> MatrixIngressLoop:
room_map = parse_room_map(ROOM_MAP_STR, ALLOWED)
defaults = dict(
@@ -446,3 +454,188 @@ def test_loop_metric_callbacks_fire():
assert replied[0][2] == "ok"
run(_inner())
# ── H1: Rate limit integration ────────────────────────────────────────────────
def test_rate_limiter_blocks_invoke():
    """A message over the room rate limit must never reach the router.

    One sync batch delivers two messages from the same room while the room
    limit is 1/min; the test expects exactly one /infer call and exactly one
    on_rate_limited callback carrying the limit type "room".
    """

    async def _inner():
        infer_hits = 0
        polls = 0
        blocked_types = []
        stop = asyncio.Event()
        limiter = InMemoryRateLimiter(room_rpm=1, sender_rpm=100)
        loop = _make_loop(
            rate_limiter=limiter,
            on_rate_limited=lambda r, a, lt: blocked_types.append(lt),
        )

        # Same room as MSG_EVENT, but a distinct event id.
        second_event = dict(MSG_EVENT, event_id="$event2:server")

        async def fake_sync_poll(**kwargs):
            nonlocal polls
            polls += 1
            if polls == 1:
                return _fake_sync([MSG_EVENT, second_event])
            # Any later poll stops the loop and reports an empty sync.
            stop.set()
            return {"next_batch": "end", "rooms": {}}

        def fake_extract(sync_resp, room_id):
            joined = sync_resp.get("rooms", {}).get("join", {})
            timeline = joined.get(room_id, {}).get("timeline", {})
            return [
                ev
                for ev in timeline.get("events", [])
                if ev.get("type") == "m.room.message"
                and ev.get("sender") != BOT_USER
            ]

        async def fake_http_post(url, *, json=None, headers=None, timeout=None):
            nonlocal infer_hits
            if "/infer" not in url:
                return _audit_response()
            infer_hits += 1
            return _ok_response("reply")

        # Stand-in for the Matrix client used as an async context manager.
        matrix_client = AsyncMock()
        matrix_client.__aenter__ = AsyncMock(return_value=matrix_client)
        matrix_client.__aexit__ = AsyncMock(return_value=False)
        matrix_client.sync_poll = fake_sync_poll
        matrix_client.join_room = AsyncMock()
        matrix_client.mark_seen = MagicMock()
        matrix_client.extract_room_messages = fake_extract
        matrix_client.send_text = AsyncMock(return_value={"event_id": "$r"})

        # Stand-in for the HTTP client used as an async context manager.
        http_client = AsyncMock()
        http_client.__aenter__ = AsyncMock(return_value=http_client)
        http_client.__aexit__ = AsyncMock(return_value=False)
        http_client.post = fake_http_post

        with patch("app.ingress.MatrixClient") as MockClient, patch(
            "app.ingress.httpx.AsyncClient"
        ) as MockHTTP:
            MockClient.return_value = matrix_client
            MockClient.make_txn_id = lambda r, e: f"txn_{e}"
            MockHTTP.return_value = http_client
            await asyncio.wait_for(loop.run(stop), timeout=3.0)

        # First message passes, second is blocked by the room limit.
        assert infer_hits == 1
        assert blocked_types == ["room"]

    run(_inner())
def test_rate_limiter_audit_event_on_block():
    """A rate-limited message must be reported as a matrix.rate_limited audit event.

    Every POST to an /audit/internal URL has its "event" field recorded; after
    the loop finishes, "matrix.rate_limited" must be among the recorded names.
    """

    async def _inner():
        polls = 0
        audit_events = []
        stop = asyncio.Event()
        limiter = InMemoryRateLimiter(room_rpm=1, sender_rpm=100)
        loop = _make_loop(rate_limiter=limiter)

        # Second message in the same room, which exceeds room_rpm=1.
        second_event = dict(MSG_EVENT, event_id="$event2:server")

        async def fake_sync_poll(**kwargs):
            nonlocal polls
            polls += 1
            if polls == 1:
                return _fake_sync([MSG_EVENT, second_event])
            # Any later poll stops the loop and reports an empty sync.
            stop.set()
            return {"next_batch": "end", "rooms": {}}

        def fake_extract(sync_resp, room_id):
            joined = sync_resp.get("rooms", {}).get("join", {})
            timeline = joined.get(room_id, {}).get("timeline", {})
            return [
                ev
                for ev in timeline.get("events", [])
                if ev.get("type") == "m.room.message"
                and ev.get("sender") != BOT_USER
            ]

        async def fake_http_post(url, *, json=None, headers=None, timeout=None):
            if "/audit/internal" in url:
                event_name = json.get("event") if json else "unknown"
                audit_events.append(event_name)
            if "/infer" in url:
                return _ok_response("reply")
            return _audit_response()

        # Stand-in for the Matrix client used as an async context manager.
        matrix_client = AsyncMock()
        matrix_client.__aenter__ = AsyncMock(return_value=matrix_client)
        matrix_client.__aexit__ = AsyncMock(return_value=False)
        matrix_client.sync_poll = fake_sync_poll
        matrix_client.join_room = AsyncMock()
        matrix_client.mark_seen = MagicMock()
        matrix_client.extract_room_messages = fake_extract
        matrix_client.send_text = AsyncMock(return_value={"event_id": "$r"})

        # Stand-in for the HTTP client used as an async context manager.
        http_client = AsyncMock()
        http_client.__aenter__ = AsyncMock(return_value=http_client)
        http_client.__aexit__ = AsyncMock(return_value=False)
        http_client.post = fake_http_post

        with patch("app.ingress.MatrixClient") as MockClient, patch(
            "app.ingress.httpx.AsyncClient"
        ) as MockHTTP:
            MockClient.return_value = matrix_client
            MockClient.make_txn_id = lambda r, e: f"txn_{e}"
            MockHTTP.return_value = http_client
            await asyncio.wait_for(loop.run(stop), timeout=3.0)

        assert "matrix.rate_limited" in audit_events

    run(_inner())
def test_latency_callbacks_fire():
    """on_invoke_latency and on_send_latency must each fire once per message.

    Each callback is expected to receive the agent id and a non-negative
    float duration. The send latency is checked for non-negativity as well,
    so both callbacks are held to the same contract.
    """

    async def _inner():
        invoke_latencies = []
        send_latencies = []
        stop = asyncio.Event()
        loop = _make_loop(
            on_invoke_latency=lambda a, d: invoke_latencies.append((a, d)),
            on_send_latency=lambda a, d: send_latencies.append((a, d)),
        )
        call_count = [0]

        async def fake_sync_poll(**kwargs):
            call_count[0] += 1
            if call_count[0] > 1:
                # Second poll: stop the loop and report an empty sync.
                stop.set()
                return {"next_batch": "end", "rooms": {}}
            return _fake_sync([MSG_EVENT])

        def fake_extract(sync_resp, room_id):
            events = (
                sync_resp.get("rooms", {})
                .get("join", {})
                .get(room_id, {})
                .get("timeline", {})
                .get("events", [])
            )
            return [
                e
                for e in events
                if e.get("type") == "m.room.message" and e.get("sender") != BOT_USER
            ]

        async def fake_http_post(url, *, json=None, headers=None, timeout=None):
            if "/infer" in url:
                return _ok_response("test reply")
            return _audit_response()

        with patch("app.ingress.MatrixClient") as MockClient:
            mock_mc = AsyncMock()
            mock_mc.__aenter__ = AsyncMock(return_value=mock_mc)
            mock_mc.__aexit__ = AsyncMock(return_value=False)
            mock_mc.sync_poll = fake_sync_poll
            mock_mc.join_room = AsyncMock()
            mock_mc.mark_seen = MagicMock()
            mock_mc.extract_room_messages = fake_extract
            mock_mc.send_text = AsyncMock(return_value={"event_id": "$r"})
            MockClient.return_value = mock_mc
            MockClient.make_txn_id = lambda r, e: f"txn_{e}"
            with patch("app.ingress.httpx.AsyncClient") as MockHTTP:
                mock_http = AsyncMock()
                mock_http.__aenter__ = AsyncMock(return_value=mock_http)
                mock_http.__aexit__ = AsyncMock(return_value=False)
                mock_http.post = fake_http_post
                MockHTTP.return_value = mock_http
                await asyncio.wait_for(loop.run(stop), timeout=3.0)

        assert len(invoke_latencies) == 1
        assert invoke_latencies[0][0] == "sofiia"
        assert isinstance(invoke_latencies[0][1], float)
        assert invoke_latencies[0][1] >= 0

        assert len(send_latencies) == 1
        assert send_latencies[0][0] == "sofiia"
        assert isinstance(send_latencies[0][1], float)
        # Mirror the invoke check: a duration can never be negative.
        assert send_latencies[0][1] >= 0

    run(_inner())

View File

@@ -0,0 +1,169 @@
"""
Tests for services/matrix-bridge-dagi/app/rate_limit.py (H1)
Coverage:
- basic allow / room limit / sender limit
- independent room and sender counters
- sliding window prune (old events don't block)
- reset() clears buckets
- stats() reflects live state
- constructor validation
"""
import sys
import time
from pathlib import Path
_BRIDGE = Path(__file__).parent.parent / "services" / "matrix-bridge-dagi"
if str(_BRIDGE) not in sys.path:
sys.path.insert(0, str(_BRIDGE))
from app.rate_limit import InMemoryRateLimiter # noqa: E402
# Fixture identifiers passed to InMemoryRateLimiter.check(room, sender) below.
# Two rooms and two senders let the tests exercise per-room and per-sender
# limits independently of each other.
ROOM = "!room1:server"
ROOM2 = "!room2:server"
SENDER = "@alice:server"
SENDER2 = "@bob:server"
def test_allows_first_message():
    """The first message on a fresh limiter is allowed and reports no limit type."""
    limiter = InMemoryRateLimiter(room_rpm=5, sender_rpm=5)
    verdict = limiter.check(ROOM, SENDER)
    allowed, limit_type = verdict
    assert allowed is True
    assert limit_type is None
def test_room_limit_blocks_at_threshold():
    """After room_rpm messages in one room, the next one is blocked as "room"."""
    limiter = InMemoryRateLimiter(room_rpm=3, sender_rpm=100)

    # Use up the whole room quota with a single sender.
    for _attempt in range(3):
        ok, _unused = limiter.check(ROOM, SENDER)
        assert ok

    # 4th message in the same room is blocked even though the sender differs.
    allowed, limit_type = limiter.check(ROOM, SENDER2)
    assert allowed is False
    assert limit_type == "room"
def test_sender_limit_blocks_at_threshold():
    """After sender_rpm messages from one sender, the next is blocked as "sender"."""
    limiter = InMemoryRateLimiter(room_rpm=100, sender_rpm=2)

    # Spend the sender quota across two different rooms.
    for room in (ROOM, ROOM2):
        ok, _unused = limiter.check(room, SENDER)
        assert ok

    # 3rd message from the same sender is blocked even in a brand-new room.
    allowed, limit_type = limiter.check("!room3:server", SENDER)
    assert allowed is False
    assert limit_type == "sender"
def test_room_checked_before_sender():
    """If both limits would be exceeded, the reported limit type is "room"."""
    limiter = InMemoryRateLimiter(room_rpm=1, sender_rpm=1)
    # One message exhausts both the room and the sender quota.
    limiter.check(ROOM, SENDER)

    outcome = limiter.check(ROOM, SENDER)
    allowed, limit_type = outcome
    assert not allowed
    assert limit_type == "room"
def test_independent_rooms_dont_interfere():
    """Filling one room's quota must not affect a different room."""
    rl = InMemoryRateLimiter(room_rpm=2, sender_rpm=100)
    rl.check(ROOM, SENDER)
    rl.check(ROOM, SENDER)

    # room1 is full; room2 must still accept messages.
    allowed, limit_type = rl.check(ROOM2, SENDER)
    assert allowed is True
    # An allowed message reports no limit type (as in test_allows_first_message).
    assert limit_type is None
def test_independent_senders_dont_interfere():
    """Exhausting one sender's quota must not affect a different sender."""
    limiter = InMemoryRateLimiter(room_rpm=100, sender_rpm=1)
    limiter.check(ROOM, SENDER)

    # alice has no quota left; bob in the same room is unaffected.
    bob_allowed, _unused = limiter.check(ROOM, SENDER2)
    assert bob_allowed is True
def test_window_prune_allows_after_expiry(monkeypatch):
    """Events older than 60s should not count against the limit."""
    limiter = InMemoryRateLimiter(room_rpm=2, sender_rpm=100)

    # Fill the room bucket.
    for _attempt in range(2):
        limiter.check(ROOM, SENDER)

    # The bucket is full, so the next message is rejected.
    blocked_ok, blocked_type = limiter.check(ROOM, SENDER2)
    assert not blocked_ok
    assert blocked_type == "room"

    # Pin time.monotonic to 61 seconds after "now" (fast-forward past the window).
    frozen_now = time.monotonic() + 61.0
    monkeypatch.setattr(time, "monotonic", lambda: frozen_now)

    # The earlier events are now outside the window.
    allowed, _unused = limiter.check(ROOM, SENDER2)
    assert allowed is True
def test_reset_room_clears_bucket():
    """reset(room_id=...) makes a room that was blocked usable again."""
    limiter = InMemoryRateLimiter(room_rpm=1, sender_rpm=100)
    limiter.check(ROOM, SENDER)

    before_ok, before_type = limiter.check(ROOM, SENDER2)
    assert not before_ok
    assert before_type == "room"

    limiter.reset(room_id=ROOM)

    after_ok, _unused = limiter.check(ROOM, SENDER2)
    assert after_ok is True
def test_reset_sender_clears_bucket():
    """reset(sender=...) makes a sender that was blocked usable again."""
    limiter = InMemoryRateLimiter(room_rpm=100, sender_rpm=1)
    limiter.check(ROOM, SENDER)

    before_ok, before_type = limiter.check(ROOM2, SENDER)
    assert not before_ok
    assert before_type == "sender"

    limiter.reset(sender=SENDER)

    after_ok, _unused = limiter.check(ROOM2, SENDER)
    assert after_ok is True
def test_stats_reflects_active_buckets():
    """stats() reports the active room/sender counts and the configured limits."""
    limiter = InMemoryRateLimiter(room_rpm=10, sender_rpm=10)
    # Two distinct rooms and two distinct senders become active.
    limiter.check(ROOM, SENDER)
    limiter.check(ROOM2, SENDER2)

    snapshot = limiter.stats()
    expected = {
        "active_rooms": 2,
        "active_senders": 2,
        "room_rpm_limit": 10,
        "sender_rpm_limit": 10,
    }
    for key, value in expected.items():
        assert snapshot[key] == value
def test_stats_stale_buckets_not_counted(monkeypatch):
    """Buckets whose events are all older than the window are not counted as active."""
    limiter = InMemoryRateLimiter(room_rpm=10, sender_rpm=10)
    limiter.check(ROOM, SENDER)

    # Pin time.monotonic to 61 seconds after "now".
    frozen_now = time.monotonic() + 61.0
    monkeypatch.setattr(time, "monotonic", lambda: frozen_now)

    snapshot = limiter.stats()
    for key in ("active_rooms", "active_senders"):
        assert snapshot[key] == 0
def test_constructor_validates_limits():
    """A zero room limit or a negative sender limit must raise ValueError."""
    import pytest

    invalid_configs = (
        {"room_rpm": 0, "sender_rpm": 5},
        {"room_rpm": 5, "sender_rpm": -1},
    )
    for config in invalid_configs:
        with pytest.raises(ValueError):
            InMemoryRateLimiter(**config)
def test_sender_bucket_not_charged_when_room_blocked():
    """When room blocks, sender quota must not decrease."""
    rl = InMemoryRateLimiter(room_rpm=1, sender_rpm=2)
    rl.check(ROOM, SENDER)  # fills room (1/1), sender (1/2)

    # Both follow-ups must be rejected by the room limit. Asserting this
    # precondition makes a failure point at the right cause instead of
    # surfacing only in the final check.
    for _ in range(2):
        ok, lt = rl.check(ROOM, SENDER)
        assert ok is False
        assert lt == "room"

    # Sender still has 1 slot left in a fresh room: only 1/2 of its quota
    # was used, because the blocked messages were not charged to it.
    ok, lt = rl.check(ROOM2, SENDER)
    assert ok is True
    assert lt is None