feat(matrix-bridge-dagi): add rate limiting (H1) and metrics (H3)
H1 — InMemoryRateLimiter (sliding window, no Redis):
- Per-room: RATE_LIMIT_ROOM_RPM (default 20/min)
- Per-sender: RATE_LIMIT_SENDER_RPM (default 10/min)
- Room limit is checked before the sender limit; a room-level block does not consume sender quota
- Blocked messages: on_rate_limited callback fires, a matrix.rate_limited audit event is written, and the message is dropped
- reset() for ops/test, stats() exposed in /health
H3 — Extended Prometheus metrics:
- matrix_bridge_rate_limited_total{room_id,agent_id,limit_type}
- matrix_bridge_send_duration_seconds histogram (new; the invoke-duration histogram already existed)
- matrix_bridge_invoke_duration_seconds buckets tuned for LLM latency
- matrix_bridge_rate_limiter_active_rooms/senders gauges
- on_invoke_latency + on_send_latency callbacks wired in ingress loop
16 new tests: 13 rate-limiter unit tests + 3 ingress integration tests
Total: 65 passed
Made-with: Cursor
This commit is contained in:
@@ -1,5 +1,5 @@
|
||||
"""
|
||||
Matrix Ingress + Egress Loop — Phase M1.4
|
||||
Matrix Ingress + Egress Loop — Phase M1.4 + H1/H3
|
||||
|
||||
Polls Matrix /sync for new messages, invokes DAGI Router for mapped rooms,
|
||||
sends agent replies back to Matrix, writes audit events to sofiia-console.
|
||||
@@ -7,11 +7,12 @@ sends agent replies back to Matrix, writes audit events to sofiia-console.
|
||||
Pipeline:
|
||||
sync_poll() → extract_room_messages()
|
||||
→ for each message:
|
||||
1. dedupe (mark_seen)
|
||||
2. audit: matrix.message.received
|
||||
3. invoke DAGI Router (/v1/agents/{agent_id}/infer)
|
||||
4. send_text() reply to Matrix room
|
||||
5. audit: matrix.agent.replied | matrix.error
|
||||
1. rate_limit check (room + sender) ← H1
|
||||
2. dedupe (mark_seen)
|
||||
3. audit: matrix.message.received
|
||||
4. invoke DAGI Router (timed → on_invoke_latency) ← H3
|
||||
5. send_text() reply (timed → on_send_latency) ← H3
|
||||
6. audit: matrix.agent.replied | matrix.error
|
||||
|
||||
Graceful shutdown via asyncio.Event.
|
||||
"""
|
||||
@@ -24,6 +25,7 @@ from typing import Any, Callable, Dict, List, Optional
|
||||
import httpx
|
||||
|
||||
from .matrix_client import MatrixClient
|
||||
from .rate_limit import InMemoryRateLimiter
|
||||
from .room_mapping import RoomMappingConfig
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
@@ -32,10 +34,9 @@ logger = logging.getLogger(__name__)
|
||||
|
||||
_MAX_RETRY_BACKOFF = 60.0
|
||||
_INIT_RETRY_BACKOFF = 2.0
|
||||
_ROUTER_TIMEOUT_S = 45.0 # Router may call DeepSeek/Mistral
|
||||
_ROUTER_TIMEOUT_S = 45.0
|
||||
_AUDIT_TIMEOUT_S = 5.0
|
||||
_REPLY_TEXT_MAX = 4000 # Matrix message cap (chars)
|
||||
_ERROR_REPLY_TEXT = "⚠️ Тимчасова помилка. Спробуйте ще раз."
|
||||
_REPLY_TEXT_MAX = 4000
|
||||
|
||||
|
||||
# ── Router invoke ──────────────────────────────────────────────────────────────
|
||||
@@ -50,7 +51,7 @@ async def _invoke_router(
|
||||
) -> str:
|
||||
"""
|
||||
POST /v1/agents/{agent_id}/infer — returns response text string.
|
||||
Field: response['response'] (confirmed from NODA1 test).
|
||||
Field confirmed as 'response' on NODA1.
|
||||
Raises httpx.HTTPError on failure.
|
||||
"""
|
||||
url = f"{router_url.rstrip('/')}/v1/agents/{agent_id}/infer"
|
||||
@@ -66,7 +67,6 @@ async def _invoke_router(
|
||||
resp = await http_client.post(url, json=payload, timeout=_ROUTER_TIMEOUT_S)
|
||||
resp.raise_for_status()
|
||||
data = resp.json()
|
||||
# Extract text — field confirmed as 'response'
|
||||
text = (
|
||||
data.get("response")
|
||||
or data.get("text")
|
||||
@@ -95,10 +95,7 @@ async def _write_audit(
|
||||
duration_ms: Optional[int] = None,
|
||||
data: Optional[Dict[str, Any]] = None,
|
||||
) -> None:
|
||||
"""
|
||||
Fire-and-forget audit write to sofiia-console internal endpoint.
|
||||
Never raises — logs warning on failure.
|
||||
"""
|
||||
"""Fire-and-forget audit write. Never raises."""
|
||||
if not console_url or not internal_token:
|
||||
return
|
||||
try:
|
||||
@@ -131,12 +128,15 @@ async def _write_audit(
|
||||
|
||||
class MatrixIngressLoop:
|
||||
"""
|
||||
Drives Matrix sync-poll → router-invoke → Matrix send_text pipeline.
|
||||
Drives Matrix sync-poll → rate-check → router-invoke → Matrix send_text.
|
||||
|
||||
Usage:
|
||||
loop = MatrixIngressLoop(...)
|
||||
stop_event = asyncio.Event()
|
||||
await loop.run(stop_event)
|
||||
Metric callbacks (all optional, called synchronously):
|
||||
on_message_received(room_id, agent_id)
|
||||
on_message_replied(room_id, agent_id, status)
|
||||
on_gateway_error(error_type)
|
||||
on_rate_limited(room_id, agent_id, limit_type) ← H1
|
||||
on_invoke_latency(agent_id, duration_seconds) ← H3
|
||||
on_send_latency(agent_id, duration_seconds) ← H3
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
@@ -149,9 +149,13 @@ class MatrixIngressLoop:
|
||||
room_map: RoomMappingConfig,
|
||||
sofiia_console_url: str = "",
|
||||
sofiia_internal_token: str = "",
|
||||
rate_limiter: Optional[InMemoryRateLimiter] = None,
|
||||
on_message_received: Optional[Callable[[str, str], None]] = None,
|
||||
on_message_replied: Optional[Callable[[str, str, str], None]] = None,
|
||||
on_gateway_error: Optional[Callable[[str], None]] = None,
|
||||
on_rate_limited: Optional[Callable[[str, str, str], None]] = None,
|
||||
on_invoke_latency: Optional[Callable[[str, float], None]] = None,
|
||||
on_send_latency: Optional[Callable[[str, float], None]] = None,
|
||||
) -> None:
|
||||
self._hs_url = matrix_homeserver_url
|
||||
self._token = matrix_access_token
|
||||
@@ -161,9 +165,13 @@ class MatrixIngressLoop:
|
||||
self._room_map = room_map
|
||||
self._console_url = sofiia_console_url
|
||||
self._internal_token = sofiia_internal_token
|
||||
self._rate_limiter = rate_limiter
|
||||
self._on_message_received = on_message_received
|
||||
self._on_message_replied = on_message_replied
|
||||
self._on_gateway_error = on_gateway_error
|
||||
self._on_rate_limited = on_rate_limited
|
||||
self._on_invoke_latency = on_invoke_latency
|
||||
self._on_send_latency = on_send_latency
|
||||
self._next_batch: Optional[str] = None
|
||||
|
||||
@property
|
||||
@@ -171,7 +179,6 @@ class MatrixIngressLoop:
|
||||
return self._next_batch
|
||||
|
||||
async def run(self, stop_event: asyncio.Event) -> None:
|
||||
"""Main loop until stop_event is set."""
|
||||
backoff = _INIT_RETRY_BACKOFF
|
||||
logger.info(
|
||||
"Matrix ingress/egress loop started | hs=%s node=%s mappings=%d",
|
||||
@@ -239,7 +246,27 @@ class MatrixIngressLoop:
|
||||
if not text:
|
||||
return
|
||||
|
||||
# Dedupe — mark seen before any IO (prevents double-process on retry)
|
||||
# ── H1: Rate limit check ───────────────────────────────────────────────
|
||||
if self._rate_limiter is not None:
|
||||
allowed, limit_type = self._rate_limiter.check(room_id=room_id, sender=sender)
|
||||
if not allowed:
|
||||
logger.warning(
|
||||
"Rate limited: room=%s sender=%s limit_type=%s event=%s",
|
||||
room_id, sender, limit_type, event_id,
|
||||
)
|
||||
if self._on_rate_limited:
|
||||
self._on_rate_limited(room_id, agent_id, limit_type or "unknown")
|
||||
await _write_audit(
|
||||
http_client, self._console_url, self._internal_token,
|
||||
event="matrix.rate_limited",
|
||||
agent_id=agent_id, node_id=self._node_id,
|
||||
room_id=room_id, event_id=event_id,
|
||||
status="error", error_code=f"rate_limit_{limit_type}",
|
||||
data={"sender": sender, "limit_type": limit_type},
|
||||
)
|
||||
return
|
||||
|
||||
# Dedupe — mark seen before any IO
|
||||
client.mark_seen(event_id)
|
||||
|
||||
logger.info(
|
||||
@@ -250,7 +277,6 @@ class MatrixIngressLoop:
|
||||
if self._on_message_received:
|
||||
self._on_message_received(room_id, agent_id)
|
||||
|
||||
# Audit: received
|
||||
await _write_audit(
|
||||
http_client, self._console_url, self._internal_token,
|
||||
event="matrix.message.received",
|
||||
@@ -260,12 +286,13 @@ class MatrixIngressLoop:
|
||||
data={"sender": sender, "text_len": len(text)},
|
||||
)
|
||||
|
||||
# Session ID: stable per room (allows memory context across messages)
|
||||
session_id = f"matrix:{room_id.replace('!', '').replace(':', '_')}"
|
||||
|
||||
# ── H3: Invoke with latency measurement ───────────────────────────────
|
||||
t0 = time.monotonic()
|
||||
reply_text: Optional[str] = None
|
||||
invoke_ok = False
|
||||
invoke_duration_s: float = 0.0
|
||||
|
||||
try:
|
||||
reply_text = await _invoke_router(
|
||||
@@ -277,14 +304,20 @@ class MatrixIngressLoop:
|
||||
session_id=session_id,
|
||||
)
|
||||
invoke_ok = True
|
||||
duration_ms = int((time.monotonic() - t0) * 1000)
|
||||
invoke_duration_s = time.monotonic() - t0
|
||||
duration_ms = int(invoke_duration_s * 1000)
|
||||
|
||||
if self._on_invoke_latency:
|
||||
self._on_invoke_latency(agent_id, invoke_duration_s)
|
||||
|
||||
logger.info(
|
||||
"Router invoke ok: agent=%s event=%s reply_len=%d duration=%dms",
|
||||
agent_id, event_id, len(reply_text or ""), duration_ms,
|
||||
)
|
||||
|
||||
except httpx.HTTPStatusError as exc:
|
||||
duration_ms = int((time.monotonic() - t0) * 1000)
|
||||
invoke_duration_s = time.monotonic() - t0
|
||||
duration_ms = int(invoke_duration_s * 1000)
|
||||
logger.error(
|
||||
"Router HTTP %d for agent=%s event=%s duration=%dms",
|
||||
exc.response.status_code, agent_id, event_id, duration_ms,
|
||||
@@ -301,7 +334,8 @@ class MatrixIngressLoop:
|
||||
)
|
||||
|
||||
except (httpx.ConnectError, httpx.TimeoutException) as exc:
|
||||
duration_ms = int((time.monotonic() - t0) * 1000)
|
||||
invoke_duration_s = time.monotonic() - t0
|
||||
duration_ms = int(invoke_duration_s * 1000)
|
||||
logger.error(
|
||||
"Router network error agent=%s event=%s: %s duration=%dms",
|
||||
agent_id, event_id, exc, duration_ms,
|
||||
@@ -318,7 +352,8 @@ class MatrixIngressLoop:
|
||||
)
|
||||
|
||||
except Exception as exc:
|
||||
duration_ms = int((time.monotonic() - t0) * 1000)
|
||||
invoke_duration_s = time.monotonic() - t0
|
||||
duration_ms = int(invoke_duration_s * 1000)
|
||||
logger.error(
|
||||
"Unexpected router error agent=%s event=%s: %s",
|
||||
agent_id, event_id, exc,
|
||||
@@ -334,24 +369,25 @@ class MatrixIngressLoop:
|
||||
duration_ms=duration_ms,
|
||||
)
|
||||
|
||||
# ── Egress: send reply back to Matrix ──────────────────────────────────
|
||||
if not invoke_ok:
|
||||
# No reply on error in M1 — just audit (avoids spam in room)
|
||||
return
|
||||
|
||||
if not reply_text:
|
||||
logger.warning("Empty reply from router for agent=%s event=%s", agent_id, event_id)
|
||||
return
|
||||
|
||||
# Truncate if needed
|
||||
# ── H3: Send with latency measurement ─────────────────────────────────
|
||||
send_text = reply_text[:_REPLY_TEXT_MAX]
|
||||
txn_id = MatrixClient.make_txn_id(room_id, event_id)
|
||||
|
||||
send_t0 = time.monotonic()
|
||||
try:
|
||||
await client.send_text(room_id, send_text, txn_id)
|
||||
send_duration_ms = int((time.monotonic() - send_t0) * 1000)
|
||||
send_duration_s = time.monotonic() - send_t0
|
||||
send_duration_ms = int(send_duration_s * 1000)
|
||||
|
||||
if self._on_send_latency:
|
||||
self._on_send_latency(agent_id, send_duration_s)
|
||||
if self._on_message_replied:
|
||||
self._on_message_replied(room_id, agent_id, "ok")
|
||||
|
||||
@@ -365,7 +401,7 @@ class MatrixIngressLoop:
|
||||
data={
|
||||
"reply_len": len(send_text),
|
||||
"truncated": len(reply_text) > _REPLY_TEXT_MAX,
|
||||
"router_duration_ms": duration_ms,
|
||||
"router_duration_ms": int(invoke_duration_s * 1000),
|
||||
},
|
||||
)
|
||||
logger.info(
|
||||
@@ -374,7 +410,8 @@ class MatrixIngressLoop:
|
||||
)
|
||||
|
||||
except Exception as exc:
|
||||
send_duration_ms = int((time.monotonic() - send_t0) * 1000)
|
||||
send_duration_s = time.monotonic() - send_t0
|
||||
send_duration_ms = int(send_duration_s * 1000)
|
||||
logger.error(
|
||||
"Failed to send Matrix reply agent=%s event=%s: %s",
|
||||
agent_id, event_id, exc,
|
||||
|
||||
Reference in New Issue
Block a user