feat(matrix-bridge-dagi): add rate limiting (H1) and metrics (H3)

H1 — InMemoryRateLimiter (sliding window, no Redis):
- Per-room: RATE_LIMIT_ROOM_RPM (default 20/min)
- Per-sender: RATE_LIMIT_SENDER_RPM (default 10/min)
- Room checked before sender — sender quota not charged on room block
- Blocked messages: audit matrix.rate_limited + on_rate_limited callback
- reset() for ops/test, stats() exposed in /health

H3 — Extended Prometheus metrics:
- matrix_bridge_rate_limited_total{room_id,agent_id,limit_type}
- matrix_bridge_send_duration_seconds histogram (invoke was already there)
- matrix_bridge_invoke_duration_seconds buckets tuned for LLM latency
- matrix_bridge_rate_limiter_active_rooms/senders gauges
- on_invoke_latency + on_send_latency callbacks wired in ingress loop

16 new tests: rate limiter unit (13) + ingress integration (3)
Total: 65 passed

Made-with: Cursor
2026-03-05 00:54:14 -08:00
parent 313d777c84
commit a4e95482bc
5 changed files with 607 additions and 40 deletions
--- a/services/matrix-bridge-dagi/app/ingress.py
+++ b/services/matrix-bridge-dagi/app/ingress.py
@@ -1,5 +1,5 @@
 """
-Matrix Ingress + Egress Loop — Phase M1.4
+Matrix Ingress + Egress Loop — Phase M1.4 + H1/H3

 Polls Matrix /sync for new messages, invokes DAGI Router for mapped rooms,
 sends agent replies back to Matrix, writes audit events to sofiia-console.
@@ -7,11 +7,12 @@ sends agent replies back to Matrix, writes audit events to sofiia-console.
 Pipeline:
  sync_poll() → extract_room_messages()
  → for each message:
-    1. dedupe (mark_seen)
-    2. audit: matrix.message.received
-    3. invoke DAGI Router (/v1/agents/{agent_id}/infer)
-    4. send_text() reply to Matrix room
-    5. audit: matrix.agent.replied | matrix.error
+    1. rate_limit check (room + sender)  ← H1
+    2. dedupe (mark_seen)
+    3. audit: matrix.message.received
+    4. invoke DAGI Router  (timed → on_invoke_latency)  ← H3
+    5. send_text() reply   (timed → on_send_latency)    ← H3
+    6. audit: matrix.agent.replied | matrix.error

 Graceful shutdown via asyncio.Event.
 """
@@ -24,6 +25,7 @@ from typing import Any, Callable, Dict, List, Optional
 import httpx

 from .matrix_client import MatrixClient
+from .rate_limit import InMemoryRateLimiter
 from .room_mapping import RoomMappingConfig

 logger = logging.getLogger(__name__)
@@ -32,10 +34,9 @@ logger = logging.getLogger(__name__)

 _MAX_RETRY_BACKOFF = 60.0
 _INIT_RETRY_BACKOFF = 2.0
-_ROUTER_TIMEOUT_S = 45.0       # Router may call DeepSeek/Mistral
+_ROUTER_TIMEOUT_S = 45.0
 _AUDIT_TIMEOUT_S = 5.0
-_REPLY_TEXT_MAX = 4000          # Matrix message cap (chars)
-_ERROR_REPLY_TEXT = "⚠️ Тимчасова помилка. Спробуйте ще раз."
+_REPLY_TEXT_MAX = 4000


 # ── Router invoke ──────────────────────────────────────────────────────────────
@@ -50,7 +51,7 @@ async def _invoke_router(
 ) -> str:
    """
    POST /v1/agents/{agent_id}/infer — returns response text string.
-    Field: response['response'] (confirmed from NODA1 test).
+    Field confirmed as 'response' on NODA1.
    Raises httpx.HTTPError on failure.
    """
    url = f"{router_url.rstrip('/')}/v1/agents/{agent_id}/infer"
@@ -66,7 +67,6 @@ async def _invoke_router(
    resp = await http_client.post(url, json=payload, timeout=_ROUTER_TIMEOUT_S)
    resp.raise_for_status()
    data = resp.json()
-    # Extract text — field confirmed as 'response'
    text = (
        data.get("response")
        or data.get("text")
@@ -95,10 +95,7 @@ async def _write_audit(
    duration_ms: Optional[int] = None,
    data: Optional[Dict[str, Any]] = None,
 ) -> None:
-    """
-    Fire-and-forget audit write to sofiia-console internal endpoint.
-    Never raises — logs warning on failure.
-    """
+    """Fire-and-forget audit write. Never raises."""
    if not console_url or not internal_token:
        return
    try:
@@ -131,12 +128,15 @@ async def _write_audit(

 class MatrixIngressLoop:
    """
-    Drives Matrix sync-poll → router-invoke → Matrix send_text pipeline.
+    Drives Matrix sync-poll → rate-check → router-invoke → Matrix send_text.

-    Usage:
-        loop = MatrixIngressLoop(...)
-        stop_event = asyncio.Event()
-        await loop.run(stop_event)
+    Metric callbacks (all optional, called synchronously):
+      on_message_received(room_id, agent_id)
+      on_message_replied(room_id, agent_id, status)
+      on_gateway_error(error_type)
+      on_rate_limited(room_id, agent_id, limit_type)       ← H1
+      on_invoke_latency(agent_id, duration_seconds)         ← H3
+      on_send_latency(agent_id, duration_seconds)           ← H3
    """

    def __init__(
@@ -149,9 +149,13 @@ class MatrixIngressLoop:
        room_map: RoomMappingConfig,
        sofiia_console_url: str = "",
        sofiia_internal_token: str = "",
+        rate_limiter: Optional[InMemoryRateLimiter] = None,
        on_message_received: Optional[Callable[[str, str], None]] = None,
        on_message_replied: Optional[Callable[[str, str, str], None]] = None,
        on_gateway_error: Optional[Callable[[str], None]] = None,
+        on_rate_limited: Optional[Callable[[str, str, str], None]] = None,
+        on_invoke_latency: Optional[Callable[[str, float], None]] = None,
+        on_send_latency: Optional[Callable[[str, float], None]] = None,
    ) -> None:
        self._hs_url = matrix_homeserver_url
        self._token = matrix_access_token
@@ -161,9 +165,13 @@ class MatrixIngressLoop:
        self._room_map = room_map
        self._console_url = sofiia_console_url
        self._internal_token = sofiia_internal_token
+        self._rate_limiter = rate_limiter
        self._on_message_received = on_message_received
        self._on_message_replied = on_message_replied
        self._on_gateway_error = on_gateway_error
+        self._on_rate_limited = on_rate_limited
+        self._on_invoke_latency = on_invoke_latency
+        self._on_send_latency = on_send_latency
        self._next_batch: Optional[str] = None

    @property
@@ -171,7 +179,6 @@ class MatrixIngressLoop:
        return self._next_batch

    async def run(self, stop_event: asyncio.Event) -> None:
-        """Main loop until stop_event is set."""
        backoff = _INIT_RETRY_BACKOFF
        logger.info(
            "Matrix ingress/egress loop started | hs=%s node=%s mappings=%d",
@@ -239,7 +246,27 @@ class MatrixIngressLoop:
        if not text:
            return

-        # Dedupe — mark seen before any IO (prevents double-process on retry)
+        # ── H1: Rate limit check ───────────────────────────────────────────────
+        if self._rate_limiter is not None:
+            allowed, limit_type = self._rate_limiter.check(room_id=room_id, sender=sender)
+            if not allowed:
+                logger.warning(
+                    "Rate limited: room=%s sender=%s limit_type=%s event=%s",
+                    room_id, sender, limit_type, event_id,
+                )
+                if self._on_rate_limited:
+                    self._on_rate_limited(room_id, agent_id, limit_type or "unknown")
+                await _write_audit(
+                    http_client, self._console_url, self._internal_token,
+                    event="matrix.rate_limited",
+                    agent_id=agent_id, node_id=self._node_id,
+                    room_id=room_id, event_id=event_id,
+                    status="error", error_code=f"rate_limit_{limit_type}",
+                    data={"sender": sender, "limit_type": limit_type},
+                )
+                return
+
+        # Dedupe — mark seen before any IO
        client.mark_seen(event_id)

        logger.info(
@@ -250,7 +277,6 @@ class MatrixIngressLoop:
        if self._on_message_received:
            self._on_message_received(room_id, agent_id)

-        # Audit: received
        await _write_audit(
            http_client, self._console_url, self._internal_token,
            event="matrix.message.received",
@@ -260,12 +286,13 @@ class MatrixIngressLoop:
            data={"sender": sender, "text_len": len(text)},
        )

-        # Session ID: stable per room (allows memory context across messages)
        session_id = f"matrix:{room_id.replace('!', '').replace(':', '_')}"

+        # ── H3: Invoke with latency measurement ───────────────────────────────
        t0 = time.monotonic()
        reply_text: Optional[str] = None
        invoke_ok = False
+        invoke_duration_s: float = 0.0

        try:
            reply_text = await _invoke_router(
@@ -277,14 +304,20 @@ class MatrixIngressLoop:
                session_id=session_id,
            )
            invoke_ok = True
-            duration_ms = int((time.monotonic() - t0) * 1000)
+            invoke_duration_s = time.monotonic() - t0
+            duration_ms = int(invoke_duration_s * 1000)
+
+            if self._on_invoke_latency:
+                self._on_invoke_latency(agent_id, invoke_duration_s)
+
            logger.info(
                "Router invoke ok: agent=%s event=%s reply_len=%d duration=%dms",
                agent_id, event_id, len(reply_text or ""), duration_ms,
            )

        except httpx.HTTPStatusError as exc:
-            duration_ms = int((time.monotonic() - t0) * 1000)
+            invoke_duration_s = time.monotonic() - t0
+            duration_ms = int(invoke_duration_s * 1000)
            logger.error(
                "Router HTTP %d for agent=%s event=%s duration=%dms",
                exc.response.status_code, agent_id, event_id, duration_ms,
@@ -301,7 +334,8 @@ class MatrixIngressLoop:
            )

        except (httpx.ConnectError, httpx.TimeoutException) as exc:
-            duration_ms = int((time.monotonic() - t0) * 1000)
+            invoke_duration_s = time.monotonic() - t0
+            duration_ms = int(invoke_duration_s * 1000)
            logger.error(
                "Router network error agent=%s event=%s: %s duration=%dms",
                agent_id, event_id, exc, duration_ms,
@@ -318,7 +352,8 @@ class MatrixIngressLoop:
            )

        except Exception as exc:
-            duration_ms = int((time.monotonic() - t0) * 1000)
+            invoke_duration_s = time.monotonic() - t0
+            duration_ms = int(invoke_duration_s * 1000)
            logger.error(
                "Unexpected router error agent=%s event=%s: %s",
                agent_id, event_id, exc,
@@ -334,24 +369,25 @@ class MatrixIngressLoop:
                duration_ms=duration_ms,
            )

-        # ── Egress: send reply back to Matrix ──────────────────────────────────
        if not invoke_ok:
-            # No reply on error in M1 — just audit (avoids spam in room)
            return

        if not reply_text:
            logger.warning("Empty reply from router for agent=%s event=%s", agent_id, event_id)
            return

-        # Truncate if needed
+        # ── H3: Send with latency measurement ─────────────────────────────────
        send_text = reply_text[:_REPLY_TEXT_MAX]
        txn_id = MatrixClient.make_txn_id(room_id, event_id)

        send_t0 = time.monotonic()
        try:
            await client.send_text(room_id, send_text, txn_id)
-            send_duration_ms = int((time.monotonic() - send_t0) * 1000)
+            send_duration_s = time.monotonic() - send_t0
+            send_duration_ms = int(send_duration_s * 1000)

+            if self._on_send_latency:
+                self._on_send_latency(agent_id, send_duration_s)
            if self._on_message_replied:
                self._on_message_replied(room_id, agent_id, "ok")

@@ -365,7 +401,7 @@ class MatrixIngressLoop:
                data={
                    "reply_len": len(send_text),
                    "truncated": len(reply_text) > _REPLY_TEXT_MAX,
-                    "router_duration_ms": duration_ms,
+                    "router_duration_ms": int(invoke_duration_s * 1000),
                },
            )
            logger.info(
@@ -374,7 +410,8 @@ class MatrixIngressLoop:
            )

        except Exception as exc:
-            send_duration_ms = int((time.monotonic() - send_t0) * 1000)
+            send_duration_s = time.monotonic() - send_t0
+            send_duration_ms = int(send_duration_s * 1000)
            logger.error(
                "Failed to send Matrix reply agent=%s event=%s: %s",
                agent_id, event_id, exc,