Files
microdao-daarion/services/matrix-bridge-dagi/app/ingress.py
Apple a4e95482bc feat(matrix-bridge-dagi): add rate limiting (H1) and metrics (H3)
H1 — InMemoryRateLimiter (sliding window, no Redis):
  - Per-room: RATE_LIMIT_ROOM_RPM (default 20/min)
  - Per-sender: RATE_LIMIT_SENDER_RPM (default 10/min)
  - Room checked before sender — sender quota not charged on room block
  - Blocked messages: audit matrix.rate_limited + on_rate_limited callback
  - reset() for ops/test, stats() exposed in /health

H3 — Extended Prometheus metrics:
  - matrix_bridge_rate_limited_total{room_id,agent_id,limit_type}
  - matrix_bridge_send_duration_seconds histogram (invoke was already there)
  - matrix_bridge_invoke_duration_seconds buckets tuned for LLM latency
  - matrix_bridge_rate_limiter_active_rooms/senders gauges
  - on_invoke_latency + on_send_latency callbacks wired in ingress loop

16 new tests: rate limiter unit (13) + ingress integration (3)
Total: 65 passed

Made-with: Cursor
2026-03-05 00:54:14 -08:00

431 lines
17 KiB
Python

"""
Matrix Ingress + Egress Loop — Phase M1.4 + H1/H3
Polls Matrix /sync for new messages, invokes DAGI Router for mapped rooms,
sends agent replies back to Matrix, writes audit events to sofiia-console.
Pipeline:
sync_poll() → extract_room_messages()
→ for each message:
1. rate_limit check (room + sender) ← H1
2. dedupe (mark_seen)
3. audit: matrix.message.received
4. invoke DAGI Router (timed → on_invoke_latency) ← H3
5. send_text() reply (timed → on_send_latency) ← H3
6. audit: matrix.agent.replied | matrix.error
Graceful shutdown via asyncio.Event.
"""
import asyncio
import logging
import time
from typing import Any, Callable, Dict, List, Optional
import httpx
from .matrix_client import MatrixClient
from .rate_limit import InMemoryRateLimiter
from .room_mapping import RoomMappingConfig
logger = logging.getLogger(__name__)
# ── Constants ──────────────────────────────────────────────────────────────────
# Ceiling (seconds) for the sync-retry delay: the delay doubles after each
# failed loop iteration but is never allowed to exceed this value.
_MAX_RETRY_BACKOFF = 60.0
# Retry delay (seconds) used for the first failure and restored after every
# successful sync poll.
_INIT_RETRY_BACKOFF = 2.0
# Timeout (seconds) passed to the HTTP POST that invokes the DAGI Router.
_ROUTER_TIMEOUT_S = 45.0
# Timeout (seconds) passed to the HTTP POST that writes an audit event.
_AUDIT_TIMEOUT_S = 5.0
# Maximum number of characters of a router reply that is sent to Matrix;
# longer replies are cut to this length.
_REPLY_TEXT_MAX = 4000
# ── Router invoke ──────────────────────────────────────────────────────────────
async def _invoke_router(
    http_client: httpx.AsyncClient,
    router_url: str,
    agent_id: str,
    node_id: str,
    prompt: str,
    session_id: str,
) -> str:
    """
    Ask the DAGI Router to run inference for one agent and return its reply.

    Sends POST {router_url}/v1/agents/{agent_id}/infer carrying the prompt,
    the session id and Matrix transport metadata (including node_id).

    The reply is taken from the first non-empty field among 'response',
    'text', 'content' and 'message' ('response' is the field confirmed on
    NODA1). A non-string value is converted with str(); the result is
    stripped of surrounding whitespace. Returns "" when none of the fields
    carries a value.

    Raises httpx.HTTPError on failure (raise_for_status() is applied to the
    router response).
    """
    endpoint = router_url.rstrip("/") + f"/v1/agents/{agent_id}/infer"
    request_body = {
        "prompt": prompt,
        "session_id": session_id,
        "user_id": "matrix_bridge",
        "metadata": {"transport": "matrix", "node_id": node_id},
    }

    response = await http_client.post(
        endpoint, json=request_body, timeout=_ROUTER_TIMEOUT_S
    )
    response.raise_for_status()
    body = response.json()

    # Probe the known reply fields in priority order; stop at the first
    # truthy value so later fields are not even looked up.
    reply: Any = ""
    for field in ("response", "text", "content", "message"):
        candidate = body.get(field)
        if candidate:
            reply = candidate
            break

    return (reply if isinstance(reply, str) else str(reply)).strip()
# ── Audit write ────────────────────────────────────────────────────────────────
async def _write_audit(
    http_client: httpx.AsyncClient,
    console_url: str,
    internal_token: str,
    event: str,
    agent_id: str,
    node_id: str,
    room_id: str,
    event_id: str,
    status: str = "ok",
    error_code: Optional[str] = None,
    duration_ms: Optional[int] = None,
    data: Optional[Dict[str, Any]] = None,
) -> None:
    """
    Best-effort audit write to the console. Any Exception is logged and swallowed.

    POSTs one audit record to {console_url}/api/audit/internal, authenticated
    with the X-Internal-Service-Token header. The call is awaited (with
    _AUDIT_TIMEOUT_S as the request timeout), so the caller waits for the
    console to answer, but a failure never propagates to the caller.

    Does nothing when console_url or internal_token is empty.

    The record's "data" object always contains matrix_event_id and
    matrix_room_id; entries of the optional `data` argument are merged on top
    of them.

    A 4xx/5xx answer from the console (e.g. a rejected token) is treated as a
    failed write and logged, instead of being silently ignored.
    """
    if not console_url or not internal_token:
        return
    try:
        url = f"{console_url.rstrip('/')}/api/audit/internal"
        resp = await http_client.post(
            url,
            json={
                "event": event,
                "operator_id": "matrix_bridge",
                "node_id": node_id,
                "agent_id": agent_id,
                "chat_id": room_id,
                "status": status,
                "error_code": error_code,
                "duration_ms": duration_ms,
                "data": {
                    "matrix_event_id": event_id,
                    "matrix_room_id": room_id,
                    **(data or {}),
                },
            },
            headers={"X-Internal-Service-Token": internal_token},
            timeout=_AUDIT_TIMEOUT_S,
        )
        # Surface rejected writes: without this an HTTP error status would
        # look exactly like a successful audit write.
        resp.raise_for_status()
    except Exception as exc:
        logger.warning("Audit write failed (non-blocking): %s", exc)
# ── Ingress loop ───────────────────────────────────────────────────────────────
class MatrixIngressLoop:
    """
    Drives Matrix sync-poll → rate-check → router-invoke → Matrix send_text.

    Metric callbacks (all optional, called synchronously):
        on_message_received(room_id, agent_id)
        on_message_replied(room_id, agent_id, status)
        on_gateway_error(error_type)
        on_rate_limited(room_id, agent_id, limit_type)   ← H1
        on_invoke_latency(agent_id, duration_seconds)    ← H3
        on_send_latency(agent_id, duration_seconds)      ← H3
    """

    def __init__(
        self,
        matrix_homeserver_url: str,
        matrix_access_token: str,
        matrix_user_id: str,
        router_url: str,
        node_id: str,
        room_map: RoomMappingConfig,
        sofiia_console_url: str = "",
        sofiia_internal_token: str = "",
        rate_limiter: Optional[InMemoryRateLimiter] = None,
        on_message_received: Optional[Callable[[str, str], None]] = None,
        on_message_replied: Optional[Callable[[str, str, str], None]] = None,
        on_gateway_error: Optional[Callable[[str], None]] = None,
        on_rate_limited: Optional[Callable[[str, str, str], None]] = None,
        on_invoke_latency: Optional[Callable[[str, float], None]] = None,
        on_send_latency: Optional[Callable[[str, float], None]] = None,
    ) -> None:
        """
        Store connection settings, the room→agent mapping and optional hooks.

        Audit writes are skipped when sofiia_console_url or
        sofiia_internal_token is empty; rate limiting is skipped when
        rate_limiter is None. No I/O happens here.
        """
        self._hs_url = matrix_homeserver_url
        self._token = matrix_access_token
        self._user_id = matrix_user_id
        self._router_url = router_url
        self._node_id = node_id
        self._room_map = room_map
        self._console_url = sofiia_console_url
        self._internal_token = sofiia_internal_token
        self._rate_limiter = rate_limiter
        self._on_message_received = on_message_received
        self._on_message_replied = on_message_replied
        self._on_gateway_error = on_gateway_error
        self._on_rate_limited = on_rate_limited
        self._on_invoke_latency = on_invoke_latency
        self._on_send_latency = on_send_latency
        # Sync token returned by the most recent successful poll (None until then).
        self._next_batch: Optional[str] = None

    @property
    def next_batch(self) -> Optional[str]:
        """Sync token from the last successful poll, or None before the first one."""
        return self._next_batch

    async def run(self, stop_event: asyncio.Event) -> None:
        """
        Run the ingress/egress loop until stop_event is set or the task is cancelled.

        First tries to join every mapped room whose agent is allowed (join
        failures are logged and ignored). Then repeatedly calls
        client.sync_poll() and processes the result.

        On any error in an iteration: the error is logged, on_gateway_error
        receives "sync_error", and the loop waits for the current backoff (or
        until stop_event is set, whichever comes first) before retrying. The
        backoff doubles after each failure up to _MAX_RETRY_BACKOFF and is
        reset to _INIT_RETRY_BACKOFF after a successful poll.
        """
        backoff = _INIT_RETRY_BACKOFF
        logger.info(
            "Matrix ingress/egress loop started | hs=%s node=%s mappings=%d",
            self._hs_url, self._node_id, self._room_map.total_mappings,
        )
        if self._room_map.total_mappings == 0:
            logger.warning("No room mappings — ingress loop is idle")

        async with MatrixClient(self._hs_url, self._token, self._user_id) as client:
            for mapping in self._room_map.mappings:
                if mapping.agent_id in self._room_map.allowed_agents:
                    try:
                        await client.join_room(mapping.room_id)
                    except Exception as exc:
                        logger.warning("Could not join room %s: %s", mapping.room_id, exc)

            async with httpx.AsyncClient() as http_client:
                while not stop_event.is_set():
                    try:
                        sync_resp = await client.sync_poll(since=self._next_batch)
                        # The token is advanced before processing, so the next
                        # poll continues from it even if processing this batch fails.
                        self._next_batch = sync_resp.get("next_batch")
                        backoff = _INIT_RETRY_BACKOFF
                        await self._process_sync(client, http_client, sync_resp)
                    except asyncio.CancelledError:
                        # Cancellation ends the loop; both clients are closed
                        # by their context managers on the way out.
                        break
                    except Exception as exc:
                        logger.error("Ingress loop error (retry in %.1fs): %s", backoff, exc)
                        if self._on_gateway_error:
                            self._on_gateway_error("sync_error")
                        # Sleep for `backoff`, but wake immediately on shutdown.
                        try:
                            await asyncio.wait_for(stop_event.wait(), timeout=backoff)
                        except asyncio.TimeoutError:
                            pass
                        backoff = min(backoff * 2, _MAX_RETRY_BACKOFF)

        logger.info("Matrix ingress/egress loop stopped")

    async def _process_sync(
        self,
        client: MatrixClient,
        http_client: httpx.AsyncClient,
        sync_resp: Dict[str, Any],
    ) -> None:
        """Handle, in order, every message of each mapped room whose agent is allowed."""
        for mapping in self._room_map.mappings:
            if mapping.agent_id not in self._room_map.allowed_agents:
                continue
            messages = client.extract_room_messages(sync_resp, mapping.room_id)
            for event in messages:
                await self._handle_message(client, http_client, event, mapping)

    @staticmethod
    def _extract_text(event: Dict[str, Any]) -> str:
        """Return the stripped message body, or "" when it is missing or malformed."""
        content = event.get("content")
        body = content.get("body") if isinstance(content, dict) else None
        return body.strip() if isinstance(body, str) else ""

    async def _audit(
        self,
        http_client: httpx.AsyncClient,
        event: str,
        agent_id: str,
        room_id: str,
        event_id: str,
        **fields: Any,
    ) -> None:
        """Write one audit event using this loop's console URL, token and node id."""
        await _write_audit(
            http_client, self._console_url, self._internal_token,
            event=event,
            agent_id=agent_id, node_id=self._node_id,
            room_id=room_id, event_id=event_id,
            **fields,
        )

    async def _report_rate_limited(
        self,
        http_client: httpx.AsyncClient,
        room_id: str,
        agent_id: str,
        sender: str,
        event_id: str,
        limit_type: Optional[str],
    ) -> None:
        """Log, count and audit a message rejected by the rate limiter."""
        # One label for log, metric and audit, so a missing limit type shows
        # up as "unknown" everywhere instead of "rate_limit_None" in the audit.
        limit_label = limit_type or "unknown"
        logger.warning(
            "Rate limited: room=%s sender=%s limit_type=%s event=%s",
            room_id, sender, limit_label, event_id,
        )
        if self._on_rate_limited:
            self._on_rate_limited(room_id, agent_id, limit_label)
        await self._audit(
            http_client, "matrix.rate_limited", agent_id, room_id, event_id,
            status="error", error_code=f"rate_limit_{limit_label}",
            data={"sender": sender, "limit_type": limit_label},
        )

    async def _report_router_failure(
        self,
        http_client: httpx.AsyncClient,
        agent_id: str,
        room_id: str,
        event_id: str,
        gateway_error: str,
        error_code: str,
        duration_ms: int,
    ) -> None:
        """Count a failed router invoke and write the matching matrix.error audit."""
        if self._on_gateway_error:
            self._on_gateway_error(gateway_error)
        await self._audit(
            http_client, "matrix.error", agent_id, room_id, event_id,
            status="error", error_code=error_code,
            duration_ms=duration_ms,
        )

    async def _handle_message(
        self,
        client: MatrixClient,
        http_client: httpx.AsyncClient,
        event: Dict[str, Any],
        mapping,
    ) -> None:
        """
        Run one Matrix message through the pipeline.

        Steps: extract text → rate-limit check (H1) → mark seen → audit
        matrix.message.received → invoke the router (timed, H3) → send the
        reply to the room (timed, H3) → audit matrix.agent.replied or
        matrix.error.

        Events without usable text are skipped silently. Router and send
        failures are logged, counted via on_gateway_error and audited; they
        do not raise out of this method.
        """
        event_id = event.get("event_id", "")
        sender = event.get("sender", "")
        room_id = mapping.room_id
        agent_id = mapping.agent_id

        # The event is used as delivered by the sync response, so it may lack
        # a dict "content" or a string "body". Such events are skipped here
        # rather than raising and aborting the rest of the batch.
        text = self._extract_text(event)
        if not text:
            return

        # ── H1: Rate limit check ───────────────────────────────────────────
        if self._rate_limiter is not None:
            allowed, limit_type = self._rate_limiter.check(room_id=room_id, sender=sender)
            if not allowed:
                await self._report_rate_limited(
                    http_client, room_id, agent_id, sender, event_id, limit_type,
                )
                return

        # Dedupe — mark seen before any IO
        client.mark_seen(event_id)
        logger.info(
            "Matrix message: room=%s sender=%s agent=%s event=%s len=%d",
            room_id, sender, agent_id, event_id, len(text),
        )
        if self._on_message_received:
            self._on_message_received(room_id, agent_id)
        await self._audit(
            http_client, "matrix.message.received", agent_id, room_id, event_id,
            status="ok",
            data={"sender": sender, "text_len": len(text)},
        )

        # One router session per room: "!" is dropped and ":" becomes "_".
        session_id = f"matrix:{room_id.replace('!', '').replace(':', '_')}"

        # ── H3: Invoke with latency measurement ────────────────────────────
        # NOTE(review): success bookkeeping stays inside the try, so an
        # exception raised by on_invoke_latency is reported as
        # "router_unexpected" while the reply is still sent afterwards.
        t0 = time.monotonic()
        reply_text: Optional[str] = None
        invoke_ok = False
        invoke_duration_s: float = 0.0
        try:
            reply_text = await _invoke_router(
                http_client,
                self._router_url,
                agent_id=agent_id,
                node_id=self._node_id,
                prompt=text,
                session_id=session_id,
            )
            invoke_ok = True
            invoke_duration_s = time.monotonic() - t0
            duration_ms = int(invoke_duration_s * 1000)
            if self._on_invoke_latency:
                self._on_invoke_latency(agent_id, invoke_duration_s)
            logger.info(
                "Router invoke ok: agent=%s event=%s reply_len=%d duration=%dms",
                agent_id, event_id, len(reply_text or ""), duration_ms,
            )
        except httpx.HTTPStatusError as exc:
            invoke_duration_s = time.monotonic() - t0
            duration_ms = int(invoke_duration_s * 1000)
            status_code = exc.response.status_code
            logger.error(
                "Router HTTP %d for agent=%s event=%s duration=%dms",
                status_code, agent_id, event_id, duration_ms,
            )
            await self._report_router_failure(
                http_client, agent_id, room_id, event_id,
                gateway_error=f"http_{status_code}",
                error_code=f"router_http_{status_code}",
                duration_ms=duration_ms,
            )
        except (httpx.ConnectError, httpx.TimeoutException) as exc:
            invoke_duration_s = time.monotonic() - t0
            duration_ms = int(invoke_duration_s * 1000)
            logger.error(
                "Router network error agent=%s event=%s: %s duration=%dms",
                agent_id, event_id, exc, duration_ms,
            )
            await self._report_router_failure(
                http_client, agent_id, room_id, event_id,
                gateway_error="network_error",
                error_code="router_network_error",
                duration_ms=duration_ms,
            )
        except Exception as exc:
            invoke_duration_s = time.monotonic() - t0
            duration_ms = int(invoke_duration_s * 1000)
            logger.error(
                "Unexpected router error agent=%s event=%s: %s",
                agent_id, event_id, exc,
            )
            await self._report_router_failure(
                http_client, agent_id, room_id, event_id,
                gateway_error="unexpected",
                error_code="router_unexpected",
                duration_ms=duration_ms,
            )

        if not invoke_ok:
            return
        if not reply_text:
            logger.warning("Empty reply from router for agent=%s event=%s", agent_id, event_id)
            return

        await self._send_reply(
            client, http_client, room_id, agent_id, event_id,
            reply_text, invoke_duration_s,
        )

    async def _send_reply(
        self,
        client: MatrixClient,
        http_client: httpx.AsyncClient,
        room_id: str,
        agent_id: str,
        event_id: str,
        reply_text: str,
        invoke_duration_s: float,
    ) -> None:
        """
        Send the agent reply to the room and record the outcome (H3).

        The reply is cut to _REPLY_TEXT_MAX characters. On success the send
        latency and an "ok" reply are reported and matrix.agent.replied is
        audited; on failure an "error" reply and "matrix_send_error" are
        reported and matrix.error (matrix_send_failed) is audited.
        """
        send_text = reply_text[:_REPLY_TEXT_MAX]
        # The transaction id is derived from the room and the source event.
        txn_id = MatrixClient.make_txn_id(room_id, event_id)
        send_t0 = time.monotonic()
        try:
            await client.send_text(room_id, send_text, txn_id)
            send_duration_s = time.monotonic() - send_t0
            send_duration_ms = int(send_duration_s * 1000)
            if self._on_send_latency:
                self._on_send_latency(agent_id, send_duration_s)
            if self._on_message_replied:
                self._on_message_replied(room_id, agent_id, "ok")
            await self._audit(
                http_client, "matrix.agent.replied", agent_id, room_id, event_id,
                status="ok",
                duration_ms=send_duration_ms,
                data={
                    "reply_len": len(send_text),
                    "truncated": len(reply_text) > _REPLY_TEXT_MAX,
                    "router_duration_ms": int(invoke_duration_s * 1000),
                },
            )
            logger.info(
                "Reply sent: agent=%s event=%s reply_len=%d send_ms=%d",
                agent_id, event_id, len(send_text), send_duration_ms,
            )
        except Exception as exc:
            send_duration_s = time.monotonic() - send_t0
            send_duration_ms = int(send_duration_s * 1000)
            logger.error(
                "Failed to send Matrix reply agent=%s event=%s: %s",
                agent_id, event_id, exc,
            )
            if self._on_message_replied:
                self._on_message_replied(room_id, agent_id, "error")
            if self._on_gateway_error:
                self._on_gateway_error("matrix_send_error")
            await self._audit(
                http_client, "matrix.error", agent_id, room_id, event_id,
                status="error", error_code="matrix_send_failed",
                duration_ms=send_duration_ms,
            )