""" Matrix Client Wrapper — Phase M1 Provides minimal, idempotent Matrix CS-API calls: - send_text(room_id, text, txn_id) — idempotent PUT via txn_id - sync_poll(since) — GET /sync with timeout - join_room(room_id) — POST join if not already joined - whoami() — GET /account/whoami (for smoke) Design: - No state beyond in-memory dedupe LRU for incoming event_ids. - Idempotency: txn_id = sha256(room_id + source_event_id) for replies. - Retry: simple exponential backoff, max 3 attempts, surface errors up. - No asyncio background tasks here — caller drives the sync loop. """ import asyncio import hashlib import logging import time from collections import OrderedDict from typing import Any, Dict, List, Optional import httpx logger = logging.getLogger(__name__) # ── Dedupe LRU cache ─────────────────────────────────────────────────────────── class _LRUSet: """Fixed-size LRU set for event_id deduplication.""" def __init__(self, maxsize: int = 2048): self._data: OrderedDict[str, None] = OrderedDict() self._maxsize = maxsize def contains(self, key: str) -> bool: if key in self._data: self._data.move_to_end(key) return True return False def add(self, key: str) -> None: if key in self._data: self._data.move_to_end(key) return self._data[key] = None while len(self._data) > self._maxsize: self._data.popitem(last=False) # ── Matrix Client ────────────────────────────────────────────────────────────── class MatrixClient: """ Minimal async Matrix CS-API client for matrix-bridge-dagi M1. Usage: client = MatrixClient(homeserver_url, access_token, bot_user_id) async with client: await client.join_room(room_id) txn_id = MatrixClient.make_txn_id(room_id, event_id) await client.send_text(room_id, "Hello", txn_id) """ # Sync timeout: how long Matrix server holds the /sync connection open SYNC_TIMEOUT_MS = 30_000 # HTTP timeout for non-sync requests HTTP_TIMEOUT_S = 15.0 # Max retries for transient errors (429, 5xx, network) MAX_RETRIES = 3 # Initial backoff seconds (doubles each retry) BACKOFF_INITIAL = 1.0 def __init__( self, homeserver_url: str, access_token: str, bot_user_id: str, dedupe_maxsize: int = 2048, ) -> None: self._hs = homeserver_url.rstrip("/") self._token = access_token self._bot_user_id = bot_user_id self._dedupe = _LRUSet(dedupe_maxsize) self._client: Optional[httpx.AsyncClient] = None # ── Context manager ──────────────────────────────────────────────────────── async def __aenter__(self) -> "MatrixClient": self._client = httpx.AsyncClient( headers={ "Authorization": f"Bearer {self._token}", "Content-Type": "application/json", "User-Agent": "matrix-bridge-dagi/0.1", }, timeout=self.HTTP_TIMEOUT_S, follow_redirects=True, ) return self async def __aexit__(self, *_: Any) -> None: if self._client: await self._client.aclose() self._client = None # ── Helpers ──────────────────────────────────────────────────────────────── @staticmethod def make_txn_id(room_id: str, source_event_id: str) -> str: """ Deterministic txn_id for idempotent reply sends. SHA-256 of (room_id + source_event_id), hex[:32]. """ raw = f"{room_id}:{source_event_id}".encode() return hashlib.sha256(raw).hexdigest()[:32] def is_duplicate(self, event_id: str) -> bool: """True if event_id was seen before (dedupe for incoming events).""" return self._dedupe.contains(event_id) def mark_seen(self, event_id: str) -> None: """Mark incoming event_id as processed.""" self._dedupe.add(event_id) def _url(self, path: str) -> str: return f"{self._hs}/_matrix/client/v3{path}" def _ensure_client(self) -> httpx.AsyncClient: if self._client is None: raise RuntimeError( "MatrixClient not initialised — use 'async with MatrixClient(...) as client:'" ) return self._client async def _request_with_retry( self, method: str, path: str, *, json: Optional[Dict[str, Any]] = None, params: Optional[Dict[str, Any]] = None, timeout: Optional[float] = None, ) -> Dict[str, Any]: """ Execute HTTP request with exponential backoff on 429/5xx. Raises httpx.HTTPStatusError on final failure. """ client = self._ensure_client() url = self._url(path) backoff = self.BACKOFF_INITIAL for attempt in range(1, self.MAX_RETRIES + 1): try: resp = await client.request( method, url, json=json, params=params, timeout=timeout or self.HTTP_TIMEOUT_S, ) if resp.status_code == 429: retry_after = float( resp.json().get("retry_after_ms", backoff * 1000) ) / 1000.0 logger.warning( "Rate limited by homeserver, retry in %.1fs (attempt %d/%d)", retry_after, attempt, self.MAX_RETRIES, ) if attempt < self.MAX_RETRIES: await asyncio.sleep(retry_after) backoff *= 2 continue resp.raise_for_status() return resp.json() except (httpx.ConnectError, httpx.TimeoutException) as exc: logger.warning( "Network error on %s %s (attempt %d/%d): %s", method, path, attempt, self.MAX_RETRIES, exc, ) if attempt < self.MAX_RETRIES: await asyncio.sleep(backoff) backoff *= 2 else: raise except httpx.HTTPStatusError as exc: if exc.response.status_code >= 500 and attempt < self.MAX_RETRIES: logger.warning( "Server error %d on %s %s, retry (attempt %d/%d)", exc.response.status_code, method, path, attempt, self.MAX_RETRIES, ) await asyncio.sleep(backoff) backoff *= 2 else: raise raise RuntimeError(f"Exhausted {self.MAX_RETRIES} retries for {method} {path}") # ── Public API ───────────────────────────────────────────────────────────── async def whoami(self) -> Dict[str, Any]: """GET /account/whoami — returns {user_id, device_id, ...}""" return await self._request_with_retry("GET", "/account/whoami") async def join_room(self, room_id: str) -> Dict[str, Any]: """ POST /join/{room_id} — join the room (safe to call if already joined; homeserver returns 200 with room_id). """ encoded = room_id.replace("!", "%21").replace(":", "%3A") result = await self._request_with_retry("POST", f"/join/{encoded}", json={}) logger.info("Joined room %s", room_id) return result async def send_text( self, room_id: str, text: str, txn_id: str ) -> Dict[str, Any]: """ PUT /rooms/{room_id}/send/m.room.message/{txn_id} Idempotent: same txn_id → homeserver deduplicates. Returns {event_id: ...} """ encoded_room = room_id.replace("!", "%21").replace(":", "%3A") path = f"/rooms/{encoded_room}/send/m.room.message/{txn_id}" result = await self._request_with_retry( "PUT", path, json={"msgtype": "m.text", "body": text}, ) logger.debug("Sent message to %s: event_id=%s", room_id, result.get("event_id")) return result async def sync_poll( self, since: Optional[str] = None, filter_id: Optional[str] = None, timeout_ms: int = SYNC_TIMEOUT_MS, ) -> Dict[str, Any]: """ GET /sync — long-poll for new events. Returns raw sync response dict. Args: since: next_batch token from previous sync (None for initial). filter_id: optional filter (to limit event types/rooms). timeout_ms: how long server should hold the request (default 30s). """ params: Dict[str, Any] = {"timeout": timeout_ms} if since: params["since"] = since if filter_id: params["filter"] = filter_id # Add extra buffer beyond timeout_ms for our HTTP timeout http_timeout = (timeout_ms / 1000.0) + 15.0 return await self._request_with_retry( "GET", "/sync", params=params, timeout=http_timeout ) def extract_room_messages( self, sync_response: Dict[str, Any], room_id: str, ) -> List[Dict[str, Any]]: """ Extract new m.room.message events for a specific room from a sync response. Filters out: - messages from the bot itself (_bot_user_id) - already-seen event_ids (dedupe) Returns list of event dicts with at least: {event_id, sender, content, origin_server_ts}. """ rooms_data = sync_response.get("rooms", {}) join_data = rooms_data.get("join", {}) room_data = join_data.get(room_id, {}) timeline = room_data.get("timeline", {}) events = timeline.get("events", []) results = [] for event in events: if event.get("type") != "m.room.message": continue event_id = event.get("event_id", "") sender = event.get("sender", "") # Skip own messages if sender == self._bot_user_id: continue # Skip already-processed if self.is_duplicate(event_id): continue # Only text messages for M1 content = event.get("content", {}) if content.get("msgtype") != "m.text": continue results.append(event) return results