Files
microdao-daarion/services/matrix-bridge-dagi/app/matrix_client.py
Apple d8506da179 feat(matrix-bridge-dagi): add matrix client wrapper and synapse setup (PR-M1.1)
- adds MatrixClient with send_text/sync_poll/join_room/whoami (idempotent via txn_id)
- LRU dedupe for incoming event_ids (2048 capacity)
- exponential backoff retry (max 3 attempts) for 429/5xx/network errors
- extract_room_messages: filters own messages, non-text, duplicates
- health endpoint now probes matrix_reachable + gateway_reachable at startup
- adds docker-compose.synapse-node1.yml (Synapse + Postgres for NODA1)
- adds ops/runbook-matrix-setup.md (10-step setup: DNS, config, bot, room, .env)
- 19 tests passing, no real Synapse required

Made-with: Cursor
2026-03-03 07:38:54 -08:00

309 lines
11 KiB
Python

"""
Matrix Client Wrapper — Phase M1
Provides minimal, idempotent Matrix CS-API calls:
- send_text(room_id, text, txn_id) — idempotent PUT via txn_id
- sync_poll(since) — GET /sync with timeout
- join_room(room_id) — POST join if not already joined
- whoami() — GET /account/whoami (for smoke)
Design:
- No state beyond in-memory dedupe LRU for incoming event_ids.
- Idempotency: txn_id = sha256(room_id + source_event_id) for replies.
- Retry: simple exponential backoff, max 3 attempts, surface errors up.
- No asyncio background tasks here — caller drives the sync loop.
"""
import asyncio
import hashlib
import logging
import time
from collections import OrderedDict
from typing import Any, Dict, List, Optional
from urllib.parse import quote

import httpx
logger = logging.getLogger(__name__)
# ── Dedupe LRU cache ───────────────────────────────────────────────────────────
class _LRUSet:
    """Bounded set with least-recently-used eviction (event_id dedupe)."""

    def __init__(self, maxsize: int = 2048):
        # OrderedDict doubles as an ordered set: keys carry the data,
        # values are always None; insertion order tracks recency.
        self._data: OrderedDict[str, None] = OrderedDict()
        self._maxsize = maxsize

    def contains(self, key: str) -> bool:
        """Return True if *key* is present, refreshing its recency."""
        found = key in self._data
        if found:
            self._data.move_to_end(key)
        return found

    def add(self, key: str) -> None:
        """Insert *key* (or refresh it) and evict the oldest past capacity."""
        self._data[key] = None
        self._data.move_to_end(key)
        while len(self._data) > self._maxsize:
            # last=False pops the least-recently-used entry.
            self._data.popitem(last=False)
# ── Matrix Client ──────────────────────────────────────────────────────────────
class MatrixClient:
    """
    Minimal async Matrix CS-API client for matrix-bridge-dagi M1.

    Provides idempotent message sends (caller-supplied txn_id), long-poll
    /sync, room join, and whoami.  Holds no state beyond an in-memory LRU
    set used to deduplicate incoming event_ids; the caller drives the sync
    loop — no background tasks are spawned here.

    Usage:
        client = MatrixClient(homeserver_url, access_token, bot_user_id)
        async with client:
            await client.join_room(room_id)
            txn_id = MatrixClient.make_txn_id(room_id, event_id)
            await client.send_text(room_id, "Hello", txn_id)
    """

    # Sync timeout: how long the Matrix server holds the /sync connection open
    SYNC_TIMEOUT_MS = 30_000
    # HTTP timeout for non-sync requests (seconds)
    HTTP_TIMEOUT_S = 15.0
    # Max attempts for transient errors (429, 5xx, network)
    MAX_RETRIES = 3
    # Initial backoff seconds (doubles each retry)
    BACKOFF_INITIAL = 1.0

    def __init__(
        self,
        homeserver_url: str,
        access_token: str,
        bot_user_id: str,
        dedupe_maxsize: int = 2048,
    ) -> None:
        """
        Args:
            homeserver_url: base homeserver URL (a trailing slash is tolerated).
            access_token: bot access token, sent as a Bearer header.
            bot_user_id: full MXID of the bot; its own messages are filtered out.
            dedupe_maxsize: capacity of the incoming event_id dedupe LRU.
        """
        self._hs = homeserver_url.rstrip("/")
        self._token = access_token
        self._bot_user_id = bot_user_id
        self._dedupe = _LRUSet(dedupe_maxsize)
        self._client: Optional["httpx.AsyncClient"] = None

    # ── Context manager ────────────────────────────────────────────────────

    async def __aenter__(self) -> "MatrixClient":
        self._client = httpx.AsyncClient(
            headers={
                "Authorization": f"Bearer {self._token}",
                "Content-Type": "application/json",
                "User-Agent": "matrix-bridge-dagi/0.1",
            },
            timeout=self.HTTP_TIMEOUT_S,
            follow_redirects=True,
        )
        return self

    async def __aexit__(self, *_: Any) -> None:
        if self._client:
            await self._client.aclose()
        self._client = None

    # ── Helpers ────────────────────────────────────────────────────────────

    @staticmethod
    def make_txn_id(room_id: str, source_event_id: str) -> str:
        """
        Deterministic txn_id for idempotent reply sends.
        SHA-256 of "room_id:source_event_id", hex[:32].
        """
        raw = f"{room_id}:{source_event_id}".encode()
        return hashlib.sha256(raw).hexdigest()[:32]

    def is_duplicate(self, event_id: str) -> bool:
        """True if event_id was seen before (dedupe for incoming events)."""
        return self._dedupe.contains(event_id)

    def mark_seen(self, event_id: str) -> None:
        """Mark incoming event_id as processed."""
        self._dedupe.add(event_id)

    def _url(self, path: str) -> str:
        """Build a full CS-API v3 URL; *path* must start with '/'."""
        return f"{self._hs}/_matrix/client/v3{path}"

    def _ensure_client(self) -> "httpx.AsyncClient":
        """Return the live HTTP client, failing fast outside 'async with'."""
        if self._client is None:
            raise RuntimeError(
                "MatrixClient not initialised — use 'async with MatrixClient(...) as client:'"
            )
        return self._client

    async def _request_with_retry(
        self,
        method: str,
        path: str,
        *,
        json: Optional[Dict[str, Any]] = None,
        params: Optional[Dict[str, Any]] = None,
        timeout: Optional[float] = None,
    ) -> Dict[str, Any]:
        """
        Execute an HTTP request with exponential backoff on 429/5xx/network.

        Returns the decoded JSON body on success.
        Raises httpx.HTTPStatusError (or a transport error) on final failure.
        """
        client = self._ensure_client()
        url = self._url(path)
        backoff = self.BACKOFF_INITIAL
        for attempt in range(1, self.MAX_RETRIES + 1):
            try:
                resp = await client.request(
                    method,
                    url,
                    json=json,
                    params=params,
                    timeout=timeout or self.HTTP_TIMEOUT_S,
                )
                if resp.status_code == 429:
                    # Honour the server's retry_after_ms hint when present.
                    # The 429 body may not be JSON, so fall back to our own
                    # backoff instead of letting JSONDecodeError escape.
                    try:
                        retry_after = float(
                            resp.json().get("retry_after_ms", backoff * 1000)
                        ) / 1000.0
                    except ValueError:
                        retry_after = backoff
                    logger.warning(
                        "Rate limited by homeserver, retry in %.1fs (attempt %d/%d)",
                        retry_after, attempt, self.MAX_RETRIES,
                    )
                    if attempt < self.MAX_RETRIES:
                        await asyncio.sleep(retry_after)
                        backoff *= 2
                        continue
                resp.raise_for_status()
                return resp.json()
            except httpx.TransportError as exc:
                # Parent of ConnectError/TimeoutException/ReadError/etc. —
                # all transient network failures, not just the first two.
                logger.warning(
                    "Network error on %s %s (attempt %d/%d): %s",
                    method, path, attempt, self.MAX_RETRIES, exc,
                )
                if attempt < self.MAX_RETRIES:
                    await asyncio.sleep(backoff)
                    backoff *= 2
                else:
                    raise
            except httpx.HTTPStatusError as exc:
                if exc.response.status_code >= 500 and attempt < self.MAX_RETRIES:
                    logger.warning(
                        "Server error %d on %s %s, retry (attempt %d/%d)",
                        exc.response.status_code, method, path, attempt, self.MAX_RETRIES,
                    )
                    await asyncio.sleep(backoff)
                    backoff *= 2
                else:
                    raise
        raise RuntimeError(f"Exhausted {self.MAX_RETRIES} retries for {method} {path}")

    # ── Public API ─────────────────────────────────────────────────────────

    async def whoami(self) -> Dict[str, Any]:
        """GET /account/whoami — returns {user_id, device_id, ...}"""
        return await self._request_with_retry("GET", "/account/whoami")

    async def join_room(self, room_id: str) -> Dict[str, Any]:
        """
        POST /join/{room_id} — join the room (safe to call if already joined;
        homeserver returns 200 with room_id).
        """
        # quote(..., safe="") percent-encodes '!' and ':' plus anything else
        # ('/', '#', non-ASCII) — more robust than hand-rolled replacement.
        result = await self._request_with_retry(
            "POST", f"/join/{quote(room_id, safe='')}", json={}
        )
        logger.info("Joined room %s", room_id)
        return result

    async def send_text(
        self, room_id: str, text: str, txn_id: str
    ) -> Dict[str, Any]:
        """
        PUT /rooms/{room_id}/send/m.room.message/{txn_id}
        Idempotent: same txn_id → homeserver deduplicates.
        Returns {event_id: ...}
        """
        # txn_id from make_txn_id is hex, so only the room id needs quoting.
        path = f"/rooms/{quote(room_id, safe='')}/send/m.room.message/{txn_id}"
        result = await self._request_with_retry(
            "PUT",
            path,
            json={"msgtype": "m.text", "body": text},
        )
        logger.debug("Sent message to %s: event_id=%s", room_id, result.get("event_id"))
        return result

    async def sync_poll(
        self,
        since: Optional[str] = None,
        filter_id: Optional[str] = None,
        timeout_ms: int = SYNC_TIMEOUT_MS,
    ) -> Dict[str, Any]:
        """
        GET /sync — long-poll for new events.
        Returns raw sync response dict.

        Args:
            since: next_batch token from previous sync (None for initial).
            filter_id: optional filter (to limit event types/rooms).
            timeout_ms: how long server should hold the request (default 30s).
        """
        params: Dict[str, Any] = {"timeout": timeout_ms}
        if since:
            params["since"] = since
        if filter_id:
            params["filter"] = filter_id
        # Our HTTP timeout must exceed the server hold time, or every
        # long-poll would abort client-side; add a generous buffer.
        http_timeout = (timeout_ms / 1000.0) + 15.0
        return await self._request_with_retry(
            "GET", "/sync", params=params, timeout=http_timeout
        )

    def extract_room_messages(
        self,
        sync_response: Dict[str, Any],
        room_id: str,
    ) -> List[Dict[str, Any]]:
        """
        Extract new m.room.message events for a specific room from a sync response.

        Filters out:
        - events without an event_id (cannot be deduped safely)
        - messages from the bot itself (_bot_user_id)
        - already-seen event_ids (dedupe)
        - non-text msgtypes (M1 handles only m.text)

        Returns list of event dicts with at least:
        {event_id, sender, content, origin_server_ts}.
        Note: does NOT mark events seen — callers call mark_seen() after
        successful processing.
        """
        timeline = (
            sync_response.get("rooms", {})
            .get("join", {})
            .get(room_id, {})
            .get("timeline", {})
        )
        results: List[Dict[str, Any]] = []
        for event in timeline.get("events", []):
            if event.get("type") != "m.room.message":
                continue
            event_id = event.get("event_id", "")
            if not event_id:
                # Malformed event: without an id, dedupe would alias all
                # such events onto the "" key — skip instead.
                continue
            # Skip own messages
            if event.get("sender", "") == self._bot_user_id:
                continue
            # Skip already-processed
            if self.is_duplicate(event_id):
                continue
            # Only text messages for M1
            if event.get("content", {}).get("msgtype") != "m.text":
                continue
            results.append(event)
        return results