- adds MatrixClient with send_text/sync_poll/join_room/whoami (idempotent via txn_id) - LRU dedupe for incoming event_ids (2048 capacity) - exponential backoff retry (max 3 attempts) for 429/5xx/network errors - extract_room_messages: filters own messages, non-text, duplicates - health endpoint now probes matrix_reachable + gateway_reachable at startup - adds docker-compose.synapse-node1.yml (Synapse + Postgres for NODA1) - adds ops/runbook-matrix-setup.md (10-step setup: DNS, config, bot, room, .env) - 19 tests passing, no real Synapse required Made-with: Cursor
309 lines · 11 KiB · Python
"""
|
|
Matrix Client Wrapper — Phase M1
|
|
|
|
Provides minimal, idempotent Matrix CS-API calls:
|
|
- send_text(room_id, text, txn_id) — idempotent PUT via txn_id
|
|
- sync_poll(since) — GET /sync with timeout
|
|
- join_room(room_id) — POST join if not already joined
|
|
- whoami() — GET /account/whoami (for smoke)
|
|
|
|
Design:
|
|
- No state beyond in-memory dedupe LRU for incoming event_ids.
|
|
- Idempotency: txn_id = sha256(room_id + source_event_id) for replies.
|
|
- Retry: simple exponential backoff, max 3 attempts, surface errors up.
|
|
- No asyncio background tasks here — caller drives the sync loop.
|
|
"""
import asyncio
import hashlib
import logging
import time
from collections import OrderedDict
from typing import Any, Dict, List, Optional
from urllib.parse import quote

import httpx
logger = logging.getLogger(__name__)

# ── Dedupe LRU cache ───────────────────────────────────────────────────────────

class _LRUSet:
    """Bounded set with least-recently-used eviction, for event_id dedupe.

    Backed by an OrderedDict whose keys are the members; insertion order
    doubles as recency order (oldest first, newest last).
    """

    def __init__(self, maxsize: int = 2048):
        # Members live in the keys; values are always None.
        self._data: OrderedDict[str, None] = OrderedDict()
        self._maxsize = maxsize

    def contains(self, key: str) -> bool:
        """Return True if *key* is present, refreshing its recency."""
        if key not in self._data:
            return False
        self._data.move_to_end(key)
        return True

    def add(self, key: str) -> None:
        """Insert *key* as most-recent; evict oldest entries past capacity."""
        # Re-adding an existing key only refreshes recency (size unchanged).
        self._data[key] = None
        self._data.move_to_end(key)
        while len(self._data) > self._maxsize:
            # popitem(last=False) drops the least-recently-used member.
            self._data.popitem(last=False)

# ── Matrix Client ──────────────────────────────────────────────────────────────

class MatrixClient:
    """
    Minimal async Matrix CS-API client for matrix-bridge-dagi M1.

    Usage:
        client = MatrixClient(homeserver_url, access_token, bot_user_id)
        async with client:
            await client.join_room(room_id)
            txn_id = MatrixClient.make_txn_id(room_id, event_id)
            await client.send_text(room_id, "Hello", txn_id)
    """

    # Sync timeout: how long Matrix server holds the /sync connection open
    SYNC_TIMEOUT_MS = 30_000

    # HTTP timeout for non-sync requests
    HTTP_TIMEOUT_S = 15.0

    # Max retries for transient errors (429, 5xx, network)
    MAX_RETRIES = 3

    # Initial backoff seconds (doubles each retry)
    BACKOFF_INITIAL = 1.0

    def __init__(
        self,
        homeserver_url: str,
        access_token: str,
        bot_user_id: str,
        dedupe_maxsize: int = 2048,
    ) -> None:
        # Trailing slash stripped so _url() can join paths unconditionally.
        self._hs = homeserver_url.rstrip("/")
        self._token = access_token
        self._bot_user_id = bot_user_id
        self._dedupe = _LRUSet(dedupe_maxsize)
        # Created in __aenter__, closed in __aexit__.
        self._client: Optional[httpx.AsyncClient] = None

    # ── Context manager ────────────────────────────────────────────────────────

    async def __aenter__(self) -> "MatrixClient":
        self._client = httpx.AsyncClient(
            headers={
                "Authorization": f"Bearer {self._token}",
                "Content-Type": "application/json",
                "User-Agent": "matrix-bridge-dagi/0.1",
            },
            timeout=self.HTTP_TIMEOUT_S,
            follow_redirects=True,
        )
        return self

    async def __aexit__(self, *_: Any) -> None:
        if self._client:
            await self._client.aclose()
            self._client = None

    # ── Helpers ────────────────────────────────────────────────────────────────

    @staticmethod
    def make_txn_id(room_id: str, source_event_id: str) -> str:
        """
        Deterministic txn_id for idempotent reply sends.
        SHA-256 of (room_id + source_event_id), hex[:32].
        """
        raw = f"{room_id}:{source_event_id}".encode()
        return hashlib.sha256(raw).hexdigest()[:32]

    @staticmethod
    def _encode_path(component: str) -> str:
        """
        Percent-encode a value for use as a single URL path segment.

        FIX: the previous hand-rolled encoding only replaced "!" and ":",
        which mangles ids containing other reserved characters (e.g. "/",
        "#", "?", non-ASCII). quote(..., safe="") encodes them all and
        produces identical output for the common "!"/":" case.
        """
        return quote(component, safe="")

    def is_duplicate(self, event_id: str) -> bool:
        """True if event_id was seen before (dedupe for incoming events)."""
        return self._dedupe.contains(event_id)

    def mark_seen(self, event_id: str) -> None:
        """Mark incoming event_id as processed."""
        self._dedupe.add(event_id)

    def _url(self, path: str) -> str:
        """Build a full CS-API v3 URL for *path* (path starts with '/')."""
        return f"{self._hs}/_matrix/client/v3{path}"

    def _ensure_client(self) -> "httpx.AsyncClient":
        """Return the live AsyncClient or raise if used outside 'async with'."""
        if self._client is None:
            raise RuntimeError(
                "MatrixClient not initialised — use 'async with MatrixClient(...) as client:'"
            )
        return self._client

    async def _request_with_retry(
        self,
        method: str,
        path: str,
        *,
        json: Optional[Dict[str, Any]] = None,
        params: Optional[Dict[str, Any]] = None,
        timeout: Optional[float] = None,
    ) -> Dict[str, Any]:
        """
        Execute an HTTP request with exponential backoff on 429/5xx/network errors.

        Args:
            method: HTTP verb ("GET", "PUT", "POST", ...).
            path: CS-API path, appended to /_matrix/client/v3.
            json: optional JSON request body.
            params: optional query parameters.
            timeout: per-request override of HTTP_TIMEOUT_S.

        Returns:
            Decoded JSON response body on success.

        Raises:
            httpx.HTTPStatusError: non-retryable status, or retries exhausted.
            httpx.ConnectError / httpx.TimeoutException: network failure after
                MAX_RETRIES attempts.
            RuntimeError: client used outside 'async with'.
        """
        client = self._ensure_client()
        url = self._url(path)
        backoff = self.BACKOFF_INITIAL

        for attempt in range(1, self.MAX_RETRIES + 1):
            try:
                resp = await client.request(
                    method,
                    url,
                    json=json,
                    params=params,
                    timeout=timeout or self.HTTP_TIMEOUT_S,
                )
                if resp.status_code == 429:
                    # Honour the server-provided delay when present.
                    # FIX: a non-JSON or malformed rate-limit body must not
                    # crash the retry loop — fall back to our own backoff.
                    try:
                        retry_after = float(
                            resp.json().get("retry_after_ms", backoff * 1000)
                        ) / 1000.0
                    except (ValueError, TypeError):
                        retry_after = backoff
                    logger.warning(
                        "Rate limited by homeserver, retry in %.1fs (attempt %d/%d)",
                        retry_after, attempt, self.MAX_RETRIES,
                    )
                    if attempt < self.MAX_RETRIES:
                        await asyncio.sleep(retry_after)
                        backoff *= 2
                        continue
                    # Last attempt: fall through so raise_for_status surfaces 429.
                resp.raise_for_status()
                return resp.json()

            except (httpx.ConnectError, httpx.TimeoutException) as exc:
                logger.warning(
                    "Network error on %s %s (attempt %d/%d): %s",
                    method, path, attempt, self.MAX_RETRIES, exc,
                )
                if attempt < self.MAX_RETRIES:
                    await asyncio.sleep(backoff)
                    backoff *= 2
                else:
                    raise

            except httpx.HTTPStatusError as exc:
                # Retry only server-side (5xx) failures; 4xx are the caller's problem.
                if exc.response.status_code >= 500 and attempt < self.MAX_RETRIES:
                    logger.warning(
                        "Server error %d on %s %s, retry (attempt %d/%d)",
                        exc.response.status_code, method, path, attempt, self.MAX_RETRIES,
                    )
                    await asyncio.sleep(backoff)
                    backoff *= 2
                else:
                    raise

        # Unreachable in practice (every path above returns or raises); kept
        # as a safety net so the function cannot fall off the end silently.
        raise RuntimeError(f"Exhausted {self.MAX_RETRIES} retries for {method} {path}")

    # ── Public API ─────────────────────────────────────────────────────────────

    async def whoami(self) -> Dict[str, Any]:
        """GET /account/whoami — returns {user_id, device_id, ...}"""
        return await self._request_with_retry("GET", "/account/whoami")

    async def join_room(self, room_id: str) -> Dict[str, Any]:
        """
        POST /join/{room_id} — join the room (safe to call if already joined;
        homeserver returns 200 with room_id).
        """
        encoded = self._encode_path(room_id)
        result = await self._request_with_retry("POST", f"/join/{encoded}", json={})
        logger.info("Joined room %s", room_id)
        return result

    async def send_text(
        self, room_id: str, text: str, txn_id: str
    ) -> Dict[str, Any]:
        """
        PUT /rooms/{room_id}/send/m.room.message/{txn_id}
        Idempotent: same txn_id → homeserver deduplicates.
        Returns {event_id: ...}
        """
        encoded_room = self._encode_path(room_id)
        # txn_id is also a path segment; encoding is a no-op for the hex
        # txn_ids make_txn_id produces, but protects arbitrary caller values.
        path = f"/rooms/{encoded_room}/send/m.room.message/{self._encode_path(txn_id)}"
        result = await self._request_with_retry(
            "PUT",
            path,
            json={"msgtype": "m.text", "body": text},
        )
        logger.debug("Sent message to %s: event_id=%s", room_id, result.get("event_id"))
        return result

    async def sync_poll(
        self,
        since: Optional[str] = None,
        filter_id: Optional[str] = None,
        timeout_ms: int = SYNC_TIMEOUT_MS,
    ) -> Dict[str, Any]:
        """
        GET /sync — long-poll for new events.
        Returns raw sync response dict.

        Args:
            since: next_batch token from previous sync (None for initial).
            filter_id: optional filter (to limit event types/rooms).
            timeout_ms: how long server should hold the request (default 30s).
        """
        params: Dict[str, Any] = {"timeout": timeout_ms}
        if since:
            params["since"] = since
        if filter_id:
            params["filter"] = filter_id

        # Add extra buffer beyond timeout_ms for our HTTP timeout so the
        # client does not give up before the server's long-poll expires.
        http_timeout = (timeout_ms / 1000.0) + 15.0

        return await self._request_with_retry(
            "GET", "/sync", params=params, timeout=http_timeout
        )

    def extract_room_messages(
        self,
        sync_response: Dict[str, Any],
        room_id: str,
    ) -> List[Dict[str, Any]]:
        """
        Extract new m.room.message events for a specific room from a sync response.

        Filters out:
        - messages from the bot itself (_bot_user_id)
        - already-seen event_ids (dedupe)
        - non-text msgtypes (only m.text for M1)

        Note: does NOT mark events as seen — the caller calls mark_seen()
        after successfully processing each event.

        Returns list of event dicts with at least: {event_id, sender, content, origin_server_ts}.
        """
        rooms_data = sync_response.get("rooms", {})
        join_data = rooms_data.get("join", {})
        room_data = join_data.get(room_id, {})
        timeline = room_data.get("timeline", {})
        events = timeline.get("events", [])

        results = []
        for event in events:
            if event.get("type") != "m.room.message":
                continue
            event_id = event.get("event_id", "")
            sender = event.get("sender", "")

            # Skip own messages
            if sender == self._bot_user_id:
                continue

            # Skip already-processed
            if self.is_duplicate(event_id):
                continue

            # Only text messages for M1
            content = event.get("content", {})
            if content.get("msgtype") != "m.text":
                continue

            results.append(event)

        return results
|