"""
Matrix Ingress Loop — Phase M1.3

Polls Matrix /sync for new messages, invokes DAGI Gateway for mapped rooms.
Does NOT send replies back (that is PR-M1.4 egress).

Design:
  - asyncio task, driven by run_ingress_loop()
  - sync_poll() → extract_room_messages() per mapped room
  - for each message: dedupe → invoke gateway → audit (fire-and-forget)
  - next_batch token persisted in memory (restart resets to None — acceptable for M1)
  - graceful shutdown via asyncio.Event
"""

import asyncio
import logging
import time
from typing import Any, Callable, Dict, Optional

import httpx

from .matrix_client import MatrixClient
from .room_mapping import RoomMappingConfig

logger = logging.getLogger(__name__)

# ── Constants ──────────────────────────────────────────────────────────────────

# Max wait between sync retries on error (seconds)
_MAX_RETRY_BACKOFF = 60.0
_INIT_RETRY_BACKOFF = 2.0

# Gateway invoke timeout
_GATEWAY_TIMEOUT_S = 30.0


# ── Gateway invoke ─────────────────────────────────────────────────────────────

async def _invoke_gateway(
    http_client: httpx.AsyncClient,
    gateway_url: str,
    agent_id: str,
    node_id: str,
    message_text: str,
    matrix_room_id: str,
    matrix_event_id: str,
    matrix_sender: str,
) -> Dict[str, Any]:
    """
    POST a Matrix message to the DAGI Gateway ``/v1/invoke`` endpoint.

    Args:
        http_client: Shared async HTTP client used for the request.
        gateway_url: Gateway base URL; a trailing slash is tolerated.
        agent_id: Target agent for the message.
        node_id: Node identifier, sent both top-level and inside metadata.
        message_text: Message body forwarded to the agent.
        matrix_room_id: Originating Matrix room, recorded in metadata.
        matrix_event_id: Originating Matrix event, recorded in metadata.
        matrix_sender: Originating Matrix user, recorded in metadata.

    Returns:
        The parsed JSON body of the gateway response.

    Raises:
        httpx.HTTPStatusError: The gateway answered with a 4xx/5xx status.
        httpx.HTTPError: The request itself failed (connect, timeout, ...).
        Whatever ``resp.json()`` raises if the body is not valid JSON
        (presumably a ``ValueError`` subclass — confirm against httpx).
    """
    url = f"{gateway_url.rstrip('/')}/v1/invoke"
    payload = {
        "agent_id": agent_id,
        "node_id": node_id,
        "message": message_text,
        "metadata": {
            "transport": "matrix",
            "matrix_room_id": matrix_room_id,
            "matrix_event_id": matrix_event_id,
            "matrix_sender": matrix_sender,
            "node_id": node_id,
        },
    }
    resp = await http_client.post(url, json=payload, timeout=_GATEWAY_TIMEOUT_S)
    resp.raise_for_status()
    return resp.json()


# ── Ingress loop ───────────────────────────────────────────────────────────────

class MatrixIngressLoop:
    """
    Drives the Matrix sync-poll → gateway-invoke pipeline.

    Usage:
        loop = MatrixIngressLoop(
            matrix_homeserver_url=hs_url,
            matrix_access_token=token,
            matrix_user_id=user_id,
            gateway_url=gateway_url,
            node_id=node_id,
            room_map=room_map,
        )
        stop_event = asyncio.Event()
        await loop.run(stop_event)

    Metrics callbacks (optional, injected to avoid hard dependency):
        on_message_received(room_id, agent_id) — called once a non-empty
            message has been marked as seen, before the gateway is invoked
        on_gateway_error(error_type) — called on gateway invoke errors
            ("http_<status>", "network_error", "unexpected") and on sync
            loop failures ("sync_error")
    """

    def __init__(
        self,
        matrix_homeserver_url: str,
        matrix_access_token: str,
        matrix_user_id: str,
        gateway_url: str,
        node_id: str,
        room_map: RoomMappingConfig,
        on_message_received: Optional[Callable[[str, str], Any]] = None,
        on_gateway_error: Optional[Callable[[str], Any]] = None,
    ) -> None:
        """Store connection settings, room mapping and optional callbacks."""
        self._hs_url = matrix_homeserver_url
        self._token = matrix_access_token
        self._user_id = matrix_user_id
        self._gateway_url = gateway_url
        self._node_id = node_id
        self._room_map = room_map
        self._on_message_received = on_message_received
        self._on_gateway_error = on_gateway_error
        # Sync token of the last successful poll; None means "initial sync".
        self._next_batch: Optional[str] = None
        self._running = False

    @property
    def next_batch(self) -> Optional[str]:
        """Sync token from the most recent successful poll, if any."""
        return self._next_batch

    async def run(self, stop_event: asyncio.Event) -> None:
        """
        Main loop. Runs until stop_event is set.

        Joins every mapped room whose agent is allowed, then polls /sync and
        dispatches messages. Sync failures are retried with exponential
        backoff (capped at _MAX_RETRY_BACKOFF); the backoff wait is cut short
        as soon as stop_event is set.
        """
        self._running = True
        backoff = _INIT_RETRY_BACKOFF
        logger.info(
            "Matrix ingress loop started | hs=%s node=%s mappings=%d",
            self._hs_url, self._node_id, self._room_map.total_mappings,
        )

        if self._room_map.total_mappings == 0:
            logger.warning("No room mappings configured — ingress loop is idle")

        try:
            async with MatrixClient(
                self._hs_url, self._token, self._user_id
            ) as client:
                await self._join_mapped_rooms(client)

                async with httpx.AsyncClient(timeout=_GATEWAY_TIMEOUT_S) as gw_client:
                    while not stop_event.is_set():
                        try:
                            sync_resp = await client.sync_poll(since=self._next_batch)
                            # Keep the previous token if the response carries
                            # none, instead of falling back to an initial sync.
                            next_batch = sync_resp.get("next_batch")
                            if next_batch is not None:
                                self._next_batch = next_batch
                            backoff = _INIT_RETRY_BACKOFF  # reset on success
                            await self._process_sync(client, gw_client, sync_resp)
                        except asyncio.CancelledError:
                            # Cancellation is treated as a shutdown request:
                            # leave the loop and return normally.
                            logger.info("Ingress loop cancelled")
                            break
                        except Exception as exc:
                            logger.error(
                                "Ingress loop error (retry in %.1fs): %s",
                                backoff, exc,
                            )
                            if self._on_gateway_error:
                                self._on_gateway_error("sync_error")
                            # Sleep for the backoff period, waking early on stop.
                            try:
                                await asyncio.wait_for(
                                    stop_event.wait(), timeout=backoff
                                )
                            except asyncio.TimeoutError:
                                pass
                            backoff = min(backoff * 2, _MAX_RETRY_BACKOFF)
        finally:
            # Always clear the flag, even when the loop dies on an exception.
            self._running = False
            logger.info("Matrix ingress loop stopped")

    async def _join_mapped_rooms(self, client: MatrixClient) -> None:
        """Join every mapped room with an allowed agent; failures are logged only."""
        for mapping in self._room_map.mappings:
            if mapping.agent_id not in self._room_map.allowed_agents:
                continue
            try:
                await client.join_room(mapping.room_id)
                logger.info("Joined room %s → agent %s", mapping.room_id, mapping.agent_id)
            except Exception as exc:
                logger.warning("Could not join room %s: %s", mapping.room_id, exc)

    async def _process_sync(
        self,
        client: MatrixClient,
        gw_client: httpx.AsyncClient,
        sync_resp: Dict[str, Any],
    ) -> None:
        """Process all mapped rooms in a sync response."""
        for mapping in self._room_map.mappings:
            if mapping.agent_id not in self._room_map.allowed_agents:
                continue
            messages = client.extract_room_messages(sync_resp, mapping.room_id)
            for event in messages:
                await self._handle_message(client, gw_client, event, mapping)

    async def _handle_message(
        self,
        client: MatrixClient,
        gw_client: httpx.AsyncClient,
        event: Dict[str, Any],
        mapping,
    ) -> None:
        """
        Process a single Matrix message event:
          1. Skip events without a non-empty text body
          2. Mark as seen (dedupe)
          3. Fire the on_message_received metrics callback
          4. Invoke DAGI gateway; failures are logged and reported through
             on_gateway_error, never raised

        Note: Reply sending (egress) is PR-M1.4 — not done here.
        """
        event_id = event.get("event_id", "")
        sender = event.get("sender", "")
        room_id = mapping.room_id
        agent_id = mapping.agent_id

        # A missing, null or non-string body must not raise: an exception here
        # would abort the remaining messages of the current sync batch.
        content = event.get("content")
        body = content.get("body") if isinstance(content, dict) else None
        text = body.strip() if isinstance(body, str) else ""

        if not text:
            logger.debug("Skipping empty message from %s in %s", sender, room_id)
            return

        # Mark event as seen before invoke (prevents duplicate on retry)
        client.mark_seen(event_id)

        logger.info(
            "Matrix message: room=%s sender=%s agent=%s event=%s text_len=%d",
            room_id, sender, agent_id, event_id, len(text),
        )

        if self._on_message_received:
            self._on_message_received(room_id, agent_id)

        t0 = time.monotonic()
        try:
            await _invoke_gateway(
                gw_client,
                self._gateway_url,
                agent_id=agent_id,
                node_id=self._node_id,
                message_text=text,
                matrix_room_id=room_id,
                matrix_event_id=event_id,
                matrix_sender=sender,
            )
            duration = time.monotonic() - t0
            logger.info(
                "Gateway invoke ok: agent=%s event=%s duration=%.2fs",
                agent_id, event_id, duration,
            )

        except httpx.HTTPStatusError as exc:
            duration = time.monotonic() - t0
            logger.error(
                "Gateway HTTP error %d for agent=%s event=%s duration=%.2fs",
                exc.response.status_code, agent_id, event_id, duration,
            )
            if self._on_gateway_error:
                self._on_gateway_error(f"http_{exc.response.status_code}")

        except (httpx.ConnectError, httpx.TimeoutException) as exc:
            duration = time.monotonic() - t0
            logger.error(
                "Gateway network error for agent=%s event=%s: %s duration=%.2fs",
                agent_id, event_id, exc, duration,
            )
            if self._on_gateway_error:
                self._on_gateway_error("network_error")

        except Exception as exc:
            # Last-resort boundary: one bad message must not stop the loop.
            # The traceback is attached because the cause is unknown here.
            logger.error(
                "Unexpected error invoking gateway for agent=%s event=%s: %s",
                agent_id, event_id, exc,
                exc_info=True,
            )
            if self._on_gateway_error:
                self._on_gateway_error("unexpected")