""" Matrix Ingress + Egress Loop — Phase M1.4 Polls Matrix /sync for new messages, invokes DAGI Router for mapped rooms, sends agent replies back to Matrix, writes audit events to sofiia-console. Pipeline: sync_poll() → extract_room_messages() → for each message: 1. dedupe (mark_seen) 2. audit: matrix.message.received 3. invoke DAGI Router (/v1/agents/{agent_id}/infer) 4. send_text() reply to Matrix room 5. audit: matrix.agent.replied | matrix.error Graceful shutdown via asyncio.Event. """ import asyncio import logging import time from typing import Any, Callable, Dict, List, Optional import httpx from .matrix_client import MatrixClient from .room_mapping import RoomMappingConfig logger = logging.getLogger(__name__) # ── Constants ────────────────────────────────────────────────────────────────── _MAX_RETRY_BACKOFF = 60.0 _INIT_RETRY_BACKOFF = 2.0 _ROUTER_TIMEOUT_S = 45.0 # Router may call DeepSeek/Mistral _AUDIT_TIMEOUT_S = 5.0 _REPLY_TEXT_MAX = 4000 # Matrix message cap (chars) _ERROR_REPLY_TEXT = "⚠️ Тимчасова помилка. Спробуйте ще раз." # ── Router invoke ────────────────────────────────────────────────────────────── async def _invoke_router( http_client: httpx.AsyncClient, router_url: str, agent_id: str, node_id: str, prompt: str, session_id: str, ) -> str: """ POST /v1/agents/{agent_id}/infer — returns response text string. Field: response['response'] (confirmed from NODA1 test). Raises httpx.HTTPError on failure. """ url = f"{router_url.rstrip('/')}/v1/agents/{agent_id}/infer" payload = { "prompt": prompt, "session_id": session_id, "user_id": "matrix_bridge", "metadata": { "transport": "matrix", "node_id": node_id, }, } resp = await http_client.post(url, json=payload, timeout=_ROUTER_TIMEOUT_S) resp.raise_for_status() data = resp.json() # Extract text — field confirmed as 'response' text = ( data.get("response") or data.get("text") or data.get("content") or data.get("message") or "" ) if not isinstance(text, str): text = str(text) return text.strip() # ── Audit write ──────────────────────────────────────────────────────────────── async def _write_audit( http_client: httpx.AsyncClient, console_url: str, internal_token: str, event: str, agent_id: str, node_id: str, room_id: str, event_id: str, status: str = "ok", error_code: Optional[str] = None, duration_ms: Optional[int] = None, data: Optional[Dict[str, Any]] = None, ) -> None: """ Fire-and-forget audit write to sofiia-console internal endpoint. Never raises — logs warning on failure. """ if not console_url or not internal_token: return try: url = f"{console_url.rstrip('/')}/api/audit/internal" await http_client.post( url, json={ "event": event, "operator_id": "matrix_bridge", "node_id": node_id, "agent_id": agent_id, "chat_id": room_id, "status": status, "error_code": error_code, "duration_ms": duration_ms, "data": { "matrix_event_id": event_id, "matrix_room_id": room_id, **(data or {}), }, }, headers={"X-Internal-Service-Token": internal_token}, timeout=_AUDIT_TIMEOUT_S, ) except Exception as exc: logger.warning("Audit write failed (non-blocking): %s", exc) # ── Ingress loop ─────────────────────────────────────────────────────────────── class MatrixIngressLoop: """ Drives Matrix sync-poll → router-invoke → Matrix send_text pipeline. Usage: loop = MatrixIngressLoop(...) stop_event = asyncio.Event() await loop.run(stop_event) """ def __init__( self, matrix_homeserver_url: str, matrix_access_token: str, matrix_user_id: str, router_url: str, node_id: str, room_map: RoomMappingConfig, sofiia_console_url: str = "", sofiia_internal_token: str = "", on_message_received: Optional[Callable[[str, str], None]] = None, on_message_replied: Optional[Callable[[str, str, str], None]] = None, on_gateway_error: Optional[Callable[[str], None]] = None, ) -> None: self._hs_url = matrix_homeserver_url self._token = matrix_access_token self._user_id = matrix_user_id self._router_url = router_url self._node_id = node_id self._room_map = room_map self._console_url = sofiia_console_url self._internal_token = sofiia_internal_token self._on_message_received = on_message_received self._on_message_replied = on_message_replied self._on_gateway_error = on_gateway_error self._next_batch: Optional[str] = None @property def next_batch(self) -> Optional[str]: return self._next_batch async def run(self, stop_event: asyncio.Event) -> None: """Main loop until stop_event is set.""" backoff = _INIT_RETRY_BACKOFF logger.info( "Matrix ingress/egress loop started | hs=%s node=%s mappings=%d", self._hs_url, self._node_id, self._room_map.total_mappings, ) if self._room_map.total_mappings == 0: logger.warning("No room mappings — ingress loop is idle") async with MatrixClient(self._hs_url, self._token, self._user_id) as client: for mapping in self._room_map.mappings: if mapping.agent_id in self._room_map.allowed_agents: try: await client.join_room(mapping.room_id) except Exception as exc: logger.warning("Could not join room %s: %s", mapping.room_id, exc) async with httpx.AsyncClient() as http_client: while not stop_event.is_set(): try: sync_resp = await client.sync_poll(since=self._next_batch) self._next_batch = sync_resp.get("next_batch") backoff = _INIT_RETRY_BACKOFF await self._process_sync(client, http_client, sync_resp) except asyncio.CancelledError: break except Exception as exc: logger.error("Ingress loop error (retry in %.1fs): %s", backoff, exc) if self._on_gateway_error: self._on_gateway_error("sync_error") try: await asyncio.wait_for(stop_event.wait(), timeout=backoff) except asyncio.TimeoutError: pass backoff = min(backoff * 2, _MAX_RETRY_BACKOFF) logger.info("Matrix ingress/egress loop stopped") async def _process_sync( self, client: MatrixClient, http_client: httpx.AsyncClient, sync_resp: Dict[str, Any], ) -> None: for mapping in self._room_map.mappings: if mapping.agent_id not in self._room_map.allowed_agents: continue messages = client.extract_room_messages(sync_resp, mapping.room_id) for event in messages: await self._handle_message(client, http_client, event, mapping) async def _handle_message( self, client: MatrixClient, http_client: httpx.AsyncClient, event: Dict[str, Any], mapping, ) -> None: event_id = event.get("event_id", "") sender = event.get("sender", "") text = event.get("content", {}).get("body", "").strip() room_id = mapping.room_id agent_id = mapping.agent_id if not text: return # Dedupe — mark seen before any IO (prevents double-process on retry) client.mark_seen(event_id) logger.info( "Matrix message: room=%s sender=%s agent=%s event=%s len=%d", room_id, sender, agent_id, event_id, len(text), ) if self._on_message_received: self._on_message_received(room_id, agent_id) # Audit: received await _write_audit( http_client, self._console_url, self._internal_token, event="matrix.message.received", agent_id=agent_id, node_id=self._node_id, room_id=room_id, event_id=event_id, status="ok", data={"sender": sender, "text_len": len(text)}, ) # Session ID: stable per room (allows memory context across messages) session_id = f"matrix:{room_id.replace('!', '').replace(':', '_')}" t0 = time.monotonic() reply_text: Optional[str] = None invoke_ok = False try: reply_text = await _invoke_router( http_client, self._router_url, agent_id=agent_id, node_id=self._node_id, prompt=text, session_id=session_id, ) invoke_ok = True duration_ms = int((time.monotonic() - t0) * 1000) logger.info( "Router invoke ok: agent=%s event=%s reply_len=%d duration=%dms", agent_id, event_id, len(reply_text or ""), duration_ms, ) except httpx.HTTPStatusError as exc: duration_ms = int((time.monotonic() - t0) * 1000) logger.error( "Router HTTP %d for agent=%s event=%s duration=%dms", exc.response.status_code, agent_id, event_id, duration_ms, ) if self._on_gateway_error: self._on_gateway_error(f"http_{exc.response.status_code}") await _write_audit( http_client, self._console_url, self._internal_token, event="matrix.error", agent_id=agent_id, node_id=self._node_id, room_id=room_id, event_id=event_id, status="error", error_code=f"router_http_{exc.response.status_code}", duration_ms=duration_ms, ) except (httpx.ConnectError, httpx.TimeoutException) as exc: duration_ms = int((time.monotonic() - t0) * 1000) logger.error( "Router network error agent=%s event=%s: %s duration=%dms", agent_id, event_id, exc, duration_ms, ) if self._on_gateway_error: self._on_gateway_error("network_error") await _write_audit( http_client, self._console_url, self._internal_token, event="matrix.error", agent_id=agent_id, node_id=self._node_id, room_id=room_id, event_id=event_id, status="error", error_code="router_network_error", duration_ms=duration_ms, ) except Exception as exc: duration_ms = int((time.monotonic() - t0) * 1000) logger.error( "Unexpected router error agent=%s event=%s: %s", agent_id, event_id, exc, ) if self._on_gateway_error: self._on_gateway_error("unexpected") await _write_audit( http_client, self._console_url, self._internal_token, event="matrix.error", agent_id=agent_id, node_id=self._node_id, room_id=room_id, event_id=event_id, status="error", error_code="router_unexpected", duration_ms=duration_ms, ) # ── Egress: send reply back to Matrix ────────────────────────────────── if not invoke_ok: # No reply on error in M1 — just audit (avoids spam in room) return if not reply_text: logger.warning("Empty reply from router for agent=%s event=%s", agent_id, event_id) return # Truncate if needed send_text = reply_text[:_REPLY_TEXT_MAX] txn_id = MatrixClient.make_txn_id(room_id, event_id) send_t0 = time.monotonic() try: await client.send_text(room_id, send_text, txn_id) send_duration_ms = int((time.monotonic() - send_t0) * 1000) if self._on_message_replied: self._on_message_replied(room_id, agent_id, "ok") await _write_audit( http_client, self._console_url, self._internal_token, event="matrix.agent.replied", agent_id=agent_id, node_id=self._node_id, room_id=room_id, event_id=event_id, status="ok", duration_ms=send_duration_ms, data={ "reply_len": len(send_text), "truncated": len(reply_text) > _REPLY_TEXT_MAX, "router_duration_ms": duration_ms, }, ) logger.info( "Reply sent: agent=%s event=%s reply_len=%d send_ms=%d", agent_id, event_id, len(send_text), send_duration_ms, ) except Exception as exc: send_duration_ms = int((time.monotonic() - send_t0) * 1000) logger.error( "Failed to send Matrix reply agent=%s event=%s: %s", agent_id, event_id, exc, ) if self._on_message_replied: self._on_message_replied(room_id, agent_id, "error") if self._on_gateway_error: self._on_gateway_error("matrix_send_error") await _write_audit( http_client, self._console_url, self._internal_token, event="matrix.error", agent_id=agent_id, node_id=self._node_id, room_id=room_id, event_id=event_id, status="error", error_code="matrix_send_failed", duration_ms=send_duration_ms, )