""" event_store — M2.3: Persistent event deduplication via SQLite. Stores processed Matrix event_ids so that bridge restarts do not reprocess events still returned by /sync (within TTL window). Schema: processed_events (room_id, event_id, ts, sender_hash) PK: (room_id, event_id) IDX: idx_processed_events_ts (ts) Design notes: - Uses aiosqlite for non-blocking async access from the ingress event loop. - Prune is best-effort: failures are logged but do NOT abort processing. - If the DB is unavailable (init error, corruption), EventStore degrades to a no-op: is_processed() returns False, mark_processed() is a no-op. The in-memory LRU dedupe (H1) continues to protect within a single run. - WAL mode is enabled for better concurrent read performance. """ from __future__ import annotations import logging import time from pathlib import Path from typing import Optional, Tuple try: import aiosqlite _AIOSQLITE_OK = True except ImportError: # pragma: no cover aiosqlite = None # type: ignore _AIOSQLITE_OK = False logger = logging.getLogger(__name__) _SCHEMA = """ CREATE TABLE IF NOT EXISTS processed_events ( room_id TEXT NOT NULL, event_id TEXT NOT NULL, ts INTEGER NOT NULL, sender_hash TEXT, PRIMARY KEY (room_id, event_id) ); CREATE INDEX IF NOT EXISTS idx_processed_events_ts ON processed_events (ts); """ class EventStore: """ Async SQLite-backed deduplication store for Matrix event_ids. Usage: store = EventStore("/app/data/matrix_bridge.db", ttl_h=48) await store.open() ... hit = await store.is_processed(room_id, event_id) if not hit: await store.mark_processed(room_id, event_id, sender_hash) ... pruned = await store.prune(batch=5000) await store.close() """ def __init__( self, db_path: str, ttl_h: int = 48, prune_batch: int = 5000, ) -> None: self.db_path = db_path self.ttl_h = ttl_h self.prune_batch = prune_batch self._db: Optional["aiosqlite.Connection"] = None self._ok: bool = False self._last_prune_at: Optional[float] = None self._pruned_rows_last: int = 0 # ── Lifecycle ───────────────────────────────────────────────────────────── async def open(self) -> bool: """ Open the SQLite connection and apply schema. Returns True on success; False on failure (degraded mode). """ if not _AIOSQLITE_OK: logger.warning("aiosqlite not available — persistent dedupe disabled") return False try: Path(self.db_path).parent.mkdir(parents=True, exist_ok=True) self._db = await aiosqlite.connect(self.db_path) # WAL mode: better concurrent read, non-blocking writes await self._db.execute("PRAGMA journal_mode=WAL") await self._db.execute("PRAGMA synchronous=NORMAL") await self._db.executescript(_SCHEMA) await self._db.commit() self._ok = True logger.info("EventStore opened: %s (ttl_h=%d)", self.db_path, self.ttl_h) return True except Exception as exc: logger.error("EventStore.open failed — degraded: %s", exc) self._ok = False return False async def close(self) -> None: """Close the SQLite connection gracefully.""" if self._db is not None: try: await self._db.close() except Exception as exc: # pragma: no cover logger.warning("EventStore.close error: %s", exc) self._db = None self._ok = False # ── Core operations ─────────────────────────────────────────────────────── async def is_processed(self, room_id: str, event_id: str) -> bool: """ Return True if (room_id, event_id) has already been processed. Safe to call even when degraded (returns False → no false deduplication). """ if not self._ok or self._db is None: return False try: async with self._db.execute( "SELECT 1 FROM processed_events WHERE room_id=? AND event_id=? LIMIT 1", (room_id, event_id), ) as cursor: row = await cursor.fetchone() return row is not None except Exception as exc: logger.warning("EventStore.is_processed error (degraded): %s", exc) return False async def mark_processed( self, room_id: str, event_id: str, sender_hash: str = "", ) -> bool: """ Insert (room_id, event_id) as processed. Returns True on success, False if already exists or on error. Uses INSERT OR IGNORE to avoid duplicates without raising. """ if not self._ok or self._db is None: return False ts = int(time.time()) try: await self._db.execute( "INSERT OR IGNORE INTO processed_events (room_id, event_id, ts, sender_hash) " "VALUES (?, ?, ?, ?)", (room_id, event_id, ts, sender_hash or None), ) await self._db.commit() return True except Exception as exc: logger.warning("EventStore.mark_processed error (degraded): %s", exc) return False # ── Prune ───────────────────────────────────────────────────────────────── async def prune(self, batch: Optional[int] = None) -> int: """ Delete events older than ttl_h. Returns the number of rows deleted (0 on error or degraded). Uses LIMIT batch to avoid long locks on large tables. """ if not self._ok or self._db is None: return 0 cutoff = int(time.time()) - self.ttl_h * 3600 effective_batch = batch or self.prune_batch deleted = 0 try: # SQLite DELETE with LIMIT requires compiling with SQLITE_ENABLE_UPDATE_DELETE_LIMIT, # which may not be available. Use a subquery approach instead. await self._db.execute( "DELETE FROM processed_events " "WHERE rowid IN (" " SELECT rowid FROM processed_events WHERE ts < ? LIMIT ?" ")", (cutoff, effective_batch), ) await self._db.commit() # Estimate rows deleted from changes() async with self._db.execute("SELECT changes()") as cursor: row = await cursor.fetchone() deleted = row[0] if row else 0 self._last_prune_at = time.time() self._pruned_rows_last = deleted if deleted: logger.info("EventStore pruned %d rows (cutoff=%d)", deleted, cutoff) except Exception as exc: logger.warning("EventStore.prune error: %s", exc) return deleted # ── Health / introspection ───────────────────────────────────────────────── def as_health_dict(self) -> dict: return { "enabled": self._ok, "db_path": self.db_path, "ttl_h": self.ttl_h, "ok": self._ok, "last_prune_at": self._last_prune_at, "pruned_rows_last": self._pruned_rows_last, }