refactor(sofiia-console): extract idempotency store abstraction

Move the idempotency TTL/LRU logic into a dedicated store module with a swap-ready interface, and wire the chat send flow to use the store's get/set semantics without changing API behavior.

Made-with: Cursor
2026-03-02 08:11:13 -08:00
parent b9c548f1a6
commit 0c626943d6
3 changed files with 109 additions and 44 deletions
--- a/services/sofiia-console/app/idempotency.py
+++ b/services/sofiia-console/app/idempotency.py
@@ -0,0 +1,89 @@
+from __future__ import annotations
+
+import os
+import time
+from collections import OrderedDict
+from dataclasses import dataclass
+from typing import Any, Dict, Optional, Protocol
+
+
+@dataclass
+class ReplayEntry:
+    """Stored outcome of one idempotent request, replayed on a repeat request."""
+
+    # Identifier of the message the request produced. The chat send flow fills
+    # it from the response's "message" -> "message_id" value, or "" if absent.
+    message_id: str
+    # Response payload to replay. The chat send flow stores a shallow copy of
+    # the result dict and returns a copy of this mapping on a replay.
+    response_body: Dict[str, Any]
+    # Creation timestamp supplied by the caller. The chat send flow passes
+    # time.monotonic(), which is only comparable inside a single process.
+    # NOTE(review): confirm the clock choice before sharing entries across nodes.
+    created_at: float
+    # Node identifier supplied by the caller (the chat send flow passes its
+    # `target_node`).
+    node_id: str
+
+
+class IdempotencyStore(Protocol):
+    """Structural interface for idempotency stores: a keyed get/set pair.
+
+    Any object providing these two methods satisfies the protocol, so the
+    backing implementation can be swapped without touching callers.
+    """
+
+    def get(self, key: str) -> Optional[ReplayEntry]:
+        """Return the entry stored under *key*, or ``None`` if there is none."""
+        ...
+
+    def set(self, key: str, entry: ReplayEntry) -> None:
+        """Store *entry* under *key*."""
+        ...
+
+
+class InMemoryIdempotencyStore:
+    """In-process idempotency store with a per-entry TTL and an LRU size cap.
+
+    Entries are kept in an ``OrderedDict`` ordered from least to most recently
+    used. ``ttl_seconds`` is clamped to at least 60 and ``max_size`` to at
+    least 100. Expiry is measured with ``time.monotonic()``.
+    """
+
+    def __init__(self, ttl_seconds: int = 900, max_size: int = 5000) -> None:
+        self._ttl_seconds = max(60, int(ttl_seconds))
+        self._max_size = max(100, int(max_size))
+        self._values: "OrderedDict[str, Dict[str, Any]]" = OrderedDict()
+
+    def _cleanup(self, now: Optional[float] = None) -> None:
+        """Pop expired entries from the head of the ordering.
+
+        This is a cheap pass that stops at the first live entry. Because
+        ``get`` re-orders entries on a hit, an expired entry can sit behind a
+        live head; ``get`` and ``size`` check expiry themselves for that case.
+        """
+        ts = now if now is not None else time.monotonic()
+        while self._values:
+            first_key = next(iter(self._values))
+            exp = float((self._values[first_key] or {}).get("expires_at", 0.0))
+            if exp > ts:
+                break
+            self._values.popitem(last=False)
+
+    def get(self, key: str) -> Optional[ReplayEntry]:
+        """Return the live entry for *key*, or ``None`` if missing or expired.
+
+        A hit is moved to the most-recently-used end of the ordering.
+        """
+        now = time.monotonic()
+        self._cleanup(now)
+        hit = self._values.get(key)
+        if not hit:
+            return None
+        if float(hit.get("expires_at", 0.0)) <= now:
+            # An earlier touch moved this entry behind younger ones, so the
+            # head-only cleanup could not reach it. Enforce the TTL here.
+            del self._values[key]
+            return None
+        # Touch key to preserve LRU behavior.
+        self._values.move_to_end(key, last=True)
+        entry = hit.get("entry")
+        return entry if isinstance(entry, ReplayEntry) else None
+
+    def set(self, key: str, entry: ReplayEntry) -> None:
+        """Store *entry* under *key* with a fresh TTL, evicting LRU overflow."""
+        now = time.monotonic()
+        self._cleanup(now)
+        self._values[key] = {
+            "expires_at": now + self._ttl_seconds,
+            "entry": entry,
+        }
+        self._values.move_to_end(key, last=True)
+        # Bound memory growth: drop least recently used entries beyond the cap.
+        while len(self._values) > self._max_size:
+            self._values.popitem(last=False)
+
+    # Debug/test helpers
+    def size(self) -> int:
+        """Return the number of non-expired entries.
+
+        Sweeps the whole mapping rather than only the head, so expired entries
+        that were re-ordered by ``get`` are not counted.
+        """
+        now = time.monotonic()
+        expired = [
+            key
+            for key, record in self._values.items()
+            if float((record or {}).get("expires_at", 0.0)) <= now
+        ]
+        for key in expired:
+            del self._values[key]
+        return len(self._values)
+
+    def delete(self, key: str) -> None:
+        """Remove *key* if present; a missing key is ignored."""
+        self._values.pop(key, None)
+
+    def reset(self) -> None:
+        """Remove every entry."""
+        self._values.clear()
+
+
+# Process-wide store instance, created on first use by get_idempotency_store().
+_STORE: Optional[IdempotencyStore] = None
+
+
+def _env_int(*names: str, default: int) -> int:
+    """Return the first usable integer among the environment variables *names*.
+
+    Unset, blank, and non-integer values are skipped (non-integer ones with a
+    warning) so that a malformed setting cannot abort module import.
+    Returns *default* when no name yields an integer.
+    """
+    import logging
+
+    for name in names:
+        raw = (os.getenv(name) or "").strip()
+        if not raw:
+            continue
+        try:
+            return int(raw)
+        except ValueError:
+            logging.getLogger(__name__).warning(
+                "Ignoring non-integer value for %s: %r", name, raw
+            )
+    return default
+
+
+def get_idempotency_store() -> IdempotencyStore:
+    """Return the shared idempotency store, building it on the first call.
+
+    The TTL is read from ``SOFIIA_IDEMPOTENCY_TTL_S``, falling back to the
+    legacy ``CHAT_IDEMPOTENCY_TTL_SEC`` and then to 900. The size cap is read
+    from ``SOFIIA_IDEMPOTENCY_MAX``, defaulting to 5000.
+    """
+    global _STORE
+    if _STORE is None:
+        ttl = _env_int(
+            "SOFIIA_IDEMPOTENCY_TTL_S",
+            "CHAT_IDEMPOTENCY_TTL_SEC",
+            default=900,
+        )
+        max_size = _env_int("SOFIIA_IDEMPOTENCY_MAX", default=5000)
+        _STORE = InMemoryIdempotencyStore(ttl_seconds=ttl, max_size=max_size)
+    return _STORE
+
--- a/services/sofiia-console/app/main.py
+++ b/services/sofiia-console/app/main.py
@@ -63,6 +63,7 @@ from .metrics import (
    SOFIIA_CURSOR_REQUESTS_TOTAL,
    render_metrics,
 )
+from .idempotency import get_idempotency_store, ReplayEntry

 logger = logging.getLogger(__name__)

@@ -77,9 +78,7 @@ _NODE_ID = os.getenv("NODE_ID", os.getenv("HOSTNAME", "noda2"))
 # ── Rate limiter ──────────────────────────────────────────────────────────────
 _rate_buckets: Dict[str, collections.deque] = {}

-# ── Chat idempotency cache (TTL in-memory) ───────────────────────────────────
-_IDEMPOTENCY_TTL_SEC = int(os.getenv("CHAT_IDEMPOTENCY_TTL_SEC", "900"))
-_idempotency_cache: "collections.OrderedDict[str, Dict[str, Any]]" = collections.OrderedDict()
+_idempotency_store = get_idempotency_store()

 def _check_rate(key: str, max_calls: int, window_sec: int = 60) -> bool:
    now = time.monotonic()
@@ -92,43 +91,6 @@ def _check_rate(key: str, max_calls: int, window_sec: int = 60) -> bool:
    return True


-def _idem_cleanup(now: Optional[float] = None) -> None:
-    ts = now if now is not None else time.monotonic()
-    while _idempotency_cache:
-        first_key = next(iter(_idempotency_cache))
-        exp = float((_idempotency_cache[first_key] or {}).get("expires_at", 0))
-        if exp > ts:
-            break
-        _idempotency_cache.popitem(last=False)
-
-
-def _idem_get(chat_id: str, idem_key: str) -> Optional[Dict[str, Any]]:
-    _idem_cleanup()
-    cache_key = f"{chat_id}::{idem_key}"
-    hit = _idempotency_cache.get(cache_key)
-    if not hit:
-        return None
-    # Touch LRU
-    _idempotency_cache.move_to_end(cache_key, last=True)
-    payload = hit.get("payload")
-    return payload if isinstance(payload, dict) else None
-
-
-def _idem_put(chat_id: str, idem_key: str, payload: Dict[str, Any]) -> None:
-    if not idem_key:
-        return
-    now = time.monotonic()
-    _idem_cleanup(now)
-    cache_key = f"{chat_id}::{idem_key}"
-    _idempotency_cache[cache_key] = {
-        "expires_at": now + max(60, _IDEMPOTENCY_TTL_SEC),
-        "payload": payload,
-    }
-    _idempotency_cache.move_to_end(cache_key, last=True)
-    # Bound memory growth
-    while len(_idempotency_cache) > 5000:
-        _idempotency_cache.popitem(last=False)
-
 # ── Voice error rings (repro pack for incident diagnosis) ─────────────────────
 # Circular buffers: last 5 TTS errors and last 5 LLM errors.
 # Populated by all voice endpoints. Read by /api/voice/degradation_status.
@@ -3301,10 +3263,11 @@ async def api_chat_send_v2(chat_id: str, body: ChatMessageSendBody, request: Req
        ).strip()
    )[:128]
    if idem_key:
-        cached = _idem_get(chat_id, idem_key)
+        cache_key = f"{chat_id}::{idem_key}"
+        cached = _idempotency_store.get(cache_key)
        if cached:
            SOFIIA_IDEMPOTENCY_REPLAYS_TOTAL.inc()
-            replay = dict(cached)
+            replay = dict(cached.response_body)
            replay["idempotency"] = {"replayed": True, "key": idem_key}
            return replay

@@ -3382,7 +3345,16 @@ async def api_chat_send_v2(chat_id: str, body: ChatMessageSendBody, request: Req
        },
    }
    if idem_key:
-        _idem_put(chat_id, idem_key, result)
+        cache_key = f"{chat_id}::{idem_key}"
+        _idempotency_store.set(
+            cache_key,
+            ReplayEntry(
+                message_id=str((result.get("message") or {}).get("message_id") or ""),
+                response_body=dict(result),
+                created_at=time.monotonic(),
+                node_id=target_node,
+            ),
+        )
        result["idempotency"] = {"replayed": False, "key": idem_key}
    return result