refactor(sofiia-console): extract idempotency store abstraction

Move the idempotency TTL/LRU logic into a dedicated store module with a swap-ready interface, and wire the chat send flow to use the store's get/set semantics without changing API behavior.

Made-with: Cursor
2026-03-02 08:11:13 -08:00
parent b9c548f1a6
commit 0c626943d6
3 changed files with 109 additions and 44 deletions
--- a/services/sofiia-console/app/main.py
+++ b/services/sofiia-console/app/main.py
@@ -63,6 +63,7 @@ from .metrics import (
    SOFIIA_CURSOR_REQUESTS_TOTAL,
    render_metrics,
 )
+from .idempotency import get_idempotency_store, ReplayEntry

 logger = logging.getLogger(__name__)

@@ -77,9 +78,7 @@ _NODE_ID = os.getenv("NODE_ID", os.getenv("HOSTNAME", "noda2"))
 # ── Rate limiter ──────────────────────────────────────────────────────────────
 _rate_buckets: Dict[str, collections.deque] = {}

-# ── Chat idempotency cache (TTL in-memory) ───────────────────────────────────
-_IDEMPOTENCY_TTL_SEC = int(os.getenv("CHAT_IDEMPOTENCY_TTL_SEC", "900"))
-_idempotency_cache: "collections.OrderedDict[str, Dict[str, Any]]" = collections.OrderedDict()
+_idempotency_store = get_idempotency_store()

 def _check_rate(key: str, max_calls: int, window_sec: int = 60) -> bool:
    now = time.monotonic()
@@ -92,43 +91,6 @@ def _check_rate(key: str, max_calls: int, window_sec: int = 60) -> bool:
    return True


-def _idem_cleanup(now: Optional[float] = None) -> None:
-    ts = now if now is not None else time.monotonic()
-    while _idempotency_cache:
-        first_key = next(iter(_idempotency_cache))
-        exp = float((_idempotency_cache[first_key] or {}).get("expires_at", 0))
-        if exp > ts:
-            break
-        _idempotency_cache.popitem(last=False)
-
-
-def _idem_get(chat_id: str, idem_key: str) -> Optional[Dict[str, Any]]:
-    _idem_cleanup()
-    cache_key = f"{chat_id}::{idem_key}"
-    hit = _idempotency_cache.get(cache_key)
-    if not hit:
-        return None
-    # Touch LRU
-    _idempotency_cache.move_to_end(cache_key, last=True)
-    payload = hit.get("payload")
-    return payload if isinstance(payload, dict) else None
-
-
-def _idem_put(chat_id: str, idem_key: str, payload: Dict[str, Any]) -> None:
-    if not idem_key:
-        return
-    now = time.monotonic()
-    _idem_cleanup(now)
-    cache_key = f"{chat_id}::{idem_key}"
-    _idempotency_cache[cache_key] = {
-        "expires_at": now + max(60, _IDEMPOTENCY_TTL_SEC),
-        "payload": payload,
-    }
-    _idempotency_cache.move_to_end(cache_key, last=True)
-    # Bound memory growth
-    while len(_idempotency_cache) > 5000:
-        _idempotency_cache.popitem(last=False)
-
 # ── Voice error rings (repro pack for incident diagnosis) ─────────────────────
 # Circular buffers: last 5 TTS errors and last 5 LLM errors.
 # Populated by all voice endpoints. Read by /api/voice/degradation_status.
@@ -3301,10 +3263,11 @@ async def api_chat_send_v2(chat_id: str, body: ChatMessageSendBody, request: Req
        ).strip()
    )[:128]
    if idem_key:
-        cached = _idem_get(chat_id, idem_key)
+        cache_key = f"{chat_id}::{idem_key}"
+        cached = _idempotency_store.get(cache_key)
        if cached:
            SOFIIA_IDEMPOTENCY_REPLAYS_TOTAL.inc()
-            replay = dict(cached)
+            replay = dict(cached.response_body)
            replay["idempotency"] = {"replayed": True, "key": idem_key}
            return replay

@@ -3382,7 +3345,16 @@ async def api_chat_send_v2(chat_id: str, body: ChatMessageSendBody, request: Req
        },
    }
    if idem_key:
-        _idem_put(chat_id, idem_key, result)
+        cache_key = f"{chat_id}::{idem_key}"
+        _idempotency_store.set(
+            cache_key,
+            ReplayEntry(
+                message_id=str((result.get("message") or {}).get("message_id") or ""),
+                response_body=dict(result),
+                created_at=time.monotonic(),
+                node_id=target_node,
+            ),
+        )
        result["idempotency"] = {"replayed": False, "key": idem_key}
    return result