refactor(sofiia-console): extract idempotency store abstraction

Move idempotency TTL/LRU logic into a dedicated store module with a swap-ready interface and wire chat send flow to use store get/set semantics without changing API behavior.

Made-with: Cursor
This commit is contained in:
Apple
2026-03-02 08:11:13 -08:00
parent b9c548f1a6
commit 0c626943d6
3 changed files with 109 additions and 44 deletions

View File

@@ -0,0 +1,89 @@
from __future__ import annotations
import os
import time
from collections import OrderedDict
from dataclasses import dataclass
from typing import Any, Dict, Optional, Protocol
@dataclass
class ReplayEntry:
message_id: str
response_body: Dict[str, Any]
created_at: float
node_id: str
class IdempotencyStore(Protocol):
def get(self, key: str) -> Optional[ReplayEntry]:
...
def set(self, key: str, entry: ReplayEntry) -> None:
...
class InMemoryIdempotencyStore:
def __init__(self, ttl_seconds: int = 900, max_size: int = 5000) -> None:
self._ttl_seconds = max(60, int(ttl_seconds))
self._max_size = max(100, int(max_size))
self._values: "OrderedDict[str, Dict[str, Any]]" = OrderedDict()
def _cleanup(self, now: Optional[float] = None) -> None:
ts = now if now is not None else time.monotonic()
while self._values:
first_key = next(iter(self._values))
exp = float((self._values[first_key] or {}).get("expires_at", 0.0))
if exp > ts:
break
self._values.popitem(last=False)
def get(self, key: str) -> Optional[ReplayEntry]:
self._cleanup()
hit = self._values.get(key)
if not hit:
return None
# Touch key to preserve LRU behavior.
self._values.move_to_end(key, last=True)
entry = hit.get("entry")
return entry if isinstance(entry, ReplayEntry) else None
def set(self, key: str, entry: ReplayEntry) -> None:
now = time.monotonic()
self._cleanup(now)
self._values[key] = {
"expires_at": now + self._ttl_seconds,
"entry": entry,
}
self._values.move_to_end(key, last=True)
while len(self._values) > self._max_size:
self._values.popitem(last=False)
# Debug/test helpers
def size(self) -> int:
self._cleanup()
return len(self._values)
def delete(self, key: str) -> None:
self._values.pop(key, None)
def reset(self) -> None:
self._values.clear()
_STORE: Optional[IdempotencyStore] = None
def get_idempotency_store() -> IdempotencyStore:
global _STORE
if _STORE is None:
ttl = int(
os.getenv(
"SOFIIA_IDEMPOTENCY_TTL_S",
os.getenv("CHAT_IDEMPOTENCY_TTL_SEC", "900"),
)
)
max_size = int(os.getenv("SOFIIA_IDEMPOTENCY_MAX", "5000"))
_STORE = InMemoryIdempotencyStore(ttl_seconds=ttl, max_size=max_size)
return _STORE

View File

@@ -63,6 +63,7 @@ from .metrics import (
SOFIIA_CURSOR_REQUESTS_TOTAL,
render_metrics,
)
from .idempotency import get_idempotency_store, ReplayEntry
logger = logging.getLogger(__name__)
@@ -77,9 +78,7 @@ _NODE_ID = os.getenv("NODE_ID", os.getenv("HOSTNAME", "noda2"))
# ── Rate limiter ──────────────────────────────────────────────────────────────
_rate_buckets: Dict[str, collections.deque] = {}
# ── Chat idempotency cache (TTL in-memory) ───────────────────────────────────
_IDEMPOTENCY_TTL_SEC = int(os.getenv("CHAT_IDEMPOTENCY_TTL_SEC", "900"))
_idempotency_cache: "collections.OrderedDict[str, Dict[str, Any]]" = collections.OrderedDict()
_idempotency_store = get_idempotency_store()
def _check_rate(key: str, max_calls: int, window_sec: int = 60) -> bool:
now = time.monotonic()
@@ -92,43 +91,6 @@ def _check_rate(key: str, max_calls: int, window_sec: int = 60) -> bool:
return True
def _idem_cleanup(now: Optional[float] = None) -> None:
ts = now if now is not None else time.monotonic()
while _idempotency_cache:
first_key = next(iter(_idempotency_cache))
exp = float((_idempotency_cache[first_key] or {}).get("expires_at", 0))
if exp > ts:
break
_idempotency_cache.popitem(last=False)
def _idem_get(chat_id: str, idem_key: str) -> Optional[Dict[str, Any]]:
_idem_cleanup()
cache_key = f"{chat_id}::{idem_key}"
hit = _idempotency_cache.get(cache_key)
if not hit:
return None
# Touch LRU
_idempotency_cache.move_to_end(cache_key, last=True)
payload = hit.get("payload")
return payload if isinstance(payload, dict) else None
def _idem_put(chat_id: str, idem_key: str, payload: Dict[str, Any]) -> None:
if not idem_key:
return
now = time.monotonic()
_idem_cleanup(now)
cache_key = f"{chat_id}::{idem_key}"
_idempotency_cache[cache_key] = {
"expires_at": now + max(60, _IDEMPOTENCY_TTL_SEC),
"payload": payload,
}
_idempotency_cache.move_to_end(cache_key, last=True)
# Bound memory growth
while len(_idempotency_cache) > 5000:
_idempotency_cache.popitem(last=False)
# ── Voice error rings (repro pack for incident diagnosis) ─────────────────────
# Circular buffers: last 5 TTS errors and last 5 LLM errors.
# Populated by all voice endpoints. Read by /api/voice/degradation_status.
@@ -3301,10 +3263,11 @@ async def api_chat_send_v2(chat_id: str, body: ChatMessageSendBody, request: Req
).strip()
)[:128]
if idem_key:
cached = _idem_get(chat_id, idem_key)
cache_key = f"{chat_id}::{idem_key}"
cached = _idempotency_store.get(cache_key)
if cached:
SOFIIA_IDEMPOTENCY_REPLAYS_TOTAL.inc()
replay = dict(cached)
replay = dict(cached.response_body)
replay["idempotency"] = {"replayed": True, "key": idem_key}
return replay
@@ -3382,7 +3345,16 @@ async def api_chat_send_v2(chat_id: str, body: ChatMessageSendBody, request: Req
},
}
if idem_key:
_idem_put(chat_id, idem_key, result)
cache_key = f"{chat_id}::{idem_key}"
_idempotency_store.set(
cache_key,
ReplayEntry(
message_id=str((result.get("message") or {}).get("message_id") or ""),
response_body=dict(result),
created_at=time.monotonic(),
node_id=target_node,
),
)
result["idempotency"] = {"replayed": False, "key": idem_key}
return result