refactor(sofiia-console): extract idempotency store abstraction
Move idempotency TTL/LRU logic into a dedicated store module with a swap-ready interface and wire chat send flow to use store get/set semantics without changing API behavior. Made-with: Cursor
This commit is contained in:
89
services/sofiia-console/app/idempotency.py
Normal file
89
services/sofiia-console/app/idempotency.py
Normal file
@@ -0,0 +1,89 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
import time
|
||||
from collections import OrderedDict
|
||||
from dataclasses import dataclass
|
||||
from typing import Any, Dict, Optional, Protocol
|
||||
|
||||
|
||||
@dataclass
|
||||
class ReplayEntry:
|
||||
message_id: str
|
||||
response_body: Dict[str, Any]
|
||||
created_at: float
|
||||
node_id: str
|
||||
|
||||
|
||||
class IdempotencyStore(Protocol):
|
||||
def get(self, key: str) -> Optional[ReplayEntry]:
|
||||
...
|
||||
|
||||
def set(self, key: str, entry: ReplayEntry) -> None:
|
||||
...
|
||||
|
||||
|
||||
class InMemoryIdempotencyStore:
|
||||
def __init__(self, ttl_seconds: int = 900, max_size: int = 5000) -> None:
|
||||
self._ttl_seconds = max(60, int(ttl_seconds))
|
||||
self._max_size = max(100, int(max_size))
|
||||
self._values: "OrderedDict[str, Dict[str, Any]]" = OrderedDict()
|
||||
|
||||
def _cleanup(self, now: Optional[float] = None) -> None:
|
||||
ts = now if now is not None else time.monotonic()
|
||||
while self._values:
|
||||
first_key = next(iter(self._values))
|
||||
exp = float((self._values[first_key] or {}).get("expires_at", 0.0))
|
||||
if exp > ts:
|
||||
break
|
||||
self._values.popitem(last=False)
|
||||
|
||||
def get(self, key: str) -> Optional[ReplayEntry]:
|
||||
self._cleanup()
|
||||
hit = self._values.get(key)
|
||||
if not hit:
|
||||
return None
|
||||
# Touch key to preserve LRU behavior.
|
||||
self._values.move_to_end(key, last=True)
|
||||
entry = hit.get("entry")
|
||||
return entry if isinstance(entry, ReplayEntry) else None
|
||||
|
||||
def set(self, key: str, entry: ReplayEntry) -> None:
|
||||
now = time.monotonic()
|
||||
self._cleanup(now)
|
||||
self._values[key] = {
|
||||
"expires_at": now + self._ttl_seconds,
|
||||
"entry": entry,
|
||||
}
|
||||
self._values.move_to_end(key, last=True)
|
||||
while len(self._values) > self._max_size:
|
||||
self._values.popitem(last=False)
|
||||
|
||||
# Debug/test helpers
|
||||
def size(self) -> int:
|
||||
self._cleanup()
|
||||
return len(self._values)
|
||||
|
||||
def delete(self, key: str) -> None:
|
||||
self._values.pop(key, None)
|
||||
|
||||
def reset(self) -> None:
|
||||
self._values.clear()
|
||||
|
||||
|
||||
_STORE: Optional[IdempotencyStore] = None
|
||||
|
||||
|
||||
def get_idempotency_store() -> IdempotencyStore:
|
||||
global _STORE
|
||||
if _STORE is None:
|
||||
ttl = int(
|
||||
os.getenv(
|
||||
"SOFIIA_IDEMPOTENCY_TTL_S",
|
||||
os.getenv("CHAT_IDEMPOTENCY_TTL_SEC", "900"),
|
||||
)
|
||||
)
|
||||
max_size = int(os.getenv("SOFIIA_IDEMPOTENCY_MAX", "5000"))
|
||||
_STORE = InMemoryIdempotencyStore(ttl_seconds=ttl, max_size=max_size)
|
||||
return _STORE
|
||||
|
||||
@@ -63,6 +63,7 @@ from .metrics import (
|
||||
SOFIIA_CURSOR_REQUESTS_TOTAL,
|
||||
render_metrics,
|
||||
)
|
||||
from .idempotency import get_idempotency_store, ReplayEntry
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@@ -77,9 +78,7 @@ _NODE_ID = os.getenv("NODE_ID", os.getenv("HOSTNAME", "noda2"))
|
||||
# ── Rate limiter ──────────────────────────────────────────────────────────────
|
||||
_rate_buckets: Dict[str, collections.deque] = {}
|
||||
|
||||
# ── Chat idempotency cache (TTL in-memory) ───────────────────────────────────
|
||||
_IDEMPOTENCY_TTL_SEC = int(os.getenv("CHAT_IDEMPOTENCY_TTL_SEC", "900"))
|
||||
_idempotency_cache: "collections.OrderedDict[str, Dict[str, Any]]" = collections.OrderedDict()
|
||||
_idempotency_store = get_idempotency_store()
|
||||
|
||||
def _check_rate(key: str, max_calls: int, window_sec: int = 60) -> bool:
|
||||
now = time.monotonic()
|
||||
@@ -92,43 +91,6 @@ def _check_rate(key: str, max_calls: int, window_sec: int = 60) -> bool:
|
||||
return True
|
||||
|
||||
|
||||
def _idem_cleanup(now: Optional[float] = None) -> None:
|
||||
ts = now if now is not None else time.monotonic()
|
||||
while _idempotency_cache:
|
||||
first_key = next(iter(_idempotency_cache))
|
||||
exp = float((_idempotency_cache[first_key] or {}).get("expires_at", 0))
|
||||
if exp > ts:
|
||||
break
|
||||
_idempotency_cache.popitem(last=False)
|
||||
|
||||
|
||||
def _idem_get(chat_id: str, idem_key: str) -> Optional[Dict[str, Any]]:
|
||||
_idem_cleanup()
|
||||
cache_key = f"{chat_id}::{idem_key}"
|
||||
hit = _idempotency_cache.get(cache_key)
|
||||
if not hit:
|
||||
return None
|
||||
# Touch LRU
|
||||
_idempotency_cache.move_to_end(cache_key, last=True)
|
||||
payload = hit.get("payload")
|
||||
return payload if isinstance(payload, dict) else None
|
||||
|
||||
|
||||
def _idem_put(chat_id: str, idem_key: str, payload: Dict[str, Any]) -> None:
|
||||
if not idem_key:
|
||||
return
|
||||
now = time.monotonic()
|
||||
_idem_cleanup(now)
|
||||
cache_key = f"{chat_id}::{idem_key}"
|
||||
_idempotency_cache[cache_key] = {
|
||||
"expires_at": now + max(60, _IDEMPOTENCY_TTL_SEC),
|
||||
"payload": payload,
|
||||
}
|
||||
_idempotency_cache.move_to_end(cache_key, last=True)
|
||||
# Bound memory growth
|
||||
while len(_idempotency_cache) > 5000:
|
||||
_idempotency_cache.popitem(last=False)
|
||||
|
||||
# ── Voice error rings (repro pack for incident diagnosis) ─────────────────────
|
||||
# Circular buffers: last 5 TTS errors and last 5 LLM errors.
|
||||
# Populated by all voice endpoints. Read by /api/voice/degradation_status.
|
||||
@@ -3301,10 +3263,11 @@ async def api_chat_send_v2(chat_id: str, body: ChatMessageSendBody, request: Req
|
||||
).strip()
|
||||
)[:128]
|
||||
if idem_key:
|
||||
cached = _idem_get(chat_id, idem_key)
|
||||
cache_key = f"{chat_id}::{idem_key}"
|
||||
cached = _idempotency_store.get(cache_key)
|
||||
if cached:
|
||||
SOFIIA_IDEMPOTENCY_REPLAYS_TOTAL.inc()
|
||||
replay = dict(cached)
|
||||
replay = dict(cached.response_body)
|
||||
replay["idempotency"] = {"replayed": True, "key": idem_key}
|
||||
return replay
|
||||
|
||||
@@ -3382,7 +3345,16 @@ async def api_chat_send_v2(chat_id: str, body: ChatMessageSendBody, request: Req
|
||||
},
|
||||
}
|
||||
if idem_key:
|
||||
_idem_put(chat_id, idem_key, result)
|
||||
cache_key = f"{chat_id}::{idem_key}"
|
||||
_idempotency_store.set(
|
||||
cache_key,
|
||||
ReplayEntry(
|
||||
message_id=str((result.get("message") or {}).get("message_id") or ""),
|
||||
response_body=dict(result),
|
||||
created_at=time.monotonic(),
|
||||
node_id=target_node,
|
||||
),
|
||||
)
|
||||
result["idempotency"] = {"replayed": False, "key": idem_key}
|
||||
return result
|
||||
|
||||
|
||||
@@ -28,12 +28,16 @@ def sofiia_module(tmp_path, monkeypatch):
|
||||
asyncio.set_event_loop(loop)
|
||||
|
||||
import app.db as db_mod # type: ignore
|
||||
import app.idempotency as idem_mod # type: ignore
|
||||
import app.main as main_mod # type: ignore
|
||||
|
||||
importlib.reload(db_mod)
|
||||
importlib.reload(idem_mod)
|
||||
importlib.reload(main_mod)
|
||||
main_mod._rate_buckets.clear()
|
||||
main_mod._idempotency_cache.clear()
|
||||
store = idem_mod.get_idempotency_store()
|
||||
if hasattr(store, "reset"):
|
||||
store.reset()
|
||||
return main_mod
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user