refactor(sofiia-console): extract idempotency store abstraction
Move the idempotency TTL/LRU logic into a dedicated store module with a swap-ready interface, and wire the chat send flow to use the store's get/set semantics without changing API behavior.
Made-with: Cursor
This commit is contained in:
@@ -63,6 +63,7 @@ from .metrics import (
|
||||
SOFIIA_CURSOR_REQUESTS_TOTAL,
|
||||
render_metrics,
|
||||
)
|
||||
from .idempotency import get_idempotency_store, ReplayEntry
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@@ -77,9 +78,7 @@ _NODE_ID = os.getenv("NODE_ID", os.getenv("HOSTNAME", "noda2"))
|
||||
# ── Rate limiter ──────────────────────────────────────────────────────────────
|
||||
_rate_buckets: Dict[str, collections.deque] = {}
|
||||
|
||||
# ── Chat idempotency cache (TTL in-memory) ───────────────────────────────────
|
||||
# Lifetime, in seconds, of a cached chat-send reply. Read once at import time
# from the CHAT_IDEMPOTENCY_TTL_SEC environment variable (default "900").
_IDEMPOTENCY_TTL_SEC = int(os.getenv("CHAT_IDEMPOTENCY_TTL_SEC", "900"))
|
||||
# Ordered in-process map from "<chat_id>::<idem_key>" to an entry dict holding
# "expires_at" (a time.monotonic() deadline) and "payload" (the reply to replay).
_idempotency_cache: "collections.OrderedDict[str, Dict[str, Any]]" = collections.OrderedDict()
|
||||
# Module-level idempotency store obtained from the .idempotency module; the
# chat send flow reads and writes ReplayEntry values through its get()/set().
_idempotency_store = get_idempotency_store()
|
||||
|
||||
def _check_rate(key: str, max_calls: int, window_sec: int = 60) -> bool:
|
||||
now = time.monotonic()
|
||||
@@ -92,43 +91,6 @@ def _check_rate(key: str, max_calls: int, window_sec: int = 60) -> bool:
|
||||
return True
|
||||
|
||||
|
||||
def _idem_cleanup(now: Optional[float] = None) -> None:
    """Drop expired entries from the front of the idempotency cache.

    Entries are examined oldest-position first; trimming stops as soon as the
    entry at the head has not yet expired.

    Args:
        now: Reference timestamp on the ``time.monotonic()`` clock. When
            omitted, the current monotonic time is used.
    """
    cutoff = time.monotonic() if now is None else now
    while _idempotency_cache:
        head_key = next(iter(_idempotency_cache))
        head_entry = _idempotency_cache[head_key] or {}
        # A missing "expires_at" counts as 0, i.e. already expired.
        if float(head_entry.get("expires_at", 0)) > cutoff:
            return
        _idempotency_cache.popitem(last=False)
|
||||
|
||||
|
||||
def _idem_get(chat_id: str, idem_key: str) -> Optional[Dict[str, Any]]:
    """Return the cached reply payload for a chat/idempotency-key pair.

    Args:
        chat_id: Chat identifier; first half of the cache key.
        idem_key: Client-supplied idempotency key; second half of the cache key.

    Returns:
        The stored payload dict, or ``None`` when there is no entry, the entry
        has passed its ``expires_at`` deadline, or the payload is not a dict.
    """
    now = time.monotonic()
    _idem_cleanup(now)
    cache_key = f"{chat_id}::{idem_key}"
    hit = _idempotency_cache.get(cache_key)
    if not hit:
        return None
    # _idem_cleanup only trims expired entries from the front of the ordered
    # dict and stops at the first live one. Because the LRU touch below moves
    # entries to the back, an entry can sit behind a fresher head after its
    # deadline has passed, so expiry must also be enforced on the hit itself.
    if float(hit.get("expires_at", 0)) <= now:
        del _idempotency_cache[cache_key]
        return None
    # Touch LRU
    _idempotency_cache.move_to_end(cache_key, last=True)
    payload = hit.get("payload")
    return payload if isinstance(payload, dict) else None
|
||||
|
||||
|
||||
def _idem_put(chat_id: str, idem_key: str, payload: Dict[str, Any]) -> None:
    """Store a reply payload under the chat/idempotency-key pair.

    Does nothing when ``idem_key`` is empty. Otherwise the entry is written
    with a deadline of now plus the configured TTL (never less than 60
    seconds), placed at the most-recent end of the cache, and the cache is
    trimmed from the oldest end so it holds at most 5000 entries.

    Args:
        chat_id: Chat identifier; first half of the cache key.
        idem_key: Client-supplied idempotency key; second half of the cache key.
        payload: Reply body to hand back on a replay.
    """
    if idem_key:
        stamp = time.monotonic()
        _idem_cleanup(stamp)
        slot = f"{chat_id}::{idem_key}"
        lifetime = max(60, _IDEMPOTENCY_TTL_SEC)
        _idempotency_cache[slot] = {"expires_at": stamp + lifetime, "payload": payload}
        _idempotency_cache.move_to_end(slot, last=True)
        # Bound memory growth: evict from the oldest end down to the cap.
        for _ in range(len(_idempotency_cache) - 5000):
            _idempotency_cache.popitem(last=False)
|
||||
|
||||
# ── Voice error rings (repro pack for incident diagnosis) ─────────────────────
|
||||
# Circular buffers: last 5 TTS errors and last 5 LLM errors.
|
||||
# Populated by all voice endpoints. Read by /api/voice/degradation_status.
|
||||
@@ -3301,10 +3263,11 @@ async def api_chat_send_v2(chat_id: str, body: ChatMessageSendBody, request: Req
|
||||
).strip()
|
||||
)[:128]
|
||||
if idem_key:
|
||||
cached = _idem_get(chat_id, idem_key)
|
||||
cache_key = f"{chat_id}::{idem_key}"
|
||||
cached = _idempotency_store.get(cache_key)
|
||||
if cached:
|
||||
SOFIIA_IDEMPOTENCY_REPLAYS_TOTAL.inc()
|
||||
replay = dict(cached)
|
||||
replay = dict(cached.response_body)
|
||||
replay["idempotency"] = {"replayed": True, "key": idem_key}
|
||||
return replay
|
||||
|
||||
@@ -3382,7 +3345,16 @@ async def api_chat_send_v2(chat_id: str, body: ChatMessageSendBody, request: Req
|
||||
},
|
||||
}
|
||||
if idem_key:
|
||||
_idem_put(chat_id, idem_key, result)
|
||||
cache_key = f"{chat_id}::{idem_key}"
|
||||
_idempotency_store.set(
|
||||
cache_key,
|
||||
ReplayEntry(
|
||||
message_id=str((result.get("message") or {}).get("message_id") or ""),
|
||||
response_body=dict(result),
|
||||
created_at=time.monotonic(),
|
||||
node_id=target_node,
|
||||
),
|
||||
)
|
||||
result["idempotency"] = {"replayed": False, "key": idem_key}
|
||||
return result
|
||||
|
||||
|
||||
Reference in New Issue
Block a user