feat(sofiia-console): add rate limiting for chat send (per-chat and per-operator)

Made-with: Cursor
2026-03-02 09:24:21 -08:00
parent de8002eacd
commit 9b89ace2fc
4 changed files with 331 additions and 2 deletions
--- a/tests/test_sofiia_rate_limit.py
+++ b/tests/test_sofiia_rate_limit.py
@@ -0,0 +1,74 @@
+from __future__ import annotations
+
+from app.rate_limit import InMemoryRateLimiter
+
+
+def _create_chat(client, agent_id: str, node_id: str, ref: str) -> str:
+    """Create a chat through POST /api/chats and return the new chat's chat_id."""
+    payload = {
+        "agent_id": agent_id,
+        "node_id": node_id,
+        "source": "web",
+        "external_chat_ref": ref,
+    }
+    resp = client.post("/api/chats", json=payload)
+    # Attach the response body to the failure so a broken setup is easy to diagnose.
+    assert resp.status_code == 200, resp.text
+    return resp.json()["chat"]["chat_id"]
+
+
+def test_inmemory_rate_limiter_blocks_burst_exceed():
+    """With burst=1, a second immediate consume on the same key must be refused."""
+    limiter = InMemoryRateLimiter()
+    # rps is tiny, so presumably nothing refills between the two back-to-back calls.
+    outcomes = [limiter.consume("rl:test:key", rps=0.001, burst=1) for _ in range(2)]
+    assert outcomes[0].allowed is True
+    assert outcomes[1].allowed is False
+    assert outcomes[1].retry_after_s > 0
+
+
+def test_send_rate_limit_per_chat_returns_429(sofiia_client, sofiia_module, monkeypatch):
+    """With chat burst patched to 1, a second send to the same chat gets 429 with scope "chat"."""
+    async def _stub_infer(base_url, agent_id, text, **kwargs):
+        return {"response": f"ok:{agent_id}:{text}", "backend": "fake", "model": "fake-model"}
+
+    monkeypatch.setattr(sofiia_module, "_rate_limiter", InMemoryRateLimiter())
+    monkeypatch.setattr(sofiia_module, "_RL_CHAT_BURST", 1)
+    monkeypatch.setattr(sofiia_module, "_RL_CHAT_RPS", 0.001)
+    monkeypatch.setattr(sofiia_module, "_RL_OP_BURST", 100)
+    monkeypatch.setattr(sofiia_module, "_RL_OP_RPS", 100.0)
+    monkeypatch.setattr(sofiia_module, "infer", _stub_infer)
+
+    send_url = f"/api/chats/{_create_chat(sofiia_client, 'sofiia', 'NODA2', 'rl-chat')}/send"
+    accepted = sofiia_client.post(send_url, json={"text": "ping-1", "user_id": "op-1"})
+    rejected = sofiia_client.post(send_url, json={"text": "ping-2", "user_id": "op-1"})
+    assert accepted.status_code == 200, accepted.text
+    assert rejected.status_code == 429, rejected.text
+    error = rejected.json()["detail"]["error"]
+    assert error["code"] == "rate_limited"
+    assert error["scope"] == "chat"
+    assert int(rejected.headers.get("Retry-After", "0")) >= 1
+
+
+def test_send_rate_limit_per_operator_returns_429(sofiia_client, sofiia_module, monkeypatch):
+    """With operator burst patched to 1, one user's second send (to another chat) gets 429."""
+    async def _stub_infer(base_url, agent_id, text, **kwargs):
+        return {"response": f"ok:{agent_id}:{text}", "backend": "fake", "model": "fake-model"}
+
+    monkeypatch.setattr(sofiia_module, "_rate_limiter", InMemoryRateLimiter())
+    monkeypatch.setattr(sofiia_module, "_RL_OP_BURST", 1)
+    monkeypatch.setattr(sofiia_module, "_RL_OP_RPS", 0.001)
+    monkeypatch.setattr(sofiia_module, "_RL_CHAT_BURST", 100)
+    monkeypatch.setattr(sofiia_module, "_RL_CHAT_RPS", 100.0)
+    monkeypatch.setattr(sofiia_module, "infer", _stub_infer)
+
+    first_chat = _create_chat(sofiia_client, "sofiia", "NODA2", "rl-op-1")
+    second_chat = _create_chat(sofiia_client, "sofiia", "NODA2", "rl-op-2")
+    accepted = sofiia_client.post(f"/api/chats/{first_chat}/send", json={"text": "ping-1", "user_id": "operator-1"})
+    rejected = sofiia_client.post(f"/api/chats/{second_chat}/send", json={"text": "ping-2", "user_id": "operator-1"})
+    assert accepted.status_code == 200, accepted.text
+    assert rejected.status_code == 429, rejected.text
+    error = rejected.json()["detail"]["error"]
+    assert error["code"] == "rate_limited"
+    assert error["scope"] == "operator"
+    assert int(rejected.headers.get("Retry-After", "0")) >= 1