from __future__ import annotations

from app.rate_limit import InMemoryRateLimiter


def _create_chat(client, agent_id: str, node_id: str, ref: str) -> str:
    """Create a chat through ``POST /api/chats`` and return its ``chat_id``.

    Args:
        client: HTTP test client exposing ``post(url, json=...)``.
        agent_id: Value sent as ``agent_id`` in the request payload.
        node_id: Value sent as ``node_id`` in the request payload.
        ref: Value sent as ``external_chat_ref`` in the request payload.

    Returns:
        The ``chat.chat_id`` field of the JSON response.

    Raises:
        AssertionError: If the endpoint does not answer with HTTP 200; the
            response text is attached as the assertion message.
    """
    r = client.post(
        "/api/chats",
        json={
            "agent_id": agent_id,
            "node_id": node_id,
            "source": "web",
            "external_chat_ref": ref,
        },
    )
    assert r.status_code == 200, r.text
    return r.json()["chat"]["chat_id"]


async def _fake_infer(base_url, agent_id, text, **kwargs):
    """Stand-in for the module's ``infer`` that returns a canned payload.

    ``base_url`` and any extra keyword arguments are accepted and ignored so
    the fake stays call-compatible with however the endpoint invokes it.
    """
    return {
        "response": f"ok:{agent_id}:{text}",
        "backend": "fake",
        "model": "fake-model",
    }


def _patch_rate_limits(
    monkeypatch,
    sofiia_module,
    *,
    chat_rps: float,
    chat_burst: int,
    op_rps: float,
    op_burst: int,
) -> None:
    """Install the fake ``infer``, a fresh limiter and the given limits.

    A new ``InMemoryRateLimiter`` is installed on every call so that no
    limiter state from another test can influence the current one.

    Args:
        monkeypatch: pytest's ``monkeypatch`` fixture.
        sofiia_module: Module object whose attributes are patched.
        chat_rps: Value for ``_RL_CHAT_RPS``.
        chat_burst: Value for ``_RL_CHAT_BURST``.
        op_rps: Value for ``_RL_OP_RPS``.
        op_burst: Value for ``_RL_OP_BURST``.
    """
    monkeypatch.setattr(sofiia_module, "infer", _fake_infer)
    monkeypatch.setattr(sofiia_module, "_rate_limiter", InMemoryRateLimiter())
    monkeypatch.setattr(sofiia_module, "_RL_CHAT_RPS", chat_rps)
    monkeypatch.setattr(sofiia_module, "_RL_CHAT_BURST", chat_burst)
    monkeypatch.setattr(sofiia_module, "_RL_OP_RPS", op_rps)
    monkeypatch.setattr(sofiia_module, "_RL_OP_BURST", op_burst)


def _assert_rate_limited(response, scope: str) -> None:
    """Assert that ``response`` is a 429 rejection for the given ``scope``.

    Checks the status code, the ``detail.error.code`` and
    ``detail.error.scope`` fields of the JSON body, and that the
    ``Retry-After`` header parses as an integer of at least 1.
    """
    assert response.status_code == 429, response.text
    body = response.json()
    assert body["detail"]["error"]["code"] == "rate_limited"
    assert body["detail"]["error"]["scope"] == scope
    # A missing header falls back to "0" and therefore fails the check.
    assert int(response.headers.get("Retry-After", "0")) >= 1


def test_inmemory_rate_limiter_blocks_burst_exceed():
    """Second ``consume`` on the same key is denied when ``burst=1``.

    The refill rate is set to 0.001 rps so that, presumably, no capacity is
    regained between the two back-to-back calls (limiter internals are not
    visible from this file).
    """
    rl = InMemoryRateLimiter()
    first = rl.consume("rl:test:key", rps=0.001, burst=1)
    second = rl.consume("rl:test:key", rps=0.001, burst=1)
    assert first.allowed is True
    assert second.allowed is False
    assert second.retry_after_s > 0


def test_send_rate_limit_per_chat_returns_429(sofiia_client, sofiia_module, monkeypatch):
    """Two sends to the same chat: the second is rejected with scope ``chat``.

    The per-chat limit is squeezed to a burst of 1 while the per-operator
    limit is left generous, so only the chat limit can trigger.
    """
    _patch_rate_limits(
        monkeypatch,
        sofiia_module,
        chat_rps=0.001,
        chat_burst=1,
        op_rps=100.0,
        op_burst=100,
    )

    chat_id = _create_chat(sofiia_client, "sofiia", "NODA2", "rl-chat")
    r1 = sofiia_client.post(
        f"/api/chats/{chat_id}/send",
        json={"text": "ping-1", "user_id": "op-1"},
    )
    r2 = sofiia_client.post(
        f"/api/chats/{chat_id}/send",
        json={"text": "ping-2", "user_id": "op-1"},
    )

    assert r1.status_code == 200, r1.text
    _assert_rate_limited(r2, "chat")


def test_send_rate_limit_per_operator_returns_429(sofiia_client, sofiia_module, monkeypatch):
    """Same operator, two different chats: the second send is rejected.

    The per-operator limit is squeezed to a burst of 1 while the per-chat
    limit is left generous; using two distinct chats ensures the rejection
    is attributed to the ``operator`` scope.
    """
    _patch_rate_limits(
        monkeypatch,
        sofiia_module,
        chat_rps=100.0,
        chat_burst=100,
        op_rps=0.001,
        op_burst=1,
    )

    chat_1 = _create_chat(sofiia_client, "sofiia", "NODA2", "rl-op-1")
    chat_2 = _create_chat(sofiia_client, "sofiia", "NODA2", "rl-op-2")
    r1 = sofiia_client.post(
        f"/api/chats/{chat_1}/send",
        json={"text": "ping-1", "user_id": "operator-1"},
    )
    r2 = sofiia_client.post(
        f"/api/chats/{chat_2}/send",
        json={"text": "ping-2", "user_id": "operator-1"},
    )

    assert r1.status_code == 200, r1.text
    _assert_rate_limited(r2, "operator")