feat(gateway): phase7 public access layer (entitlements, rate limits, public list)
This commit is contained in:
106
docs/ops/phase7_public_access.md
Normal file
106
docs/ops/phase7_public_access.md
Normal file
@@ -0,0 +1,106 @@
|
|||||||
|
# Phase-7 Public Access Layer
|
||||||
|
|
||||||
|
## Scope
|
||||||
|
- Public discovery endpoint: `GET /v1/agents/public`
|
||||||
|
- Entitlements check in gateway before router call
|
||||||
|
- Rate limits in gateway for:
|
||||||
|
- `user_global`
|
||||||
|
- `user_agent`
|
||||||
|
- `group_agent`
|
||||||
|
|
||||||
|
## Data Model
|
||||||
|
Migration: `migrations/056_agent_access_policies.sql`
|
||||||
|
|
||||||
|
Tables:
|
||||||
|
- `agent_access_policies`
|
||||||
|
- `agent_allowlist`
|
||||||
|
|
||||||
|
## Gateway Env
|
||||||
|
- `GATEWAY_PUBLIC_ACCESS_ENABLED=true`
|
||||||
|
- `GATEWAY_ACCESS_POLICY_CACHE_TTL_SECONDS=60`
|
||||||
|
- `GATEWAY_ALLOWLIST_CACHE_TTL_SECONDS=30`
|
||||||
|
- `GATEWAY_ACCESS_DB_TIMEOUT_MS=40`
|
||||||
|
- `GATEWAY_ACCESS_DENY_COOLDOWN_SECONDS=30`
|
||||||
|
- `GATEWAY_RL_USER_GLOBAL_LIMIT=60`
|
||||||
|
- `GATEWAY_RL_USER_GLOBAL_WINDOW_SECONDS=300`
|
||||||
|
- `GATEWAY_RL_USER_AGENT_LIMIT=20`
|
||||||
|
- `GATEWAY_RL_USER_AGENT_WINDOW_SECONDS=300`
|
||||||
|
- `GATEWAY_RL_GROUP_AGENT_LIMIT=10`
|
||||||
|
- `GATEWAY_RL_GROUP_AGENT_WINDOW_SECONDS=300`
|
||||||
|
|
||||||
|
## Public Discovery
|
||||||
|
```bash
|
||||||
|
curl -sS http://127.0.0.1:9300/v1/agents/public | jq
|
||||||
|
```
|
||||||
|
|
||||||
|
Expected:
|
||||||
|
- `count` includes only `enabled && public_active` agents.
|
||||||
|
- planned/internal agents are excluded.
|
||||||
|
|
||||||
|
## Entitlements Operations
|
||||||
|
Add whitelist user:
|
||||||
|
```sql
|
||||||
|
INSERT INTO agent_allowlist(platform, platform_user_id, agent_id)
|
||||||
|
VALUES ('telegram', '123456789', 'helion')
|
||||||
|
ON CONFLICT (platform, platform_user_id, agent_id) DO NOTHING;
|
||||||
|
```
|
||||||
|
|
||||||
|
Require whitelist for an agent:
|
||||||
|
```sql
|
||||||
|
UPDATE agent_access_policies
|
||||||
|
SET requires_whitelist = TRUE, updated_at = now()
|
||||||
|
WHERE agent_id = 'helion';
|
||||||
|
```
|
||||||
|
|
||||||
|
Disable agent public access:
|
||||||
|
```sql
|
||||||
|
UPDATE agent_access_policies
|
||||||
|
SET enabled = FALSE, public_active = FALSE, updated_at = now()
|
||||||
|
WHERE agent_id = 'aistalk';
|
||||||
|
```
|
||||||
|
|
||||||
|
## Rate-Limit Policy Update
|
||||||
|
```sql
|
||||||
|
UPDATE agent_access_policies
|
||||||
|
SET
|
||||||
|
user_global_limit = 30,
|
||||||
|
user_global_window_seconds = 300,
|
||||||
|
user_agent_limit = 10,
|
||||||
|
user_agent_window_seconds = 300,
|
||||||
|
group_agent_limit = 5,
|
||||||
|
group_agent_window_seconds = 300,
|
||||||
|
updated_at = now()
|
||||||
|
WHERE agent_id = 'agromatrix';
|
||||||
|
```
|
||||||
|
|
||||||
|
## Fixed Smoke
|
||||||
|
1. Discovery:
|
||||||
|
```bash
|
||||||
|
curl -sS http://127.0.0.1:9300/v1/agents/public | jq '.count'
|
||||||
|
```
|
||||||
|
|
||||||
|
2. Whitelist deny:
|
||||||
|
- Set `requires_whitelist=true` for test agent.
|
||||||
|
- Replay webhook from user not in allowlist.
|
||||||
|
- Expected: deny ACK and event reason `access_whitelist_required`.
|
||||||
|
|
||||||
|
3. Whitelist allow:
|
||||||
|
- Insert user to `agent_allowlist`.
|
||||||
|
- Replay webhook.
|
||||||
|
- Expected: request continues to normal processing path.
|
||||||
|
|
||||||
|
4. Rate limit:
|
||||||
|
- Set low policy (`user_agent_limit=2`, window 60s).
|
||||||
|
- Send 3 quick webhooks from same user/agent.
|
||||||
|
- Expected: third request is `429`-style deny path and `reason=rate_limit_user_agent`.
|
||||||
|
|
||||||
|
5. Event invariant:
|
||||||
|
- `1 webhook -> 1 gateway event` remains true.
|
||||||
|
|
||||||
|
## PASS
|
||||||
|
- `/v1/agents/public` returns only public enabled agents.
|
||||||
|
- Entitlement decisions are deterministic (`allow|deny|rate_limited`).
|
||||||
|
- Metrics increment:
|
||||||
|
- `gateway_access_decisions_total`
|
||||||
|
- `gateway_rate_limited_total`
|
||||||
|
- No regression in webhook event finalize behavior.
|
||||||
413
gateway-bot/gateway_experience_bus.py
Normal file
413
gateway-bot/gateway_experience_bus.py
Normal file
@@ -0,0 +1,413 @@
|
|||||||
|
"""Gateway experience event publisher/store (Phase-4).
|
||||||
|
|
||||||
|
Best-effort, fail-open telemetry for gateway webhook flow:
|
||||||
|
- publish to JetStream subject agent.experience.v1.<agent_id>
|
||||||
|
- optional DB append-only insert into agent_experience_events
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import asyncio
|
||||||
|
import json
|
||||||
|
import logging
|
||||||
|
import os
|
||||||
|
import uuid
|
||||||
|
from datetime import datetime, timezone
|
||||||
|
from typing import Any, Dict, Optional
|
||||||
|
|
||||||
|
try:
|
||||||
|
import asyncpg
|
||||||
|
except ImportError: # pragma: no cover
|
||||||
|
asyncpg = None # type: ignore[assignment]
|
||||||
|
|
||||||
|
try:
|
||||||
|
import nats
|
||||||
|
except ImportError: # pragma: no cover
|
||||||
|
nats = None # type: ignore[assignment]
|
||||||
|
|
||||||
|
try:
|
||||||
|
from metrics import GATEWAY_EXPERIENCE_PUBLISHED_TOTAL
|
||||||
|
METRICS_AVAILABLE = True
|
||||||
|
except Exception: # pragma: no cover
|
||||||
|
METRICS_AVAILABLE = False
|
||||||
|
GATEWAY_EXPERIENCE_PUBLISHED_TOTAL = None # type: ignore[assignment]
|
||||||
|
|
||||||
|
|
||||||
|
logger = logging.getLogger("gateway.experience_bus")
|
||||||
|
|
||||||
|
|
||||||
|
def _metric_publish(status: str) -> None:
|
||||||
|
if METRICS_AVAILABLE and GATEWAY_EXPERIENCE_PUBLISHED_TOTAL is not None:
|
||||||
|
GATEWAY_EXPERIENCE_PUBLISHED_TOTAL.labels(status=status).inc()
|
||||||
|
|
||||||
|
|
||||||
|
class GatewayExperienceBus:
|
||||||
|
def __init__(self) -> None:
|
||||||
|
self.enabled = os.getenv("EXPERIENCE_BUS_ENABLED", "true").lower() in {"1", "true", "yes"}
|
||||||
|
self.enable_nats = os.getenv("EXPERIENCE_ENABLE_NATS", "true").lower() in {"1", "true", "yes"}
|
||||||
|
self.enable_db = os.getenv("EXPERIENCE_ENABLE_DB", "true").lower() in {"1", "true", "yes"}
|
||||||
|
|
||||||
|
self.node_id = os.getenv("NODE_ID", "NODA1")
|
||||||
|
self.nats_url = os.getenv("NATS_URL", "nats://nats:4222")
|
||||||
|
self.stream_name = os.getenv("EXPERIENCE_STREAM_NAME", "EXPERIENCE")
|
||||||
|
self.subject_prefix = os.getenv("EXPERIENCE_SUBJECT_PREFIX", "agent.experience.v1")
|
||||||
|
self.publish_timeout_s = float(os.getenv("EXPERIENCE_PUBLISH_TIMEOUT_MS", "800") or 800) / 1000.0
|
||||||
|
self.db_timeout_s = float(os.getenv("EXPERIENCE_DB_TIMEOUT_MS", "1200") or 1200) / 1000.0
|
||||||
|
|
||||||
|
self.db_dsn = os.getenv("EXPERIENCE_DATABASE_URL") or os.getenv("DATABASE_URL")
|
||||||
|
|
||||||
|
self._lock = asyncio.Lock()
|
||||||
|
self._nc: Any = None
|
||||||
|
self._js: Any = None
|
||||||
|
self._pool: Any = None
|
||||||
|
self._stream_ensured = False
|
||||||
|
|
||||||
|
async def capture(self, event: Dict[str, Any]) -> None:
|
||||||
|
if not self.enabled:
|
||||||
|
return
|
||||||
|
|
||||||
|
try:
|
||||||
|
await self._ensure_clients()
|
||||||
|
except Exception as e: # pragma: no cover
|
||||||
|
logger.debug("gateway experience ensure clients failed: %s", e)
|
||||||
|
|
||||||
|
nats_ok = await self._publish_nats(event)
|
||||||
|
db_ok = await self._insert_db(event)
|
||||||
|
|
||||||
|
if nats_ok or db_ok:
|
||||||
|
_metric_publish("ok")
|
||||||
|
else:
|
||||||
|
_metric_publish("err")
|
||||||
|
|
||||||
|
async def get_anti_silent_tuning_lesson(
|
||||||
|
self,
|
||||||
|
*,
|
||||||
|
reason: str,
|
||||||
|
chat_type: str,
|
||||||
|
timeout_s: float = 0.04,
|
||||||
|
) -> Optional[Dict[str, Any]]:
|
||||||
|
"""Lookup active anti-silent tuning lesson for (reason, chat_type).
|
||||||
|
|
||||||
|
Returns lesson raw payload or None. Fail-open by design.
|
||||||
|
"""
|
||||||
|
if not self.enabled or not self.enable_db:
|
||||||
|
return None
|
||||||
|
try:
|
||||||
|
await self._ensure_clients()
|
||||||
|
except Exception:
|
||||||
|
return None
|
||||||
|
if self._pool is None:
|
||||||
|
return None
|
||||||
|
|
||||||
|
trigger = f"reason={reason};chat_type={chat_type}"
|
||||||
|
query = """
|
||||||
|
SELECT raw
|
||||||
|
FROM agent_lessons
|
||||||
|
WHERE COALESCE(raw->>'lesson_type', '') = 'anti_silent_tuning'
|
||||||
|
AND trigger = $1
|
||||||
|
AND (
|
||||||
|
NULLIF(COALESCE(raw->>'expires_at', ''), '') IS NULL
|
||||||
|
OR (raw->>'expires_at')::timestamptz > now()
|
||||||
|
)
|
||||||
|
ORDER BY ts DESC
|
||||||
|
LIMIT 1
|
||||||
|
"""
|
||||||
|
try:
|
||||||
|
async with self._pool.acquire() as conn:
|
||||||
|
row = await asyncio.wait_for(conn.fetchrow(query, trigger), timeout=timeout_s)
|
||||||
|
if row is None:
|
||||||
|
return None
|
||||||
|
raw = row.get("raw")
|
||||||
|
if isinstance(raw, dict):
|
||||||
|
return raw
|
||||||
|
if isinstance(raw, str):
|
||||||
|
return json.loads(raw)
|
||||||
|
return None
|
||||||
|
except Exception:
|
||||||
|
return None
|
||||||
|
|
||||||
|
async def get_agent_access_policy(
|
||||||
|
self,
|
||||||
|
*,
|
||||||
|
agent_id: str,
|
||||||
|
timeout_s: float = 0.04,
|
||||||
|
) -> Optional[Dict[str, Any]]:
|
||||||
|
"""Lookup access policy row for an agent. Returns None on miss/errors."""
|
||||||
|
if not self.enabled or not self.enable_db:
|
||||||
|
return None
|
||||||
|
try:
|
||||||
|
await self._ensure_clients()
|
||||||
|
except Exception:
|
||||||
|
return None
|
||||||
|
if self._pool is None:
|
||||||
|
return None
|
||||||
|
|
||||||
|
query = """
|
||||||
|
SELECT
|
||||||
|
agent_id,
|
||||||
|
enabled,
|
||||||
|
public_active,
|
||||||
|
requires_whitelist,
|
||||||
|
user_global_limit,
|
||||||
|
user_global_window_seconds,
|
||||||
|
user_agent_limit,
|
||||||
|
user_agent_window_seconds,
|
||||||
|
group_agent_limit,
|
||||||
|
group_agent_window_seconds
|
||||||
|
FROM agent_access_policies
|
||||||
|
WHERE agent_id = $1
|
||||||
|
LIMIT 1
|
||||||
|
"""
|
||||||
|
try:
|
||||||
|
async with self._pool.acquire() as conn:
|
||||||
|
row = await asyncio.wait_for(conn.fetchrow(query, agent_id), timeout=timeout_s)
|
||||||
|
if row is None:
|
||||||
|
return None
|
||||||
|
return {
|
||||||
|
"agent_id": row.get("agent_id"),
|
||||||
|
"enabled": bool(row.get("enabled")),
|
||||||
|
"public_active": bool(row.get("public_active")),
|
||||||
|
"requires_whitelist": bool(row.get("requires_whitelist")),
|
||||||
|
"user_global_limit": int(row.get("user_global_limit") or 0),
|
||||||
|
"user_global_window_seconds": int(row.get("user_global_window_seconds") or 0),
|
||||||
|
"user_agent_limit": int(row.get("user_agent_limit") or 0),
|
||||||
|
"user_agent_window_seconds": int(row.get("user_agent_window_seconds") or 0),
|
||||||
|
"group_agent_limit": int(row.get("group_agent_limit") or 0),
|
||||||
|
"group_agent_window_seconds": int(row.get("group_agent_window_seconds") or 0),
|
||||||
|
}
|
||||||
|
except Exception:
|
||||||
|
return None
|
||||||
|
|
||||||
|
async def is_allowlisted(
|
||||||
|
self,
|
||||||
|
*,
|
||||||
|
platform: str,
|
||||||
|
platform_user_id: str,
|
||||||
|
agent_id: str,
|
||||||
|
timeout_s: float = 0.04,
|
||||||
|
) -> bool:
|
||||||
|
"""Return True when (platform, user, agent) exists in allowlist."""
|
||||||
|
if not self.enabled or not self.enable_db:
|
||||||
|
return False
|
||||||
|
try:
|
||||||
|
await self._ensure_clients()
|
||||||
|
except Exception:
|
||||||
|
return False
|
||||||
|
if self._pool is None:
|
||||||
|
return False
|
||||||
|
|
||||||
|
query = """
|
||||||
|
SELECT 1
|
||||||
|
FROM agent_allowlist
|
||||||
|
WHERE platform = $1
|
||||||
|
AND platform_user_id = $2
|
||||||
|
AND agent_id = $3
|
||||||
|
LIMIT 1
|
||||||
|
"""
|
||||||
|
try:
|
||||||
|
async with self._pool.acquire() as conn:
|
||||||
|
row = await asyncio.wait_for(
|
||||||
|
conn.fetchrow(query, platform, platform_user_id, agent_id),
|
||||||
|
timeout=timeout_s,
|
||||||
|
)
|
||||||
|
return row is not None
|
||||||
|
except Exception:
|
||||||
|
return False
|
||||||
|
|
||||||
|
async def close(self) -> None:
|
||||||
|
if self._pool is not None:
|
||||||
|
try:
|
||||||
|
await self._pool.close()
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
self._pool = None
|
||||||
|
|
||||||
|
if self._nc is not None:
|
||||||
|
try:
|
||||||
|
await self._nc.close()
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
self._nc = None
|
||||||
|
self._js = None
|
||||||
|
|
||||||
|
async def _ensure_clients(self) -> None:
|
||||||
|
async with self._lock:
|
||||||
|
if self.enable_nats and self._nc is None and nats is not None:
|
||||||
|
try:
|
||||||
|
self._nc = await nats.connect(self.nats_url)
|
||||||
|
self._js = self._nc.jetstream()
|
||||||
|
except Exception as e:
|
||||||
|
logger.debug("gateway experience nats connect failed: %s", e)
|
||||||
|
self._nc = None
|
||||||
|
self._js = None
|
||||||
|
|
||||||
|
if self.enable_db and self._pool is None and asyncpg is not None and self.db_dsn:
|
||||||
|
try:
|
||||||
|
self._pool = await asyncpg.create_pool(self.db_dsn, min_size=1, max_size=2)
|
||||||
|
except Exception as e:
|
||||||
|
logger.debug("gateway experience db pool failed: %s", e)
|
||||||
|
self._pool = None
|
||||||
|
|
||||||
|
if self._js is not None and not self._stream_ensured:
|
||||||
|
await self._ensure_stream()
|
||||||
|
|
||||||
|
async def _ensure_stream(self) -> None:
|
||||||
|
if self._js is None:
|
||||||
|
return
|
||||||
|
subjects = [f"{self.subject_prefix}.>"]
|
||||||
|
try:
|
||||||
|
await self._js.stream_info(self.stream_name)
|
||||||
|
self._stream_ensured = True
|
||||||
|
return
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
|
||||||
|
try:
|
||||||
|
await self._js.add_stream(name=self.stream_name, subjects=subjects)
|
||||||
|
self._stream_ensured = True
|
||||||
|
except Exception as e:
|
||||||
|
logger.debug("gateway experience ensure stream failed: %s", e)
|
||||||
|
|
||||||
|
async def _publish_nats(self, event: Dict[str, Any]) -> bool:
|
||||||
|
if not self.enable_nats:
|
||||||
|
return False
|
||||||
|
if self._js is None:
|
||||||
|
return False
|
||||||
|
|
||||||
|
subject = f"{self.subject_prefix}.{event.get('agent_id', 'unknown')}"
|
||||||
|
payload = json.dumps(event, ensure_ascii=False).encode("utf-8")
|
||||||
|
msg_id = str(event.get("event_id") or "").strip()
|
||||||
|
headers = {"Nats-Msg-Id": msg_id} if msg_id else None
|
||||||
|
|
||||||
|
try:
|
||||||
|
await asyncio.wait_for(self._js.publish(subject, payload, headers=headers), timeout=self.publish_timeout_s)
|
||||||
|
return True
|
||||||
|
except Exception as e:
|
||||||
|
logger.debug("gateway experience nats publish failed: %s", e)
|
||||||
|
return False
|
||||||
|
|
||||||
|
async def _insert_db(self, event: Dict[str, Any]) -> bool:
|
||||||
|
if not self.enable_db:
|
||||||
|
return False
|
||||||
|
if self._pool is None:
|
||||||
|
return False
|
||||||
|
|
||||||
|
llm = event.get("llm") or {}
|
||||||
|
result = event.get("result") or {}
|
||||||
|
|
||||||
|
query = """
|
||||||
|
INSERT INTO agent_experience_events (
|
||||||
|
event_id,
|
||||||
|
ts,
|
||||||
|
node_id,
|
||||||
|
source,
|
||||||
|
agent_id,
|
||||||
|
task_type,
|
||||||
|
request_id,
|
||||||
|
channel,
|
||||||
|
inputs_hash,
|
||||||
|
provider,
|
||||||
|
model,
|
||||||
|
profile,
|
||||||
|
latency_ms,
|
||||||
|
tokens_in,
|
||||||
|
tokens_out,
|
||||||
|
ok,
|
||||||
|
error_class,
|
||||||
|
error_msg_redacted,
|
||||||
|
http_status,
|
||||||
|
raw
|
||||||
|
) VALUES (
|
||||||
|
$1::uuid,
|
||||||
|
$2::timestamptz,
|
||||||
|
$3,
|
||||||
|
$4,
|
||||||
|
$5,
|
||||||
|
$6,
|
||||||
|
$7,
|
||||||
|
$8,
|
||||||
|
$9,
|
||||||
|
$10,
|
||||||
|
$11,
|
||||||
|
$12,
|
||||||
|
$13,
|
||||||
|
$14,
|
||||||
|
$15,
|
||||||
|
$16,
|
||||||
|
$17,
|
||||||
|
$18,
|
||||||
|
$19,
|
||||||
|
$20::jsonb
|
||||||
|
)
|
||||||
|
ON CONFLICT (event_id) DO NOTHING
|
||||||
|
"""
|
||||||
|
|
||||||
|
try:
|
||||||
|
payload_json = json.dumps(event, ensure_ascii=False)
|
||||||
|
async with self._pool.acquire() as conn:
|
||||||
|
await asyncio.wait_for(
|
||||||
|
conn.execute(
|
||||||
|
query,
|
||||||
|
_as_uuid(event.get("event_id")),
|
||||||
|
_as_timestamptz(event.get("ts")),
|
||||||
|
event.get("node_id", self.node_id),
|
||||||
|
event.get("source", "gateway"),
|
||||||
|
event.get("agent_id"),
|
||||||
|
event.get("task_type", "webhook"),
|
||||||
|
event.get("request_id"),
|
||||||
|
event.get("channel", "telegram"),
|
||||||
|
event.get("inputs_hash"),
|
||||||
|
llm.get("provider", "gateway"),
|
||||||
|
llm.get("model", "gateway"),
|
||||||
|
llm.get("profile"),
|
||||||
|
int(llm.get("latency_ms") or 0),
|
||||||
|
_as_int_or_none(llm.get("tokens_in")),
|
||||||
|
_as_int_or_none(llm.get("tokens_out")),
|
||||||
|
bool(result.get("ok")),
|
||||||
|
result.get("error_class"),
|
||||||
|
result.get("error_msg_redacted"),
|
||||||
|
int(result.get("http_status") or 0),
|
||||||
|
payload_json,
|
||||||
|
),
|
||||||
|
timeout=self.db_timeout_s,
|
||||||
|
)
|
||||||
|
return True
|
||||||
|
except Exception as e:
|
||||||
|
logger.debug("gateway experience db insert failed: %s", e)
|
||||||
|
return False
|
||||||
|
|
||||||
|
|
||||||
|
def _as_int_or_none(value: Any) -> Optional[int]:
|
||||||
|
try:
|
||||||
|
if value is None:
|
||||||
|
return None
|
||||||
|
return int(value)
|
||||||
|
except Exception:
|
||||||
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
def _as_uuid(value: Any) -> uuid.UUID:
|
||||||
|
try:
|
||||||
|
return uuid.UUID(str(value))
|
||||||
|
except Exception:
|
||||||
|
return uuid.uuid4()
|
||||||
|
|
||||||
|
|
||||||
|
def _as_timestamptz(value: Any) -> datetime:
|
||||||
|
if isinstance(value, datetime):
|
||||||
|
return value if value.tzinfo is not None else value.replace(tzinfo=timezone.utc)
|
||||||
|
try:
|
||||||
|
parsed = datetime.fromisoformat(str(value).replace("Z", "+00:00"))
|
||||||
|
return parsed if parsed.tzinfo is not None else parsed.replace(tzinfo=timezone.utc)
|
||||||
|
except Exception:
|
||||||
|
return datetime.now(timezone.utc)
|
||||||
|
|
||||||
|
|
||||||
|
_gateway_bus_singleton: Optional[GatewayExperienceBus] = None
|
||||||
|
|
||||||
|
|
||||||
|
def get_gateway_experience_bus() -> GatewayExperienceBus:
|
||||||
|
global _gateway_bus_singleton
|
||||||
|
if _gateway_bus_singleton is None:
|
||||||
|
_gateway_bus_singleton = GatewayExperienceBus()
|
||||||
|
return _gateway_bus_singleton
|
||||||
File diff suppressed because it is too large
Load Diff
@@ -45,6 +45,79 @@ ROUTER_LATENCY = Histogram(
|
|||||||
buckets=[0.5, 1.0, 2.0, 5.0, 10.0, 30.0, 60.0, 120.0]
|
buckets=[0.5, 1.0, 2.0, 5.0, 10.0, 30.0, 60.0, 120.0]
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# === Experience Bus Phase-4 Metrics ===
|
||||||
|
GATEWAY_EXPERIENCE_PUBLISHED_TOTAL = Counter(
|
||||||
|
"gateway_experience_published_total",
|
||||||
|
"Gateway experience event publish/store status",
|
||||||
|
["status"] # ok, err
|
||||||
|
)
|
||||||
|
|
||||||
|
GATEWAY_POLICY_DECISIONS_TOTAL = Counter(
|
||||||
|
"gateway_policy_decisions_total",
|
||||||
|
"Gateway policy (SOWA) decisions",
|
||||||
|
["sowa_decision", "reason"]
|
||||||
|
)
|
||||||
|
|
||||||
|
GATEWAY_USER_SIGNAL_TOTAL = Counter(
|
||||||
|
"gateway_user_signal_total",
|
||||||
|
"Detected user signals from webhook stream",
|
||||||
|
["user_signal"] # none, positive, negative, retry, timeout
|
||||||
|
)
|
||||||
|
|
||||||
|
GATEWAY_WEBHOOK_LATENCY_MS = Histogram(
|
||||||
|
"gateway_webhook_latency_ms",
|
||||||
|
"Gateway webhook end-to-end latency in milliseconds",
|
||||||
|
buckets=[5, 10, 25, 50, 100, 250, 500, 1000, 2500, 5000, 10000]
|
||||||
|
)
|
||||||
|
|
||||||
|
GATEWAY_EXPERIENCE_EMITTED_TOTAL = Counter(
|
||||||
|
"gateway_experience_emitted_total",
|
||||||
|
"Gateway experience events emitted from webhook handler",
|
||||||
|
["status", "path"] # status: ok|err, path: normal|early_return|exception
|
||||||
|
)
|
||||||
|
|
||||||
|
GATEWAY_EARLY_RETURN_TOTAL = Counter(
|
||||||
|
"gateway_early_return_total",
|
||||||
|
"Gateway early return branches observed by reason",
|
||||||
|
["reason"]
|
||||||
|
)
|
||||||
|
|
||||||
|
GATEWAY_EVENT_FINALIZE_LATENCY_MS = Histogram(
|
||||||
|
"gateway_event_finalize_latency_ms",
|
||||||
|
"Gateway event finalize latency in milliseconds",
|
||||||
|
buckets=[1, 5, 10, 25, 50, 100, 250, 500, 1000, 2500, 5000]
|
||||||
|
)
|
||||||
|
|
||||||
|
GATEWAY_ANTI_SILENT_TOTAL = Counter(
|
||||||
|
"gateway_anti_silent_total",
|
||||||
|
"Gateway anti-silent actions by reason/chat type",
|
||||||
|
["action", "reason", "chat_type"] # ACK_EMITTED, ACK_SUPPRESSED_COOLDOWN
|
||||||
|
)
|
||||||
|
|
||||||
|
GATEWAY_ACK_SENT_TOTAL = Counter(
|
||||||
|
"gateway_ack_sent_total",
|
||||||
|
"Gateway ACK messages sent by template/chat type",
|
||||||
|
["template_id", "chat_type"]
|
||||||
|
)
|
||||||
|
|
||||||
|
GATEWAY_ANTI_SILENT_TUNING_APPLIED_TOTAL = Counter(
|
||||||
|
"gateway_anti_silent_tuning_applied_total",
|
||||||
|
"Gateway anti-silent tuning applications by reason/chat type/template",
|
||||||
|
["reason", "chat_type", "template_id"]
|
||||||
|
)
|
||||||
|
|
||||||
|
GATEWAY_ACCESS_DECISIONS_TOTAL = Counter(
|
||||||
|
"gateway_access_decisions_total",
|
||||||
|
"Gateway access decisions for public layer",
|
||||||
|
["decision", "agent_id", "chat_type"] # allow, deny, rate_limited
|
||||||
|
)
|
||||||
|
|
||||||
|
GATEWAY_RATE_LIMITED_TOTAL = Counter(
|
||||||
|
"gateway_rate_limited_total",
|
||||||
|
"Gateway rate limit hits by scope",
|
||||||
|
["scope", "agent_id", "chat_type"] # user_global, user_agent, group_agent
|
||||||
|
)
|
||||||
|
|
||||||
# === Memory Service Metrics ===
|
# === Memory Service Metrics ===
|
||||||
MEMORY_CALLS_TOTAL = Counter(
|
MEMORY_CALLS_TOTAL = Counter(
|
||||||
"gateway_memory_calls_total",
|
"gateway_memory_calls_total",
|
||||||
|
|||||||
61
migrations/056_agent_access_policies.sql
Normal file
61
migrations/056_agent_access_policies.sql
Normal file
@@ -0,0 +1,61 @@
|
|||||||
|
-- Phase-7 public access layer
|
||||||
|
-- Access policy + allowlist tables for gateway entitlements/rate-limits.
|
||||||
|
|
||||||
|
CREATE TABLE IF NOT EXISTS agent_access_policies (
|
||||||
|
agent_id TEXT PRIMARY KEY,
|
||||||
|
enabled BOOLEAN NOT NULL DEFAULT TRUE,
|
||||||
|
public_active BOOLEAN NOT NULL DEFAULT TRUE,
|
||||||
|
requires_whitelist BOOLEAN NOT NULL DEFAULT FALSE,
|
||||||
|
user_global_limit INTEGER NOT NULL DEFAULT 60,
|
||||||
|
user_global_window_seconds INTEGER NOT NULL DEFAULT 300,
|
||||||
|
user_agent_limit INTEGER NOT NULL DEFAULT 20,
|
||||||
|
user_agent_window_seconds INTEGER NOT NULL DEFAULT 300,
|
||||||
|
group_agent_limit INTEGER NOT NULL DEFAULT 10,
|
||||||
|
group_agent_window_seconds INTEGER NOT NULL DEFAULT 300,
|
||||||
|
updated_at TIMESTAMPTZ NOT NULL DEFAULT now()
|
||||||
|
);
|
||||||
|
|
||||||
|
CREATE TABLE IF NOT EXISTS agent_allowlist (
|
||||||
|
id BIGSERIAL PRIMARY KEY,
|
||||||
|
platform TEXT NOT NULL,
|
||||||
|
platform_user_id TEXT NOT NULL,
|
||||||
|
agent_id TEXT NOT NULL REFERENCES agent_access_policies(agent_id) ON DELETE CASCADE,
|
||||||
|
created_at TIMESTAMPTZ NOT NULL DEFAULT now(),
|
||||||
|
UNIQUE (platform, platform_user_id, agent_id)
|
||||||
|
);
|
||||||
|
|
||||||
|
CREATE INDEX IF NOT EXISTS idx_agent_access_policies_enabled
|
||||||
|
ON agent_access_policies (enabled, public_active);
|
||||||
|
|
||||||
|
CREATE INDEX IF NOT EXISTS idx_agent_allowlist_lookup
|
||||||
|
ON agent_allowlist (platform, platform_user_id, agent_id);
|
||||||
|
|
||||||
|
INSERT INTO agent_access_policies (
|
||||||
|
agent_id,
|
||||||
|
enabled,
|
||||||
|
public_active,
|
||||||
|
requires_whitelist
|
||||||
|
)
|
||||||
|
VALUES
|
||||||
|
('daarwizz', TRUE, TRUE, FALSE),
|
||||||
|
('helion', TRUE, TRUE, FALSE),
|
||||||
|
('greenfood', TRUE, TRUE, FALSE),
|
||||||
|
('agromatrix', TRUE, TRUE, FALSE),
|
||||||
|
('alateya', TRUE, TRUE, FALSE),
|
||||||
|
('nutra', TRUE, TRUE, FALSE),
|
||||||
|
('druid', TRUE, TRUE, FALSE),
|
||||||
|
('clan', TRUE, TRUE, FALSE),
|
||||||
|
('eonarch', TRUE, TRUE, FALSE),
|
||||||
|
('senpai', TRUE, TRUE, FALSE),
|
||||||
|
('oneok', TRUE, TRUE, FALSE),
|
||||||
|
('soul', TRUE, TRUE, FALSE),
|
||||||
|
('yaromir', TRUE, TRUE, FALSE),
|
||||||
|
('sofiia', TRUE, TRUE, FALSE),
|
||||||
|
('monitor', FALSE, FALSE, TRUE),
|
||||||
|
('aistalk', FALSE, FALSE, TRUE)
|
||||||
|
ON CONFLICT (agent_id) DO UPDATE
|
||||||
|
SET
|
||||||
|
enabled = EXCLUDED.enabled,
|
||||||
|
public_active = EXCLUDED.public_active,
|
||||||
|
requires_whitelist = EXCLUDED.requires_whitelist,
|
||||||
|
updated_at = now();
|
||||||
Reference in New Issue
Block a user