Files
microdao-daarion/services/matrix-bridge-dagi/app/config.py
Apple a24dae8e18 feat(matrix-bridge-dagi): add backpressure queue with N workers (H2)
Reader + N workers architecture:
  Reader: sync_poll → rate_check → dedupe → queue.put_nowait()
  Workers (WORKER_CONCURRENCY, default 2): queue.get() → invoke → send → audit

Drop policy (queue full):
  - put_nowait() raises QueueFull → dropped immediately (reader never blocks)
  - audit matrix.queue_full + on_queue_dropped callback
  - metric: matrix_bridge_queue_dropped_total{room_id,agent_id}

Graceful shutdown:
  1. stop_event → reader exits loop
  2. queue.join() with QUEUE_DRAIN_TIMEOUT_S (default 5s) → workers finish in-flight
  3. worker tasks cancelled

New config env vars:
  QUEUE_MAX_EVENTS (default 100)
  WORKER_CONCURRENCY (default 2)
  QUEUE_DRAIN_TIMEOUT_S (default 5)

New metrics (H3 additions):
  matrix_bridge_queue_size (gauge)
  matrix_bridge_queue_dropped_total (counter)
  matrix_bridge_queue_wait_seconds histogram (buckets: 0.01…30s)

/health: queue.size, queue.max, queue.workers
MatrixIngressLoop: queue_size + worker_count properties

6 queue tests: enqueue/process, full-drop-audit, concurrency barrier,
graceful drain, wait metric, rate-limit-before-enqueue
Total: 71 passed

Made-with: Cursor
2026-03-05 01:07:04 -08:00

77 lines
2.9 KiB
Python

"""
matrix-bridge-dagi — configuration and validation
"""
import os
from dataclasses import dataclass, field
from typing import FrozenSet
@dataclass(frozen=True)
class BridgeConfig:
    """Immutable settings for the matrix-bridge-dagi service.

    Instances are frozen: attributes cannot be reassigned after construction.
    The two credential fields are excluded from the generated ``repr`` so that
    printing or logging a config object does not expose them; they still take
    part in ``__init__`` and equality like every other field.
    """

    # Matrix homeserver
    matrix_homeserver_url: str
    matrix_access_token: str = field(repr=False)  # secret: hidden from repr()
    matrix_user_id: str  # e.g. @dagi_bridge:daarion.space

    # Room → agent mapping (M1: single room)
    sofiia_room_id: str  # e.g. !abcdef:daarion.space

    # DAGI backend
    dagi_gateway_url: str  # e.g. http://dagi-gateway-node1:9300
    default_node_id: str  # e.g. NODA1

    # Sofiia Console (audit write)
    sofiia_console_url: str  # e.g. http://dagi-sofiia-console-node1:8002
    # X-Internal-Service-Token for audit ingest (secret: hidden from repr())
    sofiia_internal_token: str = field(repr=False)

    # Policy
    bridge_allowed_agents: FrozenSet[str]
    rate_limit_room_rpm: int  # max messages per room per minute
    rate_limit_sender_rpm: int  # max messages per sender per minute

    # H2: Backpressure queue
    queue_max_events: int  # max pending items (new events are dropped when full)
    worker_concurrency: int  # parallel invoke workers
    queue_drain_timeout_s: float  # graceful shutdown drain timeout

    # Service identity
    node_id: str
    build_sha: str
    build_time: str
def load_config() -> BridgeConfig:
    """Load and validate config from environment variables.

    Required variables (MATRIX_HOMESERVER_URL, MATRIX_ACCESS_TOKEN,
    MATRIX_USER_ID, SOFIIA_ROOM_ID, DAGI_GATEWAY_URL) must be set to a
    non-blank value. All other variables fall back to built-in defaults.

    Returns:
        A frozen ``BridgeConfig`` populated from the process environment.

    Raises:
        RuntimeError: a required variable is unset or blank.
        ValueError: a numeric variable holds a value that cannot be parsed;
            the message names the offending variable.
    """

    def _require(key: str) -> str:
        """Return the stripped value of *key* or fail if it is missing/blank."""
        v = os.getenv(key, "").strip()
        if not v:
            raise RuntimeError(f"Required env var {key!r} is not set")
        return v

    def _optional(key: str, default: str = "") -> str:
        """Return the stripped value of *key*, or *default* when it is unset."""
        return os.getenv(key, default).strip()

    def _numeric(key: str, default: str, convert: type):
        """Parse *key* with *convert* (``int`` or ``float``).

        A variable that is set but blank is treated like an unset one, so it
        falls back to *default* instead of failing on ``int("")``.
        """
        raw = _optional(key, default) or default
        try:
            return convert(raw)
        except ValueError:
            # Re-raise with the variable name; the bare int()/float() message
            # does not say which setting was malformed.
            raise ValueError(
                f"Env var {key!r} must be a valid {convert.__name__}, got {raw!r}"
            ) from None

    # Comma-separated agent ids; blank entries are ignored.
    allowed_raw = _optional("BRIDGE_ALLOWED_AGENTS", "sofiia")
    allowed = frozenset(a.strip() for a in allowed_raw.split(",") if a.strip())

    return BridgeConfig(
        # Trailing slashes are removed from base URLs.
        matrix_homeserver_url=_require("MATRIX_HOMESERVER_URL").rstrip("/"),
        matrix_access_token=_require("MATRIX_ACCESS_TOKEN"),
        matrix_user_id=_require("MATRIX_USER_ID"),
        sofiia_room_id=_require("SOFIIA_ROOM_ID"),
        dagi_gateway_url=_require("DAGI_GATEWAY_URL").rstrip("/"),
        default_node_id=_optional("DEFAULT_NODE_ID", "NODA1"),
        sofiia_console_url=_optional("SOFIIA_CONSOLE_URL", "").rstrip("/"),
        sofiia_internal_token=_optional("SOFIIA_INTERNAL_TOKEN", ""),
        bridge_allowed_agents=allowed,
        rate_limit_room_rpm=_numeric("RATE_LIMIT_ROOM_RPM", "20", int),
        rate_limit_sender_rpm=_numeric("RATE_LIMIT_SENDER_RPM", "10", int),
        # Queue settings are clamped to a minimum of 1 (1.0 s for the timeout).
        queue_max_events=max(1, _numeric("QUEUE_MAX_EVENTS", "100", int)),
        worker_concurrency=max(1, _numeric("WORKER_CONCURRENCY", "2", int)),
        queue_drain_timeout_s=max(
            1.0, _numeric("QUEUE_DRAIN_TIMEOUT_S", "5", float)
        ),
        node_id=_optional("NODE_ID", "NODA1"),
        build_sha=_optional("BUILD_SHA", "dev"),
        build_time=_optional("BUILD_TIME", "local"),
    )