feat(matrix-bridge-dagi): add operator allowlist for control commands (M3.0)

New: app/control.py
  - ControlConfig: operator_allowlist + control_rooms (frozensets)
  - parse_control_config(): validates @user:server + !room:server formats, fail-fast
  - parse_command(): parses !verb subcommand [args] [key=value] up to 512 chars
  - check_authorization(): AND(is_control_room, is_operator) → (bool, reason)
  - Reply helpers: not_implemented, unknown_command, unauthorized, help
  - KNOWN_VERBS: runbook, status, help (M3.1+ stubs)
  - MAX_CMD_LEN=512, MAX_CMD_TOKENS=20

ingress.py:
  - _try_control(): dispatch for control rooms (authorized → audit + reply, unauthorized → audit + optional error reply)
  - join control rooms on startup
  - _enqueue_from_sync: control rooms processed first, never forwarded to agents
  - on_control_command(sender, verb, subcommand) metric callback
  - CONTROL_UNAUTHORIZED_BEHAVIOR: "ignore" | "reply_error"

Audit events:
  matrix.control.command       — authorized command (verb, subcommand, args, kwargs)
  matrix.control.unauthorized  — rejected by allowlist (reason: not_operator | not_control_room)
  matrix.control.unknown_cmd   — authorized but unrecognized verb

Config + main:
  - bridge_operator_allowlist, bridge_control_rooms, control_unauthorized_behavior
  - matrix_bridge_control_commands_total{sender,verb,subcommand} counter
  - /health: control_channel section (enabled, rooms_count, operators_count, behavior)
  - /bridge/mappings: control_rooms + control_operators_count
  - docker-compose: BRIDGE_OPERATOR_ALLOWLIST, BRIDGE_CONTROL_ROOMS, CONTROL_UNAUTHORIZED_BEHAVIOR

Tests: 40 new → 148 total pass
Made-with: Cursor
This commit is contained in:
Apple
2026-03-05 01:50:04 -08:00
parent d40b1e87c6
commit fe6e3d30ae
6 changed files with 945 additions and 5 deletions

View File

@@ -32,6 +32,7 @@ except ImportError: # pragma: no cover
_PROM_OK = False
from .config import BridgeConfig, load_config
from .control import ControlConfig, parse_control_config
from .ingress import MatrixIngressLoop
from .mixed_routing import MixedRoomConfig, parse_mixed_room_map
from .rate_limit import InMemoryRateLimiter
@@ -120,6 +121,12 @@ if _PROM_OK:
"matrix_bridge_active_room_agent_locks",
"Number of room-agent pairs currently holding a concurrency lock",
)
# M3.0: Control channel
_control_commands_total = Counter(
"matrix_bridge_control_commands_total",
"Total control commands received from authorized operators",
["sender", "verb", "subcommand"],
)
# ── Startup state ─────────────────────────────────────────────────────────────
_START_TIME = time.monotonic()
@@ -129,6 +136,7 @@ _matrix_reachable: Optional[bool] = None
_gateway_reachable: Optional[bool] = None
_room_map: Optional[RoomMappingConfig] = None
_mixed_room_config: Optional[MixedRoomConfig] = None
_control_config: Optional[ControlConfig] = None
_rate_limiter: Optional[InMemoryRateLimiter] = None
_ingress_loop: Optional["MatrixIngressLoop"] = None # for /health queue_size
_ingress_task: Optional[asyncio.Task] = None
@@ -150,7 +158,7 @@ async def _probe_url(url: str, timeout: float = 5.0) -> bool:
@asynccontextmanager
async def lifespan(app_: Any):
global _cfg, _config_error, _matrix_reachable, _gateway_reachable
global _room_map, _mixed_room_config, _rate_limiter, _ingress_loop
global _room_map, _mixed_room_config, _control_config, _rate_limiter, _ingress_loop
try:
_cfg = load_config()
@@ -186,13 +194,24 @@ async def lifespan(app_: Any):
_cfg.rate_limit_room_rpm, _cfg.rate_limit_sender_rpm,
)
# M3.0: Operator control channel
if _cfg.bridge_operator_allowlist or _cfg.bridge_control_rooms:
_control_config = parse_control_config(
_cfg.bridge_operator_allowlist,
_cfg.bridge_control_rooms,
)
else:
_control_config = None
mixed_count = _mixed_room_config.total_rooms if _mixed_room_config else 0
ctrl_rooms = len(_control_config.control_rooms) if _control_config else 0
ctrl_ops = len(_control_config.operator_allowlist) if _control_config else 0
logger.info(
"✅ matrix-bridge-dagi started | node=%s build=%s homeserver=%s "
"agents=%s mappings=%d mixed_rooms=%d",
"agents=%s mappings=%d mixed_rooms=%d ctrl_rooms=%d ctrl_operators=%d",
_cfg.node_id, _cfg.build_sha, _cfg.matrix_homeserver_url,
list(_cfg.bridge_allowed_agents),
_room_map.total_mappings, mixed_count,
_room_map.total_mappings, mixed_count, ctrl_rooms, ctrl_ops,
)
# Connectivity smoke probes (non-blocking failures)
@@ -274,6 +293,13 @@ async def lifespan(app_: Any):
if _PROM_OK:
_route_rejected_total.labels(room_id=room_id, reason=reason).inc()
# M3.0 callbacks
def _on_control_command(sender: str, verb: str, subcommand: str) -> None:
    """Record one control command on the Prometheus counter.

    Does nothing when the Prometheus client is unavailable (``_PROM_OK`` is
    falsy); otherwise increments ``_control_commands_total`` for the given
    ``sender`` / ``verb`` / ``subcommand`` label combination.
    """
    # Metrics are optional: bail out early when prometheus_client is missing.
    if not _PROM_OK:
        return
    labelled_counter = _control_commands_total.labels(
        sender=sender,
        verb=verb,
        subcommand=subcommand,
    )
    labelled_counter.inc()
ingress = MatrixIngressLoop(
matrix_homeserver_url=_cfg.matrix_homeserver_url,
matrix_access_token=_cfg.matrix_access_token,
@@ -302,6 +328,9 @@ async def lifespan(app_: Any):
on_queue_wait=_on_queue_wait,
on_routed=_on_routed,
on_route_rejected=_on_route_rejected,
control_config=_control_config,
control_unauthorized_behavior=_cfg.control_unauthorized_behavior,
on_control_command=_on_control_command,
)
logger.info(
"✅ Backpressure queue: max=%d workers=%d drain_timeout=%.1fs",
@@ -400,6 +429,12 @@ async def health() -> Dict[str, Any]:
"concurrency_cap": _cfg.mixed_concurrency_cap,
"active_room_agent_locks": _ingress_loop.active_lock_count if _ingress_loop else 0,
},
"control_channel": {
"enabled": _control_config.is_enabled if _control_config else False,
"control_rooms_count": len(_control_config.control_rooms) if _control_config else 0,
"operators_count": len(_control_config.operator_allowlist) if _control_config else 0,
"unauthorized_behavior": _cfg.control_unauthorized_behavior,
},
}
@@ -424,6 +459,8 @@ async def bridge_mappings() -> Dict[str, Any]:
"mappings": _room_map.as_summary(),
"mixed_rooms_total": _mixed_room_config.total_rooms if _mixed_room_config else 0,
"mixed_rooms": _mixed_room_config.as_summary() if _mixed_room_config else [],
"control_rooms": sorted(_control_config.control_rooms) if _control_config else [],
"control_operators_count": len(_control_config.operator_allowlist) if _control_config else 0,
}