feat(matrix-bridge-dagi): add operator allowlist for control commands (M3.0)

New: app/control.py
  - ControlConfig: operator_allowlist + control_rooms (frozensets)
  - parse_control_config(): validates @user:server + !room:server formats, fail-fast
  - parse_command(): parses !verb subcommand [args] [key=value] up to 512 chars
  - check_authorization(): AND(is_control_room, is_operator) → (bool, reason)
  - Reply helpers: not_implemented, unknown_command, unauthorized, help
  - KNOWN_VERBS: runbook, status, help (M3.1+ stubs)
  - MAX_CMD_LEN=512, MAX_CMD_TOKENS=20

ingress.py:
  - _try_control(): dispatch for control rooms (authorized → audit + reply, unauthorized → audit + optional error reply)
  - join control rooms on startup
  - _enqueue_from_sync: control rooms processed first, never forwarded to agents
  - on_control_command(sender, verb, subcommand) metric callback
  - CONTROL_UNAUTHORIZED_BEHAVIOR: "ignore" | "reply_error"

Audit events:
  matrix.control.command       — authorised command (verb, subcommand, args, kwargs)
  matrix.control.unauthorized  — rejected by allowlist (reason: not_operator | not_control_room)
  matrix.control.unknown_cmd   — authorised but unrecognised verb

Config + main:
  - bridge_operator_allowlist, bridge_control_rooms, control_unauthorized_behavior
  - matrix_bridge_control_commands_total{sender,verb,subcommand} counter
  - /health: control_channel section (enabled, rooms_count, operators_count, behavior)
  - /bridge/mappings: control_rooms + control_operators_count
  - docker-compose: BRIDGE_OPERATOR_ALLOWLIST, BRIDGE_CONTROL_ROOMS, CONTROL_UNAUTHORIZED_BEHAVIOR

Tests: 40 new → 148 total pass
Made-with: Cursor
This commit is contained in:
Apple
2026-03-05 01:50:04 -08:00
parent d40b1e87c6
commit fe6e3d30ae
6 changed files with 945 additions and 5 deletions

View File

@@ -1,5 +1,5 @@
"""
Matrix Ingress + Egress Loop — Phase M1.4 + H1 + H2 + H3 + M2.1 + M2.2 (mixed rooms hardening)
Matrix Ingress + Egress Loop — Phase M1.4 + H1 + H2 + H3 + M2.1 + M2.2 + M3.0 (control channel)
Architecture (H2):
Reader task → asyncio.Queue(maxsize) → N Worker tasks
@@ -33,6 +33,12 @@ from typing import Any, Callable, Dict, List, Optional
import httpx
from .control import (
ControlConfig, ControlCommand,
check_authorization, parse_command, is_control_message,
not_implemented_reply, unknown_command_reply, unauthorized_reply, help_reply,
VERB_HELP,
)
from .matrix_client import MatrixClient
from .mixed_routing import (
MixedRoomConfig, route_message, reply_prefix,
@@ -178,6 +184,9 @@ class MatrixIngressLoop:
unknown_agent_behavior: str = "ignore", # "ignore" | "reply_error"
max_slash_len: int = 32,
mixed_concurrency_cap: int = 1, # 0 = unlimited
# M3.0: control channel
control_config: Optional[ControlConfig] = None,
control_unauthorized_behavior: str = "ignore", # "ignore" | "reply_error"
# Callbacks
on_message_received: Optional[Callable[[str, str], None]] = None,
on_message_replied: Optional[Callable[[str, str, str], None]] = None,
@@ -190,6 +199,7 @@ class MatrixIngressLoop:
on_queue_wait: Optional[Callable[[str, float], None]] = None,
on_routed: Optional[Callable[[str, str], None]] = None,
on_route_rejected: Optional[Callable[[str, str], None]] = None,
on_control_command: Optional[Callable[[str, str, str], None]] = None,
) -> None:
self._hs_url = matrix_homeserver_url
self._token = matrix_access_token
@@ -214,11 +224,14 @@ class MatrixIngressLoop:
self._on_send_latency = on_send_latency
self._on_queue_wait = on_queue_wait
self._mixed_room_config = mixed_room_config
self._control_config = control_config
self._control_unauthorized_behavior = control_unauthorized_behavior
self._unknown_agent_behavior = unknown_agent_behavior
self._max_slash_len = max_slash_len
self._mixed_concurrency_cap = mixed_concurrency_cap
self._on_routed = on_routed
self._on_route_rejected = on_route_rejected
self._on_control_command = on_control_command
# Lazily populated semaphores keyed by "{room_id}:{agent_id}"
self._concurrency_locks: Dict[str, asyncio.Semaphore] = {}
self._next_batch: Optional[str] = None
@@ -281,6 +294,17 @@ class MatrixIngressLoop:
await client.join_room(room_id)
except Exception as exc:
logger.warning("Could not join mixed room %s: %s", room_id, exc)
if self._control_config and self._control_config.is_enabled:
for room_id in self._control_config.control_rooms:
try:
await client.join_room(room_id)
except Exception as exc:
logger.warning("Could not join control room %s: %s", room_id, exc)
logger.info(
"Control channel: %d rooms, %d operators",
len(self._control_config.control_rooms),
len(self._control_config.operator_allowlist),
)
async with httpx.AsyncClient() as http_client:
# Start workers
@@ -355,6 +379,13 @@ class MatrixIngressLoop:
http_client: httpx.AsyncClient,
sync_resp: Dict[str, Any],
) -> None:
# M3.0: Control rooms — handled first, not forwarded to agents
if self._control_config and self._control_config.is_enabled:
for room_id in self._control_config.control_rooms:
messages = client.extract_room_messages(sync_resp, room_id)
for event in messages:
await self._try_control(client, http_client, event, room_id)
# Regular rooms: 1 room → 1 agent (M1 / M2.0)
for mapping in self._room_map.mappings:
if mapping.agent_id not in self._room_map.allowed_agents:
@@ -559,6 +590,110 @@ class MatrixIngressLoop:
data={"queue_max": self._queue_max, "sender": sender},
)
# ── Control command handler ────────────────────────────────────────────────
async def _try_control(
    self,
    client: MatrixClient,
    http_client: httpx.AsyncClient,
    event: Dict[str, Any],
    room_id: str,
) -> None:
    """
    Process a single message event from a control room (M3.0).

    Flow:
      1. Events whose body is missing, not a string, empty, or not a
         control message (per ``is_control_message``) are ignored with no
         side effects.
      2. ``client.mark_seen`` is called for the event.
      3. ``check_authorization`` is consulted. A rejected command is
         audited as ``matrix.control.unauthorized`` and, only when
         ``control_unauthorized_behavior == "reply_error"``, answered with
         an error reply.
      4. An authorized command that ``parse_command`` cannot parse is
         logged and dropped (no audit event, no reply).
      5. A parsed command triggers the ``on_control_command`` callback, a
         ``matrix.control.command`` audit event and a reply: help text,
         an unknown-command notice, or the not-implemented stub. Unknown
         verbs additionally emit ``matrix.control.unknown_cmd``.

    Exceptions raised while sending a reply are caught and logged.

    Args:
        client: Matrix client used to mark the event seen and send replies.
        http_client: HTTP client handed to ``_write_audit``.
        event: Raw Matrix event dict (reads ``event_id``, ``sender`` and
            ``content.body``).
        room_id: ID of the control room the event came from.
    """
    # The caller only dispatches here when a control config is present and
    # enabled; the assert documents that invariant.
    assert self._control_config is not None
    event_id = event.get("event_id", "")
    sender = event.get("sender", "")

    # Event content is supplied by the sending user, so neither `content`
    # nor `body` can be trusted to have the expected type. Treat anything
    # that is not a string body as "not a command" instead of raising
    # AttributeError out of the handler.
    content = event.get("content")
    body = content.get("body") if isinstance(content, dict) else None
    if not isinstance(body, str):
        return
    text = body.strip()
    if not text or not is_control_message(text):
        return  # not a command, ignore

    client.mark_seen(event_id)

    # Authorization check
    authorized, rejection_reason = check_authorization(sender, room_id, self._control_config)
    if not authorized:
        await _write_audit(
            http_client, self._console_url, self._internal_token,
            event="matrix.control.unauthorized",
            agent_id="control", node_id=self._node_id,
            room_id=room_id, event_id=event_id,
            status="error", error_code=rejection_reason,
            # Only a short preview is stored, not the full command text.
            data={"sender": sender, "command_preview": text[:80]},
        )
        logger.warning(
            "Unauthorized control command: sender=%s room=%s reason=%s cmd=%r",
            sender, room_id, rejection_reason, text[:60],
        )
        if self._control_unauthorized_behavior == "reply_error":
            try:
                txn_id = MatrixClient.make_txn_id(room_id, event_id + "_unauth")
                await client.send_text(room_id, unauthorized_reply(rejection_reason), txn_id)
            except Exception as exc:
                logger.warning("Could not send unauthorized reply: %s", exc)
        return

    # Parse command
    cmd = parse_command(text)
    if cmd is None:
        logger.warning("Control message from %s could not be parsed: %r", sender, text[:60])
        return

    # Metric callback
    if self._on_control_command:
        self._on_control_command(sender, cmd.verb, cmd.subcommand)

    # Audit every authorized, successfully parsed command
    await _write_audit(
        http_client, self._console_url, self._internal_token,
        event="matrix.control.command",
        agent_id="control", node_id=self._node_id,
        room_id=room_id, event_id=event_id,
        status="ok",
        data={
            "sender": sender,
            "verb": cmd.verb,
            "subcommand": cmd.subcommand,
            "args": list(cmd.args),
            "kwargs": dict(cmd.kwargs),
            "is_known": cmd.is_known,
        },
    )
    logger.info(
        "Control command: sender=%s verb=%s sub=%s args=%s",
        sender, cmd.verb, cmd.subcommand, cmd.args,
    )

    # Build reply; the "_ctrl" suffix keeps this transaction id distinct
    # from the "_unauth" one used on the rejection path.
    txn_id = MatrixClient.make_txn_id(room_id, event_id + "_ctrl")
    if cmd.verb == VERB_HELP:
        reply_text = help_reply()
    elif not cmd.is_known:
        reply_text = unknown_command_reply(cmd)
        await _write_audit(
            http_client, self._console_url, self._internal_token,
            event="matrix.control.unknown_cmd",
            agent_id="control", node_id=self._node_id,
            room_id=room_id, event_id=event_id,
            status="error", error_code="unknown_verb",
            data={"verb": cmd.verb, "sender": sender},
        )
    else:
        # M3.1+ will implement actual runbook/status commands
        reply_text = not_implemented_reply(cmd)

    try:
        await client.send_text(room_id, reply_text, txn_id)
    except Exception as exc:
        logger.error("Could not send control reply: %s", exc)
# ── Worker ─────────────────────────────────────────────────────────────────
async def _worker(