feat(matrix-bridge-dagi): add operator allowlist for control commands (M3.0)
New: app/control.py
- ControlConfig: operator_allowlist + control_rooms (frozensets)
- parse_control_config(): validates @user:server + !room:server formats, fail-fast
- parse_command(): parses !verb subcommand [args] [key=value] up to 512 chars
- check_authorization(): AND(is_control_room, is_operator) → (bool, reason)
- Reply helpers: not_implemented, unknown_command, unauthorized, help
- KNOWN_VERBS: runbook, status, help (M3.1+ stubs)
- MAX_CMD_LEN=512, MAX_CMD_TOKENS=20
ingress.py:
- _try_control(): dispatch for control rooms (authorized → audit + reply, unauthorized → audit + optional ⛔)
- join control rooms on startup
- _enqueue_from_sync: control rooms processed first, never forwarded to agents
- on_control_command(sender, verb, subcommand) metric callback
- CONTROL_UNAUTHORIZED_BEHAVIOR: "ignore" | "reply_error"
Audit events:
matrix.control.command — authorised command (verb, subcommand, args, kwargs)
matrix.control.unauthorized — rejected by allowlist (reason: not_operator | not_control_room)
matrix.control.unknown_cmd — authorised but unrecognised verb
Config + main:
- bridge_operator_allowlist, bridge_control_rooms, control_unauthorized_behavior
- matrix_bridge_control_commands_total{sender,verb,subcommand} counter
- /health: control_channel section (enabled, rooms_count, operators_count, behavior)
- /bridge/mappings: control_rooms + control_operators_count
- docker-compose: BRIDGE_OPERATOR_ALLOWLIST, BRIDGE_CONTROL_ROOMS, CONTROL_UNAUTHORIZED_BEHAVIOR
Tests: 40 new → 148 total pass
Made-with: Cursor
This commit is contained in:
@@ -1,5 +1,5 @@
|
||||
"""
|
||||
matrix-bridge-dagi — configuration and validation (M2.1 + M2.2: mixed rooms + guard rails)
|
||||
matrix-bridge-dagi — configuration and validation (M2.1 + M2.2 + M3.0)
|
||||
"""
|
||||
import os
|
||||
from dataclasses import dataclass, field
|
||||
@@ -46,6 +46,14 @@ class BridgeConfig:
|
||||
unknown_agent_behavior: str # "ignore" | "reply_error"
|
||||
mixed_concurrency_cap: int # max parallel invokes per (room, agent); 0 = unlimited
|
||||
|
||||
# M3.0: Operator control channel
|
||||
# "@ivan:daarion.space,@sergiy:daarion.space"
|
||||
bridge_operator_allowlist: str
|
||||
# "!opsroom:server,!opsroom2:server2"
|
||||
bridge_control_rooms: str
|
||||
# "ignore" | "reply_error" (send ⛔ to room on unauthorized attempt)
|
||||
control_unauthorized_behavior: str
|
||||
|
||||
# Service identity
|
||||
node_id: str
|
||||
build_sha: str
|
||||
@@ -88,6 +96,9 @@ def load_config() -> BridgeConfig:
|
||||
max_slash_len=max(4, int(_optional("MAX_SLASH_LEN", "32"))),
|
||||
unknown_agent_behavior=_optional("UNKNOWN_AGENT_BEHAVIOR", "ignore"),
|
||||
mixed_concurrency_cap=max(0, int(_optional("MIXED_CONCURRENCY_CAP", "1"))),
|
||||
bridge_operator_allowlist=_optional("BRIDGE_OPERATOR_ALLOWLIST", ""),
|
||||
bridge_control_rooms=_optional("BRIDGE_CONTROL_ROOMS", ""),
|
||||
control_unauthorized_behavior=_optional("CONTROL_UNAUTHORIZED_BEHAVIOR", "ignore"),
|
||||
node_id=_optional("NODE_ID", "NODA1"),
|
||||
build_sha=_optional("BUILD_SHA", "dev"),
|
||||
build_time=_optional("BUILD_TIME", "local"),
|
||||
|
||||
263
services/matrix-bridge-dagi/app/control.py
Normal file
263
services/matrix-bridge-dagi/app/control.py
Normal file
@@ -0,0 +1,263 @@
|
||||
"""
|
||||
Matrix Bridge — Control Command Layer (M3.0)
|
||||
|
||||
Handles operator commands from designated control rooms.
|
||||
|
||||
Access policy (AND):
|
||||
1. Message arrives in a BRIDGE_CONTROL_ROOM
|
||||
2. Sender is in BRIDGE_OPERATOR_ALLOWLIST
|
||||
3. Message starts with "!" prefix (e.g. "!runbook start ...")
|
||||
|
||||
Design principles:
|
||||
- Bridge is a TRANSPORT only — it never executes scripts directly.
|
||||
- All actions go via sofiia-console internal API (M3.1+).
|
||||
- Every command attempt is audited regardless of authorization.
|
||||
- Unknown commands acknowledged but not executed (forward-compatible).
|
||||
|
||||
Audit events emitted:
|
||||
matrix.control.command — authorised command recognised
|
||||
matrix.control.unauthorized — command from non-operator or wrong room
|
||||
matrix.control.unknown_cmd — authorised but unrecognised verb
|
||||
"""
|
||||
|
||||
import logging
|
||||
import re
|
||||
from dataclasses import dataclass, field
|
||||
from typing import Dict, FrozenSet, List, Optional, Tuple
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# ── Constants ─────────────────────────────────────────────────────────────────
|
||||
|
||||
# Supported control verbs (M3.1+ will implement them fully)
VERB_RUNBOOK = "runbook"
VERB_STATUS = "status"
VERB_HELP = "help"

KNOWN_VERBS: FrozenSet[str] = frozenset({VERB_RUNBOOK, VERB_STATUS, VERB_HELP})

# Max command line length to guard against garbage injection
_MAX_CMD_LEN = 512
# Max number of tokens in a single command
_MAX_CMD_TOKENS = 20

# Optional ":port" suffix of a server name.  The Matrix server-name grammar is
# hostname [ ":" port ], so IDs such as "@ops:localhost:8448" are valid and
# must not be rejected by the fail-fast config validation.
# IPv6 literals ("[::1]") are still not accepted by the patterns below.
_SERVER_PORT = r"(?::[0-9]{1,5})?"

# Matrix user ID format: @localpart:server[:port]
_MATRIX_USER_RE = re.compile(
    r"^@[A-Za-z0-9._\-/=+]+:[A-Za-z0-9.\-]+" + _SERVER_PORT + r"$"
)
# Room ID format: !localpart:server[:port]
_ROOM_ID_RE = re.compile(
    r"^![A-Za-z0-9\-_.]+:[A-Za-z0-9\-_.]+" + _SERVER_PORT + r"$"
)
|
||||
|
||||
|
||||
# ── Data structures ────────────────────────────────────────────────────────────
|
||||
|
||||
@dataclass(frozen=True)
class ControlCommand:
    """
    Parsed control command from a Matrix message.

    Instances are immutable.  ``kwargs`` is a ``dict`` and therefore
    unhashable, so it is excluded from the generated ``__hash__`` (otherwise
    ``hash(cmd)`` raises ``TypeError``); equality still compares every field.
    """
    verb: str                                   # e.g. "runbook" (lower-cased by from_tokens)
    subcommand: str                             # e.g. "start", "next", "complete", "evidence", "status"
    args: Tuple[str, ...]                       # remaining positional args
    kwargs: Dict[str, str] = field(hash=False)  # key=value pairs parsed from args (e.g. node=NODA1)
    raw: str                                    # original message text
    is_known: bool                              # True if verb in KNOWN_VERBS

    @classmethod
    def from_tokens(cls, tokens: List[str], raw: str) -> "ControlCommand":
        """
        Build a ControlCommand from pre-split tokens.

        Args:
            tokens: Whitespace-split command words; the first token must not
                include the leading '!'.  An empty list yields empty verb and
                subcommand.
            raw: Original message text, stored unchanged.

        Returns:
            A command whose verb, subcommand and kwarg keys are lower-cased.
            Tokens after the subcommand of the form ``key=value`` go to
            ``kwargs`` (split on the first '='; a repeated key keeps the last
            value); every other token goes to ``args``.
        """
        verb = tokens[0].lower() if tokens else ""
        subcommand = tokens[1].lower() if len(tokens) > 1 else ""

        positional: List[str] = []
        kw: Dict[str, str] = {}
        for token in tokens[2:]:
            key, sep, value = token.partition("=")
            key = key.lower().strip()
            if sep and key:
                kw[key] = value.strip()
            else:
                # No '=' at all, or nothing before it ("=value"): a kwarg with
                # an empty name is meaningless, so keep the token positional
                # instead of storing it under the "" key.
                positional.append(token)

        return cls(
            verb=verb,
            subcommand=subcommand,
            args=tuple(positional),
            kwargs=kw,
            raw=raw,
            is_known=verb in KNOWN_VERBS,
        )
|
||||
|
||||
|
||||
@dataclass
class ControlConfig:
    """
    Operator access policy for the control channel, in parsed form.

    operator_allowlist: Matrix user IDs that may issue control commands.
    control_rooms:      Room IDs that act as control channels.

    Both fields default to an empty frozenset.
    """
    operator_allowlist: FrozenSet[str] = field(default_factory=frozenset)
    control_rooms: FrozenSet[str] = field(default_factory=frozenset)

    @property
    def is_enabled(self) -> bool:
        """True only when at least one operator AND at least one room are configured."""
        if not self.operator_allowlist:
            return False
        if not self.control_rooms:
            return False
        return True
|
||||
|
||||
|
||||
# ── Parsers ────────────────────────────────────────────────────────────────────
|
||||
|
||||
def _collect_valid_ids(
    raw: Optional[str],
    pattern: "re.Pattern[str]",
    label: str,
    errors: List[str],
) -> List[str]:
    """Split a comma-separated list; return entries matching *pattern*, record the rest in *errors*."""
    valid: List[str] = []
    # A missing value (None) is treated like an empty list.
    for entry in (raw or "").split(","):
        candidate = entry.strip()
        if not candidate:
            continue  # tolerate blank entries / trailing commas
        if pattern.match(candidate):
            valid.append(candidate)
        else:
            errors.append(f"Invalid {label}: {candidate!r}")
    return valid


def parse_control_config(
    raw_allowlist: str,
    raw_control_rooms: str,
) -> ControlConfig:
    """
    Parse BRIDGE_OPERATOR_ALLOWLIST and BRIDGE_CONTROL_ROOMS.

    Allowlist format:  "@ivan:daarion.space,@sergiy:daarion.space"
    Control rooms fmt: "!opsroom:server,!opsroom2:server2"

    Entries are comma-separated; surrounding whitespace and empty entries are
    ignored, and duplicates collapse because the result holds frozensets.

    Args:
        raw_allowlist: Comma-separated Matrix user IDs.
        raw_control_rooms: Comma-separated Matrix room IDs.

    Returns:
        ControlConfig holding both sets.  It is enabled only when both sets
        are non-empty.

    Raises:
        ValueError: on any malformed Matrix user ID or room ID.  All invalid
            entries from both lists are reported in a single message.
    """
    errors: List[str] = []
    operators = _collect_valid_ids(
        raw_allowlist, _MATRIX_USER_RE, "operator user_id", errors
    )
    rooms = _collect_valid_ids(
        raw_control_rooms, _ROOM_ID_RE, "control room_id", errors
    )

    if errors:
        raise ValueError(f"Control config parse errors: {'; '.join(errors)}")

    cfg = ControlConfig(
        operator_allowlist=frozenset(operators),
        control_rooms=frozenset(rooms),
    )
    if cfg.is_enabled:
        # Report the effective (de-duplicated) sizes, not the raw entry counts.
        logger.info(
            "Control channel enabled: %d operators, %d rooms",
            len(cfg.operator_allowlist), len(cfg.control_rooms),
        )
    else:
        logger.info("Control channel disabled (empty allowlist or no control rooms)")
    return cfg
|
||||
|
||||
|
||||
# ── Message inspection ────────────────────────────────────────────────────────
|
||||
|
||||
def is_control_message(text: str) -> bool:
    """Tell whether *text* looks like a control command, i.e. starts with '!' after trimming whitespace."""
    if not text:
        # Empty (or otherwise falsy) input is never a command.
        return False
    trimmed = text.strip()
    return trimmed.startswith("!")
|
||||
|
||||
|
||||
def is_control_room(room_id: str, config: ControlConfig) -> bool:
    """Return True when *room_id* is a member of ``config.control_rooms``."""
    designated_rooms = config.control_rooms
    return room_id in designated_rooms
|
||||
|
||||
|
||||
def is_operator(sender: str, config: ControlConfig) -> bool:
    """Return True when *sender* is a member of ``config.operator_allowlist``."""
    allowed_senders = config.operator_allowlist
    return sender in allowed_senders
|
||||
|
||||
|
||||
def parse_command(text: str) -> Optional[ControlCommand]:
    """
    Turn a control message into a ControlCommand.

    The text is trimmed and must start with '!'.  The remainder is split on
    whitespace into tokens and handed to ControlCommand.from_tokens.

    Returns None when the text does not start with '!', is longer than
    _MAX_CMD_LEN characters, has no tokens after the '!', or has more than
    _MAX_CMD_TOKENS tokens.  The two limit violations are logged as warnings.
    """
    candidate = text.strip()
    if candidate[:1] != "!":
        return None

    length = len(candidate)
    if length > _MAX_CMD_LEN:
        logger.warning("Control command too long (%d chars) — rejected", length)
        return None

    # Everything after the leading '!' is split on whitespace.
    words = candidate[1:].split()
    word_count = len(words)
    if word_count == 0:
        return None
    if word_count > _MAX_CMD_TOKENS:
        logger.warning("Control command has too many tokens (%d) — rejected", word_count)
        return None

    return ControlCommand.from_tokens(words, raw=candidate)
|
||||
|
||||
|
||||
# ── Authorization check ───────────────────────────────────────────────────────
|
||||
|
||||
def check_authorization(
    sender: str,
    room_id: str,
    config: ControlConfig,
) -> Tuple[bool, str]:
    """
    Decide whether *sender* may issue control commands in *room_id*.

    Returns a pair (authorized, reason), where reason is one of:
      - "not_control_room": room_id is not in config.control_rooms
      - "not_operator":     sender is not in config.operator_allowlist
      - "ok":               both checks passed

    The room check runs first, so a non-operator in a non-control room is
    reported as "not_control_room".  Only the "not_operator" outcome is
    logged (as a warning) here.
    """
    if room_id not in config.control_rooms:
        return False, "not_control_room"

    if sender in config.operator_allowlist:
        return True, "ok"

    logger.warning(
        "Unauthorized control attempt: sender=%s room=%s not in allowlist",
        sender, room_id,
    )
    return False, "not_operator"
|
||||
|
||||
|
||||
# ── Reply helpers ─────────────────────────────────────────────────────────────
|
||||
|
||||
def not_implemented_reply(cmd: ControlCommand) -> str:
    """Build the two-line acknowledgement for a known command whose handler is still a stub (M3.0)."""
    acknowledged = f"✅ Command acknowledged: `{cmd.raw}`\n"
    pending = f"⏳ `!{cmd.verb} {cmd.subcommand}` — implementation pending (M3.1+)."
    return acknowledged + pending
|
||||
|
||||
|
||||
def unknown_command_reply(cmd: ControlCommand) -> str:
    """Build the reply for an unrecognised verb: echo the raw command and list KNOWN_VERBS alphabetically."""
    verb_list = ", ".join(sorted(KNOWN_VERBS))
    parts = (
        f"⚠️ Unknown command: `{cmd.raw}`\n",
        f"Known verbs: {verb_list}.\n",
        "Type `!help` for usage.",
    )
    return "".join(parts)
|
||||
|
||||
|
||||
def unauthorized_reply(reason: str) -> str:
    """
    Build the rejection text for an unauthorized command attempt.

    "not_operator" yields the allowlist message; any other reason yields the
    "not a control channel" message.
    """
    if reason != "not_operator":
        return "⛔ Not authorised: this room is not a control channel."
    return "⛔ Not authorised: your Matrix ID is not in the operator allowlist."
|
||||
|
||||
|
||||
def help_reply() -> str:
    """Return the static usage summary listing the control commands."""
    sections = (
        # Title
        "**DAGI Bridge — Control Commands**\n\n",
        # Runbook commands
        "`!runbook start <path> [node=NODA1]` — Start a runbook run\n",
        "`!runbook next <run_id>` — Advance to next step\n",
        "`!runbook complete <run_id> step=<n> status=ok` — Mark step complete\n",
        "`!runbook evidence <run_id>` — Get evidence artifact path\n",
        "`!runbook status <run_id>` — Show current run state\n",
        # Bridge-level commands
        "`!status` — Bridge health summary\n",
        "`!help` — This message\n\n",
        # Footer
        "_Only authorised operators can issue control commands._",
    )
    return "".join(sections)
|
||||
@@ -1,5 +1,5 @@
|
||||
"""
|
||||
Matrix Ingress + Egress Loop — Phase M1.4 + H1 + H2 + H3 + M2.1 + M2.2 (mixed rooms hardening)
|
||||
Matrix Ingress + Egress Loop — Phase M1.4 + H1 + H2 + H3 + M2.1 + M2.2 + M3.0 (control channel)
|
||||
|
||||
Architecture (H2):
|
||||
Reader task → asyncio.Queue(maxsize) → N Worker tasks
|
||||
@@ -33,6 +33,12 @@ from typing import Any, Callable, Dict, List, Optional
|
||||
|
||||
import httpx
|
||||
|
||||
from .control import (
|
||||
ControlConfig, ControlCommand,
|
||||
check_authorization, parse_command, is_control_message,
|
||||
not_implemented_reply, unknown_command_reply, unauthorized_reply, help_reply,
|
||||
VERB_HELP,
|
||||
)
|
||||
from .matrix_client import MatrixClient
|
||||
from .mixed_routing import (
|
||||
MixedRoomConfig, route_message, reply_prefix,
|
||||
@@ -178,6 +184,9 @@ class MatrixIngressLoop:
|
||||
unknown_agent_behavior: str = "ignore", # "ignore" | "reply_error"
|
||||
max_slash_len: int = 32,
|
||||
mixed_concurrency_cap: int = 1, # 0 = unlimited
|
||||
# M3.0: control channel
|
||||
control_config: Optional[ControlConfig] = None,
|
||||
control_unauthorized_behavior: str = "ignore", # "ignore" | "reply_error"
|
||||
# Callbacks
|
||||
on_message_received: Optional[Callable[[str, str], None]] = None,
|
||||
on_message_replied: Optional[Callable[[str, str, str], None]] = None,
|
||||
@@ -190,6 +199,7 @@ class MatrixIngressLoop:
|
||||
on_queue_wait: Optional[Callable[[str, float], None]] = None,
|
||||
on_routed: Optional[Callable[[str, str], None]] = None,
|
||||
on_route_rejected: Optional[Callable[[str, str], None]] = None,
|
||||
on_control_command: Optional[Callable[[str, str, str], None]] = None,
|
||||
) -> None:
|
||||
self._hs_url = matrix_homeserver_url
|
||||
self._token = matrix_access_token
|
||||
@@ -214,11 +224,14 @@ class MatrixIngressLoop:
|
||||
self._on_send_latency = on_send_latency
|
||||
self._on_queue_wait = on_queue_wait
|
||||
self._mixed_room_config = mixed_room_config
|
||||
self._control_config = control_config
|
||||
self._control_unauthorized_behavior = control_unauthorized_behavior
|
||||
self._unknown_agent_behavior = unknown_agent_behavior
|
||||
self._max_slash_len = max_slash_len
|
||||
self._mixed_concurrency_cap = mixed_concurrency_cap
|
||||
self._on_routed = on_routed
|
||||
self._on_route_rejected = on_route_rejected
|
||||
self._on_control_command = on_control_command
|
||||
# Lazily populated semaphores keyed by "{room_id}:{agent_id}"
|
||||
self._concurrency_locks: Dict[str, asyncio.Semaphore] = {}
|
||||
self._next_batch: Optional[str] = None
|
||||
@@ -281,6 +294,17 @@ class MatrixIngressLoop:
|
||||
await client.join_room(room_id)
|
||||
except Exception as exc:
|
||||
logger.warning("Could not join mixed room %s: %s", room_id, exc)
|
||||
if self._control_config and self._control_config.is_enabled:
|
||||
for room_id in self._control_config.control_rooms:
|
||||
try:
|
||||
await client.join_room(room_id)
|
||||
except Exception as exc:
|
||||
logger.warning("Could not join control room %s: %s", room_id, exc)
|
||||
logger.info(
|
||||
"Control channel: %d rooms, %d operators",
|
||||
len(self._control_config.control_rooms),
|
||||
len(self._control_config.operator_allowlist),
|
||||
)
|
||||
|
||||
async with httpx.AsyncClient() as http_client:
|
||||
# Start workers
|
||||
@@ -355,6 +379,13 @@ class MatrixIngressLoop:
|
||||
http_client: httpx.AsyncClient,
|
||||
sync_resp: Dict[str, Any],
|
||||
) -> None:
|
||||
# M3.0: Control rooms — handled first, not forwarded to agents
|
||||
if self._control_config and self._control_config.is_enabled:
|
||||
for room_id in self._control_config.control_rooms:
|
||||
messages = client.extract_room_messages(sync_resp, room_id)
|
||||
for event in messages:
|
||||
await self._try_control(client, http_client, event, room_id)
|
||||
|
||||
# Regular rooms: 1 room → 1 agent (M1 / M2.0)
|
||||
for mapping in self._room_map.mappings:
|
||||
if mapping.agent_id not in self._room_map.allowed_agents:
|
||||
@@ -559,6 +590,110 @@ class MatrixIngressLoop:
|
||||
data={"queue_max": self._queue_max, "sender": sender},
|
||||
)
|
||||
|
||||
# ── Control command handler ────────────────────────────────────────────────
|
||||
|
||||
async def _try_control(
|
||||
self,
|
||||
client: MatrixClient,
|
||||
http_client: httpx.AsyncClient,
|
||||
event: Dict[str, Any],
|
||||
room_id: str,
|
||||
) -> None:
|
||||
"""
|
||||
Process a message from a control room.
|
||||
|
||||
Non-command messages (not starting with '!') are silently ignored.
|
||||
All command attempts are audited regardless of authorization.
|
||||
"""
|
||||
assert self._control_config is not None
|
||||
event_id = event.get("event_id", "")
|
||||
sender = event.get("sender", "")
|
||||
text = event.get("content", {}).get("body", "").strip()
|
||||
|
||||
if not text or not is_control_message(text):
|
||||
return # not a command, ignore
|
||||
|
||||
client.mark_seen(event_id)
|
||||
|
||||
# Authorization check
|
||||
authorized, rejection_reason = check_authorization(sender, room_id, self._control_config)
|
||||
|
||||
if not authorized:
|
||||
await _write_audit(
|
||||
http_client, self._console_url, self._internal_token,
|
||||
event="matrix.control.unauthorized",
|
||||
agent_id="control", node_id=self._node_id,
|
||||
room_id=room_id, event_id=event_id,
|
||||
status="error", error_code=rejection_reason,
|
||||
data={"sender": sender, "command_preview": text[:80]},
|
||||
)
|
||||
logger.warning(
|
||||
"Unauthorized control command: sender=%s room=%s reason=%s cmd=%r",
|
||||
sender, room_id, rejection_reason, text[:60],
|
||||
)
|
||||
if self._control_unauthorized_behavior == "reply_error":
|
||||
try:
|
||||
txn_id = MatrixClient.make_txn_id(room_id, event_id + "_unauth")
|
||||
await client.send_text(room_id, unauthorized_reply(rejection_reason), txn_id)
|
||||
except Exception as exc:
|
||||
logger.warning("Could not send unauthorized reply: %s", exc)
|
||||
return
|
||||
|
||||
# Parse command
|
||||
cmd = parse_command(text)
|
||||
if cmd is None:
|
||||
logger.warning("Control message from %s could not be parsed: %r", sender, text[:60])
|
||||
return
|
||||
|
||||
# Metric callback
|
||||
if self._on_control_command:
|
||||
self._on_control_command(sender, cmd.verb, cmd.subcommand)
|
||||
|
||||
# Audit every authorized command
|
||||
await _write_audit(
|
||||
http_client, self._console_url, self._internal_token,
|
||||
event="matrix.control.command",
|
||||
agent_id="control", node_id=self._node_id,
|
||||
room_id=room_id, event_id=event_id,
|
||||
status="ok",
|
||||
data={
|
||||
"sender": sender,
|
||||
"verb": cmd.verb,
|
||||
"subcommand": cmd.subcommand,
|
||||
"args": list(cmd.args),
|
||||
"kwargs": dict(cmd.kwargs),
|
||||
"is_known": cmd.is_known,
|
||||
},
|
||||
)
|
||||
|
||||
logger.info(
|
||||
"Control command: sender=%s verb=%s sub=%s args=%s",
|
||||
sender, cmd.verb, cmd.subcommand, cmd.args,
|
||||
)
|
||||
|
||||
# Build reply
|
||||
txn_id = MatrixClient.make_txn_id(room_id, event_id + "_ctrl")
|
||||
if cmd.verb == VERB_HELP:
|
||||
reply_text = help_reply()
|
||||
elif not cmd.is_known:
|
||||
reply_text = unknown_command_reply(cmd)
|
||||
await _write_audit(
|
||||
http_client, self._console_url, self._internal_token,
|
||||
event="matrix.control.unknown_cmd",
|
||||
agent_id="control", node_id=self._node_id,
|
||||
room_id=room_id, event_id=event_id,
|
||||
status="error", error_code="unknown_verb",
|
||||
data={"verb": cmd.verb, "sender": sender},
|
||||
)
|
||||
else:
|
||||
# M3.1+ will implement actual runbook/status commands
|
||||
reply_text = not_implemented_reply(cmd)
|
||||
|
||||
try:
|
||||
await client.send_text(room_id, reply_text, txn_id)
|
||||
except Exception as exc:
|
||||
logger.error("Could not send control reply: %s", exc)
|
||||
|
||||
# ── Worker ─────────────────────────────────────────────────────────────────
|
||||
|
||||
async def _worker(
|
||||
|
||||
@@ -32,6 +32,7 @@ except ImportError: # pragma: no cover
|
||||
_PROM_OK = False
|
||||
|
||||
from .config import BridgeConfig, load_config
|
||||
from .control import ControlConfig, parse_control_config
|
||||
from .ingress import MatrixIngressLoop
|
||||
from .mixed_routing import MixedRoomConfig, parse_mixed_room_map
|
||||
from .rate_limit import InMemoryRateLimiter
|
||||
@@ -120,6 +121,12 @@ if _PROM_OK:
|
||||
"matrix_bridge_active_room_agent_locks",
|
||||
"Number of room-agent pairs currently holding a concurrency lock",
|
||||
)
|
||||
# M3.0: Control channel
|
||||
_control_commands_total = Counter(
|
||||
"matrix_bridge_control_commands_total",
|
||||
"Total control commands received from authorized operators",
|
||||
["sender", "verb", "subcommand"],
|
||||
)
|
||||
|
||||
# ── Startup state ─────────────────────────────────────────────────────────────
|
||||
_START_TIME = time.monotonic()
|
||||
@@ -129,6 +136,7 @@ _matrix_reachable: Optional[bool] = None
|
||||
_gateway_reachable: Optional[bool] = None
|
||||
_room_map: Optional[RoomMappingConfig] = None
|
||||
_mixed_room_config: Optional[MixedRoomConfig] = None
|
||||
_control_config: Optional[ControlConfig] = None
|
||||
_rate_limiter: Optional[InMemoryRateLimiter] = None
|
||||
_ingress_loop: Optional["MatrixIngressLoop"] = None # for /health queue_size
|
||||
_ingress_task: Optional[asyncio.Task] = None
|
||||
@@ -150,7 +158,7 @@ async def _probe_url(url: str, timeout: float = 5.0) -> bool:
|
||||
@asynccontextmanager
|
||||
async def lifespan(app_: Any):
|
||||
global _cfg, _config_error, _matrix_reachable, _gateway_reachable
|
||||
global _room_map, _mixed_room_config, _rate_limiter, _ingress_loop
|
||||
global _room_map, _mixed_room_config, _control_config, _rate_limiter, _ingress_loop
|
||||
try:
|
||||
_cfg = load_config()
|
||||
|
||||
@@ -186,13 +194,24 @@ async def lifespan(app_: Any):
|
||||
_cfg.rate_limit_room_rpm, _cfg.rate_limit_sender_rpm,
|
||||
)
|
||||
|
||||
# M3.0: Operator control channel
|
||||
if _cfg.bridge_operator_allowlist or _cfg.bridge_control_rooms:
|
||||
_control_config = parse_control_config(
|
||||
_cfg.bridge_operator_allowlist,
|
||||
_cfg.bridge_control_rooms,
|
||||
)
|
||||
else:
|
||||
_control_config = None
|
||||
|
||||
mixed_count = _mixed_room_config.total_rooms if _mixed_room_config else 0
|
||||
ctrl_rooms = len(_control_config.control_rooms) if _control_config else 0
|
||||
ctrl_ops = len(_control_config.operator_allowlist) if _control_config else 0
|
||||
logger.info(
|
||||
"✅ matrix-bridge-dagi started | node=%s build=%s homeserver=%s "
|
||||
"agents=%s mappings=%d mixed_rooms=%d",
|
||||
"agents=%s mappings=%d mixed_rooms=%d ctrl_rooms=%d ctrl_operators=%d",
|
||||
_cfg.node_id, _cfg.build_sha, _cfg.matrix_homeserver_url,
|
||||
list(_cfg.bridge_allowed_agents),
|
||||
_room_map.total_mappings, mixed_count,
|
||||
_room_map.total_mappings, mixed_count, ctrl_rooms, ctrl_ops,
|
||||
)
|
||||
|
||||
# Connectivity smoke probes (non-blocking failures)
|
||||
@@ -274,6 +293,13 @@ async def lifespan(app_: Any):
|
||||
if _PROM_OK:
|
||||
_route_rejected_total.labels(room_id=room_id, reason=reason).inc()
|
||||
|
||||
# M3.0 callbacks
|
||||
def _on_control_command(sender: str, verb: str, subcommand: str) -> None:
    """Increment the control-commands counter for this (sender, verb, subcommand); no-op when _PROM_OK is false."""
    if not _PROM_OK:
        return
    labelled = _control_commands_total.labels(
        sender=sender, verb=verb, subcommand=subcommand
    )
    labelled.inc()
|
||||
|
||||
ingress = MatrixIngressLoop(
|
||||
matrix_homeserver_url=_cfg.matrix_homeserver_url,
|
||||
matrix_access_token=_cfg.matrix_access_token,
|
||||
@@ -302,6 +328,9 @@ async def lifespan(app_: Any):
|
||||
on_queue_wait=_on_queue_wait,
|
||||
on_routed=_on_routed,
|
||||
on_route_rejected=_on_route_rejected,
|
||||
control_config=_control_config,
|
||||
control_unauthorized_behavior=_cfg.control_unauthorized_behavior,
|
||||
on_control_command=_on_control_command,
|
||||
)
|
||||
logger.info(
|
||||
"✅ Backpressure queue: max=%d workers=%d drain_timeout=%.1fs",
|
||||
@@ -400,6 +429,12 @@ async def health() -> Dict[str, Any]:
|
||||
"concurrency_cap": _cfg.mixed_concurrency_cap,
|
||||
"active_room_agent_locks": _ingress_loop.active_lock_count if _ingress_loop else 0,
|
||||
},
|
||||
"control_channel": {
|
||||
"enabled": _control_config.is_enabled if _control_config else False,
|
||||
"control_rooms_count": len(_control_config.control_rooms) if _control_config else 0,
|
||||
"operators_count": len(_control_config.operator_allowlist) if _control_config else 0,
|
||||
"unauthorized_behavior": _cfg.control_unauthorized_behavior,
|
||||
},
|
||||
}
|
||||
|
||||
|
||||
@@ -424,6 +459,8 @@ async def bridge_mappings() -> Dict[str, Any]:
|
||||
"mappings": _room_map.as_summary(),
|
||||
"mixed_rooms_total": _mixed_room_config.total_rooms if _mixed_room_config else 0,
|
||||
"mixed_rooms": _mixed_room_config.as_summary() if _mixed_room_config else [],
|
||||
"control_rooms": sorted(_control_config.control_rooms) if _control_config else [],
|
||||
"control_operators_count": len(_control_config.operator_allowlist) if _control_config else 0,
|
||||
}
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user