Files
microdao-daarion/services/matrix-bridge-dagi/app/control.py
Apple 82d5ff2a4f feat(matrix-bridge-dagi): M4–M11 + soak infrastructure (debug inject endpoint)
Includes all milestones M4 through M11:
- M4: agent discovery (!agents / !status)
- M5: node-aware routing + per-node observability
- M6: dynamic policy store (node/agent overrides, import/export)
- M7: Prometheus alerts + Grafana dashboard + metrics contract
- M8: node health tracker + soft failover + sticky cache + HA persistence
- M9: two-step confirm + diff preview for dangerous commands
- M10: auto-backup, restore, retention, policy history + change detail
- M11: soak scenarios (CI tests) + live soak script

Soak infrastructure (this commit):
- POST /v1/debug/inject_event (guarded by DEBUG_INJECT_ENABLED=false)
- _preflight_inject() and _check_wal() in soak script
- --db-path arg for WAL delta reporting
- Runbook sections 2a/2b/2c: Step 0 and Step 1 exact commands

Made-with: Cursor
2026-03-05 07:51:37 -08:00

1188 lines
43 KiB
Python
Raw Blame History

This file contains invisible Unicode characters
This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""
Matrix Bridge — Control Command Layer (M3.0)
Handles operator commands from designated control rooms.
Access policy (AND):
1. Message arrives in a BRIDGE_CONTROL_ROOM
2. Sender is in BRIDGE_OPERATOR_ALLOWLIST
3. Message starts with "!" prefix (e.g. "!runbook start ...")
Design principles:
- Bridge is a TRANSPORT only — it never executes scripts directly.
- All actions go via sofiia-console internal API (M3.1+).
- Every command attempt is audited regardless of authorization.
- Unknown commands acknowledged but not executed (forward-compatible).
Audit events emitted:
matrix.control.command — authorised command recognised
matrix.control.unauthorized — command from non-operator or wrong room
matrix.control.unknown_cmd — authorised but unrecognised verb
"""
import logging
import re
from dataclasses import dataclass, field
from typing import Any, Dict, FrozenSet, List, Optional, Tuple
logger = logging.getLogger(__name__)
# ── Constants ─────────────────────────────────────────────────────────────────
# Supported control verbs: the first token of a command, without the "!" prefix.
VERB_RUNBOOK = "runbook"
VERB_STATUS = "status"
VERB_NODES = "nodes"  # M5.1: node policy overview
VERB_NODE = "node"  # M6.0: dynamic room-node override commands
VERB_ROOM = "room"  # M6.1: dynamic mixed room agent overrides
VERB_POLICY = "policy"  # M6.2: policy snapshot export/import
VERB_CONFIRM = "confirm"  # M9.0: two-step confirmation for dangerous commands
VERB_HELP = "help"
# Every verb the bridge recognises; ControlCommand.is_known is derived from
# membership in this set.
KNOWN_VERBS: FrozenSet[str] = frozenset({
    VERB_RUNBOOK, VERB_STATUS, VERB_NODES, VERB_NODE,
    VERB_ROOM, VERB_POLICY, VERB_CONFIRM, VERB_HELP,
})
# ── M9.0: Dangerous command detection ─────────────────────────────────────────
def is_dangerous_cmd(cmd: "ControlCommand") -> bool:
    """
    Return True if the command requires two-step confirmation before applying.

    Dangerous commands:
      !node set room=... node=...            — changes room routing
      !room agents set room=... agents=...   — replaces all agents for a room
      !policy import ...                     — overwrites policy DB (both modes)
      !policy prune_exports dry_run=0        — actual deletion (M10.0)
      !policy restore ...                    — always dangerous, no dry_run (M10.1)

    The ``dry_run`` value is compared case-insensitively so that spellings
    such as ``dry_run=False`` or ``dry_run=NO`` cannot bypass confirmation.

    Args:
        cmd: Parsed control command (verb, subcommand, args, kwargs are read).

    Returns:
        True when the command must be held for ``!confirm``, else False.
    """
    verb = cmd.verb
    sub = (cmd.subcommand or "").strip().lower()

    if verb == VERB_NODE:
        return sub == "set"

    if verb == VERB_ROOM:
        # For "!room agents <action>", the action is the first positional arg.
        return sub == "agents" and bool(cmd.args) and cmd.args[0].lower() == "set"

    if verb == VERB_POLICY:
        if sub in ("import", "restore"):
            return True
        if sub == "prune_exports":
            # Dry run is the default; only an explicit negative value turns
            # the prune into a real deletion.
            dry_raw = cmd.kwargs.get("dry_run", "1").strip().lower()
            return dry_raw in ("0", "false", "no")

    return False
def build_normalized_args(cmd: "ControlCommand") -> str:
    """
    Render the command's arguments as one normalized, human-readable string.

    Positional args come first in their original order, followed by the
    key=value pairs sorted by key. For ``!room agents ...`` the first
    positional (the action word) is left out.
    Used in audit events and confirmation prompts.
    """
    # For !room agents <action>, drop the action positional from the display
    drop = 1 if (cmd.verb == VERB_ROOM and cmd.subcommand == "agents") else 0
    positional = list(cmd.args)[drop:]
    keyed = [f"{key}={value}" for key, value in sorted(cmd.kwargs.items())]
    return " ".join(positional + keyed)
def confirm_intent_reply(action_summary: str, nonce: str, ttl_s: int) -> str:
    """
    Build the prompt sent when a dangerous command is held for confirmation (M9.0).

    The message shows the action summary, the ``!confirm <nonce>`` command to
    type, and the time window in seconds.
    """
    message_lines = [
        "⚠️ **Confirm required**",
        f"Action: `{action_summary}`",
        f"Type `!confirm {nonce}` within {ttl_s}s to apply.",
        "_(Only you can confirm this action.)_",
    ]
    return "\n".join(message_lines)
def confirm_success_reply(action_result: str) -> str:
    """Build the reply sent once a confirmation is accepted and the action applied (M9.0)."""
    banner = "✅ Confirmed and applied."
    return "{}\n{}".format(banner, action_result)
def confirm_expired_reply() -> str:
    """Build the reply for a nonce that is invalid, expired, or from a different sender (M9.0)."""
    verdict = "❌ Invalid or expired confirmation code. The action was **not** applied."
    hint = "Re-issue the original command to get a new code."
    return f"{verdict}\n{hint}"
# M6.1: !room subcommand + actions
# The subcommand is the token right after "!room"; the action follows it.
ROOM_SUBCMD_AGENTS = "agents"
ROOM_ACTION_SET = "set"
ROOM_ACTION_ADD = "add"
ROOM_ACTION_REMOVE = "remove"
ROOM_ACTION_GET = "get"
ROOM_ACTION_LIST = "list"
ROOM_ACTION_UNSET = "unset"  # remove full override
# All action words accepted for "!room agents <action>".
_VALID_ROOM_ACTIONS = frozenset({
    ROOM_ACTION_SET, ROOM_ACTION_ADD, ROOM_ACTION_REMOVE,
    ROOM_ACTION_GET, ROOM_ACTION_LIST, ROOM_ACTION_UNSET,
})
# M6.0: !node subcommands
NODE_SUBCMD_SET = "set"
NODE_SUBCMD_UNSET = "unset"
NODE_SUBCMD_GET = "get"
NODE_SUBCMD_LIST = "list"
# All subcommand words accepted for "!node <subcommand>".
_VALID_NODE_SUBCMDS = frozenset({NODE_SUBCMD_SET, NODE_SUBCMD_UNSET, NODE_SUBCMD_GET, NODE_SUBCMD_LIST})
# Runbook subcommands (M3.x)
SUBCOMMAND_START = "start"  # M3.1 — implemented
SUBCOMMAND_NEXT = "next"  # M3.2 — implemented
SUBCOMMAND_COMPLETE = "complete"  # M3.2 — implemented
SUBCOMMAND_EVIDENCE = "evidence"  # M3.3 — implemented
SUBCOMMAND_STATUS = "status"  # M3.3 — implemented
SUBCOMMAND_POST_REVIEW = "post_review"  # M3.3 — implemented
# Max command line length to guard against garbage injection
# (parse_command rejects anything longer, measured after stripping whitespace).
_MAX_CMD_LEN = 512
# Max number of tokens in a single command (whitespace-separated, after the "!").
_MAX_CMD_TOKENS = 20
# Matrix user ID format: @localpart:server
_MATRIX_USER_RE = re.compile(r"^@[A-Za-z0-9._\-/=+]+:[A-Za-z0-9.\-]+$")
# Room ID format: !localpart:server
# NOTE: this name is bound again further down in the file (M6.0 section)
# with a pattern that accepts the same character set.
_ROOM_ID_RE = re.compile(r"^![A-Za-z0-9\-_.]+:[A-Za-z0-9\-_.]+$")
# ── Data structures ────────────────────────────────────────────────────────────
@dataclass(frozen=True)
class ControlCommand:
    """Immutable record of one control command parsed from a Matrix message."""

    verb: str  # e.g. "runbook"
    subcommand: str  # e.g. "start", "next", "complete", "evidence", "status"
    args: Tuple[str, ...]  # remaining positional args
    kwargs: Dict[str, str]  # key=value pairs parsed from args (e.g. node=NODA1)
    raw: str  # original message text
    is_known: bool  # True if verb in KNOWN_VERBS

    @classmethod
    def from_tokens(cls, tokens: List[str], raw: str) -> "ControlCommand":
        """
        Assemble a ControlCommand from already-split tokens.

        The first token is the verb (it must not carry the leading '!'), the
        second is the subcommand; both are lower-cased. Every later token that
        contains '=' becomes a kwargs entry (key lower-cased and stripped,
        value stripped); all other tokens are kept as positional args.
        """
        verb = tokens[0].lower() if tokens else ""
        subcommand = tokens[1].lower() if len(tokens) > 1 else ""

        positional: List[str] = []
        keyed: Dict[str, str] = {}
        for token in tokens[2:]:
            name, separator, value = token.partition("=")
            if separator:
                keyed[name.lower().strip()] = value.strip()
            else:
                positional.append(token)

        return cls(
            verb=verb,
            subcommand=subcommand,
            args=tuple(positional),
            kwargs=keyed,
            raw=raw,
            is_known=verb in KNOWN_VERBS,
        )
@dataclass
class ControlConfig:
    """
    Operator access policy for the control channel, in parsed form.

    operator_allowlist: Frozenset of Matrix user IDs allowed to issue commands.
    control_rooms: Frozenset of room IDs designated as control channels.
    """

    operator_allowlist: FrozenSet[str] = field(default_factory=frozenset)
    control_rooms: FrozenSet[str] = field(default_factory=frozenset)

    @property
    def is_enabled(self) -> bool:
        """True only when there is at least one operator AND at least one control room."""
        if not self.operator_allowlist:
            return False
        return bool(self.control_rooms)
# ── Parsers ────────────────────────────────────────────────────────────────────
def parse_control_config(
    raw_allowlist: str,
    raw_control_rooms: str,
) -> ControlConfig:
    """
    Build a ControlConfig from BRIDGE_OPERATOR_ALLOWLIST and BRIDGE_CONTROL_ROOMS.

    Allowlist format: "@ivan:daarion.space,@sergiy:daarion.space"
    Control rooms fmt: "!opsroom:server,!opsroom2:server2"

    Both values are comma-separated; entries are stripped and blank entries
    ignored. All malformed entries are collected and reported together.

    Raises ValueError on:
    - Malformed Matrix user ID
    - Malformed room ID
    """
    problems: List[str] = []

    def _collect(raw: str, pattern, error_prefix: str) -> List[str]:
        # Keep entries matching `pattern`; record the rest as problems.
        accepted: List[str] = []
        for piece in raw.split(","):
            candidate = piece.strip()
            if not candidate:
                continue
            if pattern.match(candidate):
                accepted.append(candidate)
            else:
                problems.append(f"{error_prefix}: {candidate!r}")
        return accepted

    operators = _collect(raw_allowlist, _MATRIX_USER_RE, "Invalid operator user_id")
    rooms = _collect(raw_control_rooms, _ROOM_ID_RE, "Invalid control room_id")

    if problems:
        raise ValueError(f"Control config parse errors: {'; '.join(problems)}")

    cfg = ControlConfig(
        operator_allowlist=frozenset(operators),
        control_rooms=frozenset(rooms),
    )
    if not cfg.is_enabled:
        logger.info("Control channel disabled (empty allowlist or no control rooms)")
    else:
        # Counts are taken before frozenset de-duplication.
        logger.info(
            "Control channel enabled: %d operators, %d rooms",
            len(operators), len(rooms),
        )
    return cfg
# ── Message inspection ────────────────────────────────────────────────────────
def is_control_message(text: str) -> bool:
    """Tell whether the message looks like a control command, i.e. its first non-blank character is '!'."""
    if not text:
        return False
    return text.strip().startswith("!")
def is_control_room(room_id: str, config: ControlConfig) -> bool:
    """Return True when room_id is one of the configured control rooms."""
    designated = config.control_rooms
    return room_id in designated
def is_operator(sender: str, config: ControlConfig) -> bool:
    """Return True when sender is listed in the operator allowlist."""
    allowed = config.operator_allowlist
    return sender in allowed
def parse_command(text: str) -> Optional[ControlCommand]:
    """
    Turn a control message into a ControlCommand.

    Returns None when the stripped text does not start with '!', when it is
    longer than _MAX_CMD_LEN, when nothing follows the '!', or when it splits
    into more than _MAX_CMD_TOKENS tokens. Over-limit inputs are logged.
    """
    candidate = text.strip()
    if not candidate.startswith("!"):
        return None

    size = len(candidate)
    if size > _MAX_CMD_LEN:
        logger.warning("Control command too long (%d chars) — rejected", size)
        return None

    # Drop the leading '!' and split on any whitespace
    tokens = candidate[1:].split()
    count = len(tokens)
    if count == 0:
        return None
    if count > _MAX_CMD_TOKENS:
        logger.warning("Control command has too many tokens (%d) — rejected", count)
        return None

    return ControlCommand.from_tokens(tokens, raw=candidate)
# ── Authorization check ───────────────────────────────────────────────────────
def check_authorization(
    sender: str,
    room_id: str,
    config: ControlConfig,
) -> Tuple[bool, str]:
    """
    Decide whether sender may issue control commands in room_id.

    Returns (authorized: bool, rejection_reason: str).
    Reasons:
    - "not_control_room": room not in control_rooms (checked first)
    - "not_operator": sender not in allowlist (logged as a warning)
    - "ok": authorized
    """
    in_control_room = is_control_room(room_id, config)
    if in_control_room and is_operator(sender, config):
        return True, "ok"
    if not in_control_room:
        return False, "not_control_room"

    # Right room, wrong sender
    logger.warning(
        "Unauthorized control attempt: sender=%s room=%s not in allowlist",
        sender, room_id,
    )
    return False, "not_operator"
# ── Reply helpers ─────────────────────────────────────────────────────────────
def not_implemented_reply(cmd: ControlCommand) -> str:
    """Acknowledge a known command whose handler is not implemented yet."""
    acknowledged = f"✅ Command acknowledged: `{cmd.raw}`"
    pending = f"⏳ `!{cmd.verb} {cmd.subcommand}` — implementation pending."
    return "\n".join((acknowledged, pending))
def next_usage_reply() -> str:
    """Usage hint for `!runbook next` called without a run_id."""
    usage = "⚠️ Usage: `!runbook next <run_id>`"
    example = "Example: `!runbook next abc-123`"
    return f"{usage}\n{example}"
def complete_usage_reply() -> str:
    """Usage hint for `!runbook complete` with required args missing."""
    return "\n".join((
        "⚠️ Usage: `!runbook complete <run_id> step=<n> status=ok|warn|fail [notes=...]`",
        "Example: `!runbook complete abc-123 step=3 status=ok notes=done`",
        "Notes with spaces: join without quotes — `notes=done_and_verified`.",
    ))
def start_usage_reply() -> str:
    """Usage hint for `!runbook start` with a missing or invalid runbook_path."""
    return "\n".join((
        "⚠️ Usage: `!runbook start <runbook_path> [node=NODA1]`",
        "Example: `!runbook start runbooks/rehearsal-v1-checklist.md node=NODA1`",
        "runbook_path must be a relative path without `..`.",
    ))
def runbook_started_reply(run_id: str, steps_total: int, status: str) -> str:
    """Success message shown after sofiia-console creates a runbook run."""
    summary = f"✅ runbook started: `run_id={run_id}` steps={steps_total} status={status}"
    follow_up = f"Next: `!runbook next {run_id}`"
    return "\n".join((summary, follow_up))
def runbook_start_error_reply(reason: str) -> str:
    """Error message for a non-2xx response or connection error from sofiia-console."""
    return "❌ failed to start runbook: {}".format(reason)
# ── M3.2 reply helpers ────────────────────────────────────────────────────────
# Max chars of instructions_md to include in Matrix message before truncating
_INSTRUCTIONS_EXCERPT_MAX = 1500
def next_manual_reply(
    run_id: str,
    step_index: int,
    steps_total: Optional[int],
    title: str,
    instructions_md: str,
) -> str:
    """
    Format the reply for a manual step returned by `!runbook next`.

    The step number is shown 1-based (step_index is 0-based), with "/total"
    appended when steps_total is truthy. Instructions longer than
    _INSTRUCTIONS_EXCERPT_MAX are cut at that length and then trimmed back to
    the last newline, followed by a truncation notice. The closing line gives
    the exact `!runbook complete` command, using the 0-based step_index.
    """
    label = f"Step {step_index + 1}" + (f"/{steps_total}" if steps_total else "")

    body = instructions_md.strip()
    was_cut = len(body) > _INSTRUCTIONS_EXCERPT_MAX
    if was_cut:
        # Cut at the limit, then drop the trailing partial line
        body = body[:_INSTRUCTIONS_EXCERPT_MAX].rsplit("\n", 1)[0]

    out = [f"🧭 {label}: **{title}**", "", body]
    if was_cut:
        out.append("_...(truncated — open in console for full instructions)_")
    out.extend([
        "",
        f"Complete: `!runbook complete {run_id} step={step_index} status=ok`",
    ])
    return "\n".join(out)
def next_auto_reply(
    run_id: str,
    step_index: int,
    action_type: str,
    step_status: str,
    duration_ms: Optional[int],
    completed: bool,
) -> str:
    """
    Format the reply for an auto step (http_check/script) run by `!runbook next`.

    The header carries a status marker, the 1-based step number, the action
    type, the status and (when known) the duration. The second part points
    either to `!runbook evidence` (run completed) or to the next
    `!runbook next` call.
    """
    markers = {"ok": "", "warn": "⚠️", "fail": ""}
    marker = markers.get(step_status, "")
    duration = "" if duration_ms is None else f" duration={duration_ms}ms"
    header = f"{marker} step {step_index + 1} ({action_type}) {step_status}{duration}"

    if not completed:
        return f"{header}\nNext: `!runbook next {run_id}`"

    return "\n".join((
        header,
        "🎉 All steps completed!",
        f"Get evidence: `!runbook evidence {run_id}`",
    ))
def next_error_reply(run_id: str, reason: str) -> str:
    """Error message for a failed `!runbook next` (run_id is accepted but not shown)."""
    return "❌ failed to advance runbook: {}".format(reason)
def complete_ok_reply(run_id: str, step_index: int, status: str, run_completed: bool) -> str:
    """
    Success message after `!runbook complete`.

    The first line records the 1-based step number and its status; the rest
    points to `!runbook evidence` when the run is finished, otherwise to the
    next `!runbook next` call.
    """
    markers = {"ok": "", "warn": "⚠️", "fail": "", "skipped": "⏭️"}
    recorded = f"{markers.get(status, '')} recorded step {step_index + 1}: {status}"
    if run_completed:
        tail = f"🎉 All steps completed!\nGet evidence: `!runbook evidence {run_id}`"
    else:
        tail = f"Next: `!runbook next {run_id}`"
    return f"{recorded}\n{tail}"
def complete_error_reply(run_id: str, reason: str) -> str:
    """Error message for a failed `!runbook complete` (run_id is accepted but not shown)."""
    return "❌ failed to complete step: {}".format(reason)
# ── M3.3 reply helpers ────────────────────────────────────────────────────────
def status_usage_reply() -> str:
    """Usage hint for `!runbook status`."""
    usage = "⚠️ Usage: `!runbook status <run_id>`"
    example = "Example: `!runbook status abc-123`"
    return f"{usage}\n{example}"
def evidence_usage_reply() -> str:
    """Usage hint for `!runbook evidence`."""
    usage = "⚠️ Usage: `!runbook evidence <run_id>`"
    example = "Example: `!runbook evidence abc-123`"
    return f"{usage}\n{example}"
def post_review_usage_reply() -> str:
    """Usage hint for `!runbook post_review`."""
    usage = "⚠️ Usage: `!runbook post_review <run_id>`"
    example = "Example: `!runbook post_review abc-123`"
    return f"{usage}\n{example}"
def status_reply(run: dict) -> str:
    """
    Format the `!runbook status` reply from a get_run response.

    Args:
        run: Run payload. Keys read: run_id, status, current_step,
            steps_total, steps (list of dicts with a "status" key),
            runbook_path, node_id, evidence_path. Missing keys fall back to
            "?" / 0. A missing or null "steps" is treated as an empty list.

    Returns:
        A multi-line message: a summary line, a runbook/node line, an
        optional warn/fail count line, an optional evidence path line, and
        for completed runs a hint for the next command.
    """
    # `or []` also covers an explicit null, which would otherwise break
    # both len() and the counting loops below.
    steps = run.get("steps") or []

    run_id = run.get("run_id", "?")
    status = run.get("status", "?")
    current = run.get("current_step", 0)
    steps_total = run.get("steps_total") or len(steps)
    runbook_path = run.get("runbook_path", "?")
    node_id = run.get("node_id", "?")
    evidence_path = run.get("evidence_path")

    # Count warn/fail steps
    warn_count = sum(1 for s in steps if s.get("status") == "warn")
    fail_count = sum(1 for s in steps if s.get("status") == "fail")

    status_emoji = {
        "running": "🔄", "completed": "", "aborted": "🛑", "paused": "⏸️",
    }.get(status, "")
    step_label = f"{current}/{steps_total}" if steps_total else str(current)

    lines = [
        f"{status_emoji} `run_id={run_id}` status={status} step={step_label}",
        f"runbook: `{runbook_path}` node: {node_id}",
    ]
    if warn_count or fail_count:
        lines.append(f"warn={warn_count} fail={fail_count}")
    if evidence_path:
        lines.append(f"evidence: `{evidence_path}`")

    # Completed runs get a pointer to the next command in the workflow.
    if status == "completed":
        if evidence_path:
            lines.append(f"Post-review: `!runbook post_review {run_id}`")
        else:
            lines.append(f"Get evidence: `!runbook evidence {run_id}`")
    return "\n".join(lines)
def status_error_reply(run_id: str, reason: str) -> str:
    """Error message for a failed `!runbook status` (run_id is accepted but not shown)."""
    return "❌ failed to get status: {}".format(reason)
def evidence_reply(result: dict) -> str:
    """
    Success message after `!runbook evidence`.

    Reads evidence_path, bytes, created_at and run_id from `result`; the
    timestamp line and the post_review hint appear only when their value is
    present and truthy.
    """
    out = [
        f"📄 evidence created: `{result.get('evidence_path', '?')}` (bytes={result.get('bytes', 0)})"
    ]
    created_at = result.get("created_at", "")
    if created_at:
        out.append(f"created_at: {created_at}")
    run_id = result.get("run_id", "")
    if run_id:
        out.append(f"Next: `!runbook post_review {run_id}`")
    return "\n".join(out)
def evidence_error_reply(run_id: str, reason: str) -> str:
    """Error message for a failed `!runbook evidence` (run_id is accepted but not shown)."""
    return "❌ failed to generate evidence: {}".format(reason)
def post_review_reply(result: dict) -> str:
    """
    Success message after `!runbook post_review`.

    Reads path, bytes and created_at from `result`; the timestamp line is
    added only when created_at is present and truthy.
    """
    out = [
        f"🧾 post-review created: `{result.get('path', '?')}` (bytes={result.get('bytes', 0)})"
    ]
    created_at = result.get("created_at", "")
    if created_at:
        out.append(f"created_at: {created_at}")
    return "\n".join(out)
def post_review_error_reply(run_id: str, reason: str) -> str:
    """Error message for a failed `!runbook post_review` (run_id is accepted but not shown)."""
    return "❌ failed to generate post-review: {}".format(reason)
# ── M3.4 safety helpers ───────────────────────────────────────────────────────
#: Maximum length of notes/free-text operator input accepted before truncation.
MAX_NOTES_LEN: int = 500
#: Control characters U+0000–U+001F, except tab (9), LF (10) and CR (13), that must be stripped.
_CTRL_CHARS = "".join(chr(code) for code in range(32) if code not in (9, 10, 13))
def sanitize_notes(notes: str) -> str:
    """
    Remove control characters from notes and cap the length at MAX_NOTES_LEN.

    Tab, line feed and carriage return are kept; every other character below
    U+0020 is dropped. Safe to call with any string; falsy input (including
    None) yields an empty string.
    """
    if not notes:
        return ""
    strip_table = str.maketrans("", "", _CTRL_CHARS)
    cleaned = notes.translate(strip_table)
    if len(cleaned) <= MAX_NOTES_LEN:
        return cleaned
    # NOTE(review): the appended literal is empty as given — confirm whether
    # a truncation marker was intended here.
    return cleaned[:MAX_NOTES_LEN] + ""
def rate_limited_reply(scope: str, retry_after_s: float) -> str:
    """
    Message for a control command rejected by the rate limiter or cooldown.

    Waits of one second or more are shown as whole seconds; anything shorter
    is shown as "a moment".
    """
    if retry_after_s >= 1:
        wait = f"{retry_after_s:.0f}s"
    else:
        wait = "a moment"
    return f"⏳ rate limited ({scope}), retry after {wait}"
def status_not_available_reply() -> str:
    """Message shown when bridge status cannot be reported."""
    message = "⚠️ Bridge status not available (service initialising or config missing)."
    return message
# M5.1: !nodes reply
_MAX_ROOM_OVERRIDES_SHOWN = 10
def nodes_reply(
    policy_info: dict,
    node_stats: Optional[dict] = None,
    sticky_info: Optional[dict] = None,
) -> str:
    """
    Build the compact `!nodes` reply for the control room.

    policy_info: from NodePolicy.as_info_dict(); keys read are default_node,
        allowed_nodes and room_overrides (either a dict room→node or an int count).
    node_stats: optional dict {node_id: {"routed": N, "rejected": M, "health": ..., ...}}
    sticky_info: optional dict from StickyNodeCache (M8.1); keys read are
        active_keys, ttl_s, entries and truncated.

    At most _MAX_ROOM_OVERRIDES_SHOWN room overrides are listed; the rest are
    summarised as "+N more".
    """
    default = policy_info.get("default_node", "?")
    allowed_str = ", ".join(
        f"`{name}`" for name in sorted(policy_info.get("allowed_nodes") or [])
    )
    overrides = policy_info.get("room_overrides", {}) or {}

    out = [
        "🌐 **Node policy**",
        f"Default: `{default}` Allowed: {allowed_str}",
    ]

    # ── Room overrides ──
    if isinstance(overrides, dict) and overrides:
        out.append(f"\n**Room overrides** ({len(overrides)}):")
        shown = list(overrides.items())[:_MAX_ROOM_OVERRIDES_SHOWN]
        out.extend(f" `{room_id}` → `{node}`" for room_id, node in shown)
        hidden = len(overrides) - _MAX_ROOM_OVERRIDES_SHOWN
        if hidden > 0:
            out.append(f" _(+{hidden} more)_")
    elif isinstance(overrides, int) and overrides:
        # as_info_dict returns room_overrides as int count, not dict
        out.append(f"\nRoom overrides: {overrides}")
    else:
        out.append("\nNo room overrides configured.")

    # ── Per-node counters ──
    if node_stats:
        out.append("\n**Per-node stats** (since last restart):")
        for node_id in sorted(node_stats):
            stats = node_stats[node_id]
            fields = [
                f"routed={stats.get('routed', 0)}",
                f"rejected={stats.get('rejected', 0)}",
            ]
            health = stats.get("health", "")
            if health:
                fields.append(f"health={health}")
            ewma = stats.get("ewma_latency_s")
            if ewma is not None:
                fields.append(f"ewma={ewma:.2f}s")
            consec = stats.get("consecutive_failures", 0)
            if consec:
                fields.append(f"consec_fail={consec}")
            out.append(f" `{node_id}`: " + " ".join(fields))

    # ── M8.1: sticky cache section ──
    if sticky_info is not None:
        active = sticky_info.get("active_keys", 0)
        ttl = sticky_info.get("ttl_s", 0)
        if not active:
            out.append(f"\nSticky routing: none active ttl={ttl:.0f}s")
        else:
            out.append(f"\n**Sticky routing** (anti-flap): {active} active ttl={ttl:.0f}s")
            for entry in sticky_info.get("entries", []):
                remaining = entry.get("remaining_s", 0)
                out.append(
                    f" `{entry['key']}` → `{entry['node']}` ({remaining:.0f}s left)"
                )
            if sticky_info.get("truncated"):
                out.append(f" _(+{sticky_info['truncated']} more)_")

    return "\n".join(out)
# ── M6.0: !node subcommand parser + reply helpers ──────────────────────────────
import re as _re
# Keyword extractors applied by parse_node_cmd to the raw argument text
# (both are case-insensitive on the keyword).
_ROOM_KWARG_RE = _re.compile(r"\broom=(\S+)", _re.IGNORECASE)  # value: any run of non-whitespace
_NODE_VAL_RE = _re.compile(r"\bnode=(\w+)", _re.IGNORECASE)  # value: word characters only
# Rebinds the module-level _ROOM_ID_RE defined near the top of the file;
# node_cmd_validate_room uses this pattern.
_ROOM_ID_RE = _re.compile(r"^![a-zA-Z0-9._\-]+:[a-zA-Z0-9._\-]+$")
def parse_node_cmd(args_text: str) -> Tuple[str, Optional[str], Optional[str]]:
    """
    Split the text after `!node` into subcommand, room and node.

    Expected shape: `<subcommand> [room=...] [node=...]`.
    Returns (subcmd, room_id_or_None, node_id_or_None); blank input yields
    ("", None, None). subcmd is lower-cased; node_id is upper-cased; room_id
    is returned as typed.
    """
    pieces = args_text.strip().split(None, 1)
    if not pieces:
        return ("", None, None)

    subcmd = pieces[0].lower()
    rest = pieces[1] if len(pieces) == 2 else ""

    def _first_group(pattern) -> Optional[str]:
        # Captured value of the first match in the argument text, if any.
        found = pattern.search(rest)
        return found.group(1) if found else None

    room_id = _first_group(_ROOM_KWARG_RE)
    node_raw = _first_group(_NODE_VAL_RE)
    node_id = node_raw.upper() if node_raw is not None else None
    return (subcmd, room_id, node_id)
def node_cmd_validate_room(room_id: str) -> bool:
    """
    Return True if room_id matches the basic Matrix room ID format.

    Falsy input (empty string or None) is rejected. The whole string must
    match: `fullmatch` is used because a `$` anchor with `match` would also
    accept a room ID followed by a trailing newline.
    """
    if not room_id:
        return False
    return _ROOM_ID_RE.fullmatch(room_id) is not None
def node_cmd_reply_set(room_id: str, node_id: str) -> str:
    """Confirmation shown after a room→node override is stored."""
    return "✅ Override set: `{}` → `{}`".format(room_id, node_id)
def node_cmd_reply_unset_ok(room_id: str) -> str:
    """Confirmation shown after a room's node override is removed."""
    return "✅ Override removed for `{}`".format(room_id)
def node_cmd_reply_unset_not_found(room_id: str) -> str:
    """Message shown when `!node unset` targets a room that had no override."""
    return " No override was set for `{}`".format(room_id)
def node_cmd_reply_get(
    room_id: str,
    node_id: Optional[str],
    env_node: Optional[str],
    default_node: str,
) -> str:
    """
    Describe how the node for room_id is resolved.

    Lists the dynamic override (node_id), the env mapping (only when set) and
    the default, then the effective node: the first truthy value among
    node_id, env_node and default_node.
    """
    override_line = (
        f"Dynamic override: `{node_id}` _(set by operator)_"
        if node_id
        else "Dynamic override: _none_"
    )
    out = [f"📌 **Node info for** `{room_id}`", override_line]
    if env_node:
        out.append(f"Env map: `{env_node}`")
    out.append(f"Default: `{default_node}`")

    effective = node_id or env_node or default_node
    out.append(f"\nEffective node: **`{effective}`**")
    return "\n".join(out)
def node_cmd_reply_list(
    overrides: List[Tuple[str, str, int]],
    total: int,
) -> str:
    """
    Format the `!node list` reply.

    Args:
        overrides: (room_id, node_id, updated_at) tuples to display, where
            updated_at is a POSIX timestamp in seconds.
        total: Total number of overrides; when larger than len(overrides),
            a "+N more" line is appended after the listed entries.

    Returns:
        A multi-line message with one line per override, timestamps shown
        in UTC with minute precision.
    """
    # Timezone-aware conversion; datetime.utcfromtimestamp() is deprecated
    # since Python 3.12.
    from datetime import datetime, timezone

    lines = [f"📋 **Dynamic node overrides** ({total} total)"]
    if not overrides:
        lines.append("_None set._")
    else:
        for room_id, node_id, updated_at in overrides:
            ts = datetime.fromtimestamp(updated_at, tz=timezone.utc).strftime("%Y-%m-%d %H:%M")
            lines.append(f" `{room_id}` → `{node_id}` _(at {ts} UTC)_")
        if total > len(overrides):
            lines.append(f" _(+{total - len(overrides)} more)_")
    return "\n".join(lines)
def node_cmd_reply_error(msg: str) -> str:
    """Return msg followed by a blank line and the `!node` usage summary."""
    usage = (
        "Usage:",
        " `!node set room=!room:server node=NODA2`",
        " `!node unset room=!room:server`",
        " `!node get room=!room:server`",
        " `!node list`",
    )
    return f"{msg}\n\n" + "\n".join(usage)
# ── M6.1: !room agents reply helpers ──────────────────────────────────────────
# key=value extractors for `!room agents` arguments (keyword is case-insensitive).
# NOTE(review): none of these is referenced in the visible part of this file —
# parse_room_agents_cmd reads the pre-parsed kwargs dict instead. Confirm
# whether they are still needed.
_AGENTS_KWARG_RE = _re.compile(r"\bagents=(\S+)", _re.IGNORECASE)  # value: any run of non-whitespace
_AGENT_KWARG_RE = _re.compile(r"\bagent=(\w+)", _re.IGNORECASE)  # value: word characters only
_DEFAULT_KWARG_RE = _re.compile(r"\bdefault=(\w+)", _re.IGNORECASE)  # value: word characters only
def parse_room_agents_cmd(
    subcommand: str,
    args: tuple,
    kwargs: Dict[str, str],
) -> Tuple[str, Optional[str], Optional[List[str]], Optional[str], Optional[str]]:
    """
    Interpret `!room agents <action> [room=...] [agents=...] [agent=...] [default=...]`.

    Returns (action, room_id, agents_or_None, single_agent_or_None, default_agent_or_None).
    action: lower-cased args[0]; falls back to the lower-cased subcommand
        when there is no (non-blank) first positional
    room_id: kwargs["room"], unchanged (None when absent)
    agents: kwargs["agents"] split on commas, each stripped and lower-cased,
        blanks dropped (for set); None when the key is absent or empty
    single_agent: kwargs["agent"], stripped and lower-cased (for add/remove)
    default_agent: kwargs["default"], stripped and lower-cased
    """
    # action is args[0] when subcommand == "agents"
    action = args[0].lower().strip() if args else ""
    if not action:
        action = subcommand.lower()

    room_id = kwargs.get("room")

    # agents= may be comma-separated
    raw_agents = kwargs.get("agents", "")
    agents: Optional[List[str]] = None
    if raw_agents:
        normalized = (piece.strip().lower() for piece in raw_agents.split(","))
        agents = [name for name in normalized if name]

    def _clean(key: str) -> Optional[str]:
        # Stripped, lower-cased kwarg value; None when missing or blank.
        return kwargs.get(key, "").strip().lower() or None

    return action, room_id, agents, _clean("agent"), _clean("default")
def room_agents_reply_set(room_id: str, agents: List[str], default_agent: str) -> str:
    """Confirmation after an agent override is set; agents are listed in sorted order."""
    listed = ", ".join(f"`{name}`" for name in sorted(agents))
    return "\n".join((
        f"✅ Agent override set for `{room_id}`",
        f"Agents: {listed}",
        f"Default: `{default_agent}`",
    ))
def room_agents_reply_add(room_id: str, agent: str, agents: List[str], default_agent: Optional[str]) -> str:
    """Confirmation after an agent is added; the default line appears only when a default is set."""
    listed = ", ".join(f"`{name}`" for name in sorted(agents))
    out = [
        f"✅ Agent `{agent}` added to `{room_id}`",
        f"Current agents: {listed}",
    ]
    if default_agent:
        out.append(f"Default: `{default_agent}`")
    return "\n".join(out)
def room_agents_reply_remove(room_id: str, agent: str, agents: List[str], default_agent: Optional[str]) -> str:
    """
    Confirmation after an agent is removed.

    With agents remaining, lists them (sorted) plus the default when set;
    with none left, reports that the override was cleared.
    """
    if not agents:
        return f"✅ Agent `{agent}` removed — no agents left, override cleared for `{room_id}`"

    listed = ", ".join(f"`{name}`" for name in sorted(agents))
    out = [
        f"✅ Agent `{agent}` removed from `{room_id}`",
        f"Remaining: {listed}",
    ]
    if default_agent:
        out.append(f"Default: `{default_agent}`")
    return "\n".join(out)
def room_agents_reply_unset_ok(room_id: str) -> str:
    """Confirmation after a room's agent override is cleared."""
    return "✅ Agent override cleared for `{}` (using env/default config)".format(room_id)
def room_agents_reply_unset_not_found(room_id: str) -> str:
    """Message shown when `!room agents unset` targets a room that had no override."""
    return " No agent override was set for `{}`".format(room_id)
def room_agents_reply_get(
    room_id: str,
    override_agents: Optional[List[str]],
    override_default: Optional[str],
    env_agents: Optional[List[str]],
    env_default: Optional[str],
) -> str:
    """
    Describe the agent policy for room_id.

    Shows the dynamic override and the env config (agents sorted, default
    shown as "?" when unset), then the effective values: the override wins
    over the env config for both the agent list and the default.
    """
    def _ticked(names: List[str]) -> str:
        # Sorted, back-ticked, comma-separated agent names.
        return ", ".join(f"`{name}`" for name in sorted(names))

    out = [f"📌 **Agent policy for** `{room_id}`"]

    if override_agents:
        out.append(f"Dynamic override: {_ticked(override_agents)} default=`{override_default or '?'}`")
    else:
        out.append("Dynamic override: _none_")

    if env_agents:
        out.append(f"Env config: {_ticked(env_agents)} default=`{env_default or '?'}`")
    else:
        out.append("Env config: _not configured_")

    effective_agents = override_agents or env_agents or []
    effective_default = override_default or env_default or "?"
    out.append(
        f"\nEffective agents: **{_ticked(effective_agents)}** default=**`{effective_default}`**"
    )
    return "\n".join(out)
def room_agents_reply_list(
    overrides: List[Tuple[str, List[str], Optional[str], int]],
    total: int,
) -> str:
    """
    Format the `!room agents list` reply.

    Args:
        overrides: (room_id, agents, default_agent, updated_at) tuples to
            display, where updated_at is a POSIX timestamp in seconds.
            Agents are shown in the order given; a missing default is
            shown as "?".
        total: Total number of overrides; when larger than len(overrides),
            a "+N more" line is appended after the listed entries.

    Returns:
        A multi-line message with one line per override, timestamps shown
        in UTC with minute precision.
    """
    # Timezone-aware conversion; datetime.utcfromtimestamp() is deprecated
    # since Python 3.12.
    from datetime import datetime, timezone

    lines = [f"📋 **Dynamic agent overrides** ({total} total)"]
    if not overrides:
        lines.append("_None set._")
    else:
        for room_id, agents, default_agent, updated_at in overrides:
            ts = datetime.fromtimestamp(updated_at, tz=timezone.utc).strftime("%Y-%m-%d %H:%M")
            agents_str = ", ".join(agents)
            lines.append(f" `{room_id}`: [{agents_str}] default=`{default_agent or '?'}` _(at {ts} UTC)_")
        if total > len(overrides):
            lines.append(f" _(+{total - len(overrides)} more)_")
    return "\n".join(lines)
def room_agents_reply_error(msg: str) -> str:
    """Return msg followed by a blank line and the `!room agents` usage summary."""
    usage = (
        "Usage:",
        " `!room agents set room=!X agents=sofiia,helion [default=sofiia]`",
        " `!room agents add room=!X agent=druid`",
        " `!room agents remove room=!X agent=helion`",
        " `!room agents get room=!X`",
        " `!room agents unset room=!X`",
        " `!room agents list`",
    )
    return f"{msg}\n\n" + "\n".join(usage)
# ── M6.2: !policy export/import reply helpers + path validator ────────────────
import os as _os
import json as _json
POLICY_EXPORTS_SUBDIR = "policy_exports"
def validate_export_path(exports_dir: str, filename: str) -> Optional[str]:
    """
    Validate an export filename and join it onto exports_dir.

    Security: only simple filenames are accepted — no slashes or backslashes,
    no `..` anywhere in the name, and only the characters
    [a-zA-Z0-9._-]. The symlink-resolved target must also lie strictly
    inside the symlink-resolved exports_dir.

    Args:
        exports_dir: Directory that holds policy export files.
        filename: Operator-supplied file name.

    Returns:
        os.path.join(exports_dir, filename) when the name is safe (absolute
        only if exports_dir is absolute), otherwise None.
    """
    if not filename:
        return None
    # Reject anything with directory separators or traversal sequences
    if "/" in filename or "\\" in filename or ".." in filename:
        return None
    # Only allow safe characters: alphanumeric, dash, underscore, dot.
    # fullmatch is required here: with match(), a "$" anchor also matches
    # just before a trailing newline, letting "name\n" through.
    if not _re.fullmatch(r"[a-zA-Z0-9._\-]+", filename):
        return None

    full_path = _os.path.join(exports_dir, filename)
    try:
        resolved = _os.path.realpath(full_path)
        exports_resolved = _os.path.realpath(exports_dir)
    except Exception:  # noqa: BLE001
        # Best-effort: any failure while resolving counts as "not safe".
        return None
    # The trailing separator stops a sibling like "<dir>_evil" from passing
    # the prefix test, and also rejects the directory itself (filename ".").
    if not resolved.startswith(exports_resolved + _os.sep):
        return None
    return full_path
def policy_export_reply(path: str, node_count: int, agent_count: int) -> str:
    """Confirmation after a policy export; only the file's base name is shown, not its directory."""
    shown_name = _os.path.basename(path)
    return "\n".join((
        "✅ **Policy exported**",
        f"File: `{shown_name}`",
        f"Node overrides: {node_count} Agent overrides: {agent_count}",
    ))
def policy_import_dry_run_reply(stats: dict, mode: str) -> str:
    """
    Summary of what a policy import would change, without applying it.

    Reads the {node,agent}_{added,updated,deleted} counters from `stats`;
    missing counters are shown as 0.
    """
    def _counts(prefix: str) -> str:
        # "+added ~updated -deleted" for one override kind.
        added = stats.get(f"{prefix}_added", 0)
        updated = stats.get(f"{prefix}_updated", 0)
        deleted = stats.get(f"{prefix}_deleted", 0)
        return f"+{added} ~{updated} -{deleted}"

    return "\n".join((
        f"🔍 **Import dry-run** (mode=`{mode}`, no changes applied)",
        f"Node overrides: {_counts('node')}",
        f"Agent overrides: {_counts('agent')}",
        "_Use `dry_run=0` to apply._",
    ))
def format_import_diff(diff: Any) -> str:
    """
    Render an ImportDiff as human-readable Markdown (M9.1).

    `diff` is an ImportDiff instance from policy_store. Attributes read:
    node_/agent_ added, updated and deleted counters, plus the optional
    sample_keys and is_replace; total_changes() is called only when
    sample_keys is non-empty.
    """
    def _row(title: str, added: int, updated: int, deleted: int) -> str:
        # One summary row; zero counters are left out.
        parts: List[str] = []
        if added:
            parts.append(f"+{added} added")
        if updated:
            parts.append(f"~{updated} updated")
        if deleted:
            parts.append(f"-{deleted} deleted ⚠️")
        return title + (", ".join(parts) if parts else "no changes")

    out: List[str] = [
        _row("**Node overrides:** ", diff.node_added, diff.node_updated, diff.node_deleted),
        _row("**Agent overrides:** ", diff.agent_added, diff.agent_updated, diff.agent_deleted),
    ]

    # Sample affected rooms
    sample_keys = getattr(diff, "sample_keys", None)
    if sample_keys:
        keys_str = ", ".join(f"`{key}`" for key in sample_keys)
        remaining = diff.total_changes() - len(sample_keys)
        suffix = f" _(+{remaining} more)_" if remaining > 0 else ""
        out.append(f"**Affected rooms:** {keys_str}{suffix}")

    # Replace danger banner
    if getattr(diff, "is_replace", False):
        out.append("⚠️ **REPLACE mode** — existing overrides NOT in the file will be **deleted**.")

    return "\n".join(out)
def policy_import_intent_reply(
    diff: Any,
    action_summary: str,
    nonce: str,
    ttl_s: int,
) -> str:
    """
    Confirmation prompt for `!policy import`, including a diff preview (M9.1).

    When diff.total_changes() is zero, a note says that nothing would change;
    the `!confirm <nonce>` instruction is included either way.
    """
    preview = format_import_diff(diff)
    out = [
        "⚠️ **Confirm required**",
        f"Action: `{action_summary}`",
        "",
        "**Preview:**",
        preview,
        "",
    ]
    if diff.total_changes() == 0:
        out.extend(["_(No policy changes would be made.)_", ""])
    out.append(f"Type `!confirm {nonce}` within {ttl_s}s to apply.")
    out.append("_(Only you can confirm. If the file changes, this confirm will be rejected.)_")
    return "\n".join(out)
def policy_import_reply(stats: dict, mode: str) -> str:
    """
    Build the success reply sent after a policy import has been applied.

    `stats` is read for the keys `{node,agent}_{added,updated,deleted}`;
    any missing key is shown as 0.
    """

    def _counters(prefix: str) -> str:
        added, updated, deleted = (
            stats.get(f"{prefix}_{suffix}", 0)
            for suffix in ("added", "updated", "deleted")
        )
        return f"+{added} ~{updated} -{deleted}"

    return "\n".join(
        (
            f"✅ **Policy imported** (mode=`{mode}`)",
            f"Node overrides: {_counters('node')}",
            f"Agent overrides: {_counters('agent')}",
        )
    )
def policy_restore_intent_reply(
    diff: Any,
    action_summary: str,
    nonce: str,
    ttl_s: int,
) -> str:
    """
    Build the rollback preview and confirm prompt for `!policy restore` (M10.1).

    The reply has three sections separated by blank lines: the diff preview
    from `format_import_diff`, the rollback action summary, and the
    `!confirm <nonce>` instruction with its expiry in seconds.
    """
    preview = format_import_diff(diff)
    sections = (
        "🔄 **Policy restore (rollback) preview**\n" + preview,
        f"⚠️ **Rollback action:** `{action_summary}`",
        f"Type `!confirm {nonce}` to apply restore (expires in {ttl_s}s)",
    )
    return "\n\n".join(sections)
def policy_restore_applied_reply(
    stats: Any,
    mode: str,
    autobackup_basename: str = "",
) -> str:
    """
    Build the reply sent once a `!policy restore` has been applied (M10.1).

    `stats` is only consulted when it is a dict; any other value makes all
    six counters render as 0. When `autobackup_basename` is non-empty, a
    trailing line names the pre-restore backup file.
    """
    counts = stats if isinstance(stats, dict) else {}

    def _triple(prefix: str) -> str:
        return "+{} ~{} -{}".format(
            counts.get(prefix + "_added", 0),
            counts.get(prefix + "_updated", 0),
            counts.get(prefix + "_deleted", 0),
        )

    reply = "\n".join(
        (
            f"✅ **Policy restored** (mode={mode})",
            "Node overrides: " + _triple("node"),
            "Agent overrides: " + _triple("agent"),
        )
    )
    if autobackup_basename:
        reply += f"\n\n💾 Pre-restore backup saved: `{autobackup_basename}`"
    return reply
def policy_history_reply(changes: List[Any]) -> str:
    """
    Format policy change records for the `!policy history` reply (M10.2).

    Records are rendered in the order given, numbered from 1. Each line is:
      {n}. [id:NN] [<when_str()>] `verb/mode` <changes_short()>[ ⚠️] `file` op:`hash8`

    The ⚠️ flag is shown for records whose `is_destructive` is truthy.
    Source file names longer than 40 characters are cut to 40 and suffixed
    with "…" so a truncated name is distinguishable from a complete one.
    Only the first 8 characters of `sender_hash` are shown.

    Returns a fixed "no changes" message when `changes` is empty.
    """
    if not changes:
        return "📋 **Policy change history**\nNo policy changes recorded yet."

    max_name_len = 40
    lines = ["📋 **Policy change history** (most recent first)\n"]
    for i, c in enumerate(changes, 1):
        destr_flag = " ⚠️" if c.is_destructive else ""
        source = c.source_file
        # Mark truncation explicitly; without a marker the cut is invisible.
        fname = source[:max_name_len] + "…" if len(source) > max_name_len else source
        lines.append(
            f"{i}. [id:{c.id}] [{c.when_str()}] `{c.verb}/{c.mode}`"
            f" {c.changes_short()}{destr_flag}"
            f" `{fname}`"
            f" op:`{c.sender_hash[:8]}`"
        )
    lines.append("\nUse `!policy change id=<n>` for full details of a specific change.")
    return "\n".join(lines)
def policy_change_detail_reply(change: Any) -> str:
    """
    Format the full details of one policy change for `!policy change id=<n>` (M10.3).

    Reads from `change`: id, verb, mode, when_str(), sender_hash (first 8
    characters shown), source_file, is_destructive, the six node/agent
    counters and diff_summary.

    Source file names longer than 60 characters are cut to 60 and suffixed
    with "…" so a truncated name is distinguishable from a complete one.
    """
    max_name_len = 60
    destr_str = "⚠️ Yes" if change.is_destructive else "No"
    source = change.source_file
    # Mark truncation explicitly; without a marker the cut is invisible.
    fname = source[:max_name_len] + "…" if len(source) > max_name_len else source
    lines = [
        # The trailing newline leaves a blank line under the title after joining.
        f"🔍 **Policy change #{change.id}**\n",
        f"**Verb:** `{change.verb}`",
        f"**Mode:** `{change.mode}`",
        f"**Applied:** {change.when_str()} UTC",
        f"**Operator:** op:`{change.sender_hash[:8]}`",
        f"**File:** `{fname}`",
        f"**Destructive:** {destr_str}",
        "",
        "**Changes:**",
        f" Nodes: +{change.node_added} added ~{change.node_updated} updated -{change.node_deleted} deleted",
        f" Agents: +{change.agent_added} added ~{change.agent_updated} updated -{change.agent_deleted} deleted",
        "",
        "**Summary:**",
        f" {change.diff_summary}",
    ]
    return "\n".join(lines)
def policy_prune_preview_reply(result: Any, retention_days: int) -> str:
    """
    Build the dry-run preview for `!policy prune_exports dry_run=1` (M10.0).

    Reads `result.count`, `result.total_bytes` and up to five names from
    `result.sample_filenames(5)`. When `count` is 0 a short "nothing to
    prune" message is returned instead of a file listing.
    """
    header = f"🗑️ **Policy exports prune preview** (retention={retention_days}d)\n"
    if result.count == 0:
        return header + "No files older than the retention window found. Nothing to prune."

    shown = result.sample_filenames(5)
    listing = "\n".join([f" - `{name}`" for name in shown])
    # Files beyond the sample are summarised as a count.
    hidden = result.count - len(shown)
    if hidden > 0:
        listing += f"\n _(+{hidden} more)_"
    approx_kb = result.total_bytes // 1024

    return "".join(
        (
            header,
            f"Would delete **{result.count}** file(s) (~{approx_kb} KB):\n",
            listing,
            "\n\n",
            "To actually prune: `!policy prune_exports dry_run=0`",
        )
    )
def policy_prune_applied_reply(result: Any, retention_days: int) -> str:
    """
    Build the reply sent after `!policy prune_exports dry_run=0` ran (M10.0).

    Reports `result.count` deleted files and the freed size in whole KB
    (`result.total_bytes // 1024`), or a "no files matched" message when
    `count` is 0.
    """
    window = f"(retention={retention_days}d)"
    if result.count == 0:
        return "\n".join(
            (
                f"🗑️ **Policy exports pruned** {window}",
                "No files matched the retention window.",
            )
        )
    freed_kb = result.total_bytes // 1024
    return "\n".join(
        (
            f"✅ **Policy exports pruned** {window}",
            f"Deleted **{result.count}** file(s) (~{freed_kb} KB freed).",
        )
    )
def policy_cmd_error(msg: str) -> str:
    """Return `msg` followed by a blank line and the `!policy` export/import usage."""
    usage = (
        "Usage:",
        " `!policy export`",
        " `!policy import path=policy-YYYYMMDD-HHMMSS.json [mode=merge|replace] [dry_run=0]`",
    )
    return f"{msg}\n\n" + "\n".join(usage)
def unknown_command_reply(cmd: ControlCommand) -> str:
    """Build the reply for an unrecognised verb: echo `cmd.raw` and list KNOWN_VERBS sorted."""
    verbs = ", ".join(sorted(KNOWN_VERBS))
    return "\n".join(
        (
            f"⚠️ Unknown command: `{cmd.raw}`",
            f"Known verbs: {verbs}.",
            "Type `!help` for usage.",
        )
    )
def unauthorized_reply(reason: str) -> str:
    """
    Build the reply for an unauthorised command attempt.

    Per the original note, this is sent only when behavior=reply_error.
    `reason == "not_operator"` yields the allowlist message; every other
    reason yields the "not a control channel" message.
    """
    detail = (
        "your Matrix ID is not in the operator allowlist."
        if reason == "not_operator"
        else "this room is not a control channel."
    )
    return "⛔ Not authorised: " + detail
def help_reply() -> str:
    """Return the static Markdown help text listing the control commands."""
    entries = (
        "**DAGI Bridge — Control Commands**",
        "",
        "`!runbook start <path> [node=NODA1]` — Start a runbook run ✅",
        "`!runbook next <run_id>` — Advance to next step ✅",
        "`!runbook complete <run_id> step=<n> status=ok [notes=...]` — Mark step complete ✅",
        "`!runbook status <run_id>` — Show run status ✅",
        "`!runbook evidence <run_id>` — Generate release evidence ✅",
        "`!runbook post_review <run_id>` — Generate post-release review ✅",
        "`!status` — Bridge health summary ✅",
        "`!nodes` — Node policy overview ✅",
        "`!node set room=!room:server node=NODA2` — Set room-node override ✅",
        "`!node unset room=!room:server` — Remove room-node override ✅",
        "`!node get room=!room:server` — Show current override ✅",
        "`!node list` — List dynamic overrides (top 10) ✅",
        "`!room agents set room=!X agents=sofiia,helion [default=sofiia]` — Set agent list ✅",
        "`!room agents add room=!X agent=druid` — Add agent to room ✅",
        "`!room agents remove room=!X agent=helion` — Remove agent from room ✅",
        "`!room agents get room=!X` — Show current agent policy ✅",
        "`!room agents list` — List all rooms with agent overrides ✅",
        "`!room agents unset room=!X` — Remove all agent overrides for room ✅",
        "`!policy export` — Export policy snapshot to file ✅",
        "`!policy import path=<file> [mode=merge|replace] [dry_run=0]` — Import policy snapshot ✅",
        "`!help` — This message",
        "",
        "_Only authorised operators can issue control commands._",
    )
    return "\n".join(entries)