feat(matrix-bridge-dagi): M4–M11 + soak infrastructure (debug inject endpoint)
Includes all milestones M4 through M11: - M4: agent discovery (!agents / !status) - M5: node-aware routing + per-node observability - M6: dynamic policy store (node/agent overrides, import/export) - M7: Prometheus alerts + Grafana dashboard + metrics contract - M8: node health tracker + soft failover + sticky cache + HA persistence - M9: two-step confirm + diff preview for dangerous commands - M10: auto-backup, restore, retention, policy history + change detail - M11: soak scenarios (CI tests) + live soak script Soak infrastructure (this commit): - POST /v1/debug/inject_event (guarded by DEBUG_INJECT_ENABLED=false) - _preflight_inject() and _check_wal() in soak script - --db-path arg for WAL delta reporting - Runbook sections 2a/2b/2c: Step 0 and Step 1 exact commands Made-with: Cursor
This commit is contained in:
@@ -23,18 +23,124 @@ Audit events emitted:
|
||||
import logging
|
||||
import re
|
||||
from dataclasses import dataclass, field
|
||||
from typing import Dict, FrozenSet, List, Optional, Tuple
|
||||
from typing import Any, Dict, FrozenSet, List, Optional, Tuple
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# ── Constants ─────────────────────────────────────────────────────────────────
|
||||
|
||||
# Supported control verbs
VERB_RUNBOOK = "runbook"
VERB_STATUS = "status"
VERB_NODES = "nodes"      # M5.1: node policy overview
VERB_NODE = "node"        # M6.0: dynamic room-node override commands
VERB_ROOM = "room"        # M6.1: dynamic mixed room agent overrides
VERB_POLICY = "policy"    # M6.2: policy snapshot export/import
VERB_CONFIRM = "confirm"  # M9.0: two-step confirmation for dangerous commands
VERB_HELP = "help"

# Set of all verb constants defined above (single definition; the earlier
# three-verb assignment was superseded and has been dropped).
KNOWN_VERBS: FrozenSet[str] = frozenset({
    VERB_RUNBOOK, VERB_STATUS, VERB_NODES, VERB_NODE,
    VERB_ROOM, VERB_POLICY, VERB_CONFIRM, VERB_HELP,
})
|
||||
|
||||
# ── M9.0: Dangerous command detection ─────────────────────────────────────────
|
||||
|
||||
def is_dangerous_cmd(cmd: "ControlCommand") -> bool:
    """
    Return True if the command requires two-step confirmation before applying.

    Dangerous commands:
      !node set room=... node=...           — changes room routing
      !room agents set room=... agents=...  — replaces all agents for a room
      !policy import ...                    — overwrites policy DB (both modes)
      !policy prune_exports dry_run=0       — M10.0: actual deletion
      !policy restore ...                   — M10.1: always (no dry_run option)

    The ``dry_run`` value is compared case-insensitively so that spellings
    such as ``False`` or ``NO`` are not mistaken for a dry run (which would
    let a real deletion through without confirmation).
    """
    verb = cmd.verb
    sub = (cmd.subcommand or "").strip().lower()

    if verb == VERB_NODE:
        return sub == "set"

    if verb == VERB_ROOM:
        # For "!room agents <action>", the action is the first positional arg.
        return sub == "agents" and bool(cmd.args) and cmd.args[0].lower() == "set"

    if verb == VERB_POLICY:
        if sub in ("import", "restore"):
            return True
        if sub == "prune_exports":
            # Missing dry_run defaults to "1" (dry run), i.e. not dangerous.
            dry_raw = cmd.kwargs.get("dry_run", "1").strip().lower()
            return dry_raw in ("0", "false", "no")

    return False
|
||||
|
||||
|
||||
def build_normalized_args(cmd: "ControlCommand") -> str:
    """
    Render the command's arguments as one space-separated string.

    Positional args come first in their original order, followed by
    ``key=value`` pairs sorted by key. For ``!room agents ...`` the first
    positional (the action word) is left out of the rendering.
    """
    positional = list(cmd.args)
    if cmd.verb == VERB_ROOM and cmd.subcommand == "agents":
        positional = positional[1:]
    keyword = [f"{key}={value}" for key, value in sorted(cmd.kwargs.items())]
    return " ".join(positional + keyword)
|
||||
|
||||
|
||||
def confirm_intent_reply(action_summary: str, nonce: str, ttl_s: int) -> str:
    """Build the four-line prompt shown while a dangerous command awaits confirmation (M9.0)."""
    prompt = [
        "⚠️ **Confirm required**",
        f"Action: `{action_summary}`",
        f"Type `!confirm {nonce}` within {ttl_s}s to apply.",
        "_(Only you can confirm this action.)_",
    ]
    return "\n".join(prompt)
|
||||
|
||||
|
||||
def confirm_success_reply(action_result: str) -> str:
    """Prefix the applied action's result text with a confirmation header (M9.0)."""
    header = "✅ Confirmed and applied."
    return "{}\n{}".format(header, action_result)
|
||||
|
||||
|
||||
def confirm_expired_reply() -> str:
    """Fixed reply for a nonce that is invalid, expired, or from a different sender (M9.0)."""
    rejected = "❌ Invalid or expired confirmation code. The action was **not** applied."
    hint = "Re-issue the original command to get a new code."
    return rejected + "\n" + hint
|
||||
|
||||
# M6.1: !room subcommand + actions
# The action word follows the "agents" subcommand: `!room agents <action> ...`.
ROOM_SUBCMD_AGENTS = "agents"
ROOM_ACTION_SET = "set"
ROOM_ACTION_ADD = "add"
ROOM_ACTION_REMOVE = "remove"
ROOM_ACTION_GET = "get"
ROOM_ACTION_LIST = "list"
ROOM_ACTION_UNSET = "unset"  # remove full override
# All ROOM_ACTION_* values, for membership checks.
_VALID_ROOM_ACTIONS = frozenset({
    ROOM_ACTION_SET, ROOM_ACTION_ADD, ROOM_ACTION_REMOVE,
    ROOM_ACTION_GET, ROOM_ACTION_LIST, ROOM_ACTION_UNSET,
})

# M6.0: !node subcommands
NODE_SUBCMD_SET = "set"
NODE_SUBCMD_UNSET = "unset"
NODE_SUBCMD_GET = "get"
NODE_SUBCMD_LIST = "list"
# All NODE_SUBCMD_* values, for membership checks.
_VALID_NODE_SUBCMDS = frozenset({NODE_SUBCMD_SET, NODE_SUBCMD_UNSET, NODE_SUBCMD_GET, NODE_SUBCMD_LIST})

# Runbook subcommands (M3.x)
SUBCOMMAND_START = "start"  # M3.1 — implemented
SUBCOMMAND_NEXT = "next"  # M3.2 — implemented
SUBCOMMAND_COMPLETE = "complete"  # M3.2 — implemented
SUBCOMMAND_EVIDENCE = "evidence"  # M3.3 — implemented
SUBCOMMAND_STATUS = "status"  # M3.3 — implemented
SUBCOMMAND_POST_REVIEW = "post_review"  # M3.3 — implemented

# Max command line length to guard against garbage injection
_MAX_CMD_LEN = 512
|
||||
@@ -225,10 +331,814 @@ def check_authorization(
|
||||
# ── Reply helpers ─────────────────────────────────────────────────────────────
|
||||
|
||||
def not_implemented_reply(cmd: ControlCommand) -> str:
    """
    Reply for known commands not yet implemented.

    Echoes the raw command back and names the verb/subcommand that is still
    pending. Only the current message text is emitted; the superseded
    "(M3.1+)" wording is no longer appended.
    """
    return (
        f"✅ Command acknowledged: `{cmd.raw}`\n"
        f"⏳ `!{cmd.verb} {cmd.subcommand}` — implementation pending."
    )
|
||||
|
||||
|
||||
def next_usage_reply() -> str:
    """Usage hint returned when `!runbook next` is given no run_id."""
    usage = "⚠️ Usage: `!runbook next <run_id>`"
    example = "Example: `!runbook next abc-123`"
    return f"{usage}\n{example}"
|
||||
|
||||
|
||||
def complete_usage_reply() -> str:
    """Usage hint returned when `!runbook complete` lacks required arguments."""
    hint_lines = [
        "⚠️ Usage: `!runbook complete <run_id> step=<n> status=ok|warn|fail [notes=...]`",
        "Example: `!runbook complete abc-123 step=3 status=ok notes=done`",
        "Notes with spaces: join without quotes — `notes=done_and_verified`.",
    ]
    return "\n".join(hint_lines)
|
||||
|
||||
|
||||
def start_usage_reply() -> str:
    """Usage hint returned when `!runbook start` has a missing or invalid runbook_path."""
    hint_lines = [
        "⚠️ Usage: `!runbook start <runbook_path> [node=NODA1]`",
        "Example: `!runbook start runbooks/rehearsal-v1-checklist.md node=NODA1`",
        "runbook_path must be a relative path without `..`.",
    ]
    return "\n".join(hint_lines)
|
||||
|
||||
|
||||
def runbook_started_reply(run_id: str, steps_total: int, status: str) -> str:
    """Success message for a newly created runbook run, with the follow-up command."""
    summary = f"✅ runbook started: `run_id={run_id}` steps={steps_total} status={status}"
    follow_up = f"Next: `!runbook next {run_id}`"
    return "\n".join((summary, follow_up))
|
||||
|
||||
|
||||
def runbook_start_error_reply(reason: str) -> str:
    """Error message for a failed runbook start; `reason` is appended verbatim."""
    return "❌ failed to start runbook: {}".format(reason)
|
||||
|
||||
|
||||
# ── M3.2 reply helpers ────────────────────────────────────────────────────────
|
||||
|
||||
# Max chars of instructions_md to include in Matrix message before truncating
_INSTRUCTIONS_EXCERPT_MAX = 1500


def next_manual_reply(
    run_id: str,
    step_index: int,
    steps_total: Optional[int],
    title: str,
    instructions_md: str,
) -> str:
    """
    Format the reply for a manual step returned by `!runbook next`.

    The heading shows the 1-based step number (and the total when it is
    truthy). Instructions longer than ``_INSTRUCTIONS_EXCERPT_MAX`` are cut
    back to the last newline inside that window and followed by a truncation
    note. The closing line shows the `!runbook complete` command, which uses
    the 0-based ``step_index``.
    """
    label = f"Step {step_index + 1}" + (f"/{steps_total}" if steps_total else "")

    body = instructions_md.strip()
    was_cut = len(body) > _INSTRUCTIONS_EXCERPT_MAX
    if was_cut:
        # Drop the partial last line so the excerpt ends on a line boundary.
        body = body[:_INSTRUCTIONS_EXCERPT_MAX].rsplit("\n", 1)[0]

    out = [f"🧭 {label}: **{title}**", "", body]
    if was_cut:
        out.append("_...(truncated — open in console for full instructions)_")
    out.extend(
        ["", f"Complete: `!runbook complete {run_id} step={step_index} status=ok`"]
    )
    return "\n".join(out)
|
||||
|
||||
|
||||
def next_auto_reply(
    run_id: str,
    step_index: int,
    action_type: str,
    step_status: str,
    duration_ms: Optional[int],
    completed: bool,
) -> str:
    """
    Format the reply for an auto step (http_check/script) run by `!runbook next`.

    The header carries a status icon, the 1-based step number, the action
    type, the status and (when known) the duration. The tail either points at
    the next step or, when the run is complete, at the evidence command.
    """
    icons = {"ok": "✅", "warn": "⚠️", "fail": "❌"}
    header = f"{icons.get(step_status, 'ℹ️')} step {step_index + 1} ({action_type}) {step_status}"
    if duration_ms is not None:
        header += f" duration={duration_ms}ms"

    if not completed:
        return f"{header}\nNext: `!runbook next {run_id}`"
    return "\n".join(
        [header, "🎉 All steps completed!", f"Get evidence: `!runbook evidence {run_id}`"]
    )
|
||||
|
||||
|
||||
def next_error_reply(run_id: str, reason: str) -> str:
    """Error message for a failed `!runbook next`; `run_id` is accepted but not shown."""
    return "❌ failed to advance runbook: {}".format(reason)
|
||||
|
||||
|
||||
def complete_ok_reply(run_id: str, step_index: int, status: str, run_completed: bool) -> str:
    """
    Success message after `!runbook complete`.

    Shows the 1-based step number with its recorded status, followed by
    either the evidence hint (run finished) or the next-step hint.
    """
    icons = {"ok": "✅", "warn": "⚠️", "fail": "❌", "skipped": "⏭️"}
    recorded = f"{icons.get(status, '✅')} recorded step {step_index + 1}: {status}"
    if not run_completed:
        return f"{recorded}\nNext: `!runbook next {run_id}`"
    return "\n".join(
        [recorded, "🎉 All steps completed!", f"Get evidence: `!runbook evidence {run_id}`"]
    )
|
||||
|
||||
|
||||
def complete_error_reply(run_id: str, reason: str) -> str:
    """Error message for a failed `!runbook complete`; `run_id` is accepted but not shown."""
    return "❌ failed to complete step: {}".format(reason)
|
||||
|
||||
|
||||
# ── M3.3 reply helpers ────────────────────────────────────────────────────────
|
||||
|
||||
def status_usage_reply() -> str:
    """Usage hint for `!runbook status`."""
    usage = "⚠️ Usage: `!runbook status <run_id>`"
    example = "Example: `!runbook status abc-123`"
    return f"{usage}\n{example}"
|
||||
|
||||
|
||||
def evidence_usage_reply() -> str:
    """Usage hint for `!runbook evidence`."""
    usage = "⚠️ Usage: `!runbook evidence <run_id>`"
    example = "Example: `!runbook evidence abc-123`"
    return f"{usage}\n{example}"
|
||||
|
||||
|
||||
def post_review_usage_reply() -> str:
    """Usage hint for `!runbook post_review`."""
    usage = "⚠️ Usage: `!runbook post_review <run_id>`"
    example = "Example: `!runbook post_review abc-123`"
    return f"{usage}\n{example}"
|
||||
|
||||
|
||||
def status_reply(run: dict) -> str:
    """
    Format the `!runbook status` reply from a get_run response dict.

    Reads ``run_id``, ``status``, ``current_step``, ``steps_total``,
    ``steps``, ``runbook_path``, ``node_id`` and ``evidence_path``; missing
    keys fall back to "?" / 0. A ``steps`` value of ``None`` is treated like
    an empty list instead of raising. When ``steps_total`` is missing or
    falsy, the number of entries in ``steps`` is used instead.

    For a completed run the last line suggests the next command: evidence
    generation when no evidence exists yet, post-review otherwise.
    """
    run_id = run.get("run_id", "?")
    status = run.get("status", "?")
    current = run.get("current_step", 0)
    # "steps" may be present but null in the payload; normalise once.
    steps = run.get("steps") or []
    steps_total = run.get("steps_total") or len(steps)
    runbook_path = run.get("runbook_path", "?")
    node_id = run.get("node_id", "?")
    evidence_path = run.get("evidence_path")

    # Count warn/fail steps
    warn_count = sum(1 for s in steps if s.get("status") == "warn")
    fail_count = sum(1 for s in steps if s.get("status") == "fail")

    status_emoji = {
        "running": "🔄", "completed": "✅", "aborted": "🛑", "paused": "⏸️",
    }.get(status, "ℹ️")

    step_label = f"{current}/{steps_total}" if steps_total else str(current)
    lines = [
        f"{status_emoji} `run_id={run_id}` status={status} step={step_label}",
        f"runbook: `{runbook_path}` node: {node_id}",
    ]
    if warn_count or fail_count:
        lines.append(f"warn={warn_count} fail={fail_count}")
    if evidence_path:
        lines.append(f"evidence: `{evidence_path}`")

    if status == "completed":
        if evidence_path:
            lines.append(f"Post-review: `!runbook post_review {run_id}`")
        else:
            lines.append(f"Get evidence: `!runbook evidence {run_id}`")

    return "\n".join(lines)
|
||||
|
||||
|
||||
def status_error_reply(run_id: str, reason: str) -> str:
    """Error message for a failed `!runbook status`; `run_id` is accepted but not shown."""
    return "❌ failed to get status: {}".format(reason)
|
||||
|
||||
|
||||
def evidence_reply(result: dict) -> str:
    """
    Success message after `!runbook evidence`.

    Always reports the evidence path and byte size; adds the creation
    timestamp and a post-review hint when ``created_at`` / ``run_id`` are
    present and non-empty in ``result``.
    """
    path = result.get("evidence_path", "?")
    size = result.get("bytes", 0)
    out = [f"📄 evidence created: `{path}` (bytes={size})"]

    created = result.get("created_at", "")
    if created:
        out.append(f"created_at: {created}")

    run_id = result.get("run_id", "")
    if run_id:
        out.append(f"Next: `!runbook post_review {run_id}`")
    return "\n".join(out)
|
||||
|
||||
|
||||
def evidence_error_reply(run_id: str, reason: str) -> str:
    """Error message for a failed `!runbook evidence`; `run_id` is accepted but not shown."""
    return "❌ failed to generate evidence: {}".format(reason)
|
||||
|
||||
|
||||
def post_review_reply(result: dict) -> str:
    """
    Success message after `!runbook post_review`.

    Reports the ``path`` and ``bytes`` from ``result`` and, when present and
    non-empty, the ``created_at`` timestamp on a second line.
    """
    out = [
        "🧾 post-review created: `{}` (bytes={})".format(
            result.get("path", "?"), result.get("bytes", 0)
        )
    ]
    created = result.get("created_at", "")
    if created:
        out.append(f"created_at: {created}")
    return "\n".join(out)
|
||||
|
||||
|
||||
def post_review_error_reply(run_id: str, reason: str) -> str:
    """Error message for a failed `!runbook post_review`; `run_id` is accepted but not shown."""
    return "❌ failed to generate post-review: {}".format(reason)
|
||||
|
||||
|
||||
# ── M3.4 safety helpers ───────────────────────────────────────────────────────
|
||||
|
||||
#: Maximum length of notes/free-text operator input accepted before truncation.
|
||||
MAX_NOTES_LEN: int = 500
|
||||
|
||||
#: Control characters (U+0000–U+001F minus tab/newline) that must be stripped.
|
||||
_CTRL_CHARS = "".join(chr(i) for i in range(32) if i not in (9, 10, 13))
|
||||
|
||||
|
||||
def sanitize_notes(notes: str) -> str:
|
||||
"""
|
||||
Strip control characters and truncate notes to MAX_NOTES_LEN.
|
||||
|
||||
Safe to call with any string; returns empty string for falsy input.
|
||||
"""
|
||||
if not notes:
|
||||
return ""
|
||||
cleaned = notes.translate(str.maketrans("", "", _CTRL_CHARS))
|
||||
if len(cleaned) > MAX_NOTES_LEN:
|
||||
cleaned = cleaned[:MAX_NOTES_LEN] + "…"
|
||||
return cleaned
|
||||
|
||||
|
||||
def rate_limited_reply(scope: str, retry_after_s: float) -> str:
    """
    Message for a control command rejected by the rate limiter or cooldown.

    Waits of one second or more are shown as whole seconds; shorter waits are
    shown as "a moment".
    """
    if retry_after_s >= 1:
        wait = f"{retry_after_s:.0f}s"
    else:
        wait = "a moment"
    return f"⏳ rate limited ({scope}), retry after {wait}"
|
||||
|
||||
|
||||
def status_not_available_reply() -> str:
    """Fixed message returned when bridge status cannot be reported."""
    message = "⚠️ Bridge status not available (service initialising or config missing)."
    return message
|
||||
|
||||
|
||||
# M5.1: !nodes reply
_MAX_ROOM_OVERRIDES_SHOWN = 10


def nodes_reply(
    policy_info: dict,
    node_stats: Optional[dict] = None,
    sticky_info: Optional[dict] = None,
) -> str:
    """
    Compact reply for `!nodes` in control room.

    policy_info: dict with "default_node", "allowed_nodes" and
                 "room_overrides" (either a room→node dict or an int count)
    node_stats:  optional {node_id: {"routed", "rejected", "health",
                 "ewma_latency_s", "consecutive_failures"}}
    sticky_info: optional dict with "active_keys", "ttl_s", "entries"
                 (each with "key", "node", "remaining_s") and "truncated" (M8.1)

    At most _MAX_ROOM_OVERRIDES_SHOWN room overrides are listed; the rest are
    summarised as "+N more".
    """
    default = policy_info.get("default_node", "?")
    allowed_str = ", ".join(
        f"`{n}`" for n in sorted(policy_info.get("allowed_nodes") or [])
    )
    overrides = policy_info.get("room_overrides", {}) or {}

    out = [
        "🌐 **Node policy**",
        f"Default: `{default}` Allowed: {allowed_str}",
    ]

    # Room overrides: full mapping, bare count, or nothing.
    if isinstance(overrides, dict) and overrides:
        out.append(f"\n**Room overrides** ({len(overrides)}):")
        shown = list(overrides.items())[:_MAX_ROOM_OVERRIDES_SHOWN]
        out.extend(f" `{room_id}` → `{node}`" for room_id, node in shown)
        hidden = len(overrides) - _MAX_ROOM_OVERRIDES_SHOWN
        if hidden > 0:
            out.append(f" _(+{hidden} more)_")
    elif isinstance(overrides, int) and overrides:
        # room_overrides supplied as an int count, not a dict
        out.append(f"\nRoom overrides: {overrides}")
    else:
        out.append("\nNo room overrides configured.")

    if node_stats:
        out.append("\n**Per-node stats** (since last restart):")
        for node_id in sorted(node_stats):
            ns = node_stats[node_id]
            fields = [
                f"routed={ns.get('routed', 0)}",
                f"rejected={ns.get('rejected', 0)}",
            ]
            health = ns.get("health", "")
            if health:
                fields.append(f"health={health}")
            ewma = ns.get("ewma_latency_s")
            if ewma is not None:
                fields.append(f"ewma={ewma:.2f}s")
            consec = ns.get("consecutive_failures", 0)
            if consec:
                fields.append(f"consec_fail={consec}")
            out.append(f" `{node_id}`: " + " ".join(fields))

    # M8.1: sticky cache section
    if sticky_info is not None:
        active = sticky_info.get("active_keys", 0)
        ttl = sticky_info.get("ttl_s", 0)
        if not active:
            out.append(f"\nSticky routing: none active ttl={ttl:.0f}s")
        else:
            out.append(f"\n**Sticky routing** (anti-flap): {active} active ttl={ttl:.0f}s")
            for entry in sticky_info.get("entries", []):
                rem = entry.get("remaining_s", 0)
                out.append(f" `{entry['key']}` → `{entry['node']}` ({rem:.0f}s left)")
            if sticky_info.get("truncated"):
                out.append(f" _(+{sticky_info['truncated']} more)_")

    return "\n".join(out)
|
||||
|
||||
|
||||
# ── M6.0: !node subcommand parser + reply helpers ──────────────────────────────
|
||||
|
||||
import re as _re
|
||||
|
||||
# Extract `room=<value>` / `node=<value>` from the argument text (case-insensitive keys).
_ROOM_KWARG_RE = _re.compile(r"\broom=(\S+)", _re.IGNORECASE)
_NODE_VAL_RE = _re.compile(r"\bnode=(\w+)", _re.IGNORECASE)
# Basic Matrix room ID shape: "!<localpart>:<server>".
_ROOM_ID_RE = _re.compile(r"^![a-zA-Z0-9._\-]+:[a-zA-Z0-9._\-]+$")


def parse_node_cmd(args_text: str) -> Tuple[str, Optional[str], Optional[str]]:
    """
    Parse `!node <subcommand> [room=...] [node=...]` arguments.

    Returns (subcmd, room_id_or_None, node_id_or_None).
    subcmd is lower-cased; node_id is upper-cased; room_id is returned as
    typed. Empty or whitespace-only input yields ("", None, None).
    """
    parts = args_text.strip().split(None, 1)
    if not parts:
        return ("", None, None)
    subcmd = parts[0].lower()
    rest = parts[1] if len(parts) > 1 else ""

    room_m = _ROOM_KWARG_RE.search(rest)
    node_m = _NODE_VAL_RE.search(rest)

    room_id = room_m.group(1) if room_m else None
    node_id = node_m.group(1).upper() if node_m else None
    return (subcmd, room_id, node_id)


def node_cmd_validate_room(room_id: str) -> bool:
    """
    Return True if room_id matches basic Matrix room ID format.

    Uses ``fullmatch`` so the whole string must conform: with ``match`` the
    pattern's trailing ``$`` would also accept an ID followed by a newline.
    Falsy input returns False.
    """
    return bool(room_id) and _ROOM_ID_RE.fullmatch(room_id) is not None
|
||||
|
||||
|
||||
def node_cmd_reply_set(room_id: str, node_id: str) -> str:
    """Confirmation that a room→node override was stored."""
    return "✅ Override set: `{}` → `{}`".format(room_id, node_id)
|
||||
|
||||
|
||||
def node_cmd_reply_unset_ok(room_id: str) -> str:
    """Confirmation that the node override for a room was removed."""
    return "✅ Override removed for `{}`".format(room_id)
|
||||
|
||||
|
||||
def node_cmd_reply_unset_not_found(room_id: str) -> str:
    """Notice that there was no node override to remove for the room."""
    return "ℹ️ No override was set for `{}`".format(room_id)
|
||||
|
||||
|
||||
def node_cmd_reply_get(
    room_id: str,
    node_id: Optional[str],
    env_node: Optional[str],
    default_node: str,
) -> str:
    """
    Describe node routing for one room.

    Lists the dynamic override (if any), the env-map entry (if any) and the
    default, then the effective node, chosen as the first truthy value of
    override → env map → default.
    """
    out = [f"📌 **Node info for** `{room_id}`"]
    out.append(
        f"Dynamic override: `{node_id}` _(set by operator)_"
        if node_id
        else "Dynamic override: _none_"
    )
    if env_node:
        out.append(f"Env map: `{env_node}`")
    out.append(f"Default: `{default_node}`")

    effective = node_id or env_node or default_node
    out.append(f"\nEffective node: **`{effective}`**")
    return "\n".join(out)
|
||||
|
||||
|
||||
def node_cmd_reply_list(
    overrides: List[Tuple[str, str, int]],
    total: int,
) -> str:
    """
    List dynamic node overrides.

    overrides: (room_id, node_id, updated_at) tuples, where updated_at is a
               POSIX timestamp in seconds, rendered as UTC
    total:     overall number of overrides; when it exceeds the number of
               listed rows, a "+N more" line is appended
    """
    import datetime

    lines = [f"📋 **Dynamic node overrides** ({total} total)"]
    if not overrides:
        lines.append("_None set._")
    else:
        for room_id, node_id, updated_at in overrides:
            # Aware UTC conversion; utcfromtimestamp() is deprecated since 3.12.
            ts = datetime.datetime.fromtimestamp(
                updated_at, tz=datetime.timezone.utc
            ).strftime("%Y-%m-%d %H:%M")
            lines.append(f" `{room_id}` → `{node_id}` _(at {ts} UTC)_")
        if total > len(overrides):
            lines.append(f" _(+{total - len(overrides)} more)_")
    return "\n".join(lines)
|
||||
|
||||
|
||||
def node_cmd_reply_error(msg: str) -> str:
    """Error message for `!node`, followed by a blank line and the usage summary."""
    usage = [
        "Usage:",
        " `!node set room=!room:server node=NODA2`",
        " `!node unset room=!room:server`",
        " `!node get room=!room:server`",
        " `!node list`",
    ]
    return "\n".join([f"❌ {msg}", ""] + usage)
|
||||
|
||||
|
||||
# ── M6.1: !room agents reply helpers ──────────────────────────────────────────
|
||||
|
||||
# Patterns for `agents=`, `agent=` and `default=` key/value tokens (case-insensitive keys).
_AGENTS_KWARG_RE = _re.compile(r"\bagents=(\S+)", _re.IGNORECASE)
_AGENT_KWARG_RE = _re.compile(r"\bagent=(\w+)", _re.IGNORECASE)
_DEFAULT_KWARG_RE = _re.compile(r"\bdefault=(\w+)", _re.IGNORECASE)


def parse_room_agents_cmd(
    subcommand: str,
    args: tuple,
    kwargs: Dict[str, str],
) -> Tuple[str, Optional[str], Optional[List[str]], Optional[str], Optional[str]]:
    """
    Parse `!room agents <action> [room=...] [agents=...] [agent=...] [default=...]`.

    Returns (action, room_id, agents, single_agent, default_agent):
      action:        lower-cased args[0]; falls back to the lower-cased
                     subcommand when there is no (non-blank) positional
      room_id:       kwargs["room"] unchanged, or None
      agents:        kwargs["agents"] split on commas, each item stripped and
                     lower-cased, blanks dropped; None when the key is
                     missing or empty
      single_agent:  kwargs["agent"] stripped and lower-cased, or None
      default_agent: kwargs["default"] stripped and lower-cased, or None
    """
    first_positional = args[0].lower() if args else ""
    action = first_positional.strip() or subcommand.lower()
    room_id = kwargs.get("room")

    agents: Optional[List[str]] = None
    raw_agents = kwargs.get("agents", "")
    if raw_agents:
        normalised = (part.strip().lower() for part in raw_agents.split(","))
        agents = [name for name in normalised if name]

    def _lowered(key: str) -> Optional[str]:
        # Normalise an optional single-value kwarg; empty becomes None.
        return kwargs.get(key, "").strip().lower() or None

    return action, room_id, agents, _lowered("agent"), _lowered("default")
|
||||
|
||||
|
||||
def room_agents_reply_set(room_id: str, agents: List[str], default_agent: str) -> str:
    """Confirmation that a room's agent override was set; agents are listed sorted."""
    listing = ", ".join(f"`{a}`" for a in sorted(agents))
    out = [
        f"✅ Agent override set for `{room_id}`",
        f"Agents: {listing}",
        f"Default: `{default_agent}`",
    ]
    return "\n".join(out)
|
||||
|
||||
|
||||
def room_agents_reply_add(room_id: str, agent: str, agents: List[str], default_agent: Optional[str]) -> str:
    """Confirmation that an agent was added; shows the sorted current set and the default if any."""
    listing = ", ".join(f"`{a}`" for a in sorted(agents))
    out = [
        f"✅ Agent `{agent}` added to `{room_id}`",
        f"Current agents: {listing}",
    ]
    if default_agent:
        out.append(f"Default: `{default_agent}`")
    return "\n".join(out)
|
||||
|
||||
|
||||
def room_agents_reply_remove(room_id: str, agent: str, agents: List[str], default_agent: Optional[str]) -> str:
    """
    Confirmation that an agent was removed.

    With agents remaining, lists them (sorted) and the default if any; with
    none left, reports that the override was cleared.
    """
    if not agents:
        return f"✅ Agent `{agent}` removed — no agents left, override cleared for `{room_id}`"

    listing = ", ".join(f"`{a}`" for a in sorted(agents))
    out = [
        f"✅ Agent `{agent}` removed from `{room_id}`",
        f"Remaining: {listing}",
    ]
    if default_agent:
        out.append(f"Default: `{default_agent}`")
    return "\n".join(out)
|
||||
|
||||
|
||||
def room_agents_reply_unset_ok(room_id: str) -> str:
    """Confirmation that the agent override for a room was cleared."""
    return "✅ Agent override cleared for `{}` (using env/default config)".format(room_id)
|
||||
|
||||
|
||||
def room_agents_reply_unset_not_found(room_id: str) -> str:
    """Notice that there was no agent override to clear for the room."""
    return "ℹ️ No agent override was set for `{}`".format(room_id)
|
||||
|
||||
|
||||
def room_agents_reply_get(
    room_id: str,
    override_agents: Optional[List[str]],
    override_default: Optional[str],
    env_agents: Optional[List[str]],
    env_default: Optional[str],
) -> str:
    """
    Describe the agent policy for one room.

    Shows the dynamic override and the env config (each with its default, or
    a placeholder when absent), then the effective agents and default. The
    effective agent list is the override if non-empty, else the env list;
    the effective default is chosen the same way, independently.
    """
    def _ticked(names: List[str]) -> str:
        # Sorted, back-ticked, comma-separated rendering of agent names.
        return ", ".join(f"`{a}`" for a in sorted(names))

    out = [f"📌 **Agent policy for** `{room_id}`"]

    if override_agents:
        out.append(
            f"Dynamic override: {_ticked(override_agents)} default=`{override_default or '?'}`"
        )
    else:
        out.append("Dynamic override: _none_")

    if env_agents:
        out.append(f"Env config: {_ticked(env_agents)} default=`{env_default or '?'}`")
    else:
        out.append("Env config: _not configured_")

    effective_agents = override_agents or env_agents or []
    effective_default = override_default or env_default or "?"
    out.append(
        f"\nEffective agents: **{_ticked(effective_agents)}** default=**`{effective_default}`**"
    )
    return "\n".join(out)
|
||||
|
||||
|
||||
def room_agents_reply_list(
    overrides: List[Tuple[str, List[str], Optional[str], int]],
    total: int,
) -> str:
    """
    List dynamic agent overrides.

    overrides: (room_id, agents, default_agent, updated_at) tuples, where
               updated_at is a POSIX timestamp in seconds, rendered as UTC;
               agents are shown in the order given
    total:     overall number of overrides; when it exceeds the number of
               listed rows, a "+N more" line is appended
    """
    import datetime

    lines = [f"📋 **Dynamic agent overrides** ({total} total)"]
    if not overrides:
        lines.append("_None set._")
    else:
        for room_id, agents, default_agent, updated_at in overrides:
            # Aware UTC conversion; utcfromtimestamp() is deprecated since 3.12.
            ts = datetime.datetime.fromtimestamp(
                updated_at, tz=datetime.timezone.utc
            ).strftime("%Y-%m-%d %H:%M")
            agents_str = ", ".join(agents)
            lines.append(
                f" `{room_id}`: [{agents_str}] default=`{default_agent or '?'}` _(at {ts} UTC)_"
            )
        if total > len(overrides):
            lines.append(f" _(+{total - len(overrides)} more)_")
    return "\n".join(lines)
|
||||
|
||||
|
||||
def room_agents_reply_error(msg: str) -> str:
    """Error message for `!room agents`, followed by a blank line and the usage summary."""
    usage = [
        "Usage:",
        " `!room agents set room=!X agents=sofiia,helion [default=sofiia]`",
        " `!room agents add room=!X agent=druid`",
        " `!room agents remove room=!X agent=helion`",
        " `!room agents get room=!X`",
        " `!room agents unset room=!X`",
        " `!room agents list`",
    ]
    return "\n".join([f"❌ {msg}", ""] + usage)
|
||||
|
||||
|
||||
# ── M6.2: !policy export/import reply helpers + path validator ────────────────
|
||||
|
||||
import os as _os
|
||||
import json as _json
|
||||
|
||||
|
||||
POLICY_EXPORTS_SUBDIR = "policy_exports"


def validate_export_path(exports_dir: str, filename: str) -> Optional[str]:
    """
    Validate an export filename and join it onto ``exports_dir``.

    Security: only simple filenames are accepted — no slashes, no `..`, and
    only the characters ``[a-zA-Z0-9._-]``. After symlink resolution the
    target must still lie strictly inside ``exports_dir``.

    Returns ``os.path.join(exports_dir, filename)`` (not the resolved path)
    when valid, or None if the filename is rejected or path resolution fails.
    """
    if not filename:
        return None
    # Reject anything with directory separators or traversal sequences
    if "/" in filename or "\\" in filename or ".." in filename:
        return None
    # Only allow safe characters. fullmatch is required here: match() with a
    # trailing "$" would also accept a name that ends in a newline.
    if not _re.fullmatch(r"[a-zA-Z0-9._\-]+", filename):
        return None

    full_path = _os.path.join(exports_dir, filename)
    try:
        resolved = _os.path.realpath(full_path)
        exports_resolved = _os.path.realpath(exports_dir)
    except (OSError, ValueError):
        return None
    # The trailing separator also rejects the directory itself (e.g. ".").
    if not resolved.startswith(exports_resolved + _os.sep):
        return None
    return full_path
|
||||
|
||||
|
||||
def policy_export_reply(path: str, node_count: int, agent_count: int) -> str:
    """Confirmation of a policy export; shows only the file's base name plus override counts."""
    out = [
        "✅ **Policy exported**",
        f"File: `{_os.path.basename(path)}`",
        f"Node overrides: {node_count} Agent overrides: {agent_count}",
    ]
    return "\n".join(out)
|
||||
|
||||
|
||||
def policy_import_dry_run_reply(stats: dict, mode: str) -> str:
    """
    Summarise what a policy import would change, without applying it.

    ``stats`` supplies the ``node_*`` / ``agent_*`` ``added``/``updated``/
    ``deleted`` counters; missing counters are shown as 0.
    """
    def _n(key: str):
        return stats.get(key, 0)

    out = [
        f"🔍 **Import dry-run** (mode=`{mode}`, no changes applied)",
        f"Node overrides: +{_n('node_added')} ~{_n('node_updated')} -{_n('node_deleted')}",
        f"Agent overrides: +{_n('agent_added')} ~{_n('agent_updated')} -{_n('agent_deleted')}",
        "_Use `dry_run=0` to apply._",
    ]
    return "\n".join(out)
|
||||
|
||||
|
||||
def format_import_diff(diff: Any) -> str:
    """
    Format an import diff as a human-readable Markdown string (M9.1).

    ``diff`` must expose ``node_added/updated/deleted`` and
    ``agent_added/updated/deleted`` counters. Optional ``sample_keys`` (with
    ``total_changes()``) adds an "Affected rooms" line, and a truthy
    ``is_replace`` adds a REPLACE-mode warning.
    """
    def _row(label: str, added: Any, updated: Any, deleted: Any) -> str:
        # One summary row; zero/falsy counters are left out.
        bits: List[str] = []
        if added:
            bits.append(f"+{added} added")
        if updated:
            bits.append(f"~{updated} updated")
        if deleted:
            bits.append(f"-{deleted} deleted ⚠️")
        return f"**{label}:** " + (", ".join(bits) if bits else "no changes")

    out: List[str] = [
        _row("Node overrides", diff.node_added, diff.node_updated, diff.node_deleted),
        _row("Agent overrides", diff.agent_added, diff.agent_updated, diff.agent_deleted),
    ]

    # Sample affected rooms
    if getattr(diff, "sample_keys", None):
        shown = ", ".join(f"`{k}`" for k in diff.sample_keys)
        remaining = diff.total_changes() - len(diff.sample_keys)
        tail = f" _(+{remaining} more)_" if remaining > 0 else ""
        out.append(f"**Affected rooms:** {shown}{tail}")

    # Replace danger banner
    if getattr(diff, "is_replace", False):
        out.append("⚠️ **REPLACE mode** — existing overrides NOT in the file will be **deleted**.")

    return "\n".join(out)
|
||||
|
||||
|
||||
def policy_import_intent_reply(
    diff: Any,
    action_summary: str,
    nonce: str,
    ttl_s: int,
) -> str:
    """
    Confirmation prompt for `!policy import`, including a diff preview (M9.1).

    When ``diff.total_changes()`` is 0 an extra note says that nothing would
    change. The prompt ends with the `!confirm <nonce>` instruction.
    """
    out = [
        "⚠️ **Confirm required**",
        f"Action: `{action_summary}`",
        "",
        "**Preview:**",
        format_import_diff(diff),
        "",
    ]
    if diff.total_changes() == 0:
        out.extend(["_(No policy changes would be made.)_", ""])
    out.append(f"Type `!confirm {nonce}` within {ttl_s}s to apply.")
    out.append("_(Only you can confirm. If the file changes, this confirm will be rejected.)_")
    return "\n".join(out)
|
||||
|
||||
|
||||
def policy_import_reply(stats: dict, mode: str) -> str:
    """Format the success reply sent after a policy import was applied.

    Args:
        stats: Mapping with optional ``node_added``, ``node_updated``,
            ``node_deleted``, ``agent_added``, ``agent_updated`` and
            ``agent_deleted`` entries; missing keys are shown as ``0``.
        mode: Import mode, echoed in backticks.

    Returns:
        A three-line summary: headline, node override counts, agent
        override counts.
    """

    def _row(label: str, prefix: str) -> str:
        # One "+added ~updated -deleted" line for the given counter family.
        added = stats.get(f"{prefix}_added", 0)
        updated = stats.get(f"{prefix}_updated", 0)
        deleted = stats.get(f"{prefix}_deleted", 0)
        return f"{label} overrides: +{added} ~{updated} -{deleted}"

    return "\n".join(
        (
            f"✅ **Policy imported** (mode=`{mode}`)",
            _row("Node", "node"),
            _row("Agent", "agent"),
        )
    )
|
||||
|
||||
|
||||
def policy_restore_intent_reply(
    diff: Any,
    action_summary: str,
    nonce: str,
    ttl_s: int,
) -> str:
    """Build the rollback preview and confirm prompt for ``!policy restore`` (M10.1).

    Args:
        diff: Diff object handed to ``format_import_diff`` to render the
            preview section.
        action_summary: Text shown (in backticks) as the rollback action.
        nonce: Token the operator is asked to send back with ``!confirm``.
        ttl_s: Number of seconds displayed as the expiry of the confirmation.

    Returns:
        Three paragraphs separated by blank lines: the preview, the rollback
        action, and the confirm instruction.
    """
    preview = format_import_diff(diff)
    paragraphs = (
        f"🔄 **Policy restore (rollback) preview**\n{preview}",
        f"⚠️ **Rollback action:** `{action_summary}`",
        f"Type `!confirm {nonce}` to apply restore (expires in {ttl_s}s)",
    )
    return "\n\n".join(paragraphs)
|
||||
|
||||
|
||||
def policy_restore_applied_reply(
    stats: Any,
    mode: str,
    autobackup_basename: str = "",
) -> str:
    """Format the reply sent once ``!policy restore`` was confirmed and applied (M10.1).

    Args:
        stats: Counter mapping with optional ``node_*`` / ``agent_*``
            ``added``/``updated``/``deleted`` keys. Anything that is not a
            ``dict`` is treated as "no counters" and rendered as zeros.
        mode: Restore mode, echoed verbatim in the headline.
        autobackup_basename: File name of the pre-restore backup; when
            non-empty an extra paragraph naming it is appended.

    Returns:
        The formatted reply text.
    """
    # Normalise once instead of type-checking for every counter.
    counts = stats if isinstance(stats, dict) else {}

    def _row(label: str, prefix: str) -> str:
        added = counts.get(f"{prefix}_added", 0)
        updated = counts.get(f"{prefix}_updated", 0)
        deleted = counts.get(f"{prefix}_deleted", 0)
        return f"{label} overrides: +{added} ~{updated} -{deleted}"

    reply = "\n".join(
        (
            f"✅ **Policy restored** (mode={mode})",
            _row("Node", "node"),
            _row("Agent", "agent"),
        )
    )
    if autobackup_basename:
        reply += f"\n\n💾 Pre-restore backup saved: `{autobackup_basename}`"
    return reply
|
||||
|
||||
|
||||
def policy_history_reply(changes: List[Any]) -> str:
    """Render policy change records for the ``!policy history`` reply (M10.2).

    One line is produced per record, in the order given, shaped as::

        N. [id:ID] [WHEN] `verb/mode` CHANGES[ warning] `file` op:`hash8`

    where ``WHEN`` comes from ``when_str()``, ``CHANGES`` from
    ``changes_short()``, the file name is cut to 40 characters plus an
    ellipsis, and only the first 8 characters of ``sender_hash`` are shown.
    A warning marker is added for records whose ``is_destructive`` is truthy.

    Args:
        changes: Change records exposing ``id``, ``verb``, ``mode``,
            ``source_file``, ``sender_hash``, ``is_destructive``,
            ``when_str()`` and ``changes_short()``.

    Returns:
        The formatted history, or a fixed "nothing recorded" message when
        ``changes`` is empty.
    """
    if not changes:
        return "📋 **Policy change history**\nNo policy changes recorded yet."

    name_limit = 40
    out = ["📋 **Policy change history** (most recent first)\n"]
    for position, change in enumerate(changes, 1):
        warning = " ⚠️" if change.is_destructive else ""
        shown_file = change.source_file
        if len(shown_file) > name_limit:
            shown_file = shown_file[:name_limit] + "…"
        out.append(
            f"{position}. [id:{change.id}] [{change.when_str()}] `{change.verb}/{change.mode}`"
            f" {change.changes_short()}{warning}"
            f" `{shown_file}`"
            f" op:`{change.sender_hash[:8]}`"
        )
    out.append("\nUse `!policy change id=<n>` for full details of a specific change.")
    return "\n".join(out)
|
||||
|
||||
|
||||
def policy_change_detail_reply(change: Any) -> str:
    """Render every field of one policy change for ``!policy change id=<n>`` (M10.3).

    Args:
        change: Record exposing ``id``, ``verb``, ``mode``, ``when_str()``,
            ``sender_hash``, ``source_file``, ``is_destructive``, the six
            ``node_*`` / ``agent_*`` counters and ``diff_summary``.

    Returns:
        A multi-line reply with an overview section, the per-kind change
        counts and the stored summary. The file name is cut to 60 characters
        plus an ellipsis and only the first 8 characters of ``sender_hash``
        are shown.
    """
    name_limit = 60
    destructive = "⚠️ Yes" if change.is_destructive else "No"
    if len(change.source_file) > name_limit:
        shown_file = change.source_file[:name_limit] + "…"
    else:
        shown_file = change.source_file

    overview = [
        # The trailing newline leaves a blank line under the headline.
        f"🔍 **Policy change #{change.id}**\n",
        f"**Verb:** `{change.verb}`",
        f"**Mode:** `{change.mode}`",
        f"**Applied:** {change.when_str()} UTC",
        f"**Operator:** op:`{change.sender_hash[:8]}`",
        f"**File:** `{shown_file}`",
        f"**Destructive:** {destructive}",
    ]
    counts = [
        "**Changes:**",
        f" Nodes: +{change.node_added} added ~{change.node_updated} updated -{change.node_deleted} deleted",
        f" Agents: +{change.agent_added} added ~{change.agent_updated} updated -{change.agent_deleted} deleted",
    ]
    summary = [
        "**Summary:**",
        f" {change.diff_summary}",
    ]
    return "\n".join(overview + [""] + counts + [""] + summary)
|
||||
|
||||
|
||||
def policy_prune_preview_reply(result: Any, retention_days: int) -> str:
    """Format the dry-run preview for ``!policy prune_exports dry_run=1`` (M10.0).

    Args:
        result: Prune result exposing ``count``, ``total_bytes`` and
            ``sample_filenames(n)``.
        retention_days: Retention window, echoed in the headline.

    Returns:
        A "nothing to prune" message when ``result.count`` is zero; otherwise
        the file count, the size in whole KiB (integer division by 1024), up
        to five sample file names, a "+N more" note when further files
        exist, and the command to run the real prune.
    """
    if result.count == 0:
        return (
            f"🗑️ **Policy exports prune preview** (retention={retention_days}d)\n"
            "No files older than the retention window found. Nothing to prune."
        )

    shown = result.sample_filenames(5)
    listing = "\n".join(f" - `{name}`" for name in shown)
    hidden = result.count - len(shown)
    if hidden > 0:
        listing += f"\n _(+{hidden} more)_"
    kilobytes = result.total_bytes // 1024

    parts = [
        f"🗑️ **Policy exports prune preview** (retention={retention_days}d)",
        f"Would delete **{result.count}** file(s) (~{kilobytes} KB):",
        listing,
        "",
        "To actually prune: `!policy prune_exports dry_run=0`",
    ]
    return "\n".join(parts)
|
||||
|
||||
|
||||
def policy_prune_applied_reply(result: Any, retention_days: int) -> str:
    """Format the reply sent after ``!policy prune_exports dry_run=0`` ran (M10.0).

    Args:
        result: Prune result exposing ``count`` and ``total_bytes``.
        retention_days: Retention window, echoed in the headline.

    Returns:
        A "no files matched" message when ``result.count`` is zero;
        otherwise the number of deleted files and the freed size in whole
        KiB (integer division by 1024).
    """
    if result.count != 0:
        freed_kb = result.total_bytes // 1024
        return "\n".join(
            (
                f"✅ **Policy exports pruned** (retention={retention_days}d)",
                f"Deleted **{result.count}** file(s) (~{freed_kb} KB freed).",
            )
        )

    # Nothing was old enough to be removed.
    return "\n".join(
        (
            f"🗑️ **Policy exports pruned** (retention={retention_days}d)",
            "No files matched the retention window.",
        )
    )
|
||||
|
||||
|
||||
def policy_cmd_error(msg: str) -> str:
    """Format an error reply for a malformed ``!policy`` command.

    Args:
        msg: Error description, placed on the first line of the reply.

    Returns:
        The error line followed by a blank line and the usage text for the
        ``export`` and ``import`` subcommands.
    """
    usage = "\n".join(
        (
            "Usage:",
            " `!policy export`",
            " `!policy import path=policy-YYYYMMDD-HHMMSS.json [mode=merge|replace] [dry_run=0]`",
        )
    )
    return f"❌ {msg}\n\n{usage}"
|
||||
|
||||
|
||||
@@ -252,12 +1162,26 @@ def help_reply() -> str:
|
||||
"""Brief help text."""
|
||||
return (
|
||||
"**DAGI Bridge — Control Commands**\n\n"
|
||||
"`!runbook start <path> [node=NODA1]` — Start a runbook run\n"
|
||||
"`!runbook next <run_id>` — Advance to next step\n"
|
||||
"`!runbook complete <run_id> step=<n> status=ok` — Mark step complete\n"
|
||||
"`!runbook evidence <run_id>` — Get evidence artifact path\n"
|
||||
"`!runbook status <run_id>` — Show current run state\n"
|
||||
"`!status` — Bridge health summary\n"
|
||||
"`!runbook start <path> [node=NODA1]` — Start a runbook run ✅\n"
|
||||
"`!runbook next <run_id>` — Advance to next step ✅\n"
|
||||
"`!runbook complete <run_id> step=<n> status=ok [notes=...]` — Mark step complete ✅\n"
|
||||
"`!runbook status <run_id>` — Show run status ✅\n"
|
||||
"`!runbook evidence <run_id>` — Generate release evidence ✅\n"
|
||||
"`!runbook post_review <run_id>` — Generate post-release review ✅\n"
|
||||
"`!status` — Bridge health summary ✅\n"
|
||||
"`!nodes` — Node policy overview ✅\n"
|
||||
"`!node set room=!room:server node=NODA2` — Set room-node override ✅\n"
|
||||
"`!node unset room=!room:server` — Remove room-node override ✅\n"
|
||||
"`!node get room=!room:server` — Show current override ✅\n"
|
||||
"`!node list` — List dynamic overrides (top 10) ✅\n"
|
||||
"`!room agents set room=!X agents=sofiia,helion [default=sofiia]` — Set agent list ✅\n"
|
||||
"`!room agents add room=!X agent=druid` — Add agent to room ✅\n"
|
||||
"`!room agents remove room=!X agent=helion` — Remove agent from room ✅\n"
|
||||
"`!room agents get room=!X` — Show current agent policy ✅\n"
|
||||
"`!room agents list` — List all rooms with agent overrides ✅\n"
|
||||
"`!room agents unset room=!X` — Remove all agent overrides for room ✅\n"
|
||||
"`!policy export` — Export policy snapshot to file ✅\n"
|
||||
"`!policy import path=<file> [mode=merge|replace] [dry_run=0]` — Import policy snapshot ✅\n"
|
||||
"`!help` — This message\n\n"
|
||||
"_Only authorised operators can issue control commands._"
|
||||
)
|
||||
|
||||
Reference in New Issue
Block a user