Files
microdao-daarion/services/matrix-bridge-dagi/app/confirm_store.py
Apple 82d5ff2a4f feat(matrix-bridge-dagi): M4–M11 + soak infrastructure (debug inject endpoint)
Includes all milestones M4 through M11:
- M4: agent discovery (!agents / !status)
- M5: node-aware routing + per-node observability
- M6: dynamic policy store (node/agent overrides, import/export)
- M7: Prometheus alerts + Grafana dashboard + metrics contract
- M8: node health tracker + soft failover + sticky cache + HA persistence
- M9: two-step confirm + diff preview for dangerous commands
- M10: auto-backup, restore, retention, policy history + change detail
- M11: soak scenarios (CI tests) + live soak script

Soak infrastructure (this commit):
- POST /v1/debug/inject_event (guarded by DEBUG_INJECT_ENABLED=false)
- _preflight_inject() and _check_wal() in soak script
- --db-path arg for WAL delta reporting
- Runbook sections 2a/2b/2c: Step 0 and Step 1 exact commands

Made-with: Cursor
2026-03-05 07:51:37 -08:00

168 lines
6.2 KiB
Python

"""
confirm_store — M9.0: Two-step confirmation for dangerous control commands.
Flow:
1. Operator issues a dangerous command (e.g. !node set, !policy import mode=replace).
2. Bridge calls ConfirmStore.add(..., callback=<async callable>) → returns a nonce.
3. Bridge replies: "Type !confirm <nonce> within Ns to apply."
4. Operator sends !confirm <nonce>.
5. Bridge calls ConfirmStore.pop(nonce, sender_hash) → returns PendingConfirmation.
6. Bridge executes callback() → (reply_text, diff_summary).
7. Audit trail: matrix.control.intent / matrix.control.confirmed / matrix.control.applied.
Safety:
- One pending entry per sender (new request replaces old).
- Nonce is sender-bound: wrong sender_hash → pop returns None.
- TTL enforced via monotonic time; expired entries not returned.
- Nonce: 6 uppercase alphanumeric (NONCE_LEN chars from NONCE_CHARS).
"""
from __future__ import annotations
import secrets
import string
import threading
import time
from dataclasses import dataclass, field
from typing import Any, Awaitable, Callable, Dict, List, Optional, Tuple
# Number of characters in a confirmation nonce (see make_nonce).
NONCE_LEN = 6
# Alphabet nonces are drawn from: A-Z plus 0-9.
NONCE_CHARS = string.ascii_uppercase + string.digits
# Default lifetime of a pending confirmation, in seconds; used as the
# default for ConfirmStore's ttl_s and added to time.monotonic().
_DEFAULT_TTL_S = 120.0
def make_nonce() -> str:
    """
    Return a fresh confirmation nonce.

    The result is NONCE_LEN characters long; each character is drawn
    independently from NONCE_CHARS via ``secrets.choice``.
    """
    symbols = [secrets.choice(NONCE_CHARS) for _ in range(NONCE_LEN)]
    return "".join(symbols)
@dataclass
class PendingConfirmation:
    """
    One queued dangerous command awaiting the operator's ``!confirm <nonce>``.

    Fields (in constructor order):
        nonce:           token the operator must send back to confirm.
        sender_hash:     hash identifying the sender who issued the intent.
        verb:            command verb, e.g. "node.set", "room.agents set",
                         "policy.import".
        normalized_args: human-readable arguments, kept for audit.
        action_summary:  short summary of the action,
                         e.g. "!node set room=!x:s node=NODA2".
        room_id:         Matrix room_id where the intent was issued.
        callback:        async () → (reply_text, diff_summary).
        expires_at:      deadline on the time.monotonic() clock.
    """

    nonce: str
    sender_hash: str
    verb: str
    normalized_args: str
    action_summary: str
    room_id: str
    callback: Callable[[], Awaitable[Tuple[str, str]]]
    expires_at: float
class ConfirmStore:
    """
    In-memory, lock-protected store for pending two-step confirmations.

    Two indexes are kept in sync under one re-entrant lock:
    ``_by_nonce`` (nonce → entry) and ``_by_sender`` (sender_hash → nonce).

    One pending entry per sender at a time. If the same sender issues a new
    dangerous command before confirming the previous one, the old entry is
    replaced (new nonce issued).

    Expiry is checked lazily against ``time.monotonic()`` when an entry is
    looked up; ``cleanup()`` purges expired entries eagerly.
    """

    # Extra draws attempted when a freshly generated nonce is already in use.
    _NONCE_RETRIES = 8

    def __init__(self, ttl_s: float = _DEFAULT_TTL_S) -> None:
        """
        Create an empty store.

        Args:
            ttl_s: Lifetime of each pending entry, in seconds.
        """
        self.ttl_s = ttl_s
        self._lock = threading.RLock()
        self._by_nonce: Dict[str, PendingConfirmation] = {}
        self._by_sender: Dict[str, str] = {}  # sender_hash → nonce

    # ── Internal helpers (caller must hold self._lock) ────────────────────────

    def _drop(self, nonce: str, entry: PendingConfirmation) -> None:
        """Remove *entry* (stored under *nonce*) from both indexes."""
        self._by_nonce.pop(nonce, None)
        # Clear the sender pointer only if it still refers to this nonce, so
        # a newer entry belonging to the same sender is never orphaned.
        if self._by_sender.get(entry.sender_hash) == nonce:
            del self._by_sender[entry.sender_hash]

    def _unused_nonce(self) -> str:
        """
        Return a nonce that no stored entry is using.

        Redraws a bounded number of times on collision. If every draw
        collides (practically only when make_nonce is not random), the
        clashing entry is displaced from both indexes so they stay
        consistent; the call never loops forever and never raises.
        """
        nonce = make_nonce()
        for _ in range(self._NONCE_RETRIES):
            if nonce not in self._by_nonce:
                return nonce
            nonce = make_nonce()
        clash = self._by_nonce.get(nonce)
        if clash is not None:
            self._drop(nonce, clash)
        return nonce

    # ── Public API ────────────────────────────────────────────────────────────

    def add(
        self,
        sender_hash: str,
        verb: str,
        normalized_args: str,
        action_summary: str,
        room_id: str,
        callback: Callable[[], Awaitable[Tuple[str, str]]],
    ) -> str:
        """
        Create a pending confirmation entry. Returns the nonce string.

        If the sender already has a pending entry it is replaced (old nonce
        becomes invalid immediately). The returned nonce is guaranteed not to
        be shared with another sender's stored entry.
        """
        with self._lock:
            # Evict any previous pending entry for this sender. The ownership
            # check guards against a stale pointer removing someone else's
            # entry.
            old_nonce = self._by_sender.pop(sender_hash, None)
            if old_nonce is not None:
                old_entry = self._by_nonce.get(old_nonce)
                if old_entry is not None and old_entry.sender_hash == sender_hash:
                    del self._by_nonce[old_nonce]

            # The nonce is chosen under the lock so the uniqueness check and
            # the insert are atomic with respect to other threads.
            nonce = self._unused_nonce()
            self._by_nonce[nonce] = PendingConfirmation(
                nonce=nonce,
                sender_hash=sender_hash,
                verb=verb,
                normalized_args=normalized_args,
                action_summary=action_summary,
                room_id=room_id,
                callback=callback,
                expires_at=time.monotonic() + self.ttl_s,
            )
            self._by_sender[sender_hash] = nonce
        return nonce

    def pop(self, nonce: str, sender_hash: str) -> Optional[PendingConfirmation]:
        """
        Retrieve and atomically remove a pending confirmation.

        The nonce is upper-cased before lookup, so operator input is
        case-insensitive.

        Returns None if:
        - nonce does not exist,
        - sender_hash does not match the entry owner (entry is left intact),
        - or the entry has expired (entry is removed).
        """
        nonce = nonce.upper()
        with self._lock:
            entry = self._by_nonce.get(nonce)
            if entry is None or entry.sender_hash != sender_hash:
                # Unknown nonce or wrong sender — deny without disclosing
                # which of the two it was.
                return None
            # The entry is consumed whether it is still valid or expired.
            self._drop(nonce, entry)
            if time.monotonic() > entry.expires_at:
                return None
            return entry

    def pending_nonce(self, sender_hash: str) -> Optional[str]:
        """
        Return the current pending nonce for a sender (non-destructive peek).

        Returns None if there is no entry or the entry has expired; an
        expired or dangling record is evicted as a side effect.
        """
        with self._lock:
            nonce = self._by_sender.get(sender_hash)
            if nonce is None:
                return None
            entry = self._by_nonce.get(nonce)
            if entry is None:
                # Pointer without an entry — drop the stale pointer.
                del self._by_sender[sender_hash]
                return None
            if time.monotonic() > entry.expires_at:
                # Lazy eviction
                self._drop(nonce, entry)
                return None
            return nonce

    def pending_count(self) -> int:
        """Number of non-expired pending entries (for /health, metrics)."""
        with self._lock:
            now = time.monotonic()
            return sum(1 for e in self._by_nonce.values() if now <= e.expires_at)

    def cleanup(self) -> int:
        """Eagerly remove all expired entries. Returns count removed."""
        with self._lock:
            now = time.monotonic()
            # Collect first: the dict must not change size while iterated.
            expired = [
                (n, e) for n, e in self._by_nonce.items() if now > e.expires_at
            ]
            for n, e in expired:
                self._drop(n, e)
            return len(expired)