Includes all milestones M4 through M11: - M4: agent discovery (!agents / !status) - M5: node-aware routing + per-node observability - M6: dynamic policy store (node/agent overrides, import/export) - M7: Prometheus alerts + Grafana dashboard + metrics contract - M8: node health tracker + soft failover + sticky cache + HA persistence - M9: two-step confirm + diff preview for dangerous commands - M10: auto-backup, restore, retention, policy history + change detail - M11: soak scenarios (CI tests) + live soak script Soak infrastructure (this commit): - POST /v1/debug/inject_event (guarded by DEBUG_INJECT_ENABLED=false) - _preflight_inject() and _check_wal() in soak script - --db-path arg for WAL delta reporting - Runbook sections 2a/2b/2c: Step 0 and Step 1 exact commands Made-with: Cursor
168 lines
6.2 KiB
Python
168 lines
6.2 KiB
Python
"""
|
|
confirm_store — M9.0: Two-step confirmation for dangerous control commands.
|
|
|
|
Flow:
|
|
1. Operator issues a dangerous command (e.g. !node set, !policy import mode=replace).
|
|
2. Bridge calls ConfirmStore.add(..., callback=<coroutine>) → returns a nonce.
|
|
3. Bridge replies: "Type !confirm <nonce> within Ns to apply."
|
|
4. Operator sends !confirm <nonce>.
|
|
5. Bridge calls ConfirmStore.pop(nonce, sender_hash) → returns PendingConfirmation.
|
|
6. Bridge executes callback() → (reply_text, diff_summary).
|
|
7. Audit trail: matrix.control.intent / matrix.control.confirmed / matrix.control.applied.
|
|
|
|
Safety:
|
|
- One pending entry per sender (new request replaces old).
|
|
- Nonce is sender-bound: wrong sender_hash → pop returns None.
|
|
- TTL enforced via monotonic time; expired entries not returned.
|
|
- Nonce: 6 uppercase alphanumeric (NONCE_LEN chars from NONCE_CHARS).
|
|
"""
|
|
from __future__ import annotations
|
|
|
|
import secrets
|
|
import string
|
|
import threading
|
|
import time
|
|
from dataclasses import dataclass, field
|
|
from typing import Any, Awaitable, Callable, Dict, List, Optional, Tuple
|
|
|
|
# Number of characters in every confirmation nonce. Short on purpose: the
# operator has to type it back by hand in a "!confirm <nonce>" message.
NONCE_LEN = 6
# Alphabet nonces are drawn from: A-Z followed by 0-9 (36 symbols).
NONCE_CHARS = string.ascii_uppercase + string.digits

# Default lifetime, in seconds, of a pending confirmation entry.
_DEFAULT_TTL_S = 120.0
|
|
|
|
|
|
def make_nonce() -> str:
    """
    Return a fresh random nonce for a pending confirmation.

    The nonce is NONCE_LEN characters long, each drawn independently from
    NONCE_CHARS using the ``secrets`` module (CSPRNG-backed), so nonces are
    not predictable from earlier ones.
    """
    picked = [secrets.choice(NONCE_CHARS) for _ in range(NONCE_LEN)]
    return "".join(picked)
|
|
|
|
|
|
@dataclass
class PendingConfirmation:
    """
    One dangerous command parked until its issuer sends ``!confirm <nonce>``.

    Attributes:
        nonce: Token the operator must echo back to apply the command.
        sender_hash: Identifies the issuer; only this sender may confirm.
        verb: Command verb, e.g. "node.set", "room.agents set", "policy.import".
        normalized_args: Human-readable arguments, kept for the audit trail.
        action_summary: Full command as shown to the operator,
            e.g. "!node set room=!x:s node=NODA2".
        room_id: Matrix room_id where the intent was issued.
        callback: Zero-argument async callable that applies the command and
            resolves to ``(reply_text, diff_summary)``.
        expires_at: Deadline on the ``time.monotonic()`` clock.
    """

    nonce: str
    sender_hash: str
    verb: str
    normalized_args: str
    action_summary: str
    room_id: str
    # async () → (reply_text, diff_summary)
    callback: Callable[[], Awaitable[Tuple[str, str]]]
    # Compared against time.monotonic(), never wall-clock time.
    expires_at: float
|
|
|
|
|
|
class ConfirmStore:
    """
    In-memory, thread-safe store for pending two-step confirmation entries.

    One pending entry per sender at a time. If the same sender issues a new
    dangerous command before confirming the previous one, the old entry is
    replaced (new nonce issued).

    Two indexes are kept in sync under a single re-entrant lock:
    ``_by_nonce`` (nonce → entry) and ``_by_sender`` (sender_hash → nonce).
    Deadlines use ``time.monotonic()``, so wall-clock adjustments do not
    affect expiry. Expired entries are dropped lazily when their owner calls
    ``pop`` / ``pending_nonce``, or eagerly by ``cleanup``.
    """

    def __init__(self, ttl_s: float = _DEFAULT_TTL_S) -> None:
        """
        Create an empty store.

        Args:
            ttl_s: Seconds an entry stays confirmable after ``add``.
        """
        self.ttl_s = ttl_s
        self._lock = threading.RLock()
        self._by_nonce: Dict[str, PendingConfirmation] = {}
        self._by_sender: Dict[str, str] = {}  # sender_hash → nonce

    # ── Public API ────────────────────────────────────────────────────────────

    def add(
        self,
        sender_hash: str,
        verb: str,
        normalized_args: str,
        action_summary: str,
        room_id: str,
        callback: Callable[[], Awaitable[Tuple[str, str]]],
    ) -> str:
        """
        Create a pending confirmation entry. Returns the nonce string.

        If the sender already has a pending entry it is replaced (old nonce
        becomes invalid immediately).

        Args:
            sender_hash: Identifies the issuer; only this sender can confirm.
            verb: Command verb (for audit).
            normalized_args: Human-readable arguments (for audit).
            action_summary: Full command text shown to the operator.
            room_id: Matrix room_id where the intent was issued.
            callback: Zero-argument async callable that applies the command
                and resolves to ``(reply_text, diff_summary)``.

        Returns:
            The nonce the operator must send back via ``!confirm <nonce>``.
        """
        expires_at = time.monotonic() + self.ttl_s
        with self._lock:
            # Evict any previous pending entry for this sender.
            old_nonce = self._by_sender.pop(sender_hash, None)
            if old_nonce is not None:
                self._by_nonce.pop(old_nonce, None)

            # Draw the nonce under the lock and retry on collision. Without
            # this check, a nonce equal to another sender's live nonce would
            # overwrite that sender's entry and leave their _by_sender
            # mapping pointing at an entry they do not own.
            nonce = make_nonce()
            while nonce in self._by_nonce:
                nonce = make_nonce()

            self._by_nonce[nonce] = PendingConfirmation(
                nonce=nonce,
                sender_hash=sender_hash,
                verb=verb,
                normalized_args=normalized_args,
                action_summary=action_summary,
                room_id=room_id,
                callback=callback,
                expires_at=expires_at,
            )
            self._by_sender[sender_hash] = nonce
        return nonce

    def pop(self, nonce: str, sender_hash: str) -> Optional[PendingConfirmation]:
        """
        Retrieve and atomically remove a pending confirmation.

        The nonce is matched case-insensitively and surrounding whitespace is
        ignored, since it is typed by hand.

        Returns None if:
          - nonce does not exist,
          - sender_hash does not match the entry owner,
          - or the entry has expired.

        A wrong-sender attempt leaves the entry in place, so the real owner
        can still confirm. An expired entry is removed as a side effect.
        """
        nonce = nonce.strip().upper()
        with self._lock:
            entry = self._by_nonce.get(nonce)
            # Unknown nonce and wrong sender are indistinguishable to the
            # caller: deny without disclosing any detail.
            if entry is None or entry.sender_hash != sender_hash:
                return None

            # From here on the entry is consumed either way: a valid
            # confirmation uses it up, an expired one is cleaned out.
            del self._by_nonce[nonce]
            self._by_sender.pop(sender_hash, None)

            if time.monotonic() > entry.expires_at:
                return None
            return entry

    def pending_nonce(self, sender_hash: str) -> Optional[str]:
        """
        Return the current pending nonce for a sender (non-destructive peek).

        Returns None if no entry or the entry has expired. A stale or
        expired mapping found along the way is evicted.
        """
        with self._lock:
            nonce = self._by_sender.get(sender_hash)
            if nonce is None:
                return None

            entry = self._by_nonce.get(nonce)
            if entry is not None and time.monotonic() <= entry.expires_at:
                return nonce

            # Lazy eviction of the dangling/expired entry.
            self._by_nonce.pop(nonce, None)
            self._by_sender.pop(sender_hash, None)
            return None

    def pending_count(self) -> int:
        """Number of non-expired pending entries (for /health, metrics)."""
        now = time.monotonic()
        with self._lock:
            return sum(1 for e in self._by_nonce.values() if now <= e.expires_at)

    def cleanup(self) -> int:
        """Eagerly remove all expired entries. Returns count removed."""
        now = time.monotonic()
        with self._lock:
            # Collect first: the dict must not change size while iterated.
            expired_nonces = [
                n for n, e in self._by_nonce.items() if now > e.expires_at
            ]
            for n in expired_nonces:
                entry = self._by_nonce.pop(n)
                self._by_sender.pop(entry.sender_hash, None)
        return len(expired_nonces)
|