Includes all milestones M4 through M11: - M4: agent discovery (!agents / !status) - M5: node-aware routing + per-node observability - M6: dynamic policy store (node/agent overrides, import/export) - M7: Prometheus alerts + Grafana dashboard + metrics contract - M8: node health tracker + soft failover + sticky cache + HA persistence - M9: two-step confirm + diff preview for dangerous commands - M10: auto-backup, restore, retention, policy history + change detail - M11: soak scenarios (CI tests) + live soak script Soak infrastructure (this commit): - POST /v1/debug/inject_event (guarded by DEBUG_INJECT_ENABLED=false) - _preflight_inject() and _check_wal() in soak script - --db-path arg for WAL delta reporting - Runbook sections 2a/2b/2c: Step 0 and Step 1 exact commands Made-with: Cursor
334 lines
11 KiB
Python
334 lines
11 KiB
Python
"""
|
|
Mixed-Room Routing — Phase M2.1 + M2.2 (guard rails + rejection audit)
|
|
|
|
Supports 1 room → N agents with deterministic message routing.
|
|
|
|
Env:
|
|
BRIDGE_MIXED_ROOM_MAP=!roomX:server=sofiia,helion;!roomY:server=druid,nutra
|
|
BRIDGE_MIXED_DEFAULTS=!roomX:server=sofiia;!roomY:server=druid (optional)
|
|
|
|
Routing priority (per message):
|
|
1. Slash command: /sofiia message text → agent=sofiia
|
|
2. Mention @: @sofiia message text → agent=sofiia
|
|
3. Mention name: sofiia: message text → agent=sofiia
|
|
4. Fallback: default_agent_for_room (first in list, or explicit BRIDGE_MIXED_DEFAULTS)
|
|
|
|
Reply tagging (mixed room only):
|
|
Worker prepends "Agentname: " to reply so users see who answered.
|
|
Single-agent rooms are unaffected.
|
|
"""
|
|
|
|
import logging
|
|
import re
|
|
from dataclasses import dataclass, field
|
|
from typing import Dict, FrozenSet, List, Optional, Tuple
|
|
|
|
logger = logging.getLogger(__name__)

# A room id has the shape "!<localpart>:<server>".
_ROOM_ID_RE = re.compile(r"^![A-Za-z0-9\-_.]+:[A-Za-z0-9\-_.]+$")

# Routing prefixes, compiled once at import time. Each pattern captures
# (agent token, rest of message); DOTALL lets the rest span several lines.
_SLASH_RE = re.compile(r"^/([A-Za-z0-9_\-]+)\s*(.*)", flags=re.DOTALL)
_MENTION_AT_RE = re.compile(r"^@([A-Za-z0-9_\-]+)\s*(.*)", flags=re.DOTALL)
# The colon form requires whitespace after ":" before the message body.
_MENTION_COLON_RE = re.compile(r"^([A-Za-z0-9_\-]+):\s+(.*)", flags=re.DOTALL)

# Labels reported when a message was routed successfully.
REASON_SLASH = "slash_command"
REASON_AT_MENTION = "at_mention"
REASON_COLON_MENTION = "colon_mention"
REASON_DEFAULT = "default"

# Labels reported when routing is refused (route_message yields agent_id=None).
REASON_REJECTED_UNKNOWN_AGENT = "unknown_agent"
REASON_REJECTED_SLASH_TOO_LONG = "slash_too_long"
REASON_REJECTED_NO_MAPPING = "no_mapping"

# Hard limits used as parameter defaults by the parser and the router.
_DEFAULT_MAX_AGENTS_PER_ROOM = 5
_DEFAULT_MAX_SLASH_LEN = 32
|
|
|
|
|
|
# ── Data structures ────────────────────────────────────────────────────────────
|
|
|
|
@dataclass
class MixedRoom:
    """One mixed room: its id, the ordered agents serving it, and its default agent.

    Raises:
        ValueError: at construction time when ``default_agent`` is not a
            member of ``agents``.
    """

    room_id: str
    agents: List[str]  # ordered agent ids for this room
    default_agent: str  # fallback agent; must be one of ``agents``

    def __post_init__(self) -> None:
        # Nothing to do when the default is a legitimate member of the room.
        if self.default_agent in self.agents:
            return
        problem = (
            f"MixedRoom {self.room_id!r}: default_agent {self.default_agent!r} "
            f"not in agents list {self.agents}"
        )
        raise ValueError(problem)
|
|
|
|
|
|
@dataclass
class MixedRoomConfig:
    """Container for every configured mixed room, keyed by room id."""

    rooms: Dict[str, MixedRoom] = field(default_factory=dict)  # room_id → MixedRoom

    @property
    def total_rooms(self) -> int:
        """Number of mixed rooms held by this config."""
        return len(self.rooms)

    def is_mixed(self, room_id: str) -> bool:
        """Tell whether ``room_id`` is configured as a mixed room."""
        return room_id in self.rooms

    def agents_for_room(self, room_id: str) -> List[str]:
        """Return a copy of the room's agent list, or ``[]`` for an unknown room."""
        entry = self.rooms.get(room_id)
        if not entry:
            return []
        return list(entry.agents)

    def default_agent(self, room_id: str) -> Optional[str]:
        """Return the room's default agent, or ``None`` for an unknown room."""
        entry = self.rooms.get(room_id)
        if not entry:
            return None
        return entry.default_agent

    def as_summary(self) -> List[Dict]:
        """Return one plain dict per room (room_id, agents copy, default_agent)."""
        summary: List[Dict] = []
        for rid, entry in self.rooms.items():
            summary.append(
                {
                    "room_id": rid,
                    "agents": list(entry.agents),
                    "default_agent": entry.default_agent,
                }
            )
        return summary
|
|
|
|
|
|
# ── Parsers ────────────────────────────────────────────────────────────────────
|
|
|
|
def parse_mixed_room_map(
    raw_map: str,
    raw_defaults: str,
    allowed_agents: FrozenSet[str],
    max_agents_per_room: int = _DEFAULT_MAX_AGENTS_PER_ROOM,
) -> MixedRoomConfig:
    """
    Parse BRIDGE_MIXED_ROOM_MAP and BRIDGE_MIXED_DEFAULTS into MixedRoomConfig.

    Map format:   "!room1:server=sofiia,helion;!room2:server=druid"
    Defaults fmt: "!room1:server=sofiia;!room2:server=druid"

    An empty or whitespace-only ``raw_map`` yields an empty config and
    ``raw_defaults`` is not inspected at all in that case.

    Args:
        raw_map: value of BRIDGE_MIXED_ROOM_MAP.
        raw_defaults: value of BRIDGE_MIXED_DEFAULTS (may be empty).
        allowed_agents: agent ids that may appear in the map.
        max_agents_per_room: upper bound on agents listed for one room.

    Returns:
        MixedRoomConfig with one MixedRoom per valid map entry.

    Raises:
        ValueError: immediately on a malformed BRIDGE_MIXED_DEFAULTS entry
            (missing '=' or bad room_id); or, after the whole map has been
            scanned, one aggregated error covering every map entry with:
              - no '=' separator
              - malformed room_id
              - duplicate room_id
              - empty agent list
              - too many agents (> max_agents_per_room)
              - an agent not in allowed_agents
              - a default agent that is not in the room's agent list
    """
    if not raw_map or not raw_map.strip():
        return MixedRoomConfig()

    explicit_defaults = _parse_mixed_defaults(raw_defaults)

    rooms: Dict[str, MixedRoom] = {}
    errors: List[str] = []

    for entry in raw_map.split(";"):
        entry = entry.strip()
        if not entry:
            continue

        if "=" not in entry:
            errors.append(f"BRIDGE_MIXED_ROOM_MAP bad entry (no '='): {entry!r}")
            continue

        room_id, agents_raw = entry.split("=", 1)
        room_id = room_id.strip()
        agents_raw = agents_raw.strip()

        if not _ROOM_ID_RE.match(room_id):
            errors.append(f"Invalid room_id format: {room_id!r}")
            continue

        if room_id in rooms:
            errors.append(f"Duplicate room_id in BRIDGE_MIXED_ROOM_MAP: {room_id!r}")
            continue

        agents = [a.strip() for a in agents_raw.split(",") if a.strip()]
        if not agents:
            errors.append(f"Empty agent list for room {room_id!r}")
            continue

        # M2.2 guard: refuse rooms that list more agents than the configured limit
        if len(agents) > max_agents_per_room:
            errors.append(
                f"Room {room_id!r} has {len(agents)} agents > MAX_AGENTS_PER_MIXED_ROOM={max_agents_per_room}"
            )
            continue

        invalid = [a for a in agents if a not in allowed_agents]
        if invalid:
            # sorted() keeps the message stable between runs; iterating a set
            # of strings has no guaranteed order.
            errors.append(
                f"Agents {invalid} for room {room_id!r} not in allowed_agents {sorted(allowed_agents)}"
            )
            continue

        # Explicit default wins; otherwise the first listed agent is the default.
        default = explicit_defaults.get(room_id, agents[0])
        if default not in agents:
            errors.append(
                f"Default agent {default!r} for room {room_id!r} not in agents list {agents}"
            )
            continue

        rooms[room_id] = MixedRoom(room_id=room_id, agents=agents, default_agent=default)

    if errors:
        raise ValueError(f"BRIDGE_MIXED_ROOM_MAP parse errors: {'; '.join(errors)}")

    # A default for a room that is not in the map has no effect; surface it so
    # a typo in the room id does not go unnoticed.
    unused_defaults = sorted(rid for rid in explicit_defaults if rid not in rooms)
    if unused_defaults:
        logger.warning(
            "BRIDGE_MIXED_DEFAULTS names rooms missing from BRIDGE_MIXED_ROOM_MAP (ignored): %s",
            unused_defaults,
        )

    config = MixedRoomConfig(rooms=rooms)
    logger.info(
        "Mixed room config loaded: %d rooms, total agents=%d",
        config.total_rooms,
        sum(len(r.agents) for r in rooms.values()),
    )
    return config


def _parse_mixed_defaults(raw_defaults: str) -> Dict[str, str]:
    """Parse BRIDGE_MIXED_DEFAULTS ("!room:server=agent;...") into room_id → agent.

    Empty input yields an empty dict. A later entry for the same room id
    replaces an earlier one.

    Raises:
        ValueError: on the first entry without '=' or with a malformed room_id.
    """
    defaults: Dict[str, str] = {}
    if not raw_defaults or not raw_defaults.strip():
        return defaults

    for entry in raw_defaults.split(";"):
        entry = entry.strip()
        if not entry:
            continue
        if "=" not in entry:
            raise ValueError(f"BRIDGE_MIXED_DEFAULTS bad entry (no '='): {entry!r}")
        rid, agent = entry.split("=", 1)
        rid, agent = rid.strip(), agent.strip()
        if not _ROOM_ID_RE.match(rid):
            raise ValueError(f"BRIDGE_MIXED_DEFAULTS bad room_id: {rid!r}")
        defaults[rid] = agent
    return defaults
|
|
|
|
|
|
# ── Routing ────────────────────────────────────────────────────────────────────
|
|
|
|
def route_message(
    text: str,
    room_id: str,
    config: MixedRoomConfig,
    allowed_agents: FrozenSet[str],
    max_slash_len: int = _DEFAULT_MAX_SLASH_LEN,
) -> Tuple[Optional[str], str, str]:
    """
    Pick the agent that should handle one message in a mixed room.

    Returns:
        (agent_id, routing_reason, effective_text)

        agent_id:       the chosen agent, or None when the message is rejected
        routing_reason: one of the REASON_* / REASON_REJECTED_* constants
        effective_text: the message with its routing prefix removed; when the
                        prefix is all there is, the stripped message itself.
                        Rejections hand back ``text`` untouched.

    Checks run in this order:
        1. /agentname ...   (slash command)
        2. @agentname ...   (at-mention)
        3. agentname: ...   (colon-mention)
        4. the room's default agent

    Guard rails (M2.2):
        - room_id absent from config → REASON_REJECTED_NO_MAPPING
        - slash token longer than max_slash_len → REASON_REJECTED_SLASH_TOO_LONG
        - slash token that resolves to no agent → REASON_REJECTED_UNKNOWN_AGENT
          (never falls through to the default)

    An unresolved @-mention or colon-mention is not rejected; the message
    simply continues down the list. The default fallback returns
    room.default_agent without consulting allowed_agents.
    """
    room = config.rooms.get(room_id)
    if room is None:
        return None, REASON_REJECTED_NO_MAPPING, text

    message = text.strip()

    # 1. Slash command, e.g. "/sofiia hello world"
    slash = _SLASH_RE.match(message)
    if slash is not None:
        token = slash.group(1).lower()

        # M2.2: over-long slash tokens are refused outright
        if len(token) > max_slash_len:
            logger.warning(
                "Slash token too long (%d > %d) in room %s — rejected",
                len(token), max_slash_len, room_id,
            )
            return None, REASON_REJECTED_SLASH_TOO_LONG, text

        target = _resolve_agent(token, room, allowed_agents)
        if not target:
            # Hard reject: an explicit slash must never land on the default agent
            logger.warning(
                "Slash command /%s in room %s: agent not recognised or not allowed (available: %s)",
                token, room_id, room.agents,
            )
            return None, REASON_REJECTED_UNKNOWN_AGENT, text

        logger.debug("Slash route: /%s → %s", token, target)
        # An empty remainder means the whole stripped message is passed on
        return target, REASON_SLASH, slash.group(2).strip() or message

    # 2 + 3. Mentions share one code path; tuple order fixes the priority.
    mention_rules = (
        (_MENTION_AT_RE, REASON_AT_MENTION, "@mention route: @%s → %s"),
        (_MENTION_COLON_RE, REASON_COLON_MENTION, "Colon-mention route: %s: → %s"),
    )
    for pattern, reason, log_format in mention_rules:
        hit = pattern.match(message)
        if hit is None:
            continue
        token = hit.group(1).lower()
        target = _resolve_agent(token, room, allowed_agents)
        if target:
            logger.debug(log_format, token, target)
            return target, reason, hit.group(2).strip() or message

    # 4. Nothing matched: hand the stripped message to the room default
    return room.default_agent, REASON_DEFAULT, message
|
|
|
|
|
|
def _resolve_agent(
    candidate: str,
    room: MixedRoom,
    allowed_agents: FrozenSet[str],
) -> Optional[str]:
    """
    Return the room agent named by ``candidate``, or None.

    ``candidate`` is compared case-insensitively against each agent id in
    ``room.agents`` (ids only — no alias or base-name lookup is performed).
    A match is returned only if that agent id, in its original spelling, is
    also a member of ``allowed_agents``. Agents are tried in list order and
    the first acceptable one wins.

    Args:
        candidate: token extracted from the message prefix; any letter case.
        room: the mixed room whose agents are searched.
        allowed_agents: agent ids permitted to receive messages.

    Returns:
        The agent id exactly as stored in ``room.agents``, or None when no
        allowed agent matches.
    """
    # Normalise here so the documented case-insensitivity does not depend on
    # every caller lower-casing the token first.
    wanted = candidate.lower()
    for agent in room.agents:
        if agent.lower() == wanted and agent in allowed_agents:
            return agent
    return None
|
|
|
|
|
|
def reply_prefix(agent_id: str, is_mixed: bool) -> str:
    """
    Build the tag placed in front of an agent's reply.

    Args:
        agent_id: id of the agent that produced the reply.
        is_mixed: whether the reply goes to a mixed room.

    Returns:
        "" when ``is_mixed`` is false (single-agent rooms keep their M1
        behaviour); otherwise ``agent_id.capitalize()`` followed by ": ".
    """
    # str.capitalize() upper-cases the first character and lower-cases the
    # rest, so "sofiia" becomes "Sofiia".
    return f"{agent_id.capitalize()}: " if is_mixed else ""
|
|
|
|
|
|
def build_override_config(
    base_config: MixedRoomConfig,
    room_id: str,
    agents: List[str],
    default_agent: str,
) -> MixedRoomConfig:
    """
    M6.1: Build a temporary MixedRoomConfig in which ``room_id`` uses the
    given agents/default while every other room is carried over from
    ``base_config`` unchanged.

    Used in _enqueue_from_mixed_room to inject PolicyStore agent overrides
    without mutating the shared base configuration.

    Args:
        base_config: configuration to start from; it is not modified.
        room_id: room whose entry is replaced (or added if absent).
        agents: ordered agent ids for the override; copied, so later changes
            to the caller's list do not reach the returned config.
        default_agent: default agent for the override room.

    Returns:
        A new MixedRoomConfig. Rooms other than ``room_id`` are the same
        MixedRoom objects held by ``base_config``.

    Raises:
        ValueError: from MixedRoom when ``default_agent`` is not in ``agents``.
    """
    override = MixedRoom(
        room_id=room_id,
        # Defensive copy: the override must not alias a list the caller owns.
        agents=list(agents),
        default_agent=default_agent,
    )
    # Shallow copy of the mapping, so base_config.rooms itself is left alone.
    rooms = dict(base_config.rooms)
    rooms[room_id] = override
    return MixedRoomConfig(rooms=rooms)
|