# microdao-daarion/services/matrix-bridge-dagi/app/mixed_routing.py

"""
Mixed-Room Routing — Phase M2.1 + M2.2 (guard rails + rejection audit)

Supports 1 room → N agents with deterministic message routing.

Env:
    BRIDGE_MIXED_ROOM_MAP=!roomX:server=sofiia,helion;!roomY:server=druid,nutra
    BRIDGE_MIXED_DEFAULTS=!roomX:server=sofiia;!roomY:server=druid   (optional)

Routing priority (per message):
  1. Slash command: /sofiia message text   → agent=sofiia
  2. Mention @:    @sofiia message text    → agent=sofiia
  3. Mention name: sofiia: message text    → agent=sofiia
  4. Fallback:     default_agent_for_room  (first in list, or explicit BRIDGE_MIXED_DEFAULTS)

Reply tagging (mixed room only):
  Worker prepends "Agentname: " to reply so users see who answered.
  Single-agent rooms are unaffected.
"""

import logging
import re
from dataclasses import dataclass, field
from typing import Dict, FrozenSet, List, Optional, Tuple

# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# Room ID format: !<localpart>:<server>
# Both parts accept only ASCII letters, digits, '-', '_' and '.'.
# The server part cannot contain ':', so a room id carrying an explicit
# port (e.g. "!room:server:8448") does not match.
_ROOM_ID_RE = re.compile(r"^![A-Za-z0-9\-_.]+:[A-Za-z0-9\-_.]+$")

# Routing patterns (compiled once)
# Each pattern is anchored at the start of the message and captures two groups:
#   group 1 = candidate agent token, group 2 = remainder of the message.
# re.DOTALL lets group 2 span newlines, so multi-line bodies are kept whole.
# Slash and @ forms allow zero whitespace after the token (\s*).
_SLASH_RE = re.compile(r"^/([A-Za-z0-9_\-]+)\s*(.*)", re.DOTALL)
_MENTION_AT_RE = re.compile(r"^@([A-Za-z0-9_\-]+)\s*(.*)", re.DOTALL)
# The colon form requires at least one whitespace character after ':' (\s+).
_MENTION_COLON_RE = re.compile(r"^([A-Za-z0-9_\-]+):\s+(.*)", re.DOTALL)

# Routing reason labels (successful routes)
# Returned as the second element of route_message()'s result tuple.
REASON_SLASH = "slash_command"
REASON_AT_MENTION = "at_mention"
REASON_COLON_MENTION = "colon_mention"
REASON_DEFAULT = "default"

# Rejection reason labels (route_message returns agent_id=None + one of these)
REASON_REJECTED_UNKNOWN_AGENT = "unknown_agent"
REASON_REJECTED_SLASH_TOO_LONG = "slash_too_long"
REASON_REJECTED_NO_MAPPING = "no_mapping"

# Hard guards
# Defaults for parse_mixed_room_map(max_agents_per_room=...) and
# route_message(max_slash_len=...); callers may pass other limits.
_DEFAULT_MAX_AGENTS_PER_ROOM = 5
_DEFAULT_MAX_SLASH_LEN = 32


# ── Data structures ────────────────────────────────────────────────────────────

@dataclass
class MixedRoom:
    """
    One mixed room: its id, the ordered agents serving it, and the fallback agent.

    Raises ValueError at construction time when ``default_agent`` is not one
    of ``agents``.
    """
    room_id: str               # Matrix room id this entry describes
    agents: List[str]          # order matters: the parser uses agents[0] as default when none is given
    default_agent: str         # agent used when a message carries no routing prefix

    def __post_init__(self) -> None:
        # Invariant: the fallback agent must be one of the room's agents.
        if self.default_agent in self.agents:
            return
        message = (
            f"MixedRoom {self.room_id!r}: default_agent {self.default_agent!r} "
            f"not in agents list {self.agents}"
        )
        raise ValueError(message)


@dataclass
class MixedRoomConfig:
    """
    Container for every configured mixed room, keyed by room id.

    All lookups are tolerant of unknown room ids: they return an empty /
    ``None`` / ``False`` answer instead of raising.
    """
    rooms: Dict[str, MixedRoom] = field(default_factory=dict)   # room_id → MixedRoom

    @property
    def total_rooms(self) -> int:
        """Number of mixed rooms in this configuration."""
        return len(self.rooms)

    def is_mixed(self, room_id: str) -> bool:
        """Return True when ``room_id`` is configured as a mixed room."""
        return room_id in self.rooms

    def agents_for_room(self, room_id: str) -> List[str]:
        """Return a copy of the room's agent list, or ``[]`` for an unknown room."""
        entry = self.rooms.get(room_id)
        if not entry:
            return []
        # Copy so callers cannot mutate the stored list.
        return list(entry.agents)

    def default_agent(self, room_id: str) -> Optional[str]:
        """Return the room's default agent, or ``None`` for an unknown room."""
        entry = self.rooms.get(room_id)
        if not entry:
            return None
        return entry.default_agent

    def as_summary(self) -> List[Dict]:
        """Return one plain dict per room (room_id, agents, default_agent)."""
        summary: List[Dict] = []
        for rid, entry in self.rooms.items():
            summary.append(
                {
                    "room_id": rid,
                    "agents": list(entry.agents),
                    "default_agent": entry.default_agent,
                }
            )
        return summary


# ── Parsers ────────────────────────────────────────────────────────────────────

def parse_mixed_room_map(
    raw_map: str,
    raw_defaults: str,
    allowed_agents: FrozenSet[str],
    max_agents_per_room: int = _DEFAULT_MAX_AGENTS_PER_ROOM,
) -> MixedRoomConfig:
    """
    Parse BRIDGE_MIXED_ROOM_MAP and BRIDGE_MIXED_DEFAULTS into MixedRoomConfig.

    Map format:   "!room1:server=sofiia,helion;!room2:server=druid"
    Defaults fmt: "!room1:server=sofiia;!room2:server=druid"

    Args:
        raw_map:             raw BRIDGE_MIXED_ROOM_MAP value; empty/blank yields
                             an empty config (raw_defaults is then not parsed).
        raw_defaults:        raw BRIDGE_MIXED_DEFAULTS value; may be empty.
        allowed_agents:      every agent named in the map must be in this set.
        max_agents_per_room: upper bound on the agent list length per room.

    Returns:
        MixedRoomConfig holding one MixedRoom per map entry. A room's default
        agent is its BRIDGE_MIXED_DEFAULTS entry if present, else its first agent.

    Raises ValueError on:
      - Malformed entry (no '=') or malformed room_id, in either variable
      - Empty agent list
      - Too many agents per room (> max_agents_per_room)
      - Agent not in allowed_agents
      - Duplicate room_id in map
      - Default agent not in the room's agent list

    Errors in BRIDGE_MIXED_DEFAULTS raise immediately; errors in the map are
    collected across all entries and reported together in one ValueError.

    A BRIDGE_MIXED_DEFAULTS entry whose room does not appear in the map has no
    effect; it is reported with a warning so a mistyped room id is noticed.
    """
    if not raw_map or not raw_map.strip():
        return MixedRoomConfig()

    # Parse explicit defaults first
    explicit_defaults: Dict[str, str] = {}
    if raw_defaults and raw_defaults.strip():
        for entry in raw_defaults.split(";"):
            entry = entry.strip()
            if not entry:
                continue  # tolerate trailing / doubled ';'
            if "=" not in entry:
                raise ValueError(f"BRIDGE_MIXED_DEFAULTS bad entry (no '='): {entry!r}")
            rid, agent = entry.split("=", 1)
            rid, agent = rid.strip(), agent.strip()
            if not _ROOM_ID_RE.match(rid):
                raise ValueError(f"BRIDGE_MIXED_DEFAULTS bad room_id: {rid!r}")
            explicit_defaults[rid] = agent

    rooms: Dict[str, MixedRoom] = {}
    errors: List[str] = []

    for entry in raw_map.split(";"):
        entry = entry.strip()
        if not entry:
            continue  # tolerate trailing / doubled ';'

        if "=" not in entry:
            errors.append(f"BRIDGE_MIXED_ROOM_MAP bad entry (no '='): {entry!r}")
            continue

        room_id, agents_raw = entry.split("=", 1)
        room_id = room_id.strip()
        agents_raw = agents_raw.strip()

        if not _ROOM_ID_RE.match(room_id):
            errors.append(f"Invalid room_id format: {room_id!r}")
            continue

        if room_id in rooms:
            errors.append(f"Duplicate room_id in BRIDGE_MIXED_ROOM_MAP: {room_id!r}")
            continue

        # Blank items (e.g. "a,,b" or a trailing comma) are dropped.
        agents = [a.strip() for a in agents_raw.split(",") if a.strip()]
        if not agents:
            errors.append(f"Empty agent list for room {room_id!r}")
            continue

        # M2.2 guard: fail-fast if too many agents per room
        if len(agents) > max_agents_per_room:
            errors.append(
                f"Room {room_id!r} has {len(agents)} agents > MAX_AGENTS_PER_MIXED_ROOM={max_agents_per_room}"
            )
            continue

        invalid = [a for a in agents if a not in allowed_agents]
        if invalid:
            errors.append(
                f"Agents {invalid} for room {room_id!r} not in allowed_agents {set(allowed_agents)}"
            )
            continue

        # Explicit default wins; otherwise the first listed agent is the default.
        default = explicit_defaults.get(room_id, agents[0])
        if default not in agents:
            errors.append(
                f"Default agent {default!r} for room {room_id!r} not in agents list {agents}"
            )
            continue

        rooms[room_id] = MixedRoom(room_id=room_id, agents=agents, default_agent=default)

    if errors:
        raise ValueError(f"BRIDGE_MIXED_ROOM_MAP parse errors: {'; '.join(errors)}")

    # A default for a room that is not in the map would otherwise be dropped
    # silently (typically a mistyped room id), leaving the room on its first
    # agent. Warn rather than raise so existing deployments keep starting.
    orphaned_defaults = sorted(rid for rid in explicit_defaults if rid not in rooms)
    if orphaned_defaults:
        logger.warning(
            "BRIDGE_MIXED_DEFAULTS entries ignored (room not in BRIDGE_MIXED_ROOM_MAP): %s",
            orphaned_defaults,
        )

    config = MixedRoomConfig(rooms=rooms)
    logger.info(
        "Mixed room config loaded: %d rooms, total agents=%d",
        config.total_rooms,
        sum(len(r.agents) for r in rooms.values()),
    )
    return config


# ── Routing ────────────────────────────────────────────────────────────────────

def route_message(
    text: str,
    room_id: str,
    config: MixedRoomConfig,
    allowed_agents: FrozenSet[str],
    max_slash_len: int = _DEFAULT_MAX_SLASH_LEN,
) -> Tuple[Optional[str], str, str]:
    """
    Pick the agent that should handle ``text`` posted in ``room_id``.

    Returns:
        (agent_id, routing_reason, effective_text)

        agent_id:       the chosen agent, or None when the message is rejected
        routing_reason: a REASON_* label on success, REASON_REJECTED_* on rejection
        effective_text: on a prefix match, the message with the routing prefix
                        removed (or the whole stripped message if nothing
                        follows the prefix); on default routing, the stripped
                        message; on rejection, the original ``text`` untouched

    Resolution order:
        1. /agentname ...   (slash command)
        2. @agentname ...   (at-mention)
        3. agentname: ...   (colon-mention)
        4. the room's default agent

    Rejections:
        - room_id not in config            → REASON_REJECTED_NO_MAPPING
        - slash token > max_slash_len      → REASON_REJECTED_SLASH_TOO_LONG
        - slash token not a usable agent   → REASON_REJECTED_UNKNOWN_AGENT

    Only the slash form rejects; an @-mention or colon-mention that does not
    resolve to an agent falls through to the next rule.
    """
    room = config.rooms.get(room_id)
    if room is None:
        return None, REASON_REJECTED_NO_MAPPING, text

    message = text.strip()

    # Rule 1: slash command. A slash prefix is always terminal: it either
    # routes or rejects, never falls through to the default agent.
    slash = _SLASH_RE.match(message)
    if slash:
        token = slash.group(1).lower()

        # M2.2: overly long tokens are treated as garbage / injection attempts.
        if len(token) > max_slash_len:
            logger.warning(
                "Slash token too long (%d > %d) in room %s — rejected",
                len(token), max_slash_len, room_id,
            )
            return None, REASON_REJECTED_SLASH_TOO_LONG, text

        resolved = _resolve_agent(token, room, allowed_agents)
        if not resolved:
            logger.warning(
                "Slash command /%s in room %s: agent not recognised or not allowed (available: %s)",
                token, room_id, room.agents,
            )
            return None, REASON_REJECTED_UNKNOWN_AGENT, text

        logger.debug("Slash route: /%s → %s", token, resolved)
        # A bare "/agent" with no body keeps the whole stripped message.
        return resolved, REASON_SLASH, slash.group(2).strip() or message

    # Rules 2 and 3: mention forms, tried in priority order. A mention that
    # does not resolve is ignored and the next rule is tried.
    mention_rules = (
        (_MENTION_AT_RE, REASON_AT_MENTION, "@mention route: @%s → %s"),
        (_MENTION_COLON_RE, REASON_COLON_MENTION, "Colon-mention route: %s: → %s"),
    )
    for pattern, reason, log_format in mention_rules:
        hit = pattern.match(message)
        if not hit:
            continue
        token = hit.group(1).lower()
        resolved = _resolve_agent(token, room, allowed_agents)
        if resolved:
            logger.debug(log_format, token, resolved)
            return resolved, reason, hit.group(2).strip() or message

    # Rule 4: nothing matched, so hand the message to the room's default agent.
    return room.default_agent, REASON_DEFAULT, message


def _resolve_agent(
    candidate: str,
    room: MixedRoom,
    allowed_agents: FrozenSet[str],
) -> Optional[str]:
    """
    Map a routing token to one of the room's agents.

    ``candidate`` is compared as-is against the lower-cased form of each agent
    id in ``room.agents`` (callers pass an already lower-cased token). The
    first agent, in room order, that matches and is also present in
    ``allowed_agents`` is returned with its original spelling; otherwise None.
    """
    usable_matches = (
        agent_id
        for agent_id in room.agents
        if agent_id.lower() == candidate and agent_id in allowed_agents
    )
    return next(usable_matches, None)


def reply_prefix(agent_id: str, is_mixed: bool) -> str:
    """
    Build the "Agentname: " tag that is put in front of replies in mixed rooms.

    Returns an empty string when ``is_mixed`` is false, so single-agent rooms
    keep their M1 behaviour. The name is formatted with ``str.capitalize()``:
    first character upper-cased, the rest lower-cased ("sofiia" → "Sofiia").
    """
    return f"{agent_id.capitalize()}: " if is_mixed else ""


def build_override_config(
    base_config: MixedRoomConfig,
    room_id: str,
    agents: List[str],
    default_agent: str,
) -> MixedRoomConfig:
    """
    M6.1: Return a new MixedRoomConfig in which ``room_id`` is served by the
    given ``agents`` / ``default_agent``; every other room is carried over
    from ``base_config``.

    ``base_config`` is not modified: its room mapping is shallow-copied, so
    the other MixedRoom objects are shared between both configs. ``room_id``
    is added if the base config does not contain it. The ``agents`` list is
    stored as passed (not copied).

    Used in _enqueue_from_mixed_room to inject PolicyStore agent overrides
    without mutating the shared base configuration.

    Raises:
        ValueError: if ``default_agent`` is not in ``agents`` (raised by MixedRoom).
    """
    merged_rooms = {
        **base_config.rooms,
        room_id: MixedRoom(room_id=room_id, agents=agents, default_agent=default_agent),
    }
    return MixedRoomConfig(rooms=merged_rooms)