# microdao-daarion/gateway-bot/behavior_policy.py

"""
Behavior Policy v1: Silent-by-default + Short-first + Media-no-comment
Уніфікована логіка для всіх агентів НОДА1.

Правила:
1. SOWA (Speak-Only-When-Asked) — не відповідай, якщо не питали
2. Short-First — 1-2 речення за замовчуванням
3. Media-no-comment — медіа без питання = мовчанка
"""
import re
import logging
from typing import Dict, Any, Optional, List, Tuple
from dataclasses import dataclass

logger = logging.getLogger(__name__)

# Marker for "no response needed"
NO_OUTPUT = "__NO_OUTPUT__"

# Training groups where agents respond to ALL messages.
# IDs are kept as strings; analyze_message() looks up str(chat_id) in this set.
TRAINING_GROUP_IDS = {
    "-1003556680911",  # Agent Preschool Daarion.city
}

# Agent name variants for mention detection.
# Keys are agent IDs; values are lower-case spellings (Latin and Cyrillic)
# plus "@..." handles (presumably the bots' usernames). Message text is
# lower-cased before comparison, so entries should stay lower-case.
AGENT_NAME_VARIANTS: Dict[str, List[str]] = {
    "helion": ["helion", "хеліон", "hélion", "helios", "@energyunionbot"],
    "daarwizz": ["daarwizz", "даарвіз", "@daarwizzbot"],
    "greenfood": ["greenfood", "грінфуд", "@greenfoodlivebot"],
    "agromatrix": ["agromatrix", "агроматрікс", "@agromatrixbot"],
    "alateya": ["alateya", "алатея", "@alateyabot"],
    "nutra": ["nutra", "нутра", "@nutrachat_bot"],
    "druid": ["druid", "друїд", "@druidbot"],
    "clan": ["clan", "spirit", "клан", "спіріт", "@clanbot"],
    "eonarch": ["eonarch", "еонарх", "@eonarchbot"],
}

# Commands that trigger response.
# detect_command() tests these as prefixes of the stripped, lower-cased
# message, so entries must be lower-case.
COMMAND_PREFIXES = [
    "/ask", "/agent", "/help", "/start", "/status", "/link",
    "/daarwizz", "/helion", "/greenfood", "/agromatrix", "/alateya",
    "/nutra", "/druid", "/clan", "/eonarch",
    "/ingest", "/бренд", "/презентація", "/job",
]

# Question markers (Ukrainian + English)
QUESTION_MARKERS = [
    "?", "що", "як", "чому", "коли", "де", "хто", "чи", "який", "яка", "яке",
    "скільки", "навіщо", "звідки", "куди", "котрий", "котра",
    "what", "how", "why", "when", "where", "who", "which", "whose",
]

# Imperative markers (commands/requests), Ukrainian + English.
# detect_imperative() compares them with the first two words of a message
# after removing trailing ",.:!?" from each word.
IMPERATIVE_MARKERS = [
    "поясни", "розкажи", "зроби", "допоможи", "покажи", "дай", "скажи",
    "знайди", "перевір", "аналізуй", "порівняй", "підсумуй", "витягни",
    "опиши", "переклади", "напиши", "створи", "згенеруй", "порахуй",
    "explain", "tell", "do", "help", "show", "give", "say", "find",
    "check", "analyze", "compare", "summarize", "extract", "describe",
    "translate", "write", "create", "generate", "calculate",
]

# Broadcast/poster patterns (regular expressions, applied case-insensitively
# to the stripped message by detect_broadcast_intent()).
BROADCAST_PATTERNS = [
    r"^\d{1,2}[:.]\d{2}\s",  # Starts with a time followed by whitespace: "20:00 ", "14.30 "
    r"^\d{1,2}[:.]\d{2}\s+\d{1,2}[./]\d{1,2}",  # Time then date: "20:00 10.02"
    r"^[\u2705\u274c\u23f0\u2b50\u26a1\u2764]",  # Starts with common emoji (✅ ❌ ⏰ ⭐ ⚡ ❤)
    r"^(анонс|запрошуємо|нагадуємо|увага|важливо|news|update|alert)",  # Announcement words
    r"^https?://",  # Starts with a URL (no end anchor, so text may follow)
    r"#\w+.*#\w+",  # Multiple hashtags
]

# Short note patterns (timing, reactions, status updates).
# detect_short_note() only tries these on messages of at most 10 characters.
SHORT_NOTE_PATTERNS = [
    r"^[\u2705\u274c\u2611\u2612]+$",  # Only check/cross marks (✅ ❌ ☑ ☒)
    r"^\d{1,2}[:.]\d{2}(\s+\d{1,2}[./]\d{1,2})?\s*[\u2705\u274c]?$",  # "20:00 10.02 ✅"
    # Character class, not alternation: any 1-3 characters drawn from
    # "+", "-", "o", "k", ✅, ❌ (so "ok", "+", "--", but also "kk" or "o+").
    r"^[+\-ok\u2705\u274c]{1,3}$",
    r"^(ok|ок|добре|так|ні|yes|no|done|готово)$",  # Short confirmations
]


@dataclass
class BehaviorDecision:
    """Outcome of the behavior-policy analysis for one message.

    ``should_respond`` and ``reason`` carry the verdict and are required.
    Every other field is a boolean signal flag that defaults to ``False``.
    Field order is part of the interface (positional construction).
    """

    # --- verdict ---------------------------------------------------------
    should_respond: bool
    reason: str

    # --- chat context ----------------------------------------------------
    is_training_group: bool = False

    # --- text signals ----------------------------------------------------
    is_direct_mention: bool = False
    is_command: bool = False
    is_question: bool = False
    is_imperative: bool = False
    is_broadcast: bool = False
    is_short_note: bool = False

    # --- media signals ---------------------------------------------------
    has_media: bool = False
    media_has_question: bool = False

def _normalize_text(text: str) -> str:
    """Return *text* lower-cased with surrounding whitespace removed.

    Falsy input (``None`` or an empty string) yields an empty string.
    """
    return text.lower().strip() if text else ""


def detect_agent_mention(text: str, agent_id: str) -> bool:
    """
    Check if message mentions the agent.

    A name variant counts as a mention only when it starts at a word
    boundary, i.e. it is not immediately preceded by a letter, digit or
    underscore. This keeps "perihelion" from mentioning "helion" and
    "declan" from mentioning "clan". The end of the variant is left open
    on purpose so suffixed or inflected forms ("helion's", "хеліоне")
    are still recognised.

    Args:
        text: Message text
        agent_id: Agent ID (e.g., "helion", "daarwizz"). An ID missing from
            AGENT_NAME_VARIANTS is matched by its own name.

    Returns:
        True if agent is mentioned
    """
    if not text:
        return False

    normalized = _normalize_text(text)

    # Unknown agents fall back to their own ID as the only variant.
    variants = AGENT_NAME_VARIANTS.get(agent_id, [agent_id])

    for variant in variants:
        needle = variant.lower()
        # An empty variant would "match" every message; skip it.
        if not needle:
            continue
        # Negative lookbehind = left word boundary that also works for
        # variants starting with a non-word character such as "@".
        if re.search(r"(?<!\w)" + re.escape(needle), normalized):
            return True

    return False


def detect_any_agent_mention(text: str) -> Optional[str]:
    """
    Check if message mentions any agent.

    Agents are tried in AGENT_NAME_VARIANTS order and the first one whose
    variant occurs in the text wins. A variant must start at a word
    boundary (not preceded by a letter, digit or underscore), so
    "perihelion" does not count as a mention of "helion". The end is left
    open so suffixed or inflected forms still match.

    Returns:
        Agent ID if mentioned, None otherwise
    """
    if not text:
        return None

    normalized = _normalize_text(text)

    for agent_id, variants in AGENT_NAME_VARIANTS.items():
        for variant in variants:
            needle = variant.lower()
            # An empty variant would "match" every message; skip it.
            if not needle:
                continue
            # Negative lookbehind acts as a left word boundary and also
            # works for variants that start with "@".
            if re.search(r"(?<!\w)" + re.escape(needle), normalized):
                return agent_id

    return None


def detect_command(text: str) -> bool:
    """Return True when the message begins with one of COMMAND_PREFIXES.

    Leading/trailing whitespace is ignored and the comparison is done on
    the lower-cased text. Empty or missing text is never a command.
    """
    if not text:
        return False

    lowered = text.strip().lower()
    # str.startswith accepts a tuple and tests every prefix in one call.
    return lowered.startswith(tuple(COMMAND_PREFIXES))


def detect_question(text: str) -> bool:
    """Return True when the message looks like a question.

    Three signals are tried in order on the normalized (lower-cased,
    stripped) text:
    1. it contains a question mark;
    2. its first whitespace-separated word is a question marker;
    3. any question marker appears as a space-delimited word anywhere.
    """
    if not text:
        return False

    lowered = _normalize_text(text)

    if "?" in lowered:
        return True

    # First word, split on any whitespace (covers tabs/newlines too).
    tokens = lowered.split()
    if tokens and tokens[0] in QUESTION_MARKERS:
        return True

    # Pad with spaces so markers at either end are space-delimited as well.
    padded = f" {lowered} "
    return any(f" {marker} " in padded for marker in QUESTION_MARKERS)


def detect_imperative(text: str) -> bool:
    """Return True when the message opens with an imperative verb.

    Only the first two words are inspected: the first covers a plain
    request ("explain ..."), the second covers a request that follows a
    mention (e.g. "@Helion поясни"). Trailing ",.:!?" is removed from each
    word before it is compared with IMPERATIVE_MARKERS.
    """
    if not text:
        return False

    leading_words = _normalize_text(text).split()[:2]
    return any(
        word.rstrip(",.:!?") in IMPERATIVE_MARKERS
        for word in leading_words
    )


def detect_broadcast_intent(text: str) -> bool:
    """
    Check if message is a broadcast/announcement/poster.
    These should NOT trigger a response.

    A message is a broadcast when its stripped text matches any of
    BROADCAST_PATTERNS (case-insensitively), or when it is at most five
    characters long and contains no letters (emoji/punctuation only).

    Args:
        text: Message text

    Returns:
        True if the message looks like a broadcast
    """
    if not text:
        return False

    stripped = text.strip()

    for pattern in BROADCAST_PATTERNS:
        # re.search rather than re.match: patterns that start with "^" are
        # still tied to the start of the message, while unanchored ones
        # (the multiple-hashtag pattern) may now match anywhere instead of
        # only when the message begins with "#".
        if re.search(pattern, stripped, re.IGNORECASE | re.UNICODE):
            logger.debug("Broadcast pattern matched: %s", pattern)
            return True

    # Very short messages with only emojis/special chars
    return len(stripped) <= 5 and not any(c.isalpha() for c in stripped)


def detect_short_note(text: str) -> bool:
    """
    Check if message is a short note without request.
    E.g., "20:00 10.02 ✅", "+", "ok"

    Empty, missing and whitespace-only text all count as a short note.
    Otherwise the stripped text must be at most 10 characters long and
    match one of SHORT_NOTE_PATTERNS (case-insensitively).

    Args:
        text: Message text

    Returns:
        True if the message is a short note
    """
    if not text:
        return True

    stripped = text.strip()

    # Whitespace-only text carries no more content than an empty message.
    if not stripped:
        return True

    # Anything longer than 10 characters is never a short note.
    if len(stripped) > 10:
        return False

    for pattern in SHORT_NOTE_PATTERNS:
        if re.match(pattern, stripped, re.IGNORECASE | re.UNICODE):
            logger.debug("Short note pattern matched: %s", pattern)
            return True

    return False


def detect_media_question(caption: str) -> bool:
    """Return True when a media caption asks or requests something.

    A caption qualifies if detect_question() or detect_imperative()
    accepts it. A missing or empty caption never qualifies, which means
    media without a question gets no response.
    """
    return bool(caption) and (
        detect_question(caption) or detect_imperative(caption)
    )


def analyze_message(
    text: str,
    agent_id: str,
    chat_id: str,
    has_media: bool = False,
    media_caption: str = "",
    is_private_chat: bool = False,
    payload_explicit_request: bool = False,
) -> BehaviorDecision:
    """
    Decide whether an agent should answer a message, and record why.

    Rules are evaluated in priority order and the first one that applies
    produces the verdict:

    1. training group            -> respond ("training_group")
    2. private chat              -> respond ("private_chat")
    3. explicit gateway request  -> respond ("explicit_request")
    4. media                     -> respond only if the caption asks or
                                    requests something
    5. broadcast without mention -> silent ("broadcast_no_mention")
    6. short note                -> silent ("short_note")
    7. direct mention            -> respond ("direct_mention")
    8. command                   -> respond ("command")
    9. question/imperative with no mention of this agent -> silent
    10. anything else            -> silent ("no_trigger")

    Args:
        text: Message text
        agent_id: Agent ID
        chat_id: Chat ID (converted with str() before the training-group lookup)
        has_media: Whether message has photo/video/file/link
        media_caption: Caption for media (if any)
        is_private_chat: Whether this is a private DM
        payload_explicit_request: Gateway flag for explicit request

    Returns:
        BehaviorDecision with should_respond, reason and the signal flags
        that were computed before the verdict was reached
    """
    decision = BehaviorDecision(
        should_respond=False,
        reason="",
        is_training_group=str(chat_id) in TRAINING_GROUP_IDS,
        has_media=has_media,
    )

    def _conclude(respond: bool, why: str) -> BehaviorDecision:
        # Stamp the verdict on the shared decision object and hand it back.
        decision.should_respond = respond
        decision.reason = why
        return decision

    # Contexts in which the agent always speaks, highest priority first.
    if decision.is_training_group:
        return _conclude(True, "training_group")
    if is_private_chat:
        return _conclude(True, "private_chat")
    if payload_explicit_request:
        return _conclude(True, "explicit_request")

    # Media: only the caption is inspected; no question/request means silence.
    if has_media:
        decision.media_has_question = detect_media_question(media_caption)
        if decision.media_has_question:
            return _conclude(True, "media_with_question")
        return _conclude(False, "media_no_question")

    # Broadcasts are ignored unless they mention this agent, in which case
    # evaluation continues with the remaining rules.
    decision.is_broadcast = detect_broadcast_intent(text)
    if decision.is_broadcast and not detect_agent_mention(text, agent_id):
        return _conclude(False, "broadcast_no_mention")

    # Short notes are ignored even when they passed the broadcast check.
    decision.is_short_note = detect_short_note(text)
    if decision.is_short_note:
        return _conclude(False, "short_note")

    # Collect every remaining signal before deciding, so the returned
    # decision carries all of them regardless of which rule fires.
    decision.is_direct_mention = detect_agent_mention(text, agent_id)
    decision.is_command = detect_command(text)
    decision.is_question = detect_question(text)
    decision.is_imperative = detect_imperative(text)

    if decision.is_direct_mention:
        return _conclude(True, "direct_mention")
    if decision.is_command:
        return _conclude(True, "command")

    # In groups a question/imperative that does not mention this agent is
    # never answered; the reason only records whether another agent was
    # addressed instead.
    if decision.is_question or decision.is_imperative:
        other_agent = detect_any_agent_mention(text)
        if other_agent and other_agent != agent_id:
            return _conclude(False, f"addressed_to_other_agent_{other_agent}")
        return _conclude(False, "question_no_mention")

    return _conclude(False, "no_trigger")


def should_respond(
    text: str,
    agent_id: str,
    chat_id: str,
    has_media: bool = False,
    media_caption: str = "",
    is_private_chat: bool = False,
    payload_explicit_request: bool = False,
) -> Tuple[bool, str]:
    """
    Convenience wrapper around analyze_message().

    Accepts the same arguments as analyze_message() and forwards them
    unchanged, keeping only the verdict from the resulting decision.

    Returns:
        Tuple of (should_respond: bool, reason: str)
    """
    verdict = analyze_message(
        text,
        agent_id,
        chat_id,
        has_media=has_media,
        media_caption=media_caption,
        is_private_chat=is_private_chat,
        payload_explicit_request=payload_explicit_request,
    )
    return (verdict.should_respond, verdict.reason)


def is_no_output_response(text: str) -> bool:
    """
    Tell whether an LLM reply means "say nothing".

    The reply counts as no output when it is empty/missing, contains the
    NO_OUTPUT marker anywhere (case-insensitive), or, after stripping and
    lower-casing, consists solely of one of a few silence tokens or of
    one to three dots.
    """
    if not text:
        return True

    candidate = text.strip().lower()

    # The marker may be embedded in surrounding text.
    if NO_OUTPUT.lower() in candidate:
        return True

    # Whole-reply patterns for "I won't respond".
    silence_patterns = (
        r"^$",  # Empty
        r"^\s*$",  # Whitespace only
        r"^(no_output|no output|silent|мовчу|—)$",
        r"^\.{1,3}$",  # Just dots
    )
    return any(
        re.match(pattern, candidate, re.IGNORECASE)
        for pattern in silence_patterns
    )