""" Bot Gateway HTTP API Handles incoming webhooks from Telegram, Discord, etc. """ import asyncio import base64 import copy import hashlib import json import re import logging import os import sys import time import uuid import httpx from pathlib import Path from typing import Dict, Any, Optional, List, Tuple from datetime import datetime from dataclasses import dataclass from io import BytesIO from fastapi import APIRouter, HTTPException from pydantic import BaseModel from router_client import send_to_router from memory_client import memory_client from services.doc_service import ( parse_document, ingest_document, ask_about_document, get_doc_context ) from behavior_policy import ( should_respond, analyze_message, detect_media_question, detect_explicit_request, detect_url, detect_agent_mention, is_no_output_response, record_interaction, record_ack, get_ack_text, is_prober_request, has_agent_chat_participation, NO_OUTPUT, BehaviorDecision, AGENT_NAME_VARIANTS, ) logger = logging.getLogger(__name__) # Telegram message length limits TELEGRAM_MAX_MESSAGE_LENGTH = 4096 TELEGRAM_SAFE_LENGTH = 3500 # Leave room for formatting # Operator pending state cache (chat_id -> {ts, items}) LAST_PENDING_STATE: Dict[str, Dict[str, Any]] = {} PENDING_STATE_TTL = 1800 # 30 minutes # Per-user language preference cache (chat_id:user_id -> {lang, ts}) USER_LANGUAGE_PREFS: Dict[str, Dict[str, Any]] = {} USER_LANGUAGE_PREF_TTL = 30 * 24 * 3600 # 30 days # Per-user response style cache (agent:chat:user -> {style, ts}) USER_RESPONSE_STYLE_PREFS: Dict[str, Dict[str, Any]] = {} USER_RESPONSE_STYLE_PREF_TTL = 30 * 24 * 3600 # 30 days # Recent photo context for follow-up questions in chat (agent:chat:user -> {file_id, ts}) RECENT_PHOTO_CONTEXT: Dict[str, Dict[str, Any]] = {} RECENT_PHOTO_TTL = 30 * 60 # 30 minutes AGROMATRIX_GLOBAL_KNOWLEDGE_USER_ID = "agent:agromatrix:global" def _cleanup_recent_photo_context() -> None: now = time.time() expired = [k for k, v in RECENT_PHOTO_CONTEXT.items() if now - float(v.get("ts", 0)) > RECENT_PHOTO_TTL] for k in expired: del RECENT_PHOTO_CONTEXT[k] def _set_recent_photo_context(agent_id: str, chat_id: str, user_id: str, file_id: str) -> None: _cleanup_recent_photo_context() key = f"{agent_id}:{chat_id}:{user_id}" RECENT_PHOTO_CONTEXT[key] = {"file_id": file_id, "ts": time.time()} def _get_recent_photo_file_id(agent_id: str, chat_id: str, user_id: str) -> Optional[str]: _cleanup_recent_photo_context() key = f"{agent_id}:{chat_id}:{user_id}" rec = RECENT_PHOTO_CONTEXT.get(key) if not rec: return None return rec.get("file_id") def _extract_recent_photo_file_id_from_memory(memory_context: Dict[str, Any]) -> Optional[str]: """ Extract last seen Telegram photo file_id from memory context. Looks for patterns like: [Photo: ] """ if not memory_context: return None pattern = re.compile(r"\[Photo:\s*([^\]\s]+)\]") recent_events = memory_context.get("recent_events", []) or [] for ev in reversed(recent_events): body = (ev.get("body_text") or "").strip() if not body: continue m = pattern.search(body) if m: return m.group(1) local_text = memory_context.get("local_context_text") or "" for line in reversed(local_text.splitlines()): m = pattern.search(line) if m: return m.group(1) return None def _looks_like_photo_followup(text: str) -> bool: if not text: return False t = text.strip().lower() direct_markers = [ "що ти бачиш", "що на фото", "що на зображенні", "опиши фото", "подивись фото", "що на цьому фото", "що на цій фотографії", "що на цій світлині", "проаналізуй фото", "аналіз фото", "переглянь фото", "повернись до фото", "яка це рослина", "що це за рослина", "що за рослина", "що за культура", "яка культура", "визнач рослину", "what do you see", "what is in the image", "describe the photo", "analyze the photo", "analyze image", "what plant is this", "что ты видишь", "что на фото", "опиши фото", "посмотри фото", "проанализируй фото", "какое это растение", "что за растение", ] if any(m in t for m in direct_markers): return True # Flexible forms: "що на ... фото/зображенні/світлині" if re.search(r"(що|what|что)\s+на\s+.*(фото|зображ|світлин|image|photo)", t): # Exclude common meta-questions meta_exclude = ["канал", "чат", "бот", "нормально"] if not any(ex in t for ex in meta_exclude): return True return False def _extract_agromatrix_correction_label(text: str) -> Optional[str]: """ Extract corrected plant label from free-form user feedback. Examples: - "це соняшник" - "це не кабачок, а гарбуз" - "правильна відповідь: кукурудза" """ raw = (text or "").strip() if not raw: return None t = re.sub(r"\s+", " ", raw.lower()) patterns = [ r"правильн\w*\s+відповід\w*[:\-]?\s*([a-zа-яіїєґ0-9'’\-\s]{2,60})", r"це\s+не\s+[a-zа-яіїєґ0-9'’\-\s]{1,60},?\s+а\s+([a-zа-яіїєґ0-9'’\-\s]{2,60})", # Strict "це