gateway: add public invoke/jobs facade with redis queue worker and SSE

2026-02-20 17:55:47 +01:00
parent 7e82a427e3
commit 2e76ef9ccb
7 changed files with 619 additions and 55 deletions
--- a/gateway-bot/http_api.py
+++ b/gateway-bot/http_api.py
@@ -88,16 +88,84 @@ def _get_recent_photo_file_id(agent_id: str, chat_id: str, user_id: str) -> Opti
    return rec.get("file_id")


+def _extract_recent_photo_file_id_from_memory(memory_context: Dict[str, Any]) -> Optional[str]:
+    """
+    Extract the last seen Telegram photo file_id from a memory context.
+
+    Looks for markers of the form ``[Photo: <file_id>]``. The ``body_text`` of
+    each entry in ``recent_events`` is scanned from the last entry to the first;
+    if nothing is found there, the lines of ``local_context_text`` are scanned
+    bottom-up. Entries that are not dicts, or whose text is not a string, are
+    skipped so that one malformed record cannot abort the whole lookup.
+
+    Returns the captured file_id, or None when no marker is present.
+    """
+    if not memory_context or not isinstance(memory_context, dict):
+        return None
+
+    pattern = re.compile(r"\[Photo:\s*([^\]\s]+)\]")
+
+    recent_events = memory_context.get("recent_events") or []
+    if isinstance(recent_events, (list, tuple)):
+        # NOTE(review): assumes the list is ordered oldest -> newest, so walking it
+        # backwards yields the most recent photo first - confirm against the memory service.
+        for ev in reversed(recent_events):
+            body = ev.get("body_text") if isinstance(ev, dict) else None
+            if not isinstance(body, str):
+                continue
+            m = pattern.search(body)
+            if m:
+                return m.group(1)
+
+    local_text = memory_context.get("local_context_text")
+    if isinstance(local_text, str):
+        for line in reversed(local_text.splitlines()):
+            m = pattern.search(line)
+            if m:
+                return m.group(1)
+    return None
+
+
 def _looks_like_photo_followup(text: str) -> bool:
+    """
+    Heuristically decide whether ``text`` is a follow-up question about a photo.
+
+    Returns True when the lowercased text contains one of the direct phrases,
+    or combines a correction phrase with a photo/plant topic word, or matches
+    the flexible "<question word> на ... <photo word>" pattern without any of
+    the meta-question words. Returns False for empty text or when nothing matches.
+    All phrase checks are plain substring tests.
+    """
    if not text:
        return False
    t = text.strip().lower()
-    markers = [
+    direct_markers = [
        "що ти бачиш", "що на фото", "що на зображенні", "опиши фото", "подивись фото",
+        "що на цьому фото", "що на цій фотографії", "що на цій світлині",
+        "проаналізуй фото", "аналіз фото", "переглянь фото", "повернись до фото",
+        "яка це рослина", "що це за рослина", "що за рослина", "що за культура",
+        "яка культура", "визнач рослину",
        "what do you see", "what is in the image", "describe the photo",
+        "analyze the photo", "analyze image", "what plant is this",
        "что ты видишь", "что на фото", "опиши фото", "посмотри фото",
+        "проанализируй фото", "какое это растение", "что за растение",
    ]
-    return any(m in t for m in markers)
+    if any(m in t for m in direct_markers):
+        return True
+
+    # If user is correcting previous visual interpretation, route to vision again.
+    # Both a correction phrase and a topic word must be present in the same message.
+    correction_markers = [
+        "неправильна відповідь", "не правильна відповідь", "не видумуй", "це не так",
+        "ти помилився", "ти помилилась", "неправильно визначив",
+        "wrong answer", "you are wrong", "that is incorrect",
+        "неправильный ответ", "это не так", "ты ошибся",
+    ]
+    photo_topic_markers = ["фото", "зображ", "рослин", "image", "photo", "plant", "растен"]
+    if any(c in t for c in correction_markers) and any(p in t for p in photo_topic_markers):
+        return True
+
+    # Flexible forms: "що на ... фото/зображенні/світлині"
+    # The pattern requires the literal word "на" right after the question word,
+    # so an English "what ..." phrasing only matches when it is followed by "на".
+    if re.search(r"(що|what|что)\s+на\s+.*(фото|зображ|світлин|image|photo)", t):
+        # Exclude common meta-questions
+        # NOTE(review): these are substring tests, so "бот" also matches inside
+        # longer words such as "робота" - confirm that is acceptable.
+        meta_exclude = ["канал", "чат", "бот", "нормально"]
+        if not any(ex in t for ex in meta_exclude):
+            return True
+    return False
+
+
+def _is_agromatrix_plant_intel_intent(agent_id: str, text: str) -> bool:
+    """
+    Tell whether a message to the AgroMatrix agent asks about plant identification or disease.
+
+    Only the ``agromatrix`` agent (compared case-insensitively) qualifies. For it,
+    the stripped, lowercased text is checked for any known phrase by substring match.
+    Returns False for any other agent or for empty text.
+    """
+    if (agent_id or "").lower() != "agromatrix" or not text:
+        return False
+
+    lowered = text.strip().lower()
+    plant_phrases = (
+        "що за рослина", "що це за рослина", "яка це рослина", "яка культура",
+        "визнач рослину", "ідентифікуй рослину", "хвороба рослини", "плями на листі",
+        "what plant", "identify plant", "identify crop", "plant disease",
+        "что за растение", "определи растение", "болезнь растения",
+    )
+    for phrase in plant_phrases:
+        if phrase in lowered:
+            return True
+    return False


 def _cleanup_user_language_prefs() -> None:
@@ -855,6 +923,112 @@ def should_force_concise_reply(text: str) -> bool:
    return True


+def _strip_answer_markup_noise(answer_text: str) -> str:
+    """
+    Remove leading answer labels and markdown emphasis from an answer.
+
+    Strips, in order: a leading starred "коротка відповідь" label, a leading
+    starred "відповідь" label (both case-insensitive), and leading "#" heading
+    markers. Then every "**" and "__" is deleted anywhere in the text.
+    Returns "" for empty input, otherwise the cleaned text with outer whitespace removed.
+    """
+    if not answer_text:
+        return ""
+
+    # Order matters: the longer label must be tried before the shorter one.
+    leading_label_patterns = (
+        r"^\s*\*{1,3}\s*коротка відповідь\s*:?\s*\*{0,3}\s*",
+        r"^\s*\*{1,3}\s*відповідь\s*:?\s*\*{0,3}\s*",
+    )
+    result = answer_text.strip()
+    for label_pattern in leading_label_patterns:
+        result = re.sub(label_pattern, "", result, flags=re.IGNORECASE)
+    result = re.sub(r"^\s*#+\s*", "", result)
+
+    # Remove markdown emphasis noise that leaks into short answers
+    for emphasis in ("**", "__"):
+        result = result.replace(emphasis, "")
+    return result.strip()
+
+
+def _compress_bulleted_answer(answer_text: str, max_items: int = 3) -> str:
+    """
+    Collapse a bulleted or numbered answer into a short run of sentences.
+
+    Each line that starts with a bullet ("-", "*", "•") or with a number followed
+    by ".", ")" or ":" is reduced to its first sentence; the first ``max_items``
+    such items are joined into one paragraph. Lines that are not list items are
+    dropped once at least one list item is found.
+
+    Returns "" for empty input, and the stripped original text when no list item
+    is found or nothing remains after trimming.
+    """
+    if not answer_text:
+        return ""
+    bullet_prefix = re.compile(r"^(\*?\s*[-*•]|\*?\s*\d+[\.\):])\s*")
+    terminators = (".", "!", "?", "…")
+    bullet_lines: List[str] = []
+    for raw_line in answer_text.splitlines():
+        normalized = raw_line.replace("**", "").replace("__", "").strip()
+        prefix = bullet_prefix.match(normalized)
+        if prefix is None:
+            continue
+        item = normalized[prefix.end():].strip()
+        item = re.sub(r"\s+", " ", item).strip(" -–—")
+        item = re.sub(r"\.{2,}", ".", item)
+        item = re.sub(r"\s+\.", ".", item)
+        # Keep concise mode truly short: first complete sentence from each bullet.
+        item = re.split(r"(?<=[.!?…])\s+", item)[0].strip()
+        item = item.rstrip(":").strip()
+        if item:
+            bullet_lines.append(item)
+    if not bullet_lines:
+        return answer_text.strip()
+    # Terminate each item on its own: an item that already ends a sentence
+    # (including with "…") must not receive a second "." when items are joined.
+    sentences = [
+        item if item.endswith(terminators) else item + "."
+        for item in bullet_lines[:max_items]
+    ]
+    joined = re.sub(r"\s+", " ", " ".join(sentences)).strip()
+    return joined or answer_text.strip()
+
+
+def _limit_to_sentences(text: str, max_sentences: int = 3) -> str:
+    """
+    Keep at most ``max_sentences`` leading sentences of ``text``.
+
+    Sentences are split on whitespace that follows ".", "!", "?" or "…".
+    The kept sentences are re-joined with single spaces.
+    Returns "" for empty input.
+    """
+    if not text:
+        return ""
+    sentences = []
+    for fragment in re.split(r"(?<=[.!?…])\s+", text.strip()):
+        fragment = fragment.strip()
+        if fragment:
+            sentences.append(fragment)
+    # Slicing already covers the case where there are fewer sentences than the limit.
+    return " ".join(sentences[:max_sentences]).strip()
+
+
+def _agromatrix_rewrite_capability_limitations(user_text: str, answer_text: str) -> str:
+    """
+    Replace an answer that claims the agent cannot handle images with a fixed reply.
+
+    The answer is returned unchanged when it is empty or when its lowercased form
+    contains none of the limitation phrases. Otherwise one of two canned replies
+    is returned: a request to resend the image when the user's text mentions a
+    photo/image word, or a general capability statement when it does not.
+    """
+    if not answer_text:
+        return answer_text
+
+    limitation_markers = (
+        "не можу бачити", "не можу переглядати зображення", "не маю доступу до зображень",
+        "працюю лише з текстом", "працюю виключно з текстом",
+        "cannot view images", "cannot analyze images", "as a text model",
+    )
+    answer_lower = answer_text.lower()
+    claims_limitation = any(marker in answer_lower for marker in limitation_markers)
+    if not claims_limitation:
+        return answer_text
+
+    photo_markers = ("фото", "зображ", "image", "photo", "картин", "світлин")
+    user_lower = (user_text or "").lower()
+    user_mentions_photo = any(marker in user_lower for marker in photo_markers)
+
+    if not user_mentions_photo:
+        return (
+            "Можу працювати природною мовою та з мультимодальністю: фото, голос і документи. "
+            "Сформулюй запит коротко, і я відповім по суті."
+        )
+    return (
+        "Можу аналізувати фото. Надішли, будь ласка, зображення ще раз одним повідомленням "
+        "з коротким питанням, і я дам точний розбір."
+    )
+
+
+def postprocess_agent_answer(
+    agent_id: str,
+    user_text: str,
+    answer_text: str,
+    force_detailed: bool,
+    needs_complex_reasoning: bool,
+) -> str:
+    """
+    Shorten and clean an agent answer before it is sent to the user.
+
+    The answer is returned untouched when it is empty, when the agent is not
+    ``agromatrix`` (case-insensitive), or when a detailed / complex-reasoning
+    reply was requested. For user messages longer than 280 characters only the
+    capability-limitation rewrite is applied. Otherwise the answer is stripped
+    of markup noise, passed through the limitation rewrite, reduced to its first
+    bullet item, and capped at three sentences.
+
+    Returns the processed text, or the original answer if processing left nothing.
+    """
+    # Keep detailed/complex answers intact; other agents are passed through as-is.
+    if (
+        not answer_text
+        or (agent_id or "").lower() != "agromatrix"
+        or force_detailed
+        or needs_complex_reasoning
+    ):
+        return answer_text
+
+    if len((user_text or "").strip()) > 280:
+        return _agromatrix_rewrite_capability_limitations(user_text, answer_text)
+
+    without_markup = _strip_answer_markup_noise(answer_text)
+    rewritten = _agromatrix_rewrite_capability_limitations(user_text, without_markup)
+    short = _limit_to_sentences(
+        _compress_bulleted_answer(rewritten, max_items=1),
+        max_sentences=3,
+    )
+    return short or answer_text
+
+
 COMPLEX_REASONING_KEYWORDS = [
    "стратег", "roadmap", "алгоритм", "architecture", "архітектур",
    "прогноз", "scenario", "модель", "аналіз", "побудуй", "plan", "дослідж",
@@ -2148,12 +2322,45 @@ async def handle_telegram_webhook(
        text = update.message.get("text", "")
        caption = update.message.get("caption", "")

-    # If user asks about a recently sent photo, run vision on cached photo file_id.
-    if text and _looks_like_photo_followup(text):
+    # Photo/image intent guard:
+    # if text references a photo/image, try to resolve latest file_id and route to vision.
+    photo_intent = False
+    if text:
+        tl = text.lower()
+        photo_intent = _looks_like_photo_followup(text) or any(
+            k in tl for k in ("фото", "зображ", "світлин", "image", "photo")
+        )
+        if not photo_intent:
+            # Robust fallback for common formulations like "що на цьому фото?"
+            photo_intent = bool(
+                re.search(r"(що|what|что).{0,24}(цьому|этом|this).{0,24}(фото|зображ|світлин|image|photo)", tl)
+            )
+
+    if photo_intent:
        recent_file_id = _get_recent_photo_file_id(agent_config.agent_id, chat_id, user_id)
+
+        # Fallback: recover latest photo file_id from memory-service context (survives process restarts).
+        if not recent_file_id:
+            try:
+                mc = await memory_client.get_context(
+                    user_id=f"tg:{user_id}",
+                    agent_id=agent_config.agent_id,
+                    team_id=dao_id,
+                    channel_id=chat_id,
+                    limit=80,
+                )
+                recent_file_id = _extract_recent_photo_file_id_from_memory(mc)
+                if recent_file_id:
+                    _set_recent_photo_context(agent_config.agent_id, chat_id, user_id, recent_file_id)
+                    logger.info(
+                        f"{agent_config.name}: Recovered photo file_id from memory context for follow-up: {recent_file_id}"
+                    )
+            except Exception as e:
+                logger.warning(f"{agent_config.name}: failed to recover photo file_id from memory: {e}")
+
        if recent_file_id:
            logger.info(
-                f"{agent_config.name}: Detected follow-up photo question; using cached file_id={recent_file_id}"
+                f"{agent_config.name}: Photo intent detected; using file_id={recent_file_id}"
            )
            followup_result = await process_photo(
                agent_config=agent_config,
@@ -2167,6 +2374,16 @@ async def handle_telegram_webhook(
                bypass_media_gate=True,
            )
            return followup_result
+
+        # Hard guard: don't send photo-related requests to text LLM path when image context is missing.
+        is_question_like = ("?" in text) or any(k in tl for k in ("що", "опиши", "проанал", "what", "describe", "analy", "что"))
+        if is_question_like:
+            await send_telegram_message(
+                chat_id,
+                "Бачу питання про фото, але не знайшов зображення в історії сесії. Надішли фото ще раз з коротким питанням, і я одразу проаналізую.",
+                telegram_token,
+            )
+            return {"ok": True, "handled": True, "reason": "photo_followup_without_image_context"}
        
        if not text and not caption:
            # Check for unsupported message types and silently ignore
@@ -2432,6 +2649,7 @@ async def handle_telegram_webhook(
            "session_id": f"tg:{chat_id}:{dao_id}",
            "username": username,
            "chat_id": chat_id,
+            "raw_user_text": text,
            "sender_is_bot": is_sender_bot,
            "mentioned_bots": mentioned_bots,
            "requires_complex_reasoning": needs_complex_reasoning,
@@ -2454,6 +2672,9 @@ async def handle_telegram_webhook(
    if should_force_detailed_reply(text):
        router_request["metadata"]["force_detailed"] = True

+    if _is_agromatrix_plant_intel_intent(agent_config.agent_id, text):
+        router_request["metadata"]["crewai_profile"] = "plant_intel"
+
    if should_force_concise_reply(text):
        # IMPORTANT: preserve conversation context! Only append concise instruction
        router_request["metadata"]["force_concise"] = True
@@ -2551,9 +2772,14 @@ async def handle_telegram_webhook(
            )
            return {"ok": True, "skipped": True, "reason": "no_output_from_llm"}
        
-        # Truncate if too long for Telegram
-        if len(answer_text) > TELEGRAM_SAFE_LENGTH:
-            answer_text = answer_text[:TELEGRAM_SAFE_LENGTH] + "\n\n_... (відповідь обрізано)_"
+        force_detailed_reply = bool(router_request.get("metadata", {}).get("force_detailed"))
+        answer_text = postprocess_agent_answer(
+            agent_id=agent_config.agent_id,
+            user_text=text or "",
+            answer_text=answer_text,
+            force_detailed=force_detailed_reply,
+            needs_complex_reasoning=needs_complex_reasoning,
+        )
        
        # Skip Telegram sending for prober requests (chat_id=0)
        if is_prober:
@@ -2591,7 +2817,9 @@ async def handle_telegram_webhook(
                
                async with httpx.AsyncClient() as client:
                    files = {"photo": ("image.png", BytesIO(image_bytes), "image/png")}
-                    data = {"chat_id": chat_id, "caption": answer_text}
+                    # Telegram caption limit is 1024 chars.
+                    safe_caption = (answer_text or "")[:1024]
+                    data = {"chat_id": chat_id, "caption": safe_caption}
                    response_photo = await client.post(url, files=files, data=data, timeout=30.0)
                    response_photo.raise_for_status()
                    logger.info(f"✅ Sent generated image to Telegram chat {chat_id}")
@@ -3532,44 +3760,51 @@ async def send_telegram_message(chat_id: str, text: str, bot_token: Optional[str
        return False

    # Defensive cleanup for occasional reasoning/markup leaks.
-    import re
    safe_text = re.sub(r'<think>.*?</think>', '', text or "", flags=re.DOTALL)
    safe_text = re.sub(r'<think>.*$', '', safe_text, flags=re.DOTALL)
    safe_text = safe_text.strip() or "..."

    token_id = telegram_token.split(":", 1)[0] if ":" in telegram_token else "unknown"
    url = f"https://api.telegram.org/bot{telegram_token}/sendMessage"
-    payload = {
-        "chat_id": str(chat_id),
-        "text": safe_text,
-        "disable_web_page_preview": True,
-    }

-    try:
-        async with httpx.AsyncClient() as client:
-            response = await client.post(url, json=payload, timeout=15.0)
+    async def _send_chunk(chunk: str) -> bool:
+        payload = {
+            "chat_id": str(chat_id),
+            "text": chunk,
+            "disable_web_page_preview": True,
+        }
+        try:
+            async with httpx.AsyncClient() as client:
+                response = await client.post(url, json=payload, timeout=15.0)

-        if response.status_code >= 400:
-            err_desc = response.text[:300]
-            try:
-                body = response.json()
-                err_desc = body.get("description") or err_desc
-            except Exception:
-                pass
-            logger.error(
-                "Telegram sendMessage failed: bot_id=%s chat_id=%s status=%s desc=%s",
-                token_id,
-                chat_id,
-                response.status_code,
-                err_desc,
-            )
+            if response.status_code >= 400:
+                err_desc = response.text[:300]
+                try:
+                    body = response.json()
+                    err_desc = body.get("description") or err_desc
+                except Exception:
+                    pass
+                logger.error(
+                    "Telegram sendMessage failed: bot_id=%s chat_id=%s status=%s desc=%s",
+                    token_id,
+                    chat_id,
+                    response.status_code,
+                    err_desc,
+                )
+                return False
+            return True
+        except Exception as e:
+            logger.error("Telegram sendMessage exception: bot_id=%s chat_id=%s error=%s", token_id, chat_id, e)
            return False

-        logger.info("Telegram message sent: bot_id=%s chat_id=%s", token_id, chat_id)
-        return True
-    except Exception as e:
-        logger.error("Telegram sendMessage exception: bot_id=%s chat_id=%s error=%s", token_id, chat_id, e)
-        return False
+    all_ok = True
+    chunks = _chunk_text(safe_text, max_len=TELEGRAM_MAX_MESSAGE_LENGTH)
+    for chunk in chunks:
+        sent = await _send_chunk(chunk)
+        all_ok = all_ok and sent
+    if all_ok:
+        logger.info("Telegram message sent: bot_id=%s chat_id=%s chunks=%s", token_id, chat_id, len(chunks))
+    return all_ok


 # ========================================