feat: implement TTS, Document processing, and Memory Service /facts API

- TTS: xtts-v2 integration with voice cloning support
- Document: docling integration for PDF/DOCX/PPTX processing
- Memory Service: added /facts/upsert, /facts/{key}, /facts endpoints
- Added required dependencies (TTS, docling)
2026-01-17 08:16:37 -08:00
parent a9fcadc6e2
commit 5290287058
121 changed files with 17071 additions and 436 deletions
--- a/gateway-bot/http_api.py
+++ b/gateway-bot/http_api.py
@@ -196,17 +196,18 @@ GREENFOOD_SYSTEM_PROMPT = GREENFOOD_CONFIG.system_prompt
 # Request Models
 # ========================================

-# DRUID webhook endpoint
-@router.post("/druid/telegram/webhook")
-async def druid_telegram_webhook(update: TelegramUpdate):
-    return await handle_telegram_webhook(DRUID_CONFIG, update)
-
 class TelegramUpdate(BaseModel):
    """Simplified Telegram update model"""
    update_id: Optional[int] = None
    message: Optional[Dict[str, Any]] = None


+# DRUID webhook endpoint
+@router.post("/druid/telegram/webhook")
+async def druid_telegram_webhook(update: TelegramUpdate):
+    return await handle_telegram_webhook(DRUID_CONFIG, update)
+
+
 class DiscordMessage(BaseModel):
    """Simplified Discord message model"""
    content: Optional[str] = None
@@ -517,7 +518,8 @@ async def process_photo(
        )
        
        # Send to Router with specialist_vision_8b model (Swapper)
-        prompt = caption.strip() if caption else "Опиши це зображення детально."
+        # IMPORTANT: Default prompt must request BRIEF description (2-3 sentences max)
+        prompt = caption.strip() if caption else "Коротко (2-3 речення) скажи, що на цьому зображенні та яке його значення."
        router_request = {
            "message": f"{prompt}\n\n[Зображення передано окремо у context.images]",
            "mode": "chat",
@@ -555,10 +557,10 @@ async def process_photo(
            answer_text = response.get("data", {}).get("text") or response.get("response", "")
            
            if answer_text:
-                # Photo processed successfully
+                # Photo processed - send LLM response directly
                await send_telegram_message(
                    chat_id,
-                    f"✅ **Фото оброблено**\n\n{answer_text}",
+                    answer_text,  # No prefix, just the LLM response
                    telegram_token
                )
                
@@ -579,7 +581,7 @@ async def process_photo(
            else:
                await send_telegram_message(
                    chat_id,
-                    "Фото оброблено, але не вдалося отримати опис.",
+                    "Не вдалося отримати опис зображення.",
                    telegram_token
                )
                return {"ok": False, "error": "No description in response"}
@@ -588,7 +590,7 @@ async def process_photo(
            logger.error(f"{agent_config.name}: Vision-8b error: {error_msg}")
            await send_telegram_message(
                chat_id,
-                f"Вибач, не вдалося обробити фото: {error_msg}",
+                "Вибач, сталася помилка при обробці фото.",
                telegram_token
            )
            return {"ok": False, "error": error_msg}
@@ -598,7 +600,7 @@ async def process_photo(
        telegram_token = agent_config.get_telegram_token()
        await send_telegram_message(
            chat_id,
-            "Вибач, не вдалося обробити фото. Переконайся, що Swapper Service з vision-8b моделлю запущений.",
+            "Вибач, сталася помилка при обробці фото.",
            telegram_token
        )
        return {"ok": False, "error": "Photo processing failed"}
@@ -811,8 +813,12 @@ async def handle_telegram_webhook(
    """
    # Allow updates without message if they contain photo/voice
    # The actual message validation happens after multimodal checks
-    # if not update.message:
-    #     raise HTTPException(status_code=400, detail="No message in update")
+    if not update.message:
+        # Handle channel_post or other update types
+        if hasattr(update, 'channel_post') and update.channel_post:
+            # Ignore channel posts or handle separately
+            return {"status": "ok", "skipped": "channel_post"}
+        return {"status": "ok", "skipped": "no_message"}
    
    # Extract message details
    from_user = update.message.get("from", {})
@@ -1020,18 +1026,28 @@ async def handle_telegram_webhook(
            # Fall through to regular chat if RAG query fails
    
    # Regular chat mode
-    # Fetch memory context
+    # Fetch memory context (includes local context as fallback)
+    # Helion має доступ до більшої історії (100 повідомлень) для кращого контексту
+    context_limit = 100 if agent_config.agent_id == "helion" else 10
    memory_context = await memory_client.get_context(
        user_id=f"tg:{user_id}",
        agent_id=agent_config.agent_id,
        team_id=dao_id,
        channel_id=chat_id,
-        limit=10
+        limit=context_limit
    )
    
+    # Build message with conversation context
+    local_history = memory_context.get("local_context_text", "")
+    if local_history:
+        # Add conversation history to message for better context understanding
+        message_with_context = f"[Контекст розмови]\n{local_history}\n\n[Поточне повідомлення від {username}]\n{text}"
+    else:
+        message_with_context = text
+    
    # Build request to Router
    router_request = {
-        "message": text,
+        "message": message_with_context,
        "mode": "chat",
        "agent": agent_config.agent_id,
        "metadata": {
@@ -1320,7 +1336,7 @@ async def _old_telegram_webhook(update: TelegramUpdate):
                    
                    # Send to Router with specialist_vision_8b model (Swapper)
                    router_request = {
-                        "message": f"Опиши це зображення детально: {file_url}",
+                        "message": f"Коротко (2-3 речення) опиши значення цього зображення: {file_url}",
                        "mode": "chat",
                        "agent": "daarwizz",
                        "metadata": {
@@ -1355,7 +1371,7 @@ async def _old_telegram_webhook(update: TelegramUpdate):
                            # Photo processed successfully
                            await send_telegram_message(
                                chat_id,
-                                f"✅ **Фото оброблено**\n\n{answer_text}"
+                                answer_text  # No prefix, just the LLM response
                            )
                            
                            # Save to memory for context
@@ -1373,7 +1389,7 @@ async def _old_telegram_webhook(update: TelegramUpdate):
                            
                            return {"ok": True, "agent": "daarwizz", "model": "specialist_vision_8b"}
                        else:
-                            await send_telegram_message(chat_id, "Фото оброблено, але не вдалося отримати опис.")
+                            await send_telegram_message(chat_id, "Не вдалося отримати опис зображення.")
                            return {"ok": False, "error": "No description in response"}
                    else:
                        error_msg = response.get("error", "Unknown error") if isinstance(response, dict) else "Router error"
@@ -1383,7 +1399,7 @@ async def _old_telegram_webhook(update: TelegramUpdate):
                    
                except Exception as e:
                    logger.error(f"Photo processing failed: {e}", exc_info=True)
-                    await send_telegram_message(chat_id, "Вибач, не вдалося обробити фото. Переконайся, що Vision Encoder сервіс запущений.")
+                    await send_telegram_message(chat_id, "Вибач, сталася помилка при обробці фото.")
                    return {"ok": False, "error": "Photo processing failed"}
        
        # Check if it's a voice message
@@ -1952,8 +1968,9 @@ async def _old_helion_telegram_webhook(update: TelegramUpdate):
                    file_url = f"https://api.telegram.org/file/bot{helion_token}/{file_path}"
                    
                    # Send to Router with specialist_vision_8b model (Swapper)
+                    # IMPORTANT: Request BRIEF description (2-3 sentences per v2.3 prompt rules)
                    router_request = {
-                        "message": f"Опиши це зображення детально, зосередься на технічних деталях EcoMiner/BioMiner якщо вони є: {file_url}",
+                        "message": f"Коротко (2-3 речення максимум): що на цьому зображенні та яке його значення для Energy Union? {file_url}",
                        "mode": "chat",
                        "agent": "helion",
                        "metadata": {
@@ -1985,10 +2002,10 @@ async def _old_helion_telegram_webhook(update: TelegramUpdate):
                        answer_text = response.get("data", {}).get("text") or response.get("response", "")
                        
                        if answer_text:
-                            # Photo processed successfully
+                            # Photo processed - send LLM response directly WITHOUT prefix
                            await send_telegram_message(
                                chat_id,
-                                f"✅ **Фото оброблено**\n\n{answer_text}",
+                                answer_text,  # No prefix, just the LLM response
                                helion_token
                            )
                            
@@ -2007,18 +2024,18 @@ async def _old_helion_telegram_webhook(update: TelegramUpdate):
                            
                            return {"ok": True, "agent": "helion", "model": "specialist_vision_8b"}
                        else:
-                            await send_telegram_message(chat_id, "Фото оброблено, але не вдалося отримати опис.", helion_token)
+                            await send_telegram_message(chat_id, "Не вдалося отримати опис зображення.", helion_token)
                            return {"ok": False, "error": "No description in response"}
                    else:
                        error_msg = response.get("error", "Unknown error") if isinstance(response, dict) else "Router error"
                        logger.error(f"Helion: Vision-8b error: {error_msg}")
-                        await send_telegram_message(chat_id, f"Вибач, не вдалося обробити фото: {error_msg}", helion_token)
+                        await send_telegram_message(chat_id, "Вибач, сталася помилка при обробці фото.", helion_token)
                        return {"ok": False, "error": error_msg}
                    
                except Exception as e:
                    logger.error(f"Helion: Photo processing failed: {e}", exc_info=True)
                    helion_token = os.getenv("HELION_TELEGRAM_BOT_TOKEN")
-                    await send_telegram_message(chat_id, "Вибач, не вдалося обробити фото. Переконайся, що Swapper Service з vision-8b моделлю запущений.", helion_token)
+                    await send_telegram_message(chat_id, "Вибач, сталася помилка при обробці фото.", helion_token)
                    return {"ok": False, "error": "Photo processing failed"}
        
        # Get message text
@@ -2065,7 +2082,7 @@ async def _old_helion_telegram_webhook(update: TelegramUpdate):
                # Fall through to regular chat if RAG query fails
        
        # Regular chat mode
-        # Fetch memory context
+        # Fetch memory context (includes local context as fallback)
        memory_context = await memory_client.get_context(
            user_id=f"tg:{user_id}",
            agent_id="helion",
@@ -2074,9 +2091,17 @@ async def _old_helion_telegram_webhook(update: TelegramUpdate):
            limit=10
        )
        
+        # Build message with conversation context
+        local_history = memory_context.get("local_context_text", "")
+        if local_history:
+            # Add conversation history to message for better context understanding
+            message_with_context = f"[Контекст розмови]\n{local_history}\n\n[Поточне повідомлення від {username}]\n{text}"
+        else:
+            message_with_context = text
+        
        # Build request to Router with Helion context
        router_request = {
-            "message": text,
+            "message": message_with_context,
            "mode": "chat",
            "agent": "helion",  # Helion agent identifier
            "metadata": {