feat: Add Alateya, Clan, Eonarch agents + fix gateway-router connection
## Agents Added

- Alateya: R&D, biotech, innovations
- Clan (Spirit): Community spirit agent
- Eonarch: Consciousness evolution agent

## Changes

- docker-compose.node1.yml: Added tokens for all 3 new agents
- gateway-bot/http_api.py: Added configs and webhook endpoints
- gateway-bot/clan_prompt.txt: New prompt file
- gateway-bot/eonarch_prompt.txt: New prompt file

## Fixes

- Fixed ROUTER_URL from :9102 to :8000 (internal container port)
- All 9 Telegram agents now working

## Documentation

- Created PROJECT-MASTER-INDEX.md - single entry point
- Added various status documents and scripts

Tokens configured:

- Helion, NUTRA, Agromatrix (existing)
- Alateya, Clan, Eonarch (new)
- Druid, GreenFood, DAARWIZZ (configured)
This commit is contained in:
@@ -8,7 +8,9 @@ This service can be used by:
|
||||
- Mobile apps
|
||||
- Any other client
|
||||
"""
|
||||
import os
|
||||
import logging
|
||||
import hashlib
|
||||
from typing import Optional, Dict, Any, List
|
||||
from pydantic import BaseModel
|
||||
from datetime import datetime
|
||||
@@ -175,7 +177,7 @@ class DocumentService:
|
||||
metadata: Optional[Dict[str, Any]] = None
|
||||
) -> ParsedResult:
|
||||
"""
|
||||
Parse a document through DAGI Router.
|
||||
Parse a document directly through Swapper service.
|
||||
|
||||
Args:
|
||||
session_id: Session identifier (e.g., "telegram:123", "web:user456")
|
||||
@@ -183,72 +185,90 @@ class DocumentService:
|
||||
file_name: Name of the file
|
||||
dao_id: DAO identifier
|
||||
user_id: User identifier
|
||||
output_mode: Output format ("qa_pairs", "markdown", "chunks")
|
||||
output_mode: Output format ("qa_pairs", "markdown", "chunks", "text")
|
||||
metadata: Optional additional metadata
|
||||
|
||||
Returns:
|
||||
ParsedResult with parsed data
|
||||
"""
|
||||
import httpx
|
||||
|
||||
SWAPPER_URL = os.getenv("SWAPPER_URL", "http://swapper-service:8890")
|
||||
|
||||
try:
|
||||
# Build request to Router
|
||||
router_request = {
|
||||
"mode": "doc_parse",
|
||||
"agent": "parser",
|
||||
"metadata": {
|
||||
"source": self._extract_source(session_id),
|
||||
"dao_id": dao_id,
|
||||
"user_id": user_id,
|
||||
"session_id": session_id,
|
||||
**(metadata or {})
|
||||
},
|
||||
"payload": {
|
||||
"doc_url": doc_url,
|
||||
"file_name": file_name,
|
||||
"output_mode": output_mode,
|
||||
"dao_id": dao_id,
|
||||
"user_id": user_id,
|
||||
},
|
||||
}
|
||||
|
||||
logger.info(f"Parsing document: session={session_id}, file={file_name}, mode={output_mode}")
|
||||
|
||||
# Send to Router
|
||||
response = await send_to_router(router_request)
|
||||
# Download the document first
|
||||
async with httpx.AsyncClient(timeout=60.0) as client:
|
||||
doc_response = await client.get(doc_url)
|
||||
if doc_response.status_code != 200:
|
||||
return ParsedResult(
|
||||
success=False,
|
||||
error=f"Failed to download document: {doc_response.status_code}"
|
||||
)
|
||||
doc_content = doc_response.content
|
||||
|
||||
# Send directly to Swapper /document endpoint
|
||||
async with httpx.AsyncClient(timeout=120.0) as client:
|
||||
# Map output_mode: qa_pairs -> text (Swapper doesn't support qa_pairs directly)
|
||||
swapper_mode = "markdown" if output_mode in ["qa_pairs", "markdown"] else "text"
|
||||
|
||||
mime_type = "application/octet-stream"
|
||||
if file_name:
|
||||
import mimetypes
|
||||
mime_type = mimetypes.guess_type(file_name)[0] or mime_type
|
||||
|
||||
files = {"file": (file_name, doc_content, mime_type)}
|
||||
data = {"output_format": swapper_mode}
|
||||
|
||||
swapper_response = await client.post(
|
||||
f"{SWAPPER_URL}/document",
|
||||
files=files,
|
||||
data=data
|
||||
)
|
||||
|
||||
if swapper_response.status_code == 200:
|
||||
response = {"ok": True, "data": swapper_response.json()}
|
||||
else:
|
||||
logger.error(f"Swapper document error: {swapper_response.status_code} - {swapper_response.text[:200]}")
|
||||
return ParsedResult(
|
||||
success=False,
|
||||
error=f"Document parsing failed: {swapper_response.status_code}"
|
||||
)
|
||||
|
||||
if not isinstance(response, dict):
|
||||
return ParsedResult(
|
||||
success=False,
|
||||
error="Invalid response from router"
|
||||
error="Invalid response from Swapper"
|
||||
)
|
||||
|
||||
data = response.get("data", {})
|
||||
|
||||
# Extract doc_id
|
||||
doc_id = data.get("doc_id") or data.get("metadata", {}).get("doc_id")
|
||||
# Swapper returns: {success, model, output_format, result, filename, processing_time_ms}
|
||||
parsed_text = data.get("result", "")
|
||||
output_format = data.get("output_format", "text")
|
||||
model_used = data.get("model", "unknown")
|
||||
|
||||
logger.info(f"Document parsed: {len(parsed_text)} chars using {model_used}")
|
||||
|
||||
# Generate a simple doc_id based on filename and timestamp
|
||||
doc_id = hashlib.md5(f"{file_name}:{datetime.utcnow().isoformat()}".encode()).hexdigest()[:12]
|
||||
|
||||
# Save document context for follow-up queries
|
||||
if doc_id:
|
||||
await self.save_doc_context(
|
||||
session_id=session_id,
|
||||
doc_id=doc_id,
|
||||
doc_url=doc_url,
|
||||
file_name=file_name,
|
||||
dao_id=dao_id
|
||||
)
|
||||
await self.save_doc_context(
|
||||
session_id=session_id,
|
||||
doc_id=doc_id,
|
||||
doc_url=doc_url,
|
||||
file_name=file_name,
|
||||
dao_id=dao_id
|
||||
)
|
||||
|
||||
# Extract parsed data
|
||||
qa_pairs_raw = data.get("qa_pairs", [])
|
||||
# Convert text to markdown format
|
||||
markdown = parsed_text if output_format == "markdown" else f"```\n{parsed_text}\n```"
|
||||
|
||||
# No QA pairs from direct parsing - would need LLM for that
|
||||
qa_pairs = None
|
||||
if qa_pairs_raw:
|
||||
# Convert to QAItem list
|
||||
try:
|
||||
qa_pairs = [QAItem(**qa) if isinstance(qa, dict) else QAItem(question=qa.get("question", ""), answer=qa.get("answer", "")) for qa in qa_pairs_raw]
|
||||
except Exception as e:
|
||||
logger.warning(f"Failed to parse qa_pairs: {e}")
|
||||
qa_pairs = None
|
||||
|
||||
markdown = data.get("markdown")
|
||||
chunks = data.get("chunks", [])
|
||||
chunks = []
|
||||
chunks_meta = None
|
||||
if chunks:
|
||||
chunks_meta = {
|
||||
|
||||
Reference in New Issue
Block a user