feat: complete RAG pipeline integration (ingest + query + Memory)

Parser Service:
- Add /ocr/ingest endpoint (PARSER → RAG in one call)
- Add RAG_BASE_URL and RAG_TIMEOUT to config
- Add OcrIngestResponse schema
- Create file_converter utility for PDF/image → PNG bytes
- Endpoint accepts file, dao_id, doc_id, user_id
- Automatically parses with dots.ocr and sends to RAG Service

Router Integration:
- Add _handle_rag_query() method in RouterApp
- Combines Memory + RAG → LLM pipeline
- Get Memory context (facts, events, summaries)
- Query RAG Service for documents
- Build prompt with Memory + RAG documents
- Call LLM provider with combined context
- Return answer with citations

Clients:
- Create rag_client.py for Router (query RAG Service)
- Create memory_client.py for Router (get Memory context)

E2E Tests:
- Create e2e_rag_pipeline.sh script for full pipeline test
- Test ingest → query → router query flow
- Add E2E_RAG_README.md with usage examples

Docker:
- Add RAG_SERVICE_URL and MEMORY_SERVICE_URL to router environment
This commit is contained in:
Apple
2025-11-16 05:02:14 -08:00
parent 6d69f901f7
commit 382e661f1f
10 changed files with 719 additions and 1 deletions

View File

@@ -55,6 +55,11 @@ class RouterApp:
async def handle(self, req: RouterRequest) -> RouterResponse:
"""Handle router request with RBAC context injection for chat mode"""
# Special handling for rag_query mode (RAG + Memory → LLM)
if req.mode == "rag_query":
return await self._handle_rag_query(req)
# 1. RBAC injection for microDAO chat
if req.mode == "chat" and req.dao_id and req.user_id:
try:
@@ -127,6 +132,156 @@ class RouterApp:
error=f"Internal error: {str(e)}"
)
async def _handle_rag_query(self, req: RouterRequest) -> RouterResponse:
    """
    Handle RAG query mode: combines Memory + RAG -> LLM.

    Flow:
        1. Fetch Memory context (personal facts, recent events) — best effort.
        2. Query the RAG Service for relevant documents/citations.
        3. Build one prompt from the system instruction + Memory + excerpts.
        4. Call the LLM provider resolved from the routing table.
        5. Return the answer with citations and usage metadata.

    Args:
        req: Incoming router request. The question is taken from
            ``req.payload['question']`` or, as a fallback, ``req.message``.

    Returns:
        RouterResponse: ``ok=True`` with ``data['text']`` and
        ``data['citations']`` on success; ``ok=False`` with an ``error``
        string if the question is missing, the LLM call fails, or any
        stage raises.
    """
    # Local imports so the router can start without the RAG/Memory
    # clients present (they are only needed for this mode).
    from rag_client import rag_client
    from memory_client import memory_client

    logger.info(f"Handling RAG query: dao_id={req.dao_id}, user_id={req.user_id}")

    try:
        # Extract the question; fall back to the plain chat message.
        question = req.payload.get("question") or req.message
        if not question:
            return RouterResponse(
                ok=False,
                provider_id="router",
                error="Missing 'question' in payload"
            )

        dao_id = req.dao_id or "daarion"
        user_id = req.user_id or "anonymous"

        # 1. Get Memory context. Best-effort: a Memory outage must not
        # break the RAG answer, so failures are logged and we continue
        # with an empty context.
        memory_ctx = {}
        try:
            memory_ctx = await memory_client.get_context(
                user_id=user_id,
                agent_id=req.agent or "daarwizz",
                team_id=dao_id,
                channel_id=req.payload.get("channel_id"),
                limit=10
            )
            logger.info(f"Memory context retrieved: {len(memory_ctx.get('facts', []))} facts, {len(memory_ctx.get('recent_events', []))} events")
        except Exception as e:
            logger.warning(f"Memory context fetch failed: {e}")

        # 2. Query RAG Service. Hard dependency: an exception here falls
        # through to the outer handler and yields an error response.
        rag_resp = await rag_client.query(
            dao_id=dao_id,
            question=question,
            top_k=5,
            user_id=user_id
        )

        rag_citations = rag_resp.get("citations", [])
        rag_docs = rag_resp.get("documents", [])

        logger.info(f"RAG retrieved {len(rag_docs)} documents, {len(rag_citations)} citations")

        # 3. Build the final prompt: system instruction + optional Memory
        # section + numbered document excerpts + the user question.
        system_prompt = (
            "Ти асистент microDAO. Використовуй і особисту пам'ять, і документи DAO.\n"
            "Формуй чітку, структуровану відповідь українською, посилаючись на документи "
            "через індекси [1], [2] тощо, де це доречно.\n\n"
        )

        # Memory section: up to 5 facts and 3 recent events.
        memory_text = ""
        if memory_ctx.get("facts"):
            facts_summary = ", ".join([
                f"{f.get('fact_key', '')}={f.get('fact_value', '')}"
                for f in memory_ctx["facts"][:5]
            ])
            if facts_summary:
                memory_text += f"Особисті факти: {facts_summary}\n"
        if memory_ctx.get("recent_events"):
            recent = memory_ctx["recent_events"][:3]
            events_summary = "\n".join([
                f"- {e.get('body_text', '')[:100]}"  # first 100 chars per event
                for e in recent
            ])
            if events_summary:
                memory_text += f"Останні події:\n{events_summary}\n"

        # Document excerpts, numbered [1]..[5] so the LLM can cite them.
        docs_text = ""
        for i, citation in enumerate(rag_citations[:5], start=1):
            doc_id = citation.get("doc_id", "unknown")
            page = citation.get("page", 0)
            excerpt = citation.get("excerpt", "")
            docs_text += f"[{i}] (doc_id={doc_id}, page={page}): {excerpt}\n"

        # BUGFIX: the memory section used to be built inside an f-string
        # replacement field containing backslash escapes, which is a
        # SyntaxError on Python < 3.12 (backslashes in f-string
        # expressions were only allowed by PEP 701). Build it up front.
        memory_section = (
            f"1) Пам'ять (короткий summary):\n{memory_text}\n" if memory_text else ""
        )
        final_prompt = (
            f"{system_prompt}"
            f"{memory_section}"
            f"2) Релевантні документи (витяги):\n{docs_text}\n\n"
            f"Питання користувача:\n{question}\n\n"
            "Відповідь:"
        )

        # 4. Call the LLM provider chosen by the routing table.
        provider = self.routing_table.resolve_provider(req)
        logger.info(f"Calling LLM provider: {provider.id}")

        # Re-issue the request in plain chat mode with the combined prompt
        # so the provider path stays unchanged.
        llm_req = RouterRequest(
            mode="chat",  # Use chat mode for LLM
            agent=req.agent,
            dao_id=req.dao_id,
            source=req.source,
            session_id=req.session_id,
            user_id=req.user_id,
            message=final_prompt,
            payload=req.payload
        )

        llm_response = await provider.call(llm_req)

        if not llm_response.ok:
            return RouterResponse(
                ok=False,
                provider_id="router",
                error=f"LLM call failed: {llm_response.error}"
            )

        # 5. Return the provider's answer plus the RAG citations and
        # pipeline metadata for observability.
        return RouterResponse(
            ok=True,
            provider_id=llm_response.provider_id,
            data={
                "text": llm_response.data.get("text", ""),
                "citations": rag_citations
            },
            metadata={
                "memory_used": bool(memory_text),
                "rag_used": True,
                "documents_retrieved": len(rag_docs),
                "citations_count": len(rag_citations)
            },
            error=None
        )

    except Exception as e:
        logger.error(f"RAG query handler error: {e}", exc_info=True)
        return RouterResponse(
            ok=False,
            provider_id="router",
            error=f"RAG query failed: {str(e)}"
        )
def get_provider_info(self):
"""Get info about registered providers"""
return {