feat: complete RAG pipeline integration (ingest + query + Memory)

Parser Service:
- Add /ocr/ingest endpoint (PARSER → RAG in one call)
- Add RAG_BASE_URL and RAG_TIMEOUT to config
- Add OcrIngestResponse schema
- Create file_converter utility for PDF/image → PNG bytes
- Endpoint accepts file, dao_id, doc_id, user_id
- Automatically parses with dots.ocr and sends to RAG Service

Router Integration:
- Add _handle_rag_query() method in RouterApp
- Combines Memory + RAG → LLM pipeline
- Get Memory context (facts, events, summaries)
- Query RAG Service for documents
- Build prompt with Memory + RAG documents
- Call LLM provider with combined context
- Return answer with citations

Clients:
- Create rag_client.py for Router (query RAG Service)
- Create memory_client.py for Router (get Memory context)

E2E Tests:
- Create e2e_rag_pipeline.sh script for full pipeline test
- Test ingest → query → router query flow
- Add E2E_RAG_README.md with usage examples

Docker:
- Add RAG_SERVICE_URL and MEMORY_SERVICE_URL to router environment
This commit is contained in:
Apple
2025-11-16 05:02:14 -08:00
parent 6d69f901f7
commit 382e661f1f
10 changed files with 719 additions and 1 deletions

View File

@@ -55,6 +55,11 @@ class RouterApp:
async def handle(self, req: RouterRequest) -> RouterResponse:
"""Handle router request with RBAC context injection for chat mode"""
# Special handling for rag_query mode (RAG + Memory → LLM)
if req.mode == "rag_query":
return await self._handle_rag_query(req)
# 1. RBAC injection for microDAO chat
if req.mode == "chat" and req.dao_id and req.user_id:
try:
@@ -127,6 +132,156 @@ class RouterApp:
error=f"Internal error: {str(e)}"
)
async def _handle_rag_query(self, req: RouterRequest) -> RouterResponse:
    """
    Handle RAG query mode: combines Memory + RAG -> LLM.

    Flow:
        1. Fetch Memory context (personal facts, recent events) — best effort.
        2. Query the RAG Service for relevant documents/citations.
        3. Build one prompt from the system instruction + Memory + excerpts.
        4. Call the LLM provider resolved from the routing table.
        5. Return the answer with citations and usage metadata.

    Args:
        req: Incoming router request. The question is taken from
            ``req.payload['question']`` or, as a fallback, ``req.message``.

    Returns:
        RouterResponse: ``ok=True`` with ``data['text']`` and
        ``data['citations']`` on success; ``ok=False`` with an ``error``
        string if the question is missing, the LLM call fails, or any
        stage raises.
    """
    # Local imports so the router can start without the RAG/Memory
    # clients present (they are only needed for this mode).
    from rag_client import rag_client
    from memory_client import memory_client

    logger.info(f"Handling RAG query: dao_id={req.dao_id}, user_id={req.user_id}")

    try:
        # Extract the question; fall back to the plain chat message.
        question = req.payload.get("question") or req.message
        if not question:
            return RouterResponse(
                ok=False,
                provider_id="router",
                error="Missing 'question' in payload"
            )

        dao_id = req.dao_id or "daarion"
        user_id = req.user_id or "anonymous"

        # 1. Get Memory context. Best-effort: a Memory outage must not
        # break the RAG answer, so failures are logged and we continue
        # with an empty context.
        memory_ctx = {}
        try:
            memory_ctx = await memory_client.get_context(
                user_id=user_id,
                agent_id=req.agent or "daarwizz",
                team_id=dao_id,
                channel_id=req.payload.get("channel_id"),
                limit=10
            )
            logger.info(f"Memory context retrieved: {len(memory_ctx.get('facts', []))} facts, {len(memory_ctx.get('recent_events', []))} events")
        except Exception as e:
            logger.warning(f"Memory context fetch failed: {e}")

        # 2. Query RAG Service. Hard dependency: an exception here falls
        # through to the outer handler and yields an error response.
        rag_resp = await rag_client.query(
            dao_id=dao_id,
            question=question,
            top_k=5,
            user_id=user_id
        )

        rag_citations = rag_resp.get("citations", [])
        rag_docs = rag_resp.get("documents", [])

        logger.info(f"RAG retrieved {len(rag_docs)} documents, {len(rag_citations)} citations")

        # 3. Build the final prompt: system instruction + optional Memory
        # section + numbered document excerpts + the user question.
        system_prompt = (
            "Ти асистент microDAO. Використовуй і особисту пам'ять, і документи DAO.\n"
            "Формуй чітку, структуровану відповідь українською, посилаючись на документи "
            "через індекси [1], [2] тощо, де це доречно.\n\n"
        )

        # Memory section: up to 5 facts and 3 recent events.
        memory_text = ""
        if memory_ctx.get("facts"):
            facts_summary = ", ".join([
                f"{f.get('fact_key', '')}={f.get('fact_value', '')}"
                for f in memory_ctx["facts"][:5]
            ])
            if facts_summary:
                memory_text += f"Особисті факти: {facts_summary}\n"
        if memory_ctx.get("recent_events"):
            recent = memory_ctx["recent_events"][:3]
            events_summary = "\n".join([
                f"- {e.get('body_text', '')[:100]}"  # first 100 chars per event
                for e in recent
            ])
            if events_summary:
                memory_text += f"Останні події:\n{events_summary}\n"

        # Document excerpts, numbered [1]..[5] so the LLM can cite them.
        docs_text = ""
        for i, citation in enumerate(rag_citations[:5], start=1):
            doc_id = citation.get("doc_id", "unknown")
            page = citation.get("page", 0)
            excerpt = citation.get("excerpt", "")
            docs_text += f"[{i}] (doc_id={doc_id}, page={page}): {excerpt}\n"

        # BUGFIX: the memory section used to be built inside an f-string
        # replacement field containing backslash escapes, which is a
        # SyntaxError on Python < 3.12 (backslashes in f-string
        # expressions were only allowed by PEP 701). Build it up front.
        memory_section = (
            f"1) Пам'ять (короткий summary):\n{memory_text}\n" if memory_text else ""
        )
        final_prompt = (
            f"{system_prompt}"
            f"{memory_section}"
            f"2) Релевантні документи (витяги):\n{docs_text}\n\n"
            f"Питання користувача:\n{question}\n\n"
            "Відповідь:"
        )

        # 4. Call the LLM provider chosen by the routing table.
        provider = self.routing_table.resolve_provider(req)
        logger.info(f"Calling LLM provider: {provider.id}")

        # Re-issue the request in plain chat mode with the combined prompt
        # so the provider path stays unchanged.
        llm_req = RouterRequest(
            mode="chat",  # Use chat mode for LLM
            agent=req.agent,
            dao_id=req.dao_id,
            source=req.source,
            session_id=req.session_id,
            user_id=req.user_id,
            message=final_prompt,
            payload=req.payload
        )

        llm_response = await provider.call(llm_req)

        if not llm_response.ok:
            return RouterResponse(
                ok=False,
                provider_id="router",
                error=f"LLM call failed: {llm_response.error}"
            )

        # 5. Return the provider's answer plus the RAG citations and
        # pipeline metadata for observability.
        return RouterResponse(
            ok=True,
            provider_id=llm_response.provider_id,
            data={
                "text": llm_response.data.get("text", ""),
                "citations": rag_citations
            },
            metadata={
                "memory_used": bool(memory_text),
                "rag_used": True,
                "documents_retrieved": len(rag_docs),
                "citations_count": len(rag_citations)
            },
            error=None
        )

    except Exception as e:
        logger.error(f"RAG query handler error: {e}", exc_info=True)
        return RouterResponse(
            ok=False,
            provider_id="router",
            error=f"RAG query failed: {str(e)}"
        )
def get_provider_info(self):
"""Get info about registered providers"""
return {