feat: add RAG quality metrics, optimized prompts, and evaluation tools

Optimized Prompts: - Create utils/rag_prompt_builder.py with citation-optimized prompts - Specialized for DAO tokenomics and technical documentation - Proper citation format [1], [2] with doc_id, page, section - Memory context integration (facts, events, summaries) - Token count estimation RAG Service Metrics: - Add comprehensive logging in query_pipeline.py - Log: question, doc_ids, scores, retrieval method, timing - Track: retrieval_time, total_query_time, documents_found, citations_count - Add metrics in ingest_pipeline.py: pages_processed, blocks_processed, pipeline_time Router Improvements: - Use optimized prompt builder in _handle_rag_query() - Add graceful fallback: if RAG unavailable, use Memory only - Log prompt token count, RAG usage, Memory usage - Return detailed metadata (rag_used, memory_used, citations_count, metrics) Evaluation Tools: - Create tests/rag_eval.py for systematic quality testing - Test fixed questions with expected doc_ids - Save results to JSON and CSV - Compare RAG Service vs Router results - Track: citations, expected docs found, query times Documentation: - Create docs/RAG_METRICS_PLAN.md - Plan for Prometheus metrics collection - Grafana dashboard panels and alerts - Implementation guide for metrics
2025-11-16 05:12:19 -08:00
parent 382e661f1f
commit 1ed1181105
6 changed files with 769 additions and 57 deletions
--- a/router_app.py
+++ b/router_app.py
@@ -189,48 +189,33 @@ class RouterApp:
            
            logger.info(f"RAG retrieved {len(rag_docs)} documents, {len(rag_citations)} citations")
            
-            # 3. Build final prompt with Memory + RAG
-            system_prompt = (
-                "Ти асистент microDAO. Використовуй і особисту пам'ять, і документи DAO.\n"
-                "Формуй чітку, структуровану відповідь українською, посилаючись на документи "
-                "через індекси [1], [2] тощо, де це доречно.\n\n"
-            )
+            # 3. Build final prompt with Memory + RAG (using optimized prompt builder)
+            from utils.rag_prompt_builder import build_rag_prompt_with_citations, estimate_token_count
            
-            # Add Memory context
-            memory_text = ""
-            if memory_ctx.get("facts"):
-                facts_summary = ", ".join([
-                    f"{f.get('fact_key', '')}={f.get('fact_value', '')}"
-                    for f in memory_ctx["facts"][:5]
-                ])
-                if facts_summary:
-                    memory_text += f"Особисті факти: {facts_summary}\n"
+            # Only include RAG if available
+            if rag_used and rag_citations:
+                final_prompt = build_rag_prompt_with_citations(
+                    question=question,
+                    memory_context=memory_ctx,
+                    rag_citations=rag_citations,
+                    rag_documents=rag_docs
+                )
+            else:
+                # Fallback: Memory only prompt
+                from utils.rag_prompt_builder import _build_memory_section
+                memory_section = _build_memory_section(memory_ctx)
+                
+                final_prompt = (
+                    "Ти — експерт-консультант з токеноміки та архітектури DAO в екосистемі DAARION.city.\n"
+                    "Відповідай на основі особистої пам'яті та контексту.\n\n"
+                )
+                if memory_section:
+                    final_prompt += f"**Особиста пам'ять та контекст:**\n{memory_section}\n\n"
+                final_prompt += f"**Питання користувача:**\n{question}\n\n**Відповідь:**"
            
-            if memory_ctx.get("recent_events"):
-                recent = memory_ctx["recent_events"][:3]
-                events_summary = "\n".join([
-                    f"- {e.get('body_text', '')[:100]}"
-                    for e in recent
-                ])
-                if events_summary:
-                    memory_text += f"Останні події:\n{events_summary}\n"
-            
-            # Add RAG documents
-            docs_text = ""
-            for i, citation in enumerate(rag_citations[:5], start=1):
-                doc_id = citation.get("doc_id", "unknown")
-                page = citation.get("page", 0)
-                excerpt = citation.get("excerpt", "")
-                docs_text += f"[{i}] (doc_id={doc_id}, page={page}): {excerpt}\n"
-            
-            # Build final prompt
-            final_prompt = (
-                f"{system_prompt}"
-                f"{'1) Пам\'ять (короткий summary):\n' + memory_text + '\n' if memory_text else ''}"
-                f"2) Релевантні документи (витяги):\n{docs_text}\n\n"
-                f"Питання користувача:\n{question}\n\n"
-                "Відповідь:"
-            )
+            # Estimate token count for logging
+            estimated_tokens = estimate_token_count(final_prompt)
+            logger.info(f"Final prompt length: ~{estimated_tokens} tokens, RAG used: {rag_used}")
            
            # 4. Call LLM provider
            provider = self.routing_table.resolve_provider(req)
@@ -263,13 +248,15 @@ class RouterApp:
                provider_id=llm_response.provider_id,
                data={
                    "text": llm_response.data.get("text", ""),
-                    "citations": rag_citations
+                    "citations": rag_citations if rag_used else []
                },
                metadata={
-                    "memory_used": bool(memory_text),
-                    "rag_used": True,
-                    "documents_retrieved": len(rag_docs),
-                    "citations_count": len(rag_citations)
+                    "memory_used": bool(memory_ctx.get("facts") or memory_ctx.get("recent_events")),
+                    "rag_used": rag_used,
+                    "documents_retrieved": len(rag_docs) if rag_used else 0,
+                    "citations_count": len(rag_citations) if rag_used else 0,
+                    "prompt_tokens_estimated": estimated_tokens,
+                    "rag_metrics": rag_resp.get("metrics") if rag_resp else None
                },
                error=None
            )