Files
microdao-daarion/services/rag-service/tests/test_e2e.py
Apple 9b86f9a694 feat: implement RAG Service MVP with PARSER + Memory integration
RAG Service Implementation:
- Create rag-service/ with full structure (config, document_store, embedding, pipelines)
- Document Store: PostgreSQL + pgvector via Haystack
- Embedding: BAAI/bge-m3 (multilingual, 1024 dim)
- Ingest Pipeline: Convert ParsedDocument to Haystack Documents, embed, index
- Query Pipeline: Retrieve documents, generate answers via DAGI Router
- FastAPI endpoints: /ingest, /query, /health

Tests:
- Unit tests for ingest and query pipelines
- E2E test with example parsed JSON
- Test fixtures with real PARSER output example

Router Integration:
- Add mode='rag_query' routing rule in router-config.yml
- Priority 7, uses local_qwen3_8b for RAG queries

Docker:
- Add rag-service to docker-compose.yml
- Configure dependencies (router, city-db)
- Add model cache volume

Documentation:
- Complete README with API examples
- Integration guides for PARSER and Router
2025-11-16 04:41:53 -08:00

68 lines
2.0 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""
E2E tests for RAG Service
Tests full ingest → query pipeline
"""
import pytest
import json
from pathlib import Path
from fastapi.testclient import TestClient
from app.main import app
# Test client bound to the RAG service FastAPI app; shared by all tests below.
client = TestClient(app)

# Load the example parsed JSON fixture that is sent as the ingest payload.
# The encoding is given explicitly so decoding does not depend on the
# platform's default locale encoding (which is not UTF-8 everywhere).
FIXTURES_DIR = Path(__file__).parent / "fixtures"
EXAMPLE_JSON = json.loads(
    (FIXTURES_DIR / "parsed_json_example.json").read_text(encoding="utf-8")
)
class TestE2E:
    """Exercise the RAG service through its HTTP endpoints."""

    def test_health(self):
        """GET /health answers 200 and reports a healthy rag-service."""
        resp = client.get("/health")
        assert resp.status_code == 200

        body = resp.json()
        assert body["status"] == "healthy"
        assert body["service"] == "rag-service"

    @pytest.mark.skip(reason="Requires database connection")
    def test_ingest_then_query(self):
        """Ingest the example document, then query it and check the reply."""
        dao = "daarion"

        # Step 1: ingest the example parsed document.
        ingested = client.post(
            "/ingest",
            json={
                "dao_id": dao,
                "doc_id": "microdao-tokenomics-2025-11",
                "parsed_json": EXAMPLE_JSON,
            },
        )
        assert ingested.status_code == 200
        ingest_body = ingested.json()
        assert ingest_body["status"] == "success"
        assert ingest_body["doc_count"] > 0

        # Step 2: ask a question for the same DAO.
        answered = client.post(
            "/query",
            json={
                "dao_id": dao,
                "question": "Поясни токеноміку microDAO і роль стейкінгу",
            },
        )
        assert answered.status_code == 200
        query_body = answered.json()

        # The answer and the citation list must both be present and non-empty.
        assert "answer" in query_body
        assert len(query_body["answer"]) > 0
        assert "citations" in query_body
        assert len(query_body["citations"]) > 0

        # The first citation must expose doc_id, page and excerpt.
        first_citation = query_body["citations"][0]
        for required_key in ("doc_id", "page", "excerpt"):
            assert required_key in first_citation