RAG Service Implementation: - Create rag-service/ with full structure (config, document_store, embedding, pipelines) - Document Store: PostgreSQL + pgvector via Haystack - Embedding: BAAI/bge-m3 (multilingual, 1024 dim) - Ingest Pipeline: Convert ParsedDocument to Haystack Documents, embed, index - Query Pipeline: Retrieve documents, generate answers via DAGI Router - FastAPI endpoints: /ingest, /query, /health Tests: - Unit tests for ingest and query pipelines - E2E test with example parsed JSON - Test fixtures with real PARSER output example Router Integration: - Add mode='rag_query' routing rule in router-config.yml - Priority 7, uses local_qwen3_8b for RAG queries Docker: - Add rag-service to docker-compose.yml - Configure dependencies (router, city-db) - Add model cache volume Documentation: - Complete README with API examples - Integration guides for PARSER and Router
68 lines
2.0 KiB
Python
68 lines
2.0 KiB
Python
"""
E2E tests for RAG Service
Tests full ingest → query pipeline
"""
|
||
|
||
import pytest
|
||
import json
|
||
from pathlib import Path
|
||
from fastapi.testclient import TestClient
|
||
|
||
from app.main import app
|
||
|
||
# Shared in-process test client; every test below sends its requests through it.
client = TestClient(app)

# Load example parsed JSON (used as the `parsed_json` payload of the ingest test).
FIXTURES_DIR = Path(__file__).parent / "fixtures"
# Decode explicitly as UTF-8 rather than relying on the platform's locale
# default, which would mis-decode or reject non-ASCII text on non-UTF-8 systems.
EXAMPLE_JSON = json.loads(
    (FIXTURES_DIR / "parsed_json_example.json").read_text(encoding="utf-8")
)
|
||
|
||
|
||
class TestE2E:
    """End-to-end tests that drive the service through its HTTP endpoints."""

    def test_health(self):
        """GET /health returns 200 with the expected status and service name."""
        resp = client.get("/health")
        assert resp.status_code == 200

        body = resp.json()
        assert body["status"] == "healthy"
        assert body["service"] == "rag-service"

    @pytest.mark.skip(reason="Requires database connection")
    def test_ingest_then_query(self):
        """Ingest the example document, then query it and check the response shape."""
        # Step 1: push the example parsed document through /ingest.
        ingest_payload = dict(
            dao_id="daarion",
            doc_id="microdao-tokenomics-2025-11",
            parsed_json=EXAMPLE_JSON,
        )
        ingest_resp = client.post("/ingest", json=ingest_payload)
        assert ingest_resp.status_code == 200

        ingest_body = ingest_resp.json()
        assert ingest_body["status"] == "success"
        assert ingest_body["doc_count"] > 0

        # Step 2: ask a question against the same DAO via /query.
        query_payload = dict(
            dao_id="daarion",
            question="Поясни токеноміку microDAO і роль стейкінгу",
        )
        query_resp = client.post("/query", json=query_payload)
        assert query_resp.status_code == 200

        query_body = query_resp.json()

        # Both fields must be present and non-empty.
        for field in ("answer", "citations"):
            assert field in query_body
            assert len(query_body[field]) > 0

        # Check citation structure on the first returned citation.
        first_citation = query_body["citations"][0]
        for key in ("doc_id", "page", "excerpt"):
            assert key in first_citation
|
||
|