feat: add tests and integrate dots.ocr model
G.2.5 - Tests:
- Add pytest test suite with fixtures
  - test_preprocessing.py - PDF/image loading, normalization, validation
  - test_postprocessing.py - chunks, QA pairs, markdown generation
  - test_inference.py - dummy parser and inference functions
  - test_api.py - API endpoint tests
- Add pytest.ini configuration

G.1.3 - dots.ocr Integration:
- Update model_loader.py with real model loading code
  - Support for AutoModelForVision2Seq and AutoProcessor
  - Device handling (CUDA/CPU/MPS) with fallback
  - Error handling with dummy fallback option
- Update inference.py with real model inference
  - Process images through model
  - Generate and decode outputs
  - Parse model output to blocks
- Add model_output_parser.py
  - Parse JSON or plain text model output
  - Convert to structured blocks
  - Layout detection support (placeholder)

Dependencies:
- Add pytest, pytest-asyncio, httpx for testing
This commit is contained in:
109
services/parser-service/tests/test_api.py
Normal file
109
services/parser-service/tests/test_api.py
Normal file
@@ -0,0 +1,109 @@
|
||||
"""
|
||||
Tests for API endpoints
|
||||
"""
|
||||
|
||||
import pytest
|
||||
from fastapi.testclient import TestClient
|
||||
from PIL import Image
|
||||
import io
|
||||
|
||||
from app.main import app
|
||||
|
||||
# Module-level TestClient wrapping the imported FastAPI `app`; every test
# class in this file sends its requests through this one client.
client = TestClient(app)
|
||||
|
||||
|
||||
class TestHealthEndpoint:
    """Checks for the GET /health endpoint."""

    def test_health(self):
        """GET /health answers 200 with the expected status and service name."""
        resp = client.get("/health")
        assert resp.status_code == 200

        payload = resp.json()
        assert payload["status"] == "healthy"
        assert payload["service"] == "parser-service"
|
||||
|
||||
|
||||
class TestParseEndpoint:
    """Checks for the POST /ocr/parse endpoint."""

    @staticmethod
    def _blank_png():
        """Return an in-memory 800x600 white PNG, rewound to its start."""
        stream = io.BytesIO()
        Image.new('RGB', (800, 600), color='white').save(stream, format='PNG')
        # Rewind so the upload reads the image from the first byte.
        stream.seek(0)
        return stream

    def _post_png(self, form_fields):
        """POST the blank PNG to /ocr/parse together with *form_fields*."""
        upload = {"file": ("test.png", self._blank_png(), "image/png")}
        return client.post("/ocr/parse", files=upload, data=form_fields)

    def test_parse_no_file(self):
        """A request without an uploaded file is expected to yield 400."""
        resp = client.post("/ocr/parse")
        assert resp.status_code == 400

    def test_parse_image(self):
        """raw_json mode returns 200 and at least one known result key."""
        resp = self._post_png({"output_mode": "raw_json"})

        assert resp.status_code == 200
        payload = resp.json()
        assert any(
            key in payload for key in ("document", "chunks", "markdown")
        )

    def test_parse_chunks_mode(self):
        """chunks mode returns 200 and a "chunks" entry."""
        resp = self._post_png({"output_mode": "chunks", "dao_id": "test-dao"})

        assert resp.status_code == 200
        payload = resp.json()
        assert "chunks" in payload

    def test_parse_markdown_mode(self):
        """markdown mode returns 200 and a "markdown" entry."""
        resp = self._post_png({"output_mode": "markdown"})

        assert resp.status_code == 200
        payload = resp.json()
        assert "markdown" in payload
|
||||
|
||||
|
||||
class TestParseChunksEndpoint:
    """Checks for the POST /ocr/parse_chunks endpoint."""

    def test_parse_chunks(self):
        """Uploading a PNG with a dao_id returns chunks tagged with that id."""
        # Build a blank 800x600 white PNG entirely in memory.
        png_stream = io.BytesIO()
        Image.new('RGB', (800, 600), color='white').save(
            png_stream, format='PNG'
        )
        # Rewind so the upload reads the image from the first byte.
        png_stream.seek(0)

        upload = {"file": ("test.png", png_stream, "image/png")}
        form_fields = {"dao_id": "test-dao"}
        resp = client.post(
            "/ocr/parse_chunks", files=upload, data=form_fields
        )

        assert resp.status_code == 200
        payload = resp.json()
        assert "chunks" in payload
        assert "total_chunks" in payload
        assert payload["dao_id"] == "test-dao"
|
||||
|
||||
Reference in New Issue
Block a user