feat: add tests and integrate dots.ocr model
G.2.5 - Tests: - Add pytest test suite with fixtures - test_preprocessing.py - PDF/image loading, normalization, validation - test_postprocessing.py - chunks, QA pairs, markdown generation - test_inference.py - dummy parser and inference functions - test_api.py - API endpoint tests - Add pytest.ini configuration G.1.3 - dots.ocr Integration: - Update model_loader.py with real model loading code - Support for AutoModelForVision2Seq and AutoProcessor - Device handling (CUDA/CPU/MPS) with fallback - Error handling with dummy fallback option - Update inference.py with real model inference - Process images through model - Generate and decode outputs - Parse model output to blocks - Add model_output_parser.py - Parse JSON or plain text model output - Convert to structured blocks - Layout detection support (placeholder) Dependencies: - Add pytest, pytest-asyncio, httpx for testing
This commit is contained in:
53
services/parser-service/tests/test_inference.py
Normal file
53
services/parser-service/tests/test_inference.py
Normal file
@@ -0,0 +1,53 @@
|
||||
"""
|
||||
Tests for inference functions
|
||||
"""
|
||||
|
||||
import pytest
|
||||
from PIL import Image
|
||||
|
||||
from app.runtime.inference import (
|
||||
parse_document_from_images,
|
||||
dummy_parse_document_from_images
|
||||
)
|
||||
from app.core.config import settings
|
||||
|
||||
|
||||
class TestDummyParser:
|
||||
"""Tests for dummy parser"""
|
||||
|
||||
def test_dummy_parse_document_from_images(self):
|
||||
"""Test dummy parser with images"""
|
||||
images = [
|
||||
Image.new('RGB', (800, 600), color='white'),
|
||||
Image.new('RGB', (800, 600), color='white')
|
||||
]
|
||||
|
||||
doc = dummy_parse_document_from_images(images, doc_id="test-doc")
|
||||
|
||||
assert doc.doc_id == "test-doc"
|
||||
assert len(doc.pages) == 2
|
||||
assert all(len(page.blocks) > 0 for page in doc.pages)
|
||||
assert all(page.width == 800 for page in doc.pages)
|
||||
assert all(page.height == 600 for page in doc.pages)
|
||||
|
||||
|
||||
class TestParseDocumentFromImages:
|
||||
"""Tests for parse_document_from_images"""
|
||||
|
||||
def test_parse_document_from_images_dummy_mode(self, monkeypatch):
|
||||
"""Test parsing with dummy mode enabled"""
|
||||
monkeypatch.setenv("USE_DUMMY_PARSER", "true")
|
||||
from app.core.config import Settings
|
||||
settings = Settings()
|
||||
|
||||
images = [Image.new('RGB', (800, 600), color='white')]
|
||||
doc = parse_document_from_images(images, doc_id="test-doc")
|
||||
|
||||
assert doc.doc_id == "test-doc"
|
||||
assert len(doc.pages) == 1
|
||||
|
||||
def test_parse_document_from_images_empty(self):
|
||||
"""Test parsing with empty images list"""
|
||||
with pytest.raises(ValueError, match="No valid images"):
|
||||
parse_document_from_images([], doc_id="test-doc")
|
||||
|
||||
Reference in New Issue
Block a user