Files
microdao-daarion/services/parser-service/tests/test_inference.py
Apple 2a353040f6 feat: add tests and integrate dots.ocr model
G.2.5 - Tests:
- Add pytest test suite with fixtures
- test_preprocessing.py - PDF/image loading, normalization, validation
- test_postprocessing.py - chunks, QA pairs, markdown generation
- test_inference.py - dummy parser and inference functions
- test_api.py - API endpoint tests
- Add pytest.ini configuration

G.1.3 - dots.ocr Integration:
- Update model_loader.py with real model loading code
  - Support for AutoModelForVision2Seq and AutoProcessor
  - Device handling (CUDA/CPU/MPS) with fallback
  - Error handling with dummy fallback option
- Update inference.py with real model inference
  - Process images through model
  - Generate and decode outputs
  - Parse model output to blocks
- Add model_output_parser.py
  - Parse JSON or plain text model output
  - Convert to structured blocks
  - Layout detection support (placeholder)

Dependencies:
- Add pytest, pytest-asyncio, httpx for testing
2025-11-15 13:25:01 -08:00

54 lines
1.6 KiB
Python

"""
Tests for inference functions
"""
import pytest
from PIL import Image
from app.runtime.inference import (
parse_document_from_images,
dummy_parse_document_from_images
)
from app.core.config import settings
class TestDummyParser:
    """Checks for the dummy (model-free) document parser."""

    def test_dummy_parse_document_from_images(self):
        """Feed two blank pages to the dummy parser and inspect the result.

        The returned document must carry the requested id, contain one page
        per input image, give every page at least one block, and report each
        page with the dimensions of the image it was built from.
        """
        page_width, page_height = 800, 600
        blank_pages = []
        for _ in range(2):
            blank_pages.append(
                Image.new('RGB', (page_width, page_height), color='white')
            )

        parsed = dummy_parse_document_from_images(blank_pages, doc_id="test-doc")

        assert parsed.doc_id == "test-doc"
        assert len(parsed.pages) == 2
        # Same three per-page checks as before, evaluated page by page.
        for parsed_page in parsed.pages:
            assert len(parsed_page.blocks) > 0
        for parsed_page in parsed.pages:
            assert parsed_page.width == page_width
        for parsed_page in parsed.pages:
            assert parsed_page.height == page_height
class TestParseDocumentFromImages:
    """Tests for parse_document_from_images."""

    def test_parse_document_from_images_dummy_mode(self, monkeypatch):
        """Parse a single blank page with dummy mode switched on.

        Verifies that the returned document keeps the requested ``doc_id``
        and contains exactly one page for the one input image.

        Args:
            monkeypatch: pytest fixture; every change made through it is
                reverted automatically when the test finishes.
        """
        monkeypatch.setenv("USE_DUMMY_PARSER", "true")
        # Building a fresh Settings() here and binding it to a local name had
        # no effect: nothing read that local, so the environment variable
        # alone never reached the settings object imported at module level.
        # Patch that shared object directly instead.
        # NOTE(review): assumes the flag is exposed as
        # ``settings.USE_DUMMY_PARSER`` and that the inference module consults
        # this same object -- confirm against app.core.config.
        monkeypatch.setattr(settings, "USE_DUMMY_PARSER", True)

        images = [Image.new('RGB', (800, 600), color='white')]
        doc = parse_document_from_images(images, doc_id="test-doc")

        assert doc.doc_id == "test-doc"
        assert len(doc.pages) == 1

    def test_parse_document_from_images_empty(self):
        """An empty image list must raise ValueError ("No valid images")."""
        with pytest.raises(ValueError, match="No valid images"):
            parse_document_from_images([], doc_id="test-doc")