feat: add qa_build mode, tests, and region mode support

Router Configuration:
- Add mode='qa_build' routing rule in router-config.yml
- Priority 8, uses local_qwen3_8b for Q&A generation

2-Stage Q&A Pipeline Tests:
- Create test_qa_pipeline.py with comprehensive tests
- Test prompt building, JSON parsing, router integration
- Mock DAGI Router responses for testing

Region Mode (Grounding OCR):
- Add region_bbox and region_page parameters to ParseRequest
- Support region mode in local_runtime with bbox in prompt
- Update endpoints to accept region parameters (x, y, width, height, page)
- Validate region parameters and filter pages for region mode
- Pass region_bbox through inference pipeline

Updates:
- Update local_runtime to support region_bbox in prompts
- Update inference.py to pass region_bbox to local_runtime
- Update endpoints.py to handle region mode parameters
This commit is contained in:
Apple
2025-11-16 04:26:35 -08:00
parent be22752590
commit d3c701f3ff
6 changed files with 266 additions and 12 deletions

View File

@@ -34,7 +34,12 @@ async def parse_document_endpoint(
doc_url: Optional[str] = Form(None),
output_mode: str = Form("raw_json"),
dao_id: Optional[str] = Form(None),
doc_id: Optional[str] = Form(None)
doc_id: Optional[str] = Form(None),
region_bbox_x: Optional[float] = Form(None),
region_bbox_y: Optional[float] = Form(None),
region_bbox_width: Optional[float] = Form(None),
region_bbox_height: Optional[float] = Form(None),
region_page: Optional[int] = Form(None)
):
"""
Parse document (PDF or image) using dots.ocr
@@ -81,6 +86,30 @@ async def parse_document_endpoint(
image = load_image(content)
images = [image]
# For region mode, validate and prepare region bbox
region_bbox = None
if output_mode == "region":
if not all([region_bbox_x is not None, region_bbox_y is not None,
region_bbox_width is not None, region_bbox_height is not None]):
raise HTTPException(
status_code=400,
detail="region mode requires region_bbox_x, region_bbox_y, region_bbox_width, region_bbox_height"
)
region_bbox = {
"x": float(region_bbox_x),
"y": float(region_bbox_y),
"width": float(region_bbox_width),
"height": float(region_bbox_height)
}
# If region_page specified, only process that page
if region_page is not None:
if region_page < 1 or region_page > len(images):
raise HTTPException(
status_code=400,
detail=f"region_page {region_page} out of range (1-{len(images)})"
)
images = [images[region_page - 1]]
else:
# TODO: Download from doc_url
raise HTTPException(
@@ -99,14 +128,16 @@ async def parse_document_endpoint(
images=images,
output_mode=output_mode,
doc_id=doc_id or str(uuid.uuid4()),
doc_type=doc_type
doc_type=doc_type,
region_bbox=region_bbox
)
else:
parsed_doc = parse_document_from_images(
images=images,
output_mode=output_mode,
doc_id=doc_id or str(uuid.uuid4()),
doc_type=doc_type
doc_type=doc_type,
region_bbox=region_bbox
)
# Build response based on output_mode