feat: integrate dots.ocr native prompt modes and 2-stage qa_pairs pipeline

Prompt Modes Integration:
- Create local_runtime.py with DOTS_PROMPT_MAP
- Map OutputMode to native dots.ocr prompt modes (prompt_layout_all_en, prompt_ocr, etc.)
- Support dict_promptmode_to_prompt from dots.ocr with fallback prompts
- Add layout_only and region modes to OutputMode enum

2-Stage Q&A Pipeline:
- Create qa_builder.py for 2-stage qa_pairs generation
- Stage 1: PARSER (dots.ocr) → raw JSON via prompt_layout_all_en
- Stage 2: LLM (DAGI Router) → Q&A pairs via mode=qa_build
- Update endpoints.py to use 2-stage pipeline for qa_pairs mode
- Add ROUTER_BASE_URL and ROUTER_TIMEOUT to config

Updates:
- Update inference.py to use local_runtime with native prompts
- Update ollama_client.py to use same prompt map
- Add PROMPT_MODES.md documentation
This commit is contained in:
Apple
2025-11-16 04:24:03 -08:00
parent d474a085c3
commit be22752590
8 changed files with 714 additions and 44 deletions

View File

@@ -21,6 +21,7 @@ from app.runtime.preprocessing import (
from app.runtime.postprocessing import (
build_chunks, build_qa_pairs, build_markdown
)
from app.runtime.qa_builder import build_qa_pairs_via_router
logger = logging.getLogger(__name__)
@@ -120,9 +121,27 @@ async def parse_document_endpoint(
elif output_mode == "markdown":
response_data["markdown"] = build_markdown(parsed_doc)
elif output_mode == "qa_pairs":
response_data["qa_pairs"] = build_qa_pairs(parsed_doc)
# 2-stage pipeline: PARSER → LLM (DAGI Router)
logger.info("Starting 2-stage Q&A pipeline: PARSER → LLM")
try:
qa_pairs = await build_qa_pairs_via_router(
parsed_doc=parsed_doc,
dao_id=dao_id or "daarion"
)
response_data["qa_pairs"] = qa_pairs
logger.info(f"Generated {len(qa_pairs)} Q&A pairs via DAGI Router")
except Exception as e:
logger.error(f"Q&A generation failed, falling back to simple extraction: {e}")
# Fallback to simple Q&A extraction
response_data["qa_pairs"] = build_qa_pairs(parsed_doc)
elif output_mode == "chunks":
response_data["chunks"] = build_chunks(parsed_doc, dao_id=dao_id)
elif output_mode == "layout_only":
# Return document with layout info only
response_data["document"] = parsed_doc
elif output_mode == "region":
# Region parsing (for future use)
response_data["document"] = parsed_doc
return ParseResponse(**response_data)
@@ -136,13 +155,20 @@ async def parse_document_endpoint(
@router.post("/parse_qa", response_model=ParseResponse)
async def parse_qa_endpoint(
file: Optional[UploadFile] = File(None),
doc_url: Optional[str] = Form(None)
doc_url: Optional[str] = Form(None),
dao_id: Optional[str] = Form(None)
):
"""Parse document and return Q&A pairs"""
"""
Parse document and return Q&A pairs (2-stage pipeline)
Stage 1: PARSER (dots.ocr) → raw JSON
Stage 2: LLM (DAGI Router) → Q&A pairs
"""
return await parse_document_endpoint(
file=file,
doc_url=doc_url,
output_mode="qa_pairs"
output_mode="qa_pairs",
dao_id=dao_id
)