feat: add Ollama runtime support and RAG implementation plan
Ollama Runtime:
- Add ollama_client.py for Ollama API integration
- Support for dots-ocr model via Ollama
- Add OLLAMA_BASE_URL configuration
- Update inference.py to support Ollama runtime (RUNTIME_TYPE=ollama)
- Update endpoints to handle async Ollama calls
- Alternative to local transformers model

RAG Implementation Plan:
- Create TODO-RAG.md with detailed Haystack integration plan
- Document Store setup (pgvector)
- Embedding model selection
- Ingest pipeline (PARSER → RAG)
- Query pipeline (RAG → LLM)
- Integration with DAGI Router
- Bot commands (/upload_doc, /ask_doc)
- Testing strategy

Now supports three runtime modes:
1. Local transformers (RUNTIME_TYPE=local)
2. Ollama (RUNTIME_TYPE=ollama)
3. Dummy (USE_DUMMY_PARSER=true)
This commit is contained in:
@@ -16,11 +16,94 @@ from app.runtime.preprocessing import (
|
||||
)
|
||||
from app.runtime.postprocessing import build_parsed_document
|
||||
from app.runtime.model_output_parser import parse_model_output_to_blocks
|
||||
from app.runtime.ollama_client import (
|
||||
call_ollama_vision, parse_ollama_response, OutputMode as OllamaOutputMode
|
||||
)
|
||||
from app.core.config import settings
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
async def parse_document_with_ollama(
    images: List[Image.Image],
    output_mode: Literal["raw_json", "markdown", "qa_pairs", "chunks"] = "raw_json",
    doc_id: Optional[str] = None,
    doc_type: Literal["pdf", "image"] = "image"
) -> ParsedDocument:
    """
    Parse a document page-by-page via the Ollama API.

    Each page image is re-encoded as PNG, sent to Ollama, and the response
    is turned into layout blocks. Pages that fail are logged and skipped,
    so a single bad page does not abort the whole document.

    Args:
        images: Page images (PIL) to process, in reading order.
        output_mode: Desired output format; "chunks" is requested from
            Ollama as raw_json and chunked downstream.
        doc_id: Optional document identifier (defaults to "parsed-doc").
        doc_type: Source document kind ("pdf" or "image").

    Returns:
        ParsedDocument built from the successfully processed pages.
    """
    import io

    # "chunks" has no dedicated Ollama mode: request raw_json and chunk later.
    mode_by_name = {
        "raw_json": OllamaOutputMode.raw_json,
        "markdown": OllamaOutputMode.markdown,
        "qa_pairs": OllamaOutputMode.qa_pairs,
        "chunks": OllamaOutputMode.raw_json,
    }
    ollama_mode = mode_by_name.get(output_mode, OllamaOutputMode.raw_json)

    pages_data = []

    for page_num, page_image in enumerate(images, start=1):
        try:
            # Ollama expects raw image bytes; re-encode the page as PNG.
            png_buffer = io.BytesIO()
            page_image.convert("RGB").save(png_buffer, format="PNG")

            ollama_data = await call_ollama_vision(png_buffer.getvalue(), ollama_mode)
            raw_text, parsed_json = parse_ollama_response(ollama_data, ollama_mode)

            logger.debug(f"Ollama output for page {page_num}: {raw_text[:100]}...")

            if not (parsed_json and isinstance(parsed_json, dict)):
                # Plain-text output: derive blocks from the raw model text.
                blocks = parse_model_output_to_blocks(
                    raw_text, page_image.size, page_num=page_num
                )
            else:
                # Structured JSON: take its blocks, falling back to a single
                # full-page paragraph when the model returned none.
                blocks = parsed_json.get("blocks", []) or [{
                    "type": "paragraph",
                    "text": raw_text,
                    "bbox": {"x": 0, "y": 0, "width": page_image.width, "height": page_image.height},
                    "reading_order": 1
                }]

            pages_data.append({
                "blocks": blocks,
                "width": page_image.width,
                "height": page_image.height
            })

            logger.info(f"Processed page {page_num}/{len(images)} via Ollama")

        except Exception as e:
            # Best effort per page: log the failure and keep going.
            logger.error(f"Error processing page {page_num} with Ollama: {e}", exc_info=True)
            continue

    return build_parsed_document(
        pages_data=pages_data,
        doc_id=doc_id or "parsed-doc",
        doc_type=doc_type,
        metadata={"model": settings.PARSER_MODEL_NAME, "runtime": "ollama"}
    )
|
||||
|
||||
|
||||
def parse_document_from_images(
|
||||
images: List[Image.Image],
|
||||
output_mode: Literal["raw_json", "markdown", "qa_pairs", "chunks"] = "raw_json",
|
||||
@@ -44,7 +127,12 @@ def parse_document_from_images(
|
||||
logger.info("Using dummy parser (USE_DUMMY_PARSER=true)")
|
||||
return dummy_parse_document_from_images(images, doc_id, doc_type)
|
||||
|
||||
# Try to get model
|
||||
# Check if using Ollama runtime
|
||||
if settings.RUNTIME_TYPE == "ollama":
|
||||
logger.info("Using Ollama runtime")
|
||||
return await parse_document_with_ollama(images, output_mode, doc_id, doc_type)
|
||||
|
||||
# Try to get local model
|
||||
model = get_model()
|
||||
|
||||
if model is None:
|
||||
|
||||
Reference in New Issue
Block a user