feat: add Ollama runtime support and RAG implementation plan

Ollama Runtime:
- Add ollama_client.py for Ollama API integration
- Support for dots-ocr model via Ollama
- Add OLLAMA_BASE_URL configuration
- Update inference.py to support Ollama runtime (RUNTIME_TYPE=ollama)
- Update endpoints to handle async Ollama calls
- Alternative to local transformers model

RAG Implementation Plan:
- Create TODO-RAG.md with detailed Haystack integration plan
- Document Store setup (pgvector)
- Embedding model selection
- Ingest pipeline (PARSER → RAG)
- Query pipeline (RAG → LLM)
- Integration with DAGI Router
- Bot commands (/upload_doc, /ask_doc)
- Testing strategy

Now supports three runtime modes:
1. Local transformers (RUNTIME_TYPE=local)
2. Ollama (RUNTIME_TYPE=ollama)
3. Dummy (USE_DUMMY_PARSER=true)
This commit is contained in:
Apple
2025-11-16 02:56:36 -08:00
parent d56ff3493d
commit 00f9102e50
6 changed files with 607 additions and 9 deletions

View File

@@ -16,11 +16,94 @@ from app.runtime.preprocessing import (
)
from app.runtime.postprocessing import build_parsed_document
from app.runtime.model_output_parser import parse_model_output_to_blocks
from app.runtime.ollama_client import (
call_ollama_vision, parse_ollama_response, OutputMode as OllamaOutputMode
)
from app.core.config import settings
logger = logging.getLogger(__name__)
async def parse_document_with_ollama(
    images: List[Image.Image],
    output_mode: Literal["raw_json", "markdown", "qa_pairs", "chunks"] = "raw_json",
    doc_id: Optional[str] = None,
    doc_type: Literal["pdf", "image"] = "image"
) -> ParsedDocument:
    """
    Parse a document's page images via the Ollama vision API.

    Each page image is normalized to RGB, PNG-encoded, and sent to Ollama;
    the response is parsed into layout blocks. Processing is best-effort:
    a page that raises is logged and skipped, so the returned document may
    contain fewer pages than ``images``.

    Args:
        images: PIL images, one per page, in reading order.
        output_mode: Output format mode. "chunks" has no Ollama-side
            equivalent, so raw_json is requested and chunking is expected
            to happen downstream.
        doc_id: Document ID; defaults to "parsed-doc" when not given.
        doc_type: Source document type ("pdf" or "image").

    Returns:
        ParsedDocument assembled from the successfully processed pages.
    """
    import io

    # Map the public output_mode to the Ollama client's enum.
    ollama_mode_map = {
        "raw_json": OllamaOutputMode.raw_json,
        "markdown": OllamaOutputMode.markdown,
        "qa_pairs": OllamaOutputMode.qa_pairs,
        "chunks": OllamaOutputMode.raw_json  # Use raw_json for chunks, will be processed later
    }
    ollama_mode = ollama_mode_map.get(output_mode, OllamaOutputMode.raw_json)

    pages_data = []
    total = len(images)  # hoisted: loop-invariant
    for idx, image in enumerate(images, start=1):
        try:
            # Ollama expects an encoded image payload; normalize to RGB PNG.
            buf = io.BytesIO()
            image.convert("RGB").save(buf, format="PNG")
            png_bytes = buf.getvalue()

            # Call Ollama
            ollama_data = await call_ollama_vision(png_bytes, ollama_mode)
            raw_text, parsed_json = parse_ollama_response(ollama_data, ollama_mode)
            # Lazy %-args: slicing/formatting is skipped unless DEBUG is enabled.
            logger.debug("Ollama output for page %d: %s...", idx, raw_text[:100])

            # Parse into blocks
            if parsed_json and isinstance(parsed_json, dict):
                # Prefer the model's structured block list when present.
                blocks = parsed_json.get("blocks", [])
                if not blocks:
                    # Fallback: wrap the raw text in a single full-page block.
                    blocks = [{
                        "type": "paragraph",
                        "text": raw_text,
                        "bbox": {"x": 0, "y": 0, "width": image.width, "height": image.height},
                        "reading_order": 1
                    }]
            else:
                # Plain-text output (e.g. markdown): split heuristically into blocks.
                blocks = parse_model_output_to_blocks(raw_text, image.size, page_num=idx)

            pages_data.append({
                "blocks": blocks,
                "width": image.width,
                "height": image.height
            })
            logger.info("Processed page %d/%d via Ollama", idx, total)
        except Exception as e:
            # Best-effort: one bad page must not abort the whole document.
            logger.error("Error processing page %d with Ollama: %s", idx, e, exc_info=True)
            continue

    return build_parsed_document(
        pages_data=pages_data,
        doc_id=doc_id or "parsed-doc",
        doc_type=doc_type,
        metadata={"model": settings.PARSER_MODEL_NAME, "runtime": "ollama"}
    )
def parse_document_from_images(
images: List[Image.Image],
output_mode: Literal["raw_json", "markdown", "qa_pairs", "chunks"] = "raw_json",
@@ -44,7 +127,12 @@ def parse_document_from_images(
logger.info("Using dummy parser (USE_DUMMY_PARSER=true)")
return dummy_parse_document_from_images(images, doc_id, doc_type)
# Try to get model
# Check if using Ollama runtime
if settings.RUNTIME_TYPE == "ollama":
logger.info("Using Ollama runtime")
return await parse_document_with_ollama(images, output_mode, doc_id, doc_type)
# Try to get local model
model = get_model()
if model is None: