feat: complete dots.ocr integration with deployment setup
Model Loader:
- Update model_loader.py with complete dots.ocr loading code
- Proper device detection (CUDA/CPU/MPS) with fallback
- Memory optimization (low_cpu_mem_usage)
- Better error handling and logging
- Support for local model paths and HF Hub

Docker:
- Multi-stage Dockerfile (CPU/CUDA builds)
- docker-compose.yml for parser-service
- .dockerignore for clean builds
- Model cache volume for persistence

Configuration:
- Support DOTS_OCR_MODEL_ID and DEVICE env vars (backward compatible)
- Better defaults and environment variable handling

Deployment:
- Add DEPLOYMENT.md with detailed instructions
- Local deployment (venv)
- Docker Compose deployment
- Ollama runtime setup
- Troubleshooting guide

Integration:
- Add parser-service to main docker-compose.yml
- Configure volumes and networks
- Health checks and dependencies
This commit is contained in:
@@ -37,56 +37,94 @@ def load_model() -> Optional[object]:
|
||||
|
||||
try:
|
||||
# Load dots.ocr model
|
||||
# Note: Adjust imports and model class based on actual dots.ocr implementation
|
||||
# This is a template that should work with most Vision-Language models
|
||||
# dots.ocr is a Vision-Language Model for document OCR and layout parsing
|
||||
|
||||
try:
|
||||
from transformers import AutoModelForVision2Seq, AutoProcessor
|
||||
import torch
|
||||
except ImportError:
|
||||
logger.error("transformers or torch not installed. Install with: pip install transformers torch")
|
||||
except ImportError as e:
|
||||
logger.error(f"transformers or torch not installed: {e}")
|
||||
logger.error("Install with: pip install transformers torch")
|
||||
if not settings.ALLOW_DUMMY_FALLBACK:
|
||||
raise
|
||||
return None
|
||||
|
||||
logger.info(f"Loading model from: {settings.PARSER_MODEL_NAME}")
|
||||
model_name = settings.PARSER_MODEL_NAME
|
||||
logger.info(f"Loading dots.ocr model from: {model_name}")
|
||||
logger.info(f"Target device: {settings.PARSER_DEVICE}")
|
||||
|
||||
# Load processor
|
||||
processor = AutoProcessor.from_pretrained(
|
||||
settings.PARSER_MODEL_NAME,
|
||||
trust_remote_code=True # If model has custom code
|
||||
)
|
||||
# Load processor (handles image preprocessing and text tokenization)
|
||||
try:
|
||||
processor = AutoProcessor.from_pretrained(
|
||||
model_name,
|
||||
trust_remote_code=True # dots.ocr may have custom code
|
||||
)
|
||||
logger.info("Processor loaded successfully")
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to load processor: {e}")
|
||||
if not settings.ALLOW_DUMMY_FALLBACK:
|
||||
raise
|
||||
return None
|
||||
|
||||
# Determine device and dtype
|
||||
device = settings.PARSER_DEVICE
|
||||
if device == "cuda" and not torch.cuda.is_available():
|
||||
logger.warning("CUDA not available, falling back to CPU")
|
||||
device = "cpu"
|
||||
elif device == "mps" and not hasattr(torch.backends, "mps") or not torch.backends.mps.is_available():
|
||||
logger.warning("MPS not available, falling back to CPU")
|
||||
device = "cpu"
|
||||
|
||||
dtype = torch.float16 if device != "cpu" else torch.float32
|
||||
# Check CUDA availability
|
||||
if device == "cuda":
|
||||
if not torch.cuda.is_available():
|
||||
logger.warning("CUDA requested but not available, falling back to CPU")
|
||||
device = "cpu"
|
||||
else:
|
||||
logger.info(f"Using CUDA device: {torch.cuda.get_device_name(0)}")
|
||||
|
||||
# Check MPS availability (Apple Silicon)
|
||||
elif device == "mps":
|
||||
if not hasattr(torch.backends, "mps") or not torch.backends.mps.is_available():
|
||||
logger.warning("MPS requested but not available, falling back to CPU")
|
||||
device = "cpu"
|
||||
else:
|
||||
logger.info("Using MPS (Apple Silicon)")
|
||||
|
||||
# Determine dtype based on device
|
||||
if device == "cpu":
|
||||
dtype = torch.float32
|
||||
else:
|
||||
dtype = torch.float16 # Use half precision for GPU to save memory
|
||||
|
||||
logger.info(f"Loading model with dtype: {dtype}")
|
||||
|
||||
# Load model
|
||||
model = AutoModelForVision2Seq.from_pretrained(
|
||||
settings.PARSER_MODEL_NAME,
|
||||
device_map=device if device != "cpu" else None,
|
||||
torch_dtype=dtype,
|
||||
trust_remote_code=True
|
||||
)
|
||||
|
||||
if device == "cpu":
|
||||
model = model.to("cpu")
|
||||
try:
|
||||
model = AutoModelForVision2Seq.from_pretrained(
|
||||
model_name,
|
||||
device_map=device if device != "cpu" else None,
|
||||
torch_dtype=dtype,
|
||||
trust_remote_code=True,
|
||||
low_cpu_mem_usage=True # Optimize memory usage
|
||||
)
|
||||
|
||||
# Explicitly move to device if CPU
|
||||
if device == "cpu":
|
||||
model = model.to("cpu")
|
||||
model.eval() # Set to evaluation mode
|
||||
|
||||
logger.info(f"Model loaded successfully on device: {device}")
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to load model: {e}", exc_info=True)
|
||||
if not settings.ALLOW_DUMMY_FALLBACK:
|
||||
raise
|
||||
return None
|
||||
|
||||
# Store model and processor
|
||||
_model = {
|
||||
"model": model,
|
||||
"processor": processor,
|
||||
"device": device
|
||||
"device": device,
|
||||
"dtype": dtype
|
||||
}
|
||||
|
||||
logger.info(f"Model loaded successfully on device: {device}")
|
||||
logger.info(f"dots.ocr model ready on {device}")
|
||||
|
||||
except ImportError as e:
|
||||
logger.error(f"Required packages not installed: {e}")
|
||||
|
||||
Reference in New Issue
Block a user