feat: complete dots.ocr integration with deployment setup
Model Loader:
- Update model_loader.py with complete dots.ocr loading code
- Proper device detection (CUDA/CPU/MPS) with fallback
- Memory optimization (low_cpu_mem_usage)
- Better error handling and logging
- Support for local model paths and HF Hub

Docker:
- Multi-stage Dockerfile (CPU/CUDA builds)
- docker-compose.yml for parser-service
- .dockerignore for clean builds
- Model cache volume for persistence

Configuration:
- Support DOTS_OCR_MODEL_ID and DEVICE env vars (backward compatible)
- Better defaults and environment variable handling

Deployment:
- Add DEPLOYMENT.md with detailed instructions
- Local deployment (venv)
- Docker Compose deployment
- Ollama runtime setup
- Troubleshooting guide

Integration:
- Add parser-service to main docker-compose.yml
- Configure volumes and networks
- Health checks and dependencies
This commit is contained in:
93
services/parser-service/docker-compose.yml
Normal file
93
services/parser-service/docker-compose.yml
Normal file
@@ -0,0 +1,93 @@
|
||||
version: '3.8'
|
||||
|
||||
services:
|
||||
parser-service:
|
||||
build:
|
||||
context: .
|
||||
dockerfile: Dockerfile
|
||||
target: cpu # Use 'cuda' for GPU support
|
||||
container_name: dagi-parser-service
|
||||
ports:
|
||||
- "9400:9400"
|
||||
environment:
|
||||
# Model configuration
|
||||
- PARSER_MODEL_NAME=${PARSER_MODEL_NAME:-rednote-hilab/dots.ocr}
|
||||
- DOTS_OCR_MODEL_ID=${DOTS_OCR_MODEL_ID:-rednote-hilab/dots.ocr}
|
||||
- PARSER_DEVICE=${PARSER_DEVICE:-cpu}
|
||||
- DEVICE=${DEVICE:-cpu}
|
||||
|
||||
# Runtime configuration
|
||||
- RUNTIME_TYPE=${RUNTIME_TYPE:-local}
|
||||
- USE_DUMMY_PARSER=${USE_DUMMY_PARSER:-false}
|
||||
- ALLOW_DUMMY_FALLBACK=${ALLOW_DUMMY_FALLBACK:-true}
|
||||
|
||||
# Ollama (if RUNTIME_TYPE=ollama)
|
||||
- OLLAMA_BASE_URL=${OLLAMA_BASE_URL:-http://ollama:11434}
|
||||
|
||||
# Processing limits
|
||||
- PARSER_MAX_PAGES=${PARSER_MAX_PAGES:-100}
|
||||
- MAX_FILE_SIZE_MB=${MAX_FILE_SIZE_MB:-50}
|
||||
- PDF_DPI=${PDF_DPI:-200}
|
||||
- IMAGE_MAX_SIZE=${IMAGE_MAX_SIZE:-2048}
|
||||
|
||||
# Service
|
||||
- API_HOST=0.0.0.0
|
||||
- API_PORT=9400
|
||||
- TEMP_DIR=/tmp/parser
|
||||
volumes:
|
||||
# Model cache (persist between restarts)
|
||||
- parser-model-cache:/root/.cache/huggingface
|
||||
# Temp files
|
||||
- parser-temp:/tmp/parser
|
||||
# Logs
|
||||
- ./logs:/app/logs
|
||||
networks:
|
||||
- dagi-network
|
||||
restart: unless-stopped
|
||||
healthcheck:
|
||||
test: ["CMD", "curl", "-f", "http://localhost:9400/health"]
|
||||
interval: 30s
|
||||
timeout: 10s
|
||||
retries: 3
|
||||
# Uncomment for GPU support
|
||||
# deploy:
|
||||
# resources:
|
||||
# reservations:
|
||||
# devices:
|
||||
# - driver: nvidia
|
||||
# count: 1
|
||||
# capabilities: [gpu]
|
||||
|
||||
# Optional: Ollama service (if using Ollama runtime)
|
||||
ollama:
|
||||
image: ollama/ollama:latest
|
||||
container_name: dagi-ollama
|
||||
ports:
|
||||
- "11434:11434"
|
||||
volumes:
|
||||
- ollama-data:/root/.ollama
|
||||
networks:
|
||||
- dagi-network
|
||||
restart: unless-stopped
|
||||
# Uncomment for GPU support
|
||||
# deploy:
|
||||
# resources:
|
||||
# reservations:
|
||||
# devices:
|
||||
# - driver: nvidia
|
||||
# count: 1
|
||||
# capabilities: [gpu]
|
||||
|
||||
volumes:
|
||||
parser-model-cache:
|
||||
driver: local
|
||||
parser-temp:
|
||||
driver: local
|
||||
ollama-data:
|
||||
driver: local
|
||||
|
||||
networks:
|
||||
dagi-network:
|
||||
external: true
|
||||
name: dagi-network
|
||||
|
||||
Reference in New Issue
Block a user