# docker-compose.yml — dots.ocr parser-service deployment
#
# Model loader: complete dots.ocr loading (model_loader.py) with device
#   detection (CUDA/CPU/MPS) and fallback, low_cpu_mem_usage memory
#   optimization, improved error handling/logging, and support for both
#   local model paths and the Hugging Face Hub.
# Docker: multi-stage Dockerfile (cpu/cuda build targets), this compose
#   file for parser-service, .dockerignore, persistent model-cache volume.
# Configuration: DOTS_OCR_MODEL_ID and DEVICE env vars (backward
#   compatible) with sensible defaults.
# Deployment: see DEPLOYMENT.md for local (venv) deployment, Docker
#   Compose deployment, Ollama runtime setup, and troubleshooting.
# Integration: parser-service is also wired into the main
#   docker-compose.yml (volumes, networks, health checks, dependencies).
# NOTE: the top-level `version` key is obsolete under the Compose
# Specification (Compose v2 ignores it); kept only for compatibility
# with legacy docker-compose v1 tooling.
version: '3.8'

services:
  # Document parser backed by the dots.ocr model.
  parser-service:
    build:
      context: .
      dockerfile: Dockerfile
      target: cpu  # Use 'cuda' for GPU support
    container_name: dagi-parser-service
    ports:
      - "9400:9400"
    environment:
      # Model configuration (old and new variable names kept in sync
      # for backward compatibility).
      - PARSER_MODEL_NAME=${PARSER_MODEL_NAME:-rednote-hilab/dots.ocr}
      - DOTS_OCR_MODEL_ID=${DOTS_OCR_MODEL_ID:-rednote-hilab/dots.ocr}
      - PARSER_DEVICE=${PARSER_DEVICE:-cpu}
      - DEVICE=${DEVICE:-cpu}

      # Runtime configuration
      - RUNTIME_TYPE=${RUNTIME_TYPE:-local}
      - USE_DUMMY_PARSER=${USE_DUMMY_PARSER:-false}
      - ALLOW_DUMMY_FALLBACK=${ALLOW_DUMMY_FALLBACK:-true}

      # Ollama (if RUNTIME_TYPE=ollama)
      - OLLAMA_BASE_URL=${OLLAMA_BASE_URL:-http://ollama:11434}

      # Processing limits
      - PARSER_MAX_PAGES=${PARSER_MAX_PAGES:-100}
      - MAX_FILE_SIZE_MB=${MAX_FILE_SIZE_MB:-50}
      - PDF_DPI=${PDF_DPI:-200}
      - IMAGE_MAX_SIZE=${IMAGE_MAX_SIZE:-2048}

      # Service
      - API_HOST=0.0.0.0
      - API_PORT=9400
      - TEMP_DIR=/tmp/parser
    volumes:
      # Model cache (persist between restarts)
      - parser-model-cache:/root/.cache/huggingface
      # Temp files
      - parser-temp:/tmp/parser
      # Logs
      - ./logs:/app/logs
    networks:
      - dagi-network
    restart: unless-stopped
    healthcheck:
      test: ["CMD", "curl", "-f", "http://localhost:9400/health"]
      interval: 30s
      timeout: 10s
      retries: 3
    # Uncomment for GPU support (also set build target: cuda above)
    # deploy:
    #   resources:
    #     reservations:
    #       devices:
    #         - driver: nvidia
    #           count: 1
    #           capabilities: [gpu]
# Optional: Ollama service (if using Ollama runtime)
|
|
ollama:
|
|
image: ollama/ollama:latest
|
|
container_name: dagi-ollama
|
|
ports:
|
|
- "11434:11434"
|
|
volumes:
|
|
- ollama-data:/root/.ollama
|
|
networks:
|
|
- dagi-network
|
|
restart: unless-stopped
|
|
# Uncomment for GPU support
|
|
# deploy:
|
|
# resources:
|
|
# reservations:
|
|
# devices:
|
|
# - driver: nvidia
|
|
# count: 1
|
|
# capabilities: [gpu]
|
|
|
|
# Named volumes so model caches and Ollama data survive container
# recreation; `driver: local` is the default but stated explicitly.
volumes:
  parser-model-cache:
    driver: local
  parser-temp:
    driver: local
  ollama-data:
    driver: local
# Shared network with the main stack. It must exist before `up`:
#   docker network create dagi-network
networks:
  dagi-network:
    external: true
    name: dagi-network