diff --git a/docker-compose.yml b/docker-compose.yml index d29f5e96..51814edf 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -193,6 +193,44 @@ services: timeout: 10s retries: 3 + # PARSER Service (Document OCR using dots.ocr) + parser-service: + build: + context: ./services/parser-service + dockerfile: Dockerfile + target: cpu + container_name: dagi-parser-service + ports: + - "9400:9400" + environment: + - PARSER_MODEL_NAME=${PARSER_MODEL_NAME:-rednote-hilab/dots.ocr} + - DOTS_OCR_MODEL_ID=${DOTS_OCR_MODEL_ID:-rednote-hilab/dots.ocr} + - PARSER_DEVICE=${PARSER_DEVICE:-cpu} + - DEVICE=${DEVICE:-cpu} + - RUNTIME_TYPE=${RUNTIME_TYPE:-local} + - USE_DUMMY_PARSER=${USE_DUMMY_PARSER:-false} + - ALLOW_DUMMY_FALLBACK=${ALLOW_DUMMY_FALLBACK:-true} + - OLLAMA_BASE_URL=${OLLAMA_BASE_URL:-http://ollama:11434} + - PARSER_MAX_PAGES=${PARSER_MAX_PAGES:-100} + - MAX_FILE_SIZE_MB=${MAX_FILE_SIZE_MB:-50} + - PDF_DPI=${PDF_DPI:-200} + - IMAGE_MAX_SIZE=${IMAGE_MAX_SIZE:-2048} + volumes: + - parser-model-cache:/root/.cache/huggingface + - ./logs:/app/logs + networks: + - dagi-network + restart: unless-stopped + healthcheck: + test: ["CMD", "python", "-c", "import urllib.request; urllib.request.urlopen('http://localhost:9400/health', timeout=5)"]  # python:3.11-slim image ships no curl + interval: 30s + timeout: 10s + retries: 3 + +volumes: + parser-model-cache: + driver: local + networks: dagi-network: driver: bridge diff --git a/services/parser-service/.dockerignore b/services/parser-service/.dockerignore new file mode 100644 index 00000000..447735bb --- /dev/null +++ b/services/parser-service/.dockerignore @@ -0,0 +1,25 @@ +__pycache__ +*.pyc +*.pyo +*.pyd +.Python +*.so +*.egg +*.egg-info +dist +build +.env +.venv +venv/ +ENV/ +.pytest_cache +.coverage +htmlcov/ +*.log +.DS_Store +.git +.gitignore +README.md +tests/ +*.md + diff --git a/services/parser-service/DEPLOYMENT.md b/services/parser-service/DEPLOYMENT.md new file mode 100644 index 00000000..11b58c25 --- /dev/null +++ b/services/parser-service/DEPLOYMENT.md @@ -0,0 +1,245 @@ +# PARSER Service - Deployment Guide 
+ +Інструкції з розгортання PARSER-сервісу з dots.ocr моделлю. + +## Варіанти розгортання + +### 1. Docker Compose (рекомендовано) + +Найпростіший спосіб - використовувати готовий `docker-compose.yml`: + +```bash +cd services/parser-service + +# CPU версія (за замовчуванням) +docker-compose up -d + +# Або з GPU (якщо є NVIDIA GPU) +# Спочатку встановіть nvidia-container-toolkit +# Потім розкоментуйте GPU секцію в docker-compose.yml +docker-compose up -d +``` + +**Environment variables** (через `.env` або `docker-compose.yml`): + +```bash +# Модель +PARSER_MODEL_NAME=rednote-hilab/dots.ocr +DOTS_OCR_MODEL_ID=rednote-hilab/dots.ocr +PARSER_DEVICE=cpu # або cuda, mps + +# Runtime +RUNTIME_TYPE=local # або ollama +USE_DUMMY_PARSER=false +ALLOW_DUMMY_FALLBACK=true + +# Ollama (якщо RUNTIME_TYPE=ollama) +OLLAMA_BASE_URL=http://ollama:11434 +``` + +### 2. Локальне розгортання (Python venv) + +#### Крок 1: Створити venv + +```bash +cd services/parser-service +python3.11 -m venv venv +source venv/bin/activate # Linux/Mac +# або +venv\Scripts\activate # Windows +``` + +#### Крок 2: Встановити залежності + +**CPU версія:** +```bash +pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu +pip install -r requirements.txt +``` + +**CUDA версія (якщо є NVIDIA GPU):** +```bash +pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118 +pip install -r requirements.txt +``` + +**MPS версія (Apple Silicon):** +```bash +pip install torch torchvision torchaudio +pip install -r requirements.txt +``` + +#### Крок 3: Налаштувати environment + +Створити `.env` файл: + +```bash +# .env +PARSER_MODEL_NAME=rednote-hilab/dots.ocr +DOTS_OCR_MODEL_ID=rednote-hilab/dots.ocr +PARSER_DEVICE=cpu # або cuda, mps +RUNTIME_TYPE=local +USE_DUMMY_PARSER=false +ALLOW_DUMMY_FALLBACK=true +``` + +#### Крок 4: Запустити сервіс + +```bash +uvicorn app.main:app --host 0.0.0.0 --port 9400 --reload +``` + +### 3. 
Ollama Runtime (альтернатива) + +Якщо не хочете встановлювати transformers/torch локально: + +#### Крок 1: Встановити Ollama + +```bash +# Linux/Mac +curl -fsSL https://ollama.ai/install.sh | sh + +# Windows +# Завантажити з https://ollama.ai/download +``` + +#### Крок 2: Завантажити dots-ocr модель + +```bash +ollama pull dots-ocr +# Або якщо модель називається інакше: +# ollama pull <model-name> +``` + +#### Крок 3: Налаштувати parser-service + +```bash +export RUNTIME_TYPE=ollama +export OLLAMA_BASE_URL=http://localhost:11434 +export PARSER_MODEL_NAME=dots-ocr +``` + +#### Крок 4: Запустити сервіс + +```bash +uvicorn app.main:app --host 0.0.0.0 --port 9400 +``` + +## Модель dots.ocr + +### Варіанти отримання моделі + +1. **HuggingFace Hub** (автоматично): + - Модель завантажиться автоматично при першому використанні + - Кешується в `~/.cache/huggingface/` + +2. **Локальний шлях**: + ```bash + export PARSER_MODEL_NAME=/opt/models/dots.ocr + ``` + +3. **Git clone**: + ```bash + git clone https://huggingface.co/rednote-hilab/dots.ocr /opt/models/dots.ocr + export PARSER_MODEL_NAME=/opt/models/dots.ocr + ``` + +### Розмір моделі та вимоги + +- **Розмір:** Залежить від конкретної версії dots.ocr (зазвичай 1-7GB) +- **RAM:** Мінімум 4GB для CPU, 8GB+ для GPU +- **GPU:** Опційно, значно прискорює обробку + +## Перевірка роботи + +### Health check + +```bash +curl http://localhost:9400/health +``` + +Очікувана відповідь: +```json +{ + "status": "healthy", + "service": "parser-service", + "model": "rednote-hilab/dots.ocr", + "device": "cpu", + "version": "1.0.0" +} +``` + +### Тестовий запит + +```bash +curl -X POST http://localhost:9400/ocr/parse \ + -F "file=@test.pdf" \ + -F "output_mode=raw_json" +``` + +## Troubleshooting + +### Помилка: "CUDA not available" + +**Рішення:** +- Перевірте, чи встановлено CUDA: `nvidia-smi` +- Встановіть правильну версію PyTorch з CUDA підтримкою +- Або використовуйте `PARSER_DEVICE=cpu` + +### Помилка: "Model not found" + +**Рішення:** +- 
Перевірте правильність `PARSER_MODEL_NAME` +- Переконайтеся, що є доступ до HuggingFace Hub +- Або вкажіть локальний шлях до моделі + +### Помилка: "Out of memory" + +**Рішення:** +- Зменште `PARSER_MAX_PAGES` +- Використовуйте CPU замість GPU +- Або використовуйте Ollama runtime + +### Модель завантажується повільно + +**Рішення:** +- Перший раз модель завантажується з HuggingFace (може бути повільно) +- Наступні запуски використовують кеш +- Можна попередньо завантажити: `python -c "from transformers import AutoModelForVision2Seq; AutoModelForVision2Seq.from_pretrained('rednote-hilab/dots.ocr', trust_remote_code=True)"` + +## Інтеграція з docker-compose.yml (основний проект) + +Додати в основний `docker-compose.yml`: + +```yaml +services: + parser-service: + build: + context: ./services/parser-service + dockerfile: Dockerfile + target: cpu + container_name: dagi-parser-service + ports: + - "9400:9400" + environment: + - PARSER_MODEL_NAME=${PARSER_MODEL_NAME:-rednote-hilab/dots.ocr} + - PARSER_DEVICE=${PARSER_DEVICE:-cpu} + - RUNTIME_TYPE=local + - USE_DUMMY_PARSER=${USE_DUMMY_PARSER:-false} + volumes: + - parser-model-cache:/root/.cache/huggingface + networks: + - dagi-network + depends_on: + - city-db + restart: unless-stopped +``` + +## Production рекомендації + +1. **GPU:** Використовуйте GPU для кращої продуктивності +2. **Model caching:** Зберігайте модель в volume для швидшого старту +3. **Resource limits:** Встановіть memory limits в docker-compose +4. **Monitoring:** Додайте логування та метрики +5. 
**Scaling:** Можна запускати кілька інстансів за load balancer + diff --git a/services/parser-service/Dockerfile b/services/parser-service/Dockerfile index 67a2dda0..2b9fb7c9 100644 --- a/services/parser-service/Dockerfile +++ b/services/parser-service/Dockerfile @@ -1,4 +1,6 @@ -FROM python:3.11-slim +# Multi-stage build for PARSER Service +# Stage 1: Base with system dependencies +FROM python:3.11-slim AS base WORKDIR /app @@ -7,17 +9,23 @@ RUN apt-get update && apt-get install -y \ poppler-utils \ libgl1-mesa-glx \ libglib2.0-0 \ + git \ && rm -rf /var/lib/apt/lists/* -# Copy requirements and install dependencies +# Stage 2: CPU-only build +FROM base AS cpu + +# Copy requirements and install CPU-only dependencies COPY requirements.txt . -RUN pip install --no-cache-dir -r requirements.txt +RUN pip install --no-cache-dir \ + torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu && \ + pip install --no-cache-dir -r requirements.txt # Copy application code COPY . . -# Create temp directory -RUN mkdir -p /tmp/parser +# Create temp directory and model cache +RUN mkdir -p /tmp/parser /root/.cache/huggingface # Expose port EXPOSE 9400 @@ -25,3 +33,27 @@ EXPOSE 9400 # Run application CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "9400"] +# Stage 3: CUDA build (optional, use --target=cuda) +FROM base AS cuda + +# Copy requirements and install CUDA-enabled PyTorch (cu118 wheels bundle the CUDA runtime; no apt toolkit needed) +COPY requirements.txt . +RUN pip install --no-cache-dir \ + torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118 && \ + pip install --no-cache-dir -r requirements.txt + +# Copy application code +COPY . . 
+ +# Create temp directory and model cache +RUN mkdir -p /tmp/parser /root/.cache/huggingface + +# Expose port +EXPOSE 9400 + +# Run application +CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "9400"] + +# Default to CPU build +FROM cpu + diff --git a/services/parser-service/app/core/config.py b/services/parser-service/app/core/config.py index a6909abc..3865ff99 100644 --- a/services/parser-service/app/core/config.py +++ b/services/parser-service/app/core/config.py @@ -15,8 +15,8 @@ class Settings(BaseSettings): API_PORT: int = 9400 # PARSER Model - PARSER_MODEL_NAME: str = os.getenv("PARSER_MODEL_NAME", "rednote-hilab/dots.ocr") - PARSER_DEVICE: Literal["cuda", "cpu", "mps"] = os.getenv("PARSER_DEVICE", "cpu") + PARSER_MODEL_NAME: str = os.getenv("PARSER_MODEL_NAME", os.getenv("DOTS_OCR_MODEL_ID", "rednote-hilab/dots.ocr")) + PARSER_DEVICE: Literal["cuda", "cpu", "mps"] = os.getenv("PARSER_DEVICE", os.getenv("DEVICE", "cpu")) PARSER_MAX_PAGES: int = int(os.getenv("PARSER_MAX_PAGES", "100")) PARSER_MAX_RESOLUTION: str = os.getenv("PARSER_MAX_RESOLUTION", "4096x4096") PARSER_BATCH_SIZE: int = int(os.getenv("PARSER_BATCH_SIZE", "1")) diff --git a/services/parser-service/app/runtime/model_loader.py b/services/parser-service/app/runtime/model_loader.py index ee0dbcc4..d6fa8906 100644 --- a/services/parser-service/app/runtime/model_loader.py +++ b/services/parser-service/app/runtime/model_loader.py @@ -37,56 +37,94 @@ def load_model() -> Optional[object]: try: # Load dots.ocr model - # Note: Adjust imports and model class based on actual dots.ocr implementation - # This is a template that should work with most Vision-Language models + # dots.ocr is a Vision-Language Model for document OCR and layout parsing try: from transformers import AutoModelForVision2Seq, AutoProcessor import torch - except ImportError: - logger.error("transformers or torch not installed. 
Install with: pip install transformers torch") + except ImportError as e: + logger.error(f"transformers or torch not installed: {e}") + logger.error("Install with: pip install transformers torch") if not settings.ALLOW_DUMMY_FALLBACK: raise return None - logger.info(f"Loading model from: {settings.PARSER_MODEL_NAME}") + model_name = settings.PARSER_MODEL_NAME + logger.info(f"Loading dots.ocr model from: {model_name}") + logger.info(f"Target device: {settings.PARSER_DEVICE}") - # Load processor - processor = AutoProcessor.from_pretrained( - settings.PARSER_MODEL_NAME, - trust_remote_code=True # If model has custom code - ) + # Load processor (handles image preprocessing and text tokenization) + try: + processor = AutoProcessor.from_pretrained( + model_name, + trust_remote_code=True # dots.ocr may have custom code + ) + logger.info("Processor loaded successfully") + except Exception as e: + logger.error(f"Failed to load processor: {e}") + if not settings.ALLOW_DUMMY_FALLBACK: + raise + return None # Determine device and dtype device = settings.PARSER_DEVICE - if device == "cuda" and not torch.cuda.is_available(): - logger.warning("CUDA not available, falling back to CPU") - device = "cpu" - elif device == "mps" and not hasattr(torch.backends, "mps") or not torch.backends.mps.is_available(): - logger.warning("MPS not available, falling back to CPU") - device = "cpu" - dtype = torch.float16 if device != "cpu" else torch.float32 + # Check CUDA availability + if device == "cuda": + if not torch.cuda.is_available(): + logger.warning("CUDA requested but not available, falling back to CPU") + device = "cpu" + else: + logger.info(f"Using CUDA device: {torch.cuda.get_device_name(0)}") + + # Check MPS availability (Apple Silicon) + elif device == "mps": + if not hasattr(torch.backends, "mps") or not torch.backends.mps.is_available(): + logger.warning("MPS requested but not available, falling back to CPU") + device = "cpu" + else: + logger.info("Using MPS (Apple Silicon)") + + 
# Determine dtype based on device + if device == "cpu": + dtype = torch.float32 + else: + dtype = torch.float16 # Use half precision for GPU to save memory + + logger.info(f"Loading model with dtype: {dtype}") # Load model - model = AutoModelForVision2Seq.from_pretrained( - settings.PARSER_MODEL_NAME, - device_map=device if device != "cpu" else None, - torch_dtype=dtype, - trust_remote_code=True - ) - - if device == "cpu": - model = model.to("cpu") + try: + model = AutoModelForVision2Seq.from_pretrained( + model_name, + device_map=device if device != "cpu" else None, + torch_dtype=dtype, + trust_remote_code=True, + low_cpu_mem_usage=True # Optimize memory usage + ) + + # Explicitly move to device if CPU + if device == "cpu": + model = model.to("cpu") + model.eval() # Set to evaluation mode + + logger.info(f"Model loaded successfully on device: {device}") + + except Exception as e: + logger.error(f"Failed to load model: {e}", exc_info=True) + if not settings.ALLOW_DUMMY_FALLBACK: + raise + return None # Store model and processor _model = { "model": model, "processor": processor, - "device": device + "device": device, + "dtype": dtype } - logger.info(f"Model loaded successfully on device: {device}") + logger.info(f"dots.ocr model ready on {device}") except ImportError as e: logger.error(f"Required packages not installed: {e}") diff --git a/services/parser-service/docker-compose.yml b/services/parser-service/docker-compose.yml new file mode 100644 index 00000000..c90468cc --- /dev/null +++ b/services/parser-service/docker-compose.yml @@ -0,0 +1,93 @@ +version: '3.8' + +services: + parser-service: + build: + context: . 
+ dockerfile: Dockerfile + target: cpu # Use 'cuda' for GPU support + container_name: dagi-parser-service + ports: + - "9400:9400" + environment: + # Model configuration + - PARSER_MODEL_NAME=${PARSER_MODEL_NAME:-rednote-hilab/dots.ocr} + - DOTS_OCR_MODEL_ID=${DOTS_OCR_MODEL_ID:-rednote-hilab/dots.ocr} + - PARSER_DEVICE=${PARSER_DEVICE:-cpu} + - DEVICE=${DEVICE:-cpu} + + # Runtime configuration + - RUNTIME_TYPE=${RUNTIME_TYPE:-local} + - USE_DUMMY_PARSER=${USE_DUMMY_PARSER:-false} + - ALLOW_DUMMY_FALLBACK=${ALLOW_DUMMY_FALLBACK:-true} + + # Ollama (if RUNTIME_TYPE=ollama) + - OLLAMA_BASE_URL=${OLLAMA_BASE_URL:-http://ollama:11434} + + # Processing limits + - PARSER_MAX_PAGES=${PARSER_MAX_PAGES:-100} + - MAX_FILE_SIZE_MB=${MAX_FILE_SIZE_MB:-50} + - PDF_DPI=${PDF_DPI:-200} + - IMAGE_MAX_SIZE=${IMAGE_MAX_SIZE:-2048} + + # Service + - API_HOST=0.0.0.0 + - API_PORT=9400 + - TEMP_DIR=/tmp/parser + volumes: + # Model cache (persist between restarts) + - parser-model-cache:/root/.cache/huggingface + # Temp files + - parser-temp:/tmp/parser + # Logs + - ./logs:/app/logs + networks: + - dagi-network + restart: unless-stopped + healthcheck: + test: ["CMD", "python", "-c", "import urllib.request; urllib.request.urlopen('http://localhost:9400/health', timeout=5)"]  # python:3.11-slim image ships no curl + interval: 30s + timeout: 10s + retries: 3 + # Uncomment for GPU support + # deploy: + # resources: + # reservations: + # devices: + # - driver: nvidia + # count: 1 + # capabilities: [gpu] + + # Optional: Ollama service (if using Ollama runtime) + ollama: + image: ollama/ollama:latest + container_name: dagi-ollama + ports: + - "11434:11434" + volumes: + - ollama-data:/root/.ollama + networks: + - dagi-network + restart: unless-stopped + # Uncomment for GPU support + # deploy: + # resources: + # reservations: + # devices: + # - driver: nvidia + # count: 1 + # capabilities: [gpu] + +volumes: + parser-model-cache: + driver: local + parser-temp: + driver: local + ollama-data: + driver: local + +networks: + dagi-network: + external: true + name: dagi-network +