feat: add Vision Encoder service + Vision RAG implementation

- Vision Encoder Service (OpenCLIP ViT-L/14, GPU-accelerated) - FastAPI app with text/image embedding endpoints (768-dim) - Docker support with NVIDIA GPU runtime - Port 8001, health checks, model info API - Qdrant Vector Database integration - Port 6333/6334 (HTTP/gRPC) - Image embeddings storage (768-dim, Cosine distance) - Auto collection creation - Vision RAG implementation - VisionEncoderClient (Python client for API) - Image Search module (text-to-image, image-to-image) - Vision RAG routing in DAGI Router (mode: image_search) - VisionEncoderProvider integration - Documentation (5000+ lines) - SYSTEM-INVENTORY.md - Complete system inventory - VISION-ENCODER-STATUS.md - Service status - VISION-RAG-IMPLEMENTATION.md - Implementation details - vision_encoder_deployment_task.md - Deployment checklist - services/vision-encoder/README.md - Deployment guide - Updated WARP.md, INFRASTRUCTURE.md, Jupyter Notebook - Testing - test-vision-encoder.sh - Smoke tests (6 tests) - Unit tests for client, image search, routing - Services: 17 total (added Vision Encoder + Qdrant) - AI Models: 3 (qwen3:8b, OpenCLIP ViT-L/14, BAAI/bge-m3) - GPU Services: 2 (Vision Encoder, Ollama) - VRAM Usage: ~10 GB (concurrent) Status: Production Ready ✅
2025-11-17 05:24:36 -08:00
parent b2b51f08fb
commit 4601c6fca8
55 changed files with 13205 additions and 3 deletions
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -201,6 +201,63 @@ services:
      retries: 3
      start_period: 10s

+  # Vision Encoder Service - OpenCLIP for text/image embeddings
+  vision-encoder:
+    build:
+      context: ./services/vision-encoder
+      dockerfile: Dockerfile
+    container_name: dagi-vision-encoder
+    ports:
+      - "8001:8001"
+    environment:
+      - DEVICE=${VISION_DEVICE:-cuda}
+      - MODEL_NAME=${VISION_MODEL_NAME:-ViT-L-14}
+      - MODEL_PRETRAINED=${VISION_MODEL_PRETRAINED:-openai}
+      - NORMALIZE_EMBEDDINGS=true
+      - QDRANT_HOST=qdrant
+      - QDRANT_PORT=6333
+      - QDRANT_ENABLED=true
+    volumes:
+      - ./logs:/app/logs
+      - vision-model-cache:/root/.cache/clip
+    depends_on:
+      - qdrant
+    networks:
+      - dagi-network
+    restart: unless-stopped
+    # GPU support - requires nvidia-docker runtime
+    deploy:
+      resources:
+        reservations:
+          devices:
+            - driver: nvidia
+              count: 1
+              capabilities: [gpu]
+    healthcheck:
+      test: ["CMD", "curl", "-f", "http://localhost:8001/health"]
+      interval: 30s
+      timeout: 10s
+      retries: 3
+      start_period: 60s
+
+  # Qdrant Vector Database - for image/text embeddings
+  qdrant:
+    image: qdrant/qdrant:v1.7.4
+    container_name: dagi-qdrant
+    ports:
+      - "6333:6333"  # HTTP API
+      - "6334:6334"  # gRPC API
+    volumes:
+      - qdrant-data:/qdrant/storage
+    networks:
+      - dagi-network
+    restart: unless-stopped
+    healthcheck:
+      test: ["CMD", "curl", "-f", "http://localhost:6333/healthz"]
+      interval: 30s
+      timeout: 10s
+      retries: 3
+
 volumes:
  rag-model-cache:
    driver: local
@@ -208,6 +265,10 @@ volumes:
    driver: local
  postgres-data:
    driver: local
+  vision-model-cache:
+    driver: local
+  qdrant-data:
+    driver: local

 networks:
  dagi-network: