feat: implement TTS, Document processing, and Memory Service /facts API

- TTS: xtts-v2 integration with voice cloning support
- Document: docling integration for PDF/DOCX/PPTX processing
- Memory Service: added /facts/upsert, /facts/{key}, /facts endpoints
- Added required dependencies (TTS, docling)
2026-01-17 08:16:37 -08:00
parent a9fcadc6e2
commit 5290287058
121 changed files with 17071 additions and 436 deletions
--- a/docker-compose.node1.yml
+++ b/docker-compose.node1.yml
@@ -0,0 +1,287 @@
+# Compose file format version.
+# NOTE(review): Docker Compose v2 treats the top-level `version` key as
+# obsolete and ignores it; confirm whether legacy docker-compose v1 support
+# is still required before removing it.
+version: '3.8'
+
+services:
+  # DAGI Router for NODE1.
+  #
+  # Secrets (Neo4j password, DeepSeek and Mistral API keys) are no longer
+  # committed in this file. They are interpolated from the shell environment
+  # or an untracked `.env` file next to this compose file; `${VAR:?msg}` makes
+  # Compose abort with `msg` when the variable is unset or empty.
+  # The values that were previously committed here are exposed in VCS history
+  # and must be rotated.
+  #
+  # NOTE(review): `nats` is referenced below but is not defined in this file;
+  # presumably it is reachable on the external dagi-network - confirm.
+  router:
+    build:
+      context: ./services/router
+      dockerfile: Dockerfile
+    container_name: dagi-router-node1
+    ports:
+      - "9102:8000"
+    environment:
+      - NATS_URL=nats://nats:4222
+      - ROUTER_CONFIG_PATH=/app/router_config.yaml
+      - LOG_LEVEL=info
+      - NODE_ID=node-1-hetzner-gex44
+      - MEMORY_SERVICE_URL=http://memory-service:8000
+      - QDRANT_HOST=qdrant
+      - QDRANT_PORT=6333
+      - QDRANT_ENABLED=true
+      - NEO4J_BOLT_URL=bolt://neo4j:7687
+      - NEO4J_HTTP_URL=http://neo4j:7474
+      - NEO4J_USER=neo4j
+      # Must match the password the neo4j service was initialised with.
+      - "NEO4J_PASSWORD=${NEO4J_PASSWORD:?NEO4J_PASSWORD must be set}"
+      - "DEEPSEEK_API_KEY=${DEEPSEEK_API_KEY:?DEEPSEEK_API_KEY must be set}"
+      - "MISTRAL_API_KEY=${MISTRAL_API_KEY:?MISTRAL_API_KEY must be set}"
+      - VISION_ENCODER_URL=http://vision-encoder:8001
+      # Image generation, STT, OCR and web search all point at swapper-service.
+      - SWAPPER_SERVICE_URL=http://swapper-service:8890
+      - IMAGE_GEN_URL=http://swapper-service:8890/image/generate
+      - STT_SERVICE_URL=http://swapper-service:8890
+      - STT_SERVICE_UPLOAD_URL=http://swapper-service:8890/stt
+      - OCR_SERVICE_URL=http://swapper-service:8890
+      - WEB_SEARCH_SERVICE_URL=http://swapper-service:8890
+    volumes:
+      - ./services/router/router_config.yaml:/app/router_config.yaml:ro
+      - ./logs:/app/logs
+    networks:
+      - dagi-network
+    restart: unless-stopped
+    healthcheck:
+      test: ["CMD", "curl", "-f", "http://localhost:8000/health"]
+      interval: 30s
+      timeout: 10s
+      retries: 3
+      start_period: 10s
+
+  # Swapper Service for NODE1: dynamic loading of LLM and OCR models.
+  swapper-service:
+    container_name: swapper-service-node1
+    build:
+      context: ./services/swapper-service
+      dockerfile: Dockerfile
+    restart: unless-stopped
+    networks:
+      - dagi-network
+    # Maps host.docker.internal to 172.18.0.1, the same address that
+    # OLLAMA_BASE_URL targets below.
+    extra_hosts:
+      - "host.docker.internal:172.18.0.1"
+    ports:
+      - "8890:8890"
+      - "8891:8891"  # Metrics
+    environment:
+      OLLAMA_BASE_URL: "http://172.18.0.1:11434"
+      SWAPPER_CONFIG_PATH: "/app/config/swapper_config.yaml"
+      SWAPPER_MODE: "single-active"
+      MAX_CONCURRENT_MODELS: "2"  # 1 LLM + 1 OCR
+      MODEL_SWAP_TIMEOUT: "300"
+      GPU_ENABLED: "true"
+      NODE_ID: "node-1-hetzner-gex44"
+      HF_HOME: "/root/.cache/huggingface"
+      CUDA_VISIBLE_DEVICES: "0"
+      CRAWL4AI_URL: "http://crawl4ai:11235"
+    volumes:
+      # Node-specific config is mounted read-only at the path named by
+      # SWAPPER_CONFIG_PATH.
+      - ./services/swapper-service/config/swapper_config_node1.yaml:/app/config/swapper_config.yaml:ro
+      - ./logs:/app/logs
+      # Named volume mounted at the HF_HOME directory.
+      - swapper-hf-cache-node1:/root/.cache/huggingface
+    # Reserves one NVIDIA GPU device (GPU support for OCR models).
+    deploy:
+      resources:
+        reservations:
+          devices:
+            - driver: nvidia
+              count: 1
+              capabilities:
+                - gpu
+    healthcheck:
+      test:
+        - CMD-SHELL
+        - "wget -qO- http://localhost:8890/health || exit 1"
+      interval: 30s
+      timeout: 10s
+      retries: 3
+      start_period: 60s
+
+  # Image generation is now integrated into Swapper Service (lazy loading).
+  # Endpoint: POST /image/generate on swapper-service:8890
+
+  # Crawl4AI: advanced web crawler with JavaScript support.
+  # NOTE(review): the image is tagged `latest`, so pulls are not
+  # reproducible; consider pinning a specific release.
+  crawl4ai:
+    container_name: dagi-crawl4ai-node1
+    image: unclecode/crawl4ai:latest
+    restart: unless-stopped
+    networks:
+      - dagi-network
+    ports:
+      - "11235:11235"
+    environment:
+      # Falls back to an empty string when CRAWL4AI_API_TOKEN is not set.
+      CRAWL4AI_API_TOKEN: "${CRAWL4AI_API_TOKEN:-}"
+      MAX_CONCURRENT_TASKS: "5"
+    healthcheck:
+      test:
+        - CMD
+        - curl
+        - -f
+        - http://localhost:11235/health
+      interval: 30s
+      timeout: 10s
+      retries: 3
+      start_period: 30s
+
+  # Gateway Bot (Helion + DAARWIZZ).
+  #
+  # The Telegram bot token is no longer committed in this file. It is
+  # interpolated from the shell environment or an untracked `.env` file;
+  # `${VAR:?msg}` makes Compose abort with `msg` when the variable is unset
+  # or empty. The token that was previously committed here is exposed in VCS
+  # history and must be revoked and re-issued.
+  gateway:
+    build:
+      context: ./gateway-bot
+      dockerfile: Dockerfile
+    container_name: dagi-gateway-node1
+    ports:
+      - "9300:9300"
+    environment:
+      - ROUTER_URL=http://router:8000
+      - "HELION_TELEGRAM_BOT_TOKEN=${HELION_TELEGRAM_BOT_TOKEN:?HELION_TELEGRAM_BOT_TOKEN must be set}"
+      - HELION_NAME=Helion
+      - HELION_PROMPT_PATH=/app/gateway-bot/helion_prompt.txt
+      - MEMORY_SERVICE_URL=http://memory-service:8000
+      # Image generation, STT, OCR and web search all point at swapper-service.
+      - SWAPPER_SERVICE_URL=http://swapper-service:8890
+      - IMAGE_GEN_URL=http://swapper-service:8890/image/generate
+      - STT_SERVICE_URL=http://swapper-service:8890
+      - STT_SERVICE_UPLOAD_URL=http://swapper-service:8890/stt
+      - OCR_SERVICE_URL=http://swapper-service:8890
+      - WEB_SEARCH_SERVICE_URL=http://swapper-service:8890
+    volumes:
+      # Read-only mount of the bot directory; HELION_PROMPT_PATH points into it.
+      - ./gateway-bot:/app/gateway-bot:ro
+      - ./logs:/app/logs
+    # Short-form depends_on: controls start order only, not readiness.
+    depends_on:
+      - router
+      - memory-service
+    networks:
+      - dagi-network
+    restart: unless-stopped
+    healthcheck:
+      test: ["CMD", "curl", "-f", "http://localhost:9300/health"]
+      interval: 30s
+      timeout: 10s
+      retries: 3
+      start_period: 10s
+
+  # Memory Service.
+  #
+  # The PostgreSQL password is no longer committed in this file. It is
+  # interpolated from the shell environment or an untracked `.env` file;
+  # `${VAR:?msg}` makes Compose abort with `msg` when the variable is unset
+  # or empty. The password that was previously committed here is exposed in
+  # VCS history and must be rotated.
+  #
+  # NOTE(review): `dagi-postgres` is not defined in this file; presumably it
+  # is a container reachable on the external dagi-network - confirm.
+  memory-service:
+    build:
+      context: ./services/memory-service
+      dockerfile: Dockerfile
+    container_name: dagi-memory-service-node1
+    ports:
+      - "8000:8000"
+    environment:
+      # PostgreSQL connection (uses MEMORY_ prefix as per config.py)
+      - MEMORY_POSTGRES_HOST=dagi-postgres
+      - MEMORY_POSTGRES_PORT=5432
+      - MEMORY_POSTGRES_USER=daarion
+      - "MEMORY_POSTGRES_PASSWORD=${MEMORY_POSTGRES_PASSWORD:?MEMORY_POSTGRES_PASSWORD must be set}"
+      - MEMORY_POSTGRES_DB=daarion_memory
+      # Qdrant connection
+      - MEMORY_QDRANT_HOST=qdrant
+      - MEMORY_QDRANT_PORT=6333
+      # Optional: empty string when COHERE_API_KEY is not set
+      - MEMORY_COHERE_API_KEY=${COHERE_API_KEY:-}
+      - MEMORY_DEBUG=false
+    volumes:
+      - ./logs:/app/logs
+    depends_on:
+      - qdrant
+    networks:
+      - dagi-network
+    restart: unless-stopped
+    healthcheck:
+      test: ["CMD", "curl", "-f", "http://localhost:8000/health"]
+      interval: 30s
+      timeout: 10s
+      retries: 3
+      start_period: 10s
+
+  # Qdrant vector database.
+  qdrant:
+    image: qdrant/qdrant:v1.7.4
+    container_name: dagi-qdrant-node1
+    ports:
+      - "6333:6333"  # HTTP API
+      - "6334:6334"  # gRPC API
+    volumes:
+      - qdrant-data-node1:/qdrant/storage
+    networks:
+      - dagi-network
+    restart: unless-stopped
+    healthcheck:
+      # The official qdrant image does not ship curl or wget, so a curl-based
+      # probe can never succeed and the container is reported unhealthy.
+      # Open a TCP connection to the HTTP API port with bash's built-in
+      # /dev/tcp instead; the probe passes once the port accepts connections.
+      # NOTE(review): verify with
+      #   docker run --rm --entrypoint bash qdrant/qdrant:v1.7.4 -c 'command -v curl'
+      test: ["CMD", "bash", "-c", "exec 3<>/dev/tcp/127.0.0.1/6333"]
+      interval: 30s
+      timeout: 10s
+      retries: 3
+
+  # Neo4j graph database.
+  #
+  # The password is no longer committed in this file. It is interpolated from
+  # the shell environment or an untracked `.env` file; `${VAR:?msg}` makes
+  # Compose abort with `msg` when the variable is unset or empty. The password
+  # that was previously committed here is exposed in VCS history and must be
+  # rotated.
+  # NOTE(review): NEO4J_AUTH is understood to set the initial password only
+  # when the data volume is empty; an existing database presumably needs the
+  # password changed inside Neo4j as well - confirm.
+  neo4j:
+    image: neo4j:5.15-community
+    container_name: dagi-neo4j-node1
+    ports:
+      - "7474:7474"  # HTTP
+      - "7687:7687"  # Bolt
+    environment:
+      - "NEO4J_AUTH=neo4j/${NEO4J_PASSWORD:?NEO4J_PASSWORD must be set}"
+      - NEO4J_PLUGINS=["apoc"]
+      # Neo4j 5 setting names (server.memory.heap.*); the dbms.memory.heap.*
+      # spellings used previously are deprecated in the 5.x line.
+      - NEO4J_server_memory_heap_initial__size=512m
+      - NEO4J_server_memory_heap_max__size=2G
+    volumes:
+      - neo4j-data-node1:/data
+      - neo4j-logs-node1:/logs
+    networks:
+      - dagi-network
+    restart: unless-stopped
+    healthcheck:
+      test: ["CMD", "wget", "--no-verbose", "--tries=1", "--spider", "http://localhost:7474"]
+      interval: 30s
+      timeout: 10s
+      retries: 3
+
+  # Redis cache; the named volume redis-data-node1 is mounted at /data.
+  # NOTE(review): port 6379 is published on the host and no password is
+  # configured in this file - confirm the port is firewalled or bind it to
+  # a private interface.
+  redis:
+    container_name: dagi-redis-node1
+    image: redis:7-alpine
+    restart: unless-stopped
+    networks:
+      - dagi-network
+    ports:
+      - "6379:6379"
+    volumes:
+      - redis-data-node1:/data
+    healthcheck:
+      test:
+        - CMD
+        - redis-cli
+        - PING
+      interval: 30s
+      timeout: 5s
+      retries: 3
+
+  # Vision Encoder Service: OpenCLIP for text/image embeddings.
+  vision-encoder:
+    container_name: dagi-vision-encoder-node1
+    build:
+      context: ./services/vision-encoder
+      dockerfile: Dockerfile
+    restart: unless-stopped
+    networks:
+      - dagi-network
+    depends_on:
+      - qdrant
+    ports:
+      - "8001:8001"
+    environment:
+      # Original note: NODE1 has no GPU.
+      # NOTE(review): swapper-service in this file reserves an NVIDIA GPU on
+      # the same node - confirm whether CPU is still the intended device.
+      DEVICE: "cpu"
+      # Model and weights can be overridden through VISION_MODEL_NAME and
+      # VISION_MODEL_PRETRAINED; the defaults are ViT-L-14 / openai.
+      MODEL_NAME: "${VISION_MODEL_NAME:-ViT-L-14}"
+      MODEL_PRETRAINED: "${VISION_MODEL_PRETRAINED:-openai}"
+      NORMALIZE_EMBEDDINGS: "true"
+      QDRANT_HOST: "qdrant"
+      QDRANT_PORT: "6333"
+      QDRANT_ENABLED: "true"
+    volumes:
+      - ./logs:/app/logs
+      - vision-model-cache-node1:/root/.cache/clip
+    healthcheck:
+      test:
+        - CMD
+        - curl
+        - -f
+        - http://localhost:8001/health
+      interval: 30s
+      timeout: 10s
+      retries: 3
+      start_period: 60s
+
+  # OCR is now handled by Swapper Service (got-ocr2, donut-base, donut-cord models)
+
+# Named volumes for NODE1; `{}` selects the default driver and options.
+volumes:
+  qdrant-data-node1: {}
+  neo4j-data-node1: {}
+  neo4j-logs-node1: {}
+  redis-data-node1: {}
+  vision-model-cache-node1: {}
+  # NOTE(review): no service in this file mounts docling-model-cache-node1;
+  # confirm it is still needed.
+  docling-model-cache-node1: {}
+  swapper-hf-cache-node1: {}
+
+# dagi-network is declared external: it must already exist, and Compose
+# attaches to it rather than creating it.
+networks:
+  dagi-network:
+    name: dagi-network
+    external: true