feat: implement TTS, Document processing, and Memory Service /facts API
- TTS: xtts-v2 integration with voice cloning support
- Document: docling integration for PDF/DOCX/PPTX processing
- Memory Service: added /facts/upsert, /facts/{key}, /facts endpoints
- Added required dependencies (TTS, docling)
This commit is contained in:
177
services/chandra-service/main.py
Normal file
177
services/chandra-service/main.py
Normal file
@@ -0,0 +1,177 @@
|
||||
"""
|
||||
Chandra Document Processing Service
|
||||
Wrapper for Datalab Chandra OCR model
|
||||
"""
|
||||
import logging
|
||||
import os
|
||||
from typing import Optional, Dict, Any
|
||||
from fastapi import FastAPI, HTTPException, File, UploadFile, Form
|
||||
from fastapi.responses import JSONResponse
|
||||
from pydantic import BaseModel
|
||||
import httpx
|
||||
import base64
|
||||
from io import BytesIO
|
||||
from PIL import Image
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
app = FastAPI(title="Chandra Document Processing Service")
|
||||
|
||||
# Configuration (read once at import time from environment variables)
# Base URL of the backing Chandra OCR inference service.
CHANDRA_API_URL = os.getenv("CHANDRA_API_URL", "http://chandra-inference:8000")
# Optional license key; forwarded to the inference service only when non-empty.
CHANDRA_LICENSE_KEY = os.getenv("CHANDRA_LICENSE_KEY", "")
# Model selection: "chandra-small" (fast, lower latency) or "chandra" (balanced).
CHANDRA_MODEL = os.getenv("CHANDRA_MODEL", "chandra-small") # chandra-small or chandra
|
||||
|
||||
# Health check endpoint
|
||||
@app.get("/health")
async def health():
    """Report service health by probing the Chandra inference backend.

    Returns a status dict:
    - "healthy"   when the backend /health answers HTTP 200
    - "degraded"  when the backend answers with any other status
    - "unhealthy" when the probe itself fails (connection error, timeout, ...)
    """
    try:
        # Short timeout: a health probe should fail fast rather than hang.
        async with httpx.AsyncClient(timeout=5.0) as client:
            upstream = await client.get(f"{CHANDRA_API_URL}/health")
    except Exception as exc:
        logger.error(f"Health check failed: {exc}")
        return {
            "status": "unhealthy",
            "service": "chandra-service",
            "error": str(exc)
        }

    if upstream.status_code == 200:
        return {
            "status": "healthy",
            "service": "chandra-service",
            "chandra_api": CHANDRA_API_URL,
            "model": CHANDRA_MODEL
        }

    return {
        "status": "degraded",
        "service": "chandra-service",
        "chandra_api": CHANDRA_API_URL,
        "error": "Chandra inference service unavailable"
    }
|
||||
|
||||
|
||||
class ProcessDocumentRequest(BaseModel):
    """Request model for document processing.

    Exactly one document source (doc_url or doc_base64) is expected;
    an uploaded file on the /process endpoint takes precedence over both.
    """
    # URL to fetch the document/image from (downloaded server-side).
    doc_url: Optional[str] = None
    # Base64-encoded document/image bytes.
    doc_base64: Optional[str] = None
    # Desired output representation produced by the OCR backend.
    output_format: str = "markdown" # markdown, html, json
    # When true, requests the slower but more precise OCR mode.
    accurate_mode: bool = False
|
||||
|
||||
|
||||
@app.post("/process")
async def process_document(
    request: ProcessDocumentRequest,
    file: Optional[UploadFile] = File(None)
):
    """
    Process a document using Chandra OCR.

    Accepts:
    - doc_url: URL to document/image
    - doc_base64: Base64 encoded document/image
    - file: Uploaded file
    - output_format: markdown, html, or json
    - accurate_mode: Use accurate mode (slower but more precise)

    Raises HTTPException:
    - 400 when no document source is supplied or the URL download fails
    - 504 when the inference service times out
    - the backend's own status on a non-200 backend response
    - 500 on any other processing error
    """
    try:
        image_data = await _resolve_document_bytes(request, file)

        # Build the multipart payload for the Chandra inference service.
        files = {
            "file": ("document", image_data, "application/octet-stream")
        }
        data = {
            "output_format": request.output_format,
            "accurate_mode": str(request.accurate_mode).lower()
        }
        if CHANDRA_LICENSE_KEY:
            data["license_key"] = CHANDRA_LICENSE_KEY

        # Call Chandra inference service (generous timeout: OCR can be slow).
        async with httpx.AsyncClient(timeout=120.0) as client:
            response = await client.post(
                f"{CHANDRA_API_URL}/process",
                files=files,
                data=data
            )

        if response.status_code == 200:
            return {
                "success": True,
                "output_format": request.output_format,
                "result": response.json()
            }

        logger.error(f"Chandra API error: {response.status_code} - {response.text}")
        raise HTTPException(
            status_code=response.status_code,
            detail=f"Chandra API error: {response.text}"
        )

    except HTTPException:
        # BUGFIX: deliberate HTTPExceptions raised above (400 for missing/bad
        # input, backend status passthrough) were previously swallowed by the
        # generic `except Exception` below and re-wrapped as HTTP 500.
        raise
    except httpx.TimeoutException:
        logger.error("Chandra API timeout")
        raise HTTPException(
            status_code=504,
            detail="Chandra API timeout"
        )
    except Exception as e:
        logger.error(f"Document processing failed: {e}", exc_info=True)
        raise HTTPException(
            status_code=500,
            detail=f"Document processing failed: {str(e)}"
        )


async def _resolve_document_bytes(
    request: ProcessDocumentRequest,
    file: Optional[UploadFile]
) -> bytes:
    """Return raw document bytes from upload, base64 payload, or URL.

    Precedence: uploaded file, then doc_base64, then doc_url.
    Raises HTTPException(400) when no source is given or the download fails.
    """
    if file:
        # Read uploaded file
        return await file.read()
    if request.doc_base64:
        # Decode base64
        return base64.b64decode(request.doc_base64)
    if request.doc_url:
        # Download from URL
        async with httpx.AsyncClient(timeout=30.0) as client:
            response = await client.get(request.doc_url)
        if response.status_code == 200:
            return response.content
        raise HTTPException(
            status_code=400,
            detail=f"Failed to download document from URL: {response.status_code}"
        )
    raise HTTPException(
        status_code=400,
        detail="No document provided. Use file, doc_url, or doc_base64"
    )
|
||||
|
||||
|
||||
@app.get("/models")
async def list_models():
    """List available Chandra models"""
    catalog = [
        {
            "name": "chandra-small",
            "description": "Fast model with lower latency",
            "vram_required": "~8GB"
        },
        {
            "name": "chandra",
            "description": "Balanced model",
            "vram_required": "~16GB"
        }
    ]
    # current_model reflects the CHANDRA_MODEL environment configuration.
    return {"models": catalog, "current_model": CHANDRA_MODEL}
|
||||
|
||||
|
||||
# Standalone entry point: run the service directly with uvicorn on port 8002.
if __name__ == "__main__":
    import uvicorn
    uvicorn.run(app, host="0.0.0.0", port=8002)
|
||||
Reference in New Issue
Block a user