feat: implement TTS, Document processing, and Memory Service /facts API
- TTS: xtts-v2 integration with voice cloning support
- Document: docling integration for PDF/DOCX/PPTX processing
- Memory Service: added /facts/upsert, /facts/{key}, /facts endpoints
- Added required dependencies (TTS, docling)
This commit is contained in:
177
services/chandra-service/main.py
Normal file
177
services/chandra-service/main.py
Normal file
@@ -0,0 +1,177 @@
|
||||
"""
|
||||
Chandra Document Processing Service
|
||||
Wrapper for Datalab Chandra OCR model
|
||||
"""
|
||||
import logging
|
||||
import os
|
||||
from typing import Optional, Dict, Any
|
||||
from fastapi import FastAPI, HTTPException, File, UploadFile, Form
|
||||
from fastapi.responses import JSONResponse
|
||||
from pydantic import BaseModel
|
||||
import httpx
|
||||
import base64
|
||||
from io import BytesIO
|
||||
from PIL import Image
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
app = FastAPI(title="Chandra Document Processing Service")
|
||||
|
||||
# Configuration (read once at import time from environment variables)
# Base URL of the backing Chandra OCR inference service.
CHANDRA_API_URL = os.getenv("CHANDRA_API_URL", "http://chandra-inference:8000")
# Optional license key; forwarded to the inference service only when non-empty.
CHANDRA_LICENSE_KEY = os.getenv("CHANDRA_LICENSE_KEY", "")
# Model selection: "chandra-small" (fast, lower latency) or "chandra" (balanced).
CHANDRA_MODEL = os.getenv("CHANDRA_MODEL", "chandra-small") # chandra-small or chandra
|
||||
|
||||
# Health check endpoint
|
||||
@app.get("/health")
async def health():
    """Report service health by probing the Chandra inference backend.

    Returns a status dict:
    - "healthy"   when the backend /health answers HTTP 200
    - "degraded"  when the backend answers with any other status
    - "unhealthy" when the probe itself fails (connection error, timeout, ...)
    """
    try:
        # Short timeout: a health probe should fail fast rather than hang.
        async with httpx.AsyncClient(timeout=5.0) as client:
            upstream = await client.get(f"{CHANDRA_API_URL}/health")
    except Exception as exc:
        logger.error(f"Health check failed: {exc}")
        return {
            "status": "unhealthy",
            "service": "chandra-service",
            "error": str(exc)
        }

    if upstream.status_code == 200:
        return {
            "status": "healthy",
            "service": "chandra-service",
            "chandra_api": CHANDRA_API_URL,
            "model": CHANDRA_MODEL
        }

    return {
        "status": "degraded",
        "service": "chandra-service",
        "chandra_api": CHANDRA_API_URL,
        "error": "Chandra inference service unavailable"
    }
|
||||
|
||||
|
||||
class ProcessDocumentRequest(BaseModel):
    """Request model for document processing.

    Exactly one document source (doc_url or doc_base64) is expected;
    an uploaded file on the /process endpoint takes precedence over both.
    """
    # URL to fetch the document/image from (downloaded server-side).
    doc_url: Optional[str] = None
    # Base64-encoded document/image bytes.
    doc_base64: Optional[str] = None
    # Desired output representation produced by the OCR backend.
    output_format: str = "markdown" # markdown, html, json
    # When true, requests the slower but more precise OCR mode.
    accurate_mode: bool = False
|
||||
|
||||
|
||||
@app.post("/process")
async def process_document(
    request: ProcessDocumentRequest,
    file: Optional[UploadFile] = File(None)
):
    """
    Process a document using Chandra OCR.

    Accepts:
    - doc_url: URL to document/image
    - doc_base64: Base64 encoded document/image
    - file: Uploaded file
    - output_format: markdown, html, or json
    - accurate_mode: Use accurate mode (slower but more precise)

    Raises HTTPException:
    - 400 when no document source is supplied or the URL download fails
    - 504 when the inference service times out
    - the backend's own status on a non-200 backend response
    - 500 on any other processing error
    """
    try:
        image_data = await _resolve_document_bytes(request, file)

        # Build the multipart payload for the Chandra inference service.
        files = {
            "file": ("document", image_data, "application/octet-stream")
        }
        data = {
            "output_format": request.output_format,
            "accurate_mode": str(request.accurate_mode).lower()
        }
        if CHANDRA_LICENSE_KEY:
            data["license_key"] = CHANDRA_LICENSE_KEY

        # Call Chandra inference service (generous timeout: OCR can be slow).
        async with httpx.AsyncClient(timeout=120.0) as client:
            response = await client.post(
                f"{CHANDRA_API_URL}/process",
                files=files,
                data=data
            )

        if response.status_code == 200:
            return {
                "success": True,
                "output_format": request.output_format,
                "result": response.json()
            }

        logger.error(f"Chandra API error: {response.status_code} - {response.text}")
        raise HTTPException(
            status_code=response.status_code,
            detail=f"Chandra API error: {response.text}"
        )

    except HTTPException:
        # BUGFIX: deliberate HTTPExceptions raised above (400 for missing/bad
        # input, backend status passthrough) were previously swallowed by the
        # generic `except Exception` below and re-wrapped as HTTP 500.
        raise
    except httpx.TimeoutException:
        logger.error("Chandra API timeout")
        raise HTTPException(
            status_code=504,
            detail="Chandra API timeout"
        )
    except Exception as e:
        logger.error(f"Document processing failed: {e}", exc_info=True)
        raise HTTPException(
            status_code=500,
            detail=f"Document processing failed: {str(e)}"
        )


async def _resolve_document_bytes(
    request: ProcessDocumentRequest,
    file: Optional[UploadFile]
) -> bytes:
    """Return raw document bytes from upload, base64 payload, or URL.

    Precedence: uploaded file, then doc_base64, then doc_url.
    Raises HTTPException(400) when no source is given or the download fails.
    """
    if file:
        # Read uploaded file
        return await file.read()
    if request.doc_base64:
        # Decode base64
        return base64.b64decode(request.doc_base64)
    if request.doc_url:
        # Download from URL
        async with httpx.AsyncClient(timeout=30.0) as client:
            response = await client.get(request.doc_url)
        if response.status_code == 200:
            return response.content
        raise HTTPException(
            status_code=400,
            detail=f"Failed to download document from URL: {response.status_code}"
        )
    raise HTTPException(
        status_code=400,
        detail="No document provided. Use file, doc_url, or doc_base64"
    )
|
||||
|
||||
|
||||
@app.get("/models")
async def list_models():
    """List available Chandra models"""
    catalog = [
        {
            "name": "chandra-small",
            "description": "Fast model with lower latency",
            "vram_required": "~8GB"
        },
        {
            "name": "chandra",
            "description": "Balanced model",
            "vram_required": "~16GB"
        }
    ]
    # current_model reflects the CHANDRA_MODEL environment configuration.
    return {"models": catalog, "current_model": CHANDRA_MODEL}
|
||||
|
||||
|
||||
# Standalone entry point: run the service directly with uvicorn on port 8002.
if __name__ == "__main__":
    import uvicorn
    uvicorn.run(app, host="0.0.0.0", port=8002)
|
||||
Reference in New Issue
Block a user