feat: Add presence heartbeat for Matrix online status
- matrix-gateway: POST /internal/matrix/presence/online endpoint - usePresenceHeartbeat hook with activity tracking - Auto away after 5 min inactivity - Offline on page close/visibility change - Integrated in MatrixChatRoom component
This commit is contained in:
364
services/vector-db-service/app/main.py
Normal file
364
services/vector-db-service/app/main.py
Normal file
@@ -0,0 +1,364 @@
|
||||
"""
|
||||
Vector DB Service - Векторна база даних для DAARION Knowledge Base
|
||||
ChromaDB + Sentence Transformers для embeddings та RAG
|
||||
"""
|
||||
|
||||
from fastapi import FastAPI, HTTPException, UploadFile, File
|
||||
from fastapi.middleware.cors import CORSMiddleware
|
||||
from pydantic import BaseModel
|
||||
import logging
|
||||
import os
|
||||
from typing import Optional, List, Dict, Any
|
||||
from datetime import datetime
|
||||
import hashlib
|
||||
|
||||
# Logging
|
||||
logging.basicConfig(level=logging.INFO)
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
app = FastAPI(
|
||||
title="Vector DB Service",
|
||||
description="Vector Database для DAARION (ChromaDB + Sentence Transformers)",
|
||||
version="1.0.0"
|
||||
)
|
||||
|
||||
# CORS
|
||||
app.add_middleware(
|
||||
CORSMiddleware,
|
||||
allow_origins=["*"],
|
||||
allow_credentials=True,
|
||||
allow_methods=["*"],
|
||||
allow_headers=["*"],
|
||||
)
|
||||
|
||||
# Lazy import vector DB
|
||||
try:
|
||||
import chromadb
|
||||
from chromadb.config import Settings
|
||||
CHROMADB_AVAILABLE = True
|
||||
except ImportError:
|
||||
CHROMADB_AVAILABLE = False
|
||||
logger.warning("⚠️ ChromaDB not available")
|
||||
|
||||
try:
|
||||
from sentence_transformers import SentenceTransformer
|
||||
EMBEDDINGS_AVAILABLE = True
|
||||
except ImportError:
|
||||
EMBEDDINGS_AVAILABLE = False
|
||||
logger.warning("⚠️ Sentence Transformers not available")
|
||||
|
||||
# Конфігурація
|
||||
CHROMA_PERSIST_DIR = os.getenv("CHROMA_PERSIST_DIR", "./chroma_data")
|
||||
EMBEDDING_MODEL = os.getenv("EMBEDDING_MODEL", "all-MiniLM-L6-v2")
|
||||
|
||||
# Global variables
|
||||
_chroma_client = None
|
||||
_embedding_model = None
|
||||
|
||||
def get_chroma_client():
|
||||
"""Lazy initialization of ChromaDB client"""
|
||||
global _chroma_client
|
||||
if _chroma_client is None and CHROMADB_AVAILABLE:
|
||||
_chroma_client = chromadb.Client(Settings(
|
||||
chroma_db_impl="duckdb+parquet",
|
||||
persist_directory=CHROMA_PERSIST_DIR
|
||||
))
|
||||
return _chroma_client
|
||||
|
||||
def get_embedding_model():
|
||||
"""Lazy initialization of embedding model"""
|
||||
global _embedding_model
|
||||
if _embedding_model is None and EMBEDDINGS_AVAILABLE:
|
||||
_embedding_model = SentenceTransformer(EMBEDDING_MODEL)
|
||||
return _embedding_model
|
||||
|
||||
class Document(BaseModel):
|
||||
id: str
|
||||
text: str
|
||||
metadata: Optional[Dict[str, Any]] = {}
|
||||
|
||||
class AddDocumentsRequest(BaseModel):
|
||||
collection: str
|
||||
documents: List[Document]
|
||||
|
||||
class SearchRequest(BaseModel):
|
||||
collection: str
|
||||
query: str
|
||||
n_results: Optional[int] = 5
|
||||
filter: Optional[Dict[str, Any]] = None
|
||||
|
||||
class SearchResult(BaseModel):
|
||||
id: str
|
||||
text: str
|
||||
score: float
|
||||
metadata: Dict[str, Any]
|
||||
|
||||
class SearchResponse(BaseModel):
|
||||
query: str
|
||||
results: List[SearchResult]
|
||||
collection: str
|
||||
total: int
|
||||
timestamp: str
|
||||
|
||||
@app.get("/")
|
||||
async def root():
|
||||
"""Health check"""
|
||||
return {
|
||||
"service": "Vector DB Service",
|
||||
"status": "running",
|
||||
"chromadb": CHROMADB_AVAILABLE,
|
||||
"embeddings": EMBEDDINGS_AVAILABLE,
|
||||
"embedding_model": EMBEDDING_MODEL,
|
||||
"persist_dir": CHROMA_PERSIST_DIR,
|
||||
"version": "1.0.0"
|
||||
}
|
||||
|
||||
@app.get("/health")
|
||||
async def health():
|
||||
"""Health check endpoint"""
|
||||
return {
|
||||
"status": "healthy" if (CHROMADB_AVAILABLE and EMBEDDINGS_AVAILABLE) else "degraded",
|
||||
"chromadb": "available" if CHROMADB_AVAILABLE else "unavailable",
|
||||
"embeddings": "available" if EMBEDDINGS_AVAILABLE else "unavailable"
|
||||
}
|
||||
|
||||
@app.post("/api/collections")
|
||||
async def create_collection(collection_name: str):
|
||||
"""
|
||||
Створює нову колекцію
|
||||
"""
|
||||
try:
|
||||
client = get_chroma_client()
|
||||
if not client:
|
||||
raise HTTPException(status_code=503, detail="ChromaDB not available")
|
||||
|
||||
# Створити або отримати колекцію
|
||||
collection = client.get_or_create_collection(
|
||||
name=collection_name,
|
||||
metadata={"created_at": datetime.utcnow().isoformat()}
|
||||
)
|
||||
|
||||
logger.info(f"✅ Created/got collection: {collection_name}")
|
||||
|
||||
return {
|
||||
"collection": collection_name,
|
||||
"created": True,
|
||||
"count": collection.count()
|
||||
}
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"❌ Create collection error: {e}")
|
||||
raise HTTPException(status_code=500, detail=str(e))
|
||||
|
||||
@app.get("/api/collections")
|
||||
async def list_collections():
|
||||
"""
|
||||
Список всіх колекцій
|
||||
"""
|
||||
try:
|
||||
client = get_chroma_client()
|
||||
if not client:
|
||||
raise HTTPException(status_code=503, detail="ChromaDB not available")
|
||||
|
||||
collections = client.list_collections()
|
||||
|
||||
return {
|
||||
"collections": [
|
||||
{
|
||||
"name": col.name,
|
||||
"count": col.count(),
|
||||
"metadata": col.metadata
|
||||
}
|
||||
for col in collections
|
||||
],
|
||||
"total": len(collections)
|
||||
}
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"❌ List collections error: {e}")
|
||||
raise HTTPException(status_code=500, detail=str(e))
|
||||
|
||||
@app.post("/api/documents")
|
||||
async def add_documents(request: AddDocumentsRequest):
|
||||
"""
|
||||
Додає документи в колекцію
|
||||
|
||||
Body:
|
||||
{
|
||||
"collection": "knowledge_base",
|
||||
"documents": [
|
||||
{
|
||||
"id": "doc1",
|
||||
"text": "DAARION is a decentralized platform...",
|
||||
"metadata": {"source": "website", "date": "2025-01-01"}
|
||||
}
|
||||
]
|
||||
}
|
||||
"""
|
||||
try:
|
||||
client = get_chroma_client()
|
||||
if not client:
|
||||
raise HTTPException(status_code=503, detail="ChromaDB not available")
|
||||
|
||||
model = get_embedding_model()
|
||||
if not model:
|
||||
raise HTTPException(status_code=503, detail="Embeddings not available")
|
||||
|
||||
# Отримати колекцію
|
||||
collection = client.get_or_create_collection(name=request.collection)
|
||||
|
||||
# Підготувати дані
|
||||
ids = [doc.id for doc in request.documents]
|
||||
documents = [doc.text for doc in request.documents]
|
||||
metadatas = [doc.metadata for doc in request.documents]
|
||||
|
||||
# Згенерувати embeddings
|
||||
embeddings = model.encode(documents, convert_to_numpy=True).tolist()
|
||||
|
||||
# Додати в ChromaDB
|
||||
collection.add(
|
||||
ids=ids,
|
||||
documents=documents,
|
||||
embeddings=embeddings,
|
||||
metadatas=metadatas
|
||||
)
|
||||
|
||||
logger.info(f"✅ Added {len(documents)} documents to {request.collection}")
|
||||
|
||||
return {
|
||||
"collection": request.collection,
|
||||
"added": len(documents),
|
||||
"total": collection.count()
|
||||
}
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"❌ Add documents error: {e}", exc_info=True)
|
||||
raise HTTPException(status_code=500, detail=str(e))
|
||||
|
||||
@app.post("/api/search", response_model=SearchResponse)
|
||||
async def search_documents(request: SearchRequest):
|
||||
"""
|
||||
Пошук документів в колекції
|
||||
|
||||
Body:
|
||||
{
|
||||
"collection": "knowledge_base",
|
||||
"query": "What is DAARION?",
|
||||
"n_results": 5
|
||||
}
|
||||
"""
|
||||
try:
|
||||
client = get_chroma_client()
|
||||
if not client:
|
||||
raise HTTPException(status_code=503, detail="ChromaDB not available")
|
||||
|
||||
model = get_embedding_model()
|
||||
if not model:
|
||||
raise HTTPException(status_code=503, detail="Embeddings not available")
|
||||
|
||||
# Отримати колекцію
|
||||
try:
|
||||
collection = client.get_collection(name=request.collection)
|
||||
except:
|
||||
raise HTTPException(status_code=404, detail=f"Collection '{request.collection}' not found")
|
||||
|
||||
# Згенерувати embedding для запиту
|
||||
query_embedding = model.encode([request.query], convert_to_numpy=True).tolist()[0]
|
||||
|
||||
# Пошук
|
||||
results = collection.query(
|
||||
query_embeddings=[query_embedding],
|
||||
n_results=request.n_results,
|
||||
where=request.filter
|
||||
)
|
||||
|
||||
# Форматувати результати
|
||||
search_results = []
|
||||
for idx in range(len(results['ids'][0])):
|
||||
search_results.append(SearchResult(
|
||||
id=results['ids'][0][idx],
|
||||
text=results['documents'][0][idx],
|
||||
score=1 - results['distances'][0][idx], # Convert distance to similarity
|
||||
metadata=results['metadatas'][0][idx]
|
||||
))
|
||||
|
||||
logger.info(f"✅ Found {len(search_results)} results for '{request.query}'")
|
||||
|
||||
return SearchResponse(
|
||||
query=request.query,
|
||||
results=search_results,
|
||||
collection=request.collection,
|
||||
total=len(search_results),
|
||||
timestamp=datetime.utcnow().isoformat()
|
||||
)
|
||||
|
||||
except HTTPException:
|
||||
raise
|
||||
except Exception as e:
|
||||
logger.error(f"❌ Search error: {e}", exc_info=True)
|
||||
raise HTTPException(status_code=500, detail=str(e))
|
||||
|
||||
@app.delete("/api/documents")
|
||||
async def delete_documents(collection: str, ids: List[str]):
|
||||
"""
|
||||
Видаляє документи з колекції
|
||||
"""
|
||||
try:
|
||||
client = get_chroma_client()
|
||||
if not client:
|
||||
raise HTTPException(status_code=503, detail="ChromaDB not available")
|
||||
|
||||
# Отримати колекцію
|
||||
try:
|
||||
coll = client.get_collection(name=collection)
|
||||
except:
|
||||
raise HTTPException(status_code=404, detail=f"Collection '{collection}' not found")
|
||||
|
||||
# Видалити документи
|
||||
coll.delete(ids=ids)
|
||||
|
||||
logger.info(f"✅ Deleted {len(ids)} documents from {collection}")
|
||||
|
||||
return {
|
||||
"collection": collection,
|
||||
"deleted": len(ids),
|
||||
"remaining": coll.count()
|
||||
}
|
||||
|
||||
except HTTPException:
|
||||
raise
|
||||
except Exception as e:
|
||||
logger.error(f"❌ Delete documents error: {e}")
|
||||
raise HTTPException(status_code=500, detail=str(e))
|
||||
|
||||
@app.delete("/api/collections/{collection_name}")
|
||||
async def delete_collection(collection_name: str):
|
||||
"""
|
||||
Видаляє колекцію
|
||||
"""
|
||||
try:
|
||||
client = get_chroma_client()
|
||||
if not client:
|
||||
raise HTTPException(status_code=503, detail="ChromaDB not available")
|
||||
|
||||
# Видалити колекцію
|
||||
client.delete_collection(name=collection_name)
|
||||
|
||||
logger.info(f"✅ Deleted collection: {collection_name}")
|
||||
|
||||
return {
|
||||
"collection": collection_name,
|
||||
"deleted": True
|
||||
}
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"❌ Delete collection error: {e}")
|
||||
raise HTTPException(status_code=500, detail=str(e))
|
||||
|
||||
if __name__ == "__main__":
|
||||
import uvicorn
|
||||
uvicorn.run(app, host="0.0.0.0", port=8898)
|
||||
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user