Some checks failed
Build and Deploy Docs / build-and-deploy (push) Has been cancelled
- Created logs/ structure (sessions, operations, incidents) - Added session-start/log/end scripts - Installed Git hooks for auto-logging commits/pushes - Added shell integration for zsh - Created CHANGELOG.md - Documented today's session (2026-01-10)
377 lines
11 KiB
Python
377 lines
11 KiB
Python
"""
|
||
Vector DB Service - Векторна база даних для DAARION Knowledge Base
|
||
ChromaDB + Sentence Transformers для embeddings та RAG
|
||
"""
|
||
|
||
from fastapi import FastAPI, HTTPException, UploadFile, File
|
||
from fastapi.middleware.cors import CORSMiddleware
|
||
from pydantic import BaseModel
|
||
import logging
|
||
import os
|
||
from typing import Optional, List, Dict, Any
|
||
from datetime import datetime
|
||
import hashlib
|
||
|
||
# Logging
|
||
logging.basicConfig(level=logging.INFO)
|
||
logger = logging.getLogger(__name__)
|
||
|
||
app = FastAPI(
|
||
title="Vector DB Service",
|
||
description="Vector Database для DAARION (ChromaDB + Sentence Transformers)",
|
||
version="1.0.0"
|
||
)
|
||
|
||
# CORS
|
||
app.add_middleware(
|
||
CORSMiddleware,
|
||
allow_origins=["*"],
|
||
allow_credentials=True,
|
||
allow_methods=["*"],
|
||
allow_headers=["*"],
|
||
)
|
||
|
||
# Lazy import vector DB
|
||
try:
|
||
import chromadb
|
||
from chromadb.config import Settings
|
||
CHROMADB_AVAILABLE = True
|
||
except ImportError:
|
||
CHROMADB_AVAILABLE = False
|
||
logger.warning("⚠️ ChromaDB not available")
|
||
|
||
try:
|
||
from sentence_transformers import SentenceTransformer
|
||
EMBEDDINGS_AVAILABLE = True
|
||
except ImportError:
|
||
EMBEDDINGS_AVAILABLE = False
|
||
logger.warning("⚠️ Sentence Transformers not available")
|
||
|
||
# Конфігурація
|
||
CHROMA_PERSIST_DIR = os.getenv("CHROMA_PERSIST_DIR", "./chroma_data")
|
||
EMBEDDING_MODEL = os.getenv("EMBEDDING_MODEL", "all-MiniLM-L6-v2")
|
||
|
||
# Global variables
|
||
_chroma_client = None
|
||
_embedding_model = None
|
||
|
||
def get_chroma_client():
|
||
"""Lazy initialization of ChromaDB client"""
|
||
global _chroma_client
|
||
if _chroma_client is None and CHROMADB_AVAILABLE:
|
||
_chroma_client = chromadb.Client(Settings(
|
||
chroma_db_impl="duckdb+parquet",
|
||
persist_directory=CHROMA_PERSIST_DIR
|
||
))
|
||
return _chroma_client
|
||
|
||
def get_embedding_model():
|
||
"""Lazy initialization of embedding model"""
|
||
global _embedding_model
|
||
if _embedding_model is None and EMBEDDINGS_AVAILABLE:
|
||
_embedding_model = SentenceTransformer(EMBEDDING_MODEL)
|
||
return _embedding_model
|
||
|
||
class Document(BaseModel):
|
||
id: str
|
||
text: str
|
||
metadata: Optional[Dict[str, Any]] = {}
|
||
|
||
class AddDocumentsRequest(BaseModel):
|
||
collection: str
|
||
documents: List[Document]
|
||
|
||
class SearchRequest(BaseModel):
|
||
collection: str
|
||
query: str
|
||
n_results: Optional[int] = 5
|
||
filter: Optional[Dict[str, Any]] = None
|
||
|
||
class SearchResult(BaseModel):
|
||
id: str
|
||
text: str
|
||
score: float
|
||
metadata: Dict[str, Any]
|
||
|
||
class SearchResponse(BaseModel):
|
||
query: str
|
||
results: List[SearchResult]
|
||
collection: str
|
||
total: int
|
||
timestamp: str
|
||
|
||
@app.get("/")
|
||
async def root():
|
||
"""Health check"""
|
||
return {
|
||
"service": "Vector DB Service",
|
||
"status": "running",
|
||
"chromadb": CHROMADB_AVAILABLE,
|
||
"embeddings": EMBEDDINGS_AVAILABLE,
|
||
"embedding_model": EMBEDDING_MODEL,
|
||
"persist_dir": CHROMA_PERSIST_DIR,
|
||
"version": "1.0.0"
|
||
}
|
||
|
||
@app.get("/health")
|
||
async def health():
|
||
"""Health check endpoint"""
|
||
return {
|
||
"status": "healthy" if (CHROMADB_AVAILABLE and EMBEDDINGS_AVAILABLE) else "degraded",
|
||
"chromadb": "available" if CHROMADB_AVAILABLE else "unavailable",
|
||
"embeddings": "available" if EMBEDDINGS_AVAILABLE else "unavailable"
|
||
}
|
||
|
||
@app.post("/api/collections")
|
||
async def create_collection(collection_name: str):
|
||
"""
|
||
Створює нову колекцію
|
||
"""
|
||
try:
|
||
client = get_chroma_client()
|
||
if not client:
|
||
raise HTTPException(status_code=503, detail="ChromaDB not available")
|
||
|
||
# Створити або отримати колекцію
|
||
collection = client.get_or_create_collection(
|
||
name=collection_name,
|
||
metadata={"created_at": datetime.utcnow().isoformat()}
|
||
)
|
||
|
||
logger.info(f"✅ Created/got collection: {collection_name}")
|
||
|
||
return {
|
||
"collection": collection_name,
|
||
"created": True,
|
||
"count": collection.count()
|
||
}
|
||
|
||
except Exception as e:
|
||
logger.error(f"❌ Create collection error: {e}")
|
||
raise HTTPException(status_code=500, detail=str(e))
|
||
|
||
@app.get("/api/collections")
|
||
async def list_collections():
|
||
"""
|
||
Список всіх колекцій
|
||
"""
|
||
try:
|
||
client = get_chroma_client()
|
||
if not client:
|
||
raise HTTPException(status_code=503, detail="ChromaDB not available")
|
||
|
||
collections = client.list_collections()
|
||
|
||
return {
|
||
"collections": [
|
||
{
|
||
"name": col.name,
|
||
"count": col.count(),
|
||
"metadata": col.metadata
|
||
}
|
||
for col in collections
|
||
],
|
||
"total": len(collections)
|
||
}
|
||
|
||
except Exception as e:
|
||
logger.error(f"❌ List collections error: {e}")
|
||
raise HTTPException(status_code=500, detail=str(e))
|
||
|
||
@app.post("/api/documents")
|
||
async def add_documents(request: AddDocumentsRequest):
|
||
"""
|
||
Додає документи в колекцію
|
||
|
||
Body:
|
||
{
|
||
"collection": "knowledge_base",
|
||
"documents": [
|
||
{
|
||
"id": "doc1",
|
||
"text": "DAARION is a decentralized platform...",
|
||
"metadata": {"source": "website", "date": "2025-01-01"}
|
||
}
|
||
]
|
||
}
|
||
"""
|
||
try:
|
||
client = get_chroma_client()
|
||
if not client:
|
||
raise HTTPException(status_code=503, detail="ChromaDB not available")
|
||
|
||
model = get_embedding_model()
|
||
if not model:
|
||
raise HTTPException(status_code=503, detail="Embeddings not available")
|
||
|
||
# Отримати колекцію
|
||
collection = client.get_or_create_collection(name=request.collection)
|
||
|
||
# Підготувати дані
|
||
ids = [doc.id for doc in request.documents]
|
||
documents = [doc.text for doc in request.documents]
|
||
metadatas = [doc.metadata for doc in request.documents]
|
||
|
||
# Згенерувати embeddings
|
||
embeddings = model.encode(documents, convert_to_numpy=True).tolist()
|
||
|
||
# Додати в ChromaDB
|
||
collection.add(
|
||
ids=ids,
|
||
documents=documents,
|
||
embeddings=embeddings,
|
||
metadatas=metadatas
|
||
)
|
||
|
||
logger.info(f"✅ Added {len(documents)} documents to {request.collection}")
|
||
|
||
return {
|
||
"collection": request.collection,
|
||
"added": len(documents),
|
||
"total": collection.count()
|
||
}
|
||
|
||
except Exception as e:
|
||
logger.error(f"❌ Add documents error: {e}", exc_info=True)
|
||
raise HTTPException(status_code=500, detail=str(e))
|
||
|
||
@app.post("/api/search", response_model=SearchResponse)
|
||
async def search_documents(request: SearchRequest):
|
||
"""
|
||
Пошук документів в колекції
|
||
|
||
Body:
|
||
{
|
||
"collection": "knowledge_base",
|
||
"query": "What is DAARION?",
|
||
"n_results": 5
|
||
}
|
||
"""
|
||
try:
|
||
client = get_chroma_client()
|
||
if not client:
|
||
raise HTTPException(status_code=503, detail="ChromaDB not available")
|
||
|
||
model = get_embedding_model()
|
||
if not model:
|
||
raise HTTPException(status_code=503, detail="Embeddings not available")
|
||
|
||
# Отримати колекцію
|
||
try:
|
||
collection = client.get_collection(name=request.collection)
|
||
except:
|
||
raise HTTPException(status_code=404, detail=f"Collection '{request.collection}' not found")
|
||
|
||
# Згенерувати embedding для запиту
|
||
query_embedding = model.encode([request.query], convert_to_numpy=True).tolist()[0]
|
||
|
||
# Пошук
|
||
results = collection.query(
|
||
query_embeddings=[query_embedding],
|
||
n_results=request.n_results,
|
||
where=request.filter
|
||
)
|
||
|
||
# Форматувати результати
|
||
search_results = []
|
||
for idx in range(len(results['ids'][0])):
|
||
search_results.append(SearchResult(
|
||
id=results['ids'][0][idx],
|
||
text=results['documents'][0][idx],
|
||
score=1 - results['distances'][0][idx], # Convert distance to similarity
|
||
metadata=results['metadatas'][0][idx]
|
||
))
|
||
|
||
logger.info(f"✅ Found {len(search_results)} results for '{request.query}'")
|
||
|
||
return SearchResponse(
|
||
query=request.query,
|
||
results=search_results,
|
||
collection=request.collection,
|
||
total=len(search_results),
|
||
timestamp=datetime.utcnow().isoformat()
|
||
)
|
||
|
||
except HTTPException:
|
||
raise
|
||
except Exception as e:
|
||
logger.error(f"❌ Search error: {e}", exc_info=True)
|
||
raise HTTPException(status_code=500, detail=str(e))
|
||
|
||
@app.delete("/api/documents")
|
||
async def delete_documents(collection: str, ids: List[str]):
|
||
"""
|
||
Видаляє документи з колекції
|
||
"""
|
||
try:
|
||
client = get_chroma_client()
|
||
if not client:
|
||
raise HTTPException(status_code=503, detail="ChromaDB not available")
|
||
|
||
# Отримати колекцію
|
||
try:
|
||
coll = client.get_collection(name=collection)
|
||
except:
|
||
raise HTTPException(status_code=404, detail=f"Collection '{collection}' not found")
|
||
|
||
# Видалити документи
|
||
coll.delete(ids=ids)
|
||
|
||
logger.info(f"✅ Deleted {len(ids)} documents from {collection}")
|
||
|
||
return {
|
||
"collection": collection,
|
||
"deleted": len(ids),
|
||
"remaining": coll.count()
|
||
}
|
||
|
||
except HTTPException:
|
||
raise
|
||
except Exception as e:
|
||
logger.error(f"❌ Delete documents error: {e}")
|
||
raise HTTPException(status_code=500, detail=str(e))
|
||
|
||
@app.delete("/api/collections/{collection_name}")
|
||
async def delete_collection(collection_name: str):
|
||
"""
|
||
Видаляє колекцію
|
||
"""
|
||
try:
|
||
client = get_chroma_client()
|
||
if not client:
|
||
raise HTTPException(status_code=503, detail="ChromaDB not available")
|
||
|
||
# Видалити колекцію
|
||
client.delete_collection(name=collection_name)
|
||
|
||
logger.info(f"✅ Deleted collection: {collection_name}")
|
||
|
||
return {
|
||
"collection": collection_name,
|
||
"deleted": True
|
||
}
|
||
|
||
except Exception as e:
|
||
logger.error(f"❌ Delete collection error: {e}")
|
||
raise HTTPException(status_code=500, detail=str(e))
|
||
|
||
if __name__ == "__main__":
|
||
import uvicorn
|
||
uvicorn.run(app, host="0.0.0.0", port=8898)
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|