Features:
- Three-tier memory architecture (short/mid/long-term)
- PostgreSQL schema for conversations, events, memories
- Qdrant vector database for semantic search
- Cohere embeddings (embed-multilingual-v3.0, 1024 dims)
- FastAPI Memory Service with full CRUD
- External Secrets integration with Vault
- Kubernetes deployment manifests

Components:
- infrastructure/database/agent-memory-schema.sql
- infrastructure/kubernetes/apps/qdrant/
- infrastructure/kubernetes/apps/memory-service/
- services/memory-service/ (FastAPI app)

Also includes:
- External Secrets Operator
- Traefik Ingress Controller
- Cert-Manager with Let's Encrypt
- ArgoCD for GitOps
87 lines
2.2 KiB
Python
87 lines
2.2 KiB
Python
"""
|
|
DAARION Memory Service - Embedding Layer (Cohere)
|
|
"""
|
|
import cohere
|
|
from typing import List
|
|
import structlog
|
|
from tenacity import retry, stop_after_attempt, wait_exponential
|
|
|
|
from .config import get_settings
|
|
|
|
# Structured logger shared by every function in this module.
logger = structlog.get_logger()

# Service settings; they supply the Cohere API key and the embedding model name.
settings = get_settings()

# Module-wide synchronous Cohere client, created once at import time.
co = cohere.Client(settings.cohere_api_key)
|
|
|
|
|
|
@retry(
    stop=stop_after_attempt(3),
    wait=wait_exponential(multiplier=1, min=1, max=10)
)
async def get_embeddings(
    texts: List[str],
    input_type: str = "search_document"
) -> List[List[float]]:
    """
    Get embeddings from Cohere API.

    The module-level Cohere client is synchronous, so the request is run in
    the event loop's default executor. Awaiting it there keeps the loop free
    to serve other coroutines while the HTTP call is in flight.

    Any exception raised inside the function triggers a retry: up to three
    attempts in total, with exponential back-off between 1 and 10 seconds.

    Args:
        texts: List of texts to embed
        input_type: "search_document" for indexing, "search_query" for queries

    Returns:
        List of embedding vectors (1024 dimensions for embed-multilingual-v3.0).
        An empty list is returned immediately, without calling the API, when
        ``texts`` is empty.

    Raises:
        tenacity.RetryError: When all three attempts fail (the decorator does
            not set ``reraise=True``, so the last error is wrapped).
    """
    # Function-scope imports: these names are only needed for the executor
    # hand-off below, and importing them here keeps the block self-contained.
    import asyncio
    import functools

    if not texts:
        return []

    logger.info("generating_embeddings", count=len(texts), input_type=input_type)

    # run_in_executor() only forwards positional arguments, so the keyword
    # arguments are bound up front with functools.partial.
    embed_call = functools.partial(
        co.embed,
        texts=texts,
        model=settings.cohere_model,
        input_type=input_type,
        truncate="END",
    )
    response = await asyncio.get_running_loop().run_in_executor(None, embed_call)

    embeddings = response.embeddings

    logger.info(
        "embeddings_generated",
        count=len(embeddings),
        dimensions=len(embeddings[0]) if embeddings else 0
    )

    return embeddings
|
|
|
|
|
|
async def get_query_embedding(query: str) -> List[float]:
    """Embed a single search query and return its vector.

    Delegates to ``get_embeddings`` with ``input_type="search_query"``.
    Returns an empty list if no embedding comes back.
    """
    vectors = await get_embeddings([query], input_type="search_query")
    if not vectors:
        return []
    return vectors[0]
|
|
|
|
|
|
async def get_document_embeddings(texts: List[str]) -> List[List[float]]:
    """Embed documents (memories, summaries) for indexing.

    Delegates to ``get_embeddings`` with ``input_type="search_document"``
    and returns its result unchanged.
    """
    document_vectors = await get_embeddings(texts, input_type="search_document")
    return document_vectors
|
|
|
|
|
|
# Batch processing for large sets
|
|
async def batch_embed(
    texts: List[str],
    input_type: str = "search_document",
    batch_size: int = 96  # Cohere limit
) -> List[List[float]]:
    """
    Embed large number of texts in batches.

    The input is cut into consecutive slices of at most ``batch_size`` texts.
    Each slice is embedded with ``get_embeddings``, one after another, and
    the results are concatenated in slice order.

    Args:
        texts: Texts to embed.
        input_type: Passed through to ``get_embeddings`` for every batch.
        batch_size: Maximum number of texts per request; must be positive.

    Returns:
        The concatenated embedding vectors from all batches. Empty when
        ``texts`` is empty.

    Raises:
        ValueError: If ``batch_size`` is not positive.
    """
    # A negative step makes range() empty, which would silently discard every
    # input and return []; reject bad sizes explicitly instead.
    if batch_size <= 0:
        raise ValueError(f"batch_size must be a positive integer, got {batch_size}")

    all_embeddings: List[List[float]] = []

    for start in range(0, len(texts), batch_size):
        batch = texts[start:start + batch_size]
        all_embeddings.extend(await get_embeddings(batch, input_type))

    return all_embeddings
|