🧠 Add Agent Memory System with PostgreSQL + Qdrant + Cohere
Features: - Three-tier memory architecture (short/mid/long-term) - PostgreSQL schema for conversations, events, memories - Qdrant vector database for semantic search - Cohere embeddings (embed-multilingual-v3.0, 1024 dims) - FastAPI Memory Service with full CRUD - External Secrets integration with Vault - Kubernetes deployment manifests Components: - infrastructure/database/agent-memory-schema.sql - infrastructure/kubernetes/apps/qdrant/ - infrastructure/kubernetes/apps/memory-service/ - services/memory-service/ (FastAPI app) Also includes: - External Secrets Operator - Traefik Ingress Controller - Cert-Manager with Let's Encrypt - ArgoCD for GitOps
This commit is contained in:
86
services/memory-service/app/embedding.py
Normal file
86
services/memory-service/app/embedding.py
Normal file
@@ -0,0 +1,86 @@
|
||||
"""
|
||||
DAARION Memory Service - Embedding Layer (Cohere)
|
||||
"""
|
||||
import asyncio
from typing import List

import cohere
import structlog
from tenacity import retry, stop_after_attempt, wait_exponential

from .config import get_settings
|
||||
|
||||
# Module-level structured logger for the embedding layer.
logger = structlog.get_logger()

# Service settings (Cohere API key and model name) loaded once at import time.
settings = get_settings()


# Initialize Cohere client
# NOTE(review): this is the synchronous Cohere client; any call to it from
# async code blocks the event loop unless offloaded to a worker thread.
co = cohere.Client(settings.cohere_api_key)
|
||||
|
||||
|
||||
@retry(
    stop=stop_after_attempt(3),
    wait=wait_exponential(multiplier=1, min=1, max=10)
)
async def get_embeddings(
    texts: List[str],
    input_type: str = "search_document"
) -> List[List[float]]:
    """
    Get embeddings from Cohere API.

    Retries up to 3 times with exponential backoff (1s–10s) on any
    exception raised by the Cohere client.

    Args:
        texts: List of texts to embed
        input_type: "search_document" for indexing, "search_query" for queries

    Returns:
        List of embedding vectors (1024 dimensions for embed-multilingual-v3.0),
        or [] when texts is empty.
    """
    # Guard the empty case so we never hit the API (and never index
    # into an empty embeddings list below).
    if not texts:
        return []

    logger.info("generating_embeddings", count=len(texts), input_type=input_type)

    # BUGFIX: co.embed is a blocking HTTP call on the synchronous Cohere
    # client. Calling it directly inside this coroutine froze the event
    # loop for the whole request (and every tenacity retry). Run it in a
    # worker thread so other tasks keep making progress.
    response = await asyncio.to_thread(
        co.embed,
        texts=texts,
        model=settings.cohere_model,
        input_type=input_type,
        truncate="END"  # truncate over-long inputs rather than erroring
    )

    embeddings = response.embeddings

    logger.info(
        "embeddings_generated",
        count=len(embeddings),
        dimensions=len(embeddings[0]) if embeddings else 0
    )

    return embeddings
|
||||
|
||||
|
||||
async def get_query_embedding(query: str) -> List[float]:
    """Embed a single search query; returns [] when no vector is produced."""
    vectors = await get_embeddings([query], input_type="search_query")
    if not vectors:
        return []
    return vectors[0]
|
||||
|
||||
|
||||
async def get_document_embeddings(texts: List[str]) -> List[List[float]]:
    """Embed document texts (memories, summaries) for indexing."""
    document_vectors = await get_embeddings(texts, input_type="search_document")
    return document_vectors
|
||||
|
||||
|
||||
# Batch processing for large sets
|
||||
# Batch processing for large sets
async def batch_embed(
    texts: List[str],
    input_type: str = "search_document",
    batch_size: int = 96  # Cohere limit
) -> List[List[float]]:
    """
    Embed an arbitrarily large list of texts by splitting it into
    chunks of at most batch_size and concatenating the results in order.
    """
    collected: List[List[float]] = []
    start = 0
    total = len(texts)

    while start < total:
        chunk = texts[start:start + batch_size]
        # Each chunk goes through get_embeddings, which carries the retry policy.
        chunk_vectors = await get_embeddings(chunk, input_type)
        collected.extend(chunk_vectors)
        start += batch_size

    return collected
|
||||
Reference in New Issue
Block a user