🧠 Add Agent Memory System with PostgreSQL + Qdrant + Cohere

Features:
- Three-tier memory architecture (short/mid/long-term)
- PostgreSQL schema for conversations, events, memories
- Qdrant vector database for semantic search
- Cohere embeddings (embed-multilingual-v3.0, 1024 dims)
- FastAPI Memory Service with full CRUD
- External Secrets integration with Vault
- Kubernetes deployment manifests

Components:
- infrastructure/database/agent-memory-schema.sql
- infrastructure/kubernetes/apps/qdrant/
- infrastructure/kubernetes/apps/memory-service/
- services/memory-service/ (FastAPI app)

Also includes:
- External Secrets Operator
- Traefik Ingress Controller
- Cert-Manager with Let's Encrypt
- ArgoCD for GitOps
This commit is contained in:
Apple
2026-01-10 07:52:32 -08:00
parent 12545a7c76
commit 90758facae
16 changed files with 2769 additions and 579 deletions

View File

@@ -0,0 +1,86 @@
"""
DAARION Memory Service - Embedding Layer (Cohere)
"""
import asyncio
from typing import List

import cohere
import structlog
from tenacity import retry, stop_after_attempt, wait_exponential

from .config import get_settings
# Module-level structured logger for embedding events.
logger = structlog.get_logger()
# Cached application settings (provides cohere_api_key and cohere_model).
settings = get_settings()
# Initialize Cohere client
# NOTE(review): cohere.Client is synchronous — calling it directly from
# async code blocks the event loop; consider cohere.AsyncClient, or run
# calls in a worker thread.
co = cohere.Client(settings.cohere_api_key)
@retry(
    stop=stop_after_attempt(3),
    wait=wait_exponential(multiplier=1, min=1, max=10)
)
async def get_embeddings(
    texts: List[str],
    input_type: str = "search_document"
) -> List[List[float]]:
    """
    Get embeddings from Cohere API.

    The Cohere client here is synchronous, so the blocking HTTP call is
    dispatched to a worker thread via ``asyncio.to_thread`` instead of
    being invoked directly — otherwise every embed request would stall
    the event loop. Retries up to 3 times with exponential backoff
    (tenacity handles async callables transparently).

    Args:
        texts: List of texts to embed
        input_type: "search_document" for indexing, "search_query" for queries

    Returns:
        List of embedding vectors (1024 dimensions for embed-multilingual-v3.0)
    """
    # Empty input: nothing to embed, skip the API round-trip entirely.
    if not texts:
        return []

    logger.info("generating_embeddings", count=len(texts), input_type=input_type)

    # co.embed is a blocking network call — run it off the event loop.
    response = await asyncio.to_thread(
        co.embed,
        texts=texts,
        model=settings.cohere_model,
        input_type=input_type,
        truncate="END"  # truncate over-long inputs at the end rather than erroring
    )

    embeddings = response.embeddings
    logger.info(
        "embeddings_generated",
        count=len(embeddings),
        dimensions=len(embeddings[0]) if embeddings else 0
    )
    return embeddings
async def get_query_embedding(query: str) -> List[float]:
    """Embed a single search query and return its vector.

    Returns an empty list if no embedding comes back.
    """
    vectors = await get_embeddings([query], input_type="search_query")
    if not vectors:
        return []
    return vectors[0]
async def get_document_embeddings(texts: List[str]) -> List[List[float]]:
    """Embed document texts (memories, summaries) for indexing."""
    document_vectors = await get_embeddings(texts, input_type="search_document")
    return document_vectors
# Batch processing for large sets
async def batch_embed(
    texts: List[str],
    input_type: str = "search_document",
    batch_size: int = 96  # Cohere limit
) -> List[List[float]]:
    """
    Embed an arbitrarily large list of texts by slicing it into
    ``batch_size``-sized chunks and embedding each chunk sequentially.
    """
    results: List[List[float]] = []
    start = 0
    total = len(texts)
    while start < total:
        chunk = texts[start:start + batch_size]
        results.extend(await get_embeddings(chunk, input_type))
        start += batch_size
    return results