🧠 Add Agent Memory System with PostgreSQL + Qdrant + Cohere
Features:
- Three-tier memory architecture (short/mid/long-term)
- PostgreSQL schema for conversations, events, memories
- Qdrant vector database for semantic search
- Cohere embeddings (embed-multilingual-v3.0, 1024 dims)
- FastAPI Memory Service with full CRUD
- External Secrets integration with Vault
- Kubernetes deployment manifests

Components:
- infrastructure/database/agent-memory-schema.sql
- infrastructure/kubernetes/apps/qdrant/
- infrastructure/kubernetes/apps/memory-service/
- services/memory-service/ (FastAPI app)

Also includes:
- External Secrets Operator
- Traefik Ingress Controller
- Cert-Manager with Let's Encrypt
- ArgoCD for GitOps
This commit is contained in:
@@ -1,443 +1,483 @@
|
||||
"""
|
||||
Memory Service - FastAPI додаток
|
||||
Підтримує: user_facts, dialog_summaries, agent_memory_events
|
||||
Інтеграція з token-gate через RBAC
|
||||
DAARION Memory Service - FastAPI Application
|
||||
|
||||
Трирівнева пам'ять агентів:
|
||||
- Short-term: conversation events (робочий буфер)
|
||||
- Mid-term: thread summaries (сесійна/тематична)
|
||||
- Long-term: memory items (персональна/проектна)
|
||||
"""
|
||||
|
||||
import os
|
||||
from typing import Optional, List, Dict, Any
|
||||
from datetime import datetime
|
||||
|
||||
from fastapi import FastAPI, Depends, HTTPException, Query, Header
|
||||
from contextlib import asynccontextmanager
|
||||
from typing import List, Optional
|
||||
from uuid import UUID
|
||||
import structlog
|
||||
from fastapi import FastAPI, HTTPException, Query
|
||||
from fastapi.middleware.cors import CORSMiddleware
|
||||
from sqlalchemy.orm import Session
|
||||
from sqlalchemy import create_engine
|
||||
from sqlalchemy.orm import sessionmaker
|
||||
|
||||
from app.models import Base, UserFact, DialogSummary, AgentMemoryEvent
|
||||
from app.schemas import (
|
||||
UserFactCreate, UserFactUpdate, UserFactResponse, UserFactUpsertRequest, UserFactUpsertResponse,
|
||||
DialogSummaryCreate, DialogSummaryResponse, DialogSummaryListResponse,
|
||||
AgentMemoryEventCreate, AgentMemoryEventResponse, AgentMemoryEventListResponse,
|
||||
TokenGateCheck, TokenGateCheckResponse
|
||||
)
|
||||
from app.crud import (
|
||||
get_user_fact, get_user_facts, create_user_fact, update_user_fact,
|
||||
upsert_user_fact, delete_user_fact, get_user_facts_by_token_gate,
|
||||
create_dialog_summary, get_dialog_summaries, get_dialog_summary, delete_dialog_summary,
|
||||
create_agent_memory_event, get_agent_memory_events, delete_agent_memory_event
|
||||
from .config import get_settings
|
||||
from .models import (
|
||||
CreateThreadRequest, AddEventRequest, CreateMemoryRequest,
|
||||
MemoryFeedbackRequest, RetrievalRequest, SummaryRequest,
|
||||
ThreadResponse, EventResponse, MemoryResponse,
|
||||
SummaryResponse, RetrievalResponse, RetrievalResult,
|
||||
ContextResponse, MemoryCategory, FeedbackAction
|
||||
)
|
||||
from .vector_store import vector_store
|
||||
from .database import db
|
||||
|
||||
# ========== Configuration ==========
|
||||
logger = structlog.get_logger()
|
||||
settings = get_settings()
|
||||
|
||||
DATABASE_URL = os.getenv(
|
||||
"DATABASE_URL",
|
||||
"sqlite:///./memory.db" # SQLite для розробки, PostgreSQL для продакшену
|
||||
)
|
||||
|
||||
# Створюємо engine та sessionmaker
|
||||
engine = create_engine(DATABASE_URL)
|
||||
SessionLocal = sessionmaker(autocommit=False, autoflush=False, bind=engine)
|
||||
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Manage startup and shutdown of the service's backing stores.

    Startup connects the database and then initializes the vector store.
    Shutdown disconnects the database.

    The shutdown step runs in a ``finally`` clause so the database is
    disconnected even when vector-store initialization fails or an exception
    is thrown into the generator while the application is running.
    """
    logger.info("starting_memory_service")
    await db.connect()
    try:
        await vector_store.initialize()
        yield
    finally:
        await db.disconnect()
        logger.info("memory_service_stopped")
|
||||
|
||||
# Створюємо таблиці (для dev, в продакшені використовуйте міграції)
|
||||
Base.metadata.create_all(bind=engine)
|
||||
|
||||
# ========== FastAPI App ==========
|
||||
|
||||
app = FastAPI(
|
||||
title="Memory Service",
|
||||
description="Сервіс пам'яті для MicroDAO: user_facts, dialog_summaries, agent_memory_events",
|
||||
version="1.0.0"
|
||||
title="DAARION Memory Service",
|
||||
description="Agent memory management with PostgreSQL + Qdrant + Cohere",
|
||||
version="1.0.0",
|
||||
lifespan=lifespan
|
||||
)
|
||||
|
||||
# CORS middleware
|
||||
app.add_middleware(
|
||||
CORSMiddleware,
|
||||
allow_origins=["*"], # В продакшені обмежте це
|
||||
allow_origins=["*"],
|
||||
allow_credentials=True,
|
||||
allow_methods=["*"],
|
||||
allow_headers=["*"],
|
||||
)
|
||||
|
||||
# ========== Dependencies ==========
|
||||
|
||||
def get_db():
    """Yield a database session and close it when the consumer is done.

    A new session is created from ``SessionLocal`` for each use; the
    ``finally`` clause closes it whether or not the consumer raised.
    """
    session = SessionLocal()
    try:
        yield session
    finally:
        session.close()
|
||||
|
||||
|
||||
async def verify_token(authorization: Optional[str] = Header(None)) -> Optional[str]:
    """Resolve the calling user from the ``Authorization`` header (stub).

    WARNING: this is a placeholder. The header value is never parsed or
    validated; any non-empty header is accepted and the fixed test user id
    is returned.

    Raises:
        HTTPException: 401 when the header is missing or empty.
    """
    if authorization:
        # TODO: strip the "Bearer " prefix, verify the JWT and return the
        # user id it carries instead of this hard-coded test identity.
        return "u_test"

    raise HTTPException(status_code=401, detail="Missing authorization header")
|
||||
|
||||
|
||||
async def check_token_gate(
    user_id: str,
    token_requirements: dict,
    db: Session
) -> TokenGateCheckResponse:
    """Decide whether a user satisfies a token gate (stub).

    None of the arguments are inspected yet: the function always allows
    access.

    TODO: integrate with the PDP service (capabilities), the Wallet service
    (balances) and RBAC (roles). The intended logic is to compare the
    user's balance of ``token_requirements["token"]`` against
    ``token_requirements.get("min_balance", 0)`` and return a denial with
    ``reason`` and ``missing_requirements`` filled in when it falls short.
    """
    # Placeholder verdict: always allow.
    verdict = TokenGateCheckResponse(allowed=True)
    return verdict
|
||||
|
||||
|
||||
# ========== Health Check ==========
|
||||
# ============================================================================
|
||||
# HEALTH
|
||||
# ============================================================================
|
||||
|
||||
@app.get("/health")
|
||||
async def health_check():
|
||||
"""Health check endpoint"""
|
||||
return {"status": "ok", "service": "memory-service"}
|
||||
async def health():
|
||||
"""Health check"""
|
||||
return {
|
||||
"status": "healthy",
|
||||
"service": settings.service_name,
|
||||
"vector_store": await vector_store.get_collection_stats()
|
||||
}
|
||||
|
||||
|
||||
# ========== User Facts Endpoints ==========
|
||||
# ============================================================================
|
||||
# THREADS (Conversations)
|
||||
# ============================================================================
|
||||
|
||||
@app.post("/facts/upsert", response_model=UserFactUpsertResponse)
|
||||
async def upsert_fact(
|
||||
fact_request: UserFactUpsertRequest,
|
||||
db: Session = Depends(get_db),
|
||||
user_id: str = Depends(verify_token)
|
||||
@app.post("/threads", response_model=ThreadResponse)
async def create_thread(request: CreateThreadRequest):
    """Create a new conversation thread from the request payload.

    The request fields are passed to ``db.create_thread`` as keyword
    arguments and its result is returned unchanged.
    """
    thread_fields = {
        "org_id": request.org_id,
        "user_id": request.user_id,
        "workspace_id": request.workspace_id,
        "agent_id": request.agent_id,
        "title": request.title,
        "tags": request.tags,
        "metadata": request.metadata,
    }
    return await db.create_thread(**thread_fields)
|
||||
|
||||
|
||||
@app.get("/threads/{thread_id}", response_model=ThreadResponse)
async def get_thread(thread_id: UUID):
    """Return the thread with the given id.

    Raises:
        HTTPException: 404 when ``db.get_thread`` returns a falsy value.
    """
    found = await db.get_thread(thread_id)
    if found:
        return found
    raise HTTPException(status_code=404, detail="Thread not found")
|
||||
|
||||
|
||||
@app.get("/threads", response_model=List[ThreadResponse])
|
||||
async def list_threads(
|
||||
user_id: UUID = Query(...),
|
||||
org_id: UUID = Query(...),
|
||||
workspace_id: Optional[UUID] = None,
|
||||
agent_id: Optional[UUID] = None,
|
||||
limit: int = Query(default=20, le=100)
|
||||
):
|
||||
"""
|
||||
Створити або оновити факт користувача (upsert)
|
||||
"""List threads for user"""
|
||||
threads = await db.list_threads(
|
||||
org_id=org_id,
|
||||
user_id=user_id,
|
||||
workspace_id=workspace_id,
|
||||
agent_id=agent_id,
|
||||
limit=limit
|
||||
)
|
||||
return threads
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# EVENTS (Short-term Memory)
|
||||
# ============================================================================
|
||||
|
||||
@app.post("/events", response_model=EventResponse)
async def add_event(request: AddEventRequest):
    """Append an event (message, tool call, etc.) to a conversation thread.

    Each listed request field is forwarded to ``db.add_event`` under the
    same keyword name; the stored event is returned as-is.
    """
    forwarded_fields = (
        "thread_id",
        "event_type",
        "role",
        "content",
        "tool_name",
        "tool_input",
        "tool_output",
        "payload",
        "token_count",
        "model_used",
        "latency_ms",
        "metadata",
    )
    event_kwargs = {name: getattr(request, name) for name in forwarded_fields}
    return await db.add_event(**event_kwargs)
|
||||
|
||||
|
||||
@app.get("/threads/{thread_id}/events", response_model=List[EventResponse])
async def get_events(
    thread_id: UUID,
    limit: int = Query(default=50, ge=1, le=200),
    offset: int = Query(default=0, ge=0)
):
    """Return a page of events for a thread.

    Args:
        thread_id: Thread whose events are listed.
        limit: Page size, 1 to 200 (default 50).
        offset: Number of events to skip; must not be negative.

    Out-of-range paging values are rejected during request validation
    instead of being passed to the database layer. Ordering is decided by
    ``db.get_events`` (the original docstring states most recent first).
    """
    return await db.get_events(thread_id, limit=limit, offset=offset)
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# MEMORIES (Long-term Memory)
|
||||
# ============================================================================
|
||||
|
||||
@app.post("/memories", response_model=MemoryResponse)
|
||||
async def create_memory(request: CreateMemoryRequest):
|
||||
"""Create long-term memory item"""
|
||||
# Create in PostgreSQL
|
||||
memory = await db.create_memory(
|
||||
org_id=request.org_id,
|
||||
user_id=request.user_id,
|
||||
workspace_id=request.workspace_id,
|
||||
agent_id=request.agent_id,
|
||||
category=request.category,
|
||||
fact_text=request.fact_text,
|
||||
confidence=request.confidence,
|
||||
source_event_id=request.source_event_id,
|
||||
source_thread_id=request.source_thread_id,
|
||||
extraction_method=request.extraction_method,
|
||||
is_sensitive=request.is_sensitive,
|
||||
retention=request.retention,
|
||||
ttl_days=request.ttl_days,
|
||||
tags=request.tags,
|
||||
metadata=request.metadata
|
||||
)
|
||||
|
||||
Це основний ендпоінт для контрольованої довгострокової пам'яті.
|
||||
Підтримує токен-гейт інтеграцію.
|
||||
"""
|
||||
# Перевірка токен-гейту якщо потрібно
|
||||
if fact_request.token_gated and fact_request.token_requirements:
|
||||
gate_check = await check_token_gate(
|
||||
fact_request.user_id,
|
||||
fact_request.token_requirements,
|
||||
db
|
||||
)
|
||||
if not gate_check.allowed:
|
||||
raise HTTPException(
|
||||
status_code=403,
|
||||
detail=f"Token gate check failed: {gate_check.reason}"
|
||||
# Index in Qdrant
|
||||
point_id = await vector_store.index_memory(
|
||||
memory_id=memory["memory_id"],
|
||||
text=request.fact_text,
|
||||
org_id=request.org_id,
|
||||
user_id=request.user_id,
|
||||
category=request.category,
|
||||
agent_id=request.agent_id,
|
||||
workspace_id=request.workspace_id,
|
||||
thread_id=request.source_thread_id
|
||||
)
|
||||
|
||||
# Update memory with embedding ID
|
||||
await db.update_memory_embedding_id(memory["memory_id"], point_id)
|
||||
|
||||
return memory
|
||||
|
||||
|
||||
@app.get("/memories/{memory_id}", response_model=MemoryResponse)
async def get_memory(memory_id: UUID):
    """Return the long-term memory item with the given id.

    Raises:
        HTTPException: 404 when ``db.get_memory`` returns a falsy value.
    """
    found = await db.get_memory(memory_id)
    if found:
        return found
    raise HTTPException(status_code=404, detail="Memory not found")
|
||||
|
||||
|
||||
@app.get("/memories", response_model=List[MemoryResponse])
async def list_memories(
    user_id: UUID = Query(...),
    org_id: UUID = Query(...),
    agent_id: Optional[UUID] = None,
    workspace_id: Optional[UUID] = None,
    category: Optional[MemoryCategory] = None,
    include_global: bool = True,
    limit: int = Query(default=50, ge=1, le=200)
):
    """List memory items for a user within an organization.

    Args:
        user_id: Owner of the memories (required).
        org_id: Organization scope (required).
        agent_id: Optional agent filter.
        workspace_id: Optional workspace filter.
        category: Optional memory category filter.
        include_global: Forwarded to ``db.list_memories`` unchanged.
        limit: Maximum number of items, 1 to 200 (default 50). Values
            below 1 are rejected during request validation instead of
            reaching the database layer.
    """
    return await db.list_memories(
        org_id=org_id,
        user_id=user_id,
        agent_id=agent_id,
        workspace_id=workspace_id,
        category=category,
        include_global=include_global,
        limit=limit,
    )
|
||||
|
||||
|
||||
@app.post("/memories/{memory_id}/feedback")
async def memory_feedback(memory_id: UUID, request: MemoryFeedbackRequest):
    """Record user feedback on a memory and apply the requested action.

    Actions:
        CONFIRM: raise confidence by ``settings.memory_confirm_boost``
            (capped at 1.0) and mark the memory verified.
        REJECT: lower confidence by ``settings.memory_reject_penalty``
            (floored at 0.0). If the result is below
            ``settings.memory_min_confidence`` the memory is invalidated
            and removed from the vector store.
        EDIT: replace the fact text, re-index it in the vector store and
            store the new embedding id.
        DELETE: invalidate the memory and remove it from the vector store.

    Returns:
        ``{"status": "ok", "action": <action value>}``.

    Raises:
        HTTPException: 404 when the memory does not exist; 400 when an
            EDIT is requested without a non-empty ``new_value``.
    """
    memory = await db.get_memory(memory_id)
    if not memory:
        raise HTTPException(status_code=404, detail="Memory not found")

    # Validate before writing anything: an EDIT with no replacement text
    # would otherwise be logged as feedback and reported "ok" while
    # changing nothing.
    if request.action == FeedbackAction.EDIT and not request.new_value:
        raise HTTPException(
            status_code=400,
            detail="new_value is required for edit action"
        )

    # Record the feedback, keeping the previous text for reference.
    await db.add_memory_feedback(
        memory_id=memory_id,
        user_id=request.user_id,
        action=request.action,
        old_value=memory["fact_text"],
        new_value=request.new_value,
        reason=request.reason
    )

    if request.action == FeedbackAction.CONFIRM:
        new_confidence = min(1.0, memory["confidence"] + settings.memory_confirm_boost)
        await db.update_memory_confidence(memory_id, new_confidence, verified=True)

    elif request.action == FeedbackAction.REJECT:
        new_confidence = max(0.0, memory["confidence"] - settings.memory_reject_penalty)
        if new_confidence < settings.memory_min_confidence:
            # Confidence fell below the floor: retire the memory entirely.
            await db.invalidate_memory(memory_id)
            await vector_store.delete_memory(memory_id)
        else:
            await db.update_memory_confidence(memory_id, new_confidence)

    elif request.action == FeedbackAction.EDIT:
        await db.update_memory_text(memory_id, request.new_value)
        # Re-index with the new text so semantic search reflects the edit.
        await vector_store.delete_memory(memory_id)
        point_id = await vector_store.index_memory(
            memory_id=memory_id,
            text=request.new_value,
            org_id=memory["org_id"],
            user_id=memory["user_id"],
            category=memory["category"],
            agent_id=memory.get("agent_id"),
            workspace_id=memory.get("workspace_id")
        )
        # Keep the stored embedding id in sync with the new vector point,
        # as create_memory does after its initial indexing.
        await db.update_memory_embedding_id(memory_id, point_id)

    elif request.action == FeedbackAction.DELETE:
        await db.invalidate_memory(memory_id)
        await vector_store.delete_memory(memory_id)

    return {"status": "ok", "action": request.action.value}
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# RETRIEVAL (Semantic Search)
|
||||
# ============================================================================
|
||||
|
||||
@app.post("/retrieve", response_model=RetrievalResponse)
|
||||
async def retrieve_memories(request: RetrievalRequest):
|
||||
"""
|
||||
Semantic retrieval of relevant memories.
|
||||
|
||||
# Перевірка прав доступу (користувач може змінювати тільки свої факти)
|
||||
if fact_request.user_id != user_id:
|
||||
raise HTTPException(status_code=403, detail="Cannot modify other user's facts")
|
||||
Performs multiple queries and deduplicates results.
|
||||
"""
|
||||
all_results = []
|
||||
seen_ids = set()
|
||||
|
||||
fact, created = upsert_user_fact(db, fact_request)
|
||||
for query in request.queries:
|
||||
results = await vector_store.search_memories(
|
||||
query=query,
|
||||
org_id=request.org_id,
|
||||
user_id=request.user_id,
|
||||
agent_id=request.agent_id,
|
||||
workspace_id=request.workspace_id,
|
||||
categories=request.categories,
|
||||
include_global=request.include_global,
|
||||
top_k=request.top_k
|
||||
)
|
||||
|
||||
for r in results:
|
||||
memory_id = r.get("memory_id")
|
||||
if memory_id and memory_id not in seen_ids:
|
||||
seen_ids.add(memory_id)
|
||||
|
||||
# Get full memory from DB for confidence check
|
||||
memory = await db.get_memory(UUID(memory_id))
|
||||
if memory and memory["confidence"] >= request.min_confidence:
|
||||
all_results.append(RetrievalResult(
|
||||
memory_id=UUID(memory_id),
|
||||
fact_text=r["text"],
|
||||
category=MemoryCategory(r["category"]),
|
||||
confidence=memory["confidence"],
|
||||
relevance_score=r["score"],
|
||||
agent_id=UUID(r["agent_id"]) if r.get("agent_id") else None,
|
||||
is_global=r.get("agent_id") is None
|
||||
))
|
||||
|
||||
# Update usage stats
|
||||
await db.increment_memory_usage(UUID(memory_id))
|
||||
|
||||
return UserFactUpsertResponse(
|
||||
fact=UserFactResponse.model_validate(fact),
|
||||
created=created
|
||||
# Sort by relevance
|
||||
all_results.sort(key=lambda x: x.relevance_score, reverse=True)
|
||||
|
||||
return RetrievalResponse(
|
||||
results=all_results[:request.top_k],
|
||||
query_count=len(request.queries),
|
||||
total_results=len(all_results)
|
||||
)
|
||||
|
||||
|
||||
@app.get("/facts", response_model=List[UserFactResponse])
async def list_facts(
    team_id: Optional[str] = Query(None),
    fact_keys: Optional[str] = Query(None, description="Comma-separated list of fact keys"),
    skip: int = Query(0, ge=0),
    limit: int = Query(100, ge=1, le=1000),
    db: Session = Depends(get_db),
    user_id: str = Depends(verify_token)
):
    """List the authenticated user's facts.

    ``fact_keys`` is split on commas and each piece is stripped of
    surrounding whitespace. When it is omitted or empty, ``None`` is passed
    to ``get_user_facts`` as the key list.
    """
    wanted_keys = (
        [piece.strip() for piece in fact_keys.split(",")] if fact_keys else None
    )
    rows = get_user_facts(db, user_id, team_id, wanted_keys, skip, limit)
    return list(map(UserFactResponse.model_validate, rows))
|
||||
# ============================================================================
|
||||
# SUMMARIES (Mid-term Memory)
|
||||
# ============================================================================
|
||||
|
||||
|
||||
@app.get("/facts/{fact_key}", response_model=UserFactResponse)
async def get_fact(
    fact_key: str,
    team_id: Optional[str] = Query(None),
    db: Session = Depends(get_db),
    user_id: str = Depends(verify_token)
):
    """Return one fact of the authenticated user, looked up by key.

    Raises:
        HTTPException: 404 when ``get_user_fact`` returns a falsy value.
    """
    row = get_user_fact(db, user_id, fact_key, team_id)
    if row:
        return UserFactResponse.model_validate(row)
    raise HTTPException(status_code=404, detail="Fact not found")
|
||||
|
||||
|
||||
@app.post("/facts", response_model=UserFactResponse)
async def create_fact(
    fact: UserFactCreate,
    db: Session = Depends(get_db),
    user_id: str = Depends(verify_token)
):
    """Create a new fact for the authenticated user.

    Raises:
        HTTPException: 403 when the payload's ``user_id`` differs from the
            authenticated user.
    """
    # A caller may only create facts under their own user id.
    if fact.user_id != user_id:
        raise HTTPException(status_code=403, detail="Cannot create fact for other user")

    return UserFactResponse.model_validate(create_user_fact(db, fact))
|
||||
|
||||
|
||||
@app.patch("/facts/{fact_id}", response_model=UserFactResponse)
async def update_fact(
    fact_id: str,
    fact_update: UserFactUpdate,
    db: Session = Depends(get_db),
    user_id: str = Depends(verify_token)
):
    """Update an existing fact owned by the authenticated user.

    Raises:
        HTTPException: 404 when the fact does not exist (before or after
            the update call); 403 when it belongs to a different user.
    """
    existing = db.query(UserFact).filter(UserFact.id == fact_id).first()
    if not existing:
        raise HTTPException(status_code=404, detail="Fact not found")

    # Ownership check: only the fact's owner may modify it.
    if existing.user_id != user_id:
        raise HTTPException(status_code=403, detail="Cannot modify other user's fact")

    refreshed = update_user_fact(db, fact_id, fact_update)
    if refreshed:
        return UserFactResponse.model_validate(refreshed)

    raise HTTPException(status_code=404, detail="Fact not found")
|
||||
|
||||
|
||||
@app.delete("/facts/{fact_id}")
async def delete_fact(
    fact_id: str,
    db: Session = Depends(get_db),
    user_id: str = Depends(verify_token)
):
    """Delete a fact owned by the authenticated user.

    Returns:
        ``{"success": True}`` when the fact was deleted.

    Raises:
        HTTPException: 404 when the fact does not exist or the delete call
            reports failure; 403 when it belongs to a different user.
    """
    existing = db.query(UserFact).filter(UserFact.id == fact_id).first()
    if not existing:
        raise HTTPException(status_code=404, detail="Fact not found")

    # Ownership check: only the fact's owner may delete it.
    if existing.user_id != user_id:
        raise HTTPException(status_code=403, detail="Cannot delete other user's fact")

    if delete_user_fact(db, fact_id):
        return {"success": True}

    raise HTTPException(status_code=404, detail="Fact not found")
|
||||
|
||||
|
||||
# NOTE(review): this route is registered after "/facts/{fact_key}", and routes
# are matched in declaration order, so GET /facts/token-gated is likely served
# by get_fact with fact_key="token-gated". Confirm and, if so, register this
# route before the parameterized one.
@app.get("/facts/token-gated", response_model=List[UserFactResponse])
async def list_token_gated_facts(
    team_id: Optional[str] = Query(None),
    db: Session = Depends(get_db),
    user_id: str = Depends(verify_token)
):
    """List the authenticated user's token-gated facts.

    Rows come from ``get_user_facts_by_token_gate`` and each is converted
    to a ``UserFactResponse``.
    """
    rows = get_user_facts_by_token_gate(db, user_id, team_id)
    return list(map(UserFactResponse.model_validate, rows))
|
||||
|
||||
|
||||
# ========== Dialog Summary Endpoints ==========
|
||||
|
||||
@app.post("/summaries", response_model=DialogSummaryResponse)
|
||||
async def create_summary(
|
||||
summary: DialogSummaryCreate,
|
||||
db: Session = Depends(get_db),
|
||||
user_id: str = Depends(verify_token)
|
||||
):
|
||||
@app.post("/threads/{thread_id}/summarize", response_model=SummaryResponse)
|
||||
async def create_summary(thread_id: UUID, request: SummaryRequest):
|
||||
"""
|
||||
Створити підсумок діалогу
|
||||
Generate rolling summary for thread.
|
||||
|
||||
Використовується для масштабування без переповнення контексту.
|
||||
Агрегує інформацію про сесії/діалоги.
|
||||
Compresses old events into a structured summary.
|
||||
"""
|
||||
db_summary = create_dialog_summary(db, summary)
|
||||
return DialogSummaryResponse.model_validate(db_summary)
|
||||
|
||||
|
||||
@app.get("/summaries", response_model=DialogSummaryListResponse)
|
||||
async def list_summaries(
|
||||
team_id: Optional[str] = Query(None),
|
||||
channel_id: Optional[str] = Query(None),
|
||||
agent_id: Optional[str] = Query(None),
|
||||
user_id_param: Optional[str] = Query(None, alias="user_id"),
|
||||
skip: int = Query(0, ge=0),
|
||||
limit: int = Query(50, ge=1, le=200),
|
||||
cursor: Optional[str] = Query(None),
|
||||
db: Session = Depends(get_db),
|
||||
user_id: str = Depends(verify_token)
|
||||
):
|
||||
"""Отримати список підсумків діалогів"""
|
||||
summaries, next_cursor = get_dialog_summaries(
|
||||
db, team_id, channel_id, agent_id, user_id_param, skip, limit, cursor
|
||||
thread = await db.get_thread(thread_id)
|
||||
if not thread:
|
||||
raise HTTPException(status_code=404, detail="Thread not found")
|
||||
|
||||
# Check if summary is needed
|
||||
if not request.force and thread["total_tokens"] < settings.summary_trigger_tokens:
|
||||
raise HTTPException(
|
||||
status_code=400,
|
||||
detail=f"Token count ({thread['total_tokens']}) below threshold ({settings.summary_trigger_tokens})"
|
||||
)
|
||||
|
||||
# Get events to summarize
|
||||
events = await db.get_events_for_summary(thread_id)
|
||||
|
||||
# TODO: Call LLM to generate summary
|
||||
# For now, create a placeholder
|
||||
summary_text = f"Summary of {len(events)} events. [Implement LLM summarization]"
|
||||
state = {
|
||||
"goals": [],
|
||||
"decisions": [],
|
||||
"open_questions": [],
|
||||
"next_steps": [],
|
||||
"key_facts": []
|
||||
}
|
||||
|
||||
# Create summary
|
||||
summary = await db.create_summary(
|
||||
thread_id=thread_id,
|
||||
summary_text=summary_text,
|
||||
state=state,
|
||||
events_from_seq=events[0]["sequence_num"] if events else 0,
|
||||
events_to_seq=events[-1]["sequence_num"] if events else 0,
|
||||
events_count=len(events)
|
||||
)
|
||||
|
||||
return DialogSummaryListResponse(
|
||||
items=[DialogSummaryResponse.model_validate(s) for s in summaries],
|
||||
total=len(summaries),
|
||||
cursor=next_cursor
|
||||
# Index summary in Qdrant
|
||||
await vector_store.index_summary(
|
||||
summary_id=summary["summary_id"],
|
||||
text=summary_text,
|
||||
thread_id=thread_id,
|
||||
org_id=thread["org_id"],
|
||||
user_id=thread["user_id"],
|
||||
agent_id=thread.get("agent_id"),
|
||||
workspace_id=thread.get("workspace_id")
|
||||
)
|
||||
|
||||
return summary
|
||||
|
||||
|
||||
@app.get("/threads/{thread_id}/summary", response_model=Optional[SummaryResponse])
async def get_latest_summary(thread_id: UUID):
    """Return whatever ``db.get_latest_summary`` yields for the thread.

    The response model is optional, so no 404 is raised here.
    """
    return await db.get_latest_summary(thread_id)
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# CONTEXT (Full context for agent)
|
||||
# ============================================================================
|
||||
|
||||
@app.get("/threads/{thread_id}/context", response_model=ContextResponse)
async def get_context(
    thread_id: UUID,
    queries: List[str] = Query(default=[]),
    top_k: int = Query(default=10)
):
    """Assemble the full prompt context for an agent working on a thread.

    The context combines:
    - the latest thread summary (mid-term memory),
    - the most recent events, limited to
      ``settings.short_term_window_messages`` (short-term memory),
    - memories retrieved for ``queries`` (long-term memory); retrieval is
      skipped when no queries are given.

    ``token_estimate`` sums the events' ``token_count`` values and the
    summary's ``summary_tokens``, counting missing or ``None`` values as 0.
    Retrieved memories are not included in the estimate.

    Raises:
        HTTPException: 404 when the thread does not exist.
    """
    thread = await db.get_thread(thread_id)
    if not thread:
        raise HTTPException(status_code=404, detail="Thread not found")

    latest_summary = await db.get_latest_summary(thread_id)
    recent_events = await db.get_events(
        thread_id,
        limit=settings.short_term_window_messages
    )

    if queries:
        # Reuse the /retrieve handler, scoped to the thread's owner.
        retrieval_request = RetrievalRequest(
            org_id=thread["org_id"],
            user_id=thread["user_id"],
            agent_id=thread.get("agent_id"),
            workspace_id=thread.get("workspace_id"),
            queries=queries,
            top_k=top_k,
            include_global=True
        )
        memories = (await retrieve_memories(retrieval_request)).results
    else:
        memories = []

    token_estimate = 0
    for event in recent_events:
        token_estimate += event.get("token_count", 0) or 0
    if latest_summary:
        token_estimate += latest_summary.get("summary_tokens", 0) or 0

    return ContextResponse(
        thread_id=thread_id,
        summary=latest_summary,
        recent_messages=recent_events,
        retrieved_memories=memories,
        token_estimate=token_estimate
    )
|
||||
|
||||
|
||||
@app.get("/summaries/{summary_id}", response_model=DialogSummaryResponse)
async def get_summary(
    summary_id: str,
    db: Session = Depends(get_db),
    user_id: str = Depends(verify_token)
):
    """Return the dialog summary with the given id.

    Raises:
        HTTPException: 404 when ``get_dialog_summary`` returns a falsy value.
    """
    row = get_dialog_summary(db, summary_id)
    if row:
        return DialogSummaryResponse.model_validate(row)
    raise HTTPException(status_code=404, detail="Summary not found")
|
||||
# ============================================================================
|
||||
# ADMIN
|
||||
# ============================================================================
|
||||
|
||||
|
||||
@app.delete("/summaries/{summary_id}")
async def delete_summary(
    summary_id: str,
    db: Session = Depends(get_db),
    user_id: str = Depends(verify_token)
):
    """Delete the dialog summary with the given id.

    Returns:
        ``{"success": True}`` when the summary was deleted.

    Raises:
        HTTPException: 404 when ``delete_dialog_summary`` reports failure.
    """
    if delete_dialog_summary(db, summary_id):
        return {"success": True}
    raise HTTPException(status_code=404, detail="Summary not found")
|
||||
|
||||
|
||||
# ========== Agent Memory Event Endpoints ==========
|
||||
|
||||
@app.post("/agents/{agent_id}/memory", response_model=AgentMemoryEventResponse)
async def create_memory_event(
    agent_id: str,
    event: AgentMemoryEventCreate,
    db: Session = Depends(get_db),
    user_id: str = Depends(verify_token)
):
    """Create a memory event for an agent.

    Raises:
        HTTPException: 400 when the ``agent_id`` in the payload differs
            from the one in the URL path.
    """
    # The path and the payload must name the same agent.
    if event.agent_id != agent_id:
        raise HTTPException(status_code=400, detail="agent_id mismatch")

    stored = create_agent_memory_event(db, event)
    return AgentMemoryEventResponse.model_validate(stored)
|
||||
|
||||
|
||||
@app.get("/agents/{agent_id}/memory", response_model=AgentMemoryEventListResponse)
async def list_memory_events(
    agent_id: str,
    team_id: Optional[str] = Query(None),
    channel_id: Optional[str] = Query(None),
    scope: Optional[str] = Query(None, description="short_term | mid_term | long_term"),
    kind: Optional[str] = Query(None, description="message | fact | summary | note"),
    skip: int = Query(0, ge=0),
    limit: int = Query(50, ge=1, le=200),
    cursor: Optional[str] = Query(None),
    db: Session = Depends(get_db),
    user_id: str = Depends(verify_token)
):
    """List an agent's memory events with optional filters and paging.

    ``total`` in the response is the number of items in this page
    (``len(events)``), not the overall number of matching events.
    """
    page = get_agent_memory_events(
        db, agent_id, team_id, channel_id, scope, kind, skip, limit, cursor
    )
    events, next_cursor = page

    return AgentMemoryEventListResponse(
        items=list(map(AgentMemoryEventResponse.model_validate, events)),
        total=len(events),
        cursor=next_cursor
    )
|
||||
|
||||
|
||||
@app.delete("/agents/{agent_id}/memory/{event_id}")
async def delete_memory_event(
    agent_id: str,
    event_id: str,
    db: Session = Depends(get_db),
    user_id: str = Depends(verify_token)
):
    """Delete a memory event by id.

    NOTE(review): ``agent_id`` from the path is not used in the delete call,
    so the event is removed whichever agent the URL names. Confirm whether
    it should be checked against the event's agent.

    Returns:
        ``{"success": True}`` when the event was deleted.

    Raises:
        HTTPException: 404 when ``delete_agent_memory_event`` reports failure.
    """
    if delete_agent_memory_event(db, event_id):
        return {"success": True}
    raise HTTPException(status_code=404, detail="Memory event not found")
|
||||
|
||||
|
||||
# ========== Monitor Events Endpoints (Batch Processing) ==========
|
||||
|
||||
from app.monitor_events import MonitorEventBatch, MonitorEventResponse, save_monitor_events_batch, save_monitor_event_single
|
||||
|
||||
@app.post("/api/memory/monitor-events/batch", response_model=MonitorEventResponse)
async def save_monitor_events_batch_endpoint(
    batch: MonitorEventBatch,
    db: Session = Depends(get_db),
    authorization: Optional[str] = Header(None)
):
    """Store a batch of Monitor Agent events.

    Thin wrapper: the batch, the session and the raw ``Authorization``
    header are handed to ``save_monitor_events_batch``. Per the original
    docstring, this is optimized for collecting metrics from many nodes.
    """
    outcome = await save_monitor_events_batch(batch, db, authorization)
    return outcome
|
||||
|
||||
@app.post("/api/memory/monitor-events/{node_id}", response_model=AgentMemoryEventResponse)
async def save_monitor_event_endpoint(
    node_id: str,
    event: Dict[str, Any],
    db: Session = Depends(get_db),
    authorization: Optional[str] = Header(None)
):
    """Store a single Monitor Agent event for a node.

    Thin wrapper: the node id, the event body, the session and the raw
    ``Authorization`` header are handed to ``save_monitor_event_single``.
    """
    outcome = await save_monitor_event_single(node_id, event, db, authorization)
    return outcome
|
||||
|
||||
|
||||
# ========== Token Gate Integration Endpoint ==========
|
||||
|
||||
@app.post("/token-gate/check", response_model=TokenGateCheckResponse)
async def check_token_gate_endpoint(
    check: TokenGateCheck,
    db: Session = Depends(get_db),
    user_id: str = Depends(verify_token)
):
    """Run a token-gate check on behalf of the authenticated user.

    The decision is delegated to ``check_token_gate``, the intended
    integration point with the RBAC and Wallet services.

    Raises:
        HTTPException: 403 when the payload's ``user_id`` differs from the
            authenticated user.
    """
    # A caller may only check token gates for themselves.
    if check.user_id != user_id:
        raise HTTPException(status_code=403, detail="Cannot check token gate for other user")

    verdict = await check_token_gate(user_id, check.token_requirements, db)
    return verdict
|
||||
@app.get("/stats")
async def get_stats():
    """Return service statistics from the vector store and the database.

    The vector-store stats are fetched first, then the database stats.
    """
    vector_stats = await vector_store.get_collection_stats()
    database_stats = await db.get_stats()
    return {
        "vector_store": vector_stats,
        "database": database_stats,
    }
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
import uvicorn
|
||||
uvicorn.run("app.main:app", host="0.0.0.0", port=8000, reload=True)
|
||||
|
||||
uvicorn.run(app, host="0.0.0.0", port=8000)
|
||||
|
||||
Reference in New Issue
Block a user