feat: auto-summarize trigger for agent memory

- Memory Service: POST /agents/{agent_id}/summarize endpoint
  - Fetches recent events by agent_id (new db.list_facts_by_agent)
  - Generates structured summary via DeepSeek LLM
  - Saves summary to PostgreSQL facts + Qdrant vector store
  - Returns structured JSON (summary, goals, decisions, key_facts)

- Gateway memory_client: auto-trigger after 30 turns
  - Turn counter per chat (agent_id:channel_id)
  - 5-minute debounce between summarize calls
  - Fire-and-forget via asyncio.ensure_future (non-blocking)
  - Configurable via SUMMARIZE_TURN_THRESHOLD / SUMMARIZE_DEBOUNCE_SECONDS

- Database: list_facts_by_agent() for agent-level queries without user_id

Tested on NODE1: Helion summarize returns valid Ukrainian summary with 20 events.

Co-authored-by: Cursor <cursoragent@cursor.com>
This commit is contained in:
Apple
2026-02-09 10:15:43 -08:00
parent acceac6929
commit 0cfd3619ea
3 changed files with 263 additions and 0 deletions

View File

@@ -517,6 +517,25 @@ class Database:
return [dict(row) for row in rows]
async def list_facts_by_agent(
    self,
    agent_id: str,
    channel_id: str = None,
    limit: int = 60
) -> list:
    """List facts for an agent (any user), ordered by most recent.

    Args:
        agent_id: Agent whose rows are selected from ``user_facts``.
        channel_id: When truthy, only rows whose ``fact_key`` contains this
            value as a literal substring are returned.
        limit: Maximum number of rows to return.

    Returns:
        A list of row dicts, newest ``updated_at`` first.
    """
    query = "SELECT * FROM user_facts WHERE agent_id = $1"
    params = [agent_id]
    if channel_id:
        # LIKE treats "%" and "_" as wildcards, so an id such as "chat_42"
        # would also match "chatX42". Escape them (and the escape character
        # itself) with backslash, PostgreSQL's default LIKE escape, so the
        # id is matched literally.
        literal_channel = (
            channel_id.replace("\\", "\\\\")
            .replace("%", "\\%")
            .replace("_", "\\_")
        )
        query += " AND fact_key LIKE '%' || $2 || '%'"
        params.append(literal_channel)
    query += " ORDER BY updated_at DESC"
    # The LIMIT placeholder index depends on whether the channel filter
    # consumed $2.
    query += f" LIMIT ${len(params) + 1}"
    params.append(limit)
    async with self.pool.acquire() as conn:
        rows = await conn.fetch(query, *params)
        return [dict(row) for row in rows]
async def delete_fact(
self,
user_id: str,

View File

@@ -1011,3 +1011,169 @@ async def get_stats():
# Script entry point: when this module is executed directly, serve ``app``
# with uvicorn on all interfaces, port 8000.
# NOTE(review): uvicorn.run() presumably blocks until the server stops, so
# definitions that appear after this guard in the file would not be
# registered when the module is run as a script — confirm whether the guard
# should sit at the very end of the file.
if __name__ == "__main__":
    import uvicorn

    uvicorn.run(
        app,
        host="0.0.0.0",
        port=8000,
    )
# ============================================================================
# AGENT-LEVEL SUMMARIZE (called by Gateway auto-trigger)
# ============================================================================
class AgentSummarizeRequest(BaseModel):
    """Request to generate a summary of recent agent memory events"""

    # Restrict the summary to events recorded for this channel; None means
    # every channel of the agent.
    channel_id: Optional[str] = None

    # User the saved summary fact is attributed to; the endpoint falls back
    # to "system" when this is not provided.
    user_id: Optional[str] = None

    # How many recent events to summarize.
    max_events: int = 60

    # Bypass debounce check.
    # NOTE(review): the summarize endpoint visible in this file does not read
    # this flag — presumably the debounce lives in the caller; confirm.
    force: bool = False
def _collect_chat_events(facts, channel_id=None):
    """Extract chat-event payloads from fact rows.

    Keeps only rows whose ``fact_key`` starts with ``chat_event:``, decodes
    ``fact_value_json`` when it arrives as a JSON string, and, when
    *channel_id* is truthy, drops payloads recorded for a different channel.
    Payloads that cannot be decoded into a dict are replaced by ``{}``.
    """
    import json as json_lib

    events = []
    for fact in facts:
        # ``or ""`` keeps a None fact_key from crashing ``startswith``.
        if not (fact.get("fact_key") or "").startswith("chat_event:"):
            continue
        event_data = fact.get("fact_value_json", {})
        if isinstance(event_data, str):
            try:
                event_data = json_lib.loads(event_data)
            except (ValueError, RecursionError):
                event_data = {}
        if not isinstance(event_data, dict):
            event_data = {}
        if channel_id and event_data.get("channel_id") != channel_id:
            continue
        events.append(event_data)
    return events


async def _index_agent_summary(
    agent_id, channel_id, summary_id, summary_text, events_count, timestamp
):
    """Embed *summary_text* and upsert it into the ``{agent_id}_summaries`` collection.

    Creates the collection on first use, sized to the embedding vector.
    Errors propagate to the caller, which treats indexing as best-effort.
    """
    from .embedding import get_document_embeddings
    from qdrant_client import models as qmodels

    embeddings = await get_document_embeddings([summary_text])
    if not embeddings:
        return
    vector = embeddings[0]
    collection_name = f"{agent_id}_summaries"

    # Ensure collection exists: any failure of the lookup is treated as
    # "missing" and triggers creation.
    try:
        vector_store.client.get_collection(collection_name)
    except Exception:
        vector_store.client.create_collection(
            collection_name=collection_name,
            vectors_config=qmodels.VectorParams(
                size=len(vector),
                distance=qmodels.Distance.COSINE,
            ),
        )
        logger.info("created_summary_collection", collection=collection_name)

    vector_store.client.upsert(
        collection_name=collection_name,
        points=[
            qmodels.PointStruct(
                id=summary_id,
                vector=vector,
                payload={
                    "type": "dialog_summary",
                    "agent_id": agent_id,
                    "channel_id": channel_id,
                    "events_count": events_count,
                    "summary_text": summary_text,
                    "timestamp": timestamp,
                },
            )
        ],
    )


@app.post("/agents/{agent_id}/summarize")
async def summarize_agent_memory(agent_id: str, request: AgentSummarizeRequest):
    """
    Generate rolling summary of recent agent memory events.

    Called by Gateway when conversation reaches threshold.

    1. Fetch recent events from facts table (agent-isolated)
    2. Generate structured summary via DeepSeek LLM
    3. Save summary back as a special fact for future context retrieval
    4. Index summary in Qdrant for semantic search

    Returns a dict with ``status`` "skipped" (fewer than 5 chat events were
    found) or "ok" (summary saved; includes summary_id, summary, goals,
    decisions, key_facts, events_summarized and timestamp).

    Raises:
        HTTPException: status 500 with the error text when any step other
            than Qdrant indexing fails.
    """
    from datetime import datetime as dt
    from uuid import uuid4

    try:
        # 1. Fetch recent events for this agent (all users)
        facts = await db.list_facts_by_agent(
            agent_id=agent_id,
            channel_id=request.channel_id,
            limit=request.max_events,
        )
        events = _collect_chat_events(facts, request.channel_id)

        if len(events) < 5:
            return {
                "status": "skipped",
                "reason": f"Too few events ({len(events)}), need at least 5",
                "events_count": len(events),
            }

        # 2. Format events for LLM
        formatted_events = [
            {
                "role": e.get("role", "user"),
                "content": e.get("content", ""),
                "timestamp": e.get("timestamp", ""),
            }
            for e in events
        ]

        # 3. Generate summary via DeepSeek LLM
        llm_result = await _llm_generate_summary(formatted_events)

        # 4. Save summary as a special fact
        summary_id = str(uuid4())
        # Naive-UTC ISO string kept as-is so stored keys/timestamps keep
        # their existing format.
        timestamp = dt.utcnow().isoformat()
        summary_fact = {
            "type": "dialog_summary",
            "summary_id": summary_id,
            "agent_id": agent_id,
            "channel_id": request.channel_id,
            "summary": llm_result["summary"],
            "goals": llm_result["goals"],
            "decisions": llm_result["decisions"],
            "open_questions": llm_result["open_questions"],
            "next_steps": llm_result["next_steps"],
            "key_facts": llm_result["key_facts"],
            "events_summarized": len(events),
            "timestamp": timestamp,
        }
        # The literal string "all" marks a summary spanning every channel
        # (previously the builtin ``all`` function leaked into the key).
        channel_part = request.channel_id or "all"
        await db.ensure_facts_table()
        await db.upsert_fact(
            user_id=request.user_id or "system",
            fact_key=f"summary:{agent_id}:{channel_part}:{timestamp}",
            fact_value_json=summary_fact,
            team_id=None,
            agent_id=agent_id,
        )

        # 5. Index in Qdrant for semantic search (best-effort: a failure is
        # logged and does not fail the request, the fact is already saved).
        summary_text = llm_result["summary"]
        if summary_text and len(summary_text) > 20:
            try:
                await _index_agent_summary(
                    agent_id=agent_id,
                    channel_id=request.channel_id,
                    summary_id=summary_id,
                    summary_text=summary_text,
                    events_count=len(events),
                    timestamp=timestamp,
                )
            except Exception as ve:
                logger.warning("summary_qdrant_index_failed",
                               error=str(ve), agent_id=agent_id)

        logger.info("agent_summary_created",
                    agent_id=agent_id,
                    channel_id=request.channel_id,
                    events_count=len(events),
                    # summary may be None/empty; don't let logging raise.
                    summary_len=len(summary_text or ""))

        return {
            "status": "ok",
            "summary_id": summary_id,
            "summary": llm_result["summary"],
            "goals": llm_result["goals"],
            "decisions": llm_result["decisions"],
            "key_facts": llm_result["key_facts"],
            "events_summarized": len(events),
            "timestamp": timestamp,
        }
    except Exception as e:
        logger.error("agent_summarize_failed", error=str(e), agent_id=agent_id)
        raise HTTPException(status_code=500, detail=str(e)) from e