feat: auto-summarize trigger for agent memory
- Memory Service: POST /agents/{agent_id}/summarize endpoint
- Fetches recent events by agent_id (new db.list_facts_by_agent)
- Generates structured summary via DeepSeek LLM
- Saves summary to PostgreSQL facts + Qdrant vector store
- Returns structured JSON (summary, goals, decisions, key_facts)
- Gateway memory_client: auto-trigger after 30 turns
- Turn counter per chat (agent_id:channel_id)
- 5-minute debounce between summarize calls
- Fire-and-forget via asyncio.ensure_future (non-blocking)
- Configurable via SUMMARIZE_TURN_THRESHOLD / SUMMARIZE_DEBOUNCE_SECONDS
- Database: list_facts_by_agent() for agent-level queries without user_id
Tested on NODE1: Helion summarize returns valid Ukrainian summary with 20 events.
Co-authored-by: Cursor <cursoragent@cursor.com>
This commit is contained in:
@@ -13,6 +13,10 @@ MEMORY_SERVICE_URL = os.getenv("MEMORY_SERVICE_URL", "http://memory-service:8000
|
||||
CONTEXT_CACHE_TTL = float(os.getenv("MEMORY_CONTEXT_CACHE_TTL", "5"))
|
||||
LOCAL_CONTEXT_MAX_MESSAGES = int(os.getenv("LOCAL_CONTEXT_MAX_MESSAGES", "50"))
|
||||
|
||||
# Auto-summarize trigger configuration
|
||||
SUMMARIZE_TURN_THRESHOLD = int(os.getenv("SUMMARIZE_TURN_THRESHOLD", "30"))
|
||||
SUMMARIZE_DEBOUNCE_SECONDS = int(os.getenv("SUMMARIZE_DEBOUNCE_SECONDS", "300")) # 5 min
|
||||
|
||||
# =====================================
|
||||
# LOCAL CONTEXT STORE (fallback when Memory Service unavailable)
|
||||
# =====================================
|
||||
@@ -69,6 +73,9 @@ class MemoryClient:
|
||||
self.base_url = base_url.rstrip("/")
|
||||
self.timeout = 10.0
|
||||
self._context_cache: Dict[str, Tuple[float, Dict[str, Any]]] = {}
|
||||
# Auto-summarize state
|
||||
self._turn_counters: Dict[str, int] = {}
|
||||
self._last_summarize: Dict[str, float] = {}
|
||||
|
||||
def _cache_key(
|
||||
self,
|
||||
@@ -258,12 +265,83 @@ class MemoryClient:
|
||||
headers={"Authorization": f"Bearer {user_id}"}
|
||||
)
|
||||
|
||||
# Auto-summarize trigger (fire-and-forget, non-blocking)
|
||||
try:
|
||||
asyncio.ensure_future(self._maybe_trigger_summarize(
|
||||
agent_id=agent_id,
|
||||
channel_id=channel_id,
|
||||
user_id=user_id,
|
||||
team_id=team_id
|
||||
))
|
||||
except Exception as trigger_err:
|
||||
logger.debug(f"Summarize trigger scheduling failed: {trigger_err}")
|
||||
|
||||
return True
|
||||
except Exception as e:
|
||||
# Memory Service недоступний - але локальний контекст вже збережено
|
||||
logger.debug(f"Memory Service unavailable (using local context): {e}")
|
||||
return True # Return True because local context was saved
|
||||
|
||||
async def _maybe_trigger_summarize(
    self,
    agent_id: str,
    channel_id,
    user_id: str,
    team_id=None
) -> None:
    """
    Ask the Memory Service to summarize a chat once enough turns accumulate.

    Each call counts as one turn for the chat identified by
    ``agent_id`` plus ``channel_id`` (or ``user_id`` when ``channel_id``
    is falsy). When the counter reaches ``SUMMARIZE_TURN_THRESHOLD`` and
    no summarize request was issued for this chat within the last
    ``SUMMARIZE_DEBOUNCE_SECONDS``, the counter is reset and a POST is
    sent to ``/agents/{agent_id}/summarize``.

    Args:
        agent_id: Agent whose memory should be summarized.
        channel_id: Channel identifier; may be falsy, in which case
            ``user_id`` is used to build the per-chat key.
        user_id: User identifier; also sent as the bearer token.
        team_id: Accepted for call-site compatibility; not used here.

    Returns:
        None. HTTP and parsing failures are logged, never raised.
    """
    chat_key = f"{agent_id}:{channel_id or user_id}"

    # Count this turn for the chat.
    turn_count = self._turn_counters.get(chat_key, 0) + 1
    self._turn_counters[chat_key] = turn_count

    # Below the threshold there is nothing to do yet.
    if turn_count < SUMMARIZE_TURN_THRESHOLD:
        return

    # Debounce. ``None`` means "never summarized": time.monotonic() has an
    # undefined reference point (often time since boot), so defaulting the
    # last timestamp to 0 would wrongly suppress the first summary while
    # the clock reading is still smaller than the debounce window.
    now = time.monotonic()
    last = self._last_summarize.get(chat_key)
    if last is not None and now - last < SUMMARIZE_DEBOUNCE_SECONDS:
        logger.debug(f"Summarize debounce active for {chat_key}, skipping")
        return

    # Reset the counter and stamp the attempt *before* awaiting the request,
    # so turns arriving while it is in flight do not trigger a second call.
    self._turn_counters[chat_key] = 0
    self._last_summarize[chat_key] = now

    # Best-effort request: any failure is logged and swallowed.
    try:
        async with httpx.AsyncClient(timeout=30.0) as client:
            resp = await client.post(
                f"{self.base_url}/agents/{agent_id}/summarize",
                json={
                    "channel_id": channel_id,
                    "user_id": user_id,
                    "max_events": 60,
                    "force": False
                },
                headers={"Authorization": f"Bearer {user_id}"}
            )
            if resp.status_code == 200:
                data = resp.json()
                # ``or ''`` guards against an explicit null summary, which
                # would otherwise make len() raise and be misreported below
                # as a failed request.
                logger.info(
                    f"Auto-summary created for {chat_key}: "
                    f"events={data.get('events_summarized', '?')}, "
                    f"summary_len={len(data.get('summary') or '')}"
                )
            else:
                logger.warning(
                    f"Auto-summary failed for {chat_key}: "
                    f"status={resp.status_code}, body={resp.text[:200]}"
                )
    except Exception as e:
        logger.warning(f"Auto-summary request failed for {chat_key}: {e}")
|
||||
|
||||
async def create_dialog_summary(
|
||||
self,
|
||||
team_id: str,
|
||||
|
||||
Reference in New Issue
Block a user