runtime: sync router/gateway/config policy and clan role registry

2026-02-19 00:14:06 -08:00
parent 675b25953b
commit dfc0ef1ceb
35 changed files with 6141 additions and 498 deletions
--- a/services/router/memory_retrieval.py
+++ b/services/router/memory_retrieval.py
@@ -363,6 +363,8 @@ class MemoryRetrieval:
        query: str,
        agent_id: str = "helion",
        platform_user_id: Optional[str] = None,
+        chat_id: Optional[str] = None,
+        user_id: Optional[str] = None,
        visibility: str = "platform",
        limit: int = 5
    ) -> List[Dict[str, Any]]:
@@ -377,6 +379,16 @@ class MemoryRetrieval:
        
        all_results = []
        
+        q = (query or "").lower()
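+        # If the user explicitly asks about documents/catalogs, prefer knowledge-base docs over chat snippets. NOTE(review): plain substring match — "стор" also hits e.g. "історія", "page" hits "homepage"; confirm this is acceptable.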
+        is_doc_query = any(k in q for k in ["pdf", "каталог", "каталоз", "документ", "файл", "стор", "page", "pages"])
+        # Simple keyword gate to avoid irrelevant chat snippets dominating doc queries.
+        # Example: when asking "з каталогу Defenda 2026 ... гліфосат", old "Бокаші" messages may match too well.
+        topic_keywords: List[str] = []
+        for kw in ["defenda", "ifagri", "bayer", "гліфосат", "glyphos", "глифос", "npk", "мінерал", "добрив", "гербіц", "фунгіц", "інсектиц"]:
+            if kw in q:
+                topic_keywords.append(kw)
+
        # Dynamic collection names based on agent_id
        memory_items_collection = f"{agent_id}_memory_items"
        messages_collection = f"{agent_id}_messages"
@@ -420,18 +432,34 @@ class MemoryRetrieval:
            
            # Search 2: {agent_id}_messages (chat history)
            try:
+                msg_filter = None
+                if chat_id:
+                    # Payload schema differs across ingesters: some use chat_id, others channel_id.
+                    msg_filter = qmodels.Filter(
+                        should=[
+                            qmodels.FieldCondition(key="chat_id", match=qmodels.MatchValue(value=str(chat_id))),
+                            qmodels.FieldCondition(key="channel_id", match=qmodels.MatchValue(value=str(chat_id))),
+                        ]
+                    )
                results = self.qdrant_client.search(
                    collection_name=messages_collection,
                    query_vector=embedding,
+                    query_filter=msg_filter,
                    limit=limit,
                    with_payload=True
                )
                
                for r in results:
-                    if r.score > 0.4:  # Higher threshold for messages
+                    # Higher threshold for messages; even higher when user asks about docs to avoid pulling old chatter.
+                    msg_thresh = 0.5 if is_doc_query else 0.4
+                    if r.score > msg_thresh:
                        text = r.payload.get("text", r.payload.get("content", ""))
                        # Skip very short or system messages
                        if len(text) > 20 and not text.startswith("<"):
+                            if is_doc_query and topic_keywords:
+                                tl = text.lower()
+                                if not any(k in tl for k in topic_keywords):
+                                    continue
                            all_results.append({
                                "text": text,
                                "type": "message",
@@ -446,18 +474,21 @@ class MemoryRetrieval:
                results = self.qdrant_client.search(
                    collection_name=docs_collection,
                    query_vector=embedding,
-                    limit=3,  # Less docs, they're usually longer
+                    limit=6 if is_doc_query else 3,  # Pull more docs for explicit doc queries
                    with_payload=True
                )
                
                for r in results:
-                    if r.score > 0.5:  # Higher threshold for docs
+                    # When user asks about PDF/catalogs, relax threshold so docs show up more reliably.
+                    doc_thresh = 0.35 if is_doc_query else 0.5
+                    if r.score > doc_thresh:
                        text = r.payload.get("text", r.payload.get("content", ""))
                        if len(text) > 30:
                            all_results.append({
                                "text": text[:500],  # Truncate long docs
                                "type": "knowledge",
-                                "score": r.score,
+                                # Slightly boost docs for doc queries so they win vs chat snippets; the stored score is then raw score + 0.12, not the raw search score.
+                                "score": (r.score + 0.12) if is_doc_query else r.score,
                                "source": "docs"
                            })
            except Exception as e:
@@ -614,7 +645,8 @@ class MemoryRetrieval:
        message_text: str,
        response_text: str,
        chat_id: str,
-        message_type: str = "conversation"
+        message_type: str = "conversation",
+        metadata: Optional[Dict[str, Any]] = None,
    ) -> bool:
        """
        Store a message exchange in agent-specific Qdrant collection.
@@ -656,23 +688,27 @@ class MemoryRetrieval:
            
            # Store in Qdrant
            point_id = str(uuid.uuid4())
+            payload = {
+                "text": combined_text[:5000],  # Limit payload size
+                "user_message": message_text[:2000],
+                "assistant_response": response_text[:3000],
+                "user_id": user_id,
+                "username": username,
+                "chat_id": chat_id,
+                "agent_id": agent_id,
+                "type": message_type,
+                "timestamp": datetime.utcnow().isoformat()
+            }
+            if metadata and isinstance(metadata, dict):
+                payload["metadata"] = metadata
+
            self.qdrant_client.upsert(
                collection_name=messages_collection,
                points=[
                    qmodels.PointStruct(
                        id=point_id,
                        vector=embedding,
-                        payload={
-                            "text": combined_text[:5000],  # Limit payload size
-                            "user_message": message_text[:2000],
-                            "assistant_response": response_text[:3000],
-                            "user_id": user_id,
-                            "username": username,
-                            "chat_id": chat_id,
-                            "agent_id": agent_id,
-                            "type": message_type,
-                            "timestamp": datetime.utcnow().isoformat()
-                        }
+                        payload=payload
                    )
                ]
            )