agromatrix: add pending-question memory, anti-repeat guard, and numeric contract

2026-02-21 12:47:23 +01:00
parent a87a1fe52c
commit d963c52fe5
2 changed files with 621 additions and 50 deletions
--- a/services/router/memory_retrieval.py
+++ b/services/router/memory_retrieval.py
@@ -22,6 +22,7 @@ import re
 from typing import Optional, Dict, Any, List
 from dataclasses import dataclass, field
 from datetime import datetime
+import hashlib

 import httpx
 import asyncpg
@@ -36,6 +37,9 @@ COHERE_API_KEY = os.getenv("COHERE_API_KEY", "")
 NEO4J_BOLT_URL = os.getenv("NEO4J_BOLT_URL", "bolt://neo4j:7687")
 NEO4J_USER = os.getenv("NEO4J_USER", "neo4j")
 NEO4J_PASSWORD = os.getenv("NEO4J_PASSWORD", "neo4j")
+PENDING_QUESTIONS_LIMIT = int(os.getenv("AGENT_PENDING_QUESTIONS_LIMIT", "5"))
+SHARED_AGRO_LIBRARY_ENABLED = os.getenv("AGROMATRIX_SHARED_LIBRARY_ENABLED", "true").lower() == "true"
+SHARED_AGRO_LIBRARY_REQUIRE_REVIEW = os.getenv("AGROMATRIX_SHARED_LIBRARY_REQUIRE_REVIEW", "true").lower() == "true"


@dataclass
@@ -62,6 +66,7 @@ class SessionState:
    last_answer_fingerprint: Optional[str] = None
    trust_mode: bool = False
    apprentice_mode: bool = False
+    pending_questions: List[str] = field(default_factory=list)


@dataclass 
@@ -96,6 +101,10 @@ class MemoryBrief:
                lines.append("📚 Режим учня — можеш ставити уточнюючі питання")
            if self.session_state.active_topic:
                lines.append(f"📌 Активна тема: {self.session_state.active_topic}")
+            if self.session_state.pending_questions:
+                lines.append("🕘 Невідповідані питання в цьому чаті (відповідай на них першочергово):")
+                for q in self.session_state.pending_questions[:3]:
+                    lines.append(f"  - {q[:180]}")
        
        # User facts (preferences, profile)
        if self.user_facts:
@@ -179,6 +188,7 @@ class MemoryRetrieval:
        
        # HTTP client for embeddings
        self.http_client = httpx.AsyncClient(timeout=30.0)
+        await self._ensure_aux_tables()
        
    async def close(self):
        """Close connections"""
@@ -188,6 +198,57 @@ class MemoryRetrieval:
            await self.neo4j_driver.close()
        if self.http_client:
            await self.http_client.aclose()
+
+    async def _ensure_aux_tables(self):
+        """Create auxiliary tables used by agent runtime policies."""
+        if not self.pg_pool:
+            return
+        try:
+            async with self.pg_pool.acquire() as conn:
+                await conn.execute(
+                    """
+                    CREATE TABLE IF NOT EXISTS agent_session_state (
+                      channel TEXT NOT NULL,
+                      chat_id TEXT NOT NULL,
+                      user_id TEXT NOT NULL,
+                      agent_id TEXT NOT NULL,
+                      conversation_id TEXT NOT NULL,
+                      last_user_id TEXT,
+                      last_user_nick TEXT,
+                      active_topic TEXT,
+                      context_open BOOLEAN NOT NULL DEFAULT FALSE,
+                      last_media_handled BOOLEAN NOT NULL DEFAULT TRUE,
+                      last_answer_fingerprint TEXT,
+                      trust_mode BOOLEAN NOT NULL DEFAULT FALSE,
+                      apprentice_mode BOOLEAN NOT NULL DEFAULT FALSE,
+                      updated_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
+                      created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
+                      PRIMARY KEY (channel, chat_id, user_id, agent_id)
+                    );
+                    CREATE INDEX IF NOT EXISTS idx_agent_session_state_conv
+                      ON agent_session_state (conversation_id);
+
+                    CREATE TABLE IF NOT EXISTS agent_pending_questions (
+                      id BIGSERIAL PRIMARY KEY,
+                      channel TEXT NOT NULL,
+                      chat_id TEXT NOT NULL,
+                      user_id TEXT NOT NULL,
+                      agent_id TEXT NOT NULL,
+                      question_text TEXT NOT NULL,
+                      question_fingerprint TEXT NOT NULL,
+                      status TEXT NOT NULL DEFAULT 'pending',
+                      created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
+                      answered_at TIMESTAMPTZ,
+                      metadata JSONB NOT NULL DEFAULT '{}'::jsonb
+                    );
+                    CREATE INDEX IF NOT EXISTS idx_agent_pending_questions_scope
+                      ON agent_pending_questions (agent_id, channel, chat_id, user_id, status, created_at DESC);
+                    CREATE UNIQUE INDEX IF NOT EXISTS idx_agent_pending_questions_unique_open
+                      ON agent_pending_questions (agent_id, channel, chat_id, user_id, question_fingerprint, status);
+                    """
+                )
+        except Exception as e:
+            logger.warning(f"Aux tables init failed: {e}")
    
    # =========================================================================
    # L2: Platform Identity Resolution
@@ -237,7 +298,7 @@ class MemoryRetrieval:
                identity.is_mentor = bool(is_mentor)
                
        except Exception as e:
-            logger.warning(f"Identity resolution failed: {e}")
+            logger.debug(f"Identity resolution fallback: {e}")
        
        return identity
    
@@ -249,7 +310,9 @@ class MemoryRetrieval:
        self,
        channel: str,
        chat_id: str,
-        thread_id: Optional[str] = None
+        thread_id: Optional[str] = None,
+        agent_id: Optional[str] = None,
+        user_id: Optional[str] = None,
    ) -> SessionState:
        """Get or create session state for conversation"""
        state = SessionState()
@@ -259,42 +322,78 @@ class MemoryRetrieval:
        
        try:
            async with self.pg_pool.acquire() as conn:
-                # Get or create conversation
-                conv_id = await conn.fetchval(
-                    "SELECT get_or_create_conversation($1, $2, $3, NULL)",
-                    channel, chat_id, thread_id
-                )
-                state.conversation_id = str(conv_id) if conv_id else None
-                
-                # Get conversation state
-                if conv_id:
-                    row = await conn.fetchrow("""
-                        SELECT * FROM helion_conversation_state
-                        WHERE conversation_id = $1
-                    """, conv_id)
-                    
-                    if row:
-                        state.last_addressed = row.get('last_addressed_to_helion', False)
-                        state.active_topic = row.get('active_topic_id')
-                        state.context_open = row.get('active_context_open', False)
-                        state.last_media_handled = row.get('last_media_handled', True)
-                        state.last_answer_fingerprint = row.get('last_answer_fingerprint')
-                        state.trust_mode = row.get('group_trust_mode', False)
-                        state.apprentice_mode = row.get('apprentice_mode', False)
+                if agent_id and user_id:
+                    conv_id = self._build_conversation_id(channel, chat_id, user_id, agent_id)
+                    row = await conn.fetchrow(
+                        """
+                        SELECT conversation_id, active_topic, context_open, last_media_handled,
+                               last_answer_fingerprint, trust_mode, apprentice_mode
+                        FROM agent_session_state
+                        WHERE channel = $1
+                          AND chat_id = $2
+                          AND user_id = $3
+                          AND agent_id = $4
+                        """,
+                        channel,
+                        chat_id,
+                        user_id,
+                        agent_id,
+                    )
+                    if not row:
+                        await conn.execute(
+                            """
+                            INSERT INTO agent_session_state
+                              (channel, chat_id, user_id, agent_id, conversation_id)
+                            VALUES ($1, $2, $3, $4, $5)
+                            ON CONFLICT (channel, chat_id, user_id, agent_id) DO NOTHING
+                            """,
+                            channel,
+                            chat_id,
+                            user_id,
+                            agent_id,
+                            conv_id,
+                        )
+                        state.conversation_id = conv_id
                    else:
-                        # Create initial state
-                        await conn.execute("""
-                            INSERT INTO helion_conversation_state (conversation_id)
-                            VALUES ($1)
-                            ON CONFLICT (conversation_id) DO NOTHING
-                        """, conv_id)
-                
-                # Check if trusted group
-                is_trusted = await conn.fetchval(
-                    "SELECT is_trusted_group($1, $2)",
-                    channel, chat_id
-                )
-                state.trust_mode = bool(is_trusted)
+                        state.conversation_id = str(row.get("conversation_id") or conv_id)
+                        state.active_topic = row.get("active_topic")
+                        state.context_open = bool(row.get("context_open", False))
+                        state.last_media_handled = bool(row.get("last_media_handled", True))
+                        state.last_answer_fingerprint = row.get("last_answer_fingerprint")
+                        state.trust_mode = bool(row.get("trust_mode", False))
+                        state.apprentice_mode = bool(row.get("apprentice_mode", False))
+                else:
+                    state.conversation_id = self._build_conversation_id(
+                        channel,
+                        chat_id,
+                        user_id or "unknown",
+                        agent_id or "agent",
+                    )
+
+                if agent_id and user_id:
+                    pending_rows = await conn.fetch(
+                        """
+                        SELECT question_text
+                        FROM agent_pending_questions
+                        WHERE channel = $1
+                          AND chat_id = $2
+                          AND user_id = $3
+                          AND agent_id = $4
+                          AND status = 'pending'
+                        ORDER BY created_at ASC
+                        LIMIT $5
+                        """,
+                        channel,
+                        chat_id,
+                        user_id,
+                        agent_id,
+                        PENDING_QUESTIONS_LIMIT,
+                    )
+                    state.pending_questions = [
+                        str(r.get("question_text") or "").strip()
+                        for r in pending_rows
+                        if str(r.get("question_text") or "").strip()
+                    ]
                
        except Exception as e:
            logger.warning(f"Session state retrieval failed: {e}")
@@ -494,6 +593,32 @@ class MemoryRetrieval:
                            })
            except Exception as e:
                logger.debug(f"{docs_collection} search: {e}")
+
+            # Search 4: shared agronomy memory (reviewed, cross-chat, anonymized)
+            if (
+                SHARED_AGRO_LIBRARY_ENABLED
+                and agent_id == "agromatrix"
+                and self._is_plant_query(query)
+            ):
+                try:
+                    results = self.qdrant_client.search(
+                        collection_name="agromatrix_shared_library",
+                        query_vector=embedding,
+                        limit=3,
+                        with_payload=True
+                    )
+                    for r in results:
+                        if r.score > 0.45:
+                            text = str(r.payload.get("text") or "").strip()
+                            if len(text) > 20:
+                                all_results.append({
+                                    "text": text[:500],
+                                    "type": "shared_agro_fact",
+                                    "score": r.score + 0.05,
+                                    "source": "shared_agronomy_library"
+                                })
+                except Exception as e:
+                    logger.debug(f"agromatrix_shared_library search: {e}")
            
            # Sort by score and deduplicate
            all_results.sort(key=lambda x: x.get("score", 0), reverse=True)
@@ -546,6 +671,28 @@ class MemoryRetrieval:
            return ""
        normalized = re.sub(r"\s+", " ", text.strip().lower())
        return normalized[:220]
+
+    @staticmethod
+    def _is_plant_query(text: str) -> bool:
+        q = (text or "").lower()
+        if not q:
+            return False
+        markers = [
+            "рослин", "культур", "лист", "стебл", "бур'ян", "хвороб", "шкідник",
+            "what plant", "identify plant", "crop", "species", "leaf", "stem",
+            "что за растение", "культура", "листок", "фото рослини"
+        ]
+        return any(m in q for m in markers)
+
+    @staticmethod
+    def _question_fingerprint(question_text: str) -> str:
+        normalized = re.sub(r"\s+", " ", (question_text or "").strip().lower())
+        return hashlib.sha1(normalized.encode("utf-8")).hexdigest()[:16]
+
+    @staticmethod
+    def _build_conversation_id(channel: str, chat_id: str, user_id: str, agent_id: str) -> str:
+        seed = f"{channel}:{chat_id}:{user_id}:{agent_id}"
+        return hashlib.sha1(seed.encode("utf-8")).hexdigest()[:24]
    
    async def get_user_graph_context(
        self,
@@ -639,7 +786,13 @@ class MemoryRetrieval:
        brief.user_identity = identity
        
        # L1: Session State
-        session = await self.get_session_state(channel, chat_id, thread_id)
+        session = await self.get_session_state(
+            channel,
+            chat_id,
+            thread_id,
+            agent_id=agent_id,
+            user_id=user_id,
+        )
        brief.session_state = session
        brief.is_trusted_group = session.trust_mode
        
@@ -749,6 +902,22 @@ class MemoryRetrieval:
                    )
                ]
            )
+
+            # Optional shared agronomy memory:
+            # - never stores user/chat identifiers
+            # - supports review gate (pending vs approved)
+            if (
+                SHARED_AGRO_LIBRARY_ENABLED
+                and agent_id == "agromatrix"
+                and message_type in {"vision", "conversation"}
+                and isinstance(metadata, dict)
+                and metadata.get("deterministic_plant_id")
+            ):
+                await self._store_shared_agronomy_memory(
+                    message_text=message_text,
+                    response_text=response_text,
+                    metadata=metadata,
+                )
            
            logger.debug(f"✅ Stored message in {messages_collection}: {point_id[:8]}...")
            return True
@@ -756,6 +925,202 @@ class MemoryRetrieval:
        except Exception as e:
            logger.warning(f"Failed to store message in {messages_collection}: {e}")
            return False
+
+    async def _store_shared_agronomy_memory(
+        self,
+        message_text: str,
+        response_text: str,
+        metadata: Dict[str, Any],
+    ) -> bool:
+        if not self.qdrant_client or not COHERE_API_KEY:
+            return False
+        try:
+            from qdrant_client.http import models as qmodels
+            import uuid
+
+            reviewed = bool(metadata.get("mentor_confirmed") or metadata.get("reviewed"))
+            collection = "agromatrix_shared_library"
+            if SHARED_AGRO_LIBRARY_REQUIRE_REVIEW and not reviewed:
+                collection = "agromatrix_shared_pending"
+
+            try:
+                self.qdrant_client.get_collection(collection)
+            except Exception:
+                self.qdrant_client.create_collection(
+                    collection_name=collection,
+                    vectors_config=qmodels.VectorParams(
+                        size=1024,
+                        distance=qmodels.Distance.COSINE,
+                    ),
+                )
+
+            compact = (
+                f"Plant case\nQuestion: {message_text[:800]}\n"
+                f"Answer: {response_text[:1200]}\n"
+                f"Candidates: {json.dumps(metadata.get('candidates', []), ensure_ascii=False)[:1200]}"
+            )
+            embedding = await self.get_embedding(compact[:2000])
+            if not embedding:
+                return False
+
+            payload = {
+                "text": compact[:3000],
+                "type": "plant_case",
+                "deterministic_plant_id": True,
+                "decision": metadata.get("decision"),
+                "confidence_threshold": metadata.get("confidence_threshold"),
+                "candidates": metadata.get("candidates", [])[:5],
+                "reviewed": reviewed,
+                "timestamp": datetime.utcnow().isoformat(),
+            }
+            self.qdrant_client.upsert(
+                collection_name=collection,
+                points=[qmodels.PointStruct(id=str(uuid.uuid4()), vector=embedding, payload=payload)],
+            )
+            return True
+        except Exception as e:
+            logger.debug(f"Shared agronomy memory store failed: {e}")
+            return False
+
+    async def register_pending_question(
+        self,
+        channel: str,
+        chat_id: str,
+        user_id: str,
+        agent_id: str,
+        question_text: str,
+        metadata: Optional[Dict[str, Any]] = None,
+    ) -> bool:
+        if not self.pg_pool:
+            return False
+        text = (question_text or "").strip()
+        if not text:
+            return False
+        fp = self._question_fingerprint(text)
+        try:
+            async with self.pg_pool.acquire() as conn:
+                await conn.execute(
+                    """
+                    INSERT INTO agent_pending_questions
+                      (channel, chat_id, user_id, agent_id, question_text, question_fingerprint, status, metadata)
+                    VALUES ($1, $2, $3, $4, $5, $6, 'pending', $7::jsonb)
+                    ON CONFLICT (agent_id, channel, chat_id, user_id, question_fingerprint, status)
+                    DO NOTHING
+                    """,
+                    channel,
+                    chat_id,
+                    user_id,
+                    agent_id,
+                    text[:1200],
+                    fp,
+                    json.dumps(metadata or {}, ensure_ascii=False),
+                )
+                # Keep only last N open items.
+                await conn.execute(
+                    """
+                    WITH ranked AS (
+                        SELECT id, ROW_NUMBER() OVER (
+                            PARTITION BY channel, chat_id, user_id, agent_id, status
+                            ORDER BY created_at DESC
+                        ) AS rn
+                        FROM agent_pending_questions
+                        WHERE channel = $1
+                          AND chat_id = $2
+                          AND user_id = $3
+                          AND agent_id = $4
+                          AND status = 'pending'
+                    )
+                    UPDATE agent_pending_questions p
+                    SET status = 'dismissed',
+                        answered_at = NOW(),
+                        metadata = COALESCE(p.metadata, '{}'::jsonb) || '{"reason":"overflow_trim"}'::jsonb
+                    FROM ranked r
+                    WHERE p.id = r.id
+                      AND r.rn > $5
+                    """,
+                    channel,
+                    chat_id,
+                    user_id,
+                    agent_id,
+                    max(1, PENDING_QUESTIONS_LIMIT),
+                )
+            return True
+        except Exception as e:
+            logger.warning(f"register_pending_question failed: {e}")
+            return False
+
+    async def resolve_pending_question(
+        self,
+        channel: str,
+        chat_id: str,
+        user_id: str,
+        agent_id: str,
+        answer_text: Optional[str] = None,
+        reason: str = "answered",
+    ) -> bool:
+        if not self.pg_pool:
+            return False
+        try:
+            async with self.pg_pool.acquire() as conn:
+                row = await conn.fetchrow(
+                    """
+                    WITH target AS (
+                      SELECT id
+                      FROM agent_pending_questions
+                      WHERE channel = $1
+                        AND chat_id = $2
+                        AND user_id = $3
+                        AND agent_id = $4
+                        AND status = 'pending'
+                      ORDER BY created_at ASC
+                      LIMIT 1
+                    )
+                    UPDATE agent_pending_questions p
+                    SET status = CASE WHEN $5 = 'dismissed' THEN 'dismissed' ELSE 'answered' END,
+                        answered_at = NOW(),
+                        metadata = COALESCE(p.metadata, '{}'::jsonb)
+                                   || jsonb_build_object(
+                                        'resolution_reason', $5,
+                                        'answer_fingerprint', COALESCE($6, '')
+                                      )
+                    FROM target t
+                    WHERE p.id = t.id
+                    RETURNING p.id
+                    """,
+                    channel,
+                    chat_id,
+                    user_id,
+                    agent_id,
+                    reason,
+                    self._question_fingerprint(answer_text or "") if answer_text else "",
+                )
+            return bool(row)
+        except Exception as e:
+            logger.warning(f"resolve_pending_question failed: {e}")
+            return False
+
+    async def store_interaction(
+        self,
+        channel: str,
+        chat_id: str,
+        user_id: str,
+        agent_id: str,
+        username: Optional[str],
+        user_message: str,
+        assistant_response: str,
+        metadata: Optional[Dict[str, Any]] = None,
+    ) -> bool:
+        # Backward-compatible wrapper for older call sites.
+        return await self.store_message(
+            agent_id=agent_id,
+            user_id=user_id,
+            username=username,
+            message_text=user_message,
+            response_text=assistant_response,
+            chat_id=chat_id,
+            message_type="conversation",
+            metadata=metadata,
+        )
    
    async def update_session_state(
        self,
@@ -774,10 +1139,10 @@ class MemoryRetrieval:
                param_idx = 2
                
                allowed_fields = [
-                    'last_addressed_to_helion', 'last_user_id', 'last_user_nick',
-                    'active_topic_id', 'active_context_open', 'last_media_id',
-                    'last_media_handled', 'last_answer_fingerprint', 'group_trust_mode',
-                    'apprentice_mode', 'proactive_questions_today'
+                    'last_user_id', 'last_user_nick',
+                    'active_topic', 'context_open',
+                    'last_media_handled', 'last_answer_fingerprint',
+                    'trust_mode', 'apprentice_mode'
                ]
                
                for field, value in updates.items():
@@ -787,7 +1152,7 @@ class MemoryRetrieval:
                        param_idx += 1
                
                query = f"""
-                    UPDATE helion_conversation_state
+                    UPDATE agent_session_state
                    SET {', '.join(set_clauses)}
                    WHERE conversation_id = $1
                """