agromatrix: harden correction parser + cap context + persist last photo ref

2026-02-21 11:13:41 +01:00
parent 69486a92be
commit 3d04cd4c88
1 changed files with 118 additions and 4 deletions
--- a/gateway-bot/http_api.py
+++ b/gateway-bot/http_api.py
@@ -165,7 +165,8 @@ def _extract_agromatrix_correction_label(text: str) -> Optional[str]:
    patterns = [
        r"правильн\w*\s+відповід\w*[:\-]?\s*([a-zа-яіїєґ0-9'’\-\s]{2,60})",
        r"це\s+не\s+[a-zа-яіїєґ0-9'’\-\s]{1,60},?\s+а\s+([a-zа-яіїєґ0-9'’\-\s]{2,60})",
-        r"це\s+([a-zа-яіїєґ0-9'’\-\s]{2,60})",
+        # Strict "це <label>" form, but never "це не ...".
+        r"це\s+(?!не\b)([a-zа-яіїєґ0-9'’\-\s]{2,40})",
    ]
    for pat in patterns:
        m = re.search(pat, t)
@@ -174,14 +175,55 @@ def _extract_agromatrix_correction_label(text: str) -> Optional[str]:
        label = re.sub(r"\s+", " ", (m.group(1) or "").strip(" .,!?:;\"'()[]{}"))
        if not label:
            continue
-        if len(label.split()) > 6:
+        if len(label.split()) > 4:
            continue
        if label in {"не знаю", "помилка", "невірно", "не вірно"}:
            continue
+        # Filter imperative/meta phrases that are not plant labels.
+        bad_prefixes = (
+            "не ", "в чат", "зробити", "напиши", "потрібно", "навіщо",
+            "ти ", "він ", "вона ", "це ", "а ", "і ",
+        )
+        if label.startswith(bad_prefixes):
+            continue
+        if any(x in label for x in ("повідом", "чат", "відповід", "потрібно", "не потрібно")):
+            continue
+        if re.search(r"\d", label):
+            continue
        return label
    return None


+def _is_agromatrix_correction_only_message(text: str) -> bool:
+    # Decide whether a message is a bare correction (no question attached).
+    # Returns True when the normalized text contains no "?" and at least one
+    # correction marker; returns False for empty/None input.
+    # Normalize once: None-safe, trimmed, lower-cased so markers can be lower-case.
+    t = (text or "").strip().lower()
+    if not t:
+        return False
+    # Treat as "correction only" when there is no direct question.
+    if "?" in t:
+        return False
+    # NOTE(review): markers are plain substring checks, not word-boundary matches.
+    # "це " therefore also matches inside longer words that end in "це" followed
+    # by a space, and "це не" is already covered by "це ".
+    markers = (
+        "це ", "правильна відповідь", "невірно", "не вірно", "це не",
+        "не так", "неправильно", "виправ",
+    )
+    return any(m in t for m in markers)
+
+
+def _truncate_context_for_prompt(raw: str, *, max_chars: int = 2200, max_lines: int = 28) -> str:
+    # Shrink a multi-line context string so it fits a prompt budget.
+    # Keeps the END of the text (the last `max_lines` non-blank lines, then the
+    # last `max_chars` characters) and returns it stripped; "" for empty input.
+    # NOTE(review): a limit of 0 does not mean "keep nothing" -- `x[-0:]` is the
+    # whole sequence, so max_lines=0 / max_chars=0 leave the text untruncated.
+    if not raw:
+        return ""
+    # Whitespace-only lines are dropped before counting against max_lines.
+    lines = [ln for ln in raw.splitlines() if ln.strip()]
+    if len(lines) > max_lines:
+        lines = lines[-max_lines:]
+    out = "\n".join(lines)
+    if len(out) > max_chars:
+        out = out[-max_chars:]
+        # The character cut usually lands mid-line. If a newline occurs within
+        # the first 200 characters, drop the partial leading line so the prompt
+        # starts on a line boundary; otherwise keep the fragment rather than
+        # discard a large part of the budget.
+        pos = out.find("\n")
+        if 0 <= pos < 200:
+            out = out[pos + 1 :]
+    return out.strip()
+
+
 def _agromatrix_observation_doc_id(file_id: str, label: str) -> str:
    digest = hashlib.sha1(f"{file_id}:{label}".encode("utf-8")).hexdigest()[:16]
    return f"agromatrix-photo-{digest}"
@@ -267,6 +309,44 @@ async def _get_agromatrix_photo_prior(file_id: str, dao_id: str) -> Optional[str
        return None


+async def _set_agromatrix_last_photo_ref(*, chat_id: str, user_id: str, file_id: str, dao_id: str) -> None:
+    # Persist the file_id of the latest photo for a (chat_id, user_id) pair via
+    # memory_client.upsert_fact. Best-effort: does nothing when any of chat_id,
+    # user_id or file_id is falsy, and never raises (failures are only logged).
+    if not (chat_id and user_id and file_id):
+        return
+    try:
+        # The fact is stored under the shared AGROMATRIX_GLOBAL_KNOWLEDGE_USER_ID
+        # owner; the chat/user pair is encoded in the fact key instead.
+        await memory_client.upsert_fact(
+            user_id=AGROMATRIX_GLOBAL_KNOWLEDGE_USER_ID,
+            fact_key=f"agromatrix:last_photo:{chat_id}:{user_id}",
+            fact_value=file_id,
+            # NOTE(review): datetime.utcnow() yields a naive timestamp and is
+            # deprecated since Python 3.12 -- consider an aware UTC datetime.
+            fact_value_json={"file_id": file_id, "updated_at": datetime.utcnow().isoformat()},
+            team_id=dao_id,
+        )
+    except Exception as e:
+        # Deliberately swallowed so a storage failure cannot break the caller.
+        logger.warning(f"AgroMatrix last photo ref save failed: {e}")
+
+
+async def _get_agromatrix_last_photo_ref(*, chat_id: str, user_id: str, dao_id: str) -> Optional[str]:
+    # Look up the last photo file_id stored for a (chat_id, user_id) pair.
+    # Returns the non-empty file_id string, or None when an id is missing, no
+    # fact is found, the fact holds no usable value, or the lookup raises.
+    if not (chat_id and user_id):
+        return None
+    try:
+        # Same owner and key layout as used when the reference is saved.
+        fact = await memory_client.get_fact(
+            user_id=AGROMATRIX_GLOBAL_KNOWLEDGE_USER_ID,
+            fact_key=f"agromatrix:last_photo:{chat_id}:{user_id}",
+            team_id=dao_id,
+        )
+        if not fact:
+            return None
+        # Prefer the structured JSON payload; a non-dict fact yields no data.
+        data = fact.get("fact_value_json") if isinstance(fact, dict) else None
+        if isinstance(data, dict):
+            file_id = str(data.get("file_id") or "").strip()
+            if file_id:
+                return file_id
+        # Fall back to the plain fact_value when the JSON payload has no file_id.
+        file_id = str(fact.get("fact_value") or "").strip() if isinstance(fact, dict) else ""
+        return file_id or None
+    except Exception as e:
+        # Lookup failures are logged and reported to the caller as "not found".
+        logger.warning(f"AgroMatrix last photo ref lookup failed: {e}")
+        return None
+
+
 def _needs_photo_only_response(text: str) -> bool:
    """
    Return True only for explicit requests to analyze/describe image content.
@@ -1554,6 +1634,13 @@ async def process_photo(
    
    logger.info(f"{agent_config.name}: Photo from {username} (tg:{user_id}), file_id: {file_id}")
    _set_recent_photo_context(agent_config.agent_id, chat_id, user_id, file_id)
+    if agent_config.agent_id == "agromatrix":
+        await _set_agromatrix_last_photo_ref(
+            chat_id=chat_id,
+            user_id=user_id,
+            file_id=file_id,
+            dao_id=dao_id,
+        )
    
    # Get caption for media question check
    caption = caption_override if caption_override is not None else ((update.message or {}).get("caption") or "")
@@ -2722,6 +2809,12 @@ async def handle_telegram_webhook(
                    recent_file_id = _extract_recent_photo_file_id_from_memory(mc)
                except Exception:
                    recent_file_id = None
+            if not recent_file_id:
+                recent_file_id = await _get_agromatrix_last_photo_ref(
+                    chat_id=chat_id,
+                    user_id=user_id,
+                    dao_id=dao_id,
+                )
            if recent_file_id:
                await _save_agromatrix_photo_learning(
                    file_id=recent_file_id,
@@ -2734,6 +2827,22 @@ async def handle_telegram_webhook(
                logger.info(
                    f"AgroMatrix learning updated: file_id={recent_file_id}, label={corrected_label}"
                )
+                if _is_agromatrix_correction_only_message(text):
+                    ack = f"Прийняв. Зберіг корекцію: {corrected_label}. Далі врахую це у відповідях."
+                    await send_telegram_message(chat_id, ack, telegram_token)
+                    await memory_client.save_chat_turn(
+                        agent_id=agent_config.agent_id,
+                        team_id=dao_id,
+                        user_id=f"tg:{user_id}",
+                        message=text,
+                        response=ack,
+                        channel_id=chat_id,
+                        scope="short_term",
+                        save_agent_response=True,
+                        agent_metadata={"agromatrix_learning_ack": True},
+                        username=username,
+                    )
+                    return {"ok": True, "agent": agent_config.agent_id, "mode": "learning_ack"}

    # Photo/image intent guard:
    # if text references a photo/image, try to resolve latest file_id and route to vision.
@@ -3039,7 +3148,7 @@ async def handle_telegram_webhook(
    # Regular chat mode
    # Fetch memory context (includes local context as fallback)
    # Всі агенти мають доступ до однакової історії (80 повідомлень) для контексту
-    context_limit = 80  # Однакове для всіх агентів
+    context_limit = 40 if agent_config.agent_id == "agromatrix" else 80
    memory_context = await memory_client.get_context(
        user_id=f"tg:{user_id}",
        agent_id=agent_config.agent_id,
@@ -3049,7 +3158,11 @@ async def handle_telegram_webhook(
    )
    
    # Build message with conversation context
-    local_history = memory_context.get("local_context_text", "")
+    local_history = _truncate_context_for_prompt(
+        memory_context.get("local_context_text", ""),
+        max_chars=2200 if agent_config.agent_id == "agromatrix" else 3800,
+        max_lines=28 if agent_config.agent_id == "agromatrix" else 48,
+    )
    
    # Check if this is a training group
    is_training_group = str(chat_id) in TRAINING_GROUP_IDS
@@ -3069,6 +3182,7 @@ async def handle_telegram_webhook(
        # Do not duplicate current prompt if it matches one pending message.
        unresolved_non_current = [q for q in unresolved_questions if q.strip() != (text or "").strip()]
        if unresolved_non_current:
+            unresolved_non_current = unresolved_non_current[-1:] if agent_config.agent_id == "agromatrix" else unresolved_non_current
            unresolved_block = (
                "[КРИТИЧНО: є невідповідані питання цього користувача. "
                "Спочатку коротко відповідай на них, потім на поточне повідомлення. "