agromatrix: deterministic plant-id flow + confidence guard + plantnet env

2026-02-21 12:03:28 +01:00
parent 50dfcd7390
commit a87a1fe52c
3 changed files with 250 additions and 8 deletions
--- a/services/router/main.py
+++ b/services/router/main.py
@@ -176,6 +176,91 @@ def _build_cautious_plant_response(base_text: str, source_count: int) -> str:
    )


+def _extract_image_inputs_for_plant_tools(images: Optional[List[str]], metadata: Dict[str, Any]) -> Dict[str, str]:
+    """Collect plant-tool image args: "image_url" (metadata file_url wins) and/or "image_data"."""
+    found: Dict[str, str] = {}
+    meta_url = str((metadata or {}).get("file_url") or "").strip()
+    if meta_url.startswith(("http://", "https://")):
+        found["image_url"] = meta_url
+    head = images[0] if images and isinstance(images, list) else None  # only the first image is inspected
+    if isinstance(head, str):
+        text = head.strip()
+        if text.startswith("data:image/") and ";base64," in text:
+            found["image_data"] = text
+        elif "image_url" not in found and text.startswith(("http://", "https://")):
+            found["image_url"] = text
+    return found
+
+
+def _parse_tool_result_json(payload: Any) -> Dict[str, Any]:
+    """Coerce a tool result (a dict, or JSON text) to a dict; anything else yields {}."""
+    if isinstance(payload, dict):
+        return payload
+    text = payload.strip() if isinstance(payload, str) else ""
+    if not text.startswith(("{", "[")):
+        return {}  # non-string payloads and text that does not look like JSON
+    try:
+        decoded = json.loads(text)
+    except Exception:
+        return {}
+    return decoded if isinstance(decoded, dict) else {}
+
+
+def _extract_top_candidates(tool_json: Dict[str, Any], limit: int = 3) -> List[Dict[str, Any]]:
+    """Normalize the first `limit` rows of tool_json["top_k"] into candidate dicts; non-dict rows are skipped."""
+    rows = tool_json.get("top_k") if isinstance(tool_json, dict) else None
+    if not isinstance(rows, list):
+        return []
+    out: List[Dict[str, Any]] = []
+    for row in rows[:limit]:
+        if not isinstance(row, dict):
+            continue
+        try:
+            conf = float(row.get("confidence", 0.0))
+        except Exception:
+            conf = 0.0
+        # `not conf >= 0` is true for negatives and for NaN; NaN fails every comparison, so it
+        # used to slip through unclamped and later break int(round(...)) in the reply builders.
+        if not conf >= 0.0:
+            conf = 0.0
+        conf = 1.0 if conf > 100.0 else (conf / 100.0 if conf > 1.0 else conf)  # (1, 100] is a percentage
+        name = str(row.get("name") or row.get("scientific_name") or "unknown").strip()
+        sci = str(row.get("scientific_name") or name or "unknown").strip()
+        out.append({"confidence": conf, "name": name, "scientific_name": sci})
+    return out
+
+
+def _build_agromatrix_not_sure_response(candidates: List[Dict[str, Any]], threshold: float) -> str:
+    """Build the Ukrainian low-confidence reply: threshold %, up to two candidates, and a photo request."""
+    if not candidates:
+        return (
+            "Не впевнений у точній ідентифікації по цьому фото. "
+            "Надішли, будь ласка, 2-3 чіткі фото: загальний план рослини, листок крупним планом і стебло/вузол росту."
+        )
+    shown = [(c.get("name"), c.get("scientific_name"), int(round(float(c.get("confidence", 0.0)) * 100))) for c in candidates[:2]]
+    options = "\n".join(f"{i}) {n} ({s}), confidence ~{p}%" for i, (n, s, p) in enumerate(shown, 1))
+    threshold_pct = int(round(threshold * 100))
+    return (
+        f"Не впевнений у точній ідентифікації (поріг надійності: {threshold_pct}%).\n"
+        f"Найближчі варіанти:\n{options}\n"
+        "Щоб підтвердити вид, надішли чіткі фото листка (верх/низ), стебла та загального вигляду."
+    )
+
+
+def _build_agromatrix_deterministic_fallback(candidates: List[Dict[str, Any]]) -> str:
+    """Build the templated Ukrainian reply for the first candidate, or a retry request if there is none."""
+    if not candidates:
+        return (
+            "Не впевнений у точній ідентифікації по цьому фото. "
+            "Надішли чіткіші фото листка, стебла і загального вигляду рослини."
+        )
+    best = candidates[0]  # only the leading candidate is reported
+    name, sci = best.get("name"), best.get("scientific_name")
+    pct = int(round(float(best.get("confidence", 0.0)) * 100))
+    summary = f"Ймовірна ідентифікація: {name} ({sci}), confidence ~{pct}%. "
+    return summary + "Це результат автоматичної класифікації; для підтвердження бажано ще 1-2 фото з інших ракурсів."
+
+
 EMPTY_ANSWER_GUARD_AGENTS = {"devtools", "monitor"}


@@ -1565,6 +1650,153 @@ async def agent_infer(agent_id: str, request: InferRequest):
    # =========================================================================
    if request.images and len(request.images) > 0:
        logger.info(f"🖼️ Vision request: {len(request.images)} image(s)")
+        plant_intent = _is_plant_identification_request(request.prompt)
+
+        # Deterministic AgroMatrix policy:
+        # 1) run plant classifiers first (nature-id / plantnet)
+        # 2) apply confidence threshold
+        # 3) LLM only explains classifier result, no new guessing
+        if request_agent_id == "agromatrix" and plant_intent and TOOL_MANAGER_AVAILABLE and tool_manager:
+            try:
+                image_inputs = _extract_image_inputs_for_plant_tools(request.images, metadata)
+                if image_inputs:
+                    threshold = float(
+                        os.getenv(
+                            "AGROMATRIX_PLANT_CONFIDENCE_MIN",
+                            os.getenv("NATURE_ID_MIN_CONFIDENCE", "0.65"),
+                        )
+                    )
+                    nature_args: Dict[str, Any] = {"top_k": 5, "min_confidence": threshold}
+                    nature_args.update(image_inputs)
+                    nature_res = await tool_manager.execute_tool(
+                        "nature_id_identify",
+                        nature_args,
+                        agent_id=request_agent_id,
+                        chat_id=chat_id,
+                        user_id=user_id,
+                    )
+                    nature_json = _parse_tool_result_json(nature_res.result) if nature_res and nature_res.success else {}
+                    candidates = _extract_top_candidates(nature_json, limit=3)
+
+                    plantnet_key = (os.getenv("PLANTNET_API_KEY") or "").strip()
+                    if plantnet_key:
+                        plantnet_args: Dict[str, Any] = {"top_k": 3, "organ": "leaf"}
+                        plantnet_args.update(image_inputs)
+                        plantnet_res = await tool_manager.execute_tool(
+                            "plantnet_lookup",
+                            plantnet_args,
+                            agent_id=request_agent_id,
+                            chat_id=chat_id,
+                            user_id=user_id,
+                        )
+                        plantnet_json = _parse_tool_result_json(plantnet_res.result) if plantnet_res and plantnet_res.success else {}
+                        plantnet_candidates = _extract_top_candidates(plantnet_json, limit=2)
+                        if not candidates and plantnet_candidates:
+                            candidates = plantnet_candidates
+
+                    top_conf = float(candidates[0].get("confidence", 0.0)) if candidates else 0.0
+                    if (not candidates) or (top_conf < threshold):
+                        response_text = _build_agromatrix_not_sure_response(candidates, threshold)
+                        if MEMORY_RETRIEVAL_AVAILABLE and memory_retrieval and chat_id and user_id:
+                            asyncio.create_task(
+                                memory_retrieval.store_message(
+                                    agent_id=request_agent_id,
+                                    user_id=user_id,
+                                    username=username,
+                                    message_text=f"[Image][PlantIntent] {request.prompt}",
+                                    response_text=response_text,
+                                    chat_id=chat_id,
+                                    message_type="vision",
+                                    metadata={
+                                        "deterministic_plant_id": True,
+                                        "confidence_threshold": threshold,
+                                        "candidates": candidates,
+                                        "decision": "uncertain",
+                                    },
+                                )
+                            )
+                        return InferResponse(
+                            response=response_text,
+                            model="plant-id-deterministic",
+                            backend="plant-id-deterministic",
+                            tokens_used=0,
+                        )
+
+                    # High-confidence deterministic result -> LLM explains only this result.
+                    top = candidates[0]
+                    classifier_payload = {
+                        "source": nature_json.get("source") if isinstance(nature_json, dict) else "nature-id",
+                        "threshold": threshold,
+                        "selected": top,
+                        "top_k": candidates,
+                    }
+                    explain_prompt = (
+                        "Користувач попросив ідентифікувати рослину на фото.\n"
+                        f"Використай ТІЛЬКИ цей deterministic результат класифікатора: {json.dumps(classifier_payload, ensure_ascii=False)}\n\n"
+                        "Сформуй коротку відповідь українською (2-4 речення):\n"
+                        "1) назва культури (common + scientific),\n"
+                        "2) confidence у %, \n"
+                        "3) 1-2 ознаки для практичної перевірки в полі.\n"
+                        "Не вигадуй інші види. Якщо даних замало, прямо скажи: 'не впевнений'."
+                    )
+                    llm_model = "plant-id-deterministic"
+                    llm_backend = "plant-id-deterministic"
+                    llm_tokens = 0
+                    try:
+                        llm_resp = await internal_llm_complete(
+                            InternalLLMRequest(
+                                prompt=explain_prompt,
+                                llm_profile="reasoning",
+                                max_tokens=min(int(request.max_tokens or 220), 280),
+                                temperature=0.1,
+                                role_context="AgroMatrix classifier explainer",
+                                metadata={"agent_id": "agromatrix"},
+                            )
+                        )
+                        response_text = _sanitize_vision_text_for_user(llm_resp.text)
+                        llm_model = llm_resp.model
+                        llm_backend = f"plant-id-explainer-{llm_resp.provider}"
+                        llm_tokens = llm_resp.tokens_used
+                    except Exception as e:
+                        logger.warning(f"⚠️ Deterministic plant explanation LLM failed: {e}")
+                        response_text = ""
+
+                    if not response_text:
+                        response_text = _build_agromatrix_deterministic_fallback(candidates)
+                    else:
+                        low = response_text.lower()
+                        top_name = str(top.get("name") or "").lower()
+                        top_sci = str(top.get("scientific_name") or "").lower()
+                        if (top_name and top_name not in low) and (top_sci and top_sci not in low):
+                            response_text = _build_agromatrix_deterministic_fallback(candidates)
+
+                    if MEMORY_RETRIEVAL_AVAILABLE and memory_retrieval and chat_id and user_id:
+                        asyncio.create_task(
+                            memory_retrieval.store_message(
+                                agent_id=request_agent_id,
+                                user_id=user_id,
+                                username=username,
+                                message_text=f"[Image][PlantIntent] {request.prompt}",
+                                response_text=response_text,
+                                chat_id=chat_id,
+                                message_type="vision",
+                                metadata={
+                                    "deterministic_plant_id": True,
+                                    "confidence_threshold": threshold,
+                                    "candidates": candidates,
+                                    "decision": "high_confidence",
+                                },
+                            )
+                        )
+                    return InferResponse(
+                        response=response_text,
+                        model=llm_model,
+                        backend=llm_backend,
+                        tokens_used=llm_tokens,
+                    )
+            except Exception as e:
+                logger.warning(f"⚠️ Deterministic AgroMatrix plant flow failed, fallback to generic vision: {e}")
+
        try:
            # Use Swapper's /vision endpoint (manages model loading)
            vision_payload = {
--- a/services/router/tool_manager.py
+++ b/services/router/tool_manager.py
@@ -812,6 +812,21 @@ class ToolManager:
        if "web.telegram.org" in host:
            return True
        return False
+
+    @staticmethod
+    def _normalize_confidence(value: Any) -> float:
+        """Coerce a backend confidence to a float in [0, 1]; unparsable, negative or NaN input gives 0.0."""
+        try:
+            v = float(value)
+        except Exception:
+            return 0.0
+        if not v >= 0.0:  # true for negatives and for NaN, which fails every comparison
+            return 0.0
+        # Some backends return percentages (e.g. 97.6) instead of 0..1.
+        if v <= 100.0:
+            return v / 100.0 if v > 1.0 else v
+        # Above 100 is out of range on both scales; clamp to the maximum.
+        return 1.0
    
    async def execute_tool(
        self,
@@ -2802,10 +2817,7 @@ class ToolManager:
                    if not isinstance(row, dict):
                        continue
                    conf = row.get("confidence", 0.0)
-                    try:
-                        conf_f = float(conf)
-                    except Exception:
-                        conf_f = 0.0
+                    conf_f = self._normalize_confidence(conf)
                    top_k_rows.append({
                        "confidence": conf_f,
                        "name": str(row.get("name") or row.get("scientific_name") or "unknown"),
@@ -2816,10 +2828,7 @@ class ToolManager:
                    if not isinstance(item, dict):
                        continue
                    score = item.get("score", item.get("confidence", 0.0))
-                    try:
-                        score_f = float(score)
-                    except Exception:
-                        score_f = 0.0
+                    score_f = self._normalize_confidence(score)
                    sname = item.get("scientific_name") or item.get("label") or item.get("name") or "unknown"
                    cname = item.get("common_name") or item.get("common") or sname
                    top_k_rows.append({