router: add tool manager runtime and memory retrieval updates

This commit is contained in:
NODA1 System
2026-02-20 17:56:33 +01:00
parent 9ecce79810
commit a8a153a87a
5 changed files with 703 additions and 42 deletions

View File

@@ -46,8 +46,15 @@ AGENT_SPECIALIZED_TOOLS = {
"nutra": ['comfy_generate_image', 'comfy_generate_video'],
# AgroMatrix - Agriculture
# Specialized: crop analysis, weather integration, field mapping
"agromatrix": ['comfy_generate_image', 'comfy_generate_video'],
# Specialized: crop analysis, weather integration, field mapping + plant intelligence
"agromatrix": [
'comfy_generate_image',
'comfy_generate_video',
'plantnet_lookup',
'nature_id_identify',
'gbif_species_lookup',
'agrovoc_lookup',
],
# GreenFood - Food & Eco
# Specialized: recipe analysis, eco-scoring

View File

@@ -11,6 +11,7 @@ import httpx
import logging
import hashlib
import time # For latency metrics
from datetime import datetime
# CrewAI Integration
try:
@@ -128,6 +129,54 @@ def _vision_answer_uncertain(answer: str) -> bool:
return any(m in a for m in uncertain_markers)
def _is_plant_identification_request(prompt: str) -> bool:
if not prompt:
return False
p = prompt.lower()
markers = [
"що за рослина", "що це за рослина", "яка це рослина", "яка рослина",
"що за культура", "яка культура", "визнач рослину", "визнач культуру",
"ідентифікуй рослину", "впізнай рослину",
"what plant", "identify plant", "identify crop", "what crop",
"что за растение", "какое растение", "определи растение",
]
return any(m in p for m in markers)
def _build_plant_web_queries(prompt: str, vision_text: str) -> List[str]:
    """Build up to three deduplicated web-search queries for plant identification.

    Combines the generic vision query with fact-based variants aimed at
    crop/seedling identification pages (English and Ukrainian phrasing).
    """
    candidates: List[str] = []
    base_query = _build_vision_web_query(prompt, vision_text)
    if base_query:
        candidates.append(base_query)
    facts = _extract_vision_search_facts(vision_text, max_chars=180)
    if facts:
        candidates.extend([
            f"{facts} crop seedling identification",
            f"{facts} identification by leaves field photo",
            f"{facts} визначення культури за листком",
        ])
    # Normalize whitespace, drop empties, keep only the first occurrence of each query.
    deduped: List[str] = []
    for candidate in candidates:
        normalized = re.sub(r"\s+", " ", candidate).strip()
        if normalized and normalized not in deduped:
            deduped.append(normalized)
    return deduped[:3]
def _build_cautious_plant_response(base_text: str, source_count: int) -> str:
    """Wrap a low-confidence plant answer in a cautious note asking for better photos.

    Keeps at most the first two sentences of the sanitized answer and appends a
    fixed Ukrainian disclaimer that cites how many verified sources were found.
    """
    summary = _sanitize_vision_text_for_user(base_text) or "По цьому фото поки не можу надійно визначити культуру."
    sentences = [seg.strip() for seg in re.split(r"(?<=[.!?])\s+", summary) if seg.strip()]
    if len(sentences) > 2:
        summary = " ".join(sentences[:2]).strip()
    disclaimer = (
        f"Зараз це попередня оцінка (перевірених джерел: {source_count}). "
        "Щоб дати точну ідентифікацію, надішли 2-3 фото: загальний план, крупний план листка "
        "і фото стебла/вузла росту."
    )
    return f"{summary}\n\n{disclaimer}"
EMPTY_ANSWER_GUARD_AGENTS = {"devtools", "monitor"}
@@ -187,6 +236,44 @@ def _build_image_fallback_response(agent_id: str, prompt: str = "") -> str:
return "Я поки не бачу достатньо деталей на фото. Надішли, будь ласка, чіткіше фото або крупний план об'єкта."
def _parse_tool_json_result(raw: Any) -> Dict[str, Any]:
if isinstance(raw, dict):
return raw
if isinstance(raw, str):
try:
parsed = json.loads(raw)
return parsed if isinstance(parsed, dict) else {}
except Exception:
return {}
return {}
def _looks_like_image_question(prompt: str) -> bool:
if not prompt:
return False
p = str(prompt).lower().strip()
# If it's too short (e.g. "що це?", "что это?"), it might be a follow-up to an image.
# But we should only trigger the guard if the user EXPLICITLY mentions an image
# and we don't have one.
markers = (
"що на фото", "що на цьому фото", "що на зображ", "опиши фото", "проаналізуй фото",
"what is in the image", "what is on this photo", "describe the photo", "analyze image",
"что на фото", "что на этом фото", "опиши фото", "проанализируй фото",
)
if any(m in p for m in markers):
return True
# Refined regex: must contain 'what|what is|how' and 'photo|image'
# but avoid generic "can you..."
if re.search(r"(що|what|что|опиши|проаналізуй|подивись).{1,24}(фото|зображ|image|photo|світлин)", p):
# Exclude common meta-questions that might contain these words but aren't about an image
meta_exclude = ["канал", "чат", "бот", "нормально"]
if not any(ex in p for ex in meta_exclude):
return True
return False
def _sanitize_vision_text_for_user(text: str) -> str:
if not text:
@@ -1218,6 +1305,22 @@ async def agent_infer(agent_id: str, request: InferRequest):
username = metadata.get("username")
# Get agent_id from metadata or URL parameter
request_agent_id = metadata.get("agent_id", agent_id).lower()
# Safety guard: avoid text-only handling for image questions without image payload.
# IMPORTANT: inspect only the latest user text when provided by gateway,
# not the full context-augmented prompt.
raw_user_text = str(metadata.get("raw_user_text", "") or "").strip()
image_guard_text = raw_user_text if raw_user_text else request.prompt
if (not request.images) and _looks_like_image_question(image_guard_text):
return InferResponse(
response=(
"Бачу запит про фото/зображення, але в запиті немає самого файлу. "
"Надішліть фото ще раз (або прикріпіть файл із підписом), і я одразу проаналізую."
),
model="vision-context-required",
backend="router-guard",
tokens_used=0,
)
if MEMORY_RETRIEVAL_AVAILABLE and memory_retrieval:
try:
@@ -1248,20 +1351,32 @@ async def agent_infer(agent_id: str, request: InferRequest):
logger.info(f" No system_prompt in request for agent {agent_id}, loading from configured sources")
if not system_prompt:
try:
from prompt_builder import get_agent_system_prompt
system_prompt = await get_agent_system_prompt(
agent_id,
city_service_url=CITY_SERVICE_URL,
router_config=router_config
)
logger.info(f"✅ Loaded system prompt from database for {agent_id}")
except Exception as e:
logger.warning(f"⚠️ Could not load prompt from database: {e}")
# Fallback to config
if not (CITY_SERVICE_URL or '').strip():
system_prompt_source = "router_config"
agent_config = router_config.get("agents", {}).get(agent_id, {})
system_prompt = agent_config.get("system_prompt")
logger.info(f" CITY_SERVICE_URL is empty; loaded system prompt from router_config for {agent_id}")
else:
try:
from prompt_builder import get_agent_system_prompt
system_prompt = await get_agent_system_prompt(
agent_id,
city_service_url=CITY_SERVICE_URL,
router_config=router_config
)
logger.info(f"✅ Loaded system prompt from city service/config for {agent_id}")
except Exception as e:
logger.warning(f"⚠️ Could not load prompt via prompt_builder: {e}")
# Fallback to config
system_prompt_source = "router_config"
agent_config = router_config.get("agents", {}).get(agent_id, {})
system_prompt = agent_config.get("system_prompt")
if system_prompt and system_prompt_source == "city_service":
# prompt_builder may silently fall back to router config; reflect actual source in metadata/logs
cfg_prompt = (router_config.get("agents", {}).get(agent_id, {}) or {}).get("system_prompt")
if cfg_prompt and (system_prompt or "").strip() == str(cfg_prompt).strip():
system_prompt_source = "router_config"
if not system_prompt:
system_prompt_source = "empty"
@@ -1284,6 +1399,109 @@ async def agent_infer(agent_id: str, request: InferRequest):
# Use router config to get default model for agent, fallback to qwen3:8b
agent_config = router_config.get("agents", {}).get(agent_id, {})
# =========================================================================
# AGROMATRIX PLANT PRE-VISION (edge tool before CrewAI)
# =========================================================================
crewai_profile = str(effective_metadata.get("crewai_profile", "") or "").strip().lower()
is_agromatrix_plant = request_agent_id == "agromatrix" and crewai_profile == "plant_intel"
if is_agromatrix_plant and http_client and user_id and chat_id and not request.images:
# Follow-up path: reuse last structured plant identification from fact-memory.
fact_key = f"last_plant:{request_agent_id}:{chat_id}"
try:
fact_resp = await http_client.get(
f"http://memory-service:8000/facts/{fact_key}",
params={"user_id": user_id},
timeout=8.0,
)
if fact_resp.status_code == 200:
fact_data = fact_resp.json() or {}
last_plant = fact_data.get("fact_value_json") or {}
if isinstance(last_plant, str):
try:
last_plant = json.loads(last_plant)
except Exception:
last_plant = {}
if isinstance(last_plant, dict) and last_plant.get("top_k"):
effective_metadata["last_plant"] = last_plant
# Give deterministic context to synthesis without exposing internals to end user.
request.prompt = (
f"{request.prompt}\n\n"
f"[PREVIOUS_PLANT_IDENTIFICATION] {json.dumps(last_plant, ensure_ascii=False)}"
)
logger.info(
f"🌿 Plant follow-up context loaded: top1={((last_plant.get('top_k') or [{}])[0]).get('scientific_name', 'N/A')}"
)
except Exception as e:
logger.warning(f"⚠️ Plant follow-up context load failed: {e}")
if is_agromatrix_plant and request.images and len(request.images) > 0 and TOOL_MANAGER_AVAILABLE and tool_manager:
first_image = request.images[0]
tool_args: Dict[str, Any] = {"top_k": 5}
if isinstance(first_image, str) and first_image.startswith("data:"):
tool_args["image_data"] = first_image
elif isinstance(first_image, str):
tool_args["image_url"] = first_image
try:
tool_res = await tool_manager.execute_tool(
"nature_id_identify",
tool_args,
agent_id=request_agent_id,
chat_id=chat_id,
user_id=user_id,
)
if tool_res and tool_res.success and tool_res.result:
plant_vision = _parse_tool_json_result(tool_res.result)
if plant_vision:
top_k_rows = plant_vision.get("top_k") or []
top1 = top_k_rows[0] if top_k_rows else {}
confidence = float(plant_vision.get("confidence") or top1.get("confidence") or 0.0)
effective_metadata["plant_vision"] = plant_vision
effective_metadata["plant_top_k"] = top_k_rows
effective_metadata["plant_confidence"] = confidence
request.prompt = (
f"{request.prompt}\n\n"
f"[PLANT_VISION_PREPROCESSED] {json.dumps(plant_vision, ensure_ascii=False)}"
)
if top1:
logger.info(
f"🌿 Vision pre-process: {confidence:.2f}% {top1.get('scientific_name') or top1.get('name') or 'unknown'}"
)
else:
logger.info("🌿 Vision pre-process: no candidates")
if plant_vision.get("recommend_fallback"):
logger.info("🌿 Vision pre-process: low confidence -> GBIF fallback enabled")
# Persist structured plant result for follow-up questions.
if http_client and user_id and chat_id:
fact_key = f"last_plant:{request_agent_id}:{chat_id}"
try:
await http_client.post(
"http://memory-service:8000/facts/upsert",
json={
"user_id": user_id,
"fact_key": fact_key,
"fact_value": (top1.get("scientific_name") if isinstance(top1, dict) else None),
"fact_value_json": {
"top_k": top_k_rows,
"confidence": confidence,
"recommend_fallback": bool(plant_vision.get("recommend_fallback")),
"gbif_validation": plant_vision.get("gbif_validation"),
"identified_at": datetime.utcnow().isoformat(),
"agent_id": request_agent_id,
"chat_id": chat_id,
"source": "plant_vision_preprocess",
},
},
timeout=8.0,
)
except Exception as e:
logger.warning(f"⚠️ Failed to store last_plant fact: {e}")
except Exception as e:
logger.warning(f"⚠️ Plant pre-vision failed: {e}")
# =========================================================================
# CREWAI DECISION: Use orchestration or direct LLM?
# =========================================================================
@@ -1341,9 +1559,12 @@ async def agent_infer(agent_id: str, request: InferRequest):
# Get agent CrewAI config from registry (or router_config fallback)
crewai_cfg = agent_config.get("crewai", {})
# CrewAI trigger should inspect the latest user message first (if provided by gateway),
# otherwise it can overreact to long context history.
decision_prompt = str(effective_metadata.get("raw_user_text", "") or "").strip() or request.prompt
use_crewai, crewai_reason = should_use_crewai(
agent_id=agent_id,
prompt=request.prompt,
prompt=decision_prompt,
agent_config=agent_config,
metadata=effective_metadata,
force_crewai=effective_metadata.get("force_crewai", False),
@@ -1366,6 +1587,10 @@ async def agent_infer(agent_id: str, request: InferRequest):
},
"metadata": effective_metadata,
"runtime_envelope": runtime_envelope,
"plant_vision": effective_metadata.get("plant_vision"),
"plant_top_k": effective_metadata.get("plant_top_k"),
"plant_confidence": effective_metadata.get("plant_confidence"),
"last_plant": effective_metadata.get("last_plant"),
},
team=crewai_cfg.get("team"),
profile=effective_metadata.get("crewai_profile")
@@ -1503,6 +1728,9 @@ async def agent_infer(agent_id: str, request: InferRequest):
full_response = _sanitize_vision_text_for_user(raw_response)
vision_web_query = ""
vision_sources: List[Dict[str, str]] = []
plant_intent = _is_plant_identification_request(request.prompt)
wants_web = False
uncertain = False
# Debug: log full response structure
logger.info(
@@ -1520,39 +1748,57 @@ async def agent_infer(agent_id: str, request: InferRequest):
try:
wants_web = _vision_prompt_wants_web(request.prompt)
uncertain = _vision_answer_uncertain(full_response or raw_response)
if wants_web or uncertain:
query = _build_vision_web_query(request.prompt, full_response or raw_response)
if not query:
if wants_web or uncertain or plant_intent:
queries = (
_build_plant_web_queries(request.prompt, full_response or raw_response)
if plant_intent
else [_build_vision_web_query(request.prompt, full_response or raw_response)]
)
queries = [q for q in queries if q]
if not queries:
logger.info("🔎 Vision web enrich skipped: query not actionable")
else:
vision_web_query = query
search_result = await tool_manager.execute_tool(
"web_search",
{"query": query, "max_results": 3},
agent_id=request_agent_id,
chat_id=chat_id,
user_id=user_id,
)
if search_result and search_result.success and search_result.result:
vision_web_query = queries[0]
merged_chunks: List[str] = []
merged_sources: List[Dict[str, str]] = []
for query in queries:
search_result = await tool_manager.execute_tool(
"web_search",
{"query": query, "max_results": 3},
agent_id=request_agent_id,
chat_id=chat_id,
user_id=user_id,
)
if not (search_result and search_result.success and search_result.result):
continue
compact_search = _compact_web_search_result(
search_result.result,
query=query,
agent_id=request_agent_id,
)
if not compact_search or "Нічого не знайдено" in compact_search:
continue
merged_chunks.append(compact_search)
merged_sources.extend(_extract_sources_from_compact(compact_search))
if compact_search and "Нічого не знайдено" not in compact_search:
vision_sources = _extract_sources_from_compact(compact_search)
# Deduplicate sources.
if merged_sources:
uniq: List[Dict[str, str]] = []
seen = set()
for s in merged_sources:
key = (s.get("title", ""), s.get("url", ""))
if key in seen:
continue
seen.add(key)
uniq.append(s)
vision_sources = uniq[:5]
base_text = full_response or "Не вдалося надійно ідентифікувати об'єкт на фото."
full_response = (
f"{base_text}\n\n"
f"Додатково знайшов у відкритих джерелах:\n{compact_search}"
)
if merged_chunks:
base_text = full_response or "Не вдалося надійно ідентифікувати об'єкт на фото."
full_response = (
f"{base_text}\n\n"
f"Додатково знайшов у відкритих джерелах:\n{merged_chunks[0]}"
)
logger.info(
"🌐 Vision web enrichment applied "
@@ -1568,6 +1814,11 @@ async def agent_infer(agent_id: str, request: InferRequest):
f"query='{vision_web_query[:180]}', sources={len(vision_sources)}"
)
# Plant identification safety gate:
# avoid hard species claims when confidence is low or evidence is weak.
if request_agent_id == "agromatrix" and plant_intent and (uncertain or len(vision_sources) < 2):
full_response = _build_cautious_plant_response(full_response or raw_response, len(vision_sources))
# Image quality gate: one soft retry if response looks empty/meta.
if _image_response_needs_retry(full_response):
try:

View File

@@ -18,6 +18,7 @@ Collections per agent:
import os
import json
import logging
import re
from typing import Optional, Dict, Any, List
from dataclasses import dataclass, field
from datetime import datetime
@@ -453,7 +454,7 @@ class MemoryRetrieval:
# Higher threshold for messages; even higher when user asks about docs to avoid pulling old chatter.
msg_thresh = 0.5 if is_doc_query else 0.4
if r.score > msg_thresh:
text = r.payload.get("text", r.payload.get("content", ""))
text = self._extract_message_text(r.payload)
# Skip very short or system messages
if len(text) > 20 and not text.startswith("<"):
if is_doc_query and topic_keywords:
@@ -501,7 +502,7 @@ class MemoryRetrieval:
seen_texts = set()
unique_results = []
for r in all_results:
text_key = r.get("text", "")[:50].lower()
text_key = self._canonical_text_key(r.get("text", ""))
if text_key not in seen_texts:
seen_texts.add(text_key)
unique_results.append(r)
@@ -511,6 +512,40 @@ class MemoryRetrieval:
except Exception as e:
logger.warning(f"Memory search failed for {agent_id}: {e}")
return []
@staticmethod
def _extract_message_text(payload: Dict[str, Any]) -> str:
"""
Normalize text across both payload schemas:
- memory-service: content/text (+ role/channel_id)
- router: user_message + assistant_response (+ chat_id)
"""
if not payload:
return ""
text = (payload.get("text") or payload.get("content") or "").strip()
if text:
lower = text.lower()
marker = "\n\nassistant:"
idx = lower.rfind(marker)
if lower.startswith("user:") and idx != -1:
assistant_text = text[idx + len(marker):].strip()
if assistant_text:
return assistant_text
return text
user_text = (payload.get("user_message") or "").strip()
assistant_text = (payload.get("assistant_response") or "").strip()
if user_text and assistant_text:
return f"User: {user_text}\n\nAssistant: {assistant_text}"
return user_text or assistant_text
@staticmethod
def _canonical_text_key(text: str) -> str:
if not text:
return ""
normalized = re.sub(r"\s+", " ", text.strip().lower())
return normalized[:220]
async def get_user_graph_context(
self,
@@ -619,6 +654,8 @@ class MemoryRetrieval:
query=message,
agent_id=agent_id,
platform_user_id=identity.platform_user_id,
chat_id=chat_id,
user_id=user_id,
limit=5
)
brief.relevant_memories = memories

View File

@@ -408,8 +408,9 @@ agents:
description: "Monitor Agent - архітектор-інспектор DAGI"
default_llm: local_qwen3_8b
system_prompt: |
Ти - Monitor Agent, стежиш за нодами, сервісами, агентами.
Якщо бачиш у чаті інших ботів, відповідай тільки за інфраструктурою або прямим тегом.
Ти - Monitor Agent, інфраструктурний інспектор DAGI: ноди, сервіси, пайплайни, алерти.
Ти знаєш, що DAARWIZZ — головний оркестратор мережі DAARION.city; для governance/маршрутизації посилайся на нього.
Відповідай коротко і по суті; якщо даних бракує — одразу кажи, який саме метрик/лог потрібен.
tools:
- id: get_metrics
type: builtin

View File

@@ -108,6 +108,116 @@ TOOL_DEFINITIONS = [
}
}
},
{
"type": "function",
"function": {
"name": "plantnet_lookup",
"description": "Визначення рослин через Pl@ntNet API. Повертає top-k кандидатів з confidence.",
"parameters": {
"type": "object",
"properties": {
"query": {
"type": "string",
"description": "Короткий опис рослини/культури (якщо немає image_url)"
},
"image_url": {
"type": "string",
"description": "Публічне посилання на фото рослини"
},
"organ": {
"type": "string",
"description": "Орган рослини: leaf/flower/fruit/bark/auto",
"default": "auto"
},
"top_k": {
"type": "integer",
"description": "Скільки кандидатів повернути (1-10)",
"default": 3
}
}
}
}
},
{
"type": "function",
"function": {
"name": "nature_id_identify",
"description": "Локальна/open-source ідентифікація рослин через nature-id сумісний сервіс.",
"parameters": {
"type": "object",
"properties": {
"image_url": {
"type": "string",
"description": "Публічне посилання на фото рослини"
},
"image_data": {
"type": "string",
"description": "Data URL зображення (data:image/...;base64,...)"
},
"top_k": {
"type": "integer",
"description": "Скільки кандидатів повернути (1-10)",
"default": 3
},
"min_confidence": {
"type": "number",
"description": "Поріг confidence для fallback на GBIF",
"default": 0.65
}
},
"required": ["image_url"]
}
}
},
{
"type": "function",
"function": {
"name": "gbif_species_lookup",
"description": "Пошук таксонів у GBIF для валідації назви культури/рослини.",
"parameters": {
"type": "object",
"properties": {
"query": {
"type": "string",
"description": "Назва/термін для пошуку виду"
},
"limit": {
"type": "integer",
"description": "Кількість результатів (1-10)",
"default": 5
}
},
"required": ["query"]
}
}
},
{
"type": "function",
"function": {
"name": "agrovoc_lookup",
"description": "Нормалізація агро-термінів через AGROVOC (SPARQL).",
"parameters": {
"type": "object",
"properties": {
"query": {
"type": "string",
"description": "Термін культури/хвороби/технології"
},
"lang": {
"type": "string",
"description": "Мова міток (en/uk/ru)",
"default": "en"
},
"limit": {
"type": "integer",
"description": "Кількість результатів (1-10)",
"default": 5
}
},
"required": ["query"]
}
}
},
# PRIORITY 3: Generation tools
{
"type": "function",
@@ -709,6 +819,14 @@ class ToolManager:
return await self._web_search(arguments)
elif tool_name == "web_extract":
return await self._web_extract(arguments)
elif tool_name == "plantnet_lookup":
return await self._plantnet_lookup(arguments)
elif tool_name == "nature_id_identify":
return await self._nature_id_identify(arguments)
elif tool_name == "gbif_species_lookup":
return await self._gbif_species_lookup(arguments)
elif tool_name == "agrovoc_lookup":
return await self._agrovoc_lookup(arguments)
elif tool_name == "image_generate":
return await self._image_generate(arguments)
elif tool_name == "comfy_generate_image":
@@ -2530,6 +2648,253 @@ class ToolManager:
except Exception as e:
return ToolResult(success=False, result=None, error=str(e))
async def _plantnet_lookup(self, args: Dict) -> ToolResult:
    """Plant identification via Pl@ntNet API (skeleton adapter).

    Resolution order:
      1. Pl@ntNet HTTP API — when an image_url and PLANTNET_API_KEY are present.
      2. Self-hosted nature-id service (``_nature_id_identify``) for the image.
      3. GBIF species search (``_gbif_species_lookup``) for a text-only query.

    Args (from ``args``):
        query: free-text plant/crop description (used for the GBIF fallback).
        image_url: public URL of the plant photo.
        organ: plant organ hint (leaf/flower/fruit/bark/auto); "auto" maps to "leaf".
        top_k: candidate count — clamped here to 1..5, although the tool schema
            advertises 1..10. NOTE(review): confirm whether the 5 cap is intended.

    Returns:
        ToolResult with a human-readable candidate list on success, or
        success=False with a short error code when no backend is usable.
    """
    query = str(args.get("query", "") or "").strip()
    image_url = str(args.get("image_url", "") or "").strip()
    organ = str(args.get("organ", "auto") or "auto").strip().lower()
    top_k = max(1, min(int(args.get("top_k", 3)), 5))
    api_key = (os.getenv("PLANTNET_API_KEY") or "").strip()
    if image_url and api_key:
        try:
            params = {
                "api-key": api_key,
                "images": image_url,
                # Pl@ntNet needs a concrete organ; treat "auto" as "leaf".
                "organs": "leaf" if organ == "auto" else organ,
                "lang": "en",
            }
            resp = await self.http_client.get(
                "https://my-api.plantnet.org/v2/identify/all",
                params=params,
                timeout=25.0,
            )
            if resp.status_code == 200:
                data = resp.json()
                results = (data.get("results") or [])[:top_k]
                if not results:
                    return ToolResult(success=True, result="Pl@ntNet: кандидатів не знайдено.")
                lines = []
                for idx, item in enumerate(results, 1):
                    species = (item.get("species") or {})
                    sname = species.get("scientificNameWithoutAuthor") or species.get("scientificName") or "unknown"
                    common = species.get("commonNames") or []
                    cname = common[0] if common else "-"
                    score = float(item.get("score") or 0.0)
                    lines.append(f"{idx}. {sname} ({cname}) score={score:.3f}")
                return ToolResult(success=True, result="Pl@ntNet candidates:\n" + "\n".join(lines))
            return ToolResult(success=False, result=None, error=f"plantnet_http_{resp.status_code}")
        except Exception as e:
            return ToolResult(success=False, result=None, error=f"plantnet_error: {e}")
    # No API key: try the self-hosted identifier for the image first.
    if image_url:
        ni = await self._nature_id_identify({"image_url": image_url, "top_k": top_k})
        if ni.success:
            return ni
    # Text-only fallback: validate the name against GBIF taxonomy.
    if query:
        return await self._gbif_species_lookup({"query": query, "limit": top_k})
    return ToolResult(
        success=False,
        result=None,
        error="No available plant ID backend (set PLANTNET_API_KEY or NATURE_ID_URL, or provide text query)",
    )
async def _nature_id_identify(self, args: Dict) -> ToolResult:
    """Open-source plant identification via self-hosted nature-id compatible endpoint.

    Args (from ``args``):
        image_url: public URL of the photo; sent as JSON to ``{base}/identify``.
        image_data: data URL (data:image/...;base64,...); decoded and sent as
            multipart to ``{base}/identify-file``. One of the two is required.
        top_k: number of candidates to keep (clamped to 1..10).
        min_confidence: threshold below which a GBIF fallback is recommended;
            defaults to env NATURE_ID_MIN_CONFIDENCE, then 0.65.

    Returns:
        ToolResult whose ``result`` is a JSON string with normalized fields:
        status, model, source, top_k (list of {confidence, name,
        scientific_name}), confidence (top-1), recommend_fallback, and —
        when the fallback fired — ``gbif_validation`` text.
        success=False with a short error code on config/HTTP/decoding errors.
    """
    image_url = str(args.get("image_url", "") or "").strip()
    image_data = str(args.get("image_data", "") or "").strip()
    top_k = max(1, min(int(args.get("top_k", 3)), 10))
    # NOTE(review): a non-numeric NATURE_ID_MIN_CONFIDENCE env value would raise
    # ValueError here, outside the try/except below — confirm env is validated.
    min_confidence = float(args.get("min_confidence", os.getenv("NATURE_ID_MIN_CONFIDENCE", "0.65")))
    if not image_url and not image_data:
        return ToolResult(success=False, result=None, error="image_url or image_data is required")
    base = (os.getenv("NATURE_ID_URL") or "").strip().rstrip("/")
    if not base:
        return ToolResult(success=False, result=None, error="NATURE_ID_URL is not configured")
    try:
        if image_data:
            # data URL -> multipart /identify-file
            if not image_data.startswith("data:") or "," not in image_data:
                return ToolResult(success=False, result=None, error="invalid image_data format")
            header, b64 = image_data.split(",", 1)
            mime = "image/jpeg"
            if ";base64" in header:
                # "data:image/png;base64" -> "image/png"
                mime = header.split(":", 1)[1].split(";", 1)[0] or "image/jpeg"
            ext = "jpg"
            if "png" in mime:
                ext = "png"
            try:
                image_bytes = base64.b64decode(b64)
            except Exception:
                return ToolResult(success=False, result=None, error="invalid image_data base64")
            files = {"file": (f"upload.{ext}", image_bytes, mime)}
            resp = await self.http_client.post(
                f"{base}/identify-file",
                params={"top_k": top_k},
                files=files,
                timeout=45.0,
            )
        else:
            payload = {"image_url": image_url, "top_k": top_k}
            resp = await self.http_client.post(f"{base}/identify", json=payload, timeout=45.0)
        if resp.status_code != 200:
            return ToolResult(success=False, result=None, error=f"nature_id_http_{resp.status_code}")
        data = resp.json() or {}
        status = str(data.get("status") or "success")
        # The service may answer in either schema: a pre-normalized "top_k"
        # list, or raw "predictions"/"results"; normalize both to one shape.
        raw_top_k = data.get("top_k") or []
        raw_preds = data.get("predictions") or data.get("results") or []
        top_k_rows = []
        if isinstance(raw_top_k, list) and raw_top_k:
            for row in raw_top_k[:top_k]:
                if not isinstance(row, dict):
                    continue
                conf = row.get("confidence", 0.0)
                try:
                    conf_f = float(conf)
                except Exception:
                    conf_f = 0.0
                top_k_rows.append({
                    "confidence": conf_f,
                    "name": str(row.get("name") or row.get("scientific_name") or "unknown"),
                    "scientific_name": str(row.get("scientific_name") or row.get("name") or "unknown"),
                })
        else:
            for item in raw_preds[:top_k]:
                if not isinstance(item, dict):
                    continue
                score = item.get("score", item.get("confidence", 0.0))
                try:
                    score_f = float(score)
                except Exception:
                    score_f = 0.0
                sname = item.get("scientific_name") or item.get("label") or item.get("name") or "unknown"
                cname = item.get("common_name") or item.get("common") or sname
                top_k_rows.append({
                    "confidence": score_f,
                    "name": str(cname),
                    "scientific_name": str(sname),
                })
        if not top_k_rows:
            # No usable candidates: still success=True, but signal the caller
            # to fall back to GBIF via recommend_fallback.
            return ToolResult(success=True, result=json.dumps({
                "status": status,
                "model": data.get("model") or "aiy_plants_V1",
                "source": data.get("source") or "nature-id-cli",
                "top_k": [],
                "confidence": 0.0,
                "recommend_fallback": True,
                "reason": "no_predictions",
            }, ensure_ascii=False))
        top1 = top_k_rows[0]
        top1_conf = float(top1.get("confidence", 0.0))
        recommend_fallback = top1_conf < min_confidence
        out = {
            "status": status,
            "model": data.get("model") or "aiy_plants_V1",
            "source": data.get("source") or "nature-id-cli",
            "inference_time_sec": data.get("inference_time_sec"),
            "top_k": top_k_rows,
            "confidence": top1_conf,
            "min_confidence": min_confidence,
            "recommend_fallback": recommend_fallback,
            "fallback": "gbif_species_lookup",
        }
        if recommend_fallback:
            # Low confidence: cross-check the top-1 name against GBIF taxonomy.
            fallback_query = str(top1.get("scientific_name") or top1.get("name") or "").strip()
            if fallback_query and fallback_query.lower() != "unknown":
                gbif = await self._gbif_species_lookup({"query": fallback_query, "limit": min(5, top_k)})
                if gbif.success and gbif.result:
                    out["gbif_validation"] = gbif.result
        return ToolResult(success=True, result=json.dumps(out, ensure_ascii=False))
    except Exception as e:
        return ToolResult(success=False, result=None, error=f"nature_id_error: {e}")
async def _gbif_species_lookup(self, args: Dict) -> ToolResult:
    """Species lookup via GBIF public API.

    Args (from ``args``): query (required search term), limit (clamped to 1..10).
    Returns a numbered plain-text list of ACCEPTED taxa, or an error code.
    """
    term = str(args.get("query", "") or "").strip()
    max_rows = max(1, min(int(args.get("limit", 5)), 10))
    if not term:
        return ToolResult(success=False, result=None, error="query is required")
    try:
        resp = await self.http_client.get(
            "https://api.gbif.org/v1/species/search",
            params={"q": term, "limit": max_rows, "status": "ACCEPTED"},
            timeout=20.0,
        )
        if resp.status_code != 200:
            return ToolResult(success=False, result=None, error=f"gbif_http_{resp.status_code}")
        payload = resp.json() or {}
        matches = payload.get("results") or []
        if not matches:
            return ToolResult(success=True, result="GBIF: результатів не знайдено.")
        formatted = [
            f"{idx}. {item.get('scientificName') or item.get('canonicalName') or 'unknown'}"
            f" | rank={item.get('rank') or '-'}"
            f" | status={item.get('taxonomicStatus') or '-'}"
            f" | key={item.get('key')}"
            for idx, item in enumerate(matches[:max_rows], 1)
        ]
        return ToolResult(success=True, result="GBIF matches:\n" + "\n".join(formatted))
    except Exception as e:
        return ToolResult(success=False, result=None, error=f"gbif_error: {e}")
async def _agrovoc_lookup(self, args: Dict) -> ToolResult:
    """AGROVOC term normalization via public SPARQL endpoint.

    Args (from ``args``):
        query: crop/disease/technology term matched against skos:prefLabel.
        lang: label language — en/uk/ru; anything else falls back to "en".
        limit: number of rows (clamped to 1..10).

    Returns:
        ToolResult with "N. label | concept-URI" lines on success, or a
        short error code on HTTP/network failures.

    NOTE(review): the SPARQL query is string-built; stripping backslashes and
    double quotes from the term blocks literal breakout, but a stricter
    whitelist or parameterized query would be safer long-term.
    """
    query = str(args.get("query", "") or "").strip()
    lang = str(args.get("lang", "en") or "en").strip().lower()
    limit = max(1, min(int(args.get("limit", 5)), 10))
    if not query:
        return ToolResult(success=False, result=None, error="query is required")
    if lang not in {"en", "uk", "ru"}:
        lang = "en"
    # Neutralize characters that could escape the quoted SPARQL string literal.
    safe_q = query.replace('\\', ' ').replace('"', ' ').strip()
    sparql = (
        "PREFIX skos: <http://www.w3.org/2004/02/skos/core#> "
        "SELECT ?concept ?label WHERE { "
        "?concept skos:prefLabel ?label . "
        f"FILTER(lang(?label) = '{lang}') "
        f"FILTER(CONTAINS(LCASE(STR(?label)), LCASE(\"{safe_q}\"))) "
        "} LIMIT " + str(limit)
    )
    try:
        resp = await self.http_client.get(
            "https://agrovoc.fao.org/sparql",
            params={"query": sparql, "format": "json"},
            timeout=25.0,
        )
        if resp.status_code != 200:
            return ToolResult(success=False, result=None, error=f"agrovoc_http_{resp.status_code}")
        data = resp.json() or {}
        # Standard SPARQL JSON results shape: {"results": {"bindings": [...]}}.
        bindings = (((data.get("results") or {}).get("bindings")) or [])
        if not bindings:
            return ToolResult(success=True, result="AGROVOC: результатів не знайдено.")
        lines = []
        for idx, b in enumerate(bindings[:limit], 1):
            label = ((b.get("label") or {}).get("value") or "").strip()
            concept = ((b.get("concept") or {}).get("value") or "").strip()
            lines.append(f"{idx}. {label} | {concept}")
        return ToolResult(success=True, result="AGROVOC matches:\n" + "\n".join(lines))
    except Exception as e:
        return ToolResult(success=False, result=None, error=f"agrovoc_error: {e}")
async def _unload_ollama_models(self):
"""Unload all Ollama models to free VRAM for heavy operations like FLUX"""
ollama_url = os.getenv("OLLAMA_BASE_URL", "http://172.18.0.1:11434")