merge: integrate remote codex/sync-node1-runtime with fabric layer changes

Resolve conflicts in docker-compose.node1.yml, services/router/main.py,
and gateway-bot/services/doc_service.py — keeping both fabric layer
(NCS, node-worker, Prometheus) and document ingest/query endpoints.

Made-with: Cursor
This commit is contained in:
Apple
2026-02-27 03:09:12 -08:00
76 changed files with 7495 additions and 295 deletions

View File

@@ -46,8 +46,15 @@ AGENT_SPECIALIZED_TOOLS = {
"nutra": ['comfy_generate_image', 'comfy_generate_video'],
# AgroMatrix - Agriculture
# Specialized: crop analysis, weather integration, field mapping
"agromatrix": ['comfy_generate_image', 'comfy_generate_video'],
# Specialized: crop analysis, weather integration, field mapping + plant intelligence
"agromatrix": [
'comfy_generate_image',
'comfy_generate_video',
'plantnet_lookup',
'nature_id_identify',
'gbif_species_lookup',
'agrovoc_lookup',
],
# GreenFood - Food & Eco
# Specialized: recipe analysis, eco-scoring

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -408,8 +408,9 @@ agents:
description: "Monitor Agent - архітектор-інспектор DAGI"
default_llm: local_qwen3_8b
system_prompt: |
Ти - Monitor Agent, стежиш за нодами, сервісами, агентами.
Якщо бачиш у чаті інших ботів, відповідай тільки за інфраструктурою або прямим тегом.
Ти - Monitor Agent, інфраструктурний інспектор DAGI: ноди, сервіси, пайплайни, алерти.
Ти знаєш, що DAARWIZZ — головний оркестратор мережі DAARION.city; для governance/маршрутизації посилайся на нього.
Відповідай коротко і по суті; якщо даних бракує — одразу кажи, який саме метрик/лог потрібен.
tools:
- id: get_metrics
type: builtin

View File

@@ -19,6 +19,7 @@ from typing import Dict, List, Any, Optional
from dataclasses import dataclass
from io import BytesIO, StringIO
from pathlib import PurePath
from urllib.parse import urlparse
import xml.etree.ElementTree as ET
from xml.sax.saxutils import escape as xml_escape
from zipfile import ZIP_DEFLATED, ZipFile
@@ -108,6 +109,115 @@ TOOL_DEFINITIONS = [
}
}
},
{
"type": "function",
"function": {
"name": "plantnet_lookup",
"description": "Визначення рослин через Pl@ntNet API. Повертає top-k кандидатів з confidence.",
"parameters": {
"type": "object",
"properties": {
"query": {
"type": "string",
"description": "Короткий опис рослини/культури (якщо немає image_url)"
},
"image_url": {
"type": "string",
"description": "Публічне посилання на фото рослини"
},
"organ": {
"type": "string",
"description": "Орган рослини: leaf/flower/fruit/bark/auto",
"default": "auto"
},
"top_k": {
"type": "integer",
"description": "Скільки кандидатів повернути (1-10)",
"default": 3
}
}
}
}
},
{
"type": "function",
"function": {
"name": "nature_id_identify",
"description": "Локальна/open-source ідентифікація рослин через nature-id сумісний сервіс.",
"parameters": {
"type": "object",
"properties": {
"image_url": {
"type": "string",
"description": "Публічне посилання на фото рослини"
},
"image_data": {
"type": "string",
"description": "Data URL зображення (data:image/...;base64,...)"
},
"top_k": {
"type": "integer",
"description": "Скільки кандидатів повернути (1-10)",
"default": 3
},
"min_confidence": {
"type": "number",
"description": "Поріг confidence для fallback на GBIF",
"default": 0.65
}
}
}
}
},
{
"type": "function",
"function": {
"name": "gbif_species_lookup",
"description": "Пошук таксонів у GBIF для валідації назви культури/рослини.",
"parameters": {
"type": "object",
"properties": {
"query": {
"type": "string",
"description": "Назва/термін для пошуку виду"
},
"limit": {
"type": "integer",
"description": "Кількість результатів (1-10)",
"default": 5
}
},
"required": ["query"]
}
}
},
{
"type": "function",
"function": {
"name": "agrovoc_lookup",
"description": "Нормалізація агро-термінів через AGROVOC (SPARQL).",
"parameters": {
"type": "object",
"properties": {
"query": {
"type": "string",
"description": "Термін культури/хвороби/технології"
},
"lang": {
"type": "string",
"description": "Мова міток (en/uk/ru)",
"default": "en"
},
"limit": {
"type": "integer",
"description": "Кількість результатів (1-10)",
"default": 5
}
},
"required": ["query"]
}
}
},
# PRIORITY 3: Generation tools
{
"type": "function",
@@ -681,6 +791,42 @@ class ToolManager:
tool_names = [t.get("function", {}).get("name") for t in filtered]
logger.debug(f"Agent {agent_id} has {len(filtered)} tools: {tool_names}")
return filtered
@staticmethod
def _is_image_data_url(value: str) -> bool:
v = str(value or "").strip()
return bool(v.startswith("data:image/") and ";base64," in v)
@staticmethod
def _is_known_non_direct_image_url(url: str) -> bool:
u = str(url or "").strip()
if not u:
return False
try:
p = urlparse(u)
except Exception:
return True
host = (p.netloc or "").lower()
if host in {"t.me", "telegram.me"}:
return True
if "web.telegram.org" in host:
return True
return False
@staticmethod
def _normalize_confidence(value: Any) -> float:
try:
v = float(value)
except Exception:
return 0.0
if v < 0:
return 0.0
# Some backends return percentages (e.g. 97.6) instead of 0..1.
if v > 1.0 and v <= 100.0:
v = v / 100.0
if v > 1.0:
v = 1.0
return v
async def execute_tool(
self,
@@ -709,6 +855,14 @@ class ToolManager:
return await self._web_search(arguments)
elif tool_name == "web_extract":
return await self._web_extract(arguments)
elif tool_name == "plantnet_lookup":
return await self._plantnet_lookup(arguments)
elif tool_name == "nature_id_identify":
return await self._nature_id_identify(arguments)
elif tool_name == "gbif_species_lookup":
return await self._gbif_species_lookup(arguments)
elif tool_name == "agrovoc_lookup":
return await self._agrovoc_lookup(arguments)
elif tool_name == "image_generate":
return await self._image_generate(arguments)
elif tool_name == "comfy_generate_image":
@@ -2530,6 +2684,272 @@ class ToolManager:
except Exception as e:
return ToolResult(success=False, result=None, error=str(e))
async def _plantnet_lookup(self, args: Dict) -> ToolResult:
    """Plant identification via Pl@ntNet API (skeleton adapter).

    Attempt order:
      1. Pl@ntNet HTTP API -- requires PLANTNET_API_KEY and a public image_url.
      2. nature-id compatible service -- via _nature_id_identify (image input).
      3. GBIF text search -- via _gbif_species_lookup (free-form query).

    Args (tool-call JSON):
        query: short textual description of the plant (optional).
        image_url: public URL of a plant photo (optional).
        image_data / _runtime_image_data: base64 data URL of the photo.
        organ: plant organ hint (leaf/flower/fruit/bark/auto).
        top_k: number of candidates, clamped to the schema-declared 1..10.

    Returns:
        ToolResult with a human-readable candidate list, or an error when no
        backend is available.
    """
    query = str(args.get("query", "") or "").strip()
    image_url = str(args.get("image_url", "") or "").strip()
    image_data = str(args.get("image_data", "") or "").strip()
    runtime_image_data = str(args.get("_runtime_image_data", "") or "").strip()
    # The runtime may inject the actual photo as a data URL; use it when the
    # model did not pass image_data explicitly.
    if not image_data and self._is_image_data_url(runtime_image_data):
        image_data = runtime_image_data
    organ = str(args.get("organ", "auto") or "auto").strip().lower()
    # Defensive parse: LLM-generated arguments may contain None or non-numeric
    # values, and a bare int(...) would raise out of the tool call.
    try:
        top_k = int(args.get("top_k", 3))
    except (TypeError, ValueError):
        top_k = 3
    # Clamp to the 1-10 range declared in the tool schema (was clamped to 5,
    # contradicting the schema description).
    top_k = max(1, min(top_k, 10))
    api_key = (os.getenv("PLANTNET_API_KEY") or "").strip()
    if image_url and api_key:
        try:
            params = {
                "api-key": api_key,
                "images": image_url,
                # Pl@ntNet requires a concrete organ; map "auto" to "leaf".
                "organs": "leaf" if organ == "auto" else organ,
                "lang": "en",
            }
            resp = await self.http_client.get(
                "https://my-api.plantnet.org/v2/identify/all",
                params=params,
                timeout=25.0,
            )
            if resp.status_code == 200:
                data = resp.json()
                results = (data.get("results") or [])[:top_k]
                if not results:
                    return ToolResult(success=True, result="Pl@ntNet: кандидатів не знайдено.")
                lines = []
                for idx, item in enumerate(results, 1):
                    species = (item.get("species") or {})
                    sname = species.get("scientificNameWithoutAuthor") or species.get("scientificName") or "unknown"
                    common = species.get("commonNames") or []
                    cname = common[0] if common else "-"
                    score = float(item.get("score") or 0.0)
                    lines.append(f"{idx}. {sname} ({cname}) score={score:.3f}")
                return ToolResult(success=True, result="Pl@ntNet candidates:\n" + "\n".join(lines))
            return ToolResult(success=False, result=None, error=f"plantnet_http_{resp.status_code}")
        except Exception as e:
            return ToolResult(success=False, result=None, error=f"plantnet_error: {e}")
    if image_url or image_data:
        # No Pl@ntNet key (or image only as data URL): try self-hosted backend.
        ni_args: Dict[str, Any] = {"top_k": top_k}
        if image_data:
            ni_args["image_data"] = image_data
        else:
            ni_args["image_url"] = image_url
        if runtime_image_data:
            ni_args["_runtime_image_data"] = runtime_image_data
        ni = await self._nature_id_identify(ni_args)
        if ni.success:
            return ni
    if query:
        # Last resort: validate the textual query against GBIF taxonomy.
        return await self._gbif_species_lookup({"query": query, "limit": top_k})
    return ToolResult(
        success=False,
        result=None,
        error="No available plant ID backend (set PLANTNET_API_KEY or NATURE_ID_URL, or provide text query)",
    )
async def _nature_id_identify(self, args: Dict) -> ToolResult:
    """Open-source plant identification via self-hosted nature-id compatible endpoint.

    Accepts either a direct ``image_url`` or a base64 ``image_data`` data URL
    (the runtime may inject the latter as ``_runtime_image_data``).  URLs
    known to serve HTML (Telegram share links) are replaced by the runtime
    image when available, otherwise rejected.  The backend response is
    normalized into a JSON payload with a ``top_k`` candidate list; when the
    top-1 confidence falls below ``min_confidence``, ``recommend_fallback``
    is set and a GBIF validation lookup is attached.

    Returns:
        ToolResult whose ``result`` is a JSON string on success, or an error
        on misconfiguration / HTTP / parsing failure.
    """
    image_url = str(args.get("image_url", "") or "").strip()
    image_data = str(args.get("image_data", "") or "").strip()
    runtime_image_data = str(args.get("_runtime_image_data", "") or "").strip()
    if not image_data and self._is_image_data_url(runtime_image_data):
        image_data = runtime_image_data
    # Defensive numeric parsing: tool-call args may hold None or junk, and a
    # bare int()/float() would raise out of the tool call; a malformed
    # NATURE_ID_MIN_CONFIDENCE env var must not break the tool either.
    try:
        top_k = int(args.get("top_k", 3))
    except (TypeError, ValueError):
        top_k = 3
    top_k = max(1, min(top_k, 10))
    try:
        min_confidence = float(args.get("min_confidence", os.getenv("NATURE_ID_MIN_CONFIDENCE", "0.65")))
    except (TypeError, ValueError):
        min_confidence = 0.65
    if image_url and self._is_known_non_direct_image_url(image_url):
        if image_data:
            logger.info("nature_id_identify: replacing non-direct image_url with runtime image_data")
            image_url = ""
        else:
            return ToolResult(
                success=False,
                result=None,
                error="image_url is not direct image URL; provide image_data or direct Telegram file URL",
            )
    if not image_url and not image_data:
        return ToolResult(success=False, result=None, error="image_url or image_data is required")
    base = (os.getenv("NATURE_ID_URL") or "").strip().rstrip("/")
    if not base:
        return ToolResult(success=False, result=None, error="NATURE_ID_URL is not configured")
    try:
        if image_data:
            # data URL -> multipart /identify-file
            if not image_data.startswith("data:") or "," not in image_data:
                return ToolResult(success=False, result=None, error="invalid image_data format")
            header, b64 = image_data.split(",", 1)
            mime = "image/jpeg"
            if ";base64" in header:
                mime = header.split(":", 1)[1].split(";", 1)[0] or "image/jpeg"
            ext = "jpg"
            if "png" in mime:
                ext = "png"
            try:
                image_bytes = base64.b64decode(b64)
            except Exception:
                return ToolResult(success=False, result=None, error="invalid image_data base64")
            files = {"file": (f"upload.{ext}", image_bytes, mime)}
            resp = await self.http_client.post(
                f"{base}/identify-file",
                params={"top_k": top_k},
                files=files,
                timeout=45.0,
            )
        else:
            payload = {"image_url": image_url, "top_k": top_k}
            resp = await self.http_client.post(f"{base}/identify", json=payload, timeout=45.0)
        if resp.status_code != 200:
            return ToolResult(success=False, result=None, error=f"nature_id_http_{resp.status_code}")
        data = resp.json() or {}
        status = str(data.get("status") or "success")
        raw_top_k = data.get("top_k") or []
        raw_preds = data.get("predictions") or data.get("results") or []
        top_k_rows = []
        # Prefer the backend's own "top_k" rows; otherwise normalize the
        # looser "predictions"/"results" shapes into the same row format.
        if isinstance(raw_top_k, list) and raw_top_k:
            for row in raw_top_k[:top_k]:
                if not isinstance(row, dict):
                    continue
                conf = row.get("confidence", 0.0)
                conf_f = self._normalize_confidence(conf)
                top_k_rows.append({
                    "confidence": conf_f,
                    "name": str(row.get("name") or row.get("scientific_name") or "unknown"),
                    "scientific_name": str(row.get("scientific_name") or row.get("name") or "unknown"),
                })
        else:
            for item in raw_preds[:top_k]:
                if not isinstance(item, dict):
                    continue
                score = item.get("score", item.get("confidence", 0.0))
                score_f = self._normalize_confidence(score)
                sname = item.get("scientific_name") or item.get("label") or item.get("name") or "unknown"
                cname = item.get("common_name") or item.get("common") or sname
                top_k_rows.append({
                    "confidence": score_f,
                    "name": str(cname),
                    "scientific_name": str(sname),
                })
        if not top_k_rows:
            # Empty but well-formed answer: report success and suggest fallback.
            return ToolResult(success=True, result=json.dumps({
                "status": status,
                "model": data.get("model") or "aiy_plants_V1",
                "source": data.get("source") or "nature-id-cli",
                "top_k": [],
                "confidence": 0.0,
                "recommend_fallback": True,
                "reason": "no_predictions",
            }, ensure_ascii=False))
        top1 = top_k_rows[0]
        top1_conf = float(top1.get("confidence", 0.0))
        recommend_fallback = top1_conf < min_confidence
        out = {
            "status": status,
            "model": data.get("model") or "aiy_plants_V1",
            "source": data.get("source") or "nature-id-cli",
            "inference_time_sec": data.get("inference_time_sec"),
            "top_k": top_k_rows,
            "confidence": top1_conf,
            "min_confidence": min_confidence,
            "recommend_fallback": recommend_fallback,
            "fallback": "gbif_species_lookup",
        }
        if recommend_fallback:
            # Low-confidence top-1: attach a GBIF taxonomy check for the name.
            fallback_query = str(top1.get("scientific_name") or top1.get("name") or "").strip()
            if fallback_query and fallback_query.lower() != "unknown":
                gbif = await self._gbif_species_lookup({"query": fallback_query, "limit": min(5, top_k)})
                if gbif.success and gbif.result:
                    out["gbif_validation"] = gbif.result
        return ToolResult(success=True, result=json.dumps(out, ensure_ascii=False))
    except Exception as e:
        return ToolResult(success=False, result=None, error=f"nature_id_error: {e}")
async def _gbif_species_lookup(self, args: Dict) -> ToolResult:
    """Species lookup via the public GBIF API (api.gbif.org).

    Args (tool-call JSON):
        query: species/taxon search term (required).
        limit: number of results, clamped to 1..10.

    Returns:
        ToolResult with a human-readable "GBIF matches" list, an empty-result
        notice, or an error on HTTP failure.
    """
    query = str(args.get("query", "") or "").strip()
    # Defensive parse: a None or non-numeric limit must not raise out of the
    # tool call.
    try:
        limit = int(args.get("limit", 5))
    except (TypeError, ValueError):
        limit = 5
    limit = max(1, min(limit, 10))
    if not query:
        return ToolResult(success=False, result=None, error="query is required")
    try:
        resp = await self.http_client.get(
            "https://api.gbif.org/v1/species/search",
            params={"q": query, "limit": limit, "status": "ACCEPTED"},
            timeout=20.0,
        )
        if resp.status_code != 200:
            return ToolResult(success=False, result=None, error=f"gbif_http_{resp.status_code}")
        data = resp.json() or {}
        results = data.get("results") or []
        if not results:
            return ToolResult(success=True, result="GBIF: результатів не знайдено.")
        lines = []
        for idx, item in enumerate(results[:limit], 1):
            sci = item.get("scientificName") or item.get("canonicalName") or "unknown"
            rank = item.get("rank") or "-"
            status = item.get("taxonomicStatus") or "-"
            key = item.get("key")
            lines.append(f"{idx}. {sci} | rank={rank} | status={status} | key={key}")
        return ToolResult(success=True, result="GBIF matches:\n" + "\n".join(lines))
    except Exception as e:
        return ToolResult(success=False, result=None, error=f"gbif_error: {e}")
async def _agrovoc_lookup(self, args: Dict) -> ToolResult:
    """AGROVOC term normalization via the public FAO SPARQL endpoint.

    Args (tool-call JSON):
        query: agricultural term (crop/disease/technology), required.
        lang: label language, one of en/uk/ru (defaults to en).
        limit: number of results, clamped to 1..10.

    Returns:
        ToolResult with a human-readable "AGROVOC matches" list, an
        empty-result notice, or an error on HTTP failure.
    """
    query = str(args.get("query", "") or "").strip()
    lang = str(args.get("lang", "en") or "en").strip().lower()
    # Defensive parse: a None or non-numeric limit must not raise out of the
    # tool call.
    try:
        limit = int(args.get("limit", 5))
    except (TypeError, ValueError):
        limit = 5
    limit = max(1, min(limit, 10))
    if not query:
        return ToolResult(success=False, result=None, error="query is required")
    if lang not in {"en", "uk", "ru"}:
        lang = "en"
    # Strip characters that could break out of the quoted SPARQL string.
    safe_q = query.replace('\\', ' ').replace('"', ' ').strip()
    sparql = (
        "PREFIX skos: <http://www.w3.org/2004/02/skos/core#> "
        "SELECT ?concept ?label WHERE { "
        "?concept skos:prefLabel ?label . "
        f"FILTER(lang(?label) = '{lang}') "
        f"FILTER(CONTAINS(LCASE(STR(?label)), LCASE(\"{safe_q}\"))) "
        "} LIMIT " + str(limit)
    )
    try:
        resp = await self.http_client.get(
            "https://agrovoc.fao.org/sparql",
            params={"query": sparql, "format": "json"},
            timeout=25.0,
        )
        if resp.status_code != 200:
            return ToolResult(success=False, result=None, error=f"agrovoc_http_{resp.status_code}")
        data = resp.json() or {}
        bindings = (((data.get("results") or {}).get("bindings")) or [])
        if not bindings:
            return ToolResult(success=True, result="AGROVOC: результатів не знайдено.")
        lines = []
        for idx, b in enumerate(bindings[:limit], 1):
            label = ((b.get("label") or {}).get("value") or "").strip()
            concept = ((b.get("concept") or {}).get("value") or "").strip()
            lines.append(f"{idx}. {label} | {concept}")
        return ToolResult(success=True, result="AGROVOC matches:\n" + "\n".join(lines))
    except Exception as e:
        return ToolResult(success=False, result=None, error=f"agrovoc_error: {e}")
async def _unload_ollama_models(self):
"""Unload all Ollama models to free VRAM for heavy operations like FLUX"""
ollama_url = os.getenv("OLLAMA_BASE_URL", "http://172.18.0.1:11434")
@@ -2942,7 +3362,11 @@ class ToolManager:
if results:
result = results[0] if isinstance(results, list) else results
markdown = result.get("markdown", "") or result.get("cleaned_html", "") or result.get("text", "")
raw_content = result.get("markdown", "") or result.get("cleaned_html", "") or result.get("text", "")
if isinstance(raw_content, (dict, list, tuple)):
markdown = json.dumps(raw_content, ensure_ascii=False)
else:
markdown = str(raw_content or "")
title = result.get("title", url)
if len(markdown) > 3000:
@@ -2951,13 +3375,30 @@ class ToolManager:
response_parts = [f"**{title}**", "", markdown]
if extract_links:
links = result.get("links", [])
if links:
links_raw = result.get("links", [])
normalized_links: List[Any] = []
if isinstance(links_raw, dict):
for bucket in links_raw.values():
if isinstance(bucket, list):
normalized_links.extend(bucket)
elif bucket:
normalized_links.append(bucket)
elif isinstance(links_raw, list):
normalized_links = links_raw
elif links_raw:
normalized_links = [links_raw]
if normalized_links:
response_parts.append("")
response_parts.append("**Посилання:**")
for link in links[:10]:
for link in normalized_links[:10]:
if isinstance(link, dict):
link_url = link.get("href", "")
link_url = (
link.get("href")
or link.get("url")
or link.get("link")
or ""
)
else:
link_url = str(link)
if link_url: