router: add tool manager runtime and memory retrieval updates

This commit is contained in:
NODA1 System
2026-02-20 17:56:33 +01:00
parent 9ecce79810
commit a8a153a87a
5 changed files with 703 additions and 42 deletions

View File

@@ -46,8 +46,15 @@ AGENT_SPECIALIZED_TOOLS = {
"nutra": ['comfy_generate_image', 'comfy_generate_video'],
# AgroMatrix - Agriculture
# Specialized: crop analysis, weather integration, field mapping
"agromatrix": ['comfy_generate_image', 'comfy_generate_video'],
# Specialized: crop analysis, weather integration, field mapping + plant intelligence
"agromatrix": [
'comfy_generate_image',
'comfy_generate_video',
'plantnet_lookup',
'nature_id_identify',
'gbif_species_lookup',
'agrovoc_lookup',
],
# GreenFood - Food & Eco
# Specialized: recipe analysis, eco-scoring

View File

@@ -11,6 +11,7 @@ import httpx
import logging
import hashlib
import time # For latency metrics
from datetime import datetime
# CrewAI Integration
try:
@@ -128,6 +129,54 @@ def _vision_answer_uncertain(answer: str) -> bool:
return any(m in a for m in uncertain_markers)
def _is_plant_identification_request(prompt: str) -> bool:
if not prompt:
return False
p = prompt.lower()
markers = [
"що за рослина", "що це за рослина", "яка це рослина", "яка рослина",
"що за культура", "яка культура", "визнач рослину", "визнач культуру",
"ідентифікуй рослину", "впізнай рослину",
"what plant", "identify plant", "identify crop", "what crop",
"что за растение", "какое растение", "определи растение",
]
return any(m in p for m in markers)
def _build_plant_web_queries(prompt: str, vision_text: str) -> List[str]:
    """Build up to three deduplicated web-search queries for plant identification.

    Combines the generic vision query with fact-based variants aimed at
    crop/seedling identification pages (English and Ukrainian phrasing).
    """
    candidates: List[str] = []
    base_query = _build_vision_web_query(prompt, vision_text)
    if base_query:
        candidates.append(base_query)
    facts = _extract_vision_search_facts(vision_text, max_chars=180)
    if facts:
        candidates.extend([
            f"{facts} crop seedling identification",
            f"{facts} identification by leaves field photo",
            f"{facts} визначення культури за листком",
        ])
    # Normalize whitespace, drop empties, keep only the first occurrence of each query.
    deduped: List[str] = []
    for candidate in candidates:
        normalized = re.sub(r"\s+", " ", candidate).strip()
        if normalized and normalized not in deduped:
            deduped.append(normalized)
    return deduped[:3]
def _build_cautious_plant_response(base_text: str, source_count: int) -> str:
    """Wrap a low-confidence plant answer in a cautious note asking for better photos.

    Keeps at most the first two sentences of the sanitized answer and appends a
    fixed Ukrainian disclaimer that cites how many verified sources were found.
    """
    summary = _sanitize_vision_text_for_user(base_text) or "По цьому фото поки не можу надійно визначити культуру."
    sentences = [seg.strip() for seg in re.split(r"(?<=[.!?])\s+", summary) if seg.strip()]
    if len(sentences) > 2:
        summary = " ".join(sentences[:2]).strip()
    disclaimer = (
        f"Зараз це попередня оцінка (перевірених джерел: {source_count}). "
        "Щоб дати точну ідентифікацію, надішли 2-3 фото: загальний план, крупний план листка "
        "і фото стебла/вузла росту."
    )
    return f"{summary}\n\n{disclaimer}"
EMPTY_ANSWER_GUARD_AGENTS = {"devtools", "monitor"}
@@ -187,6 +236,44 @@ def _build_image_fallback_response(agent_id: str, prompt: str = "") -> str:
return "Я поки не бачу достатньо деталей на фото. Надішли, будь ласка, чіткіше фото або крупний план об'єкта."
def _parse_tool_json_result(raw: Any) -> Dict[str, Any]:
if isinstance(raw, dict):
return raw
if isinstance(raw, str):
try:
parsed = json.loads(raw)
return parsed if isinstance(parsed, dict) else {}
except Exception:
return {}
return {}
def _looks_like_image_question(prompt: str) -> bool:
if not prompt:
return False
p = str(prompt).lower().strip()
# If it's too short (e.g. "що це?", "что это?"), it might be a follow-up to an image.
# But we should only trigger the guard if the user EXPLICITLY mentions an image
# and we don't have one.
markers = (
"що на фото", "що на цьому фото", "що на зображ", "опиши фото", "проаналізуй фото",
"what is in the image", "what is on this photo", "describe the photo", "analyze image",
"что на фото", "что на этом фото", "опиши фото", "проанализируй фото",
)
if any(m in p for m in markers):
return True
# Refined regex: must contain 'what|what is|how' and 'photo|image'
# but avoid generic "can you..."
if re.search(r"(що|what|что|опиши|проаналізуй|подивись).{1,24}(фото|зображ|image|photo|світлин)", p):
# Exclude common meta-questions that might contain these words but aren't about an image
meta_exclude = ["канал", "чат", "бот", "нормально"]
if not any(ex in p for ex in meta_exclude):
return True
return False
def _sanitize_vision_text_for_user(text: str) -> str:
if not text:
@@ -1218,6 +1305,22 @@ async def agent_infer(agent_id: str, request: InferRequest):
username = metadata.get("username")
# Get agent_id from metadata or URL parameter
request_agent_id = metadata.get("agent_id", agent_id).lower()
# Safety guard: avoid text-only handling for image questions without image payload.
# IMPORTANT: inspect only the latest user text when provided by gateway,
# not the full context-augmented prompt.
raw_user_text = str(metadata.get("raw_user_text", "") or "").strip()
image_guard_text = raw_user_text if raw_user_text else request.prompt
if (not request.images) and _looks_like_image_question(image_guard_text):
return InferResponse(
response=(
"Бачу запит про фото/зображення, але в запиті немає самого файлу. "
"Надішліть фото ще раз (або прикріпіть файл із підписом), і я одразу проаналізую."
),
model="vision-context-required",
backend="router-guard",
tokens_used=0,
)
if MEMORY_RETRIEVAL_AVAILABLE and memory_retrieval:
try:
@@ -1248,20 +1351,32 @@ async def agent_infer(agent_id: str, request: InferRequest):
logger.info(f" No system_prompt in request for agent {agent_id}, loading from configured sources")
if not system_prompt:
try:
from prompt_builder import get_agent_system_prompt
system_prompt = await get_agent_system_prompt(
agent_id,
city_service_url=CITY_SERVICE_URL,
router_config=router_config
)
logger.info(f"✅ Loaded system prompt from database for {agent_id}")
except Exception as e:
logger.warning(f"⚠️ Could not load prompt from database: {e}")
# Fallback to config
if not (CITY_SERVICE_URL or '').strip():
system_prompt_source = "router_config"
agent_config = router_config.get("agents", {}).get(agent_id, {})
system_prompt = agent_config.get("system_prompt")
logger.info(f" CITY_SERVICE_URL is empty; loaded system prompt from router_config for {agent_id}")
else:
try:
from prompt_builder import get_agent_system_prompt
system_prompt = await get_agent_system_prompt(
agent_id,
city_service_url=CITY_SERVICE_URL,
router_config=router_config
)
logger.info(f"✅ Loaded system prompt from city service/config for {agent_id}")
except Exception as e:
logger.warning(f"⚠️ Could not load prompt via prompt_builder: {e}")
# Fallback to config
system_prompt_source = "router_config"
agent_config = router_config.get("agents", {}).get(agent_id, {})
system_prompt = agent_config.get("system_prompt")
if system_prompt and system_prompt_source == "city_service":
# prompt_builder may silently fall back to router config; reflect actual source in metadata/logs
cfg_prompt = (router_config.get("agents", {}).get(agent_id, {}) or {}).get("system_prompt")
if cfg_prompt and (system_prompt or "").strip() == str(cfg_prompt).strip():
system_prompt_source = "router_config"
if not system_prompt:
system_prompt_source = "empty"
@@ -1284,6 +1399,109 @@ async def agent_infer(agent_id: str, request: InferRequest):
# Use router config to get default model for agent, fallback to qwen3:8b
agent_config = router_config.get("agents", {}).get(agent_id, {})
# =========================================================================
# AGROMATRIX PLANT PRE-VISION (edge tool before CrewAI)
# =========================================================================
crewai_profile = str(effective_metadata.get("crewai_profile", "") or "").strip().lower()
is_agromatrix_plant = request_agent_id == "agromatrix" and crewai_profile == "plant_intel"
if is_agromatrix_plant and http_client and user_id and chat_id and not request.images:
# Follow-up path: reuse last structured plant identification from fact-memory.
fact_key = f"last_plant:{request_agent_id}:{chat_id}"
try:
fact_resp = await http_client.get(
f"http://memory-service:8000/facts/{fact_key}",
params={"user_id": user_id},
timeout=8.0,
)
if fact_resp.status_code == 200:
fact_data = fact_resp.json() or {}
last_plant = fact_data.get("fact_value_json") or {}
if isinstance(last_plant, str):
try:
last_plant = json.loads(last_plant)
except Exception:
last_plant = {}
if isinstance(last_plant, dict) and last_plant.get("top_k"):
effective_metadata["last_plant"] = last_plant
# Give deterministic context to synthesis without exposing internals to end user.
request.prompt = (
f"{request.prompt}\n\n"
f"[PREVIOUS_PLANT_IDENTIFICATION] {json.dumps(last_plant, ensure_ascii=False)}"
)
logger.info(
f"🌿 Plant follow-up context loaded: top1={((last_plant.get('top_k') or [{}])[0]).get('scientific_name', 'N/A')}"
)
except Exception as e:
logger.warning(f"⚠️ Plant follow-up context load failed: {e}")
if is_agromatrix_plant and request.images and len(request.images) > 0 and TOOL_MANAGER_AVAILABLE and tool_manager:
first_image = request.images[0]
tool_args: Dict[str, Any] = {"top_k": 5}
if isinstance(first_image, str) and first_image.startswith("data:"):
tool_args["image_data"] = first_image
elif isinstance(first_image, str):
tool_args["image_url"] = first_image
try:
tool_res = await tool_manager.execute_tool(
"nature_id_identify",
tool_args,
agent_id=request_agent_id,
chat_id=chat_id,
user_id=user_id,
)
if tool_res and tool_res.success and tool_res.result:
plant_vision = _parse_tool_json_result(tool_res.result)
if plant_vision:
top_k_rows = plant_vision.get("top_k") or []
top1 = top_k_rows[0] if top_k_rows else {}
confidence = float(plant_vision.get("confidence") or top1.get("confidence") or 0.0)
effective_metadata["plant_vision"] = plant_vision
effective_metadata["plant_top_k"] = top_k_rows
effective_metadata["plant_confidence"] = confidence
request.prompt = (
f"{request.prompt}\n\n"
f"[PLANT_VISION_PREPROCESSED] {json.dumps(plant_vision, ensure_ascii=False)}"
)
if top1:
logger.info(
f"🌿 Vision pre-process: {confidence:.2f}% {top1.get('scientific_name') or top1.get('name') or 'unknown'}"
)
else:
logger.info("🌿 Vision pre-process: no candidates")
if plant_vision.get("recommend_fallback"):
logger.info("🌿 Vision pre-process: low confidence -> GBIF fallback enabled")
# Persist structured plant result for follow-up questions.
if http_client and user_id and chat_id:
fact_key = f"last_plant:{request_agent_id}:{chat_id}"
try:
await http_client.post(
"http://memory-service:8000/facts/upsert",
json={
"user_id": user_id,
"fact_key": fact_key,
"fact_value": (top1.get("scientific_name") if isinstance(top1, dict) else None),
"fact_value_json": {
"top_k": top_k_rows,
"confidence": confidence,
"recommend_fallback": bool(plant_vision.get("recommend_fallback")),
"gbif_validation": plant_vision.get("gbif_validation"),
"identified_at": datetime.utcnow().isoformat(),
"agent_id": request_agent_id,
"chat_id": chat_id,
"source": "plant_vision_preprocess",
},
},
timeout=8.0,
)
except Exception as e:
logger.warning(f"⚠️ Failed to store last_plant fact: {e}")
except Exception as e:
logger.warning(f"⚠️ Plant pre-vision failed: {e}")
# =========================================================================
# CREWAI DECISION: Use orchestration or direct LLM?
# =========================================================================
@@ -1341,9 +1559,12 @@ async def agent_infer(agent_id: str, request: InferRequest):
# Get agent CrewAI config from registry (or router_config fallback)
crewai_cfg = agent_config.get("crewai", {})
# CrewAI trigger should inspect the latest user message first (if provided by gateway),
# otherwise it can overreact to long context history.
decision_prompt = str(effective_metadata.get("raw_user_text", "") or "").strip() or request.prompt
use_crewai, crewai_reason = should_use_crewai(
agent_id=agent_id,
prompt=request.prompt,
prompt=decision_prompt,
agent_config=agent_config,
metadata=effective_metadata,
force_crewai=effective_metadata.get("force_crewai", False),
@@ -1366,6 +1587,10 @@ async def agent_infer(agent_id: str, request: InferRequest):
},
"metadata": effective_metadata,
"runtime_envelope": runtime_envelope,
"plant_vision": effective_metadata.get("plant_vision"),
"plant_top_k": effective_metadata.get("plant_top_k"),
"plant_confidence": effective_metadata.get("plant_confidence"),
"last_plant": effective_metadata.get("last_plant"),
},
team=crewai_cfg.get("team"),
profile=effective_metadata.get("crewai_profile")
@@ -1503,6 +1728,9 @@ async def agent_infer(agent_id: str, request: InferRequest):
full_response = _sanitize_vision_text_for_user(raw_response)
vision_web_query = ""
vision_sources: List[Dict[str, str]] = []
plant_intent = _is_plant_identification_request(request.prompt)
wants_web = False
uncertain = False
# Debug: log full response structure
logger.info(
@@ -1520,39 +1748,57 @@ async def agent_infer(agent_id: str, request: InferRequest):
try:
wants_web = _vision_prompt_wants_web(request.prompt)
uncertain = _vision_answer_uncertain(full_response or raw_response)
if wants_web or uncertain:
query = _build_vision_web_query(request.prompt, full_response or raw_response)
if not query:
if wants_web or uncertain or plant_intent:
queries = (
_build_plant_web_queries(request.prompt, full_response or raw_response)
if plant_intent
else [_build_vision_web_query(request.prompt, full_response or raw_response)]
)
queries = [q for q in queries if q]
if not queries:
logger.info("🔎 Vision web enrich skipped: query not actionable")
else:
vision_web_query = query
search_result = await tool_manager.execute_tool(
"web_search",
{"query": query, "max_results": 3},
agent_id=request_agent_id,
chat_id=chat_id,
user_id=user_id,
)
if search_result and search_result.success and search_result.result:
vision_web_query = queries[0]
merged_chunks: List[str] = []
merged_sources: List[Dict[str, str]] = []
for query in queries:
search_result = await tool_manager.execute_tool(
"web_search",
{"query": query, "max_results": 3},
agent_id=request_agent_id,
chat_id=chat_id,
user_id=user_id,
)
if not (search_result and search_result.success and search_result.result):
continue
compact_search = _compact_web_search_result(
search_result.result,
query=query,
agent_id=request_agent_id,
)
if not compact_search or "Нічого не знайдено" in compact_search:
continue
merged_chunks.append(compact_search)
merged_sources.extend(_extract_sources_from_compact(compact_search))
if compact_search and "Нічого не знайдено" not in compact_search:
vision_sources = _extract_sources_from_compact(compact_search)
# Deduplicate sources.
if merged_sources:
uniq: List[Dict[str, str]] = []
seen = set()
for s in merged_sources:
key = (s.get("title", ""), s.get("url", ""))
if key in seen:
continue
seen.add(key)
uniq.append(s)
vision_sources = uniq[:5]
base_text = full_response or "Не вдалося надійно ідентифікувати об'єкт на фото."
full_response = (
f"{base_text}\n\n"
f"Додатково знайшов у відкритих джерелах:\n{compact_search}"
)
if merged_chunks:
base_text = full_response or "Не вдалося надійно ідентифікувати об'єкт на фото."
full_response = (
f"{base_text}\n\n"
f"Додатково знайшов у відкритих джерелах:\n{merged_chunks[0]}"
)
logger.info(
"🌐 Vision web enrichment applied "
@@ -1568,6 +1814,11 @@ async def agent_infer(agent_id: str, request: InferRequest):
f"query='{vision_web_query[:180]}', sources={len(vision_sources)}"
)
# Plant identification safety gate:
# avoid hard species claims when confidence is low or evidence is weak.
if request_agent_id == "agromatrix" and plant_intent and (uncertain or len(vision_sources) < 2):
full_response = _build_cautious_plant_response(full_response or raw_response, len(vision_sources))
# Image quality gate: one soft retry if response looks empty/meta.
if _image_response_needs_retry(full_response):
try:

View File

@@ -18,6 +18,7 @@ Collections per agent:
import os
import json
import logging
import re
from typing import Optional, Dict, Any, List
from dataclasses import dataclass, field
from datetime import datetime
@@ -453,7 +454,7 @@ class MemoryRetrieval:
# Higher threshold for messages; even higher when user asks about docs to avoid pulling old chatter.
msg_thresh = 0.5 if is_doc_query else 0.4
if r.score > msg_thresh:
text = r.payload.get("text", r.payload.get("content", ""))
text = self._extract_message_text(r.payload)
# Skip very short or system messages
if len(text) > 20 and not text.startswith("<"):
if is_doc_query and topic_keywords:
@@ -501,7 +502,7 @@ class MemoryRetrieval:
seen_texts = set()
unique_results = []
for r in all_results:
text_key = r.get("text", "")[:50].lower()
text_key = self._canonical_text_key(r.get("text", ""))
if text_key not in seen_texts:
seen_texts.add(text_key)
unique_results.append(r)
@@ -511,6 +512,40 @@ class MemoryRetrieval:
except Exception as e:
logger.warning(f"Memory search failed for {agent_id}: {e}")
return []
@staticmethod
def _extract_message_text(payload: Dict[str, Any]) -> str:
"""
Normalize text across both payload schemas:
- memory-service: content/text (+ role/channel_id)
- router: user_message + assistant_response (+ chat_id)
"""
if not payload:
return ""
text = (payload.get("text") or payload.get("content") or "").strip()
if text:
lower = text.lower()
marker = "\n\nassistant:"
idx = lower.rfind(marker)
if lower.startswith("user:") and idx != -1:
assistant_text = text[idx + len(marker):].strip()
if assistant_text:
return assistant_text
return text
user_text = (payload.get("user_message") or "").strip()
assistant_text = (payload.get("assistant_response") or "").strip()
if user_text and assistant_text:
return f"User: {user_text}\n\nAssistant: {assistant_text}"
return user_text or assistant_text
@staticmethod
def _canonical_text_key(text: str) -> str:
if not text:
return ""
normalized = re.sub(r"\s+", " ", text.strip().lower())
return normalized[:220]
async def get_user_graph_context(
self,
@@ -619,6 +654,8 @@ class MemoryRetrieval:
query=message,
agent_id=agent_id,
platform_user_id=identity.platform_user_id,
chat_id=chat_id,
user_id=user_id,
limit=5
)
brief.relevant_memories = memories

View File

@@ -408,8 +408,9 @@ agents:
description: "Monitor Agent - архітектор-інспектор DAGI"
default_llm: local_qwen3_8b
system_prompt: |
Ти - Monitor Agent, стежиш за нодами, сервісами, агентами.
Якщо бачиш у чаті інших ботів, відповідай тільки за інфраструктурою або прямим тегом.
Ти - Monitor Agent, інфраструктурний інспектор DAGI: ноди, сервіси, пайплайни, алерти.
Ти знаєш, що DAARWIZZ — головний оркестратор мережі DAARION.city; для governance/маршрутизації посилайся на нього.
Відповідай коротко і по суті; якщо даних бракує — одразу кажи, який саме метрик/лог потрібен.
tools:
- id: get_metrics
type: builtin

View File

@@ -108,6 +108,116 @@ TOOL_DEFINITIONS = [
}
}
},
{
"type": "function",
"function": {
"name": "plantnet_lookup",
"description": "Визначення рослин через Pl@ntNet API. Повертає top-k кандидатів з confidence.",
"parameters": {
"type": "object",
"properties": {
"query": {
"type": "string",
"description": "Короткий опис рослини/культури (якщо немає image_url)"
},
"image_url": {
"type": "string",
"description": "Публічне посилання на фото рослини"
},
"organ": {
"type": "string",
"description": "Орган рослини: leaf/flower/fruit/bark/auto",
"default": "auto"
},
"top_k": {
"type": "integer",
"description": "Скільки кандидатів повернути (1-10)",
"default": 3
}
}
}
}
},
{
"type": "function",
"function": {
"name": "nature_id_identify",
"description": "Локальна/open-source ідентифікація рослин через nature-id сумісний сервіс.",
"parameters": {
"type": "object",
"properties": {
"image_url": {
"type": "string",
"description": "Публічне посилання на фото рослини"
},
"image_data": {
"type": "string",
"description": "Data URL зображення (data:image/...;base64,...)"
},
"top_k": {
"type": "integer",
"description": "Скільки кандидатів повернути (1-10)",
"default": 3
},
"min_confidence": {
"type": "number",
"description": "Поріг confidence для fallback на GBIF",
"default": 0.65
}
},
"required": ["image_url"]
}
}
},
{
"type": "function",
"function": {
"name": "gbif_species_lookup",
"description": "Пошук таксонів у GBIF для валідації назви культури/рослини.",
"parameters": {
"type": "object",
"properties": {
"query": {
"type": "string",
"description": "Назва/термін для пошуку виду"
},
"limit": {
"type": "integer",
"description": "Кількість результатів (1-10)",
"default": 5
}
},
"required": ["query"]
}
}
},
{
"type": "function",
"function": {
"name": "agrovoc_lookup",
"description": "Нормалізація агро-термінів через AGROVOC (SPARQL).",
"parameters": {
"type": "object",
"properties": {
"query": {
"type": "string",
"description": "Термін культури/хвороби/технології"
},
"lang": {
"type": "string",
"description": "Мова міток (en/uk/ru)",
"default": "en"
},
"limit": {
"type": "integer",
"description": "Кількість результатів (1-10)",
"default": 5
}
},
"required": ["query"]
}
}
},
# PRIORITY 3: Generation tools
{
"type": "function",
@@ -709,6 +819,14 @@ class ToolManager:
return await self._web_search(arguments)
elif tool_name == "web_extract":
return await self._web_extract(arguments)
elif tool_name == "plantnet_lookup":
return await self._plantnet_lookup(arguments)
elif tool_name == "nature_id_identify":
return await self._nature_id_identify(arguments)
elif tool_name == "gbif_species_lookup":
return await self._gbif_species_lookup(arguments)
elif tool_name == "agrovoc_lookup":
return await self._agrovoc_lookup(arguments)
elif tool_name == "image_generate":
return await self._image_generate(arguments)
elif tool_name == "comfy_generate_image":
@@ -2530,6 +2648,253 @@ class ToolManager:
except Exception as e:
return ToolResult(success=False, result=None, error=str(e))
async def _plantnet_lookup(self, args: Dict) -> ToolResult:
    """Plant identification via Pl@ntNet API (skeleton adapter).

    Resolution order:
      1. Pl@ntNet HTTP API — when an image_url and PLANTNET_API_KEY are present.
      2. Self-hosted nature-id service (``_nature_id_identify``) for the image.
      3. GBIF species search (``_gbif_species_lookup``) for a text-only query.

    Args (from ``args``):
        query: free-text plant/crop description (used for the GBIF fallback).
        image_url: public URL of the plant photo.
        organ: plant organ hint (leaf/flower/fruit/bark/auto); "auto" maps to "leaf".
        top_k: candidate count — clamped here to 1..5, although the tool schema
            advertises 1..10. NOTE(review): confirm whether the 5 cap is intended.

    Returns:
        ToolResult with a human-readable candidate list on success, or
        success=False with a short error code when no backend is usable.
    """
    query = str(args.get("query", "") or "").strip()
    image_url = str(args.get("image_url", "") or "").strip()
    organ = str(args.get("organ", "auto") or "auto").strip().lower()
    top_k = max(1, min(int(args.get("top_k", 3)), 5))
    api_key = (os.getenv("PLANTNET_API_KEY") or "").strip()
    if image_url and api_key:
        try:
            params = {
                "api-key": api_key,
                "images": image_url,
                # Pl@ntNet needs a concrete organ; treat "auto" as "leaf".
                "organs": "leaf" if organ == "auto" else organ,
                "lang": "en",
            }
            resp = await self.http_client.get(
                "https://my-api.plantnet.org/v2/identify/all",
                params=params,
                timeout=25.0,
            )
            if resp.status_code == 200:
                data = resp.json()
                results = (data.get("results") or [])[:top_k]
                if not results:
                    return ToolResult(success=True, result="Pl@ntNet: кандидатів не знайдено.")
                lines = []
                for idx, item in enumerate(results, 1):
                    species = (item.get("species") or {})
                    sname = species.get("scientificNameWithoutAuthor") or species.get("scientificName") or "unknown"
                    common = species.get("commonNames") or []
                    cname = common[0] if common else "-"
                    score = float(item.get("score") or 0.0)
                    lines.append(f"{idx}. {sname} ({cname}) score={score:.3f}")
                return ToolResult(success=True, result="Pl@ntNet candidates:\n" + "\n".join(lines))
            return ToolResult(success=False, result=None, error=f"plantnet_http_{resp.status_code}")
        except Exception as e:
            return ToolResult(success=False, result=None, error=f"plantnet_error: {e}")
    # No API key: try the self-hosted identifier for the image first.
    if image_url:
        ni = await self._nature_id_identify({"image_url": image_url, "top_k": top_k})
        if ni.success:
            return ni
    # Text-only fallback: validate the name against GBIF taxonomy.
    if query:
        return await self._gbif_species_lookup({"query": query, "limit": top_k})
    return ToolResult(
        success=False,
        result=None,
        error="No available plant ID backend (set PLANTNET_API_KEY or NATURE_ID_URL, or provide text query)",
    )
async def _nature_id_identify(self, args: Dict) -> ToolResult:
    """Open-source plant identification via self-hosted nature-id compatible endpoint.

    Args (from ``args``):
        image_url: public URL of the photo; sent as JSON to ``{base}/identify``.
        image_data: data URL (data:image/...;base64,...); decoded and sent as
            multipart to ``{base}/identify-file``. One of the two is required.
        top_k: number of candidates to keep (clamped to 1..10).
        min_confidence: threshold below which a GBIF fallback is recommended;
            defaults to env NATURE_ID_MIN_CONFIDENCE, then 0.65.

    Returns:
        ToolResult whose ``result`` is a JSON string with normalized fields:
        status, model, source, top_k (list of {confidence, name,
        scientific_name}), confidence (top-1), recommend_fallback, and —
        when the fallback fired — ``gbif_validation`` text.
        success=False with a short error code on config/HTTP/decoding errors.
    """
    image_url = str(args.get("image_url", "") or "").strip()
    image_data = str(args.get("image_data", "") or "").strip()
    top_k = max(1, min(int(args.get("top_k", 3)), 10))
    # NOTE(review): a non-numeric NATURE_ID_MIN_CONFIDENCE env value would raise
    # ValueError here, outside the try/except below — confirm env is validated.
    min_confidence = float(args.get("min_confidence", os.getenv("NATURE_ID_MIN_CONFIDENCE", "0.65")))
    if not image_url and not image_data:
        return ToolResult(success=False, result=None, error="image_url or image_data is required")
    base = (os.getenv("NATURE_ID_URL") or "").strip().rstrip("/")
    if not base:
        return ToolResult(success=False, result=None, error="NATURE_ID_URL is not configured")
    try:
        if image_data:
            # data URL -> multipart /identify-file
            if not image_data.startswith("data:") or "," not in image_data:
                return ToolResult(success=False, result=None, error="invalid image_data format")
            header, b64 = image_data.split(",", 1)
            mime = "image/jpeg"
            if ";base64" in header:
                # "data:image/png;base64" -> "image/png"
                mime = header.split(":", 1)[1].split(";", 1)[0] or "image/jpeg"
            ext = "jpg"
            if "png" in mime:
                ext = "png"
            try:
                image_bytes = base64.b64decode(b64)
            except Exception:
                return ToolResult(success=False, result=None, error="invalid image_data base64")
            files = {"file": (f"upload.{ext}", image_bytes, mime)}
            resp = await self.http_client.post(
                f"{base}/identify-file",
                params={"top_k": top_k},
                files=files,
                timeout=45.0,
            )
        else:
            payload = {"image_url": image_url, "top_k": top_k}
            resp = await self.http_client.post(f"{base}/identify", json=payload, timeout=45.0)
        if resp.status_code != 200:
            return ToolResult(success=False, result=None, error=f"nature_id_http_{resp.status_code}")
        data = resp.json() or {}
        status = str(data.get("status") or "success")
        # The service may answer in either schema: a pre-normalized "top_k"
        # list, or raw "predictions"/"results"; normalize both to one shape.
        raw_top_k = data.get("top_k") or []
        raw_preds = data.get("predictions") or data.get("results") or []
        top_k_rows = []
        if isinstance(raw_top_k, list) and raw_top_k:
            for row in raw_top_k[:top_k]:
                if not isinstance(row, dict):
                    continue
                conf = row.get("confidence", 0.0)
                try:
                    conf_f = float(conf)
                except Exception:
                    conf_f = 0.0
                top_k_rows.append({
                    "confidence": conf_f,
                    "name": str(row.get("name") or row.get("scientific_name") or "unknown"),
                    "scientific_name": str(row.get("scientific_name") or row.get("name") or "unknown"),
                })
        else:
            for item in raw_preds[:top_k]:
                if not isinstance(item, dict):
                    continue
                score = item.get("score", item.get("confidence", 0.0))
                try:
                    score_f = float(score)
                except Exception:
                    score_f = 0.0
                sname = item.get("scientific_name") or item.get("label") or item.get("name") or "unknown"
                cname = item.get("common_name") or item.get("common") or sname
                top_k_rows.append({
                    "confidence": score_f,
                    "name": str(cname),
                    "scientific_name": str(sname),
                })
        if not top_k_rows:
            # No usable candidates: still success=True, but signal the caller
            # to fall back to GBIF via recommend_fallback.
            return ToolResult(success=True, result=json.dumps({
                "status": status,
                "model": data.get("model") or "aiy_plants_V1",
                "source": data.get("source") or "nature-id-cli",
                "top_k": [],
                "confidence": 0.0,
                "recommend_fallback": True,
                "reason": "no_predictions",
            }, ensure_ascii=False))
        top1 = top_k_rows[0]
        top1_conf = float(top1.get("confidence", 0.0))
        recommend_fallback = top1_conf < min_confidence
        out = {
            "status": status,
            "model": data.get("model") or "aiy_plants_V1",
            "source": data.get("source") or "nature-id-cli",
            "inference_time_sec": data.get("inference_time_sec"),
            "top_k": top_k_rows,
            "confidence": top1_conf,
            "min_confidence": min_confidence,
            "recommend_fallback": recommend_fallback,
            "fallback": "gbif_species_lookup",
        }
        if recommend_fallback:
            # Low confidence: cross-check the top-1 name against GBIF taxonomy.
            fallback_query = str(top1.get("scientific_name") or top1.get("name") or "").strip()
            if fallback_query and fallback_query.lower() != "unknown":
                gbif = await self._gbif_species_lookup({"query": fallback_query, "limit": min(5, top_k)})
                if gbif.success and gbif.result:
                    out["gbif_validation"] = gbif.result
        return ToolResult(success=True, result=json.dumps(out, ensure_ascii=False))
    except Exception as e:
        return ToolResult(success=False, result=None, error=f"nature_id_error: {e}")
async def _gbif_species_lookup(self, args: Dict) -> ToolResult:
    """Species lookup via GBIF public API.

    Args (from ``args``): query (required search term), limit (clamped to 1..10).
    Returns a numbered plain-text list of ACCEPTED taxa, or an error code.
    """
    term = str(args.get("query", "") or "").strip()
    max_rows = max(1, min(int(args.get("limit", 5)), 10))
    if not term:
        return ToolResult(success=False, result=None, error="query is required")
    try:
        resp = await self.http_client.get(
            "https://api.gbif.org/v1/species/search",
            params={"q": term, "limit": max_rows, "status": "ACCEPTED"},
            timeout=20.0,
        )
        if resp.status_code != 200:
            return ToolResult(success=False, result=None, error=f"gbif_http_{resp.status_code}")
        payload = resp.json() or {}
        matches = payload.get("results") or []
        if not matches:
            return ToolResult(success=True, result="GBIF: результатів не знайдено.")
        formatted = [
            f"{idx}. {item.get('scientificName') or item.get('canonicalName') or 'unknown'}"
            f" | rank={item.get('rank') or '-'}"
            f" | status={item.get('taxonomicStatus') or '-'}"
            f" | key={item.get('key')}"
            for idx, item in enumerate(matches[:max_rows], 1)
        ]
        return ToolResult(success=True, result="GBIF matches:\n" + "\n".join(formatted))
    except Exception as e:
        return ToolResult(success=False, result=None, error=f"gbif_error: {e}")
async def _agrovoc_lookup(self, args: Dict) -> ToolResult:
    """AGROVOC term normalization via public SPARQL endpoint.

    Args (from ``args``):
        query: crop/disease/technology term matched against skos:prefLabel.
        lang: label language — en/uk/ru; anything else falls back to "en".
        limit: number of rows (clamped to 1..10).

    Returns:
        ToolResult with "N. label | concept-URI" lines on success, or a
        short error code on HTTP/network failures.

    NOTE(review): the SPARQL query is string-built; stripping backslashes and
    double quotes from the term blocks literal breakout, but a stricter
    whitelist or parameterized query would be safer long-term.
    """
    query = str(args.get("query", "") or "").strip()
    lang = str(args.get("lang", "en") or "en").strip().lower()
    limit = max(1, min(int(args.get("limit", 5)), 10))
    if not query:
        return ToolResult(success=False, result=None, error="query is required")
    if lang not in {"en", "uk", "ru"}:
        lang = "en"
    # Neutralize characters that could escape the quoted SPARQL string literal.
    safe_q = query.replace('\\', ' ').replace('"', ' ').strip()
    sparql = (
        "PREFIX skos: <http://www.w3.org/2004/02/skos/core#> "
        "SELECT ?concept ?label WHERE { "
        "?concept skos:prefLabel ?label . "
        f"FILTER(lang(?label) = '{lang}') "
        f"FILTER(CONTAINS(LCASE(STR(?label)), LCASE(\"{safe_q}\"))) "
        "} LIMIT " + str(limit)
    )
    try:
        resp = await self.http_client.get(
            "https://agrovoc.fao.org/sparql",
            params={"query": sparql, "format": "json"},
            timeout=25.0,
        )
        if resp.status_code != 200:
            return ToolResult(success=False, result=None, error=f"agrovoc_http_{resp.status_code}")
        data = resp.json() or {}
        # Standard SPARQL JSON results shape: {"results": {"bindings": [...]}}.
        bindings = (((data.get("results") or {}).get("bindings")) or [])
        if not bindings:
            return ToolResult(success=True, result="AGROVOC: результатів не знайдено.")
        lines = []
        for idx, b in enumerate(bindings[:limit], 1):
            label = ((b.get("label") or {}).get("value") or "").strip()
            concept = ((b.get("concept") or {}).get("value") or "").strip()
            lines.append(f"{idx}. {label} | {concept}")
        return ToolResult(success=True, result="AGROVOC matches:\n" + "\n".join(lines))
    except Exception as e:
        return ToolResult(success=False, result=None, error=f"agrovoc_error: {e}")
async def _unload_ollama_models(self):
"""Unload all Ollama models to free VRAM for heavy operations like FLUX"""
ollama_url = os.getenv("OLLAMA_BASE_URL", "http://172.18.0.1:11434")