merge: integrate remote codex/sync-node1-runtime with fabric layer changes

Resolve conflicts in docker-compose.node1.yml, services/router/main.py,
and gateway-bot/services/doc_service.py — keeping both fabric layer
(NCS, node-worker, Prometheus) and document ingest/query endpoints.

Made-with: Cursor
This commit is contained in:
Apple
2026-02-27 03:09:12 -08:00
76 changed files with 7495 additions and 295 deletions

View File

@@ -16,9 +16,16 @@ logger = logging.getLogger(__name__)
# Configuration
GATEWAY_URL = os.getenv("GATEWAY_URL", "http://gateway:9300")
ROUTER_URL = os.getenv("ROUTER_URL", "http://router:8000")
PROBE_INTERVAL = int(os.getenv("PROBE_INTERVAL", "60")) # seconds
PROBE_TIMEOUT = int(os.getenv("PROBE_TIMEOUT", "30")) # seconds
SEMANTIC_TIMEOUT = int(os.getenv("SEMANTIC_TIMEOUT", "45")) # seconds
METRICS_PORT = int(os.getenv("METRICS_PORT", "9108"))
SEMANTIC_PROBE_ENABLED = os.getenv("SEMANTIC_PROBE_ENABLED", "true").lower() == "true"
SEMANTIC_AGENTS = [a.strip() for a in os.getenv("SEMANTIC_AGENTS", "clan,sofiia,monitor").split(",") if a.strip()]
SEMANTIC_PROMPT = os.getenv("SEMANTIC_PROMPT", "Коротко: хто такий DAARWIZZ?")
SEMANTIC_EXPECT_KEYWORD = os.getenv("SEMANTIC_EXPECT_KEYWORD", "daarwizz").lower()
MONITOR_EXPECT_LOCAL = os.getenv("MONITOR_EXPECT_LOCAL", "true").lower() == "true"
# Prometheus metrics
agent_e2e_success = Gauge('agent_e2e_success', 'Whether last E2E probe succeeded', ['target'])
@@ -42,7 +49,7 @@ async def probe_gateway_health() -> tuple[bool, float, str]:
async with httpx.AsyncClient(timeout=PROBE_TIMEOUT) as client:
resp = await client.get(f"{GATEWAY_URL}/health")
latency = time.time() - start
if resp.status_code == 200:
data = resp.json()
if data.get("status") == "healthy":
@@ -67,7 +74,7 @@ async def probe_agent_ping() -> tuple[bool, float, str]:
json={"probe": True, "timestamp": datetime.utcnow().isoformat()}
)
latency = time.time() - start
if resp.status_code == 200:
data = resp.json()
if data.get("success"):
@@ -100,7 +107,7 @@ async def probe_webhook_echo() -> tuple[bool, float, str]:
"text": "/health" # Simple health check command
}
}
async with httpx.AsyncClient(timeout=PROBE_TIMEOUT) as client:
# Use helion webhook as it's the most tested
resp = await client.post(
@@ -108,7 +115,7 @@ async def probe_webhook_echo() -> tuple[bool, float, str]:
json=test_update
)
latency = time.time() - start
if resp.status_code == 200:
return True, latency, ""
else:
@@ -119,53 +126,102 @@ async def probe_webhook_echo() -> tuple[bool, float, str]:
return False, time.time() - start, f"error: {str(e)[:50]}"
async def probe_agent_semantic(agent_id: str) -> tuple[bool, float, str]:
    """Send a semantic probe through the router and assert DAARWIZZ awareness.

    Returns (success, latency_seconds, failure_reason). An empty reason
    means the probe passed.
    """
    started_at = time.time()
    try:
        request_body = {
            "prompt": SEMANTIC_PROMPT,
            "max_tokens": 180,
            "temperature": 0.1,
            "metadata": {
                "agent_id": agent_id,
                "user_id": "tg:0",
                "chat_id": "0",
                "username": "e2e-prober",
                "raw_user_text": SEMANTIC_PROMPT,
            },
        }
        async with httpx.AsyncClient(timeout=SEMANTIC_TIMEOUT) as client:
            resp = await client.post(f"{ROUTER_URL}/v1/agents/{agent_id}/infer", json=request_body)
            elapsed = time.time() - started_at
        if resp.status_code != 200:
            return False, elapsed, f"http_{resp.status_code}"
        body = resp.json()
        answer_text = str(body.get("response") or "")
        backend_name = str(body.get("backend") or "")
        model_name = str(body.get("model") or "")
        lowered = answer_text.lower()
        # Answer must mention DAARWIZZ, either the latin keyword or the cyrillic stem.
        if SEMANTIC_EXPECT_KEYWORD not in lowered and "даар" not in lowered:
            return False, elapsed, "no_daarwizz_in_answer"
        if MONITOR_EXPECT_LOCAL and agent_id == "monitor":
            # Monitor is expected to be served locally: ollama backend or a qwen model.
            served_locally = ("ollama" in backend_name.lower()) or model_name.lower().startswith("qwen")
            if not served_locally:
                return False, elapsed, f"monitor_nonlocal_backend:{backend_name}:{model_name}"
        return True, elapsed, ""
    except httpx.TimeoutException:
        return False, time.time() - started_at, "timeout"
    except Exception as e:
        return False, time.time() - started_at, f"error: {str(e)[:50]}"
def record_probe(target: str, success: bool, latency: float, reason: str):
    """Record one probe outcome in the Prometheus metrics and emit a log line."""
    outcome = 1 if success else 0
    agent_e2e_runs_total.labels(target=target).inc()
    agent_e2e_success.labels(target=target).set(outcome)
    agent_e2e_latency.labels(target=target).set(latency)
    agent_e2e_latency_histogram.labels(target=target).observe(latency)
    if not success:
        # Failures additionally carry the reason as a label dimension.
        agent_e2e_failures_total.labels(target=target, reason=reason).inc()
    logger.info(f"{target}: success={success}, latency={latency:.3f}s, reason={reason}")
async def run_probes():
    """Run all probes once and update metrics.

    Each probe returns (success, latency_seconds, reason); record_probe is
    the single place that touches the Prometheus metrics, so the per-target
    counters are incremented exactly once per probe. (The previous version
    duplicated the metric updates inline AND called record_probe, which
    double-counted runs and failures.)
    """
    # Probe 1: Gateway health
    success, latency, reason = await probe_gateway_health()
    record_probe("gateway_health", success, latency, reason)
    # Probe 2: Agent ping (if endpoint exists)
    success, latency, reason = await probe_agent_ping()
    record_probe("agent_ping", success, latency, reason)
    # Probe 3: Webhook E2E (full path test)
    success, latency, reason = await probe_webhook_echo()
    record_probe("webhook_e2e", success, latency, reason)
    # Probe 4+: semantic checks for selected agents (run in parallel)
    if SEMANTIC_PROBE_ENABLED and SEMANTIC_AGENTS:
        results = await asyncio.gather(*(probe_agent_semantic(agent_id) for agent_id in SEMANTIC_AGENTS))
        matrix = []
        for agent_id, (success, latency, reason) in zip(SEMANTIC_AGENTS, results):
            record_probe(f"semantic_{agent_id}", success, latency, reason)
            matrix.append(f"{agent_id}:{'PASS' if success else 'FAIL'}")
        logger.info("semantic_matrix: " + " | ".join(matrix))
async def main():
    """Entry point: log configuration, expose metrics, and probe forever."""
    # Single startup banner (the placeholder-less f-string duplicate is dropped).
    logger.info("Starting E2E Agent Prober")
    logger.info(f" GATEWAY_URL: {GATEWAY_URL}")
    logger.info(f" ROUTER_URL: {ROUTER_URL}")
    logger.info(f" PROBE_INTERVAL: {PROBE_INTERVAL}s")
    logger.info(f" PROBE_TIMEOUT: {PROBE_TIMEOUT}s")
    logger.info(f" METRICS_PORT: {METRICS_PORT}")
    logger.info(f" SEMANTIC_TIMEOUT: {SEMANTIC_TIMEOUT}s")
    logger.info(f" SEMANTIC_PROBE_ENABLED: {SEMANTIC_PROBE_ENABLED}")
    logger.info(f" SEMANTIC_AGENTS: {','.join(SEMANTIC_AGENTS)}")
    # Start Prometheus metrics server
    start_http_server(METRICS_PORT)
    logger.info(f"Prometheus metrics available at :{METRICS_PORT}/metrics")
    # Initial probe
    await run_probes()
    # Continuous probing: without the run_probes() call inside the loop the
    # prober would sleep forever after the first pass and never update metrics.
    while True:
        await asyncio.sleep(PROBE_INTERVAL)
        await run_probes()

View File

@@ -6,13 +6,15 @@ Artifact Registry v0
"""
import asyncio
import base64
import hashlib
import json
import logging
import os
import re
import uuid
from io import BytesIO
from datetime import datetime
from datetime import datetime, timedelta
from typing import Any, Dict, List, Optional
import asyncpg
@@ -90,6 +92,14 @@ class ArtifactVersionFromUrlRequest(BaseModel):
meta_json: Optional[Dict[str, Any]] = None
class ArtifactVersionFromBase64Request(BaseModel):
content_base64: str
mime: str
filename: Optional[str] = "source.bin"
label: Optional[str] = "source"
meta_json: Optional[Dict[str, Any]] = None
class ArtifactVersionResponse(BaseModel):
version_id: str
storage_key: str
@@ -208,15 +218,38 @@ def _normalize_meta_json(meta: Any) -> Dict[str, Any]:
def _format_to_mime(fmt: str) -> str:
fmt = fmt.lower()
if "/" in fmt:
return fmt
if fmt == "pptx":
return "application/vnd.openxmlformats-officedocument.presentationml.presentation"
if fmt == "pdf":
return "application/pdf"
if fmt == "source":
return "application/json"
if fmt == "docx":
return "application/vnd.openxmlformats-officedocument.wordprocessingml.document"
if fmt == "xlsx":
return "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
if fmt == "txt":
return "text/plain; charset=utf-8"
if fmt == "md":
return "text/markdown; charset=utf-8"
if fmt == "json":
return "application/json"
if fmt == "csv":
return "text/csv; charset=utf-8"
return "application/octet-stream"
def _safe_filename(name: Optional[str], fallback: str = "source.bin") -> str:
raw = (name or fallback).strip() or fallback
cleaned = re.sub(r"[^A-Za-z0-9._-]+", "_", raw)
cleaned = cleaned.strip("._")
if not cleaned:
return fallback
return cleaned[:120]
async def _download_bytes(url: str) -> bytes:
async with httpx.AsyncClient(timeout=60.0) as client:
resp = await client.get(url)
@@ -462,6 +495,73 @@ async def add_version_from_url(artifact_id: str, payload: ArtifactVersionFromUrl
)
@app.post("/artifacts/{artifact_id}/versions/from_base64", response_model=ArtifactVersionResponse)
async def add_version_from_base64(artifact_id: str, payload: ArtifactVersionFromBase64Request) -> ArtifactVersionResponse:
    """Create a new version of *artifact_id* from base64-encoded content.

    Accepts either raw base64 or a full ``data:`` URL, decodes it, uploads
    the bytes to MinIO under a freshly minted version key, then records the
    version row in Postgres.

    Raises:
        HTTPException: 500 when MinIO or the DB pool is unavailable,
            400 for a missing/invalid/empty payload, 502 on MinIO errors.
    """
    # Both the object store and the DB must be up before touching anything.
    if not minio_client:
        raise HTTPException(status_code=500, detail="MinIO not available")
    if not pool:
        raise HTTPException(status_code=500, detail="DB not available")
    raw = (payload.content_base64 or "").strip()
    if not raw:
        raise HTTPException(status_code=400, detail="content_base64 is required")
    # Accept data URLs ("data:<mime>;base64,<payload>"): keep only the payload part.
    if raw.startswith("data:") and "," in raw:
        raw = raw.split(",", 1)[1]
    try:
        content = base64.b64decode(raw, validate=True)
    except Exception:
        raise HTTPException(status_code=400, detail="Invalid base64 payload")
    if not content:
        raise HTTPException(status_code=400, detail="Decoded payload is empty")
    version_id = f"ver_{uuid.uuid4().hex}"
    filename = _safe_filename(payload.filename, fallback="source.bin")
    sha256 = _hash_bytes(content)
    storage_key = _storage_key(artifact_id, version_id, filename)
    # Upload to MinIO first; the DB row is only written after a successful upload.
    try:
        minio_client.put_object(
            MINIO_BUCKET,
            storage_key,
            data=BytesIO(content),
            length=len(content),
            content_type=payload.mime,
        )
    except S3Error as e:
        raise HTTPException(status_code=502, detail=f"MinIO error: {e}")
    meta_json = _normalize_meta_json(payload.meta_json)
    # Preserve the sanitized filename in metadata unless the caller already set one.
    if "file_name" not in meta_json:
        meta_json["file_name"] = filename
    async with pool.acquire() as conn:
        await conn.execute(
            """
            insert into artifact_versions
            (id, artifact_id, label, sha256, mime, size_bytes, storage_key, meta_json)
            values ($1, $2, $3, $4, $5, $6, $7, $8)
            """,
            version_id,
            artifact_id,
            payload.label or "source",
            sha256,
            payload.mime,
            len(content),
            storage_key,
            json.dumps(meta_json),
        )
    return ArtifactVersionResponse(
        version_id=version_id,
        storage_key=storage_key,
        sha256=sha256,
        size_bytes=len(content),
    )
@app.post("/artifacts/{artifact_id}/versions", response_model=ArtifactVersionResponse)
async def add_version(artifact_id: str, payload: ArtifactVersionCreateRequest) -> ArtifactVersionResponse:
if not pool:
@@ -678,7 +778,39 @@ async def download_artifact(artifact_id: str, format: str = Query("pptx")) -> Di
if not row:
raise HTTPException(status_code=404, detail="Version not found")
try:
url = minio_client.presigned_get_object(MINIO_BUCKET, row["storage_key"], expires=1800)
url = minio_client.presigned_get_object(
MINIO_BUCKET,
row["storage_key"],
expires=timedelta(seconds=1800),
)
except S3Error as e:
raise HTTPException(status_code=502, detail=f"MinIO error: {e}")
return {"url": url, "storage_key": row["storage_key"], "mime": row["mime"]}
@app.get("/artifacts/{artifact_id}/versions/{version_id}/download")
async def download_artifact_version(artifact_id: str, version_id: str) -> Dict[str, Any]:
    """Return a 30-minute presigned MinIO URL for one specific artifact version.

    Raises:
        HTTPException: 500 when the DB pool or MinIO is unavailable,
            404 when the version does not exist, 502 on presign errors.
    """
    if not pool or not minio_client:
        raise HTTPException(status_code=500, detail="Service not available")
    async with pool.acquire() as conn:
        row = await conn.fetchrow(
            """
            select * from artifact_versions
            where artifact_id=$1 and id=$2
            limit 1
            """,
            artifact_id,
            version_id,
        )
    if not row:
        raise HTTPException(status_code=404, detail="Version not found")
    try:
        # minio-py expects `expires` as a timedelta, not a plain int.
        url = minio_client.presigned_get_object(
            MINIO_BUCKET,
            row["storage_key"],
            expires=timedelta(seconds=1800),
        )
    except S3Error as e:
        raise HTTPException(status_code=502, detail=f"MinIO error: {e}")
    return {"url": url, "storage_key": row["storage_key"], "mime": row["mime"], "version_id": row["id"]}

View File

@@ -361,6 +361,29 @@ agromatrix:
llm_profile: reasoning
delegation:
enabled: false
plant_intel:
team_name: AgroMatrix Plant Intelligence
parallel_roles: true
max_concurrency: 3
synthesis:
role_context: Plant Intelligence Synthesis
system_prompt_ref: roles/agx/agx-plant-intel/orchestrator_synthesis.md
llm_profile: reasoning
team:
- id: plant_identifier
role_context: Plant Identifier
system_prompt_ref: roles/agx/agx-plant-intel/plant_identifier.md
llm_profile: science
- id: taxonomy_validator
role_context: Taxonomy Validator
system_prompt_ref: roles/agx/agx-plant-intel/taxonomy_validator.md
llm_profile: reasoning
- id: agrovoc_normalizer
role_context: AGROVOC Normalizer
system_prompt_ref: roles/agx/agx-plant-intel/agrovoc_normalizer.md
llm_profile: fast
delegation:
enabled: false
cadastre_geo:
team_name: AgroMatrix Cadastre/Geo
parallel_roles: true
@@ -614,6 +637,16 @@ agromatrix:
- Stepan
- координація
- план
plant_intel:
- plant
- рослина
- культура
- leaf
- disease
- хвороба
- identify
- ідентифікуй
- що за рослина
cadastre_geo:
- cadastre
- geo

View File

@@ -0,0 +1,8 @@
# Agronomist
Фокус: агрономія, діагностика стану рослин, фази розвитку, ризики хвороб/стресів.
Правила відповіді:
- Коротко і прикладно.
- Ніяких вигаданих фактів; при невизначеності чітко позначити припущення.
- Для фото-питань: аналізувати в межах доступного контексту; якщо файл відсутній зараз — просити фото повторно.

View File

@@ -0,0 +1,8 @@
# Communicator
Фокус: людяна та зрозуміла комунікація фінальної відповіді.
Правила:
- Природна мова, без механістичного тону.
- Не дублюй технічні обмеження, якщо вони не потрібні для дії користувача.
- Завершуй конкретним корисним кроком.

View File

@@ -0,0 +1,7 @@
# Field Data Analyst
Фокус: аналіз польових даних, тренди, аномалії, порівняння сценаріїв.
Правила:
- Пояснювати висновки простою мовою.
- Якщо даних недостатньо — вказати, які саме дані потрібні для точного висновку.

View File

@@ -0,0 +1,8 @@
# Farm Ops Planner
Фокус: планування польових робіт, ресурси, пріоритезація задач, таймінги.
Правила:
- Видавати практичний порядок дій.
- За простого запиту: коротка відповідь.
- Для операційних запитів: стислий план з відповідальними і дедлайном.

View File

@@ -0,0 +1,10 @@
# AgroMatrix Orchestrator Synthesis
Ти синтезуєш відповіді ролей у фінальну відповідь Степана.
Правила:
- За замовчуванням: 1-3 природні речення без шаблонної канцелярії.
- Детальний формат (пункти/чекліст) тільки коли користувач просить "детально", "план", "чекліст", "розрахунок".
- Якщо для аналізу бракує фото в поточному контексті, скажи це просто і попроси надіслати фото повторно.
- Уникай службових формулювань про "технічні обмеження", "text-only" чи "відсутній vision-модуль".
- Пояснюй по суті агропитання і давай 1 наступний практичний крок.

View File

@@ -0,0 +1,7 @@
# Risk Assessor
Фокус: агро-ризики, операційні ризики, наслідки рішень.
Правила:
- Давай коротку оцінку ризику (низький/середній/високий) і як зменшити ризик.
- Без зайвої бюрократії у відповіді користувачу.

View File

@@ -11,6 +11,10 @@
- Деструктивні дії (delete/migrate/prod) ТІЛЬКИ через план + dry-run + backup
- Ніколи не логувати секрети/токени
- Інші ролі НЕ спілкуються з користувачем напряму
- Мультимодальність активна: фото/голос/документи підтримуються через стек платформи.
- Якщо в поточному контексті не вистачає зображення для аналізу, пояснюйте це простою людською мовою і попросіть надіслати фото ще раз без технічних формулювань.
## Формат відповіді:
Структурована відповідь з чіткими рекомендаціями та наступними кроками.
- За замовчуванням: природна коротка відповідь 1-3 речення.
- Якщо користувач просить детально/план/чекліст: структурована відповідь з чіткими наступними кроками.
- Тон: живий і професійний, без канцеляризмів, шаблонів і фраз про "обмеження моделі".

View File

@@ -7,3 +7,7 @@
- Структурувати інформацію логічно
- Включати конкретні наступні кроки
- Позначати ризики якщо є
- За замовчуванням відповідати природно і коротко (1-3 речення), без шаблонної канцелярії.
- Для детальних запитів переходити у структурований режим.
- Якщо для аналізу бракує зображення у поточному контексті, скажіть це природно і попросіть надіслати фото повторно.
- Не вживати службові формулювання на кшталт "обмеження моделі", "text-only", "vision unavailable".

View File

@@ -0,0 +1,11 @@
You are AGROVOC Normalizer.
Responsibilities:
- Normalize crop/disease terms using agrovoc_lookup.
- Provide canonical term mapping for user-facing output.
- Keep labels practical for agronomy context.
Return format:
- canonical_terms
- term_mapping
- notes_for_user

View File

@@ -0,0 +1,24 @@
Ти — Plant Intel Agent у DAARION.city.
Відповідай природно, коротко й по-людськи українською, 1–3 речення за замовчуванням.
НАЙГОЛОВНІШЕ:
- Дані з [PLANT_VISION_PREPROCESSED] (або context.plant_vision) — єдиний source-of-truth для ідентифікації рослини.
- Для follow-up без нового фото використовуй [PREVIOUS_PLANT_IDENTIFICATION] (або context.last_plant / memory.last_plant).
Правило впевненості (обов'язково):
- Якщо recommend_fallback == true або confidence < 0.65:
"Ймовірно <name>, але впевненість низька. Перевірив через GBIF — найближчі збіги: <gbif_validation>. Краще нове фото при нормальному світлі."
- Інакше:
"Я бачу <name> з впевненістю <X>%."
Правила синтезу:
- Не ігноруй результати pre-vision, якщо вони присутні.
- Не стверджуй "фото не надано", якщо у контексті є pre-vision або previous plant data.
- Уникай шаблонних списків, якщо користувач не просить детальний формат.
- Якщо дані суперечливі: коротко познач невизначеність і попроси 1 конкретне додаткове фото.
- Якщо top_k порожній, явно вкажи, що ідентифікація непевна, але все одно надай GBIF-орієнтир, якщо він є в контексті.
Формат відповіді:
- 1–3 речення за замовчуванням.
- Без технічного шуму, без внутрішніх JSON/міток у відповіді користувачу.
- За запитом користувача можна розгорнути відповідь і дати короткі поради з догляду.

View File

@@ -0,0 +1,11 @@
You are Plant Identifier.
Responsibilities:
- Parse visual cues from user description/photo context.
- Build candidate crop/plant hypotheses.
- Use plantnet_lookup first when image URL is available.
- If PlantNet is unavailable, provide top hypotheses with explicit uncertainty.
Return format:
- candidates: numbered list max 5, each with rationale.
- required_data: what extra image/data is needed.

View File

@@ -0,0 +1,11 @@
You are Taxonomy Validator.
Responsibilities:
- Validate candidate names via gbif_species_lookup.
- Remove invalid/synonym-conflicted names.
- Keep accepted taxa and explain conflicts briefly.
Return format:
- accepted_candidates
- rejected_candidates_with_reason
- confidence_adjustment

View File

@@ -0,0 +1,15 @@
FROM python:3.11-slim

WORKDIR /app

COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

COPY main.py .

EXPOSE 8085

# The URL must be quoted: without quotes `python -c` receives a bare
# http://... token, which is a SyntaxError, so the health probe would fail
# on every run and the container would always be reported unhealthy.
HEALTHCHECK --interval=30s --timeout=10s --start-period=20s --retries=3 \
    CMD python -c "import urllib.request; urllib.request.urlopen('http://localhost:8085/health')"

CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8085"]

View File

@@ -0,0 +1,238 @@
import json
import os
import re
import shlex
import subprocess
import tempfile
from pathlib import Path
from typing import Any, Dict, List, Optional
import httpx
from fastapi import FastAPI, File, HTTPException, UploadFile
from fastapi.exceptions import RequestValidationError
from fastapi.responses import JSONResponse
from pydantic import BaseModel, Field
app = FastAPI(title="plant-vision-node1", version="0.1.1")
class IdentifyRequest(BaseModel):
    """Request body for POST /identify."""
    # Publicly reachable URL of the plant photo; the handler rejects None with 400.
    image_url: Optional[str] = None
    # How many candidates to return; validated into the 1..10 range.
    top_k: int = Field(default=3, ge=1, le=10)
def _normalize_predictions(raw: Any, top_k: int) -> List[Dict[str, Any]]:
preds: List[Dict[str, Any]] = []
if isinstance(raw, dict):
for key in ("predictions", "results", "candidates"):
if isinstance(raw.get(key), list):
raw = raw[key]
break
if isinstance(raw, list):
for item in raw[:top_k]:
if not isinstance(item, dict):
continue
name = (
item.get("scientific_name")
or item.get("scientificName")
or item.get("label")
or item.get("name")
or "unknown"
)
common = item.get("common_name") or item.get("commonName") or item.get("common") or "-"
score = item.get("score", item.get("confidence", 0.0))
try:
score_f = float(score)
except Exception:
score_f = 0.0
preds.append({"scientific_name": str(name), "common_name": str(common), "score": score_f})
return preds[:top_k]
def _parse_text_output(text: str, top_k: int) -> List[Dict[str, Any]]:
"""
Parse only model score lines, e.g.:
97.6% Persicaria amphibia
86.1% Canada Goldenrod (Solidago canadensis)
Ignore service lines like "Read ..." or "Classification of ...".
"""
preds: List[Dict[str, Any]] = []
for raw_line in (text or "").splitlines():
line = raw_line.strip()
if not line or "%" not in line:
continue
m = re.match(r"^\s*(\d+(?:\.\d+)?)%\s+(.+)$", line)
if not m:
continue
score_str, name_part = m.groups()
try:
score = float(score_str)
except ValueError:
continue
name = name_part.strip()
if not name:
continue
common_name = "-"
scientific_name = name
# If output is "Common Name (Scientific name)", preserve both.
paren = re.match(r"^(.*?)\s*\(([^()]+)\)\s*$", name)
if paren:
common, scientific = paren.groups()
common = common.strip()
scientific = scientific.strip()
if common:
common_name = common
if scientific:
scientific_name = scientific
preds.append(
{
"scientific_name": scientific_name,
"common_name": common_name,
"score": score,
}
)
preds.sort(key=lambda x: float(x.get("score", 0.0)), reverse=True)
return preds[:top_k]
def _extract_inference_time(stdout: str) -> Optional[float]:
m = re.search(r"took\s+(\d+(?:\.\d+)?)\s+secs", stdout or "")
if not m:
return None
try:
return float(m.group(1))
except Exception:
return None
def _run_nature_id_cli(image_path: str, top_k: int) -> Dict[str, Any]:
cmd_tmpl = (os.getenv("NATURE_ID_CMD") or "").strip()
timeout_s = int(os.getenv("NATURE_ID_TIMEOUT", "40"))
if not cmd_tmpl:
raise RuntimeError("NATURE_ID_CMD is not configured")
cmd = cmd_tmpl.replace("{image_path}", image_path)
proc = subprocess.run(
shlex.split(cmd),
capture_output=True,
text=True,
timeout=timeout_s,
check=False,
)
if proc.returncode != 0:
raise RuntimeError(f"nature-id cli failed rc={proc.returncode}: {proc.stderr.strip()[:240]}")
out = (proc.stdout or "").strip()
inference_time_sec = _extract_inference_time(out)
if not out:
return {"predictions": [], "inference_time_sec": inference_time_sec}
try:
parsed = json.loads(out)
preds = _normalize_predictions(parsed, top_k)
except Exception:
preds = _parse_text_output(out, top_k)
return {"predictions": preds, "inference_time_sec": inference_time_sec}
async def _download_image(image_url: str) -> str:
    """Fetch the image at *image_url* into a temporary .jpg file.

    Returns the temp file path; the caller is responsible for deleting it.
    """
    fetch_timeout = float(os.getenv("DOWNLOAD_TIMEOUT", "20"))
    async with httpx.AsyncClient(timeout=fetch_timeout) as client:
        response = await client.get(image_url)
        response.raise_for_status()
        payload = response.content
    with tempfile.NamedTemporaryFile(delete=False, suffix=".jpg") as tmp_file:
        tmp_file.write(payload)
        return tmp_file.name
def _response_payload(result: Dict[str, Any]) -> Dict[str, Any]:
preds = result.get("predictions") or []
top_k = [
{
"confidence": float(p.get("score", 0.0)),
"name": str((p.get("common_name") if p.get("common_name") not in (None, "", "-") else p.get("scientific_name")) or "unknown"),
"scientific_name": str(p.get("scientific_name") or "unknown"),
}
for p in preds
]
return {
"status": "success",
"model": "aiy_plants_V1",
"source": "nature-id-cli",
"count": len(preds),
"inference_time_sec": result.get("inference_time_sec"),
"predictions": preds,
"top_k": top_k,
}
@app.exception_handler(RequestValidationError)
async def validation_exception_handler(_, exc: RequestValidationError):
    """Return 422 with sanitized validation errors.

    Only the location, message, and type of each error are echoed back —
    never the offending input value — to avoid leaking raw multipart bytes
    in validation responses.
    """
    errs: List[Dict[str, Any]] = []
    for e in exc.errors() or []:
        errs.append({"loc": e.get("loc"), "msg": e.get("msg"), "type": e.get("type")})
    return JSONResponse(status_code=422, content={"detail": errs})
@app.get("/health")
def health() -> Dict[str, Any]:
    """Liveness probe: reports whether the nature-id CLI command is configured."""
    configured_cmd = (os.getenv("NATURE_ID_CMD") or "").strip()
    return {
        "status": "healthy",
        "nature_id_cmd_configured": bool(configured_cmd),
        "nature_id_cmd": configured_cmd,
    }
@app.post("/identify")
async def identify(payload: IdentifyRequest) -> Dict[str, Any]:
    """Identify a plant from a remote image URL.

    Downloads the image to a temp file, runs the nature-id CLI on it, and
    always removes the temp file afterwards.

    Raises:
        HTTPException: 400 when image_url is missing; 503 when download or
            CLI execution fails.
    """
    if not payload.image_url:
        raise HTTPException(status_code=400, detail="image_url is required")
    tmp_path = ""
    try:
        tmp_path = await _download_image(payload.image_url)
        result = _run_nature_id_cli(tmp_path, payload.top_k)
        return _response_payload(result)
    except HTTPException:
        # Re-raise our own HTTP errors untouched.
        raise
    except Exception as e:
        raise HTTPException(status_code=503, detail=f"identify_failed: {e}")
    finally:
        # Best-effort cleanup of the downloaded temp image.
        if tmp_path:
            try:
                Path(tmp_path).unlink(missing_ok=True)
            except Exception:
                pass
@app.post("/identify-file")
async def identify_file(file: UploadFile = File(...), top_k: int = 3) -> Dict[str, Any]:
    """Identify a plant from an uploaded image file.

    Persists the upload to a temp file, runs the nature-id CLI on it, and
    always removes the temp file afterwards; top_k is clamped to 1..10
    (query parameters bypass the pydantic Field validation used by /identify).

    Raises:
        HTTPException: 503 when CLI execution fails.
    """
    top_k = max(1, min(top_k, 10))
    tmp_path = ""
    try:
        with tempfile.NamedTemporaryFile(delete=False, suffix=".jpg") as f:
            f.write(await file.read())
            tmp_path = f.name
        result = _run_nature_id_cli(tmp_path, top_k)
        return _response_payload(result)
    except Exception as e:
        raise HTTPException(status_code=503, detail=f"identify_failed: {e}")
    finally:
        # Best-effort cleanup of the stored temp image.
        if tmp_path:
            try:
                Path(tmp_path).unlink(missing_ok=True)
            except Exception:
                pass

View File

@@ -0,0 +1,8 @@
fastapi==0.115.5
uvicorn[standard]==0.32.1
httpx==0.28.1
python-multipart==0.0.17
Pillow==11.1.0
requests==2.32.3
tflite-runtime==2.14.0
numpy==1.26.4

View File

@@ -46,8 +46,15 @@ AGENT_SPECIALIZED_TOOLS = {
"nutra": ['comfy_generate_image', 'comfy_generate_video'],
# AgroMatrix - Agriculture
# Specialized: crop analysis, weather integration, field mapping
"agromatrix": ['comfy_generate_image', 'comfy_generate_video'],
# Specialized: crop analysis, weather integration, field mapping + plant intelligence
"agromatrix": [
'comfy_generate_image',
'comfy_generate_video',
'plantnet_lookup',
'nature_id_identify',
'gbif_species_lookup',
'agrovoc_lookup',
],
# GreenFood - Food & Eco
# Specialized: recipe analysis, eco-scoring

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -408,8 +408,9 @@ agents:
description: "Monitor Agent - архітектор-інспектор DAGI"
default_llm: local_qwen3_8b
system_prompt: |
Ти - Monitor Agent, стежиш за нодами, сервісами, агентами.
Якщо бачиш у чаті інших ботів, відповідай тільки за інфраструктурою або прямим тегом.
Ти - Monitor Agent, інфраструктурний інспектор DAGI: ноди, сервіси, пайплайни, алерти.
Ти знаєш, що DAARWIZZ — головний оркестратор мережі DAARION.city; для governance/маршрутизації посилайся на нього.
Відповідай коротко і по суті; якщо даних бракує — одразу кажи, який саме метрик/лог потрібен.
tools:
- id: get_metrics
type: builtin

View File

@@ -19,6 +19,7 @@ from typing import Dict, List, Any, Optional
from dataclasses import dataclass
from io import BytesIO, StringIO
from pathlib import PurePath
from urllib.parse import urlparse
import xml.etree.ElementTree as ET
from xml.sax.saxutils import escape as xml_escape
from zipfile import ZIP_DEFLATED, ZipFile
@@ -108,6 +109,115 @@ TOOL_DEFINITIONS = [
}
}
},
{
"type": "function",
"function": {
"name": "plantnet_lookup",
"description": "Визначення рослин через Pl@ntNet API. Повертає top-k кандидатів з confidence.",
"parameters": {
"type": "object",
"properties": {
"query": {
"type": "string",
"description": "Короткий опис рослини/культури (якщо немає image_url)"
},
"image_url": {
"type": "string",
"description": "Публічне посилання на фото рослини"
},
"organ": {
"type": "string",
"description": "Орган рослини: leaf/flower/fruit/bark/auto",
"default": "auto"
},
"top_k": {
"type": "integer",
"description": "Скільки кандидатів повернути (1-10)",
"default": 3
}
}
}
}
},
{
"type": "function",
"function": {
"name": "nature_id_identify",
"description": "Локальна/open-source ідентифікація рослин через nature-id сумісний сервіс.",
"parameters": {
"type": "object",
"properties": {
"image_url": {
"type": "string",
"description": "Публічне посилання на фото рослини"
},
"image_data": {
"type": "string",
"description": "Data URL зображення (data:image/...;base64,...)"
},
"top_k": {
"type": "integer",
"description": "Скільки кандидатів повернути (1-10)",
"default": 3
},
"min_confidence": {
"type": "number",
"description": "Поріг confidence для fallback на GBIF",
"default": 0.65
}
}
}
}
},
{
"type": "function",
"function": {
"name": "gbif_species_lookup",
"description": "Пошук таксонів у GBIF для валідації назви культури/рослини.",
"parameters": {
"type": "object",
"properties": {
"query": {
"type": "string",
"description": "Назва/термін для пошуку виду"
},
"limit": {
"type": "integer",
"description": "Кількість результатів (1-10)",
"default": 5
}
},
"required": ["query"]
}
}
},
{
"type": "function",
"function": {
"name": "agrovoc_lookup",
"description": "Нормалізація агро-термінів через AGROVOC (SPARQL).",
"parameters": {
"type": "object",
"properties": {
"query": {
"type": "string",
"description": "Термін культури/хвороби/технології"
},
"lang": {
"type": "string",
"description": "Мова міток (en/uk/ru)",
"default": "en"
},
"limit": {
"type": "integer",
"description": "Кількість результатів (1-10)",
"default": 5
}
},
"required": ["query"]
}
}
},
# PRIORITY 3: Generation tools
{
"type": "function",
@@ -681,6 +791,42 @@ class ToolManager:
tool_names = [t.get("function", {}).get("name") for t in filtered]
logger.debug(f"Agent {agent_id} has {len(filtered)} tools: {tool_names}")
return filtered
@staticmethod
def _is_image_data_url(value: str) -> bool:
v = str(value or "").strip()
return bool(v.startswith("data:image/") and ";base64," in v)
@staticmethod
def _is_known_non_direct_image_url(url: str) -> bool:
u = str(url or "").strip()
if not u:
return False
try:
p = urlparse(u)
except Exception:
return True
host = (p.netloc or "").lower()
if host in {"t.me", "telegram.me"}:
return True
if "web.telegram.org" in host:
return True
return False
@staticmethod
def _normalize_confidence(value: Any) -> float:
try:
v = float(value)
except Exception:
return 0.0
if v < 0:
return 0.0
# Some backends return percentages (e.g. 97.6) instead of 0..1.
if v > 1.0 and v <= 100.0:
v = v / 100.0
if v > 1.0:
v = 1.0
return v
async def execute_tool(
self,
@@ -709,6 +855,14 @@ class ToolManager:
return await self._web_search(arguments)
elif tool_name == "web_extract":
return await self._web_extract(arguments)
elif tool_name == "plantnet_lookup":
return await self._plantnet_lookup(arguments)
elif tool_name == "nature_id_identify":
return await self._nature_id_identify(arguments)
elif tool_name == "gbif_species_lookup":
return await self._gbif_species_lookup(arguments)
elif tool_name == "agrovoc_lookup":
return await self._agrovoc_lookup(arguments)
elif tool_name == "image_generate":
return await self._image_generate(arguments)
elif tool_name == "comfy_generate_image":
@@ -2530,6 +2684,272 @@ class ToolManager:
except Exception as e:
return ToolResult(success=False, result=None, error=str(e))
async def _plantnet_lookup(self, args: Dict) -> ToolResult:
    """Plant identification via the Pl@ntNet API with open-source fallbacks.

    Resolution order:
      1. Pl@ntNet HTTP API, when ``image_url`` and PLANTNET_API_KEY are set.
      2. ``_nature_id_identify`` (self-hosted) for any provided image.
      3. ``_gbif_species_lookup`` for a plain-text ``query``.

    Returns a ToolResult; never raises for malformed arguments.
    """
    query = str(args.get("query", "") or "").strip()
    image_url = str(args.get("image_url", "") or "").strip()
    image_data = str(args.get("image_data", "") or "").strip()
    runtime_image_data = str(args.get("_runtime_image_data", "") or "").strip()
    # The runtime may attach the actual photo out-of-band; prefer it when the
    # model did not pass image_data explicitly.
    if not image_data and self._is_image_data_url(runtime_image_data):
        image_data = runtime_image_data
    organ = str(args.get("organ", "auto") or "auto").strip().lower()
    # Fix: a malformed top_k (e.g. "three" or None) previously raised an
    # uncaught ValueError/TypeError before any backend was tried; fall back
    # to the default instead.
    try:
        top_k = max(1, min(int(args.get("top_k", 3)), 5))
    except (TypeError, ValueError):
        top_k = 3
    api_key = (os.getenv("PLANTNET_API_KEY") or "").strip()
    if image_url and api_key:
        try:
            params = {
                "api-key": api_key,
                "images": image_url,
                # Pl@ntNet requires a concrete organ; "leaf" is the safest default.
                "organs": "leaf" if organ == "auto" else organ,
                "lang": "en",
            }
            resp = await self.http_client.get(
                "https://my-api.plantnet.org/v2/identify/all",
                params=params,
                timeout=25.0,
            )
            if resp.status_code == 200:
                data = resp.json()
                results = (data.get("results") or [])[:top_k]
                if not results:
                    return ToolResult(success=True, result="Pl@ntNet: кандидатів не знайдено.")
                lines = []
                for idx, item in enumerate(results, 1):
                    species = (item.get("species") or {})
                    sname = species.get("scientificNameWithoutAuthor") or species.get("scientificName") or "unknown"
                    common = species.get("commonNames") or []
                    cname = common[0] if common else "-"
                    score = float(item.get("score") or 0.0)
                    lines.append(f"{idx}. {sname} ({cname}) score={score:.3f}")
                return ToolResult(success=True, result="Pl@ntNet candidates:\n" + "\n".join(lines))
            return ToolResult(success=False, result=None, error=f"plantnet_http_{resp.status_code}")
        except Exception as e:
            return ToolResult(success=False, result=None, error=f"plantnet_error: {e}")
    if image_url or image_data:
        ni_args: Dict[str, Any] = {"top_k": top_k}
        if image_data:
            ni_args["image_data"] = image_data
        else:
            ni_args["image_url"] = image_url
        if runtime_image_data:
            ni_args["_runtime_image_data"] = runtime_image_data
        ni = await self._nature_id_identify(ni_args)
        if ni.success:
            return ni
    if query:
        return await self._gbif_species_lookup({"query": query, "limit": top_k})
    return ToolResult(
        success=False,
        result=None,
        error="No available plant ID backend (set PLANTNET_API_KEY or NATURE_ID_URL, or provide text query)",
    )
async def _nature_id_identify(self, args: Dict) -> ToolResult:
    """Open-source plant identification via self-hosted nature-id compatible endpoint.

    Accepts either ``image_url`` (direct image link) or ``image_data`` (a
    ``data:image/...;base64,...`` URL), posts it to the service configured by
    NATURE_ID_URL and returns a JSON payload with normalized top-k candidates.
    When the top-1 confidence is below ``min_confidence`` the payload sets
    ``recommend_fallback`` and, when possible, embeds a GBIF validation lookup
    for the best candidate.
    """
    image_url = str(args.get("image_url", "") or "").strip()
    image_data = str(args.get("image_data", "") or "").strip()
    runtime_image_data = str(args.get("_runtime_image_data", "") or "").strip()
    # Prefer runtime-attached photo bytes when the model gave no explicit image_data.
    if not image_data and self._is_image_data_url(runtime_image_data):
        image_data = runtime_image_data
    # Clamp the requested candidate count to 1..10.
    top_k = max(1, min(int(args.get("top_k", 3)), 10))
    min_confidence = float(args.get("min_confidence", os.getenv("NATURE_ID_MIN_CONFIDENCE", "0.65")))
    # Telegram page links serve HTML, not pixels: swap in raw data or bail out.
    if image_url and self._is_known_non_direct_image_url(image_url):
        if image_data:
            logger.info("nature_id_identify: replacing non-direct image_url with runtime image_data")
            image_url = ""
        else:
            return ToolResult(
                success=False,
                result=None,
                error="image_url is not direct image URL; provide image_data or direct Telegram file URL",
            )
    if not image_url and not image_data:
        return ToolResult(success=False, result=None, error="image_url or image_data is required")
    base = (os.getenv("NATURE_ID_URL") or "").strip().rstrip("/")
    if not base:
        return ToolResult(success=False, result=None, error="NATURE_ID_URL is not configured")
    try:
        if image_data:
            # data URL -> multipart /identify-file
            if not image_data.startswith("data:") or "," not in image_data:
                return ToolResult(success=False, result=None, error="invalid image_data format")
            header, b64 = image_data.split(",", 1)
            mime = "image/jpeg"
            if ";base64" in header:
                # e.g. "data:image/png;base64" -> "image/png"
                mime = header.split(":", 1)[1].split(";", 1)[0] or "image/jpeg"
            ext = "jpg"
            if "png" in mime:
                ext = "png"
            try:
                image_bytes = base64.b64decode(b64)
            except Exception:
                return ToolResult(success=False, result=None, error="invalid image_data base64")
            files = {"file": (f"upload.{ext}", image_bytes, mime)}
            resp = await self.http_client.post(
                f"{base}/identify-file",
                params={"top_k": top_k},
                files=files,
                timeout=45.0,
            )
        else:
            # Direct URL -> JSON /identify endpoint.
            payload = {"image_url": image_url, "top_k": top_k}
            resp = await self.http_client.post(f"{base}/identify", json=payload, timeout=45.0)
        if resp.status_code != 200:
            return ToolResult(success=False, result=None, error=f"nature_id_http_{resp.status_code}")
        data = resp.json() or {}
        status = str(data.get("status") or "success")
        # Newer services return "top_k" rows; older ones "predictions"/"results".
        raw_top_k = data.get("top_k") or []
        raw_preds = data.get("predictions") or data.get("results") or []
        top_k_rows = []
        if isinstance(raw_top_k, list) and raw_top_k:
            for row in raw_top_k[:top_k]:
                if not isinstance(row, dict):
                    continue
                conf = row.get("confidence", 0.0)
                conf_f = self._normalize_confidence(conf)
                top_k_rows.append({
                    "confidence": conf_f,
                    "name": str(row.get("name") or row.get("scientific_name") or "unknown"),
                    "scientific_name": str(row.get("scientific_name") or row.get("name") or "unknown"),
                })
        else:
            # Legacy shape: each prediction may use score/confidence and
            # assorted name keys; normalize them to the same row schema.
            for item in raw_preds[:top_k]:
                if not isinstance(item, dict):
                    continue
                score = item.get("score", item.get("confidence", 0.0))
                score_f = self._normalize_confidence(score)
                sname = item.get("scientific_name") or item.get("label") or item.get("name") or "unknown"
                cname = item.get("common_name") or item.get("common") or sname
                top_k_rows.append({
                    "confidence": score_f,
                    "name": str(cname),
                    "scientific_name": str(sname),
                })
        if not top_k_rows:
            # No candidates at all: still a successful call, but advise fallback.
            return ToolResult(success=True, result=json.dumps({
                "status": status,
                "model": data.get("model") or "aiy_plants_V1",
                "source": data.get("source") or "nature-id-cli",
                "top_k": [],
                "confidence": 0.0,
                "recommend_fallback": True,
                "reason": "no_predictions",
            }, ensure_ascii=False))
        top1 = top_k_rows[0]
        top1_conf = float(top1.get("confidence", 0.0))
        recommend_fallback = top1_conf < min_confidence
        out = {
            "status": status,
            "model": data.get("model") or "aiy_plants_V1",
            "source": data.get("source") or "nature-id-cli",
            "inference_time_sec": data.get("inference_time_sec"),
            "top_k": top_k_rows,
            "confidence": top1_conf,
            "min_confidence": min_confidence,
            "recommend_fallback": recommend_fallback,
            "fallback": "gbif_species_lookup",
        }
        if recommend_fallback:
            # Low confidence: try to corroborate the best candidate via GBIF.
            fallback_query = str(top1.get("scientific_name") or top1.get("name") or "").strip()
            if fallback_query and fallback_query.lower() != "unknown":
                gbif = await self._gbif_species_lookup({"query": fallback_query, "limit": min(5, top_k)})
                if gbif.success and gbif.result:
                    out["gbif_validation"] = gbif.result
        return ToolResult(success=True, result=json.dumps(out, ensure_ascii=False))
    except Exception as e:
        return ToolResult(success=False, result=None, error=f"nature_id_error: {e}")
async def _gbif_species_lookup(self, args: Dict) -> ToolResult:
    """Look up species records in the public GBIF search API."""
    term = str(args.get("query", "") or "").strip()
    limit = max(1, min(int(args.get("limit", 5)), 10))
    if not term:
        return ToolResult(success=False, result=None, error="query is required")
    try:
        resp = await self.http_client.get(
            "https://api.gbif.org/v1/species/search",
            params={"q": term, "limit": limit, "status": "ACCEPTED"},
            timeout=20.0,
        )
        if resp.status_code != 200:
            return ToolResult(success=False, result=None, error=f"gbif_http_{resp.status_code}")
        payload = resp.json() or {}
        matches = payload.get("results") or []
        if not matches:
            return ToolResult(success=True, result="GBIF: результатів не знайдено.")
        formatted = [
            "{}. {} | rank={} | status={} | key={}".format(
                pos,
                rec.get("scientificName") or rec.get("canonicalName") or "unknown",
                rec.get("rank") or "-",
                rec.get("taxonomicStatus") or "-",
                rec.get("key"),
            )
            for pos, rec in enumerate(matches[:limit], 1)
        ]
        return ToolResult(success=True, result="GBIF matches:\n" + "\n".join(formatted))
    except Exception as e:
        return ToolResult(success=False, result=None, error=f"gbif_error: {e}")
async def _agrovoc_lookup(self, args: Dict) -> ToolResult:
    """Normalize an agronomy term against the AGROVOC thesaurus via SPARQL."""
    term = str(args.get("query", "") or "").strip()
    lang = str(args.get("lang", "en") or "en").strip().lower()
    limit = max(1, min(int(args.get("limit", 5)), 10))
    if not term:
        return ToolResult(success=False, result=None, error="query is required")
    if lang not in {"en", "uk", "ru"}:
        lang = "en"
    # Strip characters that could break out of the SPARQL string literal.
    safe_q = term.replace('\\', ' ').replace('"', ' ').strip()
    sparql = (
        "PREFIX skos: <http://www.w3.org/2004/02/skos/core#> "
        "SELECT ?concept ?label WHERE { "
        "?concept skos:prefLabel ?label . "
        f"FILTER(lang(?label) = '{lang}') "
        f"FILTER(CONTAINS(LCASE(STR(?label)), LCASE(\"{safe_q}\"))) "
        "} LIMIT " + str(limit)
    )
    try:
        resp = await self.http_client.get(
            "https://agrovoc.fao.org/sparql",
            params={"query": sparql, "format": "json"},
            timeout=25.0,
        )
        if resp.status_code != 200:
            return ToolResult(success=False, result=None, error=f"agrovoc_http_{resp.status_code}")
        payload = resp.json() or {}
        bindings = ((payload.get("results") or {}).get("bindings")) or []
        if not bindings:
            return ToolResult(success=True, result="AGROVOC: результатів не знайдено.")
        rows = []
        for pos, binding in enumerate(bindings[:limit], 1):
            label_text = ((binding.get("label") or {}).get("value") or "").strip()
            concept_uri = ((binding.get("concept") or {}).get("value") or "").strip()
            rows.append(f"{pos}. {label_text} | {concept_uri}")
        return ToolResult(success=True, result="AGROVOC matches:\n" + "\n".join(rows))
    except Exception as e:
        return ToolResult(success=False, result=None, error=f"agrovoc_error: {e}")
async def _unload_ollama_models(self):
"""Unload all Ollama models to free VRAM for heavy operations like FLUX"""
ollama_url = os.getenv("OLLAMA_BASE_URL", "http://172.18.0.1:11434")
@@ -2942,7 +3362,11 @@ class ToolManager:
if results:
result = results[0] if isinstance(results, list) else results
markdown = result.get("markdown", "") or result.get("cleaned_html", "") or result.get("text", "")
raw_content = result.get("markdown", "") or result.get("cleaned_html", "") or result.get("text", "")
if isinstance(raw_content, (dict, list, tuple)):
markdown = json.dumps(raw_content, ensure_ascii=False)
else:
markdown = str(raw_content or "")
title = result.get("title", url)
if len(markdown) > 3000:
@@ -2951,13 +3375,30 @@ class ToolManager:
response_parts = [f"**{title}**", "", markdown]
if extract_links:
links = result.get("links", [])
if links:
links_raw = result.get("links", [])
normalized_links: List[Any] = []
if isinstance(links_raw, dict):
for bucket in links_raw.values():
if isinstance(bucket, list):
normalized_links.extend(bucket)
elif bucket:
normalized_links.append(bucket)
elif isinstance(links_raw, list):
normalized_links = links_raw
elif links_raw:
normalized_links = [links_raw]
if normalized_links:
response_parts.append("")
response_parts.append("**Посилання:**")
for link in links[:10]:
for link in normalized_links[:10]:
if isinstance(link, dict):
link_url = link.get("href", "")
link_url = (
link.get("href")
or link.get("url")
or link.get("link")
or ""
)
else:
link_url = str(link)
if link_url:

View File

@@ -11,10 +11,13 @@ import os
import asyncio
import logging
import base64
import json
import re
from typing import Optional, Dict, List, Any, Union
from datetime import datetime, timedelta
from enum import Enum
from io import BytesIO
import xml.etree.ElementTree as ET
from fastapi import FastAPI, HTTPException, BackgroundTasks, File, UploadFile, Form
from fastapi.middleware.cors import CORSMiddleware
@@ -56,16 +59,34 @@ def _csv_to_markdown(content: bytes) -> str:
text = _decode_text_bytes(content)
reader = csv.reader(text.splitlines())
rows = list(reader)
return _rows_to_markdown(rows)
def _tsv_to_markdown(content: bytes) -> str:
    """Render tab-separated bytes as a Markdown table."""
    decoded = _decode_text_bytes(content)
    parsed_rows = list(csv.reader(decoded.splitlines(), delimiter="\t"))
    return _rows_to_markdown(parsed_rows)
def _rows_to_markdown(rows: List[List[Any]]) -> str:
if not rows:
return ""
header = rows[0]
body = rows[1:]
width = max(len(r) for r in rows)
norm_rows = []
for r in rows:
rr = [str(c) if c is not None else "" for c in r]
if len(rr) < width:
rr.extend([""] * (width - len(rr)))
norm_rows.append(rr)
header = norm_rows[0]
body = norm_rows[1:]
lines = [
"| " + " | ".join(header) + " |",
"| " + " | ".join(["---"] * len(header)) + " |",
]
for row in body:
lines.append("| " + " | ".join(row) + " |")
lines.append("| " + " | ".join([str(c) if c is not None else "" for c in row]) + " |")
return "\n".join(lines)
@@ -91,6 +112,69 @@ def _xlsx_to_markdown(content: bytes) -> str:
return "\n".join(parts)
def _xls_to_markdown(content: bytes) -> str:
    """Convert a legacy .xls workbook to Markdown, one table per sheet."""
    try:
        import xlrd
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"xlrd not available: {e}")
    workbook = xlrd.open_workbook(file_contents=content)
    sections = []
    for sheet in workbook.sheets():
        sections.append(f"## Sheet: {sheet.name}")
        grid = [
            [sheet.cell_value(row_idx, col_idx) for col_idx in range(sheet.ncols)]
            for row_idx in range(sheet.nrows)
        ]
        sections.append(_rows_to_markdown(grid) if grid else "_Empty sheet_")
    return "\n\n".join(sections)
def _ods_to_markdown(content: bytes) -> str:
    """Convert an OpenDocument spreadsheet (.ods) to Markdown, one table per sheet.

    Raises HTTPException(500) when odfpy is not installed and
    HTTPException(400) when the bytes are not a valid ODS document.
    """
    try:
        from odf.opendocument import load
        from odf.table import Table, TableRow, TableCell
        from odf.text import P
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"odfpy not available: {e}")
    try:
        doc = load(BytesIO(content))
    except Exception as e:
        raise HTTPException(status_code=400, detail=f"Invalid ODS file: {e}")
    parts = []
    for table in doc.spreadsheet.getElementsByType(Table):
        table_name = str(table.getAttribute("name") or "Sheet")
        parts.append(f"## Sheet: {table_name}")
        rows: List[List[str]] = []
        for row in table.getElementsByType(TableRow):
            cells_out: List[str] = []
            for cell in row.getElementsByType(TableCell):
                # Collect the text content of every paragraph node in the cell.
                txt_parts = []
                for p in cell.getElementsByType(P):
                    txt_parts.extend(
                        [str(getattr(node, "data", "")).strip() for node in p.childNodes if getattr(node, "data", None)]
                    )
                cell_text = " ".join([t for t in txt_parts if t]).strip()
                # ODS compresses identical adjacent cells via the
                # number-columns-repeated attribute; expand them, capped at
                # 100 to bound pathological files.
                repeat_raw = cell.getAttribute("numbercolumnsrepeated")
                try:
                    repeat = int(repeat_raw) if repeat_raw else 1
                except Exception:
                    repeat = 1
                repeat = max(1, min(repeat, 100))
                for _ in range(repeat):
                    cells_out.append(cell_text)
            if cells_out:
                rows.append(cells_out)
        if not rows:
            parts.append("_Empty sheet_")
            continue
        parts.append(_rows_to_markdown(rows))
    return "\n\n".join(parts)
def _docx_to_text(content: bytes) -> str:
try:
from docx import Document
@@ -115,18 +199,111 @@ def _pdf_to_text(content: bytes) -> str:
return "\n\n".join(text_content)
def _pptx_to_text(content: bytes) -> str:
    """Extract slide text from a .pptx deck as Markdown-style sections."""
    try:
        from pptx import Presentation
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"python-pptx not available: {e}")
    deck = Presentation(BytesIO(content))
    sections = []
    for slide_no, slide in enumerate(deck.slides, start=1):
        sections.append(f"## Slide {slide_no}")
        texts = []
        for shape in slide.shapes:
            shape_text = getattr(shape, "text", None)
            if shape_text and str(shape_text).strip():
                texts.append(str(shape_text).strip())
        if texts:
            sections.extend(texts)
        else:
            sections.append("_No text on this slide_")
    return "\n\n".join(sections)
def _json_to_text(content: bytes) -> str:
    """Pretty-print JSON bytes; fall back to the raw text when parsing fails."""
    text = _decode_text_bytes(content)
    try:
        document = json.loads(text)
        return json.dumps(document, ensure_ascii=False, indent=2)
    except Exception:
        return text
def _yaml_to_text(content: bytes) -> str:
    """Normalize YAML bytes via safe load/dump; return the raw text on failure."""
    text = _decode_text_bytes(content)
    try:
        document = yaml.safe_load(text)
        return yaml.safe_dump(document, allow_unicode=True, sort_keys=False)
    except Exception:
        return text
def _xml_to_text(content: bytes) -> str:
    """Flatten XML bytes into space-joined text; return raw text on parse errors."""
    source = _decode_text_bytes(content)
    try:
        root = ET.fromstring(source)
        pieces = [chunk.strip() for chunk in root.itertext() if chunk and chunk.strip()]
        joined = " ".join(pieces)
        return joined or source
    except Exception:
        return source
def _html_to_text(content: bytes) -> str:
    """Extract readable text from HTML bytes (BeautifulSoup, regex fallback)."""
    markup = _decode_text_bytes(content)
    try:
        from bs4 import BeautifulSoup
        extracted = BeautifulSoup(markup, "html.parser").get_text(separator="\n")
        collapsed = re.sub(r"\n{3,}", "\n\n", extracted)
        return collapsed.strip() or markup
    except Exception:
        # bs4 unavailable (or parsing blew up): crude tag stripping instead.
        stripped = re.sub(r"<[^>]+>", " ", markup)
        return re.sub(r"\s+", " ", stripped).strip()
def _rtf_to_text(content: bytes) -> str:
    """Convert RTF bytes to plain text (striprtf, regex fallback)."""
    source = _decode_text_bytes(content)
    try:
        from striprtf.striprtf import rtf_to_text
        return rtf_to_text(source)
    except Exception:
        # striprtf unavailable: strip hex escapes, control words and braces.
        cleaned = re.sub(r"\\'[0-9a-fA-F]{2}", " ", source)
        cleaned = re.sub(r"\\[a-zA-Z]+-?\d* ?", " ", cleaned)
        cleaned = cleaned.replace("{", " ").replace("}", " ")
        return re.sub(r"\s+", " ", cleaned).strip()
def _extract_text_by_ext(filename: str, content: bytes) -> str:
    """Dispatch *content* to the extractor matching *filename*'s extension.

    Raises HTTPException(400) for unsupported extensions.

    NOTE(review): this span was corrupt merged-diff residue (the old
    ``["txt", "md"]`` and ``"xlsx"`` branches coexisted with their
    replacements); this is the reconstructed post-merge dispatcher.
    """
    ext = filename.split(".")[-1].lower() if "." in filename else ""
    if ext in ["txt", "md", "markdown"]:
        return _decode_text_bytes(content)
    if ext == "csv":
        return _csv_to_markdown(content)
    if ext == "tsv":
        return _tsv_to_markdown(content)
    if ext in {"xlsx", "xlsm"}:
        return _xlsx_to_markdown(content)
    if ext == "xls":
        return _xls_to_markdown(content)
    if ext == "ods":
        return _ods_to_markdown(content)
    if ext == "docx":
        return _docx_to_text(content)
    if ext == "pdf":
        return _pdf_to_text(content)
    if ext == "pptx":
        return _pptx_to_text(content)
    if ext == "json":
        return _json_to_text(content)
    if ext in {"yaml", "yml"}:
        return _yaml_to_text(content)
    if ext == "xml":
        return _xml_to_text(content)
    if ext in {"html", "htm"}:
        return _html_to_text(content)
    if ext == "rtf":
        return _rtf_to_text(content)
    raise HTTPException(status_code=400, detail=f"Unsupported file type: .{ext}")
@@ -139,7 +316,12 @@ def _zip_to_markdown(content: bytes, max_files: int = 50, max_total_mb: int = 10
if total_size > max_total_mb * 1024 * 1024:
raise HTTPException(status_code=400, detail=f"ZIP слишком большой: {total_size / 1024 / 1024:.1f} MB")
parts = []
allowed_exts = {"txt", "md", "csv", "xlsx", "docx", "pdf"}
allowed_exts = {
"txt", "md", "markdown", "csv", "tsv",
"xls", "xlsx", "xlsm", "ods",
"docx", "pdf", "pptx",
"json", "yaml", "yml", "xml", "html", "htm", "rtf",
}
processed = []
skipped = []
for member in members:
@@ -1655,7 +1837,8 @@ async def document_endpoint(
- json: Structured JSON with document elements
- text: Plain text extraction
Supported files: PDF, DOCX, PPTX, images (PNG, JPG)
Supported files:
PDF, DOCX, XLS/XLSX/XLSM/ODS, PPTX, TXT/MD/CSV/TSV, JSON/YAML/XML/HTML, RTF, ZIP, images.
"""
try:
import time
@@ -1672,15 +1855,28 @@ async def document_endpoint(
filename = file.filename if file else "document"
file_ext = filename.split(".")[-1].lower() if "." in filename else "pdf"
# Handle text-based formats without Docling
if file_ext in ["txt", "md", "csv", "xlsx", "zip"]:
# Handle deterministic extraction for standard office/text formats
if file_ext in [
"txt", "md", "markdown", "csv", "tsv",
"xlsx", "xls", "xlsm", "ods",
"json", "yaml", "yml", "xml", "html", "htm", "rtf",
"pptx", "zip",
]:
try:
if file_ext == "zip":
content = _zip_to_markdown(doc_data)
output_format = "markdown"
else:
content = _extract_text_by_ext(filename, doc_data)
output_format = "markdown" if file_ext in ["md", "csv", "xlsx"] else "text"
output_format = (
"markdown"
if file_ext in {
"md", "markdown", "csv", "tsv",
"xlsx", "xls", "xlsm", "ods",
"json", "yaml", "yml", "xml", "html", "htm", "pptx",
}
else "text"
)
processing_time_ms = (time.time() - start_time) * 1000
return {
"success": True,
@@ -1764,22 +1960,27 @@ async def document_endpoint(
"device": swapper.device
}
# For DOCX, try python-docx
if file_ext == "docx":
# For common office/text formats, try deterministic extractors.
if file_ext in {
"docx", "txt", "md", "markdown", "csv", "tsv",
"xlsx", "xls", "xlsm", "ods",
"pptx", "json", "yaml", "yml", "xml", "html", "htm", "rtf",
}:
try:
content = _docx_to_text(doc_data)
content = _extract_text_by_ext(filename, doc_data)
out_fmt = "markdown" if file_ext not in {"txt", "rtf"} else "text"
return {
"success": True,
"model": "python-docx (fallback)",
"output_format": "text",
"model": "text-extract (fallback)",
"output_format": out_fmt,
"result": content,
"filename": filename,
"processing_time_ms": (time.time() - start_time) * 1000,
"device": swapper.device
}
except Exception as e:
logger.error(f"DOCX fallback failed: {e}")
raise HTTPException(status_code=500, detail="DOCX extraction failed")
logger.error(f"Text fallback failed for .{file_ext}: {e}")
raise HTTPException(status_code=500, detail=f"Extraction failed for .{file_ext}")
# For PDFs, try pdfplumber
if file_ext == "pdf":
@@ -1807,7 +2008,7 @@ async def document_endpoint(
# For other documents, return error
raise HTTPException(
status_code=503,
detail="Document processing not available. Supported: PDF (with pdfplumber), images (with OCR)"
detail="Document processing unavailable for this type. Supported: office/text/image/zip standard formats."
)
finally:
@@ -2312,4 +2513,3 @@ async def get_multimodal_stack():
# Allow running the service directly; production deployments typically start
# uvicorn via a process manager instead.
if __name__ == "__main__":
    import uvicorn
    uvicorn.run(app, host="0.0.0.0", port=8890)

View File

@@ -4,6 +4,15 @@ httpx==0.25.2
pydantic==2.5.0
pyyaml==6.0.1
python-multipart==0.0.6
chardet>=5.2.0
openpyxl>=3.1.2
python-docx>=1.1.2
pdfplumber>=0.11.0
python-pptx>=0.6.23
xlrd>=2.0.1
odfpy>=1.4.1
beautifulsoup4>=4.12.0
striprtf>=0.0.26
# HuggingFace dependencies for OCR models
torch>=2.0.0
@@ -25,4 +34,4 @@ safetensors>=0.4.0
# Web Scraping & Search
trafilatura>=1.6.0
duckduckgo-search>=4.0.0
duckduckgo-search>=4.0.0

View File

@@ -43,3 +43,8 @@ pdfplumber>=0.10.0
python-docx>=1.1.0
openpyxl>=3.1.2
chardet>=5.2.0
python-pptx>=0.6.23
xlrd>=2.0.1
odfpy>=1.4.1
beautifulsoup4>=4.12.0
striprtf>=0.0.26