agromatrix: add shared-memory review api and crawl4ai robustness

2026-02-21 13:05:18 +01:00
parent 01bfa97783
commit 68ac8fa355
4 changed files with 319 additions and 5 deletions
--- a/services/router/main.py
+++ b/services/router/main.py
@@ -1228,6 +1228,13 @@ class InferResponse(BaseModel):
    file_mime: Optional[str] = None


+class SharedMemoryReviewRequest(BaseModel):  # request body of the shared-memory review endpoint
+    point_id: str  # id of the pending case to review
+    approve: bool  # True = approve, False = reject
+    reviewer: Optional[str] = None  # optional reviewer name
+    note: Optional[str] = None  # optional free-text review note
+
+


 # =========================================================================
@@ -2870,6 +2877,40 @@ async def list_available_models():
    return {"models": models, "total": len(models)}


+@app.get("/v1/agromatrix/shared-memory/pending")
+async def agromatrix_shared_pending(limit: int = 50):
+    """List pending shared agronomy memory cases for mentor review."""
+    if not (MEMORY_RETRIEVAL_AVAILABLE and memory_retrieval):
+        raise HTTPException(status_code=503, detail="Memory retrieval not available")
+    if not hasattr(memory_retrieval, "list_shared_pending_cases"):
+        raise HTTPException(status_code=501, detail="Pending review API not enabled")
+    pending_cases = await memory_retrieval.list_shared_pending_cases(limit=limit)
+    return {"items": pending_cases, "total": len(pending_cases)}
+
+
+@app.post("/v1/agromatrix/shared-memory/review")
+async def agromatrix_shared_review(req: SharedMemoryReviewRequest):
+    """Approve or reject a pending shared agronomy memory case."""
+    if not MEMORY_RETRIEVAL_AVAILABLE or not memory_retrieval:
+        raise HTTPException(status_code=503, detail="Memory retrieval not available")
+    if not hasattr(memory_retrieval, "review_shared_pending_case"):
+        raise HTTPException(status_code=501, detail="Review API not enabled")
+
+    result = await memory_retrieval.review_shared_pending_case(
+        point_id=req.point_id, approve=req.approve, reviewer=req.reviewer, note=req.note
+    )
+    if not isinstance(result, dict):
+        raise HTTPException(status_code=500, detail="Invalid review result")
+    if result.get("ok"):
+        return result
+    # `or` instead of a .get() default: an empty or None error must not become the detail.
+    error = result.get("error") or "review_failed"
+    if error == "not_found":
+        raise HTTPException(status_code=404, detail="Pending case not found")
+    # A missing Qdrant client is an outage (503, like the check above), not an internal error.
+    raise HTTPException(status_code=503 if error == "qdrant_unavailable" else 500, detail=error)
+
+
 # =============================================================================
 # NEO4J GRAPH API ENDPOINTS
 # =============================================================================
--- a/services/router/memory_retrieval.py
+++ b/services/router/memory_retrieval.py
@@ -1099,6 +1099,144 @@ class MemoryRetrieval:
            logger.warning(f"resolve_pending_question failed: {e}")
            return False

+    @staticmethod
+    def _to_qdrant_point_id(raw_id: Any) -> Any:
+        """Normalize a point id: ints, integral floats and all-digit strings -> ``int``.
+
+        Other non-blank strings are returned stripped; anything else is unchanged.
+        """
+        if isinstance(raw_id, int):
+            return raw_id
+        if isinstance(raw_id, float) and raw_id.is_integer():
+            return int(raw_id)
+        if not isinstance(raw_id, str) or not raw_id.strip():
+            return raw_id
+        v = raw_id.strip()
+        try:
+            return int(v) if v.isdigit() else v
+        except ValueError:  # isdigit() accepts e.g. "²", which int() rejects
+            return v
+
+    async def list_shared_pending_cases(self, limit: int = 50) -> List[Dict[str, Any]]:
+        """Return summaries of pending review cases, sorted by ``timestamp`` descending.
+
+        Reads a single scroll page (``limit`` clamped to 1..200) and sorts only that
+        page. Returns ``[]`` if Qdrant/the shared library is disabled or the read fails.
+        """
+        if not self.qdrant_client or not SHARED_AGRO_LIBRARY_ENABLED:
+            return []
+        size = max(1, min(int(limit or 50), 200))
+        try:
+            points, _ = self.qdrant_client.scroll(
+                collection_name="agromatrix_shared_pending", limit=size,
+                with_payload=True, with_vectors=False,
+            )
+        except Exception as e:
+            # Best-effort listing; warn (as sibling methods do) so outages are visible.
+            logger.warning(f"list_shared_pending_cases failed: {e}")
+            return []
+
+        items: List[Dict[str, Any]] = []
+        for p in points or []:
+            payload = getattr(p, "payload", {}) or {}
+            candidates = payload.get("candidates")
+            items.append({
+                "point_id": str(getattr(p, "id", "")),
+                "timestamp": payload.get("timestamp") or "",
+                "decision": payload.get("decision"),
+                "reviewed": bool(payload.get("reviewed")),
+                "excerpt": str(payload.get("text") or "").strip()[:240],
+                "candidates": candidates[:5] if isinstance(candidates, list) else [],
+            })
+        items.sort(key=lambda x: x.get("timestamp") or "", reverse=True)
+        return items
+
+    async def review_shared_pending_case(
+        self,
+        point_id: str,
+        approve: bool,
+        reviewer: Optional[str] = None,
+        note: Optional[str] = None,
+    ) -> Dict[str, Any]:
+        """Approve or reject one pending shared case, then delete it from the pending queue.
+
+        Approval upserts the payload (plus review metadata) into
+        ``agromatrix_shared_library`` under a fresh UUID; rejection stores nothing.
+
+        Returns ``{"ok": True, ...}`` on success, else ``{"ok": False, "error": ...}`` with
+        ``"qdrant_unavailable"``, ``"not_found"``, ``"missing_vector"`` or the caught error.
+        """
+        if not self.qdrant_client:
+            return {"ok": False, "error": "qdrant_unavailable"}
+
+        try:
+            from qdrant_client.http import models as qmodels
+            import uuid
+
+            pid = self._to_qdrant_point_id(point_id)
+            records = self.qdrant_client.retrieve(
+                collection_name="agromatrix_shared_pending",
+                ids=[pid],
+                with_payload=True,
+                with_vectors=bool(approve),  # the vector is only read when promoting
+            )
+            if not records:
+                return {"ok": False, "error": "not_found"}
+
+            point = records[0]
+            payload = dict(getattr(point, "payload", {}) or {})
+            now_iso = datetime.utcnow().isoformat()
+            payload["reviewed"] = bool(approve)
+            payload["review"] = {
+                "reviewer": (reviewer or "system")[:120],
+                "approved": bool(approve),
+                "note": (note or "")[:500],
+                "reviewed_at": now_iso,
+            }
+
+            library_point_id: Optional[str] = None
+            if approve:
+                vector = getattr(point, "vector", None)
+                if isinstance(vector, dict):
+                    # Named vectors mode: pick first vector value.
+                    vector = next(iter(vector.values()), None)
+                if not vector and COHERE_API_KEY:
+                    basis = str(payload.get("text") or payload.get("assistant_response") or "")[:2000]
+                    vector = await self.get_embedding(basis)
+                if not vector:
+                    return {"ok": False, "error": "missing_vector"}
+                try:
+                    self.qdrant_client.get_collection("agromatrix_shared_library")
+                except Exception:
+                    self.qdrant_client.create_collection(
+                        collection_name="agromatrix_shared_library",
+                        vectors_config=qmodels.VectorParams(
+                            size=len(vector), distance=qmodels.Distance.COSINE
+                        ),
+                    )
+
+                library_point_id = str(uuid.uuid4())
+                payload["approved_at"] = now_iso
+                self.qdrant_client.upsert(
+                    collection_name="agromatrix_shared_library",
+                    points=[qmodels.PointStruct(id=library_point_id, vector=vector, payload=payload)],
+                )
+
+            self.qdrant_client.delete(
+                collection_name="agromatrix_shared_pending",
+                points_selector=qmodels.PointIdsList(points=[pid]),
+            )
+            return {
+                "ok": True,
+                "approved": bool(approve),
+                "point_id": str(getattr(point, "id", point_id)),
+                "library_point_id": library_point_id,
+            }
+        except Exception as e:
+            logger.warning(f"review_shared_pending_case failed: {e}")
+            # Exceptions can stringify to ""; keep the error non-empty for callers.
+            return {"ok": False, "error": str(e) or type(e).__name__}
+
    async def store_interaction(
        self,
        channel: str,
--- a/services/router/tool_manager.py
+++ b/services/router/tool_manager.py
@@ -3362,7 +3362,11 @@ class ToolManager:
                
                if results:
                    result = results[0] if isinstance(results, list) else results
-                    markdown = result.get("markdown", "") or result.get("cleaned_html", "") or result.get("text", "")
+                    raw_content = result.get("markdown", "") or result.get("cleaned_html", "") or result.get("text", "")
+                    if isinstance(raw_content, (dict, list, tuple)):
+                        markdown = json.dumps(raw_content, ensure_ascii=False)
+                    else:
+                        markdown = str(raw_content or "")
                    title = result.get("title", url)
                    
                    if len(markdown) > 3000:
@@ -3371,13 +3375,30 @@ class ToolManager:
                    response_parts = [f"**{title}**", "", markdown]
                    
                    if extract_links:
-                        links = result.get("links", [])
-                        if links:
+                        links_raw = result.get("links", [])
+                        normalized_links: List[Any] = []
+                        if isinstance(links_raw, dict):
+                            for bucket in links_raw.values():
+                                if isinstance(bucket, list):
+                                    normalized_links.extend(bucket)
+                                elif bucket:
+                                    normalized_links.append(bucket)
+                        elif isinstance(links_raw, list):
+                            normalized_links = links_raw
+                        elif links_raw:
+                            normalized_links = [links_raw]
+
+                        if normalized_links:
                            response_parts.append("")
                            response_parts.append("**Посилання:**")
-                            for link in links[:10]:
+                            for link in normalized_links[:10]:
                                if isinstance(link, dict):
-                                    link_url = link.get("href", "")
+                                    link_url = (
+                                        link.get("href")
+                                        or link.get("url")
+                                        or link.get("link")
+                                        or ""
+                                    )
                                else:
                                    link_url = str(link)
                                if link_url: