agromatrix: add shared-memory review api and crawl4ai robustness

This commit is contained in:
NODA1 System
2026-02-21 13:05:18 +01:00
parent 01bfa97783
commit 68ac8fa355
4 changed files with 319 additions and 5 deletions

View File

@@ -1228,6 +1228,13 @@ class InferResponse(BaseModel):
file_mime: Optional[str] = None
# Request body for POST /v1/agromatrix/shared-memory/review: identifies one
# pending shared-memory case and carries the reviewer's decision.
class SharedMemoryReviewRequest(BaseModel):
# Identifier of the pending point, sent as a string.
point_id: str
# True approves the case, False rejects it.
approve: bool
# Optional name of the person making the decision.
reviewer: Optional[str] = None
# Optional free-text remark attached to the decision.
note: Optional[str] = None
# =========================================================================
@@ -2870,6 +2877,40 @@ async def list_available_models():
return {"models": models, "total": len(models)}
@app.get("/v1/agromatrix/shared-memory/pending")
async def agromatrix_shared_pending(limit: int = 50):
    """List pending shared agronomy memory cases for mentor review."""
    # 503 when the memory retrieval module is not loaded or not initialised.
    if not (MEMORY_RETRIEVAL_AVAILABLE and memory_retrieval):
        raise HTTPException(status_code=503, detail="Memory retrieval not available")

    # 501 when the loaded retrieval object does not provide the pending-list call.
    if not hasattr(memory_retrieval, "list_shared_pending_cases"):
        raise HTTPException(status_code=501, detail="Pending review API not enabled")

    # `limit` is forwarded unchanged to the retrieval layer.
    pending_cases = await memory_retrieval.list_shared_pending_cases(limit=limit)
    case_count = len(pending_cases)
    return {"items": pending_cases, "total": case_count}
@app.post("/v1/agromatrix/shared-memory/review")
async def agromatrix_shared_review(req: SharedMemoryReviewRequest):
    """Approve or reject a pending shared agronomy memory case."""
    # 503 when the memory retrieval module is not loaded or not initialised.
    if not MEMORY_RETRIEVAL_AVAILABLE or not memory_retrieval:
        raise HTTPException(status_code=503, detail="Memory retrieval not available")
    # 501 when the loaded retrieval object does not provide the review call.
    if not hasattr(memory_retrieval, "review_shared_pending_case"):
        raise HTTPException(status_code=501, detail="Review API not enabled")

    result = await memory_retrieval.review_shared_pending_case(
        point_id=req.point_id,
        approve=req.approve,
        reviewer=req.reviewer,
        note=req.note,
    )

    # The retrieval layer is expected to answer with a dict carrying "ok"
    # and, on failure, "error"; anything else is treated as a server fault.
    if not isinstance(result, dict):
        raise HTTPException(status_code=500, detail="Invalid review result")
    if result.get("ok"):
        return result

    error = result.get("error")
    if error == "not_found":
        raise HTTPException(status_code=404, detail="Pending case not found")

    # "error" may be present but empty or None (for example str() of an
    # exception raised without a message). dict.get's default only covers a
    # missing key, so fall back explicitly to keep the detail meaningful.
    raise HTTPException(status_code=500, detail=error or "review_failed")
# =============================================================================
# NEO4J GRAPH API ENDPOINTS
# =============================================================================

View File

@@ -1099,6 +1099,144 @@ class MemoryRetrieval:
logger.warning(f"resolve_pending_question failed: {e}")
return False
@staticmethod
def _to_qdrant_point_id(raw_id: Any) -> Any:
    """Coerce an externally supplied id into the form used for Qdrant lookups.

    Strings are trimmed; an all-digit string becomes an ``int`` and any
    other non-blank string is returned trimmed. A blank string is returned
    untouched. Integers pass through, integral floats become ``int``, and
    every other value is returned as-is.
    """
    if isinstance(raw_id, str):
        trimmed = raw_id.strip()
        if not trimmed:
            return raw_id
        if not trimmed.isdigit():
            return trimmed
        try:
            return int(trimmed)
        except Exception:
            # isdigit() accepts characters int() cannot parse (e.g. "²").
            return trimmed

    if isinstance(raw_id, int):
        return raw_id

    is_whole_float = isinstance(raw_id, float) and raw_id.is_integer()
    return int(raw_id) if is_whole_float else raw_id
async def list_shared_pending_cases(self, limit: int = 50) -> List[Dict[str, Any]]:
    """Return summaries of points stored in ``agromatrix_shared_pending``.

    Args:
        limit: Requested number of points; a falsy value means 50 and the
            effective page size is clamped to the range 1..200.

    Returns:
        One dict per point with ``point_id``, ``timestamp``, ``decision``,
        ``reviewed``, ``excerpt`` (first 240 characters of the text) and up
        to five ``candidates``, ordered by timestamp, newest first. An empty
        list is returned when the client is missing, the shared library
        feature is disabled, or the scroll call fails.
    """
    if not (self.qdrant_client and SHARED_AGRO_LIBRARY_ENABLED):
        return []

    page_size = min(int(limit or 50), 200)
    page_size = max(1, page_size)

    try:
        points, _ = self.qdrant_client.scroll(
            collection_name="agromatrix_shared_pending",
            limit=page_size,
            with_payload=True,
            with_vectors=False,
        )
    except Exception as e:
        # Best effort: a failed scroll yields an empty listing.
        logger.debug(f"list_shared_pending_cases failed: {e}")
        return []

    def _summarize(point: Any) -> Dict[str, Any]:
        # Project one stored point onto the fields exposed to reviewers.
        payload = getattr(point, "payload", {}) or {}
        raw_candidates = payload.get("candidates")
        if not isinstance(raw_candidates, list):
            raw_candidates = []
        body = str(payload.get("text") or "").strip()
        return {
            "point_id": str(getattr(point, "id", "")),
            "timestamp": payload.get("timestamp") or "",
            "decision": payload.get("decision"),
            "reviewed": bool(payload.get("reviewed")),
            "excerpt": body[:240],
            "candidates": raw_candidates[:5],
        }

    summaries = [_summarize(point) for point in points or []]
    return sorted(
        summaries,
        key=lambda entry: entry.get("timestamp") or "",
        reverse=True,
    )
# Apply a reviewer's decision to one point of the "agromatrix_shared_pending"
# collection.
#
# Parameters:
#   point_id - id of the pending point, as a string (normalised before lookup).
#   approve  - True to approve; an approved case is copied into
#              "agromatrix_shared_library" under a freshly generated UUID.
#   reviewer - optional reviewer name; defaults to "system", cut to 120 chars.
#   note     - optional remark; defaults to "", cut to 500 chars.
#
# Returns a dict. On success: {"ok": True, "approved", "point_id",
# "library_point_id"} where library_point_id stays None unless a library
# point was written. On failure: {"ok": False, "error": ...} with error being
# "qdrant_unavailable", "not_found", "missing_vector" or the text of a caught
# exception. Exceptions are not propagated to the caller.
#
# NOTE(review): indentation is lost in this view, so it cannot be determined
# here whether the delete from the pending collection runs only on approval
# or also on rejection - confirm against the real file.
async def review_shared_pending_case(
self,
point_id: str,
approve: bool,
reviewer: Optional[str] = None,
note: Optional[str] = None,
) -> Dict[str, Any]:
# Without a Qdrant client nothing can be looked up or written.
if not self.qdrant_client:
return {"ok": False, "error": "qdrant_unavailable"}
try:
from qdrant_client.http import models as qmodels
import uuid
# Digit-only ids are converted to int by the helper; other strings are kept.
pid = self._to_qdrant_point_id(point_id)
# Vectors are requested too so an approved case can be copied without re-embedding.
records = self.qdrant_client.retrieve(
collection_name="agromatrix_shared_pending",
ids=[pid],
with_payload=True,
with_vectors=True,
)
if not records:
return {"ok": False, "error": "not_found"}
point = records[0]
# Work on a copy of the stored payload.
payload = dict(getattr(point, "payload", {}) or {})
# utcnow() yields a naive datetime, so the ISO string carries no UTC offset.
now_iso = datetime.utcnow().isoformat()
# NOTE(review): "reviewed" mirrors the decision, so a rejection records
# reviewed=False - confirm this is intended rather than "was reviewed".
payload["reviewed"] = bool(approve)
payload["review"] = {
"reviewer": (reviewer or "system")[:120],
"approved": bool(approve),
"note": (note or "")[:500],
"reviewed_at": now_iso,
}
library_point_id: Optional[str] = None
if approve:
vector = getattr(point, "vector", None)
if isinstance(vector, dict):
# Named vectors mode: pick first vector value.
vector = next(iter(vector.values()), None)
# No stored vector: embed the case text (or the assistant response), limited
# to 2000 characters, but only when a Cohere API key is configured.
if not vector and COHERE_API_KEY:
basis = str(payload.get("text") or payload.get("assistant_response") or "")[:2000]
vector = await self.get_embedding(basis)
if not vector:
return {"ok": False, "error": "missing_vector"}
# Any failure of get_collection is taken to mean the library collection
# does not exist yet; it is then created sized to this vector, cosine distance.
try:
self.qdrant_client.get_collection("agromatrix_shared_library")
except Exception:
self.qdrant_client.create_collection(
collection_name="agromatrix_shared_library",
vectors_config=qmodels.VectorParams(
size=len(vector),
distance=qmodels.Distance.COSINE,
),
)
# The library copy gets a new random id, not the pending point's id.
library_point_id = str(uuid.uuid4())
payload["approved_at"] = now_iso
self.qdrant_client.upsert(
collection_name="agromatrix_shared_library",
points=[
qmodels.PointStruct(
id=library_point_id,
vector=vector,
payload=payload,
)
],
)
# Remove the case from the pending collection (see the scope note in the header).
self.qdrant_client.delete(
collection_name="agromatrix_shared_pending",
points_selector=qmodels.PointIdsList(points=[pid]),
)
return {
"ok": True,
"approved": bool(approve),
"point_id": str(getattr(point, "id", point_id)),
"library_point_id": library_point_id,
}
# Every failure inside the try block is logged and reported through the
# returned dict; the exception text becomes the "error" value.
except Exception as e:
logger.warning(f"review_shared_pending_case failed: {e}")
return {"ok": False, "error": str(e)}
async def store_interaction(
self,
channel: str,

View File

@@ -3362,7 +3362,11 @@ class ToolManager:
if results:
result = results[0] if isinstance(results, list) else results
markdown = result.get("markdown", "") or result.get("cleaned_html", "") or result.get("text", "")
raw_content = result.get("markdown", "") or result.get("cleaned_html", "") or result.get("text", "")
if isinstance(raw_content, (dict, list, tuple)):
markdown = json.dumps(raw_content, ensure_ascii=False)
else:
markdown = str(raw_content or "")
title = result.get("title", url)
if len(markdown) > 3000:
@@ -3371,13 +3375,30 @@ class ToolManager:
response_parts = [f"**{title}**", "", markdown]
if extract_links:
links = result.get("links", [])
if links:
links_raw = result.get("links", [])
normalized_links: List[Any] = []
if isinstance(links_raw, dict):
for bucket in links_raw.values():
if isinstance(bucket, list):
normalized_links.extend(bucket)
elif bucket:
normalized_links.append(bucket)
elif isinstance(links_raw, list):
normalized_links = links_raw
elif links_raw:
normalized_links = [links_raw]
if normalized_links:
response_parts.append("")
response_parts.append("**Посилання:**")
for link in links[:10]:
for link in normalized_links[:10]:
if isinstance(link, dict):
link_url = link.get("href", "")
link_url = (
link.get("href")
or link.get("url")
or link.get("link")
or ""
)
else:
link_url = str(link)
if link_url: