feat(docs): add versioned document update and versions APIs
This commit is contained in:
@@ -51,6 +51,17 @@ class IngestResult(BaseModel):
|
||||
error: Optional[str] = None
|
||||
|
||||
|
||||
class UpdateResult(BaseModel):
    """Result of document update with version bump."""

    # True only when the router accepted the update.
    success: bool
    # Identifier of the document the update was applied to, when known.
    doc_id: Optional[str] = None
    # Version number reported by the router for the new revision.
    version_no: Optional[int] = None
    # Router-side identifier of the stored version record.
    version_id: Optional[int] = None
    # Number of chunks the router reports as stored for this update.
    updated_chunks: int = 0
    # Coarse outcome label; the update flow sets "updated" or "failed".
    status: str = "unknown"
    # Human-readable failure reason; None on success.
    error: Optional[str] = None
|
||||
|
||||
|
||||
class QAResult(BaseModel):
|
||||
"""Result of RAG query about a document"""
|
||||
success: bool
|
||||
@@ -106,6 +117,27 @@ class DocumentService:
|
||||
raise RuntimeError(f"Router error on {path}: {err}")
|
||||
return body if isinstance(body, dict) else {"ok": False, "error": "Invalid router response type"}
|
||||
|
||||
async def _router_get_json(
    self,
    path: str,
    timeout: float = 30.0,
) -> Dict[str, Any]:
    """Issue a GET request to the router and return the decoded JSON object.

    Args:
        path: Path (plus optional query string) appended to ``ROUTER_URL``.
        timeout: Timeout in seconds passed to ``httpx.AsyncClient``.

    Returns:
        The decoded JSON body when it is a dict. A non-JSON body yields
        ``{"ok": False, "error": "Invalid JSON from router (<status>)"}``
        and a JSON body of any other type yields
        ``{"ok": False, "error": "Invalid router response type"}``.

    Raises:
        RuntimeError: If the router answers with an HTTP status >= 400.
    """
    import httpx

    url = f"{ROUTER_URL.rstrip('/')}{path}"
    async with httpx.AsyncClient(timeout=timeout) as client:
        resp = await client.get(url)
        try:
            body = resp.json()
        except Exception:
            # Best effort: a non-JSON body is reported, not raised.
            body = {"ok": False, "error": f"Invalid JSON from router ({resp.status_code})"}

        if resp.status_code >= 400:
            # An error body may be valid JSON without being an object
            # (list, string, number); only dicts support ``.get``.
            detail = None
            if isinstance(body, dict):
                detail = body.get("detail") or body.get("error")
            err = detail or f"HTTP {resp.status_code}"
            raise RuntimeError(f"Router error on {path}: {err}")

        if isinstance(body, dict):
            return body
        return {"ok": False, "error": "Invalid router response type"}
|
||||
|
||||
def _is_excel_filename(self, file_name: Optional[str]) -> bool:
|
||||
if not file_name:
|
||||
return False
|
||||
@@ -572,6 +604,152 @@ class DocumentService:
|
||||
success=False,
|
||||
error=str(e)
|
||||
)
|
||||
|
||||
async def update_document(
    self,
    session_id: str,
    doc_id: Optional[str] = None,
    doc_url: Optional[str] = None,
    file_name: Optional[str] = None,
    text: Optional[str] = None,
    dao_id: Optional[str] = None,
    user_id: Optional[str] = None,
    agent_id: str = "daarwizz",
    storage_ref: Optional[str] = None,
    metadata: Optional[Dict[str, Any]] = None,
) -> UpdateResult:
    """
    Update existing document content and bump version in router memory.

    Missing ``doc_id``, ``doc_url``, ``file_name`` and ``dao_id`` are filled
    from the document context stored for ``session_id``. When ``text`` is
    empty after stripping, the document at ``doc_url`` is re-parsed to
    markdown and that output is used as the new content. The content is then
    posted to the router's ``/v1/documents/update`` endpoint and, on
    success, the session's document context is saved again.

    Args:
        session_id: Session whose stored document context is consulted.
        doc_id: Document to update; falls back to the session context.
        doc_url: Source URL used for re-parsing when ``text`` is empty.
        file_name: File name forwarded to the parser and the router.
        text: Replacement content; takes precedence over re-parsing.
        dao_id: DAO identifier forwarded to the parser and the router.
        user_id: User identifier forwarded to the parser and the router.
        agent_id: Agent identifier; lower-cased, defaults to ``"daarwizz"``.
        storage_ref: Storage reference forwarded to the router unchanged.
        metadata: Extra metadata merged over the default ``session_id`` /
            ``source`` entries (caller-supplied keys win).

    Returns:
        ``UpdateResult`` with ``status="updated"`` on success, otherwise
        ``status="failed"`` and an ``error`` message. Exceptions raised
        while updating are logged and converted into a failed result.
    """

    def _failure(message: str) -> UpdateResult:
        # Reads ``doc_id`` at call time, so it reflects any context fallback.
        return UpdateResult(
            success=False,
            doc_id=doc_id,
            status="failed",
            error=message,
        )

    try:
        context = await self.get_doc_context(session_id)
        if context:
            # Explicit arguments win; the stored context only fills gaps.
            doc_id = doc_id or context.doc_id
            doc_url = doc_url or context.doc_url
            file_name = file_name or context.file_name
            dao_id = dao_id or context.dao_id

        if not doc_id:
            return UpdateResult(
                success=False,
                status="failed",
                error="No document context found. Provide doc_id or parse/ingest first.",
            )

        effective_text = (text or "").strip()
        if not effective_text:
            # No inline text: re-parse the source document instead.
            if not doc_url:
                return _failure("No text or doc_url provided for update")
            parsed = await self.parse_document(
                session_id=session_id,
                doc_url=doc_url,
                file_name=file_name or "document",
                dao_id=dao_id or "",
                user_id=user_id or "",
                output_mode="markdown",
                metadata={"source": self._extract_source(session_id), "mode": "update"},
            )
            if not parsed.success:
                return _failure(parsed.error or "Document parse failed")
            effective_text = (parsed.markdown or "").strip()

        if not effective_text:
            return _failure("No extractable text for update")

        meta = {
            "session_id": session_id,
            "source": self._extract_source(session_id),
        }
        if isinstance(metadata, dict):
            meta.update(metadata)

        response = await self._router_post_json(
            "/v1/documents/update",
            {
                "agent_id": (agent_id or "daarwizz").lower(),
                "doc_id": doc_id,
                "file_name": file_name,
                "text": effective_text,
                "dao_id": dao_id,
                "user_id": user_id,
                "storage_ref": storage_ref,
                "metadata": meta,
            },
            timeout=90.0,
        )

        if not response.get("ok"):
            # ``.get(key, default)`` only covers a missing key; the router may
            # send ``"error": null`` or a non-string value, so fall back on
            # any falsy value and coerce to ``str``.
            return _failure(str(response.get("error") or "Router update failed"))

        await self.save_doc_context(
            session_id=session_id,
            doc_id=doc_id,
            doc_url=doc_url,
            file_name=file_name,
            dao_id=dao_id,
            user_id=user_id,
        )

        return UpdateResult(
            success=True,
            doc_id=response.get("doc_id") or doc_id,
            # Missing, null or zero version fields are reported as None.
            version_no=int(response.get("version_no", 0) or 0) or None,
            version_id=int(response.get("version_id", 0) or 0) or None,
            updated_chunks=int(response.get("chunks_stored", 0) or 0),
            status="updated",
        )
    except Exception as e:
        logger.error(f"Document update failed: {e}", exc_info=True)
        return _failure(str(e))
|
||||
|
||||
async def list_document_versions(
    self,
    agent_id: str,
    doc_id: str,
    limit: int = 20,
) -> Dict[str, Any]:
    """Fetch the version history of a document from the router.

    Args:
        agent_id: Agent identifier; lower-cased, defaults to ``"daarwizz"``
            when empty.
        doc_id: Document identifier; surrounding whitespace is stripped.
        limit: Maximum number of versions requested, clamped to 1..200.

    Returns:
        The router's response dict. On any failure, a dict of the form
        ``{"ok": False, "error": <message>, "items": []}`` is returned
        instead of raising.
    """
    from urllib.parse import quote, urlencode

    aid = (agent_id or "daarwizz").lower()
    did = (doc_id or "").strip()
    if not did:
        return {"ok": False, "error": "doc_id is required", "items": []}
    try:
        page_size = max(1, min(int(limit or 20), 200))
        # Percent-encode both identifiers so characters such as "/", "?",
        # "#", "&" or spaces cannot change the path or the query string.
        query = urlencode({"agent_id": aid, "limit": page_size})
        path = f"/v1/documents/{quote(did, safe='')}/versions?{query}"
        response = await self._router_get_json(path, timeout=30.0)
        if isinstance(response, dict):
            return response
        return {"ok": False, "error": "invalid_response", "items": []}
    except Exception as e:
        logger.error(f"list_document_versions failed: {e}")
        return {"ok": False, "error": str(e), "items": []}
|
||||
|
||||
async def ask_about_document(
|
||||
self,
|
||||
@@ -762,6 +940,42 @@ async def ask_about_document(
|
||||
)
|
||||
|
||||
|
||||
async def update_document(
    session_id: str,
    doc_id: Optional[str] = None,
    doc_url: Optional[str] = None,
    file_name: Optional[str] = None,
    text: Optional[str] = None,
    dao_id: Optional[str] = None,
    user_id: Optional[str] = None,
    agent_id: str = "daarwizz",
    storage_ref: Optional[str] = None,
    metadata: Optional[Dict[str, Any]] = None,
) -> UpdateResult:
    """Module-level shortcut that forwards every argument unchanged to
    ``doc_service.update_document`` and returns its ``UpdateResult``."""
    forwarded = {
        "session_id": session_id,
        "doc_id": doc_id,
        "doc_url": doc_url,
        "file_name": file_name,
        "text": text,
        "dao_id": dao_id,
        "user_id": user_id,
        "agent_id": agent_id,
        "storage_ref": storage_ref,
        "metadata": metadata,
    }
    outcome = await doc_service.update_document(**forwarded)
    return outcome
|
||||
|
||||
|
||||
async def list_document_versions(agent_id: str, doc_id: str, limit: int = 20) -> Dict[str, Any]:
    """Module-level shortcut that forwards to
    ``doc_service.list_document_versions`` and returns its response dict."""
    versions = await doc_service.list_document_versions(
        agent_id=agent_id, doc_id=doc_id, limit=limit
    )
    return versions
|
||||
|
||||
|
||||
async def save_doc_context(
|
||||
session_id: str,
|
||||
doc_id: str,
|
||||
|
||||
Reference in New Issue
Block a user