feat(docs): add versioned document update and versions APIs

This commit is contained in:
NODA1 System
2026-02-21 16:49:24 +01:00
parent 5d52cf81c4
commit f53e71a0f4
4 changed files with 764 additions and 4 deletions

View File

@@ -6,6 +6,8 @@ Endpoints:
- POST /api/doc/parse - Parse a document
- POST /api/doc/ingest - Ingest document to RAG
- POST /api/doc/ask - Ask question about document
- POST /api/doc/update - Update existing document text (versioned)
- GET /api/doc/versions/{doc_id} - List document versions
"""
import logging
from typing import Optional, Dict, Any
@@ -17,9 +19,12 @@ from services.doc_service import (
parse_document,
ingest_document,
ask_about_document,
update_document,
list_document_versions,
get_doc_context,
ParsedResult,
IngestResult,
UpdateResult,
QAResult,
DocContext
)
@@ -52,6 +57,7 @@ class IngestDocumentRequest(BaseModel):
file_name: Optional[str] = None
dao_id: Optional[str] = None
user_id: Optional[str] = None
agent_id: str = "daarwizz"
class AskDocumentRequest(BaseModel):
@@ -61,6 +67,21 @@ class AskDocumentRequest(BaseModel):
doc_id: Optional[str] = None
dao_id: Optional[str] = None
user_id: Optional[str] = None
agent_id: str = "daarwizz"
class UpdateDocumentRequest(BaseModel):
"""Request to update existing document content."""
session_id: str
doc_id: Optional[str] = None
doc_url: Optional[str] = None
file_name: Optional[str] = None
text: Optional[str] = None
dao_id: Optional[str] = None
user_id: Optional[str] = None
agent_id: str = "daarwizz"
storage_ref: Optional[str] = None
metadata: Optional[Dict[str, Any]] = None
# ========================================
@@ -167,7 +188,8 @@ async def ingest_document_endpoint(request: IngestDocumentRequest):
doc_url=request.doc_url,
file_name=request.file_name,
dao_id=request.dao_id,
user_id=request.user_id
user_id=request.user_id,
agent_id=request.agent_id,
)
if not result.success:
@@ -209,7 +231,8 @@ async def ask_about_document_endpoint(request: AskDocumentRequest):
question=request.question,
doc_id=doc_id,
dao_id=request.dao_id,
user_id=request.user_id
user_id=request.user_id,
agent_id=request.agent_id,
)
if not result.success:
@@ -227,6 +250,59 @@ async def ask_about_document_endpoint(request: AskDocumentRequest):
raise HTTPException(status_code=500, detail=str(e))
@router.post("/api/doc/update")
async def update_document_endpoint(request: UpdateDocumentRequest):
"""
Update a document and bump its version.
If text is omitted and doc_url exists, text is re-parsed from the source document.
"""
try:
result = await update_document(
session_id=request.session_id,
doc_id=request.doc_id,
doc_url=request.doc_url,
file_name=request.file_name,
text=request.text,
dao_id=request.dao_id,
user_id=request.user_id,
agent_id=request.agent_id,
storage_ref=request.storage_ref,
metadata=request.metadata,
)
if not result.success:
raise HTTPException(status_code=400, detail=result.error)
return {
"ok": True,
"doc_id": result.doc_id,
"version_no": result.version_no,
"version_id": result.version_id,
"updated_chunks": result.updated_chunks,
"status": result.status,
}
except HTTPException:
raise
except Exception as e:
logger.error(f"Update document error: {e}", exc_info=True)
raise HTTPException(status_code=500, detail=str(e))
@router.get("/api/doc/versions/{doc_id}")
async def list_document_versions_endpoint(doc_id: str, agent_id: str = "daarwizz", limit: int = 20):
"""
List document versions for agent/doc pair.
"""
try:
data = await list_document_versions(agent_id=agent_id, doc_id=doc_id, limit=limit)
if not data.get("ok"):
raise HTTPException(status_code=400, detail=data.get("error", "Failed to load versions"))
return data
except HTTPException:
raise
except Exception as e:
logger.error(f"List document versions error: {e}", exc_info=True)
raise HTTPException(status_code=500, detail=str(e))
@router.get("/api/doc/context/{session_id}")
async def get_document_context(session_id: str):
"""
@@ -257,4 +333,3 @@ async def get_document_context(session_id: str):
except Exception as e:
logger.error(f"Get document context error: {e}", exc_info=True)
raise HTTPException(status_code=500, detail=str(e))

View File

@@ -51,6 +51,17 @@ class IngestResult(BaseModel):
error: Optional[str] = None
class UpdateResult(BaseModel):
"""Result of document update with version bump."""
success: bool
doc_id: Optional[str] = None
version_no: Optional[int] = None
version_id: Optional[int] = None
updated_chunks: int = 0
status: str = "unknown"
error: Optional[str] = None
class QAResult(BaseModel):
"""Result of RAG query about a document"""
success: bool
@@ -106,6 +117,27 @@ class DocumentService:
raise RuntimeError(f"Router error on {path}: {err}")
return body if isinstance(body, dict) else {"ok": False, "error": "Invalid router response type"}
async def _router_get_json(
self,
path: str,
timeout: float = 30.0,
) -> Dict[str, Any]:
import httpx
base = ROUTER_URL.rstrip("/")
url = f"{base}{path}"
async with httpx.AsyncClient(timeout=timeout) as client:
resp = await client.get(url)
body = {}
try:
body = resp.json()
except Exception:
body = {"ok": False, "error": f"Invalid JSON from router ({resp.status_code})"}
if resp.status_code >= 400:
err = body.get("detail") or body.get("error") or f"HTTP {resp.status_code}"
raise RuntimeError(f"Router error on {path}: {err}")
return body if isinstance(body, dict) else {"ok": False, "error": "Invalid router response type"}
def _is_excel_filename(self, file_name: Optional[str]) -> bool:
if not file_name:
return False
@@ -572,6 +604,152 @@ class DocumentService:
success=False,
error=str(e)
)
async def update_document(
self,
session_id: str,
doc_id: Optional[str] = None,
doc_url: Optional[str] = None,
file_name: Optional[str] = None,
text: Optional[str] = None,
dao_id: Optional[str] = None,
user_id: Optional[str] = None,
agent_id: str = "daarwizz",
storage_ref: Optional[str] = None,
metadata: Optional[Dict[str, Any]] = None,
) -> UpdateResult:
"""
Update existing document content and bump version in router memory.
"""
try:
context = await self.get_doc_context(session_id)
if context:
if not doc_id:
doc_id = context.doc_id
if not doc_url:
doc_url = context.doc_url
if not file_name:
file_name = context.file_name
if not dao_id:
dao_id = context.dao_id
if not doc_id:
return UpdateResult(
success=False,
status="failed",
error="No document context found. Provide doc_id or parse/ingest first.",
)
effective_text = (text or "").strip()
if not effective_text:
if not doc_url:
return UpdateResult(
success=False,
doc_id=doc_id,
status="failed",
error="No text or doc_url provided for update",
)
parsed = await self.parse_document(
session_id=session_id,
doc_url=doc_url,
file_name=file_name or "document",
dao_id=dao_id or "",
user_id=user_id or "",
output_mode="markdown",
metadata={"source": self._extract_source(session_id), "mode": "update"},
)
if not parsed.success:
return UpdateResult(
success=False,
doc_id=doc_id,
status="failed",
error=parsed.error or "Document parse failed",
)
effective_text = (parsed.markdown or "").strip()
if not effective_text:
return UpdateResult(
success=False,
doc_id=doc_id,
status="failed",
error="No extractable text for update",
)
meta = {
"session_id": session_id,
"source": self._extract_source(session_id),
}
if isinstance(metadata, dict):
meta.update(metadata)
response = await self._router_post_json(
"/v1/documents/update",
{
"agent_id": (agent_id or "daarwizz").lower(),
"doc_id": doc_id,
"file_name": file_name,
"text": effective_text,
"dao_id": dao_id,
"user_id": user_id,
"storage_ref": storage_ref,
"metadata": meta,
},
timeout=90.0,
)
if not response.get("ok"):
return UpdateResult(
success=False,
doc_id=doc_id,
status="failed",
error=response.get("error", "Router update failed"),
)
await self.save_doc_context(
session_id=session_id,
doc_id=doc_id,
doc_url=doc_url,
file_name=file_name,
dao_id=dao_id,
user_id=user_id,
)
return UpdateResult(
success=True,
doc_id=response.get("doc_id") or doc_id,
version_no=int(response.get("version_no", 0) or 0) or None,
version_id=int(response.get("version_id", 0) or 0) or None,
updated_chunks=int(response.get("chunks_stored", 0) or 0),
status="updated",
)
except Exception as e:
logger.error(f"Document update failed: {e}", exc_info=True)
return UpdateResult(
success=False,
doc_id=doc_id,
status="failed",
error=str(e),
)
async def list_document_versions(
self,
agent_id: str,
doc_id: str,
limit: int = 20,
) -> Dict[str, Any]:
aid = (agent_id or "daarwizz").lower()
did = (doc_id or "").strip()
if not did:
return {"ok": False, "error": "doc_id is required", "items": []}
try:
response = await self._router_get_json(
f"/v1/documents/{did}/versions?agent_id={aid}&limit={max(1, min(int(limit or 20), 200))}",
timeout=30.0,
)
return response if isinstance(response, dict) else {"ok": False, "error": "invalid_response", "items": []}
except Exception as e:
logger.error(f"list_document_versions failed: {e}")
return {"ok": False, "error": str(e), "items": []}
async def ask_about_document(
self,
@@ -762,6 +940,42 @@ async def ask_about_document(
)
async def update_document(
session_id: str,
doc_id: Optional[str] = None,
doc_url: Optional[str] = None,
file_name: Optional[str] = None,
text: Optional[str] = None,
dao_id: Optional[str] = None,
user_id: Optional[str] = None,
agent_id: str = "daarwizz",
storage_ref: Optional[str] = None,
metadata: Optional[Dict[str, Any]] = None,
) -> UpdateResult:
"""Update document chunks and bump version."""
return await doc_service.update_document(
session_id=session_id,
doc_id=doc_id,
doc_url=doc_url,
file_name=file_name,
text=text,
dao_id=dao_id,
user_id=user_id,
agent_id=agent_id,
storage_ref=storage_ref,
metadata=metadata,
)
async def list_document_versions(agent_id: str, doc_id: str, limit: int = 20) -> Dict[str, Any]:
"""List document versions from router."""
return await doc_service.list_document_versions(
agent_id=agent_id,
doc_id=doc_id,
limit=limit,
)
async def save_doc_context(
session_id: str,
doc_id: str,