feat(docs): add document write-back publish pipeline
This commit is contained in:
@@ -22,6 +22,7 @@ RUN pip install --no-cache-dir \
|
||||
nats-py \
|
||||
pandas \
|
||||
openpyxl \
|
||||
python-docx \
|
||||
redis==5.0.1
|
||||
|
||||
# Copy gateway code and DAARWIZZ prompt
|
||||
|
||||
@@ -7,6 +7,7 @@ Endpoints:
|
||||
- POST /api/doc/ingest - Ingest document to RAG
|
||||
- POST /api/doc/ask - Ask question about document
|
||||
- POST /api/doc/update - Update existing document text (versioned)
|
||||
- POST /api/doc/publish - Publish physical file version via artifact registry
|
||||
- GET /api/doc/versions/{doc_id} - List document versions
|
||||
"""
|
||||
import logging
|
||||
@@ -21,6 +22,7 @@ from services.doc_service import (
|
||||
ask_about_document,
|
||||
update_document,
|
||||
list_document_versions,
|
||||
publish_document_artifact,
|
||||
get_doc_context,
|
||||
ParsedResult,
|
||||
IngestResult,
|
||||
@@ -81,6 +83,25 @@ class UpdateDocumentRequest(BaseModel):
|
||||
user_id: Optional[str] = None
|
||||
agent_id: str = "daarwizz"
|
||||
storage_ref: Optional[str] = None
|
||||
publish_artifact: bool = False
|
||||
artifact_id: Optional[str] = None
|
||||
target_format: Optional[str] = None
|
||||
artifact_label: Optional[str] = None
|
||||
metadata: Optional[Dict[str, Any]] = None
|
||||
|
||||
|
||||
class PublishDocumentRequest(BaseModel):
    """Body of ``POST /api/doc/publish``.

    Only ``session_id`` is mandatory. Every field is forwarded unchanged to
    ``publish_document_artifact`` by the endpoint handler.
    """

    # Session whose stored document context may supply missing fields.
    session_id: str

    # Document identity and source.
    doc_id: Optional[str] = None
    doc_url: Optional[str] = None
    file_name: Optional[str] = None

    # Explicit text to publish.
    text: Optional[str] = None

    # Ownership / attribution.
    dao_id: Optional[str] = None
    user_id: Optional[str] = None

    # Artifact targeting and rendering options.
    artifact_id: Optional[str] = None
    target_format: Optional[str] = None
    artifact_label: Optional[str] = None

    # Free-form metadata passed along with the published version.
    metadata: Optional[Dict[str, Any]] = None
|
||||
|
||||
|
||||
@@ -267,18 +288,29 @@ async def update_document_endpoint(request: UpdateDocumentRequest):
|
||||
user_id=request.user_id,
|
||||
agent_id=request.agent_id,
|
||||
storage_ref=request.storage_ref,
|
||||
publish_artifact=request.publish_artifact,
|
||||
artifact_id=request.artifact_id,
|
||||
target_format=request.target_format,
|
||||
artifact_label=request.artifact_label,
|
||||
metadata=request.metadata,
|
||||
)
|
||||
if not result.success:
|
||||
raise HTTPException(status_code=400, detail=result.error)
|
||||
return {
|
||||
response = {
|
||||
"ok": True,
|
||||
"doc_id": result.doc_id,
|
||||
"version_no": result.version_no,
|
||||
"version_id": result.version_id,
|
||||
"updated_chunks": result.updated_chunks,
|
||||
"status": result.status,
|
||||
"publish_error": result.publish_error,
|
||||
"artifact_id": result.artifact_id,
|
||||
"artifact_version_id": result.artifact_version_id,
|
||||
"artifact_storage_key": result.artifact_storage_key,
|
||||
"artifact_mime": result.artifact_mime,
|
||||
"artifact_download_url": result.artifact_download_url,
|
||||
}
|
||||
return response
|
||||
except HTTPException:
|
||||
raise
|
||||
except Exception as e:
|
||||
@@ -286,6 +318,43 @@ async def update_document_endpoint(request: UpdateDocumentRequest):
|
||||
raise HTTPException(status_code=500, detail=str(e))
|
||||
|
||||
|
||||
@router.post("/api/doc/publish")
async def publish_document_endpoint(request: PublishDocumentRequest):
    """
    Publish the current document text as a physical file artifact version.

    Responds with ``{"ok": True, ...}`` describing the stored version.
    A failed publish result becomes HTTP 400 carrying the service's error;
    any unexpected exception is logged and becomes HTTP 500.
    """
    try:
        outcome = await publish_document_artifact(
            session_id=request.session_id,
            doc_id=request.doc_id,
            doc_url=request.doc_url,
            file_name=request.file_name,
            text=request.text,
            dao_id=request.dao_id,
            user_id=request.user_id,
            artifact_id=request.artifact_id,
            target_format=request.target_format,
            artifact_label=request.artifact_label,
            metadata=request.metadata,
        )
        if outcome.success:
            # Response keys mirror the PublishResult attribute names.
            exposed = (
                "artifact_id",
                "version_id",
                "storage_key",
                "mime",
                "file_name",
                "download_url",
            )
            return {"ok": True, **{key: getattr(outcome, key) for key in exposed}}
        raise HTTPException(status_code=400, detail=outcome.error)
    except HTTPException:
        # Already a well-formed HTTP error; do not rewrap it as a 500.
        raise
    except Exception as e:
        logger.error(f"Publish document error: {e}", exc_info=True)
        raise HTTPException(status_code=500, detail=str(e))
|
||||
|
||||
|
||||
@router.get("/api/doc/versions/{doc_id}")
|
||||
async def list_document_versions_endpoint(doc_id: str, agent_id: str = "daarwizz", limit: int = 20):
|
||||
"""
|
||||
|
||||
@@ -11,11 +11,13 @@ This service can be used by:
|
||||
import os
|
||||
import logging
|
||||
import hashlib
|
||||
import base64
|
||||
import json
|
||||
import re
|
||||
from typing import Optional, Dict, Any, List
|
||||
from pydantic import BaseModel
|
||||
from datetime import datetime
|
||||
from io import BytesIO
|
||||
|
||||
from memory_client import memory_client
|
||||
|
||||
@@ -23,6 +25,8 @@ logger = logging.getLogger(__name__)
|
||||
|
||||
SHARED_EXCEL_POLICY_AGENTS = {"agromatrix", "helion", "nutra", "greenfood"}
|
||||
ROUTER_URL = os.getenv("ROUTER_URL", "http://router:8000")
|
||||
ARTIFACT_REGISTRY_URL = os.getenv("ARTIFACT_REGISTRY_URL", "http://artifact-registry:9220").rstrip("/")
|
||||
DOC_WRITEBACK_CREATED_BY = os.getenv("DOC_WRITEBACK_CREATED_BY", "gateway-doc-service")
|
||||
|
||||
|
||||
class QAItem(BaseModel):
|
||||
@@ -59,6 +63,24 @@ class UpdateResult(BaseModel):
|
||||
version_id: Optional[int] = None
|
||||
updated_chunks: int = 0
|
||||
status: str = "unknown"
|
||||
publish_error: Optional[str] = None
|
||||
artifact_id: Optional[str] = None
|
||||
artifact_version_id: Optional[str] = None
|
||||
artifact_storage_key: Optional[str] = None
|
||||
artifact_mime: Optional[str] = None
|
||||
artifact_download_url: Optional[str] = None
|
||||
error: Optional[str] = None
|
||||
|
||||
|
||||
class PublishResult(BaseModel):
    """Outcome of writing a document back to the artifact registry."""

    # True only when a version was created in the registry.
    success: bool

    # Registry identifiers of the artifact and the newly created version.
    artifact_id: Optional[str] = None
    version_id: Optional[str] = None

    # Details of the stored file.
    storage_key: Optional[str] = None
    mime: Optional[str] = None
    file_name: Optional[str] = None

    # Download link for the version; may stay None even on success.
    download_url: Optional[str] = None

    # Failure description when ``success`` is False.
    error: Optional[str] = None
|
||||
|
||||
|
||||
@@ -138,6 +160,217 @@ class DocumentService:
|
||||
raise RuntimeError(f"Router error on {path}: {err}")
|
||||
return body if isinstance(body, dict) else {"ok": False, "error": "Invalid router response type"}
|
||||
|
||||
async def _artifact_post_json(
    self,
    path: str,
    payload: Dict[str, Any],
    timeout: float = 45.0,
) -> Dict[str, Any]:
    """POST ``payload`` as JSON to the artifact registry and return its JSON body.

    Args:
        path: Path appended to ``ARTIFACT_REGISTRY_URL`` (expected to start with "/").
        payload: JSON-serialisable request body.
        timeout: Timeout in seconds handed to the HTTP client.

    Returns:
        The decoded JSON object. A successful response whose body is not a
        JSON object yields ``{"ok": False, "error": ...}`` instead.

    Raises:
        RuntimeError: The registry answered with HTTP status >= 400.
    """
    import httpx

    base = ARTIFACT_REGISTRY_URL.rstrip("/")
    url = f"{base}{path}"
    async with httpx.AsyncClient(timeout=timeout) as client:
        resp = await client.post(url, json=payload)
        try:
            body = resp.json()
        except ValueError:
            # JSON decode errors (and bad encodings) are ValueError subclasses.
            body = {"ok": False, "error": f"Invalid JSON from artifact-registry ({resp.status_code})"}
        if resp.status_code >= 400:
            # An error body may be any JSON value (list, string, null); only a
            # dict can be probed for "detail"/"error" without raising.
            details = body if isinstance(body, dict) else {}
            err = details.get("detail") or details.get("error") or f"HTTP {resp.status_code}"
            raise RuntimeError(f"Artifact registry error on {path}: {err}")
        return body if isinstance(body, dict) else {"ok": False, "error": "Invalid artifact response type"}
|
||||
|
||||
async def _artifact_get_json(
    self,
    path: str,
    timeout: float = 30.0,
) -> Dict[str, Any]:
    """GET a JSON document from the artifact registry.

    Args:
        path: Path appended to ``ARTIFACT_REGISTRY_URL`` (expected to start with "/").
        timeout: Timeout in seconds handed to the HTTP client.

    Returns:
        The decoded JSON object. A successful response whose body is not a
        JSON object yields ``{"ok": False, "error": ...}`` instead.

    Raises:
        RuntimeError: The registry answered with HTTP status >= 400.
    """
    import httpx

    base = ARTIFACT_REGISTRY_URL.rstrip("/")
    url = f"{base}{path}"
    async with httpx.AsyncClient(timeout=timeout) as client:
        resp = await client.get(url)
        try:
            body = resp.json()
        except ValueError:
            # JSON decode errors (and bad encodings) are ValueError subclasses.
            body = {"ok": False, "error": f"Invalid JSON from artifact-registry ({resp.status_code})"}
        if resp.status_code >= 400:
            # An error body may be any JSON value (list, string, null); only a
            # dict can be probed for "detail"/"error" without raising.
            details = body if isinstance(body, dict) else {}
            err = details.get("detail") or details.get("error") or f"HTTP {resp.status_code}"
            raise RuntimeError(f"Artifact registry error on {path}: {err}")
        return body if isinstance(body, dict) else {"ok": False, "error": "Invalid artifact response type"}
|
||||
|
||||
def _resolve_format(self, file_name: Optional[str], target_format: Optional[str]) -> str:
|
||||
fmt = (target_format or "").strip().lower().lstrip(".")
|
||||
if fmt:
|
||||
return fmt
|
||||
if file_name and "." in file_name:
|
||||
return file_name.rsplit(".", 1)[1].strip().lower()
|
||||
return "txt"
|
||||
|
||||
def _compose_output_name(self, file_name: Optional[str], doc_id: str, fmt: str) -> str:
|
||||
base = "document"
|
||||
if file_name:
|
||||
base = file_name.rsplit("/", 1)[-1].rsplit("\\", 1)[-1]
|
||||
if "." in base:
|
||||
base = base.rsplit(".", 1)[0]
|
||||
elif doc_id:
|
||||
base = doc_id
|
||||
safe_base = re.sub(r"[^A-Za-z0-9._-]+", "_", base).strip("._") or "document"
|
||||
return f"{safe_base}.{fmt}"
|
||||
|
||||
def _render_document_bytes(
    self,
    text: str,
    file_name: Optional[str],
    doc_id: str,
    target_format: Optional[str] = None,
) -> Dict[str, Any]:
    """Render document text into file bytes for the resolved format.

    Args:
        text: Document text; surrounding whitespace is stripped.
        file_name: Original file name, used for the extension and output name.
        doc_id: Used for the output name when ``file_name`` is missing.
        target_format: Explicit format overriding the file extension.

    Returns:
        Dict with ``"bytes"`` (file content), ``"mime"`` and ``"file_name"``.
        ``xlsx``/``xlsm``/``xls`` all produce an ``.xlsx`` workbook with one
        line per row; unrecognised formats fall back to UTF-8 plain text
        with a ``.txt`` name.

    Raises:
        ValueError: The text is empty after stripping.
        RuntimeError: openpyxl / python-docx is needed but cannot be imported.
    """
    body = (text or "").strip()
    if not body:
        raise ValueError("Cannot render empty document text")

    fmt = self._resolve_format(file_name=file_name, target_format=target_format)
    output_name = self._compose_output_name(file_name=file_name, doc_id=doc_id, fmt=fmt)

    if fmt == "txt":
        return {"bytes": body.encode("utf-8"), "mime": "text/plain; charset=utf-8", "file_name": output_name}
    if fmt in {"md", "markdown"}:
        return {"bytes": body.encode("utf-8"), "mime": "text/markdown; charset=utf-8", "file_name": output_name}
    if fmt == "json":
        parsed: Any
        try:
            parsed = json.loads(body)
        except (ValueError, RecursionError):
            # Not valid JSON: wrap the raw text so the output still parses.
            parsed = {"text": body}
        payload = json.dumps(parsed, ensure_ascii=False, indent=2).encode("utf-8")
        return {"bytes": payload, "mime": "application/json", "file_name": output_name}
    if fmt == "csv":
        return {"bytes": body.encode("utf-8"), "mime": "text/csv; charset=utf-8", "file_name": output_name}
    if fmt in {"xlsx", "xlsm", "xls"}:
        try:
            from openpyxl import Workbook
        except Exception as e:
            raise RuntimeError(f"openpyxl is required for {fmt} rendering: {e}") from e
        wb = Workbook()
        ws = wb.active
        ws.title = "Document"
        # body is non-empty after strip(), so splitlines() yields >= 1 line.
        for idx, line in enumerate(body.splitlines(), start=1):
            cell = ws.cell(row=idx, column=1, value=line)
            # openpyxl treats strings starting with "=" as formulas; document
            # text must stay literal, so force the string cell type.
            cell.data_type = "s"
        buf = BytesIO()
        wb.save(buf)
        mime = "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
        # The workbook is always written as xlsx, whatever was requested.
        return {"bytes": buf.getvalue(), "mime": mime, "file_name": self._compose_output_name(file_name, doc_id, "xlsx")}
    if fmt == "docx":
        try:
            from docx import Document
        except Exception as e:
            raise RuntimeError(f"python-docx is required for docx rendering: {e}") from e
        doc = Document()
        for line in body.splitlines():
            # Blank lines become a single-space paragraph.
            doc.add_paragraph(line or " ")
        buf = BytesIO()
        doc.save(buf)
        mime = "application/vnd.openxmlformats-officedocument.wordprocessingml.document"
        return {"bytes": buf.getvalue(), "mime": mime, "file_name": self._compose_output_name(file_name, doc_id, "docx")}

    # Unknown format: degrade to plain text instead of failing the publish.
    fallback_name = self._compose_output_name(file_name=file_name, doc_id=doc_id, fmt="txt")
    return {"bytes": body.encode("utf-8"), "mime": "text/plain; charset=utf-8", "file_name": fallback_name}
|
||||
|
||||
async def _publish_text_artifact(
    self,
    text: str,
    doc_id: str,
    file_name: Optional[str] = None,
    dao_id: Optional[str] = None,
    user_id: Optional[str] = None,
    artifact_id: Optional[str] = None,
    target_format: Optional[str] = None,
    label: Optional[str] = None,
    metadata: Optional[Dict[str, Any]] = None,
) -> PublishResult:
    """Render ``text`` to a file and store it as a new artifact version.

    Steps: render the bytes, create a ``doc`` artifact when no
    ``artifact_id`` is given, upload the content as a base64 version, then
    try to obtain a download URL (a failure there is only logged).

    Args:
        text: Document text to render.
        doc_id: Document id; recorded in the version metadata.
        file_name: Original file name, used for the title and output name.
        dao_id: Sent as both ``project_id`` and ``acl_ref`` of a new artifact.
        user_id: Creator of a new artifact; defaults to
            ``DOC_WRITEBACK_CREATED_BY``.
        artifact_id: Existing artifact to add the version to.
        target_format: Explicit output format.
        label: Version label; defaults to ``"edited"``.
        metadata: Extra metadata merged over the default ``doc_id``/``source``.

    Returns:
        A ``PublishResult``. This method never raises: every exception is
        logged and reported as ``success=False``.
    """
    from urllib.parse import quote

    try:
        rendered = self._render_document_bytes(
            text=text,
            file_name=file_name,
            doc_id=doc_id,
            target_format=target_format,
        )
        content_b64 = base64.b64encode(rendered["bytes"]).decode("ascii")

        effective_artifact_id = (artifact_id or "").strip()
        if not effective_artifact_id:
            create_resp = await self._artifact_post_json(
                "/artifacts",
                {
                    "type": "doc",
                    "title": file_name or doc_id,
                    "project_id": dao_id,
                    "acl_ref": dao_id,
                    "created_by": user_id or DOC_WRITEBACK_CREATED_BY,
                },
                timeout=30.0,
            )
            effective_artifact_id = str(create_resp.get("artifact_id") or "").strip()
            if not effective_artifact_id:
                return PublishResult(success=False, error="Artifact create failed: empty artifact_id")

        # artifact_id can come straight from an API request: percent-encode it
        # so "/", "..", "?" or "#" cannot redirect the call to another route.
        artifact_path = f"/artifacts/{quote(effective_artifact_id, safe='')}"

        meta = {"doc_id": doc_id, "source": "doc_update_publish"}
        if isinstance(metadata, dict):
            meta.update(metadata)

        version_resp = await self._artifact_post_json(
            f"{artifact_path}/versions/from_base64",
            {
                "content_base64": content_b64,
                "mime": rendered["mime"],
                "filename": rendered["file_name"],
                "label": label or "edited",
                "meta_json": meta,
            },
            timeout=45.0,
        )
        version_id = str(version_resp.get("version_id") or "").strip()
        storage_key = version_resp.get("storage_key")
        if not version_id:
            return PublishResult(
                success=False,
                artifact_id=effective_artifact_id,
                error="Artifact version create failed: empty version_id",
            )

        download_url = None
        try:
            dl = await self._artifact_get_json(
                f"{artifact_path}/versions/{quote(version_id, safe='')}/download",
                timeout=20.0,
            )
            download_url = dl.get("url")
        except Exception as e:
            # The version is already stored; a missing link is not fatal.
            logger.warning(f"version download url generation failed: {e}")

        return PublishResult(
            success=True,
            artifact_id=effective_artifact_id,
            version_id=version_id,
            storage_key=storage_key,
            mime=rendered["mime"],
            file_name=rendered["file_name"],
            download_url=download_url,
        )
    except Exception as e:
        logger.error(f"publish_text_artifact failed: {e}", exc_info=True)
        return PublishResult(success=False, error=str(e))
|
||||
|
||||
def _is_excel_filename(self, file_name: Optional[str]) -> bool:
|
||||
if not file_name:
|
||||
return False
|
||||
@@ -616,6 +849,10 @@ class DocumentService:
|
||||
user_id: Optional[str] = None,
|
||||
agent_id: str = "daarwizz",
|
||||
storage_ref: Optional[str] = None,
|
||||
publish_artifact: bool = False,
|
||||
artifact_id: Optional[str] = None,
|
||||
target_format: Optional[str] = None,
|
||||
artifact_label: Optional[str] = None,
|
||||
metadata: Optional[Dict[str, Any]] = None,
|
||||
) -> UpdateResult:
|
||||
"""
|
||||
@@ -714,13 +951,33 @@ class DocumentService:
|
||||
user_id=user_id,
|
||||
)
|
||||
|
||||
publish = PublishResult(success=False)
|
||||
if publish_artifact:
|
||||
publish = await self._publish_text_artifact(
|
||||
text=effective_text,
|
||||
doc_id=doc_id,
|
||||
file_name=file_name,
|
||||
dao_id=dao_id,
|
||||
user_id=user_id,
|
||||
artifact_id=artifact_id,
|
||||
target_format=target_format,
|
||||
label=artifact_label,
|
||||
metadata=meta,
|
||||
)
|
||||
|
||||
return UpdateResult(
|
||||
success=True,
|
||||
doc_id=response.get("doc_id") or doc_id,
|
||||
version_no=int(response.get("version_no", 0) or 0) or None,
|
||||
version_id=int(response.get("version_id", 0) or 0) or None,
|
||||
updated_chunks=int(response.get("chunks_stored", 0) or 0),
|
||||
status="updated",
|
||||
status="updated_published" if publish_artifact and publish.success else ("updated_publish_failed" if publish_artifact else "updated"),
|
||||
publish_error=publish.error if publish_artifact and not publish.success else None,
|
||||
artifact_id=publish.artifact_id if publish_artifact else None,
|
||||
artifact_version_id=publish.version_id if publish_artifact else None,
|
||||
artifact_storage_key=publish.storage_key if publish_artifact else None,
|
||||
artifact_mime=publish.mime if publish_artifact else None,
|
||||
artifact_download_url=publish.download_url if publish_artifact else None,
|
||||
)
|
||||
except Exception as e:
|
||||
logger.error(f"Document update failed: {e}", exc_info=True)
|
||||
@@ -751,6 +1008,75 @@ class DocumentService:
|
||||
logger.error(f"list_document_versions failed: {e}")
|
||||
return {"ok": False, "error": str(e), "items": []}
|
||||
|
||||
async def publish_document_artifact(
    self,
    session_id: str,
    doc_id: Optional[str] = None,
    doc_url: Optional[str] = None,
    file_name: Optional[str] = None,
    text: Optional[str] = None,
    dao_id: Optional[str] = None,
    user_id: Optional[str] = None,
    artifact_id: Optional[str] = None,
    target_format: Optional[str] = None,
    artifact_label: Optional[str] = None,
    metadata: Optional[Dict[str, Any]] = None,
) -> PublishResult:
    """
    Publish text as a physical artifact version (.docx/.xlsx/.txt/...) without changing RAG index.

    Missing ``doc_id``, ``doc_url``, ``file_name``, ``dao_id`` and
    ``user_id`` are filled from the session's stored document context.
    When ``text`` is blank the document at ``doc_url`` is parsed to
    markdown and that is published instead. Failures are returned as
    ``PublishResult(success=False, ...)``; nothing is raised.
    """
    try:
        ctx = await self.get_doc_context(session_id)
        if ctx:
            # Explicit arguments take precedence over the session context.
            doc_id = doc_id or ctx.doc_id
            doc_url = doc_url or ctx.doc_url
            file_name = file_name or ctx.file_name
            dao_id = dao_id or ctx.dao_id
            user_id = user_id or ctx.user_id

        if not doc_id:
            return PublishResult(success=False, error="doc_id is required")

        content = (text or "").strip()
        if not content:
            # No text supplied: recover it from the source document.
            if not doc_url:
                return PublishResult(success=False, error="text or doc_url is required")
            parse_outcome = await self.parse_document(
                session_id=session_id,
                doc_url=doc_url,
                file_name=file_name or "document",
                dao_id=dao_id or "",
                user_id=user_id or "",
                output_mode="markdown",
                metadata={"source": self._extract_source(session_id), "mode": "publish"},
            )
            if not parse_outcome.success:
                return PublishResult(success=False, error=parse_outcome.error or "Document parse failed")
            content = (parse_outcome.markdown or "").strip()

        if not content:
            return PublishResult(success=False, error="No text available for publish")

        return await self._publish_text_artifact(
            text=content,
            doc_id=doc_id,
            file_name=file_name,
            dao_id=dao_id,
            user_id=user_id,
            artifact_id=artifact_id,
            target_format=target_format,
            label=artifact_label,
            metadata=metadata,
        )
    except Exception as e:
        logger.error(f"publish_document_artifact failed: {e}", exc_info=True)
        return PublishResult(success=False, error=str(e))
|
||||
|
||||
async def ask_about_document(
|
||||
self,
|
||||
session_id: str,
|
||||
@@ -950,6 +1276,10 @@ async def update_document(
|
||||
user_id: Optional[str] = None,
|
||||
agent_id: str = "daarwizz",
|
||||
storage_ref: Optional[str] = None,
|
||||
publish_artifact: bool = False,
|
||||
artifact_id: Optional[str] = None,
|
||||
target_format: Optional[str] = None,
|
||||
artifact_label: Optional[str] = None,
|
||||
metadata: Optional[Dict[str, Any]] = None,
|
||||
) -> UpdateResult:
|
||||
"""Update document chunks and bump version."""
|
||||
@@ -963,6 +1293,10 @@ async def update_document(
|
||||
user_id=user_id,
|
||||
agent_id=agent_id,
|
||||
storage_ref=storage_ref,
|
||||
publish_artifact=publish_artifact,
|
||||
artifact_id=artifact_id,
|
||||
target_format=target_format,
|
||||
artifact_label=artifact_label,
|
||||
metadata=metadata,
|
||||
)
|
||||
|
||||
@@ -976,6 +1310,35 @@ async def list_document_versions(agent_id: str, doc_id: str, limit: int = 20) ->
|
||||
)
|
||||
|
||||
|
||||
async def publish_document_artifact(
    session_id: str,
    doc_id: Optional[str] = None,
    doc_url: Optional[str] = None,
    file_name: Optional[str] = None,
    text: Optional[str] = None,
    dao_id: Optional[str] = None,
    user_id: Optional[str] = None,
    artifact_id: Optional[str] = None,
    target_format: Optional[str] = None,
    artifact_label: Optional[str] = None,
    metadata: Optional[Dict[str, Any]] = None,
) -> PublishResult:
    """Publish physical artifact version for document text.

    Module-level convenience wrapper: forwards every argument, by keyword,
    to ``doc_service.publish_document_artifact`` and returns its result.
    """
    forwarded = dict(
        session_id=session_id,
        doc_id=doc_id,
        doc_url=doc_url,
        file_name=file_name,
        text=text,
        dao_id=dao_id,
        user_id=user_id,
        artifact_id=artifact_id,
        target_format=target_format,
        artifact_label=artifact_label,
        metadata=metadata,
    )
    return await doc_service.publish_document_artifact(**forwarded)
|
||||
|
||||
|
||||
async def save_doc_context(
|
||||
session_id: str,
|
||||
doc_id: str,
|
||||
|
||||
@@ -6,13 +6,15 @@ Artifact Registry v0
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import base64
|
||||
import hashlib
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import re
|
||||
import uuid
|
||||
from io import BytesIO
|
||||
from datetime import datetime
|
||||
from datetime import datetime, timedelta
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
import asyncpg
|
||||
@@ -90,6 +92,14 @@ class ArtifactVersionFromUrlRequest(BaseModel):
|
||||
meta_json: Optional[Dict[str, Any]] = None
|
||||
|
||||
|
||||
class ArtifactVersionFromBase64Request(BaseModel):
    """Request body for creating an artifact version from inline base64 content."""

    # File content, base64-encoded.
    content_base64: str

    # MIME type to record for the stored version.
    mime: str

    # Name for the stored file.
    filename: Optional[str] = "source.bin"

    # Version label.
    label: Optional[str] = "source"

    # Optional free-form metadata for the version.
    meta_json: Optional[Dict[str, Any]] = None
|
||||
|
||||
|
||||
class ArtifactVersionResponse(BaseModel):
|
||||
version_id: str
|
||||
storage_key: str
|
||||
@@ -208,15 +218,38 @@ def _normalize_meta_json(meta: Any) -> Dict[str, Any]:
|
||||
|
||||
def _format_to_mime(fmt: str) -> str:
|
||||
fmt = fmt.lower()
|
||||
if "/" in fmt:
|
||||
return fmt
|
||||
if fmt == "pptx":
|
||||
return "application/vnd.openxmlformats-officedocument.presentationml.presentation"
|
||||
if fmt == "pdf":
|
||||
return "application/pdf"
|
||||
if fmt == "source":
|
||||
return "application/json"
|
||||
if fmt == "docx":
|
||||
return "application/vnd.openxmlformats-officedocument.wordprocessingml.document"
|
||||
if fmt == "xlsx":
|
||||
return "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
|
||||
if fmt == "txt":
|
||||
return "text/plain; charset=utf-8"
|
||||
if fmt == "md":
|
||||
return "text/markdown; charset=utf-8"
|
||||
if fmt == "json":
|
||||
return "application/json"
|
||||
if fmt == "csv":
|
||||
return "text/csv; charset=utf-8"
|
||||
return "application/octet-stream"
|
||||
|
||||
|
||||
def _safe_filename(name: Optional[str], fallback: str = "source.bin") -> str:
|
||||
raw = (name or fallback).strip() or fallback
|
||||
cleaned = re.sub(r"[^A-Za-z0-9._-]+", "_", raw)
|
||||
cleaned = cleaned.strip("._")
|
||||
if not cleaned:
|
||||
return fallback
|
||||
return cleaned[:120]
|
||||
|
||||
|
||||
async def _download_bytes(url: str) -> bytes:
|
||||
async with httpx.AsyncClient(timeout=60.0) as client:
|
||||
resp = await client.get(url)
|
||||
@@ -462,6 +495,73 @@ async def add_version_from_url(artifact_id: str, payload: ArtifactVersionFromUrl
|
||||
)
|
||||
|
||||
|
||||
@app.post("/artifacts/{artifact_id}/versions/from_base64", response_model=ArtifactVersionResponse)
async def add_version_from_base64(artifact_id: str, payload: ArtifactVersionFromBase64Request) -> ArtifactVersionResponse:
    """Create an artifact version from base64 content carried in the request.

    The payload (optionally a ``data:`` URI) is decoded, uploaded to MinIO
    and recorded in ``artifact_versions``.

    Raises:
        HTTPException 500: MinIO or the DB pool is not available.
        HTTPException 400: Missing, invalid or empty base64 content.
        HTTPException 502: MinIO rejected the upload.
    """
    if not minio_client:
        raise HTTPException(status_code=500, detail="MinIO not available")
    if not pool:
        raise HTTPException(status_code=500, detail="DB not available")

    raw = (payload.content_base64 or "").strip()
    if not raw:
        raise HTTPException(status_code=400, detail="content_base64 is required")

    # Accept "data:<mime>;base64,<payload>" by keeping only the payload part.
    if raw.startswith("data:") and "," in raw:
        raw = raw.split(",", 1)[1]

    try:
        content = base64.b64decode(raw, validate=True)
    except ValueError:
        # binascii.Error (bad alphabet/padding) is a ValueError subclass, as
        # is the error raised for non-ASCII input.
        raise HTTPException(status_code=400, detail="Invalid base64 payload") from None

    if not content:
        raise HTTPException(status_code=400, detail="Decoded payload is empty")

    version_id = f"ver_{uuid.uuid4().hex}"
    filename = _safe_filename(payload.filename, fallback="source.bin")
    sha256 = _hash_bytes(content)
    storage_key = _storage_key(artifact_id, version_id, filename)

    try:
        minio_client.put_object(
            MINIO_BUCKET,
            storage_key,
            data=BytesIO(content),
            length=len(content),
            content_type=payload.mime,
        )
    except S3Error as e:
        raise HTTPException(status_code=502, detail=f"MinIO error: {e}")

    meta_json = _normalize_meta_json(payload.meta_json)
    if "file_name" not in meta_json:
        meta_json["file_name"] = filename

    try:
        async with pool.acquire() as conn:
            await conn.execute(
                """
                insert into artifact_versions
                (id, artifact_id, label, sha256, mime, size_bytes, storage_key, meta_json)
                values ($1, $2, $3, $4, $5, $6, $7, $8)
                """,
                version_id,
                artifact_id,
                payload.label or "source",
                sha256,
                payload.mime,
                len(content),
                storage_key,
                json.dumps(meta_json),
            )
    except Exception:
        # The object was uploaded but its row was not written: remove it so a
        # failed insert does not leave an unreferenced object in the bucket.
        # Cleanup is best-effort; the original error is what gets re-raised.
        try:
            minio_client.remove_object(MINIO_BUCKET, storage_key)
        except S3Error as cleanup_err:
            logging.getLogger(__name__).warning(
                "Failed to remove orphaned object %s: %s", storage_key, cleanup_err
            )
        raise

    return ArtifactVersionResponse(
        version_id=version_id,
        storage_key=storage_key,
        sha256=sha256,
        size_bytes=len(content),
    )
|
||||
|
||||
|
||||
@app.post("/artifacts/{artifact_id}/versions", response_model=ArtifactVersionResponse)
|
||||
async def add_version(artifact_id: str, payload: ArtifactVersionCreateRequest) -> ArtifactVersionResponse:
|
||||
if not pool:
|
||||
@@ -678,7 +778,39 @@ async def download_artifact(artifact_id: str, format: str = Query("pptx")) -> Di
|
||||
if not row:
|
||||
raise HTTPException(status_code=404, detail="Version not found")
|
||||
try:
|
||||
url = minio_client.presigned_get_object(MINIO_BUCKET, row["storage_key"], expires=1800)
|
||||
url = minio_client.presigned_get_object(
|
||||
MINIO_BUCKET,
|
||||
row["storage_key"],
|
||||
expires=timedelta(seconds=1800),
|
||||
)
|
||||
except S3Error as e:
|
||||
raise HTTPException(status_code=502, detail=f"MinIO error: {e}")
|
||||
return {"url": url, "storage_key": row["storage_key"], "mime": row["mime"]}
|
||||
|
||||
|
||||
@app.get("/artifacts/{artifact_id}/versions/{version_id}/download")
async def download_artifact_version(artifact_id: str, version_id: str) -> Dict[str, Any]:
    """Return a presigned download URL for one specific artifact version.

    Responds 500 when the DB pool or MinIO client is missing, 404 when no
    such version belongs to the artifact, and 502 on a MinIO error. The
    presigned URL is requested with a 1800-second expiry.
    """
    if not (pool and minio_client):
        raise HTTPException(status_code=500, detail="Service not available")

    lookup_sql = """
        select * from artifact_versions
        where artifact_id=$1 and id=$2
        limit 1
        """
    async with pool.acquire() as conn:
        version_row = await conn.fetchrow(lookup_sql, artifact_id, version_id)

    if not version_row:
        raise HTTPException(status_code=404, detail="Version not found")

    object_key = version_row["storage_key"]
    link_ttl = timedelta(seconds=1800)
    try:
        signed_url = minio_client.presigned_get_object(MINIO_BUCKET, object_key, expires=link_ttl)
    except S3Error as e:
        raise HTTPException(status_code=502, detail=f"MinIO error: {e}")

    return {
        "url": signed_url,
        "storage_key": object_key,
        "mime": version_row["mime"],
        "version_id": version_row["id"],
    }
|
||||
|
||||
Reference in New Issue
Block a user