feat(docs): add document write-back publish pipeline

This commit is contained in:
NODA1 System
2026-02-21 17:02:55 +01:00
parent f53e71a0f4
commit cca16254e5
4 changed files with 569 additions and 4 deletions

View File

@@ -6,13 +6,15 @@ Artifact Registry v0
"""
import asyncio
import base64
import hashlib
import json
import logging
import os
import re
import uuid
from io import BytesIO
from datetime import datetime
from datetime import datetime, timedelta
from typing import Any, Dict, List, Optional
import asyncpg
@@ -90,6 +92,14 @@ class ArtifactVersionFromUrlRequest(BaseModel):
meta_json: Optional[Dict[str, Any]] = None
class ArtifactVersionFromBase64Request(BaseModel):
    """Request body for creating an artifact version from inline base64 content."""
    # Base64-encoded payload; the endpoint also tolerates a full
    # "data:<mime>;base64,<payload>" data URL and strips the prefix.
    content_base64: str
    # MIME type stored with the blob and used as the MinIO object's content type.
    mime: str
    # Suggested file name; sanitized server-side (_safe_filename) before it is
    # used as part of the storage key.
    filename: Optional[str] = "source.bin"
    # Version label written to artifact_versions; falls back to "source".
    label: Optional[str] = "source"
    # Arbitrary caller metadata, normalized and persisted as the row's meta_json.
    meta_json: Optional[Dict[str, Any]] = None
class ArtifactVersionResponse(BaseModel):
version_id: str
storage_key: str
@@ -208,15 +218,38 @@ def _normalize_meta_json(meta: Any) -> Dict[str, Any]:
def _format_to_mime(fmt: str) -> str:
fmt = fmt.lower()
if "/" in fmt:
return fmt
if fmt == "pptx":
return "application/vnd.openxmlformats-officedocument.presentationml.presentation"
if fmt == "pdf":
return "application/pdf"
if fmt == "source":
return "application/json"
if fmt == "docx":
return "application/vnd.openxmlformats-officedocument.wordprocessingml.document"
if fmt == "xlsx":
return "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
if fmt == "txt":
return "text/plain; charset=utf-8"
if fmt == "md":
return "text/markdown; charset=utf-8"
if fmt == "json":
return "application/json"
if fmt == "csv":
return "text/csv; charset=utf-8"
return "application/octet-stream"
def _safe_filename(name: Optional[str], fallback: str = "source.bin") -> str:
raw = (name or fallback).strip() or fallback
cleaned = re.sub(r"[^A-Za-z0-9._-]+", "_", raw)
cleaned = cleaned.strip("._")
if not cleaned:
return fallback
return cleaned[:120]
async def _download_bytes(url: str) -> bytes:
async with httpx.AsyncClient(timeout=60.0) as client:
resp = await client.get(url)
@@ -462,6 +495,73 @@ async def add_version_from_url(artifact_id: str, payload: ArtifactVersionFromUrl
)
@app.post("/artifacts/{artifact_id}/versions/from_base64", response_model=ArtifactVersionResponse)
async def add_version_from_base64(artifact_id: str, payload: ArtifactVersionFromBase64Request) -> ArtifactVersionResponse:
    """Create a new artifact version from an inline base64 payload.

    The decoded bytes are uploaded to MinIO first, then a row is inserted
    into artifact_versions. If the DB insert fails, the freshly uploaded
    object is removed again (best effort) so no orphaned blob is left behind.

    Raises:
        HTTPException 500: MinIO/DB unavailable, or the version row insert failed.
        HTTPException 400: missing, malformed, or empty base64 payload.
        HTTPException 502: MinIO rejected the upload.
    """
    if not minio_client:
        raise HTTPException(status_code=500, detail="MinIO not available")
    if not pool:
        raise HTTPException(status_code=500, detail="DB not available")
    raw = (payload.content_base64 or "").strip()
    if not raw:
        raise HTTPException(status_code=400, detail="content_base64 is required")
    # Tolerate data URLs ("data:<mime>;base64,<payload>") by dropping the prefix.
    if raw.startswith("data:") and "," in raw:
        raw = raw.split(",", 1)[1]
    # Many encoders wrap base64 output at 64/76 columns; b64decode(validate=True)
    # rejects any whitespace, so strip it before decoding.
    raw = re.sub(r"\s+", "", raw)
    try:
        content = base64.b64decode(raw, validate=True)
    except Exception:
        raise HTTPException(status_code=400, detail="Invalid base64 payload")
    if not content:
        raise HTTPException(status_code=400, detail="Decoded payload is empty")
    version_id = f"ver_{uuid.uuid4().hex}"
    filename = _safe_filename(payload.filename, fallback="source.bin")
    sha256 = _hash_bytes(content)
    storage_key = _storage_key(artifact_id, version_id, filename)
    try:
        minio_client.put_object(
            MINIO_BUCKET,
            storage_key,
            data=BytesIO(content),
            length=len(content),
            content_type=payload.mime,
        )
    except S3Error as e:
        raise HTTPException(status_code=502, detail=f"MinIO error: {e}")
    meta_json = _normalize_meta_json(payload.meta_json)
    # Record the sanitized file name unless the caller supplied one explicitly.
    if "file_name" not in meta_json:
        meta_json["file_name"] = filename
    try:
        async with pool.acquire() as conn:
            await conn.execute(
                """
                insert into artifact_versions
                (id, artifact_id, label, sha256, mime, size_bytes, storage_key, meta_json)
                values ($1, $2, $3, $4, $5, $6, $7, $8)
                """,
                version_id,
                artifact_id,
                payload.label or "source",
                sha256,
                payload.mime,
                len(content),
                storage_key,
                json.dumps(meta_json),
            )
    except Exception as e:
        # The blob is already in MinIO; delete it so a failed insert (e.g. an
        # unknown artifact_id hitting a FK constraint) doesn't leave an orphan.
        try:
            minio_client.remove_object(MINIO_BUCKET, storage_key)
        except S3Error:
            pass  # best effort — prefer surfacing the original DB failure
        raise HTTPException(status_code=500, detail=f"DB error: {e}")
    return ArtifactVersionResponse(
        version_id=version_id,
        storage_key=storage_key,
        sha256=sha256,
        size_bytes=len(content),
    )
@app.post("/artifacts/{artifact_id}/versions", response_model=ArtifactVersionResponse)
async def add_version(artifact_id: str, payload: ArtifactVersionCreateRequest) -> ArtifactVersionResponse:
if not pool:
@@ -678,7 +778,39 @@ async def download_artifact(artifact_id: str, format: str = Query("pptx")) -> Di
if not row:
raise HTTPException(status_code=404, detail="Version not found")
try:
url = minio_client.presigned_get_object(MINIO_BUCKET, row["storage_key"], expires=1800)
url = minio_client.presigned_get_object(
MINIO_BUCKET,
row["storage_key"],
expires=timedelta(seconds=1800),
)
except S3Error as e:
raise HTTPException(status_code=502, detail=f"MinIO error: {e}")
return {"url": url, "storage_key": row["storage_key"], "mime": row["mime"]}
@app.get("/artifacts/{artifact_id}/versions/{version_id}/download")
async def download_artifact_version(artifact_id: str, version_id: str) -> Dict[str, Any]:
    """Return a short-lived (30 min) presigned download URL for one specific version.

    Raises:
        HTTPException 500: DB pool or MinIO client is not available.
        HTTPException 404: no matching version row for this artifact.
        HTTPException 502: MinIO failed to produce the presigned URL.
    """
    if not pool or not minio_client:
        raise HTTPException(status_code=500, detail="Service not available")
    query = """
        select * from artifact_versions
        where artifact_id=$1 and id=$2
        limit 1
    """
    async with pool.acquire() as conn:
        version_row = await conn.fetchrow(query, artifact_id, version_id)
    if version_row is None:
        raise HTTPException(status_code=404, detail="Version not found")
    try:
        # The MinIO SDK expects `expires` as a timedelta, not an int.
        url = minio_client.presigned_get_object(
            MINIO_BUCKET,
            version_row["storage_key"],
            expires=timedelta(seconds=1800),
        )
    except S3Error as exc:
        raise HTTPException(status_code=502, detail=f"MinIO error: {exc}")
    return {
        "url": url,
        "storage_key": version_row["storage_key"],
        "mime": version_row["mime"],
        "version_id": version_row["id"],
    }