From 088ca0713747d8527b1fa6ff690c5962b6fd1148 Mon Sep 17 00:00:00 2001
From: NODA1 System
Date: Sat, 21 Feb 2026 17:22:06 +0100
Subject: [PATCH] feat(gateway): proxy artifact downloads via public doc endpoints

---
Review notes (commentary placed after the "---" marker; it is not part of
the commit message):

* http_api_doc.py adds GET
  /api/doc/artifacts/{artifact_id}/versions/{version_id}/download.
  The handler uses one httpx.AsyncClient for two upstream requests: first the
  registry's /artifacts/{aid}/versions/{vid}/download endpoint, whose JSON is
  read for "url", "mime" and "storage_key"; then a GET on that "url" value.
* Status mapping as written: blank ids give 400; a registry status of 400 or
  above is re-raised with the registry's own status code; an empty "url" or a
  storage status of 400 or above gives 502; any other exception is logged and
  reported as 500 "Artifact proxy download failed".
* The download name is taken from the filename query parameter, else the last
  path segment of "storage_key", else "{aid}_{vid}.bin"; it is then reduced
  to the characters A-Z a-z 0-9 . _ - before being placed in
  Content-Disposition.
* NOTE(review): the body is returned through file_resp.content, so the whole
  artifact is held in memory before the response is sent. Consider streaming
  for large files.
* NOTE(review): artifact_id and version_id are only stripped of surrounding
  whitespace before being interpolated into the registry URL; no URL-encoding
  is applied. Confirm the registry tolerates arbitrary segment text.
* doc_service.py: the hunk at old line 348 removes the registry call that
  fetched a download "url" and instead builds download_url locally as the
  gateway path above, prefixed with GATEWAY_PUBLIC_BASE_URL when that
  environment variable is non-empty and left relative otherwise.

 gateway-bot/http_api_doc.py         | 61 +++++++++++++++++++++++++++++
 gateway-bot/services/doc_service.py | 25 +++++++-----
 2 files changed, 77 insertions(+), 9 deletions(-)

diff --git a/gateway-bot/http_api_doc.py b/gateway-bot/http_api_doc.py
index 6053d58f..e8e56eae 100644
--- a/gateway-bot/http_api_doc.py
+++ b/gateway-bot/http_api_doc.py
@@ -9,11 +9,16 @@ Endpoints:
 - POST /api/doc/update - Update existing document text (versioned)
 - POST /api/doc/publish - Publish physical file version via artifact registry
 - GET /api/doc/versions/{doc_id} - List document versions
+- GET /api/doc/artifacts/{artifact_id}/versions/{version_id}/download - Download via gateway proxy
 """
 import logging
+import os
+import re
 from typing import Optional, Dict, Any
 from fastapi import APIRouter, HTTPException, UploadFile, File, Form
+from fastapi.responses import Response
 from pydantic import BaseModel
+import httpx
 
 from services.doc_service import (
     doc_service,
@@ -34,6 +39,8 @@ from services.doc_service import (
 logger = logging.getLogger(__name__)
 
 router = APIRouter()
+ARTIFACT_REGISTRY_URL = os.getenv("ARTIFACT_REGISTRY_URL", "http://artifact-registry:9220").rstrip("/")
+DOC_DOWNLOAD_TIMEOUT_SECONDS = float(os.getenv("DOC_DOWNLOAD_TIMEOUT_SECONDS", "60"))
 
 
 # ========================================
@@ -402,3 +409,57 @@ async def get_document_context(session_id: str):
     except Exception as e:
         logger.error(f"Get document context error: {e}", exc_info=True)
         raise HTTPException(status_code=500, detail=str(e))
+
+
+@router.get("/api/doc/artifacts/{artifact_id}/versions/{version_id}/download")
+async def download_artifact_version_via_gateway(
+    artifact_id: str,
+    version_id: str,
+    filename: Optional[str] = None,
+    inline: bool = False,
+):
+    """
+    Proxy download for artifact version to avoid exposing internal MinIO host to browser clients.
+    """
+    aid = (artifact_id or "").strip()
+    vid = (version_id or "").strip()
+    if not aid or not vid:
+        raise HTTPException(status_code=400, detail="artifact_id and version_id are required")
+
+    try:
+        async with httpx.AsyncClient(timeout=DOC_DOWNLOAD_TIMEOUT_SECONDS) as client:
+            meta_resp = await client.get(
+                f"{ARTIFACT_REGISTRY_URL}/artifacts/{aid}/versions/{vid}/download"
+            )
+            if meta_resp.status_code >= 400:
+                detail = ""
+                try:
+                    detail = meta_resp.json().get("detail")  # type: ignore[assignment]
+                except Exception:
+                    detail = meta_resp.text[:200]
+                raise HTTPException(status_code=meta_resp.status_code, detail=detail or "Version download info failed")
+            meta = meta_resp.json()
+            signed_url = (meta.get("url") or "").strip()
+            if not signed_url:
+                raise HTTPException(status_code=502, detail="artifact-registry returned empty download URL")
+
+            file_resp = await client.get(signed_url)
+            if file_resp.status_code >= 400:
+                raise HTTPException(status_code=502, detail=f"Artifact storage download failed: {file_resp.status_code}")
+
+            mime = (meta.get("mime") or file_resp.headers.get("content-type") or "application/octet-stream").strip()
+            storage_key = str(meta.get("storage_key") or "")
+            inferred_name = storage_key.rsplit("/", 1)[-1] if "/" in storage_key else storage_key
+            out_name = (filename or inferred_name or f"{aid}_{vid}.bin").strip()
+            out_name = re.sub(r"[^A-Za-z0-9._-]+", "_", out_name).strip("._") or f"{aid}_{vid}.bin"
+            disposition = "inline" if inline else "attachment"
+            headers = {
+                "Content-Disposition": f'{disposition}; filename="{out_name}"',
+                "Cache-Control": "private, max-age=60",
+            }
+            return Response(content=file_resp.content, media_type=mime, headers=headers)
+    except HTTPException:
+        raise
+    except Exception as e:
+        logger.error(f"Artifact version proxy download failed: aid={aid}, vid={vid}, err={e}", exc_info=True)
+        raise HTTPException(status_code=500, detail="Artifact proxy download failed")
diff --git a/gateway-bot/services/doc_service.py b/gateway-bot/services/doc_service.py
index 0aa85df4..4ad2a691 100644
--- a/gateway-bot/services/doc_service.py
+++ b/gateway-bot/services/doc_service.py
@@ -27,6 +27,7 @@ SHARED_EXCEL_POLICY_AGENTS = {"agromatrix", "helion", "nutra", "greenfood"}
 ROUTER_URL = os.getenv("ROUTER_URL", "http://router:8000")
 ARTIFACT_REGISTRY_URL = os.getenv("ARTIFACT_REGISTRY_URL", "http://artifact-registry:9220").rstrip("/")
 DOC_WRITEBACK_CREATED_BY = os.getenv("DOC_WRITEBACK_CREATED_BY", "gateway-doc-service")
+GATEWAY_PUBLIC_BASE_URL = os.getenv("GATEWAY_PUBLIC_BASE_URL", "").rstrip("/")
 
 
 class QAItem(BaseModel):
@@ -222,6 +223,17 @@ class DocumentService:
         safe_base = re.sub(r"[^A-Za-z0-9._-]+", "_", base).strip("._") or "document"
         return f"{safe_base}.{fmt}"
 
+    def _gateway_artifact_download_path(self, artifact_id: str, version_id: str) -> str:
+        aid = (artifact_id or "").strip()
+        vid = (version_id or "").strip()
+        return f"/api/doc/artifacts/{aid}/versions/{vid}/download"
+
+    def _gateway_artifact_download_url(self, artifact_id: str, version_id: str) -> str:
+        path = self._gateway_artifact_download_path(artifact_id, version_id)
+        if GATEWAY_PUBLIC_BASE_URL:
+            return f"{GATEWAY_PUBLIC_BASE_URL}{path}"
+        return path
+
     def _render_document_bytes(
         self,
         text: str,
@@ -348,15 +360,10 @@ class DocumentService:
                 error="Artifact version create failed: empty version_id",
             )
 
-        download_url = None
-        try:
-            dl = await self._artifact_get_json(
-                f"/artifacts/{effective_artifact_id}/versions/{version_id}/download",
-                timeout=20.0,
-            )
-            download_url = dl.get("url")
-        except Exception as e:
-            logger.warning(f"version download url generation failed: {e}")
+        download_url = self._gateway_artifact_download_url(
+            artifact_id=effective_artifact_id,
+            version_id=version_id,
+        )
 
         return PublishResult(
             success=True,