""" render-pdf-worker - Subscribes to artifact.job.render_pdf.requested - Downloads PPTX from MinIO - Converts to PDF via LibreOffice - Uploads PDF to MinIO - Marks job done in artifact-registry """ import asyncio import hashlib import json import logging import os import subprocess import tempfile import time from pathlib import Path import httpx from minio import Minio from minio.error import S3Error from nats.aio.client import Client as NATS import nats.errors logging.basicConfig(level=logging.INFO) logger = logging.getLogger(__name__) NATS_URL = os.getenv("NATS_URL", "nats://nats:4222") REGISTRY_URL = os.getenv("ARTIFACT_REGISTRY_URL", "http://artifact-registry:9220").rstrip("/") MINIO_ENDPOINT = os.getenv("MINIO_ENDPOINT", "minio:9000") MINIO_ACCESS_KEY = os.getenv("MINIO_ACCESS_KEY", "minioadmin") MINIO_SECRET_KEY = os.getenv("MINIO_SECRET_KEY", "minioadmin") MINIO_BUCKET = os.getenv("MINIO_BUCKET", "artifacts") MINIO_SECURE = os.getenv("MINIO_SECURE", "false").lower() == "true" # Rate-limit for "idle, no messages" log (seconds) IDLE_LOG_INTERVAL = int(os.getenv("RENDER_PDF_IDLE_LOG_INTERVAL", "60")) minio_client = Minio( MINIO_ENDPOINT, access_key=MINIO_ACCESS_KEY, secret_key=MINIO_SECRET_KEY, secure=MINIO_SECURE, ) def _sha256(path: Path) -> str: h = hashlib.sha256() with path.open("rb") as f: for chunk in iter(lambda: f.read(8192), b""): h.update(chunk) return h.hexdigest() async def _post(url: str, payload: dict) -> None: async with httpx.AsyncClient(timeout=20.0) as client: resp = await client.post(url, json=payload) if resp.status_code >= 400: raise RuntimeError(f"Registry error: {resp.status_code} {resp.text[:200]}") async def _handle_job(data: dict) -> None: job_id = data.get("job_id") artifact_id = data.get("artifact_id") input_version_id = data.get("input_version_id") if not job_id or not artifact_id or not input_version_id: logger.error("Invalid job payload: %s", data) return pptx_key = f"artifacts/{artifact_id}/versions/{input_version_id}/presentation.pptx" try: with tempfile.TemporaryDirectory() as tmpdir: tmpdir_path = Path(tmpdir) pptx_path = tmpdir_path / "input.pptx" pdf_path = tmpdir_path / "input.pdf" obj = minio_client.get_object(MINIO_BUCKET, pptx_key) with pptx_path.open("wb") as f: for chunk in obj.stream(32 * 1024): f.write(chunk) # Convert PPTX to PDF with LibreOffice cmd = [ "soffice", "--headless", "--convert-to", "pdf", "--outdir", str(tmpdir_path), str(pptx_path), ] proc = subprocess.run(cmd, capture_output=True, text=True, timeout=120) if proc.returncode != 0: raise RuntimeError(f"LibreOffice failed: {proc.stderr[:200]}") if not pdf_path.exists(): raise RuntimeError("PDF not generated") pdf_key = f"artifacts/{artifact_id}/versions/{input_version_id}/presentation.pdf" try: minio_client.fput_object( MINIO_BUCKET, pdf_key, str(pdf_path), content_type="application/pdf", ) except S3Error as e: raise RuntimeError(f"MinIO error: {e}") await _post( f"{REGISTRY_URL}/jobs/{job_id}/complete", { "output_storage_key": pdf_key, "mime": "application/pdf", "size_bytes": pdf_path.stat().st_size, "sha256": _sha256(pdf_path), "label": "pdf", }, ) except Exception as e: try: await _post( f"{REGISTRY_URL}/jobs/{job_id}/fail", {"error_text": str(e)}, ) except Exception: logger.exception("Failed to report job failure") async def main() -> None: nc = NATS() await nc.connect(servers=[NATS_URL]) sub = await nc.subscribe("artifact.job.render_pdf.requested") last_idle_log = 0.0 while True: try: msg = await sub.next_msg() except nats.errors.TimeoutError: now = time.monotonic() if now - last_idle_log >= IDLE_LOG_INTERVAL: logger.info("idle, no messages (subject=artifact.job.render_pdf.requested)") last_idle_log = now continue try: payload = msg.data.decode("utf-8") data = json.loads(payload) except Exception: logger.exception("Invalid message payload") continue await _handle_job(data) if __name__ == "__main__": asyncio.run(main())