Files
microdao-daarion/services/render-pdf-worker/app/main.py
Apple ef3473db21 snapshot: NODE1 production state 2026-02-09
Complete snapshot of /opt/microdao-daarion/ from NODE1 (144.76.224.179).
This represents the actual running production code that has diverged
significantly from the previous main branch.

Key changes from old main:
- Gateway (http_api.py): expanded from ~40KB to 164KB with full agent support
- Router: new /v1/agents/{id}/infer endpoint with vision + DeepSeek routing
- Behavior Policy: SOWA v2.2 (3-level: FULL/ACK/SILENT)
- Agent Registry: config/agent_registry.yml as single source of truth
- 13 agents configured (was 3)
- Memory service integration
- CrewAI teams and roles

Excluded from snapshot: venv/, .env, data/, backups, .tgz archives

Co-authored-by: Cursor <cursoragent@cursor.com>
2026-02-09 08:46:46 -08:00

159 lines
4.9 KiB
Python

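"""
render-pdf-worker
- Subscribes to the NATS subject artifact.job.render_pdf.requested
- Downloads the source PPTX from MinIO
- Converts it to PDF via headless LibreOffice (soffice)
- Uploads the PDF to MinIO under the same artifact version prefix
- Marks the job complete in artifact-registry, or failed if any step raises
"""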
import asyncio
import hashlib
import json
import logging
import os
import subprocess
import tempfile
import time
from pathlib import Path
import httpx
from minio import Minio
from minio.error import S3Error
from nats.aio.client import Client as NATS
import nats.errors
# Process-wide logging: records at INFO and above are emitted.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# --- Service endpoints; each one can be overridden through the environment ---
NATS_URL = os.environ.get("NATS_URL", "nats://nats:4222")
# Trailing slashes are stripped so request paths can be appended with "/".
REGISTRY_URL = os.environ.get(
    "ARTIFACT_REGISTRY_URL", "http://artifact-registry:9220"
).rstrip("/")

# --- MinIO object storage settings ---
MINIO_ENDPOINT = os.environ.get("MINIO_ENDPOINT", "minio:9000")
MINIO_ACCESS_KEY = os.environ.get("MINIO_ACCESS_KEY", "minioadmin")
MINIO_SECRET_KEY = os.environ.get("MINIO_SECRET_KEY", "minioadmin")
MINIO_BUCKET = os.environ.get("MINIO_BUCKET", "artifacts")
# Only the literal string "true" (in any letter case) enables TLS.
MINIO_SECURE = os.environ.get("MINIO_SECURE", "false").lower() == "true"

# Minimum number of seconds between two "idle, no messages" log lines.
IDLE_LOG_INTERVAL = int(os.environ.get("RENDER_PDF_IDLE_LOG_INTERVAL", "60"))

# Single MinIO client shared by every job handled in this process.
minio_client = Minio(
    MINIO_ENDPOINT,
    access_key=MINIO_ACCESS_KEY,
    secret_key=MINIO_SECRET_KEY,
    secure=MINIO_SECURE,
)
def _sha256(path: Path) -> str:
    """Return the hex-encoded SHA-256 digest of the file at *path*.

    The file is consumed in 8 KiB reads, so memory use does not grow with
    the size of the file.
    """
    digest = hashlib.sha256()
    with path.open("rb") as stream:
        while True:
            block = stream.read(8192)
            if not block:
                # An empty read signals end of file.
                break
            digest.update(block)
    return digest.hexdigest()
async def _post(url: str, payload: dict) -> None:
    """POST *payload* as a JSON body to *url*.

    A fresh HTTP client with a 20 second timeout is used for each call.

    Raises:
        RuntimeError: if the response status code is 400 or above; the
            message carries the status code and the first 200 characters
            of the response body.
    """
    async with httpx.AsyncClient(timeout=20.0) as http:
        response = await http.post(url, json=payload)
        status = response.status_code
        if status < 400:
            return
        raise RuntimeError(f"Registry error: {status} {response.text[:200]}")
def _download_pptx(pptx_key: str, dest: Path) -> None:
    """Stream the MinIO object stored under *pptx_key* into the file *dest*."""
    obj = minio_client.get_object(MINIO_BUCKET, pptx_key)
    try:
        with dest.open("wb") as f:
            for chunk in obj.stream(32 * 1024):
                f.write(chunk)
    finally:
        # The response keeps a pooled HTTP connection checked out until it is
        # explicitly closed and released.
        obj.close()
        obj.release_conn()


def _convert_to_pdf(pptx_path: Path, outdir: Path) -> Path:
    """Convert *pptx_path* to PDF with headless LibreOffice.

    Returns the path of the generated PDF inside *outdir*.

    Raises:
        RuntimeError: if soffice exits non-zero or no PDF file appears.
        subprocess.TimeoutExpired: if the conversion exceeds 120 seconds.
    """
    cmd = [
        "soffice",
        "--headless",
        "--convert-to",
        "pdf",
        "--outdir",
        str(outdir),
        str(pptx_path),
    ]
    proc = subprocess.run(cmd, capture_output=True, text=True, timeout=120)
    if proc.returncode != 0:
        raise RuntimeError(f"LibreOffice failed: {proc.stderr[:200]}")
    # The output is expected to carry the input's stem with a .pdf suffix.
    pdf_path = outdir / f"{pptx_path.stem}.pdf"
    if not pdf_path.exists():
        raise RuntimeError("PDF not generated")
    return pdf_path


def _render_and_upload(artifact_id: str, input_version_id: str) -> dict:
    """Run the blocking download -> convert -> upload pipeline for one job.

    Returns the JSON payload to send to the registry "complete" endpoint.
    Everything in here blocks, so it must be called from a worker thread,
    never directly on the event loop.
    """
    version_prefix = f"artifacts/{artifact_id}/versions/{input_version_id}"
    pptx_key = f"{version_prefix}/presentation.pptx"
    pdf_key = f"{version_prefix}/presentation.pdf"
    with tempfile.TemporaryDirectory() as tmpdir:
        tmpdir_path = Path(tmpdir)
        pptx_path = tmpdir_path / "input.pptx"
        _download_pptx(pptx_key, pptx_path)
        pdf_path = _convert_to_pdf(pptx_path, tmpdir_path)
        try:
            minio_client.fput_object(
                MINIO_BUCKET,
                pdf_key,
                str(pdf_path),
                content_type="application/pdf",
            )
        except S3Error as e:
            raise RuntimeError(f"MinIO error: {e}") from e
        # Size and digest are computed before the temporary directory (and
        # the PDF inside it) is removed.
        return {
            "output_storage_key": pdf_key,
            "mime": "application/pdf",
            "size_bytes": pdf_path.stat().st_size,
            "sha256": _sha256(pdf_path),
            "label": "pdf",
        }


async def _handle_job(data: dict) -> None:
    """Process one render-pdf job and report its outcome to the registry.

    *data* must be a dict with non-empty "job_id", "artifact_id" and
    "input_version_id" entries; anything else is logged and dropped.

    On success the registry "complete" endpoint for the job is called with
    the PDF's storage key, size and SHA-256. On any failure the error is
    logged and the "fail" endpoint is called with the error text. This
    coroutine never raises for job-level failures.
    """
    if not isinstance(data, dict):
        # Valid JSON that is not an object (list, string, number, ...) would
        # otherwise raise AttributeError on .get below.
        logger.error("Invalid job payload: %s", data)
        return
    job_id = data.get("job_id")
    artifact_id = data.get("artifact_id")
    input_version_id = data.get("input_version_id")
    if not job_id or not artifact_id or not input_version_id:
        logger.error("Invalid job payload: %s", data)
        return
    try:
        # MinIO transfers and the LibreOffice subprocess are blocking calls;
        # running them in the default executor keeps the event loop free.
        loop = asyncio.get_running_loop()
        result = await loop.run_in_executor(
            None, _render_and_upload, artifact_id, input_version_id
        )
        await _post(f"{REGISTRY_URL}/jobs/{job_id}/complete", result)
    except Exception as e:
        logger.exception("render_pdf job %s failed", job_id)
        try:
            await _post(
                f"{REGISTRY_URL}/jobs/{job_id}/fail",
                {"error_text": str(e)},
            )
        except Exception:
            logger.exception("Failed to report job failure")
async def main() -> None:
    """Connect to NATS and process render-pdf job requests until stopped.

    Messages are handled one at a time. A payload that is not UTF-8 JSON, or
    that decodes to something other than a JSON object, is logged and
    skipped. While no messages arrive, an "idle" line is logged at most once
    every IDLE_LOG_INTERVAL seconds. The NATS connection is closed when the
    loop exits for any reason.
    """
    subject = "artifact.job.render_pdf.requested"
    nc = NATS()
    await nc.connect(servers=[NATS_URL])
    try:
        sub = await nc.subscribe(subject)
        # time.monotonic() has an undefined reference point, so start from
        # -inf to guarantee the first idle period is always logged.
        last_idle_log = float("-inf")
        while True:
            try:
                msg = await sub.next_msg()
            except nats.errors.TimeoutError:
                # No message arrived within next_msg()'s timeout.
                now = time.monotonic()
                if now - last_idle_log >= IDLE_LOG_INTERVAL:
                    logger.info("idle, no messages (subject=%s)", subject)
                    last_idle_log = now
                continue
            try:
                data = json.loads(msg.data.decode("utf-8"))
            except Exception:
                logger.exception("Invalid message payload")
                continue
            if not isinstance(data, dict):
                # e.g. "[]" or "42": valid JSON, but not a job description.
                logger.error(
                    "Invalid message payload: expected a JSON object, got %s",
                    type(data).__name__,
                )
                continue
            await _handle_job(data)
    finally:
        await nc.close()


if __name__ == "__main__":
    asyncio.run(main())