Complete snapshot of /opt/microdao-daarion/ from NODE1 (144.76.224.179).
This represents the actual running production code that has diverged
significantly from the previous main branch.
Key changes from old main:
- Gateway (http_api.py): expanded from ~40KB to 164KB with full agent support
- Router: new /v1/agents/{id}/infer endpoint with vision + DeepSeek routing
- Behavior Policy: SOWA v2.2 (3-level: FULL/ACK/SILENT)
- Agent Registry: config/agent_registry.yml as single source of truth
- 13 agents configured (was 3)
- Memory service integration
- CrewAI teams and roles
Excluded from snapshot: venv/, .env, data/, backups, .tgz archives
Co-authored-by: Cursor <cursoragent@cursor.com>
159 lines, 4.9 KiB, Python
"""
render-pdf-worker

- Subscribes to artifact.job.render_pdf.requested
- Downloads PPTX from MinIO
- Converts to PDF via LibreOffice
- Uploads PDF to MinIO
- Marks job done in artifact-registry
"""
|
|
|
|
import asyncio
|
|
import hashlib
|
|
import json
|
|
import logging
|
|
import os
|
|
import subprocess
|
|
import tempfile
|
|
import time
|
|
from pathlib import Path
|
|
|
|
import httpx
|
|
from minio import Minio
|
|
from minio.error import S3Error
|
|
from nats.aio.client import Client as NATS
|
|
import nats.errors
|
|
|
|
# Root logging is configured at INFO; this module logs through its own logger.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# --- Service endpoints (each can be overridden through the environment) ---
NATS_URL = os.environ.get("NATS_URL", "nats://nats:4222")
# Trailing slashes are stripped so request paths can be appended with "/...".
REGISTRY_URL = os.environ.get(
    "ARTIFACT_REGISTRY_URL", "http://artifact-registry:9220"
).rstrip("/")

# --- MinIO object storage settings ---
MINIO_ENDPOINT = os.environ.get("MINIO_ENDPOINT", "minio:9000")
MINIO_ACCESS_KEY = os.environ.get("MINIO_ACCESS_KEY", "minioadmin")
MINIO_SECRET_KEY = os.environ.get("MINIO_SECRET_KEY", "minioadmin")
MINIO_BUCKET = os.environ.get("MINIO_BUCKET", "artifacts")
# Only the (case-insensitive) string "true" enables TLS.
MINIO_SECURE = os.environ.get("MINIO_SECURE", "false").lower() == "true"

# Rate-limit for "idle, no messages" log (seconds)
IDLE_LOG_INTERVAL = int(os.environ.get("RENDER_PDF_IDLE_LOG_INTERVAL", "60"))
|
|
|
|
# Module-level MinIO client, built once at import time from the MINIO_* settings.
minio_client = Minio(
    MINIO_ENDPOINT,
    secure=MINIO_SECURE,
    access_key=MINIO_ACCESS_KEY,
    secret_key=MINIO_SECRET_KEY,
)
|
|
|
|
|
|
def _sha256(path: Path) -> str:
    """Return the hexadecimal SHA-256 digest of the file at *path*.

    The file is read in 8 KiB pieces so its whole content is never held in
    memory at once.
    """
    digest = hashlib.sha256()
    with path.open("rb") as stream:
        while True:
            piece = stream.read(8192)
            if not piece:
                # An empty read marks end of file.
                break
            digest.update(piece)
    return digest.hexdigest()
|
|
|
|
|
|
async def _post(url: str, payload: dict) -> None:
    """POST *payload* as JSON to *url* using a short-lived HTTP client.

    Raises:
        RuntimeError: if the response status code is 400 or above; the
            message carries the status and the first 200 characters of the
            response body.
    """
    async with httpx.AsyncClient(timeout=20.0) as http:
        response = await http.post(url, json=payload)
        if response.status_code < 400:
            return
        raise RuntimeError(f"Registry error: {response.status_code} {response.text[:200]}")
|
|
|
|
|
|
def _render_pdf_blocking(artifact_id: str, input_version_id: str) -> dict:
    """Download the PPTX, convert it to PDF and upload the result.

    Every step here is blocking (MinIO client calls, ``subprocess.run``,
    file hashing), so this helper is meant to run in a worker thread rather
    than directly on the event loop.

    Args:
        artifact_id: Artifact identifier used to build the storage keys.
        input_version_id: Version identifier used to build the storage keys.

    Returns:
        The JSON payload for the registry ``/jobs/{job_id}/complete`` call:
        storage key, MIME type, size, SHA-256 and label of the uploaded PDF.

    Raises:
        RuntimeError: if LibreOffice exits non-zero, no PDF is produced, or
            the upload fails with an ``S3Error``.
        Exception: any other error from the MinIO download or from
            ``subprocess.run`` (e.g. the 120 s timeout) propagates unchanged.
    """
    pptx_key = f"artifacts/{artifact_id}/versions/{input_version_id}/presentation.pptx"
    pdf_key = f"artifacts/{artifact_id}/versions/{input_version_id}/presentation.pdf"

    with tempfile.TemporaryDirectory() as tmpdir:
        tmpdir_path = Path(tmpdir)
        pptx_path = tmpdir_path / "input.pptx"
        # LibreOffice names its output after the input file's stem.
        pdf_path = tmpdir_path / "input.pdf"

        obj = minio_client.get_object(MINIO_BUCKET, pptx_key)
        try:
            with pptx_path.open("wb") as f:
                for chunk in obj.stream(32 * 1024):
                    f.write(chunk)
        finally:
            # Always hand the HTTP connection back to the pool, even when the
            # download fails part-way; otherwise connections leak per job.
            obj.close()
            obj.release_conn()

        # Convert PPTX to PDF with LibreOffice
        cmd = [
            "soffice",
            "--headless",
            "--convert-to",
            "pdf",
            "--outdir",
            str(tmpdir_path),
            str(pptx_path),
        ]
        proc = subprocess.run(cmd, capture_output=True, text=True, timeout=120)
        if proc.returncode != 0:
            raise RuntimeError(f"LibreOffice failed: {proc.stderr[:200]}")

        # A zero exit code alone is not trusted; the output file must exist.
        if not pdf_path.exists():
            raise RuntimeError("PDF not generated")

        try:
            minio_client.fput_object(
                MINIO_BUCKET,
                pdf_key,
                str(pdf_path),
                content_type="application/pdf",
            )
        except S3Error as e:
            raise RuntimeError(f"MinIO error: {e}") from e

        # Size and hash must be taken before the temporary directory is removed.
        return {
            "output_storage_key": pdf_key,
            "mime": "application/pdf",
            "size_bytes": pdf_path.stat().st_size,
            "sha256": _sha256(pdf_path),
            "label": "pdf",
        }


async def _handle_job(data: dict) -> None:
    """Process one render-PDF job and report its outcome to the registry.

    The payload must be a mapping with non-empty ``job_id``, ``artifact_id``
    and ``input_version_id``; anything else is logged as invalid and ignored.
    On success the result is POSTed to ``/jobs/{job_id}/complete``. On any
    failure the error is logged and POSTed to ``/jobs/{job_id}/fail``.

    Args:
        data: Decoded job request message.

    This coroutine does not raise for job failures: errors are reported to
    the registry, and a failure of that report is only logged.
    """
    # json.loads may yield a list/str/number; .get() on those would crash
    # the caller's consume loop.
    if not isinstance(data, dict):
        logger.error("Invalid job payload: %s", data)
        return

    job_id = data.get("job_id")
    artifact_id = data.get("artifact_id")
    input_version_id = data.get("input_version_id")

    if not job_id or not artifact_id or not input_version_id:
        logger.error("Invalid job payload: %s", data)
        return

    try:
        # Run the blocking download/convert/upload pipeline in the default
        # thread pool so the event loop stays responsive meanwhile.
        loop = asyncio.get_running_loop()
        result = await loop.run_in_executor(
            None, _render_pdf_blocking, artifact_id, input_version_id
        )
        await _post(f"{REGISTRY_URL}/jobs/{job_id}/complete", result)
    except Exception as e:
        logger.exception("render_pdf job %s failed", job_id)
        try:
            await _post(
                f"{REGISTRY_URL}/jobs/{job_id}/fail",
                {"error_text": str(e)},
            )
        except Exception:
            logger.exception("Failed to report job failure")
|
|
|
|
|
|
async def main() -> None:
    """Connect to NATS and process render-PDF job requests until stopped.

    Messages on ``artifact.job.render_pdf.requested`` are decoded as UTF-8
    JSON and handed to ``_handle_job`` one at a time. Undecodable messages
    are logged and skipped. While no messages arrive, an "idle" line is
    logged at most once every ``IDLE_LOG_INTERVAL`` seconds.

    The NATS connection is closed when the loop exits for any reason
    (unexpected error or task cancellation).
    """
    subject = "artifact.job.render_pdf.requested"

    nc = NATS()
    await nc.connect(servers=[NATS_URL])
    try:
        sub = await nc.subscribe(subject)
        last_idle_log = 0.0
        while True:
            try:
                msg = await sub.next_msg()
            except nats.errors.TimeoutError:
                # No message within the client's wait window: emit a
                # rate-limited heartbeat and keep polling.
                now = time.monotonic()
                if now - last_idle_log >= IDLE_LOG_INTERVAL:
                    logger.info("idle, no messages (subject=%s)", subject)
                    last_idle_log = now
                continue

            try:
                data = json.loads(msg.data.decode("utf-8"))
            except Exception:
                # Broad on purpose: one malformed message must never stop
                # the worker.
                logger.exception("Invalid message payload")
                continue

            await _handle_job(data)
    finally:
        # Release the connection instead of leaving it to process teardown.
        await nc.close()
|
|
|
|
|
|
# Script entry point: start the worker loop only when executed directly.
if __name__ == "__main__":
    asyncio.run(main())
|