Files
microdao-daarion/services/image-gen-service/main.py
Apple ef3473db21 snapshot: NODE1 production state 2026-02-09
Complete snapshot of /opt/microdao-daarion/ from NODE1 (144.76.224.179).
This represents the actual running production code that has diverged
significantly from the previous main branch.

Key changes from old main:
- Gateway (http_api.py): expanded from ~40KB to 164KB with full agent support
- Router: new /v1/agents/{id}/infer endpoint with vision + DeepSeek routing
- Behavior Policy: SOWA v2.2 (3-level: FULL/ACK/SILENT)
- Agent Registry: config/agent_registry.yml as single source of truth
- 13 agents configured (was 3)
- Memory service integration
- CrewAI teams and roles

Excluded from snapshot: venv/, .env, data/, backups, .tgz archives

Co-authored-by: Cursor <cursoragent@cursor.com>
2026-02-09 08:46:46 -08:00

127 lines
3.6 KiB
Python

import base64
import io
import logging
import os
from typing import Optional

import torch
from diffusers import Flux2KleinPipeline
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel, Field
# ASGI application object; the startup hook and the route handlers in this
# module attach themselves to it through decorators.
app = FastAPI(
    title="Image Generation Service",
    version="1.0.0",
)
class GenerateRequest(BaseModel):
    """Request body accepted by the ``/generate`` endpoint.

    All bounds are enforced by pydantic at validation time, so out-of-range
    values are rejected before the handler runs.
    """

    # Text prompt; required and must contain at least one character.
    prompt: str = Field(..., min_length=1)
    # Optional negative prompt; forwarded to the pipeline when non-empty.
    negative_prompt: Optional[str] = None
    # Output image size in pixels; each side is limited to 256..2048.
    width: int = Field(1024, ge=256, le=2048)
    height: int = Field(1024, ge=256, le=2048)
    # Forwarded to the pipeline call under the same keyword names.
    num_inference_steps: int = Field(50, ge=1, le=100)
    guidance_scale: float = Field(4.0, ge=0.0, le=20.0)
    # Optional RNG seed. torch.Generator.manual_seed raises RuntimeError for
    # values above 2**64 - 1, so cap it here: an oversized seed is then a
    # validation error for the client instead of an unhandled failure inside
    # the handler.
    seed: Optional[int] = Field(None, ge=0, le=0xFFFF_FFFF_FFFF_FFFF)
# Runtime configuration, read once from the environment at import time.
MODEL_ID = os.getenv("IMAGE_GEN_MODEL", "black-forest-labs/FLUX.2-klein-base-4B")
DEVICE = os.getenv("IMAGE_GEN_DEVICE", "cuda" if torch.cuda.is_available() else "cpu")
# Still read for compatibility with existing deployments; see _resolve_dtype
# for why its value no longer changes the outcome.
DTYPE_ENV = os.getenv("IMAGE_GEN_DTYPE", "float16")


def _resolve_dtype() -> torch.dtype:
    """Return the dtype the pipeline effectively runs with on ``DEVICE``.

    On CUDA the pipeline loader in this module promotes ``float16`` to
    ``bfloat16``, and every other ``IMAGE_GEN_DTYPE`` value already mapped
    to ``bfloat16``.  The effective CUDA dtype is therefore always
    ``bfloat16``; returning it here keeps the dtype reported by ``/health``
    and ``/info`` in agreement with the dtype the model is loaded in.
    Any non-CUDA device uses ``float32``.
    """
    if DEVICE.startswith("cuda"):
        return torch.bfloat16
    return torch.float32
# Module-level state written by _load_pipeline and read by the handlers.
# PIPELINE is the loaded pipeline, or None when nothing is loaded.
PIPELINE: Optional[Flux2KleinPipeline] = None
# LOAD_ERROR is a non-empty description of the last load failure, or None.
LOAD_ERROR: Optional[str] = None


def _load_pipeline() -> None:
    """Load the diffusion pipeline into ``PIPELINE``, recording any failure.

    On success ``PIPELINE`` holds the pipeline and ``LOAD_ERROR`` is ``None``.
    On failure ``PIPELINE`` is ``None`` and ``LOAD_ERROR`` holds a non-empty
    description of the exception.  Nothing is raised: the failure is logged
    with its traceback and left for the HTTP handlers to report.
    """
    global PIPELINE, LOAD_ERROR
    try:
        dtype = _resolve_dtype()
        # Use bfloat16 for FLUX.2 Klein as recommended
        if dtype == torch.float16 and DEVICE.startswith("cuda"):
            dtype = torch.bfloat16
        pipe = Flux2KleinPipeline.from_pretrained(
            MODEL_ID,
            torch_dtype=dtype,
        )
        if DEVICE.startswith("cuda"):
            # Enable CPU offload to reduce VRAM usage
            pipe.enable_model_cpu_offload()
        else:
            pipe.to(DEVICE)
    except Exception as exc:  # pragma: no cover - surface error via health/info
        # Without this the traceback would be lost; only the message is kept
        # in LOAD_ERROR.
        logging.getLogger(__name__).exception(
            "Failed to load image generation pipeline %s", MODEL_ID
        )
        PIPELINE = None
        # str(exc) is "" for an exception raised without a message.  An empty
        # LOAD_ERROR is falsy and would make the handlers treat the failed
        # load as "no error", so fall back to repr(), which is never empty.
        LOAD_ERROR = str(exc) or repr(exc)
    else:
        PIPELINE = pipe
        LOAD_ERROR = None
@app.on_event("startup")
def startup_event() -> None:
    """Startup hook: attempt to load the model pipeline once at app start."""
    _load_pipeline()
@app.get("/health")
def health() -> dict:
    """Report service health.

    Returns:
        A dict with a fixed ``"ok"`` status, whether a pipeline is currently
        loaded, and the configured model id, device and dtype name.

    Raises:
        HTTPException: 503 with the recorded load error as detail, when a
            load error is set.
    """
    if LOAD_ERROR:
        raise HTTPException(status_code=503, detail=LOAD_ERROR)

    # str(torch.float32) is "torch.float32"; drop the module prefix.
    dtype_name = str(_resolve_dtype()).replace("torch.", "")
    is_loaded = PIPELINE is not None
    report = {
        "status": "ok",
        "model_loaded": is_loaded,
        "model_id": MODEL_ID,
        "device": DEVICE,
        "dtype": dtype_name,
    }
    return report
@app.get("/info")
def info() -> dict:
    """Describe the service configuration and pipeline load state.

    Unlike ``/health`` this never raises: a recorded load error is returned
    in the ``load_error`` field (``None`` when there is none).
    """
    # str(torch.float32) is "torch.float32"; drop the module prefix.
    dtype_name = str(_resolve_dtype()).replace("torch.", "")
    is_loaded = PIPELINE is not None
    details = {
        "model_id": MODEL_ID,
        "device": DEVICE,
        "dtype": dtype_name,
        "pipeline_loaded": is_loaded,
        "load_error": LOAD_ERROR,
    }
    return details
@app.post("/generate")
def generate(payload: GenerateRequest) -> dict:
    """Generate one image from the request and return it as base64 PNG.

    Args:
        payload: Validated generation parameters.

    Returns:
        A dict with the PNG bytes base64-encoded under ``image_base64``, the
        seed from the request (``None`` when none was given) and the model id.

    Raises:
        HTTPException: 503 when a load error is recorded or no pipeline is
            loaded.
    """
    if LOAD_ERROR:
        raise HTTPException(status_code=503, detail=LOAD_ERROR)
    if PIPELINE is None:
        raise HTTPException(status_code=503, detail="Model is not loaded yet")

    # Only build a seeded generator when the caller asked for one; otherwise
    # the pipeline receives generator=None.
    rng = None
    if payload.seed is not None:
        rng_device = "cuda" if DEVICE.startswith("cuda") else "cpu"
        rng = torch.Generator(device=rng_device)
        rng.manual_seed(payload.seed)

    # NOTE(review): confirm that the pipeline's __call__ accepts
    # negative_prompt; it is forwarded as None when empty or missing.
    call_kwargs = {
        "prompt": payload.prompt,
        "negative_prompt": payload.negative_prompt or None,
        "height": payload.height,
        "width": payload.width,
        "num_inference_steps": payload.num_inference_steps,
        "guidance_scale": payload.guidance_scale,
        "generator": rng,
    }
    with torch.inference_mode():
        output = PIPELINE(**call_kwargs)

    # Serialize the first image to PNG in memory and base64-encode it.
    png_bytes = io.BytesIO()
    output.images[0].save(png_bytes, format="PNG")
    image_b64 = base64.b64encode(png_bytes.getvalue()).decode("ascii")

    return {
        "image_base64": image_b64,
        "seed": payload.seed,
        "model_id": MODEL_ID,
    }