snapshot: NODE1 production state 2026-02-09

Complete snapshot of /opt/microdao-daarion/ from NODE1 (144.76.224.179).
This represents the actual running production code that has diverged
significantly from the previous main branch.

Key changes from old main:
- Gateway (http_api.py): expanded from ~40KB to 164KB with full agent support
- Router: new /v1/agents/{id}/infer endpoint with vision + DeepSeek routing
- Behavior Policy: SOWA v2.2 (3-level: FULL/ACK/SILENT)
- Agent Registry: config/agent_registry.yml as single source of truth
- 13 agents configured (was 3)
- Memory service integration
- CrewAI teams and roles

Excluded from snapshot: venv/, .env, data/, backups, .tgz archives

Co-authored-by: Cursor <cursoragent@cursor.com>
2026-02-09 08:46:46 -08:00
parent 134c044c21
commit ef3473db21
9473 changed files with 408933 additions and 2769877 deletions
--- a/services/image-gen-service/app/main.py
+++ b/services/image-gen-service/app/main.py
@@ -4,7 +4,7 @@ import os
 from typing import Optional

 import torch
-from diffusers import Flux2KleinPipeline
+from diffusers import DiffusionPipeline
 from fastapi import FastAPI, HTTPException
 from pydantic import BaseModel, Field

@@ -16,8 +16,8 @@ class GenerateRequest(BaseModel):
    negative_prompt: Optional[str] = None
    width: int = Field(1024, ge=256, le=2048)
    height: int = Field(1024, ge=256, le=2048)
-    num_inference_steps: int = Field(50, ge=1, le=100)
-    guidance_scale: float = Field(4.0, ge=0.0, le=20.0)
+    num_inference_steps: int = Field(25, ge=1, le=100)
+    guidance_scale: float = Field(3.5, ge=0.0, le=20.0)
    seed: Optional[int] = Field(None, ge=0)


@@ -32,7 +32,7 @@ def _resolve_dtype() -> torch.dtype:
    return torch.float32


-PIPELINE: Optional[Flux2KleinPipeline] = None
+PIPELINE: Optional[DiffusionPipeline] = None
 LOAD_ERROR: Optional[str] = None


@@ -40,21 +40,13 @@ def _load_pipeline() -> None:
    global PIPELINE, LOAD_ERROR
    try:
        dtype = _resolve_dtype()
-        # Use bfloat16 for FLUX.2 Klein as recommended
-        if dtype == torch.float16 and DEVICE.startswith("cuda"):
-            dtype = torch.bfloat16
-        
-        pipe = Flux2KleinPipeline.from_pretrained(
+        pipe = DiffusionPipeline.from_pretrained(
            MODEL_ID,
            torch_dtype=dtype,
+            variant="fp16" if dtype == torch.float16 else None,
+            use_safetensors=True
        )
-        
-        # Enable CPU offload to reduce VRAM usage
-        if DEVICE.startswith("cuda"):
-            pipe.enable_model_cpu_offload()
-        else:
-            pipe.to(DEVICE)
-        
+        pipe.to(DEVICE)
        PIPELINE = pipe
        LOAD_ERROR = None
    except Exception as exc:  # pragma: no cover - surface error via health/info
@@ -100,15 +92,15 @@ def generate(payload: GenerateRequest) -> dict:

    generator = None
    if payload.seed is not None:
-        generator = torch.Generator(device="cuda" if DEVICE.startswith("cuda") else "cpu")
+        generator = torch.Generator(device=DEVICE)
        generator.manual_seed(payload.seed)

    with torch.inference_mode():
        result = PIPELINE(
            prompt=payload.prompt,
-            negative_prompt=payload.negative_prompt if payload.negative_prompt else None,
-            height=payload.height,
+            negative_prompt=payload.negative_prompt,
            width=payload.width,
+            height=payload.height,
            num_inference_steps=payload.num_inference_steps,
            guidance_scale=payload.guidance_scale,
            generator=generator,