snapshot: NODE1 production state 2026-02-09

Complete snapshot of /opt/microdao-daarion/ taken from NODE1 (144.76.224.179).
This snapshot is the code actually running in production, which has diverged
significantly from the previous main branch.

Key changes from old main:
- Gateway (http_api.py): expanded from ~40KB to 164KB with full agent support
- Router: new /v1/agents/{id}/infer endpoint with vision + DeepSeek routing
- Behavior Policy: SOWA v2.2 (3-level: FULL/ACK/SILENT)
- Agent Registry: config/agent_registry.yml as single source of truth
- 13 agents configured (was 3)
- Memory service integration
- CrewAI teams and roles

Excluded from snapshot: venv/, .env, data/, backups, .tgz archives

Co-authored-by: Cursor <cursoragent@cursor.com>
This commit is contained in:
Apple
2026-02-09 08:46:46 -08:00
parent 134c044c21
commit ef3473db21
9473 changed files with 408933 additions and 2769877 deletions

View File

@@ -4,7 +4,7 @@ import os
from typing import Optional
import torch
from diffusers import Flux2KleinPipeline
from diffusers import DiffusionPipeline
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel, Field
@@ -16,8 +16,8 @@ class GenerateRequest(BaseModel):
negative_prompt: Optional[str] = None
width: int = Field(1024, ge=256, le=2048)
height: int = Field(1024, ge=256, le=2048)
num_inference_steps: int = Field(50, ge=1, le=100)
guidance_scale: float = Field(4.0, ge=0.0, le=20.0)
num_inference_steps: int = Field(25, ge=1, le=100)
guidance_scale: float = Field(3.5, ge=0.0, le=20.0)
seed: Optional[int] = Field(None, ge=0)
@@ -32,7 +32,7 @@ def _resolve_dtype() -> torch.dtype:
return torch.float32
PIPELINE: Optional[Flux2KleinPipeline] = None
PIPELINE: Optional[DiffusionPipeline] = None
LOAD_ERROR: Optional[str] = None
@@ -40,21 +40,13 @@ def _load_pipeline() -> None:
global PIPELINE, LOAD_ERROR
try:
dtype = _resolve_dtype()
# Use bfloat16 for FLUX.2 Klein as recommended
if dtype == torch.float16 and DEVICE.startswith("cuda"):
dtype = torch.bfloat16
pipe = Flux2KleinPipeline.from_pretrained(
pipe = DiffusionPipeline.from_pretrained(
MODEL_ID,
torch_dtype=dtype,
variant="fp16" if dtype == torch.float16 else None,
use_safetensors=True
)
# Enable CPU offload to reduce VRAM usage
if DEVICE.startswith("cuda"):
pipe.enable_model_cpu_offload()
else:
pipe.to(DEVICE)
pipe.to(DEVICE)
PIPELINE = pipe
LOAD_ERROR = None
except Exception as exc: # pragma: no cover - surface error via health/info
@@ -100,15 +92,15 @@ def generate(payload: GenerateRequest) -> dict:
generator = None
if payload.seed is not None:
generator = torch.Generator(device="cuda" if DEVICE.startswith("cuda") else "cpu")
generator = torch.Generator(device=DEVICE)
generator.manual_seed(payload.seed)
with torch.inference_mode():
result = PIPELINE(
prompt=payload.prompt,
negative_prompt=payload.negative_prompt if payload.negative_prompt else None,
height=payload.height,
negative_prompt=payload.negative_prompt,
width=payload.width,
height=payload.height,
num_inference_steps=payload.num_inference_steps,
guidance_scale=payload.guidance_scale,
generator=generator,