snapshot: NODE1 production state 2026-02-09

Complete snapshot of /opt/microdao-daarion/ from NODE1 (144.76.224.179).
This represents the actual running production code that has diverged
significantly from the previous main branch.

Key changes from old main:
- Gateway (http_api.py): expanded from ~40KB to 164KB with full agent support
- Router: new /v1/agents/{id}/infer endpoint with vision + DeepSeek routing
- Behavior Policy: SOWA v2.2 (3-level: FULL/ACK/SILENT)
- Agent Registry: config/agent_registry.yml as single source of truth
- 13 agents configured (was 3)
- Memory service integration
- CrewAI teams and roles

Excluded from snapshot: venv/, .env, data/, backups, .tgz archives

Co-authored-by: Cursor <cursoragent@cursor.com>
This commit is contained in:
Apple
2026-02-09 08:46:46 -08:00
parent 134c044c21
commit ef3473db21
9473 changed files with 408933 additions and 2769877 deletions

View File

@@ -1,4 +1,5 @@
from fastapi import FastAPI, HTTPException
from fastapi.responses import Response
from pydantic import BaseModel
from typing import Literal, Optional, Dict, Any, List
import asyncio
@@ -7,6 +8,16 @@ import os
import yaml
import httpx
import logging
import time # For latency metrics
# CrewAI Integration: optional dependency. The gateway must start even when
# the crewai_client module is not deployed, so guard the import and expose
# the availability flag for call sites to check.
try:
    from crewai_client import should_use_crewai, call_crewai, get_crewai_health
    CREWAI_CLIENT_AVAILABLE = True
except ImportError:
    # Define every imported name so later references fail predictably
    # (None check) instead of raising NameError.
    CREWAI_CLIENT_AVAILABLE = False
    should_use_crewai = None
    call_crewai = None
    get_crewai_health = None  # FIX: was missing — only 2 of 3 names were nulled
from neo4j import AsyncGraphDatabase
# Memory Retrieval Pipeline v3.0
@@ -41,6 +52,10 @@ OCR_URL = os.getenv("OCR_URL", "http://swapper-service:8890") # Swapper /ocr en
DOCUMENT_URL = os.getenv("DOCUMENT_URL", "http://swapper-service:8890") # Swapper /document endpoint
CITY_SERVICE_URL = os.getenv("CITY_SERVICE_URL", "http://daarion-city-service:7001")
# CrewAI Routing Configuration
# Master switch for CrewAI orchestration; enabled unless the env var is set
# to anything other than "true" (case-insensitive).
CREWAI_ROUTING_ENABLED = os.getenv("CREWAI_ROUTING_ENABLED", "true").lower() == "true"
# Base URL of the CrewAI orchestration service.
CREWAI_URL = os.getenv("CREWAI_URL", "http://dagi-staging-crewai-service:9010")
# Neo4j Configuration
# NOTE(review): env var is NEO4J_BOLT_URL while the constant is NEO4J_URI and
# the sibling var is NEO4J_USER — naming is inconsistent but matches the
# deployed environment; confirm before renaming anything.
NEO4J_URI = os.getenv("NEO4J_BOLT_URL", "bolt://neo4j:7687")
NEO4J_USER = os.getenv("NEO4J_USER", "neo4j")
@@ -269,6 +284,21 @@ async def publish_agent_invocation(invocation: AgentInvocation):
else:
print(f"⚠️ NATS not available, invocation not published: {invocation.json()}")
# ==============================================================
# PROMETHEUS METRICS ENDPOINT
# ==============================================================
@app.get("/metrics")
async def prometheus_metrics():
    """Serve Prometheus metrics, degrading to a plain-text error body on failure."""
    try:
        # Lazy import: the gateway must still serve this route when the
        # optional agent_metrics module is absent.
        from agent_metrics import get_metrics, get_content_type
        body = get_metrics()
        content_type = get_content_type()
        return Response(content=body, media_type=content_type)
    except Exception as e:
        logger.error(f"Metrics error: {e}")
        return Response(content=b"# Error generating metrics", media_type="text/plain")
@app.get("/health")
async def health():
"""Health check endpoint"""
@@ -346,6 +376,31 @@ class InferResponse(BaseModel):
image_base64: Optional[str] = None # Generated image in base64 format
# =========================================================================
# INTERNAL LLM API (for CrewAI and internal services)
# =========================================================================
class InternalLLMRequest(BaseModel):
    """Request body for the internal /internal/llm/complete endpoint.

    Used by the CrewAI service for raw completions — no agent routing
    or CrewAI decision is applied to these requests.
    """
    prompt: str                                # user prompt (required)
    system_prompt: Optional[str] = None        # optional system message
    llm_profile: Optional[str] = "reasoning"   # key into router config's llm_profiles
    model: Optional[str] = None                # explicit model override
    max_tokens: Optional[int] = 2048           # completion token budget
    temperature: Optional[float] = 0.2         # sampling temperature
    role_context: Optional[str] = None         # role name prepended to the system prompt
    metadata: Optional[Dict[str, Any]] = None  # opaque caller metadata
class InternalLLMResponse(BaseModel):
    """Response body for the internal /internal/llm/complete endpoint."""
    text: str            # generated completion text
    model: str           # model name that produced the text
    provider: str        # provider id, e.g. "deepseek", "mistral", "grok", "ollama"
    tokens_used: int = 0  # total tokens reported by the provider (0 when unknown)
    latency_ms: int = 0   # end-to-end latency in milliseconds
class BackendStatus(BaseModel):
"""Status of a backend service"""
name: str
@@ -447,6 +502,100 @@ async def get_backends_status():
return backends
# =========================================================================
# INTERNAL LLM COMPLETE ENDPOINT (for CrewAI)
# =========================================================================
@app.post("/internal/llm/complete", response_model=InternalLLMResponse)
async def internal_llm_complete(request: InternalLLMRequest):
    """
    Internal LLM completion endpoint.

    NO routing, NO CrewAI decision, NO agent selection.
    Used by CrewAI service for multi-role orchestration.

    Resolution order:
      1. Cloud providers with a configured API key, preferred provider first.
      2. Local Ollama on the docker bridge gateway as the final fallback.

    Raises:
        HTTPException: 503 when every provider fails or is unconfigured.
    """
    t0 = time.time()  # `time` is imported at module level; no local re-import needed
    logger.info(f"Internal LLM: profile={request.llm_profile}, role={request.role_context}")

    # Resolve the LLM profile from the router configuration.
    llm_profiles = router_config.get("llm_profiles", {})
    profile_name = request.llm_profile or "reasoning"
    llm_profile = llm_profiles.get(profile_name, {})

    provider = llm_profile.get("provider", "deepseek")
    # FIX: use explicit None checks instead of `or` — truthiness would silently
    # replace a legitimate temperature=0.0 (or max_tokens=0) with the profile
    # default, breaking deterministic sampling requests.
    max_tokens = request.max_tokens if request.max_tokens is not None else llm_profile.get("max_tokens", 2048)
    temperature = request.temperature if request.temperature is not None else llm_profile.get("temperature", 0.2)
    # NOTE(review): request.model / the profile's model are not forwarded to
    # providers — each provider below uses its own hard-coded model name.
    # Confirm whether honoring request.model is intended before changing this.

    # Build OpenAI-style chat messages; role_context is folded into the
    # system message when present.
    messages = []
    if request.system_prompt:
        system_content = request.system_prompt
        if request.role_context:
            system_content = f"[Role: {request.role_context}]\n\n{system_content}"
        messages.append({"role": "system", "content": system_content})
    elif request.role_context:
        messages.append({"role": "system", "content": f"You are acting as {request.role_context}. Respond professionally."})
    messages.append({"role": "user", "content": request.prompt})

    # Cloud providers exposing OpenAI-compatible /v1/chat/completions APIs.
    cloud_providers = [
        {"name": "deepseek", "api_key_env": "DEEPSEEK_API_KEY", "base_url": "https://api.deepseek.com", "model": "deepseek-chat", "timeout": 60},
        {"name": "mistral", "api_key_env": "MISTRAL_API_KEY", "base_url": "https://api.mistral.ai", "model": "mistral-large-latest", "timeout": 60},
        {"name": "grok", "api_key_env": "GROK_API_KEY", "base_url": "https://api.x.ai", "model": "grok-2-1212", "timeout": 60}
    ]
    # Move the profile's preferred provider to the front; the rest stay as fallbacks.
    if provider in ["deepseek", "mistral", "grok"]:
        cloud_providers = sorted(cloud_providers, key=lambda x: 0 if x["name"] == provider else 1)

    # Try cloud providers in order; any failure moves on to the next one.
    for cloud in cloud_providers:
        api_key = os.getenv(cloud["api_key_env"])
        if not api_key:
            continue  # provider not configured in this environment
        try:
            logger.debug(f"Internal LLM trying {cloud['name']}")
            cloud_resp = await http_client.post(
                f"{cloud['base_url']}/v1/chat/completions",
                headers={"Authorization": f"Bearer {api_key}", "Content-Type": "application/json"},
                json={"model": cloud["model"], "messages": messages, "max_tokens": max_tokens, "temperature": temperature, "stream": False},
                timeout=cloud["timeout"]
            )
            if cloud_resp.status_code == 200:
                data = cloud_resp.json()
                response_text = data.get("choices", [{}])[0].get("message", {}).get("content", "")
                tokens = data.get("usage", {}).get("total_tokens", 0)
                latency = int((time.time() - t0) * 1000)
                logger.info(f"Internal LLM success: {cloud['name']}, {tokens} tokens, {latency}ms")
                return InternalLLMResponse(text=response_text, model=cloud["model"], provider=cloud["name"], tokens_used=tokens, latency_ms=latency)
        except Exception as e:
            logger.warning(f"Internal LLM {cloud['name']} failed: {e}")
            continue

    # Final fallback: local Ollama on the docker bridge gateway.
    # NOTE(review): this path sends the raw prompt/system_prompt and ignores
    # role_context, unlike the cloud path — confirm whether that is intended.
    try:
        logger.info("Internal LLM fallback to Ollama")
        ollama_resp = await http_client.post(
            "http://172.18.0.1:11434/api/generate",
            json={"model": "qwen3:8b", "prompt": request.prompt, "system": request.system_prompt or "", "stream": False, "options": {"num_predict": max_tokens, "temperature": temperature}},
            timeout=120.0
        )
        if ollama_resp.status_code == 200:
            data = ollama_resp.json()
            latency = int((time.time() - t0) * 1000)
            return InternalLLMResponse(text=data.get("response", ""), model="qwen3:8b", provider="ollama", tokens_used=0, latency_ms=latency)
    except Exception as e:
        logger.error(f"Internal LLM Ollama failed: {e}")

    raise HTTPException(status_code=503, detail="All LLM providers unavailable")
@app.post("/v1/agents/{agent_id}/infer", response_model=InferResponse)
async def agent_infer(agent_id: str, request: InferRequest):
"""
@@ -519,9 +668,73 @@ async def agent_infer(agent_id: str, request: InferRequest):
system_prompt = agent_config.get("system_prompt")
# Determine which backend to use
# Use router config to get default model for agent, fallback to qwen3-8b
# Use router config to get default model for agent, fallback to qwen3:8b
agent_config = router_config.get("agents", {}).get(agent_id, {})
default_llm = agent_config.get("default_llm", "qwen3-8b")
# =========================================================================
# CREWAI DECISION: Use orchestration or direct LLM?
# =========================================================================
if CREWAI_ROUTING_ENABLED and CREWAI_CLIENT_AVAILABLE:
try:
# Get agent CrewAI config from registry (or router_config fallback)
crewai_cfg = agent_config.get("crewai", {})
use_crewai, crewai_reason = should_use_crewai(
agent_id=agent_id,
prompt=request.prompt,
agent_config=agent_config,
force_crewai=request.metadata.get("force_crewai", False) if request.metadata else False,
)
logger.info(f"🎭 CrewAI decision for {agent_id}: {use_crewai} ({crewai_reason})")
if use_crewai:
t0 = time.time()
crew_result = await call_crewai(
agent_id=agent_id,
task=request.prompt,
context={
"memory_brief": memory_brief_text,
"system_prompt": system_prompt,
"metadata": metadata,
},
team=crewai_cfg.get("team")
)
latency = time.time() - t0
if crew_result.get("success") and crew_result.get("result"):
logger.info(f"✅ CrewAI success for {agent_id}: {latency:.2f}s")
# Store interaction in memory
if MEMORY_RETRIEVAL_AVAILABLE and memory_retrieval and chat_id and user_id:
try:
await memory_retrieval.store_interaction(
channel=channel,
chat_id=chat_id,
user_id=user_id,
agent_id=request_agent_id,
username=username,
user_message=request.prompt,
assistant_response=crew_result["result"]
)
except Exception as e:
logger.warning(f"⚠️ Memory storage failed: {e}")
return InferResponse(
response=crew_result["result"],
model="crewai-" + agent_id,
provider="crewai",
tokens_used=0,
latency_ms=int(latency * 1000)
)
else:
logger.warning(f"⚠️ CrewAI failed, falling back to direct LLM")
except Exception as e:
logger.exception(f"❌ CrewAI error: {e}, falling back to direct LLM")
default_llm = agent_config.get("default_llm", "qwen3:8b")
# Check if there's a routing rule for this agent
routing_rules = router_config.get("routing", [])
@@ -542,7 +755,7 @@ async def agent_infer(agent_id: str, request: InferRequest):
model = llm_profile.get("model", "deepseek-chat")
else:
# For local ollama, use swapper model name format
model = request.model or "qwen3-8b"
model = request.model or "qwen3:8b"
# =========================================================================
# VISION PROCESSING (if images present)
@@ -929,9 +1142,9 @@ async def agent_infer(agent_id: str, request: InferRequest):
# Check if default_llm is local
if llm_profile.get("provider") == "ollama":
# Extract model name and convert format (qwen3:8b → qwen3-8b for Swapper)
# Extract model name and convert format (qwen3:8b → qwen3:8b for Swapper)
ollama_model = llm_profile.get("model", "qwen3:8b")
local_model = ollama_model.replace(":", "-") # qwen3:8b → qwen3-8b
local_model = ollama_model.replace(":", "-") # qwen3:8b → qwen3:8b
logger.debug(f"✅ Using agent's default local model: {local_model}")
else:
# Find first local model from config
@@ -944,7 +1157,7 @@ async def agent_infer(agent_id: str, request: InferRequest):
# Final fallback if no local model found
if not local_model:
local_model = "qwen3-8b"
local_model = "qwen3:8b"
logger.warning(f"⚠️ No local model in config, using hardcoded fallback: {local_model}")
try: