snapshot: NODE1 production state 2026-02-09
Complete snapshot of /opt/microdao-daarion/ from NODE1 (144.76.224.179).
This represents the actual running production code that has diverged
significantly from the previous main branch.
Key changes from old main:
- Gateway (http_api.py): expanded from ~40KB to 164KB with full agent support
- Router: new /v1/agents/{id}/infer endpoint with vision + DeepSeek routing
- Behavior Policy: SOWA v2.2 (3-level: FULL/ACK/SILENT)
- Agent Registry: config/agent_registry.yml as single source of truth
- 13 agents configured (was 3)
- Memory service integration
- CrewAI teams and roles
Excluded from snapshot: venv/, .env, data/, backups, .tgz archives
Co-authored-by: Cursor <cursoragent@cursor.com>
This commit is contained in:
@@ -1,4 +1,5 @@
|
||||
from fastapi import FastAPI, HTTPException
|
||||
from fastapi.responses import Response
|
||||
from pydantic import BaseModel
|
||||
from typing import Literal, Optional, Dict, Any, List
|
||||
import asyncio
|
||||
@@ -7,6 +8,16 @@ import os
|
||||
import yaml
|
||||
import httpx
|
||||
import logging
|
||||
import time # For latency metrics
|
||||
|
||||
# CrewAI Integration
# crewai_client is an optional project module: when it is absent the
# gateway degrades to direct LLM routing. Every imported name is stubbed
# to None so callers can check CREWAI_CLIENT_AVAILABLE (or the stubs)
# before invoking them.
try:
    from crewai_client import should_use_crewai, call_crewai, get_crewai_health
    CREWAI_CLIENT_AVAILABLE = True
except ImportError:
    CREWAI_CLIENT_AVAILABLE = False
    should_use_crewai = None
    call_crewai = None
    # Fix: the original left this name undefined on ImportError even
    # though it is imported in the try-branch; any later reference would
    # raise NameError instead of being feature-gated.
    get_crewai_health = None
|
||||
from neo4j import AsyncGraphDatabase
|
||||
|
||||
# Memory Retrieval Pipeline v3.0
|
||||
@@ -41,6 +52,10 @@ OCR_URL = os.getenv("OCR_URL", "http://swapper-service:8890") # Swapper /ocr en
|
||||
DOCUMENT_URL = os.getenv("DOCUMENT_URL", "http://swapper-service:8890") # Swapper /document endpoint
|
||||
CITY_SERVICE_URL = os.getenv("CITY_SERVICE_URL", "http://daarion-city-service:7001")
|
||||
|
||||
# CrewAI Routing Configuration
|
||||
CREWAI_ROUTING_ENABLED = os.getenv("CREWAI_ROUTING_ENABLED", "true").lower() == "true"
|
||||
CREWAI_URL = os.getenv("CREWAI_URL", "http://dagi-staging-crewai-service:9010")
|
||||
|
||||
# Neo4j Configuration
|
||||
NEO4J_URI = os.getenv("NEO4J_BOLT_URL", "bolt://neo4j:7687")
|
||||
NEO4J_USER = os.getenv("NEO4J_USER", "neo4j")
|
||||
@@ -269,6 +284,21 @@ async def publish_agent_invocation(invocation: AgentInvocation):
|
||||
else:
|
||||
print(f"⚠️ NATS not available, invocation not published: {invocation.json()}")
|
||||
|
||||
|
||||
|
||||
# ==============================================================
# PROMETHEUS METRICS ENDPOINT
# ==============================================================
@app.get("/metrics")
async def prometheus_metrics():
    """Serve agent metrics in Prometheus exposition format.

    The agent_metrics module is imported lazily so that a missing or
    broken metrics module degrades to a plain-text error payload rather
    than breaking application startup.
    """
    try:
        from agent_metrics import get_metrics, get_content_type
        payload = get_metrics()
        content_type = get_content_type()
        return Response(content=payload, media_type=content_type)
    except Exception as e:
        # Best-effort endpoint: log and emit a comment line Prometheus
        # scrapers will ignore.
        logger.error(f"Metrics error: {e}")
        return Response(content=b"# Error generating metrics", media_type="text/plain")
|
||||
|
||||
@app.get("/health")
|
||||
async def health():
|
||||
"""Health check endpoint"""
|
||||
@@ -346,6 +376,31 @@ class InferResponse(BaseModel):
|
||||
image_base64: Optional[str] = None # Generated image in base64 format
|
||||
|
||||
|
||||
|
||||
|
||||
# =========================================================================
# INTERNAL LLM API (for CrewAI and internal services)
# =========================================================================

class InternalLLMRequest(BaseModel):
    """Request payload for the /internal/llm/complete endpoint.

    Consumed by the CrewAI service for multi-role orchestration; the
    endpoint applies no agent routing to these requests.
    """
    prompt: str  # user prompt forwarded to the LLM as the user message
    system_prompt: Optional[str] = None  # optional system message content
    llm_profile: Optional[str] = "reasoning"  # key into router config "llm_profiles"
    model: Optional[str] = None  # explicit model override; profile model used when None
    max_tokens: Optional[int] = 2048  # completion token budget
    temperature: Optional[float] = 0.2  # sampling temperature
    role_context: Optional[str] = None  # role name prepended to the system message
    metadata: Optional[Dict[str, Any]] = None  # opaque caller-supplied metadata
||||
|
||||
|
||||
class InternalLLMResponse(BaseModel):
    """Response payload returned by /internal/llm/complete."""
    text: str  # generated completion text
    model: str  # model that produced the completion
    provider: str  # backend that served the request (e.g. "deepseek", "ollama")
    tokens_used: int = 0  # total tokens reported by the provider; 0 when unknown
    latency_ms: int = 0  # end-to-end completion latency in milliseconds
|
||||
|
||||
|
||||
class BackendStatus(BaseModel):
|
||||
"""Status of a backend service"""
|
||||
name: str
|
||||
@@ -447,6 +502,100 @@ async def get_backends_status():
|
||||
return backends
|
||||
|
||||
|
||||
|
||||
|
||||
# =========================================================================
# INTERNAL LLM COMPLETE ENDPOINT (for CrewAI)
# =========================================================================

@app.post("/internal/llm/complete", response_model=InternalLLMResponse)
async def internal_llm_complete(request: InternalLLMRequest):
    """
    Internal LLM completion endpoint.

    NO routing, NO CrewAI decision, NO agent selection.
    Used by CrewAI service for multi-role orchestration.

    Tries cloud providers (the profile's preferred provider first, when
    its API key is configured), then falls back to a local Ollama
    instance; raises 503 when every backend fails.
    """
    # Module-level `time` is already imported; the original re-imported
    # it locally as time_module, which was redundant.
    t0 = time.time()

    logger.info(f"Internal LLM: profile={request.llm_profile}, role={request.role_context}")

    # Resolve the LLM profile from router config; an unknown profile name
    # yields an empty dict so the hard-coded defaults below apply.
    llm_profiles = router_config.get("llm_profiles", {})
    profile_name = request.llm_profile or "reasoning"
    llm_profile = llm_profiles.get(profile_name, {})

    provider = llm_profile.get("provider", "deepseek")
    # NOTE(review): `model` is resolved here but the cloud loop below
    # always sends cloud["model"] and the Ollama fallback hard-codes
    # qwen3:8b, so request.model is effectively ignored — confirm intent.
    model = request.model or llm_profile.get("model", "deepseek-chat")
    # Fix: use explicit `is not None` checks so legitimate falsy values
    # (max_tokens=0, temperature=0.0) are honored instead of being
    # silently replaced by the profile defaults (the original `or` chain
    # dropped them).
    max_tokens = request.max_tokens if request.max_tokens is not None else llm_profile.get("max_tokens", 2048)
    temperature = request.temperature if request.temperature is not None else llm_profile.get("temperature", 0.2)

    # Build the OpenAI-style message list; role_context is prepended to
    # the system prompt, or substitutes for it when none was given.
    messages = []
    if request.system_prompt:
        system_content = request.system_prompt
        if request.role_context:
            system_content = f"[Role: {request.role_context}]\n\n{system_content}"
        messages.append({"role": "system", "content": system_content})
    elif request.role_context:
        messages.append({"role": "system", "content": f"You are acting as {request.role_context}. Respond professionally."})

    messages.append({"role": "user", "content": request.prompt})

    # Cloud providers in default priority order; a provider is attempted
    # only when its API key is present in the environment.
    cloud_providers = [
        {"name": "deepseek", "api_key_env": "DEEPSEEK_API_KEY", "base_url": "https://api.deepseek.com", "model": "deepseek-chat", "timeout": 60},
        {"name": "mistral", "api_key_env": "MISTRAL_API_KEY", "base_url": "https://api.mistral.ai", "model": "mistral-large-latest", "timeout": 60},
        {"name": "grok", "api_key_env": "GROK_API_KEY", "base_url": "https://api.x.ai", "model": "grok-2-1212", "timeout": 60}
    ]

    # Move the profile's preferred provider to the front of the list.
    if provider in ["deepseek", "mistral", "grok"]:
        cloud_providers = sorted(cloud_providers, key=lambda x: 0 if x["name"] == provider else 1)

    # Try cloud providers in order; any failure falls through to the next.
    for cloud in cloud_providers:
        api_key = os.getenv(cloud["api_key_env"])
        if not api_key:
            continue

        try:
            logger.debug(f"Internal LLM trying {cloud['name']}")
            cloud_resp = await http_client.post(
                f"{cloud['base_url']}/v1/chat/completions",
                headers={"Authorization": f"Bearer {api_key}", "Content-Type": "application/json"},
                json={"model": cloud["model"], "messages": messages, "max_tokens": max_tokens, "temperature": temperature, "stream": False},
                timeout=cloud["timeout"]
            )

            if cloud_resp.status_code == 200:
                data = cloud_resp.json()
                response_text = data.get("choices", [{}])[0].get("message", {}).get("content", "")
                tokens = data.get("usage", {}).get("total_tokens", 0)
                latency = int((time.time() - t0) * 1000)
                logger.info(f"Internal LLM success: {cloud['name']}, {tokens} tokens, {latency}ms")
                return InternalLLMResponse(text=response_text, model=cloud["model"], provider=cloud["name"], tokens_used=tokens, latency_ms=latency)
        except Exception as e:
            logger.warning(f"Internal LLM {cloud['name']} failed: {e}")
            continue

    # Fallback to a local Ollama instance (Docker host bridge address).
    try:
        logger.info("Internal LLM fallback to Ollama")
        ollama_resp = await http_client.post(
            "http://172.18.0.1:11434/api/generate",
            json={"model": "qwen3:8b", "prompt": request.prompt, "system": request.system_prompt or "", "stream": False, "options": {"num_predict": max_tokens, "temperature": temperature}},
            timeout=120.0
        )
        if ollama_resp.status_code == 200:
            data = ollama_resp.json()
            latency = int((time.time() - t0) * 1000)
            return InternalLLMResponse(text=data.get("response", ""), model="qwen3:8b", provider="ollama", tokens_used=0, latency_ms=latency)
    except Exception as e:
        logger.error(f"Internal LLM Ollama failed: {e}")

    raise HTTPException(status_code=503, detail="All LLM providers unavailable")
|
||||
|
||||
|
||||
@app.post("/v1/agents/{agent_id}/infer", response_model=InferResponse)
|
||||
async def agent_infer(agent_id: str, request: InferRequest):
|
||||
"""
|
||||
@@ -519,9 +668,73 @@ async def agent_infer(agent_id: str, request: InferRequest):
|
||||
system_prompt = agent_config.get("system_prompt")
|
||||
|
||||
# Determine which backend to use
|
||||
# Use router config to get default model for agent, fallback to qwen3-8b
|
||||
# Use router config to get default model for agent, fallback to qwen3:8b
|
||||
agent_config = router_config.get("agents", {}).get(agent_id, {})
|
||||
default_llm = agent_config.get("default_llm", "qwen3-8b")
|
||||
|
||||
# =========================================================================
|
||||
# CREWAI DECISION: Use orchestration or direct LLM?
|
||||
# =========================================================================
|
||||
if CREWAI_ROUTING_ENABLED and CREWAI_CLIENT_AVAILABLE:
|
||||
try:
|
||||
# Get agent CrewAI config from registry (or router_config fallback)
|
||||
crewai_cfg = agent_config.get("crewai", {})
|
||||
|
||||
use_crewai, crewai_reason = should_use_crewai(
|
||||
agent_id=agent_id,
|
||||
prompt=request.prompt,
|
||||
agent_config=agent_config,
|
||||
force_crewai=request.metadata.get("force_crewai", False) if request.metadata else False,
|
||||
|
||||
)
|
||||
|
||||
logger.info(f"🎭 CrewAI decision for {agent_id}: {use_crewai} ({crewai_reason})")
|
||||
|
||||
if use_crewai:
|
||||
t0 = time.time()
|
||||
crew_result = await call_crewai(
|
||||
agent_id=agent_id,
|
||||
task=request.prompt,
|
||||
context={
|
||||
"memory_brief": memory_brief_text,
|
||||
"system_prompt": system_prompt,
|
||||
"metadata": metadata,
|
||||
},
|
||||
team=crewai_cfg.get("team")
|
||||
)
|
||||
|
||||
latency = time.time() - t0
|
||||
|
||||
if crew_result.get("success") and crew_result.get("result"):
|
||||
logger.info(f"✅ CrewAI success for {agent_id}: {latency:.2f}s")
|
||||
|
||||
# Store interaction in memory
|
||||
if MEMORY_RETRIEVAL_AVAILABLE and memory_retrieval and chat_id and user_id:
|
||||
try:
|
||||
await memory_retrieval.store_interaction(
|
||||
channel=channel,
|
||||
chat_id=chat_id,
|
||||
user_id=user_id,
|
||||
agent_id=request_agent_id,
|
||||
username=username,
|
||||
user_message=request.prompt,
|
||||
assistant_response=crew_result["result"]
|
||||
)
|
||||
except Exception as e:
|
||||
logger.warning(f"⚠️ Memory storage failed: {e}")
|
||||
|
||||
return InferResponse(
|
||||
response=crew_result["result"],
|
||||
model="crewai-" + agent_id,
|
||||
provider="crewai",
|
||||
tokens_used=0,
|
||||
latency_ms=int(latency * 1000)
|
||||
)
|
||||
else:
|
||||
logger.warning(f"⚠️ CrewAI failed, falling back to direct LLM")
|
||||
except Exception as e:
|
||||
logger.exception(f"❌ CrewAI error: {e}, falling back to direct LLM")
|
||||
|
||||
default_llm = agent_config.get("default_llm", "qwen3:8b")
|
||||
|
||||
# Check if there's a routing rule for this agent
|
||||
routing_rules = router_config.get("routing", [])
|
||||
@@ -542,7 +755,7 @@ async def agent_infer(agent_id: str, request: InferRequest):
|
||||
model = llm_profile.get("model", "deepseek-chat")
|
||||
else:
|
||||
# For local ollama, use swapper model name format
|
||||
model = request.model or "qwen3-8b"
|
||||
model = request.model or "qwen3:8b"
|
||||
|
||||
# =========================================================================
|
||||
# VISION PROCESSING (if images present)
|
||||
@@ -929,9 +1142,9 @@ async def agent_infer(agent_id: str, request: InferRequest):
|
||||
|
||||
# Check if default_llm is local
|
||||
if llm_profile.get("provider") == "ollama":
|
||||
# Extract model name and convert format (qwen3:8b → qwen3-8b for Swapper)
|
||||
# Extract model name and convert format (qwen3:8b → qwen3:8b for Swapper)
|
||||
ollama_model = llm_profile.get("model", "qwen3:8b")
|
||||
local_model = ollama_model.replace(":", "-") # qwen3:8b → qwen3-8b
|
||||
local_model = ollama_model.replace(":", "-") # qwen3:8b → qwen3:8b
|
||||
logger.debug(f"✅ Using agent's default local model: {local_model}")
|
||||
else:
|
||||
# Find first local model from config
|
||||
@@ -944,7 +1157,7 @@ async def agent_infer(agent_id: str, request: InferRequest):
|
||||
|
||||
# Final fallback if no local model found
|
||||
if not local_model:
|
||||
local_model = "qwen3-8b"
|
||||
local_model = "qwen3:8b"
|
||||
logger.warning(f"⚠️ No local model in config, using hardcoded fallback: {local_model}")
|
||||
|
||||
try:
|
||||
|
||||
Reference in New Issue
Block a user