feat: Add Alateya, Clan, Eonarch agents + fix gateway-router connection
## Agents Added
- Alateya: R&D, biotech, innovations
- Clan (Spirit): Community spirit agent
- Eonarch: Consciousness evolution agent

## Changes
- docker-compose.node1.yml: Added tokens for all 3 new agents
- gateway-bot/http_api.py: Added configs and webhook endpoints
- gateway-bot/clan_prompt.txt: New prompt file
- gateway-bot/eonarch_prompt.txt: New prompt file

## Fixes
- Fixed ROUTER_URL from :9102 to :8000 (internal container port)
- All 9 Telegram agents now working

## Documentation
- Created PROJECT-MASTER-INDEX.md - single entry point
- Added various status documents and scripts

Tokens configured:
- Helion, NUTRA, Agromatrix (existing)
- Alateya, Clan, Eonarch (new)
- Druid, GreenFood, DAARWIZZ (configured)
This commit is contained in:
@@ -9,6 +9,22 @@ import httpx
|
||||
import logging
|
||||
from neo4j import AsyncGraphDatabase
|
||||
|
||||
# Memory Retrieval Pipeline v3.0
|
||||
try:
|
||||
from memory_retrieval import memory_retrieval, MemoryBrief
|
||||
MEMORY_RETRIEVAL_AVAILABLE = True
|
||||
except ImportError:
|
||||
MEMORY_RETRIEVAL_AVAILABLE = False
|
||||
memory_retrieval = None
|
||||
|
||||
# Tool Manager for Function Calling
|
||||
try:
|
||||
from tool_manager import ToolManager, ToolResult, format_tool_calls_for_response
|
||||
TOOL_MANAGER_AVAILABLE = True
|
||||
except ImportError:
|
||||
TOOL_MANAGER_AVAILABLE = False
|
||||
ToolManager = None
|
||||
|
||||
logging.basicConfig(level=logging.INFO)
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@@ -41,6 +57,9 @@ neo4j_available = False
|
||||
nc = None
|
||||
nats_available = False
|
||||
|
||||
# Tool Manager
|
||||
tool_manager = None
|
||||
|
||||
# Models
|
||||
class FilterDecision(BaseModel):
|
||||
channel_id: str
|
||||
@@ -135,6 +154,26 @@ async def startup_event():
|
||||
logger.warning("⚠️ Running in test mode (HTTP only)")
|
||||
nats_available = False
|
||||
|
||||
# Initialize Memory Retrieval Pipeline
|
||||
if MEMORY_RETRIEVAL_AVAILABLE and memory_retrieval:
|
||||
try:
|
||||
await memory_retrieval.initialize()
|
||||
logger.info("✅ Memory Retrieval Pipeline initialized")
|
||||
except Exception as e:
|
||||
logger.warning(f"⚠️ Memory Retrieval init failed: {e}")
|
||||
|
||||
# Initialize Tool Manager for function calling
|
||||
global tool_manager
|
||||
if TOOL_MANAGER_AVAILABLE and ToolManager:
|
||||
try:
|
||||
tool_manager = ToolManager(router_config)
|
||||
logger.info("✅ Tool Manager initialized with function calling")
|
||||
except Exception as e:
|
||||
logger.warning(f"⚠️ Tool Manager init failed: {e}")
|
||||
tool_manager = None
|
||||
else:
|
||||
tool_manager = None
|
||||
|
||||
# Log backend URLs
|
||||
logger.info(f"📡 Swapper URL: {SWAPPER_URL}")
|
||||
logger.info(f"📡 STT URL: {STT_URL}")
|
||||
@@ -294,6 +333,8 @@ class InferRequest(BaseModel):
|
||||
max_tokens: Optional[int] = 2048
|
||||
temperature: Optional[float] = 0.7
|
||||
system_prompt: Optional[str] = None
|
||||
images: Optional[List[str]] = None # List of base64 data URLs for vision
|
||||
metadata: Optional[Dict[str, Any]] = None # Additional metadata (user_id, chat_id, etc.)
|
||||
|
||||
|
||||
class InferResponse(BaseModel):
|
||||
@@ -302,6 +343,7 @@ class InferResponse(BaseModel):
|
||||
model: str
|
||||
tokens_used: Optional[int] = None
|
||||
backend: str
|
||||
image_base64: Optional[str] = None # Generated image in base64 format
|
||||
|
||||
|
||||
class BackendStatus(BaseModel):
|
||||
@@ -416,13 +458,51 @@ async def agent_infer(agent_id: str, request: InferRequest):
|
||||
- Backend availability
|
||||
|
||||
System prompt is fetched from database via city-service API.
|
||||
Memory context is retrieved via Memory Retrieval Pipeline v3.0.
|
||||
"""
|
||||
logger.info(f"🔀 Inference request for agent: {agent_id}")
|
||||
logger.info(f"📝 Prompt: {request.prompt[:100]}...")
|
||||
|
||||
# =========================================================================
|
||||
# MEMORY RETRIEVAL (v4.0 - Universal for all agents)
|
||||
# =========================================================================
|
||||
memory_brief_text = ""
|
||||
# Extract metadata once for both retrieval and storage
|
||||
metadata = request.metadata or {}
|
||||
channel = "telegram" # Default
|
||||
chat_id = str(metadata.get("chat_id", ""))
|
||||
user_id = str(metadata.get("user_id", "")).replace("tg:", "")
|
||||
username = metadata.get("username")
|
||||
# Get agent_id from metadata or URL parameter
|
||||
request_agent_id = metadata.get("agent_id", agent_id).lower()
|
||||
|
||||
if MEMORY_RETRIEVAL_AVAILABLE and memory_retrieval:
|
||||
try:
|
||||
if chat_id and user_id:
|
||||
brief = await memory_retrieval.retrieve(
|
||||
channel=channel,
|
||||
chat_id=chat_id,
|
||||
user_id=user_id,
|
||||
agent_id=request_agent_id, # Agent-specific collections
|
||||
username=username,
|
||||
message=request.prompt
|
||||
)
|
||||
memory_brief_text = brief.to_text(max_lines=10)
|
||||
if memory_brief_text:
|
||||
logger.info(f"🧠 Memory brief for {request_agent_id}: {len(memory_brief_text)} chars")
|
||||
except Exception as e:
|
||||
logger.warning(f"⚠️ Memory retrieval failed for {request_agent_id}: {e}")
|
||||
|
||||
# Get system prompt from database or config
|
||||
system_prompt = request.system_prompt
|
||||
|
||||
# Debug logging for system prompt
|
||||
if system_prompt:
|
||||
logger.info(f"📝 Received system_prompt from request: {len(system_prompt)} chars")
|
||||
logger.debug(f"System prompt preview: {system_prompt[:200]}...")
|
||||
else:
|
||||
logger.warning(f"⚠️ No system_prompt in request for agent {agent_id}, trying to load...")
|
||||
|
||||
if not system_prompt:
|
||||
try:
|
||||
from prompt_builder import get_agent_system_prompt
|
||||
@@ -465,77 +545,418 @@ async def agent_infer(agent_id: str, request: InferRequest):
|
||||
model = request.model or "qwen3-8b"
|
||||
|
||||
# =========================================================================
|
||||
# CLOUD PROVIDERS (DeepSeek, OpenAI, etc.)
|
||||
# VISION PROCESSING (if images present)
|
||||
# =========================================================================
|
||||
if provider == "deepseek":
|
||||
if request.images and len(request.images) > 0:
|
||||
logger.info(f"🖼️ Vision request: {len(request.images)} image(s)")
|
||||
try:
|
||||
api_key = os.getenv(llm_profile.get("api_key_env", "DEEPSEEK_API_KEY"))
|
||||
base_url = llm_profile.get("base_url", "https://api.deepseek.com")
|
||||
# Use Swapper's /vision endpoint (manages model loading)
|
||||
vision_payload = {
|
||||
"model": "qwen3-vl-8b",
|
||||
"prompt": request.prompt,
|
||||
"images": request.images, # Swapper handles data URL conversion
|
||||
"max_tokens": request.max_tokens or 1024,
|
||||
"temperature": request.temperature or 0.7
|
||||
}
|
||||
|
||||
if not api_key:
|
||||
logger.error("❌ DeepSeek API key not configured")
|
||||
raise HTTPException(status_code=500, detail="DeepSeek API key not configured")
|
||||
|
||||
logger.info(f"🌐 Calling DeepSeek API with model: {model}")
|
||||
|
||||
# Build messages array for chat completion
|
||||
messages = []
|
||||
# Add system prompt if available
|
||||
if system_prompt:
|
||||
messages.append({"role": "system", "content": system_prompt})
|
||||
messages.append({"role": "user", "content": request.prompt})
|
||||
if memory_brief_text:
|
||||
vision_payload["system"] = f"{system_prompt}\n\n[Контекст пам'яті]\n{memory_brief_text}"
|
||||
else:
|
||||
vision_payload["system"] = system_prompt
|
||||
|
||||
deepseek_resp = await http_client.post(
|
||||
f"{base_url}/v1/chat/completions",
|
||||
logger.info(f"🖼️ Sending to Swapper /vision: {SWAPPER_URL}/vision")
|
||||
|
||||
vision_resp = await http_client.post(
|
||||
f"{SWAPPER_URL}/vision",
|
||||
json=vision_payload,
|
||||
timeout=120.0
|
||||
)
|
||||
|
||||
if vision_resp.status_code == 200:
|
||||
vision_data = vision_resp.json()
|
||||
full_response = vision_data.get("text", "")
|
||||
|
||||
# Debug: log full response structure
|
||||
logger.info(f"✅ Vision response: {len(full_response)} chars, success={vision_data.get('success')}, keys={list(vision_data.keys())}")
|
||||
if not full_response:
|
||||
logger.warning(f"⚠️ Empty vision response! Full data: {str(vision_data)[:500]}")
|
||||
|
||||
# Store vision message in agent-specific memory
|
||||
if MEMORY_RETRIEVAL_AVAILABLE and memory_retrieval and chat_id and user_id and full_response:
|
||||
asyncio.create_task(
|
||||
memory_retrieval.store_message(
|
||||
agent_id=request_agent_id,
|
||||
user_id=user_id,
|
||||
username=username,
|
||||
message_text=f"[Image] {request.prompt}",
|
||||
response_text=full_response,
|
||||
chat_id=chat_id,
|
||||
message_type="vision"
|
||||
)
|
||||
)
|
||||
|
||||
return InferResponse(
|
||||
response=full_response,
|
||||
model="qwen3-vl-8b",
|
||||
tokens_used=None,
|
||||
backend="swapper-vision"
|
||||
)
|
||||
else:
|
||||
logger.error(f"❌ Swapper vision error: {vision_resp.status_code} - {vision_resp.text[:200]}")
|
||||
# Fall through to text processing
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"❌ Vision processing failed: {e}", exc_info=True)
|
||||
# Fall through to text processing
|
||||
|
||||
# =========================================================================
|
||||
# SMART LLM ROUTER WITH AUTO-FALLBACK
|
||||
# Priority: DeepSeek → Mistral → Grok → Local Ollama
|
||||
# =========================================================================
|
||||
|
||||
# Build messages array once for all providers
|
||||
messages = []
|
||||
if system_prompt:
|
||||
if memory_brief_text:
|
||||
enhanced_prompt = f"{system_prompt}\n\n[Контекст пам'яті]\n{memory_brief_text}"
|
||||
messages.append({"role": "system", "content": enhanced_prompt})
|
||||
logger.info(f"📝 Added system message with prompt ({len(system_prompt)} chars) + memory ({len(memory_brief_text)} chars)")
|
||||
else:
|
||||
messages.append({"role": "system", "content": system_prompt})
|
||||
logger.info(f"📝 Added system message with prompt ({len(system_prompt)} chars)")
|
||||
elif memory_brief_text:
|
||||
messages.append({"role": "system", "content": f"[Контекст пам'яті]\n{memory_brief_text}"})
|
||||
logger.warning(f"⚠️ No system_prompt! Using only memory brief ({len(memory_brief_text)} chars)")
|
||||
else:
|
||||
logger.error(f"❌ No system_prompt AND no memory_brief! LLM will have no context!")
|
||||
|
||||
messages.append({"role": "user", "content": request.prompt})
|
||||
logger.debug(f"📨 Messages array: {len(messages)} messages, system={len(messages[0].get('content', '')) if messages else 0} chars")
|
||||
|
||||
max_tokens = request.max_tokens or llm_profile.get("max_tokens", 2048)
|
||||
temperature = request.temperature or llm_profile.get("temperature", 0.2)
|
||||
|
||||
# Define cloud providers with fallback order
|
||||
cloud_providers = [
|
||||
{
|
||||
"name": "deepseek",
|
||||
"api_key_env": "DEEPSEEK_API_KEY",
|
||||
"base_url": "https://api.deepseek.com",
|
||||
"model": "deepseek-chat",
|
||||
"timeout": 40
|
||||
},
|
||||
{
|
||||
"name": "mistral",
|
||||
"api_key_env": "MISTRAL_API_KEY",
|
||||
"base_url": "https://api.mistral.ai",
|
||||
"model": "mistral-large-latest",
|
||||
"timeout": 60
|
||||
},
|
||||
{
|
||||
"name": "grok",
|
||||
"api_key_env": "GROK_API_KEY",
|
||||
"base_url": "https://api.x.ai",
|
||||
"model": "grok-2-1212",
|
||||
"timeout": 60
|
||||
}
|
||||
]
|
||||
|
||||
# If specific provider requested, try it first
|
||||
if provider in ["deepseek", "mistral", "grok"]:
|
||||
# Reorder to put requested provider first
|
||||
cloud_providers = sorted(cloud_providers, key=lambda x: 0 if x["name"] == provider else 1)
|
||||
|
||||
last_error = None
|
||||
|
||||
# Get tool definitions if Tool Manager is available
|
||||
tools_payload = None
|
||||
if TOOL_MANAGER_AVAILABLE and tool_manager:
|
||||
tools_payload = tool_manager.get_tool_definitions()
|
||||
logger.debug(f"🔧 {len(tools_payload)} tools available for function calling")
|
||||
|
||||
for cloud in cloud_providers:
|
||||
api_key = os.getenv(cloud["api_key_env"])
|
||||
if not api_key:
|
||||
logger.debug(f"⏭️ Skipping {cloud['name']}: API key not configured")
|
||||
continue
|
||||
|
||||
try:
|
||||
logger.info(f"🌐 Trying {cloud['name'].upper()} API with model: {cloud['model']}")
|
||||
|
||||
# Build request payload
|
||||
request_payload = {
|
||||
"model": cloud["model"],
|
||||
"messages": messages,
|
||||
"max_tokens": max_tokens,
|
||||
"temperature": temperature,
|
||||
"stream": False
|
||||
}
|
||||
|
||||
# Add tools for function calling (if available and supported)
|
||||
if tools_payload and cloud["name"] in ["deepseek", "mistral", "grok"]:
|
||||
request_payload["tools"] = tools_payload
|
||||
request_payload["tool_choice"] = "auto"
|
||||
|
||||
cloud_resp = await http_client.post(
|
||||
f"{cloud['base_url']}/v1/chat/completions",
|
||||
headers={
|
||||
"Authorization": f"Bearer {api_key}",
|
||||
"Content-Type": "application/json"
|
||||
},
|
||||
json={
|
||||
"model": model,
|
||||
"messages": messages,
|
||||
"max_tokens": request.max_tokens or llm_profile.get("max_tokens", 2048),
|
||||
"temperature": request.temperature or llm_profile.get("temperature", 0.2),
|
||||
"stream": False
|
||||
},
|
||||
timeout=llm_profile.get("timeout_ms", 40000) / 1000
|
||||
json=request_payload,
|
||||
timeout=cloud["timeout"]
|
||||
)
|
||||
|
||||
if deepseek_resp.status_code == 200:
|
||||
data = deepseek_resp.json()
|
||||
response_text = data.get("choices", [{}])[0].get("message", {}).get("content", "")
|
||||
if cloud_resp.status_code == 200:
|
||||
data = cloud_resp.json()
|
||||
choice = data.get("choices", [{}])[0]
|
||||
message = choice.get("message", {})
|
||||
response_text = message.get("content", "") or ""
|
||||
tokens_used = data.get("usage", {}).get("total_tokens", 0)
|
||||
|
||||
logger.info(f"✅ DeepSeek response received, {tokens_used} tokens")
|
||||
return InferResponse(
|
||||
response=response_text,
|
||||
model=model,
|
||||
tokens_used=tokens_used,
|
||||
backend="deepseek-cloud"
|
||||
)
|
||||
else:
|
||||
logger.error(f"❌ DeepSeek error: {deepseek_resp.status_code} - {deepseek_resp.text}")
|
||||
raise HTTPException(status_code=deepseek_resp.status_code, detail=f"DeepSeek API error: {deepseek_resp.text}")
|
||||
# Initialize tool_results to avoid UnboundLocalError
|
||||
tool_results = []
|
||||
|
||||
# Check for tool calls (standard format)
|
||||
tool_calls = message.get("tool_calls", [])
|
||||
|
||||
# Also check for DSML format in content (DeepSeek sometimes returns this)
|
||||
# AGGRESSIVE check - any DSML-like pattern should be caught
|
||||
import re
|
||||
has_dsml = False
|
||||
if response_text:
|
||||
# Check for DSML patterns with regex (handles Unicode variations)
|
||||
dsml_patterns_check = [
|
||||
r'DSML', # Any mention of DSML
|
||||
r'function_calls>',
|
||||
r'invoke\s*name\s*=',
|
||||
r'parameter\s*name\s*=',
|
||||
r'<[^>]*invoke[^>]*>',
|
||||
r'</[^>]*invoke[^>]*>',
|
||||
]
|
||||
for pattern in dsml_patterns_check:
|
||||
if re.search(pattern, response_text, re.IGNORECASE):
|
||||
has_dsml = True
|
||||
logger.warning(f"⚠️ DSML detected via pattern: {pattern}")
|
||||
break
|
||||
|
||||
if has_dsml:
|
||||
logger.warning("⚠️ Detected DSML format in content, parsing...")
|
||||
# Extract tool name and parameters from DSML
|
||||
import re
|
||||
# Try multiple DSML patterns
|
||||
dsml_patterns = [
|
||||
r'invoke name="(\w+)".*?parameter name="(\w+)"[^>]*>([^<]+)',
|
||||
r'invoke\s+name="(\w+)".*?parameter\s+name="(\w+)"[^>]*>([^<]+)',
|
||||
r'name="web_extract".*?url.*?>([^\s<]+)',
|
||||
]
|
||||
dsml_match = None
|
||||
for pattern in dsml_patterns:
|
||||
dsml_match = re.search(pattern, response_text, re.DOTALL | re.IGNORECASE)
|
||||
if dsml_match:
|
||||
break
|
||||
|
||||
if dsml_match and len(dsml_match.groups()) >= 3:
|
||||
tool_name = dsml_match.group(1)
|
||||
param_name = dsml_match.group(2)
|
||||
param_value = dsml_match.group(3).strip()
|
||||
# Create synthetic tool call with Mistral-compatible ID (exactly 9 chars, a-zA-Z0-9)
|
||||
import string
|
||||
import random
|
||||
tool_call_id = ''.join(random.choices(string.ascii_letters + string.digits, k=9))
|
||||
tool_calls = [{
|
||||
"id": tool_call_id,
|
||||
"function": {
|
||||
"name": tool_name,
|
||||
"arguments": json.dumps({param_name: param_value})
|
||||
}
|
||||
}]
|
||||
logger.info(f"🔧 Parsed DSML tool call: {tool_name}({param_name}={param_value[:50]}...) id={tool_call_id}")
|
||||
|
||||
# ALWAYS clear DSML content - never show raw DSML to user
|
||||
logger.warning(f"🧹 Clearing DSML content from response ({len(response_text)} chars)")
|
||||
response_text = ""
|
||||
if tool_calls and tool_manager:
|
||||
logger.info(f"🔧 LLM requested {len(tool_calls)} tool call(s)")
|
||||
|
||||
# Execute each tool call
|
||||
tool_results = []
|
||||
for tc in tool_calls:
|
||||
func = tc.get("function", {})
|
||||
tool_name = func.get("name", "")
|
||||
try:
|
||||
tool_args = json.loads(func.get("arguments", "{}"))
|
||||
except:
|
||||
tool_args = {}
|
||||
|
||||
result = await tool_manager.execute_tool(tool_name, tool_args)
|
||||
tool_result_dict = {
|
||||
"tool_call_id": tc.get("id", ""),
|
||||
"name": tool_name,
|
||||
"success": result.success,
|
||||
"result": result.result,
|
||||
"error": result.error,
|
||||
"image_base64": result.image_base64 # Store image if generated
|
||||
}
|
||||
if result.image_base64:
|
||||
logger.info(f"🖼️ Tool {tool_name} generated image: {len(result.image_base64)} chars")
|
||||
tool_results.append(tool_result_dict)
|
||||
|
||||
# Append tool results to messages and call LLM again
|
||||
messages.append({"role": "assistant", "content": None, "tool_calls": tool_calls})
|
||||
|
||||
for tr in tool_results:
|
||||
messages.append({
|
||||
"role": "tool",
|
||||
"tool_call_id": tr["tool_call_id"],
|
||||
"content": str(tr["result"]) if tr["success"] else f"Error: {tr['error']}"
|
||||
})
|
||||
|
||||
# Second call to get final response
|
||||
logger.info(f"🔄 Calling LLM again with tool results")
|
||||
final_payload = {
|
||||
"model": cloud["model"],
|
||||
"messages": messages,
|
||||
"max_tokens": max_tokens,
|
||||
"temperature": temperature,
|
||||
"stream": False
|
||||
}
|
||||
# Don't include tools in second call (some APIs don't support it)
|
||||
# Tools are only needed in first call
|
||||
|
||||
final_resp = await http_client.post(
|
||||
f"{cloud['base_url']}/v1/chat/completions",
|
||||
headers={
|
||||
"Authorization": f"Bearer {api_key}",
|
||||
"Content-Type": "application/json"
|
||||
},
|
||||
json=final_payload,
|
||||
timeout=cloud["timeout"]
|
||||
)
|
||||
|
||||
if final_resp.status_code == 200:
|
||||
final_data = final_resp.json()
|
||||
response_text = final_data.get("choices", [{}])[0].get("message", {}).get("content", "")
|
||||
|
||||
# CRITICAL: Check for DSML in second response too!
|
||||
if response_text and "DSML" in response_text:
|
||||
logger.warning(f"🧹 DSML detected in second LLM response, clearing ({len(response_text)} chars)")
|
||||
response_text = format_tool_calls_for_response(tool_results, fallback_mode="dsml_detected")
|
||||
|
||||
if not response_text:
|
||||
logger.warning(f"⚠️ {cloud['name'].upper()} returned empty response after tool call")
|
||||
# Fallback to tool result summary
|
||||
response_text = format_tool_calls_for_response(tool_results, fallback_mode="empty_response")
|
||||
tokens_used += final_data.get("usage", {}).get("total_tokens", 0)
|
||||
else:
|
||||
logger.error(f"❌ {cloud['name'].upper()} second call failed: {final_resp.status_code} - {final_resp.text[:200]}")
|
||||
# Fallback to tool result summary
|
||||
response_text = format_tool_calls_for_response(tool_results, fallback_mode="empty_response")
|
||||
|
||||
if response_text:
|
||||
# FINAL DSML check before returning - never show DSML to user
|
||||
if "DSML" in response_text or "invoke name=" in response_text or "function_calls>" in response_text:
|
||||
logger.warning(f"🧹 DSML in final response! Replacing with fallback ({len(response_text)} chars)")
|
||||
# Use dsml_detected mode - LLM confused, just acknowledge presence
|
||||
response_text = format_tool_calls_for_response(tool_results, fallback_mode="dsml_detected")
|
||||
|
||||
# Check if any tool generated an image
|
||||
generated_image = None
|
||||
logger.debug(f"🔍 Checking {len(tool_results)} tool results for images...")
|
||||
for tr in tool_results:
|
||||
img_b64 = tr.get("image_base64")
|
||||
if img_b64:
|
||||
generated_image = img_b64
|
||||
logger.info(f"🖼️ Image generated by tool: {tr['name']} ({len(img_b64)} chars)")
|
||||
break
|
||||
else:
|
||||
logger.debug(f" Tool {tr['name']}: no image_base64")
|
||||
|
||||
logger.info(f"✅ {cloud['name'].upper()} response received, {tokens_used} tokens")
|
||||
|
||||
# Store message in agent-specific memory (async, non-blocking)
|
||||
if MEMORY_RETRIEVAL_AVAILABLE and memory_retrieval and chat_id and user_id:
|
||||
asyncio.create_task(
|
||||
memory_retrieval.store_message(
|
||||
agent_id=request_agent_id,
|
||||
user_id=user_id,
|
||||
username=username,
|
||||
message_text=request.prompt,
|
||||
response_text=response_text,
|
||||
chat_id=chat_id
|
||||
)
|
||||
)
|
||||
|
||||
return InferResponse(
|
||||
response=response_text,
|
||||
model=cloud["model"],
|
||||
tokens_used=tokens_used,
|
||||
backend=f"{cloud['name']}-cloud",
|
||||
image_base64=generated_image
|
||||
)
|
||||
else:
|
||||
logger.warning(f"⚠️ {cloud['name'].upper()} returned empty response, trying next provider")
|
||||
continue
|
||||
else:
|
||||
logger.warning(f"⚠️ {cloud['name'].upper()} returned {cloud_resp.status_code}, trying next...")
|
||||
last_error = f"{cloud['name']}: {cloud_resp.status_code}"
|
||||
continue
|
||||
|
||||
except HTTPException:
|
||||
raise
|
||||
except Exception as e:
|
||||
logger.error(f"❌ DeepSeek error: {e}")
|
||||
# Don't fallback to local for cloud agents - raise error
|
||||
raise HTTPException(status_code=503, detail=f"DeepSeek API error: {str(e)}")
|
||||
import traceback
|
||||
error_details = traceback.format_exc()
|
||||
logger.warning(f"⚠️ {cloud['name'].upper()} failed: {e}")
|
||||
if not str(e).strip(): # Empty error string
|
||||
logger.error(f"❌ {cloud['name'].upper()} failed with empty error! Check traceback:")
|
||||
logger.error(error_details)
|
||||
else:
|
||||
logger.debug(f"Full error traceback: {error_details}")
|
||||
last_error = f"{cloud['name']}: {str(e)}"
|
||||
continue
|
||||
|
||||
# If all cloud providers failed, log and fall through to local
|
||||
if last_error:
|
||||
logger.warning(f"⚠️ All cloud providers failed ({last_error}), falling back to local Ollama")
|
||||
|
||||
# =========================================================================
|
||||
# LOCAL PROVIDERS (Ollama via Swapper)
|
||||
# =========================================================================
|
||||
# Determine local model from config (not hardcoded)
|
||||
# Strategy: Use agent's default_llm if it's local (ollama), otherwise find first local model
|
||||
local_model = None
|
||||
|
||||
# Check if default_llm is local
|
||||
if llm_profile.get("provider") == "ollama":
|
||||
# Extract model name and convert format (qwen3:8b → qwen3-8b for Swapper)
|
||||
ollama_model = llm_profile.get("model", "qwen3:8b")
|
||||
local_model = ollama_model.replace(":", "-") # qwen3:8b → qwen3-8b
|
||||
logger.debug(f"✅ Using agent's default local model: {local_model}")
|
||||
else:
|
||||
# Find first local model from config
|
||||
for profile_name, profile in llm_profiles.items():
|
||||
if profile.get("provider") == "ollama":
|
||||
ollama_model = profile.get("model", "qwen3:8b")
|
||||
local_model = ollama_model.replace(":", "-")
|
||||
logger.info(f"🔄 Found fallback local model: {local_model} from profile {profile_name}")
|
||||
break
|
||||
|
||||
# Final fallback if no local model found
|
||||
if not local_model:
|
||||
local_model = "qwen3-8b"
|
||||
logger.warning(f"⚠️ No local model in config, using hardcoded fallback: {local_model}")
|
||||
|
||||
try:
|
||||
# Check if Swapper is available
|
||||
health_resp = await http_client.get(f"{SWAPPER_URL}/health", timeout=5.0)
|
||||
if health_resp.status_code == 200:
|
||||
logger.info(f"📡 Calling Swapper with model: {model}")
|
||||
logger.info(f"📡 Calling Swapper with local model: {local_model}")
|
||||
# Generate response via Swapper (which handles model loading)
|
||||
generate_resp = await http_client.post(
|
||||
f"{SWAPPER_URL}/generate",
|
||||
json={
|
||||
"model": model,
|
||||
"model": local_model,
|
||||
"prompt": request.prompt,
|
||||
"system": system_prompt,
|
||||
"max_tokens": request.max_tokens,
|
||||
@@ -547,9 +968,24 @@ async def agent_infer(agent_id: str, request: InferRequest):
|
||||
|
||||
if generate_resp.status_code == 200:
|
||||
data = generate_resp.json()
|
||||
local_response = data.get("response", "")
|
||||
|
||||
# Store in agent-specific memory
|
||||
if MEMORY_RETRIEVAL_AVAILABLE and memory_retrieval and chat_id and user_id and local_response:
|
||||
asyncio.create_task(
|
||||
memory_retrieval.store_message(
|
||||
agent_id=request_agent_id,
|
||||
user_id=user_id,
|
||||
username=username,
|
||||
message_text=request.prompt,
|
||||
response_text=local_response,
|
||||
chat_id=chat_id
|
||||
)
|
||||
)
|
||||
|
||||
return InferResponse(
|
||||
response=data.get("response", ""),
|
||||
model=model,
|
||||
response=local_response,
|
||||
model=local_model,
|
||||
tokens_used=data.get("eval_count", 0),
|
||||
backend="swapper+ollama"
|
||||
)
|
||||
@@ -909,6 +1345,14 @@ async def shutdown_event():
|
||||
"""Cleanup connections on shutdown"""
|
||||
global neo4j_driver, http_client, nc
|
||||
|
||||
# Close Memory Retrieval
|
||||
if MEMORY_RETRIEVAL_AVAILABLE and memory_retrieval:
|
||||
try:
|
||||
await memory_retrieval.close()
|
||||
logger.info("🔌 Memory Retrieval closed")
|
||||
except Exception as e:
|
||||
logger.warning(f"⚠️ Memory Retrieval close error: {e}")
|
||||
|
||||
if neo4j_driver:
|
||||
await neo4j_driver.close()
|
||||
logger.info("🔌 Neo4j connection closed")
|
||||
|
||||
Reference in New Issue
Block a user