Sync NODE1 runtime config for Sofiia monitor + Clan canary fixes
This commit is contained in:
@@ -38,6 +38,14 @@ except ImportError:
|
||||
TOOL_MANAGER_AVAILABLE = False
|
||||
ToolManager = None
|
||||
|
||||
# Runtime Guard (Envelope/Artifact validation for CLAN orchestration)
|
||||
try:
|
||||
from runtime_guard import RuntimeGuard
|
||||
RUNTIME_GUARD_AVAILABLE = True
|
||||
except ImportError:
|
||||
RUNTIME_GUARD_AVAILABLE = False
|
||||
RuntimeGuard = None
|
||||
|
||||
logging.basicConfig(level=logging.INFO)
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@@ -603,6 +611,16 @@ CITY_SERVICE_URL = os.getenv("CITY_SERVICE_URL", "http://daarion-city-service:70
|
||||
# CrewAI Routing Configuration
|
||||
CREWAI_ROUTING_ENABLED = os.getenv("CREWAI_ROUTING_ENABLED", "true").lower() == "true"
|
||||
CREWAI_URL = os.getenv("CREWAI_URL", "http://dagi-staging-crewai-service:9010")
|
||||
CLAN_RUNTIME_GUARD_ENABLED = os.getenv("CLAN_RUNTIME_GUARD_ENABLED", "true").lower() == "true"
|
||||
CLAN_RUNTIME_GUARD_MODE = os.getenv("CLAN_RUNTIME_GUARD_MODE", "soft").lower()
|
||||
CLAN_GUARD_TEST_MODE = os.getenv("CLAN_GUARD_TEST_MODE", "false").lower() == "true"
|
||||
CLAN_RUNTIME_REGISTRY_PATH = os.getenv("CLAN_RUNTIME_REGISTRY_PATH", "/app/config/roles/clan/zhos/agents_registry.yaml")
|
||||
CLAN_RUNTIME_ENVELOPE_SCHEMA_PATH = os.getenv("CLAN_RUNTIME_ENVELOPE_SCHEMA_PATH", "/app/docs/contracts/clan-envelope.schema.json")
|
||||
CLAN_RUNTIME_ARTIFACT_SCHEMA_PATH = os.getenv("CLAN_RUNTIME_ARTIFACT_SCHEMA_PATH", "/app/docs/contracts/clan-artifact.schema.json")
|
||||
CLAN_RUNTIME_CONSENT_EVENT_SCHEMA_PATH = os.getenv(
|
||||
"CLAN_RUNTIME_CONSENT_EVENT_SCHEMA_PATH",
|
||||
"/app/docs/contracts/clan-consent-event.schema.json",
|
||||
)
|
||||
|
||||
# Neo4j Configuration
|
||||
NEO4J_URI = os.getenv("NEO4J_BOLT_URL", "bolt://neo4j:7687")
|
||||
@@ -622,6 +640,7 @@ nats_available = False
|
||||
|
||||
# Tool Manager
|
||||
tool_manager = None
|
||||
runtime_guard_engine = None
|
||||
|
||||
# Models
|
||||
class FilterDecision(BaseModel):
|
||||
@@ -677,7 +696,7 @@ router_config = load_router_config()
|
||||
@app.on_event("startup")
|
||||
async def startup_event():
|
||||
"""Initialize NATS connection and subscriptions"""
|
||||
global nc, nats_available, http_client, neo4j_driver, neo4j_available
|
||||
global nc, nats_available, http_client, neo4j_driver, neo4j_available, runtime_guard_engine
|
||||
logger.info("🚀 DAGI Router v2.0.0 starting up...")
|
||||
|
||||
# Initialize HTTP client
|
||||
@@ -736,6 +755,26 @@ async def startup_event():
|
||||
tool_manager = None
|
||||
else:
|
||||
tool_manager = None
|
||||
|
||||
# Initialize CLAN runtime guard
|
||||
if RUNTIME_GUARD_AVAILABLE and RuntimeGuard and CLAN_RUNTIME_GUARD_ENABLED:
|
||||
try:
|
||||
runtime_guard_engine = RuntimeGuard(
|
||||
registry_path=CLAN_RUNTIME_REGISTRY_PATH,
|
||||
envelope_schema_path=CLAN_RUNTIME_ENVELOPE_SCHEMA_PATH,
|
||||
artifact_schema_path=CLAN_RUNTIME_ARTIFACT_SCHEMA_PATH,
|
||||
consent_event_schema_path=CLAN_RUNTIME_CONSENT_EVENT_SCHEMA_PATH,
|
||||
mode=CLAN_RUNTIME_GUARD_MODE,
|
||||
)
|
||||
logger.info(
|
||||
"✅ CLAN Runtime Guard initialized "
|
||||
f"(mode={CLAN_RUNTIME_GUARD_MODE}, registry={CLAN_RUNTIME_REGISTRY_PATH})"
|
||||
)
|
||||
except Exception as e:
|
||||
logger.warning(f"⚠️ Runtime Guard init failed: {e}")
|
||||
runtime_guard_engine = None
|
||||
else:
|
||||
runtime_guard_engine = None
|
||||
|
||||
# Log backend URLs
|
||||
logger.info(f"📡 Swapper URL: {SWAPPER_URL}")
|
||||
@@ -1099,11 +1138,14 @@ async def internal_llm_complete(request: InternalLLMRequest):
|
||||
{"name": "mistral", "api_key_env": "MISTRAL_API_KEY", "base_url": "https://api.mistral.ai", "model": "mistral-large-latest", "timeout": 60},
|
||||
{"name": "grok", "api_key_env": "GROK_API_KEY", "base_url": "https://api.x.ai", "model": "grok-2-1212", "timeout": 60}
|
||||
]
|
||||
|
||||
|
||||
# Respect configured provider: local profiles should stay local.
|
||||
if provider in ["deepseek", "mistral", "grok"]:
|
||||
cloud_providers = sorted(cloud_providers, key=lambda x: 0 if x["name"] == provider else 1)
|
||||
|
||||
# Try cloud providers
|
||||
elif provider == "ollama":
|
||||
cloud_providers = []
|
||||
|
||||
# Try cloud providers (only when provider is cloud)
|
||||
for cloud in cloud_providers:
|
||||
api_key = os.getenv(cloud["api_key_env"])
|
||||
if not api_key:
|
||||
@@ -1129,18 +1171,19 @@ async def internal_llm_complete(request: InternalLLMRequest):
|
||||
logger.warning(f"Internal LLM {cloud['name']} failed: {e}")
|
||||
continue
|
||||
|
||||
# Fallback to Ollama
|
||||
# Fallback/target local provider (Ollama)
|
||||
try:
|
||||
logger.info("Internal LLM fallback to Ollama")
|
||||
logger.info("Internal LLM to Ollama")
|
||||
ollama_model = model or "qwen3:8b"
|
||||
ollama_resp = await http_client.post(
|
||||
"http://172.18.0.1:11434/api/generate",
|
||||
json={"model": "qwen3:8b", "prompt": request.prompt, "system": request.system_prompt or "", "stream": False, "options": {"num_predict": max_tokens, "temperature": temperature}},
|
||||
json={"model": ollama_model, "prompt": request.prompt, "system": request.system_prompt or "", "stream": False, "options": {"num_predict": max_tokens, "temperature": temperature}},
|
||||
timeout=120.0
|
||||
)
|
||||
if ollama_resp.status_code == 200:
|
||||
data = ollama_resp.json()
|
||||
latency = int((time_module.time() - t0) * 1000)
|
||||
return InternalLLMResponse(text=data.get("response", ""), model="qwen3:8b", provider="ollama", tokens_used=0, latency_ms=latency)
|
||||
return InternalLLMResponse(text=data.get("response", ""), model=ollama_model, provider="ollama", tokens_used=0, latency_ms=latency)
|
||||
except Exception as e:
|
||||
logger.error(f"Internal LLM Ollama failed: {e}")
|
||||
|
||||
@@ -1246,6 +1289,55 @@ async def agent_infer(agent_id: str, request: InferRequest):
|
||||
# =========================================================================
|
||||
if CREWAI_ROUTING_ENABLED and CREWAI_CLIENT_AVAILABLE:
|
||||
try:
|
||||
runtime_envelope = None
|
||||
if runtime_guard_engine and request_agent_id == "clan":
|
||||
runtime_envelope = runtime_guard_engine.build_envelope(
|
||||
agent_id=request_agent_id,
|
||||
prompt=request.prompt,
|
||||
metadata=effective_metadata,
|
||||
)
|
||||
ok_pre, pre_info = runtime_guard_engine.pre_dispatch_checks(runtime_envelope)
|
||||
if not ok_pre:
|
||||
stop_payload = runtime_guard_engine.stop_payload(runtime_envelope, pre_info)
|
||||
logger.warning(
|
||||
"🛑 Runtime guard pre-dispatch stop: "
|
||||
f"code={stop_payload.get('stop_code')} request_id={stop_payload.get('request_id')} "
|
||||
f"input_hash={stop_payload.get('input_hash')}"
|
||||
)
|
||||
return InferResponse(
|
||||
response=json.dumps(stop_payload, ensure_ascii=False),
|
||||
model="runtime-guard",
|
||||
backend="runtime-guard",
|
||||
tokens_used=0,
|
||||
)
|
||||
if (
|
||||
CLAN_GUARD_TEST_MODE
|
||||
and effective_metadata.get("guard_self_test") is True
|
||||
and isinstance(effective_metadata.get("__inject_fake_agent_result"), dict)
|
||||
):
|
||||
fake_result = effective_metadata.get("__inject_fake_agent_result")
|
||||
ok_post, post_info = runtime_guard_engine.post_return_checks(runtime_envelope, fake_result)
|
||||
if not ok_post:
|
||||
stop_payload = runtime_guard_engine.stop_payload(runtime_envelope, post_info)
|
||||
logger.warning(
|
||||
"🧪 Runtime guard self-test stop: "
|
||||
f"code={stop_payload.get('stop_code')} request_id={stop_payload.get('request_id')} "
|
||||
f"input_hash={stop_payload.get('input_hash')}"
|
||||
)
|
||||
return InferResponse(
|
||||
response=json.dumps(stop_payload, ensure_ascii=False),
|
||||
model="runtime-guard",
|
||||
backend="runtime-guard",
|
||||
tokens_used=0,
|
||||
)
|
||||
logger.info("🧪 Runtime guard self-test passed (fake result accepted)")
|
||||
return InferResponse(
|
||||
response=json.dumps({"ok": True, "self_test": True}, ensure_ascii=False),
|
||||
model="runtime-guard",
|
||||
backend="runtime-guard",
|
||||
tokens_used=0,
|
||||
)
|
||||
|
||||
# Get agent CrewAI config from registry (or router_config fallback)
|
||||
crewai_cfg = agent_config.get("crewai", {})
|
||||
|
||||
@@ -1273,13 +1365,52 @@ async def agent_infer(agent_id: str, request: InferRequest):
|
||||
"hash": system_prompt_hash,
|
||||
},
|
||||
"metadata": effective_metadata,
|
||||
"runtime_envelope": runtime_envelope,
|
||||
},
|
||||
team=crewai_cfg.get("team")
|
||||
team=crewai_cfg.get("team"),
|
||||
profile=effective_metadata.get("crewai_profile")
|
||||
)
|
||||
|
||||
latency = time.time() - t0
|
||||
|
||||
if crew_result.get("success") and crew_result.get("result"):
|
||||
if runtime_guard_engine and request_agent_id == "clan" and runtime_envelope:
|
||||
ok_post, post_info = runtime_guard_engine.post_return_checks(runtime_envelope, crew_result)
|
||||
if not ok_post:
|
||||
stop_payload = runtime_guard_engine.stop_payload(runtime_envelope, post_info)
|
||||
logger.warning(
|
||||
"🛑 Runtime guard post-return stop: "
|
||||
f"code={stop_payload.get('stop_code')} request_id={stop_payload.get('request_id')} "
|
||||
f"input_hash={stop_payload.get('input_hash')}"
|
||||
)
|
||||
return InferResponse(
|
||||
response=json.dumps(stop_payload, ensure_ascii=False),
|
||||
model="runtime-guard",
|
||||
backend="runtime-guard",
|
||||
tokens_used=0,
|
||||
)
|
||||
crew_result = runtime_guard_engine.stamp_result_artifacts(runtime_envelope, crew_result)
|
||||
ok_stamp, stamp_info = runtime_guard_engine.ensure_stamped_trails(crew_result)
|
||||
if not ok_stamp:
|
||||
stop_payload = runtime_guard_engine.stop_payload(runtime_envelope, stamp_info)
|
||||
logger.warning(
|
||||
"🛑 Runtime guard stamped-trail stop: "
|
||||
f"code={stop_payload.get('stop_code')} request_id={stop_payload.get('request_id')} "
|
||||
f"input_hash={stop_payload.get('input_hash')}"
|
||||
)
|
||||
return InferResponse(
|
||||
response=json.dumps(stop_payload, ensure_ascii=False),
|
||||
model="runtime-guard",
|
||||
backend="runtime-guard",
|
||||
tokens_used=0,
|
||||
)
|
||||
for row in runtime_guard_engine.artifact_runtime_rows(runtime_envelope, crew_result):
|
||||
logger.info(json.dumps(row, ensure_ascii=False))
|
||||
for row in runtime_guard_engine.consent_runtime_rows(runtime_envelope, crew_result):
|
||||
logger.info(json.dumps(row, ensure_ascii=False))
|
||||
for row in (crew_result.get("artifact_state_transition_rows") or []):
|
||||
if isinstance(row, dict):
|
||||
logger.info(json.dumps(row, ensure_ascii=False))
|
||||
logger.info(f"✅ CrewAI success for {agent_id}: {latency:.2f}s")
|
||||
|
||||
# Store interaction in memory
|
||||
@@ -1677,127 +1808,224 @@ async def agent_infer(agent_id: str, request: InferRequest):
|
||||
logger.warning(f"🧹 Clearing DSML content from response ({len(response_text)} chars)")
|
||||
response_text = ""
|
||||
if tool_calls and tool_manager:
|
||||
logger.info(f"🔧 LLM requested {len(tool_calls)} tool call(s)")
|
||||
|
||||
# Execute each tool call
|
||||
tool_results = []
|
||||
for tc in tool_calls:
|
||||
func = tc.get("function", {})
|
||||
tool_name = func.get("name", "")
|
||||
try:
|
||||
tool_args = json.loads(func.get("arguments", "{}"))
|
||||
except:
|
||||
tool_args = {}
|
||||
|
||||
result = await tool_manager.execute_tool(
|
||||
tool_name,
|
||||
tool_args,
|
||||
agent_id=request_agent_id,
|
||||
chat_id=chat_id,
|
||||
user_id=user_id,
|
||||
)
|
||||
tool_result_dict = {
|
||||
"tool_call_id": tc.get("id", ""),
|
||||
"name": tool_name,
|
||||
"success": result.success,
|
||||
"result": result.result,
|
||||
"error": result.error,
|
||||
"image_base64": result.image_base64, # Store image if generated
|
||||
"file_base64": result.file_base64,
|
||||
"file_name": result.file_name,
|
||||
"file_mime": result.file_mime,
|
||||
}
|
||||
if result.image_base64:
|
||||
logger.info(f"🖼️ Tool {tool_name} generated image: {len(result.image_base64)} chars")
|
||||
tool_results.append(tool_result_dict)
|
||||
|
||||
# Append tool results to messages and call LLM again
|
||||
messages.append({"role": "assistant", "content": None, "tool_calls": tool_calls})
|
||||
|
||||
for tr in tool_results:
|
||||
messages.append({
|
||||
"role": "tool",
|
||||
"tool_call_id": tr["tool_call_id"],
|
||||
"content": str(tr["result"]) if tr["success"] else f"Error: {tr['error']}"
|
||||
})
|
||||
|
||||
# Second call to get final response
|
||||
logger.info(f"🔄 Calling LLM again with tool results")
|
||||
final_payload = {
|
||||
"model": cloud["model"],
|
||||
"messages": messages,
|
||||
"max_tokens": max_tokens,
|
||||
"temperature": temperature,
|
||||
"stream": False
|
||||
max_tool_rounds = int(os.getenv("ROUTER_TOOL_MAX_ROUNDS", "10"))
|
||||
logger.info(f"🔧 LLM requested tool calls; running iterative mode up to {max_tool_rounds} rounds")
|
||||
|
||||
all_tool_results = []
|
||||
current_tool_calls = tool_calls
|
||||
rounds_done = 0
|
||||
oneok_ctx = {
|
||||
"client_id": None,
|
||||
"site_id": None,
|
||||
"calc_result": None,
|
||||
"quote_id": None,
|
||||
}
|
||||
# Don't include tools in second call (some APIs don't support it)
|
||||
# Tools are only needed in first call
|
||||
|
||||
final_resp = await http_client.post(
|
||||
f"{cloud['base_url']}/v1/chat/completions",
|
||||
headers={
|
||||
"Authorization": f"Bearer {api_key}",
|
||||
"Content-Type": "application/json"
|
||||
},
|
||||
json=final_payload,
|
||||
timeout=cloud["timeout"]
|
||||
)
|
||||
|
||||
if final_resp.status_code == 200:
|
||||
final_data = final_resp.json()
|
||||
response_text = final_data.get("choices", [{}])[0].get("message", {}).get("content", "")
|
||||
|
||||
# CRITICAL: Check for DSML in second response too!
|
||||
if response_text and ("DSML" in response_text or "invoke name=" in response_text or "function_calls>" in response_text):
|
||||
prefix_before_dsml = _strip_dsml_keep_text_before(response_text)
|
||||
if prefix_before_dsml:
|
||||
logger.warning(f"🧹 DSML in 2nd response: keeping text before DSML ({len(prefix_before_dsml)} chars), discarding {len(response_text) - len(prefix_before_dsml)} chars")
|
||||
response_text = prefix_before_dsml
|
||||
repeated_failures = {}
|
||||
|
||||
while current_tool_calls and rounds_done < max_tool_rounds:
|
||||
rounds_done += 1
|
||||
logger.info(f"🔁 Tool round {rounds_done}/{max_tool_rounds}: {len(current_tool_calls)} call(s)")
|
||||
round_results = []
|
||||
abort_loop_due_repeats = False
|
||||
|
||||
for tc in current_tool_calls:
|
||||
func = tc.get("function", {})
|
||||
tool_name = func.get("name", "")
|
||||
try:
|
||||
tool_args = json.loads(func.get("arguments", "{}"))
|
||||
except Exception:
|
||||
tool_args = {}
|
||||
|
||||
# Light auto-repair for 1OK multi-step flows when model omits required args.
|
||||
if request_agent_id == "oneok":
|
||||
if tool_name == "calc_window_quote":
|
||||
ip = (tool_args or {}).get("input_payload")
|
||||
if isinstance(ip, dict) and isinstance(ip.get("windows"), list) and "window_units" not in ip:
|
||||
ip2 = dict(ip)
|
||||
ip2["window_units"] = ip.get("windows")
|
||||
tool_args = dict(tool_args or {})
|
||||
tool_args["input_payload"] = ip2
|
||||
elif tool_name == "crm_create_quote":
|
||||
quote_payload = (tool_args or {}).get("quote_payload")
|
||||
if not isinstance(quote_payload, dict):
|
||||
calc_res = oneok_ctx.get("calc_result") or {}
|
||||
line_items = calc_res.get("line_items") if isinstance(calc_res, dict) else None
|
||||
totals = calc_res.get("totals") if isinstance(calc_res, dict) else None
|
||||
if isinstance(line_items, list) and isinstance(totals, dict):
|
||||
tool_args = {
|
||||
"quote_payload": {
|
||||
"client_id": oneok_ctx.get("client_id"),
|
||||
"site_id": oneok_ctx.get("site_id"),
|
||||
"currency": calc_res.get("currency", "UAH"),
|
||||
"line_items": line_items,
|
||||
"totals": totals,
|
||||
"assumptions": calc_res.get("assumptions", []),
|
||||
"validity_days": 14,
|
||||
"lead_time_estimate": calc_res.get("lead_time_if_known") or calc_res.get("lead_time_estimate"),
|
||||
}
|
||||
}
|
||||
logger.info("🛠️ oneok: auto-filled crm_create_quote.quote_payload from calc context")
|
||||
elif tool_name == "docs_render_quote_pdf":
|
||||
quote_id = (tool_args or {}).get("quote_id")
|
||||
quote_payload = (tool_args or {}).get("quote_payload")
|
||||
if not quote_id and not isinstance(quote_payload, dict) and oneok_ctx.get("quote_id"):
|
||||
tool_args = {"quote_id": oneok_ctx.get("quote_id")}
|
||||
logger.info("🛠️ oneok: auto-filled docs_render_quote_pdf.quote_id from quote context")
|
||||
elif tool_name == "schedule_propose_slots":
|
||||
params = (tool_args or {}).get("params")
|
||||
if not isinstance(params, dict):
|
||||
tool_args = {"params": {"count": 3, "timezone": "Europe/Kyiv"}}
|
||||
logger.info("🛠️ oneok: auto-filled schedule_propose_slots.params")
|
||||
|
||||
result = await tool_manager.execute_tool(
|
||||
tool_name,
|
||||
tool_args,
|
||||
agent_id=request_agent_id,
|
||||
chat_id=chat_id,
|
||||
user_id=user_id,
|
||||
)
|
||||
tool_result_dict = {
|
||||
"tool_call_id": tc.get("id", ""),
|
||||
"name": tool_name,
|
||||
"success": result.success,
|
||||
"result": result.result,
|
||||
"error": result.error,
|
||||
"image_base64": result.image_base64,
|
||||
"file_base64": result.file_base64,
|
||||
"file_name": result.file_name,
|
||||
"file_mime": result.file_mime,
|
||||
}
|
||||
if result.image_base64:
|
||||
logger.info(f"🖼️ Tool {tool_name} generated image: {len(result.image_base64)} chars")
|
||||
round_results.append(tool_result_dict)
|
||||
all_tool_results.append(tool_result_dict)
|
||||
|
||||
# Track oneok context to help subsequent tool calls in the same request.
|
||||
if request_agent_id == "oneok" and result.success and isinstance(result.result, dict):
|
||||
if tool_name == "crm_upsert_client":
|
||||
oneok_ctx["client_id"] = result.result.get("client_id") or oneok_ctx.get("client_id")
|
||||
elif tool_name == "crm_upsert_site":
|
||||
oneok_ctx["site_id"] = result.result.get("site_id") or oneok_ctx.get("site_id")
|
||||
elif tool_name == "calc_window_quote":
|
||||
oneok_ctx["calc_result"] = result.result
|
||||
elif tool_name == "crm_create_quote":
|
||||
oneok_ctx["quote_id"] = result.result.get("quote_id") or oneok_ctx.get("quote_id")
|
||||
|
||||
# Guardrail: stop if model repeats same failing tool call too many times.
|
||||
sig = f"{tool_name}:{json.dumps(tool_args, ensure_ascii=False, sort_keys=True, default=str)}"
|
||||
if result.success:
|
||||
repeated_failures.pop(sig, None)
|
||||
else:
|
||||
logger.warning(f"🧹 DSML detected in 2nd LLM response, trying 3rd call ({len(response_text)} chars)")
|
||||
# Third LLM call: explicitly ask to synthesize tool results
|
||||
tool_summary_parts = []
|
||||
for tr in tool_results:
|
||||
if tr.get("success") and tr.get("result"):
|
||||
res_text = str(tr["result"])[:500]
|
||||
tool_summary_parts.append(f"Tool '{tr['name']}' returned: {res_text}")
|
||||
if tool_summary_parts:
|
||||
synthesis_prompt = "Based on the following tool results, provide a helpful response to the user in their language. Do NOT use any markup or XML. Just respond naturally.\n\n" + "\n".join(tool_summary_parts)
|
||||
try:
|
||||
synth_resp = await http_client.post(
|
||||
f"{cloud['base_url']}/v1/chat/completions",
|
||||
headers={"Authorization": f"Bearer {api_key}", "Content-Type": "application/json"},
|
||||
json={"model": cloud["model"], "messages": [
|
||||
{"role": "system", "content": system_prompt or "You are a helpful assistant. Respond naturally."},
|
||||
{"role": "user", "content": synthesis_prompt}
|
||||
], "max_tokens": max_tokens, "temperature": 0.3, "stream": False},
|
||||
timeout=cloud["timeout"]
|
||||
)
|
||||
if synth_resp.status_code == 200:
|
||||
synth_data = synth_resp.json()
|
||||
synth_text = synth_data.get("choices", [{}])[0].get("message", {}).get("content", "")
|
||||
if synth_text and "DSML" not in synth_text and "invoke" not in synth_text:
|
||||
response_text = synth_text
|
||||
tokens_used += synth_data.get("usage", {}).get("total_tokens", 0)
|
||||
logger.info("\u2705 3rd LLM call synthesized clean response from tool results")
|
||||
else:
|
||||
response_text = format_tool_calls_for_response(tool_results, fallback_mode="dsml_detected")
|
||||
else:
|
||||
response_text = format_tool_calls_for_response(tool_results, fallback_mode="dsml_detected")
|
||||
except Exception as synth_err:
|
||||
logger.warning(f"3rd LLM call failed: {synth_err}")
|
||||
response_text = format_tool_calls_for_response(tool_results, fallback_mode="dsml_detected")
|
||||
else:
|
||||
response_text = format_tool_calls_for_response(tool_results, fallback_mode="dsml_detected")
|
||||
|
||||
if not response_text:
|
||||
logger.warning(f"⚠️ {cloud['name'].upper()} returned empty response after tool call")
|
||||
# Fallback to tool result summary
|
||||
response_text = format_tool_calls_for_response(tool_results, fallback_mode="empty_response")
|
||||
tokens_used += final_data.get("usage", {}).get("total_tokens", 0)
|
||||
else:
|
||||
logger.error(f"❌ {cloud['name'].upper()} second call failed: {final_resp.status_code} - {final_resp.text[:200]}")
|
||||
# Fallback to tool result summary
|
||||
repeated_failures[sig] = repeated_failures.get(sig, 0) + 1
|
||||
if repeated_failures[sig] >= 3:
|
||||
logger.warning(f"⚠️ Repeated failing tool call detected ({tool_name}) x{repeated_failures[sig]}; breaking iterative loop")
|
||||
abort_loop_due_repeats = True
|
||||
break
|
||||
|
||||
if abort_loop_due_repeats:
|
||||
current_tool_calls = []
|
||||
response_text = response_text or format_tool_calls_for_response(all_tool_results, fallback_mode="empty_response")
|
||||
break
|
||||
|
||||
messages.append({"role": "assistant", "content": None, "tool_calls": current_tool_calls})
|
||||
for tr in round_results:
|
||||
messages.append({
|
||||
"role": "tool",
|
||||
"tool_call_id": tr["tool_call_id"],
|
||||
"content": str(tr["result"]) if tr["success"] else f"Error: {tr['error']}"
|
||||
})
|
||||
|
||||
logger.info("🔄 Calling LLM again after tool round")
|
||||
loop_payload = {
|
||||
"model": cloud["model"],
|
||||
"messages": messages,
|
||||
"max_tokens": max_tokens,
|
||||
"temperature": temperature,
|
||||
"stream": False
|
||||
}
|
||||
# Keep tools enabled for multi-step chains.
|
||||
if tools_payload and cloud["name"] in ["deepseek", "mistral", "grok"]:
|
||||
loop_payload["tools"] = tools_payload
|
||||
loop_payload["tool_choice"] = "auto"
|
||||
|
||||
loop_resp = await http_client.post(
|
||||
f"{cloud['base_url']}/v1/chat/completions",
|
||||
headers={
|
||||
"Authorization": f"Bearer {api_key}",
|
||||
"Content-Type": "application/json"
|
||||
},
|
||||
json=loop_payload,
|
||||
timeout=cloud["timeout"]
|
||||
)
|
||||
|
||||
if loop_resp.status_code != 200:
|
||||
logger.error(f"❌ {cloud['name'].upper()} loop call failed: {loop_resp.status_code} - {loop_resp.text[:200]}")
|
||||
response_text = format_tool_calls_for_response(all_tool_results, fallback_mode="empty_response")
|
||||
current_tool_calls = []
|
||||
break
|
||||
|
||||
loop_data = loop_resp.json()
|
||||
loop_message = loop_data.get("choices", [{}])[0].get("message", {})
|
||||
response_text = loop_message.get("content", "") or ""
|
||||
tokens_used += loop_data.get("usage", {}).get("total_tokens", 0)
|
||||
current_tool_calls = loop_message.get("tool_calls", [])
|
||||
|
||||
# DSML fallback parsing for providers that return markup instead of tool_calls.
|
||||
has_dsml_loop = False
|
||||
if response_text:
|
||||
dsml_patterns_check = [
|
||||
r'DSML',
|
||||
r'function_calls>',
|
||||
r'invoke\s*name\s*=',
|
||||
r'parameter\s*name\s*=',
|
||||
r'<[^>]*invoke[^>]*>',
|
||||
r'</[^>]*invoke[^>]*>',
|
||||
]
|
||||
for pattern in dsml_patterns_check:
|
||||
if re.search(pattern, response_text, re.IGNORECASE):
|
||||
has_dsml_loop = True
|
||||
logger.warning(f"⚠️ DSML detected in loop via pattern: {pattern}")
|
||||
break
|
||||
|
||||
if has_dsml_loop and not current_tool_calls:
|
||||
dsml_patterns = [
|
||||
r'invoke name="(\w+)".*?parameter name="(\w+)"[^>]*>([^<]+)',
|
||||
r'invoke\s+name="(\w+)".*?parameter\s+name="(\w+)"[^>]*>([^<]+)',
|
||||
]
|
||||
dsml_match = None
|
||||
for pattern in dsml_patterns:
|
||||
dsml_match = re.search(pattern, response_text, re.DOTALL | re.IGNORECASE)
|
||||
if dsml_match:
|
||||
break
|
||||
if dsml_match and len(dsml_match.groups()) >= 3:
|
||||
import string
|
||||
import random
|
||||
tool_call_id = ''.join(random.choices(string.ascii_letters + string.digits, k=9))
|
||||
current_tool_calls = [{
|
||||
"id": tool_call_id,
|
||||
"function": {
|
||||
"name": dsml_match.group(1),
|
||||
"arguments": json.dumps({dsml_match.group(2): dsml_match.group(3).strip()})
|
||||
}
|
||||
}]
|
||||
response_text = ""
|
||||
|
||||
tool_results = all_tool_results
|
||||
|
||||
if current_tool_calls:
|
||||
logger.warning(f"⚠️ Reached max tool rounds ({max_tool_rounds}) for {request_agent_id}, returning summary")
|
||||
response_text = response_text or format_tool_calls_for_response(tool_results, fallback_mode="empty_response")
|
||||
|
||||
if response_text and ("DSML" in response_text or "invoke name=" in response_text or "function_calls>" in response_text):
|
||||
prefix_before_dsml = _strip_dsml_keep_text_before(response_text)
|
||||
if prefix_before_dsml:
|
||||
logger.warning(f"🧹 DSML in loop final response: keeping text before DSML ({len(prefix_before_dsml)} chars)")
|
||||
response_text = prefix_before_dsml
|
||||
else:
|
||||
response_text = format_tool_calls_for_response(tool_results, fallback_mode="dsml_detected")
|
||||
|
||||
if not response_text:
|
||||
logger.warning(f"⚠️ {cloud['name'].upper()} returned empty response after iterative tool calls")
|
||||
response_text = format_tool_calls_for_response(tool_results, fallback_mode="empty_response")
|
||||
|
||||
if response_text:
|
||||
|
||||
Reference in New Issue
Block a user