fix: DSML fallback — 3rd LLM call for clean synthesis + think tag stripping
Router (main.py):
- When DSML is detected in the 2nd LLM response after tool execution, make a 3rd LLM call with an explicit synthesis prompt instead of returning raw tool results to the user.
- Fall back to format_tool_calls_for_response only if the 3rd call fails.

Router (tool_manager.py):
- Added _strip_think_tags() helper for <think>...</think> removal from DeepSeek reasoning artifacts.

Gateway (http_api.py):
- Strip <think>...</think> tags before sending to Telegram.
- Strip DSML/XML-like markup (function_calls, invoke, parameter tags).
- Ensure empty text after stripping gets "..." fallback.

Deployed to NODE1 and verified services running.

Co-authored-by: Cursor <cursoragent@cursor.com>
This commit is contained in:
@@ -699,6 +699,16 @@ async def send_telegram_message(chat_id: str, text: str, bot_token: Optional[str
|
|||||||
logger.error("TELEGRAM_BOT_TOKEN not set")
|
logger.error("TELEGRAM_BOT_TOKEN not set")
|
||||||
return False
|
return False
|
||||||
|
|
||||||
|
# Strip <think>...</think> tags (DeepSeek reasoning leak)
|
||||||
|
import re
|
||||||
|
text = re.sub(r'<think>.*?</think>', '', text, flags=re.DOTALL)
|
||||||
|
text = re.sub(r'<think>.*$', '', text, flags=re.DOTALL) # unclosed tag
|
||||||
|
# Strip any DSML/XML-like markup
|
||||||
|
text = re.sub(r'</?(?:function_calls|invoke|parameter)[^>]*>', '', text)
|
||||||
|
text = text.strip()
|
||||||
|
if not text:
|
||||||
|
text = "..."
|
||||||
|
|
||||||
url = f"https://api.telegram.org/bot{token}/sendMessage"
|
url = f"https://api.telegram.org/bot{token}/sendMessage"
|
||||||
payload = {
|
payload = {
|
||||||
"chat_id": chat_id,
|
"chat_id": chat_id,
|
||||||
|
|||||||
@@ -1055,8 +1055,41 @@ async def agent_infer(agent_id: str, request: InferRequest):
|
|||||||
|
|
||||||
# CRITICAL: Check for DSML in second response too!
|
# CRITICAL: Check for DSML in second response too!
|
||||||
if response_text and "DSML" in response_text:
|
if response_text and "DSML" in response_text:
|
||||||
logger.warning(f"🧹 DSML detected in second LLM response, clearing ({len(response_text)} chars)")
|
logger.warning(f"🧹 DSML detected in 2nd LLM response, trying 3rd call ({len(response_text)} chars)")
|
||||||
response_text = format_tool_calls_for_response(tool_results, fallback_mode="dsml_detected")
|
# Third LLM call: explicitly ask to synthesize tool results
|
||||||
|
tool_summary_parts = []
|
||||||
|
for tr in tool_results:
|
||||||
|
if tr.get("success") and tr.get("result"):
|
||||||
|
res_text = str(tr["result"])[:500]
|
||||||
|
tool_summary_parts.append(f"Tool '{tr['name']}' returned: {res_text}")
|
||||||
|
if tool_summary_parts:
|
||||||
|
synthesis_prompt = "Based on the following tool results, provide a helpful response to the user in their language. Do NOT use any markup or XML. Just respond naturally.\n\n" + "\n".join(tool_summary_parts)
|
||||||
|
try:
|
||||||
|
synth_resp = await http_client.post(
|
||||||
|
f"{cloud['base_url']}/v1/chat/completions",
|
||||||
|
headers={"Authorization": f"Bearer {api_key}", "Content-Type": "application/json"},
|
||||||
|
json={"model": cloud["model"], "messages": [
|
||||||
|
{"role": "system", "content": system_prompt or "You are a helpful assistant. Respond naturally."},
|
||||||
|
{"role": "user", "content": synthesis_prompt}
|
||||||
|
], "max_tokens": max_tokens, "temperature": 0.3, "stream": False},
|
||||||
|
timeout=cloud["timeout"]
|
||||||
|
)
|
||||||
|
if synth_resp.status_code == 200:
|
||||||
|
synth_data = synth_resp.json()
|
||||||
|
synth_text = synth_data.get("choices", [{}])[0].get("message", {}).get("content", "")
|
||||||
|
if synth_text and "DSML" not in synth_text and "invoke" not in synth_text:
|
||||||
|
response_text = synth_text
|
||||||
|
tokens_used += synth_data.get("usage", {}).get("total_tokens", 0)
|
||||||
|
logger.info("\u2705 3rd LLM call synthesized clean response from tool results")
|
||||||
|
else:
|
||||||
|
response_text = format_tool_calls_for_response(tool_results, fallback_mode="dsml_detected")
|
||||||
|
else:
|
||||||
|
response_text = format_tool_calls_for_response(tool_results, fallback_mode="dsml_detected")
|
||||||
|
except Exception as synth_err:
|
||||||
|
logger.warning(f"3rd LLM call failed: {synth_err}")
|
||||||
|
response_text = format_tool_calls_for_response(tool_results, fallback_mode="dsml_detected")
|
||||||
|
else:
|
||||||
|
response_text = format_tool_calls_for_response(tool_results, fallback_mode="dsml_detected")
|
||||||
|
|
||||||
if not response_text:
|
if not response_text:
|
||||||
logger.warning(f"⚠️ {cloud['name'].upper()} returned empty response after tool call")
|
logger.warning(f"⚠️ {cloud['name'].upper()} returned empty response after tool call")
|
||||||
|
|||||||
@@ -831,6 +831,14 @@ class ToolManager:
|
|||||||
await self.http_client.aclose()
|
await self.http_client.aclose()
|
||||||
|
|
||||||
|
|
||||||
|
def _strip_think_tags(text: str) -> str:
|
||||||
|
"""Remove <think>...</think> tags from DeepSeek responses."""
|
||||||
|
import re
|
||||||
|
text = re.sub(r'<think>.*?</think>', '', text, flags=re.DOTALL)
|
||||||
|
text = re.sub(r'<think>.*$', '', text, flags=re.DOTALL) # unclosed tag
|
||||||
|
return text.strip()
|
||||||
|
|
||||||
|
|
||||||
def format_tool_calls_for_response(tool_results: List[Dict], fallback_mode: str = "normal") -> str:
|
def format_tool_calls_for_response(tool_results: List[Dict], fallback_mode: str = "normal") -> str:
|
||||||
"""
|
"""
|
||||||
Format tool results in human-friendly way - NOT raw data!
|
Format tool results in human-friendly way - NOT raw data!
|
||||||
|
|||||||
Reference in New Issue
Block a user