From 7887f7cbe98bb0d68f4862ac35120babd5260338 Mon Sep 17 00:00:00 2001 From: Apple Date: Mon, 9 Feb 2026 10:30:37 -0800 Subject: [PATCH] =?UTF-8?q?fix:=20DSML=20fallback=20=E2=80=94=203rd=20LLM?= =?UTF-8?q?=20call=20for=20clean=20synthesis=20+=20think=20tag=20stripping?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Router (main.py): - When DSML detected in 2nd LLM response after tool execution, make a 3rd LLM call with explicit synthesis prompt instead of returning raw tool results to the user - Falls back to format_tool_calls_for_response only if 3rd call fails Router (tool_manager.py): - Added _strip_think_tags() helper for <think>...</think> removal from DeepSeek reasoning artifacts Gateway (http_api.py): - Strip <think>...</think> tags before sending to Telegram - Strip DSML/XML-like markup (function_calls, invoke, parameter tags) - Ensure empty text after stripping gets "..." fallback Deployed to NODE1 and verified services running. Co-authored-by: Cursor --- gateway-bot/http_api.py | 10 +++++++++ services/router/main.py | 37 +++++++++++++++++++++++++++++++-- services/router/tool_manager.py | 8 +++++++ 3 files changed, 53 insertions(+), 2 deletions(-) diff --git a/gateway-bot/http_api.py b/gateway-bot/http_api.py index ea2d04fd..f64822fa 100644 --- a/gateway-bot/http_api.py +++ b/gateway-bot/http_api.py @@ -699,6 +699,16 @@ async def send_telegram_message(chat_id: str, text: str, bot_token: Optional[str logger.error("TELEGRAM_BOT_TOKEN not set") return False + # Strip <think>...</think> tags (DeepSeek reasoning leak) + import re + text = re.sub(r'<think>.*?</think>', '', text, flags=re.DOTALL) + text = re.sub(r'<think>.*$', '', text, flags=re.DOTALL) # unclosed tag + # Strip any DSML/XML-like markup + text = re.sub(r'<[^>]*>', '', text) + text = text.strip() + if not text: + text = "..." 
+ url = f"https://api.telegram.org/bot{token}/sendMessage" payload = { "chat_id": chat_id, diff --git a/services/router/main.py b/services/router/main.py index 396fe4ae..158b4aa7 100644 --- a/services/router/main.py +++ b/services/router/main.py @@ -1055,8 +1055,41 @@ async def agent_infer(agent_id: str, request: InferRequest): # CRITICAL: Check for DSML in second response too! if response_text and "DSML" in response_text: - logger.warning(f"🧹 DSML detected in second LLM response, clearing ({len(response_text)} chars)") - response_text = format_tool_calls_for_response(tool_results, fallback_mode="dsml_detected") + logger.warning(f"🧹 DSML detected in 2nd LLM response, trying 3rd call ({len(response_text)} chars)") + # Third LLM call: explicitly ask to synthesize tool results + tool_summary_parts = [] + for tr in tool_results: + if tr.get("success") and tr.get("result"): + res_text = str(tr["result"])[:500] + tool_summary_parts.append(f"Tool '{tr['name']}' returned: {res_text}") + if tool_summary_parts: + synthesis_prompt = "Based on the following tool results, provide a helpful response to the user in their language. Do NOT use any markup or XML. Just respond naturally.\n\n" + "\n".join(tool_summary_parts) + try: + synth_resp = await http_client.post( + f"{cloud['base_url']}/v1/chat/completions", + headers={"Authorization": f"Bearer {api_key}", "Content-Type": "application/json"}, + json={"model": cloud["model"], "messages": [ + {"role": "system", "content": system_prompt or "You are a helpful assistant. 
Respond naturally."}, + {"role": "user", "content": synthesis_prompt} + ], "max_tokens": max_tokens, "temperature": 0.3, "stream": False}, + timeout=cloud["timeout"] + ) + if synth_resp.status_code == 200: + synth_data = synth_resp.json() + synth_text = synth_data.get("choices", [{}])[0].get("message", {}).get("content", "") + if synth_text and "DSML" not in synth_text and "invoke" not in synth_text: + response_text = synth_text + tokens_used += synth_data.get("usage", {}).get("total_tokens", 0) + logger.info("\u2705 3rd LLM call synthesized clean response from tool results") + else: + response_text = format_tool_calls_for_response(tool_results, fallback_mode="dsml_detected") + else: + response_text = format_tool_calls_for_response(tool_results, fallback_mode="dsml_detected") + except Exception as synth_err: + logger.warning(f"3rd LLM call failed: {synth_err}") + response_text = format_tool_calls_for_response(tool_results, fallback_mode="dsml_detected") + else: + response_text = format_tool_calls_for_response(tool_results, fallback_mode="dsml_detected") if not response_text: logger.warning(f"⚠️ {cloud['name'].upper()} returned empty response after tool call") diff --git a/services/router/tool_manager.py b/services/router/tool_manager.py index 3f0ed349..2fd7643a 100644 --- a/services/router/tool_manager.py +++ b/services/router/tool_manager.py @@ -831,6 +831,14 @@ class ToolManager: await self.http_client.aclose() +def _strip_think_tags(text: str) -> str: + """Remove <think>...</think> tags from DeepSeek responses.""" + import re + text = re.sub(r'<think>.*?</think>', '', text, flags=re.DOTALL) + text = re.sub(r'<think>.*$', '', text, flags=re.DOTALL) # unclosed tag + return text.strip() + + def format_tool_calls_for_response(tool_results: List[Dict], fallback_mode: str = "normal") -> str: """ Format tool results in human-friendly way - NOT raw data!