From 7887f7cbe98bb0d68f4862ac35120babd5260338 Mon Sep 17 00:00:00 2001 From: Apple Date: Mon, 9 Feb 2026 10:30:37 -0800 Subject: [PATCH] =?UTF-8?q?fix:=20DSML=20fallback=20=E2=80=94=203rd=20LLM?= =?UTF-8?q?=20call=20for=20clean=20synthesis=20+=20think=20tag=20stripping?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Router (main.py): - When DSML detected in 2nd LLM response after tool execution, make a 3rd LLM call with explicit synthesis prompt instead of returning raw tool results to the user - Falls back to format_tool_calls_for_response only if 3rd call fails Router (tool_manager.py): - Added _strip_think_tags() helper for <think>...</think> removal from DeepSeek reasoning artifacts Gateway (http_api.py): - Strip <think>...</think> tags before sending to Telegram - Strip DSML/XML-like markup (function_calls, invoke, parameter tags) - Ensure empty text after stripping gets "..." fallback Deployed to NODE1 and verified services running. Co-authored-by: Cursor --- gateway-bot/http_api.py | 10 +++++++++ services/router/main.py | 37 +++++++++++++++++++++++++++++++-- services/router/tool_manager.py | 8 +++++++ 3 files changed, 53 insertions(+), 2 deletions(-) diff --git a/gateway-bot/http_api.py b/gateway-bot/http_api.py index ea2d04fd..f64822fa 100644 --- a/gateway-bot/http_api.py +++ b/gateway-bot/http_api.py @@ -699,6 +699,16 @@ async def send_telegram_message(chat_id: str, text: str, bot_token: Optional[str logger.error("TELEGRAM_BOT_TOKEN not set") return False + # Strip <think>...</think> tags (DeepSeek reasoning leak) + import re + text = re.sub(r'<think>.*?</think>', '', text, flags=re.DOTALL) + text = re.sub(r'<think>.*$', '', text, flags=re.DOTALL) # unclosed tag + # Strip any DSML/XML-like markup + text = re.sub(r'<[^>]*>', '', text) + text = text.strip() + if not text: + text = "..." 
+ url = f"https://api.telegram.org/bot{token}/sendMessage" payload = { "chat_id": chat_id, diff --git a/services/router/main.py b/services/router/main.py index 396fe4ae..158b4aa7 100644 --- a/services/router/main.py +++ b/services/router/main.py @@ -1055,8 +1055,41 @@ async def agent_infer(agent_id: str, request: InferRequest): # CRITICAL: Check for DSML in second response too! if response_text and "DSML" in response_text: - logger.warning(f"🧹 DSML detected in second LLM response, clearing ({len(response_text)} chars)") - response_text = format_tool_calls_for_response(tool_results, fallback_mode="dsml_detected") + logger.warning(f"🧹 DSML detected in 2nd LLM response, trying 3rd call ({len(response_text)} chars)") + # Third LLM call: explicitly ask to synthesize tool results + tool_summary_parts = [] + for tr in tool_results: + if tr.get("success") and tr.get("result"): + res_text = str(tr["result"])[:500] + tool_summary_parts.append(f"Tool '{tr['name']}' returned: {res_text}") + if tool_summary_parts: + synthesis_prompt = "Based on the following tool results, provide a helpful response to the user in their language. Do NOT use any markup or XML. Just respond naturally.\n\n" + "\n".join(tool_summary_parts) + try: + synth_resp = await http_client.post( + f"{cloud['base_url']}/v1/chat/completions", + headers={"Authorization": f"Bearer {api_key}", "Content-Type": "application/json"}, + json={"model": cloud["model"], "messages": [ + {"role": "system", "content": system_prompt or "You are a helpful assistant. 
Respond naturally."}, + {"role": "user", "content": synthesis_prompt} + ], "max_tokens": max_tokens, "temperature": 0.3, "stream": False}, + timeout=cloud["timeout"] + ) + if synth_resp.status_code == 200: + synth_data = synth_resp.json() + synth_text = synth_data.get("choices", [{}])[0].get("message", {}).get("content", "") + if synth_text and "DSML" not in synth_text and "invoke" not in synth_text: + response_text = synth_text + tokens_used += synth_data.get("usage", {}).get("total_tokens", 0) + logger.info("\u2705 3rd LLM call synthesized clean response from tool results") + else: + response_text = format_tool_calls_for_response(tool_results, fallback_mode="dsml_detected") + else: + response_text = format_tool_calls_for_response(tool_results, fallback_mode="dsml_detected") + except Exception as synth_err: + logger.warning(f"3rd LLM call failed: {synth_err}") + response_text = format_tool_calls_for_response(tool_results, fallback_mode="dsml_detected") + else: + response_text = format_tool_calls_for_response(tool_results, fallback_mode="dsml_detected") if not response_text: logger.warning(f"⚠️ {cloud['name'].upper()} returned empty response after tool call") diff --git a/services/router/tool_manager.py b/services/router/tool_manager.py index 3f0ed349..2fd7643a 100644 --- a/services/router/tool_manager.py +++ b/services/router/tool_manager.py @@ -831,6 +831,14 @@ class ToolManager: await self.http_client.aclose() +def _strip_think_tags(text: str) -> str: + """Remove <think>...</think> tags from DeepSeek responses.""" + import re + text = re.sub(r'<think>.*?</think>', '', text, flags=re.DOTALL) + text = re.sub(r'<think>.*$', '', text, flags=re.DOTALL) # unclosed tag + return text.strip() + + def format_tool_calls_for_response(tool_results: List[Dict], fallback_mode: str = "normal") -> str: """ Format tool results in human-friendly way - NOT raw data!