fix(router): guard DSML tool-call flows

Prevent DeepSeek DSML from leaking to users and avoid returning raw memory_search/web results when DSML is detected.

Co-authored-by: Cursor <cursoragent@cursor.com>
This commit is contained in:
Apple
2026-02-10 04:19:57 -08:00
parent c41c68dc08
commit 7f3ee700a4
2 changed files with 80 additions and 36 deletions

View File

@@ -5,6 +5,7 @@ from typing import Literal, Optional, Dict, Any, List
import asyncio import asyncio
import json import json
import os import os
import re
import yaml import yaml
import httpx import httpx
import logging import logging
@@ -39,6 +40,35 @@ except ImportError:
logging.basicConfig(level=logging.INFO) logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
def _strip_dsml_keep_text_before(text: str) -> str:
"""If response contains DSML, return only the part before the first DSML-like tag. Otherwise return empty (caller will use fallback)."""
if not text or len(text.strip()) < 10:
return ""
# Find first occurrence of DSML-like patterns (tag or keyword that starts markup)
dsml_start_patterns = [
r"<function_calls",
r"<invoke\s",
r"<parameter\s",
r"<think>",
# DSML variants (ASCII and Unicode separators, e.g. <DSMLinvoke ...>)
r"<\s*(?:\||)?\s*DSML",
r"DSML\s*(?:\||)",
r"DSML\s*>\s*",
]
earliest = len(text)
for pat in dsml_start_patterns:
m = re.search(pat, text, re.IGNORECASE | re.DOTALL)
if m:
earliest = min(earliest, m.start())
if earliest == 0:
return ""
prefix = text[:earliest].strip()
# Remove trailing incomplete tags
prefix = re.sub(r"<[^>]*$", "", prefix).strip()
return prefix if len(prefix) > 30 else ""
app = FastAPI(title="DAARION Router", version="2.0.0") app = FastAPI(title="DAARION Router", version="2.0.0")
# Configuration # Configuration
@@ -1054,42 +1084,47 @@ async def agent_infer(agent_id: str, request: InferRequest):
response_text = final_data.get("choices", [{}])[0].get("message", {}).get("content", "") response_text = final_data.get("choices", [{}])[0].get("message", {}).get("content", "")
# CRITICAL: Check for DSML in second response too! # CRITICAL: Check for DSML in second response too!
if response_text and "DSML" in response_text: if response_text and ("DSML" in response_text or "invoke name=" in response_text or "function_calls>" in response_text):
logger.warning(f"🧹 DSML detected in 2nd LLM response, trying 3rd call ({len(response_text)} chars)") prefix_before_dsml = _strip_dsml_keep_text_before(response_text)
# Third LLM call: explicitly ask to synthesize tool results if prefix_before_dsml:
tool_summary_parts = [] logger.warning(f"🧹 DSML in 2nd response: keeping text before DSML ({len(prefix_before_dsml)} chars), discarding {len(response_text) - len(prefix_before_dsml)} chars")
for tr in tool_results: response_text = prefix_before_dsml
if tr.get("success") and tr.get("result"): else:
res_text = str(tr["result"])[:500] logger.warning(f"🧹 DSML detected in 2nd LLM response, trying 3rd call ({len(response_text)} chars)")
tool_summary_parts.append(f"Tool '{tr['name']}' returned: {res_text}") # Third LLM call: explicitly ask to synthesize tool results
if tool_summary_parts: tool_summary_parts = []
synthesis_prompt = "Based on the following tool results, provide a helpful response to the user in their language. Do NOT use any markup or XML. Just respond naturally.\n\n" + "\n".join(tool_summary_parts) for tr in tool_results:
try: if tr.get("success") and tr.get("result"):
synth_resp = await http_client.post( res_text = str(tr["result"])[:500]
f"{cloud['base_url']}/v1/chat/completions", tool_summary_parts.append(f"Tool '{tr['name']}' returned: {res_text}")
headers={"Authorization": f"Bearer {api_key}", "Content-Type": "application/json"}, if tool_summary_parts:
json={"model": cloud["model"], "messages": [ synthesis_prompt = "Based on the following tool results, provide a helpful response to the user in their language. Do NOT use any markup or XML. Just respond naturally.\n\n" + "\n".join(tool_summary_parts)
{"role": "system", "content": system_prompt or "You are a helpful assistant. Respond naturally."}, try:
{"role": "user", "content": synthesis_prompt} synth_resp = await http_client.post(
], "max_tokens": max_tokens, "temperature": 0.3, "stream": False}, f"{cloud['base_url']}/v1/chat/completions",
timeout=cloud["timeout"] headers={"Authorization": f"Bearer {api_key}", "Content-Type": "application/json"},
) json={"model": cloud["model"], "messages": [
if synth_resp.status_code == 200: {"role": "system", "content": system_prompt or "You are a helpful assistant. Respond naturally."},
synth_data = synth_resp.json() {"role": "user", "content": synthesis_prompt}
synth_text = synth_data.get("choices", [{}])[0].get("message", {}).get("content", "") ], "max_tokens": max_tokens, "temperature": 0.3, "stream": False},
if synth_text and "DSML" not in synth_text and "invoke" not in synth_text: timeout=cloud["timeout"]
response_text = synth_text )
tokens_used += synth_data.get("usage", {}).get("total_tokens", 0) if synth_resp.status_code == 200:
logger.info("\u2705 3rd LLM call synthesized clean response from tool results") synth_data = synth_resp.json()
synth_text = synth_data.get("choices", [{}])[0].get("message", {}).get("content", "")
if synth_text and "DSML" not in synth_text and "invoke" not in synth_text:
response_text = synth_text
tokens_used += synth_data.get("usage", {}).get("total_tokens", 0)
logger.info("\u2705 3rd LLM call synthesized clean response from tool results")
else:
response_text = format_tool_calls_for_response(tool_results, fallback_mode="dsml_detected")
else: else:
response_text = format_tool_calls_for_response(tool_results, fallback_mode="dsml_detected") response_text = format_tool_calls_for_response(tool_results, fallback_mode="dsml_detected")
else: except Exception as synth_err:
logger.warning(f"3rd LLM call failed: {synth_err}")
response_text = format_tool_calls_for_response(tool_results, fallback_mode="dsml_detected") response_text = format_tool_calls_for_response(tool_results, fallback_mode="dsml_detected")
except Exception as synth_err: else:
logger.warning(f"3rd LLM call failed: {synth_err}")
response_text = format_tool_calls_for_response(tool_results, fallback_mode="dsml_detected") response_text = format_tool_calls_for_response(tool_results, fallback_mode="dsml_detected")
else:
response_text = format_tool_calls_for_response(tool_results, fallback_mode="dsml_detected")
if not response_text: if not response_text:
logger.warning(f"⚠️ {cloud['name'].upper()} returned empty response after tool call") logger.warning(f"⚠️ {cloud['name'].upper()} returned empty response after tool call")
@@ -1104,9 +1139,13 @@ async def agent_infer(agent_id: str, request: InferRequest):
if response_text: if response_text:
# FINAL DSML check before returning - never show DSML to user # FINAL DSML check before returning - never show DSML to user
if "DSML" in response_text or "invoke name=" in response_text or "function_calls>" in response_text: if "DSML" in response_text or "invoke name=" in response_text or "function_calls>" in response_text:
logger.warning(f"🧹 DSML in final response! Replacing with fallback ({len(response_text)} chars)") prefix_before_dsml = _strip_dsml_keep_text_before(response_text)
# Use dsml_detected mode - LLM confused, just acknowledge presence if prefix_before_dsml:
response_text = format_tool_calls_for_response(tool_results, fallback_mode="dsml_detected") logger.warning(f"🧹 DSML in final response: keeping text before DSML ({len(prefix_before_dsml)} chars)")
response_text = prefix_before_dsml
else:
logger.warning(f"🧹 DSML in final response! Replacing with fallback ({len(response_text)} chars)")
response_text = format_tool_calls_for_response(tool_results, fallback_mode="dsml_detected")
# Check if any tool generated an image # Check if any tool generated an image
generated_image = None generated_image = None

View File

@@ -854,6 +854,11 @@ def format_tool_calls_for_response(tool_results: List[Dict], fallback_mode: str
if tool_results: if tool_results:
for tr in tool_results: for tr in tool_results:
if tr.get("success") and tr.get("result"): if tr.get("success") and tr.get("result"):
# Avoid dumping raw retrieval/search payloads to the user.
# These often look like "memory dumps" and are perceived as incorrect answers.
tool_name = (tr.get("name") or "").strip()
if tool_name in {"memory_search", "web_search", "web_extract", "web_read"}:
continue
result = str(tr.get("result", "")) result = str(tr.get("result", ""))
if result and len(result) > 10 and "error" not in result.lower(): if result and len(result) > 10 and "error" not in result.lower():
# We have a useful tool result - use it! # We have a useful tool result - use it!
@@ -861,7 +866,7 @@ def format_tool_calls_for_response(tool_results: List[Dict], fallback_mode: str
return result[:600] + "..." return result[:600] + "..."
return result return result
# No useful tool results - give presence acknowledgment # No useful tool results - give presence acknowledgment
return "Я тут. Чим можу допомогти?" return "Вибач, відповідь згенерувалась некоректно. Спробуй ще раз (коротше/конкретніше) або повтори питання одним реченням."
if not tool_results: if not tool_results:
if fallback_mode == "empty_response": if fallback_mode == "empty_response":