fix(router): guard DSML tool-call flows
Prevent DeepSeek DSML from leaking to users and avoid returning raw memory_search/web results when DSML is detected. Co-authored-by: Cursor <cursoragent@cursor.com>
This commit is contained in:
@@ -4,6 +4,7 @@ from typing import Literal, Optional, Dict, Any, List
|
||||
import asyncio
|
||||
import json
|
||||
import os
|
||||
import re
|
||||
import yaml
|
||||
import httpx
|
||||
import logging
|
||||
@@ -28,6 +29,35 @@ except ImportError:
|
||||
logging.basicConfig(level=logging.INFO)
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def _strip_dsml_keep_text_before(text: str) -> str:
|
||||
"""If response contains DSML, return only the part before the first DSML-like tag. Otherwise return empty (caller will use fallback)."""
|
||||
if not text or len(text.strip()) < 10:
|
||||
return ""
|
||||
# Find first occurrence of DSML-like patterns (tag or keyword that starts markup)
|
||||
dsml_start_patterns = [
|
||||
r"<function_calls",
|
||||
r"<invoke\s",
|
||||
r"<parameter\s",
|
||||
r"<think>",
|
||||
# DSML variants (ASCII and Unicode separators, e.g. <|DSML|invoke ...>)
|
||||
r"<\s*(?:\|||)?\s*DSML",
|
||||
r"DSML\s*(?:\|||)",
|
||||
r"DSML\s*>\s*",
|
||||
]
|
||||
earliest = len(text)
|
||||
for pat in dsml_start_patterns:
|
||||
m = re.search(pat, text, re.IGNORECASE | re.DOTALL)
|
||||
if m:
|
||||
earliest = min(earliest, m.start())
|
||||
if earliest == 0:
|
||||
return ""
|
||||
prefix = text[:earliest].strip()
|
||||
# Remove trailing incomplete tags
|
||||
prefix = re.sub(r"<[^>]*$", "", prefix).strip()
|
||||
return prefix if len(prefix) > 30 else ""
|
||||
|
||||
|
||||
app = FastAPI(title="DAARION Router", version="2.0.0")
|
||||
|
||||
# Configuration
|
||||
@@ -841,9 +871,47 @@ async def agent_infer(agent_id: str, request: InferRequest):
|
||||
response_text = final_data.get("choices", [{}])[0].get("message", {}).get("content", "")
|
||||
|
||||
# CRITICAL: Check for DSML in second response too!
|
||||
if response_text and "DSML" in response_text:
|
||||
logger.warning(f"🧹 DSML detected in second LLM response, clearing ({len(response_text)} chars)")
|
||||
response_text = format_tool_calls_for_response(tool_results, fallback_mode="dsml_detected")
|
||||
if response_text and ("DSML" in response_text or "invoke name=" in response_text or "function_calls>" in response_text):
|
||||
prefix_before_dsml = _strip_dsml_keep_text_before(response_text)
|
||||
if prefix_before_dsml:
|
||||
logger.warning(f"🧹 DSML in 2nd response: keeping text before DSML ({len(prefix_before_dsml)} chars), discarding {len(response_text) - len(prefix_before_dsml)} chars")
|
||||
response_text = prefix_before_dsml
|
||||
else:
|
||||
logger.warning(f"🧹 DSML detected in 2nd LLM response, trying 3rd call ({len(response_text)} chars)")
|
||||
# Third LLM call: explicitly ask to synthesize tool results
|
||||
tool_summary_parts = []
|
||||
for tr in tool_results:
|
||||
if tr.get("success") and tr.get("result"):
|
||||
res_text = str(tr["result"])[:500]
|
||||
tool_summary_parts.append(f"Tool '{tr['name']}' returned: {res_text}")
|
||||
if tool_summary_parts:
|
||||
synthesis_prompt = "Based on the following tool results, provide a helpful response to the user in their language. Do NOT use any markup or XML. Just respond naturally.\n\n" + "\n".join(tool_summary_parts)
|
||||
try:
|
||||
synth_resp = await http_client.post(
|
||||
f"{cloud['base_url']}/v1/chat/completions",
|
||||
headers={"Authorization": f"Bearer {api_key}", "Content-Type": "application/json"},
|
||||
json={"model": cloud["model"], "messages": [
|
||||
{"role": "system", "content": system_prompt or "You are a helpful assistant. Respond naturally."},
|
||||
{"role": "user", "content": synthesis_prompt}
|
||||
], "max_tokens": max_tokens, "temperature": 0.3, "stream": False},
|
||||
timeout=cloud["timeout"]
|
||||
)
|
||||
if synth_resp.status_code == 200:
|
||||
synth_data = synth_resp.json()
|
||||
synth_text = synth_data.get("choices", [{}])[0].get("message", {}).get("content", "")
|
||||
if synth_text and "DSML" not in synth_text and "invoke" not in synth_text:
|
||||
response_text = synth_text
|
||||
tokens_used += synth_data.get("usage", {}).get("total_tokens", 0)
|
||||
logger.info("\u2705 3rd LLM call synthesized clean response from tool results")
|
||||
else:
|
||||
response_text = format_tool_calls_for_response(tool_results, fallback_mode="dsml_detected")
|
||||
else:
|
||||
response_text = format_tool_calls_for_response(tool_results, fallback_mode="dsml_detected")
|
||||
except Exception as synth_err:
|
||||
logger.warning(f"3rd LLM call failed: {synth_err}")
|
||||
response_text = format_tool_calls_for_response(tool_results, fallback_mode="dsml_detected")
|
||||
else:
|
||||
response_text = format_tool_calls_for_response(tool_results, fallback_mode="dsml_detected")
|
||||
|
||||
if not response_text:
|
||||
logger.warning(f"⚠️ {cloud['name'].upper()} returned empty response after tool call")
|
||||
@@ -858,9 +926,13 @@ async def agent_infer(agent_id: str, request: InferRequest):
|
||||
if response_text:
|
||||
# FINAL DSML check before returning - never show DSML to user
|
||||
if "DSML" in response_text or "invoke name=" in response_text or "function_calls>" in response_text:
|
||||
logger.warning(f"🧹 DSML in final response! Replacing with fallback ({len(response_text)} chars)")
|
||||
# Use dsml_detected mode - LLM confused, just acknowledge presence
|
||||
response_text = format_tool_calls_for_response(tool_results, fallback_mode="dsml_detected")
|
||||
prefix_before_dsml = _strip_dsml_keep_text_before(response_text)
|
||||
if prefix_before_dsml:
|
||||
logger.warning(f"🧹 DSML in final response: keeping text before DSML ({len(prefix_before_dsml)} chars)")
|
||||
response_text = prefix_before_dsml
|
||||
else:
|
||||
logger.warning(f"🧹 DSML in final response! Replacing with fallback ({len(response_text)} chars)")
|
||||
response_text = format_tool_calls_for_response(tool_results, fallback_mode="dsml_detected")
|
||||
|
||||
# Check if any tool generated an image
|
||||
generated_image = None
|
||||
|
||||
@@ -671,6 +671,11 @@ def format_tool_calls_for_response(tool_results: List[Dict], fallback_mode: str
|
||||
if tool_results:
|
||||
for tr in tool_results:
|
||||
if tr.get("success") and tr.get("result"):
|
||||
# Avoid dumping raw retrieval/search payloads to the user.
|
||||
# These often look like "memory dumps" and are perceived as incorrect answers.
|
||||
tool_name = (tr.get("name") or "").strip()
|
||||
if tool_name in {"memory_search", "web_search", "web_extract", "web_read"}:
|
||||
continue
|
||||
result = str(tr.get("result", ""))
|
||||
if result and len(result) > 10 and "error" not in result.lower():
|
||||
# We have a useful tool result - use it!
|
||||
@@ -678,7 +683,7 @@ def format_tool_calls_for_response(tool_results: List[Dict], fallback_mode: str
|
||||
return result[:600] + "..."
|
||||
return result
|
||||
# No useful tool results - give presence acknowledgment
|
||||
return "Я тут. Чим можу допомогти?"
|
||||
return "Вибач, відповідь згенерувалась некоректно. Спробуй ще раз (коротше/конкретніше) або повтори питання одним реченням."
|
||||
|
||||
if not tool_results:
|
||||
if fallback_mode == "empty_response":
|
||||
|
||||
Reference in New Issue
Block a user