diff --git a/services/router/main.py b/services/router/main.py index 158b4aa7..8050a7dd 100644 --- a/services/router/main.py +++ b/services/router/main.py @@ -5,6 +5,7 @@ from typing import Literal, Optional, Dict, Any, List import asyncio import json import os +import re import yaml import httpx import logging @@ -39,6 +40,35 @@ except ImportError: logging.basicConfig(level=logging.INFO) logger = logging.getLogger(__name__) + +def _strip_dsml_keep_text_before(text: str) -> str: + """If response contains DSML, return only the part before the first DSML-like tag. Otherwise return empty (caller will use fallback).""" + if not text or len(text.strip()) < 10: + return "" + # Find first occurrence of DSML-like patterns (tag or keyword that starts markup) + dsml_start_patterns = [ + r"<function_calls", + # DSML variants (ASCII and Unicode separators, e.g. <|DSML|invoke ...>) + r"<\s*(?:\||｜)?\s*DSML", + r"DSML\s*(?:\||｜)", + r"DSML\s*>\s*", + ] + earliest = len(text) + for pat in dsml_start_patterns: + m = re.search(pat, text, re.IGNORECASE | re.DOTALL) + if m: + earliest = min(earliest, m.start()) + if earliest == 0: + return "" + prefix = text[:earliest].strip() + # Remove trailing incomplete tags + prefix = re.sub(r"<[^>]*$", "", prefix).strip() + return prefix if len(prefix) > 30 else "" + + app = FastAPI(title="DAARION Router", version="2.0.0") # Configuration @@ -1054,42 +1084,47 @@ async def agent_infer(agent_id: str, request: InferRequest): response_text = final_data.get("choices", [{}])[0].get("message", {}).get("content", "") # CRITICAL: Check for DSML in second response too! 
- if response_text and "DSML" in response_text: - logger.warning(f"🧹 DSML detected in 2nd LLM response, trying 3rd call ({len(response_text)} chars)") - # Third LLM call: explicitly ask to synthesize tool results - tool_summary_parts = [] - for tr in tool_results: - if tr.get("success") and tr.get("result"): - res_text = str(tr["result"])[:500] - tool_summary_parts.append(f"Tool '{tr['name']}' returned: {res_text}") - if tool_summary_parts: - synthesis_prompt = "Based on the following tool results, provide a helpful response to the user in their language. Do NOT use any markup or XML. Just respond naturally.\n\n" + "\n".join(tool_summary_parts) - try: - synth_resp = await http_client.post( - f"{cloud['base_url']}/v1/chat/completions", - headers={"Authorization": f"Bearer {api_key}", "Content-Type": "application/json"}, - json={"model": cloud["model"], "messages": [ - {"role": "system", "content": system_prompt or "You are a helpful assistant. Respond naturally."}, - {"role": "user", "content": synthesis_prompt} - ], "max_tokens": max_tokens, "temperature": 0.3, "stream": False}, - timeout=cloud["timeout"] - ) - if synth_resp.status_code == 200: - synth_data = synth_resp.json() - synth_text = synth_data.get("choices", [{}])[0].get("message", {}).get("content", "") - if synth_text and "DSML" not in synth_text and "invoke" not in synth_text: - response_text = synth_text - tokens_used += synth_data.get("usage", {}).get("total_tokens", 0) - logger.info("\u2705 3rd LLM call synthesized clean response from tool results") + if response_text and ("DSML" in response_text or "invoke name=" in response_text or "function_calls>" in response_text): + prefix_before_dsml = _strip_dsml_keep_text_before(response_text) + if prefix_before_dsml: + logger.warning(f"🧹 DSML in 2nd response: keeping text before DSML ({len(prefix_before_dsml)} chars), discarding {len(response_text) - len(prefix_before_dsml)} chars") + response_text = prefix_before_dsml + else: + logger.warning(f"🧹 DSML 
detected in 2nd LLM response, trying 3rd call ({len(response_text)} chars)") + # Third LLM call: explicitly ask to synthesize tool results + tool_summary_parts = [] + for tr in tool_results: + if tr.get("success") and tr.get("result"): + res_text = str(tr["result"])[:500] + tool_summary_parts.append(f"Tool '{tr['name']}' returned: {res_text}") + if tool_summary_parts: + synthesis_prompt = "Based on the following tool results, provide a helpful response to the user in their language. Do NOT use any markup or XML. Just respond naturally.\n\n" + "\n".join(tool_summary_parts) + try: + synth_resp = await http_client.post( + f"{cloud['base_url']}/v1/chat/completions", + headers={"Authorization": f"Bearer {api_key}", "Content-Type": "application/json"}, + json={"model": cloud["model"], "messages": [ + {"role": "system", "content": system_prompt or "You are a helpful assistant. Respond naturally."}, + {"role": "user", "content": synthesis_prompt} + ], "max_tokens": max_tokens, "temperature": 0.3, "stream": False}, + timeout=cloud["timeout"] + ) + if synth_resp.status_code == 200: + synth_data = synth_resp.json() + synth_text = synth_data.get("choices", [{}])[0].get("message", {}).get("content", "") + if synth_text and "DSML" not in synth_text and "invoke" not in synth_text: + response_text = synth_text + tokens_used += synth_data.get("usage", {}).get("total_tokens", 0) + logger.info("\u2705 3rd LLM call synthesized clean response from tool results") + else: + response_text = format_tool_calls_for_response(tool_results, fallback_mode="dsml_detected") else: response_text = format_tool_calls_for_response(tool_results, fallback_mode="dsml_detected") - else: + except Exception as synth_err: + logger.warning(f"3rd LLM call failed: {synth_err}") response_text = format_tool_calls_for_response(tool_results, fallback_mode="dsml_detected") - except Exception as synth_err: - logger.warning(f"3rd LLM call failed: {synth_err}") + else: response_text = 
format_tool_calls_for_response(tool_results, fallback_mode="dsml_detected") - else: - response_text = format_tool_calls_for_response(tool_results, fallback_mode="dsml_detected") if not response_text: logger.warning(f"⚠️ {cloud['name'].upper()} returned empty response after tool call") @@ -1104,9 +1139,13 @@ async def agent_infer(agent_id: str, request: InferRequest): if response_text: # FINAL DSML check before returning - never show DSML to user if "DSML" in response_text or "invoke name=" in response_text or "function_calls>" in response_text: - logger.warning(f"🧹 DSML in final response! Replacing with fallback ({len(response_text)} chars)") - # Use dsml_detected mode - LLM confused, just acknowledge presence - response_text = format_tool_calls_for_response(tool_results, fallback_mode="dsml_detected") + prefix_before_dsml = _strip_dsml_keep_text_before(response_text) + if prefix_before_dsml: + logger.warning(f"🧹 DSML in final response: keeping text before DSML ({len(prefix_before_dsml)} chars)") + response_text = prefix_before_dsml + else: + logger.warning(f"🧹 DSML in final response! Replacing with fallback ({len(response_text)} chars)") + response_text = format_tool_calls_for_response(tool_results, fallback_mode="dsml_detected") # Check if any tool generated an image generated_image = None diff --git a/services/router/tool_manager.py b/services/router/tool_manager.py index 2fd7643a..c6cc1208 100644 --- a/services/router/tool_manager.py +++ b/services/router/tool_manager.py @@ -854,6 +854,11 @@ def format_tool_calls_for_response(tool_results: List[Dict], fallback_mode: str if tool_results: for tr in tool_results: if tr.get("success") and tr.get("result"): + # Avoid dumping raw retrieval/search payloads to the user. + # These often look like "memory dumps" and are perceived as incorrect answers. 
+ tool_name = (tr.get("name") or "").strip() + if tool_name in {"memory_search", "web_search", "web_extract", "web_read"}: + continue result = str(tr.get("result", "")) if result and len(result) > 10 and "error" not in result.lower(): # We have a useful tool result - use it! @@ -861,7 +866,7 @@ def format_tool_calls_for_response(tool_results: List[Dict], fallback_mode: str return result[:600] + "..." return result # No useful tool results - give presence acknowledgment - return "Я тут. Чим можу допомогти?" + return "Вибач, відповідь згенерувалась некоректно. Спробуй ще раз (коротше/конкретніше) або повтори питання одним реченням." if not tool_results: if fallback_mode == "empty_response":