diff --git a/services/router/main.py b/services/router/main.py index 158b4aa7..8050a7dd 100644 --- a/services/router/main.py +++ b/services/router/main.py @@ -5,6 +5,7 @@ from typing import Literal, Optional, Dict, Any, List import asyncio import json import os +import re import yaml import httpx import logging @@ -39,6 +40,35 @@ except ImportError: logging.basicConfig(level=logging.INFO) logger = logging.getLogger(__name__) + +def _strip_dsml_keep_text_before(text: str) -> str: + """If response contains DSML, return only the part before the first DSML-like tag. Otherwise return empty (caller will use fallback).""" + if not text or len(text.strip()) < 10: + return "" + # Find first occurrence of DSML-like patterns (tag or keyword that starts markup) + dsml_start_patterns = [ + r"<function_calls", + # DSML variants (ASCII and Unicode separators, e.g. <|DSML|invoke ...>) + r"<\s*(?:\||｜)?\s*DSML", + r"DSML\s*(?:\||｜)", + r"DSML\s*>\s*", + ] + earliest = len(text) + for pat in dsml_start_patterns: + m = re.search(pat, text, re.IGNORECASE | re.DOTALL) + if m: + earliest = min(earliest, m.start()) + if earliest == 0: + return "" + prefix = text[:earliest].strip() + # Remove trailing incomplete tags + prefix = re.sub(r"<[^>]*$", "", prefix).strip() + return prefix if len(prefix) > 30 else "" + + app = FastAPI(title="DAARION Router", version="2.0.0") # Configuration @@ -1054,42 +1084,47 @@ async def agent_infer(agent_id: str, request: InferRequest): response_text = final_data.get("choices", [{}])[0].get("message", {}).get("content", "") # CRITICAL: Check for DSML in second response too! 
- if response_text and "DSML" in response_text: - logger.warning(f"🧹 DSML detected in 2nd LLM response, trying 3rd call ({len(response_text)} chars)") - # Third LLM call: explicitly ask to synthesize tool results - tool_summary_parts = [] - for tr in tool_results: - if tr.get("success") and tr.get("result"): - res_text = str(tr["result"])[:500] - tool_summary_parts.append(f"Tool '{tr['name']}' returned: {res_text}") - if tool_summary_parts: - synthesis_prompt = "Based on the following tool results, provide a helpful response to the user in their language. Do NOT use any markup or XML. Just respond naturally.\n\n" + "\n".join(tool_summary_parts) - try: - synth_resp = await http_client.post( - f"{cloud['base_url']}/v1/chat/completions", - headers={"Authorization": f"Bearer {api_key}", "Content-Type": "application/json"}, - json={"model": cloud["model"], "messages": [ - {"role": "system", "content": system_prompt or "You are a helpful assistant. Respond naturally."}, - {"role": "user", "content": synthesis_prompt} - ], "max_tokens": max_tokens, "temperature": 0.3, "stream": False}, - timeout=cloud["timeout"] - ) - if synth_resp.status_code == 200: - synth_data = synth_resp.json() - synth_text = synth_data.get("choices", [{}])[0].get("message", {}).get("content", "") - if synth_text and "DSML" not in synth_text and "invoke" not in synth_text: - response_text = synth_text - tokens_used += synth_data.get("usage", {}).get("total_tokens", 0) - logger.info("\u2705 3rd LLM call synthesized clean response from tool results") + if response_text and ("DSML" in response_text or "invoke name=" in response_text or "function_calls>" in response_text): + prefix_before_dsml = _strip_dsml_keep_text_before(response_text) + if prefix_before_dsml: + logger.warning(f"🧹 DSML in 2nd response: keeping text before DSML ({len(prefix_before_dsml)} chars), discarding {len(response_text) - len(prefix_before_dsml)} chars") + response_text = prefix_before_dsml + else: + logger.warning(f"🧹 DSML 
detected in 2nd LLM response, trying 3rd call ({len(response_text)} chars)") + # Third LLM call: explicitly ask to synthesize tool results + tool_summary_parts = [] + for tr in tool_results: + if tr.get("success") and tr.get("result"): + res_text = str(tr["result"])[:500] + tool_summary_parts.append(f"Tool '{tr['name']}' returned: {res_text}") + if tool_summary_parts: + synthesis_prompt = "Based on the following tool results, provide a helpful response to the user in their language. Do NOT use any markup or XML. Just respond naturally.\n\n" + "\n".join(tool_summary_parts) + try: + synth_resp = await http_client.post( + f"{cloud['base_url']}/v1/chat/completions", + headers={"Authorization": f"Bearer {api_key}", "Content-Type": "application/json"}, + json={"model": cloud["model"], "messages": [ + {"role": "system", "content": system_prompt or "You are a helpful assistant. Respond naturally."}, + {"role": "user", "content": synthesis_prompt} + ], "max_tokens": max_tokens, "temperature": 0.3, "stream": False}, + timeout=cloud["timeout"] + ) + if synth_resp.status_code == 200: + synth_data = synth_resp.json() + synth_text = synth_data.get("choices", [{}])[0].get("message", {}).get("content", "") + if synth_text and "DSML" not in synth_text and "invoke" not in synth_text: + response_text = synth_text + tokens_used += synth_data.get("usage", {}).get("total_tokens", 0) + logger.info("\u2705 3rd LLM call synthesized clean response from tool results") + else: + response_text = format_tool_calls_for_response(tool_results, fallback_mode="dsml_detected") else: response_text = format_tool_calls_for_response(tool_results, fallback_mode="dsml_detected") - else: + except Exception as synth_err: + logger.warning(f"3rd LLM call failed: {synth_err}") response_text = format_tool_calls_for_response(tool_results, fallback_mode="dsml_detected") - except Exception as synth_err: - logger.warning(f"3rd LLM call failed: {synth_err}") + else: response_text = 
format_tool_calls_for_response(tool_results, fallback_mode="dsml_detected") - else: - response_text = format_tool_calls_for_response(tool_results, fallback_mode="dsml_detected") if not response_text: logger.warning(f"⚠️ {cloud['name'].upper()} returned empty response after tool call") @@ -1104,9 +1139,13 @@ async def agent_infer(agent_id: str, request: InferRequest): if response_text: # FINAL DSML check before returning - never show DSML to user if "DSML" in response_text or "invoke name=" in response_text or "function_calls>" in response_text: - logger.warning(f"🧹 DSML in final response! Replacing with fallback ({len(response_text)} chars)") - # Use dsml_detected mode - LLM confused, just acknowledge presence - response_text = format_tool_calls_for_response(tool_results, fallback_mode="dsml_detected") + prefix_before_dsml = _strip_dsml_keep_text_before(response_text) + if prefix_before_dsml: + logger.warning(f"🧹 DSML in final response: keeping text before DSML ({len(prefix_before_dsml)} chars)") + response_text = prefix_before_dsml + else: + logger.warning(f"🧹 DSML in final response! Replacing with fallback ({len(response_text)} chars)") + response_text = format_tool_calls_for_response(tool_results, fallback_mode="dsml_detected") # Check if any tool generated an image generated_image = None diff --git a/services/router/tool_manager.py b/services/router/tool_manager.py index 2fd7643a..c6cc1208 100644 --- a/services/router/tool_manager.py +++ b/services/router/tool_manager.py @@ -854,6 +854,11 @@ def format_tool_calls_for_response(tool_results: List[Dict], fallback_mode: str if tool_results: for tr in tool_results: if tr.get("success") and tr.get("result"): + # Avoid dumping raw retrieval/search payloads to the user. + # These often look like "memory dumps" and are perceived as incorrect answers. 
+ tool_name = (tr.get("name") or "").strip() + if tool_name in {"memory_search", "web_search", "web_extract", "web_read"}: + continue result = str(tr.get("result", "")) if result and len(result) > 10 and "error" not in result.lower(): # We have a useful tool result - use it! @@ -861,7 +866,7 @@ def format_tool_calls_for_response(tool_results: List[Dict], fallback_mode: str return result[:600] + "..." return result # No useful tool results - give presence acknowledgment - return "Я тут. Чим можу допомогти?" + return "Вибач, відповідь згенерувалась некоректно. Спробуй ще раз (коротше/конкретніше) або повтори питання одним реченням." if not tool_results: if fallback_mode == "empty_response":