gateway: add public invoke/jobs facade with redis queue worker and SSE

This commit is contained in:
NODA1 System
2026-02-20 17:55:47 +01:00
parent 7e82a427e3
commit 2e76ef9ccb
7 changed files with 619 additions and 55 deletions

View File

@@ -88,16 +88,84 @@ def _get_recent_photo_file_id(agent_id: str, chat_id: str, user_id: str) -> Opti
return rec.get("file_id")
def _extract_recent_photo_file_id_from_memory(memory_context: Dict[str, Any]) -> Optional[str]:
"""
Extract last seen Telegram photo file_id from memory context.
Looks for patterns like: [Photo: <file_id>]
"""
if not memory_context:
return None
pattern = re.compile(r"\[Photo:\s*([^\]\s]+)\]")
recent_events = memory_context.get("recent_events", []) or []
for ev in reversed(recent_events):
body = (ev.get("body_text") or "").strip()
if not body:
continue
m = pattern.search(body)
if m:
return m.group(1)
local_text = memory_context.get("local_context_text") or ""
for line in reversed(local_text.splitlines()):
m = pattern.search(line)
if m:
return m.group(1)
return None
def _looks_like_photo_followup(text: str) -> bool:
if not text:
return False
t = text.strip().lower()
markers = [
direct_markers = [
"що ти бачиш", "що на фото", "що на зображенні", "опиши фото", "подивись фото",
"що на цьому фото", "що на цій фотографії", "що на цій світлині",
"проаналізуй фото", "аналіз фото", "переглянь фото", "повернись до фото",
"яка це рослина", "що це за рослина", "що за рослина", "що за культура",
"яка культура", "визнач рослину",
"what do you see", "what is in the image", "describe the photo",
"analyze the photo", "analyze image", "what plant is this",
"что ты видишь", "что на фото", "опиши фото", "посмотри фото",
"проанализируй фото", "какое это растение", "что за растение",
]
return any(m in t for m in markers)
if any(m in t for m in direct_markers):
return True
# If user is correcting previous visual interpretation, route to vision again.
correction_markers = [
"неправильна відповідь", "не правильна відповідь", "не видумуй", "це не так",
"ти помилився", "ти помилилась", "неправильно визначив",
"wrong answer", "you are wrong", "that is incorrect",
"неправильный ответ", "это не так", "ты ошибся",
]
photo_topic_markers = ["фото", "зображ", "рослин", "image", "photo", "plant", "растен"]
if any(c in t for c in correction_markers) and any(p in t for p in photo_topic_markers):
return True
# Flexible forms: "що на ... фото/зображенні/світлині"
if re.search(r"(що|what|что)\s+на\s+.*(фото|зображ|світлин|image|photo)", t):
# Exclude common meta-questions
meta_exclude = ["канал", "чат", "бот", "нормально"]
if not any(ex in t for ex in meta_exclude):
return True
return False
def _is_agromatrix_plant_intel_intent(agent_id: str, text: str) -> bool:
if (agent_id or "").lower() != "agromatrix":
return False
if not text:
return False
tl = text.strip().lower()
markers = [
"що за рослина", "що це за рослина", "яка це рослина", "яка культура",
"визнач рослину", "ідентифікуй рослину", "хвороба рослини", "плями на листі",
"what plant", "identify plant", "identify crop", "plant disease",
"что за растение", "определи растение", "болезнь растения",
]
return any(m in tl for m in markers)
def _cleanup_user_language_prefs() -> None:
@@ -855,6 +923,112 @@ def should_force_concise_reply(text: str) -> bool:
return True
def _strip_answer_markup_noise(answer_text: str) -> str:
if not answer_text:
return ""
cleaned = answer_text.strip()
cleaned = re.sub(r"^\s*\*{1,3}\s*коротка відповідь\s*:?\s*\*{0,3}\s*", "", cleaned, flags=re.IGNORECASE)
cleaned = re.sub(r"^\s*\*{1,3}\s*відповідь\s*:?\s*\*{0,3}\s*", "", cleaned, flags=re.IGNORECASE)
cleaned = re.sub(r"^\s*#+\s*", "", cleaned)
# Remove markdown emphasis noise that leaks into short answers
cleaned = cleaned.replace("**", "")
cleaned = cleaned.replace("__", "")
return cleaned.strip()
def _compress_bulleted_answer(answer_text: str, max_items: int = 3) -> str:
if not answer_text:
return ""
lines = [ln.strip() for ln in answer_text.splitlines() if ln.strip()]
bullet_lines: List[str] = []
for ln in lines:
normalized = ln.replace("**", "").replace("__", "").strip()
if re.match(r"^(\*?\s*[-*•]|\*?\s*\d+[\.\):])\s*", normalized):
item = re.sub(r"^(\*?\s*[-*•]|\*?\s*\d+[\.\):])\s*", "", normalized).strip()
item = re.sub(r"\s+", " ", item).strip(" -–—")
item = re.sub(r"\.{2,}", ".", item)
item = re.sub(r"\s+\.", ".", item)
# Keep concise mode truly short: first complete sentence from each bullet.
parts = re.split(r"(?<=[.!?…])\s+", item)
if parts:
item = parts[0].strip()
item = item.rstrip(":").strip()
if item:
bullet_lines.append(item)
if not bullet_lines:
return answer_text.strip()
picked = bullet_lines[:max_items]
joined = ". ".join(picked)
if joined and not joined.endswith((".", "!", "?")):
joined += "."
joined = re.sub(r"\s+", " ", joined).strip()
return joined or answer_text.strip()
def _limit_to_sentences(text: str, max_sentences: int = 3) -> str:
if not text:
return ""
parts = re.split(r"(?<=[.!?…])\s+", text.strip())
parts = [p.strip() for p in parts if p.strip()]
if len(parts) <= max_sentences:
return " ".join(parts).strip()
return " ".join(parts[:max_sentences]).strip()
def _agromatrix_rewrite_capability_limitations(user_text: str, answer_text: str) -> str:
if not answer_text:
return answer_text
low = answer_text.lower()
limitation_markers = (
"не можу бачити", "не можу переглядати зображення", "не маю доступу до зображень",
"працюю лише з текстом", "працюю виключно з текстом",
"cannot view images", "cannot analyze images", "as a text model",
)
if not any(m in low for m in limitation_markers):
return answer_text
ulow = (user_text or "").lower()
photo_markers = ("фото", "зображ", "image", "photo", "картин", "світлин")
if any(m in ulow for m in photo_markers):
return (
"Можу аналізувати фото. Надішли, будь ласка, зображення ще раз одним повідомленням "
"з коротким питанням, і я дам точний розбір."
)
return (
"Можу працювати природною мовою та з мультимодальністю: фото, голос і документи. "
"Сформулюй запит коротко, і я відповім по суті."
)
def postprocess_agent_answer(
    agent_id: str,
    user_text: str,
    answer_text: str,
    force_detailed: bool,
    needs_complex_reasoning: bool,
) -> str:
    """
    Shorten and clean an agent answer before it is sent to the user.

    The answer is returned untouched when it is empty, when the agent is not
    "agromatrix" (case-insensitive), or when a detailed/complex answer was
    requested. For user messages longer than 280 characters (after stripping)
    only the capability-limitation rewrite is applied. Otherwise the answer is
    stripped of markup noise, passed through the capability-limitation rewrite,
    reduced to its first bullet item (if it is a list) and limited to three
    sentences. If that pipeline produces an empty string, the original answer
    is returned instead.
    """
    # Keep detailed/complex answers intact; non-agromatrix agents are never touched.
    if (
        not answer_text
        or (agent_id or "").lower() != "agromatrix"
        or force_detailed
        or needs_complex_reasoning
    ):
        return answer_text

    if len((user_text or "").strip()) > 280:
        return _agromatrix_rewrite_capability_limitations(user_text, answer_text)

    rewritten = _agromatrix_rewrite_capability_limitations(
        user_text,
        _strip_answer_markup_noise(answer_text),
    )
    shortened = _limit_to_sentences(
        _compress_bulleted_answer(rewritten, max_items=1),
        max_sentences=3,
    )
    return shortened or answer_text
COMPLEX_REASONING_KEYWORDS = [
"стратег", "roadmap", "алгоритм", "architecture", "архітектур",
"прогноз", "scenario", "модель", "аналіз", "побудуй", "plan", "дослідж",
@@ -2148,12 +2322,45 @@ async def handle_telegram_webhook(
text = update.message.get("text", "")
caption = update.message.get("caption", "")
# If user asks about a recently sent photo, run vision on cached photo file_id.
if text and _looks_like_photo_followup(text):
# Photo/image intent guard:
# if text references a photo/image, try to resolve latest file_id and route to vision.
photo_intent = False
if text:
tl = text.lower()
photo_intent = _looks_like_photo_followup(text) or any(
k in tl for k in ("фото", "зображ", "світлин", "image", "photo")
)
if not photo_intent:
# Robust fallback for common formulations like "що на цьому фото?"
photo_intent = bool(
re.search(r"(що|what|что).{0,24}(цьому|этом|this).{0,24}(фото|зображ|світлин|image|photo)", tl)
)
if photo_intent:
recent_file_id = _get_recent_photo_file_id(agent_config.agent_id, chat_id, user_id)
# Fallback: recover latest photo file_id from memory-service context (survives process restarts).
if not recent_file_id:
try:
mc = await memory_client.get_context(
user_id=f"tg:{user_id}",
agent_id=agent_config.agent_id,
team_id=dao_id,
channel_id=chat_id,
limit=80,
)
recent_file_id = _extract_recent_photo_file_id_from_memory(mc)
if recent_file_id:
_set_recent_photo_context(agent_config.agent_id, chat_id, user_id, recent_file_id)
logger.info(
f"{agent_config.name}: Recovered photo file_id from memory context for follow-up: {recent_file_id}"
)
except Exception as e:
logger.warning(f"{agent_config.name}: failed to recover photo file_id from memory: {e}")
if recent_file_id:
logger.info(
f"{agent_config.name}: Detected follow-up photo question; using cached file_id={recent_file_id}"
f"{agent_config.name}: Photo intent detected; using file_id={recent_file_id}"
)
followup_result = await process_photo(
agent_config=agent_config,
@@ -2167,6 +2374,16 @@ async def handle_telegram_webhook(
bypass_media_gate=True,
)
return followup_result
# Hard guard: don't send photo-related requests to text LLM path when image context is missing.
is_question_like = ("?" in text) or any(k in tl for k in ("що", "опиши", "проанал", "what", "describe", "analy", "что"))
if is_question_like:
await send_telegram_message(
chat_id,
"Бачу питання про фото, але не знайшов зображення в історії сесії. Надішли фото ще раз з коротким питанням, і я одразу проаналізую.",
telegram_token,
)
return {"ok": True, "handled": True, "reason": "photo_followup_without_image_context"}
if not text and not caption:
# Check for unsupported message types and silently ignore
@@ -2432,6 +2649,7 @@ async def handle_telegram_webhook(
"session_id": f"tg:{chat_id}:{dao_id}",
"username": username,
"chat_id": chat_id,
"raw_user_text": text,
"sender_is_bot": is_sender_bot,
"mentioned_bots": mentioned_bots,
"requires_complex_reasoning": needs_complex_reasoning,
@@ -2454,6 +2672,9 @@ async def handle_telegram_webhook(
if should_force_detailed_reply(text):
router_request["metadata"]["force_detailed"] = True
if _is_agromatrix_plant_intel_intent(agent_config.agent_id, text):
router_request["metadata"]["crewai_profile"] = "plant_intel"
if should_force_concise_reply(text):
# IMPORTANT: preserve conversation context! Only append concise instruction
router_request["metadata"]["force_concise"] = True
@@ -2551,9 +2772,14 @@ async def handle_telegram_webhook(
)
return {"ok": True, "skipped": True, "reason": "no_output_from_llm"}
# Truncate if too long for Telegram
if len(answer_text) > TELEGRAM_SAFE_LENGTH:
answer_text = answer_text[:TELEGRAM_SAFE_LENGTH] + "\n\n_... (відповідь обрізано)_"
force_detailed_reply = bool(router_request.get("metadata", {}).get("force_detailed"))
answer_text = postprocess_agent_answer(
agent_id=agent_config.agent_id,
user_text=text or "",
answer_text=answer_text,
force_detailed=force_detailed_reply,
needs_complex_reasoning=needs_complex_reasoning,
)
# Skip Telegram sending for prober requests (chat_id=0)
if is_prober:
@@ -2591,7 +2817,9 @@ async def handle_telegram_webhook(
async with httpx.AsyncClient() as client:
files = {"photo": ("image.png", BytesIO(image_bytes), "image/png")}
data = {"chat_id": chat_id, "caption": answer_text}
# Telegram caption limit is 1024 chars.
safe_caption = (answer_text or "")[:1024]
data = {"chat_id": chat_id, "caption": safe_caption}
response_photo = await client.post(url, files=files, data=data, timeout=30.0)
response_photo.raise_for_status()
logger.info(f"✅ Sent generated image to Telegram chat {chat_id}")
@@ -3532,44 +3760,51 @@ async def send_telegram_message(chat_id: str, text: str, bot_token: Optional[str
return False
# Defensive cleanup for occasional reasoning/markup leaks.
import re
safe_text = re.sub(r'<think>.*?</think>', '', text or "", flags=re.DOTALL)
safe_text = re.sub(r'<think>.*$', '', safe_text, flags=re.DOTALL)
safe_text = safe_text.strip() or "..."
token_id = telegram_token.split(":", 1)[0] if ":" in telegram_token else "unknown"
url = f"https://api.telegram.org/bot{telegram_token}/sendMessage"
payload = {
"chat_id": str(chat_id),
"text": safe_text,
"disable_web_page_preview": True,
}
try:
async with httpx.AsyncClient() as client:
response = await client.post(url, json=payload, timeout=15.0)
async def _send_chunk(chunk: str) -> bool:
payload = {
"chat_id": str(chat_id),
"text": chunk,
"disable_web_page_preview": True,
}
try:
async with httpx.AsyncClient() as client:
response = await client.post(url, json=payload, timeout=15.0)
if response.status_code >= 400:
err_desc = response.text[:300]
try:
body = response.json()
err_desc = body.get("description") or err_desc
except Exception:
pass
logger.error(
"Telegram sendMessage failed: bot_id=%s chat_id=%s status=%s desc=%s",
token_id,
chat_id,
response.status_code,
err_desc,
)
if response.status_code >= 400:
err_desc = response.text[:300]
try:
body = response.json()
err_desc = body.get("description") or err_desc
except Exception:
pass
logger.error(
"Telegram sendMessage failed: bot_id=%s chat_id=%s status=%s desc=%s",
token_id,
chat_id,
response.status_code,
err_desc,
)
return False
return True
except Exception as e:
logger.error("Telegram sendMessage exception: bot_id=%s chat_id=%s error=%s", token_id, chat_id, e)
return False
logger.info("Telegram message sent: bot_id=%s chat_id=%s", token_id, chat_id)
return True
except Exception as e:
logger.error("Telegram sendMessage exception: bot_id=%s chat_id=%s error=%s", token_id, chat_id, e)
return False
all_ok = True
chunks = _chunk_text(safe_text, max_len=TELEGRAM_MAX_MESSAGE_LENGTH)
for chunk in chunks:
sent = await _send_chunk(chunk)
all_ok = all_ok and sent
if all_ok:
logger.info("Telegram message sent: bot_id=%s chat_id=%s chunks=%s", token_id, chat_id, len(chunks))
return all_ok
# ========================================