fix: змінено обробку фото для використання Swapper vision-8b замість Vision Encoder

This commit is contained in:
Apple
2025-11-21 00:50:57 -08:00
parent 7d65aeff21
commit 6d58532d68

View File

@@ -331,12 +331,12 @@ async def telegram_webhook(update: TelegramUpdate):
file_url = f"https://api.telegram.org/file/bot{telegram_token}/{file_path}" file_url = f"https://api.telegram.org/file/bot{telegram_token}/{file_path}"
# Send "Processing..." message # Send "Processing..." message
await send_telegram_message(chat_id, "📸 Обробляю фото через Vision Encoder...") await send_telegram_message(chat_id, "📸 Обробляю фото через Vision-8b модель...")
# Send to Router with vision_embed mode # Send to Router with specialist_vision_8b model (Swapper)
router_request = { router_request = {
"message": "Оброби фото", "message": f"Опиши це зображення детально: {file_url}",
"mode": "vision_embed", "mode": "chat",
"agent": "daarwizz", "agent": "daarwizz",
"metadata": { "metadata": {
"source": "telegram", "source": "telegram",
@@ -347,32 +347,30 @@ async def telegram_webhook(update: TelegramUpdate):
"chat_id": chat_id, "chat_id": chat_id,
"file_id": file_id, "file_id": file_id,
"file_url": file_url, "file_url": file_url,
"has_image": True,
}, },
"payload": { "context": {
"operation": "embed_image", "agent_name": DAARWIZZ_NAME,
"image_url": file_url, "system_prompt": DAARWIZZ_SYSTEM_PROMPT,
"normalize": True,
}, },
} }
# Override LLM to use specialist_vision_8b for image understanding
router_request["metadata"]["use_llm"] = "specialist_vision_8b"
# Send to Router # Send to Router
logger.info(f"Sending photo to Router: file_url={file_url[:50]}...") logger.info(f"Sending photo to Router with vision-8b: file_url={file_url[:50]}...")
response = await send_to_router(router_request) response = await send_to_router(router_request)
# Extract response # Extract response
if isinstance(response, dict) and response.get("ok"): if isinstance(response, dict) and response.get("ok"):
embedding_data = response.get("data", {}) answer_text = response.get("data", {}).get("text") or response.get("response", "")
embedding = embedding_data.get("embedding")
if embedding: if answer_text:
# Photo processed successfully # Photo processed successfully
dimension = embedding_data.get("dimension", 768)
await send_telegram_message( await send_telegram_message(
chat_id, chat_id,
f"✅ **Фото оброблено**\n\n" f"✅ **Фото оброблено**\n\n{answer_text}"
f"📊 Embedding dimension: {dimension}\n"
f"🔍 Фото закодовано для пошуку та аналізу.\n\n"
f"💡 Можна використати текстовий опис для пошуку схожих фото."
) )
# Save to memory for context # Save to memory for context
@@ -381,18 +379,18 @@ async def telegram_webhook(update: TelegramUpdate):
team_id=dao_id, team_id=dao_id,
user_id=f"tg:{user_id}", user_id=f"tg:{user_id}",
message=f"[Photo: {file_id}]", message=f"[Photo: {file_id}]",
response=f"Photo processed with Vision Encoder (dim={dimension})", response=answer_text,
channel_id=chat_id, channel_id=chat_id,
scope="short_term" scope="short_term"
) )
return {"ok": True, "agent": "daarwizz", "mode": "vision_embed", "dimension": dimension} return {"ok": True, "agent": "daarwizz", "model": "specialist_vision_8b"}
else: else:
await send_telegram_message(chat_id, "Фото оброблено, але embedding не отримано.") await send_telegram_message(chat_id, "Фото оброблено, але не вдалося отримати опис.")
return {"ok": False, "error": "No embedding in response"} return {"ok": False, "error": "No description in response"}
else: else:
error_msg = response.get("error", "Unknown error") if isinstance(response, dict) else "Router error" error_msg = response.get("error", "Unknown error") if isinstance(response, dict) else "Router error"
logger.error(f"Vision Encoder error: {error_msg}") logger.error(f"Vision-8b error: {error_msg}")
await send_telegram_message(chat_id, f"Вибач, не вдалося обробити фото: {error_msg}") await send_telegram_message(chat_id, f"Вибач, не вдалося обробити фото: {error_msg}")
return {"ok": False, "error": error_msg} return {"ok": False, "error": error_msg}