feat(docs): add standard file processing and router document ingest/query
This commit is contained in:
@@ -1871,23 +1871,53 @@ async def process_document(
|
||||
Dict з результатом обробки
|
||||
"""
|
||||
mime_type = document.get("mime_type", "")
|
||||
mime_type_l = (mime_type or "").lower()
|
||||
file_name = document.get("file_name", "")
|
||||
file_id = document.get("file_id")
|
||||
|
||||
file_name_lower = file_name.lower()
|
||||
allowed_exts = {".pdf", ".docx", ".txt", ".md", ".csv", ".xlsx", ".zip"}
|
||||
allowed_exts = {
|
||||
".pdf", ".doc", ".docx", ".rtf", ".odt",
|
||||
".txt", ".md", ".markdown",
|
||||
".csv", ".tsv", ".xls", ".xlsx", ".xlsm", ".ods",
|
||||
".ppt", ".pptx", ".odp",
|
||||
".json", ".yaml", ".yml", ".xml", ".html", ".htm",
|
||||
".zip",
|
||||
".jpg", ".jpeg", ".png", ".webp", ".gif", ".bmp", ".tiff",
|
||||
}
|
||||
is_allowed = any(file_name_lower.endswith(ext) for ext in allowed_exts)
|
||||
if mime_type == "application/pdf":
|
||||
if mime_type_l == "application/pdf":
|
||||
is_allowed = True
|
||||
if mime_type in {
|
||||
if mime_type_l in {
|
||||
"application/msword",
|
||||
"application/vnd.openxmlformats-officedocument.wordprocessingml.document",
|
||||
"application/rtf",
|
||||
"text/rtf",
|
||||
"application/vnd.oasis.opendocument.text",
|
||||
"application/vnd.ms-excel",
|
||||
"application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
|
||||
"application/vnd.ms-excel.sheet.macroenabled.12",
|
||||
"application/vnd.oasis.opendocument.spreadsheet",
|
||||
"application/vnd.ms-powerpoint",
|
||||
"application/vnd.openxmlformats-officedocument.presentationml.presentation",
|
||||
"application/vnd.oasis.opendocument.presentation",
|
||||
"text/plain",
|
||||
"text/markdown",
|
||||
"text/csv",
|
||||
"text/tab-separated-values",
|
||||
"application/json",
|
||||
"application/yaml",
|
||||
"application/x-yaml",
|
||||
"text/yaml",
|
||||
"application/xml",
|
||||
"text/xml",
|
||||
"text/html",
|
||||
"application/zip",
|
||||
"application/x-zip-compressed",
|
||||
}:
|
||||
is_allowed = True
|
||||
if mime_type_l.startswith("image/"):
|
||||
is_allowed = True
|
||||
|
||||
if is_allowed and file_id:
|
||||
logger.info(f"{agent_config.name}: Document from {username} (tg:{user_id}), file_id: {file_id}, file_name: {file_name}")
|
||||
@@ -2027,7 +2057,7 @@ async def process_document(
|
||||
telegram_token = agent_config.get_telegram_token()
|
||||
await send_telegram_message(
|
||||
chat_id,
|
||||
"Наразі підтримуються формати: PDF, DOCX, TXT, MD, CSV, XLSX, ZIP.",
|
||||
"Підтримуються формати: PDF/DOC/DOCX/RTF/ODT, TXT/MD/CSV/TSV, XLS/XLSX/XLSM/ODS, PPT/PPTX/ODP, JSON/YAML/XML/HTML, ZIP, зображення.",
|
||||
telegram_token,
|
||||
)
|
||||
return {"ok": False, "error": "Unsupported document type"}
|
||||
@@ -3681,7 +3711,8 @@ async def _old_telegram_webhook(update: TelegramUpdate):
|
||||
doc_url=file_url,
|
||||
file_name=file_name,
|
||||
dao_id=dao_id,
|
||||
user_id=f"tg:{user_id}"
|
||||
user_id=f"tg:{user_id}",
|
||||
agent_id=agent_config.agent_id,
|
||||
)
|
||||
|
||||
if result.success:
|
||||
@@ -3705,7 +3736,8 @@ async def _old_telegram_webhook(update: TelegramUpdate):
|
||||
result = await ingest_document(
|
||||
session_id=session_id,
|
||||
dao_id=dao_id,
|
||||
user_id=f"tg:{user_id}"
|
||||
user_id=f"tg:{user_id}",
|
||||
agent_id=agent_config.agent_id,
|
||||
)
|
||||
|
||||
if result.success:
|
||||
|
||||
Reference in New Issue
Block a user