feat(docs): add standard file processing and router document ingest/query

This commit is contained in:
NODA1 System
2026-02-21 14:02:59 +01:00
parent 3e3546ea89
commit 5d52cf81c4
7 changed files with 755 additions and 104 deletions

View File

@@ -1871,23 +1871,53 @@ async def process_document(
Dict з результатом обробки
"""
mime_type = document.get("mime_type", "")
mime_type_l = (mime_type or "").lower()
file_name = document.get("file_name", "")
file_id = document.get("file_id")
file_name_lower = file_name.lower()
allowed_exts = {".pdf", ".docx", ".txt", ".md", ".csv", ".xlsx", ".zip"}
allowed_exts = {
".pdf", ".doc", ".docx", ".rtf", ".odt",
".txt", ".md", ".markdown",
".csv", ".tsv", ".xls", ".xlsx", ".xlsm", ".ods",
".ppt", ".pptx", ".odp",
".json", ".yaml", ".yml", ".xml", ".html", ".htm",
".zip",
".jpg", ".jpeg", ".png", ".webp", ".gif", ".bmp", ".tiff",
}
is_allowed = any(file_name_lower.endswith(ext) for ext in allowed_exts)
if mime_type == "application/pdf":
if mime_type_l == "application/pdf":
is_allowed = True
if mime_type in {
if mime_type_l in {
"application/msword",
"application/vnd.openxmlformats-officedocument.wordprocessingml.document",
"application/rtf",
"text/rtf",
"application/vnd.oasis.opendocument.text",
"application/vnd.ms-excel",
"application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
"application/vnd.ms-excel.sheet.macroenabled.12",
"application/vnd.oasis.opendocument.spreadsheet",
"application/vnd.ms-powerpoint",
"application/vnd.openxmlformats-officedocument.presentationml.presentation",
"application/vnd.oasis.opendocument.presentation",
"text/plain",
"text/markdown",
"text/csv",
"text/tab-separated-values",
"application/json",
"application/yaml",
"application/x-yaml",
"text/yaml",
"application/xml",
"text/xml",
"text/html",
"application/zip",
"application/x-zip-compressed",
}:
is_allowed = True
if mime_type_l.startswith("image/"):
is_allowed = True
if is_allowed and file_id:
logger.info(f"{agent_config.name}: Document from {username} (tg:{user_id}), file_id: {file_id}, file_name: {file_name}")
@@ -2027,7 +2057,7 @@ async def process_document(
telegram_token = agent_config.get_telegram_token()
await send_telegram_message(
chat_id,
"Наразі підтримуються формати: PDF, DOCX, TXT, MD, CSV, XLSX, ZIP.",
"Підтримуються формати: PDF/DOC/DOCX/RTF/ODT, TXT/MD/CSV/TSV, XLS/XLSX/XLSM/ODS, PPT/PPTX/ODP, JSON/YAML/XML/HTML, ZIP, зображення.",
telegram_token,
)
return {"ok": False, "error": "Unsupported document type"}
@@ -3681,7 +3711,8 @@ async def _old_telegram_webhook(update: TelegramUpdate):
doc_url=file_url,
file_name=file_name,
dao_id=dao_id,
user_id=f"tg:{user_id}"
user_id=f"tg:{user_id}",
agent_id=agent_config.agent_id,
)
if result.success:
@@ -3705,7 +3736,8 @@ async def _old_telegram_webhook(update: TelegramUpdate):
result = await ingest_document(
session_id=session_id,
dao_id=dao_id,
user_id=f"tg:{user_id}"
user_id=f"tg:{user_id}",
agent_id=agent_config.agent_id,
)
if result.success: