feat(file-tool): add djvu conversion and extraction actions
This commit is contained in:
@@ -67,6 +67,8 @@ Implemented in actual NODE1 stack (`services/router/*` + gateway):
|
|||||||
- `pdf_split`
|
- `pdf_split`
|
||||||
- `pdf_fill`
|
- `pdf_fill`
|
||||||
- `pdf_update`
|
- `pdf_update`
|
||||||
|
- `djvu_to_pdf`
|
||||||
|
- `djvu_extract_text`
|
||||||
- `text_create`
|
- `text_create`
|
||||||
- `text_update`
|
- `text_update`
|
||||||
- `markdown_create`
|
- `markdown_create`
|
||||||
@@ -95,6 +97,7 @@ For file-producing tool calls, router now propagates:
|
|||||||
- `services/router/agent_tools_config.py`
|
- `services/router/agent_tools_config.py`
|
||||||
- `services/router/tool_manager.py`
|
- `services/router/tool_manager.py`
|
||||||
- `services/router/main.py`
|
- `services/router/main.py`
|
||||||
|
- `services/router/Dockerfile`
|
||||||
- `gateway-bot/router_client.py`
|
- `gateway-bot/router_client.py`
|
||||||
- `gateway-bot/http_api.py`
|
- `gateway-bot/http_api.py`
|
||||||
|
|
||||||
@@ -123,6 +126,8 @@ Run inside `dagi-router-node1` to validate actions deterministically:
|
|||||||
- DOCX create/update
|
- DOCX create/update
|
||||||
- PDF merge/split/fill
|
- PDF merge/split/fill
|
||||||
- PDF update (rotate/reorder/remove/extract/metadata)
|
- PDF update (rotate/reorder/remove/extract/metadata)
|
||||||
|
- DJVU to PDF conversion
|
||||||
|
- DJVU text extraction
|
||||||
- Image create/edit/convert
|
- Image create/edit/convert
|
||||||
- Image bundle (zip)
|
- Image bundle (zip)
|
||||||
- SVG export + SVG->PNG convert (rect/circle/ellipse/line/polyline/polygon/text)
|
- SVG export + SVG->PNG convert (rect/circle/ellipse/line/polyline/polygon/text)
|
||||||
@@ -146,6 +151,9 @@ Also verify infer endpoint still works:
|
|||||||
- `services/router/tool_manager.py.bak_20260215_113301`
|
- `services/router/tool_manager.py.bak_20260215_113301`
|
||||||
- `services/router/tool_manager.py.bak_20260215_114512`
|
- `services/router/tool_manager.py.bak_20260215_114512`
|
||||||
- `services/router/tool_manager.py.bak_20260215_114740`
|
- `services/router/tool_manager.py.bak_20260215_114740`
|
||||||
|
- `services/router/tool_manager.py.bak_20260215_120912`
|
||||||
|
- `services/router/Dockerfile.bak_20260215_120912`
|
||||||
|
- `services/router/tool_manager.py.bak_20260215_121116`
|
||||||
|
|
||||||
## Rollback (NODE1)
|
## Rollback (NODE1)
|
||||||
```bash
|
```bash
|
||||||
|
|||||||
@@ -2,6 +2,11 @@ FROM python:3.11-slim
|
|||||||
|
|
||||||
WORKDIR /app
|
WORKDIR /app
|
||||||
|
|
||||||
|
# System packages for file conversions
|
||||||
|
RUN apt-get update && apt-get install -y --no-install-recommends \
|
||||||
|
djvulibre-bin \
|
||||||
|
&& rm -rf /var/lib/apt/lists/*
|
||||||
|
|
||||||
# Install dependencies
|
# Install dependencies
|
||||||
COPY requirements.txt .
|
COPY requirements.txt .
|
||||||
RUN pip install --no-cache-dir -r requirements.txt
|
RUN pip install --no-cache-dir -r requirements.txt
|
||||||
@@ -33,4 +38,3 @@ CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8000"]
|
|||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -13,6 +13,7 @@ import hashlib
|
|||||||
import base64
|
import base64
|
||||||
import csv
|
import csv
|
||||||
import tempfile
|
import tempfile
|
||||||
|
import subprocess
|
||||||
import httpx
|
import httpx
|
||||||
from typing import Dict, List, Any, Optional
|
from typing import Dict, List, Any, Optional
|
||||||
from dataclasses import dataclass
|
from dataclasses import dataclass
|
||||||
@@ -330,6 +331,7 @@ TOOL_DEFINITIONS = [
|
|||||||
"pptx_create", "pptx_update",
|
"pptx_create", "pptx_update",
|
||||||
"ods_create", "ods_update", "parquet_create", "parquet_update",
|
"ods_create", "ods_update", "parquet_create", "parquet_update",
|
||||||
"csv_create", "csv_update", "pdf_fill", "pdf_merge", "pdf_split", "pdf_update",
|
"csv_create", "csv_update", "pdf_fill", "pdf_merge", "pdf_split", "pdf_update",
|
||||||
|
"djvu_to_pdf", "djvu_extract_text",
|
||||||
"json_export", "yaml_export", "zip_bundle",
|
"json_export", "yaml_export", "zip_bundle",
|
||||||
"text_create", "text_update", "markdown_create", "markdown_update",
|
"text_create", "text_update", "markdown_create", "markdown_update",
|
||||||
"xml_export", "html_export",
|
"xml_export", "html_export",
|
||||||
@@ -671,6 +673,10 @@ class ToolManager:
|
|||||||
return self._file_pdf_update(args)
|
return self._file_pdf_update(args)
|
||||||
if action == "pdf_fill":
|
if action == "pdf_fill":
|
||||||
return self._file_pdf_fill(args)
|
return self._file_pdf_fill(args)
|
||||||
|
if action == "djvu_to_pdf":
|
||||||
|
return self._file_djvu_to_pdf(args)
|
||||||
|
if action == "djvu_extract_text":
|
||||||
|
return self._file_djvu_extract_text(args)
|
||||||
|
|
||||||
return ToolResult(success=False, result=None, error=f"Action not implemented yet: {action}")
|
return ToolResult(success=False, result=None, error=f"Action not implemented yet: {action}")
|
||||||
|
|
||||||
@@ -2102,6 +2108,89 @@ class ToolManager:
|
|||||||
file_name=file_name,
|
file_name=file_name,
|
||||||
file_mime="application/pdf",
|
file_mime="application/pdf",
|
||||||
)
|
)
|
||||||
|
|
||||||
|
def _file_djvu_to_pdf(self, args: Dict[str, Any]) -> ToolResult:
    """Convert a base64-encoded DjVu document to PDF with the ``ddjvu`` CLI.

    Args:
        args: Tool arguments. Keys read here:
            ``file_base64`` (required) - the DjVu payload;
            ``file_name`` (optional) - output name, passed through
            ``self._sanitize_file_name`` with ``force_ext=".pdf"`` and a
            default of ``converted.pdf``;
            ``timeout_sec`` (optional) - conversion timeout, default 60,
            clamped to the 5..300 second range.

    Returns:
        A ``ToolResult`` with the PDF in ``file_base64`` on success, or a
        ``success=False`` result describing the failure (missing input,
        invalid timeout, missing binary, timeout, non-zero exit, missing or
        empty output file).
    """
    src_b64 = args.get("file_base64")
    if not src_b64:
        return ToolResult(success=False, result=None, error="file_base64 is required for djvu_to_pdf")
    file_name = self._sanitize_file_name(args.get("file_name"), "converted.pdf", force_ext=".pdf")

    # A non-numeric timeout is a caller error; report it instead of letting
    # int() raise out of the tool dispatcher.
    try:
        requested_timeout = int(args.get("timeout_sec") or 60)
    except (TypeError, ValueError, OverflowError):
        return ToolResult(success=False, result=None, error="timeout_sec must be an integer number of seconds")
    timeout_sec = max(5, min(requested_timeout, 300))

    # Everything that touches the converted file must happen inside this
    # context: the directory and its contents are removed on exit.
    with tempfile.TemporaryDirectory(prefix="djvu2pdf_") as tmpdir:
        src = os.path.join(tmpdir, "input.djvu")
        out_pdf = os.path.join(tmpdir, "output.pdf")
        with open(src, "wb") as f:
            f.write(self._bytes_from_b64(src_b64))

        try:
            proc = subprocess.run(
                ["ddjvu", "-format=pdf", src, out_pdf],
                capture_output=True,
                text=True,
                # Decode diagnostics leniently so odd bytes in stderr cannot
                # raise UnicodeDecodeError and mask the real failure.
                encoding="utf-8",
                errors="replace",
                timeout=timeout_sec,
                check=False,
            )
        except FileNotFoundError:
            return ToolResult(success=False, result=None, error="ddjvu not found in runtime image")
        except subprocess.TimeoutExpired:
            return ToolResult(success=False, result=None, error=f"DJVU conversion timed out ({timeout_sec}s)")

        if proc.returncode != 0 or not os.path.exists(out_pdf):
            stderr = (proc.stderr or "").strip()
            return ToolResult(success=False, result=None, error=f"ddjvu failed: {stderr or 'unknown error'}")

        # Use a context manager so the handle is closed deterministically
        # before the temporary directory is cleaned up.
        with open(out_pdf, "rb") as pdf_file:
            data = pdf_file.read()

    if not data:
        return ToolResult(success=False, result=None, error="ddjvu produced empty PDF")

    return ToolResult(
        success=True,
        result={"message": f"DJVU converted to PDF: {file_name}"},
        file_base64=self._b64_from_bytes(data),
        file_name=file_name,
        file_mime="application/pdf",
    )
|
||||||
|
|
||||||
|
def _file_djvu_extract_text(self, args: Dict[str, Any]) -> ToolResult:
    """Extract the text layer of a base64-encoded DjVu file with ``djvutxt``.

    Args:
        args: Tool arguments. Keys read here:
            ``file_base64`` (required) - the DjVu payload;
            ``file_name`` (optional) - output name, passed through
            ``self._sanitize_file_name`` with ``force_ext=".txt"`` and a
            default of ``extracted.txt``;
            ``timeout_sec`` (optional) - extraction timeout, default 60,
            clamped to the 5..300 second range.

    Returns:
        A ``ToolResult`` whose ``file_base64`` holds the extracted text
        encoded as UTF-8. A document without extractable text still yields
        ``success=True`` with an empty file and an explanatory message.
        Failures (missing input, invalid timeout, missing binary, timeout,
        non-zero exit) yield ``success=False`` with an error message.
    """
    src_b64 = args.get("file_base64")
    if not src_b64:
        return ToolResult(success=False, result=None, error="file_base64 is required for djvu_extract_text")
    file_name = self._sanitize_file_name(args.get("file_name"), "extracted.txt", force_ext=".txt")

    # A non-numeric timeout is a caller error; report it instead of letting
    # int() raise out of the tool dispatcher.
    try:
        requested_timeout = int(args.get("timeout_sec") or 60)
    except (TypeError, ValueError, OverflowError):
        return ToolResult(success=False, result=None, error="timeout_sec must be an integer number of seconds")
    timeout_sec = max(5, min(requested_timeout, 300))

    with tempfile.TemporaryDirectory(prefix="djvutxt_") as tmpdir:
        src = os.path.join(tmpdir, "input.djvu")
        with open(src, "wb") as f:
            f.write(self._bytes_from_b64(src_b64))
        try:
            proc = subprocess.run(
                ["djvutxt", src],
                capture_output=True,
                text=True,
                # The result is re-encoded as UTF-8 below, so decode as
                # UTF-8 explicitly rather than by locale, and replace
                # undecodable bytes instead of raising UnicodeDecodeError.
                encoding="utf-8",
                errors="replace",
                timeout=timeout_sec,
                check=False,
            )
        except FileNotFoundError:
            return ToolResult(success=False, result=None, error="djvutxt not found in runtime image")
        except subprocess.TimeoutExpired:
            return ToolResult(success=False, result=None, error=f"DJVU text extraction timed out ({timeout_sec}s)")

    if proc.returncode != 0:
        stderr = (proc.stderr or "").strip()
        return ToolResult(success=False, result=None, error=f"djvutxt failed: {stderr or 'unknown error'}")

    text = proc.stdout or ""
    if text.strip():
        msg = f"DJVU text extracted: {file_name}"
    else:
        # Scanned documents without an OCR layer legitimately produce no
        # text; this is reported as a success with an empty file.
        msg = f"DJVU has no extractable text layer, returned empty text file: {file_name}"

    payload = text.encode("utf-8")
    return ToolResult(
        success=True,
        result={"message": msg},
        file_base64=self._b64_from_bytes(payload),
        file_name=file_name,
        file_mime="text/plain",
    )
|
||||||
|
|
||||||
async def _memory_search(self, args: Dict, agent_id: str = None, chat_id: str = None, user_id: str = None) -> ToolResult:
|
async def _memory_search(self, args: Dict, agent_id: str = None, chat_id: str = None, user_id: str = None) -> ToolResult:
|
||||||
"""Search in Qdrant vector memory using Router's memory_retrieval - PRIORITY 1"""
|
"""Search in Qdrant vector memory using Router's memory_retrieval - PRIORITY 1"""
|
||||||
|
|||||||
Reference in New Issue
Block a user