feat(docs): add standard file processing and router document ingest/query
This commit is contained in:
@@ -1235,6 +1235,27 @@ class InferResponse(BaseModel):
|
||||
file_mime: Optional[str] = None
|
||||
|
||||
|
||||
class DocumentIngestRequest(BaseModel):
    """Ingest document text into agent-specific docs collection."""

    # Target agent; the ingest endpoint lowercases/strips this and uses it
    # to pick the per-agent docs collection.
    agent_id: str
    # Caller-supplied document identifier; when empty, the endpoint derives
    # a deterministic md5-based fallback from agent_id + file_name + text.
    doc_id: str
    # Original file name, if the text came from an uploaded file.
    file_name: Optional[str] = None
    # Raw document text to be chunked and stored.
    text: str
    # Optional scoping identifiers forwarded to the memory-retrieval layer.
    dao_id: Optional[str] = None
    user_id: Optional[str] = None
    # Arbitrary extra payload forwarded alongside the document.
    metadata: Optional[Dict[str, Any]] = None
|
||||
|
||||
|
||||
class DocumentQueryRequest(BaseModel):
    """Query document context from agent-specific docs collection."""

    # Target agent; the query endpoint lowercases/strips this.
    agent_id: str
    # Natural-language question answered from the ingested chunks only.
    question: str
    # Restrict the search to a single ingested document when set.
    doc_id: Optional[str] = None
    # Optional scoping identifier forwarded to the retrieval lookup.
    dao_id: Optional[str] = None
    # NOTE(review): user_id is accepted here but the query endpoint does not
    # forward it to query_document_chunks — confirm whether per-user
    # filtering is intended.
    user_id: Optional[str] = None
    # Maximum number of chunks to retrieve.
    limit: int = 5
|
||||
|
||||
|
||||
class SharedMemoryReviewRequest(BaseModel):
|
||||
point_id: str
|
||||
approve: bool
|
||||
@@ -2867,6 +2888,149 @@ async def agent_infer(agent_id: str, request: InferRequest):
|
||||
)
|
||||
|
||||
|
||||
@app.post("/v1/documents/ingest")
async def documents_ingest(request: DocumentIngestRequest):
    """
    Ingest raw document text into Qdrant {agent_id}_docs.

    Validates the request, derives a deterministic fallback doc_id when the
    caller did not provide one, and delegates chunking and storage to the
    memory-retrieval layer. Returns the layer's result, or a structured
    failure payload when ingestion did not succeed.
    """
    # The docs pipeline is optional at deploy time; refuse early if absent.
    if not MEMORY_RETRIEVAL_AVAILABLE or not memory_retrieval:
        raise HTTPException(status_code=503, detail="Memory retrieval not available")

    normalized_agent = (request.agent_id or "").strip().lower()
    if not normalized_agent:
        raise HTTPException(status_code=400, detail="agent_id is required")

    body_text = (request.text or "").strip()
    if not body_text:
        raise HTTPException(status_code=400, detail="text is required")

    document_id = (request.doc_id or "").strip()
    if not document_id:
        # The same text + file must always map to the same fallback id,
        # so re-ingesting an identical document is idempotent.
        fingerprint = f"{normalized_agent}:{request.file_name or ''}:{body_text[:400]}"
        document_id = hashlib.md5(fingerprint.encode("utf-8")).hexdigest()[:16]

    result = await memory_retrieval.ingest_document_chunks(
        agent_id=normalized_agent,
        doc_id=document_id,
        file_name=request.file_name,
        text=body_text,
        dao_id=request.dao_id,
        user_id=request.user_id,
        metadata=request.metadata,
    )
    if result.get("ok"):
        return result

    # Surface failures as a structured payload instead of an HTTP error.
    return {
        "ok": False,
        "error": result.get("error", "ingest_failed"),
        "doc_id": document_id,
        "collection": result.get("collection"),
    }
|
||||
|
||||
|
||||
@app.post("/v1/documents/query")
async def documents_query(request: DocumentQueryRequest):
    """
    Query ingested document chunks and synthesize source-locked answer.

    Retrieves the most relevant chunks for the question, then asks the LLM
    to answer strictly from that context. If synthesis fails or returns an
    empty answer, falls back to returning the top chunk's text verbatim.
    """
    # The docs pipeline is optional at deploy time; refuse early if absent.
    if not MEMORY_RETRIEVAL_AVAILABLE or not memory_retrieval:
        raise HTTPException(status_code=503, detail="Memory retrieval not available")

    agent_id = (request.agent_id or "").strip().lower()
    if not agent_id:
        raise HTTPException(status_code=400, detail="agent_id is required")

    question = (request.question or "").strip()
    if not question:
        raise HTTPException(status_code=400, detail="question is required")

    # NOTE(review): request.user_id is not forwarded here even though the
    # request model accepts it — confirm whether per-user chunk filtering
    # is intended.
    lookup = await memory_retrieval.query_document_chunks(
        agent_id=agent_id,
        question=question,
        doc_id=request.doc_id,
        dao_id=request.dao_id,
        limit=request.limit,
    )
    chunks = lookup.get("chunks") or []
    if not chunks:
        # No relevant context: report failure with an empty answer payload.
        return {
            "ok": False,
            "error": lookup.get("error", "no_relevant_chunks"),
            "data": {
                "answer": None,
                "citations": [],
                "doc_id": request.doc_id,
            },
        }

    # Build citation metadata plus numbered context blocks for the prompt;
    # block numbers [i] line up with the citations list (1-based).
    citations: List[Dict[str, Any]] = []
    context_blocks: List[str] = []
    for i, ch in enumerate(chunks, start=1):
        c_doc_id = ch.get("doc_id") or request.doc_id
        c_file = ch.get("file_name")
        c_idx = ch.get("chunk_index")
        # Scores may arrive missing or None; coerce to float defensively.
        c_score = float(ch.get("score", 0.0) or 0.0)
        citations.append(
            {
                "doc_id": c_doc_id,
                "file_name": c_file,
                "chunk_index": c_idx,
                "score": round(c_score, 4),
            }
        )
        src = []
        if c_file:
            src.append(f"file={c_file}")
        if c_idx is not None:
            # chunk_index is stored 0-based; display it 1-based.
            src.append(f"chunk={int(c_idx) + 1}")
        src_label = ", ".join(src) if src else "chunk"
        # Cap each block at 1400 chars to bound the prompt size.
        context_blocks.append(f"[{i}] ({src_label}) {str(ch.get('text') or '').strip()[:1400]}")

    # Ask the LLM for a source-locked answer (prompt text is Ukrainian:
    # "answer ONLY from the given context; cite [source: N]").
    answer_text = ""
    try:
        llm_req = InternalLLMRequest(
            prompt=(
                "Питання користувача:\n"
                f"{question}\n\n"
                "Контекст із документа (дозволено використовувати ТІЛЬКИ його):\n"
                + "\n\n".join(context_blocks)
                + "\n\n"
                "Правила відповіді:\n"
                "1) Відповідай лише на основі наведеного контексту.\n"
                "2) Якщо даних недостатньо, прямо скажи: 'Недостатньо даних у документі'.\n"
                "3) В кінці додай коротке посилання на джерело у форматі [source: N].\n"
            ),
            llm_profile="reasoning",
            max_tokens=320,
            # Low temperature keeps the answer close to the source text.
            temperature=0.1,
            role_context="Document QA source-locked",
            metadata={"agent_id": agent_id, "mode": "documents_query"},
        )
        llm_resp = await internal_llm_complete(llm_req)
        answer_text = (llm_resp.text or "").strip()
    except Exception as e:
        # Synthesis is best-effort; log and fall through to the raw-chunk
        # fallback below rather than failing the request.
        logger.warning(f"documents_query LLM synthesis failed: {e}")

    if not answer_text:
        # Fallback: return the top-scoring chunk verbatim (truncated),
        # with a Ukrainian note that no summary could be produced.
        top = chunks[0]
        answer_text = (
            "Знайшов релевантний фрагмент у документі, але не вдалося сформувати підсумок. "
            f"Ось ключовий уривок:\n{str(top.get('text') or '').strip()[:1200]}"
        )

    return {
        "ok": True,
        "data": {
            "answer": answer_text,
            "citations": citations,
            "doc_id": request.doc_id or chunks[0].get("doc_id"),
            "chunks_used": len(chunks),
            "collection": lookup.get("collection"),
        },
    }
|
||||
|
||||
|
||||
@app.get("/v1/models")
|
||||
async def list_available_models():
|
||||
"""List all available models across backends"""
|
||||
|
||||
Reference in New Issue
Block a user