Complete snapshot of /opt/microdao-daarion/ from NODE1 (144.76.224.179).
This represents the actual running production code that has diverged
significantly from the previous main branch.
Key changes from old main:
- Gateway (http_api.py): expanded from ~40KB to 164KB with full agent support
- Router: new /v1/agents/{id}/infer endpoint with vision + DeepSeek routing
- Behavior Policy: SOWA v2.2 (3-level: FULL/ACK/SILENT)
- Agent Registry: config/agent_registry.yml as single source of truth
- 13 agents configured (was 3)
- Memory service integration
- CrewAI teams and roles
Excluded from snapshot: venv/, .env, data/, backups, .tgz archives
Co-authored-by: Cursor <cursoragent@cursor.com>
1257 lines
41 KiB
HTML
1257 lines
41 KiB
HTML
|
||
<!doctype html>
|
||
<html lang="en" class="no-js">
|
||
<head>
|
||
|
||
<meta charset="utf-8">
|
||
<meta name="viewport" content="width=device-width,initial-scale=1">
|
||
|
||
|
||
|
||
<link rel="canonical" href="https://IvanTytar.github.io/microdao-daarion/cursor/channel_agnostic_doc_flow_task/">
|
||
|
||
|
||
|
||
|
||
<link rel="icon" href="../../assets/images/favicon.png">
|
||
<meta name="generator" content="mkdocs-1.5.3, mkdocs-material-9.5.18">
|
||
|
||
|
||
|
||
<title>Task: Channel-agnostic document workflow (PDF + RAG) - DAARION Documentation</title>
|
||
|
||
|
||
|
||
<link rel="stylesheet" href="../../assets/stylesheets/main.66ac8b77.min.css">
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
<link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
|
||
<link rel="stylesheet" href="https://fonts.googleapis.com/css?family=Roboto:300,300i,400,400i,700,700i%7CRoboto+Mono:400,400i,700,700i&amp;display=fallback">
|
||
<style>:root{--md-text-font:"Roboto";--md-code-font:"Roboto Mono"}</style>
|
||
|
||
|
||
|
||
<script>
  // Base URL for this page's storage keys: "../.." resolved against the current location.
  __md_scope = new URL("../..", location);

  // Rolling string hash: each step computes (acc << 5) - acc + charCode, starting from 0.
  __md_hash = text => {
    const step = (acc, ch) => (acc << 5) - acc + ch.charCodeAt(0);
    return [...text].reduce(step, 0);
  };

  // Read and JSON-decode the value stored under "<scope pathname>.<name>".
  __md_get = (name, storage = localStorage, scope = __md_scope) => {
    const raw = storage.getItem(scope.pathname + "." + name);
    return JSON.parse(raw);
  };

  // JSON-encode and store a value under "<scope pathname>.<name>".
  __md_set = (name, data, storage = localStorage, scope = __md_scope) => {
    try {
      storage.setItem(scope.pathname + "." + name, JSON.stringify(data));
    } catch (err) {
      // Any error raised while encoding or storing is intentionally ignored.
    }
  };
</script>
|
||
|
||
|
||
|
||
|
||
|
||
|
||
</head>
|
||
|
||
|
||
<body dir="ltr">
|
||
|
||
|
||
<input class="md-toggle" data-md-toggle="drawer" type="checkbox" id="__drawer" autocomplete="off">
|
||
<input class="md-toggle" data-md-toggle="search" type="checkbox" id="__search" autocomplete="off">
|
||
<label class="md-overlay" for="__drawer"></label>
|
||
<div data-md-component="skip">
|
||
|
||
|
||
<a href="#task-channel-agnostic-document-workflow-pdf-rag" class="md-skip">
|
||
Skip to content
|
||
</a>
|
||
|
||
</div>
|
||
<div data-md-component="announce">
|
||
|
||
</div>
|
||
|
||
|
||
|
||
|
||
|
||
|
||
<header class="md-header md-header--shadow" data-md-component="header">
|
||
<nav class="md-header__inner md-grid" aria-label="Header">
|
||
<a href="../.." title="DAARION Documentation" class="md-header__button md-logo" aria-label="DAARION Documentation" data-md-component="logo">
|
||
|
||
|
||
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M12 8a3 3 0 0 0 3-3 3 3 0 0 0-3-3 3 3 0 0 0-3 3 3 3 0 0 0 3 3m0 3.54C9.64 9.35 6.5 8 3 8v11c3.5 0 6.64 1.35 9 3.54 2.36-2.19 5.5-3.54 9-3.54V8c-3.5 0-6.64 1.35-9 3.54Z"/></svg>
|
||
|
||
</a>
|
||
<label class="md-header__button md-icon" for="__drawer">
|
||
|
||
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M3 6h18v2H3V6m0 5h18v2H3v-2m0 5h18v2H3v-2Z"/></svg>
|
||
</label>
|
||
<div class="md-header__title" data-md-component="header-title">
|
||
<div class="md-header__ellipsis">
|
||
<div class="md-header__topic">
|
||
<span class="md-ellipsis">
|
||
DAARION Documentation
|
||
</span>
|
||
</div>
|
||
<div class="md-header__topic" data-md-component="header-topic">
|
||
<span class="md-ellipsis">
|
||
|
||
Task: Channel-agnostic document workflow (PDF + RAG)
|
||
|
||
</span>
|
||
</div>
|
||
</div>
|
||
</div>
|
||
|
||
|
||
<script>
  // Restore the colour palette saved under "__palette" and mirror it onto <body>.
  // The variables stay `var` declarations so they remain script-level globals.
  var media, input, key, value, palette = __md_get("__palette");

  if (palette && palette.color) {
    // A stored "(prefers-color-scheme)" media value is replaced by the attributes
    // of the toggle that matches the current light/dark preference.
    if (palette.color.media === "(prefers-color-scheme)") {
      media = matchMedia("(prefers-color-scheme: light)");
      const selector = media.matches
        ? "[data-md-color-media='(prefers-color-scheme: light)']"
        : "[data-md-color-media='(prefers-color-scheme: dark)']";
      input = document.querySelector(selector);
      palette.color.media = input.getAttribute("data-md-color-media");
      palette.color.scheme = input.getAttribute("data-md-color-scheme");
      palette.color.primary = input.getAttribute("data-md-color-primary");
      palette.color.accent = input.getAttribute("data-md-color-accent");
    }

    // Copy every palette entry to a data-md-color-* attribute on <body>.
    for ([key, value] of Object.entries(palette.color)) {
      document.body.setAttribute("data-md-color-" + key, value);
    }
  }
</script>
|
||
|
||
|
||
|
||
<label class="md-header__button md-icon" for="__search">
|
||
|
||
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M9.5 3A6.5 6.5 0 0 1 16 9.5c0 1.61-.59 3.09-1.56 4.23l.27.27h.79l5 5-1.5 1.5-5-5v-.79l-.27-.27A6.516 6.516 0 0 1 9.5 16 6.5 6.5 0 0 1 3 9.5 6.5 6.5 0 0 1 9.5 3m0 2C7 5 5 7 5 9.5S7 14 9.5 14 14 12 14 9.5 12 5 9.5 5Z"/></svg>
|
||
</label>
|
||
<div class="md-search" data-md-component="search" role="dialog">
|
||
<label class="md-search__overlay" for="__search"></label>
|
||
<div class="md-search__inner" role="search">
|
||
<form class="md-search__form" name="search">
|
||
<input type="text" class="md-search__input" name="query" aria-label="Search" placeholder="Search" autocapitalize="off" autocorrect="off" autocomplete="off" spellcheck="false" data-md-component="search-query" required>
|
||
<label class="md-search__icon md-icon" for="__search">
|
||
|
||
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M9.5 3A6.5 6.5 0 0 1 16 9.5c0 1.61-.59 3.09-1.56 4.23l.27.27h.79l5 5-1.5 1.5-5-5v-.79l-.27-.27A6.516 6.516 0 0 1 9.5 16 6.5 6.5 0 0 1 3 9.5 6.5 6.5 0 0 1 9.5 3m0 2C7 5 5 7 5 9.5S7 14 9.5 14 14 12 14 9.5 12 5 9.5 5Z"/></svg>
|
||
|
||
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M20 11v2H8l5.5 5.5-1.42 1.42L4.16 12l7.92-7.92L13.5 5.5 8 11h12Z"/></svg>
|
||
</label>
|
||
<nav class="md-search__options" aria-label="Search">
|
||
|
||
<button type="reset" class="md-search__icon md-icon" title="Clear" aria-label="Clear" tabindex="-1">
|
||
|
||
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M19 6.41 17.59 5 12 10.59 6.41 5 5 6.41 10.59 12 5 17.59 6.41 19 12 13.41 17.59 19 19 17.59 13.41 12 19 6.41Z"/></svg>
|
||
</button>
|
||
</nav>
|
||
|
||
</form>
|
||
<div class="md-search__output">
|
||
<div class="md-search__scrollwrap" data-md-scrollfix>
|
||
<div class="md-search-result" data-md-component="search-result">
|
||
<div class="md-search-result__meta">
|
||
Initializing search
|
||
</div>
|
||
<ol class="md-search-result__list" role="presentation"></ol>
|
||
</div>
|
||
</div>
|
||
</div>
|
||
</div>
|
||
</div>
|
||
|
||
|
||
</nav>
|
||
|
||
</header>
|
||
|
||
<div class="md-container" data-md-component="container">
|
||
|
||
|
||
|
||
|
||
|
||
|
||
<main class="md-main" data-md-component="main">
|
||
<div class="md-main__inner md-grid">
|
||
|
||
|
||
|
||
<div class="md-sidebar md-sidebar--primary" data-md-component="sidebar" data-md-type="navigation" >
|
||
<div class="md-sidebar__scrollwrap">
|
||
<div class="md-sidebar__inner">
|
||
|
||
|
||
|
||
|
||
<nav class="md-nav md-nav--primary" aria-label="Navigation" data-md-level="0">
|
||
<label class="md-nav__title" for="__drawer">
|
||
<a href="../.." title="DAARION Documentation" class="md-nav__button md-logo" aria-label="DAARION Documentation" data-md-component="logo">
|
||
|
||
|
||
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M12 8a3 3 0 0 0 3-3 3 3 0 0 0-3-3 3 3 0 0 0-3 3 3 3 0 0 0 3 3m0 3.54C9.64 9.35 6.5 8 3 8v11c3.5 0 6.64 1.35 9 3.54 2.36-2.19 5.5-3.54 9-3.54V8c-3.5 0-6.64 1.35-9 3.54Z"/></svg>
|
||
|
||
</a>
|
||
DAARION Documentation
|
||
</label>
|
||
|
||
<ul class="md-nav__list" data-md-scrollfix>
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
<li class="md-nav__item">
|
||
<a href="../../public/" class="md-nav__link">
|
||
|
||
|
||
<span class="md-ellipsis">
|
||
Home
|
||
</span>
|
||
|
||
|
||
</a>
|
||
</li>
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
<li class="md-nav__item">
|
||
<a href="../../public/getting-started/" class="md-nav__link">
|
||
|
||
|
||
<span class="md-ellipsis">
|
||
Getting Started
|
||
</span>
|
||
|
||
|
||
</a>
|
||
</li>
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
<li class="md-nav__item">
|
||
<a href="../../public/architecture-overview/" class="md-nav__link">
|
||
|
||
|
||
<span class="md-ellipsis">
|
||
Architecture
|
||
</span>
|
||
|
||
|
||
</a>
|
||
</li>
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
<li class="md-nav__item">
|
||
<a href="../../public/daiS_daos_overview/" class="md-nav__link">
|
||
|
||
|
||
<span class="md-ellipsis">
|
||
DAIS &amp; DAOS
|
||
</span>
|
||
|
||
|
||
</a>
|
||
</li>
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
<li class="md-nav__item md-nav__item--section md-nav__item--nested">
|
||
|
||
|
||
|
||
<input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_5" >
|
||
|
||
|
||
<label class="md-nav__link" for="__nav_5" id="__nav_5_label">
|
||
|
||
|
||
<span class="md-ellipsis">
|
||
Internal
|
||
</span>
|
||
|
||
|
||
<span class="md-nav__icon md-icon"></span>
|
||
</label>
|
||
|
||
<nav class="md-nav" data-md-level="1" aria-labelledby="__nav_5_label" aria-expanded="false">
|
||
<label class="md-nav__title" for="__nav_5">
|
||
<span class="md-nav__icon md-icon"></span>
|
||
Internal
|
||
</label>
|
||
<ul class="md-nav__list" data-md-scrollfix>
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
<li class="md-nav__item md-nav__item--nested">
|
||
|
||
|
||
|
||
<input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_5_1" >
|
||
|
||
|
||
<label class="md-nav__link" for="__nav_5_1" id="__nav_5_1_label" tabindex="0">
|
||
|
||
|
||
<span class="md-ellipsis">
|
||
Infra
|
||
</span>
|
||
|
||
|
||
<span class="md-nav__icon md-icon"></span>
|
||
</label>
|
||
|
||
<nav class="md-nav" data-md-level="2" aria-labelledby="__nav_5_1_label" aria-expanded="false">
|
||
<label class="md-nav__title" for="__nav_5_1">
|
||
<span class="md-nav__icon md-icon"></span>
|
||
Infra
|
||
</label>
|
||
<ul class="md-nav__list" data-md-scrollfix>
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
<li class="md-nav__item">
|
||
<a href="../../internal/infra/INFRA_AUTOMATION_PACK_V1/" class="md-nav__link">
|
||
|
||
|
||
<span class="md-ellipsis">
|
||
Infra Automation Pack v1
|
||
</span>
|
||
|
||
|
||
</a>
|
||
</li>
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
<li class="md-nav__item">
|
||
<a href="../../internal/infra/monitoring_overview/" class="md-nav__link">
|
||
|
||
|
||
<span class="md-ellipsis">
|
||
Monitoring Overview
|
||
</span>
|
||
|
||
|
||
</a>
|
||
</li>
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
<li class="md-nav__item">
|
||
<a href="../../internal/infra/nodes_registry_v0/" class="md-nav__link">
|
||
|
||
|
||
<span class="md-ellipsis">
|
||
Nodes Registry v0
|
||
</span>
|
||
|
||
|
||
</a>
|
||
</li>
|
||
|
||
|
||
|
||
|
||
</ul>
|
||
</nav>
|
||
|
||
</li>
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
<li class="md-nav__item md-nav__item--nested">
|
||
|
||
|
||
|
||
<input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_5_2" >
|
||
|
||
|
||
<label class="md-nav__link" for="__nav_5_2" id="__nav_5_2_label" tabindex="0">
|
||
|
||
|
||
<span class="md-ellipsis">
|
||
Specs
|
||
</span>
|
||
|
||
|
||
<span class="md-nav__icon md-icon"></span>
|
||
</label>
|
||
|
||
<nav class="md-nav" data-md-level="2" aria-labelledby="__nav_5_2_label" aria-expanded="false">
|
||
<label class="md-nav__title" for="__nav_5_2">
|
||
<span class="md-nav__icon md-icon"></span>
|
||
Specs
|
||
</label>
|
||
<ul class="md-nav__list" data-md-scrollfix>
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
<li class="md-nav__item">
|
||
<a href="../../internal/specs/matrix_presence_aggregator/" class="md-nav__link">
|
||
|
||
|
||
<span class="md-ellipsis">
|
||
Matrix Presence Aggregator
|
||
</span>
|
||
|
||
|
||
</a>
|
||
</li>
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
<li class="md-nav__item">
|
||
<a href="../../internal/specs/city_map_spec/" class="md-nav__link">
|
||
|
||
|
||
<span class="md-ellipsis">
|
||
City Map Spec
|
||
</span>
|
||
|
||
|
||
</a>
|
||
</li>
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
<li class="md-nav__item">
|
||
<a href="../../internal/specs/node_join_protocol_draft/" class="md-nav__link">
|
||
|
||
|
||
<span class="md-ellipsis">
|
||
Node Join Protocol (Draft)
|
||
</span>
|
||
|
||
|
||
</a>
|
||
</li>
|
||
|
||
|
||
|
||
|
||
</ul>
|
||
</nav>
|
||
|
||
</li>
|
||
|
||
|
||
|
||
|
||
</ul>
|
||
</nav>
|
||
|
||
</li>
|
||
|
||
|
||
|
||
</ul>
|
||
</nav>
|
||
</div>
|
||
</div>
|
||
</div>
|
||
|
||
|
||
|
||
<div class="md-sidebar md-sidebar--secondary" data-md-component="sidebar" data-md-type="toc" >
|
||
<div class="md-sidebar__scrollwrap">
|
||
<div class="md-sidebar__inner">
|
||
|
||
|
||
<nav class="md-nav md-nav--secondary" aria-label="Table of contents">
|
||
|
||
|
||
|
||
|
||
|
||
|
||
<label class="md-nav__title" for="__toc">
|
||
<span class="md-nav__icon md-icon"></span>
|
||
Table of contents
|
||
</label>
|
||
<ul class="md-nav__list" data-md-component="toc" data-md-scrollfix>
|
||
|
||
<li class="md-nav__item">
|
||
<a href="#goal" class="md-nav__link">
|
||
<span class="md-ellipsis">
|
||
Goal
|
||
</span>
|
||
</a>
|
||
|
||
</li>
|
||
|
||
<li class="md-nav__item">
|
||
<a href="#context" class="md-nav__link">
|
||
<span class="md-ellipsis">
|
||
Context
|
||
</span>
|
||
</a>
|
||
|
||
<nav class="md-nav" aria-label="Context">
|
||
<ul class="md-nav__list">
|
||
|
||
<li class="md-nav__item">
|
||
<a href="#existing-components-expected-state" class="md-nav__link">
|
||
<span class="md-ellipsis">
|
||
Existing components (expected state)
|
||
</span>
|
||
</a>
|
||
|
||
</li>
|
||
|
||
</ul>
|
||
</nav>
|
||
|
||
</li>
|
||
|
||
<li class="md-nav__item">
|
||
<a href="#changes-to-implement" class="md-nav__link">
|
||
<span class="md-ellipsis">
|
||
Changes to implement
|
||
</span>
|
||
</a>
|
||
|
||
<nav class="md-nav" aria-label="Changes to implement">
|
||
<ul class="md-nav__list">
|
||
|
||
<li class="md-nav__item">
|
||
<a href="#1-create-service-gateway-botservicesdoc_servicepy" class="md-nav__link">
|
||
<span class="md-ellipsis">
|
||
1. Create service: gateway-bot/services/doc_service.py
|
||
</span>
|
||
</a>
|
||
|
||
<nav class="md-nav" aria-label="1. Create service: gateway-bot/services/doc_service.py">
|
||
<ul class="md-nav__list">
|
||
|
||
<li class="md-nav__item">
|
||
<a href="#11-pydantic-models" class="md-nav__link">
|
||
<span class="md-ellipsis">
|
||
1.1. Pydantic models
|
||
</span>
|
||
</a>
|
||
|
||
</li>
|
||
|
||
<li class="md-nav__item">
|
||
<a href="#12-documentservice-class" class="md-nav__link">
|
||
<span class="md-ellipsis">
|
||
1.2. DocumentService class
|
||
</span>
|
||
</a>
|
||
|
||
</li>
|
||
|
||
</ul>
|
||
</nav>
|
||
|
||
</li>
|
||
|
||
<li class="md-nav__item">
|
||
<a href="#2-extend-memoryclient-gateway-botmemory_clientpy" class="md-nav__link">
|
||
<span class="md-ellipsis">
|
||
2. Extend MemoryClient: gateway-bot/memory_client.py
|
||
</span>
|
||
</a>
|
||
|
||
</li>
|
||
|
||
<li class="md-nav__item">
|
||
<a href="#3-http-api-for-webmobile-gateway-bothttp_api_docpy" class="md-nav__link">
|
||
<span class="md-ellipsis">
|
||
3. HTTP API for web/mobile: gateway-bot/http_api_doc.py
|
||
</span>
|
||
</a>
|
||
|
||
</li>
|
||
|
||
<li class="md-nav__item">
|
||
<a href="#4-wire-api-into-app-gateway-botapppy" class="md-nav__link">
|
||
<span class="md-ellipsis">
|
||
4. Wire API into app: gateway-bot/app.py
|
||
</span>
|
||
</a>
|
||
|
||
</li>
|
||
|
||
<li class="md-nav__item">
|
||
<a href="#5-refactor-telegram-handlers-gateway-bothttp_apipy" class="md-nav__link">
|
||
<span class="md-ellipsis">
|
||
5. Refactor Telegram handlers: gateway-bot/http_api.py
|
||
</span>
|
||
</a>
|
||
|
||
<nav class="md-nav" aria-label="5. Refactor Telegram handlers: gateway-bot/http_api.py">
|
||
<ul class="md-nav__list">
|
||
|
||
<li class="md-nav__item">
|
||
<a href="#51-imports-and-constants" class="md-nav__link">
|
||
<span class="md-ellipsis">
|
||
5.1. Imports and constants
|
||
</span>
|
||
</a>
|
||
|
||
</li>
|
||
|
||
<li class="md-nav__item">
|
||
<a href="#52-daarwizz-telegramwebhook" class="md-nav__link">
|
||
<span class="md-ellipsis">
|
||
5.2. DAARWIZZ /telegram/webhook
|
||
</span>
|
||
</a>
|
||
|
||
</li>
|
||
|
||
<li class="md-nav__item">
|
||
<a href="#53-helion-heliontelegramwebhook" class="md-nav__link">
|
||
<span class="md-ellipsis">
|
||
5.3. Helion /helion/telegram/webhook
|
||
</span>
|
||
</a>
|
||
|
||
</li>
|
||
|
||
<li class="md-nav__item">
|
||
<a href="#54-formatting-helpers" class="md-nav__link">
|
||
<span class="md-ellipsis">
|
||
5.4. Formatting helpers
|
||
</span>
|
||
</a>
|
||
|
||
</li>
|
||
|
||
</ul>
|
||
</nav>
|
||
|
||
</li>
|
||
|
||
</ul>
|
||
</nav>
|
||
|
||
</li>
|
||
|
||
<li class="md-nav__item">
|
||
<a href="#acceptance-criteria" class="md-nav__link">
|
||
<span class="md-ellipsis">
|
||
Acceptance criteria
|
||
</span>
|
||
</a>
|
||
|
||
</li>
|
||
|
||
<li class="md-nav__item">
|
||
<a href="#how-to-run-this-task-with-cursor" class="md-nav__link">
|
||
<span class="md-ellipsis">
|
||
How to run this task with Cursor
|
||
</span>
|
||
</a>
|
||
|
||
</li>
|
||
|
||
</ul>
|
||
|
||
</nav>
|
||
</div>
|
||
</div>
|
||
</div>
|
||
|
||
|
||
|
||
<div class="md-content" data-md-component="content">
|
||
<article class="md-content__inner md-typeset">
|
||
|
||
|
||
|
||
|
||
<h1 id="task-channel-agnostic-document-workflow-pdf-rag">Task: Channel-agnostic document workflow (PDF + RAG)<a class="headerlink" href="#task-channel-agnostic-document-workflow-pdf-rag" title="Permanent link">¶</a></h1>
|
||
<h2 id="goal">Goal<a class="headerlink" href="#goal" title="Permanent link">¶</a></h2>
|
||
<p>Make the document (PDF) parsing + RAG workflow <strong>channel-agnostic</strong>, so it can be reused by:</p>
|
||
<ul>
|
||
<li>Telegram bots (DAARWIZZ, Helion)</li>
|
||
<li>Web applications</li>
|
||
<li>Mobile apps</li>
|
||
<li>Any other client via HTTP API</li>
|
||
</ul>
|
||
<p>This task defines a shared <code>doc_service</code>, HTTP endpoints for non-Telegram clients, and integration of Telegram handlers with this shared layer.</p>
|
||
<blockquote>
|
||
<p>NOTE: If this task is re-run on a repo where it is already implemented, it should be treated as a validation/refinement task. Existing structures (services, endpoints) SHOULD NOT be removed, only improved if necessary.</p>
|
||
</blockquote>
|
||
<hr />
|
||
<h2 id="context">Context<a class="headerlink" href="#context" title="Permanent link">¶</a></h2>
|
||
<h3 id="existing-components-expected-state">Existing components (expected state)<a class="headerlink" href="#existing-components-expected-state" title="Permanent link">¶</a></h3>
|
||
<ul>
|
||
<li>Repo root: <code>microdao-daarion/</code></li>
|
||
<li>Gateway service: <code>gateway-bot/</code></li>
|
||
</ul>
|
||
<p>Key files:</p>
|
||
<ul>
<li><code>gateway-bot/http_api.py</code>
<ul>
<li>Telegram handlers for DAARWIZZ (<code>/telegram/webhook</code>) and Helion (<code>/helion/telegram/webhook</code>).</li>
<li>Voice → STT flow (Whisper via <code>STT_SERVICE_URL</code>).</li>
<li>Discord handler.</li>
<li>Helper functions: <code>get_telegram_file_path</code>, <code>send_telegram_message</code>.</li>
</ul>
</li>
<li><code>gateway-bot/memory_client.py</code>
<ul>
<li><code>MemoryClient</code> with methods:
<ul>
<li><code>get_context</code>, <code>save_chat_turn</code>, <code>create_dialog_summary</code>, <code>upsert_fact</code>.</li>
</ul>
</li>
</ul>
</li>
<li><code>gateway-bot/app.py</code>
<ul>
<li>FastAPI app, includes <code>http_api.router</code> as <code>gateway_router</code>.</li>
<li>CORS configuration.</li>
</ul>
</li>
</ul>
|
||
<p>Router + parser (already implemented in router project):</p>
|
||
<ul>
<li>DAGI Router supports:
<ul>
<li><code>mode: "doc_parse"</code> with provider <code>parser</code> → OCRProvider → <code>parser-service</code> (DotsOCR).</li>
<li><code>mode: "rag_query"</code> for RAG questions.</li>
</ul>
</li>
<li><code>parser-service</code> is available at <code>http://parser-service:9400</code>.</li>
</ul>
|
||
<p>The goal of this task is to:</p>
|
||
<ol>
|
||
<li>Add <strong>channel-agnostic</strong> document service into <code>gateway-bot</code>.</li>
|
||
<li>Add <code>/api/doc/*</code> HTTP endpoints for web/mobile.</li>
|
||
<li>Refactor Telegram handlers to use this service for PDF, <code>/ingest</code>, and RAG follow-ups.</li>
|
||
<li>Store document context in Memory Service via <code>fact_key = "doc_context:{session_id}"</code>.</li>
|
||
</ol>
|
||
<hr />
|
||
<h2 id="changes-to-implement">Changes to implement<a class="headerlink" href="#changes-to-implement" title="Permanent link">¶</a></h2>
|
||
<h3 id="1-create-service-gateway-botservicesdoc_servicepy">1. Create service: <code>gateway-bot/services/doc_service.py</code><a class="headerlink" href="#1-create-service-gateway-botservicesdoc_servicepy" title="Permanent link">¶</a></h3>
|
||
<p>Create a new directory and file:</p>
|
||
<ul>
|
||
<li><code>gateway-bot/services/__init__.py</code></li>
|
||
<li><code>gateway-bot/services/doc_service.py</code></li>
|
||
</ul>
|
||
<h4 id="11-pydantic-models">1.1. Pydantic models<a class="headerlink" href="#11-pydantic-models" title="Permanent link">¶</a></h4>
|
||
<p>Define models:</p>
|
||
<ul>
|
||
<li><code>QAItem</code> — single Q&amp;A pair</li>
|
||
<li><code>ParsedResult</code> — result of document parsing</li>
|
||
<li><code>IngestResult</code> — result of ingestion into RAG</li>
|
||
<li><code>QAResult</code> — result of RAG query about a document</li>
|
||
<li><code>DocContext</code> — stored document context</li>
|
||
</ul>
|
||
<p>Example fields (can be extended as needed):</p>
|
||
<ul>
<li><code>QAItem</code>: <code>question: str</code>, <code>answer: str</code></li>
<li><code>ParsedResult</code>:
<ul>
<li><code>success: bool</code></li>
<li><code>doc_id: Optional[str]</code></li>
<li><code>qa_pairs: Optional[List[QAItem]]</code></li>
<li><code>markdown: Optional[str]</code></li>
<li><code>chunks_meta: Optional[Dict[str, Any]]</code> (e.g., <code>{"count": int, "chunks": [...]}</code>)</li>
<li><code>raw: Optional[Dict[str, Any]]</code> (full payload from router)</li>
<li><code>error: Optional[str]</code></li>
</ul>
</li>
<li><code>IngestResult</code>:
<ul>
<li><code>success: bool</code></li>
<li><code>doc_id: Optional[str]</code></li>
<li><code>ingested_chunks: int</code></li>
<li><code>status: str</code></li>
<li><code>error: Optional[str]</code></li>
</ul>
</li>
<li><code>QAResult</code>:
<ul>
<li><code>success: bool</code></li>
<li><code>answer: Optional[str]</code></li>
<li><code>doc_id: Optional[str]</code></li>
<li><code>sources: Optional[List[Dict[str, Any]]]</code></li>
<li><code>error: Optional[str]</code></li>
</ul>
</li>
<li><code>DocContext</code>:
<ul>
<li><code>doc_id: str</code></li>
<li><code>dao_id: Optional[str]</code></li>
<li><code>user_id: Optional[str]</code></li>
<li><code>doc_url: Optional[str]</code></li>
<li><code>file_name: Optional[str]</code></li>
<li><code>saved_at: Optional[str]</code></li>
</ul>
</li>
</ul>
|
||
<h4 id="12-documentservice-class">1.2. DocumentService class<a class="headerlink" href="#12-documentservice-class" title="Permanent link">¶</a></h4>
|
||
<p>Implement <code>DocumentService</code> using <code>router_client.send_to_router</code> and <code>memory_client</code>:</p>
|
||
<p>Methods:</p>
|
||
<ul>
<li><code>async def save_doc_context(session_id, doc_id, doc_url=None, file_name=None, dao_id=None) -> bool</code>
<ul>
<li>Uses <code>memory_client.upsert_fact</code> with:
<ul>
<li><code>fact_key = f"doc_context:{session_id}"</code></li>
<li><code>fact_value_json = {"doc_id", "doc_url", "file_name", "dao_id", "saved_at"}</code>.</li>
</ul>
</li>
<li>Extract <code>user_id</code> from <code>session_id</code> (e.g., <code>telegram:123</code> → <code>user_id="123"</code>).</li>
</ul>
</li>
<li><code>async def get_doc_context(session_id) -> Optional[DocContext]</code>
<ul>
<li>Uses <code>memory_client.get_fact(user_id, fact_key)</code>.</li>
<li>If <code>fact_value_json</code> exists, return <code>DocContext(**fact_value_json)</code>.</li>
</ul>
</li>
<li><code>async def parse_document(session_id, doc_url, file_name, dao_id, user_id, output_mode="qa_pairs", metadata=None) -> ParsedResult</code>
<ul>
<li>Builds router request:
<ul>
<li><code>mode: "doc_parse"</code></li>
<li><code>agent: "parser"</code></li>
<li><code>metadata</code>: includes <code>source</code> (derived from session_id), <code>dao_id</code>, <code>user_id</code>, <code>session_id</code> and optional metadata.</li>
<li><code>payload</code>: includes <code>doc_url</code>, <code>file_name</code>, <code>output_mode</code>, <code>dao_id</code>, <code>user_id</code>.</li>
</ul>
</li>
<li>Calls <code>send_to_router</code>.</li>
<li>On success:
<ul>
<li>Extract <code>doc_id</code> from response.</li>
<li>Call <code>save_doc_context</code>.</li>
<li>Map <code>qa_pairs</code>, <code>markdown</code>, <code>chunks</code> into <code>ParsedResult</code>.</li>
</ul>
</li>
</ul>
</li>
<li><code>async def ingest_document(session_id, doc_id=None, doc_url=None, file_name=None, dao_id=None, user_id=None) -> IngestResult</code>
<ul>
<li>If <code>doc_id</code> is <code>None</code>, load from <code>get_doc_context</code>.</li>
<li>Build router request with <code>mode: "doc_parse"</code>, <code>payload.output_mode="chunks"</code>, <code>payload.ingest=True</code> and <code>doc_url</code> / <code>doc_id</code>.</li>
<li>Return <code>IngestResult</code> with <code>ingested_chunks</code> based on <code>chunks</code> length.</li>
</ul>
</li>
<li><code>async def ask_about_document(session_id, question, doc_id=None, dao_id=None, user_id=None) -> QAResult</code>
<ul>
<li>If <code>doc_id</code> is <code>None</code>, load from <code>get_doc_context</code>.</li>
<li>Build router request with <code>mode: "rag_query"</code> and <code>payload</code> containing <code>question</code>, <code>dao_id</code>, <code>user_id</code>, <code>doc_id</code>.</li>
<li>Return <code>QAResult</code> with <code>answer</code> and optional <code>sources</code>.</li>
</ul>
</li>
</ul>
|
||
<p>Provide small helper method:</p>
|
||
<ul>
|
||
<li><code>_extract_source(session_id: str) -> str</code> → returns first segment before <code>:</code> (e.g. <code>"telegram"</code>, <code>"web"</code>).</li>
|
||
</ul>
|
||
<p>At bottom of the file, export convenience functions:</p>
|
||
<ul>
|
||
<li><code>doc_service = DocumentService()</code></li>
|
||
<li>Top-level async wrappers:</li>
|
||
<li><code>parse_document(...)</code>, <code>ingest_document(...)</code>, <code>ask_about_document(...)</code>, <code>save_doc_context(...)</code>, <code>get_doc_context(...)</code>.</li>
|
||
</ul>
|
||
<blockquote>
|
||
<p>IMPORTANT: No Telegram-specific logic (emoji, message length, <code>/ingest</code> hints) in this file.</p>
|
||
</blockquote>
|
||
<hr />
|
||
<h3 id="2-extend-memoryclient-gateway-botmemory_clientpy">2. Extend MemoryClient: <code>gateway-bot/memory_client.py</code><a class="headerlink" href="#2-extend-memoryclient-gateway-botmemory_clientpy" title="Permanent link">¶</a></h3>
|
||
<p>Add method:</p>
|
||
<div class="codehilite"><pre><span></span><code><span class="k">async</span> <span class="k">def</span><span class="w"> </span><span class="nf">get_fact</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">user_id</span><span class="p">:</span> <span class="nb">str</span><span class="p">,</span> <span class="n">fact_key</span><span class="p">:</span> <span class="nb">str</span><span class="p">,</span> <span class="n">team_id</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">)</span> <span class="o">-></span> <span class="n">Optional</span><span class="p">[</span><span class="n">Dict</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">Any</span><span class="p">]]:</span>
|
||
<span class="w"> </span><span class="sd">"""Get single fact by key"""</span>
|
||
</code></pre></div>
|
||
|
||
<ul>
|
||
<li>Use Memory Service HTTP API, e.g.:</li>
|
||
<li><code>GET {base_url}/facts/{fact_key}</code> with <code>user_id</code> and optional <code>team_id</code> in query params.</li>
|
||
<li>Return <code>response.json()</code> on 200, else <code>None</code>.</li>
|
||
</ul>
|
||
<p>This method will be used by <code>doc_service.get_doc_context</code>.</p>
|
||
<p>Do <strong>not</strong> change existing public methods.</p>
|
||
<hr />
|
||
<h3 id="3-http-api-for-webmobile-gateway-bothttp_api_docpy">3. HTTP API for web/mobile: <code>gateway-bot/http_api_doc.py</code><a class="headerlink" href="#3-http-api-for-webmobile-gateway-bothttp_api_docpy" title="Permanent link">¶</a></h3>
|
||
<p>Create <code>gateway-bot/http_api_doc.py</code> with:</p>
|
||
<ul>
|
||
<li><code>APIRouter()</code> named <code>router</code>.</li>
|
||
<li>Import from <code>services.doc_service</code>:</li>
|
||
<li><code>parse_document</code>, <code>ingest_document</code>, <code>ask_about_document</code>, <code>get_doc_context</code>, and models.</li>
|
||
</ul>
|
||
<p>Endpoints:</p>
|
||
<ol>
|
||
<li><code>POST /api/doc/parse</code></li>
|
||
</ol>
|
||
<p>Request (JSON body, Pydantic model <code>ParseDocumentRequest</code>):</p>
|
||
<ul>
|
||
<li><code>session_id: str</code></li>
|
||
<li><code>doc_url: str</code></li>
|
||
<li><code>file_name: str</code></li>
|
||
<li><code>dao_id: str</code></li>
|
||
<li><code>user_id: str</code></li>
|
||
<li><code>output_mode: str = "qa_pairs"</code></li>
|
||
<li><code>metadata: Optional[Dict[str, Any]]</code></li>
|
||
</ul>
|
||
<p>Behaviour:</p>
|
||
<ul>
|
||
<li>Call <code>parse_document(...)</code> from doc_service.</li>
|
||
<li>On failure → <code>HTTPException(status_code=400, detail=result.error)</code>.</li>
|
||
<li>On success → JSON with <code>doc_id</code>, <code>qa_pairs</code> (as list of dict), <code>markdown</code>, <code>chunks_meta</code>, <code>raw</code>.</li>
</ul>
<ol start="2">
<li><code>POST /api/doc/ingest</code></li>
</ol>
|
||
<p>Request (<code>IngestDocumentRequest</code>):</p>
|
||
<ul>
|
||
<li><code>session_id: str</code></li>
|
||
<li><code>doc_id: Optional[str]</code></li>
|
||
<li><code>doc_url: Optional[str]</code></li>
|
||
<li><code>file_name: Optional[str]</code></li>
|
||
<li><code>dao_id: Optional[str]</code></li>
|
||
<li><code>user_id: Optional[str]</code></li>
|
||
</ul>
|
||
<p>Behaviour:</p>
|
||
<ul>
|
||
<li>If <code>doc_id</code> is missing, use <code>get_doc_context(session_id)</code>.</li>
|
||
<li>Call <code>ingest_document(...)</code>.</li>
|
||
<li>Return <code>doc_id</code>, <code>ingested_chunks</code>, <code>status</code>.</li>
</ul>
<ol start="3">
<li><code>POST /api/doc/ask</code></li>
</ol>
|
||
<p>Request (<code>AskDocumentRequest</code>):</p>
|
||
<ul>
|
||
<li><code>session_id: str</code></li>
|
||
<li><code>question: str</code></li>
|
||
<li><code>doc_id: Optional[str]</code></li>
|
||
<li><code>dao_id: Optional[str]</code></li>
|
||
<li><code>user_id: Optional[str]</code></li>
|
||
</ul>
|
||
<p>Behaviour:</p>
|
||
<ul>
|
||
<li>If <code>doc_id</code> is missing, use <code>get_doc_context(session_id)</code>.</li>
|
||
<li>Call <code>ask_about_document(...)</code>.</li>
|
||
<li>Return <code>answer</code>, <code>doc_id</code>, and <code>sources</code> (if any).</li>
</ul>
<ol start="4">
<li><code>GET /api/doc/context/{session_id}</code></li>
</ol>
|
||
<p>Behaviour:</p>
|
||
<ul>
|
||
<li>Use <code>get_doc_context(session_id)</code>.</li>
|
||
<li>If missing → 404.</li>
|
||
<li>Else return <code>doc_id</code>, <code>dao_id</code>, <code>user_id</code>, <code>doc_url</code>, <code>file_name</code>, <code>saved_at</code>.</li>
|
||
</ul>
|
||
<p>Optional: <code>POST /api/doc/parse/upload</code> stub for future file-upload handling (currently can return 501 with note to use <code>doc_url</code>).</p>
|
||
<hr />
|
||
<h3 id="4-wire-api-into-app-gateway-botapppy">4. Wire API into app: <code>gateway-bot/app.py</code><a class="headerlink" href="#4-wire-api-into-app-gateway-botapppy" title="Permanent link">¶</a></h3>
|
||
<p>Update <code>app.py</code>:</p>
|
||
<ul>
|
||
<li>Import both routers:</li>
|
||
</ul>
|
||
<div class="codehilite"><pre><span></span><code>from http_api import router as gateway_router
from http_api_doc import router as doc_router
</code></pre></div>
|
||
<ul>
|
||
<li>Include them:</li>
|
||
</ul>
|
||
<p><code>python
|
||
app.include_router(gateway_router, prefix="", tags=["gateway"])
|
||
app.include_router(doc_router, prefix="", tags=["docs"])</code></p>
|
||
<ul>
|
||
<li>
|
||
<p>Update root endpoint <code>/</code> to list new endpoints:</p>
|
||
</li>
|
||
<li>
|
||
<p><code>"POST /api/doc/parse"</code></p>
|
||
</li>
|
||
<li><code>"POST /api/doc/ingest"</code></li>
|
||
<li><code>"POST /api/doc/ask"</code></li>
|
||
<li><code>"GET /api/doc/context/{session_id}"</code></li>
|
||
</ul>
|
||
<hr />
|
||
<h3 id="5-refactor-telegram-handlers-gateway-bothttp_apipy">5. Refactor Telegram handlers: <code>gateway-bot/http_api.py</code><a class="headerlink" href="#5-refactor-telegram-handlers-gateway-bothttp_apipy" title="Permanent link">¶</a></h3>
|
||
<p>Update <code>http_api.py</code> so Telegram uses <code>doc_service</code> for PDF/ingest/RAG, keeping existing chat/voice flows.</p>
|
||
<h4 id="51-imports-and-constants">5.1. Imports and constants<a class="headerlink" href="#51-imports-and-constants" title="Permanent link">¶</a></h4>
|
||
<ul>
|
||
<li>Add imports:</li>
|
||
</ul>
|
||
<p><code>python
|
||
from services.doc_service import (
|
||
parse_document,
|
||
ingest_document,
|
||
ask_about_document,
|
||
get_doc_context,
|
||
)</code></p>
|
||
<ul>
|
||
<li>Define Telegram length limits:</li>
|
||
</ul>
|
||
<p><code>python
|
||
TELEGRAM_MAX_MESSAGE_LENGTH = 4096
|
||
TELEGRAM_SAFE_LENGTH = 3500</code></p>
|
||
<h4 id="52-daarwizz-telegramwebhook">5.2. DAARWIZZ <code>/telegram/webhook</code><a class="headerlink" href="#52-daarwizz-telegramwebhook" title="Permanent link">¶</a></h4>
|
||
<p>Inside <code>telegram_webhook</code>:</p>
|
||
<ol>
|
||
<li>
|
||
<p><strong>/ingest command</strong></p>
|
||
</li>
|
||
<li>
|
||
<p>Check <code>text</code> from the message; if it starts with <code>/ingest</code>:</p>
|
||
<ul>
|
||
<li><code>session_id = f"telegram:{chat_id}"</code>.</li>
|
||
<li>If message also contains a PDF document:</li>
|
||
<li>Use <code>get_telegram_file_path(file_id)</code> and correct bot token to build <code>file_url</code>.</li>
|
||
<li><code>await send_telegram_message(chat_id, "📥 Імпортую документ у RAG...")</code>.</li>
|
||
<li>Call <code>ingest_document(session_id, doc_url=file_url, file_name=file_name, dao_id=dao_id, user_id=f"tg:{user_id}")</code>.</li>
|
||
<li>Else:</li>
|
||
<li>Call <code>ingest_document(session_id, dao_id=dao_id, user_id=f"tg:{user_id}")</code> and rely on stored context.</li>
|
||
<li>Send success/failure message.</li>
|
||
</ul>
|
||
</li>
|
||
<li>
|
||
<p><strong>PDF detection</strong></p>
|
||
</li>
|
||
<li>
|
||
<p>Check <code>document = update.message.get("document")</code>.</p>
|
||
</li>
|
||
<li>Determine <code>is_pdf</code> via <code>mime_type</code> and/or <code>file_name.endswith(".pdf")</code>.</li>
|
||
<li>
|
||
<p>If PDF:</p>
|
||
<ul>
|
||
<li>Log file info.</li>
|
||
<li>Get <code>file_path</code> via <code>get_telegram_file_path(file_id)</code> + correct token → <code>file_url</code>.</li>
|
||
<li>Send "📄 Обробляю PDF-документ...".</li>
|
||
<li><code>session_id = f"telegram:{chat_id}"</code>.</li>
|
||
<li>Call <code>parse_document(session_id, doc_url=file_url, file_name=file_name, dao_id=dao_id, user_id=f"tg:{user_id}", output_mode="qa_pairs", metadata={"username": username, "chat_id": chat_id})</code>.</li>
|
||
<li>On success, format:</li>
|
||
<li>Prefer Q&amp;A (<code>result.qa_pairs</code>) → <code>format_qa_response(...)</code>.</li>
|
||
<li>Else markdown → <code>format_markdown_response(...)</code>.</li>
|
||
<li>Else chunks → <code>format_chunks_response(...)</code>.</li>
|
||
<li>Append hint: <code>"\n\n💡 _Використай /ingest для імпорту документа у RAG_"</code>.</li>
|
||
<li>Send response via <code>send_telegram_message</code>.</li>
|
||
</ul>
|
||
</li>
|
||
<li>
|
||
<p><strong>RAG follow-up questions</strong></p>
|
||
</li>
|
||
<li>
|
||
<p>After computing <code>text</code> (from voice or direct text), before regular chat routing:</p>
|
||
<ul>
|
||
<li><code>session_id = f"telegram:{chat_id}"</code>.</li>
|
||
<li>Load <code>doc_context = await get_doc_context(session_id)</code>.</li>
|
||
<li>If <code>doc_context.doc_id</code> exists and text looks like a question (contains <code>?</code> or Ukrainian question words):</li>
|
||
<li>Call <code>ask_about_document(session_id, question=text, doc_id=doc_context.doc_id, dao_id=dao_id or doc_context.dao_id, user_id=f"tg:{user_id}")</code>.</li>
|
||
<li>If success, truncate answer to <code>TELEGRAM_SAFE_LENGTH</code> and send as Telegram message.</li>
|
||
<li>If RAG fails → fall back to normal chat routing.</li>
|
||
</ul>
|
||
</li>
|
||
<li>
|
||
<p><strong>Keep voice + normal chat flows</strong></p>
|
||
</li>
|
||
<li>
|
||
<p>Existing STT flow and chat→router logic should remain as fallback for non-PDF / non-ingest / non-RAG messages.</p>
|
||
</li>
|
||
</ol>
|
||
<h4 id="53-helion-heliontelegramwebhook">5.3. Helion <code>/helion/telegram/webhook</code><a class="headerlink" href="#53-helion-heliontelegramwebhook" title="Permanent link">¶</a></h4>
|
||
<p>Mirror the same behaviours for Helion handler:</p>
|
||
<ul>
|
||
<li><code>/ingest</code> command support.</li>
|
||
<li>PDF detection and <code>parse_document</code> usage.</li>
|
||
<li>RAG follow-up via <code>ask_about_document</code>.</li>
|
||
<li>Use <code>HELION_TELEGRAM_BOT_TOKEN</code> for file download and message sending.</li>
|
||
<li>Preserve existing chat→router behaviour when doc flow does not apply.</li>
|
||
</ul>
|
||
<h4 id="54-formatting-helpers">5.4. Formatting helpers<a class="headerlink" href="#54-formatting-helpers" title="Permanent link">¶</a></h4>
|
||
<p>Add helper functions at the bottom of <code>http_api.py</code> (Telegram-specific):</p>
|
||
<ul>
|
||
<li><code>format_qa_response(qa_pairs: list, max_pairs: int = 5) -> str</code></li>
|
||
<li>Adds header, enumerates Q&amp;A pairs, truncates long answers, respects <code>TELEGRAM_SAFE_LENGTH</code>.</li>
|
||
<li><code>format_markdown_response(markdown: str) -> str</code></li>
|
||
<li>Wraps markdown with header; truncates to <code>TELEGRAM_SAFE_LENGTH</code> and appends hint about <code>/ingest</code> if truncated.</li>
|
||
<li><code>format_chunks_response(chunks: list) -> str</code></li>
|
||
<li>Shows summary about number of chunks and previews first ~3.</li>
|
||
</ul>
|
||
<blockquote>
|
||
<p>IMPORTANT: These helpers handle Telegram-specific constraints and SHOULD NOT be moved into <code>doc_service</code>.</p>
|
||
</blockquote>
|
||
<hr />
|
||
<h2 id="acceptance-criteria">Acceptance criteria<a class="headerlink" href="#acceptance-criteria" title="Permanent link">¶</a></h2>
|
||
<ol>
|
||
<li><code>gateway-bot/services/doc_service.py</code> exists and provides:</li>
|
||
<li><code>parse_document</code>, <code>ingest_document</code>, <code>ask_about_document</code>, <code>save_doc_context</code>, <code>get_doc_context</code>.</li>
|
||
<li>
|
||
<p>Uses DAGI Router and Memory Service, with <code>session_id</code>-based context.</p>
|
||
</li>
|
||
<li>
|
||
<p><code>gateway-bot/http_api_doc.py</code> exists and defines:</p>
|
||
</li>
|
||
<li><code>POST /api/doc/parse</code></li>
|
||
<li><code>POST /api/doc/ingest</code></li>
|
||
<li><code>POST /api/doc/ask</code></li>
|
||
<li>
|
||
<p><code>GET /api/doc/context/{session_id}</code></p>
|
||
</li>
|
||
<li>
|
||
<p><code>gateway-bot/app.py</code>:</p>
|
||
</li>
|
||
<li>Includes both <code>http_api.router</code> and <code>http_api_doc.router</code>.</li>
|
||
<li>
|
||
<p>Root <code>/</code> lists new <code>/api/doc/*</code> endpoints.</p>
|
||
</li>
|
||
<li>
|
||
<p><code>gateway-bot/memory_client.py</code>:</p>
|
||
</li>
|
||
<li>Includes <code>get_fact(...)</code> and existing methods still work.</li>
|
||
<li>
|
||
<p><code>doc_service</code> uses <code>upsert_fact</code> + <code>get_fact</code> for <code>doc_context:{session_id}</code>.</p>
|
||
</li>
|
||
<li>
|
||
<p><code>gateway-bot/http_api.py</code>:</p>
|
||
</li>
|
||
<li>Telegram handlers use <code>doc_service</code> for:<ul>
|
||
<li>PDF parsing,</li>
|
||
<li><code>/ingest</code> command,</li>
|
||
<li>RAG follow-up questions.</li>
|
||
</ul>
|
||
</li>
|
||
<li>
|
||
<p>Continue to support existing voice→STT→chat flow and regular chat routing when doc flow isn't triggered.</p>
|
||
</li>
|
||
<li>
|
||
<p>Web/mobile clients can call <code>/api/doc/*</code> to:</p>
|
||
</li>
|
||
<li>Parse documents via <code>doc_url</code>.</li>
|
||
<li>Ingest into RAG.</li>
|
||
<li>Ask questions about the last parsed document for given <code>session_id</code>.</li>
|
||
</ol>
|
||
<hr />
|
||
<h2 id="how-to-run-this-task-with-cursor">How to run this task with Cursor<a class="headerlink" href="#how-to-run-this-task-with-cursor" title="Permanent link">¶</a></h2>
|
||
<p>From repo root (<code>microdao-daarion</code>):</p>
|
||
<div class="codehilite"><pre><span></span><code>cursor<span class="w"> </span>task<span class="w"> </span>&lt;<span class="w"> </span>docs/cursor/channel_agnostic_doc_flow_task.md
|
||
</code></pre></div>
|
||
|
||
<p>Cursor should then:</p>
|
||
<ul>
|
||
<li>Create/modify the files listed above.</li>
|
||
<li>Ensure implementation matches the described architecture and acceptance criteria.</li>
|
||
</ul>
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
</article>
|
||
</div>
|
||
|
||
|
||
<script>var target=document.getElementById(location.hash.slice(1));target&&target.name&&(target.checked=target.name.startsWith("__tabbed_"))</script>
|
||
</div>
|
||
|
||
</main>
|
||
|
||
<footer class="md-footer">
|
||
|
||
<div class="md-footer-meta md-typeset">
|
||
<div class="md-footer-meta__inner md-grid">
|
||
<div class="md-copyright">
|
||
|
||
|
||
Made with
|
||
<a href="https://squidfunk.github.io/mkdocs-material/" target="_blank" rel="noopener">
|
||
Material for MkDocs
|
||
</a>
|
||
|
||
</div>
|
||
|
||
</div>
|
||
</div>
|
||
</footer>
|
||
|
||
</div>
|
||
<div class="md-dialog" data-md-component="dialog">
|
||
<div class="md-dialog__inner md-typeset"></div>
|
||
</div>
|
||
|
||
|
||
<script id="__config" type="application/json">{"base": "../..", "features": ["navigation.sections", "navigation.instant", "content.code.copy"], "search": "../../assets/javascripts/workers/search.b8dbb3d2.min.js", "translations": {"clipboard.copied": "Copied to clipboard", "clipboard.copy": "Copy to clipboard", "search.result.more.one": "1 more on this page", "search.result.more.other": "# more on this page", "search.result.none": "No matching documents", "search.result.one": "1 matching document", "search.result.other": "# matching documents", "search.result.placeholder": "Type to start searching", "search.result.term.missing": "Missing", "select.version": "Select version"}}</script>
|
||
|
||
|
||
<script src="../../assets/javascripts/bundle.3220b9d7.min.js"></script>
|
||
|
||
|
||
</body>
|
||
</html> |