1138 lines
46 KiB
HTML
1138 lines
46 KiB
HTML
|
||
<!doctype html>
|
||
<html lang="en" class="no-js">
|
||
<head>
|
||
|
||
<meta charset="utf-8">
|
||
<meta name="viewport" content="width=device-width,initial-scale=1">
|
||
|
||
|
||
|
||
<link rel="canonical" href="https://IvanTytar.github.io/microdao-daarion/agents/parser/">
|
||
|
||
|
||
|
||
|
||
<link rel="icon" href="../../assets/images/favicon.png">
|
||
<meta name="generator" content="mkdocs-1.5.3, mkdocs-material-9.5.18">
|
||
|
||
|
||
|
||
<title>PARSER Agent (dots.ocr) - DAARION Documentation</title>
|
||
|
||
|
||
|
||
<link rel="stylesheet" href="../../assets/stylesheets/main.66ac8b77.min.css">
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
<link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
|
||
<link rel="stylesheet" href="https://fonts.googleapis.com/css?family=Roboto:300,300i,400,400i,700,700i%7CRoboto+Mono:400,400i,700,700i&display=fallback">
|
||
<style>:root{--md-text-font:"Roboto";--md-code-font:"Roboto Mono"}</style>
|
||
|
||
|
||
|
||
<!-- Theme bootstrap helpers, defined as globals before the page body is parsed:
     __md_scope: URL of "../.." resolved against the current location.
     __md_hash(str): folds the characters of str into an integer using (h << 5) - h + charCode, starting from 0.
     __md_get(key, storage = localStorage, scope = __md_scope): JSON-parses the item stored under scope.pathname + "." + key.
     __md_set(key, value, storage = localStorage, scope = __md_scope): stores JSON.stringify(value) under the same key; any exception thrown by setItem is deliberately swallowed. -->
<script>__md_scope=new URL("../..",location),__md_hash=e=>[...e].reduce((e,_)=>(e<<5)-e+_.charCodeAt(0),0),__md_get=(e,_=localStorage,t=__md_scope)=>JSON.parse(_.getItem(t.pathname+"."+e)),__md_set=(e,_,t=localStorage,a=__md_scope)=>{try{t.setItem(a.pathname+"."+e,JSON.stringify(_))}catch(e){}}</script>
|
||
|
||
|
||
|
||
|
||
|
||
|
||
</head>
|
||
|
||
|
||
<body dir="ltr">
|
||
|
||
|
||
<input class="md-toggle" data-md-toggle="drawer" type="checkbox" id="__drawer" autocomplete="off">
|
||
<input class="md-toggle" data-md-toggle="search" type="checkbox" id="__search" autocomplete="off">
|
||
<label class="md-overlay" for="__drawer"></label>
|
||
<div data-md-component="skip">
|
||
|
||
|
||
<a href="#parser-agent-dotsocr" class="md-skip">
|
||
Skip to content
|
||
</a>
|
||
|
||
</div>
|
||
<div data-md-component="announce">
|
||
|
||
</div>
|
||
|
||
|
||
|
||
|
||
|
||
|
||
<header class="md-header md-header--shadow" data-md-component="header">
|
||
<nav class="md-header__inner md-grid" aria-label="Header">
|
||
<a href="../.." title="DAARION Documentation" class="md-header__button md-logo" aria-label="DAARION Documentation" data-md-component="logo">
|
||
|
||
|
||
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M12 8a3 3 0 0 0 3-3 3 3 0 0 0-3-3 3 3 0 0 0-3 3 3 3 0 0 0 3 3m0 3.54C9.64 9.35 6.5 8 3 8v11c3.5 0 6.64 1.35 9 3.54 2.36-2.19 5.5-3.54 9-3.54V8c-3.5 0-6.64 1.35-9 3.54Z"/></svg>
|
||
|
||
</a>
|
||
<label class="md-header__button md-icon" for="__drawer">
|
||
|
||
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M3 6h18v2H3V6m0 5h18v2H3v-2m0 5h18v2H3v-2Z"/></svg>
|
||
</label>
|
||
<div class="md-header__title" data-md-component="header-title">
|
||
<div class="md-header__ellipsis">
|
||
<div class="md-header__topic">
|
||
<span class="md-ellipsis">
|
||
DAARION Documentation
|
||
</span>
|
||
</div>
|
||
<div class="md-header__topic" data-md-component="header-topic">
|
||
<span class="md-ellipsis">
|
||
|
||
PARSER Agent (dots.ocr)
|
||
|
||
</span>
|
||
</div>
|
||
</div>
|
||
</div>
|
||
|
||
|
||
<!-- Restores the stored colour palette: reads "__palette" through __md_get and, when it has a "color" entry,
     copies every key/value of palette.color onto <body> as a data-md-color-<key> attribute.
     If the stored media value is "(prefers-color-scheme)", the light/dark media query is evaluated first and the
     media, scheme, primary and accent values are taken from the matching [data-md-color-media] input.
     NOTE(review): the querySelector result is dereferenced without a null check; no such input is visible in
     this part of the page, so confirm one exists whenever a "(prefers-color-scheme)" palette can be stored. -->
<script>var media,input,key,value,palette=__md_get("__palette");if(palette&&palette.color){"(prefers-color-scheme)"===palette.color.media&&(media=matchMedia("(prefers-color-scheme: light)"),input=document.querySelector(media.matches?"[data-md-color-media='(prefers-color-scheme: light)']":"[data-md-color-media='(prefers-color-scheme: dark)']"),palette.color.media=input.getAttribute("data-md-color-media"),palette.color.scheme=input.getAttribute("data-md-color-scheme"),palette.color.primary=input.getAttribute("data-md-color-primary"),palette.color.accent=input.getAttribute("data-md-color-accent"));for([key,value]of Object.entries(palette.color))document.body.setAttribute("data-md-color-"+key,value)}</script>
|
||
|
||
|
||
|
||
<label class="md-header__button md-icon" for="__search">
|
||
|
||
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M9.5 3A6.5 6.5 0 0 1 16 9.5c0 1.61-.59 3.09-1.56 4.23l.27.27h.79l5 5-1.5 1.5-5-5v-.79l-.27-.27A6.516 6.516 0 0 1 9.5 16 6.5 6.5 0 0 1 3 9.5 6.5 6.5 0 0 1 9.5 3m0 2C7 5 5 7 5 9.5S7 14 9.5 14 14 12 14 9.5 12 5 9.5 5Z"/></svg>
|
||
</label>
|
||
<div class="md-search" data-md-component="search" role="dialog">
|
||
<label class="md-search__overlay" for="__search"></label>
|
||
<div class="md-search__inner" role="search">
|
||
<form class="md-search__form" name="search">
|
||
<input type="text" class="md-search__input" name="query" aria-label="Search" placeholder="Search" autocapitalize="off" autocorrect="off" autocomplete="off" spellcheck="false" data-md-component="search-query" required>
|
||
<label class="md-search__icon md-icon" for="__search">
|
||
|
||
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M9.5 3A6.5 6.5 0 0 1 16 9.5c0 1.61-.59 3.09-1.56 4.23l.27.27h.79l5 5-1.5 1.5-5-5v-.79l-.27-.27A6.516 6.516 0 0 1 9.5 16 6.5 6.5 0 0 1 3 9.5 6.5 6.5 0 0 1 9.5 3m0 2C7 5 5 7 5 9.5S7 14 9.5 14 14 12 14 9.5 12 5 9.5 5Z"/></svg>
|
||
|
||
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M20 11v2H8l5.5 5.5-1.42 1.42L4.16 12l7.92-7.92L13.5 5.5 8 11h12Z"/></svg>
|
||
</label>
|
||
<nav class="md-search__options" aria-label="Search">
|
||
|
||
<button type="reset" class="md-search__icon md-icon" title="Clear" aria-label="Clear" tabindex="-1">
|
||
|
||
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M19 6.41 17.59 5 12 10.59 6.41 5 5 6.41 10.59 12 5 17.59 6.41 19 12 13.41 17.59 19 19 17.59 13.41 12 19 6.41Z"/></svg>
|
||
</button>
|
||
</nav>
|
||
|
||
</form>
|
||
<div class="md-search__output">
|
||
<div class="md-search__scrollwrap" data-md-scrollfix>
|
||
<div class="md-search-result" data-md-component="search-result">
|
||
<div class="md-search-result__meta">
|
||
Initializing search
|
||
</div>
|
||
<ol class="md-search-result__list" role="presentation"></ol>
|
||
</div>
|
||
</div>
|
||
</div>
|
||
</div>
|
||
</div>
|
||
|
||
|
||
</nav>
|
||
|
||
</header>
|
||
|
||
<div class="md-container" data-md-component="container">
|
||
|
||
|
||
|
||
|
||
|
||
|
||
<main class="md-main" data-md-component="main">
|
||
<div class="md-main__inner md-grid">
|
||
|
||
|
||
|
||
<div class="md-sidebar md-sidebar--primary" data-md-component="sidebar" data-md-type="navigation" >
|
||
<div class="md-sidebar__scrollwrap">
|
||
<div class="md-sidebar__inner">
|
||
|
||
|
||
|
||
|
||
<nav class="md-nav md-nav--primary" aria-label="Navigation" data-md-level="0">
|
||
<label class="md-nav__title" for="__drawer">
|
||
<a href="../.." title="DAARION Documentation" class="md-nav__button md-logo" aria-label="DAARION Documentation" data-md-component="logo">
|
||
|
||
|
||
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M12 8a3 3 0 0 0 3-3 3 3 0 0 0-3-3 3 3 0 0 0-3 3 3 3 0 0 0 3 3m0 3.54C9.64 9.35 6.5 8 3 8v11c3.5 0 6.64 1.35 9 3.54 2.36-2.19 5.5-3.54 9-3.54V8c-3.5 0-6.64 1.35-9 3.54Z"/></svg>
|
||
|
||
</a>
|
||
DAARION Documentation
|
||
</label>
|
||
|
||
<ul class="md-nav__list" data-md-scrollfix>
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
<li class="md-nav__item">
|
||
<a href="../../public/" class="md-nav__link">
|
||
|
||
|
||
<span class="md-ellipsis">
|
||
Home
|
||
</span>
|
||
|
||
|
||
</a>
|
||
</li>
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
<li class="md-nav__item">
|
||
<a href="../../public/getting-started/" class="md-nav__link">
|
||
|
||
|
||
<span class="md-ellipsis">
|
||
Getting Started
|
||
</span>
|
||
|
||
|
||
</a>
|
||
</li>
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
<li class="md-nav__item">
|
||
<a href="../../public/architecture-overview/" class="md-nav__link">
|
||
|
||
|
||
<span class="md-ellipsis">
|
||
Architecture
|
||
</span>
|
||
|
||
|
||
</a>
|
||
</li>
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
<li class="md-nav__item">
|
||
<a href="../../public/daiS_daos_overview/" class="md-nav__link">
|
||
|
||
|
||
<span class="md-ellipsis">
|
||
DAIS &amp; DAOS
|
||
</span>
|
||
|
||
|
||
</a>
|
||
</li>
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
<li class="md-nav__item md-nav__item--section md-nav__item--nested">
|
||
|
||
|
||
|
||
<input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_5" >
|
||
|
||
|
||
<label class="md-nav__link" for="__nav_5" id="__nav_5_label" tabindex="">
|
||
|
||
|
||
<span class="md-ellipsis">
|
||
Internal
|
||
</span>
|
||
|
||
|
||
<span class="md-nav__icon md-icon"></span>
|
||
</label>
|
||
|
||
<nav class="md-nav" data-md-level="1" aria-labelledby="__nav_5_label" aria-expanded="false">
|
||
<label class="md-nav__title" for="__nav_5">
|
||
<span class="md-nav__icon md-icon"></span>
|
||
Internal
|
||
</label>
|
||
<ul class="md-nav__list" data-md-scrollfix>
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
<li class="md-nav__item md-nav__item--nested">
|
||
|
||
|
||
|
||
<input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_5_1" >
|
||
|
||
|
||
<label class="md-nav__link" for="__nav_5_1" id="__nav_5_1_label" tabindex="0">
|
||
|
||
|
||
<span class="md-ellipsis">
|
||
Infra
|
||
</span>
|
||
|
||
|
||
<span class="md-nav__icon md-icon"></span>
|
||
</label>
|
||
|
||
<nav class="md-nav" data-md-level="2" aria-labelledby="__nav_5_1_label" aria-expanded="false">
|
||
<label class="md-nav__title" for="__nav_5_1">
|
||
<span class="md-nav__icon md-icon"></span>
|
||
Infra
|
||
</label>
|
||
<ul class="md-nav__list" data-md-scrollfix>
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
<li class="md-nav__item">
|
||
<a href="../../internal/infra/INFRA_AUTOMATION_PACK_V1/" class="md-nav__link">
|
||
|
||
|
||
<span class="md-ellipsis">
|
||
Infra Automation Pack v1
|
||
</span>
|
||
|
||
|
||
</a>
|
||
</li>
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
<li class="md-nav__item">
|
||
<a href="../../internal/infra/monitoring_overview/" class="md-nav__link">
|
||
|
||
|
||
<span class="md-ellipsis">
|
||
Monitoring Overview
|
||
</span>
|
||
|
||
|
||
</a>
|
||
</li>
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
<li class="md-nav__item">
|
||
<a href="../../internal/infra/nodes_registry_v0/" class="md-nav__link">
|
||
|
||
|
||
<span class="md-ellipsis">
|
||
Nodes Registry v0
|
||
</span>
|
||
|
||
|
||
</a>
|
||
</li>
|
||
|
||
|
||
|
||
|
||
</ul>
|
||
</nav>
|
||
|
||
</li>
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
<li class="md-nav__item md-nav__item--nested">
|
||
|
||
|
||
|
||
<input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_5_2" >
|
||
|
||
|
||
<label class="md-nav__link" for="__nav_5_2" id="__nav_5_2_label" tabindex="0">
|
||
|
||
|
||
<span class="md-ellipsis">
|
||
Specs
|
||
</span>
|
||
|
||
|
||
<span class="md-nav__icon md-icon"></span>
|
||
</label>
|
||
|
||
<nav class="md-nav" data-md-level="2" aria-labelledby="__nav_5_2_label" aria-expanded="false">
|
||
<label class="md-nav__title" for="__nav_5_2">
|
||
<span class="md-nav__icon md-icon"></span>
|
||
Specs
|
||
</label>
|
||
<ul class="md-nav__list" data-md-scrollfix>
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
<li class="md-nav__item">
|
||
<a href="../../internal/specs/matrix_presence_aggregator/" class="md-nav__link">
|
||
|
||
|
||
<span class="md-ellipsis">
|
||
Matrix Presence Aggregator
|
||
</span>
|
||
|
||
|
||
</a>
|
||
</li>
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
<li class="md-nav__item">
|
||
<a href="../../internal/specs/city_map_spec/" class="md-nav__link">
|
||
|
||
|
||
<span class="md-ellipsis">
|
||
City Map Spec
|
||
</span>
|
||
|
||
|
||
</a>
|
||
</li>
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
<li class="md-nav__item">
|
||
<a href="../../internal/specs/node_join_protocol_draft/" class="md-nav__link">
|
||
|
||
|
||
<span class="md-ellipsis">
|
||
Node Join Protocol (Draft)
|
||
</span>
|
||
|
||
|
||
</a>
|
||
</li>
|
||
|
||
|
||
|
||
|
||
</ul>
|
||
</nav>
|
||
|
||
</li>
|
||
|
||
|
||
|
||
|
||
</ul>
|
||
</nav>
|
||
|
||
</li>
|
||
|
||
|
||
|
||
</ul>
|
||
</nav>
|
||
</div>
|
||
</div>
|
||
</div>
|
||
|
||
|
||
|
||
<div class="md-sidebar md-sidebar--secondary" data-md-component="sidebar" data-md-type="toc" >
|
||
<div class="md-sidebar__scrollwrap">
|
||
<div class="md-sidebar__inner">
|
||
|
||
|
||
<nav class="md-nav md-nav--secondary" aria-label="Table of contents">
|
||
|
||
|
||
|
||
|
||
|
||
|
||
<label class="md-nav__title" for="__toc">
|
||
<span class="md-nav__icon md-icon"></span>
|
||
Table of contents
|
||
</label>
|
||
<ul class="md-nav__list" data-md-component="toc" data-md-scrollfix>
|
||
|
||
<li class="md-nav__item">
|
||
<a href="#_1" class="md-nav__link">
|
||
<span class="md-ellipsis">
|
||
Роль та призначення
|
||
</span>
|
||
</a>
|
||
|
||
</li>
|
||
|
||
<li class="md-nav__item">
|
||
<a href="#_2" class="md-nav__link">
|
||
<span class="md-ellipsis">
|
||
Технічна база
|
||
</span>
|
||
</a>
|
||
|
||
<nav class="md-nav" aria-label="Технічна база">
|
||
<ul class="md-nav__list">
|
||
|
||
<li class="md-nav__item">
|
||
<a href="#rednote-hilabdotsocr" class="md-nav__link">
|
||
<span class="md-ellipsis">
|
||
Модель: rednote-hilab/dots.ocr
|
||
</span>
|
||
</a>
|
||
|
||
</li>
|
||
|
||
<li class="md-nav__item">
|
||
<a href="#_3" class="md-nav__link">
|
||
<span class="md-ellipsis">
|
||
Ключові можливості моделі
|
||
</span>
|
||
</a>
|
||
|
||
</li>
|
||
|
||
</ul>
|
||
</nav>
|
||
|
||
</li>
|
||
|
||
<li class="md-nav__item">
|
||
<a href="#_4" class="md-nav__link">
|
||
<span class="md-ellipsis">
|
||
Вхідні дані
|
||
</span>
|
||
</a>
|
||
|
||
<nav class="md-nav" aria-label="Вхідні дані">
|
||
<ul class="md-nav__list">
|
||
|
||
<li class="md-nav__item">
|
||
<a href="#_5" class="md-nav__link">
|
||
<span class="md-ellipsis">
|
||
Підтримувані формати
|
||
</span>
|
||
</a>
|
||
|
||
</li>
|
||
|
||
</ul>
|
||
</nav>
|
||
|
||
</li>
|
||
|
||
<li class="md-nav__item">
|
||
<a href="#_6" class="md-nav__link">
|
||
<span class="md-ellipsis">
|
||
Режими виводу
|
||
</span>
|
||
</a>
|
||
|
||
<nav class="md-nav" aria-label="Режими виводу">
|
||
<ul class="md-nav__list">
|
||
|
||
<li class="md-nav__item">
|
||
<a href="#1-raw_json" class="md-nav__link">
|
||
<span class="md-ellipsis">
|
||
1. raw_json
|
||
</span>
|
||
</a>
|
||
|
||
</li>
|
||
|
||
<li class="md-nav__item">
|
||
<a href="#2-markdown" class="md-nav__link">
|
||
<span class="md-ellipsis">
|
||
2. markdown
|
||
</span>
|
||
</a>
|
||
|
||
</li>
|
||
|
||
<li class="md-nav__item">
|
||
<a href="#3-qa_pairs" class="md-nav__link">
|
||
<span class="md-ellipsis">
|
||
3. qa_pairs
|
||
</span>
|
||
</a>
|
||
|
||
</li>
|
||
|
||
<li class="md-nav__item">
|
||
<a href="#4-chunks" class="md-nav__link">
|
||
<span class="md-ellipsis">
|
||
4. chunks
|
||
</span>
|
||
</a>
|
||
|
||
</li>
|
||
|
||
</ul>
|
||
</nav>
|
||
|
||
</li>
|
||
|
||
<li class="md-nav__item">
|
||
<a href="#_7" class="md-nav__link">
|
||
<span class="md-ellipsis">
|
||
Вихідні дані
|
||
</span>
|
||
</a>
|
||
|
||
<nav class="md-nav" aria-label="Вихідні дані">
|
||
<ul class="md-nav__list">
|
||
|
||
<li class="md-nav__item">
|
||
<a href="#parseddocument" class="md-nav__link">
|
||
<span class="md-ellipsis">
|
||
Структура ParsedDocument
|
||
</span>
|
||
</a>
|
||
|
||
</li>
|
||
|
||
</ul>
|
||
</nav>
|
||
|
||
</li>
|
||
|
||
<li class="md-nav__item">
|
||
<a href="#_8" class="md-nav__link">
|
||
<span class="md-ellipsis">
|
||
Обмеження
|
||
</span>
|
||
</a>
|
||
|
||
</li>
|
||
|
||
<li class="md-nav__item">
|
||
<a href="#_9" class="md-nav__link">
|
||
<span class="md-ellipsis">
|
||
Інтеграція з системою
|
||
</span>
|
||
</a>
|
||
|
||
<nav class="md-nav" aria-label="Інтеграція з системою">
|
||
<ul class="md-nav__list">
|
||
|
||
<li class="md-nav__item">
|
||
<a href="#1-dagi-router" class="md-nav__link">
|
||
<span class="md-ellipsis">
|
||
1. DAGI Router
|
||
</span>
|
||
</a>
|
||
|
||
</li>
|
||
|
||
<li class="md-nav__item">
|
||
<a href="#2-crewai-orchestrator" class="md-nav__link">
|
||
<span class="md-ellipsis">
|
||
2. CrewAI Orchestrator
|
||
</span>
|
||
</a>
|
||
|
||
</li>
|
||
|
||
<li class="md-nav__item">
|
||
<a href="#3-rbac-integration" class="md-nav__link">
|
||
<span class="md-ellipsis">
|
||
3. RBAC Integration
|
||
</span>
|
||
</a>
|
||
|
||
</li>
|
||
|
||
</ul>
|
||
</nav>
|
||
|
||
</li>
|
||
|
||
<li class="md-nav__item">
|
||
<a href="#_10" class="md-nav__link">
|
||
<span class="md-ellipsis">
|
||
Використання
|
||
</span>
|
||
</a>
|
||
|
||
<nav class="md-nav" aria-label="Використання">
|
||
<ul class="md-nav__list">
|
||
|
||
<li class="md-nav__item">
|
||
<a href="#parser" class="md-nav__link">
|
||
<span class="md-ellipsis">
|
||
Приклад запиту до PARSER
|
||
</span>
|
||
</a>
|
||
|
||
</li>
|
||
|
||
<li class="md-nav__item">
|
||
<a href="#dagi-router" class="md-nav__link">
|
||
<span class="md-ellipsis">
|
||
Приклад через DAGI Router
|
||
</span>
|
||
</a>
|
||
|
||
</li>
|
||
|
||
</ul>
|
||
</nav>
|
||
|
||
</li>
|
||
|
||
<li class="md-nav__item">
|
||
<a href="#_11" class="md-nav__link">
|
||
<span class="md-ellipsis">
|
||
Архітектура сервісу
|
||
</span>
|
||
</a>
|
||
|
||
</li>
|
||
|
||
<li class="md-nav__item">
|
||
<a href="#_12" class="md-nav__link">
|
||
<span class="md-ellipsis">
|
||
Залежності
|
||
</span>
|
||
</a>
|
||
|
||
</li>
|
||
|
||
<li class="md-nav__item">
|
||
<a href="#_13" class="md-nav__link">
|
||
<span class="md-ellipsis">
|
||
Посилання
|
||
</span>
|
||
</a>
|
||
|
||
</li>
|
||
|
||
</ul>
|
||
|
||
</nav>
|
||
</div>
|
||
</div>
|
||
</div>
|
||
|
||
|
||
|
||
<div class="md-content" data-md-component="content">
|
||
<article class="md-content__inner md-typeset" lang="uk">
|
||
|
||
|
||
|
||
|
||
<h1 id="parser-agent-dotsocr">PARSER Agent (dots.ocr)<a class="headerlink" href="#parser-agent-dotsocr" title="Permanent link">¶</a></h1>
|
||
<p><strong>Document Ingestion &amp; Structuring Agent</strong> для DAARION / microDAO / SecondMe.</p>
|
||
<h2 id="_1">Роль та призначення<a class="headerlink" href="#_1" title="Permanent link">¶</a></h2>
|
||
<p>PARSER — це агент, який перетворює неструктуровані документи (PDF, зображення) у структуровані дані для RAG (Retrieval-Augmented Generation) та знань-орієнтованих систем.</p>
|
||
<p><strong>Основна мета:</strong> Забезпечити якісний інжест документів у базу знань зі збереженням структури, layout та семантики.</p>
|
||
<h2 id="_2">Технічна база<a class="headerlink" href="#_2" title="Permanent link">¶</a></h2>
|
||
<h3 id="rednote-hilabdotsocr">Модель: <code>rednote-hilab/dots.ocr</code><a class="headerlink" href="#rednote-hilabdotsocr" title="Permanent link">¶</a></h3>
|
||
<ul>
|
||
<li><strong>Тип:</strong> Image-Text-to-Text VLM (Vision Language Model)</li>
|
||
<li><strong>Орієнтація:</strong> Документ-орієнтований OCR з layout detection</li>
|
||
<li><strong>GitHub:</strong> <a href="https://github.com/rednote-hilab/dots.ocr">https://github.com/rednote-hilab/dots.ocr</a></li>
|
||
</ul>
|
||
<h3 id="_3">Ключові можливості моделі<a class="headerlink" href="#_3" title="Permanent link">¶</a></h3>
|
||
<ol>
<li>
<p><strong>Мультимовний OCR + Layout</strong></p>
<ul>
<li>Розпізнає текст на багатьох мовах (включаючи low-resource)</li>
<li>Правильно відновлює <strong>reading order</strong> (колонки, блоки, змішаний макет)</li>
<li>Підтримка складних макетів (наукові статті, звіти, форми)</li>
</ul>
</li>
<li>
<p><strong>Єдиний VLM для всього</strong></p>
<ul>
<li>Один модельний стек для <strong>layout detection + OCR</strong></li>
<li>Не потребує окремих моделей для таблиць/тексту/формул</li>
<li>Уніфікований підхід до різних типів контенту</li>
</ul>
</li>
<li>
<p><strong>Структурований вихід</strong></p>
<ul>
<li>JSON з блоками (<code>paragraph</code>, <code>heading</code>, <code>table</code>, <code>formula</code>, <code>figure_caption</code>, ...)</li>
<li>Bbox-координати, сторінка, читальний порядок</li>
<li>Окремі структури для таблиць (рядки/колонки, merged cells)</li>
<li>Markdown/HTML-подібний текст (таблиці можна відтворювати як Markdown)</li>
</ul>
</li>
<li>
<p><strong>Орієнтація на документи</strong></p>
<ul>
<li>Підтримка форм, інвойсів, звітів, наукових статей, презентацій</li>
<li>Добре працює із змішаним контентом (текст навколо формул, підписи до рисунків)</li>
</ul>
</li>
</ol>
|
||
<h2 id="_4">Вхідні дані<a class="headerlink" href="#_4" title="Permanent link">¶</a></h2>
|
||
<h3 id="_5">Підтримувані формати<a class="headerlink" href="#_5" title="Permanent link">¶</a></h3>
|
||
<ul>
<li>
<p><strong>PDF:</strong></p>
<ul>
<li>Скани (зображення сторінок)</li>
<li>"Цифрові" PDF (текст + векторна графіка)</li>
<li>Багатосторінкові документи</li>
</ul>
</li>
<li>
<p><strong>Зображення:</strong></p>
<ul>
<li>PNG, JPEG, TIFF</li>
<li>Підтримка різних роздільних здатностей</li>
</ul>
</li>
<li>
<p><strong>Документи зі змішаним контентом:</strong></p>
<ul>
<li>Текст + таблиці + схеми + формули</li>
<li>Наукові статті, звіти, презентації</li>
</ul>
</li>
</ul>
|
||
<h2 id="_6">Режими виводу<a class="headerlink" href="#_6" title="Permanent link">¶</a></h2>
|
||
<p>PARSER підтримує кілька режимів виводу (конфігурується через промпт/параметри):</p>
|
||
<h3 id="1-raw_json">1. <code>raw_json</code><a class="headerlink" href="#1-raw_json" title="Permanent link">¶</a></h3>
|
||
<p>Повний структурований JSON з усіма блоками:</p>
|
||
<div class="codehilite"><pre><span></span><code><span class="p">{</span>
|
||
<span class="w"> </span><span class="nt">"pages"</span><span class="p">:</span><span class="w"> </span><span class="p">[</span>
|
||
<span class="w"> </span><span class="p">{</span>
|
||
<span class="w"> </span><span class="nt">"page_num"</span><span class="p">:</span><span class="w"> </span><span class="mi">1</span><span class="p">,</span>
|
||
<span class="w"> </span><span class="nt">"blocks"</span><span class="p">:</span><span class="w"> </span><span class="p">[</span>
|
||
<span class="w"> </span><span class="p">{</span>
|
||
<span class="w"> </span><span class="nt">"type"</span><span class="p">:</span><span class="w"> </span><span class="s2">"heading"</span><span class="p">,</span>
|
||
<span class="w"> </span><span class="nt">"text"</span><span class="p">:</span><span class="w"> </span><span class="s2">"Заголовок"</span><span class="p">,</span>
|
||
<span class="w"> </span><span class="nt">"bbox"</span><span class="p">:</span><span class="w"> </span><span class="p">[</span><span class="err">x</span><span class="p">,</span><span class="w"> </span><span class="err">y</span><span class="p">,</span><span class="w"> </span><span class="err">wid</span><span class="kc">t</span><span class="err">h</span><span class="p">,</span><span class="w"> </span><span class="err">heigh</span><span class="kc">t</span><span class="p">],</span>
|
||
<span class="w"> </span><span class="nt">"reading_order"</span><span class="p">:</span><span class="w"> </span><span class="mi">1</span>
|
||
<span class="w"> </span><span class="p">},</span>
|
||
<span class="w"> </span><span class="p">{</span>
|
||
<span class="w"> </span><span class="nt">"type"</span><span class="p">:</span><span class="w"> </span><span class="s2">"paragraph"</span><span class="p">,</span>
|
||
<span class="w"> </span><span class="nt">"text"</span><span class="p">:</span><span class="w"> </span><span class="s2">"Текст параграфу..."</span><span class="p">,</span>
|
||
<span class="w"> </span><span class="nt">"bbox"</span><span class="p">:</span><span class="w"> </span><span class="p">[</span><span class="err">...</span><span class="p">],</span>
|
||
<span class="w"> </span><span class="nt">"reading_order"</span><span class="p">:</span><span class="w"> </span><span class="mi">2</span>
|
||
<span class="w"> </span><span class="p">},</span>
|
||
<span class="w"> </span><span class="p">{</span>
|
||
<span class="w"> </span><span class="nt">"type"</span><span class="p">:</span><span class="w"> </span><span class="s2">"table"</span><span class="p">,</span>
|
||
<span class="w"> </span><span class="nt">"rows"</span><span class="p">:</span><span class="w"> </span><span class="p">[</span><span class="err">...</span><span class="p">],</span>
|
||
<span class="w"> </span><span class="nt">"columns"</span><span class="p">:</span><span class="w"> </span><span class="p">[</span><span class="err">...</span><span class="p">],</span>
|
||
<span class="w"> </span><span class="nt">"merged_cells"</span><span class="p">:</span><span class="w"> </span><span class="p">[</span><span class="err">...</span><span class="p">]</span>
|
||
<span class="w"> </span><span class="p">}</span>
|
||
<span class="w"> </span><span class="p">]</span>
|
||
<span class="w"> </span><span class="p">}</span>
|
||
<span class="w"> </span><span class="p">]</span>
|
||
<span class="p">}</span>
|
||
</code></pre></div>
|
||
|
||
<h3 id="2-markdown">2. <code>markdown</code><a class="headerlink" href="#2-markdown" title="Permanent link">¶</a></h3>
|
||
<p>Таблиці/розділи у Markdown форматі:</p>
|
||
<div class="codehilite"><pre><span></span><code><span class="gh"># Заголовок</span>
|
||
|
||
Текст параграфу...
|
||
|
||
| Колонка 1 | Колонка 2 |
|
||
|-----------|-----------|
|
||
| Значення 1 | Значення 2 |
|
||
</code></pre></div>
|
||
|
||
<h3 id="3-qa_pairs">3. <code>qa_pairs</code><a class="headerlink" href="#3-qa_pairs" title="Permanent link">¶</a></h3>
|
||
<p>Парсер одразу повертає Q&amp;A-пари по документу (через LLM-постпроцес):</p>
|
||
<div class="codehilite"><pre><span></span><code><span class="p">{</span>
|
||
<span class="w"> </span><span class="nt">"qa_pairs"</span><span class="p">:</span><span class="w"> </span><span class="p">[</span>
|
||
<span class="w"> </span><span class="p">{</span>
|
||
<span class="w"> </span><span class="nt">"question"</span><span class="p">:</span><span class="w"> </span><span class="s2">"Що таке токеноміка microDAO?"</span><span class="p">,</span>
|
||
<span class="w"> </span><span class="nt">"answer"</span><span class="p">:</span><span class="w"> </span><span class="s2">"Токеноміка microDAO включає..."</span><span class="p">,</span>
|
||
<span class="w"> </span><span class="nt">"source_page"</span><span class="p">:</span><span class="w"> </span><span class="mi">1</span><span class="p">,</span>
|
||
<span class="w"> </span><span class="nt">"source_bbox"</span><span class="p">:</span><span class="w"> </span><span class="p">[</span><span class="err">...</span><span class="p">]</span>
|
||
<span class="w"> </span><span class="p">}</span>
|
||
<span class="w"> </span><span class="p">]</span>
|
||
<span class="p">}</span>
|
||
</code></pre></div>
|
||
|
||
<h3 id="4-chunks">4. <code>chunks</code><a class="headerlink" href="#4-chunks" title="Permanent link">¶</a></h3>
|
||
<p>Масив семантичних фрагментів для RAG:</p>
|
||
<div class="codehilite"><pre><span></span><code><span class="p">{</span>
|
||
<span class="w"> </span><span class="nt">"chunks"</span><span class="p">:</span><span class="w"> </span><span class="p">[</span>
|
||
<span class="w"> </span><span class="p">{</span>
|
||
<span class="w"> </span><span class="nt">"text"</span><span class="p">:</span><span class="w"> </span><span class="s2">"Фрагмент тексту..."</span><span class="p">,</span>
|
||
<span class="w"> </span><span class="nt">"page"</span><span class="p">:</span><span class="w"> </span><span class="mi">1</span><span class="p">,</span>
|
||
<span class="w"> </span><span class="nt">"bbox"</span><span class="p">:</span><span class="w"> </span><span class="p">[</span><span class="err">...</span><span class="p">],</span>
|
||
<span class="w"> </span><span class="nt">"section"</span><span class="p">:</span><span class="w"> </span><span class="s2">"introduction"</span><span class="p">,</span>
|
||
<span class="w"> </span><span class="nt">"metadata"</span><span class="p">:</span><span class="w"> </span><span class="p">{</span>
|
||
<span class="w"> </span><span class="nt">"dao_id"</span><span class="p">:</span><span class="w"> </span><span class="s2">"daarion"</span><span class="p">,</span>
|
||
<span class="w"> </span><span class="nt">"doc_id"</span><span class="p">:</span><span class="w"> </span><span class="s2">"tokenomics_v1"</span>
|
||
<span class="w"> </span><span class="p">}</span>
|
||
<span class="w"> </span><span class="p">}</span>
|
||
<span class="w"> </span><span class="p">]</span>
|
||
<span class="p">}</span>
|
||
</code></pre></div>
|
||
|
||
<h2 id="_7">Вихідні дані<a class="headerlink" href="#_7" title="Permanent link">¶</a></h2>
|
||
<h3 id="parseddocument">Структура <code>ParsedDocument</code><a class="headerlink" href="#parseddocument" title="Permanent link">¶</a></h3>
|
||
<div class="codehilite"><pre><span></span><code><span class="kd">interface</span><span class="w"> </span><span class="nx">ParsedDocument</span><span class="w"> </span><span class="p">{</span>
|
||
<span class="w"> </span><span class="nx">doc_id</span><span class="o">:</span><span class="w"> </span><span class="kt">string</span><span class="p">;</span>
|
||
<span class="w"> </span><span class="nx">doc_url?</span><span class="o">:</span><span class="w"> </span><span class="kt">string</span><span class="p">;</span>
|
||
<span class="w"> </span><span class="nx">doc_type</span><span class="o">:</span><span class="w"> </span><span class="s2">"pdf"</span><span class="w"> </span><span class="o">|</span><span class="w"> </span><span class="s2">"image"</span><span class="p">;</span>
|
||
<span class="w"> </span><span class="nx">pages</span><span class="o">:</span><span class="w"> </span><span class="kt">ParsedPage</span><span class="p">[];</span>
|
||
<span class="w"> </span><span class="nx">metadata</span><span class="o">:</span><span class="w"> </span><span class="p">{</span>
|
||
<span class="w"> </span><span class="nx">dao_id</span><span class="o">:</span><span class="w"> </span><span class="kt">string</span><span class="p">;</span>
|
||
<span class="w"> </span><span class="nx">user_id</span><span class="o">:</span><span class="w"> </span><span class="kt">string</span><span class="p">;</span>
|
||
<span class="w"> </span><span class="nx">uploaded_at</span><span class="o">:</span><span class="w"> </span><span class="kt">string</span><span class="p">;</span>
|
||
<span class="w"> </span><span class="nx">file_size</span><span class="o">:</span><span class="w"> </span><span class="kt">number</span><span class="p">;</span>
|
||
<span class="w"> </span><span class="nx">page_count</span><span class="o">:</span><span class="w"> </span><span class="kt">number</span><span class="p">;</span>
|
||
<span class="w"> </span><span class="p">};</span>
|
||
<span class="p">}</span>
|
||
|
||
<span class="kd">interface</span><span class="w"> </span><span class="nx">ParsedPage</span><span class="w"> </span><span class="p">{</span>
|
||
<span class="w"> </span><span class="nx">page_num</span><span class="o">:</span><span class="w"> </span><span class="kt">number</span><span class="p">;</span>
|
||
<span class="w"> </span><span class="nx">blocks</span><span class="o">:</span><span class="w"> </span><span class="kt">ParsedBlock</span><span class="p">[];</span>
|
||
<span class="w"> </span><span class="nx">width</span><span class="o">:</span><span class="w"> </span><span class="kt">number</span><span class="p">;</span>
|
||
<span class="w"> </span><span class="nx">height</span><span class="o">:</span><span class="w"> </span><span class="kt">number</span><span class="p">;</span>
|
||
<span class="p">}</span>
|
||
|
||
<span class="kd">interface</span><span class="w"> </span><span class="nx">ParsedBlock</span><span class="w"> </span><span class="p">{</span>
|
||
<span class="w"> </span><span class="kr">type</span><span class="o">:</span><span class="w"> </span><span class="s2">"paragraph"</span><span class="w"> </span><span class="o">|</span><span class="w"> </span><span class="s2">"heading"</span><span class="w"> </span><span class="o">|</span><span class="w"> </span><span class="s2">"table"</span><span class="w"> </span><span class="o">|</span><span class="w"> </span><span class="s2">"formula"</span><span class="w"> </span><span class="o">|</span><span class="w"> </span><span class="s2">"figure_caption"</span><span class="w"> </span><span class="o">|</span><span class="w"> </span><span class="s2">"list"</span><span class="p">;</span>
|
||
<span class="w"> </span><span class="nx">text</span><span class="o">:</span><span class="w"> </span><span class="kt">string</span><span class="p">;</span>
|
||
<span class="w"> </span><span class="nx">bbox</span><span class="o">:</span><span class="w"> </span><span class="p">[</span><span class="nx">x</span><span class="o">:</span><span class="w"> </span><span class="kt">number</span><span class="p">,</span><span class="w"> </span><span class="nx">y</span><span class="o">:</span><span class="w"> </span><span class="kt">number</span><span class="p">,</span><span class="w"> </span><span class="nx">width</span><span class="o">:</span><span class="w"> </span><span class="kt">number</span><span class="p">,</span><span class="w"> </span><span class="nx">height</span><span class="o">:</span><span class="w"> </span><span class="kt">number</span><span class="p">];</span>
|
||
<span class="w"> </span><span class="nx">reading_order</span><span class="o">:</span><span class="w"> </span><span class="kt">number</span><span class="p">;</span>
|
||
<span class="w"> </span><span class="c1">// Для таблиць:</span>
|
||
<span class="w"> </span><span class="nx">table_data</span><span class="o">?:</span><span class="w"> </span><span class="p">{</span>
|
||
<span class="w"> </span><span class="nx">rows</span><span class="o">:</span><span class="w"> </span><span class="kt">string</span><span class="p">[][];</span>
|
||
<span class="w"> </span><span class="nx">columns</span><span class="o">:</span><span class="w"> </span><span class="kt">string</span><span class="p">[];</span>
|
||
<span class="w"> </span><span class="nx">merged_cells?</span><span class="o">:</span><span class="w"> </span><span class="kt">Array</span><span class="o"><</span><span class="p">{</span><span class="nx">row</span><span class="o">:</span><span class="w"> </span><span class="kt">number</span><span class="p">,</span><span class="w"> </span><span class="nx">col</span><span class="o">:</span><span class="w"> </span><span class="kt">number</span><span class="p">,</span><span class="w"> </span><span class="nx">rowspan</span><span class="o">:</span><span class="w"> </span><span class="kt">number</span><span class="p">,</span><span class="w"> </span><span class="nx">colspan</span><span class="o">:</span><span class="w"> </span><span class="kt">number</span><span class="p">}</span><span class="o">></span><span class="p">;</span>
|
||
<span class="w"> </span><span class="p">};</span>
|
||
<span class="p">}</span>
|
||
</code></pre></div>
|
||
|
||
<h2 id="_8">Обмеження<a class="headerlink" href="#_8" title="Permanent link">¶</a></h2>
|
||
<ul>
|
||
<li><strong>Max pages:</strong> Конфігурується через <code>PARSER_MAX_PAGES</code> (за замовчуванням: 100)</li>
|
||
<li><strong>Max resolution:</strong> Конфігурується через <code>PARSER_MAX_RESOLUTION</code> (за замовчуванням: 4096x4096)</li>
|
||
<li><strong>Max file size:</strong> Залежить від runtime (рекомендовано: до 50MB для PDF)</li>
|
||
<li><strong>Підтримка мов:</strong> Залежить від моделі dots.ocr (українська підтримується)</li>
|
||
</ul>
|
||
<h2 id="_9">Інтеграція з системою<a class="headerlink" href="#_9" title="Permanent link">¶</a></h2>
|
||
<h3 id="1-dagi-router">1. DAGI Router<a class="headerlink" href="#1-dagi-router" title="Permanent link">¶</a></h3>
|
||
<p>PARSER інтегрується як окремий провайдер:</p>
|
||
<div class="codehilite"><pre><span></span><code><span class="nt">providers</span><span class="p">:</span>
|
||
<span class="w">  </span><span class="nt">parser</span><span class="p">:</span>
|
||
<span class="w">    </span><span class="nt">type</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">ocr</span>
|
||
<span class="w">    </span><span class="nt">base_url</span><span class="p">:</span><span class="w"> </span><span class="s">"http://parser-service:9400"</span>
|
||
</code></pre></div>
|
||
|
||
<p><strong>Routing rule:</strong></p>
|
||
<div class="codehilite"><pre><span></span><code><span class="nt">routing</span><span class="p">:</span>
|
||
<span class="w">  </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="nt">id</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">doc_parse</span>
|
||
<span class="w">    </span><span class="nt">when</span><span class="p">:</span>
|
||
<span class="w">      </span><span class="nt">mode</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">doc_parse</span>
|
||
<span class="w">    </span><span class="nt">use_provider</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">parser</span>
|
||
</code></pre></div>
|
||
|
||
<h3 id="2-crewai-orchestrator">2. CrewAI Orchestrator<a class="headerlink" href="#2-crewai-orchestrator" title="Permanent link">¶</a></h3>
|
||
<p>PARSER як агент у CrewAI workflow:</p>
|
||
<ul>
|
||
<li><strong><code>doc_ingest_workflow</code>:</strong> Перевірка типу документа → виклик PARSER → інжест у RAG</li>
|
||
<li><strong><code>rag_answer_workflow</code>:</strong> Використання розпарсених документів для відповідей</li>
|
||
</ul>
|
||
<h3 id="3-rbac-integration">3. RBAC Integration<a class="headerlink" href="#3-rbac-integration" title="Permanent link">¶</a></h3>
|
||
<ul>
|
||
<li>Перевірка прав на інжест документів (<code>role: admin</code>, <code>role: researcher</code>)</li>
|
||
<li>Обмеження на приватні/публічні документи</li>
|
||
<li>Перевірка <code>dao_id</code> для ізоляції даних</li>
|
||
</ul>
|
||
<h2 id="_10">Використання<a class="headerlink" href="#_10" title="Permanent link">¶</a></h2>
|
||
<h3 id="parser">Приклад запиту до PARSER<a class="headerlink" href="#parser" title="Permanent link">¶</a></h3>
|
||
<div class="codehilite"><pre><span></span><code>curl<span class="w"> </span>-X<span class="w"> </span>POST<span class="w"> </span>http://parser-service:9400/ocr/parse<span class="w"> </span><span class="se">\</span>
|
||
<span class="w"> </span>-H<span class="w"> </span><span class="s2">"Content-Type: application/json"</span><span class="w"> </span><span class="se">\</span>
|
||
<span class="w"> </span>-d<span class="w"> </span><span class="s1">'{</span>
|
||
<span class="s1"> "doc_url": "https://example.com/tokenomics.pdf",</span>
|
||
<span class="s1"> "output_mode": "chunks",</span>
|
||
<span class="s1"> "dao_id": "daarion",</span>
|
||
<span class="s1"> "user_id": "user123"</span>
|
||
<span class="s1"> }'</span>
|
||
</code></pre></div>
|
||
|
||
<h3 id="dagi-router">Приклад через DAGI Router<a class="headerlink" href="#dagi-router" title="Permanent link">¶</a></h3>
|
||
<div class="codehilite"><pre><span></span><code>curl<span class="w"> </span>-X<span class="w"> </span>POST<span class="w"> </span>http://router:9102/route<span class="w"> </span><span class="se">\</span>
|
||
<span class="w"> </span>-H<span class="w"> </span><span class="s2">"Content-Type: application/json"</span><span class="w"> </span><span class="se">\</span>
|
||
<span class="w"> </span>-d<span class="w"> </span><span class="s1">'{</span>
|
||
<span class="s1"> "mode": "doc_parse",</span>
|
||
<span class="s1"> "dao_id": "daarion",</span>
|
||
<span class="s1"> "user_id": "user123",</span>
|
||
<span class="s1"> "payload": {</span>
|
||
<span class="s1"> "doc_url": "https://example.com/tokenomics.pdf",</span>
|
||
<span class="s1"> "output_mode": "qa_pairs"</span>
|
||
<span class="s1"> }</span>
|
||
<span class="s1"> }'</span>
|
||
</code></pre></div>
|
||
|
||
<h2 id="_11">Архітектура сервісу<a class="headerlink" href="#_11" title="Permanent link">¶</a></h2>
|
||
<div class="codehilite"><pre><span></span><code><span class="n">parser</span><span class="o">-</span><span class="n">service</span><span class="o">/</span>
|
||
<span class="err">├──</span><span class="w"> </span><span class="n">main</span><span class="o">.</span><span class="n">py</span><span class="w"> </span><span class="c1"># FastAPI сервіс</span>
|
||
<span class="err">├──</span><span class="w"> </span><span class="n">parser_runtime</span><span class="o">/</span><span class="w"> </span><span class="c1"># Runtime для dots.ocr</span>
|
||
<span class="err">│</span><span class="w"> </span><span class="err">├──</span><span class="w"> </span><span class="n">__init__</span><span class="o">.</span><span class="n">py</span>
|
||
<span class="err">│</span><span class="w"> </span><span class="err">├──</span><span class="w"> </span><span class="n">model_loader</span><span class="o">.</span><span class="n">py</span><span class="w"> </span><span class="c1"># Lazy init, GPU/CPU fallback</span>
|
||
<span class="err">│</span><span class="w"> </span><span class="err">└──</span><span class="w"> </span><span class="n">inference</span><span class="o">.</span><span class="n">py</span><span class="w"> </span><span class="c1"># parse_image, parse_pdf</span>
|
||
<span class="err">├──</span><span class="w"> </span><span class="n">schemas</span><span class="o">.</span><span class="n">py</span><span class="w"> </span><span class="c1"># Pydantic моделі</span>
|
||
<span class="err">└──</span><span class="w"> </span><span class="n">config</span><span class="o">.</span><span class="n">py</span><span class="w"> </span><span class="c1"># Конфігурація</span>
|
||
</code></pre></div>
|
||
|
||
<h2 id="_12">Залежності<a class="headerlink" href="#_12" title="Permanent link">¶</a></h2>
|
||
<ul>
|
||
<li><strong>Runtime:</strong> HuggingFace Transformers + vLLM/SGLang (або llama.cpp/GGUF)</li>
|
||
<li><strong>Модель:</strong> <code>rednote-hilab/dots.ocr</code></li>
|
||
<li><strong>Python:</strong> 3.11+</li>
|
||
<li><strong>GPU:</strong> Рекомендовано (можливий CPU fallback)</li>
|
||
</ul>
|
||
<h2 id="_13">Посилання<a class="headerlink" href="#_13" title="Permanent link">¶</a></h2>
|
||
<ul>
|
||
<li><a href="../../TODO-PARSER-RAG/">TODO: PARSER + RAG Implementation</a></li>
|
||
<li><a href="../dagi-router/">DAGI Router Documentation</a></li>
|
||
<li><a href="../crewai-orchestrator/">CrewAI Orchestrator</a></li>
|
||
</ul>
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
</article>
|
||
</div>
|
||
|
||
|
||
<script>var target=document.getElementById(location.hash.slice(1));target&&target.name&&(target.checked=target.name.startsWith("__tabbed_"))</script>
|
||
</div>
|
||
|
||
</main>
|
||
|
||
<footer class="md-footer">
|
||
|
||
<div class="md-footer-meta md-typeset">
|
||
<div class="md-footer-meta__inner md-grid">
|
||
<div class="md-copyright">
|
||
|
||
|
||
Made with
|
||
<a href="https://squidfunk.github.io/mkdocs-material/" target="_blank" rel="noopener">
|
||
Material for MkDocs
|
||
</a>
|
||
|
||
</div>
|
||
|
||
</div>
|
||
</div>
|
||
</footer>
|
||
|
||
</div>
|
||
<div class="md-dialog" data-md-component="dialog">
|
||
<div class="md-dialog__inner md-typeset"></div>
|
||
</div>
|
||
|
||
|
||
<script id="__config" type="application/json">{"base": "../..", "features": ["navigation.sections", "navigation.instant", "content.code.copy"], "search": "../../assets/javascripts/workers/search.b8dbb3d2.min.js", "translations": {"clipboard.copied": "Copied to clipboard", "clipboard.copy": "Copy to clipboard", "search.result.more.one": "1 more on this page", "search.result.more.other": "# more on this page", "search.result.none": "No matching documents", "search.result.one": "1 matching document", "search.result.other": "# matching documents", "search.result.placeholder": "Type to start searching", "search.result.term.missing": "Missing", "select.version": "Select version"}}</script>
|
||
|
||
|
||
<script src="../../assets/javascripts/bundle.3220b9d7.min.js"></script>
|
||
|
||
|
||
</body>
|
||
</html> |