🔧 Add site/ to .gitignore (mkdocs build output)
Some checks failed
Build and Deploy Docs / build-and-deploy (push) Has been cancelled

This commit is contained in:
Apple
2026-01-10 07:57:47 -08:00
parent fb4f4a16d5
commit eed1e30aca
385 changed files with 1 additions and 369105 deletions

View File

@@ -1,973 +0,0 @@
<!doctype html>
<html lang="en" class="no-js">
<head>
<meta charset="utf-8">
<meta name="viewport" content="width=device-width,initial-scale=1">
<link rel="canonical" href="https://IvanTytar.github.io/microdao-daarion/RAG_METRICS_PLAN/">
<link rel="icon" href="../assets/images/favicon.png">
<meta name="generator" content="mkdocs-1.5.3, mkdocs-material-9.5.18">
<title>RAG Metrics &amp; Dashboard Plan - DAARION Documentation</title>
<link rel="stylesheet" href="../assets/stylesheets/main.66ac8b77.min.css">
<link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
<link rel="stylesheet" href="https://fonts.googleapis.com/css?family=Roboto:300,300i,400,400i,700,700i%7CRoboto+Mono:400,400i,700,700i&display=fallback">
<style>:root{--md-text-font:"Roboto";--md-code-font:"Roboto Mono"}</style>
<!-- Bootstrap helpers assigned as implicit globals by this classic script. __md_scope: URL of ".." resolved against the current location. __md_hash(str): folds the characters of str into an integer, computing (h<<5)-h+charCode for each character starting from 0. __md_get(key, storage=localStorage, scope=__md_scope): JSON-parses the storage item stored under scope.pathname + "." + key. __md_set(key, value, storage=localStorage, scope=__md_scope): stores the JSON-stringified value under the same key scheme and silently ignores any error thrown while doing so. -->
<script>__md_scope=new URL("..",location),__md_hash=e=>[...e].reduce((e,_)=>(e<<5)-e+_.charCodeAt(0),0),__md_get=(e,_=localStorage,t=__md_scope)=>JSON.parse(_.getItem(t.pathname+"."+e)),__md_set=(e,_,t=localStorage,a=__md_scope)=>{try{t.setItem(a.pathname+"."+e,JSON.stringify(_))}catch(e){}}</script>
</head>
<body dir="ltr">
<input class="md-toggle" data-md-toggle="drawer" type="checkbox" id="__drawer" autocomplete="off">
<input class="md-toggle" data-md-toggle="search" type="checkbox" id="__search" autocomplete="off">
<label class="md-overlay" for="__drawer"></label>
<div data-md-component="skip">
<a href="#rag-metrics-dashboard-plan" class="md-skip">
Skip to content
</a>
</div>
<div data-md-component="announce">
</div>
<header class="md-header md-header--shadow" data-md-component="header">
<nav class="md-header__inner md-grid" aria-label="Header">
<a href=".." title="DAARION Documentation" class="md-header__button md-logo" aria-label="DAARION Documentation" data-md-component="logo">
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M12 8a3 3 0 0 0 3-3 3 3 0 0 0-3-3 3 3 0 0 0-3 3 3 3 0 0 0 3 3m0 3.54C9.64 9.35 6.5 8 3 8v11c3.5 0 6.64 1.35 9 3.54 2.36-2.19 5.5-3.54 9-3.54V8c-3.5 0-6.64 1.35-9 3.54Z"/></svg>
</a>
<label class="md-header__button md-icon" for="__drawer">
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M3 6h18v2H3V6m0 5h18v2H3v-2m0 5h18v2H3v-2Z"/></svg>
</label>
<div class="md-header__title" data-md-component="header-title">
<div class="md-header__ellipsis">
<div class="md-header__topic">
<span class="md-ellipsis">
DAARION Documentation
</span>
</div>
<div class="md-header__topic" data-md-component="header-topic">
<span class="md-ellipsis">
RAG Metrics &amp; Dashboard Plan
</span>
</div>
</div>
</div>
<!-- Restores the stored color palette. Reads "__palette" through __md_get; when the result has a color entry: if color.media equals "(prefers-color-scheme)", matchMedia("(prefers-color-scheme: light)") decides whether the element with the light or the dark data-md-color-media value is selected, and that element's media, scheme, primary and accent data attributes are copied into palette.color. Afterwards every palette.color entry is written to body as a data-md-color-KEY attribute. NOTE(review): the querySelector result is used without a null check, and the markup visible in this page contains no element with a data-md-color-media attribute, so that branch would presumably throw if such a stored value exists; confirm against the theme configuration. -->
<script>var media,input,key,value,palette=__md_get("__palette");if(palette&&palette.color){"(prefers-color-scheme)"===palette.color.media&&(media=matchMedia("(prefers-color-scheme: light)"),input=document.querySelector(media.matches?"[data-md-color-media='(prefers-color-scheme: light)']":"[data-md-color-media='(prefers-color-scheme: dark)']"),palette.color.media=input.getAttribute("data-md-color-media"),palette.color.scheme=input.getAttribute("data-md-color-scheme"),palette.color.primary=input.getAttribute("data-md-color-primary"),palette.color.accent=input.getAttribute("data-md-color-accent"));for([key,value]of Object.entries(palette.color))document.body.setAttribute("data-md-color-"+key,value)}</script>
<label class="md-header__button md-icon" for="__search">
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M9.5 3A6.5 6.5 0 0 1 16 9.5c0 1.61-.59 3.09-1.56 4.23l.27.27h.79l5 5-1.5 1.5-5-5v-.79l-.27-.27A6.516 6.516 0 0 1 9.5 16 6.5 6.5 0 0 1 3 9.5 6.5 6.5 0 0 1 9.5 3m0 2C7 5 5 7 5 9.5S7 14 9.5 14 14 12 14 9.5 12 5 9.5 5Z"/></svg>
</label>
<div class="md-search" data-md-component="search" role="dialog">
<label class="md-search__overlay" for="__search"></label>
<div class="md-search__inner" role="search">
<form class="md-search__form" name="search">
<input type="text" class="md-search__input" name="query" aria-label="Search" placeholder="Search" autocapitalize="off" autocorrect="off" autocomplete="off" spellcheck="false" data-md-component="search-query" required>
<label class="md-search__icon md-icon" for="__search">
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M9.5 3A6.5 6.5 0 0 1 16 9.5c0 1.61-.59 3.09-1.56 4.23l.27.27h.79l5 5-1.5 1.5-5-5v-.79l-.27-.27A6.516 6.516 0 0 1 9.5 16 6.5 6.5 0 0 1 3 9.5 6.5 6.5 0 0 1 9.5 3m0 2C7 5 5 7 5 9.5S7 14 9.5 14 14 12 14 9.5 12 5 9.5 5Z"/></svg>
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M20 11v2H8l5.5 5.5-1.42 1.42L4.16 12l7.92-7.92L13.5 5.5 8 11h12Z"/></svg>
</label>
<nav class="md-search__options" aria-label="Search">
<button type="reset" class="md-search__icon md-icon" title="Clear" aria-label="Clear" tabindex="-1">
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M19 6.41 17.59 5 12 10.59 6.41 5 5 6.41 10.59 12 5 17.59 6.41 19 12 13.41 17.59 19 19 17.59 13.41 12 19 6.41Z"/></svg>
</button>
</nav>
</form>
<div class="md-search__output">
<div class="md-search__scrollwrap" data-md-scrollfix>
<div class="md-search-result" data-md-component="search-result">
<div class="md-search-result__meta">
Initializing search
</div>
<ol class="md-search-result__list" role="presentation"></ol>
</div>
</div>
</div>
</div>
</div>
</nav>
</header>
<div class="md-container" data-md-component="container">
<main class="md-main" data-md-component="main">
<div class="md-main__inner md-grid">
<div class="md-sidebar md-sidebar--primary" data-md-component="sidebar" data-md-type="navigation" >
<div class="md-sidebar__scrollwrap">
<div class="md-sidebar__inner">
<nav class="md-nav md-nav--primary" aria-label="Navigation" data-md-level="0">
<label class="md-nav__title" for="__drawer">
<a href=".." title="DAARION Documentation" class="md-nav__button md-logo" aria-label="DAARION Documentation" data-md-component="logo">
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M12 8a3 3 0 0 0 3-3 3 3 0 0 0-3-3 3 3 0 0 0-3 3 3 3 0 0 0 3 3m0 3.54C9.64 9.35 6.5 8 3 8v11c3.5 0 6.64 1.35 9 3.54 2.36-2.19 5.5-3.54 9-3.54V8c-3.5 0-6.64 1.35-9 3.54Z"/></svg>
</a>
DAARION Documentation
</label>
<ul class="md-nav__list" data-md-scrollfix>
<li class="md-nav__item">
<a href="../public/" class="md-nav__link">
<span class="md-ellipsis">
Home
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../public/getting-started/" class="md-nav__link">
<span class="md-ellipsis">
Getting Started
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../public/architecture-overview/" class="md-nav__link">
<span class="md-ellipsis">
Architecture
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../public/daiS_daos_overview/" class="md-nav__link">
<span class="md-ellipsis">
DAIS &amp; DAOS
</span>
</a>
</li>
<li class="md-nav__item md-nav__item--section md-nav__item--nested">
<input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_5" >
<label class="md-nav__link" for="__nav_5" id="__nav_5_label">
<span class="md-ellipsis">
Internal
</span>
<span class="md-nav__icon md-icon"></span>
</label>
<nav class="md-nav" data-md-level="1" aria-labelledby="__nav_5_label" aria-expanded="false">
<label class="md-nav__title" for="__nav_5">
<span class="md-nav__icon md-icon"></span>
Internal
</label>
<ul class="md-nav__list" data-md-scrollfix>
<li class="md-nav__item md-nav__item--nested">
<input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_5_1" >
<label class="md-nav__link" for="__nav_5_1" id="__nav_5_1_label" tabindex="0">
<span class="md-ellipsis">
Infra
</span>
<span class="md-nav__icon md-icon"></span>
</label>
<nav class="md-nav" data-md-level="2" aria-labelledby="__nav_5_1_label" aria-expanded="false">
<label class="md-nav__title" for="__nav_5_1">
<span class="md-nav__icon md-icon"></span>
Infra
</label>
<ul class="md-nav__list" data-md-scrollfix>
<li class="md-nav__item">
<a href="../internal/infra/INFRA_AUTOMATION_PACK_V1/" class="md-nav__link">
<span class="md-ellipsis">
Infra Automation Pack v1
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../internal/infra/monitoring_overview/" class="md-nav__link">
<span class="md-ellipsis">
Monitoring Overview
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../internal/infra/nodes_registry_v0/" class="md-nav__link">
<span class="md-ellipsis">
Nodes Registry v0
</span>
</a>
</li>
</ul>
</nav>
</li>
<li class="md-nav__item md-nav__item--nested">
<input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_5_2" >
<label class="md-nav__link" for="__nav_5_2" id="__nav_5_2_label" tabindex="0">
<span class="md-ellipsis">
Specs
</span>
<span class="md-nav__icon md-icon"></span>
</label>
<nav class="md-nav" data-md-level="2" aria-labelledby="__nav_5_2_label" aria-expanded="false">
<label class="md-nav__title" for="__nav_5_2">
<span class="md-nav__icon md-icon"></span>
Specs
</label>
<ul class="md-nav__list" data-md-scrollfix>
<li class="md-nav__item">
<a href="../internal/specs/matrix_presence_aggregator/" class="md-nav__link">
<span class="md-ellipsis">
Matrix Presence Aggregator
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../internal/specs/city_map_spec/" class="md-nav__link">
<span class="md-ellipsis">
City Map Spec
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../internal/specs/node_join_protocol_draft/" class="md-nav__link">
<span class="md-ellipsis">
Node Join Protocol (Draft)
</span>
</a>
</li>
</ul>
</nav>
</li>
</ul>
</nav>
</li>
</ul>
</nav>
</div>
</div>
</div>
<div class="md-sidebar md-sidebar--secondary" data-md-component="sidebar" data-md-type="toc" >
<div class="md-sidebar__scrollwrap">
<div class="md-sidebar__inner">
<nav class="md-nav md-nav--secondary" aria-label="Table of contents">
<label class="md-nav__title" for="__toc">
<span class="md-nav__icon md-icon"></span>
Table of contents
</label>
<ul class="md-nav__list" data-md-component="toc" data-md-scrollfix>
<li class="md-nav__item">
<a href="#1" class="md-nav__link">
<span class="md-ellipsis">
1. Метрики для збору
</span>
</a>
<nav class="md-nav" aria-label="1. Метрики для збору">
<ul class="md-nav__list">
<li class="md-nav__item">
<a href="#11-rag-service-metrics" class="md-nav__link">
<span class="md-ellipsis">
1.1. RAG Service Metrics
</span>
</a>
</li>
<li class="md-nav__item">
<a href="#12-router-metrics-rag-query-mode" class="md-nav__link">
<span class="md-ellipsis">
1.2. Router Metrics (RAG Query Mode)
</span>
</a>
</li>
<li class="md-nav__item">
<a href="#13-memory-service-metrics" class="md-nav__link">
<span class="md-ellipsis">
1.3. Memory Service Metrics
</span>
</a>
</li>
</ul>
</nav>
</li>
<li class="md-nav__item">
<a href="#2" class="md-nav__link">
<span class="md-ellipsis">
2. Де збирати метрики
</span>
</a>
<nav class="md-nav" aria-label="2. Де збирати метрики">
<ul class="md-nav__list">
<li class="md-nav__item">
<a href="#21-rag-service" class="md-nav__link">
<span class="md-ellipsis">
2.1. RAG Service
</span>
</a>
</li>
<li class="md-nav__item">
<a href="#22-router" class="md-nav__link">
<span class="md-ellipsis">
2.2. Router
</span>
</a>
</li>
</ul>
</nav>
</li>
<li class="md-nav__item">
<a href="#3-dashboard-grafana" class="md-nav__link">
<span class="md-ellipsis">
3. Dashboard (Grafana)
</span>
</a>
<nav class="md-nav" aria-label="3. Dashboard (Grafana)">
<ul class="md-nav__list">
<li class="md-nav__item">
<a href="#31-panels" class="md-nav__link">
<span class="md-ellipsis">
3.1. Panels
</span>
</a>
</li>
<li class="md-nav__item">
<a href="#32-alerts" class="md-nav__link">
<span class="md-ellipsis">
3.2. Alerts
</span>
</a>
</li>
</ul>
</nav>
</li>
<li class="md-nav__item">
<a href="#4" class="md-nav__link">
<span class="md-ellipsis">
4. Реалізація (мінімальна)
</span>
</a>
<nav class="md-nav" aria-label="4. Реалізація (мінімальна)">
<ul class="md-nav__list">
<li class="md-nav__item">
<a href="#41-prometheus-client" class="md-nav__link">
<span class="md-ellipsis">
4.1. Додати Prometheus Client
</span>
</a>
</li>
<li class="md-nav__item">
<a href="#42-expose-metrics-endpoint" class="md-nav__link">
<span class="md-ellipsis">
4.2. Expose Metrics Endpoint
</span>
</a>
</li>
<li class="md-nav__item">
<a href="#43-docker-compose-prometheus-grafana" class="md-nav__link">
<span class="md-ellipsis">
4.3. Docker Compose для Prometheus + Grafana
</span>
</a>
</li>
</ul>
</nav>
</li>
<li class="md-nav__item">
<a href="#5" class="md-nav__link">
<span class="md-ellipsis">
5. Наступні кроки
</span>
</a>
</li>
<li class="md-nav__item">
<a href="#6" class="md-nav__link">
<span class="md-ellipsis">
6. Корисні запити для аналізу
</span>
</a>
</li>
</ul>
</nav>
</div>
</div>
</div>
<div class="md-content" data-md-component="content">
<article class="md-content__inner md-typeset">
<h1 id="rag-metrics-dashboard-plan">RAG Metrics &amp; Dashboard Plan<a class="headerlink" href="#rag-metrics-dashboard-plan" title="Permanent link">&para;</a></h1>
<p>План збору метрик та створення дашборду для RAG + Memory стеку.</p>
<hr />
<h2 id="1">1. Метрики для збору<a class="headerlink" href="#1" title="Permanent link">&para;</a></h2>
<h3 id="11-rag-service-metrics">1.1. RAG Service Metrics<a class="headerlink" href="#11-rag-service-metrics" title="Permanent link">&para;</a></h3>
<p><strong>Ingest Metrics:</strong></p>
<ul>
<li><code>rag_ingest_total</code> - загальна кількість ingest операцій</li>
<li><code>rag_ingest_duration_seconds</code> - час ingest (histogram)</li>
<li><code>rag_ingest_documents_indexed</code> - кількість індексованих документів</li>
<li><code>rag_ingest_pages_processed</code> - кількість оброблених сторінок</li>
<li><code>rag_ingest_errors_total</code> - кількість помилок ingest</li>
</ul>
<p><strong>Query Metrics:</strong></p>
<ul>
<li><code>rag_query_total</code> - загальна кількість запитів</li>
<li><code>rag_query_duration_seconds</code> - час query (histogram)</li>
<li><code>rag_query_documents_retrieved</code> - кількість знайдених документів</li>
<li><code>rag_query_citations_count</code> - кількість citations</li>
<li><code>rag_query_embedding_time_seconds</code> - час embedding</li>
<li><code>rag_query_retrieval_time_seconds</code> - час retrieval</li>
<li><code>rag_query_llm_time_seconds</code> - час LLM генерації</li>
<li><code>rag_query_errors_total</code> - кількість помилок query</li>
<li><code>rag_query_empty_results_total</code> - запити без результатів</li>
</ul>
<p><strong>Quality Metrics:</strong></p>
<ul>
<li><code>rag_query_dao_filter_applied</code> - застосування dao_id фільтра</li>
<li><code>rag_query_doc_ids_found</code> - унікальні doc_ids в результатах</li>
</ul>
<h3 id="12-router-metrics-rag-query-mode">1.2. Router Metrics (RAG Query Mode)<a class="headerlink" href="#12-router-metrics-rag-query-mode" title="Permanent link">&para;</a></h3>
<ul>
<li><code>router_rag_query_total</code> - загальна кількість rag_query запитів</li>
<li><code>router_rag_query_duration_seconds</code> - загальний час обробки</li>
<li><code>router_rag_query_memory_used</code> - використання Memory</li>
<li><code>router_rag_query_rag_used</code> - використання RAG</li>
<li><code>router_rag_query_prompt_tokens_estimated</code> - оцінка токенів промпту</li>
<li><code>router_rag_query_fallback_total</code> - fallback на Memory only</li>
</ul>
<h3 id="13-memory-service-metrics">1.3. Memory Service Metrics<a class="headerlink" href="#13-memory-service-metrics" title="Permanent link">&para;</a></h3>
<ul>
<li><code>memory_context_fetch_total</code> - кількість викликів get_context</li>
<li><code>memory_context_fetch_duration_seconds</code> - час отримання контексту</li>
<li><code>memory_context_facts_count</code> - кількість facts</li>
<li><code>memory_context_events_count</code> - кількість events</li>
<li><code>memory_context_summaries_count</code> - кількість summaries</li>
</ul>
<hr />
<h2 id="2">2. Де збирати метрики<a class="headerlink" href="#2" title="Permanent link">&para;</a></h2>
<h3 id="21-rag-service">2.1. RAG Service<a class="headerlink" href="#21-rag-service" title="Permanent link">&para;</a></h3>
<p><strong>Файл:</strong> <code>services/rag-service/app/metrics.py</code></p>
<div class="codehilite"><pre><span></span><code><span class="kn">from</span><span class="w"> </span><span class="nn">prometheus_client</span><span class="w"> </span><span class="kn">import</span> <span class="n">Counter</span><span class="p">,</span> <span class="n">Histogram</span><span class="p">,</span> <span class="n">Gauge</span>
<span class="c1"># Ingest metrics</span>
<span class="n">ingest_total</span> <span class="o">=</span> <span class="n">Counter</span><span class="p">(</span><span class="s1">&#39;rag_ingest_total&#39;</span><span class="p">,</span> <span class="s1">&#39;Total ingest operations&#39;</span><span class="p">)</span>
<span class="n">ingest_duration</span> <span class="o">=</span> <span class="n">Histogram</span><span class="p">(</span><span class="s1">&#39;rag_ingest_duration_seconds&#39;</span><span class="p">,</span> <span class="s1">&#39;Ingest duration&#39;</span><span class="p">)</span>
<span class="n">ingest_documents</span> <span class="o">=</span> <span class="n">Counter</span><span class="p">(</span><span class="s1">&#39;rag_ingest_documents_indexed&#39;</span><span class="p">,</span> <span class="s1">&#39;Documents indexed&#39;</span><span class="p">)</span>
<span class="n">ingest_errors</span> <span class="o">=</span> <span class="n">Counter</span><span class="p">(</span><span class="s1">&#39;rag_ingest_errors_total&#39;</span><span class="p">,</span> <span class="s1">&#39;Ingest errors&#39;</span><span class="p">)</span>
<span class="c1"># Query metrics</span>
<span class="n">query_total</span> <span class="o">=</span> <span class="n">Counter</span><span class="p">(</span><span class="s1">&#39;rag_query_total&#39;</span><span class="p">,</span> <span class="s1">&#39;Total queries&#39;</span><span class="p">)</span>
<span class="n">query_duration</span> <span class="o">=</span> <span class="n">Histogram</span><span class="p">(</span><span class="s1">&#39;rag_query_duration_seconds&#39;</span><span class="p">,</span> <span class="s1">&#39;Query duration&#39;</span><span class="p">)</span>
<span class="n">query_documents</span> <span class="o">=</span> <span class="n">Histogram</span><span class="p">(</span><span class="s1">&#39;rag_query_documents_retrieved&#39;</span><span class="p">,</span> <span class="s1">&#39;Documents retrieved&#39;</span><span class="p">)</span>
<span class="n">query_citations</span> <span class="o">=</span> <span class="n">Histogram</span><span class="p">(</span><span class="s1">&#39;rag_query_citations_count&#39;</span><span class="p">,</span> <span class="s1">&#39;Citations count&#39;</span><span class="p">)</span>
<span class="n">query_errors</span> <span class="o">=</span> <span class="n">Counter</span><span class="p">(</span><span class="s1">&#39;rag_query_errors_total&#39;</span><span class="p">,</span> <span class="s1">&#39;Query errors&#39;</span><span class="p">)</span>
<span class="n">query_empty</span> <span class="o">=</span> <span class="n">Counter</span><span class="p">(</span><span class="s1">&#39;rag_query_empty_results_total&#39;</span><span class="p">,</span> <span class="s1">&#39;Empty results&#39;</span><span class="p">)</span>
<span class="c1"># Quality metrics</span>
<span class="n">query_dao_filter</span> <span class="o">=</span> <span class="n">Counter</span><span class="p">(</span><span class="s1">&#39;rag_query_dao_filter_applied&#39;</span><span class="p">,</span> <span class="s1">&#39;DAO filter applied&#39;</span><span class="p">,</span> <span class="p">[</span><span class="s1">&#39;dao_id&#39;</span><span class="p">])</span>
</code></pre></div>
<p><strong>Використання:</strong></p>
<ul>
<li>В <code>ingest_pipeline.py</code>: після успішного ingest</li>
<li>В <code>query_pipeline.py</code>: після кожного query</li>
</ul>
<h3 id="22-router">2.2. Router<a class="headerlink" href="#22-router" title="Permanent link">&para;</a></h3>
<p><strong>Файл:</strong> <code>metrics.py</code> (в корені Router)</p>
<div class="codehilite"><pre><span></span><code><span class="kn">from</span><span class="w"> </span><span class="nn">prometheus_client</span><span class="w"> </span><span class="kn">import</span> <span class="n">Counter</span><span class="p">,</span> <span class="n">Histogram</span>
<span class="n">rag_query_total</span> <span class="o">=</span> <span class="n">Counter</span><span class="p">(</span><span class="s1">&#39;router_rag_query_total&#39;</span><span class="p">,</span> <span class="s1">&#39;Total RAG queries&#39;</span><span class="p">)</span>
<span class="n">rag_query_duration</span> <span class="o">=</span> <span class="n">Histogram</span><span class="p">(</span><span class="s1">&#39;router_rag_query_duration_seconds&#39;</span><span class="p">,</span> <span class="s1">&#39;RAG query duration&#39;</span><span class="p">)</span>
<span class="n">rag_query_memory_used</span> <span class="o">=</span> <span class="n">Counter</span><span class="p">(</span><span class="s1">&#39;router_rag_query_memory_used&#39;</span><span class="p">,</span> <span class="s1">&#39;Memory used in RAG queries&#39;</span><span class="p">)</span>
<span class="n">rag_query_rag_used</span> <span class="o">=</span> <span class="n">Counter</span><span class="p">(</span><span class="s1">&#39;router_rag_query_rag_used&#39;</span><span class="p">,</span> <span class="s1">&#39;RAG used in queries&#39;</span><span class="p">)</span>
<span class="n">rag_query_fallback</span> <span class="o">=</span> <span class="n">Counter</span><span class="p">(</span><span class="s1">&#39;router_rag_query_fallback_total&#39;</span><span class="p">,</span> <span class="s1">&#39;Fallback to Memory only&#39;</span><span class="p">)</span>
</code></pre></div>
<p><strong>Використання:</strong></p>
<ul>
<li>В <code>router_app.py</code>: в <code>_handle_rag_query()</code></li>
</ul>
<hr />
<h2 id="3-dashboard-grafana">3. Dashboard (Grafana)<a class="headerlink" href="#3-dashboard-grafana" title="Permanent link">&para;</a></h2>
<h3 id="31-panels">3.1. Panels<a class="headerlink" href="#31-panels" title="Permanent link">&para;</a></h3>
<p><strong>RAG Service:</strong></p>
<ol>
<li><strong>Ingest Rate</strong> - <code>rate(rag_ingest_total[5m])</code></li>
<li><strong>Ingest Duration</strong> - <code>histogram_quantile(0.95, sum by (le) (rate(rag_ingest_duration_seconds_bucket[5m])))</code></li>
<li><strong>Documents Indexed</strong> - <code>sum(rag_ingest_documents_indexed)</code></li>
<li><strong>Query Rate</strong> - <code>rate(rag_query_total[5m])</code></li>
<li><strong>Query Duration</strong> - <code>histogram_quantile(0.95, sum by (le) (rate(rag_query_duration_seconds_bucket[5m])))</code></li>
<li><strong>Documents Retrieved</strong> - <code>sum(rate(rag_query_documents_retrieved_sum[5m])) / sum(rate(rag_query_documents_retrieved_count[5m]))</code></li>
<li><strong>Citations Count</strong> - <code>sum(rate(rag_query_citations_count_sum[5m])) / sum(rate(rag_query_citations_count_count[5m]))</code></li>
<li><strong>Empty Results Rate</strong> - <code>rate(rag_query_empty_results_total[5m]) / rate(rag_query_total[5m])</code></li>
</ol>
<p><strong>Router (RAG Query):</strong></p>
<ol>
<li><strong>RAG Query Rate</strong> - <code>rate(router_rag_query_total[5m])</code></li>
<li><strong>RAG Query Duration</strong> - <code>histogram_quantile(0.95, sum by (le) (rate(router_rag_query_duration_seconds_bucket[5m])))</code></li>
<li><strong>Memory Usage Rate</strong> - <code>rate(router_rag_query_memory_used[5m]) / rate(router_rag_query_total[5m])</code></li>
<li><strong>RAG Usage Rate</strong> - <code>rate(router_rag_query_rag_used[5m]) / rate(router_rag_query_total[5m])</code></li>
<li><strong>Fallback Rate</strong> - <code>rate(router_rag_query_fallback_total[5m]) / rate(router_rag_query_total[5m])</code></li>
</ol>
<p><strong>Memory Service:</strong></p>
<ol>
<li><strong>Context Fetch Rate</strong> - <code>rate(memory_context_fetch_total[5m])</code></li>
<li><strong>Context Fetch Duration</strong> - <code>histogram_quantile(0.95, sum by (le) (rate(memory_context_fetch_duration_seconds_bucket[5m])))</code></li>
<li><strong>Average Facts Count</strong> - <code>avg(memory_context_facts_count)</code></li>
<li><strong>Average Events Count</strong> - <code>avg(memory_context_events_count)</code></li>
</ol>
<h3 id="32-alerts">3.2. Alerts<a class="headerlink" href="#32-alerts" title="Permanent link">&para;</a></h3>
<ul>
<li><strong>High Error Rate</strong>: <code>rate(rag_query_errors_total[5m]) &gt; 0.1</code></li>
<li><strong>Slow Queries</strong>: <code>histogram_quantile(0.95, sum by (le) (rate(rag_query_duration_seconds_bucket[5m]))) &gt; 10</code></li>
<li><strong>High Fallback Rate</strong>: <code>rate(router_rag_query_fallback_total[5m]) / rate(router_rag_query_total[5m]) &gt; 0.2</code></li>
<li><strong>Empty Results</strong>: <code>rate(rag_query_empty_results_total[5m]) / rate(rag_query_total[5m]) &gt; 0.3</code></li>
</ul>
<hr />
<h2 id="4">4. Реалізація (мінімальна)<a class="headerlink" href="#4" title="Permanent link">&para;</a></h2>
<h3 id="41-prometheus-client">4.1. Додати Prometheus Client<a class="headerlink" href="#41-prometheus-client" title="Permanent link">&para;</a></h3>
<p><strong>RAG Service:</strong></p>
<div class="codehilite"><pre><span></span><code>pip<span class="w"> </span>install<span class="w"> </span>prometheus-client
</code></pre></div>
<p><strong>Router:</strong></p>
<div class="codehilite"><pre><span></span><code>pip<span class="w"> </span>install<span class="w"> </span>prometheus-client
</code></pre></div>
<h3 id="42-expose-metrics-endpoint">4.2. Expose Metrics Endpoint<a class="headerlink" href="#42-expose-metrics-endpoint" title="Permanent link">&para;</a></h3>
<p><strong>RAG Service:</strong></p>
<div class="codehilite"><pre><span></span><code><span class="c1"># app/main.py</span>
<span class="kn">from</span><span class="w"> </span><span class="nn">prometheus_client</span><span class="w"> </span><span class="kn">import</span> <span class="n">generate_latest</span><span class="p">,</span> <span class="n">CONTENT_TYPE_LATEST</span>
<span class="kn">from</span><span class="w"> </span><span class="nn">fastapi.responses</span><span class="w"> </span><span class="kn">import</span> <span class="n">Response</span>
<span class="nd">@app</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s2">&quot;/metrics&quot;</span><span class="p">)</span>
<span class="k">async</span> <span class="k">def</span><span class="w"> </span><span class="nf">metrics</span><span class="p">():</span>
    <span class="k">return</span> <span class="n">Response</span><span class="p">(</span><span class="n">content</span><span class="o">=</span><span class="n">generate_latest</span><span class="p">(),</span> <span class="n">media_type</span><span class="o">=</span><span class="n">CONTENT_TYPE_LATEST</span><span class="p">)</span>
</code></pre></div>
<p><strong>Router:</strong></p>
<div class="codehilite"><pre><span></span><code><span class="c1"># http_api.py</span>
<span class="nd">@app</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s2">&quot;/metrics&quot;</span><span class="p">)</span>
<span class="k">async</span> <span class="k">def</span><span class="w"> </span><span class="nf">metrics</span><span class="p">():</span>
    <span class="k">return</span> <span class="n">Response</span><span class="p">(</span><span class="n">content</span><span class="o">=</span><span class="n">generate_latest</span><span class="p">(),</span> <span class="n">media_type</span><span class="o">=</span><span class="n">CONTENT_TYPE_LATEST</span><span class="p">)</span>
</code></pre></div>
<h3 id="43-docker-compose-prometheus-grafana">4.3. Docker Compose для Prometheus + Grafana<a class="headerlink" href="#43-docker-compose-prometheus-grafana" title="Permanent link">&para;</a></h3>
<div class="codehilite"><pre><span></span><code><span class="nt">prometheus</span><span class="p">:</span>
<span class="w"> </span><span class="nt">image</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">prom/prometheus</span>
<span class="w"> </span><span class="nt">volumes</span><span class="p">:</span>
<span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">./prometheus.yml:/etc/prometheus/prometheus.yml</span>
<span class="w"> </span><span class="nt">ports</span><span class="p">:</span>
<span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="s">&quot;9090:9090&quot;</span>
<span class="nt">grafana</span><span class="p">:</span>
<span class="w"> </span><span class="nt">image</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">grafana/grafana</span>
<span class="w"> </span><span class="nt">ports</span><span class="p">:</span>
<span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="s">&quot;3000:3000&quot;</span>
<span class="w"> </span><span class="nt">environment</span><span class="p">:</span>
<span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">GF_SECURITY_ADMIN_PASSWORD=admin</span>
</code></pre></div>
<hr />
<h2 id="5">5. Наступні кроки<a class="headerlink" href="#5" title="Permanent link">&para;</a></h2>
<ol>
<li>Додати <code>prometheus-client</code> в requirements</li>
<li>Створити <code>metrics.py</code> в RAG Service та Router</li>
<li>Додати <code>/metrics</code> endpoints</li>
<li>Налаштувати Prometheus scraping</li>
<li>Створити Grafana dashboard</li>
<li>Налаштувати alerts</li>
</ol>
<hr />
<h2 id="6">6. Корисні запити для аналізу<a class="headerlink" href="#6" title="Permanent link">&para;</a></h2>
<p><strong>Hit Rate (кількість успішних запитів з результатами):</strong></p>
<div class="codehilite"><pre><span></span><code>(rag_query_total - rag_query_empty_results_total) / rag_query_total
</code></pre></div>
<p><strong>Average Documents per Query:</strong></p>
<div class="codehilite"><pre><span></span><code>sum(rate(rag_query_documents_retrieved_sum[5m])) / sum(rate(rag_query_documents_retrieved_count[5m]))
</code></pre></div>
<p><strong>DAO Distribution:</strong></p>
<div class="codehilite"><pre><span></span><code>sum by (dao_id) (rag_query_dao_filter_applied)
</code></pre></div>
<p><strong>Token Usage:</strong></p>
<div class="codehilite"><pre><span></span><code>avg(router_rag_query_prompt_tokens_estimated)
</code></pre></div>
</article>
</div>
<!-- Looks up the element whose id equals the URL fragment (location.hash without its leading "#"). When that element exists and has a non-empty name, its checked property is set to whether the name starts with "__tabbed_". -->
<script>var target=document.getElementById(location.hash.slice(1));target&&target.name&&(target.checked=target.name.startsWith("__tabbed_"))</script>
</div>
</main>
<footer class="md-footer">
<div class="md-footer-meta md-typeset">
<div class="md-footer-meta__inner md-grid">
<div class="md-copyright">
Made with
<a href="https://squidfunk.github.io/mkdocs-material/" target="_blank" rel="noopener">
Material for MkDocs
</a>
</div>
</div>
</div>
</footer>
</div>
<div class="md-dialog" data-md-component="dialog">
<div class="md-dialog__inner md-typeset"></div>
</div>
<script id="__config" type="application/json">{"base": "..", "features": ["navigation.sections", "navigation.instant", "content.code.copy"], "search": "../assets/javascripts/workers/search.b8dbb3d2.min.js", "translations": {"clipboard.copied": "Copied to clipboard", "clipboard.copy": "Copy to clipboard", "search.result.more.one": "1 more on this page", "search.result.more.other": "# more on this page", "search.result.none": "No matching documents", "search.result.one": "1 matching document", "search.result.other": "# matching documents", "search.result.placeholder": "Type to start searching", "search.result.term.missing": "Missing", "select.version": "Select version"}}</script>
<script src="../assets/javascripts/bundle.3220b9d7.min.js"></script>
</body>
</html>