"""
Gateway Prometheus metrics.

Standardized metrics for observability. Every collector below is created at
import time; other modules import the constants and update them, while
``get_metrics`` / ``get_content_type`` supply the payload and content type
for a scrape response.
"""
from prometheus_client import (
    CONTENT_TYPE_LATEST,
    Counter,
    Gauge,
    Histogram,
    Info,
    generate_latest,
)

# Histogram bucket upper bounds, in seconds, one list per histogram.
_HTTP_DURATION_BUCKETS = [0.1, 0.5, 1.0, 2.0, 5.0, 10.0, 30.0, 60.0, 120.0]
_AGENT_DURATION_BUCKETS = [1.0, 5.0, 10.0, 30.0, 60.0, 120.0, 300.0]
_ROUTER_LATENCY_BUCKETS = [0.5, 1.0, 2.0, 5.0, 10.0, 30.0, 60.0, 120.0]

# === HTTP Metrics ===
# Request count, labelled by method, endpoint and response status.
HTTP_REQUESTS_TOTAL = Counter(
    name="gateway_http_requests_total",
    documentation="Total HTTP requests",
    labelnames=["method", "endpoint", "status"],
)

# Request duration distribution, labelled by method and endpoint.
HTTP_REQUEST_DURATION = Histogram(
    name="gateway_http_request_duration_seconds",
    documentation="HTTP request duration",
    labelnames=["method", "endpoint"],
    buckets=_HTTP_DURATION_BUCKETS,
)

# === Agent Metrics ===
# Agent run count; "status" is one of: started, completed, failed.
AGENT_RUNS_TOTAL = Counter(
    name="gateway_agent_runs_total",
    documentation="Total agent runs",
    labelnames=["agent", "status"],
)

# Agent run duration distribution, labelled by agent.
AGENT_RUN_DURATION = Histogram(
    name="gateway_agent_run_duration_seconds",
    documentation="Agent run duration",
    labelnames=["agent"],
    buckets=_AGENT_DURATION_BUCKETS,
)

# === LLM/Router Metrics ===
# Router call count; "status" is one of: success, error, timeout.
ROUTER_CALLS_TOTAL = Counter(
    name="gateway_router_calls_total",
    documentation="Total calls to router",
    labelnames=["status"],
)

# Router call latency distribution (no labels).
ROUTER_LATENCY = Histogram(
    name="gateway_router_latency_seconds",
    documentation="Router call latency",
    buckets=_ROUTER_LATENCY_BUCKETS,
)

# === Memory Service Metrics ===
# Memory service call count; "operation" is one of: save, search, delete.
MEMORY_CALLS_TOTAL = Counter(
    name="gateway_memory_calls_total",
    documentation="Total calls to memory service",
    labelnames=["operation", "status"],
)

# === Telegram Metrics ===
# Telegram message count; "direction" is one of: incoming, outgoing.
TELEGRAM_MESSAGES_TOTAL = Counter(
    name="gateway_telegram_messages_total",
    documentation="Total Telegram messages processed",
    labelnames=["agent", "direction"],
)

# === Errors ===
# Error count, labelled by error type and originating source.
ERRORS_TOTAL = Counter(
    name="gateway_errors_total",
    documentation="Total errors",
    labelnames=["type", "source"],
)

# === Active connections ===
# Unlabelled gauge for the number of requests currently in flight.
ACTIVE_REQUESTS = Gauge(
    name="gateway_active_requests",
    documentation="Currently active requests",
)

# === Service info ===
SERVICE_INFO = Info(
    name="gateway_service",
    documentation="Gateway service information",
)
# NOTE(review): version and node are hard-coded literals; confirm whether
# they should instead come from deployment configuration.
SERVICE_INFO.info(
    {
        "version": "2.0.0",
        "node": "node1",
    }
)


def get_metrics():
    """Return the current metrics serialized by ``generate_latest()``."""
    payload = generate_latest()
    return payload


def get_content_type():
    """Return ``CONTENT_TYPE_LATEST``, the content type for the payload."""
    return CONTENT_TYPE_LATEST