""" Metrics Exporter — Prometheus metrics для worker """ import asyncio from typing import Optional from aiohttp import web from prometheus_client import Counter, Histogram, Gauge, generate_latest class MetricsExporter: def __init__(self, port: int = 9090): self.port = port self.app: Optional[web.Application] = None self.runner: Optional[web.AppRunner] = None # Metrics self.jobs_processed = Counter( "worker_jobs_processed_total", "Total jobs processed", ["type", "status"] ) self.job_duration = Histogram( "worker_job_duration_seconds", "Job execution duration", ["type"], buckets=[0.1, 0.5, 1.0, 5.0, 10.0, 30.0, 60.0] ) self.gpu_utilization = Gauge( "worker_gpu_utilization", "GPU utilization percentage", ["node_id"] ) self.vram_usage = Gauge( "worker_vram_usage_bytes", "VRAM usage in bytes", ["node_id"] ) self.errors_total = Counter( "worker_errors_total", "Total errors", ["type", "error_type"] ) async def start(self): """Запуск metrics server""" self.app = web.Application() self.app.router.add_get("/metrics", self.metrics_handler) self.runner = web.AppRunner(self.app) await self.runner.setup() site = web.TCPSite(self.runner, "0.0.0.0", self.port) await site.start() print(f"✅ Metrics server запущено на порту {self.port}") async def stop(self): """Зупинка metrics server""" if self.runner: await self.runner.cleanup() print("✅ Metrics server зупинено") async def metrics_handler(self, request): """HTTP handler для /metrics""" return web.Response( text=generate_latest(), content_type="text/plain" )