Files
microdao-daarion/services/node-registry/app/node1_prometheus.py
Apple 3de3c8cb36 feat: Add presence heartbeat for Matrix online status
- matrix-gateway: POST /internal/matrix/presence/online endpoint
- usePresenceHeartbeat hook with activity tracking
- Auto away after 5 min inactivity
- Offline on page close/visibility change
- Integrated in MatrixChatRoom component
2025-11-27 00:19:40 -08:00

88 lines
3.0 KiB
Python

"""
Helper for reading real Prometheus metrics from NODE1 via SSH tunnel.
Assumes tunnel exposes Prometheus locally (http://localhost:19090 by default).
"""
from __future__ import annotations

import math
import os
from datetime import datetime, timezone
from typing import Any, Dict, Optional

import requests
# Base URL of the Prometheus HTTP API; override with NODE1_PROMETHEUS_URL.
PROM_URL = os.environ.get("NODE1_PROMETHEUS_URL", "http://localhost:19090")
# Timeout handed to every requests.get() call; override with
# NODE1_PROMETHEUS_TIMEOUT (parsed as a float at import time).
REQUEST_TIMEOUT = float(os.environ.get("NODE1_PROMETHEUS_TIMEOUT", "2.5"))

# PromQL expressions used by get_node1_metrics().
# Health probe: the "up" series of the job named "prometheus".
PROM_HEALTH_QUERY = 'up{job="prometheus"}'
# 100 minus the averaged 5-minute idle CPU rate scaled to percent.
CPU_USAGE_QUERY = '100 - (avg(rate(node_cpu_seconds_total{mode="idle"}[5m])) * 100)'
# Share of memory that is not "available", as a percentage.
MEMORY_USAGE_QUERY = (
    "(1 - (avg(node_memory_MemAvailable_bytes)"
    " / avg(node_memory_MemTotal_bytes))) * 100"
)
# Share of the "/" mountpoint that is not free, skipping tmpfs/overlay.
DISK_USAGE_QUERY = (
    '(1 - (sum(node_filesystem_free_bytes{mountpoint="/",fstype!~"tmpfs|overlay"})'
    ' / sum(node_filesystem_size_bytes{mountpoint="/",fstype!~"tmpfs|overlay"}))) * 100'
)
# Average of the dcgm_gpu_utilization series.
GPU_USAGE_QUERY = "avg(dcgm_gpu_utilization)"
def prom_query(query: str) -> Optional[float]:
    """Run a Prometheus instant query and return its first sample as a float.

    Args:
        query: PromQL expression sent as the ``query`` parameter to
            ``{PROM_URL}/api/v1/query``.

    Returns:
        The value of the first entry in ``data.result``, converted to
        ``float``. ``None`` when the reported status is not ``"success"``,
        when the result list is empty, or when anything raises along the way
        (connection failure, HTTP error status, malformed payload, ...).
    """
    try:
        reply = requests.get(
            f"{PROM_URL}/api/v1/query",
            params={"query": query},
            timeout=REQUEST_TIMEOUT,
        )
        reply.raise_for_status()
        body = reply.json()
        if body.get("status") == "success":
            series = body.get("data", {}).get("result", [])
            if series:
                # Each sample is a [timestamp, value] pair; only the value
                # of the first series is of interest.
                return float(series[0]["value"][1])
    except Exception:
        # Best-effort helper: every failure is reported as "no data".
        return None
    return None
def _clamp(value: Optional[float]) -> float:
if value is None:
return 0.0
return max(0.0, min(100.0, round(value, 2)))
def get_node1_metrics() -> Dict[str, Any]:
    """
    Collect NODE1 metrics via the Prometheus tunnel.

    Returns structure compatible with system_metrics.get_all_metrics().

    Returns:
        On a failed health probe: ``{"success": False, "error": ...,
        "source": PROM_URL}``.

        Otherwise a dict with ``success=True``, ``metrics_available``
        (whether at least one usage query produced a sample), ``source``,
        a UTC ISO-8601 ``timestamp`` ending in ``"Z"``, an optional
        ``message`` and ``metrics`` holding ``{"percent": ...}`` entries for
        ``cpu``, ``memory``, ``disk`` and ``gpu``. Metrics without a sample
        are reported as ``0.0``.
    """
    # prom_query() yields None both when Prometheus cannot be reached and
    # when the query returns no samples, so the error text must cover both.
    if prom_query(PROM_HEALTH_QUERY) is None:
        return {
            "success": False,
            "error": "Prometheus unreachable or returned no data for health query",
            "source": PROM_URL,
        }

    raw_values = {
        "cpu": prom_query(CPU_USAGE_QUERY),
        "memory": prom_query(MEMORY_USAGE_QUERY),
        "disk": prom_query(DISK_USAGE_QUERY),
        "gpu": prom_query(GPU_USAGE_QUERY),
    }

    # Judge availability on the raw query results rather than on the clamped
    # numbers, so a genuine 0% reading is not mistaken for a missing metric.
    metrics_available = any(value is not None for value in raw_values.values())
    message = None
    if not metrics_available:
        message = "node_exporter/DCGM метрики не знайдені, повертаємо значення за замовчуванням"

    # datetime.utcnow() is deprecated; an aware UTC timestamp rendered with a
    # "Z" suffix produces the same string format as before.
    timestamp = datetime.now(timezone.utc).isoformat().replace("+00:00", "Z")

    return {
        "success": True,
        "metrics_available": metrics_available,
        "source": PROM_URL,
        "timestamp": timestamp,
        "message": message,
        "metrics": {
            name: {"percent": _clamp(value)} for name, value in raw_values.items()
        },
    }