feat: Add presence heartbeat for Matrix online status
- matrix-gateway: POST /internal/matrix/presence/online endpoint - usePresenceHeartbeat hook with activity tracking - Auto away after 5 min inactivity - Offline on page close/visibility change - Integrated in MatrixChatRoom component
This commit is contained in:
87
services/node-registry/app/node1_prometheus.py
Normal file
87
services/node-registry/app/node1_prometheus.py
Normal file
@@ -0,0 +1,87 @@
|
||||
"""
|
||||
Helper for reading real Prometheus metrics from NODE1 via SSH tunnel.
|
||||
Assumes tunnel exposes Prometheus locally (http://localhost:19090 by default).
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
from datetime import datetime
|
||||
from typing import Any, Dict, Optional
|
||||
|
||||
import requests
|
||||
|
||||
PROM_URL = os.getenv("NODE1_PROMETHEUS_URL", "http://localhost:19090")
|
||||
REQUEST_TIMEOUT = float(os.getenv("NODE1_PROMETHEUS_TIMEOUT", "2.5"))
|
||||
|
||||
PROM_HEALTH_QUERY = 'up{job="prometheus"}'
|
||||
CPU_USAGE_QUERY = "100 - (avg(rate(node_cpu_seconds_total{mode=\"idle\"}[5m])) * 100)"
|
||||
MEMORY_USAGE_QUERY = "(1 - (avg(node_memory_MemAvailable_bytes) / avg(node_memory_MemTotal_bytes))) * 100"
|
||||
DISK_USAGE_QUERY = "(1 - (sum(node_filesystem_free_bytes{mountpoint=\"/\",fstype!~\"tmpfs|overlay\"}) / sum(node_filesystem_size_bytes{mountpoint=\"/\",fstype!~\"tmpfs|overlay\"}))) * 100"
|
||||
GPU_USAGE_QUERY = "avg(dcgm_gpu_utilization)"
|
||||
|
||||
|
||||
def prom_query(query: str) -> Optional[float]:
|
||||
"""Execute Prometheus instant query and return float value."""
|
||||
try:
|
||||
response = requests.get(
|
||||
f"{PROM_URL}/api/v1/query",
|
||||
params={"query": query},
|
||||
timeout=REQUEST_TIMEOUT,
|
||||
)
|
||||
response.raise_for_status()
|
||||
payload = response.json()
|
||||
if payload.get("status") != "success":
|
||||
return None
|
||||
results = payload.get("data", {}).get("result", [])
|
||||
if not results:
|
||||
return None
|
||||
# value structure: [timestamp, value]
|
||||
value = float(results[0]["value"][1])
|
||||
return value
|
||||
except Exception:
|
||||
return None
|
||||
|
||||
|
||||
def _clamp(value: Optional[float]) -> float:
|
||||
if value is None:
|
||||
return 0.0
|
||||
return max(0.0, min(100.0, round(value, 2)))
|
||||
|
||||
|
||||
def get_node1_metrics() -> Dict[str, Any]:
|
||||
"""
|
||||
Collect NODE1 metrics via Prometheus tunnel.
|
||||
Returns structure compatible with system_metrics.get_all_metrics().
|
||||
"""
|
||||
health = prom_query(PROM_HEALTH_QUERY)
|
||||
if health is None:
|
||||
return {
|
||||
"success": False,
|
||||
"error": "Prometheus reachable but returned no data for health query",
|
||||
"source": PROM_URL,
|
||||
}
|
||||
|
||||
cpu_percent = _clamp(prom_query(CPU_USAGE_QUERY))
|
||||
mem_percent = _clamp(prom_query(MEMORY_USAGE_QUERY))
|
||||
disk_percent = _clamp(prom_query(DISK_USAGE_QUERY))
|
||||
gpu_percent = _clamp(prom_query(GPU_USAGE_QUERY))
|
||||
|
||||
metrics_available = any(value > 0 for value in [cpu_percent, mem_percent, disk_percent, gpu_percent])
|
||||
message = None
|
||||
if not metrics_available:
|
||||
message = "node_exporter/DCGM метрики не знайдені, повертаємо значення за замовчуванням"
|
||||
|
||||
return {
|
||||
"success": True,
|
||||
"metrics_available": metrics_available,
|
||||
"source": PROM_URL,
|
||||
"timestamp": datetime.utcnow().isoformat() + "Z",
|
||||
"message": message,
|
||||
"metrics": {
|
||||
"cpu": {"percent": cpu_percent},
|
||||
"memory": {"percent": mem_percent},
|
||||
"disk": {"percent": disk_percent},
|
||||
"gpu": {"percent": gpu_percent},
|
||||
},
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user