Files
microdao-daarion/infrastructure/worker-daemon/worker/registry.py
Apple a688666fa1
Some checks failed
Update Documentation / update-repos-info (push) Has been cancelled
🔧 Worker Daemon: базова реалізація v1
- Capability Registry (Postgres heartbeat)
- NATS Client (підписка на streams)
- Job Executor (виконання jobs)
- Metrics Exporter (Prometheus)
- Dockerfile для deployment
- Виправлено server_name в NATS (emptyDir)

TODO: Реальна реалізація embed/retrieve/summarize, Matrix Gateway, Auth
2026-01-10 10:24:13 -08:00

176 lines
7.2 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""
Capability Registry — реєстрація воркерів в Postgres
"""
import asyncio
import asyncpg
import json
import os
import platform
import subprocess
from datetime import datetime
from typing import Dict, Any, Optional
class CapabilityRegistry:
def __init__(self, postgres_url: str, node_id: str, tier: str, region: str):
self.postgres_url = postgres_url
self.node_id = node_id
self.tier = tier
self.region = region
self.pool: Optional[asyncpg.Pool] = None
async def connect(self):
"""Підключення до Postgres"""
if not self.postgres_url:
print("⚠️ CAPABILITY_REGISTRY не встановлено, пропускаємо реєстрацію")
return
try:
self.pool = await asyncpg.create_pool(self.postgres_url)
await self._ensure_table()
except Exception as e:
print(f"❌ Помилка підключення до Postgres: {e}")
self.pool = None
async def _ensure_table(self):
"""Створення таблиці worker_capabilities якщо не існує"""
async with self.pool.acquire() as conn:
await conn.execute("""
CREATE TABLE IF NOT EXISTS worker_capabilities (
node_id VARCHAR(255) PRIMARY KEY,
tier VARCHAR(10) NOT NULL,
region VARCHAR(50),
trust_zone VARCHAR(50),
hardware JSONB NOT NULL,
capabilities JSONB NOT NULL,
status VARCHAR(20) NOT NULL,
last_heartbeat TIMESTAMP WITH TIME ZONE NOT NULL,
created_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
updated_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP
);
CREATE INDEX IF NOT EXISTS idx_worker_capabilities_tier
ON worker_capabilities(tier);
CREATE INDEX IF NOT EXISTS idx_worker_capabilities_status
ON worker_capabilities(status);
""")
async def _detect_hardware(self) -> Dict[str, Any]:
"""Автоматичне визначення hardware capabilities"""
hardware = {
"cpu_cores": os.cpu_count() or 1,
"ram_gb": self._get_ram_gb(),
"gpu": False,
"gpu_model": None,
"vram_gb": 0,
"cuda_version": None
}
# Перевірка GPU (NVIDIA)
try:
result = subprocess.run(["nvidia-smi", "--query-gpu=name,memory.total", "--format=csv,noheader"],
capture_output=True, text=True, timeout=5)
if result.returncode == 0:
lines = result.stdout.strip().split("\n")
if lines:
gpu_info = lines[0].split(",")
hardware["gpu"] = True
hardware["gpu_model"] = gpu_info[0].strip()
# Парсинг VRAM (формат: "XXXXX MiB")
vram_str = gpu_info[1].strip().replace("MiB", "").strip()
hardware["vram_gb"] = int(vram_str) // 1024 if vram_str.isdigit() else 0
# CUDA version
cuda_result = subprocess.run(["nvcc", "--version"],
capture_output=True, text=True, timeout=5)
if cuda_result.returncode == 0:
for line in cuda_result.stdout.split("\n"):
if "release" in line.lower():
# Парсинг версії CUDA
parts = line.split()
for i, part in enumerate(parts):
if part.lower() == "release":
if i + 1 < len(parts):
hardware["cuda_version"] = parts[i + 1].rstrip(",")
except Exception:
pass # GPU не знайдено або помилка
return hardware
def _get_ram_gb(self) -> int:
"""Отримання RAM в GB"""
try:
if platform.system() == "Linux":
with open("/proc/meminfo") as f:
for line in f:
if line.startswith("MemTotal:"):
return int(line.split()[1]) // (1024 * 1024)
elif platform.system() == "Darwin": # macOS
result = subprocess.run(["sysctl", "-n", "hw.memsize"],
capture_output=True, text=True)
if result.returncode == 0:
return int(result.stdout.strip()) // (1024 ** 3)
except Exception:
pass
return 8 # Default
async def register(self):
"""Реєстрація воркера"""
await self.connect()
if not self.pool:
return
hardware = await self._detect_hardware()
capabilities = {
"max_batch": 1000,
"max_tokens": 8192,
"models": ["cohere/embed-multilingual-v3.0"],
"embedding_dim": 1024,
"supported_jobs": ["embed", "summarize", "index"]
}
async with self.pool.acquire() as conn:
await conn.execute("""
INSERT INTO worker_capabilities
(node_id, tier, region, trust_zone, hardware, capabilities, status, last_heartbeat)
VALUES ($1, $2, $3, $4, $5, $6, $7, $8)
ON CONFLICT (node_id) DO UPDATE SET
tier = EXCLUDED.tier,
region = EXCLUDED.region,
hardware = EXCLUDED.hardware,
capabilities = EXCLUDED.capabilities,
status = EXCLUDED.status,
last_heartbeat = EXCLUDED.last_heartbeat,
updated_at = CURRENT_TIMESTAMP
""", self.node_id, self.tier, self.region, "internal",
json.dumps(hardware), json.dumps(capabilities), "ready", datetime.utcnow())
print(f"✅ Worker зареєстровано: {self.node_id} (Tier {self.tier})")
async def update_heartbeat(self):
"""Оновлення heartbeat"""
if not self.pool:
return
async with self.pool.acquire() as conn:
await conn.execute("""
UPDATE worker_capabilities
SET last_heartbeat = CURRENT_TIMESTAMP
WHERE node_id = $1
""", self.node_id)
async def unregister(self):
"""Видалення реєстрації"""
if not self.pool:
return
async with self.pool.acquire() as conn:
await conn.execute("""
UPDATE worker_capabilities
SET status = 'offline'
WHERE node_id = $1
""", self.node_id)
print(f"✅ Worker видалено з реєстру: {self.node_id}")