🔧 Worker Daemon: базова реалізація v1
Some checks failed
Update Documentation / update-repos-info (push) Has been cancelled

- Capability Registry (Postgres heartbeat)
- NATS Client (підписка на streams)
- Job Executor (виконання jobs)
- Metrics Exporter (Prometheus)
- Dockerfile для deployment
- Виправлено server_name в NATS (emptyDir)

TODO: Реальна реалізація embed/retrieve/summarize, Matrix Gateway, Auth
This commit is contained in:
Apple
2026-01-10 10:24:13 -08:00
parent 8fe0b58978
commit a688666fa1
10 changed files with 778 additions and 0 deletions

View File

@@ -0,0 +1,175 @@
"""
Capability Registry — реєстрація воркерів в Postgres
"""
import asyncio
import asyncpg
import json
import os
import platform
import subprocess
from datetime import datetime
from typing import Dict, Any, Optional
class CapabilityRegistry:
def __init__(self, postgres_url: str, node_id: str, tier: str, region: str):
self.postgres_url = postgres_url
self.node_id = node_id
self.tier = tier
self.region = region
self.pool: Optional[asyncpg.Pool] = None
async def connect(self):
"""Підключення до Postgres"""
if not self.postgres_url:
print("⚠️ CAPABILITY_REGISTRY не встановлено, пропускаємо реєстрацію")
return
try:
self.pool = await asyncpg.create_pool(self.postgres_url)
await self._ensure_table()
except Exception as e:
print(f"❌ Помилка підключення до Postgres: {e}")
self.pool = None
async def _ensure_table(self):
"""Створення таблиці worker_capabilities якщо не існує"""
async with self.pool.acquire() as conn:
await conn.execute("""
CREATE TABLE IF NOT EXISTS worker_capabilities (
node_id VARCHAR(255) PRIMARY KEY,
tier VARCHAR(10) NOT NULL,
region VARCHAR(50),
trust_zone VARCHAR(50),
hardware JSONB NOT NULL,
capabilities JSONB NOT NULL,
status VARCHAR(20) NOT NULL,
last_heartbeat TIMESTAMP WITH TIME ZONE NOT NULL,
created_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
updated_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP
);
CREATE INDEX IF NOT EXISTS idx_worker_capabilities_tier
ON worker_capabilities(tier);
CREATE INDEX IF NOT EXISTS idx_worker_capabilities_status
ON worker_capabilities(status);
""")
async def _detect_hardware(self) -> Dict[str, Any]:
"""Автоматичне визначення hardware capabilities"""
hardware = {
"cpu_cores": os.cpu_count() or 1,
"ram_gb": self._get_ram_gb(),
"gpu": False,
"gpu_model": None,
"vram_gb": 0,
"cuda_version": None
}
# Перевірка GPU (NVIDIA)
try:
result = subprocess.run(["nvidia-smi", "--query-gpu=name,memory.total", "--format=csv,noheader"],
capture_output=True, text=True, timeout=5)
if result.returncode == 0:
lines = result.stdout.strip().split("\n")
if lines:
gpu_info = lines[0].split(",")
hardware["gpu"] = True
hardware["gpu_model"] = gpu_info[0].strip()
# Парсинг VRAM (формат: "XXXXX MiB")
vram_str = gpu_info[1].strip().replace("MiB", "").strip()
hardware["vram_gb"] = int(vram_str) // 1024 if vram_str.isdigit() else 0
# CUDA version
cuda_result = subprocess.run(["nvcc", "--version"],
capture_output=True, text=True, timeout=5)
if cuda_result.returncode == 0:
for line in cuda_result.stdout.split("\n"):
if "release" in line.lower():
# Парсинг версії CUDA
parts = line.split()
for i, part in enumerate(parts):
if part.lower() == "release":
if i + 1 < len(parts):
hardware["cuda_version"] = parts[i + 1].rstrip(",")
except Exception:
pass # GPU не знайдено або помилка
return hardware
def _get_ram_gb(self) -> int:
"""Отримання RAM в GB"""
try:
if platform.system() == "Linux":
with open("/proc/meminfo") as f:
for line in f:
if line.startswith("MemTotal:"):
return int(line.split()[1]) // (1024 * 1024)
elif platform.system() == "Darwin": # macOS
result = subprocess.run(["sysctl", "-n", "hw.memsize"],
capture_output=True, text=True)
if result.returncode == 0:
return int(result.stdout.strip()) // (1024 ** 3)
except Exception:
pass
return 8 # Default
async def register(self):
"""Реєстрація воркера"""
await self.connect()
if not self.pool:
return
hardware = await self._detect_hardware()
capabilities = {
"max_batch": 1000,
"max_tokens": 8192,
"models": ["cohere/embed-multilingual-v3.0"],
"embedding_dim": 1024,
"supported_jobs": ["embed", "summarize", "index"]
}
async with self.pool.acquire() as conn:
await conn.execute("""
INSERT INTO worker_capabilities
(node_id, tier, region, trust_zone, hardware, capabilities, status, last_heartbeat)
VALUES ($1, $2, $3, $4, $5, $6, $7, $8)
ON CONFLICT (node_id) DO UPDATE SET
tier = EXCLUDED.tier,
region = EXCLUDED.region,
hardware = EXCLUDED.hardware,
capabilities = EXCLUDED.capabilities,
status = EXCLUDED.status,
last_heartbeat = EXCLUDED.last_heartbeat,
updated_at = CURRENT_TIMESTAMP
""", self.node_id, self.tier, self.region, "internal",
json.dumps(hardware), json.dumps(capabilities), "ready", datetime.utcnow())
print(f"✅ Worker зареєстровано: {self.node_id} (Tier {self.tier})")
async def update_heartbeat(self):
"""Оновлення heartbeat"""
if not self.pool:
return
async with self.pool.acquire() as conn:
await conn.execute("""
UPDATE worker_capabilities
SET last_heartbeat = CURRENT_TIMESTAMP
WHERE node_id = $1
""", self.node_id)
async def unregister(self):
"""Видалення реєстрації"""
if not self.pool:
return
async with self.pool.acquire() as conn:
await conn.execute("""
UPDATE worker_capabilities
SET status = 'offline'
WHERE node_id = $1
""", self.node_id)
print(f"✅ Worker видалено з реєстру: {self.node_id}")