feat: Add Alateya, Clan, Eonarch agents + fix gateway-router connection
## Agents Added
- Alateya: R&D, biotech, innovations
- Clan (Spirit): Community spirit agent
- Eonarch: Consciousness evolution agent

## Changes
- docker-compose.node1.yml: Added tokens for all 3 new agents
- gateway-bot/http_api.py: Added configs and webhook endpoints
- gateway-bot/clan_prompt.txt: New prompt file
- gateway-bot/eonarch_prompt.txt: New prompt file

## Fixes
- Fixed ROUTER_URL from :9102 to :8000 (internal container port)
- All 9 Telegram agents now working

## Documentation
- Created PROJECT-MASTER-INDEX.md - single entry point
- Added various status documents and scripts

Tokens configured:
- Helion, NUTRA, Agromatrix (existing)
- Alateya, Clan, Eonarch (new)
- Druid, GreenFood, DAARWIZZ (configured)
This commit is contained in:
296
scripts/monitor-collections-health.py
Normal file
296
scripts/monitor-collections-health.py
Normal file
@@ -0,0 +1,296 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Qdrant Collections Health Monitor
|
||||
Перевіряє здоров'я колекцій і відправляє сповіщення при проблемах
|
||||
"""
|
||||
import asyncio
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import sys
|
||||
from datetime import datetime
|
||||
from typing import Dict, List, Optional
|
||||
import httpx
|
||||
|
||||
# Configuration. Every value can be overridden through an environment
# variable; the defaults below are used when the variable is not set.

# Base URL of the Qdrant HTTP API. A trailing slash is stripped so that
# f"{QDRANT_URL}/collections" never produces a double slash.
QDRANT_URL = os.getenv("QDRANT_URL", "http://localhost:6333").rstrip("/")

# Telegram credentials used for alerts. When either one is empty, alerts are
# skipped by the monitor.
TELEGRAM_BOT_TOKEN = os.getenv("ADMIN_TELEGRAM_BOT_TOKEN", "")
ADMIN_CHAT_ID = os.getenv("ADMIN_CHAT_ID", "")

# Collections holding fewer points than this produce a warning.
MIN_POINTS_THRESHOLD = int(os.getenv("MIN_POINTS_THRESHOLD", "10"))

# JSON file that stores the per-collection state observed by the previous run.
STATE_FILE = os.getenv(
    "COLLECTIONS_STATE_FILE", "/opt/backups/collections-state.json"
)

logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
)

# NOTE(review): recent httpx releases log every request line, including the
# full URL, at INFO level. The Telegram URL embeds the bot token, so the
# library logger is raised to WARNING to keep the token out of the logs.
logging.getLogger("httpx").setLevel(logging.WARNING)

logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class CollectionsHealthMonitor:
    """Monitor the health of Qdrant collections.

    One call to ``monitor()`` lists every collection, evaluates each one
    (empty, too few points, sudden drop in point count, non-green status),
    persists the observed state to ``STATE_FILE`` for the next run, sends a
    Telegram alert for critical findings and prints a report to stdout.
    """

    # Statuses that count as critical and trigger an alert. "error" is what
    # check_collection_health() reports when a collection cannot be queried.
    ALERT_STATUSES = ("critical", "error")

    # Longest message text that send_telegram_alert() will submit.
    TELEGRAM_MAX_MESSAGE_LENGTH = 4096

    def __init__(self):
        self.http_client = httpx.AsyncClient(timeout=30.0)
        self.previous_state = self.load_state()
        self.alerts: List[str] = []

    def load_state(self) -> Dict:
        """Load the state saved by the previous run.

        Returns:
            The decoded JSON object, or an empty dict when the file does not
            exist, cannot be read/parsed, or does not contain a JSON object.
        """
        try:
            if os.path.exists(STATE_FILE):
                with open(STATE_FILE, "r", encoding="utf-8") as f:
                    state = json.load(f)
                # Anything but a mapping would break the .get() lookups done
                # in check_collection_health().
                if isinstance(state, dict):
                    return state
                logger.error(
                    f"Помилка завантаження стану: неочікуваний формат {STATE_FILE}"
                )
        except (OSError, ValueError) as e:
            logger.error(f"Помилка завантаження стану: {e}")
        return {}

    def save_state(self, state: Dict):
        """Persist ``state`` to ``STATE_FILE`` as JSON.

        The data is written to a temporary sibling file first and then moved
        over the target, so an interrupted write cannot leave a truncated
        state file behind. Failures are logged and not raised.
        """
        tmp_path = f"{STATE_FILE}.tmp"
        try:
            state_dir = os.path.dirname(STATE_FILE)
            # dirname is "" for a bare file name; makedirs("") would raise.
            if state_dir:
                os.makedirs(state_dir, exist_ok=True)
            with open(tmp_path, "w", encoding="utf-8") as f:
                json.dump(state, f, indent=2, ensure_ascii=False)
            os.replace(tmp_path, STATE_FILE)
        except (OSError, TypeError, ValueError) as e:
            logger.error(f"Помилка збереження стану: {e}")

    async def get_all_collections(self) -> List[Dict]:
        """Fetch the list of collections from ``GET {QDRANT_URL}/collections``.

        Returns:
            The ``result.collections`` list of the response, or an empty list
            when the request fails or the payload has no such list.
        """
        try:
            response = await self.http_client.get(f"{QDRANT_URL}/collections")
            response.raise_for_status()
            data = response.json()

            # "result" / "collections" may be null; treat that like missing.
            collections = (data.get("result") or {}).get("collections") or []
            logger.info(f"Знайдено {len(collections)} колекцій")
            return collections

        except Exception as e:
            logger.error(f"Помилка отримання колекцій: {e}")
            return []

    async def get_collection_info(self, collection_name: str) -> Optional[Dict]:
        """Fetch details of one collection.

        Args:
            collection_name: Name of the collection to query.

        Returns:
            A dict with ``name``, ``points_count``, ``segments_count``,
            ``status``, ``vectors_count`` and ``indexed_vectors_count``, or
            ``None`` when the request fails. A value that is missing or JSON
            null is normalised (counts to 0, status to "unknown") so later
            numeric comparisons cannot raise ``TypeError``.
        """
        try:
            url = f"{QDRANT_URL}/collections/{collection_name}"
            response = await self.http_client.get(url)
            response.raise_for_status()
            result = response.json().get("result") or {}

            return {
                "name": collection_name,
                "points_count": result.get("points_count") or 0,
                "segments_count": result.get("segments_count") or 0,
                "status": result.get("status") or "unknown",
                "vectors_count": result.get("vectors_count") or 0,
                "indexed_vectors_count": result.get("indexed_vectors_count") or 0,
            }

        except Exception as e:
            logger.error(f"Помилка отримання інфо про {collection_name}: {e}")
            return None

    @staticmethod
    def _assess_info(info: Dict, prev_count: int, min_points: int):
        """Evaluate one collection's info and return ``(issues, warnings)``.

        Args:
            info: Dict with at least ``points_count`` and ``status``.
            prev_count: Point count recorded by the previous run (0 if none).
            min_points: Point count below which a warning is produced.
        """
        issues: List[str] = []
        warnings: List[str] = []
        points = info["points_count"]

        # Check 1: empty collection / Check 2: very little data.
        if points == 0:
            issues.append("Колекція порожня (0 точок)")
        elif points < min_points:
            warnings.append(f"Мало даних ({points} точок, мінімум {min_points})")

        # Check 3: point count dropped by more than 10% since the last run.
        if prev_count > 0 and points < prev_count * 0.9:
            decrease = prev_count - points
            issues.append(
                f"Втрата даних: було {prev_count}, зараз {points} (-{decrease})"
            )

        # Check 4: collection status.
        if info["status"] != "green":
            issues.append(f"Статус: {info['status']} (очікується green)")

        return issues, warnings

    @staticmethod
    def _build_state(results: List[Dict], previous_state: Dict) -> Dict:
        """Build the state to persist from this run's results.

        A collection that could not be queried keeps its previous entry, so a
        transient failure does not erase the baseline used for drop detection.
        Collections absent from ``results`` are not carried over.
        """
        new_state = {}
        for result in results:
            name = result["name"]
            info = result.get("info")
            if info:
                new_state[name] = info
            elif name in previous_state:
                new_state[name] = previous_state[name]
        return new_state

    async def check_collection_health(self, collection: Dict) -> Dict:
        """Check the health of a single collection.

        Args:
            collection: Entry from the collections list; its ``name`` is used.

        Returns:
            A dict with ``name``, ``status``, ``info``, ``issues``,
            ``warnings`` and ``previous_count``. ``status`` is "critical"
            when there are issues, "warning" when there are only warnings,
            "healthy" otherwise, and "error" (with ``info`` set to ``None``)
            when the collection info could not be fetched.
        """
        name = collection.get("name")

        prev_entry = self.previous_state.get(name)
        prev_count = prev_entry.get("points_count") if isinstance(prev_entry, dict) else 0
        prev_count = prev_count or 0

        info = await self.get_collection_info(name)
        if not info:
            # Same keys as the normal result so callers can treat both alike.
            return {
                "name": name,
                "status": "error",
                "info": None,
                "issues": ["Не вдалося отримати інформацію"],
                "warnings": [],
                "previous_count": prev_count,
            }

        issues, warnings = self._assess_info(info, prev_count, MIN_POINTS_THRESHOLD)

        # Overall status: issues outrank warnings.
        if issues:
            status = "critical"
        elif warnings:
            status = "warning"
        else:
            status = "healthy"

        return {
            "name": name,
            "status": status,
            "info": info,
            "issues": issues,
            "warnings": warnings,
            "previous_count": prev_count,
        }

    async def send_telegram_alert(self, message: str):
        """Send ``message`` to the admin chat through the Telegram Bot API.

        Does nothing (besides a warning log) when the bot token or chat id is
        not configured. Messages longer than ``TELEGRAM_MAX_MESSAGE_LENGTH``
        are truncated. If the Markdown request is answered with HTTP 400, the
        message is retried once as plain text. Failures are logged, never
        raised.
        """
        if not TELEGRAM_BOT_TOKEN or not ADMIN_CHAT_ID:
            logger.warning("Telegram credentials not configured, skipping alert")
            return

        limit = self.TELEGRAM_MAX_MESSAGE_LENGTH
        if len(message) > limit:
            message = message[: limit - 1] + "…"

        url = f"https://api.telegram.org/bot{TELEGRAM_BOT_TOKEN}/sendMessage"
        payload = {
            "chat_id": ADMIN_CHAT_ID,
            "text": message,
            "parse_mode": "Markdown",
        }

        try:
            response = await self.http_client.post(url, json=payload)
            if response.status_code == 400:
                # Possibly a Markdown parse failure (for example after the
                # truncation above cut a formatting marker): an unformatted
                # alert is better than no alert.
                payload.pop("parse_mode")
                response = await self.http_client.post(url, json=payload)
            response.raise_for_status()
            logger.info("Telegram alert sent successfully")

        except Exception as e:
            # The exception text can contain the request URL, which embeds
            # the bot token; redact it before logging.
            detail = str(e).replace(TELEGRAM_BOT_TOKEN, "***")
            logger.error(f"Помилка відправки Telegram сповіщення: {detail}")

    async def monitor(self):
        """Run one monitoring pass over all collections.

        Returns:
            The list of per-collection results from
            ``check_collection_health``; an empty list when no collection was
            found (an alert is sent in that case and the saved state is left
            untouched).
        """
        logger.info("🔍 Початок моніторингу Qdrant колекцій...")

        collections = await self.get_all_collections()

        if not collections:
            alert = "⚠️ *Qdrant Collections Monitor*\n\nНе знайдено жодної колекції!"
            self.alerts.append(alert)
            await self.send_telegram_alert(alert)
            return []

        # Check every collection, one request at a time.
        results = []
        for collection in collections:
            results.append(await self.check_collection_health(collection))

        # Collections that could not be queried count as critical, not healthy.
        critical_count = sum(r["status"] in self.ALERT_STATUSES for r in results)
        warning_count = sum(r["status"] == "warning" for r in results)
        healthy_count = len(results) - critical_count - warning_count

        logger.info(f"✅ Healthy: {healthy_count}, ⚠️ Warnings: {warning_count}, 🔴 Critical: {critical_count}")

        # Persist the current state and use it as the baseline for any further
        # pass made with this same instance.
        new_state = self._build_state(results, self.previous_state)
        self.save_state(new_state)
        self.previous_state = new_state

        # Alert on critical problems.
        if critical_count > 0:
            await self.send_critical_alerts(results)

        # Print the detailed report.
        self.print_report(results, critical_count, warning_count, healthy_count)

        return results

    async def send_critical_alerts(self, results: List[Dict]):
        """Send one Telegram message summarising all critical results.

        Args:
            results: Per-collection results; only those whose status is in
                ``ALERT_STATUSES`` are included. Nothing is sent when there
                are none.
        """
        critical_issues = [r for r in results if r["status"] in self.ALERT_STATUSES]

        if not critical_issues:
            return

        parts = [
            "🔴 *Qdrant Collections Alert*\n\n",
            f"Виявлено {len(critical_issues)} критичних проблем:\n\n",
        ]

        for issue in critical_issues:
            parts.append(f"*{issue['name']}*\n")
            parts.extend(f" • {problem}\n" for problem in issue.get("issues", []))
            parts.append("\n")

        parts.append(f"_Час: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}_")

        await self.send_telegram_alert("".join(parts))

    def print_report(self, results: List[Dict], critical: int, warning: int, healthy: int):
        """Print a detailed report to stdout.

        Args:
            results: Per-collection results from ``check_collection_health``.
            critical: Number of critical collections (summary line).
            warning: Number of collections with warnings (summary line).
            healthy: Number of healthy collections (summary line).
        """
        separator = "=" * 80

        print("\n" + separator)
        print("📊 QDRANT COLLECTIONS HEALTH REPORT")
        print(separator)
        print(f"Час: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
        print(f"Всього колекцій: {len(results)}")
        print(f"✅ Здорові: {healthy}")
        print(f"⚠️ Попередження: {warning}")
        print(f"🔴 Критичні: {critical}")
        print(separator)

        # Group by status, most severe first. "error" results (collection
        # could not be queried) get their own section.
        icons = {"critical": "🔴", "error": "🔴", "warning": "⚠️", "healthy": "✅"}

        for status_type, icon in icons.items():
            items = [r for r in results if r["status"] == status_type]

            if not items:
                continue

            print(f"\n{icon} {status_type.upper()}")
            print("-" * 80)

            for item in items:
                # "info" is None (or absent) for results with status "error".
                info = item.get("info") or {}
                print(f"\n{item['name']}:")
                print(f" Points: {info.get('points_count') or 0:,}")
                print(f" Segments: {info.get('segments_count') or 0}")
                print(f" Status: {info.get('status') or 'unknown'}")

                if item.get("issues"):
                    print(" Issues:")
                    for issue in item["issues"]:
                        print(f" • {issue}")

                if item.get("warnings"):
                    print(" Warnings:")
                    for warn in item["warnings"]:
                        print(f" • {warn}")

        print("\n" + separator + "\n")

    async def close(self):
        """Close the HTTP client."""
        await self.http_client.aclose()
|
||||
|
||||
|
||||
async def main():
    """Run one monitoring pass and return the process exit code.

    Returns:
        0 when the pass completed, 1 when it raised an exception. The HTTP
        client is closed in both cases.
    """
    monitor = CollectionsHealthMonitor()
    exit_code = 0

    try:
        await monitor.monitor()
    except Exception as exc:
        # Top-level boundary: log with traceback and report failure.
        logger.error(f"Помилка моніторингу: {exc}", exc_info=True)
        exit_code = 1
    finally:
        await monitor.close()

    return exit_code
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Run the async entry point and use its return value as the exit status.
    sys.exit(asyncio.run(main()))
|
||||
Reference in New Issue
Block a user