## Agents Added
- Alateya: R&D, biotech, innovations
- Clan (Spirit): Community spirit agent
- Eonarch: Consciousness evolution agent

## Changes
- docker-compose.node1.yml: Added tokens for all 3 new agents
- gateway-bot/http_api.py: Added configs and webhook endpoints
- gateway-bot/clan_prompt.txt: New prompt file
- gateway-bot/eonarch_prompt.txt: New prompt file

## Fixes
- Fixed ROUTER_URL from :9102 to :8000 (internal container port)
- All 9 Telegram agents now working

## Documentation
- Created PROJECT-MASTER-INDEX.md - single entry point
- Added various status documents and scripts

Tokens configured:
- Helion, NUTRA, Agromatrix (existing)
- Alateya, Clan, Eonarch (new)
- Druid, GreenFood, DAARWIZZ (configured)
297 lines
11 KiB
Python
297 lines
11 KiB
Python
#!/usr/bin/env python3
|
||
"""
|
||
Qdrant Collections Health Monitor
|
||
Перевіряє здоров'я колекцій і відправляє сповіщення при проблемах
|
||
"""
|
||
import asyncio
|
||
import json
|
||
import logging
|
||
import os
|
||
import sys
|
||
from datetime import datetime
|
||
from typing import Dict, List, Optional
|
||
import httpx
|
||
|
||
# Runtime configuration. Everything except STATE_FILE can be overridden
# through the environment; the literals below are the fallbacks.
QDRANT_URL = os.environ.get("QDRANT_URL", "http://localhost:6333")
TELEGRAM_BOT_TOKEN = os.environ.get("ADMIN_TELEGRAM_BOT_TOKEN", "")
ADMIN_CHAT_ID = os.environ.get("ADMIN_CHAT_ID", "")
# Collections with fewer points than this (but more than zero) get a warning.
MIN_POINTS_THRESHOLD = int(os.environ.get("MIN_POINTS_THRESHOLD", "10"))
# Snapshot of the previous run, used to detect shrinking collections.
STATE_FILE = "/opt/backups/collections-state.json"

# Root logging setup: INFO and above, timestamped, with logger name and level.
logging.basicConfig(
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)
logger = logging.getLogger(__name__)
|
||
|
||
|
||
class CollectionsHealthMonitor:
    """Monitor the health of Qdrant collections and alert on problems.

    One run (``monitor``) lists the collections, evaluates each one against
    a few rules (empty, too small, shrinking, non-green status), persists the
    observed counters to ``STATE_FILE`` for the next run, sends a Telegram
    alert for failing collections and prints a report to stdout.

    Result statuses produced by ``check_collection_health``:
    ``"healthy"``, ``"warning"``, ``"critical"`` and ``"error"`` (the
    collection info could not be fetched).
    """

    # Statuses that count as failures and must be alerted on. "error" is
    # included so that a collection we cannot inspect is never reported healthy.
    _ALERT_STATUSES = ("critical", "error")

    def __init__(self):
        """Create the shared HTTP client and load the previous run's state."""
        self.http_client = httpx.AsyncClient(timeout=30.0)
        self.previous_state = self.load_state()
        self.alerts: List[str] = []

    @staticmethod
    def _as_count(value) -> int:
        """Coerce a counter that may be missing or null to an int (0 if unusable)."""
        return value if isinstance(value, int) else 0

    @staticmethod
    def _escape_markdown(text: str) -> str:
        """Backslash-escape the characters Telegram legacy Markdown treats as markup."""
        for ch in ("_", "*", "`", "["):
            text = text.replace(ch, "\\" + ch)
        return text

    @staticmethod
    def _evaluate(info: Dict, prev_count: int, min_points: int) -> tuple:
        """Apply the health rules to one collection and return ``(issues, warnings)``."""
        issues = []
        warnings = []
        points = info["points_count"]

        # Rule 1 / 2: empty collection is an issue, a tiny one only a warning.
        if points == 0:
            issues.append("Колекція порожня (0 точок)")
        elif points < min_points:
            warnings.append(f"Мало даних ({points} точок, мінімум {min_points})")

        # Rule 3: more than 10% fewer points than in the previous run.
        if prev_count > 0 and points < prev_count * 0.9:
            decrease = prev_count - points
            issues.append(f"Втрата даних: було {prev_count}, зараз {points} (-{decrease})")

        # Rule 4: Qdrant must report the collection as green.
        if info["status"] != "green":
            issues.append(f"Статус: {info['status']} (очікується green)")

        return issues, warnings

    @staticmethod
    def _build_alert_message(failing: List[Dict]) -> str:
        """Render the Telegram (legacy Markdown) alert text for failing collections."""
        parts = [
            "🔴 *Qdrant Collections Alert*\n\n",
            f"Виявлено {len(failing)} критичних проблем:\n\n",
        ]
        for item in failing:
            # Names go into an inline-code span: legacy Markdown does not allow
            # escaping inside *bold*, so a name such as "my_docs" would make
            # Telegram reject the whole message.
            safe_name = str(item["name"]).replace("`", "'")
            parts.append(f"`{safe_name}`\n")
            for problem in item.get("issues", []):
                parts.append(f" • {CollectionsHealthMonitor._escape_markdown(str(problem))}\n")
            parts.append("\n")
        parts.append(f"_Час: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}_")
        return "".join(parts)

    def load_state(self) -> Dict:
        """Load the previous run's state from ``STATE_FILE``.

        Returns:
            The stored mapping of collection name to info dict, or ``{}`` when
            the file is missing, unreadable, or does not contain a JSON object.
        """
        try:
            with open(STATE_FILE, 'r', encoding='utf-8') as f:
                state = json.load(f)
        except FileNotFoundError:
            # First run: nothing to compare against yet.
            return {}
        except Exception as e:
            logger.error(f"Помилка завантаження стану: {e}")
            return {}

        if not isinstance(state, dict):
            # A non-object payload would break the .get() lookups later on.
            logger.error("Помилка завантаження стану: очікувався JSON-об'єкт")
            return {}
        return state

    def save_state(self, state: Dict):
        """Persist ``state`` to ``STATE_FILE``; failures are logged, not raised."""
        tmp_path = STATE_FILE + ".tmp"
        try:
            os.makedirs(os.path.dirname(STATE_FILE) or ".", exist_ok=True)
            # Write to a sibling file and rename, so an interrupted run cannot
            # leave a truncated state file behind.
            with open(tmp_path, 'w', encoding='utf-8') as f:
                json.dump(state, f, indent=2)
            os.replace(tmp_path, STATE_FILE)
        except Exception as e:
            logger.error(f"Помилка збереження стану: {e}")

    async def get_all_collections(self) -> List[Dict]:
        """Fetch the list of collections from Qdrant.

        Returns:
            The ``result.collections`` list of the ``/collections`` response,
            or ``[]`` when the request fails (the error is logged).
        """
        try:
            url = f"{QDRANT_URL}/collections"
            response = await self.http_client.get(url)
            response.raise_for_status()
            data = response.json()

            # "result" may be present but null; treat that like an empty result.
            collections = (data.get("result") or {}).get("collections", [])
            logger.info(f"Знайдено {len(collections)} колекцій")
            return collections

        except Exception as e:
            logger.error(f"Помилка отримання колекцій: {e}")
            return []

    async def get_collection_info(self, collection_name: str) -> Optional[Dict]:
        """Fetch details for one collection.

        Returns:
            A dict with ``name``, ``points_count``, ``segments_count``,
            ``status``, ``vectors_count`` and ``indexed_vectors_count``
            (counters are always ints), or ``None`` when the request fails.
        """
        try:
            url = f"{QDRANT_URL}/collections/{collection_name}"
            response = await self.http_client.get(url)
            response.raise_for_status()
            data = response.json()

            result = data.get("result") or {}
            # Counters can be absent or null in the response; normalise them so
            # the numeric comparisons and formatting downstream cannot fail.
            return {
                "name": collection_name,
                "points_count": self._as_count(result.get("points_count")),
                "segments_count": self._as_count(result.get("segments_count")),
                "status": result.get("status") or "unknown",
                "vectors_count": self._as_count(result.get("vectors_count")),
                "indexed_vectors_count": self._as_count(result.get("indexed_vectors_count")),
            }

        except Exception as e:
            logger.error(f"Помилка отримання інфо про {collection_name}: {e}")
            return None

    async def check_collection_health(self, collection: Dict) -> Dict:
        """Evaluate the health of one collection.

        Args:
            collection: An entry of the collections list; only ``"name"`` is read.

        Returns:
            A dict with ``name``, ``status``, ``info``, ``issues``, ``warnings``
            and ``previous_count``. When the info cannot be fetched the status
            is ``"error"`` and ``info`` is ``None``.
        """
        name = collection.get("name")

        previous = self.previous_state.get(name)
        prev_count = (
            self._as_count(previous.get("points_count"))
            if isinstance(previous, dict)
            else 0
        )

        info = await self.get_collection_info(name)
        if not info:
            # Same keys as the normal result so callers can treat both alike.
            return {
                "name": name,
                "status": "error",
                "info": None,
                "issues": ["Не вдалося отримати інформацію"],
                "warnings": [],
                "previous_count": prev_count,
            }

        issues, warnings = self._evaluate(info, prev_count, MIN_POINTS_THRESHOLD)

        if issues:
            status = "critical"
        elif warnings:
            status = "warning"
        else:
            status = "healthy"

        return {
            "name": name,
            "status": status,
            "info": info,
            "issues": issues,
            "warnings": warnings,
            "previous_count": prev_count,
        }

    async def send_telegram_alert(self, message: str):
        """Send ``message`` to the admin chat via the Telegram Bot API.

        Does nothing (besides a warning) when the bot token or chat id is not
        configured. Delivery failures are logged and never raised.
        """
        if not TELEGRAM_BOT_TOKEN or not ADMIN_CHAT_ID:
            logger.warning("Telegram credentials not configured, skipping alert")
            return

        url = f"https://api.telegram.org/bot{TELEGRAM_BOT_TOKEN}/sendMessage"
        payload = {
            "chat_id": ADMIN_CHAT_ID,
            "text": message,
            "parse_mode": "Markdown",
        }

        try:
            response = await self.http_client.post(url, json=payload)

            # An alert with unbalanced markup is still worth delivering:
            # retry once as plain text instead of dropping it.
            if response.status_code == 400 and "can't parse entities" in response.text:
                del payload["parse_mode"]
                response = await self.http_client.post(url, json=payload)

            if 200 <= response.status_code < 300:
                logger.info("Telegram alert sent successfully")
            else:
                logger.error(
                    "Помилка відправки Telegram сповіщення: "
                    f"HTTP {response.status_code}: {response.text[:300]}"
                )

        except Exception as e:
            # The request URL embeds the bot token; make sure it never reaches
            # the logs through the exception text.
            detail = str(e).replace(TELEGRAM_BOT_TOKEN, "***")
            logger.error(f"Помилка відправки Telegram сповіщення: {detail}")

    async def monitor(self):
        """Run one monitoring pass over all collections.

        Returns:
            The list of per-collection results, or ``[]`` when no collections
            were found (in which case a single alert is sent).
        """
        logger.info("🔍 Початок моніторингу Qdrant колекцій...")

        collections = await self.get_all_collections()

        if not collections:
            alert = "⚠️ *Qdrant Collections Monitor*\n\nНе знайдено жодної колекції!"
            self.alerts.append(alert)
            await self.send_telegram_alert(alert)
            return []

        # Check every collection, one request at a time.
        results = []
        for collection in collections:
            results.append(await self.check_collection_health(collection))

        # Collections that could not be inspected count as critical, never as healthy.
        critical_count = sum(1 for r in results if r["status"] in self._ALERT_STATUSES)
        warning_count = sum(1 for r in results if r["status"] == "warning")
        healthy_count = len(results) - critical_count - warning_count

        logger.info(f"✅ Healthy: {healthy_count}, ⚠️ Warnings: {warning_count}, 🔴 Critical: {critical_count}")

        # Persist the current counters for the next run.
        new_state = {}
        for result in results:
            name = result["name"]
            if result.get("info"):
                new_state[name] = result["info"]
            elif name in self.previous_state:
                # Fetch failed: keep the last known counters so the data-loss
                # check still has a baseline once the collection is reachable.
                new_state[name] = self.previous_state[name]
        self.save_state(new_state)

        if critical_count > 0:
            await self.send_critical_alerts(results)

        self.print_report(results, critical_count, warning_count, healthy_count)

        return results

    async def send_critical_alerts(self, results: List[Dict]):
        """Send one Telegram alert listing every critical or errored collection."""
        failing = [r for r in results if r["status"] in self._ALERT_STATUSES]

        if not failing:
            return

        await self.send_telegram_alert(self._build_alert_message(failing))

    def print_report(self, results: List[Dict], critical: int, warning: int, healthy: int):
        """Print a detailed report to stdout, grouped by status.

        Args:
            results: Per-collection results as returned by ``check_collection_health``.
            critical: Number of failing collections (critical and error).
            warning: Number of collections with warnings.
            healthy: Number of healthy collections.
        """
        print("\n" + "="*80)
        print("📊 QDRANT COLLECTIONS HEALTH REPORT")
        print("="*80)
        print(f"Час: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
        print(f"Всього колекцій: {len(results)}")
        print(f"✅ Здорові: {healthy}")
        print(f"⚠️ Попередження: {warning}")
        print(f"🔴 Критичні: {critical}")
        print("="*80)

        # Section order: worst first. "error" has its own section so that
        # collections which could not be inspected still show up in the report.
        icons = {"critical": "🔴", "error": "❌", "warning": "⚠️", "healthy": "✅"}

        for status_type, icon in icons.items():
            items = [r for r in results if r["status"] == status_type]

            if not items:
                continue

            print(f"\n{icon} {status_type.upper()}")
            print("-"*80)

            for item in items:
                # "info" is None for errored collections.
                info = item.get("info") or {}
                print(f"\n{item['name']}:")
                print(f" Points: {info.get('points_count') or 0:,}")
                print(f" Segments: {info.get('segments_count') or 0}")
                print(f" Status: {info.get('status', 'unknown')}")

                if item.get("issues"):
                    print(" Issues:")
                    for issue in item["issues"]:
                        print(f" • {issue}")

                if item.get("warnings"):
                    print(" Warnings:")
                    for warn in item["warnings"]:
                        print(f" • {warn}")

        print("\n" + "="*80 + "\n")

    async def close(self):
        """Close the shared HTTP client."""
        await self.http_client.aclose()
|
||
|
||
|
||
async def main():
    """Run one monitoring pass and return a process exit code.

    Returns:
        0 when the pass completed, 1 when it raised. The monitor's HTTP
        client is closed in either case.
    """
    health_monitor = CollectionsHealthMonitor()
    exit_status = 0

    try:
        await health_monitor.monitor()
    except Exception as exc:
        logger.error(f"Помилка моніторингу: {exc}", exc_info=True)
        exit_status = 1
    finally:
        await health_monitor.close()

    return exit_status
|
||
|
||
|
||
# Script entry point: run the async main and use its result as the exit code.
if __name__ == "__main__":
    sys.exit(asyncio.run(main()))
|