#!/bin/bash
# Monitor database stability and auto-recover if needed.
# Run this periodically (e.g., every 5 minutes via cron).
#
# Steps, in order:
#   1. Start the daarion-postgres container if `docker ps` does not list it.
#   2. Create the 'daarion' database if it is missing.
#   3. Count rows in `microdaos` and `agents`; if either is below its
#      threshold, drop the database, restore it from the newest *.sql backup,
#      re-apply migrations and run the sync / cleanup helper scripts.
#   4. Report error lines in the last 5 minutes of container logs and a high
#      container restart count.
#
# Every message is written to stdout and appended to $LOG_FILE.
#
# NOTE: `pipefail` is deliberately NOT enabled. Several checks have the form
# `producer | grep -q ...`; grep exits on the first match and the producer can
# then die from SIGPIPE, which pipefail would report as a failed check.

set -e

LOG_FILE="/var/log/db-stability-monitor.log"
APP_DIR="/opt/microdao-daarion"
BACKUP_DIR="$APP_DIR/db_backups"
DB_CONTAINER="daarion-postgres"
MIN_MICRODAOS=5
MIN_AGENTS=10

#######################################
# Print a timestamped message and append it to the log file.
# The timestamp is taken per message because a run can last minutes
# (container start-up, restore). Logging is best-effort: an unwritable
# log file must not abort the monitor under `set -e`.
# Arguments: $1 - message text
#######################################
log() {
  printf '[%s] %s\n' "$(date '+%Y-%m-%d %H:%M:%S')" "$1" \
    | tee -a "$LOG_FILE" || true
}

#######################################
# Normalise a value to a non-negative integer.
# Arguments: $1 - raw text (may be empty or padded with whitespace)
# Outputs:   the digits of $1 if it is purely numeric, otherwise 0
#######################################
to_int() {
  local value
  value=$(printf '%s' "$1" | tr -d '[:space:]')
  if [[ "$value" =~ ^[0-9]+$ ]]; then
    printf '%s\n' "$value"
  else
    printf '0\n'
  fi
}

#######################################
# Count the rows of a table in the 'daarion' database.
# Arguments: $1 - table name (fixed identifiers from this script only)
# Outputs:   the row count, or 0 when the query fails (e.g. table missing),
#            so that a broken schema is treated as data loss.
#######################################
table_count() {
  local raw
  raw=$(docker exec "$DB_CONTAINER" psql -U postgres -d daarion -t \
    -c "SELECT COUNT(*) FROM $1;" 2>/dev/null) || raw=""
  to_int "$raw"
}

#######################################
# Find the most recently modified *.sql file in $BACKUP_DIR.
# Outputs: the path of the newest backup, or nothing if there is none.
#######################################
latest_backup() {
  local candidate newest=""
  for candidate in "$BACKUP_DIR"/*.sql; do
    # An unmatched glob stays literal; skip anything that is not a file.
    [[ -f "$candidate" ]] || continue
    if [[ -z "$newest" || "$candidate" -nt "$newest" ]]; then
      newest=$candidate
    fi
  done
  printf '%s' "$newest"
}

#######################################
# Drop the 'daarion' database and rebuild it from the newest backup, then
# re-apply migrations and run the agent sync / cleanup helper scripts.
# Nothing destructive happens until a usable backup has been confirmed.
# Returns: 0 when the recovery steps ran, 1 when recovery was aborted.
# NOTE(review): if the newest backup itself holds fewer rows than the
# thresholds, every run will drop and restore again - confirm the thresholds
# match what the backups contain.
#######################################
recover_database() {
  local backup output migration agents_after microdaos_final agents_final

  backup=$(latest_backup)
  if [ -z "$backup" ]; then
    log "❌ No backup found for recovery"
    return 1
  fi
  log "📦 Found backup: $backup"

  # Refuse to drop the live database for a backup that cannot restore it.
  if [ ! -s "$backup" ] || [ ! -r "$backup" ]; then
    log "❌ Backup is empty or unreadable, leaving database untouched"
    return 1
  fi

  # Migrations and helper scripts are addressed relative to the app dir;
  # make sure it is reachable before anything is dropped.
  if ! cd "$APP_DIR"; then
    log "❌ Cannot change directory to $APP_DIR, leaving database untouched"
    return 1
  fi

  log "🔄 Restoring from backup..."

  # Terminate all connections to the database first (DROP fails otherwise).
  log "🔒 Terminating active connections..."
  docker exec "$DB_CONTAINER" psql -U postgres -c "SELECT pg_terminate_backend(pid) FROM pg_stat_activity WHERE datname = 'daarion' AND pid <> pg_backend_pid();" 2>&1 \
    | grep -v "terminate_backend\|^$" || true
  sleep 2

  # Drop and recreate the database; stop if either step fails instead of
  # restoring into a stale or missing database.
  if ! output=$(docker exec "$DB_CONTAINER" psql -U postgres \
    -c "DROP DATABASE IF EXISTS daarion;" 2>&1); then
    log "❌ Failed to drop database: $output"
    return 1
  fi
  if ! output=$(docker exec "$DB_CONTAINER" psql -U postgres \
    -c "CREATE DATABASE daarion;" 2>&1); then
    log "❌ Failed to create database: $output"
    return 1
  fi

  # Restore from backup. Best-effort: the expected "already exists" /
  # "does not exist" notices are filtered out, and grep's non-zero status
  # on empty output is ignored.
  docker exec -i "$DB_CONTAINER" psql -U postgres -d daarion < "$backup" 2>&1 \
    | grep -v "already exists\|does not exist" || true

  # Apply migrations.
  for migration in migrations/*.sql; do
    [[ -f "$migration" ]] || continue
    docker exec -i "$DB_CONTAINER" psql -U postgres -d daarion < "$migration" 2>&1 \
      | grep -v "already exists\|does not exist" || true
  done

  # Sync NODE2 agents (force sync even if they exist).
  log "🤖 Syncing NODE2 agents..."
  python3 scripts/sync-node2-dagi-agents.py 2>&1 | tail -10 || true

  # Verify agent count after sync.
  agents_after=$(table_count agents)
  log "📊 Agents after sync: $agents_after"

  # Remove test agents.
  bash scripts/remove-test-agents.sh 2>&1 | tail -3 || true

  # Fix asset URLs (logos and banners).
  log "🖼️ Fixing asset URLs..."
  bash scripts/fix-asset-urls.sh 2>&1 | tail -5 || true

  # Only claim success when the data is actually back.
  microdaos_final=$(table_count microdaos)
  agents_final=$(table_count agents)
  if [ "$microdaos_final" -lt "$MIN_MICRODAOS" ] \
    || [ "$agents_final" -lt "$MIN_AGENTS" ]; then
    log "⚠️ Recovery finished but data is still below thresholds: MicroDAOs=$microdaos_final, Agents=$agents_final"
    return 1
  fi
  log "✅ Recovery complete"
}

main() {
  log "🔍 Starting database stability check..."

  # Check if PostgreSQL container is running.
  if ! docker ps | grep -q "$DB_CONTAINER"; then
    log "❌ PostgreSQL container is not running! Starting..."
    cd "$APP_DIR" || {
      log "❌ Cannot change directory to $APP_DIR"
      exit 1
    }
    docker compose -f docker-compose.db.yml up -d db || {
      log "❌ Failed to start PostgreSQL container"
      exit 1
    }
    sleep 10
  fi

  # Check if database exists.
  if ! docker exec "$DB_CONTAINER" psql -U postgres -lqt \
    | cut -d \| -f 1 | grep -qw daarion; then
    log "⚠️ Database 'daarion' does not exist, creating..."
    docker exec "$DB_CONTAINER" psql -U postgres -c "CREATE DATABASE daarion;" || {
      log "❌ Failed to create database"
      exit 1
    }
  fi

  # Check data integrity. A failed query counts as 0 rows.
  MICRODAO_COUNT=$(table_count microdaos)
  AGENT_COUNT=$(table_count agents)
  log "📊 Data check: MicroDAOs=$MICRODAO_COUNT, Agents=$AGENT_COUNT"

  # If data is missing, try to restore. A failed recovery has already been
  # logged; carry on with the remaining checks either way.
  if [ "$MICRODAO_COUNT" -lt "$MIN_MICRODAOS" ] \
    || [ "$AGENT_COUNT" -lt "$MIN_AGENTS" ]; then
    log "⚠️ Data loss detected! Attempting recovery..."
    recover_database || true
  fi

  # Check PostgreSQL logs for errors. `grep -c` exits 1 on zero matches,
  # which must not trip `set -e`; it still prints the count.
  ERROR_COUNT=$(docker logs "$DB_CONTAINER" --since 5m 2>&1 \
    | grep -ci "fatal\|error\|panic" || true)
  ERROR_COUNT=$(to_int "$ERROR_COUNT")
  if [ "$ERROR_COUNT" -gt 0 ]; then
    log "⚠️ Found $ERROR_COUNT errors in PostgreSQL logs in last 5 minutes"
  fi

  # Check container restart count.
  RESTART_COUNT=$(docker inspect "$DB_CONTAINER" --format='{{.RestartCount}}' 2>/dev/null || echo "0")
  RESTART_COUNT=$(to_int "$RESTART_COUNT")
  if [ "$RESTART_COUNT" -gt 10 ]; then
    log "⚠️ PostgreSQL has restarted $RESTART_COUNT times - possible stability issue"
  fi

  log "✅ Stability check complete"
}

main "$@"