Files
microdao-daarion/scripts/deploy-prod.sh
Apple bca81dc719 feat: Node Self-Healing, DAGI Audit, Agent Prompts, Infra Invariants
### Backend (city-service)
- Node Registry + Self-Healing API (migration 039)
- Improved get_all_nodes() with robust fallback for node_registry/node_cache
- Agent Prompts Runtime API for DAGI Router integration
- DAGI Router Audit endpoints (phantom/stale detection)
- Node Agents API (Guardian/Steward)
- Node metrics extended (CPU/GPU/RAM/Disk)

### Frontend (apps/web)
- Node Directory with improved error handling
- Node Cabinet with metrics cards
- DAGI Router Card component
- Node Metrics Card component
- useDAGIAudit hook

### Scripts
- check-invariants.py - deploy verification
- node-bootstrap.sh - node self-registration
- node-guardian-loop.py - continuous self-healing
- dagi_agent_audit.py - DAGI audit utility

### Migrations
- 034: Agent prompts seed
- 035: Agent DAGI audit
- 036: Node metrics extended
- 037: Node agents complete
- 038: Agent prompts full coverage
- 039: Node registry self-healing

### Tests
- test_infra_smoke.py
- test_agent_prompts_runtime.py
- test_dagi_router_api.py

### Documentation
- DEPLOY_CHECKLIST_2024_11_30.md
- Multiple TASK_PHASE docs
2025-11-30 13:52:01 -08:00

254 lines
7.0 KiB
Bash
Raw Blame History

This file contains invisible Unicode characters
This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
#!/bin/bash
#
# DAARION Production Deployment Script
# Usage: ./scripts/deploy-prod.sh
#
# Abort the deployment as soon as any unguarded command fails.
set -e

# ANSI colour sequences. They are stored with a literal backslash and are only
# turned into real escape characters when printed with escape expansion.
GREEN="\033[0;32m"
RED="\033[0;31m"
YELLOW="\033[1;33m"
BLUE="\033[0;34m"
NC="\033[0m" # resets the terminal colour
# Configuration
# Compose definitions and the environment file are resolved relative to the
# directory the script is started from.
COMPOSE_FILE='docker-compose.all.yml'
CADDY_COMPOSE_FILE='docker-compose.caddy.yml'
ENV_FILE='.env'

# Host directories used for application logs and database dumps.
LOG_DIR='/var/log/daarion'
BACKUP_DIR='/opt/daarion/backups'
# Functions
# Print an informational message in blue on stdout.
# Globals:   BLUE, NC (read)
# Arguments: $1 - message text, printed verbatim
# Only the colour codes are escape-expanded (%b); the message goes through %s
# so backslashes inside it (e.g. in a path) are not interpreted.
log_info() {
  printf '%b %s%b\n' "${BLUE}" "$1" "${NC}"
}
# Print a success message in green on stdout.
# Globals:   GREEN, NC (read)
# Arguments: $1 - message text, printed verbatim
# Only the colour codes are escape-expanded (%b); the message goes through %s
# so backslashes inside it are not interpreted.
log_success() {
  printf '%b%s%b\n' "${GREEN}" "$1" "${NC}"
}
# Print an error message in red on stdout (stream kept as in the original so
# redirected deploy logs still capture it).
# Globals:   RED, NC (read)
# Arguments: $1 - message text, printed verbatim
# Only the colour codes are escape-expanded (%b); the message goes through %s
# so backslashes inside it are not interpreted.
log_error() {
  printf '%b%s%b\n' "${RED}" "$1" "${NC}"
}
# Print a warning message in yellow, prefixed with a warning sign, on stdout.
# Globals:   YELLOW, NC (read)
# Arguments: $1 - message text, printed verbatim
# Only the colour codes are escape-expanded (%b); the message goes through %s
# so backslashes inside it are not interpreted.
log_warning() {
  printf '%b⚠️ %s%b\n' "${YELLOW}" "$1" "${NC}"
}
# Banner: switch to blue, print the boxed title verbatim, then reset the colour.
printf '%b\n' "${BLUE}"
printf '%s\n' \
  '╔══════════════════════════════════════╗' \
  '║ DAARION Production Deployment ║' \
  '║ daarion.space ║' \
  '╚══════════════════════════════════════╝'
printf '%b\n' "${NC}"
# Pre-flight checks
log_info "Running pre-flight checks..."

# Privilege check: only a warning, the deployment continues either way.
# It fires when the effective UID is not root and SUDO_USER is empty.
if [[ "$EUID" -ne 0 && -z "$SUDO_USER" ]]; then
  log_warning "Not running as root. Some commands may require sudo."
fi
# Docker CLI must be resolvable on PATH; abort otherwise.
command -v docker > /dev/null 2>&1 || {
  log_error "Docker is not installed!"
  exit 1
}
log_success "Docker found"

# The Compose v2 plugin ("docker compose") must respond; abort otherwise.
docker compose version > /dev/null 2>&1 || {
  log_error "Docker Compose is not installed!"
  exit 1
}
log_success "Docker Compose found"
# The environment file must exist as a regular file in the working directory.
if [[ -f "$ENV_FILE" ]]; then
  log_success ".env file found"
else
  log_error ".env file not found! Copy .env.example and configure it."
  exit 1
fi

# The main compose file is mandatory. The Caddy compose file is not checked
# here; it is handled later, when Caddy is started.
if [[ -f "$COMPOSE_FILE" ]]; then
  log_success "Compose files found"
else
  log_error "$COMPOSE_FILE not found!"
  exit 1
fi
# Make sure the log and backup directories exist (parents included).
# Created one at a time so a failure stops the script at the first bad path.
for required_dir in "$LOG_DIR" "$BACKUP_DIR"; do
  mkdir -p "$required_dir"
done
log_success "Directories created"
# Network setup
log_info "Setting up Docker network..."
# Probe for the network first. Previously every failure of "network create"
# (daemon unreachable, permission denied, ...) was reported as "already exists"
# and then as "ready"; now only a genuinely existing network takes that path.
if docker network inspect daarion-network > /dev/null 2>&1; then
  log_warning "Network already exists"
elif ! docker network create daarion-network; then
  # Creation failed for a real reason; docker's own error stays visible above.
  log_error "Failed to create Docker network daarion-network"
  exit 1
fi
log_success "Network ready"
# Database backup (best-effort: a failed dump warns but does not abort).
log_info "Creating database backup..."
BACKUP_FILE="$BACKUP_DIR/backup_$(date +%Y%m%d_%H%M%S).sql"
if docker ps --filter "name=daarion-postgres" --format "{{.Names}}" | grep -q postgres; then
  # The output redirection creates $BACKUP_FILE even when pg_dump fails, so
  # file existence proves nothing. Success requires a zero exit status from
  # pg_dump AND a non-empty dump file.
  if docker compose -f "$COMPOSE_FILE" exec -T postgres \
    pg_dump -U daarion_user daarion > "$BACKUP_FILE" 2> /dev/null \
    && [ -s "$BACKUP_FILE" ]; then
    gzip "$BACKUP_FILE"
    log_success "Backup created: ${BACKUP_FILE}.gz"
  else
    # Remove the empty/partial dump so it is never mistaken for a valid backup.
    rm -f -- "$BACKUP_FILE"
    log_warning "Backup failed (DB may not exist yet)"
  fi
else
  log_warning "PostgreSQL not running, skipping backup"
fi
# Refresh images from the registry; a failed pull is tolerated because the
# images are built locally right afterwards.
log_info "Pulling latest images..."
if ! docker compose -f "$COMPOSE_FILE" pull; then
  log_warning "Pull failed, will build locally"
fi

# Build every image defined in the compose file; a build failure aborts.
log_info "Building Docker images..."
docker compose -f "$COMPOSE_FILE" build
log_success "Images built"
# Bring up the backing services first, then pause for a fixed 10 seconds
# before the migration step runs.
log_info "Starting core services (postgres, redis, nats)..."
core_services=(postgres redis nats)
docker compose -f "$COMPOSE_FILE" up -d "${core_services[@]}"
sleep 10
log_success "Core services started"
# Apply database migrations through the helper script when it is present.
log_info "Running database migrations..."
if [[ ! -f "scripts/migrate.sh" ]]; then
  log_warning "migrate.sh not found, skipping migrations"
else
  bash scripts/migrate.sh
fi
# Launch the complete stack in detached mode.
log_info "Starting all application services..."
docker compose -f "$COMPOSE_FILE" up -d
log_success "Application services started"

# Fixed 15-second pause before the later checks; no health status is polled here.
log_info "Waiting for services to be healthy..."
sleep 15
# Caddy (SSL/HTTPS) is optional: it is started only when its compose file exists.
if [[ ! -f "$CADDY_COMPOSE_FILE" ]]; then
  log_warning "Caddy compose file not found"
else
  log_info "Starting Caddy (SSL/HTTPS)..."
  docker compose -f "$CADDY_COMPOSE_FILE" up -d
  log_success "Caddy started"
fi
# Health checks
log_info "Running health checks..."
# Set to 1 by any failing check below; read by the final summary to pick the
# exit status.
HEALTH_FAILED=0

# Check PostgreSQL
# -T disables pseudo-TTY allocation, matching the backup step, so the probe
# also works without a terminal (CI, cron, ssh without -t) instead of failing
# there and reporting a false "unhealthy".
if docker compose -f "$COMPOSE_FILE" exec -T postgres pg_isready -U daarion_user > /dev/null 2>&1; then
  log_success "PostgreSQL: healthy"
else
  log_error "PostgreSQL: unhealthy"
  HEALTH_FAILED=1
fi

# Check Redis (same -T rationale as above)
if docker compose -f "$COMPOSE_FILE" exec -T redis redis-cli PING > /dev/null 2>&1; then
  log_success "Redis: healthy"
else
  log_error "Redis: unhealthy"
  HEALTH_FAILED=1
fi
# Check Gateway (wait for Caddy SSL)
sleep 5
# --max-time bounds the whole request: curl has no overall timeout by default,
# so a stalled connection or TLS handshake could otherwise hang the deploy.
# A failure here is only a warning and does not set HEALTH_FAILED.
if curl -sf --max-time 10 https://app.daarion.space/health > /dev/null 2>&1; then
  log_success "Gateway: healthy"
else
  log_warning "Gateway: not yet accessible (SSL may be provisioning)"
fi
# List containers whose table row mentions "daarion"; an empty match is not an
# error, hence the no-op fallback.
printf '\n'
log_info "Running containers:"
docker ps --format 'table {{.Names}}\t{{.Status}}\t{{.Ports}}' | grep daarion || :

# Tell the operator where to look for logs.
printf '\n'
log_info "Logs available at:"
printf '%s\n' \
  ' - Docker logs: docker logs <container_name>' \
  " - Application logs: $LOG_DIR" \
  ' - Caddy logs: docker logs daarion-caddy'
# Infrastructure invariants check.
log_info "Running infrastructure invariants check..."
# Becomes 1 only when the checker actually ran and returned non-zero;
# a skipped check leaves it at 0.
INVARIANTS_FAILED=0

# Extra fixed pause before probing the services.
sleep 5

if [[ ! -f "scripts/check-invariants.py" ]]; then
  log_warning "check-invariants.py not found, skipping invariants check"
elif ! command -v python3 > /dev/null 2>&1; then
  log_warning "Python3 not found, skipping invariants check"
else
  # CITY_SERVICE_URL overrides the target; the default is the local port 7001.
  CITY_URL="${CITY_SERVICE_URL:-http://localhost:7001}"
  if ! python3 scripts/check-invariants.py --base-url "$CITY_URL"; then
    INVARIANTS_FAILED=1
    log_error "Infrastructure invariants check FAILED!"
  fi
fi
# Optional smoke tests: opt in with RUN_SMOKE_TESTS=true. Failures are reported
# but never change the exit status.
if [[ "$RUN_SMOKE_TESTS" == "true" && -f "tests/test_infra_smoke.py" ]]; then
  log_info "Running smoke tests..."
  if ! pytest tests/test_infra_smoke.py -v --tb=short; then
    log_warning "Some smoke tests failed (non-blocking)"
  fi
fi
# Final summary. Exit status: 0 only when both the health checks and the
# invariants check left their failure flags at 0; otherwise 1.
printf '\n'
if [[ "$HEALTH_FAILED" -eq 0 && "$INVARIANTS_FAILED" -eq 0 ]]; then
  log_success "🎉 Deployment completed successfully!"
  printf '%s\n' \
    '' \
    ' 🌐 Application: https://app.daarion.space' \
    ' 📊 Monitoring: https://app.daarion.space/grafana/' \
    '' \
    ' ✅ All infrastructure invariants passed' \
    '' \
    ' Next steps:' \
    ' 1. Run smoke tests: RUN_SMOKE_TESTS=true ./scripts/deploy-prod.sh' \
    ' 2. Monitor logs: docker logs -f daarion-gateway' \
    ' 3. Check metrics: docker stats'
elif [[ "$INVARIANTS_FAILED" -eq 1 ]]; then
  # The invariants failure takes precedence over a health-check failure.
  log_error "Deployment completed but INVARIANTS CHECK FAILED!"
  printf '%s\n' \
    '' \
    ' ❌ Some infrastructure invariants are not met.' \
    ' Please review the output above and fix the issues.' \
    '' \
    ' Common fixes:' \
    ' 1. Run migrations: scripts/migrate.sh' \
    ' 2. Seed agents: psql < migrations/038_agent_prompts_full_coverage.sql' \
    ' 3. Check node_cache: psql < migrations/036_node_metrics_extended.sql'
  exit 1
else
  log_error "Deployment completed with errors. Check logs above."
  exit 1
fi