feat: Node Self-Healing, DAGI Audit, Agent Prompts, Infra Invariants
### Backend (city-service)
- Node Registry + Self-Healing API (migration 039)
- Improved get_all_nodes() with robust fallback for node_registry/node_cache
- Agent Prompts Runtime API for DAGI Router integration
- DAGI Router Audit endpoints (phantom/stale detection)
- Node Agents API (Guardian/Steward)
- Node metrics extended (CPU/GPU/RAM/Disk)

### Frontend (apps/web)
- Node Directory with improved error handling
- Node Cabinet with metrics cards
- DAGI Router Card component
- Node Metrics Card component
- useDAGIAudit hook

### Scripts
- check-invariants.py - deploy verification
- node-bootstrap.sh - node self-registration
- node-guardian-loop.py - continuous self-healing
- dagi_agent_audit.py - DAGI audit utility

### Migrations
- 034: Agent prompts seed
- 035: Agent DAGI audit
- 036: Node metrics extended
- 037: Node agents complete
- 038: Agent prompts full coverage
- 039: Node registry self-healing

### Tests
- test_infra_smoke.py
- test_agent_prompts_runtime.py
- test_dagi_router_api.py

### Documentation
- DEPLOY_CHECKLIST_2024_11_30.md
- Multiple TASK_PHASE docs
This commit is contained in:
@@ -188,18 +188,64 @@ echo " - Docker logs: docker logs <container_name>"
|
||||
echo " - Application logs: $LOG_DIR"
|
||||
echo " - Caddy logs: docker logs daarion-caddy"
|
||||
|
||||
# Run infrastructure invariants check
|
||||
log_info "Running infrastructure invariants check..."
|
||||
INVARIANTS_FAILED=0
|
||||
|
||||
# Wait a bit more for services to fully initialize
|
||||
sleep 5
|
||||
|
||||
# Run invariants check
|
||||
if [ -f "scripts/check-invariants.py" ]; then
|
||||
# Try to run invariants check
|
||||
if command -v python3 &> /dev/null; then
|
||||
# Use internal Docker network URL or localhost
|
||||
CITY_URL="${CITY_SERVICE_URL:-http://localhost:7001}"
|
||||
|
||||
python3 scripts/check-invariants.py --base-url "$CITY_URL" || {
|
||||
INVARIANTS_FAILED=1
|
||||
log_error "Infrastructure invariants check FAILED!"
|
||||
}
|
||||
else
|
||||
log_warning "Python3 not found, skipping invariants check"
|
||||
fi
|
||||
else
|
||||
log_warning "check-invariants.py not found, skipping invariants check"
|
||||
fi
|
||||
|
||||
# Run smoke tests (optional)
|
||||
if [ -f "tests/test_infra_smoke.py" ] && [ "$RUN_SMOKE_TESTS" = "true" ]; then
|
||||
log_info "Running smoke tests..."
|
||||
pytest tests/test_infra_smoke.py -v --tb=short || {
|
||||
log_warning "Some smoke tests failed (non-blocking)"
|
||||
}
|
||||
fi
|
||||
|
||||
# Success message
|
||||
echo ""
|
||||
if [ $HEALTH_FAILED -eq 0 ]; then
|
||||
if [ $HEALTH_FAILED -eq 0 ] && [ $INVARIANTS_FAILED -eq 0 ]; then
|
||||
log_success "🎉 Deployment completed successfully!"
|
||||
echo ""
|
||||
echo " 🌐 Application: https://app.daarion.space"
|
||||
echo " 📊 Monitoring: https://app.daarion.space/grafana/"
|
||||
echo ""
|
||||
echo " ✅ All infrastructure invariants passed"
|
||||
echo ""
|
||||
echo " Next steps:"
|
||||
echo " 1. Run smoke tests: docs/DEPLOY_SMOKETEST_CHECKLIST.md"
|
||||
echo " 1. Run smoke tests: RUN_SMOKE_TESTS=true ./scripts/deploy-prod.sh"
|
||||
echo " 2. Monitor logs: docker logs -f daarion-gateway"
|
||||
echo " 3. Check metrics: docker stats"
|
||||
elif [ $INVARIANTS_FAILED -eq 1 ]; then
|
||||
log_error "Deployment completed but INVARIANTS CHECK FAILED!"
|
||||
echo ""
|
||||
echo " ❌ Some infrastructure invariants are not met."
|
||||
echo " Please review the output above and fix the issues."
|
||||
echo ""
|
||||
echo " Common fixes:"
|
||||
echo " 1. Run migrations: scripts/migrate.sh"
|
||||
echo " 2. Seed agents: psql < migrations/038_agent_prompts_full_coverage.sql"
|
||||
echo " 3. Check node_cache: psql < migrations/036_node_metrics_extended.sql"
|
||||
exit 1
|
||||
else
|
||||
log_error "Deployment completed with errors. Check logs above."
|
||||
exit 1
|
||||
|
||||
Reference in New Issue
Block a user