Files
microdao-daarion/scripts/node-bootstrap.sh
Apple bca81dc719 feat: Node Self-Healing, DAGI Audit, Agent Prompts, Infra Invariants
### Backend (city-service)
- Node Registry + Self-Healing API (migration 039)
- Improved get_all_nodes() with robust fallback for node_registry/node_cache
- Agent Prompts Runtime API for DAGI Router integration
- DAGI Router Audit endpoints (phantom/stale detection)
- Node Agents API (Guardian/Steward)
- Node metrics extended (CPU/GPU/RAM/Disk)

### Frontend (apps/web)
- Node Directory with improved error handling
- Node Cabinet with metrics cards
- DAGI Router Card component
- Node Metrics Card component
- useDAGIAudit hook

### Scripts
- check-invariants.py - deploy verification
- node-bootstrap.sh - node self-registration
- node-guardian-loop.py - continuous self-healing
- dagi_agent_audit.py - DAGI audit utility

### Migrations
- 034: Agent prompts seed
- 035: Agent DAGI audit
- 036: Node metrics extended
- 037: Node agents complete
- 038: Agent prompts full coverage
- 039: Node registry self-healing

### Tests
- test_infra_smoke.py
- test_agent_prompts_runtime.py
- test_dagi_router_api.py

### Documentation
- DEPLOY_CHECKLIST_2024_11_30.md
- Multiple TASK_PHASE docs
2025-11-30 13:52:01 -08:00

156 lines
4.2 KiB
Bash
Executable File

#!/usr/bin/env bash
#
# DAARION Node Bootstrap Script
# Performs node self-registration with city-service at startup.
#
# Usage:
#   ./scripts/node-bootstrap.sh
#
# Environment variables:
#   CITY_SERVICE_URL  - city-service URL (default: http://localhost:7001)
#   NODE_ID           - Node identifier (required)
#   NODE_NAME         - Node name (required)
#   NODE_ENVIRONMENT  - production|development|staging (default: development)
#   NODE_HOSTNAME     - Hostname (optional; defaults to the output of `hostname`)
#   NODE_ROLES        - Comma-separated roles: gpu,ai_runtime,storage (default: gpu,ai_runtime)
#   NODE_DESCRIPTION  - Node description (optional)
# Strict mode: abort on command failure, on use of an unset variable, and
# when any stage of a pipeline fails.
set -o errexit -o nounset -o pipefail

# ANSI color escape sequences. They are stored as literal backslash text and
# only become real escapes when a logger expands them.
NC="\033[0m"
BLUE="\033[0;34m"
YELLOW="\033[1;33m"
GREEN="\033[0;32m"
RED="\033[0;31m"
#######################################
# Print an informational message with a blue [NODE-BOOTSTRAP] prefix.
# Globals:   BLUE, NC (read)
# Arguments: $1 - message text (backslash escapes in it are expanded)
# Outputs:   the formatted line on stdout
#######################################
log_info() {
  local message=$1
  printf '%b\n' "${BLUE}[NODE-BOOTSTRAP]${NC} ${message}"
}
#######################################
# Print a success message with a green [NODE-BOOTSTRAP] prefix.
# A space separates the prefix from the message, matching log_info and
# log_warning. The message itself is printed verbatim (no escape expansion).
# Globals:   GREEN, NC (read)
# Arguments: $1 - message text
# Outputs:   the formatted line on stdout
#######################################
log_success() {
  local message=${1:-}
  printf '%b[NODE-BOOTSTRAP]%b %s\n' "${GREEN}" "${NC}" "${message}"
}
#######################################
# Print an error message with a red [NODE-BOOTSTRAP] prefix.
# A space separates the prefix from the message, and the line goes to stderr
# so diagnostics do not mix with regular output. The message itself is
# printed verbatim (no escape expansion).
# Globals:   RED, NC (read)
# Arguments: $1 - message text
# Outputs:   the formatted line on stderr
#######################################
log_error() {
  local message=${1:-}
  printf '%b[NODE-BOOTSTRAP]%b %s\n' "${RED}" "${NC}" "${message}" >&2
}
#######################################
# Print a warning message with a yellow [NODE-BOOTSTRAP] prefix and a
# warning sign.
# Globals:   YELLOW, NC (read)
# Arguments: $1 - message text (backslash escapes in it are expanded)
# Outputs:   the formatted line on stdout
#######################################
log_warning() {
  local message=$1
  printf '%b\n' "${YELLOW}[NODE-BOOTSTRAP]${NC} ⚠️ ${message}"
}
# Configuration: every setting can come from the environment. Each := below
# assigns the default only when the variable is unset or empty.
: "${CITY_SERVICE_URL:=http://localhost:7001}"
: "${NODE_ID:=}"
: "${NODE_NAME:=}"
: "${NODE_ENVIRONMENT:=development}"
# Fall back to the machine's hostname, or an empty string if `hostname` fails.
: "${NODE_HOSTNAME:=$(hostname 2>/dev/null || echo '')}"
: "${NODE_ROLES:=gpu,ai_runtime}"
: "${NODE_DESCRIPTION:=}"

# Retry settings: number of registration attempts and the pause in seconds
# between them.
MAX_RETRIES=5
RETRY_DELAY=5
# Validate required params: stop with exit status 1 at the first one that
# is missing, before any network activity.
[ -n "$NODE_ID" ] || {
  log_error "NODE_ID is required"
  exit 1
}

[ -n "$NODE_NAME" ] || {
  log_error "NODE_NAME is required"
  exit 1
}
# Convert roles to a JSON array and build the registration payload.
# Every value is escaped before being embedded, so a quote, backslash or
# newline in a NODE_* variable cannot produce an invalid JSON document.

#######################################
# Escape a string for embedding inside a JSON string literal.
# Handles backslash, double quote, newline, carriage return and tab; other
# control characters are passed through unchanged.
# Arguments: $1 - raw string
# Outputs:   escaped string on stdout (no trailing newline)
#######################################
json_escape() {
  local text=${1-}
  text=${text//\\/\\\\} # backslash first, so later escapes are not doubled
  text=${text//\"/\\\"}
  text=${text//$'\n'/\\n}
  text=${text//$'\r'/\\r}
  text=${text//$'\t'/\\t}
  printf '%s' "${text}"
}

#######################################
# Turn a comma-separated role list into a JSON array of strings.
# Whitespace around each role is trimmed and empty entries are skipped.
# Arguments: $1 - comma-separated roles, e.g. "gpu, ai_runtime"
# Outputs:   JSON array on stdout, e.g. ["gpu", "ai_runtime"]; [] when empty
#######################################
roles_to_json() {
  local csv=${1-}
  local entry idx joined=""
  local -a parts=()

  IFS=',' read -ra parts <<< "${csv}"
  for idx in "${!parts[@]}"; do
    entry=${parts[idx]}
    # Strip leading, then trailing, whitespace.
    entry="${entry#"${entry%%[![:space:]]*}"}"
    entry="${entry%"${entry##*[![:space:]]}"}"
    if [[ -z "${entry}" ]]; then
      continue
    fi
    joined+="${joined:+, }\"$(json_escape "${entry}")\""
  done

  printf '[%s]' "${joined}"
}

#######################################
# Render the JSON body sent to the register-or-update endpoint.
# Globals:   NODE_ID, NODE_NAME, NODE_HOSTNAME, NODE_ENVIRONMENT, NODE_ROLES,
#            NODE_DESCRIPTION (read)
# Outputs:   JSON document on stdout
#######################################
build_payload() {
  cat <<EOF
{
  "id": "$(json_escape "${NODE_ID}")",
  "name": "$(json_escape "${NODE_NAME}")",
  "hostname": "$(json_escape "${NODE_HOSTNAME}")",
  "environment": "$(json_escape "${NODE_ENVIRONMENT}")",
  "roles": $(roles_to_json "${NODE_ROLES}"),
  "description": "$(json_escape "${NODE_DESCRIPTION}")"
}
EOF
}

roles_json=$(roles_to_json "${NODE_ROLES}")
payload=$(build_payload)
#######################################
# Log the effective configuration before the first registration attempt.
# Globals:   NODE_ID, NODE_NAME, NODE_ENVIRONMENT, NODE_HOSTNAME, NODE_ROLES,
#            CITY_SERVICE_URL (read)
# Outputs:   one log_info line per setting
#######################################
print_startup_summary() {
  local summary_line
  local -a summary=(
    "Starting node self-registration..."
    " Node ID: $NODE_ID"
    " Node Name: $NODE_NAME"
    " Environment: $NODE_ENVIRONMENT"
    " Hostname: $NODE_HOSTNAME"
    " Roles: $NODE_ROLES"
    " City Service: $CITY_SERVICE_URL"
  )

  for summary_line in "${summary[@]}"; do
    log_info "${summary_line}"
  done
}

print_startup_summary
# Self-registration with retries

#######################################
# Report whether a JSON response body has the given key set to literal true.
# This is a textual match, not a JSON parse: it looks for "key": true with
# optional whitespace after the colon, using a POSIX character class rather
# than the GNU-only \s.
# Arguments: $1 - response body
#            $2 - key name (a plain identifier; it is placed into a regex)
# Returns:   0 when the flag is present and true, non-zero otherwise
#######################################
response_flag_is_true() {
  local body=${1-} key=${2-}
  grep -Eq "\"${key}\":[[:space:]]*true" <<< "${body}"
}

attempt=1
while (( attempt <= MAX_RETRIES )); do
  log_info "Registration attempt $attempt/$MAX_RETRIES..."

  # -w appends the HTTP status on its own line after the body. If curl itself
  # fails (or is missing), a "000" status line is appended so the parsing
  # below always finds a status.
  response=$(curl -s -w "\n%{http_code}" \
    -X POST "${CITY_SERVICE_URL}/internal/nodes/register-or-update" \
    -H "Content-Type: application/json" \
    -d "$payload" \
    --max-time 10 \
    2>/dev/null || printf '\n000')

  # The last line is the status code; everything before it is the body.
  http_code=${response##*$'\n'}
  body=${response%$'\n'*}

  if [[ "${http_code}" == "200" ]] && response_flag_is_true "${body}" "success"; then
    if response_flag_is_true "${body}" "is_new"; then
      log_success "Node registered successfully (new registration)"
    else
      log_success "Node updated successfully"
    fi

    # Optional: run initial heartbeat. It is best-effort by design, so its
    # failure is ignored and does not fail the bootstrap.
    # NOTE(review): this path uses /internal/node/ (singular) while
    # registration uses /internal/nodes/ - confirm against the API.
    log_info "Sending initial heartbeat..."
    curl -s -X POST "${CITY_SERVICE_URL}/internal/node/${NODE_ID}/heartbeat" \
      -H "Content-Type: application/json" \
      -d '{"metrics": {}}' \
      --max-time 5 >/dev/null 2>&1 || true

    log_success "Node bootstrap completed"
    exit 0
  fi

  log_warning "Registration failed (HTTP $http_code)"
  # Show (truncated) what the server answered so a rejection can be
  # diagnosed. Skipped when there was no HTTP response at all.
  if [[ "${http_code}" != "000" && -n "${body}" ]]; then
    log_warning "Response body: ${body:0:500}"
  fi

  if (( attempt < MAX_RETRIES )); then
    log_info "Retrying in ${RETRY_DELAY}s..."
    sleep "${RETRY_DELAY}"
  fi
  attempt=$((attempt + 1))
done
#######################################
# Report that every registration attempt failed and list what to check.
# Globals:   MAX_RETRIES, CITY_SERVICE_URL (read)
# Outputs:   one log_error line per message
#######################################
report_registration_failure() {
  local failure_line
  local -a failure_lines=(
    "Node registration failed after $MAX_RETRIES attempts"
    "Please check:"
    " 1. City service is running at $CITY_SERVICE_URL"
    " 2. Migration 039_node_registry_self_healing.sql is applied"
    " 3. Network connectivity to city service"
  )

  for failure_line in "${failure_lines[@]}"; do
    log_error "${failure_line}"
  done
}

# Reached only when the retry loop ran out of attempts.
report_registration_failure
exit 1