microdao-daarion/scripts/node2/install_swoper_models_optimized.sh

#!/bin/bash
# Install Swoper with optimized models for Node-2
# CORRECTED: Only quantize models that don't fit (>60 GB)
# Smaller models can use full precision or q4 for speed
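#
# Usage (a sketch; assumes the script is run from the repository root):
#   chmod +x scripts/node2/install_swoper_models_optimized.sh
#   ./scripts/node2/install_swoper_models_optimized.sh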
set -eo pipefail  # pipefail: make "ollama pull | tee" report the pull's exit status, not tee's
echo "🚀 Installing Swoper with optimized models for microDAO Node-2"
echo "=================================================="
# Colors
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
RED='\033[0;31m'
BLUE='\033[0;34m'
NC='\033[0m' # No Color
# Check if the Swoper service exists (optional - models can be installed via Ollama)
SWAPPER_DIR=""
if [ -d "services/swapper" ]; then
    SWAPPER_DIR="services/swapper"
    echo -e "${GREEN}✅ Found Swoper at: ${SWAPPER_DIR}${NC}"
elif [ -d "/opt/microdao-daarion/services/swapper" ]; then
    SWAPPER_DIR="/opt/microdao-daarion/services/swapper"
    echo -e "${GREEN}✅ Found Swoper at: ${SWAPPER_DIR}${NC}"
else
    echo -e "${YELLOW}⚠️ Swoper service not found in project.${NC}"
    echo -e "${YELLOW}   Models will be installed via Ollama.${NC}"
    echo -e "${YELLOW}   A Swoper configuration will be created for future use.${NC}"
fi
# Models configuration - OPTIMIZED
# Format: model_key => "ollama_name|quantization|size_gb|priority|reason"
# Pipe-delimited, because Ollama model names (e.g. starcoder2:34b) contain colons.
# Associative arrays require bash >= 4 (macOS ships 3.2; `brew install bash` if needed)
if [ "${BASH_VERSINFO[0]}" -lt 4 ]; then
    echo -e "${RED}❌ This script requires bash >= 4 (found ${BASH_VERSION})${NC}"
    exit 1
fi
declare -A MODELS=(
    # 🔴 OBLIGATORY q4/q5 (>60 GB at full precision, doesn't fit in 64 GB RAM)
    ["deepseek-r1"]="deepseek-r1|q4|40|high|OBLIGATORY_67GB_full"
    ["qwen-code-72b"]="qwen2.5-coder-72b-instruct|q4|40|high|OBLIGATORY_144GB_full"
    ["deepseek-math-33b"]="deepseek-math:33b|q4|20|high|OBLIGATORY_66GB_full"
    ["starcoder2-34b"]="starcoder2:34b|q4|20|medium|OBLIGATORY_68GB_full"
    ["qwen-vl-32b"]="qwen2-vl:32b-instruct|q4|20|high|OBLIGATORY_64GB_full_better_quality"
    # 🟡 RECOMMENDED q4 (40-60 GB at full precision; fits, but q4 performs better)
    ["gemma-30b"]="gemma2:27b-it|q4|18|medium|RECOMMENDED_60GB_full"
    ["mistral-22b"]="mistral-nemo:22b|q4|13|medium|RECOMMENDED_44GB_full"
    # 🟢 OPTIONAL q4 or full (<40 GB, full precision is viable)
    ["mistral-13b"]="mistral:13b-instruct|full|26|medium|OPTIONAL_can_use_full"
    ["gpt-oss-20b"]="gpt-oss:20b|full|40|low|OPTIONAL_can_use_full"
    ["qwen-vl-7b"]="qwen2-vl:7b-instruct|full|8|high|OPTIONAL_can_use_full"
    # Already quantized
    ["falcon-40b"]="falcon:40b-instruct|q4|24|low|ALREADY_Q4"
)
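# Illustration only - how a single entry is parsed (pipe-delimited, since
# Ollama names such as "starcoder2:34b" contain colons themselves):
#   entry="deepseek-r1|q4|40|high|OBLIGATORY_67GB_full"
#   IFS='|' read -r ollama_name quantization size_gb priority reason <<< "$entry"
#   -> ollama_name=deepseek-r1  quantization=q4  size_gb=40  priority=high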
# Create models directory
MODELS_DIR="$HOME/node2/swoper/models"
mkdir -p "$MODELS_DIR"
echo -e "\n${GREEN}📦 Installing models via Ollama...${NC}"
# Check if Ollama is running
if ! curl -s http://localhost:11434/api/tags > /dev/null 2>&1; then
    echo -e "${YELLOW}⚠️ Ollama is not running. Starting Ollama...${NC}"
    brew services start ollama || {
        echo -e "${RED}❌ Failed to start Ollama${NC}"
        exit 1
    }
    sleep 5
fi
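# Optional sanity check (hedged: GET /api/version exists in recent Ollama
# releases; this is a harmless no-op if it is unavailable).
OLLAMA_VERSION=$(curl -s http://localhost:11434/api/version 2>/dev/null | sed -n 's/.*"version":"\([^"]*\)".*/\1/p' || true)
if [ -n "$OLLAMA_VERSION" ]; then
    echo -e "${GREEN}✅ Ollama ${OLLAMA_VERSION} is reachable${NC}"
fi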
# Install models
INSTALLED=0
FAILED=0
echo -e "\n${BLUE}📋 Model Installation Strategy:${NC}"
echo -e "${RED} 🔴 OBLIGATORY q4/q5 (>60 GB):${NC} DeepSeek-R1, Qwen Code 72B, DeepSeek Math 33B, StarCoder2-34B, Qwen2-VL-32B"
echo -e "${YELLOW} 🟡 RECOMMENDED q4 (40-60 GB):${NC} Gemma 30B, Mistral 22B"
echo -e "${GREEN} 🟢 OPTIONAL full/q4 (<40 GB):${NC} Mistral 13B, GPT-OSS-20B, Qwen-VL-7B"
echo ""
for model_key in "${!MODELS[@]}"; do
    model_info="${MODELS[$model_key]}"
    IFS='|' read -r ollama_name quantization size_gb priority reason <<< "$model_info"

    # Construct the Ollama model name to try.
    # Note: Ollama quantization is usually baked into the tag or handled
    # automatically, so try an explicit ":q4"/":q5" suffix first, then fall
    # back to the name as-is.
    ollama_model="$ollama_name"
    ollama_model_quant=""
    quant_label="$quantization"
    if [ "$quantization" = "q4" ] || [ "$quantization" = "q5" ]; then
        ollama_model_quant="${ollama_name}:${quantization}"
    fi

    # Color based on priority tier
    if [[ "$reason" == OBLIGATORY* ]]; then
        color=$RED
        icon="🔴"
    elif [[ "$reason" == RECOMMENDED* ]]; then
        color=$YELLOW
        icon="🟡"
    else
        color=$GREEN
        icon="🟢"
    fi

    echo -e "\n${color}${icon} Installing: ${ollama_name} ${quant_label} (${size_gb} GB) [${priority} priority]${NC}"
    echo -e "${color}   Reason: ${reason}${NC}"

    # Try the explicitly quantized tag first (it may not exist in the registry)
    if [ -n "$ollama_model_quant" ]; then
        if ollama pull "$ollama_model_quant" 2>&1 | tee /tmp/ollama_install.log; then
            echo -e "${GREEN}✅ ${ollama_name} ${quant_label} installed${NC}"
            INSTALLED=$((INSTALLED + 1))
            continue
        fi
    fi

    # Fall back to the plain model name (Ollama may apply a default quantization)
    if ollama pull "$ollama_model" 2>&1 | tee /tmp/ollama_install.log; then
        echo -e "${GREEN}✅ ${ollama_name} ${quant_label} installed${NC}"
        INSTALLED=$((INSTALLED + 1))
    else
        echo -e "${YELLOW}  ⚠️ Model not found, checking available models...${NC}"
        # The model may already be present under a slightly different tag
        if ollama list 2>/dev/null | grep -qi "$ollama_name"; then
            echo -e "${GREEN}✅ ${ollama_name} already installed${NC}"
            INSTALLED=$((INSTALLED + 1))
        else
            echo -e "${RED}  ❌ Failed to install ${ollama_name}${NC}"
            echo -e "${YELLOW}  💡 Model may not be available in Ollama. Check: ollama list${NC}"
            FAILED=$((FAILED + 1))
        fi
    fi
done
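# Quick verification: list what Ollama actually has on disk now
echo -e "\n${BLUE}📋 Models currently known to Ollama:${NC}"
ollama list 2>/dev/null || echo -e "${YELLOW}⚠️ Could not query Ollama for the model list${NC}"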
echo -e "\n${GREEN}=================================================="
echo "📊 Installation Summary"
echo "==================================================${NC}"
echo -e " ✅ Installed: ${INSTALLED} models"
echo -e " ❌ Failed: ${FAILED} models"
echo ""
# Create Swoper configuration for Node-2
echo -e "${GREEN}📝 Creating Swoper configuration for Node-2...${NC}"
cat > "$HOME/node2/swoper/config_node2.yaml" << 'EOF'
# Swoper Configuration for microDAO Node-2
# Single-active LLM scheduler with optimized quantization:
# only large models (>60 GB) use q4/q5; smaller ones may run at full precision.

swoper:
  mode: single-active
  max_concurrent_models: 1
  model_swap_timeout: 30
  gpu_enabled: true
  metal_acceleration: true      # Apple Silicon Metal
  quantization_strategy: smart  # Only quantize when needed

models:
  # 🔴 OBLIGATORY q4/q5 (>60 GB, doesn't fit in 64 GB RAM)
  deepseek-r1:
    path: ollama:deepseek-r1:q4
    type: llm
    size_gb: 40
    priority: high
    quantization: q4
    reason: "67 GB full doesn't fit; q4 (40 GB) fits in 64 GB RAM"
  qwen-code-72b:
    path: ollama:qwen2.5-coder-72b-instruct:q4
    type: code
    size_gb: 40
    priority: high
    quantization: q4
    reason: "144 GB full doesn't fit; q4 (40 GB) required"
  deepseek-math-33b:
    path: ollama:deepseek-math:33b:q4
    type: math
    size_gb: 20
    priority: high
    quantization: q4
    reason: "66 GB full doesn't fit; q4 (20 GB) required"
  starcoder2-34b:
    path: ollama:starcoder2:34b:q4
    type: code
    size_gb: 20
    priority: medium
    quantization: q4
    reason: "68 GB full doesn't fit; q4 (20 GB) required"
  qwen-vl-32b:
    path: ollama:qwen2-vl:32b-instruct:q4
    type: vision
    size_gb: 20
    priority: high
    quantization: q4
    reason: "64 GB full doesn't fit; q4 (20 GB) gives better quality than 7B"
  # 🟡 RECOMMENDED q4 (40-60 GB; fits, but q4 performs better)
  gemma-30b:
    path: ollama:gemma2:27b-it:q4
    type: llm
    size_gb: 18
    priority: medium
    quantization: q4
    reason: "60 GB full fits, but q4 (18 GB) performs better"
  mistral-22b:
    path: ollama:mistral-nemo:22b:q4
    type: llm
    size_gb: 13
    priority: medium
    quantization: q4
    reason: "44 GB full fits, but q4 (13 GB) performs better"
  # 🟢 OPTIONAL full/q4 (<40 GB, full precision is viable)
  mistral-13b:
    path: ollama:mistral:13b-instruct
    type: llm
    size_gb: 26
    priority: medium
    quantization: full
    reason: "26 GB fits; full precision is fine, q4 optional for speed"
  gpt-oss-20b:
    path: ollama:gpt-oss:20b
    type: llm
    size_gb: 40
    priority: low
    quantization: full
    reason: "40 GB fits; can use full precision"
  qwen-vl-7b:
    path: ollama:qwen2-vl:7b-instruct
    type: vision
    size_gb: 8
    priority: high
    quantization: full
    reason: "8 GB fits; full precision (fast vision model)"
  falcon-40b:
    path: ollama:falcon:40b-instruct:q4
    type: llm
    size_gb: 24
    priority: low
    quantization: q4
    reason: "Already quantized"

storage:
  models_dir: ~/node2/swoper/models
  cache_dir: ~/node2/swoper/cache
  swap_dir: ~/node2/swoper/swap

ollama:
  url: http://localhost:11434
  timeout: 300

# GPU/VRAM info
hardware:
  ram_gb: 64
  gpu: "M4 Max 40-core"
  vram: "Shared with RAM (up to 64 GB)"
  metal_acceleration: true
EOF
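# Optional: validate the generated config (a sketch; assumes PyYAML is
# available to python3 - the check is skipped silently if it is not).
if python3 -c "import yaml" 2>/dev/null; then
    if python3 -c "import yaml; yaml.safe_load(open('$HOME/node2/swoper/config_node2.yaml'))" 2>/dev/null; then
        echo -e "${GREEN}✅ config_node2.yaml parses as valid YAML${NC}"
    else
        echo -e "${RED}❌ config_node2.yaml failed YAML validation${NC}"
    fi
fi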
echo -e "${GREEN}✅ Configuration saved to: $HOME/node2/swoper/config_node2.yaml${NC}"
# Calculate total size
TOTAL_SIZE=$(python3 << 'PYEOF'
# Only count models that will be installed
obligatory = [40, 40, 20, 20, 20] # q4 models that are required
recommended = [18, 13] # q4 models recommended
optional_full = [26, 40, 8] # full models
optional_q4 = [24] # already q4
total = sum(obligatory) + sum(recommended) + sum(optional_full) + sum(optional_q4)
print(f"{total}")
PYEOF
)
echo -e "\n${GREEN}📊 Total models size: ~${TOTAL_SIZE} GB${NC}"
echo -e "${GREEN} Available disk: 1.5 TB${NC}"
echo -e "${GREEN} Available RAM: 64 GB${NC}"
echo -e "${GREEN} ✅ Models will fit comfortably${NC}"
echo -e "\n${BLUE}💡 DeepSeek-R1 q4 (40 GB) Analysis:${NC}"
echo -e " - 64 GB RAM достатньо для 40 GB моделі ✅"
echo -e " - M4 Max Metal acceleration підтримується ✅"
echo -e " - Може працювати, але займе більшу частину RAM"
echo -e " - Рекомендація: q4 для DeepSeek-R1 (40 GB < 64 GB) ✅"
echo -e "\n${GREEN}=================================================="
echo "✅ Swoper Installation Complete"
echo "==================================================${NC}"
echo ""
echo "📁 Configuration: $HOME/node2/swoper/config_node2.yaml"
echo "📦 Models directory: $HOME/node2/swoper/models"
echo ""
echo "⏭️ Next steps:"
echo " 1. Review config_node2.yaml"
echo " 2. Test Swoper with: curl http://localhost:8890/health"
echo " 3. Update router-config.yml with Node-2 Swoper provider"
echo ""