microdao-daarion/scripts/node2/install_swoper_models_optimized.sh

#!/bin/bash
# Install Swoper with optimized models for Node-2
# CORRECTED: Only quantize models that don't fit (>60 GB)
# Smaller models can use full precision or q4 for speed
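#
# Usage (a sketch; assumes the script is run from the repository root):
#   chmod +x scripts/node2/install_swoper_models_optimized.sh
#   ./scripts/node2/install_swoper_models_optimized.sh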
set -eo pipefail  # pipefail: make "ollama pull | tee" report the pull's exit status, not tee's
echo "🚀 Installing Swoper with optimized models for microDAO Node-2"
echo "=================================================="
# Colors
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
RED='\033[0;31m'
BLUE='\033[0;34m'
NC='\033[0m' # No Color
# Check if the Swoper service exists (optional - models can be installed via Ollama)
SWAPPER_DIR=""
if [ -d "services/swapper" ]; then
    SWAPPER_DIR="services/swapper"
    echo -e "${GREEN}✅ Found Swoper at: ${SWAPPER_DIR}${NC}"
elif [ -d "/opt/microdao-daarion/services/swapper" ]; then
    SWAPPER_DIR="/opt/microdao-daarion/services/swapper"
    echo -e "${GREEN}✅ Found Swoper at: ${SWAPPER_DIR}${NC}"
else
    echo -e "${YELLOW}⚠️ Swoper service not found in project.${NC}"
    echo -e "${YELLOW}   Models will be installed via Ollama.${NC}"
    echo -e "${YELLOW}   A Swoper configuration will be created for future use.${NC}"
fi
# Models configuration - OPTIMIZED
# Format: model_key => "ollama_name|quantization|size_gb|priority|reason"
# Pipe-delimited, because Ollama model names (e.g. starcoder2:34b) contain colons.
# Associative arrays require bash >= 4 (macOS ships 3.2; `brew install bash` if needed)
if [ "${BASH_VERSINFO[0]}" -lt 4 ]; then
    echo -e "${RED}❌ This script requires bash >= 4 (found ${BASH_VERSION})${NC}"
    exit 1
fi
declare -A MODELS=(
    # 🔴 OBLIGATORY q4/q5 (>60 GB at full precision, doesn't fit in 64 GB RAM)
    ["deepseek-r1"]="deepseek-r1|q4|40|high|OBLIGATORY_67GB_full"
    ["qwen-code-72b"]="qwen2.5-coder-72b-instruct|q4|40|high|OBLIGATORY_144GB_full"
    ["deepseek-math-33b"]="deepseek-math:33b|q4|20|high|OBLIGATORY_66GB_full"
    ["starcoder2-34b"]="starcoder2:34b|q4|20|medium|OBLIGATORY_68GB_full"
    ["qwen-vl-32b"]="qwen2-vl:32b-instruct|q4|20|high|OBLIGATORY_64GB_full_better_quality"
    # 🟡 RECOMMENDED q4 (40-60 GB at full precision; fits, but q4 performs better)
    ["gemma-30b"]="gemma2:27b-it|q4|18|medium|RECOMMENDED_60GB_full"
    ["mistral-22b"]="mistral-nemo:22b|q4|13|medium|RECOMMENDED_44GB_full"
    # 🟢 OPTIONAL q4 or full (<40 GB, full precision is viable)
    ["mistral-13b"]="mistral:13b-instruct|full|26|medium|OPTIONAL_can_use_full"
    ["gpt-oss-20b"]="gpt-oss:20b|full|40|low|OPTIONAL_can_use_full"
    ["qwen-vl-7b"]="qwen2-vl:7b-instruct|full|8|high|OPTIONAL_can_use_full"
    # Already quantized
    ["falcon-40b"]="falcon:40b-instruct|q4|24|low|ALREADY_Q4"
)
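# Illustration only - how a single entry is parsed (pipe-delimited, since
# Ollama names such as "starcoder2:34b" contain colons themselves):
#   entry="deepseek-r1|q4|40|high|OBLIGATORY_67GB_full"
#   IFS='|' read -r ollama_name quantization size_gb priority reason <<< "$entry"
#   -> ollama_name=deepseek-r1  quantization=q4  size_gb=40  priority=high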
# Create models directory
MODELS_DIR="$HOME/node2/swoper/models"
mkdir -p "$MODELS_DIR"
echo -e "\n${GREEN}📦 Installing models via Ollama...${NC}"
# Check if Ollama is running
if ! curl -s http://localhost:11434/api/tags > /dev/null 2>&1; then
    echo -e "${YELLOW}⚠️ Ollama is not running. Starting Ollama...${NC}"
    brew services start ollama || {
        echo -e "${RED}❌ Failed to start Ollama${NC}"
        exit 1
    }
    sleep 5
fi
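# Optional sanity check (hedged: GET /api/version exists in recent Ollama
# releases; this is a harmless no-op if it is unavailable).
OLLAMA_VERSION=$(curl -s http://localhost:11434/api/version 2>/dev/null | sed -n 's/.*"version":"\([^"]*\)".*/\1/p' || true)
if [ -n "$OLLAMA_VERSION" ]; then
    echo -e "${GREEN}✅ Ollama ${OLLAMA_VERSION} is reachable${NC}"
fi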
# Install models
INSTALLED=0
FAILED=0
echo -e "\n${BLUE}📋 Model Installation Strategy:${NC}"
echo -e "${RED} 🔴 OBLIGATORY q4/q5 (>60 GB):${NC} DeepSeek-R1, Qwen Code 72B, DeepSeek Math 33B, StarCoder2-34B, Qwen2-VL-32B"
echo -e "${YELLOW} 🟡 RECOMMENDED q4 (40-60 GB):${NC} Gemma 30B, Mistral 22B"
echo -e "${GREEN} 🟢 OPTIONAL full/q4 (<40 GB):${NC} Mistral 13B, GPT-OSS-20B, Qwen-VL-7B"
echo ""
for model_key in "${!MODELS[@]}"; do
    model_info="${MODELS[$model_key]}"
    IFS='|' read -r ollama_name quantization size_gb priority reason <<< "$model_info"

    # Construct the Ollama model name to try.
    # Note: Ollama quantization is usually baked into the tag or handled
    # automatically, so try an explicit ":q4"/":q5" suffix first, then fall
    # back to the name as-is.
    ollama_model="$ollama_name"
    ollama_model_quant=""
    quant_label="$quantization"
    if [ "$quantization" = "q4" ] || [ "$quantization" = "q5" ]; then
        ollama_model_quant="${ollama_name}:${quantization}"
    fi

    # Color based on priority tier
    if [[ "$reason" == OBLIGATORY* ]]; then
        color=$RED
        icon="🔴"
    elif [[ "$reason" == RECOMMENDED* ]]; then
        color=$YELLOW
        icon="🟡"
    else
        color=$GREEN
        icon="🟢"
    fi

    echo -e "\n${color}${icon} Installing: ${ollama_name} ${quant_label} (${size_gb} GB) [${priority} priority]${NC}"
    echo -e "${color}   Reason: ${reason}${NC}"

    # Try the explicitly quantized tag first (it may not exist in the registry)
    if [ -n "$ollama_model_quant" ]; then
        if ollama pull "$ollama_model_quant" 2>&1 | tee /tmp/ollama_install.log; then
            echo -e "${GREEN}✅ ${ollama_name} ${quant_label} installed${NC}"
            INSTALLED=$((INSTALLED + 1))
            continue
        fi
    fi

    # Fall back to the plain model name (Ollama may apply a default quantization)
    if ollama pull "$ollama_model" 2>&1 | tee /tmp/ollama_install.log; then
        echo -e "${GREEN}✅ ${ollama_name} ${quant_label} installed${NC}"
        INSTALLED=$((INSTALLED + 1))
    else
        echo -e "${YELLOW}  ⚠️ Model not found, checking available models...${NC}"
        # The model may already be present under a slightly different tag
        if ollama list 2>/dev/null | grep -qi "$ollama_name"; then
            echo -e "${GREEN}✅ ${ollama_name} already installed${NC}"
            INSTALLED=$((INSTALLED + 1))
        else
            echo -e "${RED}  ❌ Failed to install ${ollama_name}${NC}"
            echo -e "${YELLOW}  💡 Model may not be available in Ollama. Check: ollama list${NC}"
            FAILED=$((FAILED + 1))
        fi
    fi
done
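# Quick verification: list what Ollama actually has on disk now
echo -e "\n${BLUE}📋 Models currently known to Ollama:${NC}"
ollama list 2>/dev/null || echo -e "${YELLOW}⚠️ Could not query Ollama for the model list${NC}"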
echo -e "\n${GREEN}=================================================="
echo "📊 Installation Summary"
echo "==================================================${NC}"
echo -e " ✅ Installed: ${INSTALLED} models"
echo -e " ❌ Failed: ${FAILED} models"
echo ""
# Create Swoper configuration for Node-2
echo -e "${GREEN}📝 Creating Swoper configuration for Node-2...${NC}"
cat > "$HOME/node2/swoper/config_node2.yaml" << 'EOF'
# Swoper Configuration for microDAO Node-2
# Single-active LLM scheduler with optimized quantization:
# only large models (>60 GB) use q4/q5; smaller ones may run at full precision.

swoper:
  mode: single-active
  max_concurrent_models: 1
  model_swap_timeout: 30
  gpu_enabled: true
  metal_acceleration: true      # Apple Silicon Metal
  quantization_strategy: smart  # Only quantize when needed

models:
  # 🔴 OBLIGATORY q4/q5 (>60 GB, doesn't fit in 64 GB RAM)
  deepseek-r1:
    path: ollama:deepseek-r1:q4
    type: llm
    size_gb: 40
    priority: high
    quantization: q4
    reason: "67 GB full doesn't fit; q4 (40 GB) fits in 64 GB RAM"
  qwen-code-72b:
    path: ollama:qwen2.5-coder-72b-instruct:q4
    type: code
    size_gb: 40
    priority: high
    quantization: q4
    reason: "144 GB full doesn't fit; q4 (40 GB) required"
  deepseek-math-33b:
    path: ollama:deepseek-math:33b:q4
    type: math
    size_gb: 20
    priority: high
    quantization: q4
    reason: "66 GB full doesn't fit; q4 (20 GB) required"
  starcoder2-34b:
    path: ollama:starcoder2:34b:q4
    type: code
    size_gb: 20
    priority: medium
    quantization: q4
    reason: "68 GB full doesn't fit; q4 (20 GB) required"
  qwen-vl-32b:
    path: ollama:qwen2-vl:32b-instruct:q4
    type: vision
    size_gb: 20
    priority: high
    quantization: q4
    reason: "64 GB full doesn't fit; q4 (20 GB) gives better quality than 7B"
  # 🟡 RECOMMENDED q4 (40-60 GB; fits, but q4 performs better)
  gemma-30b:
    path: ollama:gemma2:27b-it:q4
    type: llm
    size_gb: 18
    priority: medium
    quantization: q4
    reason: "60 GB full fits, but q4 (18 GB) performs better"
  mistral-22b:
    path: ollama:mistral-nemo:22b:q4
    type: llm
    size_gb: 13
    priority: medium
    quantization: q4
    reason: "44 GB full fits, but q4 (13 GB) performs better"
  # 🟢 OPTIONAL full/q4 (<40 GB, full precision is viable)
  mistral-13b:
    path: ollama:mistral:13b-instruct
    type: llm
    size_gb: 26
    priority: medium
    quantization: full
    reason: "26 GB fits; full precision is fine, q4 optional for speed"
  gpt-oss-20b:
    path: ollama:gpt-oss:20b
    type: llm
    size_gb: 40
    priority: low
    quantization: full
    reason: "40 GB fits; can use full precision"
  qwen-vl-7b:
    path: ollama:qwen2-vl:7b-instruct
    type: vision
    size_gb: 8
    priority: high
    quantization: full
    reason: "8 GB fits; full precision (fast vision model)"
  falcon-40b:
    path: ollama:falcon:40b-instruct:q4
    type: llm
    size_gb: 24
    priority: low
    quantization: q4
    reason: "Already quantized"

storage:
  models_dir: ~/node2/swoper/models
  cache_dir: ~/node2/swoper/cache
  swap_dir: ~/node2/swoper/swap

ollama:
  url: http://localhost:11434
  timeout: 300

# GPU/VRAM info
hardware:
  ram_gb: 64
  gpu: "M4 Max 40-core"
  vram: "Shared with RAM (up to 64 GB)"
  metal_acceleration: true
EOF
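# Optional: validate the generated config (a sketch; assumes PyYAML is
# available to python3 - the check is skipped silently if it is not).
if python3 -c "import yaml" 2>/dev/null; then
    if python3 -c "import yaml; yaml.safe_load(open('$HOME/node2/swoper/config_node2.yaml'))" 2>/dev/null; then
        echo -e "${GREEN}✅ config_node2.yaml parses as valid YAML${NC}"
    else
        echo -e "${RED}❌ config_node2.yaml failed YAML validation${NC}"
    fi
fi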
echo -e "${GREEN}✅ Configuration saved to: $HOME/node2/swoper/config_node2.yaml${NC}"
# Calculate total size
TOTAL_SIZE=$(python3 << 'PYEOF'
# Only count models that will be installed
obligatory = [40, 40, 20, 20, 20] # q4 models that are required
recommended = [18, 13] # q4 models recommended
optional_full = [26, 40, 8] # full models
optional_q4 = [24] # already q4
total = sum(obligatory) + sum(recommended) + sum(optional_full) + sum(optional_q4)
print(f"{total}")
PYEOF
)
echo -e "\n${GREEN}📊 Total models size: ~${TOTAL_SIZE} GB${NC}"
echo -e "${GREEN} Available disk: 1.5 TB${NC}"
echo -e "${GREEN} Available RAM: 64 GB${NC}"
echo -e "${GREEN} ✅ Models will fit comfortably${NC}"
echo -e "\n${BLUE}💡 DeepSeek-R1 q4 (40 GB) Analysis:${NC}"
echo -e " - 64 GB RAM достатньо для 40 GB моделі ✅"
echo -e " - M4 Max Metal acceleration підтримується ✅"
echo -e " - Може працювати, але займе більшу частину RAM"
echo -e " - Рекомендація: q4 для DeepSeek-R1 (40 GB < 64 GB) ✅"
echo -e "\n${GREEN}=================================================="
echo "✅ Swoper Installation Complete"
echo "==================================================${NC}"
echo ""
echo "📁 Configuration: $HOME/node2/swoper/config_node2.yaml"
echo "📦 Models directory: $HOME/node2/swoper/models"
echo ""
echo "⏭️ Next steps:"
echo " 1. Review config_node2.yaml"
echo " 2. Test Swoper with: curl http://localhost:8890/health"
echo " 3. Update router-config.yml with Node-2 Swoper provider"
echo ""