aipackage/install.sh

168 lines
5.8 KiB
Bash

#!/usr/bin/env bash
# ─────────────────────────────────────────────
# Cezen AI Suite — Entry Level Installer
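# Usage:
#   sudo bash install.sh              → Phase 1 (installs NVIDIA drivers, schedules Phase 2, then reboots)
#   sudo bash install.sh --phase=2    → Phase 2 (installs all software; run after the reboot)
# Options:
#   --tier=NAME     tier passed to the Ansible playbooks (default: entry)
#   --skip=ROLES    comma-separated list of roles to skip in Phase 2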
# ─────────────────────────────────────────────
set -e

# Defaults. Each may be overridden first by the saved config file and then by
# command-line flags (flags win because they are applied last).
TIER="entry"
PHASE="1"
SKIP_ROLES=""
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
ANSIBLE_DIR="$SCRIPT_DIR/ansible"

# Load saved config (written by web setup UI before phase 1)
if [ -f /opt/cezen/install.conf ]; then
  # shellcheck source=/dev/null
  source /opt/cezen/install.conf
fi

#######################################
# Apply --tier=, --phase= and --skip= flags to the global settings.
# Anything else is reported on stderr and ignored, so a typo such as
# "--phase 2" is visible instead of silently selecting the default phase.
# Globals:   TIER, PHASE, SKIP_ROLES (written)
# Arguments: the script's command-line arguments
# Outputs:   one warning per unrecognized argument, on stderr
# Returns:   0
#######################################
parse_args() {
  local arg
  for arg in "$@"; do
    case "$arg" in
      --tier=*) TIER="${arg#*=}" ;;
      --phase=*) PHASE="${arg#*=}" ;;
      --skip=*) SKIP_ROLES="${arg#*=}" ;;
      *) printf 'WARNING: ignoring unrecognized argument: %s\n' "$arg" >&2 ;;
    esac
  done
  return 0
}
parse_args "$@"
# ── Preflight ──────────────────────────────────
#######################################
# Abort unless the script is running with root privileges.
# Globals:  EUID (read)
# Outputs:  an error message on stderr when not root
# Returns:  0 when root; otherwise exits the script with status 1
#######################################
check_root() {
  if [ "$EUID" -ne 0 ]; then
    # Diagnostics belong on stderr so they are not mixed into captured stdout.
    echo "ERROR: Run as root: sudo bash install.sh" >&2
    exit 1
  fi
}
#######################################
# Verify that the host identifies itself as Ubuntu.
# The os-release fields are read in subshells so that the file's variables
# (ID, NAME, VERSION, ...) do not leak into the installer's own globals.
# NOTE(review): the error text says "22.04" but only ID is checked, not the
# release number — confirm whether other Ubuntu releases should be accepted.
# Arguments: $1 - os-release file to inspect (default: /etc/os-release)
# Outputs:   "✓ OS: <pretty name>" on stdout, or an error on stderr
# Returns:   0 if the file is missing or ID is "ubuntu";
#            otherwise exits the script with status 1
#######################################
check_os() {
  local os_release_file="${1:-/etc/os-release}"
  local os_id os_pretty_name

  # Without an os-release file there is nothing to verify; carry on.
  [ -f "$os_release_file" ] || return 0

  os_id="$(. "$os_release_file" && printf '%s' "${ID:-}")"
  os_pretty_name="$(. "$os_release_file" && printf '%s' "${PRETTY_NAME:-}")"

  if [[ "$os_id" != "ubuntu" ]]; then
    echo "ERROR: Ubuntu 22.04 required. Detected: $os_pretty_name" >&2
    exit 1
  fi
  echo "✓ OS: $os_pretty_name"
}
#######################################
# Make sure ansible-playbook is available, installing it via apt if needed.
# Outputs: progress messages on stdout
# Returns: 0 once Ansible is present
#######################################
install_ansible() {
  local missing=0
  command -v ansible-playbook >/dev/null 2>&1 || missing=1

  if (( missing )); then
    echo "→ Installing Ansible..."
    # Refresh the package index first so the install sees current versions.
    apt-get update -qq
    apt-get install -y -qq ansible python3-pip
  fi

  echo "✓ Ansible ready"
}
#######################################
# Report whether any PCI device exposes NVIDIA's vendor ID (0x10de) in sysfs.
# Works without any driver installed, since it only reads
# /sys/bus/pci/devices/*/vendor.
# Returns: 0 if a matching device exists, 1 otherwise
#######################################
has_nvidia_pci_gpu() {
  local entry
  for entry in /sys/bus/pci/devices/*/vendor; do
    # An unmatched glob stays literal, so the -f test also filters that case.
    if [[ -f "$entry" && "$(tr '[:upper:]' '[:lower:]' < "$entry")" == "0x10de" ]]; then
      return 0
    fi
  done
  return 1
}
#######################################
# Report whether nvidia-smi exists and runs successfully.
# All nvidia-smi output is discarded; only the exit status matters.
# Returns: 1 if nvidia-smi is not found, otherwise nvidia-smi's exit status
#######################################
has_working_nvidia_driver() {
  if ! command -v nvidia-smi >/dev/null 2>&1; then
    return 1
  fi
  nvidia-smi >/dev/null 2>&1
}
# ── Phase 1: NVIDIA drivers only ──────────────
#######################################
# Phase 1: install the NVIDIA driver, register Phase 2 as a systemd unit that
# runs on the next boot, then reboot. If no NVIDIA PCI device is present,
# skip straight to Phase 2 in the current run instead.
# Globals:   TIER, SKIP_ROLES, SCRIPT_DIR, ANSIBLE_DIR (read); PHASE (written)
# Arguments: $1 - directory to write cezen-phase2.service into
#                 (default: /etc/systemd/system; must be a directory systemd
#                 searches for `systemctl enable` to find the unit)
# Outputs:   progress messages on stdout
# Returns:   status of run_phase2 on the no-GPU path; otherwise reboots
#######################################
run_phase1() {
  local unit_dir="${1:-/etc/systemd/system}"
  local unit_name="cezen-phase2.service"

  echo ""
  echo "╔══════════════════════════════════════════╗"
  echo "║ Cezen AI Suite — Phase 1: NVIDIA ║"
  echo "╚══════════════════════════════════════════╝"

  if ! has_nvidia_pci_gpu; then
    echo "No NVIDIA GPU found. Continuing with CPU/non-GPU installation path."
    PHASE="2"
    run_phase2
    return
  fi

  ANSIBLE_STDOUT_CALLBACK=yaml \
    ansible-playbook -i localhost, -c local "$ANSIBLE_DIR/phase1_nvidia.yml" \
    -e "tier=$TIER"

  # Register phase 2 as a one-shot systemd service so it runs after reboot.
  # --skip is forwarded alongside --tier so roles skipped on the command line
  # stay skipped after the reboot; the script path is quoted for the inner
  # shell so an install directory containing spaces still works.
  cat > "$unit_dir/$unit_name" << EOF
[Unit]
Description=Cezen AI Suite Phase 2 Installer
After=network-online.target nvidia-persistenced.service
Wants=network-online.target
[Service]
Type=oneshot
ExecStart=/bin/bash -lc 'set -o pipefail; /bin/bash "${SCRIPT_DIR}/install.sh" --phase=2 "--tier=${TIER}" "--skip=${SKIP_ROLES}" 2>&1 | tee -a /var/log/cezen-install.log'
RemainAfterExit=yes
StandardOutput=journal+console
StandardError=journal+console
[Install]
WantedBy=multi-user.target
EOF

  systemctl daemon-reload
  systemctl enable "$unit_name"

  echo ""
  echo "✓ Phase 2 registered — will run automatically after reboot"
  echo "→ Rebooting in 10 seconds..."
  sleep 10
  reboot
}
# ── Phase 2: Full stack ────────────────────────
#######################################
# Phase 2: run the main playbook (entry.yml), telling it whether a working
# NVIDIA driver was detected, then disable the one-shot boot service and
# print the service URLs.
# Globals:  TIER, SKIP_ROLES, ANSIBLE_DIR (read);
#           GPU_AVAILABLE, EXTRA_VARS (written)
# Outputs:  progress messages and the final summary on stdout
# NOTE(review): extra vars are passed in key=value form, which Ansible treats
# as strings — confirm the playbook casts gpu_available with `| bool`.
#######################################
run_phase2() {
  printf '%s\n' \
    "" \
    "╔══════════════════════════════════════════╗" \
    "║ Cezen AI Suite — Phase 2: Stack ║" \
    "╚══════════════════════════════════════════╝"

  if has_working_nvidia_driver; then
    GPU_AVAILABLE=true
    printf '%s\n' "✓ NVIDIA driver: $(nvidia-smi --query-gpu=driver_version --format=csv,noheader | head -n 1)"
  else
    GPU_AVAILABLE=false
    printf '%s\n' \
      "No working NVIDIA GPU/driver found. Continuing with CPU/non-GPU installation path." \
      "GPU-only features such as NVIDIA Docker runtime, DCGM metrics, and vLLM serving will be skipped or left inactive."
  fi

  # skip_roles is a comma-separated list; an empty string skips nothing.
  printf -v EXTRA_VARS 'tier=%s skip_roles="%s" gpu_available=%s' \
    "$TIER" "$SKIP_ROLES" "$GPU_AVAILABLE"

  printf '%s\n' \
    "→ Tier: $TIER | Skip: ${SKIP_ROLES:-none}" \
    "→ GPU available: $GPU_AVAILABLE"

  ANSIBLE_STDOUT_CALLBACK=yaml \
    ansible-playbook -i localhost, -c local "$ANSIBLE_DIR/entry.yml" \
    -e "$EXTRA_VARS"

  # Best effort: the unit may not exist when phase 2 was started by hand.
  systemctl disable cezen-phase2.service 2>/dev/null || true

  printf '%s\n' \
    "" \
    "╔══════════════════════════════════════════╗" \
    "║ Cezen AI Suite installation complete! ║" \
    "║ ║" \
    "║ JupyterLab → http://localhost:8888 ║" \
    "║ Ollama API → http://localhost:11434 ║" \
    "║ MLflow → http://localhost:5000 ║" \
    "║ MinIO → http://localhost:9001 ║" \
    "║ Grafana → http://localhost:3000 ║" \
    "╚══════════════════════════════════════════╝"
}
# ── Main ───────────────────────────────────────
# Entry point: preflight checks, then dispatch on PHASE.
check_root
check_os

# Reject an invalid phase before install_ansible runs, so a bad --phase value
# cannot trigger package installation before the script errors out.
case "$PHASE" in
  1 | 2) ;;
  *)
    echo "ERROR: Unknown phase '$PHASE'. Use --phase=1 or --phase=2" >&2
    exit 1
    ;;
esac

install_ansible

if [ "$PHASE" = "1" ]; then
  run_phase1
else
  run_phase2
fi