Improve ISO installer non-GPU and bundled setup flow
This commit is contained in:
parent
82cd52a409
commit
ec296b3d42
@ -11,6 +11,7 @@
|
|||||||
python_version: "3.11"
|
python_version: "3.11"
|
||||||
cuda_version: "12.4"
|
cuda_version: "12.4"
|
||||||
skip_roles: "" # comma-separated list of role names to skip (set by install.sh)
|
skip_roles: "" # comma-separated list of role names to skip (set by install.sh)
|
||||||
|
gpu_available: false
|
||||||
|
|
||||||
roles:
|
roles:
|
||||||
- role: base
|
- role: base
|
||||||
|
|||||||
@ -34,6 +34,21 @@
|
|||||||
enabled: yes
|
enabled: yes
|
||||||
state: started
|
state: started
|
||||||
|
|
||||||
|
- name: Configure Docker standard runtime for non-GPU installs
|
||||||
|
copy:
|
||||||
|
dest: /etc/docker/daemon.json
|
||||||
|
content: |
|
||||||
|
{
|
||||||
|
"log-driver": "json-file",
|
||||||
|
"log-opts": {
|
||||||
|
"max-size": "100m",
|
||||||
|
"max-file": "3"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
mode: "0644"
|
||||||
|
notify: restart docker
|
||||||
|
when: not (gpu_available | default(false) | bool)
|
||||||
|
|
||||||
# NVIDIA Container Toolkit (allows GPU passthrough into containers)
|
# NVIDIA Container Toolkit (allows GPU passthrough into containers)
|
||||||
- name: Add NVIDIA Container Toolkit repo
|
- name: Add NVIDIA Container Toolkit repo
|
||||||
shell: |
|
shell: |
|
||||||
@ -44,16 +59,19 @@
|
|||||||
tee /etc/apt/sources.list.d/nvidia-container-toolkit.list
|
tee /etc/apt/sources.list.d/nvidia-container-toolkit.list
|
||||||
args:
|
args:
|
||||||
creates: /etc/apt/sources.list.d/nvidia-container-toolkit.list
|
creates: /etc/apt/sources.list.d/nvidia-container-toolkit.list
|
||||||
|
when: gpu_available | default(false) | bool
|
||||||
|
|
||||||
- name: Install NVIDIA Container Toolkit
|
- name: Install NVIDIA Container Toolkit
|
||||||
apt:
|
apt:
|
||||||
name: nvidia-container-toolkit
|
name: nvidia-container-toolkit
|
||||||
state: present
|
state: present
|
||||||
update_cache: yes
|
update_cache: yes
|
||||||
|
when: gpu_available | default(false) | bool
|
||||||
|
|
||||||
- name: Configure Docker to use NVIDIA runtime
|
- name: Configure Docker to use NVIDIA runtime
|
||||||
shell: nvidia-ctk runtime configure --runtime=docker
|
shell: nvidia-ctk runtime configure --runtime=docker
|
||||||
notify: restart docker
|
notify: restart docker
|
||||||
|
when: gpu_available | default(false) | bool
|
||||||
|
|
||||||
- name: Set NVIDIA as default Docker runtime
|
- name: Set NVIDIA as default Docker runtime
|
||||||
copy:
|
copy:
|
||||||
@ -75,3 +93,4 @@
|
|||||||
}
|
}
|
||||||
mode: "0644"
|
mode: "0644"
|
||||||
notify: restart docker
|
notify: restart docker
|
||||||
|
when: gpu_available | default(false) | bool
|
||||||
|
|||||||
@ -13,6 +13,7 @@
|
|||||||
set -e
|
set -e
|
||||||
|
|
||||||
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
||||||
|
PACKAGE_DIR="$(cd "$SCRIPT_DIR/.." && pwd)"
|
||||||
WORK_DIR="/tmp/cezen-iso-work"
|
WORK_DIR="/tmp/cezen-iso-work"
|
||||||
ORIGINAL_ISO="/tmp/ubuntu-22.04.5-live-server-amd64.iso"
|
ORIGINAL_ISO="/tmp/ubuntu-22.04.5-live-server-amd64.iso"
|
||||||
OUTPUT_ISO="$SCRIPT_DIR/cezen-ai-ubuntu2204.iso"
|
OUTPUT_ISO="$SCRIPT_DIR/cezen-ai-ubuntu2204.iso"
|
||||||
@ -26,7 +27,7 @@ echo ""
|
|||||||
# ── Install build tools ────────────────────────
|
# ── Install build tools ────────────────────────
|
||||||
echo "→ Installing build tools..."
|
echo "→ Installing build tools..."
|
||||||
apt-get update -qq
|
apt-get update -qq
|
||||||
apt-get install -y -qq xorriso wget isolinux
|
apt-get install -y -qq xorriso wget isolinux rsync
|
||||||
echo "✓ Tools ready"
|
echo "✓ Tools ready"
|
||||||
|
|
||||||
# ── Download Ubuntu ISO ────────────────────────
|
# ── Download Ubuntu ISO ────────────────────────
|
||||||
@ -55,6 +56,16 @@ cp "$SCRIPT_DIR/user-data" "$WORK_DIR/nocloud/user-data"
|
|||||||
cp "$SCRIPT_DIR/meta-data" "$WORK_DIR/nocloud/meta-data"
|
cp "$SCRIPT_DIR/meta-data" "$WORK_DIR/nocloud/meta-data"
|
||||||
echo "✓ user-data and meta-data injected"
|
echo "✓ user-data and meta-data injected"
|
||||||
|
|
||||||
|
# Keep the installer payload on the ISO so first boot does not depend on a
|
||||||
|
# private Git server being reachable before the setup UI can start.
|
||||||
|
echo "→ Bundling Cezen AI installer payload..."
|
||||||
|
mkdir -p "$WORK_DIR/cezen-aipackage"
|
||||||
|
rsync -a --delete \
|
||||||
|
--exclude 'autoinstall/cezen-ai-ubuntu2204.iso' \
|
||||||
|
--exclude '*.iso' \
|
||||||
|
"$PACKAGE_DIR/" "$WORK_DIR/cezen-aipackage/"
|
||||||
|
echo "✓ Installer payload bundled"
|
||||||
|
|
||||||
# ── Patch GRUB ────────────────────────────────
|
# ── Patch GRUB ────────────────────────────────
|
||||||
echo "→ Patching GRUB config..."
|
echo "→ Patching GRUB config..."
|
||||||
GRUB_CFG="$WORK_DIR/boot/grub/grub.cfg"
|
GRUB_CFG="$WORK_DIR/boot/grub/grub.cfg"
|
||||||
|
|||||||
@ -169,18 +169,22 @@ echo ""
|
|||||||
|
|
||||||
# Write selected tools to a config file so install.sh can read it
|
# Write selected tools to a config file so install.sh can read it
|
||||||
mkdir -p /opt/cezen
|
mkdir -p /opt/cezen
|
||||||
cat > /opt/cezen/install.conf << EOF
|
|
||||||
TIER=${TIER}
|
|
||||||
SKIP_ROLES=""
|
SKIP_ROLES=""
|
||||||
EOF
|
|
||||||
|
|
||||||
# Determine which roles to skip based on tool selection
|
|
||||||
for role in ollama jupyterlab chromadb vllm mlflow minio monitoring k3s; do
|
for role in ollama jupyterlab chromadb vllm mlflow minio monitoring k3s; do
|
||||||
if ! echo "$TOOLS" | grep -q "$role"; then
|
if ! echo "$TOOLS" | grep -q "$role"; then
|
||||||
sed -i "s/SKIP_ROLES=\"\"/SKIP_ROLES=\"${role}\"/" /opt/cezen/install.conf
|
if [ -n "$SKIP_ROLES" ]; then
|
||||||
|
SKIP_ROLES="${SKIP_ROLES},${role}"
|
||||||
|
else
|
||||||
|
SKIP_ROLES="${role}"
|
||||||
|
fi
|
||||||
fi
|
fi
|
||||||
done
|
done
|
||||||
|
|
||||||
|
cat > /opt/cezen/install.conf << EOF
|
||||||
|
TIER=${TIER}
|
||||||
|
SKIP_ROLES=${SKIP_ROLES}
|
||||||
|
EOF
|
||||||
|
|
||||||
# Mark as configured so this wizard doesn't run again
|
# Mark as configured so this wizard doesn't run again
|
||||||
touch /opt/cezen/.setup-done
|
touch /opt/cezen/.setup-done
|
||||||
|
|
||||||
|
|||||||
@ -60,8 +60,10 @@ autoinstall:
|
|||||||
- echo "cezen ALL=(ALL) NOPASSWD:ALL" > /target/etc/sudoers.d/cezen
|
- echo "cezen ALL=(ALL) NOPASSWD:ALL" > /target/etc/sudoers.d/cezen
|
||||||
- chmod 440 /target/etc/sudoers.d/cezen
|
- chmod 440 /target/etc/sudoers.d/cezen
|
||||||
|
|
||||||
# Clone the Cezen AI installer
|
# Install the Cezen AI payload from the ISO first. Fall back to Git only
|
||||||
- git clone https://cgit.cezentech.com/jinojose/aipackage.git /target/opt/aipackage || true
|
# when installing from older media that does not contain /cdrom/cezen-aipackage.
|
||||||
|
- mkdir -p /target/opt/aipackage
|
||||||
|
- cp -a /cdrom/cezen-aipackage/. /target/opt/aipackage/ || git clone https://cgit.cezentech.com/jinojose/aipackage.git /target/opt/aipackage
|
||||||
|
|
||||||
# Deploy the web setup server
|
# Deploy the web setup server
|
||||||
- mkdir -p /target/opt/cezen
|
- mkdir -p /target/opt/cezen
|
||||||
|
|||||||
@ -36,7 +36,30 @@ def get_interfaces():
|
|||||||
except:
|
except:
|
||||||
return ["eth0"]
|
return ["eth0"]
|
||||||
|
|
||||||
|
def has_nvidia_gpu(pci_root="/sys/bus/pci/devices"):
    """Detect NVIDIA PCI devices before the driver or nvidia-smi exists.

    Each entry under ``pci_root`` is expected to contain a ``vendor`` file
    holding the PCI vendor ID as hex text; NVIDIA's ID is ``0x10de``.

    The entries are listed and opened directly instead of using ``os.walk``:
    sysfs exposes every PCI device in this directory as a symlink, and
    ``os.walk`` does not descend into symlinked directories by default, so a
    walk never reaches any ``vendor`` file and always reports "no GPU".

    Args:
        pci_root: Directory containing one sub-directory (or symlink to one)
            per PCI device. Defaults to the Linux sysfs location.

    Returns:
        True if any device reports the NVIDIA vendor ID, otherwise False.
        Detection is best-effort: an unreadable root or device yields False
        or is skipped rather than raising.
    """
    try:
        entries = os.listdir(pci_root)
    except OSError:
        # No sysfs PCI tree (non-Linux host, container, ...): treat as no GPU.
        return False

    for entry in entries:
        vendor_path = os.path.join(pci_root, entry, "vendor")
        try:
            with open(vendor_path) as f:
                if f.read().strip().lower() == "0x10de":
                    return True
        except (OSError, ValueError):
            # A single unreadable device must not hide GPUs listed after it.
            continue
    return False
|
||||||
|
|
||||||
|
def validate_static_network(ip, prefix, gateway, dns):
    """Validate static network settings and return the normalised prefix.

    Args:
        ip: Host address; must parse with ``ipaddress.ip_address``.
        prefix: CIDR prefix length, as an int or a numeric string.
        gateway: Gateway address; must parse with ``ipaddress.ip_address``.
        dns: DNS server address; must parse with ``ipaddress.ip_address``.

    Returns:
        The prefix length as a decimal string.

    Raises:
        ValueError: If an address does not parse, the prefix is not an
            integer, or the prefix is outside 1..32.
    """
    # Addresses are checked in the same order as they are passed in, so the
    # first invalid one is the one reported.
    for address in (ip, gateway, dns):
        ipaddress.ip_address(address)

    cidr = int(prefix)
    if not 1 <= cidr <= 32:
        raise ValueError("CIDR prefix must be between 1 and 32")
    return str(cidr)
|
||||||
|
|
||||||
def apply_static_ip(iface, ip, prefix, gateway, dns):
|
def apply_static_ip(iface, ip, prefix, gateway, dns):
|
||||||
|
prefix = validate_static_network(ip, prefix, gateway, dns)
|
||||||
config = f"""network:
|
config = f"""network:
|
||||||
version: 2
|
version: 2
|
||||||
ethernets:
|
ethernets:
|
||||||
@ -69,9 +92,10 @@ def run_install(tier, skip_tools):
|
|||||||
open(SETUP_DONE_FILE, "w").close()
|
open(SETUP_DONE_FILE, "w").close()
|
||||||
|
|
||||||
env = os.environ.copy()
|
env = os.environ.copy()
|
||||||
# Phase 1: installs NVIDIA drivers, registers cezen-phase2 systemd service,
|
# Fresh NVIDIA servers do not have nvidia-smi yet, so detect the PCI
|
||||||
# then reboots. Phase 2 (full stack) runs automatically after reboot.
|
# device and run phase 1 to install drivers before the AI stack.
|
||||||
cmd = ["bash", f"{AIPACKAGE_DIR}/install.sh", "--phase=1", f"--tier={tier}"]
|
phase = "1" if has_nvidia_gpu() else "2"
|
||||||
|
cmd = ["bash", f"{AIPACKAGE_DIR}/install.sh", f"--phase={phase}", f"--tier={tier}"]
|
||||||
with open(INSTALL_LOG, "w") as log:
|
with open(INSTALL_LOG, "w") as log:
|
||||||
proc = subprocess.Popen(cmd, stdout=log, stderr=log, env=env)
|
proc = subprocess.Popen(cmd, stdout=log, stderr=log, env=env)
|
||||||
proc.wait()
|
proc.wait()
|
||||||
@ -525,11 +549,18 @@ function streamLog() {
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
let reconnectAttempts = 0;
|
||||||
es.onerror = () => {
|
es.onerror = () => {
|
||||||
es.close();
|
es.close();
|
||||||
if (!installDone) {
|
if (installDone) return;
|
||||||
// Connection lost — most likely the server rebooted
|
reconnectAttempts++;
|
||||||
|
lbl.textContent = `Connection lost — reconnecting... (${reconnectAttempts})`;
|
||||||
|
if (reconnectAttempts >= 5) {
|
||||||
|
// After 5 failed reconnects assume it's a real reboot
|
||||||
showRebootNotice();
|
showRebootNotice();
|
||||||
|
} else {
|
||||||
|
// Try reconnecting after a delay
|
||||||
|
setTimeout(() => { if (!installDone) streamLog(); }, 4000);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
@ -653,14 +684,46 @@ class Handler(BaseHTTPRequestHandler):
|
|||||||
self.end_headers()
|
self.end_headers()
|
||||||
|
|
||||||
# ─── Main ─────────────────────────────────────────────────
|
# ─── Main ─────────────────────────────────────────────────
|
||||||
|
def show_console_banner(ip):
|
||||||
|
"""Write the setup URL banner to /dev/tty1 so it appears on the physical console."""
|
||||||
|
banner = f"""
|
||||||
|
|
||||||
|
\033[1;36m╔══════════════════════════════════════════════════════╗
|
||||||
|
║ ║
|
||||||
|
║ CEZEN AI SUITE — SERVER SETUP ║
|
||||||
|
║ ║
|
||||||
|
║ Open a browser on any computer on this network: ║
|
||||||
|
║ ║
|
||||||
|
║ \033[1;33m➜ http://{ip:<42}\033[1;36m║
|
||||||
|
║ \033[1;33m➜ http://cezenai.local\033[1;36m ║
|
||||||
|
║ ║
|
||||||
|
║ Complete setup from your browser — no keyboard ║
|
||||||
|
║ input needed here. ║
|
||||||
|
║ ║
|
||||||
|
╚══════════════════════════════════════════════════════╝\033[0m
|
||||||
|
|
||||||
|
"""
|
||||||
|
# Write to tty1 (physical console) and stdout (journalctl)
|
||||||
|
print(banner)
|
||||||
|
try:
|
||||||
|
with open("/dev/tty1", "w") as tty:
|
||||||
|
tty.write(banner)
|
||||||
|
except Exception:
|
||||||
|
pass # tty1 may not be accessible in all environments
|
||||||
|
|
||||||
|
# Also update /etc/issue so the URL appears above the login prompt
|
||||||
|
try:
|
||||||
|
with open("/etc/issue", "w") as f:
|
||||||
|
f.write(f"Ubuntu 22.04.5 LTS \\n \\l\n\n")
|
||||||
|
f.write(f" \033[1;36mCezen AI Suite Setup:\033[0m http://{ip} | http://cezenai.local\n\n")
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
# Ensure log file exists
|
# Ensure log file exists
|
||||||
open(INSTALL_LOG, "a").close()
|
open(INSTALL_LOG, "a").close()
|
||||||
ip = get_ip()
|
ip = get_ip()
|
||||||
print(f"\n{'='*50}")
|
show_console_banner(ip)
|
||||||
print(f" Cezen AI Suite — Setup Server")
|
|
||||||
print(f" Open in browser: http://{ip}")
|
|
||||||
print(f" Or: http://cezenai.local")
|
|
||||||
print(f"{'='*50}\n")
|
|
||||||
server = HTTPServer(("0.0.0.0", 80), Handler)
|
server = HTTPServer(("0.0.0.0", 80), Handler)
|
||||||
server.serve_forever()
|
server.serve_forever()
|
||||||
|
|||||||
35
install.sh
35
install.sh
@ -52,6 +52,20 @@ install_ansible() {
|
|||||||
echo "✓ Ansible ready"
|
echo "✓ Ansible ready"
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#######################################
# Detect an NVIDIA GPU by PCI vendor ID, so it works before any driver or
# nvidia-smi is installed.
# Arguments: $1 - (optional) directory holding one entry per PCI device,
#                 each with a "vendor" file; defaults to /sys/bus/pci/devices
# Returns:   0 if any device reports vendor 0x10de (NVIDIA), 1 otherwise
#######################################
has_nvidia_pci_gpu() {
  local pci_root="${1:-/sys/bus/pci/devices}"
  # Keep loop state local so callers' variables are not clobbered.
  local vendor_file vendor_id

  for vendor_file in "$pci_root"/*/vendor; do
    # An unmatched glob stays literal; skip it (and anything not a file).
    [ -f "$vendor_file" ] || continue
    # Vendor IDs are compared case-insensitively; an unreadable file is skipped.
    vendor_id="$(tr '[:upper:]' '[:lower:]' < "$vendor_file")" || continue
    if [ "$vendor_id" = "0x10de" ]; then
      return 0
    fi
  done
  return 1
}
|
||||||
|
|
||||||
|
#######################################
# Check that nvidia-smi is on PATH and runs successfully.
# Outputs:   nothing (all nvidia-smi output is discarded)
# Returns:   0 if nvidia-smi exists and exits 0; 1 if it is not found;
#            otherwise nvidia-smi's own non-zero exit status
#######################################
has_working_nvidia_driver() {
  if ! command -v nvidia-smi >/dev/null 2>&1; then
    return 1
  fi
  nvidia-smi >/dev/null 2>&1
}
|
||||||
|
|
||||||
# ── Phase 1: NVIDIA drivers only ──────────────
|
# ── Phase 1: NVIDIA drivers only ──────────────
|
||||||
run_phase1() {
|
run_phase1() {
|
||||||
echo ""
|
echo ""
|
||||||
@ -59,6 +73,13 @@ run_phase1() {
|
|||||||
echo "║ Cezen AI Suite — Phase 1: NVIDIA ║"
|
echo "║ Cezen AI Suite — Phase 1: NVIDIA ║"
|
||||||
echo "╚══════════════════════════════════════════╝"
|
echo "╚══════════════════════════════════════════╝"
|
||||||
|
|
||||||
|
if ! has_nvidia_pci_gpu; then
|
||||||
|
echo "No NVIDIA GPU found. Continuing with CPU/non-GPU installation path."
|
||||||
|
PHASE="2"
|
||||||
|
run_phase2
|
||||||
|
return
|
||||||
|
fi
|
||||||
|
|
||||||
ANSIBLE_STDOUT_CALLBACK=yaml \
|
ANSIBLE_STDOUT_CALLBACK=yaml \
|
||||||
ansible-playbook -i localhost, -c local "$ANSIBLE_DIR/phase1_nvidia.yml" \
|
ansible-playbook -i localhost, -c local "$ANSIBLE_DIR/phase1_nvidia.yml" \
|
||||||
-e "tier=$TIER"
|
-e "tier=$TIER"
|
||||||
@ -72,7 +93,7 @@ Wants=network-online.target
|
|||||||
|
|
||||||
[Service]
|
[Service]
|
||||||
Type=oneshot
|
Type=oneshot
|
||||||
ExecStart=/bin/bash ${SCRIPT_DIR}/install.sh --phase=2 --tier=${TIER}
|
ExecStart=/bin/bash -lc 'set -o pipefail; /bin/bash ${SCRIPT_DIR}/install.sh --phase=2 --tier=${TIER} 2>&1 | tee -a /var/log/cezen-install.log'
|
||||||
RemainAfterExit=yes
|
RemainAfterExit=yes
|
||||||
StandardOutput=journal+console
|
StandardOutput=journal+console
|
||||||
StandardError=journal+console
|
StandardError=journal+console
|
||||||
@ -98,17 +119,19 @@ run_phase2() {
|
|||||||
echo "║ Cezen AI Suite — Phase 2: Stack ║"
|
echo "║ Cezen AI Suite — Phase 2: Stack ║"
|
||||||
echo "╚══════════════════════════════════════════╝"
|
echo "╚══════════════════════════════════════════╝"
|
||||||
|
|
||||||
# Verify NVIDIA driver loaded
|
GPU_AVAILABLE=false
|
||||||
if ! nvidia-smi &>/dev/null; then
|
if ! has_working_nvidia_driver; then
|
||||||
echo "WARNING: nvidia-smi not responding. NVIDIA driver may not be loaded."
|
echo "No working NVIDIA GPU/driver found. Continuing with CPU/non-GPU installation path."
|
||||||
echo " Continuing — non-GPU roles will still install correctly."
|
echo "GPU-only features such as NVIDIA Docker runtime, DCGM metrics, and vLLM serving will be skipped or left inactive."
|
||||||
else
|
else
|
||||||
|
GPU_AVAILABLE=true
|
||||||
echo "✓ NVIDIA driver: $(nvidia-smi --query-gpu=driver_version --format=csv,noheader | head -1)"
|
echo "✓ NVIDIA driver: $(nvidia-smi --query-gpu=driver_version --format=csv,noheader | head -1)"
|
||||||
fi
|
fi
|
||||||
|
|
||||||
# Build skip_roles extra var (comma-separated list, empty string = skip nothing)
|
# Build skip_roles extra var (comma-separated list, empty string = skip nothing)
|
||||||
EXTRA_VARS="tier=$TIER skip_roles=\"$SKIP_ROLES\""
|
EXTRA_VARS="tier=$TIER skip_roles=\"$SKIP_ROLES\" gpu_available=$GPU_AVAILABLE"
|
||||||
echo "→ Tier: $TIER | Skip: ${SKIP_ROLES:-none}"
|
echo "→ Tier: $TIER | Skip: ${SKIP_ROLES:-none}"
|
||||||
|
echo "→ GPU available: $GPU_AVAILABLE"
|
||||||
|
|
||||||
ANSIBLE_STDOUT_CALLBACK=yaml \
|
ANSIBLE_STDOUT_CALLBACK=yaml \
|
||||||
ansible-playbook -i localhost, -c local "$ANSIBLE_DIR/entry.yml" \
|
ansible-playbook -i localhost, -c local "$ANSIBLE_DIR/entry.yml" \
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user