Initial Cezen AI Suite installer — Entry tier

This commit is contained in:
Jino Jose 2026-06-23 13:09:03 +05:30
commit a071602cf1
18 changed files with 1195 additions and 0 deletions

BIN
.DS_Store vendored Normal file

Binary file not shown.

77
README.md Normal file
View File

@ -0,0 +1,77 @@
# Cezen AI Suite — Installer
## Quick Start
```bash
git clone <cgit-url>
cd cgit
sudo bash install.sh
```
The server reboots automatically once the NVIDIA drivers are installed. Phase 2 then runs on its own after the reboot.
## What Gets Installed (Entry Tier)
| Service | Port | Notes |
|---|---|---|
| Ollama | 11434 | LLM inference, 2 models pre-loaded |
| Open WebUI | 3001 | Chat interface |
| vLLM | 8000 | OpenAI-compatible API (start manually) |
| JupyterLab | 8888 | Token: `cezen2024` |
| ChromaDB | 8100 | Vector DB for RAG |
| MLflow | 5000 | Experiment tracking |
| MinIO | 9001 | Object storage web console; the S3 API listens on port 9000 (user: cezenadmin / Cezen@2024!) |
| Grafana | 3000 | GPU + system monitoring (admin / cezen2024) |
## Testing Without a GPU (Multipass)
```bash
# On your MacBook:
multipass launch 22.04 --name cezen-test --cpus 4 --mem 8G --disk 40G
multipass shell cezen-test
# Inside the VM:
git clone <cgit-url>
cd cgit
sudo bash install.sh
```
NVIDIA driver install will succeed but `nvidia-smi` won't show GPUs — that's expected. All other services will run fine.
## Pull More Models
```bash
bash models/pull-models.sh --tier=entry
```
## File Structure
```
cgit/
├── install.sh ← Entry point
├── ansible/
│ ├── phase1_nvidia.yml ← Phase 1: drivers (triggers reboot)
│ ├── entry.yml ← Phase 2: full stack
│ └── roles/
│ ├── base/ ← OS, Python, Miniconda, LangChain
│ ├── nvidia/ ← Drivers, CUDA 12.4, cuDNN 9
│ ├── docker/ ← Docker CE + NVIDIA Container Toolkit
│ ├── k3s/ ← Lightweight Kubernetes
│ ├── ollama/ ← Ollama + Open WebUI
│ ├── vllm/ ← vLLM inference server
│ ├── jupyterlab/ ← JupyterLab notebooks
│ ├── chromadb/ ← Vector database
│ ├── mlflow/ ← Experiment tracking
│ ├── minio/ ← Object storage
│ └── monitoring/ ← Grafana + Prometheus + DCGM
└── models/
└── pull-models.sh ← Pull additional models
```
## Change Default Passwords
Before shipping to a customer, update these:
- JupyterLab token: `/opt/cezen/.jupyter/jupyter_lab_config.py`
- MinIO: `/etc/default/minio`
- Grafana: environment vars in monitoring role, or via UI after first login
- MLflow: no auth by default (add reverse proxy if needed)

BIN
ansible/.DS_Store vendored Normal file

Binary file not shown.

23
ansible/entry.yml Normal file
View File

@ -0,0 +1,23 @@
---
# Phase 2: Full Cezen AI Suite — Entry Tier
# Runs after NVIDIA driver reboot
#
# A single play that targets the local machine over a local connection, with
# privilege escalation enabled, and applies the service roles listed below.
- name: Cezen AI — Entry Tier Stack
hosts: localhost
connection: local
become: true
# NOTE(review): the role task lists in this commit hard-code "cezen",
# "/opt/cezen", Python 3.11 and CUDA 12.4 instead of reading these variables —
# confirm whether anything consumes them before relying on an override here.
vars:
cezen_user: "cezen"
cezen_home: "/opt/cezen"
python_version: "3.11"
cuda_version: "12.4"
# docker is listed first; the ollama, vllm and monitoring roles all run docker
# commands. The base and nvidia roles are not listed here — they are applied by
# the phase 1 playbook.
roles:
- docker
- k3s
- ollama
- vllm
- jupyterlab
- chromadb
- mlflow
- minio
- monitoring

View File

@ -0,0 +1,9 @@
---
# Phase 1: NVIDIA drivers only. Server reboots after this.
#
# A single play that targets the local machine over a local connection, with
# privilege escalation enabled. It applies the base role (OS packages, cezen
# user, Miniconda) and then the nvidia role (driver, CUDA, cuDNN).
- name: Cezen AI — Phase 1 NVIDIA Drivers
hosts: localhost
connection: local
become: true
roles:
- base
- nvidia

View File

@ -0,0 +1,91 @@
---
# Base role: OS updates, essential packages, Python/Miniconda
#
# Task order: refresh and upgrade apt packages, install common tooling, create
# the cezen account and its directory tree under /opt/cezen, install Miniconda
# there, create the "cezen" conda environment and pip-install the AI libraries.

# Refresh the package index unless it was refreshed within the last hour.
- name: Update apt cache
apt:
update_cache: yes
cache_valid_time: 3600
# Full distribution upgrade, removing packages that are no longer needed.
- name: Upgrade all packages
apt:
upgrade: dist
autoremove: yes
- name: Install essential system packages
apt:
name:
- curl
- wget
- git
- build-essential
- ca-certificates
- gnupg
- lsb-release
- software-properties-common
- unzip
- htop
- net-tools
- jq
- python3-pip
- python3-venv
state: present
# The service account's home directory is /opt/cezen; it is appended to the
# sudo group without removing any existing group memberships.
- name: Create cezen user
user:
name: cezen
shell: /bin/bash
home: /opt/cezen
create_home: yes
groups: sudo
append: yes
# Directory skeleton used by the other roles (models, data, logs).
- name: Create cezen directories
file:
path: "{{ item }}"
state: directory
owner: cezen
group: cezen
mode: "0755"
loop:
- /opt/cezen
- /opt/cezen/models
- /opt/cezen/data
- /opt/cezen/logs
# NOTE(review): "retries"/"delay" are set here without an "until" condition —
# confirm the installed Ansible version actually retries in that case.
- name: Download Miniconda
get_url:
url: https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh
dest: /tmp/miniconda.sh
mode: "0755"
retries: 3
delay: 10
# Batch-mode (-b) install into /opt/cezen/miniconda, run as cezen; skipped once
# the conda binary exists.
- name: Install Miniconda
become_user: cezen
command: bash /tmp/miniconda.sh -b -p /opt/cezen/miniconda
args:
creates: /opt/cezen/miniconda/bin/conda
- name: Add conda to cezen PATH
lineinfile:
path: /opt/cezen/.bashrc
line: 'export PATH="/opt/cezen/miniconda/bin:$PATH"'
create: yes
owner: cezen
# Skipped once the environment directory exists.
- name: Create cezen conda environment (Python 3.11)
become_user: cezen
command: /opt/cezen/miniconda/bin/conda create -n cezen python=3.11 -y
args:
creates: /opt/cezen/miniconda/envs/cezen
# Runs pip inside the "cezen" conda environment on every play run (there is no
# "creates" guard on this task).
# NOTE(review): "uvicorn[standard]" is unquoted, so the shell treats the
# brackets as a glob pattern; it is passed through literally only while no
# matching file exists in the working directory — consider quoting it.
- name: Install LangChain + LlamaIndex + HuggingFace in conda env
become_user: cezen
shell: |
/opt/cezen/miniconda/bin/conda run -n cezen pip install \
langchain langchain-community llama-index \
transformers huggingface-hub \
peft bitsandbytes accelerate \
fastapi uvicorn[standard] \
sentence-transformers
retries: 3
delay: 15

View File

@ -0,0 +1,53 @@
---
# ChromaDB — vector database for RAG pipelines
#
# Installs chromadb with pip into the "cezen" conda environment, creates its
# data directory, writes a systemd unit that runs "chroma run" as the cezen
# user on port 8100, then starts the service and waits for the port to open.

# NOTE(review): "retries"/"delay" are set without an "until" condition —
# confirm the installed Ansible version actually retries in that case.
- name: Install ChromaDB in cezen conda env
become_user: cezen
shell: |
/opt/cezen/miniconda/bin/conda run -n cezen pip install chromadb
retries: 3
delay: 10
- name: Create ChromaDB data directory
file:
path: /opt/cezen/data/chromadb
state: directory
owner: cezen
group: cezen
# The unit binds to 0.0.0.0:8100, stores data under /opt/cezen/data/chromadb
# and restarts the process 5 seconds after any exit.
- name: Create ChromaDB systemd service
copy:
dest: /etc/systemd/system/chromadb.service
content: |
[Unit]
Description=ChromaDB Vector Database
After=network.target
[Service]
Type=simple
User=cezen
Group=cezen
WorkingDirectory=/opt/cezen/data/chromadb
ExecStart=/opt/cezen/miniconda/envs/cezen/bin/chroma run \
--host 0.0.0.0 \
--port 8100 \
--path /opt/cezen/data/chromadb
Restart=always
RestartSec=5
Environment="PATH=/opt/cezen/miniconda/envs/cezen/bin:/usr/local/bin:/usr/bin:/bin"
[Install]
WantedBy=multi-user.target
mode: "0644"
# daemon_reload makes systemd pick up the unit file written above.
- name: Enable and start ChromaDB
systemd:
name: chromadb
enabled: yes
state: started
daemon_reload: yes
# Fails the play if nothing is listening on port 8100 within 30 seconds.
- name: Wait for ChromaDB to be ready
wait_for:
host: localhost
port: 8100
timeout: 30

View File

@ -0,0 +1,83 @@
---
# Docker CE + NVIDIA Container Toolkit
#
# Installs Docker CE from Docker's apt repository, lets the cezen user run
# docker, installs the NVIDIA Container Toolkit and makes "nvidia" the default
# container runtime.
#
# A role task list cannot carry its own "handlers:" section, so the Docker
# restart is an ordinary task at the end that runs only when daemon.json
# actually changed.

- name: Add Docker GPG key
  apt_key:
    url: https://download.docker.com/linux/ubuntu/gpg
    state: present

- name: Add Docker apt repository
  apt_repository:
    repo: "deb [arch=amd64] https://download.docker.com/linux/ubuntu {{ ansible_distribution_release }} stable"
    state: present
    filename: docker

- name: Install Docker CE
  apt:
    name:
      - docker-ce
      - docker-ce-cli
      - containerd.io
      - docker-buildx-plugin
      - docker-compose-plugin
    state: present
    update_cache: yes

- name: Add cezen user to docker group
  user:
    name: cezen
    groups: docker
    append: yes

- name: Enable and start Docker
  systemd:
    name: docker
    enabled: yes
    state: started

# NVIDIA Container Toolkit (allows GPU passthrough into containers)
# Guarded by "creates": the key and list file are only fetched once.
- name: Add NVIDIA Container Toolkit repo
  shell: |
    curl -fsSL https://nvidia.github.io/libnvidia-container/gpgkey | \
    gpg --dearmor -o /usr/share/keyrings/nvidia-container-toolkit-keyring.gpg
    curl -s -L https://nvidia.github.io/libnvidia-container/stable/deb/nvidia-container-toolkit.list | \
    sed 's#deb https://#deb [signed-by=/usr/share/keyrings/nvidia-container-toolkit-keyring.gpg] https://#g' | \
    tee /etc/apt/sources.list.d/nvidia-container-toolkit.list
  args:
    creates: /etc/apt/sources.list.d/nvidia-container-toolkit.list

- name: Install NVIDIA Container Toolkit
  apt:
    name: nvidia-container-toolkit
    state: present
    update_cache: yes

# Only needed while daemon.json does not exist yet: the next task writes the
# complete file, so re-running nvidia-ctk on every play would just rewrite
# daemon.json and force a needless Docker restart.
- name: Configure Docker to use NVIDIA runtime
  shell: nvidia-ctk runtime configure --runtime=docker
  args:
    creates: /etc/docker/daemon.json

# Authoritative daemon.json: NVIDIA as default runtime plus log rotation
# (3 files of 100 MB per container).
- name: Set NVIDIA as default Docker runtime
  copy:
    dest: /etc/docker/daemon.json
    content: |
      {
        "default-runtime": "nvidia",
        "runtimes": {
          "nvidia": {
            "path": "nvidia-container-runtime",
            "runtimeArgs": []
          }
        },
        "log-driver": "json-file",
        "log-opts": {
          "max-size": "100m",
          "max-file": "3"
        }
      }
    mode: "0644"
  register: docker_daemon_json

- name: Restart Docker to apply the runtime configuration
  systemd:
    name: docker
    state: restarted
  when: docker_daemon_json is changed

View File

@ -0,0 +1,92 @@
---
# JupyterLab — notebook interface for AI/ML development
#
# Installs JupyterLab with pip into the "cezen" conda environment, generates
# and extends its config file, creates the notebooks directory, and runs the
# server as a systemd service under the cezen user on port 8888.

# NOTE(review): "retries"/"delay" are set without an "until" condition —
# confirm the installed Ansible version actually retries in that case.
- name: Install JupyterLab in cezen conda env
become_user: cezen
shell: |
/opt/cezen/miniconda/bin/conda run -n cezen pip install \
jupyterlab \
ipywidgets \
ipykernel \
notebook \
nbconvert
retries: 3
delay: 10
- name: Create JupyterLab config directory
file:
path: /opt/cezen/.jupyter
state: directory
owner: cezen
group: cezen
# Skipped once the config file exists.
- name: Generate JupyterLab config
become_user: cezen
shell: |
/opt/cezen/miniconda/envs/cezen/bin/jupyter lab --generate-config
args:
creates: /opt/cezen/.jupyter/jupyter_lab_config.py
# Ensures each of the settings below is present as a line in the config file.
# The access token is a fixed default; the README tells operators to change it
# before shipping.
# NOTE(review): no "regexp" is given, so a line that was edited by hand (for
# example a changed token) is presumably not replaced on a re-run; the default
# line would be added again instead — confirm the intended re-run behaviour.
- name: Configure JupyterLab (no browser, allow all IPs, set base dir)
lineinfile:
path: /opt/cezen/.jupyter/jupyter_lab_config.py
line: "{{ item }}"
create: yes
owner: cezen
loop:
- "c.ServerApp.ip = '0.0.0.0'"
- "c.ServerApp.port = 8888"
- "c.ServerApp.open_browser = False"
- "c.ServerApp.notebook_dir = '/opt/cezen/notebooks'"
- "c.ServerApp.token = 'cezen2024'"
- "c.ServerApp.allow_root = False"
# Created before the service is started below; the unit uses it as its working
# directory.
- name: Create notebooks directory
file:
path: /opt/cezen/notebooks
state: directory
owner: cezen
group: cezen
- name: Create sample notebook placeholder
copy:
dest: /opt/cezen/notebooks/README.md
content: |
# Cezen AI Suite — JupyterLab
Default token: `cezen2024`
Change this in: `/opt/cezen/.jupyter/jupyter_lab_config.py`
Then restart: `sudo systemctl restart jupyterlab`
owner: cezen
group: cezen
# The unit puts the conda environment and /usr/local/cuda/bin on PATH and sets
# CUDA_HOME for notebook kernels.
- name: Create JupyterLab systemd service
copy:
dest: /etc/systemd/system/jupyterlab.service
content: |
[Unit]
Description=JupyterLab Server
After=network.target
[Service]
Type=simple
User=cezen
Group=cezen
WorkingDirectory=/opt/cezen/notebooks
ExecStart=/opt/cezen/miniconda/envs/cezen/bin/jupyter lab \
--config=/opt/cezen/.jupyter/jupyter_lab_config.py
Restart=always
RestartSec=5
Environment="PATH=/opt/cezen/miniconda/envs/cezen/bin:/usr/local/cuda/bin:/usr/local/bin:/usr/bin:/bin"
Environment="CUDA_HOME=/usr/local/cuda"
[Install]
WantedBy=multi-user.target
mode: "0644"
# daemon_reload makes systemd pick up the unit file written above.
- name: Enable and start JupyterLab
systemd:
name: jupyterlab
enabled: yes
state: started
daemon_reload: yes

View File

@ -0,0 +1,62 @@
---
# K3s — lightweight Kubernetes for single-node AI workloads
#
# Installs K3s with the upstream script (traefik and servicelb disabled),
# waits for the kubeconfig to appear, gives the cezen user its own copy of it
# and finally checks that "k3s kubectl get nodes" succeeds.

- name: Check if K3s is already installed
  stat:
    path: /usr/local/bin/k3s
  register: k3s_binary

# "until" makes the retry budget effective on every Ansible version.
- name: Install K3s
  shell: |
    curl -sfL https://get.k3s.io | \
    INSTALL_K3S_EXEC="--disable traefik --disable servicelb" sh -
  when: not k3s_binary.stat.exists
  register: k3s_install
  until: k3s_install.rc == 0
  retries: 3
  delay: 10

- name: Wait for K3s to be ready
  wait_for:
    path: /etc/rancher/k3s/k3s.yaml
    timeout: 120

- name: Enable and start K3s
  systemd:
    name: k3s
    enabled: yes
    state: started

# Must run before the kubeconfig copy: the copy module does not create the
# parent directory of a file destination.
- name: Create .kube directory for cezen
  file:
    path: /opt/cezen/.kube
    state: directory
    owner: cezen
    group: cezen

# remote_src: the source file lives on the target host, not the controller.
- name: Copy kubeconfig for cezen user
  copy:
    src: /etc/rancher/k3s/k3s.yaml
    dest: /opt/cezen/.kube/config
    owner: cezen
    group: cezen
    mode: "0600"
    remote_src: yes

- name: Set KUBECONFIG in cezen .bashrc
  lineinfile:
    path: /opt/cezen/.bashrc
    line: 'export KUBECONFIG=/opt/cezen/.kube/config'
    create: yes
    owner: cezen

- name: Install kubectl alias for cezen
  lineinfile:
    path: /opt/cezen/.bashrc
    line: "alias kubectl='k3s kubectl'"
    create: yes
    owner: cezen

# Read-only probe, so it never reports "changed". It succeeds as soon as the
# API answers; it does not inspect the node's Ready condition.
- name: Verify K3s node is ready
  command: k3s kubectl get nodes
  register: k3s_nodes
  retries: 5
  delay: 10
  until: k3s_nodes.rc == 0
  changed_when: false

View File

@ -0,0 +1,87 @@
---
# MinIO — S3-compatible object storage for model artifacts and datasets
#
# Downloads the MinIO server and client binaries, writes the environment file
# and systemd unit, starts the service and creates the "models" and "datasets"
# buckets. The S3 API listens on port 9000; the web console on port 9001.

- name: Download MinIO server binary
  get_url:
    url: https://dl.min.io/server/minio/release/linux-amd64/minio
    dest: /usr/local/bin/minio
    mode: "0755"
  register: minio_download
  until: minio_download is succeeded
  retries: 3
  delay: 10

- name: Download MinIO client (mc)
  get_url:
    url: https://dl.min.io/client/mc/release/linux-amd64/mc
    dest: /usr/local/bin/mc
    mode: "0755"
  register: mc_download
  until: mc_download is succeeded
  retries: 3
  delay: 10

- name: Create MinIO data directories
  file:
    path: "{{ item }}"
    state: directory
    owner: cezen
    group: cezen
    mode: "0750"
  loop:
    - /opt/cezen/data/minio
    - /opt/cezen/data/minio/models
    - /opt/cezen/data/minio/datasets

# Default credentials — the README tells operators to change them before
# shipping. The same values are used by the mc task at the end of this file.
- name: Create MinIO environment file
  copy:
    dest: /etc/default/minio
    content: |
      MINIO_ROOT_USER=cezenadmin
      MINIO_ROOT_PASSWORD=Cezen@2024!
      MINIO_VOLUMES="/opt/cezen/data/minio"
      MINIO_OPTS="--console-address :9001"
    mode: "0640"
    owner: cezen
    group: cezen

- name: Create MinIO systemd service
  copy:
    dest: /etc/systemd/system/minio.service
    content: |
      [Unit]
      Description=MinIO Object Storage
      Documentation=https://docs.min.io
      Wants=network-online.target
      After=network-online.target
      [Service]
      User=cezen
      Group=cezen
      EnvironmentFile=/etc/default/minio
      ExecStartPre=/bin/bash -c "if [ -z \"${MINIO_VOLUMES}\" ]; then echo 'Variable MINIO_VOLUMES not set'; exit 1; fi"
      ExecStart=/usr/local/bin/minio server ${MINIO_VOLUMES} ${MINIO_OPTS}
      Restart=always
      RestartSec=5
      LimitNOFILE=65536
      [Install]
      WantedBy=multi-user.target
    mode: "0644"

- name: Enable and start MinIO
  systemd:
    name: minio
    enabled: yes
    state: started
    daemon_reload: yes

# The mc commands below talk to the S3 API, so that is the port that has to be
# up before they run.
- name: Wait for MinIO API to be ready
  wait_for:
    host: localhost
    port: 9000
    timeout: 30

- name: Wait for MinIO console to be ready
  wait_for:
    host: localhost
    port: 9001
    timeout: 30

# "set -e" stops at the first failing mc command so the task's return code
# reflects it; "until" makes the retry budget effective.
- name: Configure mc client with local MinIO
  become_user: cezen
  shell: |
    set -e
    mc alias set local http://localhost:9000 cezenadmin 'Cezen@2024!'
    mc mb local/models --ignore-existing
    mc mb local/datasets --ignore-existing
  register: mc_setup
  until: mc_setup.rc == 0
  retries: 3
  delay: 5

View File

@ -0,0 +1,56 @@
---
# MLflow — experiment tracking and model registry
#
# Installs mlflow (plus boto3) with pip into the "cezen" conda environment and
# runs the tracking server as a systemd service on port 5000, with a SQLite
# backend store and a local artifact directory under /opt/cezen/data/mlflow.

- name: Install MLflow in cezen conda env
  become_user: cezen
  shell: |
    /opt/cezen/miniconda/bin/conda run -n cezen pip install mlflow boto3
  register: mlflow_pip
  until: mlflow_pip.rc == 0
  retries: 3
  delay: 10

- name: Create MLflow directories
  file:
    path: "{{ item }}"
    state: directory
    owner: cezen
    group: cezen
  loop:
    - /opt/cezen/data/mlflow
    - /opt/cezen/data/mlflow/artifacts

# The backend store URI uses four slashes: "sqlite:///" followed by the
# absolute path "/opt/...". With only three slashes the path is relative to
# the service's working directory and resolves correctly only while that
# happens to be "/".
- name: Create MLflow systemd service
  copy:
    dest: /etc/systemd/system/mlflow.service
    content: |
      [Unit]
      Description=MLflow Tracking Server
      After=network.target minio.service
      [Service]
      Type=simple
      User=cezen
      Group=cezen
      ExecStart=/opt/cezen/miniconda/envs/cezen/bin/mlflow server \
        --host 0.0.0.0 \
        --port 5000 \
        --backend-store-uri sqlite:////opt/cezen/data/mlflow/mlflow.db \
        --default-artifact-root /opt/cezen/data/mlflow/artifacts
      Restart=always
      RestartSec=5
      Environment="PATH=/opt/cezen/miniconda/envs/cezen/bin:/usr/local/bin:/usr/bin:/bin"
      [Install]
      WantedBy=multi-user.target
    mode: "0644"

- name: Enable and start MLflow
  systemd:
    name: mlflow
    enabled: yes
    state: started
    daemon_reload: yes

- name: Wait for MLflow to be ready
  wait_for:
    host: localhost
    port: 5000
    timeout: 30

View File

@ -0,0 +1,145 @@
---
# Monitoring: Prometheus + Grafana + DCGM Exporter (GPU metrics)
# ignore_errors: true on most tasks — monitoring is optional and should never block the install
#
# Every component runs as a Docker container started with "docker run -d" and
# "--restart always". Each start task tolerates a non-zero exit whose stderr
# contains "already in use", which is what docker reports when a container
# with that name already exists.
# NOTE(review): "host-gateway" is mapped to 172.17.0.1 in the containers below;
# presumably that is the default docker0 bridge address — confirm it matches
# the bridge network on the target host.
- name: Create monitoring directories
file:
path: "{{ item }}"
state: directory
owner: cezen
group: cezen
loop:
- /opt/cezen/monitoring
- /opt/cezen/monitoring/prometheus
- /opt/cezen/monitoring/grafana
# ── DCGM Exporter (GPU metrics for Prometheus) ──────────
# Publishes port 9400 on the host and requests all GPUs.
- name: Start DCGM Exporter container
shell: |
docker run -d \
--name dcgm-exporter \
--restart always \
--gpus all \
-p 9400:9400 \
nvcr.io/nvidia/k8s/dcgm-exporter:3.3.0-3.2.0-ubuntu22.04
register: dcgm_result
failed_when: dcgm_result.rc != 0 and 'already in use' not in dcgm_result.stderr
ignore_errors: true
# ── Prometheus ──────────────────────────────────────────
# Three scrape jobs every 15s: Prometheus itself, the DCGM exporter (9400) and
# the node exporter (9100), the latter two reached through "host-gateway".
- name: Write Prometheus config
copy:
dest: /opt/cezen/monitoring/prometheus/prometheus.yml
owner: cezen
group: cezen
content: |
global:
scrape_interval: 15s
evaluation_interval: 15s
scrape_configs:
- job_name: 'prometheus'
static_configs:
- targets: ['localhost:9090']
- job_name: 'dcgm'
static_configs:
- targets: ['host-gateway:9400']
- job_name: 'node'
static_configs:
- targets: ['host-gateway:9100']
# Mounts the config written above into the container and publishes port 9090.
- name: Start Prometheus container
shell: |
docker run -d \
--name prometheus \
--restart always \
--add-host=host-gateway:172.17.0.1 \
-p 9090:9090 \
-v /opt/cezen/monitoring/prometheus/prometheus.yml:/etc/prometheus/prometheus.yml \
prom/prometheus:latest
register: prom_result
failed_when: prom_result.rc != 0 and 'already in use' not in prom_result.stderr
ignore_errors: true
# ── Node Exporter (CPU/RAM/disk metrics) ───────────────
# Uses the host network and PID namespace and mounts the host root filesystem
# read-only at /host.
- name: Start Node Exporter container
shell: |
docker run -d \
--name node-exporter \
--restart always \
--network=host \
--pid=host \
-v /:/host:ro,rslave \
prom/node-exporter:latest \
--path.rootfs=/host
register: node_exp_result
failed_when: node_exp_result.rc != 0 and 'already in use' not in node_exp_result.stderr
ignore_errors: true
# ── Grafana ─────────────────────────────────────────────
# Admin credentials are passed as environment variables; they are the defaults
# the README says to change before shipping. Dashboards and settings persist in
# the "grafana-storage" named volume.
- name: Start Grafana container
shell: |
docker run -d \
--name grafana \
--restart always \
-p 3000:3000 \
--add-host=host-gateway:172.17.0.1 \
-v grafana-storage:/var/lib/grafana \
-e GF_SECURITY_ADMIN_USER=admin \
-e GF_SECURITY_ADMIN_PASSWORD=cezen2024 \
-e GF_USERS_ALLOW_SIGN_UP=false \
grafana/grafana:latest
register: grafana_result
failed_when: grafana_result.rc != 0 and 'already in use' not in grafana_result.stderr
ignore_errors: true
- name: Wait for Grafana to be ready
wait_for:
host: localhost
port: 3000
timeout: 60
ignore_errors: true
# Registers Prometheus as the default datasource through Grafana's HTTP API.
- name: Add Prometheus datasource to Grafana
uri:
url: http://localhost:3000/api/datasources
method: POST
user: admin
password: cezen2024
force_basic_auth: yes
body_format: json
body:
name: Prometheus
type: prometheus
url: "http://host-gateway:9090"
access: proxy
isDefault: true
status_code: [200, 409] # 409 = already exists, that's fine
ignore_errors: true
# NOTE(review): the request body below carries a dashboard with only a title
# and uid (no panels), not the definition of grafana.com dashboard 12239 —
# confirm whether the dashboard JSON was meant to be embedded or downloaded.
- name: Import NVIDIA GPU dashboard (ID 12239)
uri:
url: http://localhost:3000/api/dashboards/import
method: POST
user: admin
password: cezen2024
force_basic_auth: yes
body_format: json
body:
inputs:
- name: DS_PROMETHEUS
type: datasource
pluginId: prometheus
value: Prometheus
overwrite: true
folderId: 0
dashboard:
"__inputs": []
"__requires": []
id: null
title: "NVIDIA GPU Overview"
uid: "nvidia-gpu"
status_code: [200, 412]
ignore_errors: true

View File

@ -0,0 +1,81 @@
---
# NVIDIA role: Drivers + CUDA + cuDNN
# NOTE: Tested on L40S (Entry) and A40 (lab). Requires reboot after this role.
# If no GPU is present, this role will install drivers but nvidia-smi won't show GPUs.
#
# The reboot itself is performed by install.sh once the phase 1 playbook has
# finished, so the driver task does not notify a handler: notifying a handler
# that is not defined makes Ansible fail the play.

- name: Add NVIDIA package repository key
  apt_key:
    url: https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/3bf863cc.pub
    state: present

- name: Add NVIDIA CUDA apt repository
  apt_repository:
    repo: "deb https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/ /"
    state: present
    filename: cuda

- name: Update apt cache after adding NVIDIA repo
  apt:
    update_cache: yes

- name: Install NVIDIA driver (open kernel module, recommended for data center GPUs)
  apt:
    name:
      - nvidia-driver-550-open
      - nvidia-utils-550
    state: present

# CUDA Toolkit
- name: Install CUDA Toolkit 12.4
  apt:
    name:
      - cuda-toolkit-12-4
      - cuda-cudart-12-4
    state: present

# cuDNN
- name: Add cuDNN repository
  apt_repository:
    repo: "deb https://developer.download.nvidia.com/compute/cudnn/repos/ubuntu2204/x86_64/ /"
    state: present
    filename: cudnn

- name: Install cuDNN 9 for CUDA 12
  apt:
    name:
      - cudnn9-cuda-12
    state: present

# Environment variables
# Picked up by login shells via /etc/profile.d.
- name: Set CUDA paths system-wide
  copy:
    dest: /etc/profile.d/cuda.sh
    content: |
      export CUDA_HOME=/usr/local/cuda
      export PATH=$CUDA_HOME/bin:$PATH
      export LD_LIBRARY_PATH=$CUDA_HOME/lib64:$LD_LIBRARY_PATH
    mode: "0644"

# One-shot unit that runs "nvidia-smi -pm 1" at boot.
- name: Set NVIDIA persistence mode (survives reboots)
  copy:
    dest: /etc/systemd/system/nvidia-persistenced-mode.service
    content: |
      [Unit]
      Description=NVIDIA Persistence Daemon Mode
      After=nvidia-persistenced.service
      [Service]
      Type=oneshot
      ExecStart=/usr/bin/nvidia-smi -pm 1
      RemainAfterExit=yes
      [Install]
      WantedBy=multi-user.target
    mode: "0644"

# Enabled only, not started: the driver is not loaded until after the reboot.
- name: Enable NVIDIA persistence service
  systemd:
    name: nvidia-persistenced-mode
    enabled: yes
    daemon_reload: yes

View File

@ -0,0 +1,103 @@
---
# Ollama — local LLM serving (main inference engine for Entry tier)
#
# Installs Ollama with the upstream script, replaces its systemd unit with one
# that runs as the cezen user, listens on all interfaces and stores models
# under /opt/cezen/models/ollama, pulls the default models and starts the
# Open WebUI container on port 3001.

- name: Check if Ollama is already installed
  stat:
    path: /usr/local/bin/ollama
  register: ollama_binary

- name: Install Ollama
  shell: curl -fsSL https://ollama.ai/install.sh | sh
  when: not ollama_binary.stat.exists
  register: ollama_install
  until: ollama_install.rc == 0
  retries: 3
  delay: 10

# NOTE(review): CUDA_VISIBLE_DEVICES is pinned to GPU indices 0-2 — confirm
# this matches the GPU count of every machine this tier is installed on.
- name: Create Ollama systemd service with GPU support
  copy:
    dest: /etc/systemd/system/ollama.service
    content: |
      [Unit]
      Description=Ollama Service
      After=network-online.target
      [Service]
      ExecStart=/usr/local/bin/ollama serve
      User=cezen
      Group=cezen
      Restart=always
      RestartSec=3
      Environment="PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin"
      Environment="OLLAMA_HOST=0.0.0.0:11434"
      Environment="OLLAMA_MODELS=/opt/cezen/models/ollama"
      Environment="CUDA_VISIBLE_DEVICES=0,1,2"
      [Install]
      WantedBy=multi-user.target
    mode: "0644"
  register: ollama_unit

- name: Create Ollama models directory
  file:
    path: /opt/cezen/models/ollama
    state: directory
    owner: cezen
    group: cezen

# Restart when the unit file changed: "started" is a no-op for a service that
# is already running (for example one launched by the install script), which
# would leave it running with the previous user, bind address and model path.
- name: Enable and start Ollama
  systemd:
    name: ollama
    enabled: yes
    state: "{{ 'restarted' if ollama_unit is changed else 'started' }}"
    daemon_reload: yes

- name: Wait for Ollama API to be ready
  wait_for:
    host: localhost
    port: 11434
    timeout: 60

- name: Pull default models (Llama 3.1 8B + Mistral 7B)
  become_user: cezen
  command: ollama pull {{ item }}
  loop:
    - llama3.1:8b
    - mistral:7b
  environment:
    OLLAMA_HOST: "http://localhost:11434"
  register: ollama_pull
  until: ollama_pull.rc == 0
  retries: 3
  delay: 15
  # NOTE: Models are large (~5GB each). This step takes time on first run.
  # Skip by setting: ansible-playbook ... -e "skip_model_pull=true"
  when: not (skip_model_pull | default(false))

# Open WebUI (chat interface on top of Ollama)
# Note: Requires docker community collection. Install with:
#   ansible-galaxy collection install community.docker
# NOTE(review): ignore_errors only covers run-time failures of this task;
# confirm how the installed Ansible version behaves when the collection itself
# is missing, since the module name may fail to resolve before any task runs.
- name: Deploy Open WebUI via Docker
  community.docker.docker_container:
    name: open-webui
    image: ghcr.io/open-webui/open-webui:main
    state: started
    restart_policy: always
    ports:
      - "3001:8080"
    volumes:
      - open-webui:/app/backend/data
    env:
      OLLAMA_BASE_URL: "http://host-gateway:11434"
    etc_hosts:
      host-gateway: "172.17.0.1"
  register: webui_module
  ignore_errors: true

# Fallback: only attempted when the module-based task above failed. An
# "already in use" error means a container with that name already exists and
# is tolerated.
- name: Alternative Open WebUI start (if community.docker not available)
  shell: |
    docker run -d \
      --name open-webui \
      --restart always \
      -p 3001:8080 \
      --add-host=host-gateway:172.17.0.1 \
      -v open-webui:/app/backend/data \
      -e OLLAMA_BASE_URL=http://host-gateway:11434 \
      ghcr.io/open-webui/open-webui:main
  args:
    executable: /bin/bash
  when: webui_module is failed
  register: webui_result
  failed_when: webui_result.rc != 0 and 'already in use' not in webui_result.stderr

View File

@ -0,0 +1,56 @@
---
# vLLM — high-performance LLM inference with OpenAI-compatible API
# Entry tier: runs as a Docker container (easier to manage than pip install)
#
# Pulls the vLLM image and installs a systemd unit that runs it with all GPUs
# on port 8000. The Hugging Face cache is kept in its own directory,
# /opt/cezen/models/hf_cache, which is what the container mounts.

- name: Pull vLLM Docker image
  shell: docker pull vllm/vllm-openai:latest
  register: vllm_pull
  until: vllm_pull.rc == 0
  retries: 3
  delay: 15

# Created before the unit is installed so the bind mount target exists with
# cezen ownership.
- name: Create vLLM model directory
  file:
    path: /opt/cezen/models/hf_cache
    state: directory
    owner: cezen
    group: cezen

# The two ExecStartPre lines (prefixed "-", so failures are ignored) remove
# any stale container named "vllm" before a new one is started.
- name: Create vLLM systemd service
  copy:
    dest: /etc/systemd/system/vllm.service
    content: |
      [Unit]
      Description=vLLM OpenAI-Compatible Inference Server
      After=docker.service ollama.service
      Requires=docker.service
      [Service]
      Restart=always
      RestartSec=5
      ExecStartPre=-/usr/bin/docker stop vllm
      ExecStartPre=-/usr/bin/docker rm vllm
      ExecStart=/usr/bin/docker run \
        --name vllm \
        --gpus all \
        --ipc=host \
        -p 8000:8000 \
        -v /opt/cezen/models/hf_cache:/root/.cache/huggingface \
        -e HF_HOME=/root/.cache/huggingface \
        vllm/vllm-openai:latest \
        --model meta-llama/Meta-Llama-3.1-8B-Instruct \
        --gpu-memory-utilization 0.7 \
        --max-model-len 8192 \
        --tensor-parallel-size 1
      ExecStop=/usr/bin/docker stop vllm
      [Install]
      WantedBy=multi-user.target
    mode: "0644"

# Note: vLLM service is enabled but not started by default.
# Start manually after choosing a model:
#   sudo systemctl start vllm
# Or change the --model flag in /etc/systemd/system/vllm.service first.
# Because the unit is enabled, systemd will also start it on the next boot.
- name: Enable vLLM (but don't start yet — model selection needed first)
  systemd:
    name: vllm
    enabled: yes
    daemon_reload: yes

133
install.sh Normal file
View File

@ -0,0 +1,133 @@
#!/usr/bin/env bash
# ─────────────────────────────────────────────
# Cezen AI Suite — Entry Level Installer
# Usage:
#   sudo bash install.sh             → Phase 1 (drivers + schedules reboot → Phase 2)
#   sudo bash install.sh --phase=2   → Phase 2 (all software, run after reboot)
# Options:
#   --tier=NAME   Passed to the playbooks as the "tier" extra var (default: entry)
#   --phase=N     Installer phase to run, 1 or 2 (default: 1)
# ─────────────────────────────────────────────
set -e

TIER="entry"
PHASE="1"
# Absolute directory of this script, so the playbooks are found regardless of
# the caller's working directory.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
ANSIBLE_DIR="$SCRIPT_DIR/ansible"

#######################################
# Parse --tier=NAME / --phase=N options into the TIER and PHASE globals.
# Any other argument is rejected: silently ignoring a mistyped option such as
# "--phase 2" would fall back to phase 1, which ends in a reboot.
# Globals:   TIER, PHASE (written)
# Arguments: the script's command-line arguments
# Outputs:   error message on stderr for an unknown argument
# Returns:   exits the script with status 1 on an unknown argument
#######################################
parse_args() {
  local arg
  for arg in "$@"; do
    case "$arg" in
      --tier=*) TIER="${arg#*=}" ;;
      --phase=*) PHASE="${arg#*=}" ;;
      *)
        echo "ERROR: Unknown argument '$arg'. Use --tier=NAME and/or --phase=1|2" >&2
        exit 1
        ;;
    esac
  done
}

parse_args "$@"
# ── Preflight ──────────────────────────────────
# Abort the installer unless it is running with an effective UID of 0.
# Outputs: an error line on stdout when not root.
# Returns: 0 when root; otherwise exits the script with status 1.
check_root() {
  (( EUID == 0 )) && return 0
  echo "ERROR: Run as root: sudo bash install.sh"
  exit 1
}
#######################################
# Verify that the host is Ubuntu 22.04, the only release the installer's
# error message (and the ubuntu2204 NVIDIA repositories) allow for.
# Arguments: $1 - os-release file to inspect (optional, default /etc/os-release)
# Outputs:   "✓ OS: <pretty name>" on stdout on success; error on stderr otherwise
# Returns:   0 on success; exits the script with status 1 when the file is
#            unreadable or describes anything other than Ubuntu 22.04
#######################################
check_os() {
  local os_release="${1:-/etc/os-release}"
  local os_id os_version os_pretty

  if [[ ! -r "$os_release" ]]; then
    echo "ERROR: Ubuntu 22.04 required. Cannot read $os_release to detect the OS." >&2
    exit 1
  fi

  # Source the file in subshells so its variables (ID, NAME, VERSION, ...) do
  # not leak into the installer's own environment.
  os_id="$(. "$os_release" && printf '%s' "${ID:-}")"
  os_version="$(. "$os_release" && printf '%s' "${VERSION_ID:-}")"
  os_pretty="$(. "$os_release" && printf '%s' "${PRETTY_NAME:-unknown}")"

  if [[ "$os_id" != "ubuntu" || "$os_version" != "22.04" ]]; then
    echo "ERROR: Ubuntu 22.04 required. Detected: $os_pretty" >&2
    exit 1
  fi
  echo "✓ OS: $os_pretty"
}
# Make sure ansible-playbook is available, installing Ansible (and pip) with
# apt-get when it is not already on the PATH.
# Outputs: progress lines on stdout.
install_ansible() {
  local have_ansible=1
  command -v ansible-playbook &>/dev/null || have_ansible=0

  if (( have_ansible == 0 )); then
    echo "→ Installing Ansible..."
    apt-get update -qq
    apt-get install -y -qq ansible python3-pip
  fi
  echo "✓ Ansible ready"
}
# ── Phase 1: NVIDIA drivers only ──────────────
#######################################
# Phase 1: run the NVIDIA driver playbook, register phase 2 as a one-shot
# systemd service that runs on the next boot, then reboot.
# Globals:   ANSIBLE_DIR, SCRIPT_DIR, TIER (read)
#            CEZEN_SYSTEMD_DIR (read, optional) - directory the unit file is
#            written to; defaults to /etc/systemd/system
# Outputs:   progress banner and messages on stdout
# Returns:   does not return normally; the machine is rebooted
#######################################
run_phase1() {
  local unit_dir="${CEZEN_SYSTEMD_DIR:-/etc/systemd/system}"
  local unit_file="$unit_dir/cezen-phase2.service"

  echo ""
  echo "╔══════════════════════════════════════════╗"
  echo "║ Cezen AI Suite — Phase 1: NVIDIA ║"
  echo "╚══════════════════════════════════════════╝"
  ansible-playbook -i localhost, -c local "$ANSIBLE_DIR/phase1_nvidia.yml" \
    -e "tier=$TIER" -v

  # Register phase 2 as a one-shot systemd service so it runs after reboot.
  # The heredoc delimiter is unquoted on purpose so SCRIPT_DIR and TIER are
  # expanded now; the script path is double-quoted inside the unit so that a
  # checkout directory containing spaces is still passed as a single argument.
  cat > "$unit_file" << EOF
[Unit]
Description=Cezen AI Suite Phase 2 Installer
After=network-online.target nvidia-persistenced.service
Wants=network-online.target
[Service]
Type=oneshot
ExecStart=/bin/bash "${SCRIPT_DIR}/install.sh" --phase=2 --tier=${TIER}
RemainAfterExit=yes
StandardOutput=journal+console
StandardError=journal+console
[Install]
WantedBy=multi-user.target
EOF
  systemctl daemon-reload
  systemctl enable cezen-phase2.service
  echo ""
  echo "✓ Phase 2 registered — will run automatically after reboot"
  echo "→ Rebooting in 10 seconds..."
  sleep 10
  reboot
}
# ── Phase 2: Full stack ────────────────────────
# Phase 2: report the NVIDIA driver state, apply the entry-tier playbook,
# disable the one-shot phase 2 service and print the service URLs.
# Globals: ANSIBLE_DIR, TIER (read)
# Outputs: banners and status lines on stdout
run_phase2() {
  printf '%s\n' \
    "" \
    "╔══════════════════════════════════════════╗" \
    "║ Cezen AI Suite — Phase 2: Stack ║" \
    "╚══════════════════════════════════════════╝"

  # Verify NVIDIA driver loaded; a missing driver is only a warning so that
  # the non-GPU roles still get installed.
  if nvidia-smi &>/dev/null; then
    printf '%s\n' "✓ NVIDIA driver: $(nvidia-smi --query-gpu=driver_version --format=csv,noheader | head -1)"
  else
    printf '%s\n' \
      "WARNING: nvidia-smi not responding. NVIDIA driver may not be loaded." \
      " Continuing — non-GPU roles will still install correctly."
  fi

  ansible-playbook -i localhost, -c local "$ANSIBLE_DIR/entry.yml" \
    -e "tier=$TIER" -v

  # Disable one-shot service so it doesn't run again on next reboot; a failure
  # to disable it is deliberately ignored.
  if ! systemctl disable cezen-phase2.service 2>/dev/null; then
    :
  fi

  printf '%s\n' \
    "" \
    "╔══════════════════════════════════════════╗" \
    "║ Cezen AI Suite installation complete! ║" \
    "║ ║" \
    "║ JupyterLab → http://localhost:8888 ║" \
    "║ Ollama API → http://localhost:11434 ║" \
    "║ MLflow → http://localhost:5000 ║" \
    "║ MinIO → http://localhost:9001 ║" \
    "║ Grafana → http://localhost:3000 ║" \
    "╚══════════════════════════════════════════╝"
}
# ── Main ───────────────────────────────────────
# Run the preflight checks, then dispatch on the requested phase.
check_root
check_os
install_ansible

case "$PHASE" in
  1)
    run_phase1
    ;;
  2)
    run_phase2
    ;;
  *)
    echo "ERROR: Unknown phase '$PHASE'. Use --phase=1 or --phase=2"
    exit 1
    ;;
esac

44
models/pull-models.sh Normal file
View File

@ -0,0 +1,44 @@
#!/usr/bin/env bash
# Pull additional AI models into Ollama
# Run after install: bash models/pull-models.sh --tier=entry
# The tier may be given as --tier=NAME or as a bare NAME; it defaults to "entry".
# Exit status is non-zero when the tier is unknown or any model fails to pull.
# ─────────────────────────────────────────────

entry_models=(
  "llama3.1:8b"       # General purpose, good baseline
  "mistral:7b"        # Fast, good for APIs
  "llama3.1:70b"      # Larger — only if enough VRAM (3× L40S has 144GB total)
  "nomic-embed-text"  # Embedding model for RAG
  "codellama:13b"     # Code generation
)
mid_models=(
  "${entry_models[@]}"  # already contains llama3.1:70b, so it is not repeated
  "mixtral:8x7b"
  "deepseek-coder-v2:16b"
)
advanced_models=(
  "${mid_models[@]}"
  "llama3.1:405b"
  "mixtral:8x22b"
)

#######################################
# Print the tier name selected by the first command-line argument.
# Arguments: $1 - "--tier=NAME" or "NAME" (optional, default "entry")
# Outputs:   the tier name on stdout
#######################################
parse_tier() {
  local arg="${1:-entry}"
  printf '%s\n' "${arg#--tier=}"
}

#######################################
# Print the models belonging to a tier, one per line.
# Arguments: $1 - tier name (entry, mid or advanced)
# Returns:   1 when the tier is unknown
#######################################
models_for_tier() {
  case "$1" in
    entry) printf '%s\n' "${entry_models[@]}" ;;
    mid) printf '%s\n' "${mid_models[@]}" ;;
    advanced) printf '%s\n' "${advanced_models[@]}" ;;
    *) return 1 ;;
  esac
}

#######################################
# Pull every model of the requested tier with "ollama pull".
# A failed pull does not stop the run; failures are collected and reported.
# Arguments: the script's command-line arguments
# Returns:   1 for an unknown tier or when at least one pull failed
#######################################
main() {
  local tier list model
  local -a models=()
  local -a failed=()

  tier="$(parse_tier "$@")"
  echo "Pulling models for tier: $tier"

  if ! list="$(models_for_tier "$tier")"; then
    echo "Unknown tier: $tier. Use entry, mid, or advanced."
    return 1
  fi
  while IFS= read -r model; do
    models+=("$model")
  done <<<"$list"

  for model in "${models[@]}"; do
    echo ""
    echo "→ Pulling $model..."
    if ! ollama pull "$model"; then
      echo "✗ Failed to pull $model" >&2
      failed+=("$model")
    fi
  done

  echo ""
  if (( ${#failed[@]} > 0 )); then
    echo "✗ ${#failed[@]} model(s) failed to pull: ${failed[*]}" >&2
    return 1
  fi
  echo "✓ All models pulled. List with: ollama list"
}

main "$@"