Initial Cezen AI Suite installer — Entry tier

2026-06-23 13:09:03 +05:30 · 2026-06-23 13:09:03 +05:30 · a071602cf1
commit a071602cf1
18 changed files with 1195 additions and 0 deletions
--- a/.DS_Store
+++ b/.DS_Store
--- a/README.md
+++ b/README.md
@ -0,0 +1,77 @@
 # Cezen AI Suite — Installer
 ## Quick Start
 ```bash
 git clone <cgit-url>
 cd cgit
 sudo bash install.sh
 ```
 Server reboots automatically after NVIDIA drivers install. Phase 2 runs on its own after reboot.
 ## What Gets Installed (Entry Tier)
 | Service | Port | Notes |
 |---|---|---|
 | Ollama | 11434 | LLM inference, 2 models pre-loaded |
 | Open WebUI | 3001 | Chat interface |
 | vLLM | 8000 | OpenAI-compatible API (start manually) |
 | JupyterLab | 8888 | Token: `cezen2024` |
 | ChromaDB | 8100 | Vector DB for RAG |
 | MLflow | 5000 | Experiment tracking |
 | MinIO | 9001 | Object storage console; S3 API on port 9000 (user: cezenadmin / Cezen@2024!) |
 | Grafana | 3000 | GPU + system monitoring (admin / cezen2024) |
 ## Testing Without a GPU (Multipass)
 ```bash
 # On your MacBook:
 multipass launch 22.04 --name cezen-test --cpus 4 --mem 8G --disk 40G
 multipass shell cezen-test
 # Inside the VM:
 git clone <cgit-url>
 cd cgit
 sudo bash install.sh
 ```
 NVIDIA driver install will succeed but `nvidia-smi` won't show GPUs — that's expected. All other services will run fine.
 ## Pull More Models
 ```bash
 bash models/pull-models.sh --tier=entry
 ```
 ## File Structure
 ```
 cgit/
 ├── install.sh                    ← Entry point
 ├── ansible/
 │   ├── phase1_nvidia.yml         ← Phase 1: drivers (triggers reboot)
 │   ├── entry.yml                 ← Phase 2: full stack
 │   └── roles/
 │       ├── base/                 ← OS, Python, Miniconda, LangChain
 │       ├── nvidia/               ← Drivers, CUDA 12.4, cuDNN 9
 │       ├── docker/               ← Docker CE + NVIDIA Container Toolkit
 │       ├── k3s/                  ← Lightweight Kubernetes
 │       ├── ollama/               ← Ollama + Open WebUI
 │       ├── vllm/                 ← vLLM inference server
 │       ├── jupyterlab/           ← JupyterLab notebooks
 │       ├── chromadb/             ← Vector database
 │       ├── mlflow/               ← Experiment tracking
 │       ├── minio/                ← Object storage
 │       └── monitoring/           ← Grafana + Prometheus + DCGM
 └── models/
    └── pull-models.sh            ← Pull additional models
 ```
 ## Change Default Passwords
 Before shipping to a customer, update these:
 - JupyterLab token: `/opt/cezen/.jupyter/jupyter_lab_config.py`
 - MinIO: `/etc/default/minio`
 - Grafana: environment vars in monitoring role, or via UI after first login
 - MLflow: no auth by default (add reverse proxy if needed)
--- a/ansible/.DS_Store
+++ b/ansible/.DS_Store
--- a/ansible/entry.yml
+++ b/ansible/entry.yml
@ -0,0 +1,23 @@
 ---
 # Phase 2: Full Cezen AI Suite — Entry Tier
 # Runs after NVIDIA driver reboot
 # Invoked by install.sh --phase=2 against the local machine (no SSH).
 # The base and nvidia roles are not listed here; they run in phase1_nvidia.yml.
 - name: Cezen AI — Entry Tier Stack
  hosts: localhost
  connection: local
  become: true
  # NOTE(review): the role task files in this commit hardcode "cezen",
  # "/opt/cezen", python=3.11 and CUDA 12.4 instead of referencing these
  # variables, so changing a value here alone has no visible effect.
  vars:
    cezen_user: "cezen"
    cezen_home: "/opt/cezen"
    python_version: "3.11"
    cuda_version: "12.4"
  # Roles run top to bottom. docker is first because later roles
  # (ollama's Open WebUI, vllm, monitoring) call the docker CLI.
  roles:
    - docker
    - k3s
    - ollama
    - vllm
    - jupyterlab
    - chromadb
    - mlflow
    - minio
    - monitoring
--- a/ansible/phase1_nvidia.yml
+++ b/ansible/phase1_nvidia.yml
@ -0,0 +1,9 @@
 ---
 # Phase 1: NVIDIA drivers only. Server reboots after this.
 # The reboot is issued by install.sh (run_phase1) once this play has finished.
 - name: Cezen AI — Phase 1 NVIDIA Drivers
  hosts: localhost
  connection: local
  become: true
  # base: OS packages, cezen user, Miniconda env.
  # nvidia: driver, CUDA toolkit, cuDNN.
  roles:
    - base
    - nvidia
--- a/ansible/roles/base/tasks/main.yml
+++ b/ansible/roles/base/tasks/main.yml
@ -0,0 +1,91 @@
 ---
 # Base role: OS updates, essential packages, Python/Miniconda
 # Refresh the package index unless it was refreshed within the last hour.
 - name: Update apt cache
  apt:
    cache_valid_time: 3600
    update_cache: true
 # Dist-upgrade every installed package and remove packages no longer needed.
 - name: Upgrade all packages
  apt:
    autoremove: true
    upgrade: dist
 - name: Install essential system packages
  apt:
    name:
      - curl
      - wget
      - git
      - build-essential
      - ca-certificates
      - gnupg
      - lsb-release
      - software-properties-common
      - unzip
      - htop
      - net-tools
      - jq
      - python3-pip
      - python3-venv
    state: present
 - name: Create cezen user
  user:
    name: cezen
    shell: /bin/bash
    home: /opt/cezen
    create_home: yes
    groups: sudo
    append: yes
 - name: Create cezen directories
  file:
    path: "{{ item }}"
    state: directory
    owner: cezen
    group: cezen
    mode: "0755"
  loop:
    - /opt/cezen
    - /opt/cezen/models
    - /opt/cezen/data
    - /opt/cezen/logs
 - name: Download Miniconda
  get_url:
    url: https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh
    dest: /tmp/miniconda.sh
    mode: "0755"
  retries: 3
  delay: 10
 - name: Install Miniconda
  become_user: cezen
  command: bash /tmp/miniconda.sh -b -p /opt/cezen/miniconda
  args:
    creates: /opt/cezen/miniconda/bin/conda
 - name: Add conda to cezen PATH
  lineinfile:
    path: /opt/cezen/.bashrc
    line: 'export PATH="/opt/cezen/miniconda/bin:$PATH"'
    create: yes
    owner: cezen
 - name: Create cezen conda environment (Python 3.11)
  become_user: cezen
  command: /opt/cezen/miniconda/bin/conda create -n cezen python=3.11 -y
  args:
    creates: /opt/cezen/miniconda/envs/cezen
 # Installs the Python AI libraries into the "cezen" conda environment.
 # - "uvicorn[standard]" is quoted so the shell cannot treat the brackets as
 #   a glob pattern.
 # - `until` is set explicitly: on older Ansible releases `retries` is ignored
 #   unless an `until` condition is present.
 - name: Install LangChain + LlamaIndex + HuggingFace in conda env
  become_user: cezen
  shell: |
    /opt/cezen/miniconda/bin/conda run -n cezen pip install \
      langchain langchain-community llama-index \
      transformers huggingface-hub \
      peft bitsandbytes accelerate \
      fastapi "uvicorn[standard]" \
      sentence-transformers
  register: base_pip_result
  until: base_pip_result.rc == 0
  retries: 3
  delay: 15
--- a/ansible/roles/chromadb/tasks/main.yml
+++ b/ansible/roles/chromadb/tasks/main.yml
@ -0,0 +1,53 @@
 ---
 # ChromaDB — vector database for RAG pipelines
 - name: Install ChromaDB in cezen conda env
  become_user: cezen
  shell: |
    /opt/cezen/miniconda/bin/conda run -n cezen pip install chromadb
  retries: 3
  delay: 10
 - name: Create ChromaDB data directory
  file:
    path: /opt/cezen/data/chromadb
    state: directory
    owner: cezen
    group: cezen
 - name: Create ChromaDB systemd service
  copy:
    dest: /etc/systemd/system/chromadb.service
    content: |
      [Unit]
      Description=ChromaDB Vector Database
      After=network.target
      [Service]
      Type=simple
      User=cezen
      Group=cezen
      WorkingDirectory=/opt/cezen/data/chromadb
      ExecStart=/opt/cezen/miniconda/envs/cezen/bin/chroma run \
        --host 0.0.0.0 \
        --port 8100 \
        --path /opt/cezen/data/chromadb
      Restart=always
      RestartSec=5
      Environment="PATH=/opt/cezen/miniconda/envs/cezen/bin:/usr/local/bin:/usr/bin:/bin"
      [Install]
      WantedBy=multi-user.target
    mode: "0644"
 - name: Enable and start ChromaDB
  systemd:
    name: chromadb
    enabled: yes
    state: started
    daemon_reload: yes
 - name: Wait for ChromaDB to be ready
  wait_for:
    host: localhost
    port: 8100
    timeout: 30
--- a/ansible/roles/docker/tasks/main.yml
+++ b/ansible/roles/docker/tasks/main.yml
@ -0,0 +1,83 @@
 ---
 # Docker CE + NVIDIA Container Toolkit
 - name: Add Docker GPG key
  apt_key:
    url: https://download.docker.com/linux/ubuntu/gpg
    state: present
 - name: Add Docker apt repository
  apt_repository:
    repo: "deb [arch=amd64] https://download.docker.com/linux/ubuntu {{ ansible_distribution_release }} stable"
    state: present
    filename: docker
 - name: Install Docker CE
  apt:
    name:
      - docker-ce
      - docker-ce-cli
      - containerd.io
      - docker-buildx-plugin
      - docker-compose-plugin
    state: present
    update_cache: yes
 - name: Add cezen user to docker group
  user:
    name: cezen
    groups: docker
    append: yes
 - name: Enable and start Docker
  systemd:
    name: docker
    enabled: yes
    state: started
 # NVIDIA Container Toolkit (allows GPU passthrough into containers)
 # Registers NVIDIA's apt repository with a dedicated signing keyring.
 # - pipefail + `curl -f` make a failed download fail the task.
 # - The list is built in a temp file and installed only on success, so a
 #   failed run cannot leave an empty .list behind that `creates` would then
 #   treat as "already done".
 # - `gpg --yes` lets a rerun overwrite a keyring left by a partial earlier run.
 - name: Add NVIDIA Container Toolkit repo
  shell: |
    set -eo pipefail
    curl -fsSL https://nvidia.github.io/libnvidia-container/gpgkey | \
      gpg --dearmor --yes -o /usr/share/keyrings/nvidia-container-toolkit-keyring.gpg
    tmp_list="$(mktemp)"
    curl -fsSL https://nvidia.github.io/libnvidia-container/stable/deb/nvidia-container-toolkit.list | \
      sed 's#deb https://#deb [signed-by=/usr/share/keyrings/nvidia-container-toolkit-keyring.gpg] https://#g' \
      > "$tmp_list"
    install -m 0644 "$tmp_list" /etc/apt/sources.list.d/nvidia-container-toolkit.list
    rm -f "$tmp_list"
  args:
    executable: /bin/bash
    creates: /etc/apt/sources.list.d/nvidia-container-toolkit.list
 - name: Install NVIDIA Container Toolkit
  apt:
    name: nvidia-container-toolkit
    state: present
    update_cache: yes
 # nvidia-ctk registers the NVIDIA runtime in Docker's daemon configuration.
 # The copy task that follows then writes /etc/docker/daemon.json in full, so
 # the JSON below is what Docker ends up reading.
 - name: Configure Docker to use NVIDIA runtime
  shell: nvidia-ctk runtime configure --runtime=docker
  register: nvidia_ctk_result
 - name: Set NVIDIA as default Docker runtime
  copy:
    dest: /etc/docker/daemon.json
    content: |
      {
        "default-runtime": "nvidia",
        "runtimes": {
          "nvidia": {
            "path": "nvidia-container-runtime",
            "runtimeArgs": []
          }
        },
        "log-driver": "json-file",
        "log-opts": {
          "max-size": "100m",
          "max-file": "3"
        }
      }
    mode: "0644"
  register: docker_daemon_json
 # A role's tasks file must be a plain list of tasks; a `handlers:` key is not
 # valid here (handlers belong in the role's handlers/main.yml). Docker is
 # therefore restarted by an ordinary task, which also guarantees the new
 # runtime is active before later roles start containers.
 - name: restart docker
  systemd:
    name: docker
    state: restarted
  when: nvidia_ctk_result is changed or docker_daemon_json is changed
--- a/ansible/roles/jupyterlab/tasks/main.yml
+++ b/ansible/roles/jupyterlab/tasks/main.yml
@ -0,0 +1,92 @@
 ---
 # JupyterLab — notebook interface for AI/ML development
 - name: Install JupyterLab in cezen conda env
  become_user: cezen
  shell: |
    /opt/cezen/miniconda/bin/conda run -n cezen pip install \
      jupyterlab \
      ipywidgets \
      ipykernel \
      notebook \
      nbconvert
  retries: 3
  delay: 10
 - name: Create JupyterLab config directory
  file:
    path: /opt/cezen/.jupyter
    state: directory
    owner: cezen
    group: cezen
 - name: Generate JupyterLab config
  become_user: cezen
  shell: |
    /opt/cezen/miniconda/envs/cezen/bin/jupyter lab --generate-config
  args:
    creates: /opt/cezen/.jupyter/jupyter_lab_config.py
 - name: Configure JupyterLab (no browser, allow all IPs, set base dir)
  lineinfile:
    path: /opt/cezen/.jupyter/jupyter_lab_config.py
    line: "{{ item }}"
    create: yes
    owner: cezen
  loop:
    - "c.ServerApp.ip = '0.0.0.0'"
    - "c.ServerApp.port = 8888"
    - "c.ServerApp.open_browser = False"
    - "c.ServerApp.notebook_dir = '/opt/cezen/notebooks'"
    - "c.ServerApp.token = 'cezen2024'"
    - "c.ServerApp.allow_root = False"
 - name: Create notebooks directory
  file:
    path: /opt/cezen/notebooks
    state: directory
    owner: cezen
    group: cezen
 - name: Create sample notebook placeholder
  copy:
    dest: /opt/cezen/notebooks/README.md
    content: |
      # Cezen AI Suite — JupyterLab
      Default token: `cezen2024`
      Change this in: `/opt/cezen/.jupyter/jupyter_lab_config.py`
      Then restart: `sudo systemctl restart jupyterlab`
    owner: cezen
    group: cezen
 - name: Create JupyterLab systemd service
  copy:
    dest: /etc/systemd/system/jupyterlab.service
    content: |
      [Unit]
      Description=JupyterLab Server
      After=network.target
      [Service]
      Type=simple
      User=cezen
      Group=cezen
      WorkingDirectory=/opt/cezen/notebooks
      ExecStart=/opt/cezen/miniconda/envs/cezen/bin/jupyter lab \
        --config=/opt/cezen/.jupyter/jupyter_lab_config.py
      Restart=always
      RestartSec=5
      Environment="PATH=/opt/cezen/miniconda/envs/cezen/bin:/usr/local/cuda/bin:/usr/local/bin:/usr/bin:/bin"
      Environment="CUDA_HOME=/usr/local/cuda"
      [Install]
      WantedBy=multi-user.target
    mode: "0644"
 - name: Enable and start JupyterLab
  systemd:
    name: jupyterlab
    enabled: yes
    state: started
    daemon_reload: yes
--- a/ansible/roles/k3s/tasks/main.yml
+++ b/ansible/roles/k3s/tasks/main.yml
@ -0,0 +1,62 @@
 ---
 # K3s — lightweight Kubernetes for single-node AI workloads
 - name: Check if K3s is already installed
  stat:
    path: /usr/local/bin/k3s
  register: k3s_binary
 # Runs the upstream K3s install script with Traefik and ServiceLB disabled.
 # - pipefail: without it a failed curl feeds an empty script to `sh`, which
 #   exits 0 and hides the failure.
 # - `until` is set explicitly: on older Ansible releases `retries` is ignored
 #   unless an `until` condition is present.
 - name: Install K3s
  shell: |
    set -o pipefail
    curl -sfL https://get.k3s.io | \
      INSTALL_K3S_EXEC="--disable traefik --disable servicelb" sh -
  args:
    executable: /bin/bash
  when: not k3s_binary.stat.exists
  register: k3s_install
  until: k3s_install.rc == 0
  retries: 3
  delay: 10
 - name: Wait for K3s to be ready
  wait_for:
    path: /etc/rancher/k3s/k3s.yaml
    timeout: 120
 - name: Enable and start K3s
  systemd:
    name: k3s
    enabled: yes
    state: started
 # The destination directory has to exist before the kubeconfig is copied
 # into it, so it is created first.
 - name: Create .kube directory for cezen
  file:
    path: /opt/cezen/.kube
    state: directory
    owner: cezen
    group: cezen
 # Gives the cezen user a private (0600) copy of the cluster admin kubeconfig.
 - name: Copy kubeconfig for cezen user
  copy:
    src: /etc/rancher/k3s/k3s.yaml
    dest: /opt/cezen/.kube/config
    owner: cezen
    group: cezen
    mode: "0600"
    remote_src: yes
 - name: Set KUBECONFIG in cezen .bashrc
  lineinfile:
    path: /opt/cezen/.bashrc
    line: 'export KUBECONFIG=/opt/cezen/.kube/config'
    create: yes
    owner: cezen
 - name: Install kubectl alias for cezen
  lineinfile:
    path: /opt/cezen/.bashrc
    line: "alias kubectl='k3s kubectl'"
    create: yes
    owner: cezen
 - name: Verify K3s node is ready
  command: k3s kubectl get nodes
  register: k3s_nodes
  retries: 5
  delay: 10
  until: k3s_nodes.rc == 0
--- a/ansible/roles/minio/tasks/main.yml
+++ b/ansible/roles/minio/tasks/main.yml
@ -0,0 +1,87 @@
 ---
 # MinIO — S3-compatible object storage for model artifacts and datasets
 - name: Download MinIO server binary
  get_url:
    url: https://dl.min.io/server/minio/release/linux-amd64/minio
    dest: /usr/local/bin/minio
    mode: "0755"
  retries: 3
  delay: 10
 - name: Download MinIO client (mc)
  get_url:
    url: https://dl.min.io/client/mc/release/linux-amd64/mc
    dest: /usr/local/bin/mc
    mode: "0755"
  retries: 3
  delay: 10
 - name: Create MinIO data directories
  file:
    path: "{{ item }}"
    state: directory
    owner: cezen
    group: cezen
    mode: "0750"
  loop:
    - /opt/cezen/data/minio
    - /opt/cezen/data/minio/models
    - /opt/cezen/data/minio/datasets
 - name: Create MinIO environment file
  copy:
    dest: /etc/default/minio
    content: |
      MINIO_ROOT_USER=cezenadmin
      MINIO_ROOT_PASSWORD=Cezen@2024!
      MINIO_VOLUMES="/opt/cezen/data/minio"
      MINIO_OPTS="--console-address :9001"
    mode: "0640"
    owner: cezen
    group: cezen
 - name: Create MinIO systemd service
  copy:
    dest: /etc/systemd/system/minio.service
    content: |
      [Unit]
      Description=MinIO Object Storage
      Documentation=https://docs.min.io
      Wants=network-online.target
      After=network-online.target
      [Service]
      User=cezen
      Group=cezen
      EnvironmentFile=/etc/default/minio
      ExecStartPre=/bin/bash -c "if [ -z \"${MINIO_VOLUMES}\" ]; then echo 'Variable MINIO_VOLUMES not set'; exit 1; fi"
      ExecStart=/usr/local/bin/minio server ${MINIO_VOLUMES} ${MINIO_OPTS}
      Restart=always
      RestartSec=5
      LimitNOFILE=65536
      [Install]
      WantedBy=multi-user.target
    mode: "0644"
 - name: Enable and start MinIO
  systemd:
    name: minio
    enabled: yes
    state: started
    daemon_reload: yes
 # Wait on port 9000 — the S3 API endpoint that mc connects to below.
 # Port 9001 is only the web console (see MINIO_OPTS in /etc/default/minio).
 - name: Wait for MinIO to be ready
  wait_for:
    host: localhost
    port: 9000
    timeout: 30
 # Registers the "local" alias for the cezen user and creates the two default
 # buckets. `set -e` makes any failing mc command fail the task (otherwise only
 # the last command's exit status counts); `until` makes `retries` effective
 # on older Ansible releases.
 - name: Configure mc client with local MinIO
  become_user: cezen
  shell: |
    set -e
    mc alias set local http://localhost:9000 cezenadmin 'Cezen@2024!'
    mc mb local/models --ignore-existing
    mc mb local/datasets --ignore-existing
  register: mc_setup
  until: mc_setup.rc == 0
  retries: 3
  delay: 5
--- a/ansible/roles/mlflow/tasks/main.yml
+++ b/ansible/roles/mlflow/tasks/main.yml
@ -0,0 +1,56 @@
 ---
 # MLflow — experiment tracking and model registry
 - name: Install MLflow in cezen conda env
  become_user: cezen
  shell: |
    /opt/cezen/miniconda/bin/conda run -n cezen pip install mlflow boto3
  retries: 3
  delay: 10
 - name: Create MLflow directories
  file:
    path: "{{ item }}"
    state: directory
    owner: cezen
    group: cezen
  loop:
    - /opt/cezen/data/mlflow
    - /opt/cezen/data/mlflow/artifacts
 # Writes the systemd unit for the MLflow tracking server (port 5000, all
 # interfaces, runs as cezen from the conda env).
 # The backend store URI uses four slashes: in SQLAlchemy URLs
 # "sqlite:///path" is relative to the working directory, while
 # "sqlite:////path" is an absolute path. The three-slash form only worked as
 # long as the service happened to start with "/" as its working directory.
 - name: Create MLflow systemd service
  copy:
    dest: /etc/systemd/system/mlflow.service
    content: |
      [Unit]
      Description=MLflow Tracking Server
      After=network.target minio.service
      [Service]
      Type=simple
      User=cezen
      Group=cezen
      ExecStart=/opt/cezen/miniconda/envs/cezen/bin/mlflow server \
        --host 0.0.0.0 \
        --port 5000 \
        --backend-store-uri sqlite:////opt/cezen/data/mlflow/mlflow.db \
        --default-artifact-root /opt/cezen/data/mlflow/artifacts
      Restart=always
      RestartSec=5
      Environment="PATH=/opt/cezen/miniconda/envs/cezen/bin:/usr/local/bin:/usr/bin:/bin"
      [Install]
      WantedBy=multi-user.target
    mode: "0644"
 - name: Enable and start MLflow
  systemd:
    name: mlflow
    enabled: yes
    state: started
    daemon_reload: yes
 - name: Wait for MLflow to be ready
  wait_for:
    host: localhost
    port: 5000
    timeout: 30
--- a/ansible/roles/monitoring/tasks/main.yml
+++ b/ansible/roles/monitoring/tasks/main.yml
@ -0,0 +1,145 @@
 ---
 # Monitoring: Prometheus + Grafana + DCGM Exporter (GPU metrics)
 # ignore_errors: true on most tasks — monitoring is optional and should never block the install
 - name: Create monitoring directories
  file:
    path: "{{ item }}"
    state: directory
    owner: cezen
    group: cezen
  loop:
    - /opt/cezen/monitoring
    - /opt/cezen/monitoring/prometheus
    - /opt/cezen/monitoring/grafana
 # ── DCGM Exporter (GPU metrics for Prometheus) ──────────
 - name: Start DCGM Exporter container
  shell: |
    docker run -d \
      --name dcgm-exporter \
      --restart always \
      --gpus all \
      -p 9400:9400 \
      nvcr.io/nvidia/k8s/dcgm-exporter:3.3.0-3.2.0-ubuntu22.04
  register: dcgm_result
  failed_when: dcgm_result.rc != 0 and 'already in use' not in dcgm_result.stderr
  ignore_errors: true
 # ── Prometheus ──────────────────────────────────────────
 - name: Write Prometheus config
  copy:
    dest: /opt/cezen/monitoring/prometheus/prometheus.yml
    owner: cezen
    group: cezen
    content: |
      global:
        scrape_interval: 15s
        evaluation_interval: 15s
      scrape_configs:
        - job_name: 'prometheus'
          static_configs:
            - targets: ['localhost:9090']
        - job_name: 'dcgm'
          static_configs:
            - targets: ['host-gateway:9400']
        - job_name: 'node'
          static_configs:
            - targets: ['host-gateway:9100']
 - name: Start Prometheus container
  shell: |
    docker run -d \
      --name prometheus \
      --restart always \
      --add-host=host-gateway:172.17.0.1 \
      -p 9090:9090 \
      -v /opt/cezen/monitoring/prometheus/prometheus.yml:/etc/prometheus/prometheus.yml \
      prom/prometheus:latest
  register: prom_result
  failed_when: prom_result.rc != 0 and 'already in use' not in prom_result.stderr
  ignore_errors: true
 # ── Node Exporter (CPU/RAM/disk metrics) ───────────────
 - name: Start Node Exporter container
  shell: |
    docker run -d \
      --name node-exporter \
      --restart always \
      --network=host \
      --pid=host \
      -v /:/host:ro,rslave \
      prom/node-exporter:latest \
      --path.rootfs=/host
  register: node_exp_result
  failed_when: node_exp_result.rc != 0 and 'already in use' not in node_exp_result.stderr
  ignore_errors: true
 # ── Grafana ─────────────────────────────────────────────
 - name: Start Grafana container
  shell: |
    docker run -d \
      --name grafana \
      --restart always \
      -p 3000:3000 \
      --add-host=host-gateway:172.17.0.1 \
      -v grafana-storage:/var/lib/grafana \
      -e GF_SECURITY_ADMIN_USER=admin \
      -e GF_SECURITY_ADMIN_PASSWORD=cezen2024 \
      -e GF_USERS_ALLOW_SIGN_UP=false \
      grafana/grafana:latest
  register: grafana_result
  failed_when: grafana_result.rc != 0 and 'already in use' not in grafana_result.stderr
  ignore_errors: true
 - name: Wait for Grafana to be ready
  wait_for:
    host: localhost
    port: 3000
    timeout: 60
  ignore_errors: true
 - name: Add Prometheus datasource to Grafana
  uri:
    url: http://localhost:3000/api/datasources
    method: POST
    user: admin
    password: cezen2024
    force_basic_auth: yes
    body_format: json
    body:
      name: Prometheus
      type: prometheus
      url: "http://host-gateway:9090"
      access: proxy
      isDefault: true
    status_code: [200, 409]  # 409 = already exists, that's fine
  ignore_errors: true
 # Posts a dashboard to Grafana's import API using the admin credentials set
 # when the Grafana container was started.
 # NOTE(review): despite the task name, the request body does not carry the
 # panels of dashboard 12239 — it only defines a title and uid, so this
 # presumably creates an empty "NVIDIA GPU Overview" dashboard. TODO confirm
 # whether the real dashboard JSON was meant to be embedded or downloaded.
 - name: Import NVIDIA GPU dashboard (ID 12239)
  uri:
    url: http://localhost:3000/api/dashboards/import
    method: POST
    user: admin
    password: cezen2024
    force_basic_auth: yes
    body_format: json
    body:
      # Maps the dashboard's DS_PROMETHEUS input to the datasource named
      # "Prometheus" created by the previous task.
      inputs:
        - name: DS_PROMETHEUS
          type: datasource
          pluginId: prometheus
          value: Prometheus
      overwrite: true
      folderId: 0
      dashboard:
        "__inputs": []
        "__requires": []
        id: null
        title: "NVIDIA GPU Overview"
        uid: "nvidia-gpu"
    # 200 and 412 are both accepted as success.
    status_code: [200, 412]
  # Best-effort: a failed import must not abort the install.
  ignore_errors: true
--- a/ansible/roles/nvidia/tasks/main.yml
+++ b/ansible/roles/nvidia/tasks/main.yml
@ -0,0 +1,81 @@
 ---
 # NVIDIA role: Drivers + CUDA + cuDNN
 # NOTE: Tested on L40S (Entry) and A40 (lab). Requires reboot after this role.
 #       If no GPU is present, this role will install drivers but nvidia-smi won't show GPUs.
 - name: Add NVIDIA package repository key
  apt_key:
    url: https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/3bf863cc.pub
    state: present
 - name: Add NVIDIA CUDA apt repository
  apt_repository:
    repo: "deb https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/ /"
    state: present
    filename: cuda
 - name: Update apt cache after adding NVIDIA repo
  apt:
    update_cache: yes
 # Installs the 550-series open kernel module driver and its utilities.
 # There is deliberately no `notify` here: this role ships no handlers file,
 # and notifying an undefined handler aborts the play as soon as the task
 # reports a change. The reboot is performed by install.sh after the Phase 1
 # playbook completes.
 - name: Install NVIDIA driver (open kernel module, recommended for data center GPUs)
  apt:
    name:
      - nvidia-driver-550-open
      - nvidia-utils-550
    state: present
 # CUDA Toolkit
 - name: Install CUDA Toolkit 12.4
  apt:
    name:
      - cuda-toolkit-12-4
      - cuda-cudart-12-4
    state: present
 # cuDNN
 - name: Add cuDNN repository
  apt_repository:
    repo: "deb https://developer.download.nvidia.com/compute/cudnn/repos/ubuntu2204/x86_64/ /"
    state: present
    filename: cudnn
 - name: Install cuDNN 9 for CUDA 12
  apt:
    name:
      - cudnn9-cuda-12
    state: present
 # Environment variables
 - name: Set CUDA paths system-wide
  copy:
    dest: /etc/profile.d/cuda.sh
    content: |
      export CUDA_HOME=/usr/local/cuda
      export PATH=$CUDA_HOME/bin:$PATH
      export LD_LIBRARY_PATH=$CUDA_HOME/lib64:$LD_LIBRARY_PATH
    mode: "0644"
 - name: Set NVIDIA persistence mode (survives reboots)
  copy:
    dest: /etc/systemd/system/nvidia-persistenced-mode.service
    content: |
      [Unit]
      Description=NVIDIA Persistence Daemon Mode
      After=nvidia-persistenced.service
      [Service]
      Type=oneshot
      ExecStart=/usr/bin/nvidia-smi -pm 1
      RemainAfterExit=yes
      [Install]
      WantedBy=multi-user.target
    mode: "0644"
 - name: Enable NVIDIA persistence service
  systemd:
    name: nvidia-persistenced-mode
    enabled: yes
    daemon_reload: yes
--- a/ansible/roles/ollama/tasks/main.yml
+++ b/ansible/roles/ollama/tasks/main.yml
@ -0,0 +1,103 @@
 ---
 # Ollama — local LLM serving (main inference engine for Entry tier)
 - name: Check if Ollama is already installed
  stat:
    path: /usr/local/bin/ollama
  register: ollama_binary
 - name: Install Ollama
  shell: curl -fsSL https://ollama.ai/install.sh | sh
  when: not ollama_binary.stat.exists
  retries: 3
  delay: 10
 - name: Create Ollama systemd service with GPU support
  copy:
    dest: /etc/systemd/system/ollama.service
    content: |
      [Unit]
      Description=Ollama Service
      After=network-online.target
      [Service]
      ExecStart=/usr/local/bin/ollama serve
      User=cezen
      Group=cezen
      Restart=always
      RestartSec=3
      Environment="PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin"
      Environment="OLLAMA_HOST=0.0.0.0:11434"
      Environment="OLLAMA_MODELS=/opt/cezen/models/ollama"
      Environment="CUDA_VISIBLE_DEVICES=0,1,2"
      [Install]
      WantedBy=multi-user.target
    mode: "0644"
 - name: Create Ollama models directory
  file:
    path: /opt/cezen/models/ollama
    state: directory
    owner: cezen
    group: cezen
 - name: Enable and start Ollama
  systemd:
    name: ollama
    enabled: yes
    state: started
    daemon_reload: yes
 - name: Wait for Ollama API to be ready
  wait_for:
    host: localhost
    port: 11434
    timeout: 60
 # NOTE: Models are large (~5GB each). This step takes time on first run.
 # Skip by setting: ansible-playbook ... -e "skip_model_pull=true"
 # - `| bool` is required because values passed with -e key=value arrive as
 #   strings; without it "false" is a non-empty (truthy) string and the pull
 #   would be skipped.
 # - `until` makes `retries` effective on older Ansible releases.
 - name: Pull default models (Llama 3.1 8B + Mistral 7B)
  become_user: cezen
  command: ollama pull {{ item }}
  loop:
    - llama3.1:8b
    - mistral:7b
  environment:
    OLLAMA_HOST: "http://localhost:11434"
  when: not (skip_model_pull | default(false) | bool)
  register: ollama_pull
  until: ollama_pull.rc == 0
  retries: 3
  delay: 15
 # Open WebUI (chat interface on top of Ollama)
 # Open WebUI is started with the docker CLI only. The earlier
 # community.docker.docker_container variant depended on a collection and the
 # Docker Python SDK, neither of which the installer installs, and a module
 # that cannot be resolved stops the playbook before `ignore_errors` can help.
 # A name conflict ("already in use") means the container already exists from
 # a previous run; that is reported as "ok" rather than failed or changed.
 - name: Deploy Open WebUI via Docker
  shell: |
    docker run -d \
      --name open-webui \
      --restart always \
      -p 3001:8080 \
      --add-host=host-gateway:172.17.0.1 \
      -v open-webui:/app/backend/data \
      -e OLLAMA_BASE_URL=http://host-gateway:11434 \
      ghcr.io/open-webui/open-webui:main
  args:
    executable: /bin/bash
  register: webui_result
  failed_when: webui_result.rc != 0 and 'already in use' not in webui_result.stderr
  changed_when: webui_result.rc == 0
--- a/ansible/roles/vllm/tasks/main.yml
+++ b/ansible/roles/vllm/tasks/main.yml
@ -0,0 +1,56 @@
 ---
 # vLLM — high-performance LLM inference with OpenAI-compatible API
 # Entry tier: runs as a Docker container (easier to manage than pip install)
 - name: Pull vLLM Docker image
  shell: docker pull vllm/vllm-openai:latest
  retries: 3
  delay: 15
 # Writes the systemd unit that runs vLLM as a foreground Docker container.
 # ExecStartPre lines are prefixed with "-" so a missing old container is not
 # treated as a failure.
 # The Hugging Face cache is bind-mounted from /opt/cezen/models/hf_cache —
 # the directory this role creates for it — rather than from the whole
 # /opt/cezen/models tree, which also holds Ollama's model store.
 - name: Create vLLM systemd service
  copy:
    dest: /etc/systemd/system/vllm.service
    content: |
      [Unit]
      Description=vLLM OpenAI-Compatible Inference Server
      After=docker.service ollama.service
      Requires=docker.service
      [Service]
      Restart=always
      RestartSec=5
      ExecStartPre=-/usr/bin/docker stop vllm
      ExecStartPre=-/usr/bin/docker rm vllm
      ExecStart=/usr/bin/docker run \
        --name vllm \
        --gpus all \
        --ipc=host \
        -p 8000:8000 \
        -v /opt/cezen/models/hf_cache:/root/.cache/huggingface \
        -e HF_HOME=/root/.cache/huggingface \
        vllm/vllm-openai:latest \
        --model meta-llama/Meta-Llama-3.1-8B-Instruct \
        --gpu-memory-utilization 0.7 \
        --max-model-len 8192 \
        --tensor-parallel-size 1
      ExecStop=/usr/bin/docker stop vllm
      [Install]
      WantedBy=multi-user.target
    mode: "0644"
 # Reloads systemd and enables the unit. No `state` is given, so the service
 # is not started during this playbook run.
 - name: Enable vLLM (but don't start yet — model selection needed first)
  systemd:
    name: vllm
    enabled: yes
    daemon_reload: yes
  # Note: vLLM service is enabled but not started by default.
  # Start manually after choosing a model:
  #   sudo systemctl start vllm
  # Or change the --model flag in /etc/systemd/system/vllm.service first.
  # NOTE(review): because the unit is enabled and wanted by multi-user.target,
  # systemd will presumably start it on the next boot without manual action —
  # confirm this is intended.
 - name: Create vLLM model directory
  file:
    path: /opt/cezen/models/hf_cache
    state: directory
    owner: cezen
    group: cezen
--- a/install.sh
+++ b/install.sh
@ -0,0 +1,133 @@
 #!/usr/bin/env bash
 # ─────────────────────────────────────────────
 # Cezen AI Suite — Entry Level Installer
 # Usage:
 #   sudo bash install.sh           → Phase 1 (drivers + schedules reboot → Phase 2)
 #   sudo bash install.sh --phase=2 → Phase 2 (all software, run after reboot)
 # ─────────────────────────────────────────────
 set -e
 TIER="entry"
 PHASE="1"
 SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
 ANSIBLE_DIR="$SCRIPT_DIR/ansible"
 for arg in "$@"; do
  case $arg in
    --tier=*)  TIER="${arg#*=}" ;;
    --phase=*) PHASE="${arg#*=}" ;;
  esac
 done
 # ── Preflight ──────────────────────────────────
 # Abort with an error unless the script runs with effective UID 0.
 check_root() {
  [ "$EUID" -eq 0 ] && return 0
  echo "ERROR: Run as root: sudo bash install.sh"
  exit 1
 }
 # Verify the host is Ubuntu 22.04, which the error message and the
 # ubuntu2204 NVIDIA repository URLs used by the playbooks both require.
 # Exits 1 when /etc/os-release is unreadable or reports a different
 # distribution or release; prints the detected OS on success.
 check_os() {
  if [ ! -r /etc/os-release ]; then
    echo "ERROR: Ubuntu 22.04 required. Cannot read /etc/os-release."
    exit 1
  fi
  . /etc/os-release
  if [[ "$ID" != "ubuntu" || "$VERSION_ID" != "22.04" ]]; then
    echo "ERROR: Ubuntu 22.04 required. Detected: $PRETTY_NAME"
    exit 1
  fi
  echo "✓ OS: $PRETTY_NAME"
 }
 # Install Ansible (and pip) from apt when ansible-playbook is not already
 # on PATH, then report readiness.
 install_ansible() {
  command -v ansible-playbook &>/dev/null || {
    echo "→ Installing Ansible..."
    apt-get update -qq
    apt-get install -y -qq ansible python3-pip
  }
  echo "✓ Ansible ready"
 }
 # ── Phase 1: NVIDIA drivers only ──────────────
 # Phase 1: run the NVIDIA playbook, register a one-shot systemd unit that
 # re-invokes this script with --phase=2 on the next boot, then reboot.
 # The script path and tier are quoted inside ExecStart so the unit still
 # works when the repository was cloned into a path containing spaces.
 run_phase1() {
  echo ""
  echo "╔══════════════════════════════════════════╗"
  echo "║   Cezen AI Suite — Phase 1: NVIDIA       ║"
  echo "╚══════════════════════════════════════════╝"
  ansible-playbook -i localhost, -c local "$ANSIBLE_DIR/phase1_nvidia.yml" \
    -e "tier=$TIER" -v
  # Register phase 2 as a one-shot systemd service so it runs after reboot.
  # The heredoc delimiter is unquoted on purpose: SCRIPT_DIR and TIER are
  # expanded now, while the unit file is being written.
  cat > /etc/systemd/system/cezen-phase2.service << EOF
 [Unit]
 Description=Cezen AI Suite Phase 2 Installer
 After=network-online.target nvidia-persistenced.service
 Wants=network-online.target
 [Service]
 Type=oneshot
 ExecStart=/bin/bash "${SCRIPT_DIR}/install.sh" --phase=2 "--tier=${TIER}"
 RemainAfterExit=yes
 StandardOutput=journal+console
 StandardError=journal+console
 [Install]
 WantedBy=multi-user.target
 EOF
  systemctl daemon-reload
  systemctl enable cezen-phase2.service
  echo ""
  echo "✓ Phase 2 registered — will run automatically after reboot"
  echo "→ Rebooting in 10 seconds..."
  sleep 10
  reboot
 }
 # ── Phase 2: Full stack ────────────────────────
 # Phase 2: install the full software stack via ansible/entry.yml.
 # Because the script runs under `set -e`, a failing playbook ends the script
 # before the disable step, leaving cezen-phase2.service enabled so it runs
 # again on the next boot.
 run_phase2() {
  echo ""
  echo "╔══════════════════════════════════════════╗"
  echo "║   Cezen AI Suite — Phase 2: Stack        ║"
  echo "╚══════════════════════════════════════════╝"
  # Verify NVIDIA driver loaded (warning only — a missing GPU does not abort)
  if ! nvidia-smi &>/dev/null; then
    echo "WARNING: nvidia-smi not responding. NVIDIA driver may not be loaded."
    echo "         Continuing — non-GPU roles will still install correctly."
  else
    echo "✓ NVIDIA driver: $(nvidia-smi --query-gpu=driver_version --format=csv,noheader | head -1)"
  fi
  ansible-playbook -i localhost, -c local "$ANSIBLE_DIR/entry.yml" \
    -e "tier=$TIER" -v
  # Disable one-shot service so it doesn't run again on next reboot
  # (errors are ignored, e.g. when phase 2 was started by hand and the unit
  # does not exist)
  systemctl disable cezen-phase2.service 2>/dev/null || true
  echo ""
  # Summary of service URLs. Open WebUI (3001), vLLM (8000) and ChromaDB (8100)
  # are installed by the playbook as well but are not listed in this banner.
  echo "╔══════════════════════════════════════════╗"
  echo "║   Cezen AI Suite installation complete!  ║"
  echo "║                                          ║"
  echo "║   JupyterLab  → http://localhost:8888    ║"
  echo "║   Ollama API  → http://localhost:11434   ║"
  echo "║   MLflow      → http://localhost:5000    ║"
  echo "║   MinIO       → http://localhost:9001    ║"
  echo "║   Grafana     → http://localhost:3000    ║"
  echo "╚══════════════════════════════════════════╝"
 }
 # ── Main ───────────────────────────────────────
 # Preflight checks, then dispatch on the requested phase.
 check_root
 check_os
 install_ansible
 case "$PHASE" in
  1) run_phase1 ;;
  2) run_phase2 ;;
  *)
    echo "ERROR: Unknown phase '$PHASE'. Use --phase=1 or --phase=2"
    exit 1
    ;;
 esac
--- a/models/pull-models.sh
+++ b/models/pull-models.sh
@ -0,0 +1,44 @@
 #!/usr/bin/env bash
 # Pull additional AI models into Ollama
 # Run after install: bash models/pull-models.sh --tier=entry
 # ─────────────────────────────────────────────
 # Tier selection. The documented form is --tier=<name>; a bare positional
 # tier name (e.g. "entry") is still accepted for backward compatibility.
 # Defaults to "entry" when no argument is given.
 TIER="entry"
 for arg in "$@"; do
  case "$arg" in
    --tier=*) TIER="${arg#*=}" ;;
    -*)       echo "Unknown option: $arg. Usage: bash models/pull-models.sh --tier=entry|mid|advanced"; exit 1 ;;
    *)        TIER="$arg" ;;
  esac
 done
 echo "Pulling models for tier: $TIER"
 entry_models=(
  "llama3.1:8b"         # General purpose, good baseline
  "mistral:7b"          # Fast, good for APIs
  "llama3.1:70b"        # Larger — only if enough VRAM (3× L40S has 144GB total)
  "nomic-embed-text"    # Embedding model for RAG
  "codellama:13b"       # Code generation
 )
 # Each higher tier extends the one below it (llama3.1:70b already comes
 # from the entry list, so it is not repeated here).
 mid_models=(
  "${entry_models[@]}"
  "mixtral:8x7b"
  "deepseek-coder-v2:16b"
 )
 advanced_models=(
  "${mid_models[@]}"
  "llama3.1:405b"
  "mixtral:8x22b"
 )
 case $TIER in
  entry)    models=("${entry_models[@]}") ;;
  mid)      models=("${mid_models[@]}") ;;
  advanced) models=("${advanced_models[@]}") ;;
  *)        echo "Unknown tier: $TIER. Use entry, mid, or advanced."; exit 1 ;;
 esac
 # Pull every model, remembering failures so the final message is truthful
 # and the exit status reflects them; one failed pull does not stop the rest.
 failed=()
 for model in "${models[@]}"; do
  echo ""
  echo "→ Pulling $model..."
  if ! ollama pull "$model"; then
    echo "✗ Failed to pull $model" >&2
    failed+=("$model")
  fi
 done
 echo ""
 if [ "${#failed[@]}" -gt 0 ]; then
  echo "✗ Failed to pull: ${failed[*]}" >&2
  exit 1
 fi
 echo "✓ All models pulled. List with: ollama list"