---
# NVIDIA role: Drivers + CUDA + cuDNN
# NOTE: Tested on RTX Pro 6000 (Entry) and A40 (lab). Requires reboot after this role.
# If no GPU is present, this role will install drivers but nvidia-smi won't show GPUs.

# Trust NVIDIA's signing key for the Ubuntu 22.04 x86_64 CUDA repository so
# packages pulled from that repository can be verified by apt.
# NOTE(review): apt_key wraps the deprecated apt-key tool; consider a keyring
# file referenced via signed-by when this role is next reworked.
- name: Add NVIDIA package repository key
  ansible.builtin.apt_key:
    state: present
    url: "https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/3bf863cc.pub"

# Register the CUDA apt source (Ubuntu 22.04, x86_64) under the sources file
# name "cuda".
- name: Add NVIDIA CUDA apt repository
  ansible.builtin.apt_repository:
    filename: cuda
    state: present
    repo: 'deb https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/ /'

# Refresh the package index so the packages from the newly added NVIDIA
# repository are visible to the install tasks.
- name: Update apt cache after adding NVIDIA repo
  ansible.builtin.apt:
    # Canonical boolean instead of the YAML 1.1 truthy value `yes`.
    update_cache: true

# Install the 550-series driver (open kernel module flavour) together with the
# matching user-space utilities. A change triggers the "reboot required"
# handler, which is defined outside this file.
- name: Install NVIDIA driver (open kernel module, recommended for data center GPUs)
  ansible.builtin.apt:
    state: present
    name:
      - nvidia-driver-550-open
      - nvidia-utils-550
  notify: reboot required

# CUDA Toolkit
# Install the CUDA 12.4 toolkit and the CUDA 12.4 runtime (cudart) package.
- name: Install CUDA Toolkit 12.4
  ansible.builtin.apt:
    state: present
    name:
      - cuda-toolkit-12-4
      - cuda-cudart-12-4

# cuDNN
# Register a dedicated apt source for cuDNN under the sources file name "cudnn".
# NOTE(review): confirm NVIDIA actually publishes an apt repository at this
# path; if it does not, the apt index refresh for this source will fail.
- name: Add cuDNN repository
  ansible.builtin.apt_repository:
    filename: cudnn
    state: present
    repo: 'deb https://developer.download.nvidia.com/compute/cudnn/repos/ubuntu2204/x86_64/ /'

# Install the cuDNN 9 package built against CUDA 12.
- name: Install cuDNN 9 for CUDA 12
  ansible.builtin.apt:
    state: present
    name: cudnn9-cuda-12

# Environment variables
# Drop a profile.d snippet that exports CUDA_HOME and puts the CUDA binaries
# and libraries on PATH / LD_LIBRARY_PATH.
- name: Set CUDA paths system-wide
  ansible.builtin.copy:
    dest: /etc/profile.d/cuda.sh
    mode: "0644"
    # LD_LIBRARY_PATH is usually unset at login. Appending ":$LD_LIBRARY_PATH"
    # unconditionally would leave a trailing ":" (an empty entry), which the
    # dynamic loader treats as the current working directory. The ":+"
    # expansion only adds the separator and old value when one exists.
    content: |
      export CUDA_HOME=/usr/local/cuda
      export PATH="$CUDA_HOME/bin:$PATH"
      export LD_LIBRARY_PATH="$CUDA_HOME/lib64${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}"

# Install a oneshot systemd unit that runs `nvidia-smi -pm 1`, ordered after
# nvidia-persistenced.service and wanted by multi-user.target. The unit is
# only written here; enabling it is a separate step.
- name: Set NVIDIA persistence mode (survives reboots)
  ansible.builtin.copy:
    mode: "0644"
    dest: /etc/systemd/system/nvidia-persistenced-mode.service
    content: |
      [Unit]
      Description=NVIDIA Persistence Daemon Mode
      After=nvidia-persistenced.service

      [Service]
      Type=oneshot
      ExecStart=/usr/bin/nvidia-smi -pm 1
      RemainAfterExit=yes

      [Install]
      WantedBy=multi-user.target

# Reload systemd so it sees the nvidia-persistenced-mode unit file, then enable
# the unit for subsequent boots. No `state` is set, so this task does not
# start the unit itself.
- name: Enable NVIDIA persistence service
  ansible.builtin.systemd:
    name: nvidia-persistenced-mode
    # Canonical booleans instead of the YAML 1.1 truthy value `yes`.
    daemon_reload: true
    enabled: true