---
# NVIDIA role: Drivers + CUDA + cuDNN
#
# NOTE: Tested on RTX Pro 6000 (Entry) and A40 (lab).
# A reboot is required after this role runs.
# If no GPU is present, this role still installs the drivers, but nvidia-smi
# will not list any GPUs.

# --- Package repository -----------------------------------------------------

- name: Add NVIDIA package repository key
  # NOTE(review): the apt_key module is documented as deprecated in favour of
  # per-repository keyrings ("signed-by"). It is kept here so hosts that were
  # already provisioned by this role do not end up with conflicting source
  # entries; plan a migration separately.
  apt_key:
    url: https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/3bf863cc.pub
    state: present

- name: Add NVIDIA CUDA apt repository
  apt_repository:
    repo: "deb https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/ /"
    state: present
    filename: cuda

- name: Update apt cache after adding NVIDIA repo
  apt:
    update_cache: true
  # A cache refresh is not a configuration change; do not report it as one.
  changed_when: false

# --- Driver -----------------------------------------------------------------

- name: Install NVIDIA driver (open kernel module, recommended for data center GPUs)
  apt:
    name:
      - nvidia-driver-550-open
      - nvidia-utils-550
    state: present
  # The handler named "reboot required" must be defined by the role/play that
  # includes these tasks; it is not defined in this file.
  notify: reboot required

# --- CUDA Toolkit -----------------------------------------------------------

- name: Install CUDA Toolkit 12.4
  apt:
    name:
      - cuda-toolkit-12-4
      - cuda-cudart-12-4
    state: present

# --- cuDNN ------------------------------------------------------------------

- name: Add cuDNN repository
  apt_repository:
    repo: "deb https://developer.download.nvidia.com/compute/cudnn/repos/ubuntu2204/x86_64/ /"
    state: present
    filename: cudnn

- name: Install cuDNN 9 for CUDA 12
  apt:
    name:
      - cudnn9-cuda-12
    state: present

# --- Environment variables --------------------------------------------------

- name: Set CUDA paths system-wide
  copy:
    dest: /etc/profile.d/cuda.sh
    # LD_LIBRARY_PATH: only append the previous value (and its ':' separator)
    # when it is non-empty. A trailing ':' creates an empty search-path entry,
    # which the dynamic loader interprets as the current working directory.
    content: |
      export CUDA_HOME=/usr/local/cuda
      export PATH=$CUDA_HOME/bin:$PATH
      export LD_LIBRARY_PATH=$CUDA_HOME/lib64${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}
    mode: "0644"

# --- Persistence mode -------------------------------------------------------

- name: Set NVIDIA persistence mode (survives reboots)
  # Installs a oneshot unit that runs `nvidia-smi -pm 1` at boot, ordered
  # after nvidia-persistenced.service.
  copy:
    dest: /etc/systemd/system/nvidia-persistenced-mode.service
    content: |
      [Unit]
      Description=NVIDIA Persistence Daemon Mode
      After=nvidia-persistenced.service

      [Service]
      Type=oneshot
      ExecStart=/usr/bin/nvidia-smi -pm 1
      RemainAfterExit=yes

      [Install]
      WantedBy=multi-user.target
    mode: "0644"

- name: Enable NVIDIA persistence service
  # Enabled but deliberately not started here: the role expects a reboot
  # before the driver is usable, and the unit then runs on that boot.
  systemd:
    name: nvidia-persistenced-mode
    enabled: true
    daemon_reload: true