Skip vLLM image pull when no GPU present

This commit is contained in:
Jino Jose 2026-06-23 14:28:40 +05:30
parent 5e4b1f7deb
commit 70497adbb6

View File

@ -1,10 +1,23 @@
--- ---
# vLLM — high-performance LLM inference with OpenAI-compatible API # vLLM — high-performance LLM inference with OpenAI-compatible API
# Entry tier: runs as a Docker container (easier to manage than pip install) # Skipped automatically if no GPU is present.
# Probe for an NVIDIA GPU. Later tasks read gpu_check.stdout (name of the
# first GPU, empty when none was reported) and gpu_check.rc.
- name: Check for NVIDIA GPU
  # pipefail makes the pipeline's exit status reflect nvidia-smi; without it
  # rc is always that of `head`, so a failing or missing nvidia-smi would
  # still register rc == 0.
  shell: |
    set -o pipefail
    nvidia-smi --query-gpu=name --format=csv,noheader 2>/dev/null | head -1
  args:
    # pipefail is a bash option, so do not rely on the default /bin/sh.
    executable: /bin/bash
  register: gpu_check
  # A host without a GPU or driver is an expected outcome, not a task failure;
  # the non-zero rc is still recorded for the conditions below.
  failed_when: false
  # Read-only probe: never report a change.
  changed_when: false
# Informational only: explains why the image pull is skipped on hosts where
# the GPU probe returned nothing or exited non-zero.
- name: Skip vLLM if no GPU detected
  # Exact negation of the condition guarding the image pull task.
  when: not (gpu_check.stdout != "" and gpu_check.rc == 0)
  debug:
    msg: "No GPU detected — skipping vLLM image pull. Run manually when GPU is available: docker pull vllm/vllm-openai:latest"
# Pre-fetch the vLLM image, but only on hosts where the GPU probe succeeded
# and reported at least one device.
- name: Pull vLLM Docker image
  # No shell features are needed, so run the binary directly.
  command: docker pull vllm/vllm-openai:latest
  register: vllm_pull
  # retries/delay only take effect together with `until` on older
  # ansible-core releases; make the retry condition explicit.
  until: vllm_pull is succeeded
  retries: 3
  delay: 15
  # docker prints "Downloaded newer image" only when something was fetched;
  # an already up-to-date image should not be reported as a change.
  changed_when: "'Downloaded newer image' in (vllm_pull.stdout | default(''))"
  when: gpu_check.stdout != "" and gpu_check.rc == 0
- name: Create vLLM systemd service - name: Create vLLM systemd service
copy: copy:
@ -38,16 +51,6 @@
WantedBy=multi-user.target WantedBy=multi-user.target
mode: "0644" mode: "0644"
# Enable the vllm unit without starting it now; a model has to be chosen
# before the service can start.
- name: Enable vLLM (but don't start yet — model selection needed first)
  systemd:
    name: vllm
    enabled: true
    # Reload systemd so it picks up the current unit definition.
    daemon_reload: true

# Note: vLLM service is enabled but not started by default.
# Start manually after choosing a model:
#   sudo systemctl start vllm
# Or change the --model flag in /etc/systemd/system/vllm.service first.
- name: Create vLLM model directory - name: Create vLLM model directory
file: file:
path: /opt/cezen/models/hf_cache path: /opt/cezen/models/hf_cache