diff --git a/ansible/roles/vllm/tasks/main.yml b/ansible/roles/vllm/tasks/main.yml
index 2e04550..67e050f 100644
--- a/ansible/roles/vllm/tasks/main.yml
+++ b/ansible/roles/vllm/tasks/main.yml
@@ -1,10 +1,30 @@
 ---
 # vLLM — high-performance LLM inference with OpenAI-compatible API
-# Entry tier: runs as a Docker container (easier to manage than pip install)
+# Skipped automatically if no GPU is present.
+
+# Probe for a GPU; the registered result gates the image pull below.
+- name: Check for NVIDIA GPU
+  shell: nvidia-smi --query-gpu=name --format=csv,noheader 2>/dev/null | head -1
+  register: gpu_check
+  ignore_errors: true
+  changed_when: false
+  # Read-only probe: run it in --check mode too, otherwise gpu_check.stdout
+  # is undefined and the conditions below fail.
+  check_mode: false
+
+- name: Skip vLLM if no GPU detected
+  debug:
+    msg: "No GPU detected — skipping vLLM image pull. Run manually when GPU is available: docker pull vllm/vllm-openai:latest"
+  when: gpu_check.stdout == "" or gpu_check.rc != 0
+
 - name: Pull vLLM Docker image
   shell: docker pull vllm/vllm-openai:latest
   retries: 3
   delay: 15
+  # Older ansible-core releases ignore retries unless until is set.
+  register: vllm_pull
+  until: vllm_pull is succeeded
+  when: gpu_check.stdout != "" and gpu_check.rc == 0
 
 - name: Create vLLM systemd service
   copy:
@@ -38,16 +58,6 @@
       WantedBy=multi-user.target
     mode: "0644"
 
-- name: Enable vLLM (but don't start yet — model selection needed first)
-  systemd:
-    name: vllm
-    enabled: yes
-    daemon_reload: yes
-  # Note: vLLM service is enabled but not started by default.
-  # Start manually after choosing a model:
-  #   sudo systemctl start vllm
-  # Or change the --model flag in /etc/systemd/system/vllm.service first.
-
 - name: Create vLLM model directory
   file:
     path: /opt/cezen/models/hf_cache