---
# vLLM role defaults — overridden per-tier in the tier playbook vars block.
#
# Each key must sit on its own line: anything that follows a `#` on the same
# line is a comment, so collapsing this file onto a single line silently
# leaves every variable below undefined.

# Model identifier to serve.
# NOTE(review): looks like a Hugging Face repo id — confirm against the
# task/template that consumes it.
vllm_model: "meta-llama/Meta-Llama-3.1-8B-Instruct"

# NOTE(review): presumably the tensor-parallel degree (number of GPUs the
# model is sharded across) — confirm against the consuming template.
vllm_tensor_parallel: 1

# Deliberately quoted so the value stays the literal string "0.70"; an
# unquoted 0.70 would be loaded as the float 0.7.
# NOTE(review): presumably the fraction of GPU memory vLLM may use — confirm.
vllm_gpu_memory_util: "0.70"

# NOTE(review): presumably the maximum context length in tokens — confirm.
vllm_max_model_len: 8192

vllm_quantization: ""  # blank = full precision; set to "awq" for 4-bit