# File-viewer metadata captured with this file: 8 lines, 295 B, YAML
---
# vLLM role defaults — overridden per-tier in the tier playbook vars block.
# Every value here is a fallback; how each variable is consumed (template,
# CLI flag, env var) is defined elsewhere in the role, not in this file.

# Model to serve. The value has the form of a Hugging Face repo id
# ("<org>/<name>") — confirm against the task/template that reads it.
vllm_model: "meta-llama/Meta-Llama-3.1-8B-Instruct"

# Tensor-parallel degree. Presumably feeds vLLM's tensor-parallel-size
# setting (number of GPUs the model is sharded across) — TODO confirm.
vllm_tensor_parallel: 1

# GPU memory utilisation. Deliberately kept as a quoted string so the text
# "0.70" is preserved verbatim instead of being parsed as the float 0.7.
# Presumably a fraction in (0, 1] passed to vLLM — TODO confirm.
vllm_gpu_memory_util: "0.70"

# Maximum model length. Presumably the context-window limit in tokens
# passed to vLLM — TODO confirm.
vllm_max_model_len: 8192

# Quantization method. An explicit empty string (not a bare `key:`, which
# would be null) means "no quantization".
vllm_quantization: ""  # blank = full precision; set to "awq" for 4-bit