---
# Nexus One AI — Max Tier Stack
# Hardware: 4–8× NVIDIA H100/A100/RTX 5090 (80–320 GB VRAM total),
#           256–512 GB DDR5, 8 TB+ NVMe, 100 GbE
# Capacity: 100+ concurrent users
# Runs after NVIDIA driver reboot (phase1_nvidia.yml)
#
# Differences from Pro tier:
#   - vLLM tensor-parallel across 4+ GPUs (set vllm_tensor_parallel to GPU count)
#   - Full precision models (no quantization required)
#   - Advanced fine-tuning (QLoRA + DeepSpeed ZeRO-3 for multi-GPU training)
#   - Full MLflow + MinIO stack for experiment tracking and artifact storage
#   - All optional services enabled by default
#
# Skipping roles:
#   Set skip_roles to a comma-separated list of role names (for example
#   "mlflow,minio"). Whitespace around each name is ignored.

- name: Nexus One AI — Max Tier Stack
  hosts: localhost
  connection: local
  become: true

  vars:
    cezen_user: "cezen"
    cezen_home: "/opt/cezen"
    cezen_login_home: "/home/cezen"
    # Version strings are quoted so YAML does not parse them as floats.
    python_version: "3.11"
    cuda_version: "12.6"
    skip_roles: ""  # comma-separated list of roles to skip
    # skip_roles split on commas with surrounding whitespace trimmed, so that
    # "mlflow, minio" skips both roles. Every role guard below reads this list.
    skip_roles_list: "{{ skip_roles.split(',') | map('trim') | list }}"
    # NOTE(review): defaults to false although this tier targets multi-GPU
    # hosts — presumably overridden elsewhere; confirm before relying on it.
    gpu_available: false
    tier: "max"

    # ── vLLM — Max defaults ──────────────────────
    # Full-precision Llama-3.1-70B across 4 GPUs by default.
    # For HGX/DGX-class systems with 8 GPUs set vllm_tensor_parallel: 8
    # and switch to Llama-3.1-405B or Mixtral-8x22B.
    vllm_model: "meta-llama/Meta-Llama-3.1-70B-Instruct"
    vllm_tensor_parallel: 4
    vllm_gpu_memory_util: "0.90"
    vllm_max_model_len: 32768
    vllm_quantization: ""  # full precision at Max tier

    # ── Ollama — large model defaults ────────────
    ollama_default_model: "llama3.1:70b"

    # ── DeepSpeed — multi-GPU fine-tuning ────────
    deepspeed_enabled: true
    deepspeed_zero_stage: 3  # ZeRO-3 for large model training

  # Roles are listed in execution order. Each one is applied unless its name
  # appears in skip_roles.
  roles:
    - role: base
      when: "'base' not in skip_roles_list"

    - role: docker
      when: "'docker' not in skip_roles_list"

    - role: k3s
      when: "'k3s' not in skip_roles_list"

    - role: ollama
      when: "'ollama' not in skip_roles_list"

    - role: vllm
      when: "'vllm' not in skip_roles_list"

    - role: jupyterlab
      when: "'jupyterlab' not in skip_roles_list"

    - role: chromadb
      when: "'chromadb' not in skip_roles_list"

    - role: mlflow
      when: "'mlflow' not in skip_roles_list"

    - role: minio
      when: "'minio' not in skip_roles_list"

    - role: monitoring
      when: "'monitoring' not in skip_roles_list"

    - role: cezen-backend
      when: "'cezen-backend' not in skip_roles_list"

    - role: cezen-ttyd
      when: "'cezen-ttyd' not in skip_roles_list"

    - role: cezen-nginx
      when: "'cezen-nginx' not in skip_roles_list"