aipackage/ansible/max.yml

84 lines
2.7 KiB
YAML
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

---
# Nexus One AI — Max Tier Stack
# Hardware: 4-8× NVIDIA H100/A100/RTX 5090 (80-320 GB VRAM total), 256-512 GB DDR5, 8 TB+ NVMe, 100 GbE
# Capacity: 100+ concurrent users
# Runs after NVIDIA driver reboot (phase1_nvidia.yml)
#
# Differences from Pro tier:
# - vLLM tensor-parallel across 4+ GPUs (set vllm_tensor_parallel to GPU count)
# - Full precision models (no quantization required)
# - Advanced fine-tuning (QLoRA + DeepSpeed ZeRO-3 for multi-GPU training)
# - Full MLflow + MinIO stack for experiment tracking and artifact storage
# - All optional services enabled by default
- name: Nexus One AI — Max Tier Stack
  # Single play that applies the Max-tier role set to the local machine.
  # Any role can be skipped at run time, e.g.:
  #   ansible-playbook max.yml -e "skip_roles=k3s,minio"
  hosts: localhost
  connection: local
  become: true

  vars:
    cezen_user: "cezen"
    cezen_home: "/opt/cezen"
    cezen_login_home: "/home/cezen"
    # Quoted so the versions stay strings and are not parsed as floats.
    python_version: "3.11"
    cuda_version: "12.6"
    skip_roles: ""  # comma-separated list of roles to skip
    # skip_roles parsed once into a list, with whitespace around each name
    # removed so that "docker, k3s" skips both roles. An empty skip_roles
    # yields [''], which matches no role name, so nothing is skipped.
    skip_role_list: "{{ skip_roles.split(',') | map('trim') | list }}"
    # NOTE(review): defaults to false even on this GPU tier — presumably
    # overridden by an earlier phase or via -e; confirm against the roles.
    gpu_available: false
    tier: "max"

    # ── vLLM — Max defaults ──────────────────────
    # Full-precision Llama-3.1-70B across 4 GPUs by default.
    # For HGX/DGX-class systems with 8 GPUs set vllm_tensor_parallel: 8
    # and switch to Llama-3.1-405B or Mixtral-8x22B.
    vllm_model: "meta-llama/Meta-Llama-3.1-70B-Instruct"
    vllm_tensor_parallel: 4
    vllm_gpu_memory_util: "0.90"
    vllm_max_model_len: 32768
    vllm_quantization: ""  # full precision at Max tier

    # ── Ollama — large model defaults ────────────
    ollama_default_model: "llama3.1:70b"

    # ── DeepSpeed — multi-GPU fine-tuning ────────
    deepspeed_enabled: true
    deepspeed_zero_stage: 3  # ZeRO-3 for large model training

  # Roles are applied in the order listed; each one is skipped when its
  # name appears in skip_roles.
  roles:
    - role: base
      when: "'base' not in skip_role_list"
    - role: docker
      when: "'docker' not in skip_role_list"
    - role: k3s
      when: "'k3s' not in skip_role_list"
    - role: ollama
      when: "'ollama' not in skip_role_list"
    - role: vllm
      when: "'vllm' not in skip_role_list"
    - role: jupyterlab
      when: "'jupyterlab' not in skip_role_list"
    - role: chromadb
      when: "'chromadb' not in skip_role_list"
    - role: mlflow
      when: "'mlflow' not in skip_role_list"
    - role: minio
      when: "'minio' not in skip_role_list"
    - role: monitoring
      when: "'monitoring' not in skip_role_list"
    - role: cezen-backend
      when: "'cezen-backend' not in skip_role_list"
    - role: cezen-ttyd
      when: "'cezen-ttyd' not in skip_role_list"
    - role: cezen-nginx
      when: "'cezen-nginx' not in skip_role_list"