---
# Nexus One AI — Max Tier Stack
# Hardware: 4–8× NVIDIA H100/A100/RTX 5090 (80–320 GB VRAM total),
#           256–512 GB DDR5, 8 TB+ NVMe, 100 GbE
# Capacity: 100+ concurrent users
# Runs after NVIDIA driver reboot (phase1_nvidia.yml)
#
# Differences from Pro tier:
#   - vLLM tensor-parallel across 4+ GPUs (set vllm_tensor_parallel to GPU count)
#   - Full precision models (no quantization required)
#   - Advanced fine-tuning (QLoRA + DeepSpeed ZeRO-3 for multi-GPU training)
#   - Full MLflow + MinIO stack for experiment tracking and artifact storage
#   - All optional services enabled by default
#
# Every role below is gated on skip_roles: a role runs unless its name appears
# in that comma-separated string (for example "ollama,jupyterlab").

- name: Nexus One AI — Max Tier Stack
  hosts: localhost
  connection: local
  become: true

  vars:
    cezen_user: "cezen"
    cezen_home: "/opt/cezen"
    cezen_login_home: "/home/cezen"
    python_version: "3.11"
    cuda_version: "12.6"
    skip_roles: ""  # comma-separated list of roles to skip
    # Parsed form of skip_roles used by the role conditions below. Entries are
    # trimmed so that "docker, k3s" (with a space) skips both roles instead of
    # silently failing to match " k3s".
    skip_roles_list: "{{ skip_roles.split(',') | map('trim') | list }}"
    # NOTE(review): defaults to false even though this is the GPU tier —
    # presumably overridden at run time; confirm where it gets set to true.
    gpu_available: false
    tier: "max"

    # ── vLLM — Max defaults ──────────────────────
    # Full-precision Llama-3.1-70B across 4 GPUs by default.
    # For HGX/DGX-class systems with 8 GPUs set vllm_tensor_parallel: 8
    # and switch to Llama-3.1-405B or Mixtral-8x22B.
    vllm_model: "meta-llama/Meta-Llama-3.1-70B-Instruct"
    vllm_tensor_parallel: 4
    vllm_gpu_memory_util: "0.90"
    vllm_max_model_len: 32768
    vllm_quantization: ""  # full precision at Max tier

    # ── Ollama — large model defaults ────────────
    ollama_default_model: "llama3.1:70b"

    # ── DeepSpeed — multi-GPU fine-tuning ────────
    deepspeed_enabled: true
    deepspeed_zero_stage: 3  # ZeRO-3 for large model training

  # Roles are listed in execution order; each one is skipped when its name is
  # present in skip_roles_list.
  roles:
    - role: base
      when: "'base' not in skip_roles_list"
    - role: docker
      when: "'docker' not in skip_roles_list"
    - role: k3s
      when: "'k3s' not in skip_roles_list"
    - role: ollama
      when: "'ollama' not in skip_roles_list"
    - role: vllm
      when: "'vllm' not in skip_roles_list"
    - role: jupyterlab
      when: "'jupyterlab' not in skip_roles_list"
    - role: chromadb
      when: "'chromadb' not in skip_roles_list"
    - role: mlflow
      when: "'mlflow' not in skip_roles_list"
    - role: minio
      when: "'minio' not in skip_roles_list"
    - role: monitoring
      when: "'monitoring' not in skip_roles_list"
    - role: cezen-backend
      when: "'cezen-backend' not in skip_roles_list"
    - role: cezen-ttyd
      when: "'cezen-ttyd' not in skip_roles_list"
    - role: cezen-nginx
      when: "'cezen-nginx' not in skip_roles_list"