---
# Nexus One AI — Pro Tier Stack
# Hardware: 2× NVIDIA RTX 5090 (32 GB GDDR7 each / 64 GB total), 128 GB DDR5, 4 TB NVMe, 10 GbE
# Capacity: 20–100 concurrent users
# Runs after NVIDIA driver reboot (phase1_nvidia.yml)
#
# Differences from Basic tier:
#   - k3s included (multi-service orchestration at this scale)
#   - MLflow included (fine-tuning tracking needed at Pro)
#   - MinIO included (model + data storage at scale)
#   - vLLM runs tensor-parallel across 2 GPUs
#   - QLoRA fine-tuning available via portal
#
# Role selection:
#   Every role below runs unless its name appears in `skip_roles`, a
#   comma-separated string (e.g. -e "skip_roles=k3s,minio"). Whitespace around
#   the commas is ignored, so "k3s, minio" behaves the same as "k3s,minio".

- name: Nexus One AI — Pro Tier Stack
  hosts: localhost
  connection: local
  become: true

  vars:
    cezen_user: "cezen"
    cezen_home: "/opt/cezen"
    cezen_login_home: "/home/cezen"
    python_version: "3.11"
    cuda_version: "12.6"
    skip_roles: ""  # comma-separated list of roles to skip
    # Parsed form of skip_roles used by the role guards below. Each entry is
    # trimmed so that "docker, k3s" skips both roles instead of silently
    # failing to match " k3s". An empty skip_roles yields [""], which matches
    # no role name, so nothing is skipped.
    skip_roles_list: "{{ skip_roles.split(',') | map('trim') | list }}"
    # NOTE(review): this tier is documented as having 2 GPUs, yet the default
    # here is false. Presumably a role overrides it after detecting hardware;
    # confirm against the roles that read gpu_available.
    gpu_available: false
    tier: "pro"

    # ── vLLM — Pro defaults ──────────────────────
    # Tensor-parallel across 2× RTX 5090 (64 GB combined GDDR7).
    # Llama-3.1-70B at 4-bit fits comfortably; switch to full-precision
    # smaller models via the portal Model Manager.
    #
    # NOTE(review): vllm_quantization is "awq", but vllm_model is the upstream
    # meta-llama repository id rather than an AWQ-quantized checkpoint. Unless
    # the vllm role maps this id to a quantized variant, loading may fail or
    # exceed GPU memory — TODO confirm against the vllm role.
    vllm_model: "meta-llama/Meta-Llama-3.1-70B-Instruct"
    vllm_tensor_parallel: 2
    vllm_gpu_memory_util: "0.85"
    vllm_max_model_len: 8192
    vllm_quantization: "awq"

    # ── Ollama — full-size models ─────────────────
    ollama_default_model: "llama3.1:70b"

  # Roles run in the order listed. Each guard checks the trimmed skip list.
  roles:
    - role: base
      when: "'base' not in skip_roles_list"
    - role: docker
      when: "'docker' not in skip_roles_list"
    - role: k3s
      when: "'k3s' not in skip_roles_list"
    - role: ollama
      when: "'ollama' not in skip_roles_list"
    - role: vllm
      when: "'vllm' not in skip_roles_list"
    - role: jupyterlab
      when: "'jupyterlab' not in skip_roles_list"
    - role: chromadb
      when: "'chromadb' not in skip_roles_list"
    - role: mlflow
      when: "'mlflow' not in skip_roles_list"
    - role: minio
      when: "'minio' not in skip_roles_list"
    - role: monitoring
      when: "'monitoring' not in skip_roles_list"
    - role: cezen-backend
      when: "'cezen-backend' not in skip_roles_list"
    - role: cezen-ttyd
      when: "'cezen-ttyd' not in skip_roles_list"
    - role: cezen-nginx
      when: "'cezen-nginx' not in skip_roles_list"