---
# Nexus One AI — Starter Tier Stack
# Hardware: 1× NVIDIA RTX 5090 (32 GB GDDR7), 64 GB DDR5, 2 TB NVMe, 2.5 GbE
# Capacity: 1–5 concurrent users
# Runs after NVIDIA driver reboot (phase1_nvidia.yml)
#
# Differences from Basic tier:
#   - No k3s (Kubernetes overhead not justified for 64 GB RAM / 1-5 users)
#   - No MLflow (fine-tuning tracking overkill for Starter)
#   - No MinIO (local model cache is sufficient)
#   - vLLM uses Phi-3 Mini / Llama 3.2 3B with 4-bit quant by default
#   - JupyterLab is optional (off by default, wizard can enable)
#
# Role selection:
#   - Every role is applied unless its name appears in `skip_roles`.
#   - `jupyterlab` additionally requires `install_jupyterlab` to be truthy.

- name: Nexus One AI — Starter Tier Stack
  hosts: localhost
  connection: local
  become: true

  vars:
    cezen_user: "cezen"
    cezen_home: "/opt/cezen"
    cezen_login_home: "/home/cezen"
    python_version: "3.11"
    # NOTE(review): toolkit support for Blackwell (RTX 5090, sm_120) is
    # believed to start at CUDA 12.8 — confirm that 12.6 is sufficient for
    # the roles that consume this variable.
    cuda_version: "12.6"  # RTX 5090 requires CUDA 12.6+

    # Comma-separated list of roles to skip, e.g. "ollama,vllm".
    skip_roles: ""
    # Parsed form of `skip_roles`, evaluated lazily so overrides of
    # `skip_roles` are honoured. Entries are trimmed so "ollama, vllm"
    # matches both role names.
    skip_roles_list: "{{ skip_roles.split(',') | map('trim') | list }}"

    gpu_available: false
    tier: "starter"

    # ── vLLM — Starter defaults ──────────────────
    # Small 4-bit quantised model fits comfortably in 32 GB GDDR7.
    # Users can swap to Llama-3.2-3B-Instruct via the portal Model Manager.
    # NOTE(review): `vllm_quantization: "awq"` presumably expects an
    # AWQ-quantised checkpoint; the upstream Phi-3 Mini repo below does not
    # appear to be one — verify against the vllm role.
    vllm_model: "microsoft/Phi-3-mini-4k-instruct"
    vllm_tensor_parallel: 1
    vllm_gpu_memory_util: "0.85"
    vllm_max_model_len: 4096
    vllm_quantization: "awq"

    # ── Ollama — lightweight models ───────────────
    ollama_default_model: "phi3:mini"

  roles:
    - role: base
      when: "'base' not in skip_roles_list"

    - role: docker
      when: "'docker' not in skip_roles_list"

    # k3s intentionally omitted for Starter — insufficient RAM headroom

    - role: ollama
      when: "'ollama' not in skip_roles_list"

    - role: vllm
      when: "'vllm' not in skip_roles_list"

    - role: chromadb
      when: "'chromadb' not in skip_roles_list"

    # mlflow / minio omitted for Starter

    - role: monitoring
      when: "'monitoring' not in skip_roles_list"

    - role: cezen-backend
      when: "'cezen-backend' not in skip_roles_list"

    - role: cezen-ttyd
      when: "'cezen-ttyd' not in skip_roles_list"

    - role: cezen-nginx
      when: "'cezen-nginx' not in skip_roles_list"

    # JupyterLab — optional, install only when explicitly requested.
    # Both conditions must hold (a `when` list is ANDed).
    - role: jupyterlab
      when:
        - "'jupyterlab' not in skip_roles_list"
        - install_jupyterlab | default(false) | bool