---
# Nexus One AI — Starter Tier Stack
# Hardware: 1× NVIDIA RTX 5090 (32 GB GDDR7), 64 GB DDR5, 2 TB NVMe, 2.5 GbE
# Capacity: 1–5 concurrent users
# Runs after NVIDIA driver reboot (phase1_nvidia.yml)
#
# Differences from Basic tier:
#   - No k3s    (Kubernetes overhead not justified for 64 GB RAM / 1–5 users)
#   - No MLflow (fine-tuning tracking overkill for Starter)
#   - No MinIO  (local model cache is sufficient)
#   - vLLM uses Phi-3 Mini / Llama 3.2 3B with 4-bit quant by default
#   - JupyterLab is optional (off by default, wizard can enable)
#
# Role selection:
#   Every role below is gated on `skip_roles`, a comma-separated string
#   (e.g. `-e "skip_roles=vllm, monitoring"`). Whitespace around each name is
#   ignored. JupyterLab additionally requires `install_jupyterlab` to be truthy.

- name: Nexus One AI — Starter Tier Stack
  hosts: localhost
  connection: local
  become: true

  vars:
    cezen_user: "cezen"
    cezen_home: "/opt/cezen"
    cezen_login_home: "/home/cezen"
    python_version: "3.11"
    # NOTE(review): NVIDIA's release notes list Blackwell (RTX 50-series)
    # support starting with CUDA 12.8 — confirm that 12.6 is really sufficient
    # for the RTX 5090 before relying on this pin.
    cuda_version: "12.6"  # RTX 5090 requires CUDA 12.6+
    skip_roles: ""  # comma-separated list of roles to skip
    # Normalised view of `skip_roles`: split on commas and strip surrounding
    # whitespace so that "base, docker" skips both roles, not just the first.
    _skip_roles_list: "{{ skip_roles.split(',') | map('trim') | list }}"
    # NOTE(review): defaults to false even though this tier ships with a GPU —
    # presumably overridden at runtime (extra vars / detection); verify.
    gpu_available: false
    tier: "starter"

    # ── vLLM — Starter defaults ──────────────────
    # Small 4-bit quantised model fits comfortably in 32 GB GDDR7.
    # Users can swap to Llama-3.2-3B-Instruct via the portal Model Manager.
    # NOTE(review): vLLM's "awq" mode expects an AWQ-quantised checkpoint; the
    # model named here looks like the unquantised upstream repository — confirm
    # that the vllm role resolves this to an AWQ build.
    vllm_model: "microsoft/Phi-3-mini-4k-instruct"
    vllm_tensor_parallel: 1
    vllm_gpu_memory_util: "0.85"
    vllm_max_model_len: 4096
    vllm_quantization: "awq"

    # ── Ollama — lightweight models ───────────────
    ollama_default_model: "phi3:mini"

  roles:
    - role: base
      when: "'base' not in _skip_roles_list"

    - role: docker
      when: "'docker' not in _skip_roles_list"

    # k3s intentionally omitted for Starter — insufficient RAM headroom

    - role: ollama
      when: "'ollama' not in _skip_roles_list"

    - role: vllm
      when: "'vllm' not in _skip_roles_list"

    - role: chromadb
      when: "'chromadb' not in _skip_roles_list"

    # mlflow / minio omitted for Starter

    - role: monitoring
      when: "'monitoring' not in _skip_roles_list"

    - role: cezen-backend
      when: "'cezen-backend' not in _skip_roles_list"

    - role: cezen-ttyd
      when: "'cezen-ttyd' not in _skip_roles_list"

    - role: cezen-nginx
      when: "'cezen-nginx' not in _skip_roles_list"

    # JupyterLab — optional, install only when explicitly requested
    - role: jupyterlab
      when: >-
        'jupyterlab' not in _skip_roles_list and
        install_jupyterlab | default(false) | bool