---
# Nexus One AI — Pro Tier Stack
# Hardware: 2× NVIDIA RTX 5090 (32 GB GDDR7 each / 64 GB total), 128 GB DDR5, 4 TB NVMe, 10 GbE
# Capacity: 20–100 concurrent users
# Runs after NVIDIA driver reboot (phase1_nvidia.yml)
#
# Differences from Basic tier:
#   - k3s included (multi-service orchestration at this scale)
#   - MLflow included (fine-tuning tracking needed at Pro)
#   - MinIO included (model + data storage at scale)
#   - vLLM runs tensor-parallel across 2 GPUs
#   - QLoRA fine-tuning available via portal
#
# Skipping roles:
#   Pass a comma-separated list in `skip_roles`, e.g.
#     ansible-playbook <this playbook> -e "skip_roles=k3s,minio"
#   Whitespace around entries is ignored ("k3s, minio" works too).

- name: Nexus One AI — Pro Tier Stack
  hosts: localhost
  connection: local
  become: true

  vars:
    cezen_user: "cezen"
    cezen_home: "/opt/cezen"
    cezen_login_home: "/home/cezen"
    python_version: "3.11"
    # NOTE(review): RTX 5090 (Blackwell) generally needs a newer CUDA toolkit
    # than 12.6 for native support — confirm this matches what the roles install.
    cuda_version: "12.6"
    skip_roles: ""  # comma-separated list of roles to skip
    # Normalised view of `skip_roles`: split on commas and trim each entry so
    # "a, b" and "a,b" behave the same. Evaluated lazily, so an extra-vars
    # override of `skip_roles` is honoured.
    skip_roles_list: "{{ skip_roles.split(',') | map('trim') | list }}"
    # NOTE(review): defaults to false even though this tier targets GPU
    # hardware — presumably overridden at runtime or by a role; confirm.
    gpu_available: false
    tier: "pro"

    # ── vLLM — Pro defaults ──────────────────────
    # Tensor-parallel across 2× RTX 5090 (64 GB combined GDDR7).
    # Llama-3.1-70B at 4-bit fits comfortably; switch to full-precision
    # smaller models via the portal Model Manager.
    # NOTE(review): `vllm_quantization: "awq"` normally requires an
    # AWQ-quantized checkpoint, but `vllm_model` below looks like the
    # unquantized upstream repository — confirm how the vllm role resolves it.
    vllm_model: "meta-llama/Meta-Llama-3.1-70B-Instruct"
    vllm_tensor_parallel: 2
    vllm_gpu_memory_util: "0.85"
    vllm_max_model_len: 8192
    vllm_quantization: "awq"

    # ── Ollama — full-size models ─────────────────
    ollama_default_model: "llama3.1:70b"

  # Each role runs unless its name appears in `skip_roles`.
  roles:
    - role: base
      when: "'base' not in skip_roles_list"

    - role: docker
      when: "'docker' not in skip_roles_list"

    - role: k3s
      when: "'k3s' not in skip_roles_list"

    - role: ollama
      when: "'ollama' not in skip_roles_list"

    - role: vllm
      when: "'vllm' not in skip_roles_list"

    - role: jupyterlab
      when: "'jupyterlab' not in skip_roles_list"

    - role: chromadb
      when: "'chromadb' not in skip_roles_list"

    - role: mlflow
      when: "'mlflow' not in skip_roles_list"

    - role: minio
      when: "'minio' not in skip_roles_list"

    - role: monitoring
      when: "'monitoring' not in skip_roles_list"

    - role: cezen-backend
      when: "'cezen-backend' not in skip_roles_list"

    - role: cezen-ttyd
      when: "'cezen-ttyd' not in skip_roles_list"

    - role: cezen-nginx
      when: "'cezen-nginx' not in skip_roles_list"