aipackage/ansible/pro.yml

80 lines
2.4 KiB
YAML
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

---
# Nexus One AI — Pro Tier Stack
# Hardware: 2× NVIDIA RTX 5090 (32 GB GDDR7 each / 64 GB total), 128 GB DDR5, 4 TB NVMe, 10 GbE
# Capacity: 20–100 concurrent users
# Runs after NVIDIA driver reboot (phase1_nvidia.yml)
#
# Differences from Basic tier:
# - k3s included (multi-service orchestration at this scale)
# - MLflow included (fine-tuning tracking needed at Pro)
# - MinIO included (model + data storage at scale)
# - vLLM runs tensor-parallel across 2 GPUs
# - QLoRA fine-tuning available via portal
# Single local play that applies the Pro-tier roles in order. Any role can be
# skipped by listing its name in `skip_roles` (e.g. -e "skip_roles=k3s,minio").
- name: Nexus One AI — Pro Tier Stack
  hosts: localhost
  connection: local
  become: true
  vars:
    cezen_user: "cezen"
    cezen_home: "/opt/cezen"
    cezen_login_home: "/home/cezen"
    python_version: "3.11"
    cuda_version: "12.6"
    skip_roles: ""  # comma-separated list of roles to skip
    # Normalised form of skip_roles used by the role guards below: split on
    # commas and trim each entry so "docker, k3s" skips both roles instead of
    # silently failing to match " k3s".
    skip_roles_list: "{{ skip_roles.split(',') | map('trim') | list }}"
    # NOTE(review): false on a dual-GPU tier — presumably overridden at
    # runtime (extra vars or detection in a role); confirm.
    gpu_available: false
    tier: "pro"
    # ── vLLM — Pro defaults ──────────────────────
    # Tensor-parallel across 2× RTX 5090 (64 GB combined GDDR7).
    # Llama-3.1-70B at 4-bit fits comfortably; switch to full-precision
    # smaller models via the portal Model Manager.
    # NOTE(review): vllm_quantization is "awq" but vllm_model names the
    # upstream un-quantized repository. vLLM's AWQ mode presumably expects a
    # pre-quantized AWQ checkpoint — confirm the vllm role handles this, or
    # point vllm_model at an AWQ build.
    vllm_model: "meta-llama/Meta-Llama-3.1-70B-Instruct"
    vllm_tensor_parallel: 2
    vllm_gpu_memory_util: "0.85"
    vllm_max_model_len: 8192
    vllm_quantization: "awq"
    # ── Ollama — full-size models ─────────────────
    ollama_default_model: "llama3.1:70b"
  roles:
    # Each role runs unless its name appears in skip_roles_list.
    - role: base
      when: "'base' not in skip_roles_list"
    - role: docker
      when: "'docker' not in skip_roles_list"
    - role: k3s
      when: "'k3s' not in skip_roles_list"
    - role: ollama
      when: "'ollama' not in skip_roles_list"
    - role: vllm
      when: "'vllm' not in skip_roles_list"
    - role: jupyterlab
      when: "'jupyterlab' not in skip_roles_list"
    - role: chromadb
      when: "'chromadb' not in skip_roles_list"
    - role: mlflow
      when: "'mlflow' not in skip_roles_list"
    - role: minio
      when: "'minio' not in skip_roles_list"
    - role: monitoring
      when: "'monitoring' not in skip_roles_list"
    - role: cezen-backend
      when: "'cezen-backend' not in skip_roles_list"
    - role: cezen-ttyd
      when: "'cezen-ttyd' not in skip_roles_list"
    - role: cezen-nginx
      when: "'cezen-nginx' not in skip_roles_list"