aipackage/ansible/starter.yml

77 lines
2.5 KiB
YAML
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

---
# Nexus One AI — Starter Tier Stack
# Hardware: 1× NVIDIA RTX 5090 (32 GB GDDR7), 64 GB DDR5, 2 TB NVMe, 2.5 GbE
# Capacity: 15 concurrent users
# Runs after NVIDIA driver reboot (phase1_nvidia.yml)
#
# Differences from Basic tier:
# - No k3s (Kubernetes overhead not justified for 64 GB RAM / up to 15 users)
# - No MLflow (fine-tuning tracking overkill for Starter)
# - No MinIO (local model cache is sufficient)
# - vLLM uses Phi-3 Mini / Llama 3.2 3B with 4-bit quant by default
# - JupyterLab is optional (off by default, wizard can enable)
# Single play that provisions the Starter tier on the local machine.
# Every role is gated on `skip_roles`, so an installer can drop individual
# components with e.g. `-e skip_roles=ollama,monitoring`.
- name: Nexus One AI — Starter Tier Stack
  hosts: localhost
  connection: local
  become: true

  vars:
    # ── Identity and paths ───────────────────────
    cezen_user: "cezen"
    cezen_home: "/opt/cezen"
    cezen_login_home: "/home/cezen"

    # ── Toolchain pins ───────────────────────────
    # Quoted so YAML keeps them as strings instead of parsing them as floats.
    python_version: "3.11"
    # NOTE(review): Blackwell GPUs (RTX 5090) were first supported in
    # CUDA 12.8; confirm that a 12.6 pin is what the roles really need.
    cuda_version: "12.6"

    # ── Role selection ───────────────────────────
    # Comma-separated list of roles to skip, e.g. "ollama,monitoring".
    skip_roles: ""
    # `skip_roles` split on commas with surrounding whitespace trimmed from
    # each entry, so "ollama, monitoring" skips both roles instead of
    # silently keeping " monitoring".
    skip_role_list: "{{ skip_roles.split(',') | map('trim') | list }}"

    # NOTE(review): defaults to false although this tier ships with a GPU;
    # presumably overridden at run time — confirm against the caller.
    gpu_available: false
    tier: "starter"

    # ── vLLM — Starter defaults ──────────────────
    # Small 4-bit quantised model fits comfortably in 32 GB GDDR7.
    # Users can swap to Llama-3.2-3B-Instruct via the portal Model Manager.
    # NOTE(review): vLLM's AWQ mode expects an AWQ-quantised checkpoint;
    # verify the vllm role resolves this model name to one.
    vllm_model: "microsoft/Phi-3-mini-4k-instruct"
    vllm_tensor_parallel: 1
    vllm_gpu_memory_util: "0.85"
    vllm_max_model_len: 4096
    vllm_quantization: "awq"

    # ── Ollama — lightweight models ───────────────
    ollama_default_model: "phi3:mini"

  roles:
    - role: base
      when: "'base' not in skip_role_list"

    - role: docker
      when: "'docker' not in skip_role_list"

    # k3s intentionally omitted for Starter — insufficient RAM headroom

    - role: ollama
      when: "'ollama' not in skip_role_list"

    - role: vllm
      when: "'vllm' not in skip_role_list"

    - role: chromadb
      when: "'chromadb' not in skip_role_list"

    # mlflow / minio omitted for Starter

    - role: monitoring
      when: "'monitoring' not in skip_role_list"

    - role: cezen-backend
      when: "'cezen-backend' not in skip_role_list"

    - role: cezen-ttyd
      when: "'cezen-ttyd' not in skip_role_list"

    - role: cezen-nginx
      when: "'cezen-nginx' not in skip_role_list"

    # JupyterLab — optional, install only when explicitly requested.
    # Runs only if it is not skipped AND `install_jupyterlab` is truthy
    # (an undefined `install_jupyterlab` counts as false).
    - role: jupyterlab
      when: >-
        'jupyterlab' not in skip_role_list and
        install_jupyterlab | default(false) | bool