aipackage/ansible/pro.yml

---
# Nexus One AI — Pro Tier Stack
# Hardware: 2× NVIDIA RTX 5090 (32 GB GDDR7 each / 64 GB total), 128 GB DDR5, 4 TB NVMe, 10 GbE
# Capacity: 20–100 concurrent users
# Runs after NVIDIA driver reboot (phase1_nvidia.yml)
#
# Differences from Basic tier:
#   - k3s included (multi-service orchestration at this scale)
#   - MLflow included (fine-tuning tracking needed at Pro)
#   - MinIO included (model + data storage at scale)
#   - vLLM runs tensor-parallel across 2 GPUs
#   - QLoRA fine-tuning available via portal

# Single play that applies the Pro-tier role list to the local machine
# (hosts: localhost, local connection, privilege escalation enabled).
# Each role can be switched off individually through `skip_roles`.
- name: Nexus One AI — Pro Tier Stack
  hosts: localhost
  connection: local
  become: true
  vars:
    cezen_user: "cezen"
    cezen_home: "/opt/cezen"
    cezen_login_home: "/home/cezen"
    python_version: "3.11"
    cuda_version: "12.6"
    # Comma-separated role names to skip, e.g. "k3s,mlflow" or "k3s, mlflow".
    skip_roles: ""
    # Parsed form of skip_roles used by every role guard below. Entries are
    # trimmed so whitespace around the commas does not defeat the match.
    # Templated lazily, so a skip_roles value passed via --extra-vars is
    # honored.
    skip_roles_list: "{{ skip_roles.split(',') | map('trim') | list }}"
    # Defaults to false here.
    # NOTE(review): presumably overridden once a GPU is detected — confirm.
    gpu_available: false
    tier: "pro"

    # ── vLLM — Pro defaults ──────────────────────
    # Tensor-parallel across 2× RTX 5090 (64 GB combined GDDR7).
    # Llama-3.1-70B at 4-bit fits comfortably; switch to full-precision
    # smaller models via the portal Model Manager.
    # NOTE(review): quantization is "awq" but vllm_model looks like the
    # upstream (non-AWQ) repository id — confirm the vllm role resolves an
    # AWQ checkpoint for it.
    vllm_model: "meta-llama/Meta-Llama-3.1-70B-Instruct"
    vllm_tensor_parallel: 2
    # Kept as a quoted string, not a float.
    vllm_gpu_memory_util: "0.85"
    vllm_max_model_len: 8192
    vllm_quantization: "awq"

    # ── Ollama — full-size models ─────────────────
    ollama_default_model: "llama3.1:70b"

  # Roles are listed in the order they are applied. Every entry is guarded by
  # the same membership test against skip_roles_list.
  roles:
    - role: base
      when: "'base' not in skip_roles_list"

    - role: docker
      when: "'docker' not in skip_roles_list"

    - role: k3s
      when: "'k3s' not in skip_roles_list"

    - role: ollama
      when: "'ollama' not in skip_roles_list"

    - role: vllm
      when: "'vllm' not in skip_roles_list"

    - role: jupyterlab
      when: "'jupyterlab' not in skip_roles_list"

    - role: chromadb
      when: "'chromadb' not in skip_roles_list"

    - role: mlflow
      when: "'mlflow' not in skip_roles_list"

    - role: minio
      when: "'minio' not in skip_roles_list"

    - role: monitoring
      when: "'monitoring' not in skip_roles_list"

    - role: cezen-backend
      when: "'cezen-backend' not in skip_roles_list"

    - role: cezen-ttyd
      when: "'cezen-ttyd' not in skip_roles_list"

    - role: cezen-nginx
      when: "'cezen-nginx' not in skip_roles_list"