Update ISO installer tiers and backend deployment

2026-06-30 08:54:01 +05:30 · 2026-06-30 08:54:01 +05:30 · 56668f7bdc
commit 56668f7bdc
parent 79784a6743
38 changed files with 7206 additions and 175 deletions
--- a/.gitignore
+++ b/.gitignore
@ -0,0 +1 @@
+*.iso
--- a/README.md
+++ b/README.md
@ -1,4 +1,4 @@
-# Cezen AI Suite — Installer
+# Nexus One AI — Installer

 ## Quick Start

@ -12,6 +12,61 @@ Server reboots automatically after NVIDIA drivers install. Phase 2 runs on its o

 On the custom ISO, Ubuntu autoinstall now pauses on the installer network screen so the operator can choose the final IP address from the VM console before installation continues.

+## Software-Only / Existing Hardware
+
+Run a feasibility scan before quoting or installing on customer-owned hardware:
+
+```bash
+bash scripts/cezen-feasibility.sh
+```
+
+The checker reports CPU, RAM, disk, NVIDIA GPU/VRAM, tool readiness, available features, and a recommended Cezen profile. It writes JSON to `/opt/cezen/feasibility.json` when possible, otherwise `./feasibility.json`.
+
+Install on existing hardware without the appliance NVIDIA phase:
+
+```bash
+sudo bash install.sh --software-only --profile=auto
+```
+
+For small systems or slow customer networks, the installer skips default model downloads on lightweight profiles. To force the same behavior manually:
+
+```bash
+sudo bash install.sh --software-only --profile=cpu-ai --skip-model-pull
+```
+
+Profiles:
+
+| Profile | Use When | Installs |
+|---|---|---|
+| `core` | no GPU / low RAM | portal, backend, nginx, health/metrics API |
+| `cpu-ai` | 32 GB+ RAM, no usable GPU | core + Chroma/Ollama CPU path, model pull optional |
+| `gpu-starter` | 24-32 GB VRAM | local AI starter stack, model pull optional |
+| `gpu-standard` | 48-96 GB VRAM | standard GPU stack |
+| `gpu-pro` | multi/high-VRAM GPU | advanced GPU stack |
+| `gpu-max` | multi-node or HGX-class | full stack, custom sizing |
+
+## Sellable v1 Admin APIs
+
+The backend exposes the first productization APIs for software-only and appliance deployments:
+
+| API | Purpose |
+|---|---|
+| `GET /api/license` | Shows current tier, feature matrix, and whether the tier is locked by Cezen. |
+| `GET /api/system/feasibility` | Returns the generated hardware feasibility report or live fallback. |
+| `GET /api/system/readiness-report` | Combines license, feasibility, and install readiness into a customer-facing report payload. |
+| `GET /api/audit/report?days=7` | Basic audit summary for handover and admin review. |
+| `GET /api/system/backups` | Lists local backups. |
+| `POST /api/system/backups` | Creates a local backup of Cezen data. |
+| `POST /api/system/backups/{name}/restore` | Restores a named local backup and creates a pre-restore safety snapshot. |
+
+CLI backup helper:
+
+```bash
+sudo bash scripts/cezen-backup.sh backup
+sudo bash scripts/cezen-backup.sh list
+sudo bash scripts/cezen-backup.sh restore /opt/cezen/backups/cezen-backup-YYYYmmdd-HHMMSS.zip
+```
+
 ## What Gets Installed (Entry Tier)

 | Service | Port | Notes |
@ -42,7 +97,10 @@ NVIDIA driver install will succeed but `nvidia-smi` won't show GPUs — that's e
 ## Pull More Models

 ```bash
-bash models/pull-models.sh --tier=entry
+bash models/pull-models.sh --tier=starter   # phi3:mini + embeddings
+bash models/pull-models.sh --tier=basic     # llama3.1:8b, mistral:7b, codellama
+bash models/pull-models.sh --tier=pro       # + llama3.1:70b, mixtral, deepseek-coder
+bash models/pull-models.sh --tier=max       # + llama3.1:405b, mixtral:8x22b
 ```

 ## File Structure
@ -52,7 +110,10 @@ cgit/
 ├── install.sh                    ← Entry point
 ├── ansible/
 │   ├── phase1_nvidia.yml         ← Phase 1: drivers (triggers reboot)
-│   ├── entry.yml                 ← Phase 2: full stack
+│   ├── starter.yml               ← Phase 2: Starter tier (1 GPU, small team)
+│   ├── entry.yml                 ← Phase 2: Basic tier (1–2 GPU, department)
+│   ├── pro.yml                   ← Phase 2: Pro tier (2+ GPU, multi-team)
+│   ├── max.yml                   ← Phase 2: Max tier (4–8 GPU, enterprise)
 │   └── roles/
 │       ├── base/                 ← OS, Python, Miniconda, LangChain
 │       ├── nvidia/               ← Drivers, CUDA 12.4, cuDNN 9
--- a/ansible/entry.yml
+++ b/ansible/entry.yml
@ -1,7 +1,7 @@
 ---
-# Phase 2: Full Cezen AI Suite — Entry Tier
+# Phase 2: Full Nexus One AI — Entry Tier
 # Runs after NVIDIA driver reboot
- name: Cezen AI — Entry Tier Stack
+- name: Nexus One AI — Entry Tier Stack
  hosts: localhost
  connection: local
  become: true
@ -35,3 +35,9 @@
      when: "'minio' not in skip_roles.split(',')"
    - role: monitoring
      when: "'monitoring' not in skip_roles.split(',')"
+    - role: cezen-backend
+      when: "'cezen-backend' not in skip_roles.split(',')"
+    - role: cezen-ttyd
+      when: "'cezen-ttyd' not in skip_roles.split(',')"
+    - role: cezen-nginx
+      when: "'cezen-nginx' not in skip_roles.split(',')"
--- a/ansible/max.yml
+++ b/ansible/max.yml
@ -0,0 +1,83 @@
+---
+# Nexus One AI — Max Tier Stack
+# Hardware: 4–8× NVIDIA H100/A100/RTX 5090 (80–320 GB VRAM total), 256–512 GB DDR5, 8 TB+ NVMe, 100 GbE
+# Capacity: 100+ concurrent users
+# Runs after NVIDIA driver reboot (phase1_nvidia.yml)
+#
+# Differences from Pro tier:
+#   - vLLM tensor-parallel across 4+ GPUs (set vllm_tensor_parallel to GPU count)
+#   - Full precision models (no quantization required)
+#   - Advanced fine-tuning (QLoRA + DeepSpeed ZeRO-3 for multi-GPU training)
+#   - Full MLflow + MinIO stack for experiment tracking and artifact storage
+#   - All optional services enabled by default
+
+- name: Nexus One AI — Max Tier Stack
+  hosts: localhost
+  connection: local
+  become: true
+  vars:
+    cezen_user: "cezen"
+    cezen_home: "/opt/cezen"
+    cezen_login_home: "/home/cezen"
+    python_version: "3.11"
+    cuda_version: "12.6"
+    skip_roles: ""                # comma-separated list of roles to skip
+    gpu_available: false
+    tier: "max"
+
+    # ── vLLM — Max defaults ──────────────────────
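+    # Full-precision Llama-3.1-70B across 4 GPUs by default.
+    # 70B parameters at 16-bit precision is roughly 140 GB of weights, so this
+    # default assumes 80 GB-class GPUs (H100/A100); 4× RTX 5090 (128 GB total)
+    # needs a quantized model instead.
+    # For HGX/DGX-class systems with 8 GPUs set vllm_tensor_parallel: 8
+    # and switch to Llama-3.1-405B or Mixtral-8x22B.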
+    vllm_model: "meta-llama/Meta-Llama-3.1-70B-Instruct"
+    vllm_tensor_parallel: 4
+    vllm_gpu_memory_util: "0.90"
+    vllm_max_model_len: 32768
+    vllm_quantization: ""          # full precision at Max tier
+
+    # ── Ollama — large model defaults ────────────
+    ollama_default_model: "llama3.1:70b"
+
+    # ── DeepSpeed — multi-GPU fine-tuning ────────
+    deepspeed_enabled: true
+    deepspeed_zero_stage: 3        # ZeRO-3 for large model training
+
+  roles:
+    - role: base
+      when: "'base' not in skip_roles.split(',')"
+
+    - role: docker
+      when: "'docker' not in skip_roles.split(',')"
+
+    - role: k3s
+      when: "'k3s' not in skip_roles.split(',')"
+
+    - role: ollama
+      when: "'ollama' not in skip_roles.split(',')"
+
+    - role: vllm
+      when: "'vllm' not in skip_roles.split(',')"
+
+    - role: jupyterlab
+      when: "'jupyterlab' not in skip_roles.split(',')"
+
+    - role: chromadb
+      when: "'chromadb' not in skip_roles.split(',')"
+
+    - role: mlflow
+      when: "'mlflow' not in skip_roles.split(',')"
+
+    - role: minio
+      when: "'minio' not in skip_roles.split(',')"
+
+    - role: monitoring
+      when: "'monitoring' not in skip_roles.split(',')"
+
+    - role: cezen-backend
+      when: "'cezen-backend' not in skip_roles.split(',')"
+
+    - role: cezen-ttyd
+      when: "'cezen-ttyd' not in skip_roles.split(',')"
+
+    - role: cezen-nginx
+      when: "'cezen-nginx' not in skip_roles.split(',')"
--- a/ansible/phase1_nvidia.yml
+++ b/ansible/phase1_nvidia.yml
@ -1,6 +1,6 @@
 ---
 # Phase 1: NVIDIA drivers only. Server reboots after this.
- name: Cezen AI — Phase 1 NVIDIA Drivers
+- name: Nexus One AI — Phase 1 NVIDIA Drivers
  hosts: localhost
  connection: local
  become: true
--- a/ansible/pro.yml
+++ b/ansible/pro.yml
@ -0,0 +1,79 @@
+---
+# Nexus One AI — Pro Tier Stack
+# Hardware: 2× NVIDIA RTX 5090 (32 GB GDDR7 each / 64 GB total), 128 GB DDR5, 4 TB NVMe, 10 GbE
+# Capacity: 20–100 concurrent users
+# Runs after NVIDIA driver reboot (phase1_nvidia.yml)
+#
+# Differences from Basic tier:
+#   - k3s included (multi-service orchestration at this scale)
+#   - MLflow included (fine-tuning tracking needed at Pro)
+#   - MinIO included (model + data storage at scale)
+#   - vLLM runs tensor-parallel across 2 GPUs
+#   - QLoRA fine-tuning available via portal
+
+- name: Nexus One AI — Pro Tier Stack
+  hosts: localhost
+  connection: local
+  become: true
+  vars:
+    cezen_user: "cezen"
+    cezen_home: "/opt/cezen"
+    cezen_login_home: "/home/cezen"
+    python_version: "3.11"
+    cuda_version: "12.6"
+    skip_roles: ""                # comma-separated list of roles to skip
+    gpu_available: false
+    tier: "pro"
+
+    # ── vLLM — Pro defaults ──────────────────────
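+    # Tensor-parallel across 2× RTX 5090 (64 GB combined GDDR7).
+    # Llama-3.1-70B at 4-bit fits comfortably; switch to full-precision
+    # smaller models via the portal Model Manager.
+    # NOTE(review): vllm_quantization "awq" expects an AWQ-quantized checkpoint,
+    # while the repo named in vllm_model is the unquantized release — confirm the
+    # vllm role swaps in an AWQ build before serving.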
+    vllm_model: "meta-llama/Meta-Llama-3.1-70B-Instruct"
+    vllm_tensor_parallel: 2
+    vllm_gpu_memory_util: "0.85"
+    vllm_max_model_len: 8192
+    vllm_quantization: "awq"
+
+    # ── Ollama — full-size models ─────────────────
+    ollama_default_model: "llama3.1:70b"
+
+  roles:
+    - role: base
+      when: "'base' not in skip_roles.split(',')"
+
+    - role: docker
+      when: "'docker' not in skip_roles.split(',')"
+
+    - role: k3s
+      when: "'k3s' not in skip_roles.split(',')"
+
+    - role: ollama
+      when: "'ollama' not in skip_roles.split(',')"
+
+    - role: vllm
+      when: "'vllm' not in skip_roles.split(',')"
+
+    - role: jupyterlab
+      when: "'jupyterlab' not in skip_roles.split(',')"
+
+    - role: chromadb
+      when: "'chromadb' not in skip_roles.split(',')"
+
+    - role: mlflow
+      when: "'mlflow' not in skip_roles.split(',')"
+
+    - role: minio
+      when: "'minio' not in skip_roles.split(',')"
+
+    - role: monitoring
+      when: "'monitoring' not in skip_roles.split(',')"
+
+    - role: cezen-backend
+      when: "'cezen-backend' not in skip_roles.split(',')"
+
+    - role: cezen-ttyd
+      when: "'cezen-ttyd' not in skip_roles.split(',')"
+
+    - role: cezen-nginx
+      when: "'cezen-nginx' not in skip_roles.split(',')"
--- a/ansible/roles/base/tasks/main.yml
+++ b/ansible/roles/base/tasks/main.yml
@ -50,6 +50,18 @@
    - "{{ cezen_home }}/models"
    - "{{ cezen_home }}/data"
    - "{{ cezen_home }}/logs"
+    - "{{ cezen_home }}/scripts"
+
+- name: Install Cezen operational helper scripts
+  copy:
+    src: "{{ playbook_dir }}/../scripts/{{ item }}"
+    dest: "{{ cezen_home }}/scripts/{{ item }}"
+    owner: "{{ cezen_user }}"
+    group: "{{ cezen_user }}"
+    mode: "0755"
+  loop:
+    - cezen-backup.sh
+    - cezen-feasibility.sh

 - name: Download Miniconda
  get_url:
--- a/ansible/roles/cezen-backend/files/cezen-api.service
+++ b/ansible/roles/cezen-backend/files/cezen-api.service
@ -0,0 +1,20 @@
+# systemd unit for the management API: runs uvicorn serving main:app from
+# /opt/cezen/backend as the unprivileged "cezen" user.
+[Unit]
+Description=Nexus One AI Management API
+# Ordered after ollama.service but not dependent on it (only network-online is wanted).
+After=network-online.target ollama.service
+Wants=network-online.target
+
+[Service]
+Type=simple
+User=cezen
+WorkingDirectory=/opt/cezen/backend
+Environment="CEZEN_DATA=/opt/cezen/data"
+Environment="OLLAMA_URL=http://localhost:11434"
+# The virtualenv's bin directory comes first so its executables take precedence.
+Environment="PATH=/opt/cezen/backend/venv/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin"
+# NOTE(review): --host 0.0.0.0 listens on every interface, while the nginx site
+# in this commit proxies to 127.0.0.1:8080 and applies its rate limits there;
+# clients that reach port 8080 directly bypass them — confirm this is intended.
+# NOTE(review): --workers 2 starts two server processes; requirements.txt
+# describes apscheduler as in-process, so scheduled jobs may fire once per
+# worker — verify against main.py.
+ExecStart=/opt/cezen/backend/venv/bin/uvicorn main:app --host 0.0.0.0 --port 8080 --workers 2
+Restart=always
+RestartSec=5
+StandardOutput=journal
+StandardError=journal
+
+[Install]
+WantedBy=multi-user.target
--- a/ansible/roles/cezen-backend/files/main.py
+++ b/ansible/roles/cezen-backend/files/main.py
--- a/ansible/roles/cezen-backend/files/requirements.txt
+++ b/ansible/roles/cezen-backend/files/requirements.txt
@ -0,0 +1,13 @@
+fastapi>=0.111.0
+uvicorn[standard]>=0.29.0
+python-jose[cryptography]>=3.3.0
+passlib[bcrypt]>=1.7.4
+bcrypt<4.0.0
+psutil>=5.9.0
+python-multipart>=0.0.9
+aiofiles>=23.0.0
+# Document Intelligence
+pymupdf>=1.24.0          # PDF text extraction (fitz)
+python-docx>=1.1.0       # Word document extraction
+# Scheduled Jobs
+apscheduler>=3.10.0      # In-process cron/interval scheduler
--- a/ansible/roles/cezen-backend/files/train_qlora.py
+++ b/ansible/roles/cezen-backend/files/train_qlora.py
@ -0,0 +1,309 @@
+#!/usr/bin/env python3
+"""
+Nexus One AI — QLoRA Fine-Tuning Runner
+Launched as a subprocess by the FastAPI backend.
+
+Writes structured JSONL log lines to --log-path so the UI can stream
+live loss curves and progress. Updates training_jobs.status in SQLite.
+
+Requires (install on the training node):
+    pip install torch transformers datasets peft bitsandbytes trl
+
+Optional (faster, lower VRAM):
+    pip install unsloth
+
+Usage (called by main.py — do not run manually in production):
+    python3 train_qlora.py --job-id 1 --db-path /opt/cezen/data/cezen.db \
+        --dataset /opt/cezen/data/datasets/abc.jsonl \
+        --base-model mistral:7b --output-dir /opt/cezen/data/finetuned/mymodel \
+        --log-path /opt/cezen/data/job_logs/abc.jsonl \
+        --epochs 3 --lr 2e-4 --batch-size 4 --lora-r 16 --lora-alpha 32 \
+        --output-name mymodel
+"""
+
+import argparse, json, os, sqlite3, sys, time
+from datetime import datetime, timezone
+from pathlib import Path
+
+# ── Argument parsing ──────────────────────────────────────────────────────────
+
+parser = argparse.ArgumentParser()
+parser.add_argument("--job-id",     type=int, required=True)
+parser.add_argument("--db-path",    required=True)
+parser.add_argument("--dataset",    required=True)
+parser.add_argument("--base-model", required=True)
+parser.add_argument("--output-dir", required=True)
+parser.add_argument("--log-path",   required=True)
+parser.add_argument("--output-name", required=True)
+parser.add_argument("--epochs",     type=int,   default=3)
+parser.add_argument("--lr",         type=float, default=2e-4)
+parser.add_argument("--batch-size", type=int,   default=4)
+parser.add_argument("--lora-r",     type=int,   default=16)
+parser.add_argument("--lora-alpha", type=int,   default=32)
+args = parser.parse_args()
+
+# ── Helpers ───────────────────────────────────────────────────────────────────
+
+def utcnow():
+    return datetime.now(timezone.utc).isoformat()
+
+def db_connect():
+    conn = sqlite3.connect(args.db_path)
+    conn.row_factory = sqlite3.Row
+    return conn
+
+log_file = open(args.log_path, "a", buffering=1)
+
+def log(type_: str, **kwargs):
+    entry = {"ts": utcnow(), "type": type_, **kwargs}
+    log_file.write(json.dumps(entry) + "\n")
+
+def set_status(status: str):
+    """Persist the job's status in the training_jobs row for args.job_id.
+
+    Terminal states ("completed", "failed", "cancelled") also stamp
+    finished_at with the current UTC time. The connection is closed even when
+    the UPDATE or commit raises, so a failed write does not leak a handle.
+    """
+    db = db_connect()
+    try:
+        if status in ("completed", "failed", "cancelled"):
+            db.execute(
+                "UPDATE training_jobs SET status=?, finished_at=? WHERE id=?",
+                (status, utcnow(), args.job_id)
+            )
+        else:
+            db.execute("UPDATE training_jobs SET status=? WHERE id=?", (status, args.job_id))
+        db.commit()
+    finally:
+        db.close()
+
+# ── Dataset loading ───────────────────────────────────────────────────────────
+
+def load_dataset_from_file(path: str):
+    """Load a CSV or JSONL dataset into a list of dict rows.
+
+    Files whose suffix is ".csv" (case-insensitive) are read with
+    csv.DictReader; anything else is treated as JSONL, one JSON object per
+    line. Blank lines, lines that are not valid JSON, and JSON values that are
+    not objects are skipped, so every returned row is a dict.
+
+    Returns an empty list when nothing usable was found.
+    """
+    p = Path(path)
+    rows = []
+    # utf-8-sig drops a leading byte-order mark (common in spreadsheet exports)
+    # that would otherwise corrupt the first CSV header or the first JSONL
+    # line; files without a BOM decode exactly as with plain utf-8.
+    if p.suffix.lower() == ".csv":
+        import csv
+        with open(path, newline="", encoding="utf-8-sig", errors="replace") as f:
+            rows.extend(dict(row) for row in csv.DictReader(f))
+    else:
+        with open(path, encoding="utf-8-sig", errors="replace") as f:
+            for line in f:
+                line = line.strip()
+                if not line:
+                    continue
+                try:
+                    record = json.loads(line)
+                except (ValueError, RecursionError):
+                    # Best-effort loader: a malformed line is skipped, not fatal.
+                    continue
+                # Only objects can be formatted downstream; drop bare lists,
+                # strings and numbers instead of failing later.
+                if isinstance(record, dict):
+                    rows.append(record)
+    return rows
+
+def format_row(row: dict) -> str:
+    """Render one dataset row as a single training string.
+
+    Layouts are tried in order: a ready-made "text" field, a
+    prompt/completion pair, an instruction/output pair (with an optional
+    non-empty "input"), and finally all truthy values joined by spaces.
+    """
+    if "text" in row:
+        return row["text"]
+
+    has_prompt_pair = "prompt" in row and "completion" in row
+    if has_prompt_pair:
+        return f"### Instruction:\n{row['prompt']}\n\n### Response:\n{row['completion']}"
+
+    has_instruction_pair = "instruction" in row and "output" in row
+    if has_instruction_pair:
+        extra_input = row.get("input", "")
+        if extra_input:
+            return f"### Instruction:\n{row['instruction']}\n\n### Input:\n{extra_input}\n\n### Response:\n{row['output']}"
+        return f"### Instruction:\n{row['instruction']}\n\n### Response:\n{row['output']}"
+
+    # No recognised layout: keep every truthy value, in key order.
+    return " ".join(map(str, filter(None, row.values())))
+
+# ── Main training routine ─────────────────────────────────────────────────────
+
+def main():
+    """Run one QLoRA fine-tuning job end to end.
+
+    Steps: mark the job "running", resolve the base model to a HuggingFace
+    repo id, load and format the dataset, load the model in 4-bit (Unsloth if
+    importable, otherwise HuggingFace PEFT + BitsAndBytes), train with
+    SFTTrainer while streaming loss lines to the job log, save the result to
+    args.output_dir, try to register it with Ollama, and mark the job
+    "completed".
+
+    An empty dataset, an Ollama-style model name with no known HuggingFace
+    mapping, or any exception during model loading/training logs an "error"
+    line, marks the job "failed" and exits the process with status 1.
+    """
+    log("start", job_id=args.job_id, base_model=args.base_model,
+        epochs=args.epochs, lr=args.lr, batch_size=args.batch_size,
+        lora_r=args.lora_r, lora_alpha=args.lora_alpha)
+    set_status("running")
+
+    # Resolve model name (Ollama uses "mistral:7b" style — strip the tag for HF)
+    hf_model = args.base_model
+    if ":" in hf_model and "/" not in hf_model:
+        # e.g. "mistral:7b" → try to map to HF repo
+        name_map = {
+            "mistral":  "mistralai/Mistral-7B-v0.1",
+            "llama2":   "meta-llama/Llama-2-7b-hf",
+            "llama3":   "meta-llama/Meta-Llama-3-8B",
+            "phi3":     "microsoft/Phi-3-mini-4k-instruct",
+            "gemma":    "google/gemma-7b",
+            "codellama":"codellama/CodeLlama-7b-hf",
+            "qwen2":    "Qwen/Qwen2-7B",
+        }
+        base_name = hf_model.split(":")[0].lower()
+        if base_name not in name_map:
+            # An Ollama tag such as "llama3.1:8b" is not a HuggingFace repo id,
+            # so loading it below could only fail with an obscure error; stop
+            # here with a message the operator can act on.
+            log("error", msg=f"No HuggingFace mapping for Ollama model '{args.base_model}' — "
+                             f"pass a HuggingFace repo id (org/name) as --base-model")
+            set_status("failed")
+            sys.exit(1)
+        hf_model = name_map[base_name]
+        log("info", msg=f"Mapped '{args.base_model}' → '{hf_model}' (HuggingFace)")
+
+    # Load dataset
+    log("info", msg="Loading dataset...")
+    raw_rows = load_dataset_from_file(args.dataset)
+    if not raw_rows:
+        log("error", msg="Dataset is empty or could not be parsed")
+        set_status("failed")
+        sys.exit(1)
+
+    texts = [format_row(r) for r in raw_rows]
+    log("info", msg=f"Loaded {len(texts)} training examples")
+
+    # Try Unsloth first (faster), fall back to HF PEFT
+    use_unsloth = False
+    try:
+        from unsloth import FastLanguageModel
+        use_unsloth = True
+        log("info", msg="Using Unsloth for accelerated training")
+    except ImportError:
+        log("info", msg="Unsloth not available — using HuggingFace PEFT + BitsAndBytes")
+
+    try:
+        import torch
+        from transformers import TrainingArguments, TrainerCallback
+        from datasets import Dataset as HFDataset
+
+        if use_unsloth:
+            model, tokenizer = FastLanguageModel.from_pretrained(
+                model_name=hf_model,
+                max_seq_length=2048,
+                dtype=None,
+                load_in_4bit=True,
+            )
+            model = FastLanguageModel.get_peft_model(
+                model,
+                r=args.lora_r,
+                lora_alpha=args.lora_alpha,
+                target_modules=["q_proj","k_proj","v_proj","o_proj","gate_proj","up_proj","down_proj"],
+                lora_dropout=0,
+                bias="none",
+                use_gradient_checkpointing="unsloth",
+            )
+        else:
+            from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
+            from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training
+
+            bnb_cfg = BitsAndBytesConfig(
+                load_in_4bit=True,
+                bnb_4bit_quant_type="nf4",
+                bnb_4bit_compute_dtype=torch.float16,
+            )
+            tokenizer = AutoTokenizer.from_pretrained(hf_model, trust_remote_code=True)
+            # Padding below needs a pad token; reuse EOS when the tokenizer has none.
+            if tokenizer.pad_token is None:
+                tokenizer.pad_token = tokenizer.eos_token
+
+            model = AutoModelForCausalLM.from_pretrained(
+                hf_model,
+                quantization_config=bnb_cfg,
+                device_map="auto",
+                trust_remote_code=True,
+            )
+            model = prepare_model_for_kbit_training(model)
+
+            lora_cfg = LoraConfig(
+                r=args.lora_r,
+                lora_alpha=args.lora_alpha,
+                target_modules=["q_proj","k_proj","v_proj","o_proj"],
+                lora_dropout=0.05,
+                bias="none",
+                task_type="CAUSAL_LM",
+            )
+            model = get_peft_model(model, lora_cfg)
+
+        # Tokenise
+        def tokenise(examples):
+            return tokenizer(
+                examples["text"],
+                truncation=True,
+                max_length=2048,
+                padding="max_length",
+            )
+
+        hf_ds = HFDataset.from_dict({"text": texts})
+        hf_ds = hf_ds.map(tokenise, batched=True, remove_columns=["text"])
+
+        # Custom callback to stream loss to our log
+        class LossLogger(TrainerCallback):
+            def on_log(self, _args, state, control, logs=None, **kwargs):
+                if logs and "loss" in logs:
+                    log("loss",
+                        step=state.global_step,
+                        loss=round(float(logs["loss"]), 6),
+                        epoch=round(float(logs.get("epoch", 0)), 3),
+                        lr=float(logs.get("learning_rate", args.lr)))
+
+        output_dir = args.output_dir
+        Path(output_dir).mkdir(parents=True, exist_ok=True)
+
+        # NOTE(review): the dataset is already tokenised and its "text" column
+        # removed, yet dataset_text_field names "input_ids" — confirm the
+        # installed trl version accepts pre-tokenised input this way.
+        from trl import SFTTrainer
+        trainer = SFTTrainer(
+            model=model,
+            tokenizer=tokenizer,
+            train_dataset=hf_ds,
+            dataset_text_field="input_ids",
+            max_seq_length=2048,
+            args=TrainingArguments(
+                output_dir=output_dir,
+                num_train_epochs=args.epochs,
+                per_device_train_batch_size=args.batch_size,
+                gradient_accumulation_steps=4,
+                warmup_steps=5,
+                learning_rate=args.lr,
+                fp16=not torch.cuda.is_bf16_supported(),
+                bf16=torch.cuda.is_bf16_supported(),
+                logging_steps=1,
+                save_strategy="epoch",
+                report_to="none",
+            ),
+            callbacks=[LossLogger()],
+        )
+
+        log("info", msg="Training started")
+        trainer.train()
+        log("info", msg="Training complete — saving model")
+        trainer.save_model(output_dir)
+        tokenizer.save_pretrained(output_dir)
+
+    except Exception as e:
+        import traceback
+        log("error", msg=str(e), traceback=traceback.format_exc())
+        set_status("failed")
+        sys.exit(1)
+
+    # Auto-register with Ollama via Modelfile
+    # Registration is best-effort: a failure is logged but the job still completes.
+    try:
+        _register_with_ollama(output_dir, args.output_name)
+    except Exception as e:
+        log("warning", msg=f"Could not auto-register with Ollama: {e}")
+
+    log("complete", msg="Job finished successfully", output_dir=output_dir)
+    set_status("completed")
+
+
+def _register_with_ollama(model_dir: str, model_name: str):
+    """Write a Modelfile into model_dir and run `ollama create` against it.
+
+    The outcome is only logged: "info" when the command exits 0, "warning"
+    (with the command's stderr) otherwise. Exceptions from writing the file or
+    from the subprocess call (including the 300 s timeout) propagate to the
+    caller.
+
+    NOTE(review): FROM points at the training output directory, which main()
+    fills from a PEFT-wrapped model and so may hold adapter weights only —
+    confirm Ollama can build a model from it.
+    """
+    import subprocess
+
+    modelfile_path = Path(model_dir) / "Modelfile"
+    modelfile_body = (
+        f'FROM {model_dir}\n'
+        f'PARAMETER stop "<|im_end|>"\n'
+        f'SYSTEM "This is a Nexus One AI fine-tuned model."\n'
+    )
+    modelfile_path.write_text(modelfile_body)
+
+    create_cmd = ["ollama", "create", model_name, "-f", str(modelfile_path)]
+    result = subprocess.run(create_cmd, capture_output=True, text=True, timeout=300)
+
+    if result.returncode != 0:
+        log("warning", msg=f"Ollama registration failed: {result.stderr}")
+        return
+    log("info", msg=f"Model '{model_name}' registered with Ollama")
+
+
+if __name__ == "__main__":
+    import signal
+
+    def _interrupt_on_sigterm(signum, frame):
+        # Python's default SIGTERM action ends the process without running the
+        # handlers below; raise KeyboardInterrupt so a terminated job is
+        # recorded as "cancelled" and the log file is closed, as for Ctrl-C.
+        raise KeyboardInterrupt
+
+    signal.signal(signal.SIGTERM, _interrupt_on_sigterm)
+
+    try:
+        main()
+    except KeyboardInterrupt:
+        log("error", msg="Job interrupted (SIGTERM/SIGINT)")
+        set_status("cancelled")
+        sys.exit(130)
+    except Exception as e:
+        # Anything main() did not handle itself: record it and mark the job failed.
+        import traceback
+        log("error", msg=str(e), traceback=traceback.format_exc())
+        set_status("failed")
+        sys.exit(1)
+    finally:
+        # Runs on every exit path, including the sys.exit() calls above.
+        log_file.close()
--- a/ansible/roles/cezen-backend/handlers/main.yml
+++ b/ansible/roles/cezen-backend/handlers/main.yml
@ -0,0 +1,9 @@
+---
+- name: Reload systemd
+  systemd:
+    daemon_reload: yes
+
+- name: Restart cezen-api
+  systemd:
+    name: cezen-api
+    state: restarted
--- a/ansible/roles/cezen-backend/tasks/main.yml
+++ b/ansible/roles/cezen-backend/tasks/main.yml
@ -0,0 +1,113 @@
+---
+# cezen-backend role: installs the Nexus One AI FastAPI management API
+
+- name: Install system Python deps
+  apt:
+    name:
+      - python3-pip
+      - python3-venv
+      - python3.11
+      - python3.11-venv
+      - libmupdf-dev      # required by pymupdf (Document Intelligence)
+      - mupdf-tools
+    state: present
+    update_cache: yes
+
+- name: Create backend directory
+  file:
+    path: /opt/cezen/backend
+    state: directory
+    owner: "{{ cezen_user }}"
+    group: "{{ cezen_user }}"
+    mode: "0755"
+
+- name: Create data directory (JWT secret + SQLite DB)
+  file:
+    path: /opt/cezen/data
+    state: directory
+    owner: "{{ cezen_user }}"
+    group: "{{ cezen_user }}"
+    mode: "0750"
+
+- name: Copy FastAPI application
+  copy:
+    src: main.py
+    dest: /opt/cezen/backend/main.py
+    owner: "{{ cezen_user }}"
+    group: "{{ cezen_user }}"
+    mode: "0644"
+  notify: Restart cezen-api
+
+- name: Copy QLoRA training runner
+  copy:
+    src: train_qlora.py
+    dest: /opt/cezen/backend/train_qlora.py
+    owner: "{{ cezen_user }}"
+    group: "{{ cezen_user }}"
+    mode: "0755"
+
+- name: Copy requirements.txt
+  copy:
+    src: requirements.txt
+    dest: /opt/cezen/backend/requirements.txt
+    owner: "{{ cezen_user }}"
+    group: "{{ cezen_user }}"
+    mode: "0644"
+
+- name: Create Python virtual environment (Python 3.11)
+  become_user: "{{ cezen_user }}"
+  command: python3.11 -m venv /opt/cezen/backend/venv
+  args:
+    creates: /opt/cezen/backend/venv/bin/activate
+
+- name: Install Python dependencies
+  become_user: "{{ cezen_user }}"
+  pip:
+    requirements: /opt/cezen/backend/requirements.txt
+    virtualenv: /opt/cezen/backend/venv
+  notify: Restart cezen-api
+
+# Installs the training stack into the backend virtualenv on Pro and Max tiers only.
+- name: Install Pro/Max fine-tuning dependencies
+  become_user: "{{ cezen_user }}"
+  pip:
+    name:
+      - torch
+      - transformers
+      - datasets
+      - peft
+      - bitsandbytes
+      - accelerate
+      - trl
+      - sentencepiece
+    virtualenv: /opt/cezen/backend/venv
+  # Older Ansible releases ignore retries/delay unless until is set, so the
+  # result is registered and retried until the install succeeds.
+  register: cezen_finetune_deps
+  until: cezen_finetune_deps is succeeded
+  retries: 3
+  delay: 15
+  when: (tier | default('basic')) in ['pro', 'max']
+
+# Installs DeepSpeed into the backend virtualenv on the Max tier only.
+- name: Install Max multi-GPU training dependencies
+  become_user: "{{ cezen_user }}"
+  pip:
+    name:
+      - deepspeed
+    virtualenv: /opt/cezen/backend/venv
+  # Older Ansible releases ignore retries/delay unless until is set, so the
+  # result is registered and retried until the install succeeds.
+  register: cezen_deepspeed_install
+  until: cezen_deepspeed_install is succeeded
+  retries: 3
+  delay: 15
+  when: (tier | default('basic')) == 'max'
+
+- name: Install systemd service unit
+  copy:
+    src: cezen-api.service
+    dest: /etc/systemd/system/cezen-api.service
+    owner: root
+    group: root
+    mode: "0644"
+  notify:
+    - Reload systemd
+    - Restart cezen-api
+
+- name: Enable and start cezen-api service
+  systemd:
+    name: cezen-api
+    enabled: yes
+    state: started
+    daemon_reload: yes
--- a/ansible/roles/cezen-nginx/files/cezen.conf
+++ b/ansible/roles/cezen-nginx/files/cezen.conf
@ -0,0 +1,135 @@
+# /etc/nginx/sites-available/cezen
+# Nexus One AI Portal — serves static portal, proxies API and console terminal
+#
+# Install:
+#   sudo cp cezen.conf /etc/nginx/sites-available/cezen
+#   sudo ln -sf /etc/nginx/sites-available/cezen /etc/nginx/sites-enabled/cezen
+#   sudo rm -f /etc/nginx/sites-enabled/default
+#   sudo nginx -t && sudo systemctl reload nginx
+#
+# Location matching: exact (=) locations win first; ^~ prefixes stop the regex
+# search; otherwise regex locations are tried in file order and beat plain
+# prefixes. /api/ and /console/ use ^~ so the regex blocks below (static
+# assets, deny rules) never capture proxied paths.
+
+# ─── Rate limiting zones (must be outside server block) ──────────────────────
+# Login: 5 requests/min per IP; up to 3 excess requests are passed through
+# immediately (burst=3 nodelay), anything beyond that gets 429
+limit_req_zone $binary_remote_addr zone=cezen_login:10m rate=5r/m;
+# General API: 60 req/min per IP (generous for dashboard polling)
+limit_req_zone $binary_remote_addr zone=cezen_api:10m   rate=60r/m;
+
+server {
+    listen 80 default_server;
+    listen [::]:80 default_server;
+
+    server_name _;
+
+    # Hide server version
+    server_tokens off;
+
+    # Logging
+    access_log /var/log/nginx/cezen-access.log;
+    error_log  /var/log/nginx/cezen-error.log;
+
+    # ─── Global security headers ──────────────────────────────────────────────
+    # add_header is inherited by a location only when that location defines no
+    # add_header of its own.
+    add_header X-Content-Type-Options  "nosniff"                   always;
+    add_header X-Frame-Options         "SAMEORIGIN"                always;
+    add_header X-XSS-Protection        "1; mode=block"             always;
+    add_header Referrer-Policy         "strict-origin-when-cross-origin" always;
+    add_header Permissions-Policy      "geolocation=(), camera=(), microphone=()" always;
+    add_header Content-Security-Policy
+        "default-src 'self'; "
+        "script-src 'self' 'unsafe-inline'; "
+        "style-src 'self' 'unsafe-inline'; "
+        "img-src 'self' data:; "
+        "connect-src 'self'; "
+        "frame-src 'self'; "
+        "font-src 'self'; "
+        "object-src 'none'; "
+        "base-uri 'self';"
+        always;
+
+    # ─── robots.txt — block all indexing (air-gapped / private portal) ────────
+    location = /robots.txt {
+        # default_type sets the Content-Type of the returned body; an add_header
+        # here would emit a second Content-Type and drop the security headers.
+        default_type text/plain;
+        return 200 "User-agent: *\nDisallow: /\n";
+    }
+
+    # ─── Block dotfiles and common attack paths ───────────────────────────────
+    # Kept ahead of the static-asset regex: regex locations match in file
+    # order, so /.hidden/app.js must hit the deny rule first.
+    location ~ /\. {
+        deny all;
+    }
+
+    location ~* \.(env|git|sql|bak|sh|py)$ {
+        deny all;
+    }
+
+    # ─── Static Portal ───────────────────────────────────────────────────────
+    root /opt/cezen/portal;
+    index index.html;
+
+    location / {
+        try_files $uri $uri/ /index.html;
+    }
+
+    # Cache static assets aggressively
+    location ~* \.(css|js|png|jpg|jpeg|gif|ico|svg|woff2?)$ {
+        expires 7d;
+        add_header Cache-Control "public, immutable";
+        # Defining add_header here cancels inheritance of the server-level
+        # headers, so nosniff is repeated for scripts, styles and images.
+        add_header X-Content-Type-Options "nosniff" always;
+    }
+
+    # ─── Model upload (large files — no size limit, extended timeout) ────────
+    location = /api/models/upload {
+        client_max_body_size    0;         # unlimited — GGUF files can be 70 GB+
+        proxy_request_buffering off;       # stream directly to backend, don't buffer in Nginx
+        proxy_read_timeout      7200s;     # 2 hours for slow transfers
+        proxy_send_timeout      7200s;
+
+        proxy_pass         http://127.0.0.1:8080;
+        proxy_http_version 1.1;
+        proxy_set_header   Host              $host;
+        proxy_set_header   X-Real-IP         $remote_addr;
+        proxy_set_header   X-Forwarded-For   $proxy_add_x_forwarded_for;
+        proxy_set_header   X-Forwarded-Proto $scheme;
+    }
+
+    # ─── Login rate limit (tight) ─────────────────────────────────────────────
+    location = /api/auth/login {
+        limit_req zone=cezen_login burst=3 nodelay;
+        limit_req_status 429;
+
+        proxy_pass         http://127.0.0.1:8080;
+        proxy_http_version 1.1;
+        proxy_set_header   Host              $host;
+        proxy_set_header   X-Real-IP         $remote_addr;
+        proxy_set_header   X-Forwarded-For   $proxy_add_x_forwarded_for;
+        proxy_set_header   X-Forwarded-Proto $scheme;
+        proxy_read_timeout 30s;
+    }
+
+    # ─── FastAPI Backend (/api/) ──────────────────────────────────────────────
+    # ^~ keeps API paths that end in .js/.png/.py etc. on the backend instead of
+    # letting the static-asset or deny regexes claim them.
+    location ^~ /api/ {
+        limit_req zone=cezen_api burst=20 nodelay;
+        limit_req_status 429;
+
+        proxy_pass         http://127.0.0.1:8080;
+        proxy_http_version 1.1;
+        proxy_set_header   Host              $host;
+        proxy_set_header   X-Real-IP         $remote_addr;
+        proxy_set_header   X-Forwarded-For   $proxy_add_x_forwarded_for;
+        proxy_set_header   X-Forwarded-Proto $scheme;
+        proxy_read_timeout 120s;
+    }
+
+    # ─── Web Console (ttyd) (/console/) ──────────────────────────────────────
+    # ^~ so /console/*.js and /console/*.css reach ttyd rather than being
+    # looked up under the portal root by the static-asset regex.
+    # NOTE(review): this location adds no authentication in front of the
+    # terminal — confirm access control is enforced elsewhere.
+    location ^~ /console/ {
+        proxy_pass         http://127.0.0.1:7681/;
+        proxy_http_version 1.1;
+        proxy_set_header   Upgrade           $http_upgrade;
+        proxy_set_header   Connection        "upgrade";
+        proxy_set_header   Host              $host;
+        proxy_set_header   X-Real-IP         $remote_addr;
+        # sub_filter cannot rewrite a compressed upstream body, so ask ttyd
+        # for an uncompressed response.
+        proxy_set_header   Accept-Encoding   "";
+        proxy_read_timeout 86400s;
+
+        # Rewrite paths so ttyd JS/CSS assets load correctly
+        proxy_redirect     / /console/;
+        sub_filter         'href="/'  'href="/console/';
+        sub_filter         'src="/'   'src="/console/';
+        sub_filter_once    off;
+        sub_filter_types   text/html;
+    }
+}
--- a/ansible/roles/cezen-nginx/handlers/main.yml
+++ b/ansible/roles/cezen-nginx/handlers/main.yml
@ -0,0 +1,5 @@
+---
+- name: Reload nginx
+  systemd:
+    name: nginx
+    state: reloaded
--- a/ansible/roles/cezen-nginx/tasks/main.yml
+++ b/ansible/roles/cezen-nginx/tasks/main.yml
@ -0,0 +1,59 @@
+---
+# cezen-nginx role: installs Nginx, deploys portal static files and site config
+
+# Installs the distribution nginx package, refreshing the apt cache first.
+- name: Install Nginx
+  apt:
+    name: nginx
+    state: present
+    update_cache: yes
+
+# Web root for the portal: owned by the service user, group www-data.
+- name: Create portal directory
+  file:
+    path: /opt/cezen/portal
+    state: directory
+    owner: "{{ cezen_user }}"
+    group: www-data
+    mode: "0755"
+
+# Mirrors the portal sources into the web root. `delete: yes` removes files in
+# the destination that no longer exist in the source; .DS_Store and *.sh files
+# are excluded from the transfer.
+# NOTE(review): src climbs three directories above playbook_dir. For a playbook
+# stored in ansible/ that resolves outside the repository checkout — confirm
+# where cezen-portal/ is expected to live.
+- name: Sync portal static files
+  synchronize:
+    src: "{{ playbook_dir }}/../../../cezen-portal/"
+    dest: /opt/cezen/portal/
+    delete: yes
+    recursive: yes
+    rsync_opts:
+      - "--exclude=.DS_Store"
+      - "--exclude=*.sh"
+  notify: Reload nginx
+
+# Copies the role's cezen.conf into sites-available.
+- name: Deploy Nginx site config
+  copy:
+    src: cezen.conf
+    dest: /etc/nginx/sites-available/cezen
+    owner: root
+    group: root
+    mode: "0644"
+  notify: Reload nginx
+
+# Activates the site by symlinking it into sites-enabled.
+- name: Enable Cezen site
+  file:
+    src: /etc/nginx/sites-available/cezen
+    dest: /etc/nginx/sites-enabled/cezen
+    state: link
+  notify: Reload nginx
+
+# Removes the stock default site link from sites-enabled.
+- name: Disable default Nginx site
+  file:
+    path: /etc/nginx/sites-enabled/default
+    state: absent
+  notify: Reload nginx
+
+# Fails the play here if the resulting configuration does not parse; the task
+# itself never reports a change.
+- name: Validate Nginx config
+  command: nginx -t
+  changed_when: false
+
+# Makes sure the service starts at boot and is running now.
+- name: Ensure Nginx is enabled and running
+  systemd:
+    name: nginx
+    enabled: yes
+    state: started
--- a/ansible/roles/cezen-ttyd/files/cezen-ttyd.service
+++ b/ansible/roles/cezen-ttyd/files/cezen-ttyd.service
@ -0,0 +1,17 @@
+[Unit]
+Description=Cezen Web Terminal (ttyd)
+After=network.target
+
+[Service]
+# Bind to localhost only — Nginx proxies /console/ to this port.
+# login is started WITHOUT -f so every browser session has to authenticate
+# with the cezen-console password; -f would skip the credential check and hand
+# a shell to anyone who can reach /console/.
+# NOTE(review): --writable is only understood by newer ttyd releases — confirm
+# that the packaged ttyd accepts it.
+ExecStart=/usr/bin/ttyd \
+    --port 7681 \
+    --interface 127.0.0.1 \
+    --writable \
+    login cezen-console
+Restart=always
+RestartSec=5
+User=root
+
+[Install]
+WantedBy=multi-user.target
--- a/ansible/roles/cezen-ttyd/handlers/main.yml
+++ b/ansible/roles/cezen-ttyd/handlers/main.yml
@ -0,0 +1,9 @@
+---
+- name: Reload systemd
+  systemd:
+    daemon_reload: yes
+
+- name: Restart cezen-ttyd
+  systemd:
+    name: cezen-ttyd
+    state: restarted
--- a/ansible/roles/cezen-ttyd/tasks/main.yml
+++ b/ansible/roles/cezen-ttyd/tasks/main.yml
@ -0,0 +1,72 @@
+---
+# cezen-ttyd role: browser-based terminal via ttyd, bound to localhost
+
+- name: Install ttyd
+  apt:
+    name: ttyd
+    state: present
+    update_cache: yes
+
+# Creates the web-console account and gives it an initial password.
+# The password is applied only when the account is first created
+# (update_password: on_create), so a password changed by the operator is not
+# reset to the default on later playbook runs. Override the default with
+# -e cezen_console_password=... ; change it after first login or use PAM/SSO
+# integration.
+- name: Create cezen-console restricted user
+  user:
+    name: cezen-console
+    shell: /bin/bash
+    comment: "Cezen Web Console User"
+    groups: "{{ cezen_user }}"
+    append: yes
+    state: present
+    create_home: yes
+    password: "{{ cezen_console_password | default('CezenConsole2024!') | password_hash('sha512') }}"
+    update_password: on_create
+  # Keep the password (and its hash) out of the Ansible output and logs.
+  no_log: true
+
+- name: Restrict cezen-console home directory
+  file:
+    path: /home/cezen-console
+    owner: cezen-console
+    group: cezen-console
+    mode: "0750"
+
+- name: Add useful aliases for console user
+  copy:
+    dest: /home/cezen-console/.bashrc
+    owner: cezen-console
+    group: cezen-console
+    mode: "0644"
+    content: |
+      # Cezen Web Console — restricted shell environment
+      PS1='\[\033[01;32m\]cezen-console\[\033[00m\]:\[\033[01;34m\]\w\[\033[00m\]\$ '
+
+      # Useful shortcuts
+      alias ll='ls -lah --color=auto'
+      alias logs='journalctl -u cezen-api -f'
+      alias api-status='systemctl status cezen-api'
+      alias ollama-ps='ollama ps'
+      alias gpu='nvidia-smi'
+      alias ports='ss -tlnp'
+
+      # Prevent accidental system damage
+      alias rm='rm -i'
+      alias mv='mv -i'
+      alias cp='cp -i'
+
+- name: Install ttyd systemd service
+  copy:
+    src: cezen-ttyd.service
+    dest: /etc/systemd/system/cezen-ttyd.service
+    owner: root
+    group: root
+    mode: "0644"
+  notify:
+    - Reload systemd
+    - Restart cezen-ttyd
+
+- name: Enable and start ttyd service
+  systemd:
+    name: cezen-ttyd
+    enabled: yes
+    state: started
+    daemon_reload: yes
--- a/ansible/roles/jupyterlab/tasks/main.yml
+++ b/ansible/roles/jupyterlab/tasks/main.yml
@ -51,7 +51,7 @@
  copy:
    dest: /opt/cezen/notebooks/README.md
    content: |
-      # Cezen AI Suite — JupyterLab
+      # Nexus One AI — JupyterLab

      Default token: `cezen2024`

--- a/ansible/roles/mlflow/tasks/main.yml
+++ b/ansible/roles/mlflow/tasks/main.yml
@ -53,5 +53,5 @@
  wait_for:
    host: localhost
    port: 5000
-    timeout: 30
+    timeout: 120
  ignore_errors: true
--- a/ansible/roles/monitoring/tasks/main.yml
+++ b/ansible/roles/monitoring/tasks/main.yml
@ -25,6 +25,7 @@
  register: dcgm_result
  failed_when: dcgm_result.rc != 0 and 'already in use' not in dcgm_result.stderr
  ignore_errors: true
+  when: gpu_available | default(false) | bool

 # ── Prometheus ──────────────────────────────────────────
 - name: Write Prometheus config
@ -99,7 +100,8 @@
  wait_for:
    host: localhost
    port: 3000
-    timeout: 60
+    timeout: 120
+  register: grafana_wait
  ignore_errors: true

 - name: Add Prometheus datasource to Grafana
@ -118,6 +120,7 @@
      isDefault: true
    status_code: [200, 409]  # 409 = already exists, that's fine
  ignore_errors: true
+  when: not (grafana_wait is failed)

 - name: Import NVIDIA GPU dashboard (ID 12239)
  uri:
@ -143,3 +146,6 @@
        uid: "nvidia-gpu"
    status_code: [200, 412]
  ignore_errors: true
+  when:
+    - not (grafana_wait is failed)
+    - gpu_available | default(false) | bool
--- a/ansible/roles/nvidia/tasks/main.yml
+++ b/ansible/roles/nvidia/tasks/main.yml
@ -1,6 +1,6 @@
 ---
 # NVIDIA role: Drivers + CUDA + cuDNN
-# NOTE: Tested on L40S (Entry) and A40 (lab). Requires reboot after this role.
+# NOTE: Tested on RTX Pro 6000 (Entry) and A40 (lab). Requires reboot after this role.
 #       If no GPU is present, this role will install drivers but nvidia-smi won't show GPUs.

 - name: Add NVIDIA package repository key
--- a/ansible/roles/ollama/tasks/main.yml
+++ b/ansible/roles/ollama/tasks/main.yml
@ -28,7 +28,7 @@
      Environment="PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin"
      Environment="OLLAMA_HOST=0.0.0.0:11434"
      Environment="OLLAMA_MODELS=/opt/cezen/models/ollama"
-      Environment="CUDA_VISIBLE_DEVICES=0,1,2"
+      # GPUs exposed to Ollama. Defaults to the first GPU; multi-GPU tiers can
+      # set ollama_cuda_visible_devices (for example "0,1") in their playbook vars.
+      Environment="CUDA_VISIBLE_DEVICES={{ ollama_cuda_visible_devices | default('0') }}"

      [Install]
      WantedBy=multi-user.target
@ -54,50 +54,49 @@
    port: 11434
    timeout: 60

- name: Pull default models (Llama 3.1 8B + Mistral 7B)
+- name: Select tier model set
+  set_fact:
+    ollama_models: >-
+      {{
+        {
+          'starter': ['phi3:mini', 'nomic-embed-text'],
+          'basic': ['llama3.1:8b', 'mistral:7b', 'nomic-embed-text', 'codellama:13b'],
+          'entry': ['llama3.1:8b', 'mistral:7b', 'nomic-embed-text', 'codellama:13b'],
+          'pro': ['llama3.1:8b', 'mistral:7b', 'nomic-embed-text', 'codellama:13b', 'llama3.1:70b', 'mixtral:8x7b', 'deepseek-coder-v2:16b'],
+          'max': ['llama3.1:8b', 'mistral:7b', 'nomic-embed-text', 'codellama:13b', 'llama3.1:70b', 'mixtral:8x7b', 'deepseek-coder-v2:16b', 'llama3.1:405b', 'mixtral:8x22b']
+        }.get(tier | default('basic'), ['llama3.1:8b', 'mistral:7b', 'nomic-embed-text'])
+      }}
+
+- name: Pull tier Ollama models
  become_user: cezen
  command: ollama pull {{ item }}
-  loop:
-    - llama3.1:8b
-    - mistral:7b
+  loop: "{{ ollama_models }}"
  environment:
    OLLAMA_HOST: "http://localhost:11434"
  retries: 3
  delay: 15
-  # NOTE: Models are large (~5GB each). This step takes time on first run.
-  # Skip by setting: ansible-playbook ... -e "skip_model_pull=true"
+  # NOTE: Pro/Max models are very large. Skip with --skip-model-pull for
+  # bandwidth-constrained installs, then run models/pull-models.sh later.
+  # retries/delay are only honoured together with `until` on older Ansible
+  # releases, so each model pull is retried until the command succeeds.
+  register: ollama_pull
+  until: ollama_pull is succeeded
  when: not (skip_model_pull | default(false))

 # Open WebUI (chat interface on top of Ollama)
- name: Deploy Open WebUI via Docker
-  community.docker.docker_container:
-    name: open-webui
-    image: ghcr.io/open-webui/open-webui:main
-    state: started
-    restart_policy: always
-    ports:
-      - "3001:8080"
-    volumes:
-      - open-webui:/app/backend/data
-    env:
-      OLLAMA_BASE_URL: "http://host-gateway:11434"
-    etc_hosts:
-      host-gateway: "172.17.0.1"
-  # Note: Requires docker community collection. Install with:
-  # ansible-galaxy collection install community.docker
-  ignore_errors: true  # Falls back gracefully if docker collection not available
-
- name: Alternative Open WebUI start (if community.docker not available)
+- name: Start Open WebUI via Docker CLI
  shell: |
-    docker run -d \
-      --name open-webui \
-      --restart always \
-      -p 3001:8080 \
-      --add-host=host-gateway:172.17.0.1 \
-      -v open-webui:/app/backend/data \
-      -e OLLAMA_BASE_URL=http://host-gateway:11434 \
-      ghcr.io/open-webui/open-webui:main
+    if docker ps -a --format '{{ "{{" }}.Names{{ "}}" }}' | grep -qx open-webui; then
+      docker start open-webui
+    else
+      docker run -d \
+        --name open-webui \
+        --restart always \
+        -p 3001:8080 \
+        --add-host=host-gateway:172.17.0.1 \
+        -v open-webui:/app/backend/data \
+        -e OLLAMA_BASE_URL=http://host-gateway:11434 \
+        ghcr.io/open-webui/open-webui:main
+    fi
  args:
    executable: /bin/bash
  register: webui_result
-  failed_when: webui_result.rc != 0 and 'already in use' not in webui_result.stderr
+  changed_when: webui_result.rc == 0
+  failed_when: webui_result.rc != 0 and 'already in use' not in (webui_result.stderr | default(''))
+  ignore_errors: true
--- a/ansible/roles/vllm/defaults/main.yml
+++ b/ansible/roles/vllm/defaults/main.yml
@ -0,0 +1,7 @@
+---
+# vLLM role defaults — overridden per-tier in the tier playbook vars block
+vllm_model: "meta-llama/Meta-Llama-3.1-8B-Instruct"
+vllm_tensor_parallel: 1
+vllm_gpu_memory_util: "0.70"
+vllm_max_model_len: 8192
+vllm_quantization: ""              # blank = full precision; set to "awq" for 4-bit
--- a/ansible/roles/vllm/tasks/main.yml
+++ b/ansible/roles/vllm/tasks/main.yml
@ -1,6 +1,12 @@
 ---
 # vLLM — high-performance LLM inference with OpenAI-compatible API
 # Skipped automatically if no GPU is present.
+# Variables (set defaults in defaults/main.yml, override per-tier in the playbook):
+#   vllm_model              HuggingFace model ID to load on start
+#   vllm_tensor_parallel    Number of GPUs for tensor parallelism (1 for Starter/Basic)
+#   vllm_gpu_memory_util    Fraction of VRAM to reserve for vLLM (0.0–1.0)
+#   vllm_max_model_len      Maximum context length in tokens
+#   vllm_quantization       Quantization method: "" (none) | "awq" | "gptq" | "fp8"

 - name: Check for NVIDIA GPU
  shell: nvidia-smi --query-gpu=name --format=csv,noheader 2>/dev/null | head -1
@ -10,7 +16,9 @@

 - name: Skip vLLM if no GPU detected
  debug:
-    msg: "No GPU detected — skipping vLLM image pull. Run manually when GPU is available: docker pull vllm/vllm-openai:latest"
+    msg: >
+      No GPU detected — skipping vLLM image pull.
+      Run manually when GPU is available: docker pull vllm/vllm-openai:latest
  when: gpu_check.stdout == "" or gpu_check.rc != 0

 - name: Pull vLLM Docker image
@ -19,18 +27,23 @@
  delay: 15
  when: gpu_check.stdout != "" and gpu_check.rc == 0

+- name: Build vLLM quantization flag
+  set_fact:
+    vllm_quant_flag: "{{ '--quantization ' + vllm_quantization if vllm_quantization != '' else '' }}"
+
 - name: Create vLLM systemd service
  copy:
    dest: /etc/systemd/system/vllm.service
+    mode: "0644"
    content: |
      [Unit]
-      Description=vLLM OpenAI-Compatible Inference Server
-      After=docker.service ollama.service
+      Description=vLLM OpenAI-Compatible Inference Server ({{ vllm_model }})
+      After=docker.service
      Requires=docker.service

      [Service]
      Restart=always
-      RestartSec=5
+      RestartSec=10
      ExecStartPre=-/usr/bin/docker stop vllm
      ExecStartPre=-/usr/bin/docker rm vllm
      ExecStart=/usr/bin/docker run \
@ -41,15 +54,16 @@
        -v /opt/cezen/models:/root/.cache/huggingface \
        -e HF_HOME=/root/.cache/huggingface \
        vllm/vllm-openai:latest \
-        --model meta-llama/Meta-Llama-3.1-8B-Instruct \
-        --gpu-memory-utilization 0.7 \
-        --max-model-len 8192 \
-        --tensor-parallel-size 1
+        --model {{ vllm_model }} \
+        --gpu-memory-utilization {{ vllm_gpu_memory_util }} \
+        --max-model-len {{ vllm_max_model_len }} \
+        --tensor-parallel-size {{ vllm_tensor_parallel }} \
+        {{ vllm_quant_flag }}
      ExecStop=/usr/bin/docker stop vllm
+      TimeoutStartSec=300

      [Install]
      WantedBy=multi-user.target
-    mode: "0644"

 - name: Create vLLM model directory
  file:
@ -57,3 +71,26 @@
    state: directory
    owner: cezen
    group: cezen
+    mode: "0755"
+
+- name: Write vLLM tier config file (for portal reference)
+  copy:
+    dest: /opt/cezen/vllm-config.json
+    owner: cezen
+    group: cezen
+    mode: "0644"
+    content: |
+      {
+        "model": "{{ vllm_model }}",
+        "tensor_parallel_size": {{ vllm_tensor_parallel }},
+        "gpu_memory_utilization": {{ vllm_gpu_memory_util }},
+        "max_model_len": {{ vllm_max_model_len }},
+        "quantization": "{{ vllm_quantization }}"
+      }
+
+- name: Enable and start vLLM service
+  systemd:
+    name: vllm
+    enabled: true
+    daemon_reload: true
+  when: gpu_check.stdout != "" and gpu_check.rc == 0
--- a/ansible/starter.yml
+++ b/ansible/starter.yml
@ -0,0 +1,76 @@
+---
+# Nexus One AI — Starter Tier Stack
+# Hardware: 1× NVIDIA RTX 5090 (32 GB GDDR7), 64 GB DDR5, 2 TB NVMe, 2.5 GbE
+# Capacity: 1–5 concurrent users
+# Runs after NVIDIA driver reboot (phase1_nvidia.yml)
+#
+# Differences from Basic tier:
+#   - No k3s (Kubernetes overhead not justified for 64 GB RAM / 1-5 users)
+#   - No MLflow (fine-tuning tracking overkill for Starter)
+#   - No MinIO (local model cache is sufficient)
+#   - vLLM serves Phi-3 Mini unquantised by default (no AWQ checkpoint is configured)
+#   - JupyterLab is optional (off by default, wizard can enable)
+
+- name: Nexus One AI — Starter Tier Stack
+  hosts: localhost
+  connection: local
+  become: true
+  vars:
+    cezen_user: "cezen"
+    cezen_home: "/opt/cezen"
+    cezen_login_home: "/home/cezen"
+    python_version: "3.11"
+    cuda_version: "12.6"          # NOTE(review): RTX 5090 (Blackwell) is generally documented as needing CUDA 12.8+ — confirm
+    skip_roles: ""                # comma-separated list of roles to skip
+    # The monitoring role skips its GPU exporter/dashboard tasks while this is
+    # false. Presumably the installer overrides it (-e gpu_available=true) when
+    # a GPU is detected — confirm.
+    gpu_available: false
+    tier: "starter"
+
+    # ── vLLM — Starter defaults ──────────────────
+    # Phi-3 Mini fits comfortably in 32 GB GDDR7 without quantisation.
+    # vllm_quantization stays blank for this checkpoint: "awq" is only valid for
+    # a model repository that ships AWQ-quantised weights, and vLLM refuses to
+    # load a regular checkpoint with that flag.
+    # Users can swap to Llama-3.2-3B-Instruct via the portal Model Manager.
+    vllm_model: "microsoft/Phi-3-mini-4k-instruct"
+    vllm_tensor_parallel: 1
+    vllm_gpu_memory_util: "0.85"
+    vllm_max_model_len: 4096
+    vllm_quantization: ""
+
+    # ── Ollama — lightweight models ───────────────
+    ollama_default_model: "phi3:mini"
+
+  # Every role can be skipped by listing its name in skip_roles.
+  roles:
+    - role: base
+      when: "'base' not in skip_roles.split(',')"
+
+    - role: docker
+      when: "'docker' not in skip_roles.split(',')"
+
+    # k3s intentionally omitted for Starter — insufficient RAM headroom
+
+    - role: ollama
+      when: "'ollama' not in skip_roles.split(',')"
+
+    - role: vllm
+      when: "'vllm' not in skip_roles.split(',')"
+
+    - role: chromadb
+      when: "'chromadb' not in skip_roles.split(',')"
+
+    # mlflow / minio omitted for Starter
+
+    - role: monitoring
+      when: "'monitoring' not in skip_roles.split(',')"
+
+    - role: cezen-backend
+      when: "'cezen-backend' not in skip_roles.split(',')"
+
+    - role: cezen-ttyd
+      when: "'cezen-ttyd' not in skip_roles.split(',')"
+
+    - role: cezen-nginx
+      when: "'cezen-nginx' not in skip_roles.split(',')"
+
+    # JupyterLab — optional, install only when explicitly requested
+    - role: jupyterlab
+      when: >
+        'jupyterlab' not in skip_roles.split(',') and
+        install_jupyterlab | default(false) | bool
--- a/autoinstall/build-iso-starter.sh
+++ b/autoinstall/build-iso-starter.sh
@ -0,0 +1,199 @@
+#!/usr/bin/env bash
+# ─────────────────────────────────────────────────────────────
+# Nexus One AI — Starter Tier ISO Builder
+# Hardware target: compact workstation (1× RTX 5090, 64 GB RAM, 2 TB NVMe)
+#
+# Usage:
+#   cd ~/aipackage
+#   bash autoinstall/build-iso-starter.sh
+#
+# Output: autoinstall/cezen-ai-starter-ubuntu2204.iso
+# Flash to USB:
+#   diskutil unmountDisk /dev/diskN
+#   sudo dd if=cezen-ai-starter-ubuntu2204.iso of=/dev/diskN bs=4m status=progress
+# ─────────────────────────────────────────────────────────────
+set -e
+
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+PACKAGE_DIR="$(cd "$SCRIPT_DIR/.." && pwd)"
+WORK_DIR="/tmp/cezen-iso-starter-work"
+ORIGINAL_ISO="/tmp/ubuntu-22.04.5-live-server-amd64.iso"
+OUTPUT_ISO="$SCRIPT_DIR/cezen-ai-starter-ubuntu2204.iso"
+UBUNTU_URL="https://releases.ubuntu.com/22.04.5/ubuntu-22.04.5-live-server-amd64.iso"
+TIER="starter"
+
+echo "╔══════════════════════════════════════════════════════╗"
+echo "║   Nexus One AI — ISO Builder  [STARTER TIER]            ║"
+echo "║   RTX 5090 · 64 GB RAM · 2 TB NVMe · 1–5 users     ║"
+echo "╚══════════════════════════════════════════════════════╝"
+echo ""
+
+# ── Install build tools ────────────────────────
+echo "→ Installing build tools..."
+apt-get update -qq
+apt-get install -y -qq xorriso wget isolinux rsync
+echo "✓ Tools ready"
+
+# ── Download Ubuntu ISO ────────────────────────
+# The download is skipped when the ISO is already cached in /tmp. wget -O
+# creates the target file even when the transfer fails, so a failed download
+# is removed again; otherwise the next run would accept the truncated file as
+# "already downloaded" and build a broken image from it.
+if [ -f "$ORIGINAL_ISO" ]; then
+  echo "✓ Ubuntu ISO already downloaded"
+else
+  echo "→ Downloading Ubuntu 22.04.5 Server ISO (~1.8 GB)..."
+  if ! wget --show-progress -O "$ORIGINAL_ISO" "$UBUNTU_URL"; then
+    rm -f "$ORIGINAL_ISO"
+    echo "ERROR: Download failed: $UBUNTU_URL"
+    exit 1
+  fi
+  echo "✓ Downloaded"
+fi
+
+# ── Extract ISO ────────────────────────────────
+echo "→ Extracting ISO..."
+rm -rf "$WORK_DIR"
+mkdir -p "$WORK_DIR"
+xorriso -osirrox on \
+  -indev "$ORIGINAL_ISO" \
+  -extract / "$WORK_DIR" 2>/dev/null
+chmod -R u+w "$WORK_DIR"
+echo "✓ Extracted"
+
+# ── Inject Starter autoinstall files ──────────
+echo "→ Injecting Starter autoinstall config..."
+mkdir -p "$WORK_DIR/nocloud"
+cp "$SCRIPT_DIR/user-data-starter" "$WORK_DIR/nocloud/user-data"
+cp "$SCRIPT_DIR/meta-data"         "$WORK_DIR/nocloud/meta-data"
+echo "✓ user-data-starter and meta-data injected"
+
+# ── Online installer mode ──────────────────────
+# The installed system pulls the current package from cgit on first boot. This
+# keeps the ISO small and avoids shipping stale backend/portal code.
+echo "✓ Online installer mode: package will be pulled from cgit on first boot"
+
+# ── Patch GRUB ────────────────────────────────
+echo "→ Patching GRUB config..."
+GRUB_CFG="$WORK_DIR/boot/grub/grub.cfg"
+cp "$GRUB_CFG" "$GRUB_CFG.orig"
+
+sed -i "s/set timeout=.*/set timeout=5/" "$GRUB_CFG"
+sed -i "s/set timeout_style=.*/set timeout_style=countdown/" "$GRUB_CFG"
+sed -i '/^\s*linux.*vmlinuz/s|---|autoinstall ds=nocloud\\;s=/cdrom/nocloud/ ---|' "$GRUB_CFG"
+
+# Update GRUB title to reflect Starter tier
+sed -i 's/Install Ubuntu Server/Install Nexus One AI — Starter Tier/' "$GRUB_CFG" || true
+echo "✓ GRUB patched"
+
+# ── Extract MBR and EFI boot data ─────────────
+echo "→ Extracting boot data from original ISO..."
+MBR_TEMPLATE=$(mktemp)
+EFI_IMG=$(mktemp)
+dd if="$ORIGINAL_ISO" bs=1 count=432 of="$MBR_TEMPLATE" 2>/dev/null
+
+EFI_LINE=$(fdisk -l "$ORIGINAL_ISO" 2>/dev/null | grep "EFI")
+echo "  EFI partition info: $EFI_LINE"
+EFI_START=$(echo "$EFI_LINE" | awk '{print $2}')
+EFI_SIZE=$(echo  "$EFI_LINE" | awk '{print $4}')
+
+if [ -z "$EFI_START" ] || [ -z "$EFI_SIZE" ]; then
+  echo "ERROR: Could not detect EFI partition in ISO."
+  echo "Run: fdisk -l $ORIGINAL_ISO"
+  exit 1
+fi
+
+dd if="$ORIGINAL_ISO" bs=512 skip="$EFI_START" count="$EFI_SIZE" \
+   of="$EFI_IMG" 2>/dev/null
+echo "✓ EFI partition extracted (start=$EFI_START, size=$EFI_SIZE)"
+
+# ── Repack ISO (pass 1) ────────────────────────
+echo "→ Repacking ISO (pass 1)..."
+xorriso -as mkisofs \
+  -r \
+  -V "CezenAI_Starter_2204" \
+  -o "$OUTPUT_ISO" \
+  --grub2-mbr "$MBR_TEMPLATE" \
+  -partition_offset 16 \
+  --mbr-force-bootable \
+  -append_partition 2 28732ac11ff8d211ba4b00a0c93ec93b "$EFI_IMG" \
+  -appended_part_as_gpt \
+  -iso_mbr_part_type a2a0d0ebe5b9334487c068b6b72699c7 \
+  -c "/boot.catalog" \
+  -b "/boot/grub/i386-pc/eltorito.img" \
+  -no-emul-boot \
+  -boot-load-size 4 \
+  -boot-info-table \
+  --grub2-boot-info \
+  -eltorito-alt-boot \
+  -e "--interval:appended_partition_2:::" \
+  -no-emul-boot \
+  "$WORK_DIR"
+
+# ── Refresh md5sum.txt and repack (pass 2) ────
+echo "→ Refreshing md5sum.txt..."
+FINAL_DIR=$(mktemp -d)
+VERIFY_DIR=$(mktemp -d)
+trap 'rm -rf "$WORK_DIR" "$MBR_TEMPLATE" "$EFI_IMG" "$FINAL_DIR" "$VERIFY_DIR"' EXIT
+
+xorriso -osirrox on -indev "$OUTPUT_ISO" -extract / "$FINAL_DIR" >/dev/null 2>&1
+chmod -R u+w "$FINAL_DIR"
+(
+  cd "$FINAL_DIR"
+  rm -f md5sum.txt
+  find . -type f \
+    ! -path './md5sum.txt' \
+    ! -path './boot.catalog' \
+    -print0 \
+    | sort -z \
+    | xargs -0 md5sum > md5sum.txt
+)
+echo "✓ md5sum.txt refreshed"
+
+echo "→ Repacking ISO (pass 2)..."
+xorriso -as mkisofs \
+  -r \
+  -V "CezenAI_Starter_2204" \
+  -o "$OUTPUT_ISO" \
+  --grub2-mbr "$MBR_TEMPLATE" \
+  -partition_offset 16 \
+  --mbr-force-bootable \
+  -append_partition 2 28732ac11ff8d211ba4b00a0c93ec93b "$EFI_IMG" \
+  -appended_part_as_gpt \
+  -iso_mbr_part_type a2a0d0ebe5b9334487c068b6b72699c7 \
+  -c "/boot.catalog" \
+  -b "/boot/grub/i386-pc/eltorito.img" \
+  -no-emul-boot \
+  -boot-load-size 4 \
+  -boot-info-table \
+  --grub2-boot-info \
+  -eltorito-alt-boot \
+  -e "--interval:appended_partition_2:::" \
+  -no-emul-boot \
+  "$FINAL_DIR"
+
+# ── Verify output ISO ──────────────────────────
+echo "→ Verifying rebuilt ISO manifest..."
+xorriso -osirrox on -indev "$OUTPUT_ISO" -extract / "$VERIFY_DIR" >/dev/null 2>&1
+chmod -R u+w "$VERIFY_DIR"
+(
+  cd "$VERIFY_DIR"
+  md5sum -c md5sum.txt >/tmp/cezen-iso-md5check-starter.log 2>&1 || {
+    echo "ERROR: Rebuilt ISO failed its own md5sum.txt verification."
+    sed -n '1,40p' /tmp/cezen-iso-md5check-starter.log
+    exit 1
+  }
+)
+echo "✓ Output ISO manifest verified"
+
+echo ""
+echo "╔══════════════════════════════════════════════════════╗"
+echo "║   Done! Starter Tier ISO ready.                     ║"
+echo "╚══════════════════════════════════════════════════════╝"
+echo ""
+ls -lh "$OUTPUT_ISO"
+echo ""
+echo "→ Transfer to MacBook:"
+echo "   scp user@server:~/aipackage/autoinstall/cezen-ai-starter-ubuntu2204.iso ."
+echo ""
+echo "→ Flash to USB (macOS):"
+echo "   diskutil list                            # find USB e.g. /dev/disk4"
+echo "   diskutil unmountDisk /dev/disk4"
+echo "   sudo dd if=cezen-ai-starter-ubuntu2204.iso of=/dev/disk4 bs=4m status=progress"
+echo ""
+echo "→ Post-flash: boot the workstation from USB."
+echo "   Unattended install completes in ~10 min."
+echo "   First-boot wizard runs on tty1 — set IP, org name, admin password."
+echo "   Then run: sudo bash /opt/aipackage/install.sh --tier starter"
--- a/autoinstall/build-iso.sh
+++ b/autoinstall/build-iso.sh
@ -1,6 +1,6 @@
 #!/usr/bin/env bash
 # ─────────────────────────────────────────────────────────────
-# Cezen AI Suite — Custom ISO Builder
+# Nexus One AI — Custom ISO Builder
 # Runs directly on Ubuntu 22.04 (run on the server)
 #
 # Usage:
@ -20,7 +20,7 @@ OUTPUT_ISO="$SCRIPT_DIR/cezen-ai-ubuntu2204.iso"
 UBUNTU_URL="https://releases.ubuntu.com/22.04.5/ubuntu-22.04.5-live-server-amd64.iso"

 echo "╔══════════════════════════════════════════╗"
-echo "║   Cezen AI — ISO Builder                 ║"
+echo "║   Nexus One AI — ISO Builder             ║"
 echo "╚══════════════════════════════════════════╝"
 echo ""

@ -56,15 +56,10 @@ cp "$SCRIPT_DIR/user-data" "$WORK_DIR/nocloud/user-data"
 cp "$SCRIPT_DIR/meta-data" "$WORK_DIR/nocloud/meta-data"
 echo "✓ user-data and meta-data injected"

-# Keep the installer payload on the ISO so first boot does not depend on a
-# private Git server being reachable before the setup UI can start.
-echo "→ Bundling Cezen AI installer payload..."
-mkdir -p "$WORK_DIR/cezen-aipackage"
-rsync -a --delete \
-  --exclude 'autoinstall/cezen-ai-ubuntu2204.iso' \
-  --exclude '*.iso' \
-  "$PACKAGE_DIR/" "$WORK_DIR/cezen-aipackage/"
-echo "✓ Installer payload bundled"
+# Keep this as an online installer ISO. The installed system pulls the current
+# Nexus One AI package from cgit during first boot, which keeps the ISO small and
+# avoids shipping stale backend/portal code inside the image.
+echo "✓ Online installer mode: package will be pulled from cgit on first boot"

 # ── Patch GRUB ────────────────────────────────
 echo "→ Patching GRUB config..."
@ -82,20 +77,6 @@ sed -i "s/set timeout_style=.*/set timeout_style=countdown/" "$GRUB_CFG"
 sed -i '/^\s*linux.*vmlinuz/s|---|autoinstall ds=nocloud\\;s=/cdrom/nocloud/ ---|' "$GRUB_CFG"
 echo "✓ GRUB patched"

-# ── Refresh installer checksum manifest ─────────────────
-echo "→ Refreshing md5sum.txt..."
-(
-  cd "$WORK_DIR"
-  rm -f md5sum.txt
-  find . -type f \
-    ! -path './md5sum.txt' \
-    ! -path './boot.catalog' \
-    -print0 \
-    | sort -z \
-    | xargs -0 md5sum > md5sum.txt
-)
-echo "✓ md5sum.txt refreshed"
-
 # ── Extract MBR and EFI partition from original ISO ────
 echo "→ Extracting boot data from original ISO..."
 MBR_TEMPLATE=$(mktemp)
@ -119,7 +100,7 @@ dd if="$ORIGINAL_ISO" bs=512 skip="$EFI_START" count="$EFI_SIZE" \
 echo "✓ EFI partition extracted (start=$EFI_START, size=$EFI_SIZE)"

 # ── Repack ISO ─────────────────────────────────
-echo "→ Repacking ISO (this takes ~2 minutes)..."
+echo "→ Repacking ISO (pass 1)..."
 xorriso -as mkisofs \
  -r \
  -V "Cezen_AI_Ubuntu2204" \
@ -141,6 +122,64 @@ xorriso -as mkisofs \
  -no-emul-boot \
  "$WORK_DIR"

+echo "→ Refreshing md5sum.txt from pass-1 ISO contents..."
+FINAL_DIR=$(mktemp -d)
+VERIFY_DIR=$(mktemp -d)
+trap 'rm -rf "$WORK_DIR" "$MBR_TEMPLATE" "$EFI_IMG" "$FINAL_DIR" "$VERIFY_DIR"' EXIT
+xorriso -osirrox on \
+  -indev "$OUTPUT_ISO" \
+  -extract / "$FINAL_DIR" >/dev/null 2>&1
+chmod -R u+w "$FINAL_DIR"
+(
+  cd "$FINAL_DIR"
+  rm -f md5sum.txt
+  find . -type f \
+    ! -path './md5sum.txt' \
+    ! -path './boot.catalog' \
+    -print0 \
+    | sort -z \
+    | xargs -0 md5sum > md5sum.txt
+)
+echo "✓ md5sum.txt refreshed"
+
+echo "→ Repacking ISO (pass 2 with final manifest)..."
+xorriso -as mkisofs \
+  -r \
+  -V "Cezen_AI_Ubuntu2204" \
+  -o "$OUTPUT_ISO" \
+  --grub2-mbr "$MBR_TEMPLATE" \
+  -partition_offset 16 \
+  --mbr-force-bootable \
+  -append_partition 2 28732ac11ff8d211ba4b00a0c93ec93b "$EFI_IMG" \
+  -appended_part_as_gpt \
+  -iso_mbr_part_type a2a0d0ebe5b9334487c068b6b72699c7 \
+  -c "/boot.catalog" \
+  -b "/boot/grub/i386-pc/eltorito.img" \
+  -no-emul-boot \
+  -boot-load-size 4 \
+  -boot-info-table \
+  --grub2-boot-info \
+  -eltorito-alt-boot \
+  -e "--interval:appended_partition_2:::" \
+  -no-emul-boot \
+  "$FINAL_DIR"
+
+# ── Verify output ISO integrity manifest ─────────────────
+echo "→ Verifying rebuilt ISO manifest..."
+xorriso -osirrox on \
+  -indev "$OUTPUT_ISO" \
+  -extract / "$VERIFY_DIR" >/dev/null 2>&1
+chmod -R u+w "$VERIFY_DIR"
+(
+  cd "$VERIFY_DIR"
+  md5sum -c md5sum.txt >/tmp/cezen-iso-md5check.log 2>&1 || {
+    echo "ERROR: Rebuilt ISO failed its own md5sum.txt verification."
+    sed -n '1,40p' /tmp/cezen-iso-md5check.log
+    exit 1
+  }
+)
+echo "✓ Output ISO manifest verified"
+
 echo ""
 echo "╔══════════════════════════════════════════════════════╗"
 echo "║   Done!                                              ║"
--- a/autoinstall/firstboot-setup.sh
+++ b/autoinstall/firstboot-setup.sh
@ -1,6 +1,6 @@
 #!/usr/bin/env bash
 # ─────────────────────────────────────────────────────────────
-# Cezen AI Suite — First Boot Setup Wizard
+# Nexus One AI — First Boot Setup Wizard
 # Runs on first boot after OS install via systemd service.
 # Uses whiptail for the TUI.
 # ─────────────────────────────────────────────────────────────
@ -8,6 +8,7 @@ set -e

 AIPACKAGE_DIR="/opt/aipackage"
 LOG_FILE="/var/log/cezen-setup.log"
+export TERM="${TERM:-linux}"
 exec > >(tee -a "$LOG_FILE") 2>&1

 detect_iface() {
@ -20,28 +21,28 @@ IFACE="${IFACE:-$(ip -o link show | awk -F': ' '$2 !~ /lo|docker|br-|veth/ {prin
 # ── Colors / terminal setup ────────────────────────────────
 export NEWT_COLORS='
 root=,black
-window=white,navy
-border=white,navy
-title=white,navy
-button=black,cyan
-actbutton=white,red
-checkbox=white,navy
-actcheckbox=black,cyan
-entry=white,navy
-label=white,navy
-listbox=white,navy
-actlistbox=black,cyan
-textbox=white,navy
-acttextbox=black,cyan
+window=black,white
+border=white,black
+title=black,white
+button=black,white
+actbutton=white,blue
+checkbox=black,white
+actcheckbox=white,blue
+entry=black,white
+label=black,white
+listbox=black,white
+actlistbox=white,blue
+textbox=black,white
+acttextbox=white,blue
 '

-TITLE="  Cezen AI Suite — Server Setup  "
+TITLE="  Nexus One AI — Server Setup  "
 H=20
 W=70

 # ── Welcome ────────────────────────────────────────────────
 whiptail --title "$TITLE" \
-  --msgbox "\nWelcome to the Cezen AI Suite installer.\n\nThis wizard will configure your network and install the AI stack.\n\nMake sure this server is connected to the internet before continuing." \
+  --msgbox "\nWelcome to the Nexus One AI installer.\n\nThis wizard will configure your network and install the AI stack.\n\nMake sure this server is connected to the internet before continuing." \
  $H $W

 # ════════════════════════════════════════════════════════════
@ -135,10 +136,11 @@ fi

 TIER=$(whiptail --title "$TITLE" \
  --menu "\nStep 2 of 3: Select AI Package Tier\n\nChoose the tier that matches your hardware:" \
-  $H $W 3 \
-  "entry"    "Entry    — 3× NVIDIA L40S   (48GB each) · Up to 20 users" \
-  "mid"      "Mid      — RTX Pro 6000 BW  (96GB each) · Up to 50 users" \
-  "advanced" "Advanced — HGX H200         (141GB each) · 200+ users" \
+  $H $W 4 \
+  "starter" "Starter — 1× RTX 5090 / 32GB VRAM · Small team" \
+  "basic" "Entry — 1× NVIDIA RTX Pro 6000 (96GB) · Up to 20 users" \
+  "pro"   "Pro   — 2× RTX 5090 / RTX Pro class · Up to 100 users" \
+  "max"   "Max   — 4–8× H100/H200/A100 class · 100+ users" \
  3>&1 1>&2 2>&3)

 # ════════════════════════════════════════════════════════════
@ -177,7 +179,7 @@ whiptail --title "$TITLE" \
 clear
 echo ""
 echo "╔══════════════════════════════════════════╗"
-echo "║   Cezen AI Suite — Installing...         ║"
+echo "║   Nexus One AI — Installing...           ║"
 echo "║   Check progress: journalctl -f           ║"
 echo "╚══════════════════════════════════════════╝"
 echo ""
--- a/autoinstall/user-data
+++ b/autoinstall/user-data
@ -85,10 +85,11 @@ autoinstall:
    # mirrors instead of the custom ISO content.
    - sed -i 's/^deb cdrom:/# deb cdrom:/' /target/etc/apt/sources.list || true

-    # Install the Cezen AI payload from the ISO first. Fall back to Git only
-    # when building from older media that does not contain /cdrom/cezen-aipackage.
+    # Pull the Nexus One AI installer from cgit. The ISO intentionally does not
+    # bundle the full package, keeping the image small and the installed code
+    # current at deployment time.
    - mkdir -p /target/opt/aipackage
-    - cp -a /cdrom/cezen-aipackage/. /target/opt/aipackage/ || git clone https://cgit.cezentech.com/jinojose/aipackage.git /target/opt/aipackage
+    # The ISO no longer carries a local copy of the package, so a single
+    # transient network error would abort the whole install. Try the clone up
+    # to five times before failing this late-command.
+    - |
+      for attempt in 1 2 3 4 5; do
+        git clone https://cgit.cezentech.com/jinojose/aipackage.git /target/opt/aipackage && exit 0
+        echo "git clone failed (attempt $attempt of 5), retrying in 15s" >&2
+        sleep 15
+      done
+      exit 1

    # Deploy the console setup wizard
    - mkdir -p /target/opt/cezen
@ -104,23 +105,18 @@ autoinstall:
    - |
      cat > /target/etc/systemd/system/cezen-setup.service << 'EOF'
      [Unit]
-      Description=Cezen AI Suite — Console Setup Wizard
+      Description=Nexus One AI — Console Setup Wizard
      After=cloud-final.service cloud-init.target network-online.target
      Wants=cloud-init.target network-online.target
-      Conflicts=getty@tty1.service
      ConditionPathExists=!/opt/cezen/.setup-done
+      OnFailure=getty@tty1.service

      [Service]
-      Type=idle
-      ExecStartPre=-/usr/bin/systemctl stop getty@tty1.service
-      ExecStart=/bin/bash -lc 'clear >/dev/tty1 2>/dev/null || true; exec /opt/cezen/firstboot-setup.sh'
-      StandardInput=tty-force
-      StandardOutput=tty
-      StandardError=tty
-      TTYPath=/dev/tty1
-      TTYReset=yes
-      TTYVHangup=yes
-      TTYVTDisallocate=yes
+      Type=oneshot
+      WorkingDirectory=/opt/cezen
+      ExecStart=/bin/bash -lc 'clear >/dev/tty1 2>/dev/null || true; /usr/bin/openvt -c 1 -f -w -- env TERM=linux /opt/cezen/firstboot-setup.sh'
+      StandardOutput=journal+console
+      StandardError=journal+console
      Restart=no

      [Install]
--- a/autoinstall/user-data-starter
+++ b/autoinstall/user-data-starter
@ -0,0 +1,137 @@
+#cloud-config
+# ─────────────────────────────────────────────────────────────
+# Nexus One AI — Starter Tier Autoinstall
+# Hardware target: compact workstation (Mini-ITX / SFF)
+#   GPU:     1× NVIDIA RTX 5090 (32 GB GDDR7)
+#   RAM:     64 GB DDR5
+#   Storage: 1× 2 TB NVMe SSD (single drive — simple LVM)
+#   Network: 2.5 GbE (single interface)
+# ─────────────────────────────────────────────────────────────
+autoinstall:
+  version: 1
+
+  # ── Locale & keyboard ──────────────────────────
+  locale: en_IN.UTF-8
+  keyboard:
+    layout: us
+
+  # ── Network: DHCP during install; static config applied post-install ──
+  network:
+    network:
+      version: 2
+      ethernets:
+        any-en:
+          dhcp4: true
+          match:
+            name: "en*"
+        any-eth:
+          dhcp4: true
+          match:
+            name: "eth*"
+
+  # ── Storage: single 2 TB NVMe, simple LVM ─────
+  # Starter workstations have one drive — no RAID needed.
+  storage:
+    layout:
+      name: lvm
+      match:
+        size: largest
+
+  # ── Identity ──────────────────────────────────
+  identity:
+    hostname: cezenai-starter
+    username: cezen
+    # Default password: cezen@123  (change via first-boot wizard)
+    password: "$6$I5VA.42G1xTeVhCv$KCLzqIKg/kbNHZyiTEMAY4FZsJMDDwoS90k6Ffb9VEwmcK.wuzlJNe3ceiEfLrzYzXEvqjYsLc7klAbeGPGab."
+
+  # ── SSH ───────────────────────────────────────
+  ssh:
+    install-server: true
+    allow-pw: true
+
+  # ── Base packages ─────────────────────────────
+  packages:
+    - git
+    - curl
+    - wget
+    - python3
+    - whiptail
+    - openssh-server
+    - nvme-cli          # NVMe health / SMART monitoring
+
+  # ── Late commands ─────────────────────────────
+  late-commands:
+    # Expand LVM to fill the full 2 TB NVMe
+    - lvextend -l +100%FREE /dev/ubuntu-vg/ubuntu-lv || true
+    - resize2fs /dev/ubuntu-vg/ubuntu-lv || true
+
+    # Passwordless sudo for cezen (needed by install.sh + first-boot wizard)
+    - echo "cezen ALL=(ALL) NOPASSWD:ALL" > /target/etc/sudoers.d/cezen
+    - chmod 440 /target/etc/sudoers.d/cezen
+
+    # Replace installer netplan with simple DHCP target config.
+    # The first-boot wizard will switch to static if desired.
+    - rm -f /target/etc/netplan/50-cloud-init.yaml /target/etc/netplan/00-installer-config.yaml || true
+    - |
+      cat > /target/etc/netplan/99-cezen-dhcp.yaml << 'EOF'
+      network:
+        version: 2
+        ethernets:
+          any-en:
+            dhcp4: true
+            match:
+              name: "en*"
+          any-eth:
+            dhcp4: true
+            match:
+              name: "eth*"
+      EOF
+    # netplan expects its configuration to be readable by root only; looser
+    # modes produce "permissions are too open" warnings on netplan runs.
+    - chmod 600 /target/etc/netplan/99-cezen-dhcp.yaml
+
+    # Disable cdrom APT source
+    - sed -i 's/^deb cdrom:/# deb cdrom:/' /target/etc/apt/sources.list || true
+
+    # Pull the Nexus One AI installer from cgit. The ISO intentionally does not
+    # bundle the full package, keeping the image small and the installed code
+    # current at deployment time.
+    - mkdir -p /target/opt/aipackage
+    - git clone https://cgit.cezentech.com/jinojose/aipackage.git /target/opt/aipackage
+
+    # Write tier marker — used by install.sh and the portal branding system
+    - mkdir -p /target/opt/cezen
+    - echo "starter" > /target/opt/cezen/tier
+
+    # Deploy first-boot TUI wizard
+    - cp /target/opt/aipackage/autoinstall/firstboot-setup.sh /target/opt/cezen/firstboot-setup.sh
+    - chmod +x /target/opt/cezen/firstboot-setup.sh
+
+    # Set hostname
+    - echo "cezenai-starter" > /target/etc/hostname
+    - sed -i 's/aiserver/cezenai-starter/g' /target/etc/hosts || true
+
+    # Systemd service: run first-boot wizard on tty1 once
+    - |
+      cat > /target/etc/systemd/system/cezen-setup.service << 'EOF'
+      [Unit]
+      Description=Nexus One AI — Console Setup Wizard (Starter)
+      After=cloud-final.service cloud-init.target network-online.target
+      Wants=cloud-init.target network-online.target
+      ConditionPathExists=!/opt/cezen/.setup-done
+      OnFailure=getty@tty1.service
+
+      [Service]
+      Type=oneshot
+      WorkingDirectory=/opt/cezen
+      ExecStart=/bin/bash -lc 'clear >/dev/tty1 2>/dev/null || true; /usr/bin/openvt -c 1 -f -w -- env TERM=linux CEZEN_TIER=starter /opt/cezen/firstboot-setup.sh'
+      StandardOutput=journal+console
+      StandardError=journal+console
+      Restart=no
+
+      [Install]
+      WantedBy=cloud-init.target
+      EOF
+
+    - curtin in-target -- systemctl enable ssh
+    - curtin in-target -- systemctl enable cezen-setup.service
+
+  user-data:
+    disable_root: false
--- a/autoinstall/websetup/server.py
+++ b/autoinstall/websetup/server.py
@ -1,6 +1,6 @@
 #!/usr/bin/env python3
 """
-Cezen AI Suite — First Boot Web Setup Server
+Nexus One AI — First Boot Web Setup Server
 Serves on port 80. Access from any browser on the same network.
 """
 import os, json, subprocess, threading, time, socket, ipaddress
@ -123,7 +123,7 @@ HTML = r"""<!DOCTYPE html>
 <head>
 <meta charset="UTF-8">
 <meta name="viewport" content="width=device-width, initial-scale=1.0">
-<title>Cezen AI Suite — Server Setup</title>
+<title>Nexus One AI — Server Setup</title>
 <style>
  :root {
    --navy:    #1B2A4A;
@ -317,19 +317,24 @@ HTML = r"""<!DOCTYPE html>
      <h2>Select AI Package Tier</h2>
      <p class="desc">Choose the tier that matches your GPU hardware.</p>
      <div class="tier-grid">
-        <div class="tier-card" id="tier-entry" onclick="selectTier('entry')">
+        <div class="tier-card" id="tier-starter" onclick="selectTier('starter')">
+          <div class="tier-name">Starter</div>
+          <div class="tier-gpu">1× RTX 5090 / 32GB VRAM</div>
+          <div class="tier-users">Small team deployment</div>
+        </div>
+        <div class="tier-card" id="tier-basic" onclick="selectTier('basic')">
          <div class="tier-name">Entry</div>
-          <div class="tier-gpu">3× NVIDIA L40S</div>
+          <div class="tier-gpu">1× NVIDIA RTX Pro 6000 (96GB)</div>
          <div class="tier-users">Up to 20 concurrent users</div>
        </div>
-        <div class="tier-card" id="tier-mid" onclick="selectTier('mid')">
-          <div class="tier-name">Mid</div>
-          <div class="tier-gpu">3× RTX Pro 6000</div>
-          <div class="tier-users">Up to 50 concurrent users</div>
+        <div class="tier-card" id="tier-pro" onclick="selectTier('pro')">
+          <div class="tier-name">Pro</div>
+          <div class="tier-gpu">2× RTX 5090 / RTX Pro class</div>
+          <div class="tier-users">Up to 100 concurrent users</div>
        </div>
-        <div class="tier-card" id="tier-advanced" onclick="selectTier('advanced')">
-          <div class="tier-name">Advanced</div>
-          <div class="tier-gpu">8× HGX H200</div>
+        <div class="tier-card" id="tier-max" onclick="selectTier('max')">
+          <div class="tier-name">Max</div>
+          <div class="tier-gpu">4–8× H100/H200/A100 class</div>
          <div class="tier-users">200+ concurrent users</div>
        </div>
      </div>
@ -369,7 +374,7 @@ HTML = r"""<!DOCTYPE html>

    <div class="progress-wrap" id="progress-wrap">
      <div class="card">
-        <h2>Installing Cezen AI Suite...</h2>
+        <h2>Installing Nexus One AI...</h2>
        <div class="progress-bar-bg"><div class="progress-bar" id="progress-bar"></div></div>
        <p id="progress-label" style="font-size:13px;color:var(--muted);margin-bottom:12px">Starting...</p>
        <div class="log-box" id="log-box"></div>
@ -379,7 +384,7 @@ HTML = r"""<!DOCTYPE html>
    <div id="done-screen" class="hidden">
      <div class="done-icon">✅</div>
      <h2>Installation Complete!</h2>
-      <p>Your Cezen AI Suite is ready.</p>
+      <p>Your Nexus One AI is ready.</p>
      <div class="services card" style="margin-top:24px;text-align:left">
        <div class="summary-row"><span class="key">Open WebUI</span><span class="val badge">:3001</span></div>
        <div class="summary-row"><span class="key">JupyterLab</span><span class="val badge">:8888</span></div>
@ -400,7 +405,7 @@ HTML = r"""<!DOCTYPE html>
 <script>
 // ── State ──────────────────────────────────────────────────
 let netMode = 'dhcp';
-let selectedTier = 'entry';
+let selectedTier = 'basic';
 let tools = {
  ollama:     { name: 'Ollama + Open WebUI', desc: 'LLM inference & chat',     icon: '🤖', on: true },
  jupyterlab: { name: 'JupyterLab',          desc: 'Notebook environment',      icon: '📓', on: true },
@ -418,7 +423,7 @@ window.onload = () => {
    document.getElementById('current-ip').textContent = d.ip || 'unknown';
  });
  renderTools();
-  selectTier('entry');
+  selectTier('basic');
 };

 // ── Navigation ─────────────────────────────────────────────
@ -467,7 +472,7 @@ function applyStaticIP() {
 // ── Tier ───────────────────────────────────────────────────
 function selectTier(t) {
  selectedTier = t;
-  ['entry','mid','advanced'].forEach(x =>
+  ['starter','basic','pro','max'].forEach(x =>
    document.getElementById('tier-'+x).classList.toggle('selected', x===t));
 }

@ -707,7 +712,7 @@ class Handler(BaseHTTPRequestHandler):

        elif path == "/api/install":
            global install_proc
-            tier = body.get("tier", "entry")
+            tier = body.get("tier", "basic")
            skip = body.get("skip_tools", [])
            if not install_status["running"]:
                t = threading.Thread(target=run_install, args=(tier, skip), daemon=True)
@ -752,7 +757,7 @@ def show_console_banner(ip):
    try:
        with open("/etc/issue", "w") as f:
            f.write(f"Ubuntu 22.04.5 LTS \\n \\l\n\n")
-            f.write(f"  \033[1;36mCezen AI Suite Setup:\033[0m http://{ip}  |  http://cezenai.local\n\n")
+            f.write(f"  \033[1;36mNexus One AI Setup:\033[0m http://{ip}  |  http://cezenai.local\n\n")
    except Exception:
        pass

--- a/install.sh
+++ b/install.sh
@ -1,17 +1,37 @@
 #!/usr/bin/env bash
 # ─────────────────────────────────────────────
-# Cezen AI Suite — Entry Level Installer
+# Nexus One AI — Installer
 # Usage:
-#   sudo bash install.sh           → Phase 1 (drivers + schedules reboot → Phase 2)
-#   sudo bash install.sh --phase=2 → Phase 2 (all software, run after reboot)
+#   sudo bash install.sh                       → auto-detect tier, Phase 1
+#   sudo bash install.sh --tier=starter        → Starter tier, Phase 1
+#   sudo bash install.sh --tier=basic          → Basic tier, Phase 1
+#   sudo bash install.sh --tier=pro            → Pro tier, Phase 1
+#   sudo bash install.sh --tier=max            → Max tier, Phase 1
+#   sudo bash install.sh --phase=2 --tier=...  → Phase 2 only (post-reboot)
+#   sudo bash install.sh --software-only       → install on customer-owned hardware
+#   sudo bash install.sh --feasibility-only    → scan hardware and exit
+#   sudo bash install.sh --skip-model-pull     → install Ollama without preloading models
 # ─────────────────────────────────────────────
 set -e

-TIER="entry"
+# Auto-detect tier from ISO marker written by autoinstall user-data
+if [ -f /opt/cezen/tier ]; then
+  TIER="$(cat /opt/cezen/tier | tr -d '[:space:]')"
+elif [ -f /opt/aipackage/autoinstall/.tier ]; then
+  TIER="$(cat /opt/aipackage/autoinstall/.tier | tr -d '[:space:]')"
+else
+  TIER="basic"    # default if no marker found
+fi
 PHASE="1"
 SKIP_ROLES=""
+SOFTWARE_ONLY=false
+FEASIBILITY_ONLY=false
+SKIP_MODEL_PULL=false
+PROFILE="auto"
 SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
 ANSIBLE_DIR="$SCRIPT_DIR/ansible"
+FEASIBILITY_SCRIPT="$SCRIPT_DIR/scripts/cezen-feasibility.sh"
+FEASIBILITY_JSON="/opt/cezen/feasibility.json"

 # Load saved config (written by web setup UI before phase 1)
 [ -f /opt/cezen/install.conf ] && source /opt/cezen/install.conf
@ -21,9 +41,24 @@ for arg in "$@"; do
    --tier=*)  TIER="${arg#*=}" ;;
    --phase=*) PHASE="${arg#*=}" ;;
    --skip=*)  SKIP_ROLES="${arg#*=}" ;;
+    --profile=*) PROFILE="${arg#*=}" ;;
+    --software-only) SOFTWARE_ONLY=true ;;
+    --feasibility-only) FEASIBILITY_ONLY=true ;;
+    --skip-model-pull) SKIP_MODEL_PULL=true ;;
  esac
 done

+normalize_tier() {
+  # Fold the legacy tier names (entry / mid / advanced) into the current ones
+  # (basic / pro / max). Current names, including "starter", and any other
+  # value are left untouched.
+  if [ "$TIER" = "entry" ]; then
+    TIER="basic"
+  elif [ "$TIER" = "mid" ]; then
+    TIER="pro"
+  elif [ "$TIER" = "advanced" ]; then
+    TIER="max"
+  fi
+}
+
+normalize_tier
+
 # ── Preflight ──────────────────────────────────
 check_root() {
  if [ "$EUID" -ne 0 ]; then
@ -52,6 +87,84 @@ install_ansible() {
  echo "✓ Ansible ready"
 }

+append_skip_role() {
+  # Add role $1 to the comma-separated SKIP_ROLES list unless it is already
+  # listed. Wrapping the list in commas makes the membership test match whole
+  # entries only.
+  local name="$1"
+  case ",$SKIP_ROLES," in
+    ",,")        SKIP_ROLES="$name" ;;               # list is empty
+    *",$name,"*) ;;                                  # already present
+    *)           SKIP_ROLES="$SKIP_ROLES,$name" ;;
+  esac
+}
+
+run_feasibility() {
+  # Run the hardware feasibility checker, passing $FEASIBILITY_JSON as its
+  # output path. A missing checker script only produces a warning.
+  if [ ! -f "$FEASIBILITY_SCRIPT" ]; then
+    echo "WARNING: Feasibility checker not found: $FEASIBILITY_SCRIPT"
+    return
+  fi
+  bash "$FEASIBILITY_SCRIPT" "$FEASIBILITY_JSON"
+}
+
+json_field() {
+  # Print the value found at dotted key path $1 (for example
+  # "recommendation.recommended_profile") inside $FEASIBILITY_JSON.
+  # Prints an empty line when the file cannot be loaded or the path is absent.
+  local dotted_path="$1"
+  python3 - "$FEASIBILITY_JSON" "$dotted_path" <<'PY'
+import json, sys
+try:
+    json_path, dotted = sys.argv[1], sys.argv[2]
+    node = json.load(open(json_path))
+    for key in dotted.split("."):
+        node = node[key]
+    print(node)
+except Exception:
+    print("")
+PY
+}
+
+apply_profile_from_feasibility() {
+  # Resolve the install profile and translate it into SKIP_ROLES and
+  # SKIP_MODEL_PULL.
+  #
+  # PROFILE=auto is replaced by recommendation.recommended_profile from
+  # $FEASIBILITY_JSON when that file exists and names a profile. An explicit
+  # --profile=... is always applied, even when no feasibility report exists at
+  # $FEASIBILITY_JSON (previously the whole function was skipped in that case
+  # and the requested profile was silently ignored).
+  local detected_profile=""
+  local role
+  if [ "$PROFILE" = "auto" ] && [ -f "$FEASIBILITY_JSON" ]; then
+    detected_profile="$(json_field recommendation.recommended_profile)"
+    if [ -n "$detected_profile" ]; then
+      PROFILE="$detected_profile"
+    fi
+  fi
+
+  case "$PROFILE" in
+    auto)
+      # Nothing could be detected: keep whatever --skip=... already requested.
+      echo "WARNING: No feasibility recommendation available; using explicit skip list only."
+      ;;
+    core)
+      for role in docker k3s ollama vllm jupyterlab chromadb mlflow minio monitoring; do
+        append_skip_role "$role"
+      done
+      SKIP_MODEL_PULL=true
+      ;;
+    cpu-ai)
+      for role in k3s vllm mlflow minio; do
+        append_skip_role "$role"
+      done
+      SKIP_MODEL_PULL=true
+      ;;
+    gpu-lite|gpu-starter)
+      for role in k3s mlflow minio; do
+        append_skip_role "$role"
+      done
+      SKIP_MODEL_PULL=true
+      ;;
+    gpu-standard)
+      for role in mlflow minio; do
+        append_skip_role "$role"
+      done
+      ;;
+    gpu-pro|gpu-max)
+      # Full stack: nothing is skipped.
+      ;;
+    *)
+      echo "WARNING: Unknown profile '$PROFILE'; using explicit skip list only."
+      ;;
+  esac
+}
+
 has_nvidia_pci_gpu() {
  for vendor_file in /sys/bus/pci/devices/*/vendor; do
    [ -f "$vendor_file" ] || continue
@ -70,7 +183,7 @@ has_working_nvidia_driver() {
 run_phase1() {
  echo ""
  echo "╔══════════════════════════════════════════╗"
-  echo "║   Cezen AI Suite — Phase 1: NVIDIA       ║"
+  echo "║   Nexus One AI — Phase 1: NVIDIA         ║"
  echo "╚══════════════════════════════════════════╝"

  if ! has_nvidia_pci_gpu; then
@ -87,7 +200,7 @@ run_phase1() {
  # Register phase 2 as a one-shot systemd service so it runs after reboot
  cat > /etc/systemd/system/cezen-phase2.service << EOF
 [Unit]
-Description=Cezen AI Suite Phase 2 Installer
+Description=Nexus One AI Phase 2 Installer
 After=network-online.target nvidia-persistenced.service
 Wants=network-online.target

@ -116,7 +229,7 @@ EOF
 run_phase2() {
  echo ""
  echo "╔══════════════════════════════════════════╗"
-  echo "║   Cezen AI Suite — Phase 2: Stack        ║"
+  echo "║   Nexus One AI — Phase 2: Stack          ║"
  echo "╚══════════════════════════════════════════╝"

  GPU_AVAILABLE=false
@ -129,12 +242,26 @@ run_phase2() {
  fi

  # Build skip_roles extra var (comma-separated list, empty string = skip nothing)
-  EXTRA_VARS="tier=$TIER skip_roles=\"$SKIP_ROLES\" gpu_available=$GPU_AVAILABLE"
+  EXTRA_VARS="tier=$TIER skip_roles=\"$SKIP_ROLES\" gpu_available=$GPU_AVAILABLE skip_model_pull=$SKIP_MODEL_PULL"
  echo "→ Tier: $TIER | Skip: ${SKIP_ROLES:-none}"
  echo "→ GPU available: $GPU_AVAILABLE"
+  echo "→ Skip model pull: $SKIP_MODEL_PULL"

+  # Select Ansible playbook by tier
+  case "$TIER" in
+    starter)        PLAYBOOK="$ANSIBLE_DIR/starter.yml" ;;
+    basic|entry)    PLAYBOOK="$ANSIBLE_DIR/entry.yml" ;;
+    pro)            PLAYBOOK="$ANSIBLE_DIR/pro.yml" ;;
+    max)            PLAYBOOK="$ANSIBLE_DIR/max.yml" ;;
+    *)
+      echo "ERROR: Unknown tier '$TIER'. Valid: starter | basic | pro | max"
+      exit 1
+      ;;
+  esac
+
+  echo "→ Playbook: $PLAYBOOK"
  ANSIBLE_STDOUT_CALLBACK=yaml \
-  ansible-playbook -i localhost, -c local "$ANSIBLE_DIR/entry.yml" \
+  ansible-playbook -i localhost, -c local "$PLAYBOOK" \
    -e "$EXTRA_VARS"

  # Disable one-shot service so it doesn't run again on next reboot
@ -142,19 +269,32 @@ run_phase2() {

  echo ""
  echo "╔══════════════════════════════════════════╗"
-  echo "║   Cezen AI Suite installation complete!  ║"
+  echo "║   Nexus One AI installation complete!    ║"
+  echo "║   Tier: $(printf '%-33s' "$TIER")║"
  echo "║                                          ║"
-  echo "║   JupyterLab  → http://localhost:8888    ║"
-  echo "║   Ollama API  → http://localhost:11434   ║"
-  echo "║   MLflow      → http://localhost:5000    ║"
-  echo "║   MinIO       → http://localhost:9001    ║"
-  echo "║   Grafana     → http://localhost:3000    ║"
+  echo "║   Portal     → http://localhost          ║"
+  echo "║   Ollama API → http://localhost:11434    ║"
+  echo "║   vLLM API   → http://localhost:8000     ║"
+  echo "║   Grafana    → http://localhost:3000     ║"
  echo "╚══════════════════════════════════════════╝"
 }

 # ── Main ───────────────────────────────────────
-check_root
 check_os
+
+if [ "$FEASIBILITY_ONLY" = true ]; then
+  run_feasibility
+  exit 0
+fi
+
+check_root
+run_feasibility
+
+if [ "$SOFTWARE_ONLY" = true ]; then
+  PHASE="2"
+  apply_profile_from_feasibility
+fi
+
 install_ansible

 if [ "$PHASE" = "1" ]; then
--- a/models/pull-models.sh
+++ b/models/pull-models.sh
@ -1,44 +1,68 @@
 #!/usr/bin/env bash
 # Pull additional AI models into Ollama
-# Run after install: bash models/pull-models.sh --tier=entry
+# Run after install: bash models/pull-models.sh --tier=starter
 # ─────────────────────────────────────────────
-TIER=${1:-entry}
+TIER="basic"   # default tier

-echo "Pulling models for tier: $TIER"
+for arg in "$@"; do
+  case $arg in
+    --tier=*) TIER="${arg#*=}" ;;
+    *)        TIER="$arg" ;;     # allow positional: pull-models.sh pro
+  esac
+done

-entry_models=(
-  "llama3.1:8b"         # General purpose, good baseline
-  "mistral:7b"          # Fast, good for APIs
-  "llama3.1:70b"        # Larger — only if enough VRAM (3× L40S has 144GB total)
-  "nomic-embed-text"    # Embedding model for RAG
-  "codellama:13b"       # Code generation
+# Normalise legacy names
+case "$TIER" in
+  entry)    TIER="basic" ;;
+  mid)      TIER="pro" ;;
+  advanced) TIER="max" ;;
+esac
+
+# ── Model lists ───────────────────────────────
+starter_models=(
+  "phi3:mini"              # 3.8B — fits in 32 GB GDDR7 at full precision
+  "nomic-embed-text"       # Embedding model for RAG
 )

-mid_models=(
-  "${entry_models[@]}"
-  "llama3.1:70b"
-  "mixtral:8x7b"
-  "deepseek-coder-v2:16b"
+basic_models=(
+  "llama3.1:8b"            # General purpose, good baseline
+  "mistral:7b"             # Fast, good for APIs
+  "nomic-embed-text"       # Embedding model for RAG
+  "codellama:13b"          # Code generation
 )

-advanced_models=(
-  "${mid_models[@]}"
-  "llama3.1:405b"
-  "mixtral:8x22b"
+pro_models=(
+  "${basic_models[@]}"
+  "llama3.1:70b"           # Large general purpose (needs 64+ GB VRAM at 4-bit)
+  "mixtral:8x7b"           # MoE model, strong reasoning
+  "deepseek-coder-v2:16b"  # Code specialist
+)
+
+max_models=(
+  "${pro_models[@]}"
+  "llama3.1:405b"          # Flagship — needs 320+ GB VRAM or multi-node
+  "mixtral:8x22b"          # Large MoE
 )

 case $TIER in
-  entry)    models=("${entry_models[@]}") ;;
-  mid)      models=("${mid_models[@]}") ;;
-  advanced) models=("${advanced_models[@]}") ;;
-  *)        echo "Unknown tier: $TIER. Use entry, mid, or advanced."; exit 1 ;;
+  starter) models=("${starter_models[@]}") ;;
+  basic)   models=("${basic_models[@]}") ;;
+  pro)     models=("${pro_models[@]}") ;;
+  max)     models=("${max_models[@]}") ;;
+  *)
+    echo "Unknown tier: $TIER"
+    echo "Usage: bash pull-models.sh --tier=starter|basic|pro|max"
+    exit 1
+    ;;
 esac

+echo "Pulling models for tier: $TIER"
+echo ""
+
 for model in "${models[@]}"; do
-  echo ""
  echo "→ Pulling $model..."
  ollama pull "$model"
+  echo ""
 done

-echo ""
-echo "✓ All models pulled. List with: ollama list"
+echo "✓ Done. List installed models with: ollama list"
--- a/nginx/cezen.conf
+++ b/nginx/cezen.conf
@ -0,0 +1,135 @@
+# /etc/nginx/sites-available/cezen
+# Nexus One AI Portal — serves static portal, proxies API and console terminal
+#
+# Install:
+#   sudo cp cezen.conf /etc/nginx/sites-available/cezen
+#   sudo ln -sf /etc/nginx/sites-available/cezen /etc/nginx/sites-enabled/cezen
+#   sudo rm -f /etc/nginx/sites-enabled/default
+#   sudo nginx -t && sudo systemctl reload nginx
+
+# ─── Rate limiting zones (must be outside server block) ──────────────────────
+# Login: 5 requests/min per IP; a burst of 3 extra requests is served immediately (nodelay), then 429
+limit_req_zone $binary_remote_addr zone=cezen_login:10m rate=5r/m;
+# General API: 60 req/min per IP (generous for dashboard polling)
+limit_req_zone $binary_remote_addr zone=cezen_api:10m   rate=60r/m;
+
+server {
+    listen 80 default_server;
+    listen [::]:80 default_server;
+
+    server_name _;
+
+    # Hide server version
+    server_tokens off;
+
+    # Logging
+    access_log /var/log/nginx/cezen-access.log;
+    error_log  /var/log/nginx/cezen-error.log;
+
+    # ─── Global security headers ──────────────────────────────────────────────
+    add_header X-Content-Type-Options  "nosniff"                   always;
+    add_header X-Frame-Options         "SAMEORIGIN"                always;
+    add_header X-XSS-Protection        "1; mode=block"             always;
+    add_header Referrer-Policy         "strict-origin-when-cross-origin" always;
+    add_header Permissions-Policy      "geolocation=(), camera=(), microphone=()" always;
+    add_header Content-Security-Policy
+        "default-src 'self'; "
+        "script-src 'self' 'unsafe-inline'; "
+        "style-src 'self' 'unsafe-inline'; "
+        "img-src 'self' data:; "
+        "connect-src 'self'; "
+        "frame-src 'self'; "
+        "font-src 'self'; "
+        "object-src 'none'; "
+        "base-uri 'self';"
+        always;
+
+    # ─── robots.txt — block all indexing (air-gapped / private portal) ────────
+    location = /robots.txt {
+        # Set the MIME type with default_type. An add_header Content-Type here
+        # would emit a second Content-Type header and, because add_header is
+        # inherited only by locations that define none of their own, would also
+        # drop the server-level security headers for this response.
+        default_type text/plain;
+        return 200 "User-agent: *\nDisallow: /\n";
+    }
+
+    # ─── Static Portal ───────────────────────────────────────────────────────
+    root /opt/cezen/portal;
+    index index.html;
+
+    location / {
+        try_files $uri $uri/ /index.html;
+    }
+
+    # Cache static assets aggressively
+    location ~* \.(css|js|png|jpg|jpeg|gif|ico|svg|woff2?)$ {
+        expires 7d;
+        add_header Cache-Control "public, immutable";
+
+        # nginx inherits add_header from the server level only when a location
+        # declares none of its own. This location sets Cache-Control, so the
+        # global security headers must be repeated here or static assets would
+        # be served without them. Keep this list in sync with the server block.
+        add_header X-Content-Type-Options  "nosniff"                   always;
+        add_header X-Frame-Options         "SAMEORIGIN"                always;
+        add_header X-XSS-Protection        "1; mode=block"             always;
+        add_header Referrer-Policy         "strict-origin-when-cross-origin" always;
+        add_header Permissions-Policy      "geolocation=(), camera=(), microphone=()" always;
+        add_header Content-Security-Policy
+            "default-src 'self'; "
+            "script-src 'self' 'unsafe-inline'; "
+            "style-src 'self' 'unsafe-inline'; "
+            "img-src 'self' data:; "
+            "connect-src 'self'; "
+            "frame-src 'self'; "
+            "font-src 'self'; "
+            "object-src 'none'; "
+            "base-uri 'self';"
+            always;
+    }
+
+    # ─── Model upload (large files — no size limit, extended timeout) ────────
+    location = /api/models/upload {
+        client_max_body_size    0;         # unlimited — GGUF files can be 70 GB+
+        proxy_request_buffering off;       # stream directly to backend, don't buffer in Nginx
+        proxy_read_timeout      7200s;     # 2 hours for slow transfers
+        proxy_send_timeout      7200s;
+
+        proxy_pass         http://127.0.0.1:8080;
+        proxy_http_version 1.1;
+        proxy_set_header   Host              $host;
+        proxy_set_header   X-Real-IP         $remote_addr;
+        proxy_set_header   X-Forwarded-For   $proxy_add_x_forwarded_for;
+        # Forward the client scheme like the other backend locations do.
+        proxy_set_header   X-Forwarded-Proto $scheme;
+    }
+
+    # ─── Login rate limit (tight) ─────────────────────────────────────────────
+    location = /api/auth/login {
+        limit_req zone=cezen_login burst=3 nodelay;
+        limit_req_status 429;
+
+        proxy_pass         http://127.0.0.1:8080;
+        proxy_http_version 1.1;
+        proxy_set_header   Host              $host;
+        proxy_set_header   X-Real-IP         $remote_addr;
+        proxy_set_header   X-Forwarded-For   $proxy_add_x_forwarded_for;
+        proxy_set_header   X-Forwarded-Proto $scheme;
+        proxy_read_timeout 30s;
+    }
+
+    # ─── FastAPI Backend (/api/) ──────────────────────────────────────────────
+    # "^~" stops nginx from evaluating regex locations once this prefix matches.
+    # Without it, API paths ending in .js/.png/.py/.sh etc. would be captured by
+    # the static-asset or deny regex locations instead of reaching the backend.
+    location ^~ /api/ {
+        limit_req zone=cezen_api burst=20 nodelay;
+        limit_req_status 429;
+
+        proxy_pass         http://127.0.0.1:8080;
+        proxy_http_version 1.1;
+        proxy_set_header   Host              $host;
+        proxy_set_header   X-Real-IP         $remote_addr;
+        proxy_set_header   X-Forwarded-For   $proxy_add_x_forwarded_for;
+        proxy_set_header   X-Forwarded-Proto $scheme;
+        proxy_read_timeout 120s;
+    }
+
+    # ─── Web Console (ttyd) (/console/) ──────────────────────────────────────
+    # "^~" keeps the static-asset regex location from capturing /console/*.js
+    # and /console/*.css, which must be proxied to ttyd rather than read from
+    # the portal root.
+    location ^~ /console/ {
+        proxy_pass         http://127.0.0.1:7681/;
+        proxy_http_version 1.1;
+        proxy_set_header   Upgrade           $http_upgrade;
+        proxy_set_header   Connection        "upgrade";
+        proxy_set_header   Host              $host;
+        proxy_set_header   X-Real-IP         $remote_addr;
+        # sub_filter cannot rewrite compressed bodies, so ask the upstream for
+        # an uncompressed response.
+        proxy_set_header   Accept-Encoding   "";
+        proxy_read_timeout 86400s;
+
+        # Rewrite paths so ttyd JS/CSS assets load correctly
+        proxy_redirect     / /console/;
+        sub_filter         'href="/'  'href="/console/';
+        sub_filter         'src="/'   'src="/console/';
+        sub_filter_once    off;
+        sub_filter_types   text/html;
+    }
+
+    # ─── Block dotfiles and common attack paths ───────────────────────────────
+    location ~ /\. {
+        deny all;
+    }
+
+    location ~* \.(env|git|sql|bak|sh|py)$ {
+        deny all;
+    }
+}
--- a/scripts/cezen-backup.sh
+++ b/scripts/cezen-backup.sh
@ -0,0 +1,83 @@
+#!/usr/bin/env bash
+# Nexus One AI backup/restore helper.
+#
+# Usage:
+#   sudo bash scripts/cezen-backup.sh backup
+#   sudo bash scripts/cezen-backup.sh list
+#   sudo bash scripts/cezen-backup.sh restore /opt/cezen/backups/cezen-backup-YYYYmmdd-HHMMSS.zip
+
+set -euo pipefail
+
+ACTION="${1:-backup}"
+TARGET="${2:-}"
+DATA_DIR="${CEZEN_DATA:-/opt/cezen/data}"
+BACKUP_DIR="${CEZEN_BACKUP_DIR:-/opt/cezen/backups}"
+
+python3 - "$ACTION" "$TARGET" "$DATA_DIR" "$BACKUP_DIR" <<'PY'
+import json
+import shutil
+import sys
+import zipfile
+from datetime import datetime, timezone
+from pathlib import Path
+
+action, target, data_dir, backup_dir = sys.argv[1:5]
+data_dir = Path(data_dir)
+backup_dir = Path(backup_dir)
+backup_dir.mkdir(parents=True, exist_ok=True)
+
+def now_tag():
+    """Return the current UTC time as a YYYYmmdd-HHMMSS tag for file names."""
+    utc_now = datetime.now(timezone.utc)
+    return utc_now.strftime("%Y%m%d-%H%M%S")
+
+def iso_now():
+    """Return the current UTC time formatted with datetime.isoformat()."""
+    utc_now = datetime.now(timezone.utc)
+    return utc_now.isoformat()
+
+def write_backup(dest):
+    """Write a zip archive of data_dir to dest.
+
+    The archive holds a manifest.json (schema, creation time, source data
+    directory) followed by every regular file under data_dir, stored under its
+    path relative to data_dir. A missing data_dir yields a manifest-only
+    archive.
+    """
+    dest = Path(dest)
+    manifest = {
+        "schema": "cezen.backup_manifest.v1",
+        "created_at": iso_now(),
+        "data_dir": str(data_dir),
+        "source": "cezen-backup.sh",
+    }
+    with zipfile.ZipFile(dest, "w", compression=zipfile.ZIP_DEFLATED) as zf:
+        zf.writestr("manifest.json", json.dumps(manifest, indent=2))
+        if not data_dir.exists():
+            return
+        dest_resolved = dest.resolve()
+        for path in data_dir.rglob("*"):
+            if not path.is_file():
+                continue
+            # CEZEN_BACKUP_DIR may point inside CEZEN_DATA; never add the
+            # archive that is currently being written to itself.
+            if path.resolve() == dest_resolved:
+                continue
+            zf.write(path, path.relative_to(data_dir).as_posix())
+
+def restore(src):
+    """Restore data_dir from the backup archive at src.
+
+    All archive members are validated before anything is written, so an
+    archive containing an unsafe path is rejected without leaving data_dir
+    partially overwritten. A pre-restore snapshot of the current data is
+    written to backup_dir before extraction. Files already in data_dir that
+    are not part of the archive are left in place.
+
+    Raises SystemExit when src is missing, is not a zip archive, or contains a
+    member that would resolve outside data_dir.
+    """
+    src = Path(src)
+    if not src.is_file():
+        raise SystemExit(f"Backup not found: {src}")
+    root = data_dir.resolve()
+    try:
+        archive = zipfile.ZipFile(src, "r")
+    except zipfile.BadZipFile as exc:
+        raise SystemExit(f"Not a valid backup archive: {src} ({exc})")
+    with archive as zf:
+        # Pass 1: resolve and vet every destination before touching the disk.
+        plan = []
+        for member in zf.infolist():
+            if member.filename == "manifest.json" or member.is_dir():
+                continue
+            target_path = (data_dir / member.filename).resolve()
+            # The destination must be strictly inside data_dir; this rejects
+            # "../" traversal, absolute paths and the data directory itself.
+            if root not in target_path.parents:
+                raise SystemExit(f"Unsafe archive path: {member.filename}")
+            plan.append((member, target_path))
+        safety = backup_dir / f"pre-restore-{now_tag()}.zip"
+        write_backup(safety)
+        # Pass 2: extract the vetted members.
+        for member, target_path in plan:
+            target_path.parent.mkdir(parents=True, exist_ok=True)
+            with zf.open(member) as source, open(target_path, "wb") as out:
+                shutil.copyfileobj(source, out)
+    print(json.dumps({"ok": True, "restored": str(src), "pre_restore_snapshot": str(safety)}, indent=2))
+
+if action == "backup":
+    dest = backup_dir / f"cezen-backup-{now_tag()}.zip"
+    write_backup(dest)
+    print(json.dumps({"ok": True, "backup": str(dest)}, indent=2))
+elif action == "list":
+    rows = []
+    for path in sorted(backup_dir.glob("cezen-backup-*.zip"), key=lambda p: p.stat().st_mtime, reverse=True):
+        rows.append({"name": path.name, "path": str(path), "size_bytes": path.stat().st_size})
+    print(json.dumps({"backup_dir": str(backup_dir), "backups": rows}, indent=2))
+elif action == "restore":
+    if not target:
+        raise SystemExit("Usage: cezen-backup.sh restore /path/to/backup.zip")
+    restore(target)
+else:
+    raise SystemExit("Usage: cezen-backup.sh backup|list|restore [backup.zip]")
+PY
--- a/scripts/cezen-feasibility.sh
+++ b/scripts/cezen-feasibility.sh
@ -0,0 +1,218 @@
+#!/usr/bin/env bash
+# Nexus One AI feasibility checker.
+# Runs before installation to classify existing hardware for software-only or appliance deployments.
+set -euo pipefail
+
+# Destination of the JSON report: first argument, else the default location.
+OUT="${1:-/opt/cezen/feasibility.json}"
+# Best effort only: an unwritable destination is handled by the copy fallback below.
+mkdir -p "$(dirname "$OUT")" 2>/dev/null || true
+
+# Keep the X's at the END of the template: BSD/macOS mktemp only substitutes
+# trailing X's, so a ".json" suffix would yield a fixed, collision-prone name.
+tmp_json="$(mktemp /tmp/cezen-feasibility.XXXXXX)"
+# Remove the scratch file even if a later step aborts under `set -e`.
+trap 'rm -f "$tmp_json"' EXIT
+
+python3 - "$tmp_json" <<'PY'
+import json, os, platform, shutil, socket, subprocess, sys
+from pathlib import Path
+
+out = Path(sys.argv[1])
+
+def run(cmd, timeout=5):
+    """Run ``cmd`` and return its stripped stdout, or "" on any failure.
+
+    stderr is discarded; a non-zero exit, a missing binary or a timeout all
+    collapse to the empty string.
+    """
+    try:
+        captured = subprocess.check_output(
+            cmd,
+            stderr=subprocess.DEVNULL,
+            text=True,
+            timeout=timeout,
+        )
+    except Exception:
+        return ""
+    return captured.strip()
+
+def read(path, default=""):
+    """Return the stripped text of ``path``, or ``default`` if it cannot be read.
+
+    Undecodable bytes are replaced rather than raising.
+    """
+    try:
+        text = Path(path).read_text(errors="replace")
+    except Exception:
+        return default
+    return text.strip()
+
+def gb(n):
+    """Convert a byte count to GiB, rounded to one decimal place."""
+    bytes_per_gib = 1 << 30
+    return round(n / bytes_per_gib, 1)
+
+def os_info():
+    """Describe the operating system using /etc/os-release when available.
+
+    Returns a dict with ``name``, ``id``, ``version_id`` and ``kernel``.
+    ``name`` falls back to ``platform.platform()`` when PRETTY_NAME is
+    missing (for example when /etc/os-release cannot be read).
+    """
+    data = {}
+    for line in read("/etc/os-release").splitlines():
+        line = line.strip()
+        # Skip blanks, comments and anything that is not an assignment.
+        if not line or line.startswith("#") or "=" not in line:
+            continue
+        k, v = line.split("=", 1)
+        v = v.strip()
+        # os-release values may be wrapped in double OR single quotes.
+        if len(v) >= 2 and v[0] == v[-1] and v[0] in "\"'":
+            v = v[1:-1]
+        data[k.strip()] = v
+    return {
+        "name": data.get("PRETTY_NAME") or platform.platform(),
+        "id": data.get("ID", ""),
+        "version_id": data.get("VERSION_ID", ""),
+        "kernel": platform.release(),
+    }
+
+def cpu_info():
+    """Return the CPU model string and logical core count.
+
+    The model is the first "model name" entry in /proc/cpuinfo, falling back
+    to ``platform.processor()`` when no such entry is found.
+    """
+    model_lines = (
+        entry
+        for entry in read("/proc/cpuinfo").splitlines()
+        if entry.lower().startswith("model name")
+    )
+    first_hit = next(model_lines, None)
+    model = "" if first_hit is None else first_hit.split(":", 1)[1].strip()
+    core_count = os.cpu_count() or 0
+    return {"model": model or platform.processor(), "cores": core_count}
+
+def mem_gb():
+    """Return total physical memory in GiB (one decimal), or 0 if unknown.
+
+    Tries, in order: MemTotal from /proc/meminfo, ``sysctl -n hw.memsize``,
+    then ``os.sysconf`` page count times page size.
+    """
+    for entry in read("/proc/meminfo").splitlines():
+        if not entry.startswith("MemTotal:"):
+            continue
+        # The second whitespace-separated field is divided by 1024 twice to reach GiB.
+        total = int(entry.split()[1])
+        return round(total / 1024 / 1024, 1)
+
+    hw_memsize = run(["sysctl", "-n", "hw.memsize"])
+    if hw_memsize.isdigit():
+        return gb(int(hw_memsize))
+
+    try:
+        page_count = os.sysconf("SC_PHYS_PAGES")
+        bytes_per_page = os.sysconf("SC_PAGE_SIZE")
+        if page_count and bytes_per_page:
+            return gb(int(page_count) * int(bytes_per_page))
+    except Exception:
+        pass
+    return 0
+
+def disk_info():
+    """Return total, free and used space of the root filesystem in GiB."""
+    total_bytes, used_bytes, free_bytes = shutil.disk_usage("/")
+    return {
+        "total_gb": gb(total_bytes),
+        "free_gb": gb(free_bytes),
+        "used_gb": gb(used_bytes),
+    }
+
+def gpu_info():
+    """Return a list of detected NVIDIA GPUs as name/vram_gb/driver dicts.
+
+    Uses nvidia-smi when it is on PATH. If that yields nothing, the PCI
+    vendor IDs under /sys are scanned and a single placeholder entry with
+    0 GB VRAM is reported when an NVIDIA device (vendor 0x10de) is present.
+    """
+    gpus = []
+    if shutil.which("nvidia-smi"):
+        fields = "name,memory.total,driver_version"
+        listing = run(["nvidia-smi", f"--query-gpu={fields}", "--format=csv,noheader,nounits"])
+        for row in listing.splitlines():
+            cols = [col.strip() for col in row.split(",")]
+            if len(cols) < 2:
+                continue
+            try:
+                # The memory.total column is divided by 1024 to report GB.
+                vram = round(float(cols[1]) / 1024, 1)
+            except Exception:
+                vram = 0
+            driver = cols[2] if len(cols) > 2 else ""
+            gpus.append({"name": cols[0], "vram_gb": vram, "driver": driver})
+    if gpus:
+        return gpus
+
+    # No usable nvidia-smi output: look for NVIDIA's PCI vendor ID instead.
+    nvidia_on_bus = any(
+        read(vendor_file).lower() == "0x10de"
+        for vendor_file in Path("/sys/bus/pci/devices").glob("*/vendor")
+    )
+    if nvidia_on_bus:
+        gpus.append({"name": "NVIDIA GPU detected (driver not ready)", "vram_gb": 0, "driver": ""})
+    return gpus
+
+def port_open(host, port):
+    """Return True if a TCP connection to ``host:port`` succeeds within 1 second."""
+    reachable = False
+    try:
+        with socket.create_connection((host, port), timeout=1):
+            reachable = True
+    except Exception:
+        reachable = False
+    return reachable
+
+def tool_state():
+    """Report which helper tools are on PATH and whether Docker responds.
+
+    ``docker_running`` is True only when the docker binary exists and
+    ``docker info`` produces output within 3 seconds.
+    """
+    def on_path(binary):
+        return bool(shutil.which(binary))
+
+    if shutil.which("docker"):
+        daemon_up = bool(run(["docker", "info"], timeout=3))
+    else:
+        daemon_up = False
+
+    state = {"docker": on_path("docker"), "docker_running": daemon_up}
+    state["ansible"] = on_path("ansible-playbook")
+    for binary in ("python3", "curl", "git"):
+        state[binary] = on_path(binary)
+    return state
+
+def recommend(mem, disk, gpus, cpu_cores):
+    """Pick a tier/profile recommendation from the measured hardware.
+
+    Args:
+        mem: total RAM in GB.
+        disk: free disk space in GB.
+        gpus: list of GPU dicts; only their ``vram_gb`` values are consulted.
+        cpu_cores: CPU core count.
+
+    Returns:
+        A dict with ``recommended_tier``, ``recommended_profile``,
+        ``estimated_concurrent_users`` and a list of advisory ``notes``.
+    """
+    vram_sizes = [g.get("vram_gb", 0) for g in gpus]
+    max_vram = max(vram_sizes or [0])
+    gpu_count = sum(1 for size in vram_sizes if size > 0)
+
+    def classify():
+        # First matching rung wins; rungs are ordered from largest to smallest.
+        if max_vram >= 120 and gpu_count >= 4:
+            return "max", "gpu-max", "100+", None
+        if max_vram >= 80 and gpu_count >= 2:
+            return "pro", "gpu-pro", "20-100", None
+        if max_vram >= 48:
+            return "basic", "gpu-standard", "5-20", None
+        if max_vram >= 24:
+            return "starter", "gpu-starter", "1-10", None
+        if max_vram >= 8:
+            return "starter", "gpu-lite", "1-5", "GPU is suitable for small quantized models only."
+        if mem >= 32 and cpu_cores >= 8:
+            return (
+                "starter",
+                "cpu-ai",
+                "1-3",
+                "No usable NVIDIA VRAM found; local CPU inference is limited. Use cloud/external model fallback for better UX.",
+            )
+        return (
+            "starter",
+            "core",
+            "1-2",
+            "Hardware is best for portal, RAG management, workflows, and external/cloud model routing.",
+        )
+
+    tier, profile, concurrency, lead_note = classify()
+    notes = [] if lead_note is None else [lead_note]
+
+    # Resource warnings apply regardless of the chosen profile.
+    if mem < 16:
+        notes.append("RAM below 16 GB; avoid local model serving.")
+    if disk < 100:
+        notes.append("Less than 100 GB free disk; model storage and document indexing will be constrained.")
+
+    return {
+        "recommended_tier": tier,
+        "recommended_profile": profile,
+        "estimated_concurrent_users": concurrency,
+        "notes": notes,
+    }
+
+# Probe the host once; everything below is derived from these readings.
+mem = mem_gb()
+disk = disk_info()
+gpus = gpu_info()
+cpu = cpu_info()
+tools = tool_state()
+free_gb = disk["free_gb"]
+rec = recommend(mem, free_gb, gpus, cpu["cores"])
+
+# Per-GPU VRAM sizes drive every GPU-dependent feature flag.
+vram_sizes = [g.get("vram_gb", 0) for g in gpus]
+has_8gb_gpu = any(size >= 8 for size in vram_sizes)
+has_24gb_gpu = any(size >= 24 for size in vram_sizes)
+large_gpu_count = sum(1 for size in vram_sizes if size >= 48)
+
+features = {
+    "portal": True,
+    "users_auth": True,
+    "document_intelligence": mem >= 8,
+    "rag_chromadb": mem >= 16 and free_gb >= 50,
+    "ollama_cpu": mem >= 32,
+    "ollama_gpu": has_8gb_gpu,
+    "vllm": has_24gb_gpu,
+    "fine_tuning_qlora": has_24gb_gpu,
+    "distributed_training": large_gpu_count >= 2,
+    "monitoring": True,
+    "software_only": True,
+    "air_gapped_ready": True,
+}
+
+# UTC timestamp from date(1); empty string when the command yields nothing.
+generated_at = run(["date", "-u", "+%Y-%m-%dT%H:%M:%SZ"]) or ""
+hostname = socket.gethostname()
+os_details = os_info()
+
+# Check which of these local ports already accept connections.
+ports_in_use = {}
+for port in (80, 8080, 11434, 8000, 3000, 8888):
+    ports_in_use[str(port)] = port_open("127.0.0.1", port)
+
+report = {
+    "schema": "cezen.feasibility.v1",
+    "generated_at": generated_at,
+    "host": hostname,
+    "os": os_details,
+    "cpu": cpu,
+    "ram_gb": mem,
+    "disk": disk,
+    "gpus": gpus,
+    "tools": tools,
+    "ports_in_use": ports_in_use,
+    "recommendation": rec,
+    "features": features,
+}
+out.write_text(json.dumps(report, indent=2))
+PY
+
+# Publish the report; if the requested destination is not writable, fall back
+# to the current directory.
+if cp "$tmp_json" "$OUT" 2>/dev/null; then
+  :
+else
+  OUT="./feasibility.json"
+  cp "$tmp_json" "$OUT"
+fi
+# The scratch copy is no longer needed.
+rm -f "$tmp_json"
+
+python3 - "$OUT" <<'PY'
+import json, sys
+
+# Path of the JSON report, passed as the first positional argument.
+p = sys.argv[1]
+# Use a context manager so the report file handle is closed deterministically.
+with open(p) as fh:
+    d = json.load(fh)
+r = d["recommendation"]
+
+# Human-readable summary of the report.
+print("")
+print("Nexus One AI Feasibility Report")
+print("--------------------------------")
+print(f"Host: {d['host']}")
+print(f"OS: {d['os']['name']}")
+print(f"CPU: {d['cpu']['cores']} cores | RAM: {d['ram_gb']} GB | Free disk: {d['disk']['free_gb']} GB")
+if d["gpus"]:
+    print("GPU: " + "; ".join(f"{g['name']} ({g.get('vram_gb', 0)} GB VRAM)" for g in d["gpus"]))
+else:
+    print("GPU: none detected")
+print("")
+print(f"Recommended tier: {r['recommended_tier'].upper()}")
+print(f"Recommended profile: {r['recommended_profile']}")
+print(f"Estimated concurrency: {r['estimated_concurrent_users']} users")
+if r["notes"]:
+    print("Notes:")
+    for n in r["notes"]:
+        print(f"  - {n}")
+print("")
+print(f"JSON report: {p}")
+PY