Update ISO installer tiers and backend deployment

2026-06-30 08:54:01 +05:30 · 2026-06-30 08:54:01 +05:30 · 56668f7bdc
commit 56668f7bdc
parent 79784a6743
38 changed files with 7206 additions and 175 deletions
--- a/.gitignore
+++ b/.gitignore
@ -0,0 +1 @@
 *.iso
--- a/README.md
+++ b/README.md
@ -1,4 +1,4 @@
-# Cezen AI Suite — Installer
+# Nexus One AI — Installer
 ## Quick Start
@ -12,6 +12,61 @@ Server reboots automatically after NVIDIA drivers install. Phase 2 runs on its o
 On the custom ISO, Ubuntu autoinstall now pauses on the installer network screen so the operator can choose the final IP address from the VM console before installation continues.
 ## Software-Only / Existing Hardware
 Run a feasibility scan before quoting or installing on customer-owned hardware:
 ```bash
 bash scripts/cezen-feasibility.sh
 ```
 The checker reports CPU, RAM, disk, NVIDIA GPU/VRAM, tool readiness, available features, and a recommended Cezen profile. It writes JSON to `/opt/cezen/feasibility.json` when possible, otherwise `./feasibility.json`.
 Install on existing hardware without the appliance NVIDIA phase:
 ```bash
 sudo bash install.sh --software-only --profile=auto
 ```
 For small systems or slow customer networks, the installer skips default model downloads on lightweight profiles. To force the same behavior manually:
 ```bash
 sudo bash install.sh --software-only --profile=cpu-ai --skip-model-pull
 ```
 Profiles:
 | Profile | Use When | Installs |
 |---|---|---|
 | `core` | no GPU / low RAM | portal, backend, nginx, health/metrics API |
 | `cpu-ai` | 32 GB+ RAM, no usable GPU | core + Chroma/Ollama CPU path, model pull optional |
 | `gpu-starter` | 24-32 GB VRAM | local AI starter stack, model pull optional |
 | `gpu-standard` | 48-96 GB VRAM | standard GPU stack |
 | `gpu-pro` | multi/high-VRAM GPU | advanced GPU stack |
 | `gpu-max` | multi-node or HGX-class | full stack, custom sizing |
 ## Sellable v1 Admin APIs
 The backend exposes the first productization APIs for software-only and appliance deployments:
 | API | Purpose |
 |---|---|
 | `GET /api/license` | Shows current tier, feature matrix, and whether the tier is locked by Cezen. |
 | `GET /api/system/feasibility` | Returns the generated hardware feasibility report or live fallback. |
 | `GET /api/system/readiness-report` | Combines license, feasibility, and install readiness into a customer-facing report payload. |
 | `GET /api/audit/report?days=7` | Basic audit summary for handover and admin review. |
 | `GET /api/system/backups` | Lists local backups. |
 | `POST /api/system/backups` | Creates a local backup of Cezen data. |
 | `POST /api/system/backups/{name}/restore` | Restores a named local backup and creates a pre-restore safety snapshot. |
 CLI backup helper:
 ```bash
 sudo bash scripts/cezen-backup.sh backup
 sudo bash scripts/cezen-backup.sh list
 sudo bash scripts/cezen-backup.sh restore /opt/cezen/backups/cezen-backup-YYYYmmdd-HHMMSS.zip
 ```
 ## What Gets Installed (Entry Tier)
 | Service | Port | Notes |
@ -42,7 +97,10 @@ NVIDIA driver install will succeed but `nvidia-smi` won't show GPUs — that's e
 ## Pull More Models
 ```bash
-bash models/pull-models.sh --tier=entry
+bash models/pull-models.sh --tier=starter   # phi3:mini + embeddings
 bash models/pull-models.sh --tier=basic     # llama3.1:8b, mistral:7b, codellama
 bash models/pull-models.sh --tier=pro       # + llama3.1:70b, mixtral, deepseek-coder
 bash models/pull-models.sh --tier=max       # + llama3.1:405b, mixtral:8x22b
 ```
 ## File Structure
@ -52,7 +110,10 @@ cgit/
 ├── install.sh                    ← Entry point
 ├── ansible/
 │   ├── phase1_nvidia.yml         ← Phase 1: drivers (triggers reboot)
-│   ├── entry.yml                 ← Phase 2: full stack
+│   ├── starter.yml               ← Phase 2: Starter tier (1 GPU, small team)
 │   ├── entry.yml                 ← Phase 2: Basic tier (1–2 GPU, department)
 │   ├── pro.yml                   ← Phase 2: Pro tier (2+ GPU, multi-team)
 │   ├── max.yml                   ← Phase 2: Max tier (4–8 GPU, enterprise)
 │   └── roles/
 │       ├── base/                 ← OS, Python, Miniconda, LangChain
 │       ├── nvidia/               ← Drivers, CUDA 12.4, cuDNN 9
--- a/ansible/entry.yml
+++ b/ansible/entry.yml
@ -1,7 +1,7 @@
 ---
-# Phase 2: Full Cezen AI Suite — Entry Tier
+# Phase 2: Full Nexus One AI — Entry Tier
 # Runs after NVIDIA driver reboot
- name: Cezen AI — Entry Tier Stack
+- name: Nexus One AI — Entry Tier Stack
  hosts: localhost
  connection: local
  become: true
@ -35,3 +35,9 @@
      when: "'minio' not in skip_roles.split(',')"
    - role: monitoring
      when: "'monitoring' not in skip_roles.split(',')"
    - role: cezen-backend
      when: "'cezen-backend' not in skip_roles.split(',')"
    - role: cezen-ttyd
      when: "'cezen-ttyd' not in skip_roles.split(',')"
    - role: cezen-nginx
      when: "'cezen-nginx' not in skip_roles.split(',')"
--- a/ansible/max.yml
+++ b/ansible/max.yml
@ -0,0 +1,83 @@
 ---
 # Nexus One AI — Max Tier Stack
 # Hardware: 4–8× NVIDIA H100/A100/RTX 5090 (80–320 GB VRAM total), 256–512 GB DDR5, 8 TB+ NVMe, 100 GbE
 # Capacity: 100+ concurrent users
 # Runs after NVIDIA driver reboot (phase1_nvidia.yml)
 #
 # Differences from Pro tier:
 #   - vLLM tensor-parallel across 4+ GPUs (set vllm_tensor_parallel to GPU count)
 #   - Full precision models (no quantization required)
 #   - Advanced fine-tuning (QLoRA + DeepSpeed ZeRO-3 for multi-GPU training)
 #   - Full MLflow + MinIO stack for experiment tracking and artifact storage
 #   - All optional services enabled by default
 - name: Nexus One AI — Max Tier Stack
  hosts: localhost
  connection: local
  become: true
  vars:
    cezen_user: "cezen"
    cezen_home: "/opt/cezen"
    cezen_login_home: "/home/cezen"
    python_version: "3.11"
    cuda_version: "12.6"
    skip_roles: ""                # comma-separated list of roles to skip
    gpu_available: false
    tier: "max"
    # ── vLLM — Max defaults ──────────────────────
    # Full-precision Llama-3.1-70B across 4 GPUs by default.
    # For HGX/DGX-class systems with 8 GPUs set vllm_tensor_parallel: 8
    # and switch to Llama-3.1-405B or Mixtral-8x22B.
    vllm_model: "meta-llama/Meta-Llama-3.1-70B-Instruct"
    vllm_tensor_parallel: 4
    vllm_gpu_memory_util: "0.90"
    vllm_max_model_len: 32768
    vllm_quantization: ""          # full precision at Max tier
    # ── Ollama — large model defaults ────────────
    ollama_default_model: "llama3.1:70b"
    # ── DeepSpeed — multi-GPU fine-tuning ────────
    deepspeed_enabled: true
    deepspeed_zero_stage: 3        # ZeRO-3 for large model training
  roles:
    - role: base
      when: "'base' not in skip_roles.split(',')"
    - role: docker
      when: "'docker' not in skip_roles.split(',')"
    - role: k3s
      when: "'k3s' not in skip_roles.split(',')"
    - role: ollama
      when: "'ollama' not in skip_roles.split(',')"
    - role: vllm
      when: "'vllm' not in skip_roles.split(',')"
    - role: jupyterlab
      when: "'jupyterlab' not in skip_roles.split(',')"
    - role: chromadb
      when: "'chromadb' not in skip_roles.split(',')"
    - role: mlflow
      when: "'mlflow' not in skip_roles.split(',')"
    - role: minio
      when: "'minio' not in skip_roles.split(',')"
    - role: monitoring
      when: "'monitoring' not in skip_roles.split(',')"
    - role: cezen-backend
      when: "'cezen-backend' not in skip_roles.split(',')"
    - role: cezen-ttyd
      when: "'cezen-ttyd' not in skip_roles.split(',')"
    - role: cezen-nginx
      when: "'cezen-nginx' not in skip_roles.split(',')"
--- a/ansible/phase1_nvidia.yml
+++ b/ansible/phase1_nvidia.yml
@ -1,6 +1,6 @@
 ---
 # Phase 1: NVIDIA drivers only. Server reboots after this.
- name: Cezen AI — Phase 1 NVIDIA Drivers
+- name: Nexus One AI — Phase 1 NVIDIA Drivers
  hosts: localhost
  connection: local
  become: true
--- a/ansible/pro.yml
+++ b/ansible/pro.yml
@ -0,0 +1,79 @@
 ---
 # Nexus One AI — Pro Tier Stack
 # Hardware: 2× NVIDIA RTX 5090 (32 GB GDDR7 each / 64 GB total), 128 GB DDR5, 4 TB NVMe, 10 GbE
 # Capacity: 20–100 concurrent users
 # Runs after NVIDIA driver reboot (phase1_nvidia.yml)
 #
 # Differences from Basic tier:
 #   - k3s included (multi-service orchestration at this scale)
 #   - MLflow included (fine-tuning tracking needed at Pro)
 #   - MinIO included (model + data storage at scale)
 #   - vLLM runs tensor-parallel across 2 GPUs
 #   - QLoRA fine-tuning available via portal
 - name: Nexus One AI — Pro Tier Stack
  hosts: localhost
  connection: local
  become: true
  vars:
    cezen_user: "cezen"
    cezen_home: "/opt/cezen"
    cezen_login_home: "/home/cezen"
    python_version: "3.11"
    cuda_version: "12.6"
    skip_roles: ""                # comma-separated list of roles to skip
    gpu_available: false
    tier: "pro"
    # ── vLLM — Pro defaults ──────────────────────
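    # Tensor-parallel across 2× RTX 5090 (64 GB combined GDDR7).
    # Llama-3.1-70B at 4-bit (AWQ) fits comfortably; switch to full-precision
    # smaller models via the portal Model Manager.
    # NOTE(review): vllm_quantization "awq" expects an AWQ-quantized checkpoint;
    # confirm the vllm role resolves vllm_model to one before serving.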
    vllm_model: "meta-llama/Meta-Llama-3.1-70B-Instruct"
    vllm_tensor_parallel: 2
    vllm_gpu_memory_util: "0.85"
    vllm_max_model_len: 8192
    vllm_quantization: "awq"
    # ── Ollama — full-size models ─────────────────
    ollama_default_model: "llama3.1:70b"
  roles:
    - role: base
      when: "'base' not in skip_roles.split(',')"
    - role: docker
      when: "'docker' not in skip_roles.split(',')"
    - role: k3s
      when: "'k3s' not in skip_roles.split(',')"
    - role: ollama
      when: "'ollama' not in skip_roles.split(',')"
    - role: vllm
      when: "'vllm' not in skip_roles.split(',')"
    - role: jupyterlab
      when: "'jupyterlab' not in skip_roles.split(',')"
    - role: chromadb
      when: "'chromadb' not in skip_roles.split(',')"
    - role: mlflow
      when: "'mlflow' not in skip_roles.split(',')"
    - role: minio
      when: "'minio' not in skip_roles.split(',')"
    - role: monitoring
      when: "'monitoring' not in skip_roles.split(',')"
    - role: cezen-backend
      when: "'cezen-backend' not in skip_roles.split(',')"
    - role: cezen-ttyd
      when: "'cezen-ttyd' not in skip_roles.split(',')"
    - role: cezen-nginx
      when: "'cezen-nginx' not in skip_roles.split(',')"
--- a/ansible/roles/base/tasks/main.yml
+++ b/ansible/roles/base/tasks/main.yml
@ -50,6 +50,18 @@
    - "{{ cezen_home }}/models"
    - "{{ cezen_home }}/data"
    - "{{ cezen_home }}/logs"
    - "{{ cezen_home }}/scripts"
 - name: Install Cezen operational helper scripts
  copy:
    src: "{{ playbook_dir }}/../scripts/{{ item }}"
    dest: "{{ cezen_home }}/scripts/{{ item }}"
    owner: "{{ cezen_user }}"
    group: "{{ cezen_user }}"
    mode: "0755"
  loop:
    - cezen-backup.sh
    - cezen-feasibility.sh
 - name: Download Miniconda
  get_url:
--- a/ansible/roles/cezen-backend/files/cezen-api.service
+++ b/ansible/roles/cezen-backend/files/cezen-api.service
@ -0,0 +1,20 @@
 # systemd unit that runs the management API (uvicorn serving main:app).
 [Unit]
 Description=Nexus One AI Management API
 # Start ordering only: wait for the network and for ollama.service.
 After=network-online.target ollama.service
 Wants=network-online.target
 [Service]
 Type=simple
 # Runs unprivileged as the cezen account from the backend directory.
 User=cezen
 WorkingDirectory=/opt/cezen/backend
 Environment="CEZEN_DATA=/opt/cezen/data"
 Environment="OLLAMA_URL=http://localhost:11434"
 # The venv's bin directory is first on PATH so its executables take precedence.
 Environment="PATH=/opt/cezen/backend/venv/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin"
 # NOTE(review): --host 0.0.0.0 listens on every interface, while the nginx
 # site proxies to 127.0.0.1:8080. Direct access to port 8080 would bypass the
 # nginx rate limits — confirm this exposure is intended or firewalled.
 ExecStart=/opt/cezen/backend/venv/bin/uvicorn main:app --host 0.0.0.0 --port 8080 --workers 2
 Restart=always
 RestartSec=5
 StandardOutput=journal
 StandardError=journal
 [Install]
 WantedBy=multi-user.target
--- a/ansible/roles/cezen-backend/files/main.py
+++ b/ansible/roles/cezen-backend/files/main.py
--- a/ansible/roles/cezen-backend/files/requirements.txt
+++ b/ansible/roles/cezen-backend/files/requirements.txt
@ -0,0 +1,13 @@
 fastapi>=0.111.0
 uvicorn[standard]>=0.29.0
 python-jose[cryptography]>=3.3.0
 passlib[bcrypt]>=1.7.4
 bcrypt<4.0.0
 psutil>=5.9.0
 python-multipart>=0.0.9
 aiofiles>=23.0.0
 # Document Intelligence
 pymupdf>=1.24.0          # PDF text extraction (fitz)
 python-docx>=1.1.0       # Word document extraction
 # Scheduled Jobs
 apscheduler>=3.10.0      # In-process cron/interval scheduler
--- a/ansible/roles/cezen-backend/files/train_qlora.py
+++ b/ansible/roles/cezen-backend/files/train_qlora.py
@ -0,0 +1,309 @@
 #!/usr/bin/env python3
 """
 Nexus One AI — QLoRA Fine-Tuning Runner
 Launched as a subprocess by the FastAPI backend.
 Writes structured JSONL log lines to --log-path so the UI can stream
 live loss curves and progress. Updates training_jobs.status in SQLite.
 Requires (install on the training node):
    pip install torch transformers datasets peft bitsandbytes trl
 Optional (faster, lower VRAM):
    pip install unsloth
 Usage (called by main.py — do not run manually in production):
    python3 train_qlora.py --job-id 1 --db-path /opt/cezen/data/cezen.db \
        --dataset /opt/cezen/data/datasets/abc.jsonl \
        --base-model mistral:7b --output-dir /opt/cezen/data/finetuned/mymodel \
        --log-path /opt/cezen/data/job_logs/abc.jsonl \
        --epochs 3 --lr 2e-4 --batch-size 4 --lora-r 16 --lora-alpha 32 \
        --output-name mymodel
 """
 import argparse, json, os, sqlite3, sys, time
 from datetime import datetime, timezone
 from pathlib import Path
 # ── Argument parsing ──────────────────────────────────────────────────────────
 parser = argparse.ArgumentParser()
 parser.add_argument("--job-id",     type=int, required=True)
 parser.add_argument("--db-path",    required=True)
 parser.add_argument("--dataset",    required=True)
 parser.add_argument("--base-model", required=True)
 parser.add_argument("--output-dir", required=True)
 parser.add_argument("--log-path",   required=True)
 parser.add_argument("--output-name", required=True)
 parser.add_argument("--epochs",     type=int,   default=3)
 parser.add_argument("--lr",         type=float, default=2e-4)
 parser.add_argument("--batch-size", type=int,   default=4)
 parser.add_argument("--lora-r",     type=int,   default=16)
 parser.add_argument("--lora-alpha", type=int,   default=32)
 args = parser.parse_args()
 # ── Helpers ───────────────────────────────────────────────────────────────────
 def utcnow():
    """Return the current time in UTC as a timezone-aware ISO-8601 string."""
    moment = datetime.now(tz=timezone.utc)
    return moment.isoformat()
 def db_connect():
    """Open the SQLite database given by --db-path, with rows as sqlite3.Row."""
    connection = sqlite3.connect(args.db_path)
    # sqlite3.Row lets callers read columns by name as well as by index.
    connection.row_factory = sqlite3.Row
    return connection
 log_file = open(args.log_path, "a", buffering=1)
 def log(type_: str, **kwargs):
    """Append one JSON line to the job log.

    Each record carries a UTC timestamp ("ts"), the record kind ("type") and
    any extra keyword fields supplied by the caller.
    """
    record = dict(ts=utcnow(), type=type_)
    record.update(kwargs)
    log_file.write(f"{json.dumps(record)}\n")
 def set_status(status: str):
    """Persist the job's status in the training_jobs table.

    Terminal states ("completed", "failed", "cancelled") also stamp
    finished_at with the current UTC time; any other status updates only the
    status column.

    Args:
        status: New status string for the row whose id is --job-id.

    Raises:
        sqlite3.Error: if the UPDATE or commit fails. The connection is still
            closed in that case.
    """
    is_terminal = status in ("completed", "failed", "cancelled")
    db = db_connect()
    try:
        if is_terminal:
            db.execute(
                "UPDATE training_jobs SET status=?, finished_at=? WHERE id=?",
                (status, utcnow(), args.job_id)
            )
        else:
            db.execute("UPDATE training_jobs SET status=? WHERE id=?", (status, args.job_id))
        db.commit()
    finally:
        # Release the connection even when the UPDATE/commit raises, so a
        # failed status write does not leave a handle open on the database.
        db.close()
 # ── Dataset loading ───────────────────────────────────────────────────────────
 def load_dataset_from_file(path: str):
    """Load a JSONL or CSV dataset into a list of dict rows.

    Files ending in ".csv" (case-insensitive) are read with csv.DictReader;
    anything else is treated as JSON Lines, one object per line.

    Parsing is best-effort: blank lines, lines that are not valid JSON, and
    JSON values that are not objects (lists, strings, numbers) are skipped,
    because downstream formatting expects mapping rows.

    Args:
        path: Filesystem path of the dataset.

    Returns:
        list[dict]: parsed rows, possibly empty.
    """
    p = Path(path)
    rows = []
    # "utf-8-sig" reads plain UTF-8 unchanged but strips a leading BOM, which
    # would otherwise make the first JSONL line unparseable and corrupt the
    # first CSV column name.
    if p.suffix.lower() == ".csv":
        import csv
        with open(path, newline="", encoding="utf-8-sig", errors="replace") as f:
            rows.extend(dict(row) for row in csv.DictReader(f))
    else:
        with open(path, encoding="utf-8-sig", errors="replace") as f:
            for line in f:
                line = line.strip()
                if not line:
                    continue
                try:
                    record = json.loads(line)
                except (ValueError, RecursionError):
                    # Malformed line: skip it and keep loading the rest.
                    continue
                if isinstance(record, dict):
                    rows.append(record)
    return rows
 def format_row(row: dict) -> str:
    """Render one dataset row as a single training string.

    Recognised layouts, checked in this order:
      1. {"text": ...}                      -> the text value as-is
      2. {"prompt", "completion"}           -> Instruction/Response template
      3. {"instruction", "output"[, "input"]} -> Instruction/(Input)/Response
    Any other row is flattened by joining its truthy values with spaces.
    """
    if "text" in row:
        return row["text"]

    has_prompt_pair = "prompt" in row and "completion" in row
    if has_prompt_pair:
        return f"### Instruction:\n{row['prompt']}\n\n### Response:\n{row['completion']}"

    has_instruction_pair = "instruction" in row and "output" in row
    if has_instruction_pair:
        extra = row.get("input", "")
        if extra:
            return f"### Instruction:\n{row['instruction']}\n\n### Input:\n{extra}\n\n### Response:\n{row['output']}"
        return f"### Instruction:\n{row['instruction']}\n\n### Response:\n{row['output']}"

    # Unknown schema: keep every non-empty value, in column order.
    pieces = []
    for value in row.values():
        if value:
            pieces.append(str(value))
    return " ".join(pieces)
 # ── Main training routine ─────────────────────────────────────────────────────
 def main():
    """Run one QLoRA fine-tuning job end to end.

    Steps, in order:
      1. Log a "start" record and mark the job "running".
      2. Translate an Ollama-style model tag (e.g. "mistral:7b") into a
         HuggingFace repo id using a small built-in table.
      3. Load the dataset and flatten every row to a training string.
      4. Load the base model in 4-bit and attach LoRA adapters, using Unsloth
         when it is importable and HuggingFace PEFT + BitsAndBytes otherwise.
      5. Tokenise, train with TRL's SFTTrainer, and save model + tokenizer to
         --output-dir, writing a "loss" log record for each logged step.
      6. Try to register the result with Ollama; a failure there is logged as
         a warning and does not fail the job.
      7. Log "complete" and mark the job "completed".

    An empty dataset, or any exception raised during steps 4-5, logs an
    "error" record, marks the job "failed" and exits with status 1.
    """
    log("start", job_id=args.job_id, base_model=args.base_model,
        epochs=args.epochs, lr=args.lr, batch_size=args.batch_size,
        lora_r=args.lora_r, lora_alpha=args.lora_alpha)
    set_status("running")
    # Resolve model name (Ollama uses "mistral:7b" style — strip the tag for HF)
    hf_model = args.base_model
    if ":" in hf_model and "/" not in hf_model:
        # e.g. "mistral:7b" → try to map to HF repo
        name_map = {
            "mistral":  "mistralai/Mistral-7B-v0.1",
            "llama2":   "meta-llama/Llama-2-7b-hf",
            "llama3":   "meta-llama/Meta-Llama-3-8B",
            "phi3":     "microsoft/Phi-3-mini-4k-instruct",
            "gemma":    "google/gemma-7b",
            "codellama":"codellama/CodeLlama-7b-hf",
            "qwen2":    "Qwen/Qwen2-7B",
        }
        base_name = hf_model.split(":")[0].lower()
        # Names missing from the table fall through unchanged, so the tag
        # (e.g. "llama3.1:70b") is passed to the loader as-is; the "Mapped"
        # line below is logged in that case too.
        hf_model = name_map.get(base_name, hf_model)
        log("info", msg=f"Mapped '{args.base_model}' → '{hf_model}' (HuggingFace)")
    # Load dataset
    log("info", msg="Loading dataset...")
    raw_rows = load_dataset_from_file(args.dataset)
    if not raw_rows:
        log("error", msg="Dataset is empty or could not be parsed")
        set_status("failed")
        sys.exit(1)
    texts = [format_row(r) for r in raw_rows]
    log("info", msg=f"Loaded {len(texts)} training examples")
    # Try Unsloth first (faster), fall back to HF PEFT
    use_unsloth = False
    try:
        from unsloth import FastLanguageModel
        use_unsloth = True
        log("info", msg="Using Unsloth for accelerated training")
    except ImportError:
        log("info", msg="Unsloth not available — using HuggingFace PEFT + BitsAndBytes")
    try:
        # Heavy ML imports are deferred to here so a missing package is caught
        # by the except below and recorded as a failed job.
        import torch
        from transformers import TrainingArguments, TrainerCallback
        from datasets import Dataset as HFDataset
        if use_unsloth:
            # NOTE(review): unlike the PEFT branch, this branch does not set
            # tokenizer.pad_token; presumably Unsloth provides one — confirm,
            # since tokenise() pads to max_length.
            model, tokenizer = FastLanguageModel.from_pretrained(
                model_name=hf_model,
                max_seq_length=2048,
                dtype=None,
                load_in_4bit=True,
            )
            model = FastLanguageModel.get_peft_model(
                model,
                r=args.lora_r,
                lora_alpha=args.lora_alpha,
                target_modules=["q_proj","k_proj","v_proj","o_proj","gate_proj","up_proj","down_proj"],
                lora_dropout=0,
                bias="none",
                use_gradient_checkpointing="unsloth",
            )
        else:
            from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
            from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training
            # 4-bit NF4 quantisation with float16 compute.
            bnb_cfg = BitsAndBytesConfig(
                load_in_4bit=True,
                bnb_4bit_quant_type="nf4",
                bnb_4bit_compute_dtype=torch.float16,
            )
            tokenizer = AutoTokenizer.from_pretrained(hf_model, trust_remote_code=True)
            # Reuse EOS as the padding token when the tokenizer defines none.
            if tokenizer.pad_token is None:
                tokenizer.pad_token = tokenizer.eos_token
            model = AutoModelForCausalLM.from_pretrained(
                hf_model,
                quantization_config=bnb_cfg,
                device_map="auto",
                trust_remote_code=True,
            )
            model = prepare_model_for_kbit_training(model)
            lora_cfg = LoraConfig(
                r=args.lora_r,
                lora_alpha=args.lora_alpha,
                target_modules=["q_proj","k_proj","v_proj","o_proj"],
                lora_dropout=0.05,
                bias="none",
                task_type="CAUSAL_LM",
            )
            model = get_peft_model(model, lora_cfg)
        # Tokenise
        def tokenise(examples):
            # Every example is truncated/padded to exactly 2048 tokens.
            return tokenizer(
                examples["text"],
                truncation=True,
                max_length=2048,
                padding="max_length",
            )
        hf_ds = HFDataset.from_dict({"text": texts})
        # The raw "text" column is dropped; only the tokenizer outputs remain.
        hf_ds = hf_ds.map(tokenise, batched=True, remove_columns=["text"])
        # Custom callback to stream loss to our log
        class LossLogger(TrainerCallback):
            def on_log(self, _args, state, control, logs=None, **kwargs):
                # Only log events that carry a "loss" value produce a record.
                if logs and "loss" in logs:
                    log("loss",
                        step=state.global_step,
                        loss=round(float(logs["loss"]), 6),
                        epoch=round(float(logs.get("epoch", 0)), 3),
                        lr=float(logs.get("learning_rate", args.lr)))
        output_dir = args.output_dir
        Path(output_dir).mkdir(parents=True, exist_ok=True)
        from trl import SFTTrainer
        # NOTE(review): the dataset is already tokenised and has no text
        # column, yet dataset_text_field names "input_ids"; confirm this is
        # accepted by the installed TRL version.
        trainer = SFTTrainer(
            model=model,
            tokenizer=tokenizer,
            train_dataset=hf_ds,
            dataset_text_field="input_ids",
            max_seq_length=2048,
            args=TrainingArguments(
                output_dir=output_dir,
                num_train_epochs=args.epochs,
                per_device_train_batch_size=args.batch_size,
                gradient_accumulation_steps=4,
                warmup_steps=5,
                learning_rate=args.lr,
                # Exactly one of fp16/bf16 is enabled, chosen by bf16 support.
                fp16=not torch.cuda.is_bf16_supported(),
                bf16=torch.cuda.is_bf16_supported(),
                logging_steps=1,
                save_strategy="epoch",
                report_to="none",
            ),
            callbacks=[LossLogger()],
        )
        log("info", msg="Training started")
        trainer.train()
        log("info", msg="Training complete — saving model")
        trainer.save_model(output_dir)
        tokenizer.save_pretrained(output_dir)
    except Exception as e:
        # Any failure above (imports, model load, training, saving) fails the
        # job and records the full traceback in the job log.
        import traceback
        log("error", msg=str(e), traceback=traceback.format_exc())
        set_status("failed")
        sys.exit(1)
    # Auto-register with Ollama via Modelfile
    try:
        _register_with_ollama(output_dir, args.output_name)
    except Exception as e:
        # Registration is best-effort: the job is still reported as completed.
        log("warning", msg=f"Could not auto-register with Ollama: {e}")
    log("complete", msg="Job finished successfully", output_dir=output_dir)
    set_status("completed")
 def _register_with_ollama(model_dir: str, model_name: str):
    """Write a Modelfile into model_dir and run `ollama create` against it.

    The outcome is reported through the job log: an "info" record on success,
    a "warning" record carrying stderr when the command exits non-zero.
    Exceptions from writing the file or launching the command (including the
    300-second timeout) propagate to the caller.
    """
    modelfile_path = Path(model_dir) / "Modelfile"
    modelfile_lines = (
        f'FROM {model_dir}\n',
        f'PARAMETER stop "<|im_end|>"\n',
        f'SYSTEM "This is a Nexus One AI fine-tuned model."\n',
    )
    modelfile_path.write_text("".join(modelfile_lines))

    import subprocess
    command = ["ollama", "create", model_name, "-f", str(modelfile_path)]
    outcome = subprocess.run(command, capture_output=True, text=True, timeout=300)
    if outcome.returncode != 0:
        log("warning", msg=f"Ollama registration failed: {outcome.stderr}")
        return
    log("info", msg=f"Model '{model_name}' registered with Ollama")
 if __name__ == "__main__":
    import signal

    def _raise_interrupt(signum, frame):
        """Convert SIGTERM into KeyboardInterrupt so the cancel path runs."""
        raise KeyboardInterrupt

    # Python only turns SIGINT into KeyboardInterrupt. Without this handler a
    # SIGTERM ends the process immediately, so the job would never be marked
    # "cancelled" and the handler below would not run.
    signal.signal(signal.SIGTERM, _raise_interrupt)
    try:
        main()
    except KeyboardInterrupt:
        # Reached for Ctrl-C (SIGINT) and, via the handler above, SIGTERM.
        log("error", msg="Job interrupted (SIGTERM/SIGINT)")
        set_status("cancelled")
        sys.exit(130)
    except Exception as e:
        # Last-resort guard for errors main() did not handle itself.
        import traceback
        log("error", msg=str(e), traceback=traceback.format_exc())
        set_status("failed")
        sys.exit(1)
    finally:
        # Runs on every exit path, including the sys.exit() calls above.
        log_file.close()
--- a/ansible/roles/cezen-backend/handlers/main.yml
+++ b/ansible/roles/cezen-backend/handlers/main.yml
@ -0,0 +1,9 @@
 ---
 - name: Reload systemd
  systemd:
    daemon_reload: yes
 - name: Restart cezen-api
  systemd:
    name: cezen-api
    state: restarted
--- a/ansible/roles/cezen-backend/tasks/main.yml
+++ b/ansible/roles/cezen-backend/tasks/main.yml
@ -0,0 +1,113 @@
 ---
 # cezen-backend role: installs the Nexus One AI FastAPI management API
 - name: Install system Python deps
  apt:
    name:
      - python3-pip
      - python3-venv
      - python3.11
      - python3.11-venv
      - libmupdf-dev      # required by pymupdf (Document Intelligence)
      - mupdf-tools
    state: present
    update_cache: yes
 - name: Create backend directory
  file:
    path: /opt/cezen/backend
    state: directory
    owner: "{{ cezen_user }}"
    group: "{{ cezen_user }}"
    mode: "0755"
 - name: Create data directory (JWT secret + SQLite DB)
  file:
    path: /opt/cezen/data
    state: directory
    owner: "{{ cezen_user }}"
    group: "{{ cezen_user }}"
    mode: "0750"
 - name: Copy FastAPI application
  copy:
    src: main.py
    dest: /opt/cezen/backend/main.py
    owner: "{{ cezen_user }}"
    group: "{{ cezen_user }}"
    mode: "0644"
  notify: Restart cezen-api
 - name: Copy QLoRA training runner
  copy:
    src: train_qlora.py
    dest: /opt/cezen/backend/train_qlora.py
    owner: "{{ cezen_user }}"
    group: "{{ cezen_user }}"
    mode: "0755"
 - name: Copy requirements.txt
  copy:
    src: requirements.txt
    dest: /opt/cezen/backend/requirements.txt
    owner: "{{ cezen_user }}"
    group: "{{ cezen_user }}"
    mode: "0644"
 - name: Create Python virtual environment (Python 3.11)
  become_user: "{{ cezen_user }}"
  command: python3.11 -m venv /opt/cezen/backend/venv
  args:
    creates: /opt/cezen/backend/venv/bin/activate
 - name: Install Python dependencies
  become_user: "{{ cezen_user }}"
  pip:
    requirements: /opt/cezen/backend/requirements.txt
    virtualenv: /opt/cezen/backend/venv
  notify: Restart cezen-api
 - name: Install Pro/Max fine-tuning dependencies
  become_user: "{{ cezen_user }}"
  pip:
    name:
      - torch
      - transformers
      - datasets
      - peft
      - bitsandbytes
      - accelerate
      - trl
      - sentencepiece
    virtualenv: /opt/cezen/backend/venv
  retries: 3
  delay: 15
  when: (tier | default('basic')) in ['pro', 'max']
 - name: Install Max multi-GPU training dependencies
  become_user: "{{ cezen_user }}"
  pip:
    name:
      - deepspeed
    virtualenv: /opt/cezen/backend/venv
  retries: 3
  delay: 15
  when: (tier | default('basic')) == 'max'
 - name: Install systemd service unit
  copy:
    src: cezen-api.service
    dest: /etc/systemd/system/cezen-api.service
    owner: root
    group: root
    mode: "0644"
  notify:
    - Reload systemd
    - Restart cezen-api
 - name: Enable and start cezen-api service
  systemd:
    name: cezen-api
    enabled: yes
    state: started
    daemon_reload: yes
--- a/ansible/roles/cezen-nginx/files/cezen.conf
+++ b/ansible/roles/cezen-nginx/files/cezen.conf
@ -0,0 +1,135 @@
 # /etc/nginx/sites-available/cezen
 # Nexus One AI Portal — serves static portal, proxies API and console terminal
 #
 # Install:
 #   sudo cp cezen.conf /etc/nginx/sites-available/cezen
 #   sudo ln -sf /etc/nginx/sites-available/cezen /etc/nginx/sites-enabled/cezen
 #   sudo rm -f /etc/nginx/sites-enabled/default
 #   sudo nginx -t && sudo systemctl reload nginx
 # ─── Rate limiting zones (must be outside server block) ──────────────────────
 # Login: 5 requests/min per IP, burst of 3 queued, then 429
 limit_req_zone $binary_remote_addr zone=cezen_login:10m rate=5r/m;
 # General API: 60 req/min per IP (generous for dashboard polling)
 limit_req_zone $binary_remote_addr zone=cezen_api:10m   rate=60r/m;
 server {
    listen 80 default_server;
    listen [::]:80 default_server;
    server_name _;
    # Hide server version
    server_tokens off;
    # Logging
    access_log /var/log/nginx/cezen-access.log;
    error_log  /var/log/nginx/cezen-error.log;
    # ─── Global security headers ──────────────────────────────────────────────
    add_header X-Content-Type-Options  "nosniff"                   always;
    add_header X-Frame-Options         "SAMEORIGIN"                always;
    add_header X-XSS-Protection        "1; mode=block"             always;
    add_header Referrer-Policy         "strict-origin-when-cross-origin" always;
    add_header Permissions-Policy      "geolocation=(), camera=(), microphone=()" always;
    # The policy must be ONE quoted argument: nginx does not concatenate
    # adjacent strings, and add_header accepts only "name value [always]",
    # so splitting the value across several quoted strings fails `nginx -t`.
    add_header Content-Security-Policy "default-src 'self'; script-src 'self' 'unsafe-inline'; style-src 'self' 'unsafe-inline'; img-src 'self' data:; connect-src 'self'; frame-src 'self'; font-src 'self'; object-src 'none'; base-uri 'self';" always;
    # ─── robots.txt — block all indexing (air-gapped / private portal) ────────
    location = /robots.txt {
        # The Content-Type of a `return` body comes from default_type. Setting
        # it with add_header would send a second Content-Type header and, like
        # any location-level add_header, stop the server-level headers from
        # being inherited here.
        default_type text/plain;
        return 200 "User-agent: *\nDisallow: /\n";
    }
    # ─── Static Portal ───────────────────────────────────────────────────────
    root /opt/cezen/portal;
    index index.html;
    location / {
        try_files $uri $uri/ /index.html;
    }
    # Cache static assets aggressively
    location ~* \.(css|js|png|jpg|jpeg|gif|ico|svg|woff2?)$ {
        # Long-lived browser caching for static portal assets (matched
        # case-insensitively by file extension).
        # NOTE(review): nginx inherits add_header directives from the server
        # level only when the location defines none of its own, so the
        # add_header below means the server-level security headers are not
        # sent on these responses — confirm whether that is intended.
        expires 7d;
        add_header Cache-Control "public, immutable";
    }
    # ─── Model upload (large files — no size limit, extended timeout) ────────
    location = /api/models/upload {
        client_max_body_size    0;         # unlimited — GGUF files can be 70 GB+
        proxy_request_buffering off;       # stream directly to backend, don't buffer in Nginx
        proxy_read_timeout      7200s;     # 2 hours for slow transfers
        proxy_send_timeout      7200s;
        proxy_pass         http://127.0.0.1:8080;
        proxy_http_version 1.1;
        proxy_set_header   Host              $host;
        proxy_set_header   X-Real-IP         $remote_addr;
        proxy_set_header   X-Forwarded-For   $proxy_add_x_forwarded_for;
    }
    # ─── Login rate limit (tight) ─────────────────────────────────────────────
    location = /api/auth/login {
        limit_req zone=cezen_login burst=3 nodelay;
        limit_req_status 429;
        proxy_pass         http://127.0.0.1:8080;
        proxy_http_version 1.1;
        proxy_set_header   Host              $host;
        proxy_set_header   X-Real-IP         $remote_addr;
        proxy_set_header   X-Forwarded-For   $proxy_add_x_forwarded_for;
        proxy_set_header   X-Forwarded-Proto $scheme;
        proxy_read_timeout 30s;
    }
    # ─── FastAPI Backend (/api/) ──────────────────────────────────────────────
    location /api/ {
        limit_req zone=cezen_api burst=20 nodelay;
        limit_req_status 429;
        proxy_pass         http://127.0.0.1:8080;
        proxy_http_version 1.1;
        proxy_set_header   Host              $host;
        proxy_set_header   X-Real-IP         $remote_addr;
        proxy_set_header   X-Forwarded-For   $proxy_add_x_forwarded_for;
        proxy_set_header   X-Forwarded-Proto $scheme;
        proxy_read_timeout 120s;
    }
    # ─── Web Console (ttyd) (/console/) ──────────────────────────────────────
    location /console/ {
        # Proxies the browser terminal to ttyd on localhost:7681. The trailing
        # slash on proxy_pass strips the /console/ prefix before forwarding.
        # NOTE(review): no authentication or rate-limit directive is applied in
        # this location — confirm access to the terminal is protected
        # elsewhere before exposing the portal.
        proxy_pass         http://127.0.0.1:7681/;
        proxy_http_version 1.1;
        # WebSocket upgrade headers for the terminal connection.
        proxy_set_header   Upgrade           $http_upgrade;
        proxy_set_header   Connection        "upgrade";
        proxy_set_header   Host              $host;
        proxy_set_header   X-Real-IP         $remote_addr;
        # 24-hour read timeout so idle terminal sessions are not dropped.
        proxy_read_timeout 86400s;
        # Rewrite paths so ttyd JS/CSS assets load correctly
        # NOTE(review): sub_filter only rewrites uncompressed response bodies;
        # confirm ttyd does not send its HTML gzip-compressed.
        proxy_redirect     / /console/;
        sub_filter         'href="/'  'href="/console/';
        sub_filter         'src="/'   'src="/console/';
        sub_filter_once    off;
        sub_filter_types   text/html;
    }
    # ─── Block dotfiles and common attack paths ───────────────────────────────
    location ~ /\. {
        deny all;
    }
    location ~* \.(env|git|sql|bak|sh|py)$ {
        deny all;
    }
 }
--- a/ansible/roles/cezen-nginx/handlers/main.yml
+++ b/ansible/roles/cezen-nginx/handlers/main.yml
@ -0,0 +1,5 @@
 ---
 # Handler: reload (not restart) nginx so new config/static files are picked up.
 - name: Reload nginx
  ansible.builtin.systemd:
    state: reloaded
    name: nginx
--- a/ansible/roles/cezen-nginx/tasks/main.yml
+++ b/ansible/roles/cezen-nginx/tasks/main.yml
@ -0,0 +1,59 @@
 ---
 # cezen-nginx role: installs Nginx, deploys portal static files and site config
 # rsync is installed alongside nginx because the synchronize task below
 # shells out to rsync and fails on hosts where it is missing.
 - name: Install Nginx
  apt:
    name:
      - nginx
      - rsync
    state: present
    update_cache: yes
 # Portal root: owned by the service user, group www-data, world-readable.
 - name: Create portal directory
  file:
    path: /opt/cezen/portal
    state: directory
    owner: "{{ cezen_user }}"
    group: www-data
    mode: "0755"
 # Mirror the portal sources into the web root; delete=yes removes files in the
 # destination that no longer exist in the source. Shell scripts and macOS
 # metadata are never published.
 # NOTE(review): src climbs three levels above playbook_dir — confirm this
 # resolves to the cezen-portal directory for the playbook location in use.
 - name: Sync portal static files
  synchronize:
    src: "{{ playbook_dir }}/../../../cezen-portal/"
    dest: /opt/cezen/portal/
    delete: yes
    recursive: yes
    rsync_opts:
      - "--exclude=.DS_Store"
      - "--exclude=*.sh"
  notify: Reload nginx
 # cezen.conf is taken from this role's files/ directory.
 - name: Deploy Nginx site config
  copy:
    src: cezen.conf
    dest: /etc/nginx/sites-available/cezen
    owner: root
    group: root
    mode: "0644"
  notify: Reload nginx
 - name: Enable Cezen site
  file:
    src: /etc/nginx/sites-available/cezen
    dest: /etc/nginx/sites-enabled/cezen
    state: link
  notify: Reload nginx
 # Remove the distro default site from sites-enabled.
 - name: Disable default Nginx site
  file:
    path: /etc/nginx/sites-enabled/default
    state: absent
  notify: Reload nginx
 # Fail the play here if the deployed config is invalid; a failed task also
 # prevents the queued "Reload nginx" handler from running on a broken config.
 - name: Validate Nginx config
  command: nginx -t
  changed_when: false
 - name: Ensure Nginx is enabled and running
  systemd:
    name: nginx
    enabled: yes
    state: started
--- a/ansible/roles/cezen-ttyd/files/cezen-ttyd.service
+++ b/ansible/roles/cezen-ttyd/files/cezen-ttyd.service
@ -0,0 +1,17 @@
 # systemd unit for the browser terminal: runs ttyd on localhost:7681 and
 # restarts it 5 s after any exit.
 [Unit]
 Description=Cezen Web Terminal (ttyd)
 After=network.target
 [Service]
 # Bind to localhost only — Nginx proxies /console/ to this port
 # NOTE(review): per login(1), `-f` skips the password prompt, so every client
 # that reaches this port (directly or via the proxy) gets a cezen-console
 # shell — confirm access is authenticated upstream.
 # NOTE(review): `--writable` is only recognised by newer ttyd releases —
 # confirm the packaged ttyd version accepts it.
 ExecStart=/usr/bin/ttyd \
    --port 7681 \
    --interface 127.0.0.1 \
    --writable \
    login -f cezen-console
 Restart=always
 RestartSec=5
 # ttyd itself runs as root; the spawned session is the cezen-console login.
 User=root
 [Install]
 WantedBy=multi-user.target
--- a/ansible/roles/cezen-ttyd/handlers/main.yml
+++ b/ansible/roles/cezen-ttyd/handlers/main.yml
@ -0,0 +1,9 @@
 ---
 # Handler: make systemd re-read unit files after the service file changes.
 - name: Reload systemd
  ansible.builtin.systemd:
    daemon_reload: true
 # Handler: restart the web terminal so a changed unit takes effect.
 - name: Restart cezen-ttyd
  ansible.builtin.systemd:
    state: restarted
    name: cezen-ttyd
--- a/ansible/roles/cezen-ttyd/tasks/main.yml
+++ b/ansible/roles/cezen-ttyd/tasks/main.yml
@ -0,0 +1,72 @@
 ---
 # cezen-ttyd role: browser-based terminal via ttyd, bound to localhost
 # Install the ttyd package from apt, refreshing the package index first.
 - name: Install ttyd
  ansible.builtin.apt:
    update_cache: true
    state: present
    name: ttyd
 # Creates the console account (bash shell, member of the cezen_user group)
 # and sets its initial password in the same step.
 #
 # The password is applied only when the account is first created
 # (update_password: on_create), so re-running the playbook no longer resets a
 # password the operator has changed. Override the initial value with
 # -e cezen_console_password=...; the fallback keeps the previous default.
 # Change this password after first login or use PAM/SSO integration.
 - name: Create cezen-console restricted user
  user:
    name: cezen-console
    shell: /bin/bash
    comment: "Cezen Web Console User"
    groups: "{{ cezen_user }}"
    append: yes
    state: present
    create_home: yes
    password: "{{ cezen_console_password | default('CezenConsole2024!') | password_hash('sha512') }}"
    update_password: on_create
  # Keep the password (plain or hashed) out of Ansible output and logs.
  no_log: true
 # Home directory: owner full access, group read/traverse, nothing for others.
 - name: Restrict cezen-console home directory
  ansible.builtin.file:
    path: /home/cezen-console
    mode: "0750"
    owner: cezen-console
    group: cezen-console
 # Writes the console user's .bashrc: prompt, status shortcuts, and
 # interactive (-i) variants of rm/mv/cp.
 - name: Add useful aliases for console user
  ansible.builtin.copy:
    dest: /home/cezen-console/.bashrc
    mode: "0644"
    owner: cezen-console
    group: cezen-console
    content: |
      # Cezen Web Console — restricted shell environment
      PS1='\[\033[01;32m\]cezen-console\[\033[00m\]:\[\033[01;34m\]\w\[\033[00m\]\$ '
      # Useful shortcuts
      alias ll='ls -lah --color=auto'
      alias logs='journalctl -u cezen-api -f'
      alias api-status='systemctl status cezen-api'
      alias ollama-ps='ollama ps'
      alias gpu='nvidia-smi'
      alias ports='ss -tlnp'
      # Prevent accidental system damage
      alias rm='rm -i'
      alias mv='mv -i'
      alias cp='cp -i'
 # Unit file comes from this role's files/; a change triggers a daemon reload
 # followed by a service restart via the handlers.
 - name: Install ttyd systemd service
  ansible.builtin.copy:
    src: cezen-ttyd.service
    dest: /etc/systemd/system/cezen-ttyd.service
    mode: "0644"
    owner: root
    group: root
  notify: [Reload systemd, Restart cezen-ttyd]
 # daemon_reload here makes sure systemd knows the unit before starting it.
 - name: Enable and start ttyd service
  ansible.builtin.systemd:
    daemon_reload: true
    name: cezen-ttyd
    enabled: true
    state: started
--- a/ansible/roles/jupyterlab/tasks/main.yml
+++ b/ansible/roles/jupyterlab/tasks/main.yml
@ -51,7 +51,7 @@
  copy:
    dest: /opt/cezen/notebooks/README.md
    content: |
-      # Cezen AI Suite — JupyterLab
+      # Nexus One AI — JupyterLab
      Default token: `cezen2024`
--- a/ansible/roles/mlflow/tasks/main.yml
+++ b/ansible/roles/mlflow/tasks/main.yml
@ -53,5 +53,5 @@
  wait_for:
    host: localhost
    port: 5000
-    timeout: 30
+    timeout: 120
  ignore_errors: true
--- a/ansible/roles/monitoring/tasks/main.yml
+++ b/ansible/roles/monitoring/tasks/main.yml
@ -25,6 +25,7 @@
  register: dcgm_result
  failed_when: dcgm_result.rc != 0 and 'already in use' not in dcgm_result.stderr
  ignore_errors: true
  when: gpu_available | default(false) | bool
 # ── Prometheus ──────────────────────────────────────────
 - name: Write Prometheus config
@ -99,7 +100,8 @@
  wait_for:
    host: localhost
    port: 3000
-    timeout: 60
+    timeout: 120
  register: grafana_wait
  ignore_errors: true
 - name: Add Prometheus datasource to Grafana
@ -118,6 +120,7 @@
      isDefault: true
    status_code: [200, 409]  # 409 = already exists, that's fine
  ignore_errors: true
  when: not (grafana_wait is failed)
 - name: Import NVIDIA GPU dashboard (ID 12239)
  uri:
@ -143,3 +146,6 @@
        uid: "nvidia-gpu"
    status_code: [200, 412]
  ignore_errors: true
  when:
    - not (grafana_wait is failed)
    - gpu_available | default(false) | bool
--- a/ansible/roles/nvidia/tasks/main.yml
+++ b/ansible/roles/nvidia/tasks/main.yml
@ -1,6 +1,6 @@
 ---
 # NVIDIA role: Drivers + CUDA + cuDNN
-# NOTE: Tested on L40S (Entry) and A40 (lab). Requires reboot after this role.
+# NOTE: Tested on RTX Pro 6000 (Entry) and A40 (lab). Requires reboot after this role.
 #       If no GPU is present, this role will install drivers but nvidia-smi won't show GPUs.
 - name: Add NVIDIA package repository key
--- a/ansible/roles/ollama/tasks/main.yml
+++ b/ansible/roles/ollama/tasks/main.yml
@ -28,7 +28,7 @@
      Environment="PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin"
      Environment="OLLAMA_HOST=0.0.0.0:11434"
      Environment="OLLAMA_MODELS=/opt/cezen/models/ollama"
-      Environment="CUDA_VISIBLE_DEVICES=0,1,2"
+      Environment="CUDA_VISIBLE_DEVICES=0"
      [Install]
      WantedBy=multi-user.target
@ -54,50 +54,49 @@
    port: 11434
    timeout: 60
- name: Pull default models (Llama 3.1 8B + Mistral 7B)
+- name: Select tier model set
  set_fact:
    ollama_models: >-
      {{
        {
          'starter': ['phi3:mini', 'nomic-embed-text'],
          'basic': ['llama3.1:8b', 'mistral:7b', 'nomic-embed-text', 'codellama:13b'],
          'entry': ['llama3.1:8b', 'mistral:7b', 'nomic-embed-text', 'codellama:13b'],
          'pro': ['llama3.1:8b', 'mistral:7b', 'nomic-embed-text', 'codellama:13b', 'llama3.1:70b', 'mixtral:8x7b', 'deepseek-coder-v2:16b'],
          'max': ['llama3.1:8b', 'mistral:7b', 'nomic-embed-text', 'codellama:13b', 'llama3.1:70b', 'mixtral:8x7b', 'deepseek-coder-v2:16b', 'llama3.1:405b', 'mixtral:8x22b']
        }.get(tier | default('basic'), ['llama3.1:8b', 'mistral:7b', 'nomic-embed-text'])
      }}
 - name: Pull tier Ollama models
  become_user: cezen
  command: ollama pull {{ item }}
-  loop:
+  loop: "{{ ollama_models }}"
    - llama3.1:8b
    - mistral:7b
  environment:
    OLLAMA_HOST: "http://localhost:11434"
  retries: 3
  delay: 15
-  # NOTE: Models are large (~5GB each). This step takes time on first run.
+  # NOTE: Pro/Max models are very large. Skip with --skip-model-pull for
-  # Skip by setting: ansible-playbook ... -e "skip_model_pull=true"
+  # bandwidth-constrained installs, then run models/pull-models.sh later.
  when: not (skip_model_pull | default(false))
 # Open WebUI (chat interface on top of Ollama)
- name: Deploy Open WebUI via Docker
+- name: Start Open WebUI via Docker CLI
  community.docker.docker_container:
    name: open-webui
    image: ghcr.io/open-webui/open-webui:main
    state: started
    restart_policy: always
    ports:
      - "3001:8080"
    volumes:
      - open-webui:/app/backend/data
    env:
      OLLAMA_BASE_URL: "http://host-gateway:11434"
    etc_hosts:
      host-gateway: "172.17.0.1"
  # Note: Requires docker community collection. Install with:
  # ansible-galaxy collection install community.docker
  ignore_errors: true  # Falls back gracefully if docker collection not available
 - name: Alternative Open WebUI start (if community.docker not available)
  shell: |
-    docker run -d \
+    if docker ps -a --format '{{ "{{" }}.Names{{ "}}" }}' | grep -qx open-webui; then
-      --name open-webui \
+      docker start open-webui
-      --restart always \
+    else
-      -p 3001:8080 \
+      docker run -d \
-      --add-host=host-gateway:172.17.0.1 \
+        --name open-webui \
-      -v open-webui:/app/backend/data \
+        --restart always \
-      -e OLLAMA_BASE_URL=http://host-gateway:11434 \
+        -p 3001:8080 \
-      ghcr.io/open-webui/open-webui:main
+        --add-host=host-gateway:172.17.0.1 \
        -v open-webui:/app/backend/data \
        -e OLLAMA_BASE_URL=http://host-gateway:11434 \
        ghcr.io/open-webui/open-webui:main
    fi
  args:
    executable: /bin/bash
  register: webui_result
-  failed_when: webui_result.rc != 0 and 'already in use' not in webui_result.stderr
+  changed_when: webui_result.rc == 0
  failed_when: webui_result.rc != 0 and 'already in use' not in (webui_result.stderr | default(''))
  ignore_errors: true
--- a/ansible/roles/vllm/defaults/main.yml
+++ b/ansible/roles/vllm/defaults/main.yml
@ -0,0 +1,7 @@
 ---
 # vLLM role defaults — overridden per-tier in the tier playbook vars block
 # HuggingFace model ID passed to vLLM as --model.
 vllm_model: "meta-llama/Meta-Llama-3.1-8B-Instruct"
 # Number of GPUs used for tensor parallelism (--tensor-parallel-size).
 vllm_tensor_parallel: 1
 # Fraction of VRAM reserved for vLLM (--gpu-memory-utilization), 0.0–1.0.
 vllm_gpu_memory_util: "0.70"
 # Maximum context length in tokens (--max-model-len).
 vllm_max_model_len: 8192
 # Empty string means the role emits no --quantization flag at all.
 # NOTE(review): a non-empty value presumably requires a checkpoint that was
 # quantized with that method — confirm before setting it per tier.
 vllm_quantization: ""              # blank = full precision; set to "awq" for 4-bit
--- a/ansible/roles/vllm/tasks/main.yml
+++ b/ansible/roles/vllm/tasks/main.yml
@ -1,6 +1,12 @@
 ---
 # vLLM — high-performance LLM inference with OpenAI-compatible API
 # Skipped automatically if no GPU is present.
 # Variables (set defaults in defaults/main.yml, override per-tier in the playbook):
 #   vllm_model              HuggingFace model ID to load on start
 #   vllm_tensor_parallel    Number of GPUs for tensor parallelism (1 for Starter/Basic)
 #   vllm_gpu_memory_util    Fraction of VRAM to reserve for vLLM (0.0–1.0)
 #   vllm_max_model_len      Maximum context length in tokens
 #   vllm_quantization       Quantization method: "" (none) | "awq" | "gptq" | "fp8"
 - name: Check for NVIDIA GPU
  shell: nvidia-smi --query-gpu=name --format=csv,noheader 2>/dev/null | head -1
@ -10,7 +16,9 @@
 - name: Skip vLLM if no GPU detected
  debug:
-    msg: "No GPU detected — skipping vLLM image pull. Run manually when GPU is available: docker pull vllm/vllm-openai:latest"
+    msg: >
      No GPU detected — skipping vLLM image pull.
      Run manually when GPU is available: docker pull vllm/vllm-openai:latest
  when: gpu_check.stdout == "" or gpu_check.rc != 0
 - name: Pull vLLM Docker image
@ -19,18 +27,23 @@
  delay: 15
  when: gpu_check.stdout != "" and gpu_check.rc == 0
 - name: Build vLLM quantization flag
  set_fact:
    vllm_quant_flag: "{{ '--quantization ' + vllm_quantization if vllm_quantization != '' else '' }}"
 - name: Create vLLM systemd service
  copy:
    dest: /etc/systemd/system/vllm.service
    mode: "0644"
    content: |
      [Unit]
-      Description=vLLM OpenAI-Compatible Inference Server
+      Description=vLLM OpenAI-Compatible Inference Server ({{ vllm_model }})
-      After=docker.service ollama.service
+      After=docker.service
      Requires=docker.service
      [Service]
      Restart=always
-      RestartSec=5
+      RestartSec=10
      ExecStartPre=-/usr/bin/docker stop vllm
      ExecStartPre=-/usr/bin/docker rm vllm
      ExecStart=/usr/bin/docker run \
@ -41,15 +54,16 @@
        -v /opt/cezen/models:/root/.cache/huggingface \
        -e HF_HOME=/root/.cache/huggingface \
        vllm/vllm-openai:latest \
-        --model meta-llama/Meta-Llama-3.1-8B-Instruct \
+        --model {{ vllm_model }} \
-        --gpu-memory-utilization 0.7 \
+        --gpu-memory-utilization {{ vllm_gpu_memory_util }} \
-        --max-model-len 8192 \
+        --max-model-len {{ vllm_max_model_len }} \
-        --tensor-parallel-size 1
+        --tensor-parallel-size {{ vllm_tensor_parallel }} \
        {{ vllm_quant_flag }}
      ExecStop=/usr/bin/docker stop vllm
      TimeoutStartSec=300
      [Install]
      WantedBy=multi-user.target
    mode: "0644"
 - name: Create vLLM model directory
  file:
@ -57,3 +71,26 @@
    state: directory
    owner: cezen
    group: cezen
    mode: "0755"
 - name: Write vLLM tier config file (for portal reference)
  copy:
    dest: /opt/cezen/vllm-config.json
    owner: cezen
    group: cezen
    mode: "0644"
    content: |
      {
        "model": "{{ vllm_model }}",
        "tensor_parallel_size": {{ vllm_tensor_parallel }},
        "gpu_memory_utilization": {{ vllm_gpu_memory_util }},
        "max_model_len": {{ vllm_max_model_len }},
        "quantization": "{{ vllm_quantization }}"
      }
 - name: Enable and start vLLM service
  systemd:
    name: vllm
    enabled: true
    daemon_reload: true
  when: gpu_check.stdout != "" and gpu_check.rc == 0
--- a/ansible/starter.yml
+++ b/ansible/starter.yml
@ -0,0 +1,76 @@
 ---
 # Nexus One AI — Starter Tier Stack
 # Hardware: 1× NVIDIA RTX 5090 (32 GB GDDR7), 64 GB DDR5, 2 TB NVMe, 2.5 GbE
 # Capacity: 1–5 concurrent users
 # Runs after NVIDIA driver reboot (phase1_nvidia.yml)
 #
 # Differences from Basic tier:
 #   - No k3s (Kubernetes overhead not justified for 64 GB RAM / 1-5 users)
 #   - No MLflow (fine-tuning tracking overkill for Starter)
 #   - No MinIO (local model cache is sufficient)
 #   - vLLM serves Phi-3 Mini by default (small enough to run unquantized)
 #   - JupyterLab is optional (off by default, wizard can enable)
 #
 # Every role can be skipped with -e skip_roles=role1,role2.
 - name: Nexus One AI — Starter Tier Stack
  hosts: localhost
  connection: local
  become: true
  vars:
    cezen_user: "cezen"
    cezen_home: "/opt/cezen"
    cezen_login_home: "/home/cezen"
    python_version: "3.11"
    # NOTE(review): confirm the minimum CUDA release that supports the RTX 5090
    # against NVIDIA's release notes before relying on this value.
    cuda_version: "12.6"          # RTX 5090 requires CUDA 12.6+
    skip_roles: ""                # comma-separated list of roles to skip
    # NOTE(review): the monitoring role gates its GPU tasks on this flag;
    # presumably the installer overrides it with -e when a GPU is detected —
    # verify, otherwise GPU monitoring is never set up on this tier.
    gpu_available: false
    tier: "starter"
    # ── vLLM — Starter defaults ──────────────────
    # Phi-3 Mini fits comfortably in 32 GB GDDR7 without quantization.
    # Users can swap to Llama-3.2-3B-Instruct via the portal Model Manager.
    vllm_model: "microsoft/Phi-3-mini-4k-instruct"
    vllm_tensor_parallel: 1
    vllm_gpu_memory_util: "0.85"
    vllm_max_model_len: 4096
    # Left blank so the vllm role emits no --quantization flag: the model above
    # is the standard (non-AWQ) checkpoint, and vLLM can only apply "awq" to a
    # checkpoint that was exported with AWQ. Set this to "awq" only together
    # with an AWQ-quantized vllm_model.
    vllm_quantization: ""
    # ── Ollama — lightweight models ───────────────
    ollama_default_model: "phi3:mini"
  roles:
    - role: base
      when: "'base' not in skip_roles.split(',')"
    - role: docker
      when: "'docker' not in skip_roles.split(',')"
    # k3s intentionally omitted for Starter — insufficient RAM headroom
    - role: ollama
      when: "'ollama' not in skip_roles.split(',')"
    - role: vllm
      when: "'vllm' not in skip_roles.split(',')"
    - role: chromadb
      when: "'chromadb' not in skip_roles.split(',')"
    # mlflow / minio omitted for Starter
    - role: monitoring
      when: "'monitoring' not in skip_roles.split(',')"
    - role: cezen-backend
      when: "'cezen-backend' not in skip_roles.split(',')"
    - role: cezen-ttyd
      when: "'cezen-ttyd' not in skip_roles.split(',')"
    - role: cezen-nginx
      when: "'cezen-nginx' not in skip_roles.split(',')"
    # JupyterLab — optional, install only when explicitly requested
    - role: jupyterlab
      when: >
        'jupyterlab' not in skip_roles.split(',') and
        install_jupyterlab | default(false) | bool
--- a/autoinstall/build-iso-starter.sh
+++ b/autoinstall/build-iso-starter.sh
@ -0,0 +1,199 @@
 #!/usr/bin/env bash
 # ─────────────────────────────────────────────────────────────
 # Nexus One AI — Starter Tier ISO Builder
 # Hardware target: compact workstation (1× RTX 5090, 64 GB RAM, 2 TB NVMe)
 #
 # Usage (needs root: installs build tools with apt-get):
 #   cd ~/aipackage
 #   sudo bash autoinstall/build-iso-starter.sh
 #
 # Output: autoinstall/cezen-ai-starter-ubuntu2204.iso
 # Flash to USB:
 #   diskutil unmountDisk /dev/diskN
 #   sudo dd if=cezen-ai-starter-ubuntu2204.iso of=/dev/diskN bs=4m status=progress
 #
 # Steps: download the stock Ubuntu ISO, unpack it, inject the Starter
 # autoinstall files, patch GRUB, repack, regenerate md5sum.txt from the
 # repacked contents, repack again, then verify the final image.
 # ─────────────────────────────────────────────────────────────
 set -euo pipefail
 SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
 PACKAGE_DIR="$(cd "$SCRIPT_DIR/.." && pwd)"
 WORK_DIR="/tmp/cezen-iso-starter-work"
 ORIGINAL_ISO="/tmp/ubuntu-22.04.5-live-server-amd64.iso"
 OUTPUT_ISO="$SCRIPT_DIR/cezen-ai-starter-ubuntu2204.iso"
 UBUNTU_URL="https://releases.ubuntu.com/22.04.5/ubuntu-22.04.5-live-server-amd64.iso"
 TIER="starter"
 VOLUME_ID="CezenAI_Starter_2204"
 MD5_LOG="/tmp/cezen-iso-md5check-starter.log"

 # Temp paths are filled in as they are created. The trap is installed up
 # front so a failure at any step still removes whatever exists by then.
 MBR_TEMPLATE=""
 EFI_IMG=""
 FINAL_DIR=""
 VERIFY_DIR=""
 cleanup() {
  local path
  for path in "$WORK_DIR" "$MBR_TEMPLATE" "$EFI_IMG" "$FINAL_DIR" "$VERIFY_DIR"; do
    if [ -n "$path" ]; then
      rm -rf "$path"
    fi
  done
  return 0
 }
 trap cleanup EXIT

 # Pack the directory tree in $1 into $OUTPUT_ISO, reusing the MBR template and
 # EFI partition image taken from the original Ubuntu ISO.
 repack_iso() {
  local tree="$1"
  xorriso -as mkisofs \
    -r \
    -V "$VOLUME_ID" \
    -o "$OUTPUT_ISO" \
    --grub2-mbr "$MBR_TEMPLATE" \
    -partition_offset 16 \
    --mbr-force-bootable \
    -append_partition 2 28732ac11ff8d211ba4b00a0c93ec93b "$EFI_IMG" \
    -appended_part_as_gpt \
    -iso_mbr_part_type a2a0d0ebe5b9334487c068b6b72699c7 \
    -c "/boot.catalog" \
    -b "/boot/grub/i386-pc/eltorito.img" \
    -no-emul-boot \
    -boot-load-size 4 \
    -boot-info-table \
    --grub2-boot-info \
    -eltorito-alt-boot \
    -e "--interval:appended_partition_2:::" \
    -no-emul-boot \
    "$tree"
 }

 echo "╔══════════════════════════════════════════════════════╗"
 echo "║   Nexus One AI — ISO Builder  [STARTER TIER]            ║"
 echo "║   RTX 5090 · 64 GB RAM · 2 TB NVMe · 1–5 users     ║"
 echo "╚══════════════════════════════════════════════════════╝"
 echo ""
 if [ "$(id -u)" -ne 0 ]; then
  echo "ERROR: this script installs packages with apt-get and must run as root." >&2
  echo "Run: sudo bash $0" >&2
  exit 1
 fi
 # ── Install build tools ────────────────────────
 echo "→ Installing build tools..."
 apt-get update -qq
 apt-get install -y -qq xorriso wget isolinux rsync
 echo "✓ Tools ready"
 # ── Download Ubuntu ISO ────────────────────────
 # The download goes to a .part file and is renamed only on success, so an
 # interrupted transfer is never mistaken for a complete ISO on the next run.
 if [ -s "$ORIGINAL_ISO" ]; then
  echo "✓ Ubuntu ISO already downloaded"
 else
  echo "→ Downloading Ubuntu 22.04.5 Server ISO (~1.8 GB)..."
  wget --show-progress -O "$ORIGINAL_ISO.part" "$UBUNTU_URL"
  mv -f "$ORIGINAL_ISO.part" "$ORIGINAL_ISO"
  echo "✓ Downloaded"
 fi
 # ── Extract ISO ────────────────────────────────
 echo "→ Extracting ISO..."
 rm -rf "$WORK_DIR"
 mkdir -p "$WORK_DIR"
 xorriso -osirrox on \
  -indev "$ORIGINAL_ISO" \
  -extract / "$WORK_DIR" 2>/dev/null
 # Extracted files are read-only; make them writable so they can be patched.
 chmod -R u+w "$WORK_DIR"
 echo "✓ Extracted"
 # ── Inject Starter autoinstall files ──────────
 echo "→ Injecting Starter autoinstall config..."
 mkdir -p "$WORK_DIR/nocloud"
 cp "$SCRIPT_DIR/user-data-starter" "$WORK_DIR/nocloud/user-data"
 cp "$SCRIPT_DIR/meta-data"         "$WORK_DIR/nocloud/meta-data"
 echo "✓ user-data-starter and meta-data injected"
 # ── Online installer mode ──────────────────────
 # The installed system pulls the current package from cgit on first boot. This
 # keeps the ISO small and avoids shipping stale backend/portal code.
 echo "✓ Online installer mode: package will be pulled from cgit on first boot"
 # ── Patch GRUB ────────────────────────────────
 echo "→ Patching GRUB config..."
 GRUB_CFG="$WORK_DIR/boot/grub/grub.cfg"
 cp "$GRUB_CFG" "$GRUB_CFG.orig"
 sed -i "s/set timeout=.*/set timeout=5/" "$GRUB_CFG"
 sed -i "s/set timeout_style=.*/set timeout_style=countdown/" "$GRUB_CFG"
 # Add the autoinstall + nocloud datasource arguments to each kernel line.
 sed -i '/^\s*linux.*vmlinuz/s|---|autoinstall ds=nocloud\\;s=/cdrom/nocloud/ ---|' "$GRUB_CFG"
 # Update GRUB title to reflect Starter tier
 sed -i 's/Install Ubuntu Server/Install Nexus One AI — Starter Tier/' "$GRUB_CFG" || true
 echo "✓ GRUB patched"
 # ── Extract MBR and EFI boot data ─────────────
 echo "→ Extracting boot data from original ISO..."
 MBR_TEMPLATE=$(mktemp)
 EFI_IMG=$(mktemp)
 dd if="$ORIGINAL_ISO" bs=1 count=432 of="$MBR_TEMPLATE" 2>/dev/null
 # `|| true` keeps a no-match grep from aborting the script under `set -e`,
 # so the explicit diagnostic below is reached; head keeps the first match only.
 EFI_LINE=$(fdisk -l "$ORIGINAL_ISO" 2>/dev/null | grep "EFI" | head -n 1 || true)
 echo "  EFI partition info: $EFI_LINE"
 EFI_START=$(echo "$EFI_LINE" | awk '{print $2}')
 EFI_SIZE=$(echo  "$EFI_LINE" | awk '{print $4}')
 if [ -z "$EFI_START" ] || [ -z "$EFI_SIZE" ]; then
  echo "ERROR: Could not detect EFI partition in ISO."
  echo "Run: fdisk -l $ORIGINAL_ISO"
  exit 1
 fi
 dd if="$ORIGINAL_ISO" bs=512 skip="$EFI_START" count="$EFI_SIZE" \
   of="$EFI_IMG" 2>/dev/null
 echo "✓ EFI partition extracted (start=$EFI_START, size=$EFI_SIZE)"
 # ── Repack ISO (pass 1) ────────────────────────
 echo "→ Repacking ISO (pass 1)..."
 repack_iso "$WORK_DIR"
 # ── Refresh md5sum.txt and repack (pass 2) ────
 # The manifest is rebuilt from the pass-1 image contents so it matches what
 # the final ISO actually contains.
 echo "→ Refreshing md5sum.txt..."
 FINAL_DIR=$(mktemp -d)
 VERIFY_DIR=$(mktemp -d)
 xorriso -osirrox on -indev "$OUTPUT_ISO" -extract / "$FINAL_DIR" >/dev/null 2>&1
 chmod -R u+w "$FINAL_DIR"
 (
  cd "$FINAL_DIR"
  rm -f md5sum.txt
  find . -type f \
    ! -path './md5sum.txt' \
    ! -path './boot.catalog' \
    -print0 \
    | sort -z \
    | xargs -0 md5sum > md5sum.txt
 )
 echo "✓ md5sum.txt refreshed"
 echo "→ Repacking ISO (pass 2)..."
 repack_iso "$FINAL_DIR"
 # ── Verify output ISO ──────────────────────────
 echo "→ Verifying rebuilt ISO manifest..."
 xorriso -osirrox on -indev "$OUTPUT_ISO" -extract / "$VERIFY_DIR" >/dev/null 2>&1
 chmod -R u+w "$VERIFY_DIR"
 (
  cd "$VERIFY_DIR"
  md5sum -c md5sum.txt >"$MD5_LOG" 2>&1 || {
    echo "ERROR: Rebuilt ISO failed its own md5sum.txt verification."
    sed -n '1,40p' "$MD5_LOG"
    exit 1
  }
 )
 echo "✓ Output ISO manifest verified"
 echo ""
 echo "╔══════════════════════════════════════════════════════╗"
 echo "║   Done! Starter Tier ISO ready.                     ║"
 echo "╚══════════════════════════════════════════════════════╝"
 echo ""
 ls -lh "$OUTPUT_ISO"
 echo ""
 echo "→ Transfer to MacBook:"
 echo "   scp user@server:~/aipackage/autoinstall/cezen-ai-starter-ubuntu2204.iso ."
 echo ""
 echo "→ Flash to USB (macOS):"
 echo "   diskutil list                            # find USB e.g. /dev/disk4"
 echo "   diskutil unmountDisk /dev/disk4"
 echo "   sudo dd if=cezen-ai-starter-ubuntu2204.iso of=/dev/disk4 bs=4m status=progress"
 echo ""
 echo "→ Post-flash: boot the workstation from USB."
 echo "   Unattended install completes in ~10 min."
 echo "   First-boot wizard runs on tty1 — set IP, org name, admin password."
 echo "   Then run: sudo bash /opt/aipackage/install.sh --tier starter"
--- a/autoinstall/build-iso.sh
+++ b/autoinstall/build-iso.sh
@ -1,6 +1,6 @@
 #!/usr/bin/env bash
 # ─────────────────────────────────────────────────────────────
-# Cezen AI Suite — Custom ISO Builder
+# Nexus One AI — Custom ISO Builder
 # Runs directly on Ubuntu 22.04 (run on the server)
 #
 # Usage:
@ -20,7 +20,7 @@ OUTPUT_ISO="$SCRIPT_DIR/cezen-ai-ubuntu2204.iso"
 UBUNTU_URL="https://releases.ubuntu.com/22.04.5/ubuntu-22.04.5-live-server-amd64.iso"
 echo "╔══════════════════════════════════════════╗"
-echo "║   Cezen AI — ISO Builder                 ║"
+echo "║   Nexus One AI — ISO Builder                 ║"
 echo "╚══════════════════════════════════════════╝"
 echo ""
@ -56,15 +56,10 @@ cp "$SCRIPT_DIR/user-data" "$WORK_DIR/nocloud/user-data"
 cp "$SCRIPT_DIR/meta-data" "$WORK_DIR/nocloud/meta-data"
 echo "✓ user-data and meta-data injected"
-# Keep the installer payload on the ISO so first boot does not depend on a
+# Keep this as an online installer ISO. The installed system pulls the current
-# private Git server being reachable before the setup UI can start.
+# Nexus One AI package from cgit during first boot, which keeps the ISO small and
-echo "→ Bundling Cezen AI installer payload..."
+# avoids shipping stale backend/portal code inside the image.
-mkdir -p "$WORK_DIR/cezen-aipackage"
+echo "✓ Online installer mode: package will be pulled from cgit on first boot"
 rsync -a --delete \
  --exclude 'autoinstall/cezen-ai-ubuntu2204.iso' \
  --exclude '*.iso' \
  "$PACKAGE_DIR/" "$WORK_DIR/cezen-aipackage/"
 echo "✓ Installer payload bundled"
 # ── Patch GRUB ────────────────────────────────
 echo "→ Patching GRUB config..."
@ -82,20 +77,6 @@ sed -i "s/set timeout_style=.*/set timeout_style=countdown/" "$GRUB_CFG"
 sed -i '/^\s*linux.*vmlinuz/s|---|autoinstall ds=nocloud\\;s=/cdrom/nocloud/ ---|' "$GRUB_CFG"
 echo "✓ GRUB patched"
 # ── Refresh installer checksum manifest ─────────────────
 echo "→ Refreshing md5sum.txt..."
 (
  cd "$WORK_DIR"
  rm -f md5sum.txt
  find . -type f \
    ! -path './md5sum.txt' \
    ! -path './boot.catalog' \
    -print0 \
    | sort -z \
    | xargs -0 md5sum > md5sum.txt
 )
 echo "✓ md5sum.txt refreshed"
 # ── Extract MBR and EFI partition from original ISO ────
 echo "→ Extracting boot data from original ISO..."
 MBR_TEMPLATE=$(mktemp)
@ -119,7 +100,7 @@ dd if="$ORIGINAL_ISO" bs=512 skip="$EFI_START" count="$EFI_SIZE" \
 echo "✓ EFI partition extracted (start=$EFI_START, size=$EFI_SIZE)"
 # ── Repack ISO ─────────────────────────────────
-echo "→ Repacking ISO (this takes ~2 minutes)..."
+echo "→ Repacking ISO (pass 1)..."
 xorriso -as mkisofs \
  -r \
  -V "Cezen_AI_Ubuntu2204" \
@ -141,6 +122,64 @@ xorriso -as mkisofs \
  -no-emul-boot \
  "$WORK_DIR"
 echo "→ Refreshing md5sum.txt from pass-1 ISO contents..."
 FINAL_DIR=$(mktemp -d)
 VERIFY_DIR=$(mktemp -d)
 trap 'rm -rf "$WORK_DIR" "$MBR_TEMPLATE" "$EFI_IMG" "$FINAL_DIR" "$VERIFY_DIR"' EXIT
 xorriso -osirrox on \
  -indev "$OUTPUT_ISO" \
  -extract / "$FINAL_DIR" >/dev/null 2>&1
 chmod -R u+w "$FINAL_DIR"
 (
  cd "$FINAL_DIR"
  rm -f md5sum.txt
  find . -type f \
    ! -path './md5sum.txt' \
    ! -path './boot.catalog' \
    -print0 \
    | sort -z \
    | xargs -0 md5sum > md5sum.txt
 )
 echo "✓ md5sum.txt refreshed"
 echo "→ Repacking ISO (pass 2 with final manifest)..."
 xorriso -as mkisofs \
  -r \
  -V "Cezen_AI_Ubuntu2204" \
  -o "$OUTPUT_ISO" \
  --grub2-mbr "$MBR_TEMPLATE" \
  -partition_offset 16 \
  --mbr-force-bootable \
  -append_partition 2 28732ac11ff8d211ba4b00a0c93ec93b "$EFI_IMG" \
  -appended_part_as_gpt \
  -iso_mbr_part_type a2a0d0ebe5b9334487c068b6b72699c7 \
  -c "/boot.catalog" \
  -b "/boot/grub/i386-pc/eltorito.img" \
  -no-emul-boot \
  -boot-load-size 4 \
  -boot-info-table \
  --grub2-boot-info \
  -eltorito-alt-boot \
  -e "--interval:appended_partition_2:::" \
  -no-emul-boot \
  "$FINAL_DIR"
 # ── Verify output ISO integrity manifest ─────────────────
 echo "→ Verifying rebuilt ISO manifest..."
 xorriso -osirrox on \
  -indev "$OUTPUT_ISO" \
  -extract / "$VERIFY_DIR" >/dev/null 2>&1
 chmod -R u+w "$VERIFY_DIR"
 (
  cd "$VERIFY_DIR"
  md5sum -c md5sum.txt >/tmp/cezen-iso-md5check.log 2>&1 || {
    echo "ERROR: Rebuilt ISO failed its own md5sum.txt verification."
    sed -n '1,40p' /tmp/cezen-iso-md5check.log
    exit 1
  }
 )
 echo "✓ Output ISO manifest verified"
 echo ""
 echo "╔══════════════════════════════════════════════════════╗"
 echo "║   Done!                                              ║"
--- a/autoinstall/firstboot-setup.sh
+++ b/autoinstall/firstboot-setup.sh
@ -1,6 +1,6 @@
 #!/usr/bin/env bash
 # ─────────────────────────────────────────────────────────────
-# Cezen AI Suite — First Boot Setup Wizard
+# Nexus One AI — First Boot Setup Wizard
 # Runs on first boot after OS install via systemd service.
 # Uses whiptail for the TUI.
 # ─────────────────────────────────────────────────────────────
@ -8,6 +8,7 @@ set -e
 AIPACKAGE_DIR="/opt/aipackage"
 LOG_FILE="/var/log/cezen-setup.log"
 export TERM="${TERM:-linux}"
 exec > >(tee -a "$LOG_FILE") 2>&1
 detect_iface() {
@ -20,28 +21,28 @@ IFACE="${IFACE:-$(ip -o link show | awk -F': ' '$2 !~ /lo|docker|br-|veth/ {prin
 # ── Colors / terminal setup ────────────────────────────────
 export NEWT_COLORS='
 root=,black
-window=white,navy
+window=black,white
-border=white,navy
+border=white,black
-title=white,navy
+title=black,white
-button=black,cyan
+button=black,white
-actbutton=white,red
+actbutton=white,blue
-checkbox=white,navy
+checkbox=black,white
-actcheckbox=black,cyan
+actcheckbox=white,blue
-entry=white,navy
+entry=black,white
-label=white,navy
+label=black,white
-listbox=white,navy
+listbox=black,white
-actlistbox=black,cyan
+actlistbox=white,blue
-textbox=white,navy
+textbox=black,white
-acttextbox=black,cyan
+acttextbox=white,blue
 '
-TITLE="  Cezen AI Suite — Server Setup  "
+TITLE="  Nexus One AI — Server Setup  "
 H=20
 W=70
 # ── Welcome ────────────────────────────────────────────────
 whiptail --title "$TITLE" \
-  --msgbox "\nWelcome to the Cezen AI Suite installer.\n\nThis wizard will configure your network and install the AI stack.\n\nMake sure this server is connected to the internet before continuing." \
+  --msgbox "\nWelcome to the Nexus One AI installer.\n\nThis wizard will configure your network and install the AI stack.\n\nMake sure this server is connected to the internet before continuing." \
  $H $W
 # ════════════════════════════════════════════════════════════
@ -135,10 +136,11 @@ fi
 TIER=$(whiptail --title "$TITLE" \
  --menu "\nStep 2 of 3: Select AI Package Tier\n\nChoose the tier that matches your hardware:" \
-  $H $W 3 \
+  $H $W 4 \
-  "entry"    "Entry    — 3× NVIDIA L40S   (48GB each) · Up to 20 users" \
+  "starter" "Starter — 1× RTX 5090 / 32GB VRAM · Small team" \
-  "mid"      "Mid      — RTX Pro 6000 BW  (96GB each) · Up to 50 users" \
+  "basic" "Entry — 1× NVIDIA RTX Pro 6000 (96GB) · Up to 20 users" \
-  "advanced" "Advanced — HGX H200         (141GB each) · 200+ users" \
+  "pro"   "Pro   — 2× RTX 5090 / RTX Pro class · Up to 100 users" \
  "max"   "Max   — 4–8× H100/H200/A100 class · 100+ users" \
  3>&1 1>&2 2>&3)
 # ════════════════════════════════════════════════════════════
@ -177,7 +179,7 @@ whiptail --title "$TITLE" \
 clear
 echo ""
 echo "╔══════════════════════════════════════════╗"
-echo "║   Cezen AI Suite — Installing...         ║"
+echo "║   Nexus One AI — Installing...         ║"
 echo "║   Check progress: journalctl -f           ║"
 echo "╚══════════════════════════════════════════╝"
 echo ""
--- a/autoinstall/user-data
+++ b/autoinstall/user-data
@ -85,10 +85,11 @@ autoinstall:
    # mirrors instead of the custom ISO content.
    - sed -i 's/^deb cdrom:/# deb cdrom:/' /target/etc/apt/sources.list || true
-    # Install the Cezen AI payload from the ISO first. Fall back to Git only
+    # Pull the Nexus One AI installer from cgit. The ISO intentionally does not
-    # when building from older media that does not contain /cdrom/cezen-aipackage.
+    # bundle the full package, keeping the image small and the installed code
    # current at deployment time.
    - mkdir -p /target/opt/aipackage
-    - cp -a /cdrom/cezen-aipackage/. /target/opt/aipackage/ || git clone https://cgit.cezentech.com/jinojose/aipackage.git /target/opt/aipackage
+    - git clone https://cgit.cezentech.com/jinojose/aipackage.git /target/opt/aipackage
    # Deploy the console setup wizard
    - mkdir -p /target/opt/cezen
@ -104,23 +105,18 @@ autoinstall:
    - |
      cat > /target/etc/systemd/system/cezen-setup.service << 'EOF'
      [Unit]
-      Description=Cezen AI Suite — Console Setup Wizard
+      Description=Nexus One AI — Console Setup Wizard
      After=cloud-final.service cloud-init.target network-online.target
      Wants=cloud-init.target network-online.target
      Conflicts=getty@tty1.service
      ConditionPathExists=!/opt/cezen/.setup-done
      OnFailure=getty@tty1.service
      [Service]
-      Type=idle
+      Type=oneshot
-      ExecStartPre=-/usr/bin/systemctl stop getty@tty1.service
+      WorkingDirectory=/opt/cezen
-      ExecStart=/bin/bash -lc 'clear >/dev/tty1 2>/dev/null || true; exec /opt/cezen/firstboot-setup.sh'
+      ExecStart=/bin/bash -lc 'clear >/dev/tty1 2>/dev/null || true; /usr/bin/openvt -c 1 -f -w -- env TERM=linux /opt/cezen/firstboot-setup.sh'
-      StandardInput=tty-force
+      StandardOutput=journal+console
-      StandardOutput=tty
+      StandardError=journal+console
      StandardError=tty
      TTYPath=/dev/tty1
      TTYReset=yes
      TTYVHangup=yes
      TTYVTDisallocate=yes
      Restart=no
      [Install]
--- a/autoinstall/user-data-starter
+++ b/autoinstall/user-data-starter
@ -0,0 +1,137 @@
 #cloud-config
 # ─────────────────────────────────────────────────────────────
 # Nexus One AI — Starter Tier Autoinstall
 # Hardware target: compact workstation (Mini-ITX / SFF)
 #   GPU:     1× NVIDIA RTX 5090 (32 GB GDDR7)
 #   RAM:     64 GB DDR5
 #   Storage: 1× 2 TB NVMe SSD (single drive — simple LVM)
 #   Network: 2.5 GbE (single interface)
 # ─────────────────────────────────────────────────────────────
 autoinstall:
  version: 1
  # ── Locale & keyboard ──────────────────────────
  locale: en_IN.UTF-8
  keyboard:
    layout: us
  # ── Network: DHCP during install; static config applied post-install ──
  network:
    network:
      version: 2
      ethernets:
        any-en:
          dhcp4: true
          match:
            name: "en*"
        any-eth:
          dhcp4: true
          match:
            name: "eth*"
  # ── Storage: single 2 TB NVMe, simple LVM ─────
  # Starter workstations have one drive — no RAID needed.
  storage:
    layout:
      name: lvm
      match:
        size: largest
  # ── Identity ──────────────────────────────────
  identity:
    hostname: cezenai-starter
    username: cezen
    # Default password: cezen@123  (change via first-boot wizard)
    password: "$6$I5VA.42G1xTeVhCv$KCLzqIKg/kbNHZyiTEMAY4FZsJMDDwoS90k6Ffb9VEwmcK.wuzlJNe3ceiEfLrzYzXEvqjYsLc7klAbeGPGab."
  # ── SSH ───────────────────────────────────────
  ssh:
    install-server: true
    allow-pw: true
  # ── Base packages ─────────────────────────────
  packages:
    - git
    - curl
    - wget
    - python3
    - whiptail
    - openssh-server
    - nvme-cli          # NVMe health / SMART monitoring
  # ── Late commands ─────────────────────────────
  late-commands:
    # Expand LVM to fill the full 2 TB NVMe
    - lvextend -l +100%FREE /dev/ubuntu-vg/ubuntu-lv || true
    - resize2fs /dev/ubuntu-vg/ubuntu-lv || true
    # Passwordless sudo for cezen (needed by install.sh + first-boot wizard)
    - echo "cezen ALL=(ALL) NOPASSWD:ALL" > /target/etc/sudoers.d/cezen
    - chmod 440 /target/etc/sudoers.d/cezen
    # Replace installer netplan with simple DHCP target config.
    # The first-boot wizard will switch to static if desired.
    - rm -f /target/etc/netplan/50-cloud-init.yaml /target/etc/netplan/00-installer-config.yaml || true
    - |
      cat > /target/etc/netplan/99-cezen-dhcp.yaml << 'EOF'
      network:
        version: 2
        ethernets:
          any-en:
            dhcp4: true
            match:
              name: "en*"
          any-eth:
            dhcp4: true
            match:
              name: "eth*"
      EOF
    # Disable cdrom APT source
    - sed -i 's/^deb cdrom:/# deb cdrom:/' /target/etc/apt/sources.list || true
    # Pull the Nexus One AI installer from cgit. The ISO intentionally does not
    # bundle the full package, keeping the image small and the installed code
    # current at deployment time.
    - mkdir -p /target/opt/aipackage
    - git clone https://cgit.cezentech.com/jinojose/aipackage.git /target/opt/aipackage
    # Write tier marker — used by install.sh and the portal branding system
    - mkdir -p /target/opt/cezen
    - echo "starter" > /target/opt/cezen/tier
    # Deploy first-boot TUI wizard
    - cp /target/opt/aipackage/autoinstall/firstboot-setup.sh /target/opt/cezen/firstboot-setup.sh
    - chmod +x /target/opt/cezen/firstboot-setup.sh
    # Set hostname
    - echo "cezenai-starter" > /target/etc/hostname
    - sed -i 's/aiserver/cezenai-starter/g' /target/etc/hosts || true
    # Systemd service: run first-boot wizard on tty1 once
    - |
      cat > /target/etc/systemd/system/cezen-setup.service << 'EOF'
      [Unit]
      Description=Nexus One AI — Console Setup Wizard (Starter)
      After=cloud-final.service cloud-init.target network-online.target
      Wants=cloud-init.target network-online.target
      ConditionPathExists=!/opt/cezen/.setup-done
      OnFailure=getty@tty1.service
      [Service]
      Type=oneshot
      WorkingDirectory=/opt/cezen
      ExecStart=/bin/bash -lc 'clear >/dev/tty1 2>/dev/null || true; /usr/bin/openvt -c 1 -f -w -- env TERM=linux CEZEN_TIER=starter /opt/cezen/firstboot-setup.sh'
      StandardOutput=journal+console
      StandardError=journal+console
      Restart=no
      [Install]
      WantedBy=cloud-init.target
      EOF
    - curtin in-target -- systemctl enable ssh
    - curtin in-target -- systemctl enable cezen-setup.service
  user-data:
    disable_root: false
--- a/autoinstall/websetup/server.py
+++ b/autoinstall/websetup/server.py
@ -1,6 +1,6 @@
 #!/usr/bin/env python3
 """
-Cezen AI Suite — First Boot Web Setup Server
+Nexus One AI — First Boot Web Setup Server
 Serves on port 80. Access from any browser on the same network.
 """
 import os, json, subprocess, threading, time, socket, ipaddress
@ -123,7 +123,7 @@ HTML = r"""<!DOCTYPE html>
 <head>
 <meta charset="UTF-8">
 <meta name="viewport" content="width=device-width, initial-scale=1.0">
-<title>Cezen AI Suite — Server Setup</title>
+<title>Nexus One AI — Server Setup</title>
 <style>
  :root {
    --navy:    #1B2A4A;
@ -317,19 +317,24 @@ HTML = r"""<!DOCTYPE html>
      <h2>Select AI Package Tier</h2>
      <p class="desc">Choose the tier that matches your GPU hardware.</p>
      <div class="tier-grid">
-        <div class="tier-card" id="tier-entry" onclick="selectTier('entry')">
+        <div class="tier-card" id="tier-starter" onclick="selectTier('starter')">
          <div class="tier-name">Starter</div>
          <div class="tier-gpu">1× RTX 5090 / 32GB VRAM</div>
          <div class="tier-users">Small team deployment</div>
        </div>
        <div class="tier-card" id="tier-basic" onclick="selectTier('basic')">
          <div class="tier-name">Entry</div>
-          <div class="tier-gpu">3× NVIDIA L40S</div>
+          <div class="tier-gpu">1× NVIDIA RTX Pro 6000 (96GB)</div>
          <div class="tier-users">Up to 20 concurrent users</div>
        </div>
-        <div class="tier-card" id="tier-mid" onclick="selectTier('mid')">
+        <div class="tier-card" id="tier-pro" onclick="selectTier('pro')">
-          <div class="tier-name">Mid</div>
+          <div class="tier-name">Pro</div>
-          <div class="tier-gpu">3× RTX Pro 6000</div>
+          <div class="tier-gpu">2× RTX 5090 / RTX Pro class</div>
-          <div class="tier-users">Up to 50 concurrent users</div>
+          <div class="tier-users">Up to 100 concurrent users</div>
        </div>
-        <div class="tier-card" id="tier-advanced" onclick="selectTier('advanced')">
+        <div class="tier-card" id="tier-max" onclick="selectTier('max')">
-          <div class="tier-name">Advanced</div>
+          <div class="tier-name">Max</div>
-          <div class="tier-gpu">8× HGX H200</div>
+          <div class="tier-gpu">4–8× H100/H200/A100 class</div>
          <div class="tier-users">200+ concurrent users</div>
        </div>
      </div>
@ -369,7 +374,7 @@ HTML = r"""<!DOCTYPE html>
    <div class="progress-wrap" id="progress-wrap">
      <div class="card">
-        <h2>Installing Cezen AI Suite...</h2>
+        <h2>Installing Nexus One AI...</h2>
        <div class="progress-bar-bg"><div class="progress-bar" id="progress-bar"></div></div>
        <p id="progress-label" style="font-size:13px;color:var(--muted);margin-bottom:12px">Starting...</p>
        <div class="log-box" id="log-box"></div>
@ -379,7 +384,7 @@ HTML = r"""<!DOCTYPE html>
    <div id="done-screen" class="hidden">
      <div class="done-icon">✅</div>
      <h2>Installation Complete!</h2>
-      <p>Your Cezen AI Suite is ready.</p>
+      <p>Your Nexus One AI is ready.</p>
      <div class="services card" style="margin-top:24px;text-align:left">
        <div class="summary-row"><span class="key">Open WebUI</span><span class="val badge">:3001</span></div>
        <div class="summary-row"><span class="key">JupyterLab</span><span class="val badge">:8888</span></div>
@ -400,7 +405,7 @@ HTML = r"""<!DOCTYPE html>
 <script>
 // ── State ──────────────────────────────────────────────────
 let netMode = 'dhcp';
-let selectedTier = 'entry';
+let selectedTier = 'basic';
 let tools = {
  ollama:     { name: 'Ollama + Open WebUI', desc: 'LLM inference & chat',     icon: '🤖', on: true },
  jupyterlab: { name: 'JupyterLab',          desc: 'Notebook environment',      icon: '📓', on: true },
@ -418,7 +423,7 @@ window.onload = () => {
    document.getElementById('current-ip').textContent = d.ip || 'unknown';
  });
  renderTools();
-  selectTier('entry');
+  selectTier('basic');
 };
 // ── Navigation ─────────────────────────────────────────────
@ -467,7 +472,7 @@ function applyStaticIP() {
 // ── Tier ───────────────────────────────────────────────────
 function selectTier(t) {
  selectedTier = t;
-  ['entry','mid','advanced'].forEach(x =>
+  ['starter','basic','pro','max'].forEach(x =>
    document.getElementById('tier-'+x).classList.toggle('selected', x===t));
 }
@ -707,7 +712,7 @@ class Handler(BaseHTTPRequestHandler):
        elif path == "/api/install":
            global install_proc
-            tier = body.get("tier", "entry")
+            tier = body.get("tier", "basic")
            skip = body.get("skip_tools", [])
            if not install_status["running"]:
                t = threading.Thread(target=run_install, args=(tier, skip), daemon=True)
@ -752,7 +757,7 @@ def show_console_banner(ip):
    try:
        with open("/etc/issue", "w") as f:
            f.write(f"Ubuntu 22.04.5 LTS \\n \\l\n\n")
-            f.write(f"  \033[1;36mCezen AI Suite Setup:\033[0m http://{ip}  |  http://cezenai.local\n\n")
+            f.write(f"  \033[1;36mNexus One AI Setup:\033[0m http://{ip}  |  http://cezenai.local\n\n")
    except Exception:
        pass
--- a/install.sh
+++ b/install.sh
@ -1,17 +1,37 @@
 #!/usr/bin/env bash
 # ─────────────────────────────────────────────
-# Cezen AI Suite — Entry Level Installer
+# Nexus One AI — Installer
 # Usage:
-#   sudo bash install.sh           → Phase 1 (drivers + schedules reboot → Phase 2)
+#   sudo bash install.sh                       → auto-detect tier, Phase 1
-#   sudo bash install.sh --phase=2 → Phase 2 (all software, run after reboot)
+#   sudo bash install.sh --tier=starter        → Starter tier, Phase 1
 #   sudo bash install.sh --tier=basic          → Basic tier, Phase 1
 #   sudo bash install.sh --tier=pro            → Pro tier, Phase 1
 #   sudo bash install.sh --tier=max            → Max tier, Phase 1
 #   sudo bash install.sh --phase=2 --tier=...  → Phase 2 only (post-reboot)
 #   sudo bash install.sh --software-only       → install on customer-owned hardware
 #   sudo bash install.sh --feasibility-only    → scan hardware and exit
 #   sudo bash install.sh --skip-model-pull     → install Ollama without preloading models
 # ─────────────────────────────────────────────
 set -e
-TIER="entry"
+# Auto-detect tier from ISO marker written by autoinstall user-data
 # Resolve the default tier from the marker file written at install time.
 # /opt/cezen/tier takes precedence over the repo-local autoinstall/.tier file.
 if [ -f /opt/cezen/tier ]; then
   TIER="$(cat /opt/cezen/tier | tr -d '[:space:]')"
 elif [ -f /opt/aipackage/autoinstall/.tier ]; then
   TIER="$(cat /opt/aipackage/autoinstall/.tier | tr -d '[:space:]')"
 else
   TIER="basic"    # default if no marker found
 fi
 # An empty or whitespace-only marker would leave TIER blank, which is only
 # rejected later by the phase 2 playbook selection. Fall back to the default.
 [ -n "$TIER" ] || TIER="basic"
 PHASE="1"
 SKIP_ROLES=""
 SOFTWARE_ONLY=false
 FEASIBILITY_ONLY=false
 SKIP_MODEL_PULL=false
 PROFILE="auto"
 SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
 ANSIBLE_DIR="$SCRIPT_DIR/ansible"
 FEASIBILITY_SCRIPT="$SCRIPT_DIR/scripts/cezen-feasibility.sh"
 FEASIBILITY_JSON="/opt/cezen/feasibility.json"
 # Load saved config (written by web setup UI before phase 1)
 [ -f /opt/cezen/install.conf ] && source /opt/cezen/install.conf
@ -21,9 +41,24 @@ for arg in "$@"; do
    --tier=*)  TIER="${arg#*=}" ;;
    --phase=*) PHASE="${arg#*=}" ;;
    --skip=*)  SKIP_ROLES="${arg#*=}" ;;
    --profile=*) PROFILE="${arg#*=}" ;;
    --software-only) SOFTWARE_ONLY=true ;;
    --feasibility-only) FEASIBILITY_ONLY=true ;;
    --skip-model-pull) SKIP_MODEL_PULL=true ;;
  esac
 done
 # Map legacy tier names (entry/mid/advanced) onto the current ones
 # (basic/pro/max) and canonicalise the spelling of $TIER in place.
 # Unknown values are passed through unchanged; the phase 2 playbook
 # selection rejects them.
 normalize_tier() {
   # Lower-case first so that --tier=Pro or TIER=STARTER from install.conf
   # resolves here instead of failing in phase 2.
   TIER="$(printf '%s' "$TIER" | tr '[:upper:]' '[:lower:]')"
   case "$TIER" in
     entry|basic)      TIER="basic" ;;
     mid|pro)          TIER="pro" ;;
     advanced|max)     TIER="max" ;;
     starter)          TIER="starter" ;;
   esac
 }
 normalize_tier
 # ── Preflight ──────────────────────────────────
 check_root() {
  if [ "$EUID" -ne 0 ]; then
@ -52,6 +87,84 @@ install_ansible() {
  echo "✓ Ansible ready"
 }
 # Add one role name to the comma-separated $SKIP_ROLES list unless it is
 # already present.
 #   $1 - role name to skip
 append_skip_role() {
   local role="$1"
   # First entry: no separator needed.
   if [ -z "$SKIP_ROLES" ]; then
     SKIP_ROLES="$role"
     return 0
   fi
   # Wrap both sides in commas so only whole entries match.
   case ",$SKIP_ROLES," in
     *",$role,"*) ;;
     *) SKIP_ROLES="$SKIP_ROLES,$role" ;;
   esac
 }
 # Run the feasibility checker script, passing $FEASIBILITY_JSON as its
 # output path. Prints a warning and does nothing else when the script
 # file is missing.
 run_feasibility() {
   if [ ! -f "$FEASIBILITY_SCRIPT" ]; then
     echo "WARNING: Feasibility checker not found: $FEASIBILITY_SCRIPT"
     return
   fi
   bash "$FEASIBILITY_SCRIPT" "$FEASIBILITY_JSON"
 }
 json_field() {
   # Print one value from the feasibility report at $FEASIBILITY_JSON.
   #   $1 - dot-separated key path, e.g. "recommendation.recommended_profile"
   # The embedded Python walks the path one key at a time and prints the
   # value it lands on. Any failure (missing file, invalid JSON, missing
   # key) prints an empty line instead, so callers can test with [ -n ... ].
   local expr="$1"
   python3 - "$FEASIBILITY_JSON" "$expr" <<'PY'
 import json, sys
 try:
    d=json.load(open(sys.argv[1]))
    cur=d
    for part in sys.argv[2].split("."):
        cur=cur[part]
    print(cur)
 except Exception:
    print("")
 PY
 }
 # Translate the install profile into skipped Ansible roles.
 #
 # When PROFILE is "auto" it is resolved from the feasibility report
 # ($FEASIBILITY_JSON, key recommendation.recommended_profile). With no
 # report there is nothing to resolve and the function returns without
 # changes. An explicit --profile=... is always applied, whether or not a
 # report exists.
 #
 # Side effects: may update PROFILE, appends to SKIP_ROLES via
 # append_skip_role, and sets SKIP_MODEL_PULL=true for the lightweight
 # profiles.
 apply_profile_from_feasibility() {
   if [ "$PROFILE" = "auto" ]; then
     # Only auto-detection needs the report. Returning early for every
     # profile would silently ignore an explicit --profile when the checker
     # is missing or could not write its JSON.
     [ -f "$FEASIBILITY_JSON" ] || return 0
     local detected_profile
     detected_profile="$(json_field recommendation.recommended_profile)"
     if [ -n "$detected_profile" ]; then
       PROFILE="$detected_profile"
     fi
   fi

   case "$PROFILE" in
     core)
       append_skip_role docker
       append_skip_role k3s
       append_skip_role ollama
       append_skip_role vllm
       append_skip_role jupyterlab
       append_skip_role chromadb
       append_skip_role mlflow
       append_skip_role minio
       append_skip_role monitoring
       SKIP_MODEL_PULL=true
       ;;
     cpu-ai)
       append_skip_role k3s
       append_skip_role vllm
       append_skip_role mlflow
       append_skip_role minio
       SKIP_MODEL_PULL=true
       ;;
     gpu-lite|gpu-starter)
       append_skip_role k3s
       append_skip_role mlflow
       append_skip_role minio
       SKIP_MODEL_PULL=true
       ;;
     gpu-standard)
       append_skip_role mlflow
       append_skip_role minio
       ;;
     gpu-pro|gpu-max)
       # Full stack: nothing is skipped.
       ;;
     *)
       echo "WARNING: Unknown profile '$PROFILE'; using explicit skip list only."
       ;;
   esac
 }
 has_nvidia_pci_gpu() {
  for vendor_file in /sys/bus/pci/devices/*/vendor; do
    [ -f "$vendor_file" ] || continue
@ -70,7 +183,7 @@ has_working_nvidia_driver() {
 run_phase1() {
  echo ""
  echo "╔══════════════════════════════════════════╗"
-  echo "║   Cezen AI Suite — Phase 1: NVIDIA       ║"
+  echo "║   Nexus One AI — Phase 1: NVIDIA       ║"
  echo "╚══════════════════════════════════════════╝"
  if ! has_nvidia_pci_gpu; then
@ -87,7 +200,7 @@ run_phase1() {
  # Register phase 2 as a one-shot systemd service so it runs after reboot
  cat > /etc/systemd/system/cezen-phase2.service << EOF
 [Unit]
-Description=Cezen AI Suite Phase 2 Installer
+Description=Nexus One AI Phase 2 Installer
 After=network-online.target nvidia-persistenced.service
 Wants=network-online.target
@ -116,7 +229,7 @@ EOF
 run_phase2() {
  echo ""
  echo "╔══════════════════════════════════════════╗"
-  echo "║   Cezen AI Suite — Phase 2: Stack        ║"
+  echo "║   Nexus One AI — Phase 2: Stack        ║"
  echo "╚══════════════════════════════════════════╝"
  GPU_AVAILABLE=false
@ -129,12 +242,26 @@ run_phase2() {
  fi
  # Build skip_roles extra var (comma-separated list, empty string = skip nothing)
-  EXTRA_VARS="tier=$TIER skip_roles=\"$SKIP_ROLES\" gpu_available=$GPU_AVAILABLE"
+  EXTRA_VARS="tier=$TIER skip_roles=\"$SKIP_ROLES\" gpu_available=$GPU_AVAILABLE skip_model_pull=$SKIP_MODEL_PULL"
  echo "→ Tier: $TIER | Skip: ${SKIP_ROLES:-none}"
  echo "→ GPU available: $GPU_AVAILABLE"
  echo "→ Skip model pull: $SKIP_MODEL_PULL"
  # Select Ansible playbook by tier
  case "$TIER" in
    starter)        PLAYBOOK="$ANSIBLE_DIR/starter.yml" ;;
    basic|entry)    PLAYBOOK="$ANSIBLE_DIR/entry.yml" ;;
    pro)            PLAYBOOK="$ANSIBLE_DIR/pro.yml" ;;
    max)            PLAYBOOK="$ANSIBLE_DIR/max.yml" ;;
    *)
      echo "ERROR: Unknown tier '$TIER'. Valid: starter | basic | pro | max"
      exit 1
      ;;
  esac
  echo "→ Playbook: $PLAYBOOK"
  ANSIBLE_STDOUT_CALLBACK=yaml \
-  ansible-playbook -i localhost, -c local "$ANSIBLE_DIR/entry.yml" \
+  ansible-playbook -i localhost, -c local "$PLAYBOOK" \
    -e "$EXTRA_VARS"
  # Disable one-shot service so it doesn't run again on next reboot
@ -142,19 +269,32 @@ run_phase2() {
  echo ""
  echo "╔══════════════════════════════════════════╗"
-  echo "║   Cezen AI Suite installation complete!  ║"
+  echo "║   Nexus One AI installation complete!  ║"
  echo "║   Tier: $(printf '%-33s' "$TIER")║"
  echo "║                                          ║"
-  echo "║   JupyterLab  → http://localhost:8888    ║"
+  echo "║   Portal     → http://localhost          ║"
-  echo "║   Ollama API  → http://localhost:11434   ║"
+  echo "║   Ollama API → http://localhost:11434    ║"
-  echo "║   MLflow      → http://localhost:5000    ║"
+  echo "║   vLLM API   → http://localhost:8000     ║"
-  echo "║   MinIO       → http://localhost:9001    ║"
+  echo "║   Grafana    → http://localhost:3000     ║"
  echo "║   Grafana     → http://localhost:3000    ║"
  echo "╚══════════════════════════════════════════╝"
 }
 # ── Main ───────────────────────────────────────
 check_root
 check_os
 if [ "$FEASIBILITY_ONLY" = true ]; then
  run_feasibility
  exit 0
 fi
 check_root
 run_feasibility
 if [ "$SOFTWARE_ONLY" = true ]; then
  PHASE="2"
  apply_profile_from_feasibility
 fi
 install_ansible
 if [ "$PHASE" = "1" ]; then
--- a/models/pull-models.sh
+++ b/models/pull-models.sh
@ -1,44 +1,68 @@
 #!/usr/bin/env bash
 # Pull additional AI models into Ollama
-# Run after install: bash models/pull-models.sh --tier=entry
+# Run after install: bash models/pull-models.sh --tier=starter
 # ─────────────────────────────────────────────
-TIER=${1:-entry}
+TIER="basic"   # default tier
-echo "Pulling models for tier: $TIER"
+for arg in "$@"; do
  case $arg in
    --tier=*) TIER="${arg#*=}" ;;
    *)        TIER="$arg" ;;     # allow positional: pull-models.sh pro
  esac
 done
-entry_models=(
+# Normalise legacy names
-  "llama3.1:8b"         # General purpose, good baseline
+case "$TIER" in
-  "mistral:7b"          # Fast, good for APIs
+  entry)    TIER="basic" ;;
-  "llama3.1:70b"        # Larger — only if enough VRAM (3× L40S has 144GB total)
+  mid)      TIER="pro" ;;
-  "nomic-embed-text"    # Embedding model for RAG
+  advanced) TIER="max" ;;
-  "codellama:13b"       # Code generation
+esac
 # ── Model lists ───────────────────────────────
 starter_models=(
  "phi3:mini"              # 3.8B — fits in 32 GB GDDR7 at full precision
  "nomic-embed-text"       # Embedding model for RAG
 )
-mid_models=(
+basic_models=(
-  "${entry_models[@]}"
+  "llama3.1:8b"            # General purpose, good baseline
-  "llama3.1:70b"
+  "mistral:7b"             # Fast, good for APIs
-  "mixtral:8x7b"
+  "nomic-embed-text"       # Embedding model for RAG
-  "deepseek-coder-v2:16b"
+  "codellama:13b"          # Code generation
 )
-advanced_models=(
+pro_models=(
-  "${mid_models[@]}"
+  "${basic_models[@]}"
-  "llama3.1:405b"
+  "llama3.1:70b"           # Large general purpose (needs 64+ GB VRAM at 4-bit)
-  "mixtral:8x22b"
+  "mixtral:8x7b"           # MoE model, strong reasoning
  "deepseek-coder-v2:16b"  # Code specialist
 )
 max_models=(
  "${pro_models[@]}"
  "llama3.1:405b"          # Flagship — needs 320+ GB VRAM or multi-node
  "mixtral:8x22b"          # Large MoE
 )
 case $TIER in
-  entry)    models=("${entry_models[@]}") ;;
+  starter) models=("${starter_models[@]}") ;;
-  mid)      models=("${mid_models[@]}") ;;
+  basic)   models=("${basic_models[@]}") ;;
-  advanced) models=("${advanced_models[@]}") ;;
+  pro)     models=("${pro_models[@]}") ;;
-  *)        echo "Unknown tier: $TIER. Use entry, mid, or advanced."; exit 1 ;;
+  max)     models=("${max_models[@]}") ;;
  *)
    echo "Unknown tier: $TIER"
    echo "Usage: bash pull-models.sh --tier=starter|basic|pro|max"
    exit 1
    ;;
 esac
 echo "Pulling models for tier: $TIER"
 echo ""
 # Pull every model selected for the tier, one at a time.
 for model in "${models[@]}"; do
   echo ""
   echo "→ Pulling $model..."
   # No `set -e` is visible in this script, so a failed pull (network drop,
   # unknown tag, full disk) would otherwise go unreported. Log the failure
   # and carry on with the remaining models.
   if ! ollama pull "$model"; then
     echo "✗ Failed to pull $model" >&2
   fi
   echo ""
 done
-echo ""
+echo "✓ Done. List installed models with: ollama list"
 echo "✓ All models pulled. List with: ollama list"
--- a/nginx/cezen.conf
+++ b/nginx/cezen.conf
@ -0,0 +1,135 @@
 # /etc/nginx/sites-available/cezen
 # Nexus One AI Portal — serves static portal, proxies API and console terminal
 #
 # Install:
 #   sudo cp cezen.conf /etc/nginx/sites-available/cezen
 #   sudo ln -sf /etc/nginx/sites-available/cezen /etc/nginx/sites-enabled/cezen
 #   sudo rm -f /etc/nginx/sites-enabled/default
 #   sudo nginx -t && sudo systemctl reload nginx
 # ─── Rate limiting zones (must be outside server block) ──────────────────────
 # Login: 5 requests/min per IP; the login location allows a burst of 3 extra requests (served immediately via nodelay), then 429
 limit_req_zone $binary_remote_addr zone=cezen_login:10m rate=5r/m;
 # General API: 60 req/min per IP (generous for dashboard polling)
 limit_req_zone $binary_remote_addr zone=cezen_api:10m   rate=60r/m;
 server {
    listen 80 default_server;
    listen [::]:80 default_server;
    server_name _;
    # Hide server version
    server_tokens off;
    # Logging
    access_log /var/log/nginx/cezen-access.log;
    error_log  /var/log/nginx/cezen-error.log;
    # ─── Global security headers ──────────────────────────────────────────────
    add_header X-Content-Type-Options  "nosniff"                   always;
    add_header X-Frame-Options         "SAMEORIGIN"                always;
    add_header X-XSS-Protection        "1; mode=block"             always;
    add_header Referrer-Policy         "strict-origin-when-cross-origin" always;
    add_header Permissions-Policy      "geolocation=(), camera=(), microphone=()" always;
    # nginx does not concatenate adjacent quoted strings: add_header takes a
    # name, exactly one value and the optional "always" flag. The policy must
    # therefore be a single argument, otherwise `nginx -t` rejects the file
    # with "invalid number of arguments".
    add_header Content-Security-Policy "default-src 'self'; script-src 'self' 'unsafe-inline'; style-src 'self' 'unsafe-inline'; img-src 'self' data:; connect-src 'self'; frame-src 'self'; font-src 'self'; object-src 'none'; base-uri 'self';" always;
    # ─── robots.txt — block all indexing (air-gapped / private portal) ────────
    location = /robots.txt {
        # Set the MIME type with default_type: a `return` body is sent with
        # that type. add_header would emit a second Content-Type header and,
        # as an add_header at this level, would stop the server-level security
        # headers from being inherited by this location.
        default_type text/plain;
        return 200 "User-agent: *\nDisallow: /\n";
    }
    # ─── Static Portal ───────────────────────────────────────────────────────
    root /opt/cezen/portal;
    index index.html;
    location / {
        try_files $uri $uri/ /index.html;
    }
    # Cache static assets aggressively
    location ~* \.(css|js|png|jpg|jpeg|gif|ico|svg|woff2?)$ {
        expires 7d;
        add_header Cache-Control "public, immutable";
        # add_header directives are inherited from the server level only when
        # the current level defines none of its own. The Cache-Control line
        # above would therefore strip every security header from static assets,
        # so the ones relevant to sub-resources are re-declared here.
        add_header X-Content-Type-Options  "nosniff"                         always;
        add_header X-Frame-Options         "SAMEORIGIN"                      always;
        add_header Referrer-Policy         "strict-origin-when-cross-origin" always;
    }
    # ─── Model upload (large files — no size limit, extended timeout) ────────
    location = /api/models/upload {
        # Exact-match location: it is selected instead of the generic /api/
        # prefix block, so that block's rate limit and 120s timeout do not
        # apply to uploads.
        client_max_body_size    0;         # unlimited — GGUF files can be 70 GB+
        proxy_request_buffering off;       # stream directly to backend, don't buffer in Nginx
        proxy_read_timeout      7200s;     # 2 hours for slow transfers
        proxy_send_timeout      7200s;
        proxy_pass         http://127.0.0.1:8080;
        proxy_http_version 1.1;
        proxy_set_header   Host              $host;
        proxy_set_header   X-Real-IP         $remote_addr;
        proxy_set_header   X-Forwarded-For   $proxy_add_x_forwarded_for;
        # Forward the original scheme, as the other /api/ locations do.
        proxy_set_header   X-Forwarded-Proto $scheme;
    }
    # ─── Login rate limit (tight) ─────────────────────────────────────────────
    location = /api/auth/login {
        limit_req zone=cezen_login burst=3 nodelay;
        limit_req_status 429;
        proxy_pass         http://127.0.0.1:8080;
        proxy_http_version 1.1;
        proxy_set_header   Host              $host;
        proxy_set_header   X-Real-IP         $remote_addr;
        proxy_set_header   X-Forwarded-For   $proxy_add_x_forwarded_for;
        proxy_set_header   X-Forwarded-Proto $scheme;
        proxy_read_timeout 30s;
    }
    # ─── FastAPI Backend (/api/) ──────────────────────────────────────────────
    location /api/ {
        limit_req zone=cezen_api burst=20 nodelay;
        limit_req_status 429;
        proxy_pass         http://127.0.0.1:8080;
        proxy_http_version 1.1;
        proxy_set_header   Host              $host;
        proxy_set_header   X-Real-IP         $remote_addr;
        proxy_set_header   X-Forwarded-For   $proxy_add_x_forwarded_for;
        proxy_set_header   X-Forwarded-Proto $scheme;
        proxy_read_timeout 120s;
    }
    # ─── Web Console (ttyd) (/console/) ──────────────────────────────────────
    location /console/ {
        # The trailing slash on proxy_pass strips the /console/ prefix before
        # the request reaches ttyd.
        proxy_pass         http://127.0.0.1:7681/;
        proxy_http_version 1.1;
        # WebSocket upgrade for the terminal stream.
        proxy_set_header   Upgrade           $http_upgrade;
        proxy_set_header   Connection        "upgrade";
        proxy_set_header   Host              $host;
        proxy_set_header   X-Real-IP         $remote_addr;
        # sub_filter cannot rewrite a compressed body. An empty value stops
        # Accept-Encoding from being forwarded, so the upstream replies
        # uncompressed and the rewrites below can take effect.
        proxy_set_header   Accept-Encoding   "";
        proxy_read_timeout 86400s;
        # Rewrite paths so ttyd JS/CSS assets load correctly
        proxy_redirect     / /console/;
        sub_filter         'href="/'  'href="/console/';
        sub_filter         'src="/'   'src="/console/';
        sub_filter_once    off;
        sub_filter_types   text/html;
    }
    # ─── Block dotfiles and common attack paths ───────────────────────────────
    location ~ /\. {
        # Matches any URI containing a path segment that starts with "."
        # (for example /.git/config or /.env).
        # NOTE(review): regex locations are tried in file order and the
        # static-asset regex is declared earlier in this file, so a path such
        # as /.git/x.js may be served by that block instead of denied here.
        # Consider moving these deny rules above it — confirm.
        deny all;
    }
    location ~* \.(env|git|sql|bak|sh|py)$ {
        # Case-insensitive match on URIs ending in one of these extensions.
        deny all;
    }
 }
--- a/scripts/cezen-backup.sh
+++ b/scripts/cezen-backup.sh
@ -0,0 +1,83 @@
 #!/usr/bin/env bash
 # Nexus One AI backup/restore helper.
 #
 # Usage:
 #   sudo bash scripts/cezen-backup.sh backup
 #   sudo bash scripts/cezen-backup.sh list
 #   sudo bash scripts/cezen-backup.sh restore /opt/cezen/backups/cezen-backup-YYYYmmdd-HHMMSS.zip
 set -euo pipefail
 ACTION="${1:-backup}"
 TARGET="${2:-}"
 DATA_DIR="${CEZEN_DATA:-/opt/cezen/data}"
 BACKUP_DIR="${CEZEN_BACKUP_DIR:-/opt/cezen/backups}"
 python3 - "$ACTION" "$TARGET" "$DATA_DIR" "$BACKUP_DIR" <<'PY'
 import json
 import shutil
 import sys
 import zipfile
 from datetime import datetime, timezone
 from pathlib import Path
 action, target, data_dir, backup_dir = sys.argv[1:5]
 data_dir = Path(data_dir)
 backup_dir = Path(backup_dir)
 backup_dir.mkdir(parents=True, exist_ok=True)
 def now_tag():
    """Return the current UTC time as a compact ``YYYYmmdd-HHMMSS`` tag."""
    moment = datetime.now(timezone.utc)
    return moment.strftime("%Y%m%d-%H%M%S")
 def iso_now():
    """Return the current UTC time as an ISO 8601 string with a UTC offset."""
    moment = datetime.now(tz=timezone.utc)
    return moment.isoformat()
 def write_backup(dest):
    """Write a zip archive of ``data_dir`` to ``dest``.

    The archive holds a ``manifest.json`` entry followed by every regular
    file under ``data_dir``, stored under its path relative to ``data_dir``.
    A missing ``data_dir`` yields an archive containing only the manifest.

    The archive being written is never added to itself. When ``backup_dir``
    lives inside ``data_dir`` (both are configurable through the
    environment), the whole backup directory is left out so that earlier
    backups are not nested into new ones.
    """
    manifest = {
        "schema": "cezen.backup_manifest.v1",
        "created_at": iso_now(),
        "data_dir": str(data_dir),
        "source": "cezen-backup.sh",
    }
    dest_real = Path(dest).resolve()
    data_root = data_dir.resolve()
    store_root = backup_dir.resolve()
    # Exclude the store only when it is strictly inside the data dir. If it
    # equals or contains the data dir, excluding it would drop every file.
    store_is_nested = data_root in store_root.parents
    with zipfile.ZipFile(dest, "w", compression=zipfile.ZIP_DEFLATED) as zf:
        zf.writestr("manifest.json", json.dumps(manifest, indent=2))
        if data_dir.exists():
            for path in data_dir.rglob("*"):
                if not path.is_file():
                    continue
                real = path.resolve()
                if real == dest_real:
                    continue  # never archive the file being written
                if store_is_nested and store_root in real.parents:
                    continue  # skip earlier backups kept under the data dir
                zf.write(path, path.relative_to(data_dir).as_posix())
 def restore(src):
    """Restore ``data_dir`` from the backup archive at ``src``.

    The archive is fully validated before anything is written. A missing
    file, a file that is not a zip archive, or any member that would land
    outside ``data_dir`` aborts with ``SystemExit`` and leaves the data
    directory untouched. After validation a ``pre-restore-*.zip`` snapshot of
    the current data is written to ``backup_dir``, then the members are
    extracted over the existing files. ``manifest.json`` and directory
    entries are skipped. A JSON summary is printed on success.
    """
    src = Path(src)
    if not src.exists():
        raise SystemExit(f"Backup not found: {src}")
    if not zipfile.is_zipfile(src):
        raise SystemExit(f"Not a valid backup archive: {src}")
    root = data_dir.resolve()
    with zipfile.ZipFile(src, "r") as zf:
        # Pass 1: resolve and check every destination first, so an unsafe
        # entry cannot leave the data directory half restored.
        plan = []
        for member in zf.infolist():
            if member.filename == "manifest.json" or member.is_dir():
                continue
            target_path = (data_dir / member.filename).resolve()
            # The target must be strictly inside the data dir. The root itself
            # is a directory and cannot be written as a file.
            if root not in target_path.parents:
                raise SystemExit(f"Unsafe archive path: {member.filename}")
            plan.append((member, target_path))
        # Snapshot the current state only once the archive is known to be usable.
        safety = backup_dir / f"pre-restore-{now_tag()}.zip"
        write_backup(safety)
        # Pass 2: extract.
        for member, target_path in plan:
            target_path.parent.mkdir(parents=True, exist_ok=True)
            with zf.open(member) as source, open(target_path, "wb") as out:
                shutil.copyfileobj(source, out)
    print(json.dumps({"ok": True, "restored": str(src), "pre_restore_snapshot": str(safety)}, indent=2))
 # Dispatch on the requested action; anything unrecognised prints the usage line.
 if action not in ("backup", "list", "restore"):
    raise SystemExit("Usage: cezen-backup.sh backup|list|restore [backup.zip]")
 if action == "restore":
    # restore needs the archive path as the second argument
    if not target:
        raise SystemExit("Usage: cezen-backup.sh restore /path/to/backup.zip")
    restore(target)
 elif action == "list":
    # newest archive first, ordered by modification time
    newest_first = sorted(
        backup_dir.glob("cezen-backup-*.zip"),
        key=lambda p: p.stat().st_mtime,
        reverse=True,
    )
    rows = [
        {"name": item.name, "path": str(item), "size_bytes": item.stat().st_size}
        for item in newest_first
    ]
    print(json.dumps({"backup_dir": str(backup_dir), "backups": rows}, indent=2))
 else:
    dest = backup_dir / f"cezen-backup-{now_tag()}.zip"
    write_backup(dest)
    print(json.dumps({"ok": True, "backup": str(dest)}, indent=2))
 PY
--- a/scripts/cezen-feasibility.sh
+++ b/scripts/cezen-feasibility.sh
@ -0,0 +1,218 @@
 #!/usr/bin/env bash
 # Nexus One AI feasibility checker.
 # Runs before installation to classify existing hardware for software-only or appliance deployments.
 # Usage: cezen-feasibility.sh [OUTPUT_JSON]   (default: /opt/cezen/feasibility.json)
 set -euo pipefail
 OUT="${1:-/opt/cezen/feasibility.json}"
 # Best effort: the target directory may not be creatable without root.
 mkdir -p "$(dirname "$OUT")" 2>/dev/null || true
 # Keep the X's at the very end of the template: BSD/macOS mktemp only substitutes
 # trailing X's, so a ".json" suffix would produce a fixed, collision-prone file name there.
 tmp_json="$(mktemp /tmp/cezen-feasibility.XXXXXX)"
 # Remove the scratch file on every exit path, including failures under `set -e`.
 trap 'rm -f "$tmp_json"' EXIT
 python3 - "$tmp_json" <<'PY'
 import json, os, platform, shutil, socket, subprocess, sys
 from pathlib import Path
 out = Path(sys.argv[1])
 def run(cmd, timeout=5):
    """Run ``cmd`` and return its stripped stdout, or "" on any failure.

    Failure covers a missing binary, a non-zero exit status and exceeding
    ``timeout`` seconds; stderr is discarded.
    """
    try:
        captured = subprocess.check_output(
            cmd,
            stderr=subprocess.DEVNULL,
            text=True,
            timeout=timeout,
        )
    except Exception:
        return ""
    return captured.strip()
 def read(path, default=""):
    """Return the stripped text of ``path``, or ``default`` if it cannot be read.

    Undecodable bytes are replaced rather than raising.
    """
    try:
        text = Path(path).read_text(errors="replace")
    except Exception:
        return default
    return text.strip()
 def gb(n):
    """Convert a byte count to gibibytes (2**30 bytes), rounded to one decimal place."""
    bytes_per_gib = 2 ** 30
    return round(n / bytes_per_gib, 1)
 def os_info():
    """Describe the operating system using /etc/os-release where available.

    Returns:
        dict with ``name`` (PRETTY_NAME, or ``platform.platform()`` when it is
        missing), ``id``, ``version_id`` and ``kernel`` (``platform.release()``).
    """
    data = {}
    for line in read("/etc/os-release").splitlines():
        line = line.strip()
        # Skip blanks, comments and anything that is not a KEY=VALUE assignment.
        if not line or line.startswith("#") or "=" not in line:
            continue
        k, v = line.split("=", 1)
        v = v.strip()
        # os-release values may be single- or double-quoted.
        if len(v) >= 2 and v[0] == v[-1] == "'":
            v = v[1:-1]
        else:
            v = v.strip('"')
        data[k.strip()] = v
    return {
        "name": data.get("PRETTY_NAME") or platform.platform(),
        "id": data.get("ID", ""),
        "version_id": data.get("VERSION_ID", ""),
        "kernel": platform.release(),
    }
 def cpu_info():
    """Return the CPU model string and logical core count.

    The model is the value of the first "model name" line in /proc/cpuinfo,
    falling back to ``platform.processor()``; ``cores`` is 0 when unknown.
    """
    cpuinfo_lines = read("/proc/cpuinfo").splitlines()
    first_model = next(
        (ln.split(":", 1)[1].strip() for ln in cpuinfo_lines if ln.lower().startswith("model name")),
        "",
    )
    core_count = os.cpu_count() or 0
    return {"model": first_model or platform.processor(), "cores": core_count}
 def mem_gb():
    """Return total physical memory in GB (one decimal), or 0 if it cannot be determined.

    Sources are tried in order: the MemTotal line of /proc/meminfo,
    ``sysctl -n hw.memsize``, then ``os.sysconf`` page count times page size.
    """
    meminfo_lines = read("/proc/meminfo").splitlines()
    total_line = next((ln for ln in meminfo_lines if ln.startswith("MemTotal:")), None)
    if total_line is not None:
        # The MemTotal figure is treated as KiB, hence the two divisions by 1024.
        return round(int(total_line.split()[1]) / 1024 / 1024, 1)
    hw_memsize = run(["sysctl", "-n", "hw.memsize"])
    if hw_memsize.isdigit():
        return gb(int(hw_memsize))
    try:
        phys = [os.sysconf(key) for key in ("SC_PHYS_PAGES", "SC_PAGE_SIZE")]
        if all(phys):
            return gb(int(phys[0]) * int(phys[1]))
    except Exception:
        pass
    return 0
 def disk_info():
    """Return total, free and used space of the root filesystem, each in GB."""
    total, used, free = shutil.disk_usage("/")
    return {"total_gb": gb(total), "free_gb": gb(free), "used_gb": gb(used)}
 def gpu_info():
    """List detected NVIDIA GPUs as dicts with ``name``, ``vram_gb`` and ``driver``.

    ``nvidia-smi`` is queried when it is on PATH. If that yields nothing, the
    PCI vendor files are scanned and a single placeholder entry with 0 GB VRAM
    is returned when an NVIDIA device (vendor 0x10de) is present.
    """
    found = []
    if shutil.which("nvidia-smi"):
        fields = "name,memory.total,driver_version"
        listing = run(["nvidia-smi", f"--query-gpu={fields}", "--format=csv,noheader,nounits"])
        for row in listing.splitlines():
            cols = [c.strip() for c in row.split(",")]
            if len(cols) < 2:
                continue
            try:
                # memory.total is treated as MiB and converted to GB.
                vram = round(float(cols[1]) / 1024, 1)
            except Exception:
                vram = 0
            driver = cols[2] if len(cols) > 2 else ""
            found.append({"name": cols[0], "vram_gb": vram, "driver": driver})
    if not found:
        vendor_files = Path("/sys/bus/pci/devices").glob("*/vendor")
        if any(read(v).lower() == "0x10de" for v in vendor_files):
            found.append({"name": "NVIDIA GPU detected (driver not ready)", "vram_gb": 0, "driver": ""})
    return found
 def port_open(host, port):
    """Return True if a TCP connection to ``host:port`` succeeds within one second."""
    try:
        with socket.create_connection((host, port), timeout=1):
            pass
    except Exception:
        return False
    return True
 def tool_state():
    """Report which supporting tools are on PATH and whether Docker responds.

    ``docker_running`` is True only when ``docker info`` produces output
    within 3 seconds; it is False without probing when docker is absent.
    """
    def present(name):
        return bool(shutil.which(name))
    has_docker = present("docker")
    docker_up = bool(run(["docker", "info"], timeout=3)) if has_docker else False
    return {
        "docker": has_docker,
        "docker_running": docker_up,
        "ansible": present("ansible-playbook"),
        "python3": present("python3"),
        "curl": present("curl"),
        "git": present("git"),
    }
 def recommend(mem, disk, gpus, cpu_cores):
    """Pick a deployment tier/profile from the measured hardware.

    Args:
        mem: total RAM in GB.
        disk: free disk space in GB.
        gpus: list of GPU dicts; only ``vram_gb`` is consulted.
        cpu_cores: logical CPU core count.

    Returns:
        dict with ``recommended_tier``, ``recommended_profile``,
        ``estimated_concurrent_users`` and a list of advisory ``notes``.
    """
    vram_sizes = [g.get("vram_gb", 0) for g in gpus]
    max_vram = max(vram_sizes, default=0)
    gpu_count = sum(1 for v in vram_sizes if v > 0)
    # (min VRAM in GB, min usable GPU count, tier, profile, concurrency); first match wins.
    gpu_ladder = (
        (120, 4, "max", "gpu-max", "100+"),
        (80, 2, "pro", "gpu-pro", "20-100"),
        (48, 1, "basic", "gpu-standard", "5-20"),
        (24, 1, "starter", "gpu-starter", "1-10"),
        (8, 1, "starter", "gpu-lite", "1-5"),
    )
    notes = []
    rung = next((r for r in gpu_ladder if max_vram >= r[0] and gpu_count >= r[1]), None)
    if rung is not None:
        _, _, tier, profile, concurrency = rung
        if profile == "gpu-lite":
            notes.append("GPU is suitable for small quantized models only.")
    elif mem >= 32 and cpu_cores >= 8:
        tier, profile, concurrency = "starter", "cpu-ai", "1-3"
        notes.append("No usable NVIDIA VRAM found; local CPU inference is limited. Use cloud/external model fallback for better UX.")
    else:
        tier, profile, concurrency = "starter", "core", "1-2"
        notes.append("Hardware is best for portal, RAG management, workflows, and external/cloud model routing.")
    # Capacity warnings apply regardless of the chosen profile.
    if mem < 16:
        notes.append("RAM below 16 GB; avoid local model serving.")
    if disk < 100:
        notes.append("Less than 100 GB free disk; model storage and document indexing will be constrained.")
    return {
        "recommended_tier": tier,
        "recommended_profile": profile,
        "estimated_concurrent_users": concurrency,
        "notes": notes,
    }
 # Run every probe once, then derive the recommendation and feature flags from the results.
 mem = mem_gb()
 disk = disk_info()
 gpus = gpu_info()
 cpu = cpu_info()
 tools = tool_state()
 free_gb = disk["free_gb"]
 recommendation = recommend(mem, free_gb, gpus, cpu["cores"])
 vram = [g.get("vram_gb", 0) for g in gpus]
 top_vram = max(vram, default=0)
 # Feature flags are simple threshold checks on RAM, free disk and per-GPU VRAM.
 features = {
    "portal": True,
    "users_auth": True,
    "document_intelligence": mem >= 8,
    "rag_chromadb": mem >= 16 and free_gb >= 50,
    "ollama_cpu": mem >= 32,
    "ollama_gpu": top_vram >= 8,
    "vllm": top_vram >= 24,
    "fine_tuning_qlora": top_vram >= 24,
    "distributed_training": sum(1 for v in vram if v >= 48) >= 2,
    "monitoring": True,
    "software_only": True,
    "air_gapped_ready": True,
 }
 # Local ports probed for an existing listener on 127.0.0.1.
 checked_ports = (80, 8080, 11434, 8000, 3000, 8888)
 report = {
    "schema": "cezen.feasibility.v1",
    "generated_at": run(["date", "-u", "+%Y-%m-%dT%H:%M:%SZ"]) or "",
    "host": socket.gethostname(),
    "os": os_info(),
    "cpu": cpu,
    "ram_gb": mem,
    "disk": disk,
    "gpus": gpus,
    "tools": tools,
    "ports_in_use": {str(port): port_open("127.0.0.1", port) for port in checked_ports},
    "recommendation": recommendation,
    "features": features,
 }
 out.write_text(json.dumps(report, indent=2))
 PY
 # Publish the report to the requested path; if that is not writable, fall back to the current directory.
 cp "$tmp_json" "$OUT" 2>/dev/null || {
   OUT="./feasibility.json"
   cp "$tmp_json" "$OUT"
 }
 # The scratch copy is no longer needed.
 rm -f "$tmp_json"
 python3 - "$OUT" <<'PY'
 # Print a human-readable summary of the JSON feasibility report given as argv[1].
 import json, sys
 p = sys.argv[1]
 # Close the report file deterministically instead of leaving the handle to the GC.
 with open(p) as fh:
    d = json.load(fh)
 r = d["recommendation"]
 print("")
 print("Nexus One AI Feasibility Report")
 print("--------------------------------")
 print(f"Host: {d['host']}")
 print(f"OS: {d['os']['name']}")
 print(f"CPU: {d['cpu']['cores']} cores | RAM: {d['ram_gb']} GB | Free disk: {d['disk']['free_gb']} GB")
 if d["gpus"]:
    print("GPU: " + "; ".join(f"{g['name']} ({g.get('vram_gb', 0)} GB VRAM)" for g in d["gpus"]))
 else:
    print("GPU: none detected")
 print("")
 print(f"Recommended tier: {r['recommended_tier'].upper()}")
 print(f"Recommended profile: {r['recommended_profile']}")
 print(f"Estimated concurrency: {r['estimated_concurrent_users']} users")
 # Advisory notes are optional; print the section only when there are any.
 if r["notes"]:
    print("Notes:")
    for n in r["notes"]:
        print(f"  - {n}")
 print("")
 print(f"JSON report: {p}")
 PY