Update ISO installer tiers and backend deployment
This commit is contained in:
parent
79784a6743
commit
56668f7bdc
1
.gitignore
vendored
Normal file
1
.gitignore
vendored
Normal file
@ -0,0 +1 @@
|
||||
*.iso
|
||||
67
README.md
67
README.md
@ -1,4 +1,4 @@
|
||||
# Cezen AI Suite — Installer
|
||||
# Nexus One AI — Installer
|
||||
|
||||
## Quick Start
|
||||
|
||||
@ -12,6 +12,61 @@ Server reboots automatically after NVIDIA drivers install. Phase 2 runs on its o
|
||||
|
||||
On the custom ISO, Ubuntu autoinstall now pauses on the installer network screen so the operator can choose the final IP address from the VM console before installation continues.
|
||||
|
||||
## Software-Only / Existing Hardware
|
||||
|
||||
Run a feasibility scan before quoting or installing on customer-owned hardware:
|
||||
|
||||
```bash
|
||||
bash scripts/cezen-feasibility.sh
|
||||
```
|
||||
|
||||
The checker reports CPU, RAM, disk, NVIDIA GPU/VRAM, tool readiness, available features, and a recommended Cezen profile. It writes JSON to `/opt/cezen/feasibility.json` when possible, otherwise `./feasibility.json`.
|
||||
|
||||
Install on existing hardware without the appliance NVIDIA phase:
|
||||
|
||||
```bash
|
||||
sudo bash install.sh --software-only --profile=auto
|
||||
```
|
||||
|
||||
For small systems or slow customer networks, the installer skips default model downloads on lightweight profiles. To force the same behavior manually:
|
||||
|
||||
```bash
|
||||
sudo bash install.sh --software-only --profile=cpu-ai --skip-model-pull
|
||||
```
|
||||
|
||||
Profiles:
|
||||
|
||||
| Profile | Use When | Installs |
|
||||
|---|---|---|
|
||||
| `core` | no GPU / low RAM | portal, backend, nginx, health/metrics API |
|
||||
| `cpu-ai` | 32 GB+ RAM, no usable GPU | core + Chroma/Ollama CPU path, model pull optional |
|
||||
| `gpu-starter` | 24-32 GB VRAM | local AI starter stack, model pull optional |
|
||||
| `gpu-standard` | 48-96 GB VRAM | standard GPU stack |
|
||||
| `gpu-pro` | multi/high-VRAM GPU | advanced GPU stack |
|
||||
| `gpu-max` | multi-node or HGX-class | full stack, custom sizing |
|
||||
|
||||
## Sellable v1 Admin APIs
|
||||
|
||||
The backend exposes the first productization APIs for software-only and appliance deployments:
|
||||
|
||||
| API | Purpose |
|
||||
|---|---|
|
||||
| `GET /api/license` | Shows current tier, feature matrix, and whether the tier is locked by Cezen. |
|
||||
| `GET /api/system/feasibility` | Returns the generated hardware feasibility report or live fallback. |
|
||||
| `GET /api/system/readiness-report` | Combines license, feasibility, and install readiness into a customer-facing report payload. |
|
||||
| `GET /api/audit/report?days=7` | Basic audit summary for handover and admin review. |
|
||||
| `GET /api/system/backups` | Lists local backups. |
|
||||
| `POST /api/system/backups` | Creates a local backup of Cezen data. |
|
||||
| `POST /api/system/backups/{name}/restore` | Restores a named local backup and creates a pre-restore safety snapshot. |
|
||||
|
||||
CLI backup helper:
|
||||
|
||||
```bash
|
||||
sudo bash scripts/cezen-backup.sh backup
|
||||
sudo bash scripts/cezen-backup.sh list
|
||||
sudo bash scripts/cezen-backup.sh restore /opt/cezen/backups/cezen-backup-YYYYmmdd-HHMMSS.zip
|
||||
```
|
||||
|
||||
## What Gets Installed (Entry Tier)
|
||||
|
||||
| Service | Port | Notes |
|
||||
@ -42,7 +97,10 @@ NVIDIA driver install will succeed but `nvidia-smi` won't show GPUs — that's e
|
||||
## Pull More Models
|
||||
|
||||
```bash
|
||||
bash models/pull-models.sh --tier=entry
|
||||
bash models/pull-models.sh --tier=starter # phi3:mini + embeddings
|
||||
bash models/pull-models.sh --tier=basic # llama3.1:8b, mistral:7b, codellama
|
||||
bash models/pull-models.sh --tier=pro # + llama3.1:70b, mixtral, deepseek-coder
|
||||
bash models/pull-models.sh --tier=max # + llama3.1:405b, mixtral:8x22b
|
||||
```
|
||||
|
||||
## File Structure
|
||||
@ -52,7 +110,10 @@ cgit/
|
||||
├── install.sh ← Entry point
|
||||
├── ansible/
|
||||
│ ├── phase1_nvidia.yml ← Phase 1: drivers (triggers reboot)
|
||||
│ ├── entry.yml ← Phase 2: full stack
|
||||
│ ├── starter.yml ← Phase 2: Starter tier (1 GPU, small team)
|
||||
│ ├── entry.yml ← Phase 2: Basic tier (1–2 GPU, department)
|
||||
│ ├── pro.yml ← Phase 2: Pro tier (2+ GPU, multi-team)
|
||||
│ ├── max.yml ← Phase 2: Max tier (4–8 GPU, enterprise)
|
||||
│ └── roles/
|
||||
│ ├── base/ ← OS, Python, Miniconda, LangChain
|
||||
│ ├── nvidia/ ← Drivers, CUDA 12.4, cuDNN 9
|
||||
|
||||
@ -1,7 +1,7 @@
|
||||
---
|
||||
# Phase 2: Full Cezen AI Suite — Entry Tier
|
||||
# Phase 2: Full Nexus One AI — Entry Tier
|
||||
# Runs after NVIDIA driver reboot
|
||||
- name: Cezen AI — Entry Tier Stack
|
||||
- name: Nexus One AI — Entry Tier Stack
|
||||
hosts: localhost
|
||||
connection: local
|
||||
become: true
|
||||
@ -35,3 +35,9 @@
|
||||
when: "'minio' not in skip_roles.split(',')"
|
||||
- role: monitoring
|
||||
when: "'monitoring' not in skip_roles.split(',')"
|
||||
- role: cezen-backend
|
||||
when: "'cezen-backend' not in skip_roles.split(',')"
|
||||
- role: cezen-ttyd
|
||||
when: "'cezen-ttyd' not in skip_roles.split(',')"
|
||||
- role: cezen-nginx
|
||||
when: "'cezen-nginx' not in skip_roles.split(',')"
|
||||
|
||||
83
ansible/max.yml
Normal file
83
ansible/max.yml
Normal file
@ -0,0 +1,83 @@
|
||||
---
|
||||
# Nexus One AI — Max Tier Stack
|
||||
# Hardware: 4–8× NVIDIA H100/A100/RTX 5090 (80–320 GB VRAM total), 256–512 GB DDR5, 8 TB+ NVMe, 100 GbE
|
||||
# Capacity: 100+ concurrent users
|
||||
# Runs after NVIDIA driver reboot (phase1_nvidia.yml)
|
||||
#
|
||||
# Differences from Pro tier:
|
||||
# - vLLM tensor-parallel across 4+ GPUs (set vllm_tensor_parallel to GPU count)
|
||||
# - Full precision models (no quantization required)
|
||||
# - Advanced fine-tuning (QLoRA + DeepSpeed ZeRO-3 for multi-GPU training)
|
||||
# - Full MLflow + MinIO stack for experiment tracking and artifact storage
|
||||
# - All optional services enabled by default
|
||||
|
||||
- name: Nexus One AI — Max Tier Stack
|
||||
hosts: localhost
|
||||
connection: local
|
||||
become: true
|
||||
vars:
|
||||
cezen_user: "cezen"
|
||||
cezen_home: "/opt/cezen"
|
||||
cezen_login_home: "/home/cezen"
|
||||
python_version: "3.11"
|
||||
cuda_version: "12.6"
|
||||
skip_roles: "" # comma-separated list of roles to skip
|
||||
gpu_available: false
|
||||
tier: "max"
|
||||
|
||||
# ── vLLM — Max defaults ──────────────────────
|
||||
# Full-precision Llama-3.1-70B across 4 GPUs by default.
|
||||
# For HGX/DGX-class systems with 8 GPUs set vllm_tensor_parallel: 8
|
||||
# and switch to Llama-3.1-405B or Mixtral-8x22B.
|
||||
vllm_model: "meta-llama/Meta-Llama-3.1-70B-Instruct"
|
||||
vllm_tensor_parallel: 4
|
||||
vllm_gpu_memory_util: "0.90"
|
||||
vllm_max_model_len: 32768
|
||||
vllm_quantization: "" # full precision at Max tier
|
||||
|
||||
# ── Ollama — large model defaults ────────────
|
||||
ollama_default_model: "llama3.1:70b"
|
||||
|
||||
# ── DeepSpeed — multi-GPU fine-tuning ────────
|
||||
deepspeed_enabled: true
|
||||
deepspeed_zero_stage: 3 # ZeRO-3 for large model training
|
||||
|
||||
roles:
|
||||
- role: base
|
||||
when: "'base' not in skip_roles.split(',')"
|
||||
|
||||
- role: docker
|
||||
when: "'docker' not in skip_roles.split(',')"
|
||||
|
||||
- role: k3s
|
||||
when: "'k3s' not in skip_roles.split(',')"
|
||||
|
||||
- role: ollama
|
||||
when: "'ollama' not in skip_roles.split(',')"
|
||||
|
||||
- role: vllm
|
||||
when: "'vllm' not in skip_roles.split(',')"
|
||||
|
||||
- role: jupyterlab
|
||||
when: "'jupyterlab' not in skip_roles.split(',')"
|
||||
|
||||
- role: chromadb
|
||||
when: "'chromadb' not in skip_roles.split(',')"
|
||||
|
||||
- role: mlflow
|
||||
when: "'mlflow' not in skip_roles.split(',')"
|
||||
|
||||
- role: minio
|
||||
when: "'minio' not in skip_roles.split(',')"
|
||||
|
||||
- role: monitoring
|
||||
when: "'monitoring' not in skip_roles.split(',')"
|
||||
|
||||
- role: cezen-backend
|
||||
when: "'cezen-backend' not in skip_roles.split(',')"
|
||||
|
||||
- role: cezen-ttyd
|
||||
when: "'cezen-ttyd' not in skip_roles.split(',')"
|
||||
|
||||
- role: cezen-nginx
|
||||
when: "'cezen-nginx' not in skip_roles.split(',')"
|
||||
@ -1,6 +1,6 @@
|
||||
---
|
||||
# Phase 1: NVIDIA drivers only. Server reboots after this.
|
||||
- name: Cezen AI — Phase 1 NVIDIA Drivers
|
||||
- name: Nexus One AI — Phase 1 NVIDIA Drivers
|
||||
hosts: localhost
|
||||
connection: local
|
||||
become: true
|
||||
|
||||
79
ansible/pro.yml
Normal file
79
ansible/pro.yml
Normal file
@ -0,0 +1,79 @@
|
||||
---
|
||||
# Nexus One AI — Pro Tier Stack
|
||||
# Hardware: 2× NVIDIA RTX 5090 (32 GB GDDR7 each / 64 GB total), 128 GB DDR5, 4 TB NVMe, 10 GbE
|
||||
# Capacity: 20–100 concurrent users
|
||||
# Runs after NVIDIA driver reboot (phase1_nvidia.yml)
|
||||
#
|
||||
# Differences from Basic tier:
|
||||
# - k3s included (multi-service orchestration at this scale)
|
||||
# - MLflow included (fine-tuning tracking needed at Pro)
|
||||
# - MinIO included (model + data storage at scale)
|
||||
# - vLLM runs tensor-parallel across 2 GPUs
|
||||
# - QLoRA fine-tuning available via portal
|
||||
|
||||
- name: Nexus One AI — Pro Tier Stack
|
||||
hosts: localhost
|
||||
connection: local
|
||||
become: true
|
||||
vars:
|
||||
cezen_user: "cezen"
|
||||
cezen_home: "/opt/cezen"
|
||||
cezen_login_home: "/home/cezen"
|
||||
python_version: "3.11"
|
||||
cuda_version: "12.6"
|
||||
skip_roles: "" # comma-separated list of roles to skip
|
||||
gpu_available: false
|
||||
tier: "pro"
|
||||
|
||||
# ── vLLM — Pro defaults ──────────────────────
|
||||
# Tensor-parallel across 2× RTX 5090 (64 GB combined GDDR7).
|
||||
# Llama-3.1-70B at 4-bit fits comfortably; switch to full-precision
|
||||
# smaller models via the portal Model Manager.
|
||||
vllm_model: "meta-llama/Meta-Llama-3.1-70B-Instruct"
|
||||
vllm_tensor_parallel: 2
|
||||
vllm_gpu_memory_util: "0.85"
|
||||
vllm_max_model_len: 8192
|
||||
vllm_quantization: "awq"
|
||||
|
||||
# ── Ollama — full-size models ─────────────────
|
||||
ollama_default_model: "llama3.1:70b"
|
||||
|
||||
roles:
|
||||
- role: base
|
||||
when: "'base' not in skip_roles.split(',')"
|
||||
|
||||
- role: docker
|
||||
when: "'docker' not in skip_roles.split(',')"
|
||||
|
||||
- role: k3s
|
||||
when: "'k3s' not in skip_roles.split(',')"
|
||||
|
||||
- role: ollama
|
||||
when: "'ollama' not in skip_roles.split(',')"
|
||||
|
||||
- role: vllm
|
||||
when: "'vllm' not in skip_roles.split(',')"
|
||||
|
||||
- role: jupyterlab
|
||||
when: "'jupyterlab' not in skip_roles.split(',')"
|
||||
|
||||
- role: chromadb
|
||||
when: "'chromadb' not in skip_roles.split(',')"
|
||||
|
||||
- role: mlflow
|
||||
when: "'mlflow' not in skip_roles.split(',')"
|
||||
|
||||
- role: minio
|
||||
when: "'minio' not in skip_roles.split(',')"
|
||||
|
||||
- role: monitoring
|
||||
when: "'monitoring' not in skip_roles.split(',')"
|
||||
|
||||
- role: cezen-backend
|
||||
when: "'cezen-backend' not in skip_roles.split(',')"
|
||||
|
||||
- role: cezen-ttyd
|
||||
when: "'cezen-ttyd' not in skip_roles.split(',')"
|
||||
|
||||
- role: cezen-nginx
|
||||
when: "'cezen-nginx' not in skip_roles.split(',')"
|
||||
@ -50,6 +50,18 @@
|
||||
- "{{ cezen_home }}/models"
|
||||
- "{{ cezen_home }}/data"
|
||||
- "{{ cezen_home }}/logs"
|
||||
- "{{ cezen_home }}/scripts"
|
||||
|
||||
- name: Install Cezen operational helper scripts
|
||||
copy:
|
||||
src: "{{ playbook_dir }}/../scripts/{{ item }}"
|
||||
dest: "{{ cezen_home }}/scripts/{{ item }}"
|
||||
owner: "{{ cezen_user }}"
|
||||
group: "{{ cezen_user }}"
|
||||
mode: "0755"
|
||||
loop:
|
||||
- cezen-backup.sh
|
||||
- cezen-feasibility.sh
|
||||
|
||||
- name: Download Miniconda
|
||||
get_url:
|
||||
|
||||
20
ansible/roles/cezen-backend/files/cezen-api.service
Normal file
20
ansible/roles/cezen-backend/files/cezen-api.service
Normal file
@ -0,0 +1,20 @@
|
||||
[Unit]
|
||||
Description=Nexus One AI Management API
|
||||
After=network-online.target ollama.service
|
||||
Wants=network-online.target
|
||||
|
||||
[Service]
|
||||
Type=simple
|
||||
User=cezen
|
||||
WorkingDirectory=/opt/cezen/backend
|
||||
Environment="CEZEN_DATA=/opt/cezen/data"
|
||||
Environment="OLLAMA_URL=http://localhost:11434"
|
||||
Environment="PATH=/opt/cezen/backend/venv/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin"
|
||||
ExecStart=/opt/cezen/backend/venv/bin/uvicorn main:app --host 0.0.0.0 --port 8080 --workers 2
|
||||
Restart=always
|
||||
RestartSec=5
|
||||
StandardOutput=journal
|
||||
StandardError=journal
|
||||
|
||||
[Install]
|
||||
WantedBy=multi-user.target
|
||||
4925
ansible/roles/cezen-backend/files/main.py
Normal file
4925
ansible/roles/cezen-backend/files/main.py
Normal file
File diff suppressed because it is too large
Load Diff
13
ansible/roles/cezen-backend/files/requirements.txt
Normal file
13
ansible/roles/cezen-backend/files/requirements.txt
Normal file
@ -0,0 +1,13 @@
|
||||
fastapi>=0.111.0
|
||||
uvicorn[standard]>=0.29.0
|
||||
python-jose[cryptography]>=3.3.0
|
||||
passlib[bcrypt]>=1.7.4
|
||||
bcrypt<4.0.0
|
||||
psutil>=5.9.0
|
||||
python-multipart>=0.0.9
|
||||
aiofiles>=23.0.0
|
||||
# Document Intelligence
|
||||
pymupdf>=1.24.0 # PDF text extraction (fitz)
|
||||
python-docx>=1.1.0 # Word document extraction
|
||||
# Scheduled Jobs
|
||||
apscheduler>=3.10.0 # In-process cron/interval scheduler
|
||||
309
ansible/roles/cezen-backend/files/train_qlora.py
Normal file
309
ansible/roles/cezen-backend/files/train_qlora.py
Normal file
@ -0,0 +1,309 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Nexus One AI — QLoRA Fine-Tuning Runner
|
||||
Launched as a subprocess by the FastAPI backend.
|
||||
|
||||
Writes structured JSONL log lines to --log-path so the UI can stream
|
||||
live loss curves and progress. Updates training_jobs.status in SQLite.
|
||||
|
||||
Requires (install on the training node):
|
||||
    pip install torch transformers datasets peft bitsandbytes accelerate trl sentencepiece
|
||||
|
||||
Optional (faster, lower VRAM):
|
||||
pip install unsloth
|
||||
|
||||
Usage (called by main.py — do not run manually in production):
|
||||
python3 train_qlora.py --job-id 1 --db-path /opt/cezen/data/cezen.db \
|
||||
--dataset /opt/cezen/data/datasets/abc.jsonl \
|
||||
--base-model mistral:7b --output-dir /opt/cezen/data/finetuned/mymodel \
|
||||
--log-path /opt/cezen/data/job_logs/abc.jsonl \
|
||||
--epochs 3 --lr 2e-4 --batch-size 4 --lora-r 16 --lora-alpha 32 \
|
||||
--output-name mymodel
|
||||
"""
|
||||
|
||||
import argparse, json, os, sqlite3, sys, time
|
||||
from datetime import datetime, timezone
|
||||
from pathlib import Path
|
||||
|
||||
# ── Argument parsing ──────────────────────────────────────────────────────────
|
||||
|
||||
# Command-line interface. The module docstring states this script is launched
# by the backend (main.py), which passes every flag below. The parsed
# namespace is stored in the module-level ``args`` and read directly by the
# helper functions in this file.
parser = argparse.ArgumentParser()
# Required: job identity and filesystem locations.
parser.add_argument("--job-id", type=int, required=True)       # row id in training_jobs
parser.add_argument("--db-path", required=True)                # SQLite database file
parser.add_argument("--dataset", required=True)                # JSONL or CSV training data
parser.add_argument("--base-model", required=True)             # Ollama-style tag or HF repo id
parser.add_argument("--output-dir", required=True)             # where the trained model is saved
parser.add_argument("--log-path", required=True)               # JSONL progress log (appended to)
parser.add_argument("--output-name", required=True)            # name used for `ollama create`
# Optional hyperparameters with their defaults.
parser.add_argument("--epochs", type=int, default=3)
parser.add_argument("--lr", type=float, default=2e-4)
parser.add_argument("--batch-size", type=int, default=4)
parser.add_argument("--lora-r", type=int, default=16)
parser.add_argument("--lora-alpha", type=int, default=32)
# Parsing happens at import time, so importing this module without the
# required flags exits via argparse.
args = parser.parse_args()
|
||||
|
||||
# ── Helpers ───────────────────────────────────────────────────────────────────
|
||||
|
||||
def utcnow():
    """Return the current UTC time as an ISO-8601 string (with +00:00 offset)."""
    now = datetime.now(tz=timezone.utc)
    return now.isoformat()
|
||||
|
||||
def db_connect():
    """Open a new SQLite connection to ``args.db_path``.

    Rows are returned as ``sqlite3.Row`` objects, so columns can be read by
    name as well as by index. The caller is responsible for closing the
    connection.
    """
    connection = sqlite3.connect(args.db_path)
    connection.row_factory = sqlite3.Row
    return connection
|
||||
|
||||
# Append-mode, line-buffered handle: each log line is flushed as soon as it
# is written. Closed in the ``__main__`` guard at the bottom of the file.
log_file = open(args.log_path, "a", buffering=1)


def log(type_: str, **kwargs):
    """Append one JSON object (a single JSONL line) to the job log.

    Args:
        type_: Event type stored under the ``"type"`` key.
        **kwargs: Extra fields merged into the entry; a ``ts`` or ``type``
            keyword overrides the generated value.
    """
    record = {"ts": utcnow(), "type": type_}
    record.update(kwargs)
    serialized = json.dumps(record)
    log_file.write(serialized + "\n")
|
||||
|
||||
def set_status(status: str):
    """Persist the job's status in the ``training_jobs`` table.

    Terminal statuses ("completed", "failed", "cancelled") also stamp
    ``finished_at`` with the current UTC time; any other status updates only
    the ``status`` column.

    Args:
        status: New status for the row whose id is ``args.job_id``.

    Raises:
        sqlite3.Error: If the update or commit fails. The connection is
            closed before the error propagates.
    """
    is_terminal = status in ("completed", "failed", "cancelled")
    db = db_connect()
    try:
        if is_terminal:
            db.execute(
                "UPDATE training_jobs SET status=?, finished_at=? WHERE id=?",
                (status, utcnow(), args.job_id)
            )
        else:
            db.execute("UPDATE training_jobs SET status=? WHERE id=?", (status, args.job_id))
        db.commit()
    finally:
        # Release the handle even when execute/commit raises (for example a
        # locked database), so a failed update does not leak the connection.
        db.close()
|
||||
|
||||
# ── Dataset loading ───────────────────────────────────────────────────────────
|
||||
|
||||
def load_dataset_from_file(path: str):
    """Load a CSV or JSONL dataset into a list of dicts.

    Files with a ``.csv`` suffix (case-insensitive) are read with
    ``csv.DictReader``; every other file is treated as JSONL, one JSON object
    per line. Loading is best-effort: blank lines, lines that are not valid
    JSON, and JSON values that are not objects (lists, strings, numbers) are
    skipped, so every returned row is a dict that ``format_row`` can handle.
    Undecodable bytes are replaced rather than raising.

    Args:
        path: Filesystem path of the dataset.

    Returns:
        A list of row dicts; empty when nothing usable was found.

    Raises:
        OSError: If the file cannot be opened.
    """
    if Path(path).suffix.lower() == ".csv":
        import csv
        with open(path, newline="", encoding="utf-8", errors="replace") as f:
            return [dict(row) for row in csv.DictReader(f)]

    rows = []
    with open(path, encoding="utf-8", errors="replace") as f:
        for line in f:
            line = line.strip()
            if not line:
                continue
            try:
                record = json.loads(line)
            except ValueError:
                # json.JSONDecodeError subclasses ValueError: skip the
                # malformed line instead of aborting the whole load.
                continue
            # A valid JSON line can still be a list or scalar; only objects
            # are usable as training rows.
            if isinstance(record, dict):
                rows.append(record)
    return rows
|
||||
|
||||
def format_row(row: dict) -> str:
    """Render one dataset row as the plain-text string used for training.

    Precedence:
      1. a ``text`` field is returned as-is;
      2. ``prompt`` + ``completion`` are wrapped in the
         "### Instruction / ### Response" template;
      3. ``instruction`` + ``output`` use the same template, with an
         "### Input" section when ``input`` is non-empty;
      4. anything else is flattened by joining its truthy values with spaces.
    """
    if "text" in row:
        return row["text"]

    if "prompt" in row and "completion" in row:
        question, answer = row["prompt"], row["completion"]
        return f"### Instruction:\n{question}\n\n### Response:\n{answer}"

    if "instruction" in row and "output" in row:
        context = row.get("input", "")
        sections = [f"### Instruction:\n{row['instruction']}"]
        if context:
            sections.append(f"### Input:\n{context}")
        sections.append(f"### Response:\n{row['output']}")
        return "\n\n".join(sections)

    # Fallback: concatenate every truthy value
    return " ".join(map(str, filter(None, row.values())))
|
||||
|
||||
# ── Main training routine ─────────────────────────────────────────────────────
|
||||
|
||||
def main():
    """Run one QLoRA fine-tuning job end to end.

    Steps, in order: log the start event and mark the job "running"; resolve
    the base model name; load and format the dataset; load the model with
    4-bit quantisation plus LoRA adapters (Unsloth when importable, otherwise
    transformers + peft); train with TRL's SFTTrainer while streaming loss
    entries to the job log; save model and tokenizer to ``args.output_dir``;
    attempt Ollama registration; mark the job "completed".

    An empty dataset or any exception during model setup/training logs an
    error, marks the job "failed" and exits with status 1.
    """
    log("start", job_id=args.job_id, base_model=args.base_model,
        epochs=args.epochs, lr=args.lr, batch_size=args.batch_size,
        lora_r=args.lora_r, lora_alpha=args.lora_alpha)
    set_status("running")

    # Resolve model name (Ollama uses "mistral:7b" style — strip the tag for HF)
    hf_model = args.base_model
    if ":" in hf_model and "/" not in hf_model:
        # e.g. "mistral:7b" → try to map to HF repo
        # Only the part before ":" is looked up, so the size tag is ignored
        # and every entry maps to one fixed repo.
        name_map = {
            "mistral": "mistralai/Mistral-7B-v0.1",
            "llama2": "meta-llama/Llama-2-7b-hf",
            "llama3": "meta-llama/Meta-Llama-3-8B",
            "phi3": "microsoft/Phi-3-mini-4k-instruct",
            "gemma": "google/gemma-7b",
            "codellama":"codellama/CodeLlama-7b-hf",
            "qwen2": "Qwen/Qwen2-7B",
        }
        base_name = hf_model.split(":")[0].lower()
        # Names missing from the table (e.g. "llama3.1") fall through
        # unchanged, and the "Mapped" line below is still logged.
        # NOTE(review): an unmapped "name:tag" is presumably not a valid
        # HuggingFace repo id — confirm whether this should fail fast.
        hf_model = name_map.get(base_name, hf_model)
        log("info", msg=f"Mapped '{args.base_model}' → '{hf_model}' (HuggingFace)")

    # Load dataset
    log("info", msg="Loading dataset...")
    raw_rows = load_dataset_from_file(args.dataset)
    if not raw_rows:
        log("error", msg="Dataset is empty or could not be parsed")
        set_status("failed")
        sys.exit(1)

    texts = [format_row(r) for r in raw_rows]
    log("info", msg=f"Loaded {len(texts)} training examples")

    # Try Unsloth first (faster), fall back to HF PEFT
    use_unsloth = False
    try:
        from unsloth import FastLanguageModel
        use_unsloth = True
        log("info", msg="Using Unsloth for accelerated training")
    except ImportError:
        log("info", msg="Unsloth not available — using HuggingFace PEFT + BitsAndBytes")

    try:
        # Heavy third-party imports live inside the try so a missing package
        # is reported through the job log and status, not as a bare traceback.
        import torch
        from transformers import TrainingArguments, TrainerCallback
        from datasets import Dataset as HFDataset

        if use_unsloth:
            model, tokenizer = FastLanguageModel.from_pretrained(
                model_name=hf_model,
                max_seq_length=2048,
                dtype=None,
                load_in_4bit=True,
            )
            model = FastLanguageModel.get_peft_model(
                model,
                r=args.lora_r,
                lora_alpha=args.lora_alpha,
                target_modules=["q_proj","k_proj","v_proj","o_proj","gate_proj","up_proj","down_proj"],
                lora_dropout=0,
                bias="none",
                use_gradient_checkpointing="unsloth",
            )
        else:
            from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
            from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training

            bnb_cfg = BitsAndBytesConfig(
                load_in_4bit=True,
                bnb_4bit_quant_type="nf4",
                bnb_4bit_compute_dtype=torch.float16,
            )
            tokenizer = AutoTokenizer.from_pretrained(hf_model, trust_remote_code=True)
            # Padding to max_length below needs a pad token; reuse EOS when
            # the tokenizer defines none.
            if tokenizer.pad_token is None:
                tokenizer.pad_token = tokenizer.eos_token

            model = AutoModelForCausalLM.from_pretrained(
                hf_model,
                quantization_config=bnb_cfg,
                device_map="auto",
                trust_remote_code=True,
            )
            model = prepare_model_for_kbit_training(model)

            # This path adapts only the attention projections and uses
            # dropout 0.05; the Unsloth path above also adapts the MLP
            # projections and uses dropout 0.
            lora_cfg = LoraConfig(
                r=args.lora_r,
                lora_alpha=args.lora_alpha,
                target_modules=["q_proj","k_proj","v_proj","o_proj"],
                lora_dropout=0.05,
                bias="none",
                task_type="CAUSAL_LM",
            )
            model = get_peft_model(model, lora_cfg)

        # Tokenise
        # Every example is truncated and padded to exactly 2048 tokens.
        def tokenise(examples):
            return tokenizer(
                examples["text"],
                truncation=True,
                max_length=2048,
                padding="max_length",
            )

        hf_ds = HFDataset.from_dict({"text": texts})
        # The raw "text" column is dropped; only tokenizer outputs remain.
        hf_ds = hf_ds.map(tokenise, batched=True, remove_columns=["text"])

        # Custom callback to stream loss to our log
        class LossLogger(TrainerCallback):
            # The trainer's own arguments are received as ``_args`` so the
            # module-level ``args`` stays reachable for the ``args.lr``
            # fallback below.
            def on_log(self, _args, state, control, logs=None, **kwargs):
                if logs and "loss" in logs:
                    log("loss",
                        step=state.global_step,
                        loss=round(float(logs["loss"]), 6),
                        epoch=round(float(logs.get("epoch", 0)), 3),
                        lr=float(logs.get("learning_rate", args.lr)))

        output_dir = args.output_dir
        Path(output_dir).mkdir(parents=True, exist_ok=True)

        from trl import SFTTrainer
        # NOTE(review): the dataset was already tokenised and its "text"
        # column removed, yet dataset_text_field names "input_ids" (a token
        # column, not text). Whether SFTTrainer accepts this — and the
        # tokenizer=/max_seq_length= keywords — presumably depends on the
        # installed TRL version; confirm against the pinned release.
        trainer = SFTTrainer(
            model=model,
            tokenizer=tokenizer,
            train_dataset=hf_ds,
            dataset_text_field="input_ids",
            max_seq_length=2048,
            args=TrainingArguments(
                output_dir=output_dir,
                num_train_epochs=args.epochs,
                per_device_train_batch_size=args.batch_size,
                gradient_accumulation_steps=4,
                warmup_steps=5,
                learning_rate=args.lr,
                # Exactly one of fp16/bf16 is enabled: the two flags are
                # negations of the same capability check.
                fp16=not torch.cuda.is_bf16_supported(),
                bf16=torch.cuda.is_bf16_supported(),
                # Log every step so the callback emits one loss entry per step.
                logging_steps=1,
                save_strategy="epoch",
                report_to="none",
            ),
            callbacks=[LossLogger()],
        )

        log("info", msg="Training started")
        trainer.train()
        log("info", msg="Training complete — saving model")
        trainer.save_model(output_dir)
        tokenizer.save_pretrained(output_dir)

    except Exception as e:
        import traceback
        log("error", msg=str(e), traceback=traceback.format_exc())
        set_status("failed")
        # SystemExit is not an Exception subclass, so this exit is not
        # re-caught by an ``except Exception`` handler further up.
        sys.exit(1)

    # Auto-register with Ollama via Modelfile
    # Registration is best-effort: a failure is logged as a warning and the
    # job is still marked completed below.
    try:
        _register_with_ollama(output_dir, args.output_name)
    except Exception as e:
        log("warning", msg=f"Could not auto-register with Ollama: {e}")

    log("complete", msg="Job finished successfully", output_dir=output_dir)
    set_status("completed")
|
||||
|
||||
|
||||
def _register_with_ollama(model_dir: str, model_name: str):
    """Write a Modelfile into *model_dir* and run ``ollama create`` on it.

    The Modelfile points ``FROM`` at *model_dir* and sets one stop token and
    a system prompt. The outcome is reported through ``log``: a non-zero exit
    code produces a warning carrying the command's stderr and is not raised.

    Args:
        model_dir: Directory holding the saved model; the Modelfile is
            written there.
        model_name: Name passed to ``ollama create``.
    """
    import subprocess

    modelfile_path = Path(model_dir) / "Modelfile"
    directives = (
        f"FROM {model_dir}",
        'PARAMETER stop "<|im_end|>"',
        'SYSTEM "This is a Nexus One AI fine-tuned model."',
    )
    modelfile_path.write_text("".join(f"{directive}\n" for directive in directives))

    # Argument list (no shell), bounded to five minutes.
    command = ["ollama", "create", model_name, "-f", str(modelfile_path)]
    result = subprocess.run(command, capture_output=True, text=True, timeout=300)

    if result.returncode != 0:
        log("warning", msg=f"Ollama registration failed: {result.stderr}")
        return
    log("info", msg=f"Model '{model_name}' registered with Ollama")
|
||||
|
||||
|
||||
if __name__ == "__main__":
    import signal

    def _raise_interrupt(signum, frame):
        """Convert SIGTERM into KeyboardInterrupt so the cancel path runs."""
        raise KeyboardInterrupt

    # Python raises KeyboardInterrupt only for SIGINT by default. Without
    # this handler a SIGTERM would end the process immediately, skipping the
    # "cancelled" status update and the log-file close below.
    signal.signal(signal.SIGTERM, _raise_interrupt)

    try:
        main()
    except KeyboardInterrupt:
        # Reached for Ctrl-C (SIGINT) and, via the handler above, SIGTERM.
        log("error", msg="Job interrupted (SIGTERM/SIGINT)")
        set_status("cancelled")
        sys.exit(130)
    except Exception as e:
        # Last-resort handler for anything main() did not report itself.
        import traceback
        log("error", msg=str(e), traceback=traceback.format_exc())
        set_status("failed")
        sys.exit(1)
    finally:
        # Runs on every exit path, including the sys.exit() calls above.
        log_file.close()
|
||||
9
ansible/roles/cezen-backend/handlers/main.yml
Normal file
9
ansible/roles/cezen-backend/handlers/main.yml
Normal file
@ -0,0 +1,9 @@
|
||||
---
|
||||
- name: Reload systemd
|
||||
systemd:
|
||||
daemon_reload: yes
|
||||
|
||||
- name: Restart cezen-api
|
||||
systemd:
|
||||
name: cezen-api
|
||||
state: restarted
|
||||
113
ansible/roles/cezen-backend/tasks/main.yml
Normal file
113
ansible/roles/cezen-backend/tasks/main.yml
Normal file
@ -0,0 +1,113 @@
|
||||
---
|
||||
# cezen-backend role: installs the Nexus One AI FastAPI management API
|
||||
|
||||
- name: Install system Python deps
|
||||
apt:
|
||||
name:
|
||||
- python3-pip
|
||||
- python3-venv
|
||||
- python3.11
|
||||
- python3.11-venv
|
||||
- libmupdf-dev # required by pymupdf (Document Intelligence)
|
||||
- mupdf-tools
|
||||
state: present
|
||||
update_cache: yes
|
||||
|
||||
- name: Create backend directory
|
||||
file:
|
||||
path: /opt/cezen/backend
|
||||
state: directory
|
||||
owner: "{{ cezen_user }}"
|
||||
group: "{{ cezen_user }}"
|
||||
mode: "0755"
|
||||
|
||||
- name: Create data directory (JWT secret + SQLite DB)
|
||||
file:
|
||||
path: /opt/cezen/data
|
||||
state: directory
|
||||
owner: "{{ cezen_user }}"
|
||||
group: "{{ cezen_user }}"
|
||||
mode: "0750"
|
||||
|
||||
- name: Copy FastAPI application
|
||||
copy:
|
||||
src: main.py
|
||||
dest: /opt/cezen/backend/main.py
|
||||
owner: "{{ cezen_user }}"
|
||||
group: "{{ cezen_user }}"
|
||||
mode: "0644"
|
||||
notify: Restart cezen-api
|
||||
|
||||
- name: Copy QLoRA training runner
|
||||
copy:
|
||||
src: train_qlora.py
|
||||
dest: /opt/cezen/backend/train_qlora.py
|
||||
owner: "{{ cezen_user }}"
|
||||
group: "{{ cezen_user }}"
|
||||
mode: "0755"
|
||||
|
||||
- name: Copy requirements.txt
|
||||
copy:
|
||||
src: requirements.txt
|
||||
dest: /opt/cezen/backend/requirements.txt
|
||||
owner: "{{ cezen_user }}"
|
||||
group: "{{ cezen_user }}"
|
||||
mode: "0644"
|
||||
|
||||
- name: Create Python virtual environment (Python 3.11)
|
||||
become_user: "{{ cezen_user }}"
|
||||
command: python3.11 -m venv /opt/cezen/backend/venv
|
||||
args:
|
||||
creates: /opt/cezen/backend/venv/bin/activate
|
||||
|
||||
- name: Install Python dependencies
|
||||
become_user: "{{ cezen_user }}"
|
||||
pip:
|
||||
requirements: /opt/cezen/backend/requirements.txt
|
||||
virtualenv: /opt/cezen/backend/venv
|
||||
notify: Restart cezen-api
|
||||
|
||||
- name: Install Pro/Max fine-tuning dependencies
|
||||
become_user: "{{ cezen_user }}"
|
||||
pip:
|
||||
name:
|
||||
- torch
|
||||
- transformers
|
||||
- datasets
|
||||
- peft
|
||||
- bitsandbytes
|
||||
- accelerate
|
||||
- trl
|
||||
- sentencepiece
|
||||
virtualenv: /opt/cezen/backend/venv
|
||||
retries: 3
|
||||
delay: 15
|
||||
when: (tier | default('basic')) in ['pro', 'max']
|
||||
|
||||
- name: Install Max multi-GPU training dependencies
|
||||
become_user: "{{ cezen_user }}"
|
||||
pip:
|
||||
name:
|
||||
- deepspeed
|
||||
virtualenv: /opt/cezen/backend/venv
|
||||
retries: 3
|
||||
delay: 15
|
||||
when: (tier | default('basic')) == 'max'
|
||||
|
||||
- name: Install systemd service unit
|
||||
copy:
|
||||
src: cezen-api.service
|
||||
dest: /etc/systemd/system/cezen-api.service
|
||||
owner: root
|
||||
group: root
|
||||
mode: "0644"
|
||||
notify:
|
||||
- Reload systemd
|
||||
- Restart cezen-api
|
||||
|
||||
- name: Enable and start cezen-api service
|
||||
systemd:
|
||||
name: cezen-api
|
||||
enabled: yes
|
||||
state: started
|
||||
daemon_reload: yes
|
||||
135
ansible/roles/cezen-nginx/files/cezen.conf
Normal file
135
ansible/roles/cezen-nginx/files/cezen.conf
Normal file
@ -0,0 +1,135 @@
|
||||
# /etc/nginx/sites-available/cezen
|
||||
# Nexus One AI Portal — serves static portal, proxies API and console terminal
|
||||
#
|
||||
# Install:
|
||||
# sudo cp cezen.conf /etc/nginx/sites-available/cezen
|
||||
# sudo ln -sf /etc/nginx/sites-available/cezen /etc/nginx/sites-enabled/cezen
|
||||
# sudo rm -f /etc/nginx/sites-enabled/default
|
||||
# sudo nginx -t && sudo systemctl reload nginx
|
||||
|
||||
# ─── Rate limiting zones (must be outside server block) ──────────────────────
|
||||
# Login: 5 requests/min per IP; up to 3 extra requests are served immediately (burst=3 nodelay), anything beyond that gets 429
|
||||
limit_req_zone $binary_remote_addr zone=cezen_login:10m rate=5r/m;
|
||||
# General API: 60 req/min per IP (generous for dashboard polling)
|
||||
limit_req_zone $binary_remote_addr zone=cezen_api:10m rate=60r/m;
|
||||
|
||||
server {
|
||||
listen 80 default_server;
|
||||
listen [::]:80 default_server;
|
||||
|
||||
server_name _;
|
||||
|
||||
# Hide server version
|
||||
server_tokens off;
|
||||
|
||||
# Logging
|
||||
access_log /var/log/nginx/cezen-access.log;
|
||||
error_log /var/log/nginx/cezen-error.log;
|
||||
|
||||
# ─── Global security headers ──────────────────────────────────────────────
|
||||
add_header X-Content-Type-Options "nosniff" always;
|
||||
add_header X-Frame-Options "SAMEORIGIN" always;
|
||||
add_header X-XSS-Protection "1; mode=block" always;
|
||||
add_header Referrer-Policy "strict-origin-when-cross-origin" always;
|
||||
add_header Permissions-Policy "geolocation=(), camera=(), microphone=()" always;
|
||||
add_header Content-Security-Policy
|
||||
"default-src 'self'; "
|
||||
"script-src 'self' 'unsafe-inline'; "
|
||||
"style-src 'self' 'unsafe-inline'; "
|
||||
"img-src 'self' data:; "
|
||||
"connect-src 'self'; "
|
||||
"frame-src 'self'; "
|
||||
"font-src 'self'; "
|
||||
"object-src 'none'; "
|
||||
"base-uri 'self';"
|
||||
always;
|
||||
|
||||
# ─── robots.txt — block all indexing (air-gapped / private portal) ────────
|
||||
location = /robots.txt {
|
||||
return 200 "User-agent: *\nDisallow: /\n";
|
||||
add_header Content-Type text/plain;
|
||||
}
|
||||
|
||||
# ─── Static Portal ───────────────────────────────────────────────────────
|
||||
root /opt/cezen/portal;
|
||||
index index.html;
|
||||
|
||||
location / {
|
||||
try_files $uri $uri/ /index.html;
|
||||
}
|
||||
|
||||
# Cache static assets aggressively
|
||||
location ~* \.(css|js|png|jpg|jpeg|gif|ico|svg|woff2?)$ {
|
||||
expires 7d;
|
||||
add_header Cache-Control "public, immutable";
|
||||
}
|
||||
|
||||
# ─── Model upload (large files — no size limit, extended timeout) ────────
|
||||
location = /api/models/upload {
|
||||
client_max_body_size 0; # unlimited — GGUF files can be 70 GB+
|
||||
proxy_request_buffering off; # stream directly to backend, don't buffer in Nginx
|
||||
proxy_read_timeout 7200s; # 2 hours for slow transfers
|
||||
proxy_send_timeout 7200s;
|
||||
|
||||
proxy_pass http://127.0.0.1:8080;
|
||||
proxy_http_version 1.1;
|
||||
proxy_set_header Host $host;
|
||||
proxy_set_header X-Real-IP $remote_addr;
|
||||
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
|
||||
}
|
||||
|
||||
# ─── Login rate limit (tight) ─────────────────────────────────────────────
|
||||
location = /api/auth/login {
|
||||
limit_req zone=cezen_login burst=3 nodelay;
|
||||
limit_req_status 429;
|
||||
|
||||
proxy_pass http://127.0.0.1:8080;
|
||||
proxy_http_version 1.1;
|
||||
proxy_set_header Host $host;
|
||||
proxy_set_header X-Real-IP $remote_addr;
|
||||
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
|
||||
proxy_set_header X-Forwarded-Proto $scheme;
|
||||
proxy_read_timeout 30s;
|
||||
}
|
||||
|
||||
# ─── FastAPI Backend (/api/) ──────────────────────────────────────────────
|
||||
location /api/ {
|
||||
limit_req zone=cezen_api burst=20 nodelay;
|
||||
limit_req_status 429;
|
||||
|
||||
proxy_pass http://127.0.0.1:8080;
|
||||
proxy_http_version 1.1;
|
||||
proxy_set_header Host $host;
|
||||
proxy_set_header X-Real-IP $remote_addr;
|
||||
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
|
||||
proxy_set_header X-Forwarded-Proto $scheme;
|
||||
proxy_read_timeout 120s;
|
||||
}
|
||||
|
||||
# ─── Web Console (ttyd) (/console/) ──────────────────────────────────────
|
||||
location /console/ {
|
||||
proxy_pass http://127.0.0.1:7681/;
|
||||
proxy_http_version 1.1;
|
||||
proxy_set_header Upgrade $http_upgrade;
|
||||
proxy_set_header Connection "upgrade";
|
||||
proxy_set_header Host $host;
|
||||
proxy_set_header X-Real-IP $remote_addr;
|
||||
proxy_read_timeout 86400s;
|
||||
|
||||
# Rewrite paths so ttyd JS/CSS assets load correctly
|
||||
proxy_redirect / /console/;
|
||||
sub_filter 'href="/' 'href="/console/';
|
||||
sub_filter 'src="/' 'src="/console/';
|
||||
sub_filter_once off;
|
||||
sub_filter_types text/html;
|
||||
}
|
||||
|
||||
# ─── Block dotfiles and common attack paths ───────────────────────────────
|
||||
location ~ /\. {
|
||||
deny all;
|
||||
}
|
||||
|
||||
location ~* \.(env|git|sql|bak|sh|py)$ {
|
||||
deny all;
|
||||
}
|
||||
}
|
||||
5
ansible/roles/cezen-nginx/handlers/main.yml
Normal file
5
ansible/roles/cezen-nginx/handlers/main.yml
Normal file
@ -0,0 +1,5 @@
|
||||
---
|
||||
- name: Reload nginx
|
||||
systemd:
|
||||
name: nginx
|
||||
state: reloaded
|
||||
59
ansible/roles/cezen-nginx/tasks/main.yml
Normal file
59
ansible/roles/cezen-nginx/tasks/main.yml
Normal file
@ -0,0 +1,59 @@
|
||||
---
# cezen-nginx role: installs Nginx, deploys portal static files and site config.
# Tasks that change what Nginx serves notify the "Reload nginx" handler.

- name: Install Nginx
  apt:
    name:
      - nginx
      # The "Sync portal static files" task uses synchronize, which shells
      # out to rsync; make sure the binary is present.
      - rsync
    state: present
    update_cache: yes

- name: Create portal directory
  file:
    path: /opt/cezen/portal
    state: directory
    owner: "{{ cezen_user }}"
    group: www-data
    mode: "0755"

- name: Sync portal static files
  synchronize:
    # NOTE(review): this resolves three levels above the playbook directory —
    # confirm that is where cezen-portal/ lives in the deployed checkout.
    src: "{{ playbook_dir }}/../../../cezen-portal/"
    dest: /opt/cezen/portal/
    # Remove files from the destination that no longer exist in the source.
    delete: yes
    recursive: yes
    rsync_opts:
      - "--exclude=.DS_Store"
      - "--exclude=*.sh"
  notify: Reload nginx

- name: Deploy Nginx site config
  copy:
    src: cezen.conf
    dest: /etc/nginx/sites-available/cezen
    owner: root
    group: root
    mode: "0644"
  notify: Reload nginx

- name: Enable Cezen site
  file:
    src: /etc/nginx/sites-available/cezen
    dest: /etc/nginx/sites-enabled/cezen
    state: link
  notify: Reload nginx

- name: Disable default Nginx site
  file:
    path: /etc/nginx/sites-enabled/default
    state: absent
  notify: Reload nginx

# Fail the play here, before the reload handler runs, if the deployed
# configuration does not parse.
- name: Validate Nginx config
  command: nginx -t
  changed_when: false

- name: Ensure Nginx is enabled and running
  systemd:
    name: nginx
    enabled: yes
    state: started
|
||||
17
ansible/roles/cezen-ttyd/files/cezen-ttyd.service
Normal file
17
ansible/roles/cezen-ttyd/files/cezen-ttyd.service
Normal file
@ -0,0 +1,17 @@
|
||||
[Unit]
Description=Cezen Web Terminal (ttyd)
After=network.target

[Service]
# Bind to localhost only — Nginx proxies /console/ to this port.
#
# login(1) is started WITHOUT -f so that it prompts for the cezen-console
# password. With -f the user is treated as already authenticated, which would
# hand a shell to anyone able to reach the /console/ proxy.
#
# NOTE(review): --writable is only understood by newer ttyd releases —
# confirm the packaged version accepts it.
ExecStart=/usr/bin/ttyd \
    --port 7681 \
    --interface 127.0.0.1 \
    --writable \
    login cezen-console
Restart=always
RestartSec=5
# login(1) must run as root to switch to the target user.
User=root

[Install]
WantedBy=multi-user.target
|
||||
9
ansible/roles/cezen-ttyd/handlers/main.yml
Normal file
9
ansible/roles/cezen-ttyd/handlers/main.yml
Normal file
@ -0,0 +1,9 @@
|
||||
---
|
||||
- name: Reload systemd
|
||||
systemd:
|
||||
daemon_reload: yes
|
||||
|
||||
- name: Restart cezen-ttyd
|
||||
systemd:
|
||||
name: cezen-ttyd
|
||||
state: restarted
|
||||
72
ansible/roles/cezen-ttyd/tasks/main.yml
Normal file
72
ansible/roles/cezen-ttyd/tasks/main.yml
Normal file
@ -0,0 +1,72 @@
|
||||
---
|
||||
# cezen-ttyd role: browser-based terminal via ttyd, bound to localhost
|
||||
|
||||
- name: Install ttyd
|
||||
apt:
|
||||
name: ttyd
|
||||
state: present
|
||||
update_cache: yes
|
||||
|
||||
# Creates the web-console account and sets its initial password in one step.
#
# The password comes from `cezen_console_password` (override it with -e or a
# vault); the previous built-in value is kept only as a fallback so existing
# invocations keep working. `update_password: on_create` applies it only when
# the account is first created, so a password changed after first login is
# not reset by later playbook runs.
- name: Create cezen-console restricted user
  user:
    name: cezen-console
    shell: /bin/bash
    comment: "Cezen Web Console User"
    # Supplementary membership in the main service user's group.
    groups: "{{ cezen_user }}"
    append: yes
    state: present
    create_home: yes
    password: "{{ cezen_console_password | default('CezenConsole2024!') | password_hash('sha512') }}"
    update_password: on_create
  # Keep the password (and its hash) out of task output and logs.
  no_log: true
|
||||
|
||||
- name: Restrict cezen-console home directory
|
||||
file:
|
||||
path: /home/cezen-console
|
||||
owner: cezen-console
|
||||
group: cezen-console
|
||||
mode: "0750"
|
||||
|
||||
- name: Add useful aliases for console user
|
||||
copy:
|
||||
dest: /home/cezen-console/.bashrc
|
||||
owner: cezen-console
|
||||
group: cezen-console
|
||||
mode: "0644"
|
||||
content: |
|
||||
# Cezen Web Console — restricted shell environment
|
||||
PS1='\[\033[01;32m\]cezen-console\[\033[00m\]:\[\033[01;34m\]\w\[\033[00m\]\$ '
|
||||
|
||||
# Useful shortcuts
|
||||
alias ll='ls -lah --color=auto'
|
||||
alias logs='journalctl -u cezen-api -f'
|
||||
alias api-status='systemctl status cezen-api'
|
||||
alias ollama-ps='ollama ps'
|
||||
alias gpu='nvidia-smi'
|
||||
alias ports='ss -tlnp'
|
||||
|
||||
# Prevent accidental system damage
|
||||
alias rm='rm -i'
|
||||
alias mv='mv -i'
|
||||
alias cp='cp -i'
|
||||
|
||||
- name: Install ttyd systemd service
|
||||
copy:
|
||||
src: cezen-ttyd.service
|
||||
dest: /etc/systemd/system/cezen-ttyd.service
|
||||
owner: root
|
||||
group: root
|
||||
mode: "0644"
|
||||
notify:
|
||||
- Reload systemd
|
||||
- Restart cezen-ttyd
|
||||
|
||||
- name: Enable and start ttyd service
|
||||
systemd:
|
||||
name: cezen-ttyd
|
||||
enabled: yes
|
||||
state: started
|
||||
daemon_reload: yes
|
||||
@ -51,7 +51,7 @@
|
||||
copy:
|
||||
dest: /opt/cezen/notebooks/README.md
|
||||
content: |
|
||||
# Cezen AI Suite — JupyterLab
|
||||
# Nexus One AI — JupyterLab
|
||||
|
||||
Default token: `cezen2024`
|
||||
|
||||
|
||||
@ -53,5 +53,5 @@
|
||||
wait_for:
|
||||
host: localhost
|
||||
port: 5000
|
||||
timeout: 30
|
||||
timeout: 120
|
||||
ignore_errors: true
|
||||
|
||||
@ -25,6 +25,7 @@
|
||||
register: dcgm_result
|
||||
failed_when: dcgm_result.rc != 0 and 'already in use' not in dcgm_result.stderr
|
||||
ignore_errors: true
|
||||
when: gpu_available | default(false) | bool
|
||||
|
||||
# ── Prometheus ──────────────────────────────────────────
|
||||
- name: Write Prometheus config
|
||||
@ -99,7 +100,8 @@
|
||||
wait_for:
|
||||
host: localhost
|
||||
port: 3000
|
||||
timeout: 60
|
||||
timeout: 120
|
||||
register: grafana_wait
|
||||
ignore_errors: true
|
||||
|
||||
- name: Add Prometheus datasource to Grafana
|
||||
@ -118,6 +120,7 @@
|
||||
isDefault: true
|
||||
status_code: [200, 409] # 409 = already exists, that's fine
|
||||
ignore_errors: true
|
||||
when: not (grafana_wait is failed)
|
||||
|
||||
- name: Import NVIDIA GPU dashboard (ID 12239)
|
||||
uri:
|
||||
@ -143,3 +146,6 @@
|
||||
uid: "nvidia-gpu"
|
||||
status_code: [200, 412]
|
||||
ignore_errors: true
|
||||
when:
|
||||
- not (grafana_wait is failed)
|
||||
- gpu_available | default(false) | bool
|
||||
|
||||
@ -1,6 +1,6 @@
|
||||
---
|
||||
# NVIDIA role: Drivers + CUDA + cuDNN
|
||||
# NOTE: Tested on L40S (Entry) and A40 (lab). Requires reboot after this role.
|
||||
# NOTE: Tested on RTX Pro 6000 (Entry) and A40 (lab). Requires reboot after this role.
|
||||
# If no GPU is present, this role will install drivers but nvidia-smi won't show GPUs.
|
||||
|
||||
- name: Add NVIDIA package repository key
|
||||
|
||||
@ -28,7 +28,7 @@
|
||||
Environment="PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin"
|
||||
Environment="OLLAMA_HOST=0.0.0.0:11434"
|
||||
Environment="OLLAMA_MODELS=/opt/cezen/models/ollama"
|
||||
Environment="CUDA_VISIBLE_DEVICES=0,1,2"
|
||||
Environment="CUDA_VISIBLE_DEVICES=0"
|
||||
|
||||
[Install]
|
||||
WantedBy=multi-user.target
|
||||
@ -54,50 +54,49 @@
|
||||
port: 11434
|
||||
timeout: 60
|
||||
|
||||
- name: Pull default models (Llama 3.1 8B + Mistral 7B)
|
||||
- name: Select tier model set
|
||||
set_fact:
|
||||
ollama_models: >-
|
||||
{{
|
||||
{
|
||||
'starter': ['phi3:mini', 'nomic-embed-text'],
|
||||
'basic': ['llama3.1:8b', 'mistral:7b', 'nomic-embed-text', 'codellama:13b'],
|
||||
'entry': ['llama3.1:8b', 'mistral:7b', 'nomic-embed-text', 'codellama:13b'],
|
||||
'pro': ['llama3.1:8b', 'mistral:7b', 'nomic-embed-text', 'codellama:13b', 'llama3.1:70b', 'mixtral:8x7b', 'deepseek-coder-v2:16b'],
|
||||
'max': ['llama3.1:8b', 'mistral:7b', 'nomic-embed-text', 'codellama:13b', 'llama3.1:70b', 'mixtral:8x7b', 'deepseek-coder-v2:16b', 'llama3.1:405b', 'mixtral:8x22b']
|
||||
}.get(tier | default('basic'), ['llama3.1:8b', 'mistral:7b', 'nomic-embed-text'])
|
||||
}}
|
||||
|
||||
- name: Pull tier Ollama models
|
||||
become_user: cezen
|
||||
command: ollama pull {{ item }}
|
||||
loop:
|
||||
- llama3.1:8b
|
||||
- mistral:7b
|
||||
loop: "{{ ollama_models }}"
|
||||
environment:
|
||||
OLLAMA_HOST: "http://localhost:11434"
|
||||
retries: 3
|
||||
delay: 15
|
||||
# NOTE: Models are large (~5GB each). This step takes time on first run.
|
||||
# Skip by setting: ansible-playbook ... -e "skip_model_pull=true"
|
||||
# NOTE: Pro/Max models are very large. Skip with --skip-model-pull for
|
||||
# bandwidth-constrained installs, then run models/pull-models.sh later.
|
||||
when: not (skip_model_pull | default(false))
|
||||
|
||||
# Open WebUI (chat interface on top of Ollama)
|
||||
- name: Deploy Open WebUI via Docker
|
||||
community.docker.docker_container:
|
||||
name: open-webui
|
||||
image: ghcr.io/open-webui/open-webui:main
|
||||
state: started
|
||||
restart_policy: always
|
||||
ports:
|
||||
- "3001:8080"
|
||||
volumes:
|
||||
- open-webui:/app/backend/data
|
||||
env:
|
||||
OLLAMA_BASE_URL: "http://host-gateway:11434"
|
||||
etc_hosts:
|
||||
host-gateway: "172.17.0.1"
|
||||
# Note: Requires docker community collection. Install with:
|
||||
# ansible-galaxy collection install community.docker
|
||||
ignore_errors: true # Falls back gracefully if docker collection not available
|
||||
|
||||
- name: Alternative Open WebUI start (if community.docker not available)
|
||||
- name: Start Open WebUI via Docker CLI
|
||||
shell: |
|
||||
docker run -d \
|
||||
--name open-webui \
|
||||
--restart always \
|
||||
-p 3001:8080 \
|
||||
--add-host=host-gateway:172.17.0.1 \
|
||||
-v open-webui:/app/backend/data \
|
||||
-e OLLAMA_BASE_URL=http://host-gateway:11434 \
|
||||
ghcr.io/open-webui/open-webui:main
|
||||
if docker ps -a --format '{{ "{{" }}.Names{{ "}}" }}' | grep -qx open-webui; then
|
||||
docker start open-webui
|
||||
else
|
||||
docker run -d \
|
||||
--name open-webui \
|
||||
--restart always \
|
||||
-p 3001:8080 \
|
||||
--add-host=host-gateway:172.17.0.1 \
|
||||
-v open-webui:/app/backend/data \
|
||||
-e OLLAMA_BASE_URL=http://host-gateway:11434 \
|
||||
ghcr.io/open-webui/open-webui:main
|
||||
fi
|
||||
args:
|
||||
executable: /bin/bash
|
||||
register: webui_result
|
||||
failed_when: webui_result.rc != 0 and 'already in use' not in webui_result.stderr
|
||||
changed_when: webui_result.rc == 0
|
||||
failed_when: webui_result.rc != 0 and 'already in use' not in (webui_result.stderr | default(''))
|
||||
ignore_errors: true
|
||||
|
||||
7
ansible/roles/vllm/defaults/main.yml
Normal file
7
ansible/roles/vllm/defaults/main.yml
Normal file
@ -0,0 +1,7 @@
|
||||
---
|
||||
# vLLM role defaults — overridden per-tier in the tier playbook vars block
|
||||
vllm_model: "meta-llama/Meta-Llama-3.1-8B-Instruct"
|
||||
vllm_tensor_parallel: 1
|
||||
vllm_gpu_memory_util: "0.70"
|
||||
vllm_max_model_len: 8192
|
||||
vllm_quantization: "" # blank = full precision; set to "awq" for 4-bit
|
||||
@ -1,6 +1,12 @@
|
||||
---
|
||||
# vLLM — high-performance LLM inference with OpenAI-compatible API
|
||||
# Skipped automatically if no GPU is present.
|
||||
# Variables (set defaults in defaults/main.yml, override per-tier in the playbook):
|
||||
# vllm_model HuggingFace model ID to load on start
|
||||
# vllm_tensor_parallel Number of GPUs for tensor parallelism (1 for Starter/Basic)
|
||||
# vllm_gpu_memory_util Fraction of VRAM to reserve for vLLM (0.0–1.0)
|
||||
# vllm_max_model_len Maximum context length in tokens
|
||||
# vllm_quantization Quantization method: "" (none) | "awq" | "gptq" | "fp8"
|
||||
|
||||
- name: Check for NVIDIA GPU
|
||||
shell: nvidia-smi --query-gpu=name --format=csv,noheader 2>/dev/null | head -1
|
||||
@ -10,7 +16,9 @@
|
||||
|
||||
- name: Skip vLLM if no GPU detected
|
||||
debug:
|
||||
msg: "No GPU detected — skipping vLLM image pull. Run manually when GPU is available: docker pull vllm/vllm-openai:latest"
|
||||
msg: >
|
||||
No GPU detected — skipping vLLM image pull.
|
||||
Run manually when GPU is available: docker pull vllm/vllm-openai:latest
|
||||
when: gpu_check.stdout == "" or gpu_check.rc != 0
|
||||
|
||||
- name: Pull vLLM Docker image
|
||||
@ -19,18 +27,23 @@
|
||||
delay: 15
|
||||
when: gpu_check.stdout != "" and gpu_check.rc == 0
|
||||
|
||||
- name: Build vLLM quantization flag
|
||||
set_fact:
|
||||
vllm_quant_flag: "{{ '--quantization ' + vllm_quantization if vllm_quantization != '' else '' }}"
|
||||
|
||||
- name: Create vLLM systemd service
|
||||
copy:
|
||||
dest: /etc/systemd/system/vllm.service
|
||||
mode: "0644"
|
||||
content: |
|
||||
[Unit]
|
||||
Description=vLLM OpenAI-Compatible Inference Server
|
||||
After=docker.service ollama.service
|
||||
Description=vLLM OpenAI-Compatible Inference Server ({{ vllm_model }})
|
||||
After=docker.service
|
||||
Requires=docker.service
|
||||
|
||||
[Service]
|
||||
Restart=always
|
||||
RestartSec=5
|
||||
RestartSec=10
|
||||
ExecStartPre=-/usr/bin/docker stop vllm
|
||||
ExecStartPre=-/usr/bin/docker rm vllm
|
||||
ExecStart=/usr/bin/docker run \
|
||||
@ -41,15 +54,16 @@
|
||||
-v /opt/cezen/models:/root/.cache/huggingface \
|
||||
-e HF_HOME=/root/.cache/huggingface \
|
||||
vllm/vllm-openai:latest \
|
||||
--model meta-llama/Meta-Llama-3.1-8B-Instruct \
|
||||
--gpu-memory-utilization 0.7 \
|
||||
--max-model-len 8192 \
|
||||
--tensor-parallel-size 1
|
||||
--model {{ vllm_model }} \
|
||||
--gpu-memory-utilization {{ vllm_gpu_memory_util }} \
|
||||
--max-model-len {{ vllm_max_model_len }} \
|
||||
--tensor-parallel-size {{ vllm_tensor_parallel }} \
|
||||
{{ vllm_quant_flag }}
|
||||
ExecStop=/usr/bin/docker stop vllm
|
||||
TimeoutStartSec=300
|
||||
|
||||
[Install]
|
||||
WantedBy=multi-user.target
|
||||
mode: "0644"
|
||||
|
||||
- name: Create vLLM model directory
|
||||
file:
|
||||
@ -57,3 +71,26 @@
|
||||
state: directory
|
||||
owner: cezen
|
||||
group: cezen
|
||||
mode: "0755"
|
||||
|
||||
- name: Write vLLM tier config file (for portal reference)
|
||||
copy:
|
||||
dest: /opt/cezen/vllm-config.json
|
||||
owner: cezen
|
||||
group: cezen
|
||||
mode: "0644"
|
||||
content: |
|
||||
{
|
||||
"model": "{{ vllm_model }}",
|
||||
"tensor_parallel_size": {{ vllm_tensor_parallel }},
|
||||
"gpu_memory_utilization": {{ vllm_gpu_memory_util }},
|
||||
"max_model_len": {{ vllm_max_model_len }},
|
||||
"quantization": "{{ vllm_quantization }}"
|
||||
}
|
||||
|
||||
- name: Enable and start vLLM service
|
||||
systemd:
|
||||
name: vllm
|
||||
enabled: true
|
||||
daemon_reload: true
|
||||
when: gpu_check.stdout != "" and gpu_check.rc == 0
|
||||
|
||||
76
ansible/starter.yml
Normal file
76
ansible/starter.yml
Normal file
@ -0,0 +1,76 @@
|
||||
---
# Nexus One AI — Starter Tier Stack
# Hardware: 1× NVIDIA RTX 5090 (32 GB GDDR7), 64 GB DDR5, 2 TB NVMe, 2.5 GbE
# Capacity: 1–5 concurrent users
# Runs after NVIDIA driver reboot (phase1_nvidia.yml)
#
# Differences from Basic tier:
#   - No k3s    (Kubernetes overhead not justified for 64 GB RAM / 1-5 users)
#   - No MLflow (fine-tuning tracking overkill for Starter)
#   - No MinIO  (local model cache is sufficient)
#   - vLLM serves Phi-3 Mini by default (Llama 3.2 3B is the suggested swap)
#   - JupyterLab is optional (off by default, wizard can enable)
#
# Any role can be skipped at run time by listing it in `skip_roles`
# (comma-separated, no spaces — each `when` below does an exact match on
# the split items).

- name: Nexus One AI — Starter Tier Stack
  hosts: localhost
  connection: local
  become: true
  vars:
    cezen_user: "cezen"
    cezen_home: "/opt/cezen"
    cezen_login_home: "/home/cezen"
    python_version: "3.11"
    # NOTE(review): RTX 50-series (Blackwell) support is generally documented
    # as starting with CUDA 12.8 — confirm this pin against the nvidia role.
    cuda_version: "12.6"          # RTX 5090 requires CUDA 12.6+
    skip_roles: ""                # comma-separated list of roles to skip
    # NOTE(review): presumably overridden on GPU hosts (e.g. via -e); roles
    # that gate on gpu_available treat this default as "no GPU" — confirm.
    gpu_available: false
    tier: "starter"

    # ── vLLM — Starter defaults ──────────────────
    # Phi-3 Mini is small enough to run unquantized within 32 GB GDDR7.
    # vllm_quantization stays empty because the stock checkpoint below is not
    # AWQ-quantized; passing --quantization awq for it makes vLLM fail at
    # model load. Set it to "awq" only together with an AWQ checkpoint.
    # Users can swap to Llama-3.2-3B-Instruct via the portal Model Manager.
    vllm_model: "microsoft/Phi-3-mini-4k-instruct"
    vllm_tensor_parallel: 1
    vllm_gpu_memory_util: "0.85"
    vllm_max_model_len: 4096
    vllm_quantization: ""

    # ── Ollama — lightweight models ───────────────
    ollama_default_model: "phi3:mini"

  roles:
    - role: base
      when: "'base' not in skip_roles.split(',')"

    - role: docker
      when: "'docker' not in skip_roles.split(',')"

    # k3s intentionally omitted for Starter — insufficient RAM headroom

    - role: ollama
      when: "'ollama' not in skip_roles.split(',')"

    - role: vllm
      when: "'vllm' not in skip_roles.split(',')"

    - role: chromadb
      when: "'chromadb' not in skip_roles.split(',')"

    # mlflow / minio omitted for Starter

    - role: monitoring
      when: "'monitoring' not in skip_roles.split(',')"

    - role: cezen-backend
      when: "'cezen-backend' not in skip_roles.split(',')"

    - role: cezen-ttyd
      when: "'cezen-ttyd' not in skip_roles.split(',')"

    - role: cezen-nginx
      when: "'cezen-nginx' not in skip_roles.split(',')"

    # JupyterLab — optional, install only when explicitly requested
    - role: jupyterlab
      when: >
        'jupyterlab' not in skip_roles.split(',') and
        install_jupyterlab | default(false) | bool
|
||||
199
autoinstall/build-iso-starter.sh
Normal file
199
autoinstall/build-iso-starter.sh
Normal file
@ -0,0 +1,199 @@
|
||||
#!/usr/bin/env bash
# ─────────────────────────────────────────────────────────────
# Nexus One AI — Starter Tier ISO Builder
# Hardware target: compact workstation (1× RTX 5090, 64 GB RAM, 2 TB NVMe)
#
# Usage (run as root — build tools are installed with apt-get):
#   cd ~/aipackage
#   bash autoinstall/build-iso-starter.sh
#
# Output: autoinstall/cezen-ai-starter-ubuntu2204.iso
# Flash to USB:
#   diskutil unmountDisk /dev/diskN
#   sudo dd if=cezen-ai-starter-ubuntu2204.iso of=/dev/diskN bs=4m status=progress
#
# Steps: download the stock Ubuntu ISO, extract it, inject the Starter
# autoinstall files, patch GRUB, repack, regenerate md5sum.txt from the
# repacked contents, repack again, then verify the final image against its
# own manifest.
# ─────────────────────────────────────────────────────────────
set -euo pipefail

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
PACKAGE_DIR="$(cd "$SCRIPT_DIR/.." && pwd)"
WORK_DIR="/tmp/cezen-iso-starter-work"
ORIGINAL_ISO="/tmp/ubuntu-22.04.5-live-server-amd64.iso"
OUTPUT_ISO="$SCRIPT_DIR/cezen-ai-starter-ubuntu2204.iso"
UBUNTU_URL="https://releases.ubuntu.com/22.04.5/ubuntu-22.04.5-live-server-amd64.iso"
TIER="starter"
MD5_LOG="/tmp/cezen-iso-md5check-starter.log"

# Temporary paths are filled in as they are created; cleanup skips empty ones.
MBR_TEMPLATE=""
EFI_IMG=""
FINAL_DIR=""
VERIFY_DIR=""

# Remove every scratch file/directory created by this run.
cleanup() {
    local path
    for path in "$WORK_DIR" "$MBR_TEMPLATE" "$EFI_IMG" "$FINAL_DIR" "$VERIFY_DIR"; do
        if [ -n "$path" ]; then
            rm -rf "$path"
        fi
    done
}

# Build a bootable (BIOS + UEFI) ISO at $OUTPUT_ISO from the tree in $1.
repack_iso() {
    local source_dir="$1"
    xorriso -as mkisofs \
        -r \
        -V "CezenAI_Starter_2204" \
        -o "$OUTPUT_ISO" \
        --grub2-mbr "$MBR_TEMPLATE" \
        -partition_offset 16 \
        --mbr-force-bootable \
        -append_partition 2 28732ac11ff8d211ba4b00a0c93ec93b "$EFI_IMG" \
        -appended_part_as_gpt \
        -iso_mbr_part_type a2a0d0ebe5b9334487c068b6b72699c7 \
        -c "/boot.catalog" \
        -b "/boot/grub/i386-pc/eltorito.img" \
        -no-emul-boot \
        -boot-load-size 4 \
        -boot-info-table \
        --grub2-boot-info \
        -eltorito-alt-boot \
        -e "--interval:appended_partition_2:::" \
        -no-emul-boot \
        "$source_dir"
}

echo "╔══════════════════════════════════════════════════════╗"
echo "║ Nexus One AI — ISO Builder [STARTER TIER] ║"
echo "║ RTX 5090 · 64 GB RAM · 2 TB NVMe · 1–5 users ║"
echo "╚══════════════════════════════════════════════════════╝"
echo ""

# ── Preconditions ──────────────────────────────
# Fail before the long download rather than part-way through the build.
if [ "$(id -u)" -ne 0 ]; then
    echo "ERROR: This script must be run as root (it installs packages with apt-get)."
    exit 1
fi
for required in "$SCRIPT_DIR/user-data-starter" "$SCRIPT_DIR/meta-data"; do
    if [ ! -f "$required" ]; then
        echo "ERROR: Missing autoinstall file: $required"
        exit 1
    fi
done

# Installed only now so a failed precondition never touches $WORK_DIR.
trap cleanup EXIT

# ── Install build tools ────────────────────────
echo "→ Installing build tools..."
apt-get update -qq
apt-get install -y -qq xorriso wget isolinux rsync
echo "✓ Tools ready"

# ── Download Ubuntu ISO ────────────────────────
if [ -f "$ORIGINAL_ISO" ]; then
    echo "✓ Ubuntu ISO already downloaded"
else
    echo "→ Downloading Ubuntu 22.04.5 Server ISO (~1.8 GB)..."
    # Download to a .part file and rename on success, so an interrupted
    # transfer is never mistaken for a complete ISO on the next run.
    wget --show-progress -O "$ORIGINAL_ISO.part" "$UBUNTU_URL"
    mv "$ORIGINAL_ISO.part" "$ORIGINAL_ISO"
    echo "✓ Downloaded"
fi

# ── Extract ISO ────────────────────────────────
echo "→ Extracting ISO..."
rm -rf "$WORK_DIR"
mkdir -p "$WORK_DIR"
xorriso -osirrox on \
    -indev "$ORIGINAL_ISO" \
    -extract / "$WORK_DIR" 2>/dev/null
# Extracted files inherit the ISO's read-only modes; make them editable.
chmod -R u+w "$WORK_DIR"
echo "✓ Extracted"

# ── Inject Starter autoinstall files ──────────
echo "→ Injecting Starter autoinstall config..."
mkdir -p "$WORK_DIR/nocloud"
cp "$SCRIPT_DIR/user-data-starter" "$WORK_DIR/nocloud/user-data"
cp "$SCRIPT_DIR/meta-data" "$WORK_DIR/nocloud/meta-data"
echo "✓ user-data-starter and meta-data injected"

# ── Online installer mode ──────────────────────
# The installed system pulls the current package from cgit on first boot. This
# keeps the ISO small and avoids shipping stale backend/portal code.
echo "✓ Online installer mode: package will be pulled from cgit on first boot"

# ── Patch GRUB ────────────────────────────────
echo "→ Patching GRUB config..."
GRUB_CFG="$WORK_DIR/boot/grub/grub.cfg"
cp "$GRUB_CFG" "$GRUB_CFG.orig"

sed -i "s/set timeout=.*/set timeout=5/" "$GRUB_CFG"
sed -i "s/set timeout_style=.*/set timeout_style=countdown/" "$GRUB_CFG"
# Point the kernel command line at the nocloud seed copied onto the ISO.
sed -i '/^\s*linux.*vmlinuz/s|---|autoinstall ds=nocloud\\;s=/cdrom/nocloud/ ---|' "$GRUB_CFG"

# Update GRUB title to reflect Starter tier
sed -i 's/Install Ubuntu Server/Install Nexus One AI — Starter Tier/' "$GRUB_CFG" || true
echo "✓ GRUB patched"

# ── Extract MBR and EFI boot data ─────────────
echo "→ Extracting boot data from original ISO..."
MBR_TEMPLATE=$(mktemp)
EFI_IMG=$(mktemp)
dd if="$ORIGINAL_ISO" bs=1 count=432 of="$MBR_TEMPLATE" 2>/dev/null

# `|| true`: a non-matching grep must not abort the script under `set -e`;
# the explicit check below reports the problem instead.
EFI_LINE=$(fdisk -l "$ORIGINAL_ISO" 2>/dev/null | grep "EFI" || true)
echo " EFI partition info: $EFI_LINE"
EFI_START=$(echo "$EFI_LINE" | awk '{print $2}')
EFI_SIZE=$(echo "$EFI_LINE" | awk '{print $4}')

if [ -z "$EFI_START" ] || [ -z "$EFI_SIZE" ]; then
    echo "ERROR: Could not detect EFI partition in ISO."
    echo "Run: fdisk -l $ORIGINAL_ISO"
    exit 1
fi

dd if="$ORIGINAL_ISO" bs=512 skip="$EFI_START" count="$EFI_SIZE" \
    of="$EFI_IMG" 2>/dev/null
echo "✓ EFI partition extracted (start=$EFI_START, size=$EFI_SIZE)"

# ── Repack ISO (pass 1) ────────────────────────
echo "→ Repacking ISO (pass 1)..."
repack_iso "$WORK_DIR"

# ── Refresh md5sum.txt and repack (pass 2) ────
# The manifest is regenerated from the files as they appear in the pass-1
# image, then the image is rebuilt so it carries that manifest.
echo "→ Refreshing md5sum.txt..."
FINAL_DIR=$(mktemp -d)
VERIFY_DIR=$(mktemp -d)

xorriso -osirrox on -indev "$OUTPUT_ISO" -extract / "$FINAL_DIR" >/dev/null 2>&1
chmod -R u+w "$FINAL_DIR"
(
    cd "$FINAL_DIR"
    rm -f md5sum.txt
    find . -type f \
        ! -path './md5sum.txt' \
        ! -path './boot.catalog' \
        -print0 \
        | sort -z \
        | xargs -0 md5sum > md5sum.txt
)
echo "✓ md5sum.txt refreshed"

echo "→ Repacking ISO (pass 2)..."
repack_iso "$FINAL_DIR"

# ── Verify output ISO ──────────────────────────
echo "→ Verifying rebuilt ISO manifest..."
xorriso -osirrox on -indev "$OUTPUT_ISO" -extract / "$VERIFY_DIR" >/dev/null 2>&1
chmod -R u+w "$VERIFY_DIR"
(
    cd "$VERIFY_DIR"
    md5sum -c md5sum.txt >"$MD5_LOG" 2>&1 || {
        echo "ERROR: Rebuilt ISO failed its own md5sum.txt verification."
        sed -n '1,40p' "$MD5_LOG"
        exit 1
    }
)
echo "✓ Output ISO manifest verified"

echo ""
echo "╔══════════════════════════════════════════════════════╗"
echo "║ Done! Starter Tier ISO ready. ║"
echo "╚══════════════════════════════════════════════════════╝"
echo ""
ls -lh "$OUTPUT_ISO"
echo ""
echo "→ Transfer to MacBook:"
echo " scp user@server:~/aipackage/autoinstall/cezen-ai-starter-ubuntu2204.iso ."
echo ""
echo "→ Flash to USB (macOS):"
echo " diskutil list # find USB e.g. /dev/disk4"
echo " diskutil unmountDisk /dev/disk4"
echo " sudo dd if=cezen-ai-starter-ubuntu2204.iso of=/dev/disk4 bs=4m status=progress"
echo ""
echo "→ Post-flash: boot the workstation from USB."
echo " Unattended install completes in ~10 min."
echo " First-boot wizard runs on tty1 — set IP, org name, admin password."
echo " Then run: sudo bash /opt/aipackage/install.sh --tier starter"
|
||||
@ -1,6 +1,6 @@
|
||||
#!/usr/bin/env bash
|
||||
# ─────────────────────────────────────────────────────────────
|
||||
# Cezen AI Suite — Custom ISO Builder
|
||||
# Nexus One AI — Custom ISO Builder
|
||||
# Runs directly on Ubuntu 22.04 (run on the server)
|
||||
#
|
||||
# Usage:
|
||||
@ -20,7 +20,7 @@ OUTPUT_ISO="$SCRIPT_DIR/cezen-ai-ubuntu2204.iso"
|
||||
UBUNTU_URL="https://releases.ubuntu.com/22.04.5/ubuntu-22.04.5-live-server-amd64.iso"
|
||||
|
||||
echo "╔══════════════════════════════════════════╗"
|
||||
echo "║ Cezen AI — ISO Builder ║"
|
||||
echo "║ Nexus One AI — ISO Builder ║"
|
||||
echo "╚══════════════════════════════════════════╝"
|
||||
echo ""
|
||||
|
||||
@ -56,15 +56,10 @@ cp "$SCRIPT_DIR/user-data" "$WORK_DIR/nocloud/user-data"
|
||||
cp "$SCRIPT_DIR/meta-data" "$WORK_DIR/nocloud/meta-data"
|
||||
echo "✓ user-data and meta-data injected"
|
||||
|
||||
# Keep the installer payload on the ISO so first boot does not depend on a
|
||||
# private Git server being reachable before the setup UI can start.
|
||||
echo "→ Bundling Cezen AI installer payload..."
|
||||
mkdir -p "$WORK_DIR/cezen-aipackage"
|
||||
rsync -a --delete \
|
||||
--exclude 'autoinstall/cezen-ai-ubuntu2204.iso' \
|
||||
--exclude '*.iso' \
|
||||
"$PACKAGE_DIR/" "$WORK_DIR/cezen-aipackage/"
|
||||
echo "✓ Installer payload bundled"
|
||||
# Keep this as an online installer ISO. The installed system pulls the current
|
||||
# Nexus One AI package from cgit during first boot, which keeps the ISO small and
|
||||
# avoids shipping stale backend/portal code inside the image.
|
||||
echo "✓ Online installer mode: package will be pulled from cgit on first boot"
|
||||
|
||||
# ── Patch GRUB ────────────────────────────────
|
||||
echo "→ Patching GRUB config..."
|
||||
@ -82,20 +77,6 @@ sed -i "s/set timeout_style=.*/set timeout_style=countdown/" "$GRUB_CFG"
|
||||
sed -i '/^\s*linux.*vmlinuz/s|---|autoinstall ds=nocloud\\;s=/cdrom/nocloud/ ---|' "$GRUB_CFG"
|
||||
echo "✓ GRUB patched"
|
||||
|
||||
# ── Refresh installer checksum manifest ─────────────────
|
||||
echo "→ Refreshing md5sum.txt..."
|
||||
(
|
||||
cd "$WORK_DIR"
|
||||
rm -f md5sum.txt
|
||||
find . -type f \
|
||||
! -path './md5sum.txt' \
|
||||
! -path './boot.catalog' \
|
||||
-print0 \
|
||||
| sort -z \
|
||||
| xargs -0 md5sum > md5sum.txt
|
||||
)
|
||||
echo "✓ md5sum.txt refreshed"
|
||||
|
||||
# ── Extract MBR and EFI partition from original ISO ────
|
||||
echo "→ Extracting boot data from original ISO..."
|
||||
MBR_TEMPLATE=$(mktemp)
|
||||
@ -119,7 +100,7 @@ dd if="$ORIGINAL_ISO" bs=512 skip="$EFI_START" count="$EFI_SIZE" \
|
||||
echo "✓ EFI partition extracted (start=$EFI_START, size=$EFI_SIZE)"
|
||||
|
||||
# ── Repack ISO ─────────────────────────────────
|
||||
echo "→ Repacking ISO (this takes ~2 minutes)..."
|
||||
echo "→ Repacking ISO (pass 1)..."
|
||||
xorriso -as mkisofs \
|
||||
-r \
|
||||
-V "Cezen_AI_Ubuntu2204" \
|
||||
@ -141,6 +122,64 @@ xorriso -as mkisofs \
|
||||
-no-emul-boot \
|
||||
"$WORK_DIR"
|
||||
|
||||
echo "→ Refreshing md5sum.txt from pass-1 ISO contents..."
|
||||
FINAL_DIR=$(mktemp -d)
|
||||
VERIFY_DIR=$(mktemp -d)
|
||||
trap 'rm -rf "$WORK_DIR" "$MBR_TEMPLATE" "$EFI_IMG" "$FINAL_DIR" "$VERIFY_DIR"' EXIT
|
||||
xorriso -osirrox on \
|
||||
-indev "$OUTPUT_ISO" \
|
||||
-extract / "$FINAL_DIR" >/dev/null 2>&1
|
||||
chmod -R u+w "$FINAL_DIR"
|
||||
(
|
||||
cd "$FINAL_DIR"
|
||||
rm -f md5sum.txt
|
||||
find . -type f \
|
||||
! -path './md5sum.txt' \
|
||||
! -path './boot.catalog' \
|
||||
-print0 \
|
||||
| sort -z \
|
||||
| xargs -0 md5sum > md5sum.txt
|
||||
)
|
||||
echo "✓ md5sum.txt refreshed"
|
||||
|
||||
echo "→ Repacking ISO (pass 2 with final manifest)..."
|
||||
xorriso -as mkisofs \
|
||||
-r \
|
||||
-V "Cezen_AI_Ubuntu2204" \
|
||||
-o "$OUTPUT_ISO" \
|
||||
--grub2-mbr "$MBR_TEMPLATE" \
|
||||
-partition_offset 16 \
|
||||
--mbr-force-bootable \
|
||||
-append_partition 2 28732ac11ff8d211ba4b00a0c93ec93b "$EFI_IMG" \
|
||||
-appended_part_as_gpt \
|
||||
-iso_mbr_part_type a2a0d0ebe5b9334487c068b6b72699c7 \
|
||||
-c "/boot.catalog" \
|
||||
-b "/boot/grub/i386-pc/eltorito.img" \
|
||||
-no-emul-boot \
|
||||
-boot-load-size 4 \
|
||||
-boot-info-table \
|
||||
--grub2-boot-info \
|
||||
-eltorito-alt-boot \
|
||||
-e "--interval:appended_partition_2:::" \
|
||||
-no-emul-boot \
|
||||
"$FINAL_DIR"
|
||||
|
||||
# ── Verify output ISO integrity manifest ─────────────────
|
||||
echo "→ Verifying rebuilt ISO manifest..."
|
||||
xorriso -osirrox on \
|
||||
-indev "$OUTPUT_ISO" \
|
||||
-extract / "$VERIFY_DIR" >/dev/null 2>&1
|
||||
chmod -R u+w "$VERIFY_DIR"
|
||||
(
|
||||
cd "$VERIFY_DIR"
|
||||
md5sum -c md5sum.txt >/tmp/cezen-iso-md5check.log 2>&1 || {
|
||||
echo "ERROR: Rebuilt ISO failed its own md5sum.txt verification."
|
||||
sed -n '1,40p' /tmp/cezen-iso-md5check.log
|
||||
exit 1
|
||||
}
|
||||
)
|
||||
echo "✓ Output ISO manifest verified"
|
||||
|
||||
echo ""
|
||||
echo "╔══════════════════════════════════════════════════════╗"
|
||||
echo "║ Done! ║"
|
||||
|
||||
@ -1,6 +1,6 @@
|
||||
#!/usr/bin/env bash
|
||||
# ─────────────────────────────────────────────────────────────
|
||||
# Cezen AI Suite — First Boot Setup Wizard
|
||||
# Nexus One AI — First Boot Setup Wizard
|
||||
# Runs on first boot after OS install via systemd service.
|
||||
# Uses whiptail for the TUI.
|
||||
# ─────────────────────────────────────────────────────────────
|
||||
@ -8,6 +8,7 @@ set -e
|
||||
|
||||
AIPACKAGE_DIR="/opt/aipackage"
|
||||
LOG_FILE="/var/log/cezen-setup.log"
|
||||
export TERM="${TERM:-linux}"
|
||||
exec > >(tee -a "$LOG_FILE") 2>&1
|
||||
|
||||
detect_iface() {
|
||||
@ -20,28 +21,28 @@ IFACE="${IFACE:-$(ip -o link show | awk -F': ' '$2 !~ /lo|docker|br-|veth/ {prin
|
||||
# ── Colors / terminal setup ────────────────────────────────
|
||||
export NEWT_COLORS='
|
||||
root=,black
|
||||
window=white,navy
|
||||
border=white,navy
|
||||
title=white,navy
|
||||
button=black,cyan
|
||||
actbutton=white,red
|
||||
checkbox=white,navy
|
||||
actcheckbox=black,cyan
|
||||
entry=white,navy
|
||||
label=white,navy
|
||||
listbox=white,navy
|
||||
actlistbox=black,cyan
|
||||
textbox=white,navy
|
||||
acttextbox=black,cyan
|
||||
window=black,white
|
||||
border=white,black
|
||||
title=black,white
|
||||
button=black,white
|
||||
actbutton=white,blue
|
||||
checkbox=black,white
|
||||
actcheckbox=white,blue
|
||||
entry=black,white
|
||||
label=black,white
|
||||
listbox=black,white
|
||||
actlistbox=white,blue
|
||||
textbox=black,white
|
||||
acttextbox=white,blue
|
||||
'
|
||||
|
||||
TITLE=" Cezen AI Suite — Server Setup "
|
||||
TITLE=" Nexus One AI — Server Setup "
|
||||
H=20
|
||||
W=70
|
||||
|
||||
# ── Welcome ────────────────────────────────────────────────
|
||||
whiptail --title "$TITLE" \
|
||||
--msgbox "\nWelcome to the Cezen AI Suite installer.\n\nThis wizard will configure your network and install the AI stack.\n\nMake sure this server is connected to the internet before continuing." \
|
||||
--msgbox "\nWelcome to the Nexus One AI installer.\n\nThis wizard will configure your network and install the AI stack.\n\nMake sure this server is connected to the internet before continuing." \
|
||||
$H $W
|
||||
|
||||
# ════════════════════════════════════════════════════════════
|
||||
@ -135,10 +136,11 @@ fi
|
||||
|
||||
TIER=$(whiptail --title "$TITLE" \
|
||||
--menu "\nStep 2 of 3: Select AI Package Tier\n\nChoose the tier that matches your hardware:" \
|
||||
$H $W 3 \
|
||||
"entry" "Entry — 3× NVIDIA L40S (48GB each) · Up to 20 users" \
|
||||
"mid" "Mid — RTX Pro 6000 BW (96GB each) · Up to 50 users" \
|
||||
"advanced" "Advanced — HGX H200 (141GB each) · 200+ users" \
|
||||
$H $W 4 \
|
||||
"starter" "Starter — 1× RTX 5090 / 32GB VRAM · Small team" \
|
||||
"basic" "Entry — 1× NVIDIA RTX Pro 6000 (96GB) · Up to 20 users" \
|
||||
"pro" "Pro — 2× RTX 5090 / RTX Pro class · Up to 100 users" \
|
||||
"max" "Max — 4–8× H100/H200/A100 class · 100+ users" \
|
||||
3>&1 1>&2 2>&3)
|
||||
|
||||
# ════════════════════════════════════════════════════════════
|
||||
@ -177,7 +179,7 @@ whiptail --title "$TITLE" \
|
||||
clear
|
||||
echo ""
|
||||
echo "╔══════════════════════════════════════════╗"
|
||||
echo "║ Cezen AI Suite — Installing... ║"
|
||||
echo "║ Nexus One AI — Installing... ║"
|
||||
echo "║ Check progress: journalctl -f ║"
|
||||
echo "╚══════════════════════════════════════════╝"
|
||||
echo ""
|
||||
|
||||
@ -85,10 +85,11 @@ autoinstall:
|
||||
# mirrors instead of the custom ISO content.
|
||||
- sed -i 's/^deb cdrom:/# deb cdrom:/' /target/etc/apt/sources.list || true
|
||||
|
||||
# Install the Cezen AI payload from the ISO first. Fall back to Git only
|
||||
# when building from older media that does not contain /cdrom/cezen-aipackage.
|
||||
# Pull the Nexus One AI installer from cgit. The ISO intentionally does not
|
||||
# bundle the full package, keeping the image small and the installed code
|
||||
# current at deployment time.
|
||||
- mkdir -p /target/opt/aipackage
|
||||
- cp -a /cdrom/cezen-aipackage/. /target/opt/aipackage/ || git clone https://cgit.cezentech.com/jinojose/aipackage.git /target/opt/aipackage
|
||||
- git clone https://cgit.cezentech.com/jinojose/aipackage.git /target/opt/aipackage
|
||||
|
||||
# Deploy the console setup wizard
|
||||
- mkdir -p /target/opt/cezen
|
||||
@ -104,23 +105,18 @@ autoinstall:
|
||||
- |
|
||||
cat > /target/etc/systemd/system/cezen-setup.service << 'EOF'
|
||||
[Unit]
|
||||
Description=Cezen AI Suite — Console Setup Wizard
|
||||
Description=Nexus One AI — Console Setup Wizard
|
||||
After=cloud-final.service cloud-init.target network-online.target
|
||||
Wants=cloud-init.target network-online.target
|
||||
Conflicts=getty@tty1.service
|
||||
ConditionPathExists=!/opt/cezen/.setup-done
|
||||
OnFailure=getty@tty1.service
|
||||
|
||||
[Service]
|
||||
Type=idle
|
||||
ExecStartPre=-/usr/bin/systemctl stop getty@tty1.service
|
||||
ExecStart=/bin/bash -lc 'clear >/dev/tty1 2>/dev/null || true; exec /opt/cezen/firstboot-setup.sh'
|
||||
StandardInput=tty-force
|
||||
StandardOutput=tty
|
||||
StandardError=tty
|
||||
TTYPath=/dev/tty1
|
||||
TTYReset=yes
|
||||
TTYVHangup=yes
|
||||
TTYVTDisallocate=yes
|
||||
Type=oneshot
|
||||
WorkingDirectory=/opt/cezen
|
||||
ExecStart=/bin/bash -lc 'clear >/dev/tty1 2>/dev/null || true; /usr/bin/openvt -c 1 -f -w -- env TERM=linux /opt/cezen/firstboot-setup.sh'
|
||||
StandardOutput=journal+console
|
||||
StandardError=journal+console
|
||||
Restart=no
|
||||
|
||||
[Install]
|
||||
|
||||
137
autoinstall/user-data-starter
Normal file
137
autoinstall/user-data-starter
Normal file
@ -0,0 +1,137 @@
|
||||
#cloud-config
|
||||
# ─────────────────────────────────────────────────────────────
|
||||
# Nexus One AI — Starter Tier Autoinstall
|
||||
# Hardware target: compact workstation (Mini-ITX / SFF)
|
||||
# GPU: 1× NVIDIA RTX 5090 (32 GB GDDR7)
|
||||
# RAM: 64 GB DDR5
|
||||
# Storage: 1× 2 TB NVMe SSD (single drive — simple LVM)
|
||||
# Network: 2.5 GbE (single interface)
|
||||
# ─────────────────────────────────────────────────────────────
|
||||
autoinstall:
|
||||
version: 1
|
||||
|
||||
# ── Locale & keyboard ──────────────────────────
|
||||
locale: en_IN.UTF-8
|
||||
keyboard:
|
||||
layout: us
|
||||
|
||||
# ── Network: DHCP during install; static config applied post-install ──
|
||||
network:
|
||||
network:
|
||||
version: 2
|
||||
ethernets:
|
||||
any-en:
|
||||
dhcp4: true
|
||||
match:
|
||||
name: "en*"
|
||||
any-eth:
|
||||
dhcp4: true
|
||||
match:
|
||||
name: "eth*"
|
||||
|
||||
# ── Storage: single 2 TB NVMe, simple LVM ─────
|
||||
# Starter workstations have one drive — no RAID needed.
|
||||
storage:
|
||||
layout:
|
||||
name: lvm
|
||||
match:
|
||||
size: largest
|
||||
|
||||
# ── Identity ──────────────────────────────────
|
||||
identity:
|
||||
hostname: cezenai-starter
|
||||
username: cezen
|
||||
# Default password: cezen@123 (change via first-boot wizard)
|
||||
password: "$6$I5VA.42G1xTeVhCv$KCLzqIKg/kbNHZyiTEMAY4FZsJMDDwoS90k6Ffb9VEwmcK.wuzlJNe3ceiEfLrzYzXEvqjYsLc7klAbeGPGab."
|
||||
|
||||
# ── SSH ───────────────────────────────────────
|
||||
ssh:
|
||||
install-server: true
|
||||
allow-pw: true
|
||||
|
||||
# ── Base packages ─────────────────────────────
|
||||
packages:
|
||||
- git
|
||||
- curl
|
||||
- wget
|
||||
- python3
|
||||
- whiptail
|
||||
- openssh-server
|
||||
- nvme-cli # NVMe health / SMART monitoring
|
||||
|
||||
# ── Late commands ─────────────────────────────
|
||||
late-commands:
|
||||
# Expand LVM to fill the full 2 TB NVMe
|
||||
- lvextend -l +100%FREE /dev/ubuntu-vg/ubuntu-lv || true
|
||||
- resize2fs /dev/ubuntu-vg/ubuntu-lv || true
|
||||
|
||||
# Passwordless sudo for cezen (needed by install.sh + first-boot wizard)
|
||||
- echo "cezen ALL=(ALL) NOPASSWD:ALL" > /target/etc/sudoers.d/cezen
|
||||
- chmod 440 /target/etc/sudoers.d/cezen
|
||||
|
||||
# Replace installer netplan with simple DHCP target config.
|
||||
# The first-boot wizard will switch to static if desired.
|
||||
- rm -f /target/etc/netplan/50-cloud-init.yaml /target/etc/netplan/00-installer-config.yaml || true
|
||||
- |
|
||||
cat > /target/etc/netplan/99-cezen-dhcp.yaml << 'EOF'
|
||||
network:
|
||||
version: 2
|
||||
ethernets:
|
||||
any-en:
|
||||
dhcp4: true
|
||||
match:
|
||||
name: "en*"
|
||||
any-eth:
|
||||
dhcp4: true
|
||||
match:
|
||||
name: "eth*"
|
||||
EOF
|
||||
|
||||
# Disable cdrom APT source
|
||||
- sed -i 's/^deb cdrom:/# deb cdrom:/' /target/etc/apt/sources.list || true
|
||||
|
||||
# Pull the Nexus One AI installer from cgit. The ISO intentionally does not
|
||||
# bundle the full package, keeping the image small and the installed code
|
||||
# current at deployment time.
|
||||
- mkdir -p /target/opt/aipackage
|
||||
- git clone https://cgit.cezentech.com/jinojose/aipackage.git /target/opt/aipackage
|
||||
|
||||
# Write tier marker — used by install.sh and the portal branding system
|
||||
- mkdir -p /target/opt/cezen
|
||||
- echo "starter" > /target/opt/cezen/tier
|
||||
|
||||
# Deploy first-boot TUI wizard
|
||||
- cp /target/opt/aipackage/autoinstall/firstboot-setup.sh /target/opt/cezen/firstboot-setup.sh
|
||||
- chmod +x /target/opt/cezen/firstboot-setup.sh
|
||||
|
||||
# Set hostname
|
||||
- echo "cezenai-starter" > /target/etc/hostname
|
||||
- sed -i 's/aiserver/cezenai-starter/g' /target/etc/hosts || true
|
||||
|
||||
# Systemd service: run first-boot wizard on tty1 once
|
||||
- |
|
||||
cat > /target/etc/systemd/system/cezen-setup.service << 'EOF'
|
||||
[Unit]
|
||||
Description=Nexus One AI — Console Setup Wizard (Starter)
|
||||
After=cloud-final.service cloud-init.target network-online.target
|
||||
Wants=cloud-init.target network-online.target
|
||||
ConditionPathExists=!/opt/cezen/.setup-done
|
||||
OnFailure=getty@tty1.service
|
||||
|
||||
[Service]
|
||||
Type=oneshot
|
||||
WorkingDirectory=/opt/cezen
|
||||
ExecStart=/bin/bash -lc 'clear >/dev/tty1 2>/dev/null || true; /usr/bin/openvt -c 1 -f -w -- env TERM=linux CEZEN_TIER=starter /opt/cezen/firstboot-setup.sh'
|
||||
StandardOutput=journal+console
|
||||
StandardError=journal+console
|
||||
Restart=no
|
||||
|
||||
[Install]
|
||||
WantedBy=cloud-init.target
|
||||
EOF
|
||||
|
||||
- curtin in-target -- systemctl enable ssh
|
||||
- curtin in-target -- systemctl enable cezen-setup.service
|
||||
|
||||
user-data:
|
||||
disable_root: false
|
||||
@ -1,6 +1,6 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Cezen AI Suite — First Boot Web Setup Server
|
||||
Nexus One AI — First Boot Web Setup Server
|
||||
Serves on port 80. Access from any browser on the same network.
|
||||
"""
|
||||
import os, json, subprocess, threading, time, socket, ipaddress
|
||||
@ -123,7 +123,7 @@ HTML = r"""<!DOCTYPE html>
|
||||
<head>
|
||||
<meta charset="UTF-8">
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||||
<title>Cezen AI Suite — Server Setup</title>
|
||||
<title>Nexus One AI — Server Setup</title>
|
||||
<style>
|
||||
:root {
|
||||
--navy: #1B2A4A;
|
||||
@ -317,19 +317,24 @@ HTML = r"""<!DOCTYPE html>
|
||||
<h2>Select AI Package Tier</h2>
|
||||
<p class="desc">Choose the tier that matches your GPU hardware.</p>
|
||||
<div class="tier-grid">
|
||||
<div class="tier-card" id="tier-entry" onclick="selectTier('entry')">
|
||||
<div class="tier-card" id="tier-starter" onclick="selectTier('starter')">
|
||||
<div class="tier-name">Starter</div>
|
||||
<div class="tier-gpu">1× RTX 5090 / 32GB VRAM</div>
|
||||
<div class="tier-users">Small team deployment</div>
|
||||
</div>
|
||||
<div class="tier-card" id="tier-basic" onclick="selectTier('basic')">
|
||||
<div class="tier-name">Entry</div>
|
||||
<div class="tier-gpu">3× NVIDIA L40S</div>
|
||||
<div class="tier-gpu">1× NVIDIA RTX Pro 6000 (96GB)</div>
|
||||
<div class="tier-users">Up to 20 concurrent users</div>
|
||||
</div>
|
||||
<div class="tier-card" id="tier-mid" onclick="selectTier('mid')">
|
||||
<div class="tier-name">Mid</div>
|
||||
<div class="tier-gpu">3× RTX Pro 6000</div>
|
||||
<div class="tier-users">Up to 50 concurrent users</div>
|
||||
<div class="tier-card" id="tier-pro" onclick="selectTier('pro')">
|
||||
<div class="tier-name">Pro</div>
|
||||
<div class="tier-gpu">2× RTX 5090 / RTX Pro class</div>
|
||||
<div class="tier-users">Up to 100 concurrent users</div>
|
||||
</div>
|
||||
<div class="tier-card" id="tier-advanced" onclick="selectTier('advanced')">
|
||||
<div class="tier-name">Advanced</div>
|
||||
<div class="tier-gpu">8× HGX H200</div>
|
||||
<div class="tier-card" id="tier-max" onclick="selectTier('max')">
|
||||
<div class="tier-name">Max</div>
|
||||
<div class="tier-gpu">4–8× H100/H200/A100 class</div>
|
||||
<div class="tier-users">200+ concurrent users</div>
|
||||
</div>
|
||||
</div>
|
||||
@ -369,7 +374,7 @@ HTML = r"""<!DOCTYPE html>
|
||||
|
||||
<div class="progress-wrap" id="progress-wrap">
|
||||
<div class="card">
|
||||
<h2>Installing Cezen AI Suite...</h2>
|
||||
<h2>Installing Nexus One AI...</h2>
|
||||
<div class="progress-bar-bg"><div class="progress-bar" id="progress-bar"></div></div>
|
||||
<p id="progress-label" style="font-size:13px;color:var(--muted);margin-bottom:12px">Starting...</p>
|
||||
<div class="log-box" id="log-box"></div>
|
||||
@ -379,7 +384,7 @@ HTML = r"""<!DOCTYPE html>
|
||||
<div id="done-screen" class="hidden">
|
||||
<div class="done-icon">✅</div>
|
||||
<h2>Installation Complete!</h2>
|
||||
<p>Your Cezen AI Suite is ready.</p>
|
||||
<p>Your Nexus One AI is ready.</p>
|
||||
<div class="services card" style="margin-top:24px;text-align:left">
|
||||
<div class="summary-row"><span class="key">Open WebUI</span><span class="val badge">:3001</span></div>
|
||||
<div class="summary-row"><span class="key">JupyterLab</span><span class="val badge">:8888</span></div>
|
||||
@ -400,7 +405,7 @@ HTML = r"""<!DOCTYPE html>
|
||||
<script>
|
||||
// ── State ──────────────────────────────────────────────────
|
||||
let netMode = 'dhcp';
|
||||
let selectedTier = 'entry';
|
||||
let selectedTier = 'basic';
|
||||
let tools = {
|
||||
ollama: { name: 'Ollama + Open WebUI', desc: 'LLM inference & chat', icon: '🤖', on: true },
|
||||
jupyterlab: { name: 'JupyterLab', desc: 'Notebook environment', icon: '📓', on: true },
|
||||
@ -418,7 +423,7 @@ window.onload = () => {
|
||||
document.getElementById('current-ip').textContent = d.ip || 'unknown';
|
||||
});
|
||||
renderTools();
|
||||
selectTier('entry');
|
||||
selectTier('basic');
|
||||
};
|
||||
|
||||
// ── Navigation ─────────────────────────────────────────────
|
||||
@ -467,7 +472,7 @@ function applyStaticIP() {
|
||||
// ── Tier ───────────────────────────────────────────────────
|
||||
function selectTier(t) {
|
||||
selectedTier = t;
|
||||
['entry','mid','advanced'].forEach(x =>
|
||||
['starter','basic','pro','max'].forEach(x =>
|
||||
document.getElementById('tier-'+x).classList.toggle('selected', x===t));
|
||||
}
|
||||
|
||||
@ -707,7 +712,7 @@ class Handler(BaseHTTPRequestHandler):
|
||||
|
||||
elif path == "/api/install":
|
||||
global install_proc
|
||||
tier = body.get("tier", "entry")
|
||||
tier = body.get("tier", "basic")
|
||||
skip = body.get("skip_tools", [])
|
||||
if not install_status["running"]:
|
||||
t = threading.Thread(target=run_install, args=(tier, skip), daemon=True)
|
||||
@ -752,7 +757,7 @@ def show_console_banner(ip):
|
||||
try:
|
||||
with open("/etc/issue", "w") as f:
|
||||
f.write(f"Ubuntu 22.04.5 LTS \\n \\l\n\n")
|
||||
f.write(f" \033[1;36mCezen AI Suite Setup:\033[0m http://{ip} | http://cezenai.local\n\n")
|
||||
f.write(f" \033[1;36mNexus One AI Setup:\033[0m http://{ip} | http://cezenai.local\n\n")
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
|
||||
172
install.sh
172
install.sh
@ -1,17 +1,37 @@
|
||||
#!/usr/bin/env bash
|
||||
# ─────────────────────────────────────────────
|
||||
# Cezen AI Suite — Entry Level Installer
|
||||
# Nexus One AI — Installer
|
||||
# Usage:
|
||||
# sudo bash install.sh → Phase 1 (drivers + schedules reboot → Phase 2)
|
||||
# sudo bash install.sh --phase=2 → Phase 2 (all software, run after reboot)
|
||||
# sudo bash install.sh → auto-detect tier, Phase 1
|
||||
# sudo bash install.sh --tier=starter → Starter tier, Phase 1
|
||||
# sudo bash install.sh --tier=basic → Basic tier, Phase 1
|
||||
# sudo bash install.sh --tier=pro → Pro tier, Phase 1
|
||||
# sudo bash install.sh --tier=max → Max tier, Phase 1
|
||||
# sudo bash install.sh --phase=2 --tier=... → Phase 2 only (post-reboot)
|
||||
# sudo bash install.sh --software-only → install on customer-owned hardware
|
||||
# sudo bash install.sh --feasibility-only → scan hardware and exit
|
||||
# sudo bash install.sh --skip-model-pull → install Ollama without preloading models
|
||||
# ─────────────────────────────────────────────
|
||||
set -e
|
||||
|
||||
TIER="entry"
|
||||
# Auto-detect tier from ISO marker written by autoinstall user-data
|
||||
if [ -f /opt/cezen/tier ]; then
|
||||
TIER="$(cat /opt/cezen/tier | tr -d '[:space:]')"
|
||||
elif [ -f /opt/aipackage/autoinstall/.tier ]; then
|
||||
TIER="$(cat /opt/aipackage/autoinstall/.tier | tr -d '[:space:]')"
|
||||
else
|
||||
TIER="basic" # default if no marker found
|
||||
fi
|
||||
PHASE="1"
|
||||
SKIP_ROLES=""
|
||||
SOFTWARE_ONLY=false
|
||||
FEASIBILITY_ONLY=false
|
||||
SKIP_MODEL_PULL=false
|
||||
PROFILE="auto"
|
||||
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
||||
ANSIBLE_DIR="$SCRIPT_DIR/ansible"
|
||||
FEASIBILITY_SCRIPT="$SCRIPT_DIR/scripts/cezen-feasibility.sh"
|
||||
FEASIBILITY_JSON="/opt/cezen/feasibility.json"
|
||||
|
||||
# Load saved config (written by web setup UI before phase 1)
|
||||
[ -f /opt/cezen/install.conf ] && source /opt/cezen/install.conf
|
||||
@ -21,9 +41,24 @@ for arg in "$@"; do
|
||||
--tier=*) TIER="${arg#*=}" ;;
|
||||
--phase=*) PHASE="${arg#*=}" ;;
|
||||
--skip=*) SKIP_ROLES="${arg#*=}" ;;
|
||||
--profile=*) PROFILE="${arg#*=}" ;;
|
||||
--software-only) SOFTWARE_ONLY=true ;;
|
||||
--feasibility-only) FEASIBILITY_ONLY=true ;;
|
||||
--skip-model-pull) SKIP_MODEL_PULL=true ;;
|
||||
esac
|
||||
done
|
||||
|
||||
normalize_tier() {
  # Rewrite the global TIER in place so legacy tier names map onto the
  # current ones (entry→basic, mid→pro, advanced→max). Names that are
  # already current, and unrecognised names, are left untouched.
  case "$TIER" in
    entry)    TIER="basic" ;;
    mid)      TIER="pro" ;;
    advanced) TIER="max" ;;
    *)        : ;;
  esac
}
|
||||
|
||||
normalize_tier
|
||||
|
||||
# ── Preflight ──────────────────────────────────
|
||||
check_root() {
|
||||
if [ "$EUID" -ne 0 ]; then
|
||||
@ -52,6 +87,84 @@ install_ansible() {
|
||||
echo "✓ Ansible ready"
|
||||
}
|
||||
|
||||
append_skip_role() {
  # Add role $1 to the comma-separated global SKIP_ROLES list unless it is
  # already present. Wrapping the list in commas lets a single pattern match
  # whole entries only (so "k3s" never matches inside "k3s-extra").
  local role="$1"
  case ",$SKIP_ROLES," in
    ,,)          SKIP_ROLES="$role" ;;              # list was empty
    *",$role,"*) ;;                                 # already listed
    *)           SKIP_ROLES="$SKIP_ROLES,$role" ;;  # append
  esac
}
|
||||
|
||||
run_feasibility() {
  # Run the hardware feasibility checker, passing it the JSON report path.
  # A missing checker script is only a warning; when the checker does run,
  # its exit status becomes this function's exit status.
  if [ ! -f "$FEASIBILITY_SCRIPT" ]; then
    echo "WARNING: Feasibility checker not found: $FEASIBILITY_SCRIPT"
    return 0
  fi
  bash "$FEASIBILITY_SCRIPT" "$FEASIBILITY_JSON"
}
|
||||
|
||||
json_field() {
  # Print the value found at dotted path $1 (e.g. "recommendation.recommended_profile")
  # inside $FEASIBILITY_JSON. Any failure — unreadable file, invalid JSON,
  # missing key — prints an empty line instead of an error.
  local expr="$1"
  python3 - "$FEASIBILITY_JSON" "$expr" <<'PY'
import json
import sys

report_path, dotted = sys.argv[1], sys.argv[2]
try:
    node = json.load(open(report_path))
    # Walk one key per dotted component.
    for key in dotted.split("."):
        node = node[key]
    print(node)
except Exception:
    print("")
PY
}
|
||||
|
||||
apply_profile_from_feasibility() {
  # Resolve the install profile and translate it into skipped Ansible roles.
  #
  # Reads:   PROFILE, FEASIBILITY_JSON
  # Updates: PROFILE (when "auto" is resolved), SKIP_ROLES (via
  #          append_skip_role), SKIP_MODEL_PULL (forced true on light profiles)
  #
  # PROFILE=auto is replaced by the checker's recommendation
  # (recommendation.recommended_profile in the feasibility report). An
  # explicit --profile=... is honoured even when no report exists; the
  # previous early return on a missing report silently ignored it.
  local detected_profile="" role

  if [ "$PROFILE" = "auto" ]; then
    if [ -f "$FEASIBILITY_JSON" ]; then
      detected_profile="$(json_field recommendation.recommended_profile)"
    fi
    if [ -z "$detected_profile" ]; then
      # Nothing to resolve "auto" against: keep whatever --skip=... gave us
      # rather than reporting "auto" as an unknown profile.
      echo "WARNING: No recommended profile available; using explicit skip list only."
      return 0
    fi
    PROFILE="$detected_profile"
  fi

  case "$PROFILE" in
    core)
      for role in docker k3s ollama vllm jupyterlab chromadb mlflow minio monitoring; do
        append_skip_role "$role"
      done
      SKIP_MODEL_PULL=true
      ;;
    cpu-ai)
      for role in k3s vllm mlflow minio; do
        append_skip_role "$role"
      done
      SKIP_MODEL_PULL=true
      ;;
    gpu-lite|gpu-starter)
      for role in k3s mlflow minio; do
        append_skip_role "$role"
      done
      SKIP_MODEL_PULL=true
      ;;
    gpu-standard)
      for role in mlflow minio; do
        append_skip_role "$role"
      done
      ;;
    gpu-pro|gpu-max)
      # Full stack: nothing is skipped.
      ;;
    *)
      echo "WARNING: Unknown profile '$PROFILE'; using explicit skip list only."
      ;;
  esac
}
|
||||
|
||||
has_nvidia_pci_gpu() {
|
||||
for vendor_file in /sys/bus/pci/devices/*/vendor; do
|
||||
[ -f "$vendor_file" ] || continue
|
||||
@ -70,7 +183,7 @@ has_working_nvidia_driver() {
|
||||
run_phase1() {
|
||||
echo ""
|
||||
echo "╔══════════════════════════════════════════╗"
|
||||
echo "║ Cezen AI Suite — Phase 1: NVIDIA ║"
|
||||
echo "║ Nexus One AI — Phase 1: NVIDIA ║"
|
||||
echo "╚══════════════════════════════════════════╝"
|
||||
|
||||
if ! has_nvidia_pci_gpu; then
|
||||
@ -87,7 +200,7 @@ run_phase1() {
|
||||
# Register phase 2 as a one-shot systemd service so it runs after reboot
|
||||
cat > /etc/systemd/system/cezen-phase2.service << EOF
|
||||
[Unit]
|
||||
Description=Cezen AI Suite Phase 2 Installer
|
||||
Description=Nexus One AI Phase 2 Installer
|
||||
After=network-online.target nvidia-persistenced.service
|
||||
Wants=network-online.target
|
||||
|
||||
@ -116,7 +229,7 @@ EOF
|
||||
run_phase2() {
|
||||
echo ""
|
||||
echo "╔══════════════════════════════════════════╗"
|
||||
echo "║ Cezen AI Suite — Phase 2: Stack ║"
|
||||
echo "║ Nexus One AI — Phase 2: Stack ║"
|
||||
echo "╚══════════════════════════════════════════╝"
|
||||
|
||||
GPU_AVAILABLE=false
|
||||
@ -129,12 +242,26 @@ run_phase2() {
|
||||
fi
|
||||
|
||||
# Build skip_roles extra var (comma-separated list, empty string = skip nothing)
|
||||
EXTRA_VARS="tier=$TIER skip_roles=\"$SKIP_ROLES\" gpu_available=$GPU_AVAILABLE"
|
||||
EXTRA_VARS="tier=$TIER skip_roles=\"$SKIP_ROLES\" gpu_available=$GPU_AVAILABLE skip_model_pull=$SKIP_MODEL_PULL"
|
||||
echo "→ Tier: $TIER | Skip: ${SKIP_ROLES:-none}"
|
||||
echo "→ GPU available: $GPU_AVAILABLE"
|
||||
echo "→ Skip model pull: $SKIP_MODEL_PULL"
|
||||
|
||||
# Select Ansible playbook by tier
|
||||
case "$TIER" in
|
||||
starter) PLAYBOOK="$ANSIBLE_DIR/starter.yml" ;;
|
||||
basic|entry) PLAYBOOK="$ANSIBLE_DIR/entry.yml" ;;
|
||||
pro) PLAYBOOK="$ANSIBLE_DIR/pro.yml" ;;
|
||||
max) PLAYBOOK="$ANSIBLE_DIR/max.yml" ;;
|
||||
*)
|
||||
echo "ERROR: Unknown tier '$TIER'. Valid: starter | basic | pro | max"
|
||||
exit 1
|
||||
;;
|
||||
esac
|
||||
|
||||
echo "→ Playbook: $PLAYBOOK"
|
||||
ANSIBLE_STDOUT_CALLBACK=yaml \
|
||||
ansible-playbook -i localhost, -c local "$ANSIBLE_DIR/entry.yml" \
|
||||
ansible-playbook -i localhost, -c local "$PLAYBOOK" \
|
||||
-e "$EXTRA_VARS"
|
||||
|
||||
# Disable one-shot service so it doesn't run again on next reboot
|
||||
@ -142,19 +269,32 @@ run_phase2() {
|
||||
|
||||
echo ""
|
||||
echo "╔══════════════════════════════════════════╗"
|
||||
echo "║ Cezen AI Suite installation complete! ║"
|
||||
echo "║ Nexus One AI installation complete! ║"
|
||||
echo "║ Tier: $(printf '%-33s' "$TIER")║"
|
||||
echo "║ ║"
|
||||
echo "║ JupyterLab → http://localhost:8888 ║"
|
||||
echo "║ Ollama API → http://localhost:11434 ║"
|
||||
echo "║ MLflow → http://localhost:5000 ║"
|
||||
echo "║ MinIO → http://localhost:9001 ║"
|
||||
echo "║ Grafana → http://localhost:3000 ║"
|
||||
echo "║ Portal → http://localhost ║"
|
||||
echo "║ Ollama API → http://localhost:11434 ║"
|
||||
echo "║ vLLM API → http://localhost:8000 ║"
|
||||
echo "║ Grafana → http://localhost:3000 ║"
|
||||
echo "╚══════════════════════════════════════════╝"
|
||||
}
|
||||
|
||||
# ── Main ───────────────────────────────────────
|
||||
check_root
|
||||
check_os
|
||||
|
||||
if [ "$FEASIBILITY_ONLY" = true ]; then
|
||||
run_feasibility
|
||||
exit 0
|
||||
fi
|
||||
|
||||
check_root
|
||||
run_feasibility
|
||||
|
||||
if [ "$SOFTWARE_ONLY" = true ]; then
|
||||
PHASE="2"
|
||||
apply_profile_from_feasibility
|
||||
fi
|
||||
|
||||
install_ansible
|
||||
|
||||
if [ "$PHASE" = "1" ]; then
|
||||
|
||||
@ -1,44 +1,68 @@
|
||||
#!/usr/bin/env bash
|
||||
# Pull additional AI models into Ollama
|
||||
# Run after install: bash models/pull-models.sh --tier=entry
|
||||
# Run after install: bash models/pull-models.sh --tier=starter
|
||||
# ─────────────────────────────────────────────
|
||||
TIER=${1:-entry}
|
||||
TIER="basic" # default tier
|
||||
|
||||
echo "Pulling models for tier: $TIER"
|
||||
for arg in "$@"; do
|
||||
case $arg in
|
||||
--tier=*) TIER="${arg#*=}" ;;
|
||||
*) TIER="$arg" ;; # allow positional: pull-models.sh pro
|
||||
esac
|
||||
done
|
||||
|
||||
entry_models=(
|
||||
"llama3.1:8b" # General purpose, good baseline
|
||||
"mistral:7b" # Fast, good for APIs
|
||||
"llama3.1:70b" # Larger — only if enough VRAM (3× L40S has 144GB total)
|
||||
"nomic-embed-text" # Embedding model for RAG
|
||||
"codellama:13b" # Code generation
|
||||
# Normalise legacy names
|
||||
case "$TIER" in
|
||||
entry) TIER="basic" ;;
|
||||
mid) TIER="pro" ;;
|
||||
advanced) TIER="max" ;;
|
||||
esac
|
||||
|
||||
# ── Model lists ───────────────────────────────
|
||||
starter_models=(
|
||||
"phi3:mini" # 3.8B — fits in 32 GB GDDR7 at full precision
|
||||
"nomic-embed-text" # Embedding model for RAG
|
||||
)
|
||||
|
||||
mid_models=(
|
||||
"${entry_models[@]}"
|
||||
"llama3.1:70b"
|
||||
"mixtral:8x7b"
|
||||
"deepseek-coder-v2:16b"
|
||||
basic_models=(
|
||||
"llama3.1:8b" # General purpose, good baseline
|
||||
"mistral:7b" # Fast, good for APIs
|
||||
"nomic-embed-text" # Embedding model for RAG
|
||||
"codellama:13b" # Code generation
|
||||
)
|
||||
|
||||
advanced_models=(
|
||||
"${mid_models[@]}"
|
||||
"llama3.1:405b"
|
||||
"mixtral:8x22b"
|
||||
pro_models=(
|
||||
"${basic_models[@]}"
|
||||
"llama3.1:70b" # Large general purpose (needs 64+ GB VRAM at 4-bit)
|
||||
"mixtral:8x7b" # MoE model, strong reasoning
|
||||
"deepseek-coder-v2:16b" # Code specialist
|
||||
)
|
||||
|
||||
max_models=(
|
||||
"${pro_models[@]}"
|
||||
"llama3.1:405b" # Flagship — needs 320+ GB VRAM or multi-node
|
||||
"mixtral:8x22b" # Large MoE
|
||||
)
|
||||
|
||||
case $TIER in
|
||||
entry) models=("${entry_models[@]}") ;;
|
||||
mid) models=("${mid_models[@]}") ;;
|
||||
advanced) models=("${advanced_models[@]}") ;;
|
||||
*) echo "Unknown tier: $TIER. Use entry, mid, or advanced."; exit 1 ;;
|
||||
starter) models=("${starter_models[@]}") ;;
|
||||
basic) models=("${basic_models[@]}") ;;
|
||||
pro) models=("${pro_models[@]}") ;;
|
||||
max) models=("${max_models[@]}") ;;
|
||||
*)
|
||||
echo "Unknown tier: $TIER"
|
||||
echo "Usage: bash pull-models.sh --tier=starter|basic|pro|max"
|
||||
exit 1
|
||||
;;
|
||||
esac
|
||||
|
||||
echo "Pulling models for tier: $TIER"
|
||||
echo ""
|
||||
|
||||
for model in "${models[@]}"; do
|
||||
echo ""
|
||||
echo "→ Pulling $model..."
|
||||
ollama pull "$model"
|
||||
echo ""
|
||||
done
|
||||
|
||||
echo ""
|
||||
echo "✓ All models pulled. List with: ollama list"
|
||||
echo "✓ Done. List installed models with: ollama list"
|
||||
|
||||
135
nginx/cezen.conf
Normal file
135
nginx/cezen.conf
Normal file
@ -0,0 +1,135 @@
|
||||
# /etc/nginx/sites-available/cezen
|
||||
# Nexus One AI Portal — serves static portal, proxies API and console terminal
|
||||
#
|
||||
# Install:
|
||||
# sudo cp cezen.conf /etc/nginx/sites-available/cezen
|
||||
# sudo ln -sf /etc/nginx/sites-available/cezen /etc/nginx/sites-enabled/cezen
|
||||
# sudo rm -f /etc/nginx/sites-enabled/default
|
||||
# sudo nginx -t && sudo systemctl reload nginx
|
||||
|
||||
# ─── Rate limiting zones (must be outside server block) ──────────────────────
|
||||
# Login: 5 requests/min per IP; up to 3 extra requests are served immediately as a burst (nodelay), then 429
|
||||
limit_req_zone $binary_remote_addr zone=cezen_login:10m rate=5r/m;
|
||||
# General API: 60 req/min per IP (generous for dashboard polling)
|
||||
limit_req_zone $binary_remote_addr zone=cezen_api:10m rate=60r/m;
|
||||
|
||||
server {
    listen 80 default_server;
    listen [::]:80 default_server;

    server_name _;

    # Hide server version
    server_tokens off;

    # Logging
    access_log /var/log/nginx/cezen-access.log;
    error_log  /var/log/nginx/cezen-error.log;

    # ─── Global security headers ──────────────────────────────────────────────
    # nginx does NOT concatenate adjacent quoted strings: add_header takes
    # exactly "name value [always]", so the policy must be a single value.
    # It is held in a variable so locations that declare their own add_header
    # (which disables inheritance of ALL server-level add_header lines) can
    # repeat it without duplicating the text.
    set $cezen_csp "default-src 'self'; script-src 'self' 'unsafe-inline'; style-src 'self' 'unsafe-inline'; img-src 'self' data:; connect-src 'self'; frame-src 'self'; font-src 'self'; object-src 'none'; base-uri 'self';";

    add_header X-Content-Type-Options  "nosniff" always;
    add_header X-Frame-Options         "SAMEORIGIN" always;
    add_header X-XSS-Protection        "1; mode=block" always;
    add_header Referrer-Policy         "strict-origin-when-cross-origin" always;
    add_header Permissions-Policy      "geolocation=(), camera=(), microphone=()" always;
    add_header Content-Security-Policy $cezen_csp always;

    # ─── robots.txt — block all indexing (air-gapped / private portal) ────────
    location = /robots.txt {
        # default_type sets the Content-Type of a "return" body. Using
        # add_header here would emit a second Content-Type header and drop
        # the inherited security headers.
        default_type text/plain;
        return 200 "User-agent: *\nDisallow: /\n";
    }

    # ─── Static Portal ───────────────────────────────────────────────────────
    root  /opt/cezen/portal;
    index index.html;

    location / {
        try_files $uri $uri/ /index.html;
    }

    # ─── Block dotfiles and common attack paths ───────────────────────────────
    # Regex locations are tested in file order, so these deny rules sit above
    # the static-asset rule: /.git/config.js must be denied, not cached.
    location ~ /\. {
        deny all;
    }

    location ~* \.(env|git|sql|bak|sh|py)$ {
        deny all;
    }

    # Cache static assets aggressively
    location ~* \.(css|js|png|jpg|jpeg|gif|ico|svg|woff2?)$ {
        expires 7d;
        add_header Cache-Control "public, immutable";

        # Declaring add_header in this block stops inheritance of the
        # server-level headers, so they are repeated explicitly.
        add_header X-Content-Type-Options  "nosniff" always;
        add_header X-Frame-Options         "SAMEORIGIN" always;
        add_header X-XSS-Protection        "1; mode=block" always;
        add_header Referrer-Policy         "strict-origin-when-cross-origin" always;
        add_header Permissions-Policy      "geolocation=(), camera=(), microphone=()" always;
        add_header Content-Security-Policy $cezen_csp always;
    }

    # ─── Model upload (large files — no size limit, extended timeout) ────────
    location = /api/models/upload {
        client_max_body_size    0;      # unlimited — GGUF files can be 70 GB+
        proxy_request_buffering off;    # stream directly to backend, don't buffer in Nginx
        proxy_read_timeout      7200s;  # 2 hours for slow transfers
        proxy_send_timeout      7200s;

        proxy_pass         http://127.0.0.1:8080;
        proxy_http_version 1.1;
        proxy_set_header   Host              $host;
        proxy_set_header   X-Real-IP         $remote_addr;
        proxy_set_header   X-Forwarded-For   $proxy_add_x_forwarded_for;
        proxy_set_header   X-Forwarded-Proto $scheme;  # same header set as the other /api/ locations
    }

    # ─── Login rate limit (tight) ─────────────────────────────────────────────
    location = /api/auth/login {
        limit_req        zone=cezen_login burst=3 nodelay;
        limit_req_status 429;

        proxy_pass         http://127.0.0.1:8080;
        proxy_http_version 1.1;
        proxy_set_header   Host              $host;
        proxy_set_header   X-Real-IP         $remote_addr;
        proxy_set_header   X-Forwarded-For   $proxy_add_x_forwarded_for;
        proxy_set_header   X-Forwarded-Proto $scheme;
        proxy_read_timeout 30s;
    }

    # ─── FastAPI Backend (/api/) ──────────────────────────────────────────────
    # "^~" stops nginx from testing the regex locations above once this prefix
    # matches; otherwise a request such as /api/files/report.js would be
    # served from the static root instead of being proxied.
    location ^~ /api/ {
        limit_req        zone=cezen_api burst=20 nodelay;
        limit_req_status 429;

        proxy_pass         http://127.0.0.1:8080;
        proxy_http_version 1.1;
        proxy_set_header   Host              $host;
        proxy_set_header   X-Real-IP         $remote_addr;
        proxy_set_header   X-Forwarded-For   $proxy_add_x_forwarded_for;
        proxy_set_header   X-Forwarded-Proto $scheme;
        proxy_read_timeout 120s;
    }

    # ─── Web Console (ttyd) (/console/) ──────────────────────────────────────
    # "^~" for the same reason as /api/: console assets ending in .js/.css
    # must reach ttyd, not the static-asset location.
    location ^~ /console/ {
        proxy_pass         http://127.0.0.1:7681/;
        proxy_http_version 1.1;
        proxy_set_header   Upgrade    $http_upgrade;
        proxy_set_header   Connection "upgrade";
        proxy_set_header   Host       $host;
        proxy_set_header   X-Real-IP  $remote_addr;
        proxy_read_timeout 86400s;

        # sub_filter can only rewrite an uncompressed body, so ask the
        # upstream not to compress its responses.
        proxy_set_header   Accept-Encoding "";

        # Rewrite paths so ttyd JS/CSS assets load correctly
        proxy_redirect   / /console/;
        sub_filter       'href="/' 'href="/console/';
        sub_filter       'src="/'  'src="/console/';
        sub_filter_once  off;
        sub_filter_types text/html;
    }
}
|
||||
83
scripts/cezen-backup.sh
Normal file
83
scripts/cezen-backup.sh
Normal file
@ -0,0 +1,83 @@
|
||||
#!/usr/bin/env bash
|
||||
# Nexus One AI backup/restore helper.
|
||||
#
|
||||
# Usage:
|
||||
# sudo bash scripts/cezen-backup.sh backup
|
||||
# sudo bash scripts/cezen-backup.sh list
|
||||
# sudo bash scripts/cezen-backup.sh restore /opt/cezen/backups/cezen-backup-YYYYmmdd-HHMMSS.zip
|
||||
|
||||
set -euo pipefail
|
||||
|
||||
ACTION="${1:-backup}"
|
||||
TARGET="${2:-}"
|
||||
DATA_DIR="${CEZEN_DATA:-/opt/cezen/data}"
|
||||
BACKUP_DIR="${CEZEN_BACKUP_DIR:-/opt/cezen/backups}"
|
||||
|
||||
python3 - "$ACTION" "$TARGET" "$DATA_DIR" "$BACKUP_DIR" <<'PY'
|
||||
import json
|
||||
import shutil
|
||||
import sys
|
||||
import zipfile
|
||||
from datetime import datetime, timezone
|
||||
from pathlib import Path
|
||||
|
||||
action, target, data_dir, backup_dir = sys.argv[1:5]
|
||||
data_dir = Path(data_dir)
|
||||
backup_dir = Path(backup_dir)
|
||||
backup_dir.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
def now_tag():
    """Return the current UTC time as a compact ``YYYYmmdd-HHMMSS`` tag for file names."""
    stamp = datetime.now(timezone.utc)
    return f"{stamp:%Y%m%d-%H%M%S}"
|
||||
|
||||
def iso_now():
    """Return the current UTC time as an ISO-8601 string carrying a ``+00:00`` offset."""
    current = datetime.now(tz=timezone.utc)
    return current.isoformat()
|
||||
|
||||
def write_backup(dest):
    """Write a zip snapshot of ``data_dir`` to *dest*.

    The archive starts with a ``manifest.json`` entry (schema tag, creation
    time, source directory), followed by every regular file under
    ``data_dir``, stored under its path relative to that directory. A missing
    ``data_dir`` yields an archive holding only the manifest.

    Args:
        dest: Path of the zip file to create (overwritten if present).
    """
    manifest = {
        "schema": "cezen.backup_manifest.v1",
        "created_at": iso_now(),
        "data_dir": str(data_dir),
        "source": "cezen-backup.sh",
    }
    # If the backup directory is nested inside the data directory, rglob()
    # would find the half-written archive itself; it must never be added to
    # its own contents.
    dest_resolved = Path(dest).resolve()
    with zipfile.ZipFile(dest, "w", compression=zipfile.ZIP_DEFLATED) as zf:
        zf.writestr("manifest.json", json.dumps(manifest, indent=2))
        if data_dir.exists():
            for path in data_dir.rglob("*"):
                if not path.is_file():
                    continue
                if path.resolve() == dest_resolved:
                    continue
                zf.write(path, path.relative_to(data_dir).as_posix())
|
||||
|
||||
def restore(src):
    """Restore the files of backup archive *src* into ``data_dir``.

    Steps, in order:
      1. Open the archive and check every member; nothing is written yet.
      2. Snapshot the current data directory to
         ``backup_dir/pre-restore-<tag>.zip`` via ``write_backup``.
      3. Extract the members over the data directory (existing files with the
         same name are overwritten; other files are left alone).

    All members are validated before the first byte is written, so an archive
    containing an unsafe path is rejected without leaving the data directory
    half-restored.  ``manifest.json`` and directory entries are skipped.

    Args:
        src: Path (or string) of the backup zip to restore.

    Raises:
        SystemExit: if *src* is not an existing file, is not a readable zip
            archive, or contains a member that would resolve to a location
            outside (or equal to) the data directory.
    """
    src = Path(src)
    if not src.is_file():
        raise SystemExit(f"Backup not found: {src}")
    root = data_dir.resolve()
    try:
        archive = zipfile.ZipFile(src, "r")
    except zipfile.BadZipFile:
        raise SystemExit(f"Not a valid backup archive: {src}") from None
    with archive as zf:
        # Pass 1: resolve and vet every destination before touching the disk.
        plan = []
        for member in zf.infolist():
            if member.filename == "manifest.json" or member.is_dir():
                continue
            target_path = (data_dir / member.filename).resolve()
            # Must be strictly below the data root: rejects "../" traversal,
            # absolute paths, and entries that resolve to the root itself.
            if root not in target_path.parents:
                raise SystemExit(f"Unsafe archive path: {member.filename}")
            plan.append((member, target_path))

        # Safety net: capture the current state so the restore can be undone.
        safety = backup_dir / f"pre-restore-{now_tag()}.zip"
        write_backup(safety)

        # Pass 2: extract.
        for member, target_path in plan:
            target_path.parent.mkdir(parents=True, exist_ok=True)
            with zf.open(member) as source, open(target_path, "wb") as out:
                shutil.copyfileobj(source, out)
    print(json.dumps({"ok": True, "restored": str(src), "pre_restore_snapshot": str(safety)}, indent=2))
|
||||
|
||||
# Dispatch on the action passed in by the bash wrapper.  Every branch reports
# its result as indented JSON on stdout; misuse exits with a usage message.
if action == "restore":
    if not target:
        raise SystemExit("Usage: cezen-backup.sh restore /path/to/backup.zip")
    restore(target)
elif action == "list":
    # Newest first, by modification time.
    archives = sorted(
        backup_dir.glob("cezen-backup-*.zip"),
        key=lambda p: p.stat().st_mtime,
        reverse=True,
    )
    listing = [
        {"name": item.name, "path": str(item), "size_bytes": item.stat().st_size}
        for item in archives
    ]
    print(json.dumps({"backup_dir": str(backup_dir), "backups": listing}, indent=2))
elif action == "backup":
    dest = backup_dir / f"cezen-backup-{now_tag()}.zip"
    write_backup(dest)
    print(json.dumps({"ok": True, "backup": str(dest)}, indent=2))
else:
    raise SystemExit("Usage: cezen-backup.sh backup|list|restore [backup.zip]")
|
||||
PY
|
||||
218
scripts/cezen-feasibility.sh
Normal file
218
scripts/cezen-feasibility.sh
Normal file
@ -0,0 +1,218 @@
|
||||
#!/usr/bin/env bash
# Nexus One AI feasibility checker.
# Runs before installation to classify existing hardware for software-only or appliance deployments.
#
# Usage: bash scripts/cezen-feasibility.sh [output.json]
set -euo pipefail

# Report destination; the first argument overrides the default location.
OUT="${1:-/opt/cezen/feasibility.json}"
# Best effort only: the copy step further down falls back to ./feasibility.json
# when this location cannot be written.
mkdir -p "$(dirname "$OUT")" 2>/dev/null || true

# The X's must be the last characters of the template: BSD/macOS mktemp only
# substitutes trailing X's, so a ".json" suffix would yield a fixed file name.
tmp_json="$(mktemp /tmp/cezen-feasibility.XXXXXX)"
# Remove the scratch file on every exit path, including a failure of the
# embedded Python under `set -e`.
trap 'rm -f "$tmp_json"' EXIT

python3 - "$tmp_json" <<'PY'
|
||||
import json, os, platform, shutil, socket, subprocess, sys
|
||||
from pathlib import Path
|
||||
|
||||
# Scratch file path handed over by the bash wrapper; the finished JSON report is written to it.
out = Path(sys.argv[1])
|
||||
|
||||
def run(cmd, timeout=5):
    """Run *cmd* and return its stripped stdout, or ``""`` on any failure.

    A missing binary, a non-zero exit status and a timeout all count as
    failure.  stderr is discarded.

    Args:
        cmd: Argument list passed to ``subprocess`` (no shell involved).
        timeout: Seconds to wait before giving up.
    """
    try:
        completed = subprocess.run(
            cmd,
            stdout=subprocess.PIPE,
            stderr=subprocess.DEVNULL,
            text=True,
            timeout=timeout,
            check=True,
        )
    except Exception:
        return ""
    return completed.stdout.strip()
|
||||
|
||||
def read(path, default=""):
    """Return the stripped text of *path*, or *default* if it cannot be read.

    Undecodable bytes are replaced rather than raising.
    """
    try:
        text = Path(path).read_text(errors="replace")
    except Exception:
        return default
    return text.strip()
|
||||
|
||||
def gb(n):
    """Convert a byte count to units of 1024**3 bytes, rounded to one decimal."""
    bytes_per_unit = 1024 ** 3
    return round(n / bytes_per_unit, 1)
|
||||
|
||||
def os_info():
    """Describe the operating system from ``/etc/os-release``.

    The file is parsed as ``KEY=value`` lines.  Blank lines and ``#`` comments
    are ignored, and one pair of matching surrounding quotes (double or
    single, both permitted by the os-release format) is removed from each
    value.  When the file is missing or lacks ``PRETTY_NAME`` the name falls
    back to ``platform.platform()``.

    Returns:
        dict with ``name``, ``id``, ``version_id`` and ``kernel`` keys.
    """
    data = {}
    for raw in read("/etc/os-release").splitlines():
        line = raw.strip()
        if not line or line.startswith("#") or "=" not in line:
            continue
        key, value = line.split("=", 1)
        value = value.strip()
        # Strip exactly one pair of matching quotes, whichever kind was used.
        if len(value) >= 2 and value[0] == value[-1] and value[0] in "\"'":
            value = value[1:-1]
        data[key.strip()] = value
    return {
        "name": data.get("PRETTY_NAME") or platform.platform(),
        "id": data.get("ID", ""),
        "version_id": data.get("VERSION_ID", ""),
        "kernel": platform.release(),
    }
|
||||
|
||||
def cpu_info():
    """Return the CPU model string and logical core count.

    The model is taken from the first ``model name`` line of
    ``/proc/cpuinfo``; if none is found (or it is empty) the value of
    ``platform.processor()`` is used instead.  ``cores`` is 0 when the count
    cannot be determined.
    """
    cpuinfo_lines = read("/proc/cpuinfo").splitlines()
    model = next(
        (
            entry.split(":", 1)[1].strip()
            for entry in cpuinfo_lines
            if entry.lower().startswith("model name")
        ),
        "",
    )
    if not model:
        model = platform.processor()
    core_count = os.cpu_count() or 0
    return {"model": model, "cores": core_count}
|
||||
|
||||
def mem_gb():
    """Return total physical memory in GB (one decimal), or 0 if unknown.

    Sources are tried in order: the ``MemTotal:`` line of ``/proc/meminfo``,
    the output of ``sysctl -n hw.memsize``, and finally
    ``os.sysconf`` page count times page size.
    """
    # 1) /proc/meminfo -- the value is divided by 1024 twice, i.e. it is
    #    treated as KiB.
    meminfo = read("/proc/meminfo").splitlines()
    total_line = next((ln for ln in meminfo if ln.startswith("MemTotal:")), None)
    if total_line is not None:
        kib = int(total_line.split()[1])
        return round(kib / 1024 / 1024, 1)

    # 2) sysctl -- accepted only when the output is a plain number of bytes.
    memsize = run(["sysctl", "-n", "hw.memsize"])
    if memsize.isdigit():
        return gb(int(memsize))

    # 3) sysconf -- any failure here simply means "unknown".
    try:
        page_count = os.sysconf("SC_PHYS_PAGES")
        page_bytes = os.sysconf("SC_PAGE_SIZE")
        if page_count and page_bytes:
            return gb(int(page_count) * int(page_bytes))
    except Exception:
        pass
    return 0
|
||||
|
||||
def disk_info():
    """Return total, free and used space of the root filesystem (``/``) in GB."""
    total_bytes, used_bytes, free_bytes = shutil.disk_usage("/")
    return {
        "total_gb": gb(total_bytes),
        "free_gb": gb(free_bytes),
        "used_gb": gb(used_bytes),
    }
|
||||
|
||||
def gpu_info():
    """List NVIDIA GPUs as dicts with ``name``, ``vram_gb`` and ``driver``.

    When ``nvidia-smi`` is on PATH its CSV output is parsed, one GPU per row.
    If that yields nothing, the PCI bus is scanned for vendor id ``0x10de``
    and, on a match, a single placeholder entry with 0 GB VRAM is returned so
    callers can tell "hardware present, driver not ready" from "no GPU".
    """
    detected = []
    if shutil.which("nvidia-smi"):
        fields = "name,memory.total,driver_version"
        listing = run(["nvidia-smi", f"--query-gpu={fields}", "--format=csv,noheader,nounits"])
        for record in listing.splitlines():
            cols = [col.strip() for col in record.split(",")]
            if len(cols) < 2:
                continue
            try:
                # The reported total is divided by 1024 to obtain GB
                # (presumably MiB with `nounits` -- confirm against nvidia-smi docs).
                vram_gb = round(float(cols[1]) / 1024, 1)
            except Exception:
                # Non-numeric memory field: keep the GPU, record no VRAM.
                vram_gb = 0
            driver = cols[2] if len(cols) > 2 else ""
            detected.append({"name": cols[0], "vram_gb": vram_gb, "driver": driver})
    if detected:
        return detected

    # Fallback: look for any PCI device whose vendor id is 0x10de.
    vendor_files = Path("/sys/bus/pci/devices").glob("*/vendor")
    if any(read(vendor_file).lower() == "0x10de" for vendor_file in vendor_files):
        detected.append({"name": "NVIDIA GPU detected (driver not ready)", "vram_gb": 0, "driver": ""})
    return detected
|
||||
|
||||
def port_open(host, port):
    """Return True if a TCP connection to *host*:*port* succeeds within 1 second.

    Any error (refused, timed out, invalid port, ...) is reported as False.
    """
    try:
        conn = socket.create_connection((host, port), timeout=1)
    except Exception:
        return False
    # Connected: close the probe socket right away and report success.
    with conn:
        return True
|
||||
|
||||
def tool_state():
    """Report which command-line tools are available on PATH.

    ``docker_running`` is True only when the docker binary exists and
    ``docker info`` produces output within 3 seconds.
    """

    def on_path(binary):
        return bool(shutil.which(binary))

    state = {"docker": on_path("docker")}
    if shutil.which("docker"):
        state["docker_running"] = bool(run(["docker", "info"], timeout=3))
    else:
        state["docker_running"] = False
    state["ansible"] = on_path("ansible-playbook")
    state["python3"] = on_path("python3")
    state["curl"] = on_path("curl")
    state["git"] = on_path("git")
    return state
|
||||
|
||||
def recommend(mem, disk, gpus, cpu_cores):
    """Pick a deployment tier and profile for the detected hardware.

    GPU rungs are checked from most to least demanding; each rung needs the
    largest single-GPU VRAM to reach a threshold and, for the top two rungs,
    a minimum number of GPUs that report VRAM.  Without a qualifying GPU the
    choice falls to ``cpu-ai`` (at least 32 GB RAM and 8 cores) or ``core``.
    Low-RAM and low-disk warnings are appended independently of the profile.

    Args:
        mem: Total RAM in GB.
        disk: Free disk space in GB.
        gpus: List of GPU dicts; only the ``vram_gb`` key is read.
        cpu_cores: Logical CPU core count.

    Returns:
        dict with ``recommended_tier``, ``recommended_profile``,
        ``estimated_concurrent_users`` and ``notes`` (list of strings).
    """
    vram_values = [g.get("vram_gb", 0) for g in gpus]
    max_vram = max(vram_values or [0])
    gpu_count = sum(1 for g in gpus if g.get("vram_gb", 0) > 0)
    notes = []

    # (min VRAM of the largest GPU, min usable GPUs, tier, profile, concurrency, note)
    gpu_ladder = (
        (120, 4, "max", "gpu-max", "100+", None),
        (80, 2, "pro", "gpu-pro", "20-100", None),
        (48, 0, "basic", "gpu-standard", "5-20", None),
        (24, 0, "starter", "gpu-starter", "1-10", None),
        (8, 0, "starter", "gpu-lite", "1-5", "GPU is suitable for small quantized models only."),
    )
    for min_vram, min_gpus, rung_tier, rung_profile, rung_users, rung_note in gpu_ladder:
        if max_vram >= min_vram and gpu_count >= min_gpus:
            tier, profile, concurrency = rung_tier, rung_profile, rung_users
            if rung_note is not None:
                notes.append(rung_note)
            break
    else:
        # No GPU rung matched: decide between the two CPU-only profiles.
        if mem >= 32 and cpu_cores >= 8:
            tier, profile, concurrency = "starter", "cpu-ai", "1-3"
            notes.append("No usable NVIDIA VRAM found; local CPU inference is limited. Use cloud/external model fallback for better UX.")
        else:
            tier, profile, concurrency = "starter", "core", "1-2"
            notes.append("Hardware is best for portal, RAG management, workflows, and external/cloud model routing.")

    # Resource warnings apply regardless of which profile was chosen.
    if mem < 16:
        notes.append("RAM below 16 GB; avoid local model serving.")
    if disk < 100:
        notes.append("Less than 100 GB free disk; model storage and document indexing will be constrained.")

    return {
        "recommended_tier": tier,
        "recommended_profile": profile,
        "estimated_concurrent_users": concurrency,
        "notes": notes,
    }
|
||||
|
||||
# Imported here so this step is self-contained; used only for the timestamp.
from datetime import datetime, timezone

# --- Collect the hardware facts -------------------------------------------
mem = mem_gb()
disk = disk_info()
gpus = gpu_info()
cpu = cpu_info()
tools = tool_state()
rec = recommend(mem, disk["free_gb"], gpus, cpu["cores"])

# --- Feature availability, gated on RAM / free disk / per-GPU VRAM ---------
vram_per_gpu = [g.get("vram_gb", 0) for g in gpus]
features = {
    "portal": True,
    "users_auth": True,
    "document_intelligence": mem >= 8,
    "rag_chromadb": mem >= 16 and disk["free_gb"] >= 50,
    "ollama_cpu": mem >= 32,
    "ollama_gpu": any(v >= 8 for v in vram_per_gpu),
    "vllm": any(v >= 24 for v in vram_per_gpu),
    "fine_tuning_qlora": any(v >= 24 for v in vram_per_gpu),
    # Needs at least two GPUs with 48 GB or more each.
    "distributed_training": sum(1 for v in vram_per_gpu if v >= 48) >= 2,
    "monitoring": True,
    "software_only": True,
    "air_gapped_ready": True,
}

# --- Assemble and write the report ------------------------------------------
report = {
    "schema": "cezen.feasibility.v1",
    # Computed in-process: same format as `date -u +%Y-%m-%dT%H:%M:%SZ`, but
    # it cannot come back empty because an external command failed.
    "generated_at": datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ"),
    "host": socket.gethostname(),
    "os": os_info(),
    "cpu": cpu,
    "ram_gb": mem,
    "disk": disk,
    "gpus": gpus,
    "tools": tools,
    # True means something already accepts connections on that local port.
    "ports_in_use": {str(p): port_open("127.0.0.1", p) for p in [80, 8080, 11434, 8000, 3000, 8888]},
    "recommendation": rec,
    "features": features,
}
out.write_text(json.dumps(report, indent=2))
|
||||
PY
|
||||
|
||||
# Publish the report: try the requested destination first; if that copy fails
# (its error output is suppressed), fall back to the current directory.  A
# failure of the fallback copy aborts the script under `set -e`.
cp "$tmp_json" "$OUT" 2>/dev/null || {
  OUT="./feasibility.json"
  cp "$tmp_json" "$OUT"
}
rm -f "$tmp_json"

# Second embedded Python step: print a human-readable summary of the report.
python3 - "$OUT" <<'PY'
|
||||
import json, sys
|
||||
# Path of the JSON report written by the previous step.
p = sys.argv[1]
# Read through a context manager so the file handle is closed deterministically.
with open(p) as fh:
    d = json.load(fh)
r = d["recommendation"]

# --- Hardware summary --------------------------------------------------------
print("")
print("Nexus One AI Feasibility Report")
print("--------------------------------")
print(f"Host: {d['host']}")
print(f"OS: {d['os']['name']}")
print(f"CPU: {d['cpu']['cores']} cores | RAM: {d['ram_gb']} GB | Free disk: {d['disk']['free_gb']} GB")
if d["gpus"]:
    print("GPU: " + "; ".join(f"{g['name']} ({g.get('vram_gb', 0)} GB VRAM)" for g in d["gpus"]))
else:
    print("GPU: none detected")

# --- Recommendation ----------------------------------------------------------
print("")
print(f"Recommended tier: {r['recommended_tier'].upper()}")
print(f"Recommended profile: {r['recommended_profile']}")
print(f"Estimated concurrency: {r['estimated_concurrent_users']} users")
if r["notes"]:
    print("Notes:")
    for n in r["notes"]:
        print(f" - {n}")
print("")
print(f"JSON report: {p}")
|
||||
PY
|
||||
Loading…
Reference in New Issue
Block a user