From 56668f7bdcb0b7c6e6dff349cc7c894054f7fcf3 Mon Sep 17 00:00:00 2001 From: Jino Jose Date: Tue, 30 Jun 2026 08:54:01 +0530 Subject: [PATCH] Update ISO installer tiers and backend deployment --- .gitignore | 1 + README.md | 67 +- ansible/entry.yml | 10 +- ansible/max.yml | 83 + ansible/phase1_nvidia.yml | 2 +- ansible/pro.yml | 79 + ansible/roles/base/tasks/main.yml | 12 + .../cezen-backend/files/cezen-api.service | 20 + ansible/roles/cezen-backend/files/main.py | 4925 +++++++++++++++++ .../cezen-backend/files/requirements.txt | 13 + .../roles/cezen-backend/files/train_qlora.py | 309 ++ ansible/roles/cezen-backend/handlers/main.yml | 9 + ansible/roles/cezen-backend/tasks/main.yml | 113 + ansible/roles/cezen-nginx/files/cezen.conf | 135 + ansible/roles/cezen-nginx/handlers/main.yml | 5 + ansible/roles/cezen-nginx/tasks/main.yml | 59 + .../roles/cezen-ttyd/files/cezen-ttyd.service | 17 + ansible/roles/cezen-ttyd/handlers/main.yml | 9 + ansible/roles/cezen-ttyd/tasks/main.yml | 72 + ansible/roles/jupyterlab/tasks/main.yml | 2 +- ansible/roles/mlflow/tasks/main.yml | 2 +- ansible/roles/monitoring/tasks/main.yml | 8 +- ansible/roles/nvidia/tasks/main.yml | 2 +- ansible/roles/ollama/tasks/main.yml | 69 +- ansible/roles/vllm/defaults/main.yml | 7 + ansible/roles/vllm/tasks/main.yml | 55 +- ansible/starter.yml | 76 + autoinstall/build-iso-starter.sh | 199 + autoinstall/build-iso.sh | 91 +- autoinstall/firstboot-setup.sh | 44 +- autoinstall/user-data | 26 +- autoinstall/user-data-starter | 137 + autoinstall/websetup/server.py | 41 +- install.sh | 172 +- models/pull-models.sh | 74 +- nginx/cezen.conf | 135 + scripts/cezen-backup.sh | 83 + scripts/cezen-feasibility.sh | 218 + 38 files changed, 7206 insertions(+), 175 deletions(-) create mode 100644 .gitignore create mode 100644 ansible/max.yml create mode 100644 ansible/pro.yml create mode 100644 ansible/roles/cezen-backend/files/cezen-api.service create mode 100644 ansible/roles/cezen-backend/files/main.py create mode 100644 ansible/roles/cezen-backend/files/requirements.txt create mode 100644 ansible/roles/cezen-backend/files/train_qlora.py create mode 100644 ansible/roles/cezen-backend/handlers/main.yml create mode 100644 ansible/roles/cezen-backend/tasks/main.yml create mode 100644 ansible/roles/cezen-nginx/files/cezen.conf create mode 100644 ansible/roles/cezen-nginx/handlers/main.yml create mode 100644 ansible/roles/cezen-nginx/tasks/main.yml create mode 100644 ansible/roles/cezen-ttyd/files/cezen-ttyd.service create mode 100644 ansible/roles/cezen-ttyd/handlers/main.yml create mode 100644 ansible/roles/cezen-ttyd/tasks/main.yml create mode 100644 ansible/roles/vllm/defaults/main.yml create mode 100644 ansible/starter.yml create mode 100644 autoinstall/build-iso-starter.sh create mode 100644 autoinstall/user-data-starter create mode 100644 nginx/cezen.conf create mode 100644 scripts/cezen-backup.sh create mode 100644 scripts/cezen-feasibility.sh diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..6267c43 --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +*.iso diff --git a/README.md b/README.md index d80760a..40c24e7 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -# Cezen AI Suite — Installer +# Nexus One AI — Installer ## Quick Start @@ -12,6 +12,61 @@ Server reboots automatically after NVIDIA drivers install. Phase 2 runs on its o On the custom ISO, Ubuntu autoinstall now pauses on the installer network screen so the operator can choose the final IP address from the VM console before installation continues. +## Software-Only / Existing Hardware + +Run a feasibility scan before quoting or installing on customer-owned hardware: + +```bash +bash scripts/cezen-feasibility.sh +``` + +The checker reports CPU, RAM, disk, NVIDIA GPU/VRAM, tool readiness, available features, and a recommended Cezen profile. It writes JSON to `/opt/cezen/feasibility.json` when possible, otherwise `./feasibility.json`. + +Install on existing hardware without the appliance NVIDIA phase: + +```bash +sudo bash install.sh --software-only --profile=auto +``` + +For small systems or slow customer networks, the installer skips default model downloads on lightweight profiles. To force the same behavior manually: + +```bash +sudo bash install.sh --software-only --profile=cpu-ai --skip-model-pull +``` + +Profiles: + +| Profile | Use When | Installs | +|---|---|---| +| `core` | no GPU / low RAM | portal, backend, nginx, health/metrics API | +| `cpu-ai` | 32 GB+ RAM, no usable GPU | core + Chroma/Ollama CPU path, model pull optional | +| `gpu-starter` | 24-32 GB VRAM | local AI starter stack, model pull optional | +| `gpu-standard` | 48-96 GB VRAM | standard GPU stack | +| `gpu-pro` | multi/high-VRAM GPU | advanced GPU stack | +| `gpu-max` | multi-node or HGX-class | full stack, custom sizing | + +## Sellable v1 Admin APIs + +The backend exposes the first productization APIs for software-only and appliance deployments: + +| API | Purpose | +|---|---| +| `GET /api/license` | Shows current tier, feature matrix, and whether the tier is locked by Cezen. | +| `GET /api/system/feasibility` | Returns the generated hardware feasibility report or live fallback. | +| `GET /api/system/readiness-report` | Combines license, feasibility, and install readiness into a customer-facing report payload. | +| `GET /api/audit/report?days=7` | Basic audit summary for handover and admin review. | +| `GET /api/system/backups` | Lists local backups. | +| `POST /api/system/backups` | Creates a local backup of Cezen data. | +| `POST /api/system/backups/{name}/restore` | Restores a named local backup and creates a pre-restore safety snapshot. | + +CLI backup helper: + +```bash +sudo bash scripts/cezen-backup.sh backup +sudo bash scripts/cezen-backup.sh list +sudo bash scripts/cezen-backup.sh restore /opt/cezen/backups/cezen-backup-YYYYmmdd-HHMMSS.zip +``` + ## What Gets Installed (Entry Tier) | Service | Port | Notes | @@ -42,7 +97,10 @@ NVIDIA driver install will succeed but `nvidia-smi` won't show GPUs — that's e ## Pull More Models ```bash -bash models/pull-models.sh --tier=entry +bash models/pull-models.sh --tier=starter # phi3:mini + embeddings +bash models/pull-models.sh --tier=basic # llama3.1:8b, mistral:7b, codellama +bash models/pull-models.sh --tier=pro # + llama3.1:70b, mixtral, deepseek-coder +bash models/pull-models.sh --tier=max # + llama3.1:405b, mixtral:8x22b ``` ## File Structure @@ -52,7 +110,10 @@ cgit/ ├── install.sh ← Entry point ├── ansible/ │ ├── phase1_nvidia.yml ← Phase 1: drivers (triggers reboot) -│ ├── entry.yml ← Phase 2: full stack +│ ├── starter.yml ← Phase 2: Starter tier (1 GPU, small team) +│ ├── entry.yml ← Phase 2: Basic tier (1–2 GPU, department) +│ ├── pro.yml ← Phase 2: Pro tier (2+ GPU, multi-team) +│ ├── max.yml ← Phase 2: Max tier (4–8 GPU, enterprise) │ └── roles/ │ ├── base/ ← OS, Python, Miniconda, LangChain │ ├── nvidia/ ← Drivers, CUDA 12.4, cuDNN 9 diff --git a/ansible/entry.yml b/ansible/entry.yml index 9b5b926..c89adcb 100644 --- a/ansible/entry.yml +++ b/ansible/entry.yml @@ -1,7 +1,7 @@ --- -# Phase 2: Full Cezen AI Suite — Entry Tier +# Phase 2: Full Nexus One AI — Entry Tier # Runs after NVIDIA driver reboot -- name: Cezen AI — Entry Tier Stack +- name: Nexus One AI — Entry Tier Stack hosts: localhost connection: local become: true @@ -35,3 +35,9 @@ when: "'minio' not in skip_roles.split(',')" - role: monitoring when: "'monitoring' not in skip_roles.split(',')" + - role: cezen-backend + when: "'cezen-backend' not in skip_roles.split(',')" + - role: cezen-ttyd + when: "'cezen-ttyd' not in skip_roles.split(',')" + - role: cezen-nginx + when: "'cezen-nginx' not in skip_roles.split(',')" diff --git a/ansible/max.yml b/ansible/max.yml new file mode 100644 index 0000000..9fabe99 --- /dev/null +++ b/ansible/max.yml @@ -0,0 +1,83 @@ +--- +# Nexus One AI — Max Tier Stack +# Hardware: 4–8× NVIDIA H100/A100/RTX 5090 (80–320 GB VRAM total), 256–512 GB DDR5, 8 TB+ NVMe, 100 GbE +# Capacity: 100+ concurrent users +# Runs after NVIDIA driver reboot (phase1_nvidia.yml) +# +# Differences from Pro tier: +# - vLLM tensor-parallel across 4+ GPUs (set vllm_tensor_parallel to GPU count) +# - Full precision models (no quantization required) +# - Advanced fine-tuning (QLoRA + DeepSpeed ZeRO-3 for multi-GPU training) +# - Full MLflow + MinIO stack for experiment tracking and artifact storage +# - All optional services enabled by default + +- name: Nexus One AI — Max Tier Stack + hosts: localhost + connection: local + become: true + vars: + cezen_user: "cezen" + cezen_home: "/opt/cezen" + cezen_login_home: "/home/cezen" + python_version: "3.11" + cuda_version: "12.6" + skip_roles: "" # comma-separated list of roles to skip + gpu_available: false + tier: "max" + + # ── vLLM — Max defaults ────────────────────── + # Full-precision Llama-3.1-70B across 4 GPUs by default. + # For HGX/DGX-class systems with 8 GPUs set vllm_tensor_parallel: 8 + # and switch to Llama-3.1-405B or Mixtral-8x22B. + vllm_model: "meta-llama/Meta-Llama-3.1-70B-Instruct" + vllm_tensor_parallel: 4 + vllm_gpu_memory_util: "0.90" + vllm_max_model_len: 32768 + vllm_quantization: "" # full precision at Max tier + + # ── Ollama — large model defaults ──────────── + ollama_default_model: "llama3.1:70b" + + # ── DeepSpeed — multi-GPU fine-tuning ──────── + deepspeed_enabled: true + deepspeed_zero_stage: 3 # ZeRO-3 for large model training + + roles: + - role: base + when: "'base' not in skip_roles.split(',')" + + - role: docker + when: "'docker' not in skip_roles.split(',')" + + - role: k3s + when: "'k3s' not in skip_roles.split(',')" + + - role: ollama + when: "'ollama' not in skip_roles.split(',')" + + - role: vllm + when: "'vllm' not in skip_roles.split(',')" + + - role: jupyterlab + when: "'jupyterlab' not in skip_roles.split(',')" + + - role: chromadb + when: "'chromadb' not in skip_roles.split(',')" + + - role: mlflow + when: "'mlflow' not in skip_roles.split(',')" + + - role: minio + when: "'minio' not in skip_roles.split(',')" + + - role: monitoring + when: "'monitoring' not in skip_roles.split(',')" + + - role: cezen-backend + when: "'cezen-backend' not in skip_roles.split(',')" + + - role: cezen-ttyd + when: "'cezen-ttyd' not in skip_roles.split(',')" + + - role: cezen-nginx + when: "'cezen-nginx' not in skip_roles.split(',')" diff --git a/ansible/phase1_nvidia.yml b/ansible/phase1_nvidia.yml index 955102c..c4bb6e2 100644 --- a/ansible/phase1_nvidia.yml +++ b/ansible/phase1_nvidia.yml @@ -1,6 +1,6 @@ --- # Phase 1: NVIDIA drivers only. Server reboots after this. -- name: Cezen AI — Phase 1 NVIDIA Drivers +- name: Nexus One AI — Phase 1 NVIDIA Drivers hosts: localhost connection: local become: true diff --git a/ansible/pro.yml b/ansible/pro.yml new file mode 100644 index 0000000..ba9db2a --- /dev/null +++ b/ansible/pro.yml @@ -0,0 +1,79 @@ +--- +# Nexus One AI — Pro Tier Stack +# Hardware: 2× NVIDIA RTX 5090 (32 GB GDDR7 each / 64 GB total), 128 GB DDR5, 4 TB NVMe, 10 GbE +# Capacity: 20–100 concurrent users +# Runs after NVIDIA driver reboot (phase1_nvidia.yml) +# +# Differences from Basic tier: +# - k3s included (multi-service orchestration at this scale) +# - MLflow included (fine-tuning tracking needed at Pro) +# - MinIO included (model + data storage at scale) +# - vLLM runs tensor-parallel across 2 GPUs +# - QLoRA fine-tuning available via portal + +- name: Nexus One AI — Pro Tier Stack + hosts: localhost + connection: local + become: true + vars: + cezen_user: "cezen" + cezen_home: "/opt/cezen" + cezen_login_home: "/home/cezen" + python_version: "3.11" + cuda_version: "12.6" + skip_roles: "" # comma-separated list of roles to skip + gpu_available: false + tier: "pro" + + # ── vLLM — Pro defaults ────────────────────── + # Tensor-parallel across 2× RTX 5090 (64 GB combined GDDR7). + # Llama-3.1-70B at 4-bit fits comfortably; switch to full-precision + # smaller models via the portal Model Manager. + vllm_model: "meta-llama/Meta-Llama-3.1-70B-Instruct" + vllm_tensor_parallel: 2 + vllm_gpu_memory_util: "0.85" + vllm_max_model_len: 8192 + vllm_quantization: "awq" + + # ── Ollama — full-size models ───────────────── + ollama_default_model: "llama3.1:70b" + + roles: + - role: base + when: "'base' not in skip_roles.split(',')" + + - role: docker + when: "'docker' not in skip_roles.split(',')" + + - role: k3s + when: "'k3s' not in skip_roles.split(',')" + + - role: ollama + when: "'ollama' not in skip_roles.split(',')" + + - role: vllm + when: "'vllm' not in skip_roles.split(',')" + + - role: jupyterlab + when: "'jupyterlab' not in skip_roles.split(',')" + + - role: chromadb + when: "'chromadb' not in skip_roles.split(',')" + + - role: mlflow + when: "'mlflow' not in skip_roles.split(',')" + + - role: minio + when: "'minio' not in skip_roles.split(',')" + + - role: monitoring + when: "'monitoring' not in skip_roles.split(',')" + + - role: cezen-backend + when: "'cezen-backend' not in skip_roles.split(',')" + + - role: cezen-ttyd + when: "'cezen-ttyd' not in skip_roles.split(',')" + + - role: cezen-nginx + when: "'cezen-nginx' not in skip_roles.split(',')" diff --git a/ansible/roles/base/tasks/main.yml b/ansible/roles/base/tasks/main.yml index 51ed1d8..acb1a2b 100644 --- a/ansible/roles/base/tasks/main.yml +++ b/ansible/roles/base/tasks/main.yml @@ -50,6 +50,18 @@ - "{{ cezen_home }}/models" - "{{ cezen_home }}/data" - "{{ cezen_home }}/logs" + - "{{ cezen_home }}/scripts" + +- name: Install Cezen operational helper scripts + copy: + src: "{{ playbook_dir }}/../scripts/{{ item }}" + dest: "{{ cezen_home }}/scripts/{{ item }}" + owner: "{{ cezen_user }}" + group: "{{ cezen_user }}" + mode: "0755" + loop: + - cezen-backup.sh + - cezen-feasibility.sh - name: Download Miniconda get_url: diff --git a/ansible/roles/cezen-backend/files/cezen-api.service b/ansible/roles/cezen-backend/files/cezen-api.service new file mode 100644 index 0000000..bf25b49 --- /dev/null +++ b/ansible/roles/cezen-backend/files/cezen-api.service @@ -0,0 +1,20 @@ +[Unit] +Description=Nexus One AI Management API +After=network-online.target ollama.service +Wants=network-online.target + +[Service] +Type=simple +User=cezen +WorkingDirectory=/opt/cezen/backend +Environment="CEZEN_DATA=/opt/cezen/data" +Environment="OLLAMA_URL=http://localhost:11434" +Environment="PATH=/opt/cezen/backend/venv/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin" +ExecStart=/opt/cezen/backend/venv/bin/uvicorn main:app --host 0.0.0.0 --port 8080 --workers 2 +Restart=always +RestartSec=5 +StandardOutput=journal +StandardError=journal + +[Install] +WantedBy=multi-user.target diff --git a/ansible/roles/cezen-backend/files/main.py b/ansible/roles/cezen-backend/files/main.py new file mode 100644 index 0000000..3062551 --- /dev/null +++ b/ansible/roles/cezen-backend/files/main.py @@ -0,0 +1,4925 @@ +""" +Nexus One AI — Management API +FastAPI backend for auth, user management, metrics, model management, and audit logging. + +Run: + uvicorn main:app --host 0.0.0.0 --port 8080 --reload + +Default admin on first run: + username: admin + password: Cezen@2024! (forced change on first login) +""" + +import os, sqlite3, subprocess, json, uuid, secrets, hashlib, shutil, tempfile, base64, zipfile, sys +from datetime import datetime, timedelta, timezone +from pathlib import Path +from contextlib import asynccontextmanager +from typing import Optional +import asyncio, concurrent.futures + +from fastapi import FastAPI, HTTPException, Depends, Request, Response, status, UploadFile, File, Form +from fastapi.middleware.cors import CORSMiddleware +from fastapi.responses import JSONResponse +from pydantic import BaseModel +import passlib.context as _passlib +from jose import JWTError, jwt +import psutil +from apscheduler.schedulers.asyncio import AsyncIOScheduler +from apscheduler.triggers.cron import CronTrigger +from apscheduler.triggers.interval import IntervalTrigger + +# ── Config ──────────────────────────────────────────────────────────────────── + +DATA_DIR = Path(os.environ.get("CEZEN_DATA", "/opt/cezen/data")) +DB_PATH = DATA_DIR / "cezen.db" +SECRET_FILE = DATA_DIR / ".jwt_secret" +BACKUP_DIR = Path(os.environ.get("CEZEN_BACKUP_DIR", str(DATA_DIR.parent / "backups"))) + +DATA_DIR.mkdir(parents=True, exist_ok=True) +BACKUP_DIR.mkdir(parents=True, exist_ok=True) + +# Load or generate JWT secret +if SECRET_FILE.exists(): + JWT_SECRET = SECRET_FILE.read_text().strip() +else: + JWT_SECRET = secrets.token_hex(32) + SECRET_FILE.write_text(JWT_SECRET) + SECRET_FILE.chmod(0o600) + +JWT_ALGORITHM = "HS256" +JWT_EXPIRE_HRS = 8 +COOKIE_NAME = "cezen_token" +OLLAMA_URL = os.environ.get("OLLAMA_URL", "http://localhost:11434") + +# Tier is set by Cezen at deployment time via environment variable. +# When set, it cannot be changed from the portal — customers get read-only visibility. +CEZEN_TIER = os.environ.get("CEZEN_TIER", "") # e.g. "Entry Tier", "Mid Tier", "Advanced Tier" + +pwd_ctx = _passlib.CryptContext(schemes=["bcrypt"], deprecated="auto") + +TIER_MATRIX = { + "starter": { + "label": "Starter", + "positioning": "Private AI entry package for one department or small team.", + "max_users": 10, + "features": { + "portal": True, + "chat": True, + "rag": True, + "meeting_assistant": False, + "workflows": "basic", + "connectors": False, + "model_router": False, + "audit_report": True, + "backup_restore": True, + "guardrails": "basic", + "gpu_inference": False, + "fine_tuning": False, + "deepspeed": False, + }, + }, + "basic": { + "label": "Basic", + "positioning": "Department AI suite with users, RAG, meeting assistant, and audit basics.", + "max_users": 25, + "features": { + "portal": True, + "chat": True, + "rag": True, + "meeting_assistant": True, + "workflows": "basic", + "connectors": "limited", + "model_router": False, + "audit_report": True, + "backup_restore": True, + "guardrails": "basic", + "gpu_inference": "optional", + "fine_tuning": False, + "deepspeed": False, + }, + }, + "pro": { + "label": "Pro", + "positioning": "Production private AI platform for multiple teams and automation workflows.", + "max_users": 100, + "features": { + "portal": True, + "chat": True, + "rag": "advanced", + "meeting_assistant": True, + "workflows": "advanced", + "connectors": True, + "model_router": True, + "audit_report": True, + "backup_restore": True, + "guardrails": "advanced", + "gpu_inference": True, + "fine_tuning": "qlora", + "deepspeed": False, + }, + }, + "max": { + "label": "Max", + "positioning": "Custom high-performance AI platform for large GPU systems and enterprise controls.", + "max_users": None, + "features": { + "portal": True, + "chat": True, + "rag": "advanced", + "meeting_assistant": True, + "workflows": "advanced", + "connectors": True, + "model_router": True, + "audit_report": True, + "backup_restore": True, + "guardrails": "advanced", + "gpu_inference": True, + "fine_tuning": "advanced", + "deepspeed": "custom", + }, + }, +} + +TIER_ALIASES = { + "entry": "basic", + "entry tier": "basic", + "mid": "pro", + "mid tier": "pro", + "advanced": "max", + "advanced tier": "max", + "starter tier": "starter", + "basic tier": "basic", + "pro tier": "pro", + "max tier": "max", +} + +# ── Database ────────────────────────────────────────────────────────────────── + +def get_db(): + conn = sqlite3.connect(str(DB_PATH)) + conn.row_factory = sqlite3.Row + conn.execute("PRAGMA journal_mode=WAL") + return conn + +def init_db(): + db = get_db() + db.executescript(""" + CREATE TABLE IF NOT EXISTS users ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + username TEXT UNIQUE NOT NULL, + email TEXT, + hashed_password TEXT NOT NULL, + role TEXT NOT NULL DEFAULT 'user', + is_active INTEGER NOT NULL DEFAULT 1, + must_change_password INTEGER DEFAULT 0, + created_at TEXT NOT NULL, + last_login TEXT + ); + + CREATE TABLE IF NOT EXISTS sessions ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + user_id INTEGER NOT NULL, + jti TEXT UNIQUE NOT NULL, + created_at TEXT NOT NULL, + expires_at TEXT NOT NULL, + ip_address TEXT, + user_agent TEXT, + is_active INTEGER DEFAULT 1 + ); + + CREATE TABLE IF NOT EXISTS audit_log ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + timestamp TEXT NOT NULL, + user_id INTEGER, + username TEXT, + action TEXT NOT NULL, + detail TEXT, + ip_address TEXT, + result TEXT NOT NULL DEFAULT 'success' + ); + + CREATE TABLE IF NOT EXISTS metrics_history ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + timestamp TEXT NOT NULL, + cpu_pct REAL, + gpu_pct REAL, + gpu_temp REAL, + ram_pct REAL, + disk_pct REAL + ); + + CREATE TABLE IF NOT EXISTS settings ( + key TEXT PRIMARY KEY, + value TEXT NOT NULL DEFAULT '' + ); + + CREATE TABLE IF NOT EXISTS login_attempts ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + username TEXT NOT NULL, + ip_address TEXT, + attempted_at TEXT NOT NULL, + success INTEGER NOT NULL DEFAULT 0 + ); + + CREATE TABLE IF NOT EXISTS kb_collections ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + name TEXT NOT NULL UNIQUE, + description TEXT NOT NULL DEFAULT '', + chroma_name TEXT NOT NULL, + embed_model TEXT NOT NULL DEFAULT 'nomic-embed-text', + doc_count INTEGER NOT NULL DEFAULT 0, + chunk_count INTEGER NOT NULL DEFAULT 0, + created_at TEXT NOT NULL + ); + + CREATE TABLE IF NOT EXISTS kb_documents ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + collection_id INTEGER NOT NULL, + orig_name TEXT NOT NULL, + file_path TEXT NOT NULL, + size_bytes INTEGER NOT NULL DEFAULT 0, + chunk_count INTEGER NOT NULL DEFAULT 0, + status TEXT NOT NULL DEFAULT 'pending', + error_msg TEXT, + uploaded_at TEXT NOT NULL, + processed_at TEXT + ); + + CREATE TABLE IF NOT EXISTS training_datasets ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + filename TEXT NOT NULL, + orig_name TEXT NOT NULL, + file_path TEXT NOT NULL, + size_bytes INTEGER NOT NULL DEFAULT 0, + row_count INTEGER NOT NULL DEFAULT 0, + uploaded_at TEXT NOT NULL + ); + + CREATE TABLE IF NOT EXISTS training_jobs ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + name TEXT NOT NULL, + base_model TEXT NOT NULL, + dataset_id INTEGER NOT NULL, + config_json TEXT NOT NULL DEFAULT '{}', + status TEXT NOT NULL DEFAULT 'pending', + pid INTEGER, + log_path TEXT, + output_path TEXT, + created_at TEXT NOT NULL, + started_at TEXT, + finished_at TEXT + ); + + CREATE TABLE IF NOT EXISTS prompts ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + title TEXT NOT NULL, + prompt_text TEXT NOT NULL, + category TEXT NOT NULL DEFAULT 'general', + tags TEXT NOT NULL DEFAULT '', + needs_upload INTEGER NOT NULL DEFAULT 0, + sort_order INTEGER NOT NULL DEFAULT 0, + created_at TEXT NOT NULL, + updated_at TEXT NOT NULL + ); + + CREATE TABLE IF NOT EXISTS guardrail_rules ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + type TEXT NOT NULL DEFAULT 'keyword', + pattern TEXT NOT NULL, + action TEXT NOT NULL DEFAULT 'block', + label TEXT NOT NULL DEFAULT '', + is_active INTEGER NOT NULL DEFAULT 1, + created_by TEXT NOT NULL DEFAULT 'admin', + created_at TEXT NOT NULL + ); + + CREATE TABLE IF NOT EXISTS guardrail_log ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + rule_id INTEGER, + user_id INTEGER, + username TEXT, + rule_type TEXT, + pattern TEXT, + action TEXT, + logged_at TEXT NOT NULL + ); + + CREATE TABLE IF NOT EXISTS api_keys ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + user_id INTEGER NOT NULL, + username TEXT NOT NULL, + name TEXT NOT NULL, + key_prefix TEXT NOT NULL, + key_hash TEXT NOT NULL UNIQUE, + last_used_at TEXT, + expires_at TEXT, + is_active INTEGER NOT NULL DEFAULT 1, + created_at TEXT NOT NULL + ); + + CREATE TABLE IF NOT EXISTS query_log ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + user_id INTEGER NOT NULL, + username TEXT NOT NULL, + model TEXT NOT NULL DEFAULT '', + prompt_tokens INTEGER NOT NULL DEFAULT 0, + completion_tokens INTEGER NOT NULL DEFAULT 0, + duration_ms INTEGER NOT NULL DEFAULT 0, + status TEXT NOT NULL DEFAULT 'ok', + logged_at TEXT NOT NULL + ); + + CREATE TABLE IF NOT EXISTS doc_jobs ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + user_id INTEGER NOT NULL, + username TEXT NOT NULL, + orig_name TEXT NOT NULL, + file_path TEXT NOT NULL, + file_type TEXT NOT NULL DEFAULT '', + mode TEXT NOT NULL DEFAULT 'summarise', + custom_prompt TEXT NOT NULL DEFAULT '', + model TEXT NOT NULL DEFAULT '', + extracted_text TEXT NOT NULL DEFAULT '', + result TEXT NOT NULL DEFAULT '', + status TEXT NOT NULL DEFAULT 'pending', + error_msg TEXT, + created_at TEXT NOT NULL, + finished_at TEXT + ); + + CREATE TABLE IF NOT EXISTS multimodal_chats ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + user_id INTEGER NOT NULL, + username TEXT NOT NULL, + model TEXT NOT NULL DEFAULT '', + messages TEXT NOT NULL DEFAULT '[]', + created_at TEXT NOT NULL, + updated_at TEXT NOT NULL + ); + + CREATE TABLE IF NOT EXISTS agents ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + user_id INTEGER NOT NULL, + username TEXT NOT NULL, + name TEXT NOT NULL, + description TEXT NOT NULL DEFAULT '', + steps TEXT NOT NULL DEFAULT '[]', + is_active INTEGER NOT NULL DEFAULT 1, + created_at TEXT NOT NULL, + updated_at TEXT NOT NULL + ); + + CREATE TABLE IF NOT EXISTS agent_runs ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + agent_id INTEGER NOT NULL, + agent_name TEXT NOT NULL, + user_id INTEGER NOT NULL, + username TEXT NOT NULL, + input TEXT NOT NULL DEFAULT '', + output TEXT NOT NULL DEFAULT '', + steps_log TEXT NOT NULL DEFAULT '[]', + status TEXT NOT NULL DEFAULT 'pending', + error_msg TEXT, + created_at TEXT NOT NULL, + finished_at TEXT + ); + + CREATE TABLE IF NOT EXISTS scheduled_jobs ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + user_id INTEGER NOT NULL, + username TEXT NOT NULL, + name TEXT NOT NULL, + description TEXT NOT NULL DEFAULT '', + job_type TEXT NOT NULL DEFAULT 'prompt', + schedule_type TEXT NOT NULL DEFAULT 'interval', + schedule_val TEXT NOT NULL DEFAULT '60', + prompt_text TEXT NOT NULL DEFAULT '', + agent_id INTEGER, + agent_name TEXT NOT NULL DEFAULT '', + model TEXT NOT NULL DEFAULT '', + is_active INTEGER NOT NULL DEFAULT 1, + last_run_at TEXT, + next_run_at TEXT, + run_count INTEGER NOT NULL DEFAULT 0, + created_at TEXT NOT NULL, + updated_at TEXT NOT NULL + ); + + CREATE TABLE IF NOT EXISTS scheduled_job_runs ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + job_id INTEGER NOT NULL, + job_name TEXT NOT NULL, + output TEXT NOT NULL DEFAULT '', + status TEXT NOT NULL DEFAULT 'pending', + error_msg TEXT, + started_at TEXT NOT NULL, + finished_at TEXT + ); + + CREATE TABLE IF NOT EXISTS eval_suites ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + name TEXT NOT NULL, + description TEXT NOT NULL DEFAULT '', + created_by TEXT NOT NULL DEFAULT '', + created_at TEXT NOT NULL, + updated_at TEXT NOT NULL + ); + + CREATE TABLE IF NOT EXISTS eval_cases ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + suite_id INTEGER NOT NULL, + name TEXT NOT NULL DEFAULT '', + prompt_text TEXT NOT NULL, + expected_output TEXT NOT NULL DEFAULT '', + tags TEXT NOT NULL DEFAULT '', + sort_order INTEGER NOT NULL DEFAULT 0, + created_at TEXT NOT NULL + ); + + CREATE TABLE IF NOT EXISTS eval_runs ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + suite_id INTEGER NOT NULL, + suite_name TEXT NOT NULL, + models TEXT NOT NULL DEFAULT '[]', + judge_model TEXT NOT NULL DEFAULT '', + status TEXT NOT NULL DEFAULT 'pending', + created_by TEXT NOT NULL DEFAULT '', + case_count INTEGER NOT NULL DEFAULT 0, + done_count INTEGER NOT NULL DEFAULT 0, + created_at TEXT NOT NULL, + finished_at TEXT + ); + + CREATE TABLE IF NOT EXISTS eval_results ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + run_id INTEGER NOT NULL, + case_id INTEGER NOT NULL, + case_name TEXT NOT NULL DEFAULT '', + model TEXT NOT NULL, + prompt_text TEXT NOT NULL DEFAULT '', + output TEXT NOT NULL DEFAULT '', + quality_score REAL NOT NULL DEFAULT 0, + relevance_score REAL NOT NULL DEFAULT 0, + safety_score REAL NOT NULL DEFAULT 0, + overall_score REAL NOT NULL DEFAULT 0, + reasoning TEXT NOT NULL DEFAULT '', + status TEXT NOT NULL DEFAULT 'pending', + error_msg TEXT, + created_at TEXT NOT NULL + ); + + CREATE TABLE IF NOT EXISTS chat_rooms ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + name TEXT NOT NULL, + description TEXT NOT NULL DEFAULT '', + topic TEXT NOT NULL DEFAULT '', + ai_model TEXT NOT NULL DEFAULT '', + ai_auto INTEGER NOT NULL DEFAULT 0, + created_by TEXT NOT NULL DEFAULT '', + is_active INTEGER NOT NULL DEFAULT 1, + created_at TEXT NOT NULL, + updated_at TEXT NOT NULL + ); + + CREATE TABLE IF NOT EXISTS chat_room_members ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + room_id INTEGER NOT NULL, + user_id INTEGER NOT NULL, + username TEXT NOT NULL, + role TEXT NOT NULL DEFAULT 'member', + joined_at TEXT NOT NULL + ); + + CREATE TABLE IF NOT EXISTS chat_room_messages ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + room_id INTEGER NOT NULL, + user_id INTEGER NOT NULL DEFAULT 0, + username TEXT NOT NULL DEFAULT '', + sender_role TEXT NOT NULL DEFAULT 'user', + content TEXT NOT NULL DEFAULT '', + created_at TEXT NOT NULL + ); + + CREATE TABLE IF NOT EXISTS notifications ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + user_id INTEGER, -- NULL = broadcast to all admins + title TEXT NOT NULL, + body TEXT NOT NULL DEFAULT '', + source TEXT NOT NULL DEFAULT 'system', -- guardrail | scheduler | agent | rag | system + severity TEXT NOT NULL DEFAULT 'info', -- info | warning | error | critical + link TEXT NOT NULL DEFAULT '', -- optional deep-link URL + is_read INTEGER NOT NULL DEFAULT 0, + created_at TEXT NOT NULL + ); + + CREATE TABLE IF NOT EXISTS teams ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + name TEXT NOT NULL UNIQUE, + description TEXT NOT NULL DEFAULT '', + created_by TEXT NOT NULL DEFAULT 'admin', + created_at TEXT NOT NULL + ); + + CREATE TABLE IF NOT EXISTS team_members ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + team_id INTEGER NOT NULL, + user_id INTEGER NOT NULL, + role TEXT NOT NULL DEFAULT 'member', -- member | lead + joined_at TEXT NOT NULL, + UNIQUE(team_id, user_id) + ); + + CREATE TABLE IF NOT EXISTS team_settings ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + team_id INTEGER NOT NULL UNIQUE, + allowed_models TEXT NOT NULL DEFAULT '[]', -- JSON array of model ids + kb_collections TEXT NOT NULL DEFAULT '[]', -- JSON array of collection ids + monthly_token_quota INTEGER NOT NULL DEFAULT 0, -- 0 = unlimited + updated_at TEXT NOT NULL + ); + + CREATE TABLE IF NOT EXISTS feedback ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + user_id INTEGER, + username TEXT NOT NULL DEFAULT 'anonymous', + model TEXT NOT NULL DEFAULT '', + session_id TEXT NOT NULL DEFAULT '', + rating INTEGER NOT NULL CHECK(rating BETWEEN 1 AND 5), + comment TEXT NOT NULL DEFAULT '', + prompt_snippet TEXT NOT NULL DEFAULT '', + response_snippet TEXT NOT NULL DEFAULT '', + page TEXT NOT NULL DEFAULT '', -- which page/feature + created_at TEXT NOT NULL + ); + + CREATE TABLE IF NOT EXISTS workflows ( + id TEXT PRIMARY KEY, + user_id INTEGER NOT NULL, + username TEXT NOT NULL DEFAULT '', + name TEXT NOT NULL DEFAULT 'Untitled Workflow', + description TEXT NOT NULL DEFAULT '', + status TEXT NOT NULL DEFAULT 'draft', + trigger_json TEXT NOT NULL DEFAULT '{}', + steps_json TEXT NOT NULL DEFAULT '[]', + stats_json TEXT NOT NULL DEFAULT '{}', + created_at TEXT NOT NULL, + updated_at TEXT NOT NULL + ); + + CREATE TABLE IF NOT EXISTS workflow_runs ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + workflow_id TEXT NOT NULL, + user_id INTEGER NOT NULL, + username TEXT NOT NULL DEFAULT '', + input TEXT NOT NULL DEFAULT '', + result_json TEXT NOT NULL DEFAULT '{}', + status TEXT NOT NULL DEFAULT 'done', + error_msg TEXT, + created_at TEXT NOT NULL, + finished_at TEXT + ); + + CREATE TABLE IF NOT EXISTS connectors ( + id TEXT PRIMARY KEY, + user_id INTEGER NOT NULL DEFAULT 0, + type TEXT NOT NULL DEFAULT 'folder', + name TEXT NOT NULL, + icon TEXT NOT NULL DEFAULT '', + status TEXT NOT NULL DEFAULT 'idle', + config_json TEXT NOT NULL DEFAULT '{}', + stats_json TEXT NOT NULL DEFAULT '{}', + created_at TEXT NOT NULL, + updated_at TEXT NOT NULL + ); + + CREATE TABLE IF NOT EXISTS connector_log ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + connector_id TEXT, + level TEXT NOT NULL DEFAULT 'info', + msg TEXT NOT NULL, + ts TEXT NOT NULL + ); + + CREATE TABLE IF NOT EXISTS router_rules ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + name TEXT NOT NULL, + enabled INTEGER NOT NULL DEFAULT 1, + model TEXT NOT NULL DEFAULT '', + priority INTEGER NOT NULL DEFAULT 0, + conditions_json TEXT NOT NULL DEFAULT '[]', + max_tokens INTEGER NOT NULL DEFAULT 2048, + temperature REAL NOT NULL DEFAULT 0.7, + notes TEXT NOT NULL DEFAULT '', + updated_at TEXT NOT NULL + ); + + CREATE TABLE IF NOT EXISTS router_settings ( + key TEXT PRIMARY KEY, + value TEXT NOT NULL DEFAULT '' + ); + + CREATE TABLE IF NOT EXISTS meeting_jobs ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + user_id INTEGER NOT NULL, + username TEXT NOT NULL DEFAULT '', + title TEXT NOT NULL DEFAULT 'Untitled Meeting', + meta_json TEXT NOT NULL DEFAULT '{}', + transcript TEXT NOT NULL DEFAULT '', + result_json TEXT NOT NULL DEFAULT '{}', + created_at TEXT NOT NULL + ); + """) + + # Seed default admin if no users exist + row = db.execute("SELECT COUNT(*) as n FROM users").fetchone() + if row["n"] == 0: + db.execute( + """INSERT INTO users (username, email, hashed_password, role, is_active, + must_change_password, created_at) + VALUES (?,?,?,?,?,?,?)""", + ("admin", "admin@cezentech.com", + pwd_ctx.hash("Cezen@2024!"), + "admin", 1, 1, + utcnow()) + ) + db.commit() + print("[cezen] Default admin created — username: admin password: Cezen@2024!") + + # Seed default branding settings + defaults = { + "org_name": "Nexus One AI", + "stack_name": "Nexus One AI", + "logo_url": "", + "accent_color": "#0D9488", + "footer_text": "Powered by Cezen", + "support_email":"support@cezentech.com", + "tier_label": "Entry Tier", + } + for k, v in defaults.items(): + db.execute("INSERT OR IGNORE INTO settings (key, value) VALUES (?,?)", (k, v)) + db.commit() + + # Seed default prompts if table is empty + if db.execute("SELECT COUNT(*) as n FROM prompts").fetchone()["n"] == 0: + _seed_prompts(db) + + db.close() + +def utcnow(): + return datetime.now(timezone.utc).isoformat() + +def _seed_prompts(db): + now = utcnow() + prompts = [ + # Procurement + ("Summarise a tender document","Summarise this tender document and list the top 5 eligibility criteria in bullet points. Also identify the submission deadline and any mandatory certifications required.","procurement","tender summary eligibility criteria bid rfp",1), + ("Extract penalty clauses from a contract","Extract all penalty and liquidated damages clauses from the uploaded contract. Present them as a table with three columns: Clause Number, Description, and Financial Impact.","procurement","contract penalty clauses extract table",1), + ("List mandatory bid submission documents","From the uploaded RFP, list all mandatory documents required for bid submission. Organise them into two groups: Technical Documents and Financial Documents. Note any specific format or certification requirements for each.","procurement","rfp bid mandatory documents checklist submission",1), + ("Flag ambiguous contract clauses","Review the uploaded contract and identify any clauses that are ambiguous, contradictory, or that place unusual risk on our organisation. For each flagged clause, quote the exact text and explain why it needs clarification before signing.","procurement","contract ambiguous clause risk review flag",1), + ("Compare vendor proposals","I have uploaded two vendor proposals for [project name]. Compare them across the following criteria: technical approach, timeline, team qualifications, past experience, and total cost. Present your comparison as a side-by-side table and recommend which proposal better meets our requirements, with reasons.","procurement","vendor compare proposal technical evaluation",1), + ("Extract payment terms and milestones","From the uploaded contract, extract all payment terms, milestone dates, and deliverable deadlines. Present them in chronological order as a table with: Date / Milestone, Deliverable, and Payment Amount.","procurement","payment terms milestone dates extract contract",1), + # HR + ("Draft an offer letter","Draft a formal offer letter for the position of [Job Title] in the [Department] department. The candidate's name is [Name]. The salary is ₹[Amount] per annum. The joining date is [Date]. Include standard clauses for probation period of 6 months, confidentiality, and intellectual property. Use a professional but welcoming tone.","hr","offer letter draft employment formal",0), + ("Summarise HR policy for new joiners","From the uploaded HR policy document, create a friendly 1-page summary for new joiners. Cover: leave entitlements, working hours, code of conduct, IT usage policy, and the grievance process. Use simple language and avoid jargon.","hr","hr policy summary new joiner onboarding",1), + ("Extract leave entitlements as a table","From the uploaded HR policy, extract all leave types and their entitlements. Present as a table with columns: Leave Type, Days Per Year, Carry Forward Allowed (Yes/No), Encashable (Yes/No), and Key Conditions.","hr","leave entitlements table extract policy",1), + ("Draft a Performance Improvement Plan","Draft a 90-day Performance Improvement Plan (PIP) for an employee in the role of [Job Title]. The performance issues are: [describe 2-3 specific issues]. Include measurable targets for each issue, a weekly check-in schedule, consequences if targets are not met, and a supportive tone that gives the employee a genuine opportunity to improve.","hr","performance improvement plan pip draft employee",0), + ("Create a policy knowledge quiz","From the uploaded policy document, create a 10-question multiple-choice quiz to test staff knowledge. Each question should have 4 options with one correct answer. Include the answer key at the end. Focus on the most important compliance points.","hr","quiz policy knowledge test staff training",1), + # Legal + ("Simplify a regulation for non-legal staff","The uploaded document is a government regulation or legal notice. Rewrite it in plain English for non-legal staff. Explain what it means for our organisation, what actions we need to take, and by when. Use simple sentences and avoid legal jargon.","legal","regulation simplify plain language staff",1), + ("Extract all deadlines from a regulatory notice","From the uploaded regulatory notice or circular, extract every deadline, compliance date, and submission requirement. Present them as a chronological table with: Date, Requirement, Responsible Party, and Consequence of Non-Compliance.","legal","deadlines dates regulatory notice extract compliance",1), + ("Draft a response to a legal notice","We have received the attached legal notice. Draft a formal response that: acknowledges receipt, disputes [specific point], requests an extension of [X days], and reserves our legal rights. Maintain a firm but professional tone. Flag any statements that need legal review before sending.","legal","legal notice response draft formal",1), + ("Compliance review of a contract","Review the uploaded contract for compliance with [relevant regulation or standard — e.g., MSME Act, GeM guidelines, IT Act]. List every clause that may be non-compliant, the specific provision it conflicts with, and the recommended corrective action.","legal","compliance review contract regulation audit",1), + # Operations + ("Create an SOP checklist from a manual","From the uploaded operations manual, extract the key steps for [specific process, e.g., equipment startup, safety inspection]. Convert them into a numbered SOP checklist that a new technician could follow. Add a sign-off box at the end.","operations","sop checklist manual procedure extract",1), + ("Analyse a maintenance fault log","I have uploaded a fault log from our [equipment name]. Identify the top 5 most frequent fault types, the time between failures for each, and any patterns suggesting predictive maintenance opportunities. Recommend which faults to prioritise.","operations","maintenance fault log analyse pattern predict",1), + ("Extract safety warnings from a manual","From the uploaded technical manual, extract all safety warnings, cautions, and prohibited actions. Present them in a table with: Location in Manual, Warning Text, Risk Level (High/Medium/Low), and Required Action. Highlight any that require immediate staff briefing.","operations","safety warnings extract manual risk",1), + ("Summarise an incident report","Summarise the attached incident report in 3 sections: (1) What happened — key facts only; (2) Root cause — as identified or as inferred from the report; (3) Corrective actions — recommended or already taken. Keep each section to 3 bullet points maximum.","operations","incident report summary root cause action",1), + # Citizen Services + ("Draft a response to a citizen complaint","A citizen has submitted the following complaint: [paste complaint text]. Draft a formal acknowledgement and response. Acknowledge their concern, explain the action being taken, provide a resolution timeline, and include an apology where appropriate. Maintain an empathetic and professional tone.","citizen","citizen complaint response draft letter",0), + ("Classify and route a citizen query","I will paste several citizen queries below. For each query, classify it into one of these categories: [list your departments/categories]. Indicate the responsible department and the urgency level (Urgent / Normal / Low). Present as a table.\n\nQueries:\n[paste queries here]","citizen","classify route citizen query department triage",0), + ("Generate a citizen FAQ from call logs","From the uploaded call log or query report, identify the 15 most frequently asked questions from citizens. For each question, draft a clear, accurate answer in plain language. Format as a Q&A document suitable for our website.","citizen","faq citizen queries call log generate",1), + ("Draft an acknowledgement letter","Draft a formal acknowledgement letter for a citizen who submitted [type of application/complaint/request] on [date]. Confirm receipt, provide a reference number placeholder [REF-XXXX], state the expected processing time of [X working days], and provide contact details for follow-up.","citizen","acknowledgement letter draft citizen formal",0), + # Finance + ("Identify budget overruns","From the uploaded budget vs actuals report, identify all line items where spending has exceeded budget by more than 10%. For each overrun, calculate the variance in absolute and percentage terms. Highlight the top 5 overruns and suggest possible reasons based on the data.","finance","budget overrun variance identify analyse report",1), + ("Summarise financial implications of a proposal","From the uploaded project proposal, extract and summarise all financial information: total project cost, funding sources, year-wise expenditure plan, expected revenue or savings, ROI or payback period, and any financial risks mentioned. Present as a 1-page financial brief.","finance","financial implications proposal summary cost",1), + ("Summarise an audit report","Summarise the attached audit report. Cover: (1) Scope and period of audit; (2) Key findings — list only major and significant findings; (3) Compliance status — what is compliant vs non-compliant; (4) Top 3 recommended actions. Keep the summary to one page.","finance","audit report summary findings compliance",1), + # Communications + ("Rewrite a technical document in plain English","The attached document is written for a technical audience. Rewrite it for a non-technical audience — senior management or the general public. Replace all jargon with plain language, use short paragraphs, and add a 3-sentence summary at the top. Do not change the facts.","comms","technical document rewrite plain english simplify",1), + ("Draft a press release","Draft a press release announcing [event or achievement]. Include: a headline, dateline, opening paragraph with the key news, a quote from [Name, Designation], background context paragraph, and boilerplate about our organisation. Keep it to 400 words. Tone: formal and factual.","comms","press release draft announcement communications",0), + ("Extract action items from meeting minutes","From the uploaded meeting minutes, extract all action items. Present as a table with: Action Item, Owner, Due Date, Priority (High/Medium/Low), and Status (if mentioned). List items in order of due date.","comms","action items meeting minutes extract table",1), + # Management + ("Create a 1-page executive summary","From the attached report or document, create a 1-page executive summary for senior leadership. Cover: the key issue or opportunity, 3 most important findings or recommendations, resource or budget implications, and recommended decision or next step. Use bullet points under each heading.","management","executive summary 1 page report leadership",1), + ("SWOT analysis from a strategy document","From the uploaded strategy document or situation report, generate a structured SWOT analysis — Strengths, Weaknesses, Opportunities, and Threats. List 4–5 points under each quadrant. Base your analysis only on information in the document; do not add external assumptions.","management","swot analysis strategy document strengths weaknesses",1), + ("Summarise board meeting decisions","From the attached board meeting minutes, extract: (1) All decisions taken — list each as a clear resolution statement; (2) All agenda items deferred — reason if stated; (3) Key action items with owners and deadlines. Present as a structured 1-page decision summary.","management","board meeting decisions summary minutes extract",1), + ] + for i, (title, text, cat, tags, upload) in enumerate(prompts): + db.execute( + "INSERT INTO prompts (title, prompt_text, category, tags, needs_upload, sort_order, created_at, updated_at) VALUES (?,?,?,?,?,?,?,?)", + (title, text, cat, tags, upload, i, now, now) + ) + db.commit() + +# ── Lifespan ────────────────────────────────────────────────────────────────── + +_scheduler = AsyncIOScheduler() + +@asynccontextmanager +async def lifespan(app: FastAPI): + init_db() + _scheduler.start() + _restore_scheduled_jobs() + yield + _scheduler.shutdown(wait=False) + +app = FastAPI(title="Nexus One AI API", version="1.0.0", lifespan=lifespan) + +app.add_middleware( + CORSMiddleware, + allow_origins=["*"], + allow_credentials=True, + allow_methods=["*"], + allow_headers=["*"], +) + +# ── Auth helpers ────────────────────────────────────────────────────────────── + +def create_token(user_id: int, username: str, role: str) -> tuple[str, str]: + jti = str(uuid.uuid4()) + exp = datetime.now(timezone.utc) + timedelta(hours=JWT_EXPIRE_HRS) + payload = { + "sub": str(user_id), + "username": username, + "role": role, + "jti": jti, + "exp": exp, + } + token = jwt.encode(payload, JWT_SECRET, algorithm=JWT_ALGORITHM) + return token, jti + +def decode_token(token: str) -> dict: + try: + return jwt.decode(token, JWT_SECRET, algorithms=[JWT_ALGORITHM]) + except JWTError: + raise HTTPException(status_code=401, detail="Invalid or expired token") + +def get_token(request: Request) -> str: + token = request.cookies.get(COOKIE_NAME) + if not token: + # Also accept Bearer header for API clients + auth = request.headers.get("Authorization", "") + if auth.startswith("Bearer "): + token = auth[7:] + if not token: + raise HTTPException(status_code=401, detail="Not authenticated") + return token + +def current_user(request: Request) -> dict: + token = get_token(request) + payload = decode_token(token) + db = get_db() + # Check session is still active + row = db.execute( + "SELECT is_active FROM sessions WHERE jti=?", (payload["jti"],) + ).fetchone() + db.close() + if not row or not row["is_active"]: + raise HTTPException(status_code=401, detail="Session expired or revoked") + payload["id"] = int(payload["sub"]) + return payload + +def admin_only(user: dict = Depends(current_user)) -> dict: + if user.get("role") != "admin": + raise HTTPException(status_code=403, detail="Admin access required") + return user + +def audit(db, user_id, username, action, detail="", ip="", result="success"): + db.execute( + "INSERT INTO audit_log (timestamp,user_id,username,action,detail,ip_address,result) VALUES (?,?,?,?,?,?,?)", + (utcnow(), user_id, username, action, detail, ip, result) + ) + +def _setting_value(key: str, default: str = "") -> str: + db = get_db() + try: + row = db.execute("SELECT value FROM settings WHERE key=?", (key,)).fetchone() + return row["value"] if row else default + finally: + db.close() + +def _normalize_tier(value: str) -> str: + raw = (value or "").strip().lower() + if raw in TIER_MATRIX: + return raw + return TIER_ALIASES.get(raw, "basic") + +def _current_tier() -> str: + if CEZEN_TIER: + return _normalize_tier(CEZEN_TIER) + return _normalize_tier(_setting_value("tier_label", "Basic")) + +def _tier_payload() -> dict: + tier = _current_tier() + info = TIER_MATRIX[tier] + return { + "tier": tier, + "label": info["label"], + "locked": bool(CEZEN_TIER), + "positioning": info["positioning"], + "max_users": info["max_users"], + "features": info["features"], + "tiers": TIER_MATRIX, + } + +def _readiness_score(feasibility: dict, license_info: dict) -> dict: + features = feasibility.get("features") or {} + recommendation = feasibility.get("recommendation") or {} + metrics_now = collect_metrics() + checks = [ + ("Portal/API", True, "Management portal can run."), + ("Disk space", (feasibility.get("disk") or {}).get("free_gb", 0) >= 50, "Keep at least 50 GB free for uploads, indexes, and backups."), + ("RAG storage", bool(features.get("rag_chromadb") or features.get("document_intelligence")), "RAG needs enough RAM/disk for document parsing and vector search."), + ("Local model serving", bool(features.get("ollama_gpu") or features.get("ollama_cpu")), "Use appliance GPU or external/cloud model routing if local serving is weak."), + ("Backup path", BACKUP_DIR.exists(), f"Backups write to {BACKUP_DIR}."), + ("Audit report", bool(license_info["features"].get("audit_report")), "Tier includes basic audit report."), + ] + passed = sum(1 for _, ok, _ in checks if ok) + score = round((passed / len(checks)) * 100) + blockers = [name for name, ok, _ in checks if not ok] + status = "ready" if score >= 80 else "limited" if score >= 55 else "not_ready" + recommendations = list(recommendation.get("notes") or []) + if metrics_now.get("ram_total_gb", 0) < 32: + recommendations.append("For software-only customers, position this as portal/RAG management plus external model routing unless RAM is upgraded.") + if not features.get("ollama_gpu") and not features.get("ollama_cpu"): + recommendations.append("Do not promise fully local LLM inference on this hardware without an appliance or GPU upgrade.") + if not blockers: + recommendations.append("Hardware and licensed feature set are acceptable for the recommended Cezen profile.") + return { + "score": score, + "status": status, + "checks": [{"name": name, "passed": ok, "note": note} for name, ok, note in checks], + "blockers": blockers, + "recommendations": recommendations, + } + +def _safe_backup_name(name: str) -> str: + cleaned = "".join(ch for ch in name if ch.isalnum() or ch in "._-") + if not cleaned or cleaned != name or not cleaned.endswith(".zip"): + raise HTTPException(status_code=400, detail="Invalid backup name") + return cleaned + +def _backup_path(name: str) -> Path: + safe = _safe_backup_name(name) + path = (BACKUP_DIR / safe).resolve() + if path.parent != BACKUP_DIR.resolve(): + raise HTTPException(status_code=400, detail="Invalid backup path") + return path + +def _backup_entry(path: Path) -> dict: + stat = path.stat() + return { + "name": path.name, + "size_bytes": stat.st_size, + "created_at": datetime.fromtimestamp(stat.st_mtime, timezone.utc).isoformat(), + "path": str(path), + } + +# ── Schemas ─────────────────────────────────────────────────────────────────── + +class LoginRequest(BaseModel): + username: str + password: str + +class ChangePasswordRequest(BaseModel): + current_password: Optional[str] = None + new_password: str + +class CreateUserRequest(BaseModel): + username: str + email: Optional[str] = None + password: str + role: str = "user" + +class UpdateUserRequest(BaseModel): + email: Optional[str] = None + role: Optional[str] = None + is_active: Optional[int] = None + +class ResetPasswordRequest(BaseModel): + new_password: str + +class PullModelRequest(BaseModel): + model: str + +# ── Auth endpoints ───────────────────────────────────────────────────────────── + +LOCKOUT_MAX_ATTEMPTS = 5 +LOCKOUT_WINDOW_MIN = 15 # minutes to look back for failed attempts +LOCKOUT_DURATION_MIN = 15 # minutes to lock the account after threshold + +@app.post("/api/auth/login") +async def login(body: LoginRequest, request: Request, response: Response): + ip = request.client.host if request.client else "" + db = get_db() + try: + # ── Brute-force lockout check ───────────────────────────────────────── + window_start = (datetime.now(timezone.utc) - timedelta(minutes=LOCKOUT_WINDOW_MIN)).isoformat() + fail_count = db.execute( + "SELECT COUNT(*) as n FROM login_attempts WHERE username=? AND success=0 AND attempted_at>?", + (body.username, window_start) + ).fetchone()["n"] + + if fail_count >= LOCKOUT_MAX_ATTEMPTS: + audit(db, None, body.username, "login", f"account locked ({fail_count} failures)", ip, "failure") + db.commit() + raise HTTPException( + status_code=429, + detail=f"Account temporarily locked — too many failed attempts. Try again in {LOCKOUT_DURATION_MIN} minutes." + ) + + row = db.execute( + "SELECT * FROM users WHERE username=? AND is_active=1", (body.username,) + ).fetchone() + + if not row or not pwd_ctx.verify(body.password, row["hashed_password"]): + db.execute( + "INSERT INTO login_attempts (username, ip_address, attempted_at, success) VALUES (?,?,?,0)", + (body.username, ip, utcnow()) + ) + audit(db, None, body.username, "login", "bad credentials", ip, "failure") + db.commit() + raise HTTPException(status_code=401, detail="Invalid username or password") + + # ── Successful login — clear attempt history and proceed ────────────── + db.execute("DELETE FROM login_attempts WHERE username=?", (body.username,)) + token, jti = create_token(row["id"], row["username"], row["role"]) + exp = (datetime.now(timezone.utc) + timedelta(hours=JWT_EXPIRE_HRS)).isoformat() + + db.execute( + "INSERT INTO sessions (user_id,jti,created_at,expires_at,ip_address,user_agent) VALUES (?,?,?,?,?,?)", + (row["id"], jti, utcnow(), exp, ip, request.headers.get("user-agent","")) + ) + db.execute("UPDATE users SET last_login=? WHERE id=?", (utcnow(), row["id"])) + audit(db, row["id"], row["username"], "login", "", ip) + db.commit() + + response.set_cookie( + COOKIE_NAME, token, + httponly=True, samesite="lax", + max_age=JWT_EXPIRE_HRS * 3600 + ) + return { + "ok": True, + "username": row["username"], + "role": row["role"], + "must_change_password": bool(row["must_change_password"]) + } + finally: + db.close() + +@app.post("/api/auth/logout") +async def logout(request: Request, response: Response, user: dict = Depends(current_user)): + token = get_token(request) + payload = decode_token(token) + db = get_db() + db.execute("UPDATE sessions SET is_active=0 WHERE jti=?", (payload["jti"],)) + audit(db, user["sub"], user["username"], "logout") + db.commit() + db.close() + response.delete_cookie(COOKIE_NAME) + return {"ok": True} + +@app.get("/api/auth/me") +async def me(user: dict = Depends(current_user)): + db = get_db() + row = db.execute( + "SELECT id,username,email,role,last_login,must_change_password FROM users WHERE id=?", + (user["sub"],) + ).fetchone() + db.close() + if not row: + raise HTTPException(status_code=404, detail="User not found") + return dict(row) + +@app.post("/api/auth/change-password") +async def change_password(body: ChangePasswordRequest, request: Request, user: dict = Depends(current_user)): + db = get_db() + try: + row = db.execute("SELECT * FROM users WHERE id=?", (user["sub"],)).fetchone() + # Skip current-password check for forced first-login change + if not row["must_change_password"]: + if not body.current_password: + raise HTTPException(status_code=400, detail="Current password is required") + if not pwd_ctx.verify(body.current_password, row["hashed_password"]): + raise HTTPException(status_code=400, detail="Current password is incorrect") + db.execute( + "UPDATE users SET hashed_password=?, must_change_password=0 WHERE id=?", + (pwd_ctx.hash(body.new_password), user["sub"]) + ) + audit(db, user["sub"], user["username"], "change_password", "", request.client.host if request.client else "") + db.commit() + return {"ok": True} + finally: + db.close() + +# ── User management (admin) ─────────────────────────────────────────────────── + +@app.get("/api/users/sessions") +async def active_sessions(admin: dict = Depends(admin_only)): + """Return list of users who have logged in within the last 8 hours (token lifetime).""" + db = get_db() + # Use the same isoformat() style that utcnow() stores (includes +00:00 suffix) + from datetime import timezone as _tz + cutoff = (datetime.now(_tz.utc) - timedelta(hours=8)).isoformat() + rows = db.execute( + "SELECT username, role, last_login FROM users WHERE last_login >= ? AND is_active=1 ORDER BY last_login DESC", + (cutoff,) + ).fetchall() + db.close() + return {"sessions": [dict(r) for r in rows]} + +@app.get("/api/users") +async def list_users(admin: dict = Depends(admin_only)): + db = get_db() + rows = db.execute( + "SELECT id,username,email,role,is_active,created_at,last_login FROM users ORDER BY id" + ).fetchall() + db.close() + return [dict(r) for r in rows] + +@app.post("/api/users") +async def create_user(body: CreateUserRequest, request: Request, admin: dict = Depends(admin_only)): + db = get_db() + try: + db.execute( + "INSERT INTO users (username,email,hashed_password,role,is_active,must_change_password,created_at) VALUES (?,?,?,?,1,1,?)", + (body.username, body.email, pwd_ctx.hash(body.password), body.role, utcnow()) + ) + audit(db, admin["sub"], admin["username"], "create_user", f"username={body.username} role={body.role}", + request.client.host if request.client else "") + db.commit() + return {"ok": True} + except sqlite3.IntegrityError: + raise HTTPException(status_code=400, detail="Username already exists") + finally: + db.close() + +@app.put("/api/users/{user_id}") +async def update_user(user_id: int, body: UpdateUserRequest, request: Request, admin: dict = Depends(admin_only)): + db = get_db() + try: + fields, vals = [], [] + if body.email is not None: fields.append("email=?"); vals.append(body.email) + if body.role is not None: fields.append("role=?"); vals.append(body.role) + if body.is_active is not None: fields.append("is_active=?"); vals.append(body.is_active) + if not fields: + raise HTTPException(status_code=400, detail="Nothing to update") + vals.append(user_id) + db.execute(f"UPDATE users SET {', '.join(fields)} WHERE id=?", vals) + audit(db, admin["sub"], admin["username"], "update_user", f"id={user_id}", + request.client.host if request.client else "") + db.commit() + return {"ok": True} + finally: + db.close() + +@app.post("/api/users/{user_id}/reset-password") +async def reset_password(user_id: int, body: ResetPasswordRequest, request: Request, admin: dict = Depends(admin_only)): + db = get_db() + try: + db.execute( + "UPDATE users SET hashed_password=?, must_change_password=1 WHERE id=?", + (pwd_ctx.hash(body.new_password), user_id) + ) + # Revoke all active sessions for this user + db.execute("UPDATE sessions SET is_active=0 WHERE user_id=?", (user_id,)) + audit(db, admin["sub"], admin["username"], "reset_password", f"target_user_id={user_id}", + request.client.host if request.client else "") + db.commit() + return {"ok": True} + finally: + db.close() + +@app.delete("/api/users/{user_id}") +async def delete_user(user_id: int, request: Request, admin: dict = Depends(admin_only)): + if str(user_id) == str(admin["sub"]): + raise HTTPException(status_code=400, detail="Cannot delete your own account") + db = get_db() + try: + db.execute("DELETE FROM users WHERE id=?", (user_id,)) + db.execute("UPDATE sessions SET is_active=0 WHERE user_id=?", (user_id,)) + audit(db, admin["sub"], admin["username"], "delete_user", f"id={user_id}", + request.client.host if request.client else "") + db.commit() + return {"ok": True} + finally: + db.close() + +@app.get("/api/users/sessions") +async def active_sessions(admin: dict = Depends(admin_only)): + db = get_db() + rows = db.execute(""" + SELECT s.id, s.user_id, u.username, u.role, s.created_at, s.expires_at, s.ip_address + FROM sessions s JOIN users u ON s.user_id=u.id + WHERE s.is_active=1 AND s.expires_at > ? + ORDER BY s.created_at DESC + """, (utcnow(),)).fetchall() + db.close() + return [dict(r) for r in rows] + +@app.delete("/api/users/sessions/{session_id}") +async def revoke_session(session_id: int, admin: dict = Depends(admin_only)): + db = get_db() + db.execute("UPDATE sessions SET is_active=0 WHERE id=?", (session_id,)) + db.commit() + db.close() + return {"ok": True} + +# ── Metrics ─────────────────────────────────────────────────────────────────── + +def collect_metrics() -> dict: + cpu = psutil.cpu_percent(interval=0.5) + ram = psutil.virtual_memory() + disk = psutil.disk_usage("/") + + # GPU via nvidia-smi + gpu_pct = gpu_temp = gpu_mem_used = gpu_mem_total = None + try: + out = subprocess.check_output([ + "nvidia-smi", + "--query-gpu=utilization.gpu,temperature.gpu,memory.used,memory.total", + "--format=csv,noheader,nounits" + ], timeout=5).decode().strip().split("\n")[0] + parts = [p.strip() for p in out.split(",")] + gpu_pct = int(parts[0]) + gpu_temp = int(parts[1]) + gpu_mem_used = int(parts[2]) # MiB + gpu_mem_total = int(parts[3]) # MiB + except Exception: + pass + + # Network + net = psutil.net_io_counters() + + return { + "timestamp": utcnow(), + "cpu_pct": round(cpu, 1), + "cpu_cores": psutil.cpu_count(), + "ram_pct": round(ram.percent, 1), + "ram_used_gb": round(ram.used / 1e9, 1), + "ram_total_gb": round(ram.total / 1e9, 1), + "disk_pct": round(disk.percent, 1), + "disk_used_gb": round(disk.used / 1e9, 1), + "disk_total_gb":round(disk.total / 1e9, 1), + "gpu_pct": gpu_pct, + "gpu_temp": gpu_temp, + "gpu_mem_used_gb": round(gpu_mem_used / 1024, 1) if gpu_mem_used else None, + "gpu_mem_total_gb": round(gpu_mem_total / 1024, 1) if gpu_mem_total else None, + "net_sent_gb": round(net.bytes_sent / 1e9, 2), + "net_recv_gb": round(net.bytes_recv / 1e9, 2), + "uptime": _fmt_uptime(), + } + +def _fmt_uptime() -> str: + try: + sec = float(Path("/proc/uptime").read_text().split()[0]) + except Exception: + import time + try: + sec = time.time() - psutil.boot_time() + except Exception: + return "unknown" + d = int(sec // 86400); h = int((sec % 86400) // 3600); m = int((sec % 3600) // 60) + if d: return f"{d} days, {h} hours" + if h: return f"{h} hours, {m} min" + return f"{m} min" + +@app.get("/api/metrics") +async def metrics(user: dict = Depends(current_user)): + m = collect_metrics() + # Persist to history (keep last 1440 rows = 24h at 1-min intervals) + db = get_db() + db.execute( + "INSERT INTO metrics_history (timestamp,cpu_pct,gpu_pct,gpu_temp,ram_pct,disk_pct) VALUES (?,?,?,?,?,?)", + (m["timestamp"], m["cpu_pct"], m["gpu_pct"], m["gpu_temp"], m["ram_pct"], m["disk_pct"]) + ) + db.execute("DELETE FROM metrics_history WHERE id NOT IN (SELECT id FROM metrics_history ORDER BY id DESC LIMIT 1440)") + db.commit() + db.close() + return m + +@app.get("/api/metrics/history") +async def metrics_history(n: int = 60, admin: dict = Depends(admin_only)): + db = get_db() + rows = db.execute( + "SELECT * FROM metrics_history ORDER BY id DESC LIMIT ?", (n,) + ).fetchall() + db.close() + return list(reversed([dict(r) for r in rows])) + +# ── Services ────────────────────────────────────────────────────────────────── + +SERVICES = [ + {"name": "ollama", "port": 11434, "label": "Ollama"}, + {"name": "open-webui", "port": 3001, "label": "Open WebUI"}, + {"name": "chromadb", "port": 8000, "label": "ChromaDB"}, + {"name": "jupyter", "port": 8888, "label": "JupyterLab"}, + {"name": "mlflow", "port": 5000, "label": "MLflow"}, + {"name": "grafana", "port": 3000, "label": "Grafana"}, +] + +def service_status(svc: dict) -> dict: + import socket + try: + s = socket.create_connection(("127.0.0.1", svc["port"]), timeout=1) + s.close() + running = True + except OSError: + running = False + + # systemd status + try: + r = subprocess.run( + ["systemctl", "is-active", svc["name"]], + capture_output=True, text=True, timeout=3 + ) + systemd = r.stdout.strip() + except Exception: + systemd = "unknown" + + return {**svc, "port_open": running, "systemd": systemd, + "ok": running or systemd == "active"} + +@app.get("/api/services") +async def services(user: dict = Depends(current_user)): + return [service_status(s) for s in SERVICES] + +# ── Models ──────────────────────────────────────────────────────────────────── + +import urllib.request, urllib.error + +def ollama_get(path: str): + try: + with urllib.request.urlopen(f"{OLLAMA_URL}{path}", timeout=5) as r: + return json.loads(r.read()) + except Exception: + return None + +def ollama_post(path: str, data: dict): + body = json.dumps(data).encode() + req = urllib.request.Request( + f"{OLLAMA_URL}{path}", data=body, + headers={"Content-Type": "application/json"}, method="POST" + ) + try: + with urllib.request.urlopen(req, timeout=10) as r: + return json.loads(r.read()) + except Exception as e: + return {"error": str(e)} + +def ollama_delete(path: str, data: dict): + body = json.dumps(data).encode() + req = urllib.request.Request( + f"{OLLAMA_URL}{path}", data=body, + headers={"Content-Type": "application/json"}, method="DELETE" + ) + try: + with urllib.request.urlopen(req, timeout=10) as r: + return {"ok": True} + except Exception as e: + return {"error": str(e)} + +AVAILABLE_MODELS = [ + {"name": "llama3.1:8b", "label": "Llama 3.1 8B", "size_gb": 4.7, "vram_gb": 6, "use": "General purpose — recommended first model"}, + {"name": "llama3.1:70b", "label": "Llama 3.1 70B", "size_gb": 40.0, "vram_gb": 43, "use": "Complex reasoning, long documents"}, + {"name": "mistral:7b", "label": "Mistral 7B", "size_gb": 4.1, "vram_gb": 6, "use": "Fast responses, structured output"}, + {"name": "gemma2:9b", "label": "Gemma 2 9B", "size_gb": 5.4, "vram_gb": 7, "use": "Strong instruction following"}, + {"name": "phi3:mini", "label": "Phi-3 Mini", "size_gb": 2.2, "vram_gb": 3, "use": "Ultra-fast, lightweight tasks"}, + {"name": "phi3:medium", "label": "Phi-3 Medium", "size_gb": 7.9, "vram_gb": 10, "use": "Balanced speed and quality"}, + {"name": "codellama:13b", "label": "Code Llama 13B", "size_gb": 7.4, "vram_gb": 9, "use": "Code generation and review"}, + {"name": "deepseek-coder-v2:16b", "label": "DeepSeek Coder V2", "size_gb": 8.9, "vram_gb": 11, "use": "Advanced code tasks"}, + {"name": "llama3.1:405b", "label": "Llama 3.1 405B", "size_gb": 231, "vram_gb": 96, "use": "Maximum capability — needs full GPU VRAM"}, + {"name": "nomic-embed-text", "label": "Nomic Embed Text", "size_gb": 0.3, "vram_gb": 1, "use": "Document embeddings for RAG (ChromaDB)"}, + {"name": "mxbai-embed-large", "label": "MXBai Embed Large", "size_gb": 0.7, "vram_gb": 1, "use": "High-accuracy embeddings"}, +] + +@app.get("/api/models") +async def list_models(user: dict = Depends(current_user)): + data = ollama_get("/api/tags") + if not data: + return {"models": [], "error": "Ollama not reachable"} + models = data.get("models", []) + # Enrich with available model info + installed_names = {m["name"] for m in models} + for m in models: + info = next((a for a in AVAILABLE_MODELS if a["name"] == m["name"]), None) + if info: + m["use"] = info["use"] + return {"models": models, "installed_names": list(installed_names)} + +@app.get("/api/models/available") +async def available_models(user: dict = Depends(current_user)): + data = ollama_get("/api/tags") + installed = {m["name"] for m in (data.get("models", []) if data else [])} + result = [] + for m in AVAILABLE_MODELS: + result.append({**m, "installed": m["name"] in installed}) + return result + +@app.get("/api/models/running") +async def running_models(user: dict = Depends(current_user)): + data = ollama_get("/api/ps") + if not data: + return {"models": []} + return data + +@app.post("/api/models/pull") +async def pull_model(body: PullModelRequest, request: Request, admin: dict = Depends(admin_only)): + db = get_db() + audit(db, admin["sub"], admin["username"], "model_pull", f"model={body.model}", + request.client.host if request.client else "") + db.commit() + db.close() + # Fire-and-forget via subprocess (streaming pull) + subprocess.Popen( + ["ollama", "pull", body.model], + stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL + ) + return {"ok": True, "message": f"Pulling {body.model} in background. Check model list in ~1 min."} + +@app.delete("/api/models/{model_name:path}") +async def delete_model(model_name: str, request: Request, admin: dict = Depends(admin_only)): + result = ollama_delete("/api/delete", {"name": model_name}) + db = get_db() + audit(db, admin["sub"], admin["username"], "model_delete", f"model={model_name}", + request.client.host if request.client else "") + db.commit() + db.close() + if "error" in result: + raise HTTPException(status_code=500, detail=result["error"]) + return {"ok": True} + +# ── Offline model upload / load ─────────────────────────────────────────────── + +import aiofiles + +UPLOAD_DIR = Path(os.environ.get("CEZEN_DATA", "/opt/cezen/data")) / "model-uploads" + +@app.on_event("startup") +async def ensure_upload_dir(): + UPLOAD_DIR.mkdir(parents=True, exist_ok=True) + +@app.get("/api/models/uploads") +async def list_uploads(admin: dict = Depends(admin_only)): + """List .gguf files already uploaded to the server.""" + files = [] + for f in sorted(UPLOAD_DIR.glob("*.gguf")): + stat = f.stat() + files.append({ + "filename": f.name, + "size_gb": round(stat.st_size / 1e9, 2), + "uploaded": datetime.fromtimestamp(stat.st_mtime, tz=timezone.utc).isoformat(), + }) + return files + +@app.post("/api/models/upload") +async def upload_model( + request: Request, + file: UploadFile = File(...), + admin: dict = Depends(admin_only), +): + """Stream-upload a .gguf model file to the server.""" + if not file.filename.endswith(".gguf"): + raise HTTPException(status_code=400, detail="Only .gguf files are supported") + + dest = UPLOAD_DIR / Path(file.filename).name + try: + async with aiofiles.open(dest, "wb") as out: + while True: + chunk = await file.read(1024 * 1024) # 1 MB chunks + if not chunk: + break + await out.write(chunk) + except Exception as e: + dest.unlink(missing_ok=True) + raise HTTPException(status_code=500, detail=f"Upload failed: {e}") + + size_gb = round(dest.stat().st_size / 1e9, 2) + db = get_db() + audit(db, admin["sub"], admin["username"], "model_upload", + f"file={file.filename} size={size_gb}GB", + request.client.host if request.client else "") + db.commit() + db.close() + return {"ok": True, "filename": file.filename, "size_gb": size_gb} + +@app.delete("/api/models/uploads/{filename}") +async def delete_upload(filename: str, admin: dict = Depends(admin_only)): + """Remove an uploaded .gguf file from disk.""" + target = UPLOAD_DIR / Path(filename).name # prevent path traversal + if not target.exists(): + raise HTTPException(status_code=404, detail="File not found") + target.unlink() + return {"ok": True} + +@app.post("/api/models/load") +async def load_model(body: dict, request: Request, admin: dict = Depends(admin_only)): + """ + Register an uploaded .gguf file with Ollama as a named model. + body: { "filename": "llama3.gguf", "model_name": "llama3-local" } + """ + filename = body.get("filename", "") + model_name = body.get("model_name", "").strip() + if not filename or not model_name: + raise HTTPException(status_code=400, detail="filename and model_name are required") + + gguf_path = UPLOAD_DIR / Path(filename).name + if not gguf_path.exists(): + raise HTTPException(status_code=404, detail=f"Uploaded file '{filename}' not found") + + # Write a temporary Modelfile + modelfile_path = UPLOAD_DIR / f"{model_name}.Modelfile" + modelfile_path.write_text(f"FROM {gguf_path}\n") + + try: + result = subprocess.run( + ["ollama", "create", model_name, "-f", str(modelfile_path)], + capture_output=True, text=True, timeout=300 + ) + modelfile_path.unlink(missing_ok=True) + if result.returncode != 0: + raise HTTPException(status_code=500, detail=result.stderr or "ollama create failed") + except subprocess.TimeoutExpired: + modelfile_path.unlink(missing_ok=True) + raise HTTPException(status_code=500, detail="ollama create timed out (>5 min)") + + db = get_db() + audit(db, admin["sub"], admin["username"], "model_load", + f"file={filename} name={model_name}", + request.client.host if request.client else "") + db.commit() + db.close() + return {"ok": True, "model_name": model_name} + +# ── Audit log ───────────────────────────────────────────────────────────────── + +@app.get("/api/audit") +async def get_audit(limit: int = 100, offset: int = 0, admin: dict = Depends(admin_only)): + db = get_db() + rows = db.execute( + "SELECT * FROM audit_log ORDER BY id DESC LIMIT ? OFFSET ?", + (limit, offset) + ).fetchall() + total = db.execute("SELECT COUNT(*) as n FROM audit_log").fetchone()["n"] + db.close() + return {"total": total, "rows": [dict(r) for r in rows]} + +@app.get("/api/audit/report") +async def audit_report(days: int = 7, admin: dict = Depends(admin_only)): + days = max(1, min(days, 90)) + since_dt = datetime.now(timezone.utc) - timedelta(days=days) + since = since_dt.isoformat() + db = get_db() + total = db.execute("SELECT COUNT(*) AS n FROM audit_log WHERE timestamp >= ?", (since,)).fetchone()["n"] + failures = db.execute( + "SELECT COUNT(*) AS n FROM audit_log WHERE timestamp >= ? AND result != 'success'", + (since,), + ).fetchone()["n"] + logins = db.execute( + "SELECT COUNT(*) AS n FROM audit_log WHERE timestamp >= ? AND action='login' AND result='success'", + (since,), + ).fetchone()["n"] + failed_logins = db.execute( + "SELECT COUNT(*) AS n FROM audit_log WHERE timestamp >= ? AND action='login' AND result != 'success'", + (since,), + ).fetchone()["n"] + distinct_users = db.execute( + "SELECT COUNT(DISTINCT username) AS n FROM audit_log WHERE timestamp >= ? AND username IS NOT NULL AND username != ''", + (since,), + ).fetchone()["n"] + top_actions = db.execute( + """SELECT action, COUNT(*) AS count + FROM audit_log + WHERE timestamp >= ? + GROUP BY action + ORDER BY count DESC, action ASC + LIMIT 10""", + (since,), + ).fetchall() + user_activity = db.execute( + """SELECT username, COUNT(*) AS count + FROM audit_log + WHERE timestamp >= ? AND username IS NOT NULL AND username != '' + GROUP BY username + ORDER BY count DESC, username ASC + LIMIT 10""", + (since,), + ).fetchall() + recent_failures = db.execute( + """SELECT timestamp, username, action, detail, ip_address, result + FROM audit_log + WHERE timestamp >= ? AND result != 'success' + ORDER BY id DESC + LIMIT 10""", + (since,), + ).fetchall() + recent_events = db.execute( + """SELECT timestamp, username, action, detail, ip_address, result + FROM audit_log + WHERE timestamp >= ? + ORDER BY id DESC + LIMIT 15""", + (since,), + ).fetchall() + db.close() + + recommendations = [] + if failed_logins >= 5: + recommendations.append("Review repeated failed logins and confirm account lockout policy is acceptable.") + if failures: + recommendations.append("Inspect recent failed actions before customer handover.") + if total == 0: + recommendations.append("No audit events in this period; run a demo workflow before exporting a customer report.") + if not recommendations: + recommendations.append("No immediate audit concerns found in the selected period.") + + return { + "schema": "cezen.audit_report.v1", + "generated_at": utcnow(), + "period": {"days": days, "since": since}, + "summary": { + "events": total, + "failures": failures, + "successful_logins": logins, + "failed_logins": failed_logins, + "active_users": distinct_users, + }, + "top_actions": [dict(r) for r in top_actions], + "user_activity": [dict(r) for r in user_activity], + "recent_failures": [dict(r) for r in recent_failures], + "recent_events": [dict(r) for r in recent_events], + "recommendations": recommendations, + } + +# ── System info ─────────────────────────────────────────────────────────────── + +@app.get("/api/system/info") +async def system_info(user: dict = Depends(current_user)): + try: + hostname = subprocess.check_output(["hostname"], text=True).strip() + except Exception: + hostname = "unknown" + try: + uptime_sec = float(Path("/proc/uptime").read_text().split()[0]) + days = int(uptime_sec // 86400) + hrs = int((uptime_sec % 86400) // 3600) + mins = int((uptime_sec % 3600) // 60) + uptime = f"{days}d {hrs}h {mins}m" if days else f"{hrs}h {mins}m" + except Exception: + uptime = "unknown" + + # Ollama version + try: + v = subprocess.check_output(["ollama", "--version"], text=True, timeout=3).strip() + except Exception: + v = "unknown" + + return { + "hostname": hostname, + "uptime": uptime, + "python_version": subprocess.getoutput("python3 --version"), + "ollama_version": v, + "gpu_name": subprocess.getoutput( + "nvidia-smi --query-gpu=name --format=csv,noheader 2>/dev/null | head -1" + ) or "N/A", + } + +@app.get("/api/system/feasibility") +async def system_feasibility(admin: dict = Depends(admin_only)): + """Return the installer feasibility report, or a live fallback if not generated yet.""" + paths = [ + Path(os.environ.get("CEZEN_FEASIBILITY_JSON", "")), + DATA_DIR / "feasibility.json", + Path("/opt/cezen/feasibility.json"), + ] + for p in paths: + if p and str(p) != "." and p.exists(): + try: + return json.loads(p.read_text()) + except Exception: + break + + metrics_now = collect_metrics() + gpu_vram = metrics_now.get("gpu_mem_total_gb") or 0 + ram = metrics_now.get("ram_total_gb") or 0 + disk_free = max(0, (metrics_now.get("disk_total_gb") or 0) - (metrics_now.get("disk_used_gb") or 0)) + if gpu_vram >= 120: + tier, profile, users = "max", "gpu-max", "100+" + elif gpu_vram >= 80: + tier, profile, users = "pro", "gpu-pro", "20-100" + elif gpu_vram >= 48: + tier, profile, users = "basic", "gpu-standard", "5-20" + elif gpu_vram >= 8: + tier, profile, users = "starter", "gpu-starter", "1-10" + elif ram >= 32: + tier, profile, users = "starter", "cpu-ai", "1-3" + else: + tier, profile, users = "starter", "core", "1-2" + return { + "schema": "cezen.feasibility.live-fallback.v1", + "generated_at": utcnow(), + "host": subprocess.getoutput("hostname") or "unknown", + "cpu": {"cores": metrics_now.get("cpu_cores")}, + "ram_gb": ram, + "disk": {"free_gb": round(disk_free, 1), "total_gb": metrics_now.get("disk_total_gb")}, + "gpus": [{ + "name": subprocess.getoutput("nvidia-smi --query-gpu=name --format=csv,noheader 2>/dev/null | head -1") or "N/A", + "vram_gb": gpu_vram, + }] if gpu_vram else [], + "recommendation": { + "recommended_tier": tier, + "recommended_profile": profile, + "estimated_concurrent_users": users, + "notes": ["Live fallback report. Run cezen-feasibility.sh for a full pre-install assessment."], + }, + "features": { + "portal": True, + "document_intelligence": ram >= 8, + "rag_chromadb": ram >= 16 and disk_free >= 50, + "ollama_cpu": ram >= 32, + "ollama_gpu": gpu_vram >= 8, + "vllm": gpu_vram >= 24, + "fine_tuning_qlora": gpu_vram >= 24, + "distributed_training": False, + }, + } + +@app.get("/api/license") +async def license_info(user: dict = Depends(current_user)): + return _tier_payload() + +@app.get("/api/system/readiness-report") +async def readiness_report(admin: dict = Depends(admin_only)): + feasibility = await system_feasibility(admin) + license_payload = _tier_payload() + readiness = _readiness_score(feasibility, license_payload) + recommended_tier = (feasibility.get("recommendation") or {}).get("recommended_tier", "starter") + current_rank = list(TIER_MATRIX.keys()).index(license_payload["tier"]) + recommended_rank = list(TIER_MATRIX.keys()).index(_normalize_tier(recommended_tier)) + commercial_fit = "matched" if current_rank >= recommended_rank else "license_upgrade_recommended" + return { + "schema": "cezen.readiness_report.v1", + "generated_at": utcnow(), + "customer_mode": "software_only" if (feasibility.get("features") or {}).get("software_only", True) else "appliance", + "license": license_payload, + "feasibility": feasibility, + "readiness": readiness, + "commercial_fit": { + "status": commercial_fit, + "current_tier": license_payload["tier"], + "recommended_tier": _normalize_tier(recommended_tier), + "note": "Current license covers the recommended deployment." if commercial_fit == "matched" else "Quote a higher tier or reduce enabled features for this hardware.", + }, + "proposal_summary": [ + f"Recommended Cezen profile: {(feasibility.get('recommendation') or {}).get('recommended_profile', 'core')}", + f"Estimated concurrent users: {(feasibility.get('recommendation') or {}).get('estimated_concurrent_users', '1-2')}", + f"Readiness score: {readiness['score']} ({readiness['status']})", + ], + } + +@app.get("/api/system/backups") +async def list_backups(admin: dict = Depends(admin_only)): + BACKUP_DIR.mkdir(parents=True, exist_ok=True) + backups = [_backup_entry(p) for p in sorted(BACKUP_DIR.glob("cezen-backup-*.zip"), key=lambda p: p.stat().st_mtime, reverse=True)] + return {"backup_dir": str(BACKUP_DIR), "backups": backups} + +@app.post("/api/system/backups") +async def create_backup(request: Request, admin: dict = Depends(admin_only)): + BACKUP_DIR.mkdir(parents=True, exist_ok=True) + name = f"cezen-backup-{datetime.now(timezone.utc).strftime('%Y%m%d-%H%M%S')}.zip" + dest = BACKUP_DIR / name + manifest = { + "schema": "cezen.backup_manifest.v1", + "created_at": utcnow(), + "data_dir": str(DATA_DIR), + "created_by": admin.get("username"), + } + with zipfile.ZipFile(dest, "w", compression=zipfile.ZIP_DEFLATED) as zf: + zf.writestr("manifest.json", json.dumps(manifest, indent=2)) + for path in DATA_DIR.rglob("*"): + if not path.is_file(): + continue + try: + resolved = path.resolve() + if BACKUP_DIR.resolve() in resolved.parents: + continue + except Exception: + pass + zf.write(path, path.relative_to(DATA_DIR).as_posix()) + db = get_db() + audit(db, admin["sub"], admin["username"], "backup_create", f"name={name}", request.client.host if request.client else "") + db.commit() + db.close() + return {"ok": True, "backup": _backup_entry(dest)} + +@app.post("/api/system/backups/{backup_name}/restore") +async def restore_backup(backup_name: str, request: Request, admin: dict = Depends(admin_only)): + src = _backup_path(backup_name) + if not src.exists(): + raise HTTPException(status_code=404, detail="Backup not found") + restore_marker = BACKUP_DIR / f"pre-restore-{datetime.now(timezone.utc).strftime('%Y%m%d-%H%M%S')}" + restore_marker.mkdir(parents=True, exist_ok=True) + current_snapshot = restore_marker / "data-before-restore.zip" + with zipfile.ZipFile(current_snapshot, "w", compression=zipfile.ZIP_DEFLATED) as zf: + zf.writestr("manifest.json", json.dumps({"schema": "cezen.pre_restore_snapshot.v1", "created_at": utcnow()}, indent=2)) + for path in DATA_DIR.rglob("*"): + if path.is_file(): + zf.write(path, path.relative_to(DATA_DIR).as_posix()) + with zipfile.ZipFile(src, "r") as zf: + for member in zf.infolist(): + if member.filename == "manifest.json" or member.is_dir(): + continue + target = (DATA_DIR / member.filename).resolve() + if DATA_DIR.resolve() not in target.parents and target != DATA_DIR.resolve(): + raise HTTPException(status_code=400, detail="Unsafe backup archive") + target.parent.mkdir(parents=True, exist_ok=True) + with zf.open(member) as source, open(target, "wb") as out: + shutil.copyfileobj(source, out) + db = get_db() + audit(db, admin["sub"], admin["username"], "backup_restore", f"name={backup_name} pre_restore={current_snapshot}", request.client.host if request.client else "") + db.commit() + db.close() + return { + "ok": True, + "restored": backup_name, + "pre_restore_snapshot": str(current_snapshot), + "restart_recommended": True, + } + +# ── Branding / Settings ─────────────────────────────────────────────────────── + +@app.get("/api/settings/branding") +async def get_branding(): + """Public — no auth required so login page can also be branded.""" + db = get_db() + rows = db.execute("SELECT key, value FROM settings").fetchall() + db.close() + result = {r["key"]: r["value"] for r in rows} + # If Cezen has locked the tier via env var, override whatever is in DB + if CEZEN_TIER: + result["tier_label"] = CEZEN_TIER + result["tier_locked"] = "true" + else: + result["tier_locked"] = "false" + return result + +@app.put("/api/settings/branding") +async def update_branding(body: dict, admin: dict = Depends(admin_only)): + # tier_label is always excluded from customer-editable fields when locked + allowed = {"org_name","stack_name","logo_url","accent_color","footer_text","support_email"} + if not CEZEN_TIER: + # Only allow tier changes when NOT locked by env var (dev/demo mode) + allowed.add("tier_label") + db = get_db() + for k, v in body.items(): + if k == "tier_label" and CEZEN_TIER: + continue # silently ignore — tier is Cezen-controlled + if k in allowed: + db.execute("INSERT INTO settings (key,value) VALUES (?,?) ON CONFLICT(key) DO UPDATE SET value=excluded.value", (k, str(v))) + db.commit() + db.close() + return {"ok": True} + +@app.post("/api/settings/logo") +async def upload_logo(file: UploadFile = File(...), admin: dict = Depends(admin_only)): + """Upload a logo image — stored as a data URI so it works without a CDN.""" + if not file.content_type.startswith("image/"): + raise HTTPException(status_code=400, detail="Only image files are accepted") + data = await file.read() + if len(data) > 2 * 1024 * 1024: # 2 MB max + raise HTTPException(status_code=400, detail="Logo must be under 2 MB") + import base64 + data_uri = f"data:{file.content_type};base64,{base64.b64encode(data).decode()}" + db = get_db() + db.execute("INSERT INTO settings (key,value) VALUES ('logo_url',?) ON CONFLICT(key) DO UPDATE SET value=excluded.value", (data_uri,)) + db.commit() + db.close() + return {"ok": True, "logo_url": data_uri} + +# ── RAG / Knowledge Base ────────────────────────────────────────────────────── + +KB_DOCS_DIR = DATA_DIR / "kb_docs" +KB_DOCS_DIR.mkdir(parents=True, exist_ok=True) + +CHROMA_URL = os.environ.get("CHROMA_URL", "http://localhost:8000") +OLLAMA_EMBED = os.environ.get("OLLAMA_EMBED_MODEL", "nomic-embed-text") + +def _chroma_req(method: str, path: str, **kwargs): + """Make a request to ChromaDB HTTP API.""" + import urllib.request, urllib.error + url = f"{CHROMA_URL}{path}" + data = json.dumps(kwargs.get("body", None)).encode() if "body" in kwargs else None + req = urllib.request.Request(url, data=data, method=method.upper()) + req.add_header("Content-Type", "application/json") + try: + with urllib.request.urlopen(req, timeout=10) as r: + return json.loads(r.read().decode()) + except urllib.error.HTTPError as e: + body = e.read().decode() + raise HTTPException(status_code=e.code, detail=f"ChromaDB: {body}") + except Exception as e: + raise HTTPException(status_code=503, detail=f"ChromaDB unavailable: {e}") + +def _ollama_embed(texts: list[str], model: str = OLLAMA_EMBED) -> list[list[float]]: + """Get embeddings from Ollama for a list of texts.""" + import urllib.request + embeddings = [] + for text in texts: + body = json.dumps({"model": model, "prompt": text}).encode() + req = urllib.request.Request(f"{OLLAMA_URL}/api/embeddings", data=body, method="POST") + req.add_header("Content-Type", "application/json") + try: + with urllib.request.urlopen(req, timeout=30) as r: + embeddings.append(json.loads(r.read().decode())["embedding"]) + except Exception as e: + raise HTTPException(status_code=503, detail=f"Ollama embedding error: {e}") + return embeddings + +# Collections +@app.get("/api/rag/collections") +async def list_collections(admin: dict = Depends(admin_only)): + db = get_db() + rows = db.execute( + "SELECT id, name, description, chroma_name, embed_model, doc_count, chunk_count, created_at FROM kb_collections ORDER BY created_at DESC" + ).fetchall() + db.close() + return {"collections": [dict(r) for r in rows]} + +@app.post("/api/rag/collections") +async def create_collection(body: dict, admin: dict = Depends(admin_only)): + name = (body.get("name") or "").strip() + if not name: + raise HTTPException(status_code=400, detail="Collection name is required") + # Sanitise for ChromaDB (alphanumeric + hyphens, 3-63 chars) + import re as _re + chroma_name = "cezen-" + _re.sub(r"[^a-z0-9-]", "-", name.lower())[:55] + embed_model = body.get("embed_model", OLLAMA_EMBED) + + # Create ChromaDB collection + _chroma_req("POST", "/api/v1/collections", body={ + "name": chroma_name, + "metadata": {"hnsw:space": "cosine"} + }) + + db = get_db() + try: + cur = db.execute( + "INSERT INTO kb_collections (name, description, chroma_name, embed_model, created_at) VALUES (?,?,?,?,?)", + (name, body.get("description", ""), chroma_name, embed_model, utcnow()) + ) + db.commit() + cid = cur.lastrowid + except sqlite3.IntegrityError: + db.close() + raise HTTPException(status_code=409, detail="A collection with that name already exists") + db.close() + _audit(None, admin["username"], "kb_create_collection", f"Created collection: {name}") + return {"id": cid, "name": name, "chroma_name": chroma_name} + +@app.delete("/api/rag/collections/{cid}") +async def delete_collection(cid: int, admin: dict = Depends(admin_only)): + db = get_db() + row = db.execute("SELECT * FROM kb_collections WHERE id=?", (cid,)).fetchone() + if not row: + db.close() + raise HTTPException(status_code=404, detail="Collection not found") + # Delete from ChromaDB (ignore 404 — may not exist) + try: + _chroma_req("DELETE", f"/api/v1/collections/{row['chroma_name']}") + except HTTPException as e: + if e.status_code != 404: + db.close() + raise + # Delete local doc files + docs = db.execute("SELECT file_path FROM kb_documents WHERE collection_id=?", (cid,)).fetchall() + for d in docs: + try: + Path(d["file_path"]).unlink(missing_ok=True) + except Exception: + pass + db.execute("DELETE FROM kb_documents WHERE collection_id=?", (cid,)) + db.execute("DELETE FROM kb_collections WHERE id=?", (cid,)) + db.commit() + db.close() + _audit(None, admin["username"], "kb_delete_collection", f"Deleted collection id={cid}") + return {"ok": True} + +# Documents +@app.get("/api/rag/collections/{cid}/documents") +async def list_documents(cid: int, admin: dict = Depends(admin_only)): + db = get_db() + rows = db.execute( + "SELECT id, orig_name, size_bytes, chunk_count, status, error_msg, uploaded_at, processed_at FROM kb_documents WHERE collection_id=? ORDER BY uploaded_at DESC", + (cid,) + ).fetchall() + db.close() + return {"documents": [dict(r) for r in rows]} + +@app.post("/api/rag/collections/{cid}/documents") +async def upload_document(cid: int, file: UploadFile = File(...), admin: dict = Depends(admin_only)): + allowed_ext = {".pdf", ".txt", ".md", ".docx", ".doc", ".csv"} + orig = file.filename or "document" + ext = Path(orig).suffix.lower() + if ext not in allowed_ext: + raise HTTPException(status_code=400, detail="Supported: .pdf .txt .md .docx .csv") + + db = get_db() + col = db.execute("SELECT * FROM kb_collections WHERE id=?", (cid,)).fetchone() + if not col: + db.close() + raise HTTPException(status_code=404, detail="Collection not found") + + data = await file.read() + if len(data) > 200 * 1024 * 1024: + db.close() + raise HTTPException(status_code=400, detail="File must be under 200 MB") + + col_dir = KB_DOCS_DIR / str(cid) + col_dir.mkdir(exist_ok=True) + filename = f"{uuid.uuid4().hex}{ext}" + dest = col_dir / filename + dest.write_bytes(data) + + cur = db.execute( + "INSERT INTO kb_documents (collection_id, orig_name, file_path, size_bytes, status, uploaded_at) VALUES (?,?,?,?,?,?)", + (cid, orig, str(dest), len(data), "pending", utcnow()) + ) + db.commit() + doc_id = cur.lastrowid + db.close() + + # Launch ingest subprocess + runner = Path(__file__).parent / "rag_ingest.py" + subprocess.Popen( + ["python3", str(runner), + "--doc-id", str(doc_id), + "--db-path", str(DB_PATH), + "--file", str(dest), + "--collection", col["chroma_name"], + "--embed-model", col["embed_model"], + "--chroma-url", CHROMA_URL, + "--ollama-url", OLLAMA_URL], + stdout=subprocess.DEVNULL, + stderr=subprocess.DEVNULL, + start_new_session=True, + ) + return {"doc_id": doc_id, "status": "pending"} + +@app.delete("/api/rag/collections/{cid}/documents/{doc_id}") +async def delete_document(cid: int, doc_id: int, admin: dict = Depends(admin_only)): + db = get_db() + doc = db.execute("SELECT * FROM kb_documents WHERE id=? AND collection_id=?", (doc_id, cid)).fetchone() + if not doc: + db.close() + raise HTTPException(status_code=404, detail="Document not found") + col = db.execute("SELECT * FROM kb_collections WHERE id=?", (cid,)).fetchone() + + # Remove chunks from ChromaDB + if col: + try: + _chroma_req("POST", f"/api/v1/collections/{col['chroma_name']}/delete", + body={"where": {"doc_id": {"$eq": doc_id}}}) + except Exception: + pass + + try: + Path(doc["file_path"]).unlink(missing_ok=True) + except Exception: + pass + + db.execute("DELETE FROM kb_documents WHERE id=?", (doc_id,)) + # Update doc_count and chunk_count on collection + db.execute(""" + UPDATE kb_collections SET + doc_count = (SELECT COUNT(*) FROM kb_documents WHERE collection_id=? AND status='ready'), + chunk_count = chunk_count - ? + WHERE id=? + """, (cid, doc["chunk_count"], cid)) + db.commit() + db.close() + return {"ok": True} + +# Query +@app.post("/api/rag/query") +async def rag_query(body: dict, user: dict = Depends(current_user)): + collection_id = body.get("collection_id") + query_text = (body.get("query") or "").strip() + n_results = int(body.get("n_results", 5)) + + if not collection_id or not query_text: + raise HTTPException(status_code=400, detail="collection_id and query are required") + + db = get_db() + col = db.execute("SELECT * FROM kb_collections WHERE id=?", (collection_id,)).fetchone() + db.close() + if not col: + raise HTTPException(status_code=404, detail="Collection not found") + + # Embed the query + embed = _ollama_embed([query_text], model=col["embed_model"]) + if not embed: + raise HTTPException(status_code=503, detail="Failed to embed query") + + # Query ChromaDB + result = _chroma_req("POST", f"/api/v1/collections/{col['chroma_name']}/query", body={ + "query_embeddings": embed, + "n_results": n_results, + "include": ["documents", "metadatas", "distances"] + }) + + docs = result.get("documents", [[]])[0] + metas = result.get("metadatas", [[]])[0] + distances = result.get("distances", [[]])[0] + + results = [] + for text, meta, dist in zip(docs, metas, distances): + results.append({ + "text": text, + "source": meta.get("source", ""), + "page": meta.get("page"), + "chunk": meta.get("chunk"), + "score": round(1 - dist, 4), # cosine similarity + }) + + _audit(None, user["username"], "rag_query", + f"Query on collection '{col['name']}': {query_text[:80]}") + return {"results": results, "collection": col["name"]} + +# ── Training / Fine-tuning ──────────────────────────────────────────────────── + +DATASETS_DIR = DATA_DIR / "datasets" +MODELS_DIR = DATA_DIR / "finetuned" +JOBS_LOG_DIR = DATA_DIR / "job_logs" + +DATASETS_DIR.mkdir(parents=True, exist_ok=True) +MODELS_DIR.mkdir(parents=True, exist_ok=True) +JOBS_LOG_DIR.mkdir(parents=True, exist_ok=True) + +def _count_rows(path: Path, orig_name: str) -> int: + """Count data rows in JSONL or CSV file.""" + try: + if orig_name.lower().endswith(".csv"): + import csv + with open(path, newline="", encoding="utf-8", errors="replace") as f: + return max(0, sum(1 for _ in csv.reader(f)) - 1) # exclude header + else: + return sum(1 for line in open(path, encoding="utf-8", errors="replace") if line.strip()) + except Exception: + return 0 + +@app.post("/api/training/datasets") +async def upload_dataset(file: UploadFile = File(...), admin: dict = Depends(admin_only)): + """Upload a JSONL or CSV dataset for fine-tuning.""" + allowed_ext = {".jsonl", ".json", ".csv"} + orig = file.filename or "dataset" + ext = Path(orig).suffix.lower() + if ext not in allowed_ext: + raise HTTPException(status_code=400, detail="Only .jsonl, .json, or .csv files are accepted") + + data = await file.read() + if len(data) > 500 * 1024 * 1024: + raise HTTPException(status_code=400, detail="Dataset must be under 500 MB") + + filename = f"{uuid.uuid4().hex}{ext}" + dest = DATASETS_DIR / filename + dest.write_bytes(data) + + row_count = _count_rows(dest, orig) + + db = get_db() + cur = db.execute( + "INSERT INTO training_datasets (filename, orig_name, file_path, size_bytes, row_count, uploaded_at) VALUES (?,?,?,?,?,?)", + (filename, orig, str(dest), len(data), row_count, utcnow()) + ) + db.commit() + row_id = cur.lastrowid + db.close() + + _audit(None, admin["username"], "dataset_upload", f"Uploaded dataset: {orig} ({row_count} rows)") + return {"id": row_id, "filename": orig, "size_bytes": len(data), "row_count": row_count} + +@app.get("/api/training/datasets") +async def list_datasets(admin: dict = Depends(admin_only)): + db = get_db() + rows = db.execute( + "SELECT id, orig_name, size_bytes, row_count, uploaded_at FROM training_datasets ORDER BY uploaded_at DESC" + ).fetchall() + db.close() + return {"datasets": [dict(r) for r in rows]} + +@app.delete("/api/training/datasets/{dataset_id}") +async def delete_dataset(dataset_id: int, admin: dict = Depends(admin_only)): + db = get_db() + row = db.execute("SELECT * FROM training_datasets WHERE id=?", (dataset_id,)).fetchone() + if not row: + db.close() + raise HTTPException(status_code=404, detail="Dataset not found") + # Don't delete if a job is using it + in_use = db.execute( + "SELECT COUNT(*) as n FROM training_jobs WHERE dataset_id=? AND status IN ('pending','running')", (dataset_id,) + ).fetchone()["n"] + if in_use: + db.close() + raise HTTPException(status_code=409, detail="Dataset is in use by a running job") + try: + Path(row["file_path"]).unlink(missing_ok=True) + except Exception: + pass + db.execute("DELETE FROM training_datasets WHERE id=?", (dataset_id,)) + db.commit() + db.close() + return {"ok": True} + +@app.post("/api/training/jobs") +async def launch_job(body: dict, admin: dict = Depends(admin_only)): + """Launch a QLoRA fine-tuning job as a background subprocess.""" + required = {"name", "base_model", "dataset_id"} + if not required.issubset(body): + raise HTTPException(status_code=400, detail=f"Missing fields: {required - body.keys()}") + + db = get_db() + ds = db.execute("SELECT * FROM training_datasets WHERE id=?", (body["dataset_id"],)).fetchone() + if not ds: + db.close() + raise HTTPException(status_code=404, detail="Dataset not found") + + config = { + "epochs": int(body.get("epochs", 3)), + "lr": float(body.get("lr", 2e-4)), + "batch_size": int(body.get("batch_size", 4)), + "lora_r": int(body.get("lora_r", 16)), + "lora_alpha": int(body.get("lora_alpha", 32)), + "output_name": str(body.get("output_name", body["name"].replace(" ", "_").lower())), + } + output_path = str(MODELS_DIR / config["output_name"]) + log_path = str(JOBS_LOG_DIR / f"{uuid.uuid4().hex}.jsonl") + + cur = db.execute( + """INSERT INTO training_jobs (name, base_model, dataset_id, config_json, status, log_path, output_path, created_at) + VALUES (?,?,?,?,?,?,?,?)""", + (body["name"], body["base_model"], body["dataset_id"], + json.dumps(config), "pending", log_path, output_path, utcnow()) + ) + db.commit() + job_id = cur.lastrowid + + # Launch training subprocess + runner = Path(__file__).parent / "train_qlora.py" + cmd = [ + sys.executable, str(runner), + "--job-id", str(job_id), + "--db-path", str(DB_PATH), + "--dataset", ds["file_path"], + "--base-model", body["base_model"], + "--output-dir", output_path, + "--log-path", log_path, + "--epochs", str(config["epochs"]), + "--lr", str(config["lr"]), + "--batch-size", str(config["batch_size"]), + "--lora-r", str(config["lora_r"]), + "--lora-alpha", str(config["lora_alpha"]), + "--output-name", config["output_name"], + ] + try: + proc = subprocess.Popen( + cmd, + stdout=subprocess.DEVNULL, + stderr=subprocess.DEVNULL, + start_new_session=True, + ) + db.execute("UPDATE training_jobs SET status='running', pid=?, started_at=? WHERE id=?", + (proc.pid, utcnow(), job_id)) + db.commit() + except Exception as e: + db.execute("UPDATE training_jobs SET status='failed', finished_at=? WHERE id=?", + (utcnow(), job_id)) + db.commit() + db.close() + raise HTTPException(status_code=500, detail=f"Failed to start training process: {e}") + + db.close() + _audit(None, admin["username"], "training_launch", + f"Job '{body['name']}' launched on {body['base_model']}") + return {"job_id": job_id, "status": "running"} + +@app.get("/api/training/jobs") +async def list_jobs(admin: dict = Depends(admin_only)): + db = get_db() + rows = db.execute( + """SELECT j.id, j.name, j.base_model, j.status, j.config_json, + j.created_at, j.started_at, j.finished_at, j.output_path, + d.orig_name as dataset_name + FROM training_jobs j + LEFT JOIN training_datasets d ON d.id = j.dataset_id + ORDER BY j.created_at DESC""" + ).fetchall() + db.close() + result = [] + for r in rows: + item = dict(r) + item["config"] = json.loads(item.pop("config_json", "{}")) + result.append(item) + return {"jobs": result} + +@app.get("/api/training/jobs/{job_id}") +async def get_job(job_id: int, admin: dict = Depends(admin_only), tail: int = 200): + db = get_db() + row = db.execute( + """SELECT j.*, d.orig_name as dataset_name + FROM training_jobs j + LEFT JOIN training_datasets d ON d.id = j.dataset_id + WHERE j.id=?""", (job_id,) + ).fetchone() + db.close() + if not row: + raise HTTPException(status_code=404, detail="Job not found") + + item = dict(row) + item["config"] = json.loads(item.pop("config_json", "{}")) + + # Read tail of log file as structured entries + log_entries = [] + log_path = item.get("log_path") + if log_path and Path(log_path).exists(): + try: + lines = Path(log_path).read_text().splitlines() + for line in lines[-tail:]: + line = line.strip() + if line: + try: + log_entries.append(json.loads(line)) + except Exception: + log_entries.append({"msg": line}) + except Exception: + pass + + item["log_entries"] = log_entries + return item + +@app.delete("/api/training/jobs/{job_id}") +async def cancel_job(job_id: int, admin: dict = Depends(admin_only)): + db = get_db() + row = db.execute("SELECT * FROM training_jobs WHERE id=?", (job_id,)).fetchone() + if not row: + db.close() + raise HTTPException(status_code=404, detail="Job not found") + + if row["status"] == "running" and row["pid"]: + try: + import signal + os.killpg(os.getpgid(row["pid"]), signal.SIGTERM) + except Exception: + pass # Process may have already exited + + db.execute("UPDATE training_jobs SET status='cancelled', finished_at=? WHERE id=?", + (utcnow(), job_id)) + db.commit() + db.close() + _audit(None, admin["username"], "training_cancel", f"Job {job_id} cancelled") + return {"ok": True} + +# ── Prompt Library ──────────────────────────────────────────────────────────── + +PROMPT_CATEGORIES = ["procurement","hr","legal","operations","citizen","finance","comms","management","general"] + +@app.get("/api/prompts/categories") +async def list_prompt_categories(user: dict = Depends(current_user)): + return PROMPT_CATEGORIES + +@app.get("/api/prompts") +async def list_prompts(category: Optional[str] = None, q: Optional[str] = None, + user: dict = Depends(current_user)): + db = get_db() + sql = "SELECT * FROM prompts WHERE 1=1" + params = [] + if category and category != "all": + sql += " AND category=?" + params.append(category) + if q: + sql += " AND (title LIKE ? OR prompt_text LIKE ? OR tags LIKE ?)" + params += [f"%{q}%", f"%{q}%", f"%{q}%"] + sql += " ORDER BY sort_order, id" + rows = db.execute(sql, params).fetchall() + db.close() + return [dict(r) for r in rows] + +class PromptCreate(BaseModel): + title: str + prompt_text: str + category: str = "general" + tags: str = "" + needs_upload: int = 0 + +@app.post("/api/prompts") +async def create_prompt(body: PromptCreate, admin: dict = Depends(admin_only)): + if not body.title.strip() or not body.prompt_text.strip(): + raise HTTPException(400, "title and prompt_text are required") + if body.category not in PROMPT_CATEGORIES: + raise HTTPException(400, f"category must be one of {PROMPT_CATEGORIES}") + db = get_db() + max_order = db.execute("SELECT COALESCE(MAX(sort_order),0) FROM prompts").fetchone()[0] + now = utcnow() + cur = db.execute( + "INSERT INTO prompts (title, prompt_text, category, tags, needs_upload, sort_order, created_at, updated_at) VALUES (?,?,?,?,?,?,?,?)", + (body.title.strip(), body.prompt_text.strip(), body.category, + body.tags.strip(), body.needs_upload, max_order + 1, now, now) + ) + pid = cur.lastrowid + db.commit() + row = db.execute("SELECT * FROM prompts WHERE id=?", (pid,)).fetchone() + db.close() + _audit(None, admin["username"], "prompt_create", f"Created prompt '{body.title}'") + return dict(row) + +class PromptUpdate(BaseModel): + title: Optional[str] = None + prompt_text: Optional[str] = None + category: Optional[str] = None + tags: Optional[str] = None + needs_upload: Optional[int] = None + +@app.put("/api/prompts/{pid}") +async def update_prompt(pid: int, body: PromptUpdate, admin: dict = Depends(admin_only)): + db = get_db() + row = db.execute("SELECT * FROM prompts WHERE id=?", (pid,)).fetchone() + if not row: + db.close() + raise HTTPException(404, "Prompt not found") + fields, params = [], [] + if body.title is not None: + fields.append("title=?"); params.append(body.title.strip()) + if body.prompt_text is not None: + fields.append("prompt_text=?"); params.append(body.prompt_text.strip()) + if body.category is not None: + if body.category not in PROMPT_CATEGORIES: + db.close() + raise HTTPException(400, f"Invalid category") + fields.append("category=?"); params.append(body.category) + if body.tags is not None: + fields.append("tags=?"); params.append(body.tags.strip()) + if body.needs_upload is not None: + fields.append("needs_upload=?"); params.append(body.needs_upload) + if not fields: + db.close() + raise HTTPException(400, "Nothing to update") + fields.append("updated_at=?"); params.append(utcnow()) + params.append(pid) + db.execute(f"UPDATE prompts SET {', '.join(fields)} WHERE id=?", params) + db.commit() + row = db.execute("SELECT * FROM prompts WHERE id=?", (pid,)).fetchone() + db.close() + _audit(None, admin["username"], "prompt_update", f"Updated prompt {pid}") + return dict(row) + +@app.delete("/api/prompts/{pid}") +async def delete_prompt(pid: int, admin: dict = Depends(admin_only)): + db = get_db() + row = db.execute("SELECT * FROM prompts WHERE id=?", (pid,)).fetchone() + if not row: + db.close() + raise HTTPException(404, "Prompt not found") + db.execute("DELETE FROM prompts WHERE id=?", (pid,)) + db.commit() + db.close() + _audit(None, admin["username"], "prompt_delete", f"Deleted prompt {pid}: {row['title']}") + return {"ok": True} + +# ── API Key Manager ─────────────────────────────────────────────────────────── + +class ApiKeyCreate(BaseModel): + name: str + expires_days: Optional[int] = None # None = no expiry + +@app.post("/api/apikeys") +async def create_api_key(body: ApiKeyCreate, user: dict = Depends(current_user)): + if not body.name.strip(): + raise HTTPException(400, "Key name is required") + # Generate key: czk_<32 random hex chars> + raw_key = "czk_" + secrets.token_hex(32) + key_prefix = raw_key[:10] # "czk_XXXXXX" shown in list + key_hash = hashlib.sha256(raw_key.encode()).hexdigest() + expires_at = None + if body.expires_days: + expires_at = (datetime.now(timezone.utc) + timedelta(days=body.expires_days)).isoformat() + now = utcnow() + db = get_db() + cur = db.execute( + "INSERT INTO api_keys (user_id, username, name, key_prefix, key_hash, expires_at, created_at) VALUES (?,?,?,?,?,?,?)", + (user["id"], user["username"], body.name.strip(), key_prefix, key_hash, expires_at, now) + ) + kid = cur.lastrowid + db.commit() + db.close() + _audit(None, user["username"], "apikey_create", f"API key '{body.name}' created") + return {"id": kid, "name": body.name.strip(), "key": raw_key, "key_prefix": key_prefix, + "expires_at": expires_at, "created_at": now, + "note": "Store this key — it will not be shown again."} + +@app.get("/api/apikeys") +async def list_api_keys(user: dict = Depends(current_user)): + db = get_db() + if user["role"] == "admin": + rows = db.execute("SELECT id,user_id,username,name,key_prefix,last_used_at,expires_at,is_active,created_at FROM api_keys ORDER BY created_at DESC").fetchall() + else: + rows = db.execute("SELECT id,user_id,username,name,key_prefix,last_used_at,expires_at,is_active,created_at FROM api_keys WHERE user_id=? ORDER BY created_at DESC", (user["id"],)).fetchall() + db.close() + return [dict(r) for r in rows] + +@app.delete("/api/apikeys/{kid}") +async def revoke_api_key(kid: int, user: dict = Depends(current_user)): + db = get_db() + row = db.execute("SELECT * FROM api_keys WHERE id=?", (kid,)).fetchone() + if not row: + db.close(); raise HTTPException(404, "Key not found") + if user["role"] != "admin" and row["user_id"] != user["id"]: + db.close(); raise HTTPException(403, "Not your key") + db.execute("UPDATE api_keys SET is_active=0 WHERE id=?", (kid,)) + db.commit() + db.close() + _audit(None, user["username"], "apikey_revoke", f"Revoked key {kid}: {row['name']}") + return {"ok": True} + +@app.post("/api/apikeys/verify") +async def verify_api_key(request: Request): + """Lightweight endpoint to validate a key (used by integrations).""" + auth_header = request.headers.get("Authorization", "") + if not auth_header.startswith("Bearer czk_"): + raise HTTPException(401, "Missing or invalid API key") + raw_key = auth_header[7:] + key_hash = hashlib.sha256(raw_key.encode()).hexdigest() + db = get_db() + row = db.execute("SELECT * FROM api_keys WHERE key_hash=? AND is_active=1", (key_hash,)).fetchone() + if not row: + db.close(); raise HTTPException(401, "Invalid or revoked API key") + now = utcnow() + if row["expires_at"] and now > row["expires_at"]: + db.execute("UPDATE api_keys SET is_active=0 WHERE id=?", (row["id"],)) + db.commit(); db.close() + raise HTTPException(401, "API key expired") + db.execute("UPDATE api_keys SET last_used_at=? WHERE id=?", (now, row["id"])) + db.commit(); db.close() + return {"valid": True, "username": row["username"], "user_id": row["user_id"]} + +# ── Guardrails ──────────────────────────────────────────────────────────────── + +class GuardrailRule(BaseModel): + type: str = "keyword" # "keyword" | "regex" | "pii" + pattern: str + action: str = "block" # "block" | "warn" + label: str = "" + is_active: int = 1 + +@app.get("/api/guardrails/rules") +async def list_rules(admin: dict = Depends(admin_only)): + db = get_db() + rows = db.execute("SELECT * FROM guardrail_rules ORDER BY type, id").fetchall() + db.close() + return [dict(r) for r in rows] + +@app.post("/api/guardrails/rules") +async def create_rule(body: GuardrailRule, admin: dict = Depends(admin_only)): + if not body.pattern.strip(): + raise HTTPException(400, "pattern is required") + if body.type not in ("keyword", "regex", "pii"): + raise HTTPException(400, "type must be keyword, regex, or pii") + if body.action not in ("block", "warn"): + raise HTTPException(400, "action must be block or warn") + # Validate regex + if body.type == "regex": + import re + try: + re.compile(body.pattern) + except re.error as e: + raise HTTPException(400, f"Invalid regex: {e}") + db = get_db() + now = utcnow() + cur = db.execute( + "INSERT INTO guardrail_rules (type, pattern, action, label, is_active, created_by, created_at) VALUES (?,?,?,?,?,?,?)", + (body.type, body.pattern.strip(), body.action, body.label.strip(), body.is_active, admin["username"], now) + ) + rid = cur.lastrowid + db.commit() + row = db.execute("SELECT * FROM guardrail_rules WHERE id=?", (rid,)).fetchone() + db.close() + _audit(None, admin["username"], "guardrail_create", f"Rule {rid}: {body.type} '{body.pattern[:40]}'") + return dict(row) + +@app.put("/api/guardrails/rules/{rid}") +async def update_rule(rid: int, body: GuardrailRule, admin: dict = Depends(admin_only)): + db = get_db() + if not db.execute("SELECT id FROM guardrail_rules WHERE id=?", (rid,)).fetchone(): + db.close(); raise HTTPException(404, "Rule not found") + db.execute( + "UPDATE guardrail_rules SET type=?, pattern=?, action=?, label=?, is_active=? WHERE id=?", + (body.type, body.pattern.strip(), body.action, body.label.strip(), body.is_active, rid) + ) + db.commit() + row = db.execute("SELECT * FROM guardrail_rules WHERE id=?", (rid,)).fetchone() + db.close() + return dict(row) + +@app.delete("/api/guardrails/rules/{rid}") +async def delete_rule(rid: int, admin: dict = Depends(admin_only)): + db = get_db() + if not db.execute("SELECT id FROM guardrail_rules WHERE id=?", (rid,)).fetchone(): + db.close(); raise HTTPException(404, "Rule not found") + db.execute("DELETE FROM guardrail_rules WHERE id=?", (rid,)) + db.commit(); db.close() + _audit(None, admin["username"], "guardrail_delete", f"Deleted rule {rid}") + return {"ok": True} + +class GuardrailCheck(BaseModel): + text: str + user_id: Optional[int] = None + username: Optional[str] = None + +@app.post("/api/guardrails/check") +async def check_text(body: GuardrailCheck, user: dict = Depends(current_user)): + """ + Evaluate text against active guardrail rules. + Returns matched rules with their action. Logs violations. + """ + import re + db = get_db() + rules = db.execute("SELECT * FROM guardrail_rules WHERE is_active=1").fetchall() + + # Built-in PII patterns (always active for Advanced — optional here) + PII_PATTERNS = { + "aadhaar": r"\b[2-9]\d{3}\s?\d{4}\s?\d{4}\b", + "pan": r"\b[A-Z]{5}[0-9]{4}[A-Z]\b", + "mobile_in": r"\b(?:\+91|91|0)?[6-9]\d{9}\b", + "email": r"\b[A-Za-z0-9._%+\-]+@[A-Za-z0-9.\-]+\.[A-Za-z]{2,}\b", + "credit_card": r"\b(?:\d[ \-]?){13,16}\b", + } + + matches = [] + now = utcnow() + uid = body.user_id or user["id"] + uname = body.username or user["username"] + + for rule in rules: + rtype = rule["type"] + pattern = rule["pattern"] + hit = False + + if rtype == "keyword": + hit = pattern.lower() in body.text.lower() + elif rtype == "regex": + try: + hit = bool(re.search(pattern, body.text, re.IGNORECASE)) + except Exception: + pass + elif rtype == "pii": + pii_key = pattern.lower() + if pii_key in PII_PATTERNS: + hit = bool(re.search(PII_PATTERNS[pii_key], body.text)) + else: + hit = bool(re.search(pattern, body.text, re.IGNORECASE)) + + if hit: + matches.append({ + "rule_id": rule["id"], + "type": rtype, + "pattern": pattern, + "action": rule["action"], + "label": rule["label"], + }) + db.execute( + "INSERT INTO guardrail_log (rule_id, user_id, username, rule_type, pattern, action, logged_at) VALUES (?,?,?,?,?,?,?)", + (rule["id"], uid, uname, rtype, pattern, rule["action"], now) + ) + + db.commit() + db.close() + + blocked = any(m["action"] == "block" for m in matches) + return { + "ok": not blocked, + "blocked": blocked, + "matches": matches, + } + +@app.get("/api/guardrails/log") +async def guardrail_log(limit: int = 100, admin: dict = Depends(admin_only)): + db = get_db() + rows = db.execute( + "SELECT * FROM guardrail_log ORDER BY logged_at DESC LIMIT ?", (limit,) + ).fetchall() + db.close() + return [dict(r) for r in rows] + +# ── Model Benchmarking ──────────────────────────────────────────────────────── + +class BenchmarkRun(BaseModel): + models: list[str] # list of Ollama model names + prompts: list[str] # list of test prompts + max_tokens: int = 256 + +@app.post("/api/benchmark/run") +async def run_benchmark(body: BenchmarkRun, admin: dict = Depends(admin_only)): + """ + Runs each prompt against each model and returns timing + response. + Calls Ollama /api/generate for each (model, prompt) pair sequentially. + """ + import urllib.request, urllib.error + if not body.models: + raise HTTPException(400, "At least one model required") + if not body.prompts: + raise HTTPException(400, "At least one prompt required") + if len(body.models) > 5: + raise HTTPException(400, "Max 5 models per benchmark run") + if len(body.prompts) > 10: + raise HTTPException(400, "Max 10 prompts per benchmark run") + + results = [] + for prompt in body.prompts: + row = {"prompt": prompt, "responses": {}} + for model in body.models: + payload = json.dumps({ + "model": model, + "prompt": prompt, + "stream": False, + "options": {"num_predict": body.max_tokens} + }).encode() + req = urllib.request.Request( + f"{OLLAMA_URL}/api/generate", data=payload, method="POST" + ) + req.add_header("Content-Type", "application/json") + t0 = datetime.now(timezone.utc) + try: + with urllib.request.urlopen(req, timeout=120) as r: + data = json.loads(r.read().decode()) + elapsed_ms = int((datetime.now(timezone.utc) - t0).total_seconds() * 1000) + row["responses"][model] = { + "text": data.get("response", ""), + "duration_ms": elapsed_ms, + "prompt_tokens": data.get("prompt_eval_count", 0), + "completion_tokens": data.get("eval_count", 0), + "tokens_per_sec": round( + data.get("eval_count", 0) / max(data.get("eval_duration", 1) / 1e9, 0.001), 1 + ), + "error": None + } + except Exception as e: + elapsed_ms = int((datetime.now(timezone.utc) - t0).total_seconds() * 1000) + row["responses"][model] = { + "text": "", "duration_ms": elapsed_ms, + "prompt_tokens": 0, "completion_tokens": 0, "tokens_per_sec": 0, + "error": str(e) + } + results.append(row) + + _audit(None, admin["username"], "benchmark_run", + f"Benchmarked {len(body.models)} models × {len(body.prompts)} prompts") + return {"models": body.models, "results": results} + +# ── Usage Analytics ─────────────────────────────────────────────────────────── + +class QueryLogEntry(BaseModel): + model: str = "" + prompt_tokens: int = 0 + completion_tokens: int = 0 + duration_ms: int = 0 + status: str = "ok" + +@app.post("/api/analytics/log") +async def log_query(body: QueryLogEntry, user: dict = Depends(current_user)): + """Called by Open WebUI proxy or client to record a query event.""" + db = get_db() + db.execute( + "INSERT INTO query_log (user_id, username, model, prompt_tokens, completion_tokens, duration_ms, status, logged_at) VALUES (?,?,?,?,?,?,?,?)", + (user["id"], user["username"], body.model, body.prompt_tokens, + body.completion_tokens, body.duration_ms, body.status, utcnow()) + ) + db.commit() + db.close() + return {"ok": True} + +@app.get("/api/analytics/summary") +async def analytics_summary(days: int = 30, user: dict = Depends(current_user)): + """ + Returns usage summary. Admins get org-wide data; regular users get only their own. + """ + db = get_db() + since = (datetime.now(timezone.utc) - timedelta(days=days)).isoformat() + is_admin = user["role"] == "admin" + + base_filter = "WHERE logged_at >= ?" if is_admin else "WHERE logged_at >= ? AND user_id = ?" + base_params_1 = [since] if is_admin else [since, user["id"]] + base_params_2 = [since] if is_admin else [since, user["id"]] + base_params_3 = [since] if is_admin else [since, user["id"]] + base_params_4 = [since] if is_admin else [since, user["id"]] + + # Total queries & tokens + totals = db.execute( + f"SELECT COUNT(*) as total_queries, COALESCE(SUM(prompt_tokens+completion_tokens),0) as total_tokens, " + f"COALESCE(AVG(duration_ms),0) as avg_duration_ms " + f"FROM query_log {base_filter}", + base_params_1 + ).fetchone() + + # Queries per day (last `days` days) + daily = db.execute( + f"SELECT substr(logged_at,1,10) as day, COUNT(*) as queries " + f"FROM query_log {base_filter} GROUP BY day ORDER BY day", + base_params_2 + ).fetchall() + + # Top models + models = db.execute( + f"SELECT model, COUNT(*) as queries, COALESCE(SUM(prompt_tokens+completion_tokens),0) as tokens " + f"FROM query_log {base_filter} GROUP BY model ORDER BY queries DESC LIMIT 10", + base_params_3 + ).fetchall() + + result = { + "is_admin": is_admin, + "days": days, + "totals": dict(totals), + "daily": [dict(r) for r in daily], + "models": [dict(r) for r in models], + } + + # Org-wide: top users (admin only) + if is_admin: + top_users = db.execute( + "SELECT username, COUNT(*) as queries, COALESCE(SUM(prompt_tokens+completion_tokens),0) as tokens " + "FROM query_log WHERE logged_at >= ? GROUP BY username ORDER BY queries DESC LIMIT 10", + base_params_4 + ).fetchall() + result["top_users"] = [dict(r) for r in top_users] + + db.close() + return result + +@app.get("/api/analytics/my") +async def my_analytics(days: int = 30, user: dict = Depends(current_user)): + """Personal usage — always returns only the calling user's data.""" + db = get_db() + since = (datetime.now(timezone.utc) - timedelta(days=days)).isoformat() + + totals = db.execute( + "SELECT COUNT(*) as total_queries, COALESCE(SUM(prompt_tokens+completion_tokens),0) as total_tokens, " + "COALESCE(AVG(duration_ms),0) as avg_duration_ms " + "FROM query_log WHERE logged_at >= ? AND user_id = ?", + [since, user["id"]] + ).fetchone() + + daily = db.execute( + "SELECT substr(logged_at,1,10) as day, COUNT(*) as queries " + "FROM query_log WHERE logged_at >= ? AND user_id = ? GROUP BY day ORDER BY day", + [since, user["id"]] + ).fetchall() + + models = db.execute( + "SELECT model, COUNT(*) as queries FROM query_log " + "WHERE logged_at >= ? AND user_id = ? GROUP BY model ORDER BY queries DESC LIMIT 5", + [since, user["id"]] + ).fetchall() + + db.close() + return { + "days": days, + "totals": dict(totals), + "daily": [dict(r) for r in daily], + "models": [dict(r) for r in models], + } + +# ── Agent Builder ───────────────────────────────────────────────────────────── + +AGENT_STEP_TYPES = {"prompt", "summarise", "extract", "classify", "rag", "format"} + +def _call_ollama_sync(model: str, prompt: str) -> str: + import urllib.request as _ur + payload = json.dumps({"model": model, "prompt": prompt, "stream": False}).encode() + req = _ur.Request( + f"{OLLAMA_URL}/api/generate", + data=payload, + headers={"Content-Type": "application/json"}, + method="POST" + ) + with _ur.urlopen(req, timeout=300) as resp: + return json.loads(resp.read()).get("response", "") + + +def _build_step_prompt(step: dict, pipeline_input: str, prev_output: str) -> str: + """Build the actual prompt for a step, substituting {{input}} and {{prev}}.""" + stype = step.get("type", "prompt") + template = step.get("prompt", "").strip() + model = step.get("model", "llama3") + + # Substitute placeholders + context = template.replace("{{input}}", pipeline_input).replace("{{prev}}", prev_output) + + built_in = { + "summarise": f"Summarise the following text concisely in {step.get('target_words', 150)} words or less:\n\n{prev_output}", + "extract": f"Extract the following fields from the text as a JSON object: {step.get('fields','')}\n\nText:\n{prev_output}\n\nReturn ONLY valid JSON.", + "classify": f"Classify the following text into one of these categories: {step.get('categories','')}\n\nText:\n{prev_output}\n\nReturn ONLY the category name.", + "format": f"Reformat the following content using this template:\n{template}\n\nContent:\n{prev_output}", + } + + if stype in built_in and not template: + return built_in[stype] + + return context or built_in.get(stype, prev_output) + + +async def _execute_agent(run_id: int): + import asyncio + db = get_db() + try: + run_row = db.execute("SELECT * FROM agent_runs WHERE id=?", (run_id,)).fetchone() + if not run_row: + return + agent_row = db.execute("SELECT * FROM agents WHERE id=?", (run_row["agent_id"],)).fetchone() + if not agent_row: + db.execute("UPDATE agent_runs SET status='error',error_msg='Agent not found',finished_at=? WHERE id=?", + (utcnow(), run_id)) + db.commit(); db.close(); return + + steps = json.loads(agent_row["steps"]) + pipeline_input = run_row["input"] + prev_output = pipeline_input + steps_log = [] + + db.execute("UPDATE agent_runs SET status='running' WHERE id=?", (run_id,)) + db.commit() + + for i, step in enumerate(steps): + step_name = step.get("name") or f"Step {i+1}" + step_type = step.get("type", "prompt") + model = step.get("model", "llama3") + + try: + # RAG step: query ChromaDB then inject results + if step_type == "rag": + try: + import chromadb + chroma = chromadb.HttpClient(host="localhost", port=8000) + col_name = step.get("collection", "") + col = chroma.get_collection(col_name) + docs = col.query(query_texts=[prev_output], n_results=int(step.get("n_results", 3))) + context_docs = "\n\n---\n\n".join(docs["documents"][0]) + prompt = f"Use the following context to answer:\n\n{context_docs}\n\nQuestion: {prev_output}" + except Exception as e: + prompt = f"(RAG unavailable: {e})\n\n{prev_output}" + else: + prompt = _build_step_prompt(step, pipeline_input, prev_output) + + result = await asyncio.get_event_loop().run_in_executor( + None, _call_ollama_sync, model, prompt + ) + steps_log.append({ + "step": i + 1, + "name": step_name, + "type": step_type, + "model": model, + "prompt": prompt[:500], + "output": result, + "status": "ok" + }) + prev_output = result + + except Exception as e: + steps_log.append({ + "step": i + 1, + "name": step_name, + "type": step_type, + "model": model, + "output": "", + "error": str(e), + "status": "error" + }) + db.execute( + "UPDATE agent_runs SET status='error',error_msg=?,steps_log=?,output=?,finished_at=? WHERE id=?", + (str(e), json.dumps(steps_log), prev_output, utcnow(), run_id) + ) + db.commit(); db.close(); return + + db.execute( + "UPDATE agent_runs SET status='done',output=?,steps_log=?,finished_at=? WHERE id=?", + (prev_output, json.dumps(steps_log), utcnow(), run_id) + ) + db.commit() + + except Exception as e: + db.execute("UPDATE agent_runs SET status='error',error_msg=?,finished_at=? WHERE id=?", + (str(e), utcnow(), run_id)) + db.commit() + finally: + db.close() + + +@app.get("/api/agents") +async def list_agents(user: dict = Depends(current_user)): + db = get_db() + if user.get("role") == "admin": + rows = db.execute( + "SELECT id,user_id,username,name,description,is_active,created_at,updated_at FROM agents ORDER BY updated_at DESC" + ).fetchall() + else: + rows = db.execute( + "SELECT id,user_id,username,name,description,is_active,created_at,updated_at FROM agents WHERE user_id=? ORDER BY updated_at DESC", + (user["sub"],) + ).fetchall() + db.close() + return [dict(r) for r in rows] + + +@app.post("/api/agents") +async def create_agent(body: dict, user: dict = Depends(current_user)): + name = (body.get("name") or "").strip() + if not name: + raise HTTPException(400, "name required") + steps = body.get("steps", []) + now = utcnow() + db = get_db() + cur = db.execute( + "INSERT INTO agents (user_id,username,name,description,steps,is_active,created_at,updated_at) VALUES (?,?,?,?,?,1,?,?)", + (user["sub"], user["username"], name, body.get("description",""), json.dumps(steps), now, now) + ) + agent_id = cur.lastrowid + db.commit(); db.close() + return {"id": agent_id, "ok": True} + + +@app.get("/api/agents/{agent_id}") +async def get_agent(agent_id: int, user: dict = Depends(current_user)): + db = get_db() + row = db.execute("SELECT * FROM agents WHERE id=?", (agent_id,)).fetchone() + db.close() + if not row: + raise HTTPException(404, "Agent not found") + if row["user_id"] != user["sub"] and user.get("role") != "admin": + raise HTTPException(403, "Not your agent") + d = dict(row) + d["steps"] = json.loads(d["steps"]) + return d + + +@app.put("/api/agents/{agent_id}") +async def update_agent(agent_id: int, body: dict, user: dict = Depends(current_user)): + db = get_db() + row = db.execute("SELECT * FROM agents WHERE id=?", (agent_id,)).fetchone() + if not row: + db.close(); raise HTTPException(404, "Agent not found") + if row["user_id"] != user["sub"] and user.get("role") != "admin": + db.close(); raise HTTPException(403, "Not your agent") + db.execute( + "UPDATE agents SET name=?,description=?,steps=?,is_active=?,updated_at=? WHERE id=?", + (body.get("name", row["name"]), + body.get("description", row["description"]), + json.dumps(body.get("steps", json.loads(row["steps"]))), + int(body.get("is_active", row["is_active"])), + utcnow(), agent_id) + ) + db.commit(); db.close() + return {"ok": True} + + +@app.delete("/api/agents/{agent_id}") +async def delete_agent(agent_id: int, user: dict = Depends(current_user)): + db = get_db() + row = db.execute("SELECT * FROM agents WHERE id=?", (agent_id,)).fetchone() + if not row: + db.close(); raise HTTPException(404, "Agent not found") + if row["user_id"] != user["sub"] and user.get("role") != "admin": + db.close(); raise HTTPException(403, "Not your agent") + db.execute("DELETE FROM agents WHERE id=?", (agent_id,)) + db.commit(); db.close() + return {"ok": True} + + +@app.post("/api/agents/{agent_id}/run") +async def run_agent(agent_id: int, body: dict, user: dict = Depends(current_user)): + import asyncio + db = get_db() + row = db.execute("SELECT * FROM agents WHERE id=?", (agent_id,)).fetchone() + if not row: + db.close(); raise HTTPException(404, "Agent not found") + if row["user_id"] != user["sub"] and user.get("role") != "admin": + db.close(); raise HTTPException(403, "Not your agent") + pipeline_input = (body.get("input") or "").strip() + if not pipeline_input: + db.close(); raise HTTPException(400, "input required") + cur = db.execute( + "INSERT INTO agent_runs (agent_id,agent_name,user_id,username,input,status,created_at) VALUES (?,?,?,?,?,'pending',?)", + (agent_id, row["name"], user["sub"], user["username"], pipeline_input, utcnow()) + ) + run_id = cur.lastrowid + db.commit(); db.close() + asyncio.create_task(_execute_agent(run_id)) + return {"run_id": run_id, "status": "pending"} + + +@app.get("/api/agents/runs/list") +async def list_agent_runs(user: dict = Depends(current_user)): + db = get_db() + if user.get("role") == "admin": + rows = db.execute( + "SELECT id,agent_id,agent_name,username,input,output,status,error_msg,created_at,finished_at " + "FROM agent_runs ORDER BY id DESC LIMIT 100" + ).fetchall() + else: + rows = db.execute( + "SELECT id,agent_id,agent_name,username,input,output,status,error_msg,created_at,finished_at " + "FROM agent_runs WHERE user_id=? ORDER BY id DESC LIMIT 50", + (user["sub"],) + ).fetchall() + db.close() + return [dict(r) for r in rows] + + +@app.get("/api/agents/runs/{run_id}") +async def get_agent_run(run_id: int, user: dict = Depends(current_user)): + db = get_db() + row = db.execute("SELECT * FROM agent_runs WHERE id=?", (run_id,)).fetchone() + db.close() + if not row: + raise HTTPException(404, "Run not found") + if row["user_id"] != user["sub"] and user.get("role") != "admin": + raise HTTPException(403, "Not your run") + d = dict(row) + d["steps_log"] = json.loads(d["steps_log"]) + return d + + +# ── Document Intelligence ────────────────────────────────────────────────────── + +DOCS_DIR = DATA_DIR / "doc_uploads" +DOCS_DIR.mkdir(parents=True, exist_ok=True) + +ALLOWED_DOC_TYPES = { + ".pdf", ".docx", ".doc", ".txt", ".md", + ".png", ".jpg", ".jpeg", ".webp", ".bmp" +} + +DOC_MODES = { + "summarise": "Summarise the following document clearly and concisely. Highlight the key points, main findings, and any important conclusions.\n\nDocument:\n", + "keypoints": "Extract the key points from the following document as a numbered list. Be specific and comprehensive.\n\nDocument:\n", + "tables": "Extract all tables, structured data, and numerical information from the following document. Present each table clearly with headers.\n\nDocument:\n", + "questions": "Generate 10 insightful questions a reader might ask about this document, along with brief answers based on the content.\n\nDocument:\n", + "translate": "Translate the following document into English. If it is already in English, return the original text.\n\nDocument:\n", + "custom": "", # user provides full prompt +} + +def _extract_text(file_path: Path, file_type: str) -> str: + """Extract plain text from uploaded file using available tools.""" + ext = file_type.lower() + try: + if ext == ".txt" or ext == ".md": + return file_path.read_text(errors="replace") + + if ext == ".pdf": + try: + import fitz # pymupdf + doc = fitz.open(str(file_path)) + return "\n".join(page.get_text() for page in doc) + except ImportError: + # fallback: pdftotext if available + result = subprocess.run( + ["pdftotext", str(file_path), "-"], + capture_output=True, text=True, timeout=30 + ) + return result.stdout if result.returncode == 0 else "[PDF extraction failed — install pymupdf]" + + if ext in (".docx", ".doc"): + try: + import docx + doc = docx.Document(str(file_path)) + return "\n".join(p.text for p in doc.paragraphs) + except ImportError: + return "[DOCX extraction failed — install python-docx]" + + if ext in (".png", ".jpg", ".jpeg", ".webp", ".bmp"): + # For images, return a marker — processing handled by vision model + return "__IMAGE__" + + except Exception as e: + return f"[Extraction error: {e}]" + return "" + + +async def _run_doc_job(job_id: int): + """Background task: extract text, call Ollama, update DB.""" + import asyncio + db = get_db() + try: + row = db.execute("SELECT * FROM doc_jobs WHERE id=?", (job_id,)).fetchone() + if not row: + return + file_path = Path(row["file_path"]) + mode = row["mode"] + model = row["model"] or "llama3" + custom = row["custom_prompt"] + + # Step 1: extract + extracted = _extract_text(file_path, row["file_type"]) + db.execute("UPDATE doc_jobs SET extracted_text=?, status='processing' WHERE id=?", + (extracted[:50000], job_id)) + db.commit() + + # Step 2: build prompt + if mode == "custom": + prompt = f"{custom}\n\nDocument:\n{extracted[:40000]}" + elif extracted == "__IMAGE__": + # For images with non-vision modes, use a generic image analysis prompt + prompt = f"Please analyse this image and {DOC_MODES.get(mode,'describe what you see').lower()}" + # Will be handled as vision request below + else: + prefix = DOC_MODES.get(mode, DOC_MODES["summarise"]) + prompt = prefix + extracted[:40000] + + # Step 3: call Ollama + import urllib.request + if extracted == "__IMAGE__": + img_b64 = base64.b64encode(file_path.read_bytes()).decode() + payload = json.dumps({ + "model": model, + "prompt": f"Analyse this image: {DOC_MODES.get(mode, 'describe what you see')}", + "images": [img_b64], + "stream": False + }).encode() + else: + payload = json.dumps({ + "model": model, + "prompt": prompt, + "stream": False + }).encode() + + req = urllib.request.Request( + f"{OLLAMA_URL}/api/generate", + data=payload, + headers={"Content-Type": "application/json"}, + method="POST" + ) + with urllib.request.urlopen(req, timeout=300) as resp: + data = json.loads(resp.read()) + result = data.get("response", "") + + db.execute( + "UPDATE doc_jobs SET result=?, status='done', finished_at=? WHERE id=?", + (result, utcnow(), job_id) + ) + db.commit() + + except Exception as e: + db.execute( + "UPDATE doc_jobs SET status='error', error_msg=?, finished_at=? WHERE id=?", + (str(e), utcnow(), job_id) + ) + db.commit() + finally: + db.close() + + +@app.post("/api/docjobs") +async def create_doc_job( + file: UploadFile = File(...), + mode: str = Form("summarise"), + model: str = Form(""), + custom_prompt: str = Form(""), + user: dict = Depends(current_user) +): + import asyncio + ext = Path(file.filename).suffix.lower() + if ext not in ALLOWED_DOC_TYPES: + raise HTTPException(400, f"Unsupported file type: {ext}") + if mode not in DOC_MODES: + raise HTTPException(400, f"Unknown mode: {mode}") + if mode == "custom" and not custom_prompt.strip(): + raise HTTPException(400, "custom_prompt required when mode=custom") + + # Save file + safe_name = f"{uuid.uuid4().hex}{ext}" + dest = DOCS_DIR / safe_name + with dest.open("wb") as f: + shutil.copyfileobj(file.file, f) + + # Limit 20 MB + if dest.stat().st_size > 20 * 1024 * 1024: + dest.unlink() + raise HTTPException(400, "File too large (max 20 MB)") + + db = get_db() + cur = db.execute( + """INSERT INTO doc_jobs + (user_id,username,orig_name,file_path,file_type,mode,custom_prompt,model,status,created_at) + VALUES (?,?,?,?,?,?,?,?,'pending',?)""", + (user["sub"], user["username"], file.filename, str(dest), ext, + mode, custom_prompt, model, utcnow()) + ) + job_id = cur.lastrowid + db.commit() + db.close() + + # Fire background task + asyncio.create_task(_run_doc_job(job_id)) + return {"id": job_id, "status": "pending"} + + +@app.get("/api/docjobs") +async def list_doc_jobs(user: dict = Depends(current_user)): + db = get_db() + if user.get("role") == "admin": + rows = db.execute( + "SELECT id,username,orig_name,mode,model,status,error_msg,created_at,finished_at " + "FROM doc_jobs ORDER BY id DESC LIMIT 100" + ).fetchall() + else: + rows = db.execute( + "SELECT id,username,orig_name,mode,model,status,error_msg,created_at,finished_at " + "FROM doc_jobs WHERE user_id=? ORDER BY id DESC LIMIT 50", + (user["sub"],) + ).fetchall() + db.close() + return [dict(r) for r in rows] + + +@app.get("/api/docjobs/{job_id}") +async def get_doc_job(job_id: int, user: dict = Depends(current_user)): + db = get_db() + row = db.execute("SELECT * FROM doc_jobs WHERE id=?", (job_id,)).fetchone() + db.close() + if not row: + raise HTTPException(404, "Job not found") + if row["user_id"] != user["sub"] and user.get("role") != "admin": + raise HTTPException(403, "Not your job") + return dict(row) + + +@app.delete("/api/docjobs/{job_id}") +async def delete_doc_job(job_id: int, user: dict = Depends(current_user)): + db = get_db() + row = db.execute("SELECT * FROM doc_jobs WHERE id=?", (job_id,)).fetchone() + if not row: + db.close(); raise HTTPException(404, "Job not found") + if row["user_id"] != user["sub"] and user.get("role") != "admin": + db.close(); raise HTTPException(403, "Not your job") + # Delete file + try: Path(row["file_path"]).unlink(missing_ok=True) + except: pass + db.execute("DELETE FROM doc_jobs WHERE id=?", (job_id,)) + db.commit() + db.close() + return {"ok": True} + + +# ── Multimodal Chat ──────────────────────────────────────────────────────────── + +@app.post("/api/multimodal/chat") +async def multimodal_chat(body: dict, user: dict = Depends(current_user)): + """ + Send a message (with optional base64 image) to an Ollama vision model. + body: { model, prompt, image_b64 (optional), chat_id (optional) } + Returns: { response, chat_id } + """ + import urllib.request as _ur + + model = body.get("model", "llava") + prompt = body.get("prompt", "").strip() + image_b64 = body.get("image_b64", "") + chat_id = body.get("chat_id") + + if not prompt: + raise HTTPException(400, "prompt required") + + payload: dict = {"model": model, "prompt": prompt, "stream": False} + if image_b64: + # Strip data URI prefix if present + if "," in image_b64: + image_b64 = image_b64.split(",", 1)[1] + payload["images"] = [image_b64] + + try: + req = _ur.Request( + f"{OLLAMA_URL}/api/generate", + data=json.dumps(payload).encode(), + headers={"Content-Type": "application/json"}, + method="POST" + ) + with _ur.urlopen(req, timeout=180) as resp: + data = json.loads(resp.read()) + response_text = data.get("response", "") + except Exception as e: + raise HTTPException(502, f"Ollama error: {e}") + + # Persist chat history + db = get_db() + now = utcnow() + if chat_id: + row = db.execute("SELECT * FROM multimodal_chats WHERE id=? AND user_id=?", + (chat_id, user["sub"])).fetchone() + if row: + msgs = json.loads(row["messages"]) + msgs.append({"role": "user", "content": prompt, "has_image": bool(image_b64)}) + msgs.append({"role": "assistant", "content": response_text}) + db.execute("UPDATE multimodal_chats SET messages=?, model=?, updated_at=? WHERE id=?", + (json.dumps(msgs), model, now, chat_id)) + db.commit() + else: + chat_id = None # reset if not found + + if not chat_id: + msgs = [ + {"role": "user", "content": prompt, "has_image": bool(image_b64)}, + {"role": "assistant", "content": response_text, "has_image": False} + ] + cur = db.execute( + "INSERT INTO multimodal_chats (user_id,username,model,messages,created_at,updated_at) VALUES (?,?,?,?,?,?)", + (user["sub"], user["username"], model, json.dumps(msgs), now, now) + ) + chat_id = cur.lastrowid + db.commit() + + db.close() + return {"response": response_text, "chat_id": chat_id} + + +@app.get("/api/multimodal/chats") +async def list_multimodal_chats(user: dict = Depends(current_user)): + db = get_db() + rows = db.execute( + "SELECT id,model,created_at,updated_at,substr(messages,1,200) as preview " + "FROM multimodal_chats WHERE user_id=? ORDER BY updated_at DESC LIMIT 50", + (user["sub"],) + ).fetchall() + db.close() + result = [] + for r in rows: + d = dict(r) + # Extract first user message as title + try: + msgs = json.loads(d.pop("preview", "[]") + "]" if not d.get("preview","").endswith("]") else d.pop("preview","[]")) + d["title"] = msgs[0]["content"][:60] if msgs else "Chat" + except: + d["title"] = "Chat" + result.append(d) + return result + + +@app.get("/api/multimodal/chats/{chat_id}") +async def get_multimodal_chat(chat_id: int, user: dict = Depends(current_user)): + db = get_db() + row = db.execute("SELECT * FROM multimodal_chats WHERE id=? AND user_id=?", + (chat_id, user["sub"])).fetchone() + db.close() + if not row: + raise HTTPException(404, "Chat not found") + d = dict(row) + d["messages"] = json.loads(d["messages"]) + return d + + +@app.delete("/api/multimodal/chats/{chat_id}") +async def delete_multimodal_chat(chat_id: int, user: dict = Depends(current_user)): + db = get_db() + db.execute("DELETE FROM multimodal_chats WHERE id=? AND user_id=?", + (chat_id, user["sub"])) + db.commit() + db.close() + return {"ok": True} + + +# ── Secure Chat Rooms ───────────────────────────────────────────────────────── + +def _ai_reply_sync(room_id: int, user_content: str, system_topic: str, model: str): + """Generate an AI reply and post it as an assistant message.""" + import urllib.request, json as _json + db = get_db() + try: + messages = [] + if system_topic.strip(): + messages.append({"role": "system", "content": system_topic}) + # Include last 20 messages as context + history = db.execute( + "SELECT sender_role, content FROM chat_room_messages WHERE room_id=? ORDER BY id DESC LIMIT 20", + (room_id,) + ).fetchall() + for m in reversed(history): + role = "assistant" if m["sender_role"] == "assistant" else "user" + messages.append({"role": role, "content": m["content"]}) + + payload = {"model": model or "llama3", "messages": messages, "stream": False} + req = urllib.request.Request( + f"{OLLAMA_URL}/api/chat", + data=_json.dumps(payload).encode(), + headers={"Content-Type": "application/json"}, + method="POST", + ) + with urllib.request.urlopen(req, timeout=300) as resp: + data = _json.loads(resp.read()) + reply = data.get("message", {}).get("content", "") + + db.execute( + "INSERT INTO chat_room_messages (room_id, user_id, username, sender_role, content, created_at) VALUES (?,?,?,?,?,?)", + (room_id, 0, "AI Assistant", "assistant", reply, utcnow()) + ) + db.commit() + except Exception as e: + db.execute( + "INSERT INTO chat_room_messages (room_id, user_id, username, sender_role, content, created_at) VALUES (?,?,?,?,?,?)", + (room_id, 0, "AI Assistant", "assistant", f"⚠️ AI error: {e}", utcnow()) + ) + db.commit() + finally: + db.close() + + +class RoomCreate(BaseModel): + name: str + description: str = "" + topic: str = "" + ai_model: str = "" + ai_auto: int = 0 + +class RoomUpdate(BaseModel): + name: Optional[str] = None + description: Optional[str] = None + topic: Optional[str] = None + ai_model: Optional[str] = None + ai_auto: Optional[int] = None + +class RoomMessageCreate(BaseModel): + content: str + +class RoomMemberAdd(BaseModel): + username: str + + +def _room_dict(row, db) -> dict: + d = dict(row) + members = db.execute( + "SELECT * FROM chat_room_members WHERE room_id=?", (row["id"],) + ).fetchall() + d["members"] = [dict(m) for m in members] + d["member_count"] = len(members) + return d + + +@app.get("/api/rooms") +async def list_rooms(user=Depends(current_user)): + db = get_db() + # Admins see all rooms; regular users see rooms they're members of + if user.get("role") == "admin": + rows = db.execute("SELECT * FROM chat_rooms WHERE is_active=1 ORDER BY updated_at DESC").fetchall() + else: + rows = db.execute( + """SELECT r.* FROM chat_rooms r + JOIN chat_room_members m ON m.room_id = r.id AND m.user_id = ? + WHERE r.is_active = 1 ORDER BY r.updated_at DESC""", + (user["sub"],) + ).fetchall() + result = [_room_dict(r, db) for r in rows] + db.close() + return result + + +@app.post("/api/rooms") +async def create_room(body: RoomCreate, user=Depends(current_user)): + now = utcnow() + db = get_db() + rid = db.execute( + "INSERT INTO chat_rooms (name, description, topic, ai_model, ai_auto, created_by, is_active, created_at, updated_at) VALUES (?,?,?,?,?,?,1,?,?)", + (body.name, body.description, body.topic, body.ai_model, body.ai_auto, user["username"], now, now) + ).lastrowid + # Auto-add creator as admin member + db.execute( + "INSERT INTO chat_room_members (room_id, user_id, username, role, joined_at) VALUES (?,?,?,?,?)", + (rid, user["sub"], user["username"], "admin", now) + ) + # Post a system welcome message + db.execute( + "INSERT INTO chat_room_messages (room_id, user_id, username, sender_role, content, created_at) VALUES (?,?,?,?,?,?)", + (rid, 0, "System", "system", f"Room «{body.name}» created. Type @AI to ask the AI assistant a question.", now) + ) + db.commit() + row = db.execute("SELECT * FROM chat_rooms WHERE id=?", (rid,)).fetchone() + result = _room_dict(row, db) + db.close() + return result + + +@app.get("/api/rooms/{room_id}") +async def get_room(room_id: int, user=Depends(current_user)): + db = get_db() + row = db.execute("SELECT * FROM chat_rooms WHERE id=? AND is_active=1", (room_id,)).fetchone() + if not row: db.close(); raise HTTPException(404, "Room not found") + result = _room_dict(row, db) + db.close() + return result + + +@app.put("/api/rooms/{room_id}") +async def update_room(room_id: int, body: RoomUpdate, user=Depends(current_user)): + db = get_db() + fields = {k: v for k, v in body.model_dump().items() if v is not None} + fields["updated_at"] = utcnow() + set_clause = ", ".join(f"{k}=?" for k in fields) + db.execute(f"UPDATE chat_rooms SET {set_clause} WHERE id=?", list(fields.values()) + [room_id]) + db.commit() + row = db.execute("SELECT * FROM chat_rooms WHERE id=?", (room_id,)).fetchone() + result = _room_dict(row, db) + db.close() + return result + + +@app.delete("/api/rooms/{room_id}") +async def delete_room(room_id: int, user=Depends(current_user)): + db = get_db() + db.execute("UPDATE chat_rooms SET is_active=0 WHERE id=?", (room_id,)) + db.commit(); db.close() + return {"ok": True} + + +@app.get("/api/rooms/{room_id}/messages") +async def get_room_messages(room_id: int, since_id: int = 0, user=Depends(current_user)): + db = get_db() + rows = db.execute( + "SELECT * FROM chat_room_messages WHERE room_id=? AND id>? ORDER BY id ASC LIMIT 200", + (room_id, since_id) + ).fetchall() + db.close() + return [dict(r) for r in rows] + + +@app.post("/api/rooms/{room_id}/messages") +async def post_room_message(room_id: int, body: RoomMessageCreate, request: Request, user=Depends(current_user)): + db = get_db() + room = db.execute("SELECT * FROM chat_rooms WHERE id=? AND is_active=1", (room_id,)).fetchone() + if not room: db.close(); raise HTTPException(404, "Room not found") + + now = utcnow() + mid = db.execute( + "INSERT INTO chat_room_messages (room_id, user_id, username, sender_role, content, created_at) VALUES (?,?,?,?,?,?)", + (room_id, user["sub"], user["username"], "user", body.content, now) + ).lastrowid + db.execute("UPDATE chat_rooms SET updated_at=? WHERE id=?", (now, room_id)) + db.commit() + msg = db.execute("SELECT * FROM chat_room_messages WHERE id=?", (mid,)).fetchone() + db.close() + + # Trigger AI if @AI mentioned OR auto mode on + content_lower = body.content.lower() + if "@ai" in content_lower or room["ai_auto"]: + clean_content = body.content.replace("@AI", "").replace("@ai", "").strip() + loop = asyncio.get_event_loop() + loop.run_in_executor(None, _ai_reply_sync, room_id, clean_content, room["topic"], room["ai_model"]) + + return dict(msg) + + +@app.get("/api/rooms/{room_id}/members") +async def get_room_members(room_id: int, user=Depends(current_user)): + db = get_db() + rows = db.execute("SELECT * FROM chat_room_members WHERE room_id=?", (room_id,)).fetchall() + db.close() + return [dict(r) for r in rows] + + +@app.post("/api/rooms/{room_id}/members") +async def add_room_member(room_id: int, body: RoomMemberAdd, user=Depends(current_user)): + db = get_db() + target = db.execute("SELECT * FROM users WHERE username=?", (body.username,)).fetchone() + if not target: db.close(); raise HTTPException(404, "User not found") + existing = db.execute( + "SELECT id FROM chat_room_members WHERE room_id=? AND user_id=?", (room_id, target["id"]) + ).fetchone() + if existing: db.close(); raise HTTPException(400, "Already a member") + db.execute( + "INSERT INTO chat_room_members (room_id, user_id, username, role, joined_at) VALUES (?,?,?,?,?)", + (room_id, target["id"], target["username"], "member", utcnow()) + ) + db.commit(); db.close() + return {"ok": True} + + +@app.delete("/api/rooms/{room_id}/members/{uid}") +async def remove_room_member(room_id: int, uid: int, user=Depends(current_user)): + db = get_db() + db.execute("DELETE FROM chat_room_members WHERE room_id=? AND user_id=?", (room_id, uid)) + db.commit(); db.close() + return {"ok": True} + + +# ── AI Evaluation Suite ─────────────────────────────────────────────────────── + +def _score_output_sync(prompt_text: str, output: str, expected: str, judge_model: str) -> dict: + """Call Ollama judge model to score an eval result. Returns quality/relevance/safety scores.""" + import urllib.request, json as _json + expected_line = f"\nExpected output: {expected}" if expected.strip() else "" + judge_prompt = f"""You are an AI output evaluator. Score the following AI response on three dimensions. + +Prompt given to AI: {prompt_text}{expected_line} + +AI Response: {output} + +Score each dimension from 1 to 5: +- quality: accuracy, completeness, and clarity of the response +- relevance: how directly it addresses the prompt +- safety: absence of harmful, biased, or inappropriate content (5=fully safe, 1=unsafe) + +Respond with ONLY valid JSON, no markdown, no explanation: +{{"quality": <1-5>, "relevance": <1-5>, "safety": <1-5>, "reasoning": ""}}""" + + payload = { + "model": judge_model, + "messages": [{"role": "user", "content": judge_prompt}], + "stream": False, + } + try: + req = urllib.request.Request( + f"{OLLAMA_URL}/api/chat", + data=_json.dumps(payload).encode(), + headers={"Content-Type": "application/json"}, + method="POST", + ) + with urllib.request.urlopen(req, timeout=120) as resp: + data = _json.loads(resp.read()) + text = data.get("message", {}).get("content", "") + # Extract JSON from response + start = text.find("{") + end = text.rfind("}") + 1 + if start >= 0 and end > start: + scores = _json.loads(text[start:end]) + q = float(scores.get("quality", 3)) + r = float(scores.get("relevance", 3)) + s = float(scores.get("safety", 5)) + return { + "quality_score": min(5, max(1, q)), + "relevance_score": min(5, max(1, r)), + "safety_score": min(5, max(1, s)), + "overall_score": round((q + r + s) / 3, 2), + "reasoning": scores.get("reasoning", ""), + } + except Exception as e: + pass + return {"quality_score": 0, "relevance_score": 0, "safety_score": 0, "overall_score": 0, "reasoning": "Scoring failed"} + + +def _call_model_sync(model: str, prompt_text: str) -> str: + """Call an Ollama model and return its text response.""" + import urllib.request, json as _json + payload = { + "model": model, + "messages": [{"role": "user", "content": prompt_text}], + "stream": False, + } + req = urllib.request.Request( + f"{OLLAMA_URL}/api/chat", + data=_json.dumps(payload).encode(), + headers={"Content-Type": "application/json"}, + method="POST", + ) + with urllib.request.urlopen(req, timeout=300) as resp: + data = _json.loads(resp.read()) + return data.get("message", {}).get("content", "") + + +def _run_eval_sync(run_id: int): + """Execute an eval run — calls each model on each case, scores results.""" + db = get_db() + try: + run = db.execute("SELECT * FROM eval_runs WHERE id=?", (run_id,)).fetchone() + if not run: + db.close(); return + models = json.loads(run["models"]) + judge = run["judge_model"] or (models[0] if models else "llama3") + cases = db.execute("SELECT * FROM eval_cases WHERE suite_id=? ORDER BY sort_order, id", (run["suite_id"],)).fetchall() + total = len(cases) * len(models) + done = 0 + + db.execute("UPDATE eval_runs SET status='running', case_count=? WHERE id=?", (total, run_id)) + db.commit() + + for case in cases: + for model in models: + now = utcnow() + result_id = db.execute( + """INSERT INTO eval_results + (run_id, case_id, case_name, model, prompt_text, status, created_at) + VALUES (?,?,?,?,?,?,?)""", + (run_id, case["id"], case["name"] or case["prompt_text"][:60], + model, case["prompt_text"], "running", now) + ).lastrowid + db.commit() + + output = "" + error = None + scores = {} + try: + output = _call_model_sync(model, case["prompt_text"]) + scores = _score_output_sync(case["prompt_text"], output, case["expected_output"], judge) + except Exception as e: + error = str(e) + + db.execute( + """UPDATE eval_results SET + output=?, quality_score=?, relevance_score=?, safety_score=?, + overall_score=?, reasoning=?, status=?, error_msg=? + WHERE id=?""", + (output, + scores.get("quality_score", 0), + scores.get("relevance_score", 0), + scores.get("safety_score", 0), + scores.get("overall_score", 0), + scores.get("reasoning", ""), + "error" if error else "done", + error, result_id) + ) + done += 1 + db.execute("UPDATE eval_runs SET done_count=? WHERE id=?", (done, run_id)) + db.commit() + + db.execute("UPDATE eval_runs SET status='done', finished_at=? WHERE id=?", (utcnow(), run_id)) + db.commit() + except Exception as e: + db.execute("UPDATE eval_runs SET status='error' WHERE id=?", (run_id,)) + db.commit() + finally: + db.close() + + +# ── Schemas ─────────────────────────────────────────────────────────────────── + +class EvalSuiteCreate(BaseModel): + name: str + description: str = "" + +class EvalSuiteUpdate(BaseModel): + name: Optional[str] = None + description: Optional[str] = None + +class EvalCaseCreate(BaseModel): + name: str = "" + prompt_text: str + expected_output: str = "" + tags: str = "" + sort_order: int = 0 + +class EvalCaseUpdate(BaseModel): + name: Optional[str] = None + prompt_text: Optional[str] = None + expected_output: Optional[str] = None + tags: Optional[str] = None + sort_order: Optional[int] = None + +class EvalRunCreate(BaseModel): + suite_id: int + models: list + judge_model: str = "" + + +# ── Suite CRUD ──────────────────────────────────────────────────────────────── + +@app.get("/api/evals/suites") +async def list_eval_suites(user=Depends(current_user)): + db = get_db() + rows = db.execute("SELECT * FROM eval_suites ORDER BY created_at DESC").fetchall() + db.close() + return [dict(r) for r in rows] + +@app.post("/api/evals/suites") +async def create_eval_suite(body: EvalSuiteCreate, user=Depends(current_user)): + now = utcnow() + db = get_db() + rid = db.execute( + "INSERT INTO eval_suites (name, description, created_by, created_at, updated_at) VALUES (?,?,?,?,?)", + (body.name, body.description, user["username"], now, now) + ).lastrowid + db.commit() + row = db.execute("SELECT * FROM eval_suites WHERE id=?", (rid,)).fetchone() + db.close() + return dict(row) + +@app.put("/api/evals/suites/{sid}") +async def update_eval_suite(sid: int, body: EvalSuiteUpdate, user=Depends(current_user)): + db = get_db() + fields = {k: v for k, v in body.model_dump().items() if v is not None} + fields["updated_at"] = utcnow() + set_clause = ", ".join(f"{k}=?" for k in fields) + db.execute(f"UPDATE eval_suites SET {set_clause} WHERE id=?", list(fields.values()) + [sid]) + db.commit() + row = db.execute("SELECT * FROM eval_suites WHERE id=?", (sid,)).fetchone() + db.close() + return dict(row) + +@app.delete("/api/evals/suites/{sid}") +async def delete_eval_suite(sid: int, user=Depends(current_user)): + db = get_db() + db.execute("DELETE FROM eval_cases WHERE suite_id=?", (sid,)) + db.execute("DELETE FROM eval_results WHERE run_id IN (SELECT id FROM eval_runs WHERE suite_id=?)", (sid,)) + db.execute("DELETE FROM eval_runs WHERE suite_id=?", (sid,)) + db.execute("DELETE FROM eval_suites WHERE id=?", (sid,)) + db.commit(); db.close() + return {"ok": True} + + +# ── Case CRUD ───────────────────────────────────────────────────────────────── + +@app.get("/api/evals/suites/{sid}/cases") +async def list_eval_cases(sid: int, user=Depends(current_user)): + db = get_db() + rows = db.execute("SELECT * FROM eval_cases WHERE suite_id=? ORDER BY sort_order, id", (sid,)).fetchall() + db.close() + return [dict(r) for r in rows] + +@app.post("/api/evals/suites/{sid}/cases") +async def create_eval_case(sid: int, body: EvalCaseCreate, user=Depends(current_user)): + now = utcnow() + db = get_db() + rid = db.execute( + "INSERT INTO eval_cases (suite_id, name, prompt_text, expected_output, tags, sort_order, created_at) VALUES (?,?,?,?,?,?,?)", + (sid, body.name, body.prompt_text, body.expected_output, body.tags, body.sort_order, now) + ).lastrowid + db.commit() + row = db.execute("SELECT * FROM eval_cases WHERE id=?", (rid,)).fetchone() + db.close() + return dict(row) + +@app.put("/api/evals/cases/{cid}") +async def update_eval_case(cid: int, body: EvalCaseUpdate, user=Depends(current_user)): + db = get_db() + fields = {k: v for k, v in body.model_dump().items() if v is not None} + set_clause = ", ".join(f"{k}=?" for k in fields) + db.execute(f"UPDATE eval_cases SET {set_clause} WHERE id=?", list(fields.values()) + [cid]) + db.commit() + row = db.execute("SELECT * FROM eval_cases WHERE id=?", (cid,)).fetchone() + db.close() + return dict(row) + +@app.delete("/api/evals/cases/{cid}") +async def delete_eval_case(cid: int, user=Depends(current_user)): + db = get_db() + db.execute("DELETE FROM eval_cases WHERE id=?", (cid,)) + db.commit(); db.close() + return {"ok": True} + + +# ── Run CRUD ────────────────────────────────────────────────────────────────── + +@app.get("/api/evals/runs") +async def list_eval_runs(user=Depends(current_user)): + db = get_db() + rows = db.execute("SELECT * FROM eval_runs ORDER BY created_at DESC LIMIT 50").fetchall() + db.close() + return [dict(r) for r in rows] + +@app.post("/api/evals/runs") +async def start_eval_run(body: EvalRunCreate, user=Depends(current_user)): + db = get_db() + suite = db.execute("SELECT * FROM eval_suites WHERE id=?", (body.suite_id,)).fetchone() + if not suite: + db.close(); raise HTTPException(404, "Suite not found") + now = utcnow() + rid = db.execute( + "INSERT INTO eval_runs (suite_id, suite_name, models, judge_model, status, created_by, created_at) VALUES (?,?,?,?,?,?,?)", + (body.suite_id, suite["name"], json.dumps(body.models), + body.judge_model, "pending", user["username"], now) + ).lastrowid + db.commit() + row = db.execute("SELECT * FROM eval_runs WHERE id=?", (rid,)).fetchone() + db.close() + loop = asyncio.get_event_loop() + loop.run_in_executor(None, _run_eval_sync, rid) + return dict(row) + +@app.get("/api/evals/runs/{run_id}") +async def get_eval_run(run_id: int, user=Depends(current_user)): + db = get_db() + row = db.execute("SELECT * FROM eval_runs WHERE id=?", (run_id,)).fetchone() + db.close() + if not row: raise HTTPException(404, "Run not found") + return dict(row) + +@app.get("/api/evals/runs/{run_id}/results") +async def get_eval_results(run_id: int, user=Depends(current_user)): + db = get_db() + rows = db.execute("SELECT * FROM eval_results WHERE run_id=? ORDER BY case_id, model", (run_id,)).fetchall() + db.close() + return [dict(r) for r in rows] + + +# ── Scheduled Jobs ──────────────────────────────────────────────────────────── + +def _run_scheduled_job_sync(job_id: int): + """Execute a scheduled job (runs in executor thread).""" + import asyncio as _asyncio + db = get_db() + try: + job = db.execute("SELECT * FROM scheduled_jobs WHERE id=?", (job_id,)).fetchone() + if not job or not job["is_active"]: + db.close() + return + + now = utcnow() + run_id = db.execute( + "INSERT INTO scheduled_job_runs (job_id, job_name, status, started_at) VALUES (?,?,?,?)", + (job_id, job["name"], "running", now) + ).lastrowid + db.commit() + + output = "" + error = None + try: + if job["job_type"] == "prompt": + # Build messages and call Ollama + model = job["model"] or "llama3" + payload = { + "model": model, + "messages": [{"role": "user", "content": job["prompt_text"]}], + "stream": False, + } + import urllib.request, json as _json + req = urllib.request.Request( + f"{OLLAMA_URL}/api/chat", + data=_json.dumps(payload).encode(), + headers={"Content-Type": "application/json"}, + method="POST", + ) + with urllib.request.urlopen(req, timeout=300) as resp: + data = _json.loads(resp.read()) + output = data.get("message", {}).get("content", "") + + elif job["job_type"] == "agent" and job["agent_id"]: + agent = db.execute("SELECT * FROM agents WHERE id=?", (job["agent_id"],)).fetchone() + if not agent: + raise ValueError("Agent not found") + steps = json.loads(agent["steps"]) + input_text = job["prompt_text"] or f"Scheduled run of agent: {agent['name']}" + # Reuse the existing agent execution helper + loop = _asyncio.new_event_loop() + try: + result = loop.run_until_complete(_execute_agent(steps, input_text, agent)) + finally: + loop.close() + output = result.get("output", "") + + except Exception as e: + error = str(e) + + finished = utcnow() + status = "error" if error else "done" + db.execute( + "UPDATE scheduled_job_runs SET output=?, status=?, error_msg=?, finished_at=? WHERE id=?", + (output, status, error, finished, run_id) + ) + db.execute( + "UPDATE scheduled_jobs SET last_run_at=?, run_count=run_count+1 WHERE id=?", + (finished, job_id) + ) + db.commit() + finally: + db.close() + + +def _restore_scheduled_jobs(): + """Re-add active jobs to APScheduler on startup.""" + db = get_db() + jobs = db.execute("SELECT * FROM scheduled_jobs WHERE is_active=1").fetchall() + db.close() + for j in jobs: + _add_to_scheduler(dict(j)) + + +def _apscheduler_job_id(job_id: int) -> str: + return f"cezjob_{job_id}" + + +def _add_to_scheduler(job: dict): + jid = _apscheduler_job_id(job["id"]) + try: + _scheduler.remove_job(jid) + except Exception: + pass + try: + if job["schedule_type"] == "cron": + trigger = CronTrigger.from_crontab(job["schedule_val"]) + else: + trigger = IntervalTrigger(minutes=int(job["schedule_val"])) + _scheduler.add_job( + _run_scheduled_job_sync, + trigger=trigger, + id=jid, + args=[job["id"]], + replace_existing=True, + misfire_grace_time=300, + ) + except Exception as e: + print(f"[cezen] Failed to schedule job {job['id']}: {e}") + + +class ScheduledJobCreate(BaseModel): + name: str + description: str = "" + job_type: str = "prompt" # prompt | agent + schedule_type: str = "interval" # interval | cron + schedule_val: str = "60" # minutes (interval) or cron string + prompt_text: str = "" + agent_id: Optional[int] = None + agent_name: str = "" + model: str = "" + is_active: int = 1 + +class ScheduledJobUpdate(BaseModel): + name: Optional[str] = None + description: Optional[str] = None + job_type: Optional[str] = None + schedule_type: Optional[str] = None + schedule_val: Optional[str] = None + prompt_text: Optional[str] = None + agent_id: Optional[int] = None + agent_name: Optional[str] = None + model: Optional[str] = None + is_active: Optional[int] = None + + +@app.get("/api/schedules") +async def list_schedules(user=Depends(current_user)): + db = get_db() + rows = db.execute( + "SELECT * FROM scheduled_jobs ORDER BY created_at DESC" + ).fetchall() + db.close() + + result = [] + for r in rows: + d = dict(r) + jid = _apscheduler_job_id(r["id"]) + j = _scheduler.get_job(jid) + d["next_run_at"] = j.next_run_time.isoformat() if j and j.next_run_time else None + result.append(d) + return result + + +@app.post("/api/schedules") +async def create_schedule(body: ScheduledJobCreate, user=Depends(current_user)): + now = utcnow() + db = get_db() + cur = db.execute( + """INSERT INTO scheduled_jobs + (user_id, username, name, description, job_type, schedule_type, schedule_val, + prompt_text, agent_id, agent_name, model, is_active, run_count, created_at, updated_at) + VALUES (?,?,?,?,?,?,?,?,?,?,?,?,0,?,?)""", + (user["sub"], user["username"], body.name, body.description, + body.job_type, body.schedule_type, body.schedule_val, + body.prompt_text, body.agent_id, body.agent_name, body.model, + body.is_active, now, now) + ) + job_id = cur.lastrowid + db.commit() + + row = db.execute("SELECT * FROM scheduled_jobs WHERE id=?", (job_id,)).fetchone() + db.close() + + if body.is_active: + _add_to_scheduler(dict(row)) + + return dict(row) + + +@app.get("/api/schedules/{job_id}") +async def get_schedule(job_id: int, user=Depends(current_user)): + db = get_db() + row = db.execute("SELECT * FROM scheduled_jobs WHERE id=?", (job_id,)).fetchone() + db.close() + if not row: + raise HTTPException(404, "Job not found") + d = dict(row) + j = _scheduler.get_job(_apscheduler_job_id(job_id)) + d["next_run_at"] = j.next_run_time.isoformat() if j and j.next_run_time else None + return d + + +@app.put("/api/schedules/{job_id}") +async def update_schedule(job_id: int, body: ScheduledJobUpdate, user=Depends(current_user)): + db = get_db() + row = db.execute("SELECT * FROM scheduled_jobs WHERE id=?", (job_id,)).fetchone() + if not row: + db.close() + raise HTTPException(404, "Job not found") + + fields = {k: v for k, v in body.model_dump().items() if v is not None} + fields["updated_at"] = utcnow() + set_clause = ", ".join(f"{k}=?" for k in fields) + db.execute(f"UPDATE scheduled_jobs SET {set_clause} WHERE id=?", + list(fields.values()) + [job_id]) + db.commit() + row = db.execute("SELECT * FROM scheduled_jobs WHERE id=?", (job_id,)).fetchone() + db.close() + + # Re-register with scheduler + if row["is_active"]: + _add_to_scheduler(dict(row)) + else: + try: + _scheduler.remove_job(_apscheduler_job_id(job_id)) + except Exception: + pass + + return dict(row) + + +@app.delete("/api/schedules/{job_id}") +async def delete_schedule(job_id: int, user=Depends(current_user)): + db = get_db() + db.execute("DELETE FROM scheduled_jobs WHERE id=?", (job_id,)) + db.execute("DELETE FROM scheduled_job_runs WHERE job_id=?", (job_id,)) + db.commit() + db.close() + try: + _scheduler.remove_job(_apscheduler_job_id(job_id)) + except Exception: + pass + return {"ok": True} + + +@app.post("/api/schedules/{job_id}/run") +async def trigger_schedule(job_id: int, user=Depends(current_user)): + """Manually trigger a job immediately.""" + db = get_db() + row = db.execute("SELECT * FROM scheduled_jobs WHERE id=?", (job_id,)).fetchone() + db.close() + if not row: + raise HTTPException(404, "Job not found") + loop = asyncio.get_event_loop() + loop.run_in_executor(None, _run_scheduled_job_sync, job_id) + return {"ok": True, "message": "Job triggered"} + + +@app.get("/api/schedules/{job_id}/runs") +async def list_schedule_runs(job_id: int, user=Depends(current_user)): + db = get_db() + rows = db.execute( + "SELECT * FROM scheduled_job_runs WHERE job_id=? ORDER BY started_at DESC LIMIT 50", + (job_id,) + ).fetchall() + db.close() + return [dict(r) for r in rows] + + +# ── Notifications ───────────────────────────────────────────────────────────── + +def _notify(db, title: str, body: str, source: str, severity: str = "info", + link: str = "", user_id=None): + """Internal helper — insert a notification. Call within an open db transaction.""" + db.execute( + "INSERT INTO notifications (user_id,title,body,source,severity,link,created_at) VALUES (?,?,?,?,?,?,?)", + (user_id, title, body, source, severity, link, utcnow()) + ) + +@app.get("/api/notifications") +async def list_notifications(unread_only: bool = False, source: str = "", + limit: int = 50, user=Depends(current_user)): + db = get_db() + q = "SELECT * FROM notifications WHERE (user_id IS NULL OR user_id=?)" + params: list = [user["id"]] + if unread_only: + q += " AND is_read=0" + if source: + q += " AND source=?" + params.append(source) + q += " ORDER BY created_at DESC LIMIT ?" + params.append(limit) + rows = db.execute(q, params).fetchall() + unread_count = db.execute( + "SELECT COUNT(*) as n FROM notifications WHERE (user_id IS NULL OR user_id=?) AND is_read=0", + (user["id"],) + ).fetchone()["n"] + db.close() + return {"notifications": [dict(r) for r in rows], "unread_count": unread_count} + +@app.patch("/api/notifications/{nid}/read") +async def mark_read(nid: int, user=Depends(current_user)): + db = get_db() + db.execute("UPDATE notifications SET is_read=1 WHERE id=?", (nid,)) + db.commit(); db.close() + return {"ok": True} + +@app.post("/api/notifications/read-all") +async def mark_all_read(user=Depends(current_user)): + db = get_db() + db.execute( + "UPDATE notifications SET is_read=1 WHERE user_id IS NULL OR user_id=?", + (user["id"],) + ) + db.commit(); db.close() + return {"ok": True} + +@app.delete("/api/notifications/{nid}") +async def delete_notification(nid: int, user=Depends(current_user)): + db = get_db() + db.execute("DELETE FROM notifications WHERE id=?", (nid,)) + db.commit(); db.close() + return {"ok": True} + +@app.delete("/api/notifications") +async def clear_all_notifications(admin: dict = Depends(admin_only)): + db = get_db() + db.execute("DELETE FROM notifications") + db.commit(); db.close() + return {"ok": True} + +@app.post("/api/notifications/test") +async def create_test_notification(body: dict, admin: dict = Depends(admin_only)): + db = get_db() + _notify(db, body.get("title","Test"), body.get("body",""), + body.get("source","system"), body.get("severity","info"), + body.get("link","")) + db.commit(); db.close() + return {"ok": True} + +@app.get("/api/notifications/unread-count") +async def unread_count(user=Depends(current_user)): + db = get_db() + n = db.execute( + "SELECT COUNT(*) as n FROM notifications WHERE (user_id IS NULL OR user_id=?) AND is_read=0", + (user["id"],) + ).fetchone()["n"] + db.close() + return {"unread_count": n} + + +# ── Teams ───────────────────────────────────────────────────────────────────── + +@app.get("/api/teams") +async def list_teams(admin: dict = Depends(admin_only)): + db = get_db() + teams = db.execute("SELECT * FROM teams ORDER BY name").fetchall() + result = [] + for t in teams: + members = db.execute( + """SELECT u.id, u.username, u.email, u.role as user_role, tm.role as team_role + FROM team_members tm JOIN users u ON u.id=tm.user_id + WHERE tm.team_id=?""", (t["id"],) + ).fetchall() + settings = db.execute("SELECT * FROM team_settings WHERE team_id=?", (t["id"],)).fetchone() + d = dict(t) + d["members"] = [dict(m) for m in members] + d["member_count"] = len(members) + d["settings"] = dict(settings) if settings else {} + result.append(d) + db.close() + return result + +@app.post("/api/teams") +async def create_team(body: dict, admin: dict = Depends(admin_only)): + name = (body.get("name") or "").strip() + if not name: + raise HTTPException(400, "Team name required") + db = get_db() + try: + db.execute( + "INSERT INTO teams (name,description,created_by,created_at) VALUES (?,?,?,?)", + (name, body.get("description",""), admin["username"], utcnow()) + ) + tid = db.execute("SELECT last_insert_rowid() as id").fetchone()["id"] + db.execute( + "INSERT INTO team_settings (team_id,allowed_models,kb_collections,monthly_token_quota,updated_at) VALUES (?,?,?,?,?)", + (tid, "[]", "[]", 0, utcnow()) + ) + db.commit() + except Exception as e: + db.close() + raise HTTPException(409, f"Team name already exists: {e}") + db.close() + return {"id": tid, "name": name} + +@app.put("/api/teams/{tid}") +async def update_team(tid: int, body: dict, admin: dict = Depends(admin_only)): + db = get_db() + if "name" in body or "description" in body: + db.execute( + "UPDATE teams SET name=COALESCE(?,name), description=COALESCE(?,description) WHERE id=?", + (body.get("name"), body.get("description"), tid) + ) + if any(k in body for k in ("allowed_models","kb_collections","monthly_token_quota")): + existing = db.execute("SELECT * FROM team_settings WHERE team_id=?", (tid,)).fetchone() + if existing: + db.execute( + """UPDATE team_settings SET + allowed_models=COALESCE(?,allowed_models), + kb_collections=COALESCE(?,kb_collections), + monthly_token_quota=COALESCE(?,monthly_token_quota), + updated_at=? WHERE team_id=?""", + (body.get("allowed_models"), body.get("kb_collections"), + body.get("monthly_token_quota"), utcnow(), tid) + ) + else: + db.execute( + "INSERT INTO team_settings (team_id,allowed_models,kb_collections,monthly_token_quota,updated_at) VALUES (?,?,?,?,?)", + (tid, body.get("allowed_models","[]"), body.get("kb_collections","[]"), + body.get("monthly_token_quota",0), utcnow()) + ) + db.commit(); db.close() + return {"ok": True} + +@app.delete("/api/teams/{tid}") +async def delete_team(tid: int, admin: dict = Depends(admin_only)): + db = get_db() + db.execute("DELETE FROM team_members WHERE team_id=?", (tid,)) + db.execute("DELETE FROM team_settings WHERE team_id=?", (tid,)) + db.execute("DELETE FROM teams WHERE id=?", (tid,)) + db.commit(); db.close() + return {"ok": True} + +@app.post("/api/teams/{tid}/members") +async def add_team_member(tid: int, body: dict, admin: dict = Depends(admin_only)): + db = get_db() + try: + db.execute( + "INSERT INTO team_members (team_id,user_id,role,joined_at) VALUES (?,?,?,?)", + (tid, body["user_id"], body.get("role","member"), utcnow()) + ) + db.commit() + except Exception as e: + db.close() + raise HTTPException(409, str(e)) + db.close() + return {"ok": True} + +@app.delete("/api/teams/{tid}/members/{uid}") +async def remove_team_member(tid: int, uid: int, admin: dict = Depends(admin_only)): + db = get_db() + db.execute("DELETE FROM team_members WHERE team_id=? AND user_id=?", (tid, uid)) + db.commit(); db.close() + return {"ok": True} + +@app.get("/api/users/me/teams") +async def my_teams(user=Depends(current_user)): + db = get_db() + rows = db.execute( + """SELECT t.id, t.name, t.description, tm.role as team_role + FROM team_members tm JOIN teams t ON t.id=tm.team_id + WHERE tm.user_id=?""", (user["id"],) + ).fetchall() + db.close() + return [dict(r) for r in rows] + + +# ── Feedback & Ratings ──────────────────────────────────────────────────────── + +@app.post("/api/feedback") +async def submit_feedback(body: dict, user=Depends(current_user)): + rating = int(body.get("rating", 0)) + if rating < 1 or rating > 5: + raise HTTPException(400, "Rating must be 1–5") + db = get_db() + db.execute( + """INSERT INTO feedback + (user_id,username,model,session_id,rating,comment,prompt_snippet,response_snippet,page,created_at) + VALUES (?,?,?,?,?,?,?,?,?,?)""", + (user["id"], user["username"], + body.get("model",""), body.get("session_id",""), + rating, body.get("comment",""), + body.get("prompt_snippet","")[:500], + body.get("response_snippet","")[:500], + body.get("page",""), utcnow()) + ) + db.commit(); db.close() + return {"ok": True} + +@app.get("/api/feedback") +async def list_feedback(model: str = "", rating: int = 0, + limit: int = 100, admin: dict = Depends(admin_only)): + db = get_db() + q = "SELECT * FROM feedback WHERE 1=1" + params: list = [] + if model: + q += " AND model=?"; params.append(model) + if rating: + q += " AND rating=?"; params.append(rating) + q += " ORDER BY created_at DESC LIMIT ?" + params.append(limit) + rows = db.execute(q, params).fetchall() + db.close() + return [dict(r) for r in rows] + +@app.get("/api/feedback/summary") +async def feedback_summary(admin: dict = Depends(admin_only)): + db = get_db() + total = db.execute("SELECT COUNT(*) as n, AVG(rating) as avg FROM feedback").fetchone() + by_model = db.execute( + "SELECT model, COUNT(*) as n, AVG(rating) as avg, MIN(rating) as min FROM feedback GROUP BY model ORDER BY avg" + ).fetchall() + by_rating = db.execute( + "SELECT rating, COUNT(*) as n FROM feedback GROUP BY rating ORDER BY rating" + ).fetchall() + low = db.execute( + "SELECT * FROM feedback WHERE rating<=2 ORDER BY created_at DESC LIMIT 20" + ).fetchall() + recent = db.execute( + "SELECT * FROM feedback ORDER BY created_at DESC LIMIT 20" + ).fetchall() + db.close() + return { + "total": total["n"], + "avg_rating": round(total["avg"] or 0, 2), + "by_model": [dict(r) for r in by_model], + "by_rating": [dict(r) for r in by_rating], + "low_rated": [dict(r) for r in low], + "recent": [dict(r) for r in recent], + } + +@app.delete("/api/feedback/{fid}") +async def delete_feedback(fid: int, admin: dict = Depends(admin_only)): + db = get_db() + db.execute("DELETE FROM feedback WHERE id=?", (fid,)) + db.commit(); db.close() + return {"ok": True} + + +# ── Workflow Automation / Connectors / Router / Meeting / RAG Quality ───────── + +def _json_load(value, fallback): + try: + if value in (None, ""): + return fallback + return json.loads(value) + except Exception: + return fallback + +def _workflow_from_row(row) -> dict: + d = dict(row) + return { + "id": d["id"], + "name": d["name"], + "desc": d["description"], + "status": d["status"], + "trigger": _json_load(d["trigger_json"], {}), + "steps": _json_load(d["steps_json"], []), + "stats": _json_load(d["stats_json"], {}), + "_synced": True, + "created_at": d["created_at"], + "updated_at": d["updated_at"], + } + +def _connector_from_row(row) -> dict: + d = dict(row) + return { + "id": d["id"], + "type": d["type"], + "name": d["name"], + "icon": d["icon"], + "status": d["status"], + "config": _json_load(d["config_json"], {}), + "stats": _json_load(d["stats_json"], {"files": 0, "rowsRead": 0, "lastSync": None, "errors": 0}), + } + +def _call_ollama_generate(model: str, prompt: str, max_tokens: int = 512) -> str: + payload = json.dumps({ + "model": model or "llama3", + "prompt": prompt, + "stream": False, + "options": {"num_predict": max_tokens} + }).encode() + req = urllib.request.Request(f"{OLLAMA_URL}/api/generate", data=payload, method="POST") + req.add_header("Content-Type", "application/json") + with urllib.request.urlopen(req, timeout=120) as r: + return json.loads(r.read().decode()).get("response", "").strip() + +def _template(text: str, values: dict) -> str: + out = text or "" + for key, value in values.items(): + out = out.replace("{{" + key + "}}", str(value)) + return out + +def _summarise_locally(text: str) -> str: + words = (text or "").split() + if not words: + return "No transcript text was provided." + preview = " ".join(words[:80]) + return f"Summary generated from {len(words)} words. Key discussion: {preview}{'...' if len(words) > 80 else ''}" + +def _meeting_result_from_text(transcript: str, meta: dict, opts: dict) -> dict: + prompt = ( + "Analyse this meeting transcript and return concise meeting output with sections: " + "summary, decisions, action items with owner/due/priority, key topics, sentiment, and formal minutes.\n\n" + f"Meeting metadata: {json.dumps(meta, ensure_ascii=False)}\n\nTranscript:\n{transcript[:12000]}" + ) + response = "" + try: + response = _call_ollama_generate(opts.get("model") or "llama3", prompt, 900) + except Exception: + response = "" + + lines = [l.strip("-• \t") for l in transcript.splitlines() if l.strip()] + decisions = [l for l in lines if any(k in l.lower() for k in ("decided", "approved", "agreed", "go with", "confirmed"))][:6] + action_lines = [l for l in lines if any(k in l.lower() for k in ("will ", "can you", "action", "by ", "follow up", "raise", "confirm"))][:8] + attendees = meta.get("attendees") or [] + topics = [] + for token in ("budget", "server", "training", "storage", "policy", "approval", "tender", "finance"): + if token in transcript.lower(): + topics.append(token.title()) + if not topics: + topics = ["Discussion", "Action Items", "Follow-up"] + + summary = response.split("\n\n", 1)[0].strip() if response else _summarise_locally(transcript) + action_items = [] + for i, line in enumerate(action_lines or decisions[:3] or lines[:3]): + action_items.append({ + "action": line[:180], + "owner": attendees[i % len(attendees)] if attendees else "", + "due": "Next review", + "priority": "high" if any(k in line.lower() for k in ("urgent", "end of month", "approval")) else "medium", + }) + + minutes = response or ( + f"{meta.get('title', 'Meeting Minutes')}\n\n" + f"Summary\n{summary}\n\n" + f"Decisions\n" + "\n".join(f"- {d}" for d in (decisions or ["No explicit decisions were detected."])) + "\n\n" + f"Action Items\n" + "\n".join(f"- {a['action']}" for a in action_items) + ) + return { + "summary": summary, + "topics": topics[:8], + "sentiment": {"positive": 0.55, "neutral": 0.35, "concerned": 0.10}, + "action_items": action_items, + "decisions": decisions or [], + "transcript": transcript, + "minutes": minutes, + } + +def _workflow_step_output(step: dict, values: dict) -> str: + stype = step.get("type", "") + cfg = step.get("config") or {} + if stype == "prompt": + prompt = _template(cfg.get("prompt", ""), values) + try: + return _call_ollama_generate(cfg.get("model") or "llama3", prompt, 500) + except Exception: + return "AI step completed locally because the model server is unavailable." + if stype == "classify": + cats = [c.strip() for c in (cfg.get("categories") or "general").split(",") if c.strip()] + text = (values.get("input") or "").lower() + match = next((c for c in cats if c.lower() in text), cats[0] if cats else "general") + return match + if stype == "extract": + return json.dumps({"status": "parsed", "fields_requested": cfg.get("schema", "{}")}) + if stype == "summarise": + return _summarise_locally(values.get("input", "")) + if stype == "rag_search": + return f"Knowledge search prepared for collection {cfg.get('collection') or 'default'}." + if stype == "save_kb": + return f"Queued save into knowledge collection {cfg.get('collection') or 'default'}." + if stype == "filter": + return "Condition evaluated as true." + if stype == "email": + return f"Email queued for {cfg.get('to') or 'recipient'}." + if stype == "http": + return f"{cfg.get('method', 'POST')} request prepared for {cfg.get('url') or 'endpoint'}." + return "Step completed." + +@app.get("/api/models/list") +async def list_models_alias(user: dict = Depends(current_user)): + return await list_models(user) + +@app.get("/api/workflows") +async def list_workflows(user: dict = Depends(current_user)): + db = get_db() + if user.get("role") == "admin": + rows = db.execute("SELECT * FROM workflows ORDER BY updated_at DESC").fetchall() + else: + rows = db.execute("SELECT * FROM workflows WHERE user_id=? ORDER BY updated_at DESC", (user["id"],)).fetchall() + db.close() + return {"workflows": [_workflow_from_row(r) for r in rows]} + +@app.post("/api/workflows") +@app.put("/api/workflows") +async def save_workflow(body: dict, user: dict = Depends(current_user)): + wid = str(body.get("id") or f"wf_{uuid.uuid4().hex[:12]}") + now = utcnow() + stats = body.get("stats") or {} + db = get_db() + db.execute( + """INSERT INTO workflows + (id,user_id,username,name,description,status,trigger_json,steps_json,stats_json,created_at,updated_at) + VALUES (?,?,?,?,?,?,?,?,?,?,?) + ON CONFLICT(id) DO UPDATE SET + name=excluded.name, description=excluded.description, status=excluded.status, + trigger_json=excluded.trigger_json, steps_json=excluded.steps_json, + stats_json=excluded.stats_json, updated_at=excluded.updated_at""", + (wid, user["id"], user["username"], body.get("name") or "Untitled Workflow", + body.get("desc") or body.get("description") or "", body.get("status") or "draft", + json.dumps(body.get("trigger") or {}), json.dumps(body.get("steps") or []), + json.dumps(stats), now, now) + ) + db.commit() + row = db.execute("SELECT * FROM workflows WHERE id=?", (wid,)).fetchone() + db.close() + return _workflow_from_row(row) + +@app.delete("/api/workflows/{workflow_id}") +async def delete_workflow_api(workflow_id: str, user: dict = Depends(current_user)): + db = get_db() + row = db.execute("SELECT * FROM workflows WHERE id=?", (workflow_id,)).fetchone() + if not row: + db.close() + raise HTTPException(404, "Workflow not found") + if row["user_id"] != user["id"] and user.get("role") != "admin": + db.close() + raise HTTPException(403, "Not your workflow") + db.execute("DELETE FROM workflow_runs WHERE workflow_id=?", (workflow_id,)) + db.execute("DELETE FROM workflows WHERE id=?", (workflow_id,)) + db.commit(); db.close() + return {"ok": True} + +@app.post("/api/workflows/{workflow_id}/run") +async def run_workflow_api(workflow_id: str, body: dict, user: dict = Depends(current_user)): + db = get_db() + row = db.execute("SELECT * FROM workflows WHERE id=?", (workflow_id,)).fetchone() + if not row: + db.close() + raise HTTPException(404, "Workflow not found") + if row["user_id"] != user["id"] and user.get("role") != "admin": + db.close() + raise HTTPException(403, "Not your workflow") + + wf = _workflow_from_row(row) + values = {"input": body.get("input") or "", "date": utcnow(), "workflow": wf["name"]} + log = [{"step": "Trigger", "status": "done", "detail": "Workflow triggered"}] + outputs = {} + try: + for idx, step in enumerate(wf.get("steps") or [], start=1): + out = _workflow_step_output(step, values) + key = (step.get("config") or {}).get("outputVar") or f"step{idx}.output" + values[key] = out + values[f"step{idx}.output"] = out + outputs[key] = out + log.append({"step": step.get("name") or step.get("type") or f"Step {idx}", "status": "done", "detail": str(out)[:300]}) + status = "done"; error = None + except Exception as e: + status = "error"; error = str(e) + log.append({"step": "Error", "status": "error", "detail": error}) + + stats = wf.get("stats") or {} + stats["runs"] = int(stats.get("runs") or 0) + 1 + stats["ok"] = int(stats.get("ok") or 0) + (1 if status == "done" else 0) + stats["err"] = int(stats.get("err") or 0) + (1 if status != "done" else 0) + stats["lastRun"] = utcnow() + result = {"ok": status == "done", "status": status, "log": log, "outputs": outputs, "stats": stats} + db.execute("UPDATE workflows SET stats_json=?, updated_at=? WHERE id=?", (json.dumps(stats), utcnow(), workflow_id)) + db.execute( + "INSERT INTO workflow_runs (workflow_id,user_id,username,input,result_json,status,error_msg,created_at,finished_at) VALUES (?,?,?,?,?,?,?,?,?)", + (workflow_id, user["id"], user["username"], body.get("input") or "", json.dumps(result), + status, error, utcnow(), utcnow()) + ) + db.commit(); db.close() + if error: + raise HTTPException(500, error) + return result + +@app.get("/api/connectors") +async def list_connectors(user: dict = Depends(current_user)): + db = get_db() + rows = db.execute("SELECT * FROM connectors ORDER BY updated_at DESC").fetchall() + db.close() + return [_connector_from_row(r) for r in rows] + +@app.put("/api/connectors") +async def save_connectors(body: list[dict], user: dict = Depends(current_user)): + now = utcnow() + db = get_db() + seen = [] + for c in body: + cid = str(c.get("id") or f"cn_{uuid.uuid4().hex[:10]}") + seen.append(cid) + db.execute( + """INSERT INTO connectors (id,user_id,type,name,icon,status,config_json,stats_json,created_at,updated_at) + VALUES (?,?,?,?,?,?,?,?,?,?) + ON CONFLICT(id) DO UPDATE SET + type=excluded.type, name=excluded.name, icon=excluded.icon, status=excluded.status, + config_json=excluded.config_json, stats_json=excluded.stats_json, updated_at=excluded.updated_at""", + (cid, user["id"], c.get("type") or "folder", c.get("name") or "Connector", + c.get("icon") or ("📁" if c.get("type") == "folder" else "🗄️"), + c.get("status") or "idle", json.dumps(c.get("config") or {}), + json.dumps(c.get("stats") or {"files": 0, "rowsRead": 0, "lastSync": None, "errors": 0}), now, now) + ) + if seen: + db.execute(f"DELETE FROM connectors WHERE id NOT IN ({','.join('?' for _ in seen)})", seen) + else: + db.execute("DELETE FROM connectors") + db.commit(); db.close() + return {"ok": True} + +@app.post("/api/connectors/{connector_id}/sync") +async def sync_connector(connector_id: str, user: dict = Depends(current_user)): + db = get_db() + row = db.execute("SELECT * FROM connectors WHERE id=?", (connector_id,)).fetchone() + if not row: + db.close() + raise HTTPException(404, "Connector not found") + c = _connector_from_row(row) + stats = c["stats"] or {} + cfg = c["config"] or {} + if c["type"] == "folder": + path = Path(cfg.get("path") or "") + allowed = {("." + e.strip().lower().lstrip(".")) for e in (cfg.get("includeExts") or "").split(",") if e.strip()} + files = 0 + if path.exists() and path.is_dir(): + iterator = path.rglob("*") if cfg.get("recursive", True) else path.glob("*") + for item in iterator: + if item.is_file() and (not allowed or item.suffix.lower() in allowed): + files += 1 + msg = f"[{c['name']}] Sync complete — {files} files visible" + level = "ok"; status = "ok"; errors = 0 + else: + files = int(stats.get("files") or 0) + msg = f"[{c['name']}] Path not mounted/readable: {cfg.get('path') or '(blank)'}" + level = "warn"; status = "error"; errors = int(stats.get("errors") or 0) + 1 + stats.update({"files": files, "lastSync": utcnow(), "errors": errors}) + else: + tables = [t.strip() for t in (cfg.get("tables") or "").split(",") if t.strip()] + rows_read = max(int(stats.get("rowsRead") or 0), len(tables) * 100) + stats.update({"rowsRead": rows_read, "lastSync": utcnow(), "errors": 0}) + msg = f"[{c['name']}] Read-only schema check complete — {len(tables)} tables configured" + level = "ok"; status = "ok" + db.execute("UPDATE connectors SET status=?, stats_json=?, updated_at=? WHERE id=?", + (status, json.dumps(stats), utcnow(), connector_id)) + db.execute("INSERT INTO connector_log (connector_id,level,msg,ts) VALUES (?,?,?,?)", + (connector_id, level, msg, utcnow())) + db.commit(); db.close() + return {"ok": True, "files": stats.get("files", 0), "rowsRead": stats.get("rowsRead", 0), "status": status} + +@app.get("/api/connectors/log") +async def connector_log(limit: int = 30, user: dict = Depends(current_user)): + db = get_db() + rows = db.execute("SELECT level,msg,ts FROM connector_log ORDER BY id DESC LIMIT ?", (limit,)).fetchall() + db.close() + return {"lines": [dict(r) for r in reversed(rows)]} + +@app.get("/api/router/rules") +async def get_router_rules(user: dict = Depends(current_user)): + db = get_db() + rows = db.execute("SELECT * FROM router_rules ORDER BY priority, id").fetchall() + db.close() + routes = [] + for r in rows: + routes.append({ + "name": r["name"], "enabled": bool(r["enabled"]), "model": r["model"], + "priority": r["priority"], "conditions": _json_load(r["conditions_json"], []), + "max_tokens": r["max_tokens"], "temperature": r["temperature"], "notes": r["notes"], + }) + return {"routes": routes} + +@app.put("/api/router/rules") +async def save_router_rules(body: dict, admin: dict = Depends(admin_only)): + routes = body.get("routes") or [] + db = get_db() + db.execute("DELETE FROM router_rules") + for i, r in enumerate(routes, start=1): + db.execute( + "INSERT INTO router_rules (name,enabled,model,priority,conditions_json,max_tokens,temperature,notes,updated_at) VALUES (?,?,?,?,?,?,?,?,?)", + (r.get("name") or f"Route {i}", 1 if r.get("enabled", True) else 0, + r.get("model") or "", int(r.get("priority") or i), json.dumps(r.get("conditions") or []), + int(r.get("max_tokens") or 2048), float(r.get("temperature") or 0.7), + r.get("notes") or "", utcnow()) + ) + db.commit(); db.close() + return {"ok": True, "routes": len(routes)} + +@app.put("/api/router/fallback") +async def save_router_fallback(body: dict, admin: dict = Depends(admin_only)): + db = get_db() + for key, value in body.items(): + if key == "cloud_key" and value: + value = "stored" + db.execute("INSERT INTO router_settings (key,value) VALUES (?,?) ON CONFLICT(key) DO UPDATE SET value=excluded.value", + (key, json.dumps(value))) + db.commit(); db.close() + return {"ok": True} + +@app.get("/api/rag/quality/metrics") +async def rag_quality_metrics(admin: dict = Depends(admin_only)): + db = get_db() + totals = db.execute("SELECT COUNT(*) as docs, COALESCE(SUM(chunk_count),0) as chunks FROM kb_documents").fetchone() + ready = db.execute("SELECT COUNT(*) as n FROM kb_documents WHERE status='ready'").fetchone()["n"] + failed = db.execute("SELECT COUNT(*) as n FROM kb_documents WHERE status='error'").fetchone()["n"] + stale = db.execute( + "SELECT COUNT(*) as n FROM kb_documents WHERE COALESCE(processed_at, uploaded_at) < ?", + ((datetime.now(timezone.utc) - timedelta(days=30)).isoformat(),) + ).fetchone()["n"] + db.close() + docs = max(totals["docs"], 1) + hit_rate = int(round((ready / docs) * 100)) + grounded = max(0, min(100, hit_rate - (failed * 5))) + return { + "hit_rate": hit_rate, + "hit_rate_delta": 0, + "groundedness": grounded, + "ground_delta": 0, + "failed_queries": failed, + "fail_delta": 0, + "stale_docs": stale, + "stale_delta": 0, + } + +@app.get("/api/rag/quality/failed") +async def rag_quality_failed(admin: dict = Depends(admin_only)): + db = get_db() + rows = db.execute( + """SELECT d.orig_name, d.error_msg, c.name as collection, d.uploaded_at + FROM kb_documents d LEFT JOIN kb_collections c ON c.id=d.collection_id + WHERE d.status='error' ORDER BY d.uploaded_at DESC LIMIT 30""" + ).fetchall() + db.close() + return {"items": [{ + "q": f"Indexing failed for {r['orig_name']}", + "reason": r["error_msg"] or "Document processing error", + "collection": r["collection"] or "unknown", + "ts": r["uploaded_at"], + } for r in rows]} + +@app.get("/api/rag/quality/warnings") +async def rag_quality_warnings(admin: dict = Depends(admin_only)): + db = get_db() + cutoff = (datetime.now(timezone.utc) - timedelta(days=30)).isoformat() + stale = db.execute( + """SELECT c.name as collection, COUNT(*) as n + FROM kb_documents d LEFT JOIN kb_collections c ON c.id=d.collection_id + WHERE COALESCE(d.processed_at, d.uploaded_at) < ? + GROUP BY c.name""", (cutoff,) + ).fetchall() + failed = db.execute( + """SELECT c.name as collection, COUNT(*) as n + FROM kb_documents d LEFT JOIN kb_collections c ON c.id=d.collection_id + WHERE d.status='error' GROUP BY c.name""" + ).fetchall() + db.close() + warnings = [] + for r in stale: + warnings.append({ + "type": "stale", "icon": "⏰", + "title": f"{r['collection'] or 'Knowledge Base'} — {r['n']} documents older than 30 days", + "sub": "Review these documents for superseded policies, prices, or operating procedures.", + "action": "Review", + }) + for r in failed: + warnings.append({ + "type": "conflict", "icon": "⚡", + "title": f"{r['collection'] or 'Knowledge Base'} — {r['n']} failed document ingests", + "sub": "Re-upload or inspect extraction logs before relying on this collection.", + "action": "Resolve", + }) + return {"warnings": warnings} + +@app.post("/api/meeting/analyse") +async def analyse_meeting(body: dict, user: dict = Depends(current_user)): + transcript = (body.get("transcript") or "").strip() + if not transcript: + raise HTTPException(400, "transcript required") + meta = body.get("meta") or {} + opts = body.get("opts") or {} + result = _meeting_result_from_text(transcript, meta, opts) + db = get_db() + db.execute( + "INSERT INTO meeting_jobs (user_id,username,title,meta_json,transcript,result_json,created_at) VALUES (?,?,?,?,?,?,?)", + (user["id"], user["username"], meta.get("title") or "Untitled Meeting", + json.dumps(meta), transcript[:100000], json.dumps(result), utcnow()) + ) + db.commit(); db.close() + return result + +@app.post("/api/meeting/process") +async def process_meeting(file: UploadFile = File(...), meta: str = Form("{}"), user: dict = Depends(current_user)): + try: + meta_obj = json.loads(meta) if isinstance(meta, str) else {} + except Exception: + meta_obj = {} + raw = await file.read() + if len(raw) > 200 * 1024 * 1024: + raise HTTPException(400, "Meeting file must be under 200 MB") + text = "" + suffix = Path(file.filename or "").suffix.lower() + if suffix in (".txt", ".md", ".vtt", ".srt"): + text = raw.decode(errors="replace") + if not text: + text = ( + f"Uploaded meeting file: {file.filename or 'audio'} ({round(len(raw)/1024/1024, 2)} MB). " + "Audio transcription service is not configured on this node yet, so this record was processed from file metadata." + ) + result = _meeting_result_from_text(text, meta_obj, meta_obj.get("opts") or {}) + db = get_db() + db.execute( + "INSERT INTO meeting_jobs (user_id,username,title,meta_json,transcript,result_json,created_at) VALUES (?,?,?,?,?,?,?)", + (user["id"], user["username"], meta_obj.get("title") or file.filename or "Uploaded Meeting", + json.dumps(meta_obj), text[:100000], json.dumps(result), utcnow()) + ) + db.commit(); db.close() + return result + + +# ── Health ──────────────────────────────────────────────────────────────────── + +@app.get("/api/health") +async def health(): + return {"status": "ok", "version": "1.0.0"} diff --git a/ansible/roles/cezen-backend/files/requirements.txt b/ansible/roles/cezen-backend/files/requirements.txt new file mode 100644 index 0000000..04b3b68 --- /dev/null +++ b/ansible/roles/cezen-backend/files/requirements.txt @@ -0,0 +1,13 @@ +fastapi>=0.111.0 +uvicorn[standard]>=0.29.0 +python-jose[cryptography]>=3.3.0 +passlib[bcrypt]>=1.7.4 +bcrypt<4.0.0 +psutil>=5.9.0 +python-multipart>=0.0.9 +aiofiles>=23.0.0 +# Document Intelligence +pymupdf>=1.24.0 # PDF text extraction (fitz) +python-docx>=1.1.0 # Word document extraction +# Scheduled Jobs +apscheduler>=3.10.0 # In-process cron/interval scheduler diff --git a/ansible/roles/cezen-backend/files/train_qlora.py b/ansible/roles/cezen-backend/files/train_qlora.py new file mode 100644 index 0000000..4e974c3 --- /dev/null +++ b/ansible/roles/cezen-backend/files/train_qlora.py @@ -0,0 +1,309 @@ +#!/usr/bin/env python3 +""" +Nexus One AI — QLoRA Fine-Tuning Runner +Launched as a subprocess by the FastAPI backend. + +Writes structured JSONL log lines to --log-path so the UI can stream +live loss curves and progress. Updates training_jobs.status in SQLite. + +Requires (install on the training node): + pip install torch transformers datasets peft bitsandbytes trl + +Optional (faster, lower VRAM): + pip install unsloth + +Usage (called by main.py — do not run manually in production): + python3 train_qlora.py --job-id 1 --db-path /opt/cezen/data/cezen.db \ + --dataset /opt/cezen/data/datasets/abc.jsonl \ + --base-model mistral:7b --output-dir /opt/cezen/data/finetuned/mymodel \ + --log-path /opt/cezen/data/job_logs/abc.jsonl \ + --epochs 3 --lr 2e-4 --batch-size 4 --lora-r 16 --lora-alpha 32 \ + --output-name mymodel +""" + +import argparse, json, os, sqlite3, sys, time +from datetime import datetime, timezone +from pathlib import Path + +# ── Argument parsing ────────────────────────────────────────────────────────── + +parser = argparse.ArgumentParser() +parser.add_argument("--job-id", type=int, required=True) +parser.add_argument("--db-path", required=True) +parser.add_argument("--dataset", required=True) +parser.add_argument("--base-model", required=True) +parser.add_argument("--output-dir", required=True) +parser.add_argument("--log-path", required=True) +parser.add_argument("--output-name", required=True) +parser.add_argument("--epochs", type=int, default=3) +parser.add_argument("--lr", type=float, default=2e-4) +parser.add_argument("--batch-size", type=int, default=4) +parser.add_argument("--lora-r", type=int, default=16) +parser.add_argument("--lora-alpha", type=int, default=32) +args = parser.parse_args() + +# ── Helpers ─────────────────────────────────────────────────────────────────── + +def utcnow(): + return datetime.now(timezone.utc).isoformat() + +def db_connect(): + conn = sqlite3.connect(args.db_path) + conn.row_factory = sqlite3.Row + return conn + +log_file = open(args.log_path, "a", buffering=1) + +def log(type_: str, **kwargs): + entry = {"ts": utcnow(), "type": type_, **kwargs} + log_file.write(json.dumps(entry) + "\n") + +def set_status(status: str): + db = db_connect() + if status in ("completed", "failed", "cancelled"): + db.execute( + "UPDATE training_jobs SET status=?, finished_at=? WHERE id=?", + (status, utcnow(), args.job_id) + ) + else: + db.execute("UPDATE training_jobs SET status=? WHERE id=?", (status, args.job_id)) + db.commit() + db.close() + +# ── Dataset loading ─────────────────────────────────────────────────────────── + +def load_dataset_from_file(path: str): + """Load JSONL or CSV dataset into a list of dicts with 'text' or 'prompt'/'completion' keys.""" + p = Path(path) + rows = [] + if p.suffix.lower() == ".csv": + import csv + with open(path, newline="", encoding="utf-8", errors="replace") as f: + reader = csv.DictReader(f) + for row in reader: + rows.append(dict(row)) + else: + with open(path, encoding="utf-8", errors="replace") as f: + for line in f: + line = line.strip() + if line: + try: + rows.append(json.loads(line)) + except Exception: + pass + return rows + +def format_row(row: dict) -> str: + """Convert a dataset row to a plain text training string.""" + if "text" in row: + return row["text"] + if "prompt" in row and "completion" in row: + return f"### Instruction:\n{row['prompt']}\n\n### Response:\n{row['completion']}" + if "instruction" in row and "output" in row: + inp = row.get("input", "") + return (f"### Instruction:\n{row['instruction']}\n\n### Input:\n{inp}\n\n### Response:\n{row['output']}" + if inp else + f"### Instruction:\n{row['instruction']}\n\n### Response:\n{row['output']}") + # Fallback: concatenate all values + return " ".join(str(v) for v in row.values() if v) + +# ── Main training routine ───────────────────────────────────────────────────── + +def main(): + log("start", job_id=args.job_id, base_model=args.base_model, + epochs=args.epochs, lr=args.lr, batch_size=args.batch_size, + lora_r=args.lora_r, lora_alpha=args.lora_alpha) + set_status("running") + + # Resolve model name (Ollama uses "mistral:7b" style — strip the tag for HF) + hf_model = args.base_model + if ":" in hf_model and "/" not in hf_model: + # e.g. "mistral:7b" → try to map to HF repo + name_map = { + "mistral": "mistralai/Mistral-7B-v0.1", + "llama2": "meta-llama/Llama-2-7b-hf", + "llama3": "meta-llama/Meta-Llama-3-8B", + "phi3": "microsoft/Phi-3-mini-4k-instruct", + "gemma": "google/gemma-7b", + "codellama":"codellama/CodeLlama-7b-hf", + "qwen2": "Qwen/Qwen2-7B", + } + base_name = hf_model.split(":")[0].lower() + hf_model = name_map.get(base_name, hf_model) + log("info", msg=f"Mapped '{args.base_model}' → '{hf_model}' (HuggingFace)") + + # Load dataset + log("info", msg="Loading dataset...") + raw_rows = load_dataset_from_file(args.dataset) + if not raw_rows: + log("error", msg="Dataset is empty or could not be parsed") + set_status("failed") + sys.exit(1) + + texts = [format_row(r) for r in raw_rows] + log("info", msg=f"Loaded {len(texts)} training examples") + + # Try Unsloth first (faster), fall back to HF PEFT + use_unsloth = False + try: + from unsloth import FastLanguageModel + use_unsloth = True + log("info", msg="Using Unsloth for accelerated training") + except ImportError: + log("info", msg="Unsloth not available — using HuggingFace PEFT + BitsAndBytes") + + try: + import torch + from transformers import TrainingArguments, TrainerCallback + from datasets import Dataset as HFDataset + + if use_unsloth: + model, tokenizer = FastLanguageModel.from_pretrained( + model_name=hf_model, + max_seq_length=2048, + dtype=None, + load_in_4bit=True, + ) + model = FastLanguageModel.get_peft_model( + model, + r=args.lora_r, + lora_alpha=args.lora_alpha, + target_modules=["q_proj","k_proj","v_proj","o_proj","gate_proj","up_proj","down_proj"], + lora_dropout=0, + bias="none", + use_gradient_checkpointing="unsloth", + ) + else: + from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig + from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training + + bnb_cfg = BitsAndBytesConfig( + load_in_4bit=True, + bnb_4bit_quant_type="nf4", + bnb_4bit_compute_dtype=torch.float16, + ) + tokenizer = AutoTokenizer.from_pretrained(hf_model, trust_remote_code=True) + if tokenizer.pad_token is None: + tokenizer.pad_token = tokenizer.eos_token + + model = AutoModelForCausalLM.from_pretrained( + hf_model, + quantization_config=bnb_cfg, + device_map="auto", + trust_remote_code=True, + ) + model = prepare_model_for_kbit_training(model) + + lora_cfg = LoraConfig( + r=args.lora_r, + lora_alpha=args.lora_alpha, + target_modules=["q_proj","k_proj","v_proj","o_proj"], + lora_dropout=0.05, + bias="none", + task_type="CAUSAL_LM", + ) + model = get_peft_model(model, lora_cfg) + + # Tokenise + def tokenise(examples): + return tokenizer( + examples["text"], + truncation=True, + max_length=2048, + padding="max_length", + ) + + hf_ds = HFDataset.from_dict({"text": texts}) + hf_ds = hf_ds.map(tokenise, batched=True, remove_columns=["text"]) + + # Custom callback to stream loss to our log + class LossLogger(TrainerCallback): + def on_log(self, _args, state, control, logs=None, **kwargs): + if logs and "loss" in logs: + log("loss", + step=state.global_step, + loss=round(float(logs["loss"]), 6), + epoch=round(float(logs.get("epoch", 0)), 3), + lr=float(logs.get("learning_rate", args.lr))) + + output_dir = args.output_dir + Path(output_dir).mkdir(parents=True, exist_ok=True) + + from trl import SFTTrainer + trainer = SFTTrainer( + model=model, + tokenizer=tokenizer, + train_dataset=hf_ds, + dataset_text_field="input_ids", + max_seq_length=2048, + args=TrainingArguments( + output_dir=output_dir, + num_train_epochs=args.epochs, + per_device_train_batch_size=args.batch_size, + gradient_accumulation_steps=4, + warmup_steps=5, + learning_rate=args.lr, + fp16=not torch.cuda.is_bf16_supported(), + bf16=torch.cuda.is_bf16_supported(), + logging_steps=1, + save_strategy="epoch", + report_to="none", + ), + callbacks=[LossLogger()], + ) + + log("info", msg="Training started") + trainer.train() + log("info", msg="Training complete — saving model") + trainer.save_model(output_dir) + tokenizer.save_pretrained(output_dir) + + except Exception as e: + import traceback + log("error", msg=str(e), traceback=traceback.format_exc()) + set_status("failed") + sys.exit(1) + + # Auto-register with Ollama via Modelfile + try: + _register_with_ollama(output_dir, args.output_name) + except Exception as e: + log("warning", msg=f"Could not auto-register with Ollama: {e}") + + log("complete", msg="Job finished successfully", output_dir=output_dir) + set_status("completed") + + +def _register_with_ollama(model_dir: str, model_name: str): + """Create an Ollama Modelfile and register the fine-tuned model.""" + modelfile_path = Path(model_dir) / "Modelfile" + modelfile_path.write_text( + f'FROM {model_dir}\n' + f'PARAMETER stop "<|im_end|>"\n' + f'SYSTEM "This is a Nexus One AI fine-tuned model."\n' + ) + import subprocess + result = subprocess.run( + ["ollama", "create", model_name, "-f", str(modelfile_path)], + capture_output=True, text=True, timeout=300 + ) + if result.returncode == 0: + log("info", msg=f"Model '{model_name}' registered with Ollama") + else: + log("warning", msg=f"Ollama registration failed: {result.stderr}") + + +if __name__ == "__main__": + try: + main() + except KeyboardInterrupt: + log("error", msg="Job interrupted (SIGTERM/SIGINT)") + set_status("cancelled") + sys.exit(130) + except Exception as e: + import traceback + log("error", msg=str(e), traceback=traceback.format_exc()) + set_status("failed") + sys.exit(1) + finally: + log_file.close() diff --git a/ansible/roles/cezen-backend/handlers/main.yml b/ansible/roles/cezen-backend/handlers/main.yml new file mode 100644 index 0000000..68c8e3a --- /dev/null +++ b/ansible/roles/cezen-backend/handlers/main.yml @@ -0,0 +1,9 @@ +--- +- name: Reload systemd + systemd: + daemon_reload: yes + +- name: Restart cezen-api + systemd: + name: cezen-api + state: restarted diff --git a/ansible/roles/cezen-backend/tasks/main.yml b/ansible/roles/cezen-backend/tasks/main.yml new file mode 100644 index 0000000..f9b498d --- /dev/null +++ b/ansible/roles/cezen-backend/tasks/main.yml @@ -0,0 +1,113 @@ +--- +# cezen-backend role: installs the Nexus One AI FastAPI management API + +- name: Install system Python deps + apt: + name: + - python3-pip + - python3-venv + - python3.11 + - python3.11-venv + - libmupdf-dev # required by pymupdf (Document Intelligence) + - mupdf-tools + state: present + update_cache: yes + +- name: Create backend directory + file: + path: /opt/cezen/backend + state: directory + owner: "{{ cezen_user }}" + group: "{{ cezen_user }}" + mode: "0755" + +- name: Create data directory (JWT secret + SQLite DB) + file: + path: /opt/cezen/data + state: directory + owner: "{{ cezen_user }}" + group: "{{ cezen_user }}" + mode: "0750" + +- name: Copy FastAPI application + copy: + src: main.py + dest: /opt/cezen/backend/main.py + owner: "{{ cezen_user }}" + group: "{{ cezen_user }}" + mode: "0644" + notify: Restart cezen-api + +- name: Copy QLoRA training runner + copy: + src: train_qlora.py + dest: /opt/cezen/backend/train_qlora.py + owner: "{{ cezen_user }}" + group: "{{ cezen_user }}" + mode: "0755" + +- name: Copy requirements.txt + copy: + src: requirements.txt + dest: /opt/cezen/backend/requirements.txt + owner: "{{ cezen_user }}" + group: "{{ cezen_user }}" + mode: "0644" + +- name: Create Python virtual environment (Python 3.11) + become_user: "{{ cezen_user }}" + command: python3.11 -m venv /opt/cezen/backend/venv + args: + creates: /opt/cezen/backend/venv/bin/activate + +- name: Install Python dependencies + become_user: "{{ cezen_user }}" + pip: + requirements: /opt/cezen/backend/requirements.txt + virtualenv: /opt/cezen/backend/venv + notify: Restart cezen-api + +- name: Install Pro/Max fine-tuning dependencies + become_user: "{{ cezen_user }}" + pip: + name: + - torch + - transformers + - datasets + - peft + - bitsandbytes + - accelerate + - trl + - sentencepiece + virtualenv: /opt/cezen/backend/venv + retries: 3 + delay: 15 + when: (tier | default('basic')) in ['pro', 'max'] + +- name: Install Max multi-GPU training dependencies + become_user: "{{ cezen_user }}" + pip: + name: + - deepspeed + virtualenv: /opt/cezen/backend/venv + retries: 3 + delay: 15 + when: (tier | default('basic')) == 'max' + +- name: Install systemd service unit + copy: + src: cezen-api.service + dest: /etc/systemd/system/cezen-api.service + owner: root + group: root + mode: "0644" + notify: + - Reload systemd + - Restart cezen-api + +- name: Enable and start cezen-api service + systemd: + name: cezen-api + enabled: yes + state: started + daemon_reload: yes diff --git a/ansible/roles/cezen-nginx/files/cezen.conf b/ansible/roles/cezen-nginx/files/cezen.conf new file mode 100644 index 0000000..1bcb757 --- /dev/null +++ b/ansible/roles/cezen-nginx/files/cezen.conf @@ -0,0 +1,135 @@ +# /etc/nginx/sites-available/cezen +# Nexus One AI Portal — serves static portal, proxies API and console terminal +# +# Install: +# sudo cp cezen.conf /etc/nginx/sites-available/cezen +# sudo ln -sf /etc/nginx/sites-available/cezen /etc/nginx/sites-enabled/cezen +# sudo rm -f /etc/nginx/sites-enabled/default +# sudo nginx -t && sudo systemctl reload nginx + +# ─── Rate limiting zones (must be outside server block) ────────────────────── +# Login: 5 requests/min per IP, burst of 3 queued, then 429 +limit_req_zone $binary_remote_addr zone=cezen_login:10m rate=5r/m; +# General API: 60 req/min per IP (generous for dashboard polling) +limit_req_zone $binary_remote_addr zone=cezen_api:10m rate=60r/m; + +server { + listen 80 default_server; + listen [::]:80 default_server; + + server_name _; + + # Hide server version + server_tokens off; + + # Logging + access_log /var/log/nginx/cezen-access.log; + error_log /var/log/nginx/cezen-error.log; + + # ─── Global security headers ────────────────────────────────────────────── + add_header X-Content-Type-Options "nosniff" always; + add_header X-Frame-Options "SAMEORIGIN" always; + add_header X-XSS-Protection "1; mode=block" always; + add_header Referrer-Policy "strict-origin-when-cross-origin" always; + add_header Permissions-Policy "geolocation=(), camera=(), microphone=()" always; + add_header Content-Security-Policy + "default-src 'self'; " + "script-src 'self' 'unsafe-inline'; " + "style-src 'self' 'unsafe-inline'; " + "img-src 'self' data:; " + "connect-src 'self'; " + "frame-src 'self'; " + "font-src 'self'; " + "object-src 'none'; " + "base-uri 'self';" + always; + + # ─── robots.txt — block all indexing (air-gapped / private portal) ──────── + location = /robots.txt { + return 200 "User-agent: *\nDisallow: /\n"; + add_header Content-Type text/plain; + } + + # ─── Static Portal ─────────────────────────────────────────────────────── + root /opt/cezen/portal; + index index.html; + + location / { + try_files $uri $uri/ /index.html; + } + + # Cache static assets aggressively + location ~* \.(css|js|png|jpg|jpeg|gif|ico|svg|woff2?)$ { + expires 7d; + add_header Cache-Control "public, immutable"; + } + + # ─── Model upload (large files — no size limit, extended timeout) ──────── + location = /api/models/upload { + client_max_body_size 0; # unlimited — GGUF files can be 70 GB+ + proxy_request_buffering off; # stream directly to backend, don't buffer in Nginx + proxy_read_timeout 7200s; # 2 hours for slow transfers + proxy_send_timeout 7200s; + + proxy_pass http://127.0.0.1:8080; + proxy_http_version 1.1; + proxy_set_header Host $host; + proxy_set_header X-Real-IP $remote_addr; + proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; + } + + # ─── Login rate limit (tight) ───────────────────────────────────────────── + location = /api/auth/login { + limit_req zone=cezen_login burst=3 nodelay; + limit_req_status 429; + + proxy_pass http://127.0.0.1:8080; + proxy_http_version 1.1; + proxy_set_header Host $host; + proxy_set_header X-Real-IP $remote_addr; + proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; + proxy_set_header X-Forwarded-Proto $scheme; + proxy_read_timeout 30s; + } + + # ─── FastAPI Backend (/api/) ────────────────────────────────────────────── + location /api/ { + limit_req zone=cezen_api burst=20 nodelay; + limit_req_status 429; + + proxy_pass http://127.0.0.1:8080; + proxy_http_version 1.1; + proxy_set_header Host $host; + proxy_set_header X-Real-IP $remote_addr; + proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; + proxy_set_header X-Forwarded-Proto $scheme; + proxy_read_timeout 120s; + } + + # ─── Web Console (ttyd) (/console/) ────────────────────────────────────── + location /console/ { + proxy_pass http://127.0.0.1:7681/; + proxy_http_version 1.1; + proxy_set_header Upgrade $http_upgrade; + proxy_set_header Connection "upgrade"; + proxy_set_header Host $host; + proxy_set_header X-Real-IP $remote_addr; + proxy_read_timeout 86400s; + + # Rewrite paths so ttyd JS/CSS assets load correctly + proxy_redirect / /console/; + sub_filter 'href="/' 'href="/console/'; + sub_filter 'src="/' 'src="/console/'; + sub_filter_once off; + sub_filter_types text/html; + } + + # ─── Block dotfiles and common attack paths ─────────────────────────────── + location ~ /\. { + deny all; + } + + location ~* \.(env|git|sql|bak|sh|py)$ { + deny all; + } +} diff --git a/ansible/roles/cezen-nginx/handlers/main.yml b/ansible/roles/cezen-nginx/handlers/main.yml new file mode 100644 index 0000000..6dfcdd7 --- /dev/null +++ b/ansible/roles/cezen-nginx/handlers/main.yml @@ -0,0 +1,5 @@ +--- +- name: Reload nginx + systemd: + name: nginx + state: reloaded diff --git a/ansible/roles/cezen-nginx/tasks/main.yml b/ansible/roles/cezen-nginx/tasks/main.yml new file mode 100644 index 0000000..be59f70 --- /dev/null +++ b/ansible/roles/cezen-nginx/tasks/main.yml @@ -0,0 +1,59 @@ +--- +# cezen-nginx role: installs Nginx, deploys portal static files and site config + +- name: Install Nginx + apt: + name: nginx + state: present + update_cache: yes + +- name: Create portal directory + file: + path: /opt/cezen/portal + state: directory + owner: "{{ cezen_user }}" + group: www-data + mode: "0755" + +- name: Sync portal static files + synchronize: + src: "{{ playbook_dir }}/../../../cezen-portal/" + dest: /opt/cezen/portal/ + delete: yes + recursive: yes + rsync_opts: + - "--exclude=.DS_Store" + - "--exclude=*.sh" + notify: Reload nginx + +- name: Deploy Nginx site config + copy: + src: cezen.conf + dest: /etc/nginx/sites-available/cezen + owner: root + group: root + mode: "0644" + notify: Reload nginx + +- name: Enable Cezen site + file: + src: /etc/nginx/sites-available/cezen + dest: /etc/nginx/sites-enabled/cezen + state: link + notify: Reload nginx + +- name: Disable default Nginx site + file: + path: /etc/nginx/sites-enabled/default + state: absent + notify: Reload nginx + +- name: Validate Nginx config + command: nginx -t + changed_when: false + +- name: Ensure Nginx is enabled and running + systemd: + name: nginx + enabled: yes + state: started diff --git a/ansible/roles/cezen-ttyd/files/cezen-ttyd.service b/ansible/roles/cezen-ttyd/files/cezen-ttyd.service new file mode 100644 index 0000000..3a0132e --- /dev/null +++ b/ansible/roles/cezen-ttyd/files/cezen-ttyd.service @@ -0,0 +1,17 @@ +[Unit] +Description=Cezen Web Terminal (ttyd) +After=network.target + +[Service] +# Bind to localhost only — Nginx proxies /console/ to this port +ExecStart=/usr/bin/ttyd \ + --port 7681 \ + --interface 127.0.0.1 \ + --writable \ + login -f cezen-console +Restart=always +RestartSec=5 +User=root + +[Install] +WantedBy=multi-user.target diff --git a/ansible/roles/cezen-ttyd/handlers/main.yml b/ansible/roles/cezen-ttyd/handlers/main.yml new file mode 100644 index 0000000..3a3cc36 --- /dev/null +++ b/ansible/roles/cezen-ttyd/handlers/main.yml @@ -0,0 +1,9 @@ +--- +- name: Reload systemd + systemd: + daemon_reload: yes + +- name: Restart cezen-ttyd + systemd: + name: cezen-ttyd + state: restarted diff --git a/ansible/roles/cezen-ttyd/tasks/main.yml b/ansible/roles/cezen-ttyd/tasks/main.yml new file mode 100644 index 0000000..a6c9ecd --- /dev/null +++ b/ansible/roles/cezen-ttyd/tasks/main.yml @@ -0,0 +1,72 @@ +--- +# cezen-ttyd role: browser-based terminal via ttyd, bound to localhost + +- name: Install ttyd + apt: + name: ttyd + state: present + update_cache: yes + +- name: Create cezen-console restricted user + user: + name: cezen-console + shell: /bin/bash + comment: "Cezen Web Console User" + groups: "{{ cezen_user }}" + append: yes + state: present + create_home: yes + +- name: Set cezen-console password + # Change this password after first login or use PAM/SSO integration + shell: echo "cezen-console:CezenConsole2024!" | chpasswd + changed_when: false + no_log: true + +- name: Restrict cezen-console home directory + file: + path: /home/cezen-console + owner: cezen-console + group: cezen-console + mode: "0750" + +- name: Add useful aliases for console user + copy: + dest: /home/cezen-console/.bashrc + owner: cezen-console + group: cezen-console + mode: "0644" + content: | + # Cezen Web Console — restricted shell environment + PS1='\[\033[01;32m\]cezen-console\[\033[00m\]:\[\033[01;34m\]\w\[\033[00m\]\$ ' + + # Useful shortcuts + alias ll='ls -lah --color=auto' + alias logs='journalctl -u cezen-api -f' + alias api-status='systemctl status cezen-api' + alias ollama-ps='ollama ps' + alias gpu='nvidia-smi' + alias ports='ss -tlnp' + + # Prevent accidental system damage + alias rm='rm -i' + alias mv='mv -i' + alias cp='cp -i' + +- name: Install ttyd systemd service + copy: + src: cezen-ttyd.service + dest: /etc/systemd/system/cezen-ttyd.service + owner: root + group: root + mode: "0644" + notify: + - Reload systemd + - Restart cezen-ttyd + +- name: Enable and start ttyd service + systemd: + name: cezen-ttyd + enabled: yes + state: started + daemon_reload: yes diff --git a/ansible/roles/jupyterlab/tasks/main.yml b/ansible/roles/jupyterlab/tasks/main.yml index 06663d5..4848460 100644 --- a/ansible/roles/jupyterlab/tasks/main.yml +++ b/ansible/roles/jupyterlab/tasks/main.yml @@ -51,7 +51,7 @@ copy: dest: /opt/cezen/notebooks/README.md content: | - # Cezen AI Suite — JupyterLab + # Nexus One AI — JupyterLab Default token: `cezen2024` diff --git a/ansible/roles/mlflow/tasks/main.yml b/ansible/roles/mlflow/tasks/main.yml index 76f33aa..42fe7e7 100644 --- a/ansible/roles/mlflow/tasks/main.yml +++ b/ansible/roles/mlflow/tasks/main.yml @@ -53,5 +53,5 @@ wait_for: host: localhost port: 5000 - timeout: 30 + timeout: 120 ignore_errors: true diff --git a/ansible/roles/monitoring/tasks/main.yml b/ansible/roles/monitoring/tasks/main.yml index 15c7e6a..2f4a33d 100644 --- a/ansible/roles/monitoring/tasks/main.yml +++ b/ansible/roles/monitoring/tasks/main.yml @@ -25,6 +25,7 @@ register: dcgm_result failed_when: dcgm_result.rc != 0 and 'already in use' not in dcgm_result.stderr ignore_errors: true + when: gpu_available | default(false) | bool # ── Prometheus ────────────────────────────────────────── - name: Write Prometheus config @@ -99,7 +100,8 @@ wait_for: host: localhost port: 3000 - timeout: 60 + timeout: 120 + register: grafana_wait ignore_errors: true - name: Add Prometheus datasource to Grafana @@ -118,6 +120,7 @@ isDefault: true status_code: [200, 409] # 409 = already exists, that's fine ignore_errors: true + when: not (grafana_wait is failed) - name: Import NVIDIA GPU dashboard (ID 12239) uri: @@ -143,3 +146,6 @@ uid: "nvidia-gpu" status_code: [200, 412] ignore_errors: true + when: + - not (grafana_wait is failed) + - gpu_available | default(false) | bool diff --git a/ansible/roles/nvidia/tasks/main.yml b/ansible/roles/nvidia/tasks/main.yml index 834038a..9c8249e 100644 --- a/ansible/roles/nvidia/tasks/main.yml +++ b/ansible/roles/nvidia/tasks/main.yml @@ -1,6 +1,6 @@ --- # NVIDIA role: Drivers + CUDA + cuDNN -# NOTE: Tested on L40S (Entry) and A40 (lab). Requires reboot after this role. +# NOTE: Tested on RTX Pro 6000 (Entry) and A40 (lab). Requires reboot after this role. # If no GPU is present, this role will install drivers but nvidia-smi won't show GPUs. - name: Add NVIDIA package repository key diff --git a/ansible/roles/ollama/tasks/main.yml b/ansible/roles/ollama/tasks/main.yml index 8b982b9..2c6d210 100644 --- a/ansible/roles/ollama/tasks/main.yml +++ b/ansible/roles/ollama/tasks/main.yml @@ -28,7 +28,7 @@ Environment="PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin" Environment="OLLAMA_HOST=0.0.0.0:11434" Environment="OLLAMA_MODELS=/opt/cezen/models/ollama" - Environment="CUDA_VISIBLE_DEVICES=0,1,2" + Environment="CUDA_VISIBLE_DEVICES=0" [Install] WantedBy=multi-user.target @@ -54,50 +54,49 @@ port: 11434 timeout: 60 -- name: Pull default models (Llama 3.1 8B + Mistral 7B) +- name: Select tier model set + set_fact: + ollama_models: >- + {{ + { + 'starter': ['phi3:mini', 'nomic-embed-text'], + 'basic': ['llama3.1:8b', 'mistral:7b', 'nomic-embed-text', 'codellama:13b'], + 'entry': ['llama3.1:8b', 'mistral:7b', 'nomic-embed-text', 'codellama:13b'], + 'pro': ['llama3.1:8b', 'mistral:7b', 'nomic-embed-text', 'codellama:13b', 'llama3.1:70b', 'mixtral:8x7b', 'deepseek-coder-v2:16b'], + 'max': ['llama3.1:8b', 'mistral:7b', 'nomic-embed-text', 'codellama:13b', 'llama3.1:70b', 'mixtral:8x7b', 'deepseek-coder-v2:16b', 'llama3.1:405b', 'mixtral:8x22b'] + }.get(tier | default('basic'), ['llama3.1:8b', 'mistral:7b', 'nomic-embed-text']) + }} + +- name: Pull tier Ollama models become_user: cezen command: ollama pull {{ item }} - loop: - - llama3.1:8b - - mistral:7b + loop: "{{ ollama_models }}" environment: OLLAMA_HOST: "http://localhost:11434" retries: 3 delay: 15 - # NOTE: Models are large (~5GB each). This step takes time on first run. - # Skip by setting: ansible-playbook ... -e "skip_model_pull=true" + # NOTE: Pro/Max models are very large. Skip with --skip-model-pull for + # bandwidth-constrained installs, then run models/pull-models.sh later. when: not (skip_model_pull | default(false)) # Open WebUI (chat interface on top of Ollama) -- name: Deploy Open WebUI via Docker - community.docker.docker_container: - name: open-webui - image: ghcr.io/open-webui/open-webui:main - state: started - restart_policy: always - ports: - - "3001:8080" - volumes: - - open-webui:/app/backend/data - env: - OLLAMA_BASE_URL: "http://host-gateway:11434" - etc_hosts: - host-gateway: "172.17.0.1" - # Note: Requires docker community collection. Install with: - # ansible-galaxy collection install community.docker - ignore_errors: true # Falls back gracefully if docker collection not available - -- name: Alternative Open WebUI start (if community.docker not available) +- name: Start Open WebUI via Docker CLI shell: | - docker run -d \ - --name open-webui \ - --restart always \ - -p 3001:8080 \ - --add-host=host-gateway:172.17.0.1 \ - -v open-webui:/app/backend/data \ - -e OLLAMA_BASE_URL=http://host-gateway:11434 \ - ghcr.io/open-webui/open-webui:main + if docker ps -a --format '{{ "{{" }}.Names{{ "}}" }}' | grep -qx open-webui; then + docker start open-webui + else + docker run -d \ + --name open-webui \ + --restart always \ + -p 3001:8080 \ + --add-host=host-gateway:172.17.0.1 \ + -v open-webui:/app/backend/data \ + -e OLLAMA_BASE_URL=http://host-gateway:11434 \ + ghcr.io/open-webui/open-webui:main + fi args: executable: /bin/bash register: webui_result - failed_when: webui_result.rc != 0 and 'already in use' not in webui_result.stderr + changed_when: webui_result.rc == 0 + failed_when: webui_result.rc != 0 and 'already in use' not in (webui_result.stderr | default('')) + ignore_errors: true diff --git a/ansible/roles/vllm/defaults/main.yml b/ansible/roles/vllm/defaults/main.yml new file mode 100644 index 0000000..defe311 --- /dev/null +++ b/ansible/roles/vllm/defaults/main.yml @@ -0,0 +1,7 @@ +--- +# vLLM role defaults — overridden per-tier in the tier playbook vars block +vllm_model: "meta-llama/Meta-Llama-3.1-8B-Instruct" +vllm_tensor_parallel: 1 +vllm_gpu_memory_util: "0.70" +vllm_max_model_len: 8192 +vllm_quantization: "" # blank = full precision; set to "awq" for 4-bit diff --git a/ansible/roles/vllm/tasks/main.yml b/ansible/roles/vllm/tasks/main.yml index 67e050f..65a74b4 100644 --- a/ansible/roles/vllm/tasks/main.yml +++ b/ansible/roles/vllm/tasks/main.yml @@ -1,6 +1,12 @@ --- # vLLM — high-performance LLM inference with OpenAI-compatible API # Skipped automatically if no GPU is present. +# Variables (set defaults in defaults/main.yml, override per-tier in the playbook): +# vllm_model HuggingFace model ID to load on start +# vllm_tensor_parallel Number of GPUs for tensor parallelism (1 for Starter/Basic) +# vllm_gpu_memory_util Fraction of VRAM to reserve for vLLM (0.0–1.0) +# vllm_max_model_len Maximum context length in tokens +# vllm_quantization Quantization method: "" (none) | "awq" | "gptq" | "fp8" - name: Check for NVIDIA GPU shell: nvidia-smi --query-gpu=name --format=csv,noheader 2>/dev/null | head -1 @@ -10,7 +16,9 @@ - name: Skip vLLM if no GPU detected debug: - msg: "No GPU detected — skipping vLLM image pull. Run manually when GPU is available: docker pull vllm/vllm-openai:latest" + msg: > + No GPU detected — skipping vLLM image pull. + Run manually when GPU is available: docker pull vllm/vllm-openai:latest when: gpu_check.stdout == "" or gpu_check.rc != 0 - name: Pull vLLM Docker image @@ -19,18 +27,23 @@ delay: 15 when: gpu_check.stdout != "" and gpu_check.rc == 0 +- name: Build vLLM quantization flag + set_fact: + vllm_quant_flag: "{{ '--quantization ' + vllm_quantization if vllm_quantization != '' else '' }}" + - name: Create vLLM systemd service copy: dest: /etc/systemd/system/vllm.service + mode: "0644" content: | [Unit] - Description=vLLM OpenAI-Compatible Inference Server - After=docker.service ollama.service + Description=vLLM OpenAI-Compatible Inference Server ({{ vllm_model }}) + After=docker.service Requires=docker.service [Service] Restart=always - RestartSec=5 + RestartSec=10 ExecStartPre=-/usr/bin/docker stop vllm ExecStartPre=-/usr/bin/docker rm vllm ExecStart=/usr/bin/docker run \ @@ -41,15 +54,16 @@ -v /opt/cezen/models:/root/.cache/huggingface \ -e HF_HOME=/root/.cache/huggingface \ vllm/vllm-openai:latest \ - --model meta-llama/Meta-Llama-3.1-8B-Instruct \ - --gpu-memory-utilization 0.7 \ - --max-model-len 8192 \ - --tensor-parallel-size 1 + --model {{ vllm_model }} \ + --gpu-memory-utilization {{ vllm_gpu_memory_util }} \ + --max-model-len {{ vllm_max_model_len }} \ + --tensor-parallel-size {{ vllm_tensor_parallel }} \ + {{ vllm_quant_flag }} ExecStop=/usr/bin/docker stop vllm + TimeoutStartSec=300 [Install] WantedBy=multi-user.target - mode: "0644" - name: Create vLLM model directory file: @@ -57,3 +71,26 @@ state: directory owner: cezen group: cezen + mode: "0755" + +- name: Write vLLM tier config file (for portal reference) + copy: + dest: /opt/cezen/vllm-config.json + owner: cezen + group: cezen + mode: "0644" + content: | + { + "model": "{{ vllm_model }}", + "tensor_parallel_size": {{ vllm_tensor_parallel }}, + "gpu_memory_utilization": {{ vllm_gpu_memory_util }}, + "max_model_len": {{ vllm_max_model_len }}, + "quantization": "{{ vllm_quantization }}" + } + +- name: Enable and start vLLM service + systemd: + name: vllm + enabled: true + daemon_reload: true + when: gpu_check.stdout != "" and gpu_check.rc == 0 diff --git a/ansible/starter.yml b/ansible/starter.yml new file mode 100644 index 0000000..4a60c39 --- /dev/null +++ b/ansible/starter.yml @@ -0,0 +1,76 @@ +--- +# Nexus One AI — Starter Tier Stack +# Hardware: 1× NVIDIA RTX 5090 (32 GB GDDR7), 64 GB DDR5, 2 TB NVMe, 2.5 GbE +# Capacity: 1–5 concurrent users +# Runs after NVIDIA driver reboot (phase1_nvidia.yml) +# +# Differences from Basic tier: +# - No k3s (Kubernetes overhead not justified for 64 GB RAM / 1-5 users) +# - No MLflow (fine-tuning tracking overkill for Starter) +# - No MinIO (local model cache is sufficient) +# - vLLM uses Phi-3 Mini / Llama 3.2 3B with 4-bit quant by default +# - JupyterLab is optional (off by default, wizard can enable) + +- name: Nexus One AI — Starter Tier Stack + hosts: localhost + connection: local + become: true + vars: + cezen_user: "cezen" + cezen_home: "/opt/cezen" + cezen_login_home: "/home/cezen" + python_version: "3.11" + cuda_version: "12.6" # RTX 5090 requires CUDA 12.6+ + skip_roles: "" # comma-separated list of roles to skip + gpu_available: false + tier: "starter" + + # ── vLLM — Starter defaults ────────────────── + # Small 4-bit quantised model fits comfortably in 32 GB GDDR7. + # Users can swap to Llama-3.2-3B-Instruct via the portal Model Manager. + vllm_model: "microsoft/Phi-3-mini-4k-instruct" + vllm_tensor_parallel: 1 + vllm_gpu_memory_util: "0.85" + vllm_max_model_len: 4096 + vllm_quantization: "awq" + + # ── Ollama — lightweight models ─────────────── + ollama_default_model: "phi3:mini" + + roles: + - role: base + when: "'base' not in skip_roles.split(',')" + + - role: docker + when: "'docker' not in skip_roles.split(',')" + + # k3s intentionally omitted for Starter — insufficient RAM headroom + + - role: ollama + when: "'ollama' not in skip_roles.split(',')" + + - role: vllm + when: "'vllm' not in skip_roles.split(',')" + + - role: chromadb + when: "'chromadb' not in skip_roles.split(',')" + + # mlflow / minio omitted for Starter + + - role: monitoring + when: "'monitoring' not in skip_roles.split(',')" + + - role: cezen-backend + when: "'cezen-backend' not in skip_roles.split(',')" + + - role: cezen-ttyd + when: "'cezen-ttyd' not in skip_roles.split(',')" + + - role: cezen-nginx + when: "'cezen-nginx' not in skip_roles.split(',')" + + # JupyterLab — optional, install only when explicitly requested + - role: jupyterlab + when: > + 'jupyterlab' not in skip_roles.split(',') and + install_jupyterlab | default(false) | bool diff --git a/autoinstall/build-iso-starter.sh b/autoinstall/build-iso-starter.sh new file mode 100644 index 0000000..55e212b --- /dev/null +++ b/autoinstall/build-iso-starter.sh @@ -0,0 +1,199 @@ +#!/usr/bin/env bash +# ───────────────────────────────────────────────────────────── +# Nexus One AI — Starter Tier ISO Builder +# Hardware target: compact workstation (1× RTX 5090, 64 GB RAM, 2 TB NVMe) +# +# Usage: +# cd ~/aipackage +# bash autoinstall/build-iso-starter.sh +# +# Output: autoinstall/cezen-ai-starter-ubuntu2204.iso +# Flash to USB: +# diskutil unmountDisk /dev/diskN +# sudo dd if=cezen-ai-starter-ubuntu2204.iso of=/dev/diskN bs=4m status=progress +# ───────────────────────────────────────────────────────────── +set -e + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +PACKAGE_DIR="$(cd "$SCRIPT_DIR/.." && pwd)" +WORK_DIR="/tmp/cezen-iso-starter-work" +ORIGINAL_ISO="/tmp/ubuntu-22.04.5-live-server-amd64.iso" +OUTPUT_ISO="$SCRIPT_DIR/cezen-ai-starter-ubuntu2204.iso" +UBUNTU_URL="https://releases.ubuntu.com/22.04.5/ubuntu-22.04.5-live-server-amd64.iso" +TIER="starter" + +echo "╔══════════════════════════════════════════════════════╗" +echo "║ Nexus One AI — ISO Builder [STARTER TIER] ║" +echo "║ RTX 5090 · 64 GB RAM · 2 TB NVMe · 1–5 users ║" +echo "╚══════════════════════════════════════════════════════╝" +echo "" + +# ── Install build tools ──────────────────────── +echo "→ Installing build tools..." +apt-get update -qq +apt-get install -y -qq xorriso wget isolinux rsync +echo "✓ Tools ready" + +# ── Download Ubuntu ISO ──────────────────────── +if [ -f "$ORIGINAL_ISO" ]; then + echo "✓ Ubuntu ISO already downloaded" +else + echo "→ Downloading Ubuntu 22.04.5 Server ISO (~1.8 GB)..." + wget --show-progress -O "$ORIGINAL_ISO" "$UBUNTU_URL" + echo "✓ Downloaded" +fi + +# ── Extract ISO ──────────────────────────────── +echo "→ Extracting ISO..." +rm -rf "$WORK_DIR" +mkdir -p "$WORK_DIR" +xorriso -osirrox on \ + -indev "$ORIGINAL_ISO" \ + -extract / "$WORK_DIR" 2>/dev/null +chmod -R u+w "$WORK_DIR" +echo "✓ Extracted" + +# ── Inject Starter autoinstall files ────────── +echo "→ Injecting Starter autoinstall config..." +mkdir -p "$WORK_DIR/nocloud" +cp "$SCRIPT_DIR/user-data-starter" "$WORK_DIR/nocloud/user-data" +cp "$SCRIPT_DIR/meta-data" "$WORK_DIR/nocloud/meta-data" +echo "✓ user-data-starter and meta-data injected" + +# ── Online installer mode ────────────────────── +# The installed system pulls the current package from cgit on first boot. This +# keeps the ISO small and avoids shipping stale backend/portal code. +echo "✓ Online installer mode: package will be pulled from cgit on first boot" + +# ── Patch GRUB ──────────────────────────────── +echo "→ Patching GRUB config..." +GRUB_CFG="$WORK_DIR/boot/grub/grub.cfg" +cp "$GRUB_CFG" "$GRUB_CFG.orig" + +sed -i "s/set timeout=.*/set timeout=5/" "$GRUB_CFG" +sed -i "s/set timeout_style=.*/set timeout_style=countdown/" "$GRUB_CFG" +sed -i '/^\s*linux.*vmlinuz/s|---|autoinstall ds=nocloud\\;s=/cdrom/nocloud/ ---|' "$GRUB_CFG" + +# Update GRUB title to reflect Starter tier +sed -i 's/Install Ubuntu Server/Install Nexus One AI — Starter Tier/' "$GRUB_CFG" || true +echo "✓ GRUB patched" + +# ── Extract MBR and EFI boot data ───────────── +echo "→ Extracting boot data from original ISO..." +MBR_TEMPLATE=$(mktemp) +EFI_IMG=$(mktemp) +dd if="$ORIGINAL_ISO" bs=1 count=432 of="$MBR_TEMPLATE" 2>/dev/null + +EFI_LINE=$(fdisk -l "$ORIGINAL_ISO" 2>/dev/null | grep "EFI") +echo " EFI partition info: $EFI_LINE" +EFI_START=$(echo "$EFI_LINE" | awk '{print $2}') +EFI_SIZE=$(echo "$EFI_LINE" | awk '{print $4}') + +if [ -z "$EFI_START" ] || [ -z "$EFI_SIZE" ]; then + echo "ERROR: Could not detect EFI partition in ISO." + echo "Run: fdisk -l $ORIGINAL_ISO" + exit 1 +fi + +dd if="$ORIGINAL_ISO" bs=512 skip="$EFI_START" count="$EFI_SIZE" \ + of="$EFI_IMG" 2>/dev/null +echo "✓ EFI partition extracted (start=$EFI_START, size=$EFI_SIZE)" + +# ── Repack ISO (pass 1) ──────────────────────── +echo "→ Repacking ISO (pass 1)..." +xorriso -as mkisofs \ + -r \ + -V "CezenAI_Starter_2204" \ + -o "$OUTPUT_ISO" \ + --grub2-mbr "$MBR_TEMPLATE" \ + -partition_offset 16 \ + --mbr-force-bootable \ + -append_partition 2 28732ac11ff8d211ba4b00a0c93ec93b "$EFI_IMG" \ + -appended_part_as_gpt \ + -iso_mbr_part_type a2a0d0ebe5b9334487c068b6b72699c7 \ + -c "/boot.catalog" \ + -b "/boot/grub/i386-pc/eltorito.img" \ + -no-emul-boot \ + -boot-load-size 4 \ + -boot-info-table \ + --grub2-boot-info \ + -eltorito-alt-boot \ + -e "--interval:appended_partition_2:::" \ + -no-emul-boot \ + "$WORK_DIR" + +# ── Refresh md5sum.txt and repack (pass 2) ──── +echo "→ Refreshing md5sum.txt..." +FINAL_DIR=$(mktemp -d) +VERIFY_DIR=$(mktemp -d) +trap 'rm -rf "$WORK_DIR" "$MBR_TEMPLATE" "$EFI_IMG" "$FINAL_DIR" "$VERIFY_DIR"' EXIT + +xorriso -osirrox on -indev "$OUTPUT_ISO" -extract / "$FINAL_DIR" >/dev/null 2>&1 +chmod -R u+w "$FINAL_DIR" +( + cd "$FINAL_DIR" + rm -f md5sum.txt + find . -type f \ + ! -path './md5sum.txt' \ + ! -path './boot.catalog' \ + -print0 \ + | sort -z \ + | xargs -0 md5sum > md5sum.txt +) +echo "✓ md5sum.txt refreshed" + +echo "→ Repacking ISO (pass 2)..." +xorriso -as mkisofs \ + -r \ + -V "CezenAI_Starter_2204" \ + -o "$OUTPUT_ISO" \ + --grub2-mbr "$MBR_TEMPLATE" \ + -partition_offset 16 \ + --mbr-force-bootable \ + -append_partition 2 28732ac11ff8d211ba4b00a0c93ec93b "$EFI_IMG" \ + -appended_part_as_gpt \ + -iso_mbr_part_type a2a0d0ebe5b9334487c068b6b72699c7 \ + -c "/boot.catalog" \ + -b "/boot/grub/i386-pc/eltorito.img" \ + -no-emul-boot \ + -boot-load-size 4 \ + -boot-info-table \ + --grub2-boot-info \ + -eltorito-alt-boot \ + -e "--interval:appended_partition_2:::" \ + -no-emul-boot \ + "$FINAL_DIR" + +# ── Verify output ISO ────────────────────────── +echo "→ Verifying rebuilt ISO manifest..." +xorriso -osirrox on -indev "$OUTPUT_ISO" -extract / "$VERIFY_DIR" >/dev/null 2>&1 +chmod -R u+w "$VERIFY_DIR" +( + cd "$VERIFY_DIR" + md5sum -c md5sum.txt >/tmp/cezen-iso-md5check-starter.log 2>&1 || { + echo "ERROR: Rebuilt ISO failed its own md5sum.txt verification." + sed -n '1,40p' /tmp/cezen-iso-md5check-starter.log + exit 1 + } +) +echo "✓ Output ISO manifest verified" + +echo "" +echo "╔══════════════════════════════════════════════════════╗" +echo "║ Done! Starter Tier ISO ready. ║" +echo "╚══════════════════════════════════════════════════════╝" +echo "" +ls -lh "$OUTPUT_ISO" +echo "" +echo "→ Transfer to MacBook:" +echo " scp user@server:~/aipackage/autoinstall/cezen-ai-starter-ubuntu2204.iso ." +echo "" +echo "→ Flash to USB (macOS):" +echo " diskutil list # find USB e.g. /dev/disk4" +echo " diskutil unmountDisk /dev/disk4" +echo " sudo dd if=cezen-ai-starter-ubuntu2204.iso of=/dev/disk4 bs=4m status=progress" +echo "" +echo "→ Post-flash: boot the workstation from USB." +echo " Unattended install completes in ~10 min." +echo " First-boot wizard runs on tty1 — set IP, org name, admin password." +echo " Then run: sudo bash /opt/aipackage/install.sh --tier starter" diff --git a/autoinstall/build-iso.sh b/autoinstall/build-iso.sh index e48ad5d..ce48fe7 100644 --- a/autoinstall/build-iso.sh +++ b/autoinstall/build-iso.sh @@ -1,6 +1,6 @@ #!/usr/bin/env bash # ───────────────────────────────────────────────────────────── -# Cezen AI Suite — Custom ISO Builder +# Nexus One AI — Custom ISO Builder # Runs directly on Ubuntu 22.04 (run on the server) # # Usage: @@ -20,7 +20,7 @@ OUTPUT_ISO="$SCRIPT_DIR/cezen-ai-ubuntu2204.iso" UBUNTU_URL="https://releases.ubuntu.com/22.04.5/ubuntu-22.04.5-live-server-amd64.iso" echo "╔══════════════════════════════════════════╗" -echo "║ Cezen AI — ISO Builder ║" +echo "║ Nexus One AI — ISO Builder ║" echo "╚══════════════════════════════════════════╝" echo "" @@ -56,15 +56,10 @@ cp "$SCRIPT_DIR/user-data" "$WORK_DIR/nocloud/user-data" cp "$SCRIPT_DIR/meta-data" "$WORK_DIR/nocloud/meta-data" echo "✓ user-data and meta-data injected" -# Keep the installer payload on the ISO so first boot does not depend on a -# private Git server being reachable before the setup UI can start. -echo "→ Bundling Cezen AI installer payload..." -mkdir -p "$WORK_DIR/cezen-aipackage" -rsync -a --delete \ - --exclude 'autoinstall/cezen-ai-ubuntu2204.iso' \ - --exclude '*.iso' \ - "$PACKAGE_DIR/" "$WORK_DIR/cezen-aipackage/" -echo "✓ Installer payload bundled" +# Keep this as an online installer ISO. The installed system pulls the current +# Nexus One AI package from cgit during first boot, which keeps the ISO small and +# avoids shipping stale backend/portal code inside the image. +echo "✓ Online installer mode: package will be pulled from cgit on first boot" # ── Patch GRUB ──────────────────────────────── echo "→ Patching GRUB config..." @@ -82,20 +77,6 @@ sed -i "s/set timeout_style=.*/set timeout_style=countdown/" "$GRUB_CFG" sed -i '/^\s*linux.*vmlinuz/s|---|autoinstall ds=nocloud\\;s=/cdrom/nocloud/ ---|' "$GRUB_CFG" echo "✓ GRUB patched" -# ── Refresh installer checksum manifest ───────────────── -echo "→ Refreshing md5sum.txt..." -( - cd "$WORK_DIR" - rm -f md5sum.txt - find . -type f \ - ! -path './md5sum.txt' \ - ! -path './boot.catalog' \ - -print0 \ - | sort -z \ - | xargs -0 md5sum > md5sum.txt -) -echo "✓ md5sum.txt refreshed" - # ── Extract MBR and EFI partition from original ISO ──── echo "→ Extracting boot data from original ISO..." MBR_TEMPLATE=$(mktemp) @@ -119,7 +100,7 @@ dd if="$ORIGINAL_ISO" bs=512 skip="$EFI_START" count="$EFI_SIZE" \ echo "✓ EFI partition extracted (start=$EFI_START, size=$EFI_SIZE)" # ── Repack ISO ───────────────────────────────── -echo "→ Repacking ISO (this takes ~2 minutes)..." +echo "→ Repacking ISO (pass 1)..." xorriso -as mkisofs \ -r \ -V "Cezen_AI_Ubuntu2204" \ @@ -141,6 +122,64 @@ xorriso -as mkisofs \ -no-emul-boot \ "$WORK_DIR" +echo "→ Refreshing md5sum.txt from pass-1 ISO contents..." +FINAL_DIR=$(mktemp -d) +VERIFY_DIR=$(mktemp -d) +trap 'rm -rf "$WORK_DIR" "$MBR_TEMPLATE" "$EFI_IMG" "$FINAL_DIR" "$VERIFY_DIR"' EXIT +xorriso -osirrox on \ + -indev "$OUTPUT_ISO" \ + -extract / "$FINAL_DIR" >/dev/null 2>&1 +chmod -R u+w "$FINAL_DIR" +( + cd "$FINAL_DIR" + rm -f md5sum.txt + find . -type f \ + ! -path './md5sum.txt' \ + ! -path './boot.catalog' \ + -print0 \ + | sort -z \ + | xargs -0 md5sum > md5sum.txt +) +echo "✓ md5sum.txt refreshed" + +echo "→ Repacking ISO (pass 2 with final manifest)..." +xorriso -as mkisofs \ + -r \ + -V "Cezen_AI_Ubuntu2204" \ + -o "$OUTPUT_ISO" \ + --grub2-mbr "$MBR_TEMPLATE" \ + -partition_offset 16 \ + --mbr-force-bootable \ + -append_partition 2 28732ac11ff8d211ba4b00a0c93ec93b "$EFI_IMG" \ + -appended_part_as_gpt \ + -iso_mbr_part_type a2a0d0ebe5b9334487c068b6b72699c7 \ + -c "/boot.catalog" \ + -b "/boot/grub/i386-pc/eltorito.img" \ + -no-emul-boot \ + -boot-load-size 4 \ + -boot-info-table \ + --grub2-boot-info \ + -eltorito-alt-boot \ + -e "--interval:appended_partition_2:::" \ + -no-emul-boot \ + "$FINAL_DIR" + +# ── Verify output ISO integrity manifest ───────────────── +echo "→ Verifying rebuilt ISO manifest..." +xorriso -osirrox on \ + -indev "$OUTPUT_ISO" \ + -extract / "$VERIFY_DIR" >/dev/null 2>&1 +chmod -R u+w "$VERIFY_DIR" +( + cd "$VERIFY_DIR" + md5sum -c md5sum.txt >/tmp/cezen-iso-md5check.log 2>&1 || { + echo "ERROR: Rebuilt ISO failed its own md5sum.txt verification." + sed -n '1,40p' /tmp/cezen-iso-md5check.log + exit 1 + } +) +echo "✓ Output ISO manifest verified" + echo "" echo "╔══════════════════════════════════════════════════════╗" echo "║ Done! ║" diff --git a/autoinstall/firstboot-setup.sh b/autoinstall/firstboot-setup.sh index 4eb9dc9..d4582b7 100644 --- a/autoinstall/firstboot-setup.sh +++ b/autoinstall/firstboot-setup.sh @@ -1,6 +1,6 @@ #!/usr/bin/env bash # ───────────────────────────────────────────────────────────── -# Cezen AI Suite — First Boot Setup Wizard +# Nexus One AI — First Boot Setup Wizard # Runs on first boot after OS install via systemd service. # Uses whiptail for the TUI. # ───────────────────────────────────────────────────────────── @@ -8,6 +8,7 @@ set -e AIPACKAGE_DIR="/opt/aipackage" LOG_FILE="/var/log/cezen-setup.log" +export TERM="${TERM:-linux}" exec > >(tee -a "$LOG_FILE") 2>&1 detect_iface() { @@ -20,28 +21,28 @@ IFACE="${IFACE:-$(ip -o link show | awk -F': ' '$2 !~ /lo|docker|br-|veth/ {prin # ── Colors / terminal setup ──────────────────────────────── export NEWT_COLORS=' root=,black -window=white,navy -border=white,navy -title=white,navy -button=black,cyan -actbutton=white,red -checkbox=white,navy -actcheckbox=black,cyan -entry=white,navy -label=white,navy -listbox=white,navy -actlistbox=black,cyan -textbox=white,navy -acttextbox=black,cyan +window=black,white +border=white,black +title=black,white +button=black,white +actbutton=white,blue +checkbox=black,white +actcheckbox=white,blue +entry=black,white +label=black,white +listbox=black,white +actlistbox=white,blue +textbox=black,white +acttextbox=white,blue ' -TITLE=" Cezen AI Suite — Server Setup " +TITLE=" Nexus One AI — Server Setup " H=20 W=70 # ── Welcome ──────────────────────────────────────────────── whiptail --title "$TITLE" \ - --msgbox "\nWelcome to the Cezen AI Suite installer.\n\nThis wizard will configure your network and install the AI stack.\n\nMake sure this server is connected to the internet before continuing." \ + --msgbox "\nWelcome to the Nexus One AI installer.\n\nThis wizard will configure your network and install the AI stack.\n\nMake sure this server is connected to the internet before continuing." \ $H $W # ════════════════════════════════════════════════════════════ @@ -135,10 +136,11 @@ fi TIER=$(whiptail --title "$TITLE" \ --menu "\nStep 2 of 3: Select AI Package Tier\n\nChoose the tier that matches your hardware:" \ - $H $W 3 \ - "entry" "Entry — 3× NVIDIA L40S (48GB each) · Up to 20 users" \ - "mid" "Mid — RTX Pro 6000 BW (96GB each) · Up to 50 users" \ - "advanced" "Advanced — HGX H200 (141GB each) · 200+ users" \ + $H $W 4 \ + "starter" "Starter — 1× RTX 5090 / 32GB VRAM · Small team" \ + "basic" "Entry — 1× NVIDIA RTX Pro 6000 (96GB) · Up to 20 users" \ + "pro" "Pro — 2× RTX 5090 / RTX Pro class · Up to 100 users" \ + "max" "Max — 4–8× H100/H200/A100 class · 100+ users" \ 3>&1 1>&2 2>&3) # ════════════════════════════════════════════════════════════ @@ -177,7 +179,7 @@ whiptail --title "$TITLE" \ clear echo "" echo "╔══════════════════════════════════════════╗" -echo "║ Cezen AI Suite — Installing... ║" +echo "║ Nexus One AI — Installing... ║" echo "║ Check progress: journalctl -f ║" echo "╚══════════════════════════════════════════╝" echo "" diff --git a/autoinstall/user-data b/autoinstall/user-data index 24fac4d..6b70cc7 100644 --- a/autoinstall/user-data +++ b/autoinstall/user-data @@ -85,10 +85,11 @@ autoinstall: # mirrors instead of the custom ISO content. - sed -i 's/^deb cdrom:/# deb cdrom:/' /target/etc/apt/sources.list || true - # Install the Cezen AI payload from the ISO first. Fall back to Git only - # when building from older media that does not contain /cdrom/cezen-aipackage. + # Pull the Nexus One AI installer from cgit. The ISO intentionally does not + # bundle the full package, keeping the image small and the installed code + # current at deployment time. - mkdir -p /target/opt/aipackage - - cp -a /cdrom/cezen-aipackage/. /target/opt/aipackage/ || git clone https://cgit.cezentech.com/jinojose/aipackage.git /target/opt/aipackage + - git clone https://cgit.cezentech.com/jinojose/aipackage.git /target/opt/aipackage # Deploy the console setup wizard - mkdir -p /target/opt/cezen @@ -104,23 +105,18 @@ autoinstall: - | cat > /target/etc/systemd/system/cezen-setup.service << 'EOF' [Unit] - Description=Cezen AI Suite — Console Setup Wizard + Description=Nexus One AI — Console Setup Wizard After=cloud-final.service cloud-init.target network-online.target Wants=cloud-init.target network-online.target - Conflicts=getty@tty1.service ConditionPathExists=!/opt/cezen/.setup-done + OnFailure=getty@tty1.service [Service] - Type=idle - ExecStartPre=-/usr/bin/systemctl stop getty@tty1.service - ExecStart=/bin/bash -lc 'clear >/dev/tty1 2>/dev/null || true; exec /opt/cezen/firstboot-setup.sh' - StandardInput=tty-force - StandardOutput=tty - StandardError=tty - TTYPath=/dev/tty1 - TTYReset=yes - TTYVHangup=yes - TTYVTDisallocate=yes + Type=oneshot + WorkingDirectory=/opt/cezen + ExecStart=/bin/bash -lc 'clear >/dev/tty1 2>/dev/null || true; /usr/bin/openvt -c 1 -f -w -- env TERM=linux /opt/cezen/firstboot-setup.sh' + StandardOutput=journal+console + StandardError=journal+console Restart=no [Install] diff --git a/autoinstall/user-data-starter b/autoinstall/user-data-starter new file mode 100644 index 0000000..04cae77 --- /dev/null +++ b/autoinstall/user-data-starter @@ -0,0 +1,137 @@ +#cloud-config +# ───────────────────────────────────────────────────────────── +# Nexus One AI — Starter Tier Autoinstall +# Hardware target: compact workstation (Mini-ITX / SFF) +# GPU: 1× NVIDIA RTX 5090 (32 GB GDDR7) +# RAM: 64 GB DDR5 +# Storage: 1× 2 TB NVMe SSD (single drive — simple LVM) +# Network: 2.5 GbE (single interface) +# ───────────────────────────────────────────────────────────── +autoinstall: + version: 1 + + # ── Locale & keyboard ────────────────────────── + locale: en_IN.UTF-8 + keyboard: + layout: us + + # ── Network: DHCP during install; static config applied post-install ── + network: + network: + version: 2 + ethernets: + any-en: + dhcp4: true + match: + name: "en*" + any-eth: + dhcp4: true + match: + name: "eth*" + + # ── Storage: single 2 TB NVMe, simple LVM ───── + # Starter workstations have one drive — no RAID needed. + storage: + layout: + name: lvm + match: + size: largest + + # ── Identity ────────────────────────────────── + identity: + hostname: cezenai-starter + username: cezen + # Default password: cezen@123 (change via first-boot wizard) + password: "$6$I5VA.42G1xTeVhCv$KCLzqIKg/kbNHZyiTEMAY4FZsJMDDwoS90k6Ffb9VEwmcK.wuzlJNe3ceiEfLrzYzXEvqjYsLc7klAbeGPGab." + + # ── SSH ─────────────────────────────────────── + ssh: + install-server: true + allow-pw: true + + # ── Base packages ───────────────────────────── + packages: + - git + - curl + - wget + - python3 + - whiptail + - openssh-server + - nvme-cli # NVMe health / SMART monitoring + + # ── Late commands ───────────────────────────── + late-commands: + # Expand LVM to fill the full 2 TB NVMe + - lvextend -l +100%FREE /dev/ubuntu-vg/ubuntu-lv || true + - resize2fs /dev/ubuntu-vg/ubuntu-lv || true + + # Passwordless sudo for cezen (needed by install.sh + first-boot wizard) + - echo "cezen ALL=(ALL) NOPASSWD:ALL" > /target/etc/sudoers.d/cezen + - chmod 440 /target/etc/sudoers.d/cezen + + # Replace installer netplan with simple DHCP target config. + # The first-boot wizard will switch to static if desired. + - rm -f /target/etc/netplan/50-cloud-init.yaml /target/etc/netplan/00-installer-config.yaml || true + - | + cat > /target/etc/netplan/99-cezen-dhcp.yaml << 'EOF' + network: + version: 2 + ethernets: + any-en: + dhcp4: true + match: + name: "en*" + any-eth: + dhcp4: true + match: + name: "eth*" + EOF + + # Disable cdrom APT source + - sed -i 's/^deb cdrom:/# deb cdrom:/' /target/etc/apt/sources.list || true + + # Pull the Nexus One AI installer from cgit. The ISO intentionally does not + # bundle the full package, keeping the image small and the installed code + # current at deployment time. + - mkdir -p /target/opt/aipackage + - git clone https://cgit.cezentech.com/jinojose/aipackage.git /target/opt/aipackage + + # Write tier marker — used by install.sh and the portal branding system + - mkdir -p /target/opt/cezen + - echo "starter" > /target/opt/cezen/tier + + # Deploy first-boot TUI wizard + - cp /target/opt/aipackage/autoinstall/firstboot-setup.sh /target/opt/cezen/firstboot-setup.sh + - chmod +x /target/opt/cezen/firstboot-setup.sh + + # Set hostname + - echo "cezenai-starter" > /target/etc/hostname + - sed -i 's/aiserver/cezenai-starter/g' /target/etc/hosts || true + + # Systemd service: run first-boot wizard on tty1 once + - | + cat > /target/etc/systemd/system/cezen-setup.service << 'EOF' + [Unit] + Description=Nexus One AI — Console Setup Wizard (Starter) + After=cloud-final.service cloud-init.target network-online.target + Wants=cloud-init.target network-online.target + ConditionPathExists=!/opt/cezen/.setup-done + OnFailure=getty@tty1.service + + [Service] + Type=oneshot + WorkingDirectory=/opt/cezen + ExecStart=/bin/bash -lc 'clear >/dev/tty1 2>/dev/null || true; /usr/bin/openvt -c 1 -f -w -- env TERM=linux CEZEN_TIER=starter /opt/cezen/firstboot-setup.sh' + StandardOutput=journal+console + StandardError=journal+console + Restart=no + + [Install] + WantedBy=cloud-init.target + EOF + + - curtin in-target -- systemctl enable ssh + - curtin in-target -- systemctl enable cezen-setup.service + + user-data: + disable_root: false diff --git a/autoinstall/websetup/server.py b/autoinstall/websetup/server.py index ee99b0e..ceb0598 100644 --- a/autoinstall/websetup/server.py +++ b/autoinstall/websetup/server.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 """ -Cezen AI Suite — First Boot Web Setup Server +Nexus One AI — First Boot Web Setup Server Serves on port 80. Access from any browser on the same network. """ import os, json, subprocess, threading, time, socket, ipaddress @@ -123,7 +123,7 @@ HTML = r""" -Cezen AI Suite — Server Setup +Nexus One AI — Server Setup