#!/usr/bin/env bash
# File: aipackage/scripts/cezen-feasibility.sh
# Nexus One AI feasibility checker.
# Runs before installation to classify existing hardware for software-only or appliance deployments.
set -euo pipefail

# Usage: cezen-feasibility.sh [OUTPUT_JSON]
# OUTPUT_JSON defaults to /opt/cezen/feasibility.json.
OUT="${1:-/opt/cezen/feasibility.json}"
# Best effort only: if the directory cannot be created, the later copy step
# falls back to ./feasibility.json, so a failure here must not abort the run.
mkdir -p "$(dirname "$OUT")" 2>/dev/null || true

# The run of X's must END the template: BSD/macOS mktemp only substitutes
# trailing X's, so a ".json" suffix would yield a literal, predictable name
# (and a failure on the second run). The scratch file needs no extension.
tmp_json="$(mktemp /tmp/cezen-feasibility.XXXXXX)"
# Remove the scratch file on every exit path, including failures under set -e.
trap 'rm -f -- "$tmp_json"' EXIT

python3 - "$tmp_json" <<'PY'
import json, os, platform, shutil, socket, subprocess, sys
from pathlib import Path

# Destination of the JSON report: the path given as the first CLI argument.
out = Path(sys.argv[1])

def run(cmd, timeout=5):
    """Execute ``cmd`` (an argument list) and return its stripped stdout.

    stderr is discarded. Any failure -- missing executable, non-zero exit
    status, or exceeding ``timeout`` seconds -- yields an empty string
    instead of raising, so callers can treat "" as "not available".
    """
    try:
        completed = subprocess.run(
            cmd,
            stdout=subprocess.PIPE,
            stderr=subprocess.DEVNULL,
            text=True,
            timeout=timeout,
            check=True,
        )
        return completed.stdout.strip()
    except Exception:
        return ""

def read(path, default=""):
    """Return the whitespace-stripped text of ``path``.

    Undecodable bytes are replaced rather than raising. If the file cannot
    be read for any reason, ``default`` is returned instead.
    """
    try:
        text = Path(path).read_text(errors="replace")
        return text.strip()
    except Exception:
        return default

def gb(n):
    """Convert a byte count to units of 1024**3 bytes, rounded to one decimal."""
    bytes_per_unit = 1 << 30  # same value as 1024 ** 3
    return round(n / bytes_per_unit, 1)

def os_info():
    """Describe the operating system for the report.

    Parses the ``KEY=value`` lines of the os-release file and returns a dict
    with the keys ``name``, ``id``, ``version_id`` and ``kernel``. Missing
    fields come back as "" (``name`` falls back to ``platform.platform()``).
    """
    fields = {}
    # os-release(5): /usr/lib/os-release is the fallback location when
    # /etc/os-release does not exist.
    text = read("/etc/os-release") or read("/usr/lib/os-release")
    for raw in text.splitlines():
        line = raw.strip()
        # Skip blanks, comments, and anything that is not an assignment.
        if not line or line.startswith("#") or "=" not in line:
            continue
        key, value = line.split("=", 1)
        value = value.strip()
        # Values may be wrapped in either double or single quotes; remove
        # one matching pair rather than only double quotes.
        if len(value) >= 2 and value[0] == value[-1] and value[0] in "\"'":
            value = value[1:-1]
        fields[key.strip()] = value
    return {
        "name": fields.get("PRETTY_NAME") or platform.platform(),
        "id": fields.get("ID", ""),
        "version_id": fields.get("VERSION_ID", ""),
        "kernel": platform.release(),
    }

def cpu_info():
    """Return ``{"model": ..., "cores": ...}`` for the host CPU.

    The model is taken from the first "model name" line of /proc/cpuinfo;
    when no such line exists, ``platform.processor()`` is used. ``cores`` is
    ``os.cpu_count()``, or 0 when that is unknown.
    """
    model_lines = (
        entry
        for entry in read("/proc/cpuinfo").splitlines()
        if entry.lower().startswith("model name")
    )
    first_match = next(model_lines, None)
    if first_match is None:
        model = ""
    else:
        model = first_match.split(":", 1)[1].strip()
    return {"model": model or platform.processor(), "cores": os.cpu_count() or 0}

def mem_gb():
    """Return total physical memory in GB (one decimal), or 0 if unknown.

    Sources are tried in order:
      1. the ``MemTotal:`` line of /proc/meminfo (value treated as kB),
      2. ``sysctl -n hw.memsize`` (a byte count),
      3. ``os.sysconf`` page count times page size.
    """
    meminfo_lines = read("/proc/meminfo").splitlines()
    total_kb = next(
        (entry.split()[1] for entry in meminfo_lines if entry.startswith("MemTotal:")),
        None,
    )
    if total_kb is not None:
        return round(int(total_kb) / 1024 / 1024, 1)

    memsize = run(["sysctl", "-n", "hw.memsize"])
    if memsize.isdigit():
        return gb(int(memsize))

    try:
        page_count = os.sysconf("SC_PHYS_PAGES")
        page_bytes = os.sysconf("SC_PAGE_SIZE")
        if page_count and page_bytes:
            return gb(int(page_count) * int(page_bytes))
    except Exception:
        # sysconf names are not available everywhere; fall through to 0.
        pass
    return 0

def disk_info():
    """Return total, free and used space of the "/" filesystem.

    The result has the keys ``total_gb``, ``free_gb`` and ``used_gb``, each
    converted from bytes with ``gb``.
    """
    usage = shutil.disk_usage("/")
    return {
        f"{field}_gb": gb(getattr(usage, field))
        for field in ("total", "free", "used")
    }

def gpu_info():
    """List detected NVIDIA GPUs as dicts with ``name``, ``vram_gb``, ``driver``.

    When ``nvidia-smi`` is on PATH, its CSV output is parsed, one GPU per
    row. If that yields nothing, the PCI bus is scanned for vendor id 0x10de
    and a single placeholder entry with 0 GB VRAM is reported instead.
    """
    found = []
    if shutil.which("nvidia-smi"):
        fields = "name,memory.total,driver_version"
        listing = run(["nvidia-smi", f"--query-gpu={fields}", "--format=csv,noheader,nounits"])
        for record in listing.splitlines():
            cols = [col.strip() for col in record.split(",")]
            if len(cols) < 2:
                continue
            try:
                # memory.total is divided by 1024 to get GB -- presumably the
                # value is in MiB with "nounits"; confirm against nvidia-smi.
                vram_gb = round(float(cols[1]) / 1024, 1)
            except Exception:
                # Non-numeric memory value: keep the GPU, report 0 GB.
                vram_gb = 0
            driver = cols[2] if len(cols) > 2 else ""
            found.append({"name": cols[0], "vram_gb": vram_gb, "driver": driver})
    if found:
        return found

    # No usable nvidia-smi data: look for an NVIDIA device on the PCI bus.
    vendor_files = Path("/sys/bus/pci/devices").glob("*/vendor")
    if any(read(vendor_file).lower() == "0x10de" for vendor_file in vendor_files):
        found.append({"name": "NVIDIA GPU detected (driver not ready)", "vram_gb": 0, "driver": ""})
    return found

def port_open(host, port):
    """Return True if a TCP connection to ``host``:``port`` succeeds within 1 s.

    Any error (refused, timed out, unresolvable host) counts as False.
    """
    address = (host, port)
    try:
        # Only the connect outcome matters; the socket is closed right away.
        with socket.create_connection(address, timeout=1):
            pass
    except Exception:
        reachable = False
    else:
        reachable = True
    return reachable

def tool_state():
    """Report which supporting tools are available on PATH.

    Every value is a bool. ``docker_running`` is True only when docker is
    installed and ``docker info`` produces output within 3 seconds.
    """
    def on_path(tool):
        return bool(shutil.which(tool))

    has_docker = on_path("docker")
    docker_running = bool(run(["docker", "info"], timeout=3)) if has_docker else False
    return {
        "docker": has_docker,
        "docker_running": docker_running,
        "ansible": on_path("ansible-playbook"),
        "python3": on_path("python3"),
        "curl": on_path("curl"),
        "git": on_path("git"),
    }

def recommend(mem, disk, gpus, cpu_cores):
    """Pick a deployment tier/profile from the measured hardware.

    Args:
        mem: total RAM in GB.
        disk: free disk space in GB.
        gpus: list of GPU dicts; only their ``vram_gb`` entry is used.
        cpu_cores: number of CPU cores.

    Returns:
        Dict with ``recommended_tier``, ``recommended_profile``,
        ``estimated_concurrent_users`` and a list of advisory ``notes``.
    """
    vram_sizes = [g.get("vram_gb", 0) for g in gpus]
    max_vram = max(vram_sizes, default=0)
    # Only GPUs that report some VRAM count towards multi-GPU tiers.
    gpu_count = sum(1 for size in vram_sizes if size > 0)
    notes = []

    # Rows are checked top-down and the first match wins:
    # (min VRAM of the largest GPU, min GPU count, tier, profile, users, note)
    gpu_ladder = (
        (120, 4, "max", "gpu-max", "100+", None),
        (80, 2, "pro", "gpu-pro", "20-100", None),
        (48, 0, "basic", "gpu-standard", "5-20", None),
        (24, 0, "starter", "gpu-starter", "1-10", None),
        (8, 0, "starter", "gpu-lite", "1-5", "GPU is suitable for small quantized models only."),
    )
    for min_vram, min_gpus, tier, profile, concurrency, note in gpu_ladder:
        if max_vram >= min_vram and gpu_count >= min_gpus:
            if note:
                notes.append(note)
            break
    else:
        # No GPU row matched: choose between CPU inference and core-only.
        if mem >= 32 and cpu_cores >= 8:
            tier, profile, concurrency = "starter", "cpu-ai", "1-3"
            notes.append("No usable NVIDIA VRAM found; local CPU inference is limited. Use cloud/external model fallback for better UX.")
        else:
            tier, profile, concurrency = "starter", "core", "1-2"
            notes.append("Hardware is best for portal, RAG management, workflows, and external/cloud model routing.")

    # Resource warnings apply regardless of the chosen profile.
    if mem < 16:
        notes.append("RAM below 16 GB; avoid local model serving.")
    if disk < 100:
        notes.append("Less than 100 GB free disk; model storage and document indexing will be constrained.")

    return {
        "recommended_tier": tier,
        "recommended_profile": profile,
        "estimated_concurrent_users": concurrency,
        "notes": notes,
    }

# Probe the host once; everything below is derived from these results.
mem = mem_gb()
disk = disk_info()
gpus = gpu_info()
cpu = cpu_info()
tools = tool_state()
rec = recommend(mem, disk["free_gb"], gpus, cpu["cores"])

free_gb = disk["free_gb"]
vram_sizes = [g.get("vram_gb", 0) for g in gpus]


def gpus_with(min_vram):
    """Count GPUs whose VRAM is at least ``min_vram`` GB."""
    return sum(1 for size in vram_sizes if size >= min_vram)


# Feature flags: each is a plain threshold on RAM, free disk or GPU VRAM.
features = {
    "portal": True,
    "users_auth": True,
    "document_intelligence": mem >= 8,
    "rag_chromadb": mem >= 16 and free_gb >= 50,
    "ollama_cpu": mem >= 32,
    "ollama_gpu": gpus_with(8) > 0,
    "vllm": gpus_with(24) > 0,
    "fine_tuning_qlora": gpus_with(24) > 0,
    "distributed_training": gpus_with(48) >= 2,
    "monitoring": True,
    "software_only": True,
    "air_gapped_ready": True,
}

# A local port counts as "in use" when something accepts a TCP connection on it.
ports_in_use = {}
for port in [80, 8080, 11434, 8000, 3000, 8888]:
    ports_in_use[str(port)] = port_open("127.0.0.1", port)

# UTC timestamp from date(1); run() already yields "" if the command fails.
generated_at = run(["date", "-u", "+%Y-%m-%dT%H:%M:%SZ"])

report = {
    "schema": "cezen.feasibility.v1",
    "generated_at": generated_at,
    "host": socket.gethostname(),
    "os": os_info(),
    "cpu": cpu,
    "ram_gb": mem,
    "disk": disk,
    "gpus": gpus,
    "tools": tools,
    "ports_in_use": ports_in_use,
    "recommendation": rec,
    "features": features,
}
out.write_text(json.dumps(report, indent=2))
PY

# Publish the report to $OUT. If that location is not writable, fall back to
# the current directory and say so on stderr.
if ! cp -- "$tmp_json" "$OUT" 2>/dev/null; then
  printf 'warning: cannot write %s; falling back to ./feasibility.json\n' "$OUT" >&2
  OUT="./feasibility.json"
  if ! cp -- "$tmp_json" "$OUT"; then
    # Clean up before aborting: under set -e a bare failing cp would exit
    # immediately and leave the scratch file behind.
    rm -f -- "$tmp_json"
    printf 'error: could not write feasibility report to %s\n' "$OUT" >&2
    exit 1
  fi
fi
rm -f -- "$tmp_json"

python3 - "$OUT" <<'PY'
import json, sys

# Print a human-readable summary of the JSON report whose path is argv[1].
p = sys.argv[1]
# Use a context manager so the file handle is closed deterministically, and
# read as UTF-8 so the result does not depend on the locale encoding.
with open(p, encoding="utf-8") as fh:
    d = json.load(fh)
r = d["recommendation"]
print("")
print("Nexus One AI Feasibility Report")
print("--------------------------------")
print(f"Host: {d['host']}")
print(f"OS: {d['os']['name']}")
print(f"CPU: {d['cpu']['cores']} cores | RAM: {d['ram_gb']} GB | Free disk: {d['disk']['free_gb']} GB")
if d["gpus"]:
    # One entry per GPU, joined on a single line.
    print("GPU: " + "; ".join(f"{g['name']} ({g.get('vram_gb', 0)} GB VRAM)" for g in d["gpus"]))
else:
    print("GPU: none detected")
print("")
print(f"Recommended tier: {r['recommended_tier'].upper()}")
print(f"Recommended profile: {r['recommended_profile']}")
print(f"Estimated concurrency: {r['estimated_concurrent_users']} users")
if r["notes"]:
    print("Notes:")
    for n in r["notes"]:
        print(f"  - {n}")
print("")
print(f"JSON report: {p}")
PY