aipackage/scripts/cezen-feasibility.sh

219 lines
7.1 KiB
Bash

#!/usr/bin/env bash
# Nexus One AI feasibility checker.
# Runs before installation to classify existing hardware for software-only or
# appliance deployments.
#
# Usage: cezen-feasibility.sh [OUTPUT_JSON]
#   OUTPUT_JSON  Destination of the JSON report
#                (default: /opt/cezen/feasibility.json).
set -euo pipefail

OUT="${1:-/opt/cezen/feasibility.json}"

# Best effort only: the directory may not be creatable (for example when not
# running as root), so a failure here is deliberately ignored.
mkdir -p -- "$(dirname -- "$OUT")" 2>/dev/null || true

# The template must END in X's. GNU mktemp tolerates a suffix after the X's,
# but BSD/macOS mktemp only substitutes a trailing run of X's, which would
# yield a fixed, predictable file name.
tmp_json="$(mktemp "${TMPDIR:-/tmp}/cezen-feasibility.XXXXXX")"

# Remove the scratch file on every exit path, including aborts under `set -e`.
cleanup_tmp_json() { rm -f -- "$tmp_json"; }
trap cleanup_tmp_json EXIT
# Gather hardware facts with an inline Python program read from stdin. The
# quoted 'PY' delimiter stops the shell from expanding anything inside the
# here-document; the temp file path is handed over as the first argument.
python3 - "$tmp_json" <<'PY'
import json, os, platform, shutil, socket, subprocess, sys
from pathlib import Path
# Path the JSON report is written to (first command-line argument).
out = Path(sys.argv[1])
def run(cmd, timeout=5):
    """Execute ``cmd`` and return its stdout with surrounding whitespace removed.

    stderr is discarded. Any failure (non-zero exit status, missing
    executable, exceeding ``timeout`` seconds, ...) yields ``""``.
    """
    try:
        completed = subprocess.run(
            cmd,
            stdout=subprocess.PIPE,
            stderr=subprocess.DEVNULL,
            text=True,
            timeout=timeout,
            check=True,
        )
        return completed.stdout.strip()
    except Exception:
        return ""
def read(path, default=""):
    """Return the stripped text content of ``path``.

    Undecodable bytes are replaced instead of raising. If the file cannot be
    read for any reason, ``default`` is returned unchanged.
    """
    try:
        text = Path(path).read_text(errors="replace")
    except Exception:
        return default
    return text.strip()
def gb(n):
    """Convert a byte count to units of 1024**3 bytes, rounded to one decimal."""
    bytes_per_unit = 1024 ** 3
    return round(n / bytes_per_unit, 1)
def os_info():
    """Describe the operating system.

    Parses the ``KEY=value`` lines of ``/etc/os-release`` and returns a dict
    with the keys ``name``, ``id``, ``version_id`` and ``kernel``. ``name``
    falls back to ``platform.platform()`` when ``PRETTY_NAME`` is missing or
    empty; ``id`` and ``version_id`` default to ``""``.
    """
    fields = {}
    for raw in read("/etc/os-release").splitlines():
        line = raw.strip()
        # Blank lines and '#' comments carry no assignment.
        if not line or line.startswith("#") or "=" not in line:
            continue
        key, value = line.split("=", 1)
        value = value.strip()
        # The os-release format allows values wrapped in double OR single
        # quotes; remove one matching pair of either kind.
        if len(value) >= 2 and value[0] == value[-1] and value[0] in "\"'":
            value = value[1:-1]
        fields[key.strip()] = value
    return {
        "name": fields.get("PRETTY_NAME") or platform.platform(),
        "id": fields.get("ID", ""),
        "version_id": fields.get("VERSION_ID", ""),
        "kernel": platform.release(),
    }
def cpu_info():
    """Return the CPU model string and the logical core count.

    The model is taken from the first ``model name`` line of
    ``/proc/cpuinfo``; when that yields nothing, ``platform.processor()`` is
    used instead. ``cores`` is ``os.cpu_count()`` or 0 if unknown.
    """
    model_values = (
        entry.split(":", 1)[1].strip()
        for entry in read("/proc/cpuinfo").splitlines()
        if entry.lower().startswith("model name")
    )
    # Only the first matching line is consumed.
    model = next(model_values, "")
    if not model:
        model = platform.processor()
    return {"model": model, "cores": os.cpu_count() or 0}
def mem_gb():
    """Return total physical memory in GB (one decimal), or 0 if unknown.

    Sources are tried in order:
      1. the ``MemTotal:`` line of ``/proc/meminfo`` (its number is divided
         by 1024 twice, i.e. it is treated as KiB),
      2. ``sysctl -n hw.memsize`` (passed to ``gb``),
      3. ``os.sysconf`` page count multiplied by page size (passed to ``gb``).
    """
    total_line = next(
        (ln for ln in read("/proc/meminfo").splitlines() if ln.startswith("MemTotal:")),
        None,
    )
    if total_line is not None:
        return round(int(total_line.split()[1]) / 1024 / 1024, 1)

    hw_memsize = run(["sysctl", "-n", "hw.memsize"])
    if hw_memsize.isdigit():
        return gb(int(hw_memsize))

    try:
        page_count = os.sysconf("SC_PHYS_PAGES")
        bytes_per_page = os.sysconf("SC_PAGE_SIZE")
        if page_count and bytes_per_page:
            return gb(int(page_count) * int(bytes_per_page))
    except Exception:
        # sysconf names are platform dependent; fall through to "unknown".
        pass
    return 0
def disk_info():
    """Return total, free and used space of the ``/`` filesystem via ``gb``."""
    total_bytes, used_bytes, free_bytes = shutil.disk_usage("/")
    return {
        "total_gb": gb(total_bytes),
        "free_gb": gb(free_bytes),
        "used_gb": gb(used_bytes),
    }
def gpu_info():
    """List detected NVIDIA GPUs as dicts with ``name``, ``vram_gb``, ``driver``.

    When ``nvidia-smi`` is on PATH its CSV output is parsed, one GPU per row;
    ``memory.total`` is divided by 1024 to obtain ``vram_gb`` (0 when the
    value is not numeric). If that produces nothing, the PCI vendor files
    under ``/sys/bus/pci/devices`` are scanned for vendor id ``0x10de`` and a
    single placeholder entry with 0 VRAM is reported on the first hit.
    """
    detected = []
    if shutil.which("nvidia-smi"):
        fields = "name,memory.total,driver_version"
        listing = run(["nvidia-smi", f"--query-gpu={fields}", "--format=csv,noheader,nounits"])
        for record in listing.splitlines():
            cols = [col.strip() for col in record.split(",")]
            if len(cols) < 2:
                # Not a "name, memory[, driver]" row.
                continue
            try:
                vram_gb = round(float(cols[1]) / 1024, 1)
            except Exception:
                vram_gb = 0
            driver = cols[2] if len(cols) > 2 else ""
            detected.append({"name": cols[0], "vram_gb": vram_gb, "driver": driver})
    if detected:
        return detected

    # No usable nvidia-smi data: look for the hardware itself on the PCI bus.
    nvidia_on_bus = any(
        read(vendor_file).lower() == "0x10de"
        for vendor_file in Path("/sys/bus/pci/devices").glob("*/vendor")
    )
    if nvidia_on_bus:
        detected.append({"name": "NVIDIA GPU detected (driver not ready)", "vram_gb": 0, "driver": ""})
    return detected
def port_open(host, port):
    """Return True if a TCP connection to ``host:port`` succeeds within 1 second.

    Any error (refused, timed out, unresolvable host, ...) yields False. The
    probe connection is closed immediately.
    """
    reachable = False
    try:
        with socket.create_connection((host, port), timeout=1):
            reachable = True
    except Exception:
        reachable = False
    return reachable
def tool_state():
    """Report which helper tools are on PATH and whether ``docker info`` answers.

    Returns a dict of booleans keyed ``docker``, ``docker_running``,
    ``ansible``, ``python3``, ``curl`` and ``git``. ``docker info`` is only
    attempted (3 second timeout) when the ``docker`` binary is present.
    """
    def on_path(binary):
        return bool(shutil.which(binary))

    has_docker = on_path("docker")
    state = {"docker": has_docker}
    if has_docker:
        state["docker_running"] = bool(run(["docker", "info"], timeout=3))
    else:
        state["docker_running"] = False
    for key, binary in (
        ("ansible", "ansible-playbook"),
        ("python3", "python3"),
        ("curl", "curl"),
        ("git", "git"),
    ):
        state[key] = on_path(binary)
    return state
def recommend(mem, disk, gpus, cpu_cores):
    """Pick a deployment tier/profile from the measured hardware.

    Args:
        mem: value compared against the 32 and 16 "GB RAM" thresholds.
        disk: value compared against the 100 "GB free disk" threshold.
        gpus: list of dicts; only each entry's ``vram_gb`` (default 0) is used.
        cpu_cores: core count, used for the CPU-only profile (>= 8).

    Returns:
        Dict with ``recommended_tier``, ``recommended_profile``,
        ``estimated_concurrent_users`` and a list of ``notes``.
    """
    vram_sizes = [g.get("vram_gb", 0) for g in gpus]
    max_vram = max(vram_sizes) if vram_sizes else 0
    gpu_count = sum(1 for v in vram_sizes if v > 0)

    # Ordered best-first: (min largest-GPU VRAM, min GPUs with VRAM,
    # tier, profile, concurrency, optional note). First match wins.
    gpu_ladder = (
        (120, 4, "max", "gpu-max", "100+", None),
        (80, 2, "pro", "gpu-pro", "20-100", None),
        (48, 0, "basic", "gpu-standard", "5-20", None),
        (24, 0, "starter", "gpu-starter", "1-10", None),
        (8, 0, "starter", "gpu-lite", "1-5", "GPU is suitable for small quantized models only."),
    )

    notes = []
    for min_vram, min_gpus, tier, profile, concurrency, note in gpu_ladder:
        if max_vram >= min_vram and gpu_count >= min_gpus:
            if note:
                notes.append(note)
            break
    else:
        # No GPU rung matched: choose between the CPU-inference and core profiles.
        if mem >= 32 and cpu_cores >= 8:
            tier, profile, concurrency = "starter", "cpu-ai", "1-3"
            notes.append("No usable NVIDIA VRAM found; local CPU inference is limited. Use cloud/external model fallback for better UX.")
        else:
            tier, profile, concurrency = "starter", "core", "1-2"
            notes.append("Hardware is best for portal, RAG management, workflows, and external/cloud model routing.")

    # Resource warnings apply regardless of the chosen profile.
    if mem < 16:
        notes.append("RAM below 16 GB; avoid local model serving.")
    if disk < 100:
        notes.append("Less than 100 GB free disk; model storage and document indexing will be constrained.")

    return {
        "recommended_tier": tier,
        "recommended_profile": profile,
        "estimated_concurrent_users": concurrency,
        "notes": notes,
    }
# Probe the host once, then derive the recommendation and feature flags.
ram_gb = mem_gb()
disk_stats = disk_info()
gpu_list = gpu_info()
cpu_stats = cpu_info()
tool_flags = tool_state()
free_disk_gb = disk_stats["free_gb"]
rec = recommend(ram_gb, free_disk_gb, gpu_list, cpu_stats["cores"])


def _gpus_with_vram(min_gb):
    """Count GPUs whose ``vram_gb`` (default 0) is at least ``min_gb``."""
    return sum(1 for g in gpu_list if g.get("vram_gb", 0) >= min_gb)


features = {
    "portal": True,
    "users_auth": True,
    "document_intelligence": ram_gb >= 8,
    "rag_chromadb": ram_gb >= 16 and free_disk_gb >= 50,
    "ollama_cpu": ram_gb >= 32,
    "ollama_gpu": _gpus_with_vram(8) > 0,
    "vllm": _gpus_with_vram(24) > 0,
    "fine_tuning_qlora": _gpus_with_vram(24) > 0,
    "distributed_training": _gpus_with_vram(48) >= 2,
    "monitoring": True,
    "software_only": True,
    "air_gapped_ready": True,
}

# UTC timestamp from the `date` utility; empty string if the call fails.
generated_at = run(["date", "-u", "+%Y-%m-%dT%H:%M:%SZ"])
host_name = socket.gethostname()
os_details = os_info()

# True means something accepted a TCP connection on that local port.
ports_in_use = {}
for port in [80, 8080, 11434, 8000, 3000, 8888]:
    ports_in_use[str(port)] = port_open("127.0.0.1", port)

report = {
    "schema": "cezen.feasibility.v1",
    "generated_at": generated_at,
    "host": host_name,
    "os": os_details,
    "cpu": cpu_stats,
    "ram_gb": ram_gb,
    "disk": disk_stats,
    "gpus": gpu_list,
    "tools": tool_flags,
    "ports_in_use": ports_in_use,
    "recommendation": rec,
    "features": features,
}
out.write_text(json.dumps(report, indent=2))
PY
#######################################
# Copy the generated report to its final location.
# If $OUT cannot be written, warn on stderr and fall back to
# ./feasibility.json in the current directory.
# Globals:   OUT (read; rewritten to the fallback path when it is used)
# Arguments: $1 - path of the freshly generated JSON file
# Returns:   0 on success, non-zero if even the fallback copy fails
#######################################
publish_report() {
  local src=$1
  # `--` protects against a user-supplied destination starting with '-'.
  if cp -- "$src" "$OUT" 2>/dev/null; then
    return 0
  fi
  printf 'warning: cannot write %s; saving report to ./feasibility.json instead\n' \
    "$OUT" >&2
  OUT="./feasibility.json"
  cp -- "$src" "$OUT"
}

publish_report "$tmp_json"
rm -f -- "$tmp_json"
#######################################
# Print a human-readable summary of a feasibility report.
# Arguments: $1 - path to the JSON report
# Outputs:   the summary on stdout
# Returns:   exit status of the inline Python program
#######################################
print_feasibility_summary() {
  python3 - "$1" <<'PY'
import json, sys

p = sys.argv[1]
# Use a context manager so the file handle is closed promptly, and read as
# UTF-8 explicitly instead of relying on the locale's default encoding.
with open(p, encoding="utf-8") as fh:
    d = json.load(fh)
r = d["recommendation"]

print("")
print("Nexus One AI Feasibility Report")
print("--------------------------------")
print(f"Host: {d['host']}")
print(f"OS: {d['os']['name']}")
print(f"CPU: {d['cpu']['cores']} cores | RAM: {d['ram_gb']} GB | Free disk: {d['disk']['free_gb']} GB")
if d["gpus"]:
    print("GPU: " + "; ".join(f"{g['name']} ({g.get('vram_gb', 0)} GB VRAM)" for g in d["gpus"]))
else:
    print("GPU: none detected")
print("")
print(f"Recommended tier: {r['recommended_tier'].upper()}")
print(f"Recommended profile: {r['recommended_profile']}")
print(f"Estimated concurrency: {r['estimated_concurrent_users']} users")
if r["notes"]:
    print("Notes:")
    for n in r["notes"]:
        print(f" - {n}")
print("")
print(f"JSON report: {p}")
PY
}

print_feasibility_summary "$OUT"