103 lines
3.1 KiB
YAML
103 lines
3.1 KiB
YAML
---
|
|
# Ollama — local LLM serving (main inference engine for Entry tier)
|
|
# Probe for an existing Ollama binary. The result is registered as
# `ollama_binary`; the install task reads `ollama_binary.stat.exists`.
- name: Check if Ollama is already installed
  register: ollama_binary
  stat:
    path: /usr/local/bin/ollama
|
|
|
|
# Download and run the upstream installer, but only when the earlier stat
# check did not find /usr/local/bin/ollama.
- name: Install Ollama
  shell: |
    set -o pipefail
    curl -fsSL https://ollama.ai/install.sh | sh
  args:
    # pipefail is a bash feature; without it a failed curl would feed `sh`
    # an empty script and the task would report success.
    executable: /bin/bash
  when: not ollama_binary.stat.exists
  register: ollama_install
  # Older Ansible releases ignore `retries` unless `until` is set, so make the
  # retry condition explicit.
  until: ollama_install is succeeded
  retries: 3
  delay: 10
|
|
|
|
# Write the unit file that runs `ollama serve` as the cezen user.
# The unit listens on all interfaces on port 11434, stores models under
# /opt/cezen/models/ollama and pins CUDA to device 0.
- name: Create Ollama systemd service with GPU support
  copy:
    dest: /etc/systemd/system/ollama.service
    mode: "0644"
    content: |
      [Unit]
      Description=Ollama Service
      After=network-online.target

      [Service]
      ExecStart=/usr/local/bin/ollama serve
      User=cezen
      Group=cezen
      Restart=always
      RestartSec=3
      Environment="PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin"
      Environment="OLLAMA_HOST=0.0.0.0:11434"
      Environment="OLLAMA_MODELS=/opt/cezen/models/ollama"
      Environment="CUDA_VISIBLE_DEVICES=0"

      [Install]
      WantedBy=multi-user.target
|
|
|
|
# Ensure the model store referenced by OLLAMA_MODELS in the unit file exists
# and is owned by the account the service runs as.
- name: Create Ollama models directory
  file:
    path: /opt/cezen/models/ollama
    state: directory
    owner: cezen
    group: cezen
    # Set permissions explicitly so the result does not depend on the remote
    # umask; 0755 matches what a default 022 umask would have produced.
    mode: "0755"
|
|
|
|
# Reload systemd so it sees the current unit file, then enable the service at
# boot and make sure it is running.
- name: Enable and start Ollama
  systemd:
    daemon_reload: true
    name: ollama
    state: started
    enabled: true
|
|
|
|
# Block until something accepts TCP connections on localhost:11434, giving up
# after 60 seconds. This checks the port only, not an API response.
- name: Wait for Ollama API to be ready
  wait_for:
    port: 11434
    host: localhost
    timeout: 60
|
|
|
|
# Resolve the `tier` variable to the list of models to pull and store it in
# the `ollama_models` fact. An undefined `tier` is treated as 'basic'; a tier
# name that is not in the table gets the default list below.
- name: Select tier model set
  vars:
    # Models available for each known tier.
    _ollama_tier_models:
      starter:
        - 'phi3:mini'
        - 'nomic-embed-text'
      basic:
        - 'llama3.1:8b'
        - 'mistral:7b'
        - 'nomic-embed-text'
        - 'codellama:13b'
      entry:
        - 'llama3.1:8b'
        - 'mistral:7b'
        - 'nomic-embed-text'
        - 'codellama:13b'
      pro:
        - 'llama3.1:8b'
        - 'mistral:7b'
        - 'nomic-embed-text'
        - 'codellama:13b'
        - 'llama3.1:70b'
        - 'mixtral:8x7b'
        - 'deepseek-coder-v2:16b'
      max:
        - 'llama3.1:8b'
        - 'mistral:7b'
        - 'nomic-embed-text'
        - 'codellama:13b'
        - 'llama3.1:70b'
        - 'mixtral:8x7b'
        - 'deepseek-coder-v2:16b'
        - 'llama3.1:405b'
        - 'mixtral:8x22b'
    # Used when `tier` names something not listed above.
    _ollama_default_models:
      - 'llama3.1:8b'
      - 'mistral:7b'
      - 'nomic-embed-text'
  set_fact:
    ollama_models: "{{ _ollama_tier_models.get(tier | default('basic'), _ollama_default_models) }}"
|
|
|
|
# Pull every model in `ollama_models` through the local Ollama API.
# NOTE(review): `become_user` only takes effect when privilege escalation is
# enabled for this task (e.g. `become: true` on the play or role) — confirm.
- name: Pull tier Ollama models
  become_user: cezen
  command: ollama pull {{ item }}
  loop: "{{ ollama_models }}"
  environment:
    OLLAMA_HOST: "http://localhost:11434"
  register: ollama_pull_result
  # Older Ansible releases ignore `retries` unless `until` is set, so state the
  # retry condition explicitly; it is evaluated per loop item.
  until: ollama_pull_result is succeeded
  retries: 3
  delay: 15
  # NOTE: Pro/Max models are very large. Skip with --skip-model-pull for
  # bandwidth-constrained installs, then run models/pull-models.sh later.
  when: not (skip_model_pull | default(false))
|
|
|
|
# Open WebUI (chat interface on top of Ollama)
|
|
# Best-effort start of the Open WebUI container.
# If a container named open-webui already exists (in any state) it is started;
# otherwise a new one is created, publishing container port 8080 on host port
# 3001 and pointing the UI at Ollama through the `host-gateway` host entry.
# NOTE(review): 172.17.0.1 is presumably the default Docker bridge address of
# the host — confirm for hosts with a customised bridge network.
- name: Start Open WebUI via Docker CLI
  args:
    executable: /bin/bash
  # The {{ "{{" }} / {{ "}}" }} sequences make Jinja emit the literal braces
  # that `docker ps --format` expects.
  shell: |
    if docker ps -a --format '{{ "{{" }}.Names{{ "}}" }}' | grep -qx open-webui; then
      docker start open-webui
    else
      docker run -d \
        --name open-webui \
        --restart always \
        -p 3001:8080 \
        --add-host=host-gateway:172.17.0.1 \
        -v open-webui:/app/backend/data \
        -e OLLAMA_BASE_URL=http://host-gateway:11434 \
        ghcr.io/open-webui/open-webui:main
    fi
  register: webui_result
  # A non-zero exit is tolerated when Docker reports the name as already in use.
  failed_when: not (webui_result.rc == 0 or 'already in use' in (webui_result.stderr | default('')))
  changed_when: webui_result.rc == 0
  # Any remaining failure is reported but does not abort the play.
  ignore_errors: true
|