aipackage/ansible/roles/ollama/tasks/main.yml

103 lines
3.1 KiB
YAML

---
# Ollama — local LLM serving (main inference engine for Entry tier)
# Probe for the Ollama binary; the result is stored in `ollama_binary` so
# later tasks can test `ollama_binary.stat.exists`.
- name: Check if Ollama is already installed
  register: ollama_binary
  stat:
    path: /usr/local/bin/ollama
# Download and run the upstream install script, but only when the binary
# was not found by the preceding stat check.
- name: Install Ollama
  # `pipefail` is required: without it the pipeline's exit status is that of
  # `sh`, so a failed or truncated download would be reported as success.
  shell: |
    set -o pipefail
    curl -fsSL https://ollama.ai/install.sh | sh
  args:
    # pipefail is a bash option; plain /bin/sh may not support it.
    executable: /bin/bash
  when: not ollama_binary.stat.exists
  # `retries`/`delay` are only honoured on every Ansible version when paired
  # with an `until` condition, so retry explicitly until the task succeeds.
  register: ollama_install
  until: ollama_install is succeeded
  retries: 3
  delay: 10
# Write the systemd unit that runs `ollama serve` as the cezen user, listening
# on all interfaces at port 11434 and storing models under
# /opt/cezen/models/ollama. The result is registered so the service can be
# restarted when the unit file actually changed.
- name: Create Ollama systemd service with GPU support
  copy:
    dest: /etc/systemd/system/ollama.service
    content: |
      [Unit]
      Description=Ollama Service
      After=network-online.target
      [Service]
      ExecStart=/usr/local/bin/ollama serve
      User=cezen
      Group=cezen
      Restart=always
      RestartSec=3
      Environment="PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin"
      Environment="OLLAMA_HOST=0.0.0.0:11434"
      Environment="OLLAMA_MODELS=/opt/cezen/models/ollama"
      Environment="CUDA_VISIBLE_DEVICES=0"
      [Install]
      WantedBy=multi-user.target
    mode: "0644"
  register: ollama_unit_file

# The directory named by OLLAMA_MODELS in the unit file; it must be writable
# by the cezen user the service runs as.
- name: Create Ollama models directory
  file:
    path: /opt/cezen/models/ollama
    state: directory
    owner: cezen
    group: cezen
    # Explicit mode so the result does not depend on the caller's umask.
    mode: "0755"

# Reload systemd, enable the unit at boot, and make sure it is running with
# the current unit file.
- name: Enable and start Ollama
  systemd:
    name: ollama
    enabled: true
    daemon_reload: true
    # `started` leaves an already-running service untouched, so a rewritten
    # unit (new User/Environment) would not take effect. Restart whenever the
    # unit file changed; otherwise just ensure the service is up.
    # NOTE(review): the upstream installer may already have started its own
    # ollama.service before this point — confirm.
    state: "{{ 'restarted' if ollama_unit_file is changed else 'started' }}"
# Pause the play until port 11434 on localhost accepts connections,
# giving up after 60 seconds.
- name: Wait for Ollama API to be ready
  wait_for:
    port: 11434
    host: localhost
    timeout: 60
# Resolve `ollama_models` from the `tier` variable. An undefined `tier` is
# treated as 'basic'; a tier name that is not a key of the catalog yields the
# fallback list instead.
- name: Select tier model set
  vars:
    _ollama_tier_catalog:
      starter:
        - 'phi3:mini'
        - 'nomic-embed-text'
      basic:
        - 'llama3.1:8b'
        - 'mistral:7b'
        - 'nomic-embed-text'
        - 'codellama:13b'
      entry:
        - 'llama3.1:8b'
        - 'mistral:7b'
        - 'nomic-embed-text'
        - 'codellama:13b'
      pro:
        - 'llama3.1:8b'
        - 'mistral:7b'
        - 'nomic-embed-text'
        - 'codellama:13b'
        - 'llama3.1:70b'
        - 'mixtral:8x7b'
        - 'deepseek-coder-v2:16b'
      max:
        - 'llama3.1:8b'
        - 'mistral:7b'
        - 'nomic-embed-text'
        - 'codellama:13b'
        - 'llama3.1:70b'
        - 'mixtral:8x7b'
        - 'deepseek-coder-v2:16b'
        - 'llama3.1:405b'
        - 'mixtral:8x22b'
    _ollama_tier_fallback:
      - 'llama3.1:8b'
      - 'mistral:7b'
      - 'nomic-embed-text'
  set_fact:
    ollama_models: "{{ _ollama_tier_catalog.get(tier | default('basic'), _ollama_tier_fallback) }}"
# Pull every model in `ollama_models` through the local Ollama API, one
# `ollama pull` invocation per model.
- name: Pull tier Ollama models
  # NOTE(review): `become_user` only takes effect if `become` is enabled at
  # the play/role level — confirm.
  become_user: cezen
  # argv form passes the model name as a single argument, with no word
  # splitting of the templated value.
  command:
    argv:
      - ollama
      - pull
      - "{{ item }}"
  loop: "{{ ollama_models }}"
  environment:
    OLLAMA_HOST: "http://localhost:11434"
  # `retries`/`delay` are only honoured on every Ansible version when paired
  # with an `until` condition; retry each model until its pull succeeds.
  register: ollama_pull
  until: ollama_pull is succeeded
  retries: 3
  delay: 15
  # NOTE: Pro/Max models are very large. Skip with --skip-model-pull for
  # bandwidth-constrained installs, then run models/pull-models.sh later.
  # `| bool` matters: a value passed as a string (e.g. `-e skip_model_pull=false`)
  # is otherwise truthy and would skip the pulls unintentionally.
  when: not (skip_model_pull | default(false) | bool)
# Open WebUI (chat interface on top of Ollama)
# If a container named open-webui already exists (running or stopped) it is
# started; otherwise a new one is created that publishes container port 8080
# on host port 3001 and reaches Ollama through the `host-gateway` host alias.
- name: Start Open WebUI via Docker CLI
  register: webui_result
  args:
    executable: /bin/bash
  # The {{ "{{" }} / {{ "}}" }} pairs emit literal braces so that Docker, not
  # Jinja, receives the `.Names` format template.
  shell: |
    if docker ps -a --format '{{ "{{" }}.Names{{ "}}" }}' | grep -qx open-webui; then
      docker start open-webui
    else
      docker run -d \
        --name open-webui \
        --restart always \
        -p 3001:8080 \
        --add-host=host-gateway:172.17.0.1 \
        -v open-webui:/app/backend/data \
        -e OLLAMA_BASE_URL=http://host-gateway:11434 \
        ghcr.io/open-webui/open-webui:main
    fi
  changed_when: webui_result.rc == 0
  # Both conditions must hold for a failure: a non-zero exit that is not the
  # "already in use" name-conflict message.
  failed_when:
    - webui_result.rc != 0
    - "'already in use' not in (webui_result.stderr | default(''))"
  # Best effort: a failure here is reported but does not stop the play.
  ignore_errors: true