104 lines
2.8 KiB
YAML
104 lines
2.8 KiB
YAML
---
|
|
# Ollama — local LLM serving (main inference engine for Entry tier)
|
|
# Probe for the Ollama binary at its expected install path. The result is
# registered as `ollama_binary` so later tasks can test `.stat.exists`.
- name: Check if Ollama is already installed
  register: ollama_binary
  ansible.builtin.stat:
    path: /usr/local/bin/ollama
|
|
|
|
# Run the upstream install script, but only when the binary probe registered
# in `ollama_binary` reports that /usr/local/bin/ollama is absent.
- name: Install Ollama
  ansible.builtin.shell: |
    # Without pipefail a failed download would hand `sh` an empty script and
    # the pipeline would still exit 0, hiding the failure.
    set -o pipefail
    curl -fsSL https://ollama.ai/install.sh | sh
  args:
    # pipefail is a bash option; /bin/sh is not guaranteed to support it.
    executable: /bin/bash
  when: not ollama_binary.stat.exists
  register: ollama_install
  # `retries`/`delay` are only honoured together with `until` on
  # ansible-core releases older than 2.16.
  until: ollama_install is succeeded
  retries: 3
  delay: 10
|
|
|
|
# Write the Ollama systemd unit. The unit runs `ollama serve` as cezen:cezen,
# listens on every interface (0.0.0.0:11434), stores models under
# /opt/cezen/models/ollama and exposes GPUs 0-2 via CUDA_VISIBLE_DEVICES.
# NOTE(review): the GPU list is hard-coded to three devices — confirm it
# matches the target hosts.
# The result is registered so the service can be restarted when the unit
# file content changes.
- name: Create Ollama systemd service with GPU support
  ansible.builtin.copy:
    dest: /etc/systemd/system/ollama.service
    mode: "0644"
    content: |
      [Unit]
      Description=Ollama Service
      After=network-online.target

      [Service]
      ExecStart=/usr/local/bin/ollama serve
      User=cezen
      Group=cezen
      Restart=always
      RestartSec=3
      Environment="PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin"
      Environment="OLLAMA_HOST=0.0.0.0:11434"
      Environment="OLLAMA_MODELS=/opt/cezen/models/ollama"
      Environment="CUDA_VISIBLE_DEVICES=0,1,2"

      [Install]
      WantedBy=multi-user.target
  register: ollama_unit

# The unit points OLLAMA_MODELS at this directory and runs as cezen, so it
# must exist and be owned by that user before the service starts.
- name: Create Ollama models directory
  ansible.builtin.file:
    path: /opt/cezen/models/ollama
    state: directory
    owner: cezen
    group: cezen

# Reload systemd so it sees the unit file, enable the service at boot and
# make sure it is running with the current unit definition.
- name: Enable and start Ollama
  ansible.builtin.systemd:
    name: ollama
    enabled: true
    daemon_reload: true
    # `started` alone leaves an already-running service on its old settings;
    # restart whenever the unit file was just rewritten.
    state: "{{ 'restarted' if ollama_unit is changed else 'started' }}"
|
|
|
|
# Pause the play until TCP port 11434 on localhost accepts connections,
# giving up after 60 seconds.
- name: Wait for Ollama API to be ready
  ansible.builtin.wait_for:
    port: 11434
    host: localhost
    timeout: 60
|
|
|
|
# Download the default models through the local Ollama API.
# NOTE: Models are large (~5GB each). This step takes time on first run.
# Skip by setting: ansible-playbook ... -e "skip_model_pull=true"
- name: Pull default models (Llama 3.1 8B + Mistral 7B)
  # NOTE(review): become_user only takes effect when `become` is enabled at
  # play/role level — confirm.
  become_user: cezen
  ansible.builtin.command:
    argv:
      - ollama
      - pull
      - "{{ item }}"
  loop:
    - "llama3.1:8b"
    - "mistral:7b"
  environment:
    OLLAMA_HOST: "http://localhost:11434"
  # `-e skip_model_pull=...` arrives as a string; without `| bool` the value
  # "false" is a non-empty (truthy) string and would skip the pull as well.
  when: not (skip_model_pull | default(false) | bool)
  register: ollama_pull
  # `retries`/`delay` are only honoured together with `until` on
  # ansible-core releases older than 2.16.
  until: ollama_pull is succeeded
  retries: 3
  delay: 15
|
|
|
|
# Open WebUI (chat interface on top of Ollama)
# Published on host port 3001; the container reaches the host's Ollama API
# through the `host-gateway` hosts entry (172.17.0.1).
# Note: Requires docker community collection. Install with:
#   ansible-galaxy collection install community.docker
- name: Deploy Open WebUI via Docker
  community.docker.docker_container:
    name: open-webui
    image: ghcr.io/open-webui/open-webui:main
    state: started
    restart_policy: always
    ports:
      - "3001:8080"
    volumes:
      - open-webui:/app/backend/data
    env:
      OLLAMA_BASE_URL: "http://host-gateway:11434"
    etc_hosts:
      host-gateway: "172.17.0.1"
  register: webui_container
  # Tolerate runtime failures of the module so the CLI fallback can take over.
  # NOTE(review): ignore_errors only covers failures while the task runs; an
  # uninstalled collection is presumably reported when the task is resolved
  # and would not be caught here — confirm.
  ignore_errors: true

# CLI fallback: start the same container with `docker run`, but only when the
# module-based task above failed. A name conflict ("already in use") means
# the container already exists and is not treated as a failure.
- name: Alternative Open WebUI start (if community.docker not available)
  ansible.builtin.shell: |
    docker run -d \
      --name open-webui \
      --restart always \
      -p 3001:8080 \
      --add-host=host-gateway:172.17.0.1 \
      -v open-webui:/app/backend/data \
      -e OLLAMA_BASE_URL=http://host-gateway:11434 \
      ghcr.io/open-webui/open-webui:main
  args:
    executable: /bin/bash
  when: webui_container is failed
  register: webui_result
  # Only a successful `docker run` actually created a container.
  changed_when: webui_result.rc == 0
  failed_when: webui_result.rc != 0 and 'already in use' not in webui_result.stderr
|