# aipackage/ansible/roles/ollama/tasks/main.yml

---
# Ollama — local LLM serving (main inference engine for Entry tier)
- name: Check if Ollama is already installed
  stat:
    path: /usr/local/bin/ollama
  register: ollama_binary

- name: Install Ollama
  shell: curl -fsSL https://ollama.ai/install.sh | sh
  when: not ollama_binary.stat.exists
  retries: 3
  delay: 10

- name: Create Ollama systemd service with GPU support
  copy:
    dest: /etc/systemd/system/ollama.service
    content: |
      [Unit]
      Description=Ollama Service
      After=network-online.target

      [Service]
      ExecStart=/usr/local/bin/ollama serve
      User=cezen
      Group=cezen
      Restart=always
      RestartSec=3
      Environment="PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin"
      Environment="OLLAMA_HOST=0.0.0.0:11434"
      Environment="OLLAMA_MODELS=/opt/cezen/models/ollama"
      Environment="CUDA_VISIBLE_DEVICES=0,1,2"

      [Install]
      WantedBy=multi-user.target
    mode: "0644"

- name: Create Ollama models directory
  file:
    path: /opt/cezen/models/ollama
    state: directory
    owner: cezen
    group: cezen

- name: Enable and start Ollama
  systemd:
    name: ollama
    enabled: yes
    state: started
    daemon_reload: yes

- name: Wait for Ollama API to be ready
  wait_for:
    host: localhost
    port: 11434
    timeout: 60

- name: Pull default models (Llama 3.1 8B + Mistral 7B)
  become_user: cezen
  command: ollama pull {{ item }}
  loop:
    - llama3.1:8b
    - mistral:7b
  environment:
    OLLAMA_HOST: "http://localhost:11434"
  retries: 3
  delay: 15
  # NOTE: Models are large (~5GB each). This step takes time on first run.
  # Skip by setting: ansible-playbook ... -e "skip_model_pull=true"
  when: not (skip_model_pull | default(false))

# Open WebUI (chat interface on top of Ollama)
- name: Deploy Open WebUI via Docker
  community.docker.docker_container:
    name: open-webui
    image: ghcr.io/open-webui/open-webui:main
    state: started
    restart_policy: always
    ports:
      - "3001:8080"
    volumes:
      - open-webui:/app/backend/data
    env:
      OLLAMA_BASE_URL: "http://host-gateway:11434"
    etc_hosts:
      host-gateway: "172.17.0.1"
  # Note: Requires docker community collection. Install with:
  # ansible-galaxy collection install community.docker
  ignore_errors: true  # Falls back gracefully if docker collection not available

- name: Alternative Open WebUI start (if community.docker not available)
  shell: |
    docker run -d \
      --name open-webui \
      --restart always \
      -p 3001:8080 \
      --add-host=host-gateway:172.17.0.1 \
      -v open-webui:/app/backend/data \
      -e OLLAMA_BASE_URL=http://host-gateway:11434 \
      ghcr.io/open-webui/open-webui:main
  args:
    executable: /bin/bash
  register: webui_result
  failed_when: webui_result.rc != 0 and 'already in use' not in webui_result.stderr