---
# Ollama — local LLM serving (main inference engine for Entry tier)
#
# This task list:
#   1. installs Ollama via the upstream install script if the binary is absent,
#   2. writes a systemd unit for it and creates the models directory,
#   3. enables/starts the service and waits for the API port,
#   4. optionally pulls the default models,
#   5. deploys the Open WebUI chat front-end as a Docker container.

- name: Check if Ollama is already installed
  stat:
    path: /usr/local/bin/ollama
  register: ollama_binary

- name: Install Ollama
  # pipefail makes a failed download fail the task; without it `sh` would
  # receive empty input, exit 0, and the retry would never trigger.
  shell: |
    set -o pipefail
    curl -fsSL https://ollama.ai/install.sh | sh
  args:
    executable: /bin/bash
  when: not ollama_binary.stat.exists
  register: ollama_install
  # Explicit `until` so that `retries` is honoured on every Ansible version.
  until: ollama_install is succeeded
  retries: 3
  delay: 10

- name: Create Ollama systemd service with GPU support
  copy:
    dest: /etc/systemd/system/ollama.service
    content: |
      [Unit]
      Description=Ollama Service
      After=network-online.target

      [Service]
      ExecStart=/usr/local/bin/ollama serve
      User=cezen
      Group=cezen
      Restart=always
      RestartSec=3
      Environment="PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin"
      Environment="OLLAMA_HOST=0.0.0.0:11434"
      Environment="OLLAMA_MODELS=/opt/cezen/models/ollama"
      Environment="CUDA_VISIBLE_DEVICES=0,1,2"

      [Install]
      WantedBy=multi-user.target
    mode: "0644"
  # Registered so the service task below can restart on unit changes.
  # NOTE(review): OLLAMA_HOST binds the API on all interfaces — confirm the
  # host firewall restricts port 11434 as intended.
  register: ollama_unit

- name: Create Ollama models directory
  file:
    path: /opt/cezen/models/ollama
    state: directory
    owner: cezen
    group: cezen
    mode: "0755"

- name: Enable and start Ollama
  systemd:
    name: ollama
    enabled: true
    # Restart when the unit file changed so new Environment= values take
    # effect; otherwise just make sure the service is running.
    state: "{{ 'restarted' if ollama_unit is changed else 'started' }}"
    daemon_reload: true

- name: Wait for Ollama API to be ready
  wait_for:
    host: localhost
    port: 11434
    timeout: 60

# NOTE: Models are large (~5GB each). This step takes time on first run.
# Skip by setting: ansible-playbook ... -e "skip_model_pull=true"
- name: Pull default models (Llama 3.1 8B + Mistral 7B)
  become_user: cezen
  command: ollama pull {{ item }}
  loop:
    - llama3.1:8b
    - mistral:7b
  environment:
    OLLAMA_HOST: "http://localhost:11434"
  when: not (skip_model_pull | default(false))
  register: ollama_pull
  # Evaluated per loop item; retries a failed pull up to 3 times.
  until: ollama_pull is succeeded
  retries: 3
  delay: 15

# Open WebUI (chat interface on top of Ollama)
#
# Note: Requires docker community collection. Install with:
#   ansible-galaxy collection install community.docker
- name: Deploy Open WebUI via Docker
  community.docker.docker_container:
    name: open-webui
    image: ghcr.io/open-webui/open-webui:main
    state: started
    restart_policy: always
    ports:
      - "3001:8080"
    volumes:
      - open-webui:/app/backend/data
    env:
      OLLAMA_BASE_URL: "http://host-gateway:11434"
    etc_hosts:
      # NOTE(review): assumes the default docker0 bridge address — confirm.
      host-gateway: "172.17.0.1"
  register: webui_container
  # A failure here is tolerated; the shell-based fallback below takes over.
  # NOTE(review): if the collection is not installed, recent ansible-core may
  # abort while resolving the module name, before ignore_errors applies —
  # confirm the fallback is reachable in that case.
  ignore_errors: true

- name: Alternative Open WebUI start (if community.docker not available)
  shell: |
    docker run -d \
      --name open-webui \
      --restart always \
      -p 3001:8080 \
      --add-host=host-gateway:172.17.0.1 \
      -v open-webui:/app/backend/data \
      -e OLLAMA_BASE_URL=http://host-gateway:11434 \
      ghcr.io/open-webui/open-webui:main
  args:
    executable: /bin/bash
  register: webui_result
  # Only needed when the module-based deployment above did not succeed.
  when: webui_container is failed
  changed_when: webui_result.rc == 0
  # A container-name conflict means the container already exists: not an error.
  failed_when:
    - webui_result.rc != 0
    - "'already in use' not in webui_result.stderr"