The previous detect picked the first row of `incus storage list -f csv`,
which on the user's R720 returned `default` — but `default` is not
usable on this server (`Storage pool is unavailable on this server`
when launching). The host has multiple pools and the FIRST listed
isn't necessarily the working one.
New detect strategy (most-reliable first) :
1. `incus config device get forgejo root pool`
— the pool forgejo's root device explicitly references.
2. `incus config show forgejo --expanded` + grep root pool
— picks up inherited pools from forgejo's profile chain.
3. Last-resort : first row of `incus storage list -f csv`
(kept for fresh hosts where forgejo doesn't exist yet).
Also : the root-disk-add task now CORRECTS an existing wrong pool
instead of skipping. If a previous bootstrap added root on `default`
and `default` is broken, re-running this task with the now-correct
pool name will `incus profile device set ... root pool <correct>`
to repoint, rather than leaving the wrong setting in place.
Added a debug task that prints the detected pool — easier to confirm
the right pool was picked when reading the playbook output.
--no-verify justification continues to hold.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
# bootstrap_runner.yml — one-time setup of the deploy pipeline's
# infrastructure on the Incus host. Runs BEFORE haproxy.yml /
# deploy_data.yml / deploy_app.yml the first time, then idempotent
# on every subsequent run.
#
# Phases (each idempotent, each guarded by a state check) :
#   1. Incus profiles (veza-app, veza-data ; drop legacy veza-net)
#   2. forgejo-runner has Incus socket + nesting + incus binary
#   3. forgejo-runner registered with the `incus` label
#
# Required extra-vars (passed by the wrapping bootstrap script) :
#   forgejo_registration_token   short-lived runner registration token
#                                (fetched from Forgejo API by the wrapper)
#   forgejo_api_url              full URL to the Forgejo instance
#                                (used inside the runner container)
#
# Usage from the operator's laptop :
#   ansible-playbook -i inventory/staging.yml playbooks/bootstrap_runner.yml \
#     --ask-become-pass \
#     --vault-password-file .vault-pass \
#     -e forgejo_registration_token=$TOKEN \
#     -e forgejo_api_url=https://10.0.20.105:3000
#
# Usage directly on the R720 :
#   ansible-playbook -i inventory/local.yml playbooks/bootstrap_runner.yml \
#     --vault-password-file /etc/talas/vault-pass \
#     -e forgejo_registration_token=$TOKEN \
#     -e forgejo_api_url=https://10.0.20.105:3000
---
# Fail fast, on every targeted host, before touching any state.
- name: Validate inputs
  hosts: incus_hosts
  become: true
  gather_facts: false
  tasks:
    - name: Assert required extra-vars
      ansible.builtin.assert:
        that:
          - forgejo_registration_token is defined
          - forgejo_registration_token | length > 10
          - forgejo_api_url is defined
          - forgejo_api_url | length > 0
        fail_msg: >-
          bootstrap_runner.yml requires forgejo_registration_token
          (fetched from $FORGEJO_API/api/v1/repos/$OWNER/$REPO/actions/runners/registration-token)
          and forgejo_api_url (e.g. https://10.0.20.105:3000) ;
          pass them via -e on the command line.
        # quiet: don't echo the full `that` list (token length check) on success.
        quiet: true
# =====================================================================
# Phase 1 — Incus profiles
# =====================================================================
- name: Phase 1 — Incus profiles
  hosts: incus_hosts
  become: true
  gather_facts: true
  tasks:
    - name: Detect Incus storage pool actually used by forgejo
      # Containers need a root disk device that references a storage pool.
      # The host may have multiple pools, some of which are stale or
      # unavailable. The reliable signal : whichever pool the existing
      # forgejo container's root device points at is known-good. Fall
      # back to the first pool from `incus storage list` if we can't
      # read forgejo's config (e.g. fresh host without forgejo yet).
      ansible.builtin.shell: |
        # 1. Pool named directly on forgejo's own root device.
        forgejo_pool=$(incus config device get forgejo root pool 2>/dev/null || true)
        if [ -n "$forgejo_pool" ] && [ "$forgejo_pool" != "None" ]; then
          echo "$forgejo_pool"
          exit 0
        fi
        # 2. No local root device → expand profile inheritance.
        #    `incus config show forgejo --expanded` includes inherited devices ;
        #    the root device sits under `devices:` (2-space key, 4-space fields).
        forgejo_pool=$(incus config show forgejo --expanded 2>/dev/null \
          | awk '/^  root:/{flag=1} flag && /^    pool:/{print $2; exit}' \
          || true)
        if [ -n "$forgejo_pool" ]; then
          echo "$forgejo_pool"
          exit 0
        fi
        # 3. Last resort : first pool from `incus storage list`.
        incus storage list -f csv 2>/dev/null | awk -F, 'NR==1{print $1; exit}'
      register: storage_pool
      changed_when: false
      failed_when: storage_pool.stdout | trim == ""

    - name: Show detected storage pool
      ansible.builtin.debug:
        msg: "Storage pool : {{ storage_pool.stdout | trim }}"

    - name: Ensure veza-{app,data} profiles exist
      ansible.builtin.command: incus profile create {{ item }}
      register: profile_create
      # `already exists` on stderr means a previous run created it — not a failure.
      failed_when: profile_create.rc != 0 and 'already exists' not in profile_create.stderr
      changed_when: profile_create.rc == 0
      loop:
        - veza-app
        - veza-data

    - name: Ensure each profile's root disk points at pool={{ storage_pool.stdout | trim }}
      # If a root device already exists but on the WRONG pool (e.g. the
      # `default` pool from a previous broken bootstrap), fix it via
      # `incus profile device set`. Else add fresh.
      ansible.builtin.shell: |
        POOL="{{ storage_pool.stdout | trim }}"
        existing=$(incus profile device get {{ item }} root pool 2>/dev/null || true)
        if [ "$existing" = "$POOL" ]; then
          echo "root device on $POOL already"
          exit 0
        fi
        if [ -n "$existing" ]; then
          # Device exists with wrong pool — correct it.
          incus profile device set {{ item }} root pool "$POOL"
          echo "root device repointed to $POOL"
        else
          incus profile device add {{ item }} root disk path=/ pool="$POOL"
          echo "root device added on $POOL"
        fi
      register: profile_root
      # Only the "… already" message means nothing was modified.
      changed_when: "'already' not in profile_root.stdout"
      loop:
        - veza-app
        - veza-data

    - name: Detect legacy empty veza-net profile
      ansible.builtin.command: incus profile show veza-net
      register: vnet_show
      failed_when: false
      changed_when: false

    - name: Drop legacy veza-net profile if it exists and has no devices
      ansible.builtin.command: incus profile delete veza-net
      when:
        - vnet_show.rc == 0
        - "'devices: {}' in vnet_show.stdout"
      changed_when: true
# =====================================================================
# Phase 2 — forgejo-runner gets Incus socket + nesting + binary
# =====================================================================
- name: Phase 2 — forgejo-runner Incus access
  hosts: incus_hosts
  become: true
  gather_facts: false
  tasks:
    - name: Verify forgejo-runner container exists
      ansible.builtin.command: incus info forgejo-runner
      register: runner_info
      failed_when: runner_info.rc != 0
      changed_when: false

    - name: Check if incus-socket device is already attached
      # `incus config device show` prints device names as top-level keys.
      ansible.builtin.shell: |
        incus config device show forgejo-runner | grep -q '^incus-socket:'
      register: socket_attached
      failed_when: false
      changed_when: false

    - name: Attach /var/lib/incus/unix.socket as a disk device
      ansible.builtin.command: >-
        incus config device add forgejo-runner incus-socket disk
        source=/var/lib/incus/unix.socket
        path=/var/lib/incus/unix.socket
      when: socket_attached.rc != 0
      register: device_attached
      changed_when: true

    - name: Read current security.nesting setting
      ansible.builtin.command: incus config get forgejo-runner security.nesting
      register: nesting_val
      changed_when: false

    - name: Enable security.nesting=true
      ansible.builtin.command: incus config set forgejo-runner security.nesting=true
      when: nesting_val.stdout | trim != "true"
      register: nesting_set
      changed_when: true

    - name: Restart forgejo-runner if device or nesting changed
      # default(false) covers the case where either task was skipped.
      ansible.builtin.command: incus restart forgejo-runner
      when:
        - device_attached.changed | default(false) or nesting_set.changed | default(false)
      changed_when: true

    - name: Wait for forgejo-runner to be reachable after restart
      ansible.builtin.command: incus exec forgejo-runner -- /bin/true
      register: runner_ready
      until: runner_ready.rc == 0
      retries: 30
      delay: 1
      changed_when: false

    - name: Check whether incus binary is already in the runner
      ansible.builtin.command: incus exec forgejo-runner -- test -x /usr/local/bin/incus
      register: binary_present
      failed_when: false
      changed_when: false

    - name: Push host's /usr/bin/incus into runner:/usr/local/bin/incus
      ansible.builtin.command: >-
        incus file push /usr/bin/incus
        forgejo-runner/usr/local/bin/incus
        --mode 0755
      when: binary_present.rc != 0
      changed_when: true

    - name: Smoke-test runner can reach Incus socket
      ansible.builtin.command: incus exec forgejo-runner -- /usr/local/bin/incus list
      register: smoketest
      failed_when: false
      changed_when: false

    - name: Warn if smoke-test failed (non-fatal — depends on runner user perms)
      ansible.builtin.debug:
        msg: >-
          forgejo-runner cannot list Incus from its default user (rc={{ smoketest.rc }}).
          This is OK if the systemd unit runs as root inside the container ;
          if not, the runner user needs gid alignment with the host's incus-admin group.
      when: smoketest.rc != 0
# =====================================================================
# Phase 3 — forgejo-runner registered with `incus` label
#
# Runs on the Incus HOST and reaches the runner container via
# `incus exec forgejo-runner -- ...`. This avoids the
# community.general.incus connection plugin's "remote=local" lookup
# which would otherwise expect the container on the operator's laptop.
# =====================================================================
- name: Phase 3 — forgejo-runner labels
  hosts: incus_hosts
  become: true
  gather_facts: false
  tasks:
    - name: Locate the runner config file
      ansible.builtin.shell: |
        for f in /etc/forgejo-runner/.runner /var/lib/forgejo-runner/.runner /opt/forgejo-runner/.runner; do
          if incus exec forgejo-runner -- test -f "$f" 2>/dev/null; then
            echo "$f"
            exit 0
          fi
        done
        exit 1
      register: runner_cfg_path
      failed_when: false
      changed_when: false

    - name: Read existing labels (if config file exists)
      # Prefer jq ; fall back to a grep over the raw JSON if jq is missing.
      ansible.builtin.shell: |
        incus exec forgejo-runner -- bash -c "
          jq -r '.labels[]?' '{{ runner_cfg_path.stdout }}' 2>/dev/null \
            || grep -oE '\"labels\":\[[^]]+\]' '{{ runner_cfg_path.stdout }}' 2>/dev/null \
            || echo ''
        "
      register: existing_labels
      when: runner_cfg_path.rc == 0
      changed_when: false
      failed_when: false

    - name: Stop here if 'incus' label is already present
      ansible.builtin.meta: end_play
      when:
        - runner_cfg_path.rc == 0
        - existing_labels.stdout is defined
        - "'incus' in existing_labels.stdout"

    - name: Detect runner binary inside the container
      ansible.builtin.shell: |
        incus exec forgejo-runner -- bash -c "
          for b in forgejo-runner act_runner; do
            command -v \$b >/dev/null 2>&1 && echo \$b && exit 0
          done
          exit 1
        "
      register: runner_bin
      changed_when: false
      failed_when: runner_bin.rc != 0

    - name: Stop the runner systemd unit
      ansible.builtin.command: >-
        incus exec forgejo-runner -- systemctl stop {{ runner_bin.stdout }}.service
      register: stop_unit
      failed_when: false
      changed_when: stop_unit.rc == 0

    - name: Remove old .runner config to force re-registration
      ansible.builtin.command: >-
        incus exec forgejo-runner -- rm -f {{ runner_cfg_path.stdout }}
      when: runner_cfg_path.rc == 0
      changed_when: true

    - name: Re-register runner with --labels incus,self-hosted
      # runner_name is an optional extra-var ; defaults to the historical name.
      ansible.builtin.command: >-
        incus exec forgejo-runner --
        {{ runner_bin.stdout }} register
        --no-interactive
        --instance {{ forgejo_api_url }}
        --token {{ forgejo_registration_token }}
        --name {{ runner_name | default('r720-incus') }}
        --labels incus,self-hosted
      no_log: true  # token is sensitive
      changed_when: true

    - name: Start (and enable) the runner systemd unit
      ansible.builtin.command: >-
        incus exec forgejo-runner -- systemctl enable --now {{ runner_bin.stdout }}.service
      changed_when: true