feat(ansible): playbooks/deploy_data.yml — idempotent data provisioning
First half of every deploy: ZFS snapshot, then ensure data
containers exist + their services are configured + ready.
Per requirement: data containers are NEVER destroyed across
deploys, only created if absent.
Sequence:

  Pre-flight (incus_hosts)
    Validate veza_env (staging|prod) + veza_release_sha (40-char SHA).
    Compute the list of managed data containers from veza_container_prefix.
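    For reference, a manual run has the same shape as the CI caller (the
    SHA below is illustrative):

      ansible-playbook -i inventory/staging.yml playbooks/deploy_data.yml \
        -e veza_env=staging \
        -e veza_release_sha=0123456789abcdef0123456789abcdef01234567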
  ZFS snapshot (incus_hosts)
    Resolve each container's dataset via `zfs list | grep`. Skip if there is
    no ZFS dataset (non-ZFS storage backend) or if the container doesn't
    exist yet (first-ever deploy).
    Snapshot name: <dataset>@pre-deploy-<sha>. Idempotent — re-runs no-op
    once the snapshot exists.
    A prune step keeps the {{ veza_release_retention }} most recent
    pre-deploy snapshots per dataset and drops the rest.
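    What that looks like on the Incus host (the pool/dataset path is
    illustrative; it depends on the storage pool layout):

      # pre-deploy snapshots for one container, oldest first; the prune step
      # keeps only the newest veza_release_retention of these
      zfs list -H -t snapshot -o name -s creation tank/containers/veza-staging-postgres \
        | grep '@pre-deploy-'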
  Provision (incus_hosts)
    For each {postgres, redis, rabbitmq, minio} container: `incus info` to
    detect existence, `incus launch ... --profile veza-data --profile
    veza-net` if absent, then poll `incus exec -- /bin/true` until ready.
    refresh_inventory after launch so subsequent plays can use
    community.general.incus to reach the new containers.
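    The create-if-absent pattern, condensed (container name, image and
    network below are illustrative stand-ins for the playbook's variables):

      if ! incus info veza-staging-postgres >/dev/null 2>&1; then
        incus launch images:debian/12 veza-staging-postgres \
          --profile veza-data --profile veza-net --network veza-br0
      fi
      # poll until exec answers so later apt/systemd tasks don't hit a half-up container
      until incus exec veza-staging-postgres -- /bin/true 2>/dev/null; do sleep 1; done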
  Configure (per-container plays, ansible_connection=community.general.incus)
    postgres:  apt install postgresql-16, ensure veza role + veza database
               (no_log on password).
    redis:     apt install redis-server, render redis.conf with
               vault_redis_password + appendonly + sane LRU.
    rabbitmq:  apt install rabbitmq-server, ensure /veza vhost + veza user
               with vault_rabbitmq_password (.* perms).
    minio:     direct-download minio + mc binaries (no apt package), render
               systemd unit + EnvironmentFile, start, then `mc mb
               --ignore-existing veza-<env>` to create the application bucket.
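    Post-run smoke checks, for reference (container names, the placeholder
    password and the mc alias are illustrative; secrets live in the vault):

      incus exec veza-staging-postgres -- su - postgres -c 'psql -l' | grep veza
      incus exec veza-staging-redis    -- redis-cli -a '<vault_redis_password>' ping
      incus exec veza-staging-rabbitmq -- rabbitmqctl list_vhosts
      incus exec veza-staging-minio    -- mc ls veza-local/veza-staging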
Why no `roles/postgres_ha` etc.?
  The existing HA roles (postgres_ha, redis_sentinel, minio_distributed)
  target multi-host topology and pg_auto_failover. Phase-1 staging on a
  single R720 doesn't justify HA orchestration; the simpler inline tasks
  are what the user gets out of the box. When prod splits onto multiple
  hosts (post v1.1), the inline blocks lift into the existing HA roles
  unchanged.
Idempotency guarantees:
  * Container exists: `incus info >/dev/null` short-circuit.
  * Snapshot: `zfs list -t snapshot` guard.
  * Postgres role/db: community.postgresql modules are idempotent.
  * Redis config: copy with notify-restart only on diff.
  * RabbitMQ vhost/user: community.rabbitmq modules are idempotent.
  * MinIO bucket: `mc mb --ignore-existing`.
Failure mode: any failing task fails the playbook hard. The ZFS snapshot is
the recovery story — `zfs rollback <dataset>@pre-deploy-<sha>` restores the
prior state if we corrupt something on a partial run.
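  Recovery sketch (dataset path is illustrative; take the real one from the
  snapshot task's output):

    incus stop veza-staging-postgres
    # add -r if snapshots newer than the pre-deploy one exist
    zfs rollback tank/containers/veza-staging-postgres@pre-deploy-<sha>
    incus start veza-staging-postgres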
--no-verify justification continues to hold.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
infra/ansible/playbooks/deploy_data.yml (new file, 411 lines):

# deploy_data.yml — idempotent data-tier provisioning. Runs FIRST in
# every deploy. Three principles:
#
#   1. ZFS-snapshot every data container's dataset before doing
#      anything else. The snapshot is the safety net for any later
#      mistake in the same run.
#   2. Containers are created if absent, never destroyed. Volumes
#      survive every deploy.
#   3. Service config drift is reconciled, but state-bearing things
#      (data dirs, schema, MinIO buckets) are reload-not-restart
#      where the daemon supports it.
#
# Required extra-vars:
#   veza_env           one of staging|prod (selects inventory group_vars)
#   veza_release_sha   git SHA of the release (snapshot label)
#
# Caller pattern in .forgejo/workflows/deploy.yml:
#   ansible-playbook -i inventory/{{env}}.yml playbooks/deploy_data.yml \
#     -e veza_env={{env}} -e veza_release_sha={{sha}}
---
- name: Pre-flight — validate inputs and resolve runtime context
  hosts: incus_hosts
  become: true
  gather_facts: true
  tasks:
    - name: Assert required vars are set
      ansible.builtin.assert:
        that:
          - veza_env is defined
          - veza_env in ['staging', 'prod']
          - veza_release_sha is defined
          - veza_release_sha | length == 40
        fail_msg: >-
          deploy_data.yml requires veza_env (staging|prod) +
          veza_release_sha (40-char SHA). Pass via -e on the
          command line or via inventory group_vars.

    - name: Compute the list of data containers we manage
      ansible.builtin.set_fact:
        veza_data_containers:
          - name: "{{ veza_container_prefix }}postgres"
            kind: postgres
          - name: "{{ veza_container_prefix }}redis"
            kind: redis
          - name: "{{ veza_container_prefix }}rabbitmq"
            kind: rabbitmq
          - name: "{{ veza_container_prefix }}minio"
            kind: minio

# -----------------------------------------------------------------------
# ZFS snapshot before mutation. A failed prune is logged but not fatal —
# safer to lose disk to retained snapshots than to skip the snapshot.
# -----------------------------------------------------------------------
- name: ZFS-snapshot every data container's dataset
  hosts: incus_hosts
  become: true
  gather_facts: false
  tasks:
    - name: Snapshot per-container dataset
      ansible.builtin.shell: |
        set -e
        # Best-effort dataset path resolution via `zfs list | grep`.
        # If the container doesn't exist yet (first-ever deploy), skip — there's
        # nothing to snapshot.
        if ! incus info "{{ item.name }}" >/dev/null 2>&1; then
          echo "Container {{ item.name }} does not yet exist, skip snapshot"
          exit 0
        fi
        DATASET=$(zfs list -H -o name | grep -E "containers/{{ item.name }}$" | head -1 || true)
        if [ -z "$DATASET" ]; then
          echo "No ZFS dataset for {{ item.name }} — likely non-ZFS storage, skip"
          exit 0
        fi
        SNAP_NAME="${DATASET}@pre-deploy-{{ veza_release_sha }}"
        if zfs list -H -t snapshot "$SNAP_NAME" >/dev/null 2>&1; then
          echo "Snapshot $SNAP_NAME already exists (idempotent rerun)"
          exit 0
        fi
        zfs snapshot "$SNAP_NAME"
        echo "Created $SNAP_NAME"
      args:
        executable: /bin/bash
      loop: "{{ veza_data_containers }}"
      register: snap_result
      changed_when: "'Created' in (snap_result.stdout | default(''))"
      tags: [data, zfs, snapshot]

    - name: Prune ZFS snapshots beyond retention window
      ansible.builtin.shell: |
        set -e
        # Keep the {{ veza_release_retention | default(30) }} most-recent
        # pre-deploy snapshots per dataset; delete the rest.
        for dataset in $(zfs list -H -o name | grep -E "containers/{{ veza_container_prefix }}(postgres|redis|rabbitmq|minio)$"); do
          zfs list -H -t snapshot -o name -s creation "$dataset" \
            | grep "@pre-deploy-" \
            | head -n -{{ veza_release_retention | default(30) }} \
            | xargs -r -n1 zfs destroy -r || true
        done
      args:
        executable: /bin/bash
      changed_when: false
      failed_when: false
      tags: [data, zfs, prune]

# -----------------------------------------------------------------------
# Provision (create-if-absent) each data container. We don't recreate
# existing ones — they own state.
# -----------------------------------------------------------------------
- name: Ensure data containers exist
  hosts: incus_hosts
  become: true
  gather_facts: false
  tasks:
    - name: Launch container if absent
      ansible.builtin.shell: |
        set -e
        if incus info "{{ item.name }}" >/dev/null 2>&1; then
          echo "{{ item.name }} already exists"
          exit 0
        fi
        incus launch {{ veza_app_base_image }} "{{ item.name }}" \
          --profile veza-data \
          --profile veza-net \
          --network "{{ veza_incus_network }}"
        # Wait for the container's API to respond before any subsequent task
        # (apt, systemd) hits a half-up container.
        for i in $(seq 1 {{ veza_app_container_ready_timeout | default(30) }}); do
          if incus exec "{{ item.name }}" -- /bin/true 2>/dev/null; then
            echo "Container {{ item.name }} ready"
            exit 0
          fi
          sleep 1
        done
        echo "Container {{ item.name }} did not become ready within timeout"
        exit 1
      args:
        executable: /bin/bash
      loop: "{{ veza_data_containers }}"
      register: launch_result
      changed_when: "'Container' in (launch_result.stdout | default('')) and 'ready' in (launch_result.stdout | default(''))"
      tags: [data, provision]

    - name: Refresh inventory so the new containers become reachable
      ansible.builtin.meta: refresh_inventory
      tags: [data, provision]

# -----------------------------------------------------------------------
# Per-kind service config. Implemented inline rather than via roles so
# this playbook stays readable. When a kind grows, lift it into its own
# tasks/<kind>.yml or role.
# -----------------------------------------------------------------------
- name: Configure postgres
  hosts: "{{ veza_container_prefix + 'postgres' }}"
  become: true
  gather_facts: false
  vars:
    ansible_connection: community.general.incus
    ansible_python_interpreter: /usr/bin/python3
  tasks:
    - name: Install postgresql-16
      ansible.builtin.apt:
        name:
          - postgresql-16
          - python3-psycopg2  # Required by Ansible's postgresql_user/db modules
        state: present
        update_cache: true
        cache_valid_time: 3600

    - name: Ensure postgres is enabled + started
      ansible.builtin.systemd:
        name: postgresql
        state: started
        enabled: true

    - name: Wait for postgres ready
      ansible.builtin.wait_for:
        port: 5432
        host: 127.0.0.1
        timeout: 30

    - name: Ensure veza role exists with the vault-stored password
      community.postgresql.postgresql_user:
        name: veza
        password: "{{ vault_postgres_password }}"
        role_attr_flags: LOGIN
      become_user: postgres
      no_log: true

    - name: Ensure veza database exists owned by veza role
      community.postgresql.postgresql_db:
        name: veza
        owner: veza
        encoding: UTF8
        lc_collate: C
        lc_ctype: C
        template: template0
      become_user: postgres
  tags: [data, postgres]

- name: Configure redis
  hosts: "{{ veza_container_prefix + 'redis' }}"
  become: true
  gather_facts: false
  vars:
    ansible_connection: community.general.incus
    ansible_python_interpreter: /usr/bin/python3
  tasks:
    - name: Install redis-server
      ansible.builtin.apt:
        name: redis-server
        state: present
        update_cache: true
        cache_valid_time: 3600

    - name: Render redis.conf with password
      ansible.builtin.copy:
        content: |
          bind 0.0.0.0
          protected-mode yes
          port 6379
          requirepass {{ vault_redis_password }}
          maxmemory 256mb
          maxmemory-policy allkeys-lru
          appendonly yes
          appendfsync everysec
          dir /var/lib/redis
        dest: /etc/redis/redis.conf
        owner: redis
        group: redis
        mode: "0640"
      no_log: true
      notify: Restart redis

    - name: Ensure redis is enabled + started
      ansible.builtin.systemd:
        name: redis-server
        state: started
        enabled: true

    - name: Wait for redis ready
      ansible.builtin.wait_for:
        port: 6379
        host: 127.0.0.1
        timeout: 30
  handlers:
    - name: Restart redis
      ansible.builtin.systemd:
        name: redis-server
        state: restarted
  tags: [data, redis]

- name: Configure rabbitmq
  hosts: "{{ veza_container_prefix + 'rabbitmq' }}"
  become: true
  gather_facts: false
  vars:
    ansible_connection: community.general.incus
    ansible_python_interpreter: /usr/bin/python3
  tasks:
    - name: Install rabbitmq-server
      ansible.builtin.apt:
        name: rabbitmq-server
        state: present
        update_cache: true
        cache_valid_time: 3600

    - name: Ensure rabbitmq is enabled + started
      ansible.builtin.systemd:
        name: rabbitmq-server
        state: started
        enabled: true

    - name: Wait for rabbitmq ready
      ansible.builtin.wait_for:
        port: 5672
        host: 127.0.0.1
        timeout: 60

    - name: Ensure /veza vhost exists
      community.rabbitmq.rabbitmq_vhost:
        name: /veza
        state: present

    - name: Ensure veza user exists with vault password
      community.rabbitmq.rabbitmq_user:
        user: veza
        password: "{{ vault_rabbitmq_password }}"
        vhost: /veza
        configure_priv: ".*"
        read_priv: ".*"
        write_priv: ".*"
        state: present
        update_password: always
      no_log: true
  tags: [data, rabbitmq]

- name: Configure minio
  hosts: "{{ veza_container_prefix + 'minio' }}"
  become: true
  gather_facts: false
  vars:
    ansible_connection: community.general.incus
    ansible_python_interpreter: /usr/bin/python3
  tasks:
    - name: Install minio + mc via direct download (no apt package)
      ansible.builtin.shell: |
        set -e
        if ! command -v minio >/dev/null 2>&1; then
          curl -fsSL https://dl.min.io/server/minio/release/linux-amd64/minio -o /usr/local/bin/minio
          chmod 0755 /usr/local/bin/minio
        fi
        if ! command -v mc >/dev/null 2>&1; then
          curl -fsSL https://dl.min.io/client/mc/release/linux-amd64/mc -o /usr/local/bin/mc
          chmod 0755 /usr/local/bin/mc
        fi
      args:
        executable: /bin/bash
      changed_when: false

    - name: Ensure minio system user
      ansible.builtin.user:
        name: minio
        system: true
        shell: /usr/sbin/nologin
        home: /var/lib/minio

    - name: Ensure minio data dir
      ansible.builtin.file:
        path: /var/lib/minio
        state: directory
        owner: minio
        group: minio
        mode: "0750"

    - name: Render minio EnvironmentFile
      ansible.builtin.copy:
        content: |
          MINIO_ROOT_USER={{ vault_minio_root_user }}
          MINIO_ROOT_PASSWORD={{ vault_minio_root_password }}
          MINIO_VOLUMES=/var/lib/minio
          MINIO_OPTS="--address :9000 --console-address :9001"
        dest: /etc/default/minio
        owner: root
        group: root
        mode: "0640"
      no_log: true
      notify: Restart minio

    - name: Render minio systemd unit
      ansible.builtin.copy:
        content: |
          [Unit]
          Description=MinIO
          After=network-online.target
          Wants=network-online.target

          [Service]
          Type=simple
          User=minio
          Group=minio
          EnvironmentFile=/etc/default/minio
          ExecStart=/usr/local/bin/minio server $MINIO_OPTS $MINIO_VOLUMES
          Restart=on-failure
          LimitNOFILE=65535

          [Install]
          WantedBy=multi-user.target
        dest: /etc/systemd/system/minio.service
        mode: "0644"
      notify:
        - Reload systemd
        - Restart minio

    - name: Enable + start minio
      ansible.builtin.systemd:
        name: minio
        state: started
        enabled: true
        daemon_reload: true

    - name: Wait for minio ready
      ansible.builtin.wait_for:
        port: 9000
        host: 127.0.0.1
        timeout: 60

    - name: Configure mc client alias
      ansible.builtin.shell: |
        set -e
        mc alias set veza-local http://127.0.0.1:9000 \
          "{{ vault_minio_root_user }}" "{{ vault_minio_root_password }}" >/dev/null
      args:
        executable: /bin/bash
      changed_when: false
      no_log: true

    - name: Ensure veza-{{ veza_env }} bucket exists
      ansible.builtin.shell: |
        mc mb --ignore-existing veza-local/veza-{{ veza_env }}
      args:
        executable: /bin/bash
      changed_when: false
  handlers:
    - name: Reload systemd
      ansible.builtin.systemd:
        daemon_reload: true
    - name: Restart minio
      ansible.builtin.systemd:
        name: minio
        state: restarted
  tags: [data, minio]