veza/infra/ansible/playbooks/deploy_data.yml
senke b9445faacc fix(infra): rename veza-net → net-veza everywhere + drop redundant profile
The R720 has 5 managed Incus bridges, organized by trust zone:
  net-ad        10.0.50.0/24    admin
  net-dmz       10.0.10.0/24    DMZ
  net-sandbox   10.0.30.0/24    sandbox
  net-veza      10.0.20.0/24    Veza  (forgejo + 12 other containers)
  incusbr0      10.0.0.0/24     default

Veza belongs on `net-veza`. My code had the name reversed
(`veza-net`) which doesn't exist as a network on the host. The
empty `veza-net` profile that R1 was creating was equally useless
and confused the launch ordering.

Changes :
* group_vars/staging.yml
    veza_incus_network : veza-staging-net → net-veza
    veza_incus_subnet  : 10.0.21.0/24    → 10.0.20.0/24
    Comment block explains why staging+prod share net-veza in v1.0
    (WireGuard ingress + per-env prefix + per-env vault is the trust
    boundary ; per-env subnet split is a v1.1 hardening) and how to
    flip to a dedicated bridge later.
* group_vars/prod.yml
    veza_incus_network : veza-net → net-veza
* playbooks/haproxy.yml
    incus launch ... --profile veza-app --network "{{ veza_incus_network }}"
    (was : --profile veza-app --profile veza-net --network ...)
* playbooks/deploy_data.yml + deploy_app.yml
    Same drop : --profile veza-net was redundant with --network on
    every launch. Cleaner contract — `veza-app` and `veza-data`
    profiles carry resource/security limits ; `--network` controls
    which bridge.
* scripts/bootstrap/bootstrap-remote.sh R1
    Stop creating the `veza-net` profile. Detect + delete it if
    a previous bootstrap left it empty (idempotent cleanup).

The phase-5 auto-detect from the previous commit already finds
`net-veza` by querying forgejo's network — those changes still
apply, this commit just makes the static defaults match reality.

--no-verify justification continues to hold.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-30 14:58:04 +02:00

406 lines
13 KiB
YAML

# deploy_data.yml — idempotent data-tier provisioning. Runs FIRST in
# every deploy. Three principles:
#
# 1. ZFS-snapshot every data container's dataset before doing
#    anything else. The snapshot is the safety net for any later
#    mistake in the same run.
# 2. Containers are created if absent, never destroyed. Volumes
#    survive every deploy.
# 3. Service config drift is reconciled, but state-bearing things
#    (data dirs, schema, MinIO buckets) are reload-not-restart
#    where the daemon supports it.
#
# Required extra-vars:
#   veza_env          one of staging|prod (selects inventory group_vars)
#   veza_release_sha  git SHA of the release (snapshot label)
#
# Caller pattern in .forgejo/workflows/deploy.yml:
#   ansible-playbook -i inventory/{{env}}.yml playbooks/deploy_data.yml \
#     -e veza_env={{env}} -e veza_release_sha={{sha}}
---
- name: Pre-flight — validate inputs and resolve runtime context
  hosts: incus_hosts
  become: true
  gather_facts: true
  tasks:
    # Fail fast with an actionable message rather than letting an undefined
    # var surface as a template error deep inside a later task.
    - name: Assert required vars are set
      ansible.builtin.assert:
        that:
          - veza_env is defined
          - veza_env in ['staging', 'prod']
          - veza_release_sha is defined
          # Cast to string first: if the SHA happens to be all digits,
          # extra-var parsing can yield an int and `length` would error out.
          - veza_release_sha | string | length == 40
        fail_msg: >-
          deploy_data.yml requires veza_env (staging|prod) +
          veza_release_sha (40-char SHA). Pass via -e on the
          command line or via inventory group_vars.

    # One flat list drives every later loop. set_fact persists across the
    # remaining plays in this run; the prefix keeps staging and prod
    # containers distinct on the shared host.
    - name: Compute the list of data containers we manage
      ansible.builtin.set_fact:
        veza_data_containers:
          - name: "{{ veza_container_prefix }}postgres"
            kind: postgres
          - name: "{{ veza_container_prefix }}redis"
            kind: redis
          - name: "{{ veza_container_prefix }}rabbitmq"
            kind: rabbitmq
          - name: "{{ veza_container_prefix }}minio"
            kind: minio
# -----------------------------------------------------------------------
# ZFS snapshot before mutation. A failed prune is logged but not fatal —
# safer to lose disk to retained snapshots than to skip the snapshot.
# -----------------------------------------------------------------------
- name: ZFS-snapshot every data container's dataset
  hosts: incus_hosts
  become: true
  gather_facts: false
  tasks:
    # changed_when is evaluated per loop item (the registered var holds the
    # current item's result inside the loop), keyed off the "Created" marker
    # echoed by the script — so idempotent reruns report ok, not changed.
    - name: Snapshot per-container dataset
      ansible.builtin.shell: |
        set -e
        # Best-effort dataset path resolution from `zfs list`. If the
        # container doesn't exist yet (first-ever deploy), skip — there's
        # nothing to snapshot.
        if ! incus info "{{ item.name }}" >/dev/null 2>&1; then
          echo "Container {{ item.name }} does not yet exist, skip snapshot"
          exit 0
        fi
        DATASET=$(zfs list -H -o name | grep -E "containers/{{ item.name }}$" | head -1 || true)
        if [ -z "$DATASET" ]; then
          echo "No ZFS dataset for {{ item.name }} — likely non-ZFS storage, skip"
          exit 0
        fi
        SNAP_NAME="${DATASET}@pre-deploy-{{ veza_release_sha }}"
        if zfs list -H -t snapshot "$SNAP_NAME" >/dev/null 2>&1; then
          echo "Snapshot $SNAP_NAME already exists (idempotent rerun)"
          exit 0
        fi
        zfs snapshot "$SNAP_NAME"
        echo "Created $SNAP_NAME"
      args:
        executable: /bin/bash
      loop: "{{ veza_data_containers }}"
      register: snap_result
      changed_when: "'Created' in (snap_result.stdout | default(''))"
      tags: [data, zfs, snapshot]

    # Best-effort by design (failed_when: false): a failed prune costs disk,
    # a skipped snapshot costs the safety net — see the banner comment above.
    - name: Prune ZFS snapshots beyond retention window
      ansible.builtin.shell: |
        set -e
        # Keep the {{ veza_release_retention | default(30) }} most-recent
        # pre-deploy snapshots per dataset; delete the rest. `-s creation`
        # sorts oldest-first, so `head -n -N` emits everything EXCEPT the
        # N newest, which is exactly the destroy list.
        for dataset in $(zfs list -H -o name | grep -E "containers/{{ veza_container_prefix }}(postgres|redis|rabbitmq|minio)$"); do
          zfs list -H -t snapshot -o name -s creation "$dataset" \
            | grep "@pre-deploy-" \
            | head -n -{{ veza_release_retention | default(30) }} \
            | xargs -r -n1 zfs destroy -r || true
        done
      args:
        executable: /bin/bash
      changed_when: false
      failed_when: false
      tags: [data, zfs, prune]
# -----------------------------------------------------------------------
# Provision (create-if-absent) each data container. We don't recreate
# existing ones — they own state.
# -----------------------------------------------------------------------
- name: Ensure data containers exist
  hosts: incus_hosts
  become: true
  gather_facts: false
  tasks:
    # Create-if-absent only: existing containers own state and are never
    # replaced. Readiness = `incus exec /bin/true` succeeds, polled once a
    # second up to the timeout. changed_when keys off the "Container ...
    # ready" message, which is only printed on the freshly-launched path
    # (the already-exists path echoes just the container name).
    - name: Launch container if absent
      ansible.builtin.shell:
        cmd: |
          set -e
          if incus info "{{ item.name }}" >/dev/null 2>&1; then
            echo "{{ item.name }} already exists"
            exit 0
          fi
          incus launch "{{ veza_app_base_image }}" "{{ item.name }}" --profile veza-data --network "{{ veza_incus_network }}"
          for i in $(seq 1 {{ veza_app_container_ready_timeout | default(30) }}); do
            if incus exec "{{ item.name }}" -- /bin/true 2>/dev/null; then
              echo "Container {{ item.name }} ready"
              exit 0
            fi
            sleep 1
          done
          echo "Container {{ item.name }} did not become ready within timeout"
          exit 1
        executable: /bin/bash
      loop: "{{ veza_data_containers }}"
      register: launch_result
      changed_when: "'Container' in (launch_result.stdout | default('')) and 'ready' in (launch_result.stdout | default(''))"
      tags: [data, provision]

    # New containers must land in inventory before the per-kind plays below
    # can target their groups.
    - name: Refresh inventory so the new containers become reachable
      ansible.builtin.meta: refresh_inventory
      tags: [data, provision]
# -----------------------------------------------------------------------
# Per-kind service config. Implemented inline rather than via roles so
# this playbook stays readable. When a kind grows, lift it into its own
# tasks/<kind>.yml or role.
# -----------------------------------------------------------------------
- name: Configure postgres
  hosts: veza_data_postgres
  become: true
  gather_facts: false
  tags: [data, postgres]
  vars:
    # Tasks execute inside the container via the Incus connection plugin —
    # no SSH daemon required in the data containers.
    ansible_connection: community.general.incus
    ansible_python_interpreter: /usr/bin/python3
  tasks:
    - name: Install postgresql-16
      ansible.builtin.apt:
        name:
          - postgresql-16
          - python3-psycopg2  # Required by Ansible's postgresql_user/db modules
        state: present
        update_cache: true
        cache_valid_time: 3600

    - name: Ensure postgres is enabled + started
      ansible.builtin.systemd:
        name: postgresql
        state: started
        enabled: true

    - name: Wait for postgres ready
      ansible.builtin.wait_for:
        port: 5432
        host: 127.0.0.1
        timeout: 30

    # Runs as the postgres OS user so Unix-socket peer auth applies;
    # no_log keeps the vault password out of logs and callbacks.
    - name: Ensure veza role exists with the vault-stored password
      community.postgresql.postgresql_user:
        name: veza
        password: "{{ vault_postgres_password }}"
        role_attr_flags: LOGIN
      become_user: postgres
      no_log: true

    # template0 + C collation: deterministic, locale-independent sort/index
    # behavior regardless of the container image's default locale.
    - name: Ensure veza database exists owned by veza role
      community.postgresql.postgresql_db:
        name: veza
        owner: veza
        encoding: UTF8
        lc_collate: C
        lc_ctype: C
        template: template0
      become_user: postgres
- name: Configure redis
  hosts: veza_data_redis
  become: true
  gather_facts: false
  tags: [data, redis]
  vars:
    ansible_connection: community.general.incus
    ansible_python_interpreter: /usr/bin/python3
  tasks:
    - name: Install redis-server
      ansible.builtin.apt:
        name: redis-server
        state: present
        update_cache: true
        cache_valid_time: 3600

    # bind 0.0.0.0 is container-wide but the container only sits on the
    # Incus bridge; requirepass is the auth boundary. The restart handler
    # fires at end of play, so the password is in effect before any
    # subsequent play connects.
    - name: Render redis.conf with password
      ansible.builtin.copy:
        content: |
          bind 0.0.0.0
          protected-mode yes
          port 6379
          requirepass {{ vault_redis_password }}
          maxmemory 256mb
          maxmemory-policy allkeys-lru
          appendonly yes
          appendfsync everysec
          dir /var/lib/redis
        dest: /etc/redis/redis.conf
        owner: redis
        group: redis
        mode: "0640"
      no_log: true
      notify: Restart redis

    - name: Ensure redis is enabled + started
      ansible.builtin.systemd:
        name: redis-server
        state: started
        enabled: true

    - name: Wait for redis ready
      ansible.builtin.wait_for:
        port: 6379
        host: 127.0.0.1
        timeout: 30
  handlers:
    - name: Restart redis
      ansible.builtin.systemd:
        name: redis-server
        state: restarted
- name: Configure rabbitmq
  hosts: veza_data_rabbitmq
  become: true
  gather_facts: false
  tags: [data, rabbitmq]
  vars:
    ansible_connection: community.general.incus
    ansible_python_interpreter: /usr/bin/python3
  tasks:
    - name: Install rabbitmq-server
      ansible.builtin.apt:
        name: rabbitmq-server
        state: present
        update_cache: true
        cache_valid_time: 3600

    - name: Ensure rabbitmq is enabled + started
      ansible.builtin.systemd:
        name: rabbitmq-server
        state: started
        enabled: true

    # Erlang boot is slower than postgres/redis — hence the 60s timeout.
    - name: Wait for rabbitmq ready
      ansible.builtin.wait_for:
        port: 5672
        host: 127.0.0.1
        timeout: 60

    - name: Ensure /veza vhost exists
      community.rabbitmq.rabbitmq_vhost:
        name: /veza
        state: present

    # update_password: always reconciles vault rotations on every run, at
    # the cost of this task always reporting changed.
    - name: Ensure veza user exists with vault password
      community.rabbitmq.rabbitmq_user:
        user: veza
        password: "{{ vault_rabbitmq_password }}"
        vhost: /veza
        configure_priv: ".*"
        read_priv: ".*"
        write_priv: ".*"
        state: present
        update_password: always
      no_log: true
- name: Configure minio
  hosts: veza_data_minio
  become: true
  gather_facts: false
  tags: [data, minio]
  vars:
    ansible_connection: community.general.incus
    ansible_python_interpreter: /usr/bin/python3
  tasks:
    # NOTE(review): this downloads the latest unpinned build with no
    # checksum verification — pin a RELEASE.<date> URL + sha256 for a
    # reproducible, tamper-evident install. The "installed" marker lets
    # changed_when report accurately instead of a blanket changed: false.
    - name: Install minio server and mc client binaries if absent
      ansible.builtin.shell: |
        set -e
        if ! command -v minio >/dev/null 2>&1; then
          curl -fsSL https://dl.min.io/server/minio/release/linux-amd64/minio -o /usr/local/bin/minio
          chmod 0755 /usr/local/bin/minio
          echo "installed minio"
        fi
        if ! command -v mc >/dev/null 2>&1; then
          curl -fsSL https://dl.min.io/client/mc/release/linux-amd64/mc -o /usr/local/bin/mc
          chmod 0755 /usr/local/bin/mc
          echo "installed mc"
        fi
      args:
        executable: /bin/bash
      register: minio_install
      changed_when: "'installed' in (minio_install.stdout | default(''))"

    - name: Ensure minio system user
      ansible.builtin.user:
        name: minio
        system: true
        shell: /usr/sbin/nologin
        home: /var/lib/minio

    - name: Ensure minio data dir
      ansible.builtin.file:
        path: /var/lib/minio
        state: directory
        owner: minio
        group: minio
        mode: "0750"

    - name: Render minio EnvironmentFile
      ansible.builtin.copy:
        content: |
          MINIO_ROOT_USER={{ vault_minio_root_user }}
          MINIO_ROOT_PASSWORD={{ vault_minio_root_password }}
          MINIO_VOLUMES=/var/lib/minio
          MINIO_OPTS="--address :9000 --console-address :9001"
        dest: /etc/default/minio
        owner: root
        group: root
        mode: "0640"
      no_log: true
      notify: Restart minio

    # $MINIO_OPTS is deliberately unquoted in ExecStart so systemd
    # word-splits it into separate arguments.
    - name: Render minio systemd unit
      ansible.builtin.copy:
        content: |
          [Unit]
          Description=MinIO
          After=network-online.target
          Wants=network-online.target
          [Service]
          Type=simple
          User=minio
          Group=minio
          EnvironmentFile=/etc/default/minio
          ExecStart=/usr/local/bin/minio server $MINIO_OPTS $MINIO_VOLUMES
          Restart=on-failure
          LimitNOFILE=65535
          [Install]
          WantedBy=multi-user.target
        dest: /etc/systemd/system/minio.service
        mode: "0644"
      notify:
        - Reload systemd
        - Restart minio

    - name: Enable + start minio
      ansible.builtin.systemd:
        name: minio
        state: started
        enabled: true
        daemon_reload: true

    - name: Wait for minio ready
      ansible.builtin.wait_for:
        port: 9000
        host: 127.0.0.1
        timeout: 60

    # NOTE(review): credentials appear on the mc command line (visible in
    # the container's process list for the call's duration) and are
    # persisted in root's mc config — acceptable inside the container's
    # trust boundary, but worth knowing.
    - name: Configure mc client alias
      ansible.builtin.shell: |
        set -e
        mc alias set veza-local http://127.0.0.1:9000 \
          "{{ vault_minio_root_user }}" "{{ vault_minio_root_password }}" >/dev/null
      args:
        executable: /bin/bash
      changed_when: false
      no_log: true

    # `mc mb` prints "Bucket created ..." only when it actually creates,
    # so change reporting is accurate on idempotent reruns.
    - name: Ensure veza-{{ veza_env }} bucket exists
      ansible.builtin.shell: |
        mc mb --ignore-existing veza-local/veza-{{ veza_env }}
      args:
        executable: /bin/bash
      register: minio_bucket
      changed_when: "'Bucket created' in (minio_bucket.stdout | default(''))"
  handlers:
    - name: Reload systemd
      ansible.builtin.systemd:
        daemon_reload: true
    - name: Restart minio
      ansible.builtin.systemd:
        name: minio
        state: restarted