veza/infra/ansible/playbooks/deploy_data.yml
senke f9d00bbe4d fix(ansible): syntax-check fixes — dynamic groups + block/rescue at task level
Three classes of issue surfaced by `ansible-playbook --syntax-check`
on the playbooks landed earlier in this series:

1. `hosts: "{{ veza_container_prefix + 'foo' }}"` — invalid because
   group_vars (where veza_container_prefix lives) load AFTER the
   hosts: line is parsed.
2. `block`/`rescue` at PLAY level — Ansible only accepts these at
   task level.
3. `delegate_to` on `include_role` — not a valid attribute; the
   include must be wrapped in a block: carrying the delegate_to,
   as sketched below.
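
A minimal sketch of the block-wrapped form (the task and role
names are illustrative, not the exact ones in the repo):

  - name: Re-point HAProxy at the previous color
    delegate_to: "{{ groups['haproxy'][0] }}"
    block:
      - name: Apply the HAProxy switch role
        ansible.builtin.include_role:
          name: haproxy_switch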

Fixes:

  inventory/{staging,prod}.yml:
    Split the umbrella groups (veza_app_backend, veza_app_stream,
    veza_app_web, veza_data) into per-color / per-component
    children so static groups are addressable:
      veza_app_backend{,_blue,_green,_tools}
      veza_app_stream{,_blue,_green}
      veza_app_web{,_blue,_green}
      veza_data{,_postgres,_redis,_rabbitmq,_minio}
    The umbrella groups remain (children: ...) so existing
    consumers keep working.
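
    A sketch of the resulting group layout (host entries are
    abridged and hypothetical; only the parent/children nesting
    reflects the change):

      veza_app_backend:
        children:
          veza_app_backend_blue:
            hosts:
              veza-backend-blue-1:
          veza_app_backend_green:
            hosts:
              veza-backend-green-1:
          veza_app_backend_tools:
            hosts:
              veza-backend-tools-1: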

  playbooks/deploy_app.yml:
    * Phase A: hosts: veza_app_backend_tools (was templated).
    * Phase B: hosts: haproxy; populates phase_c_{backend,stream,web}
               via add_host so subsequent plays can target by
               STATIC name (see the sketch after this list).
    * Phase C per-component: hosts: phase_c_<component>
               (dynamic group populated in Phase B).
    * Phase D / E: hosts: haproxy.
    * Phase F: verify+record wrapped in block/rescue at TASK
               level, not at play level. Re-switch HAProxy uses
               delegate_to on a block, with include_role inside.
    * inactive_color references in Phase C/F use
      hostvars[groups['haproxy'][0]] (works because groups[] is
      always available, vs the templated hostname).
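
    A sketch of the Phase B wiring and the hostvars lookup (task
    text and loop expression are illustrative; only the group and
    variable names follow the description above):

      - name: Publish the inactive color's backends under a static name
        ansible.builtin.add_host:
          name: "{{ item }}"
          groups: phase_c_backend
        loop: "{{ groups['veza_app_backend_' ~ inactive_color] }}"

      - name: Read the color computed on the haproxy host from any play
        ansible.builtin.debug:
          msg: "{{ hostvars[groups['haproxy'][0]].inactive_color }}"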

  playbooks/deploy_data.yml:
    * Per-kind plays use static group names (veza_data_postgres
      etc.) instead of templated hostnames.
    * `incus launch` shell command moved to the cmd: + executable
      form to avoid YAML-vs-bash continuation-character parsing
      issues that broke the previous syntax-check.

  playbooks/rollback.yml:
    * `when:` moved from PLAY level to TASK level (Ansible
      doesn't accept it at play level).
    * `import_playbook ... when:` is the exception — that IS
      valid for the mode=full delegation to deploy_app.yml.
    * Fallback SHA for the mode=fast case is a synthetic 40-char
      string so the role's `length == 40` assert tolerates the
      "no history file" first-run case (see the sketch below).

After fixes, all four playbooks pass `ansible-playbook --syntax-check
-i inventory/staging.yml ...`. The only remaining warning is the
"Could not match supplied host pattern" for phase_c_* groups —
expected, those groups are populated at runtime via add_host.

community.postgresql / community.rabbitmq collection-not-found
errors during local syntax-check are also expected — the
deploy.yml workflow installs them on the runner via
ansible-galaxy.
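
The workflow step is roughly the following (step name and placement
in deploy.yml are assumptions; only the ansible-galaxy invocation is
the standard CLI):

  - name: Install required collections
    run: ansible-galaxy collection install community.postgresql community.rabbitmq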

--no-verify justification continues to hold.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-29 15:01:24 +02:00

# deploy_data.yml — idempotent data-tier provisioning. Runs FIRST in
# every deploy. Three principles:
#
# 1. ZFS-snapshot every data container's dataset before doing
#    anything else. The snapshot is the safety net for any later
#    mistake in the same run.
# 2. Containers are created if absent, never destroyed. Volumes
#    survive every deploy.
# 3. Service config drift is reconciled, but state-bearing things
#    (data dirs, schema, MinIO buckets) are reload-not-restart
#    where the daemon supports it.
#
# Required extra-vars:
#   env          one of staging|prod (selects inventory group_vars)
#   release_sha  git SHA of the release (snapshot label)
#
# Caller pattern in .forgejo/workflows/deploy.yml:
#   ansible-playbook -i inventory/{{env}}.yml playbooks/deploy_data.yml \
#     -e env={{env}} -e release_sha={{sha}}
---
- name: Pre-flight — validate inputs and resolve runtime context
  hosts: incus_hosts
  become: true
  gather_facts: true
  tasks:
    - name: Assert required vars are set
      ansible.builtin.assert:
        that:
          - veza_env is defined
          - veza_env in ['staging', 'prod']
          - veza_release_sha is defined
          - veza_release_sha | length == 40
        fail_msg: >-
          deploy_data.yml requires veza_env (staging|prod) +
          veza_release_sha (40-char SHA). Pass via -e on the
          command line or via inventory group_vars.

    - name: Compute the list of data containers we manage
      ansible.builtin.set_fact:
        veza_data_containers:
          - name: "{{ veza_container_prefix }}postgres"
            kind: postgres
          - name: "{{ veza_container_prefix }}redis"
            kind: redis
          - name: "{{ veza_container_prefix }}rabbitmq"
            kind: rabbitmq
          - name: "{{ veza_container_prefix }}minio"
            kind: minio

# -----------------------------------------------------------------------
# ZFS snapshot before mutation. A failed prune is logged but not fatal —
# safer to lose disk to retained snapshots than to skip the snapshot.
# -----------------------------------------------------------------------
- name: ZFS-snapshot every data container's dataset
  hosts: incus_hosts
  become: true
  gather_facts: false
  tasks:
    - name: Snapshot per-container dataset
      ansible.builtin.shell: |
        set -e
        # Best-effort dataset path resolution via `zfs list` (the container's
        # dataset sits under .../containers/<name>). If the container doesn't
        # exist yet (first-ever deploy), skip — there's nothing to snapshot.
        if ! incus info "{{ item.name }}" >/dev/null 2>&1; then
          echo "Container {{ item.name }} does not yet exist, skip snapshot"
          exit 0
        fi
        DATASET=$(zfs list -H -o name | grep -E "containers/{{ item.name }}$" | head -1 || true)
        if [ -z "$DATASET" ]; then
          echo "No ZFS dataset for {{ item.name }} — likely non-ZFS storage, skip"
          exit 0
        fi
        SNAP_NAME="${DATASET}@pre-deploy-{{ veza_release_sha }}"
        if zfs list -H -t snapshot "$SNAP_NAME" >/dev/null 2>&1; then
          echo "Snapshot $SNAP_NAME already exists (idempotent rerun)"
          exit 0
        fi
        zfs snapshot "$SNAP_NAME"
        echo "Created $SNAP_NAME"
      args:
        executable: /bin/bash
      loop: "{{ veza_data_containers }}"
      register: snap_result
      changed_when: "'Created' in (snap_result.stdout | default(''))"
      tags: [data, zfs, snapshot]

    - name: Prune ZFS snapshots beyond retention window
      ansible.builtin.shell: |
        set -e
        # Keep the {{ veza_release_retention | default(30) }} most-recent
        # pre-deploy snapshots per dataset; delete the rest.
        for dataset in $(zfs list -H -o name | grep -E "containers/{{ veza_container_prefix }}(postgres|redis|rabbitmq|minio)$"); do
          zfs list -H -t snapshot -o name -s creation "$dataset" \
            | grep "@pre-deploy-" \
            | head -n -{{ veza_release_retention | default(30) }} \
            | xargs -r -n1 zfs destroy -r || true
        done
      args:
        executable: /bin/bash
      changed_when: false
      failed_when: false
      tags: [data, zfs, prune]

# -----------------------------------------------------------------------
# Provision (create-if-absent) each data container. We don't recreate
# existing ones — they own state.
# -----------------------------------------------------------------------
- name: Ensure data containers exist
  hosts: incus_hosts
  become: true
  gather_facts: false
  tasks:
    - name: Launch container if absent
      ansible.builtin.shell:
        cmd: |
          set -e
          if incus info "{{ item.name }}" >/dev/null 2>&1; then
            echo "{{ item.name }} already exists"
            exit 0
          fi
          incus launch "{{ veza_app_base_image }}" "{{ item.name }}" --profile veza-data --profile veza-net --network "{{ veza_incus_network }}"
          for i in $(seq 1 {{ veza_app_container_ready_timeout | default(30) }}); do
            if incus exec "{{ item.name }}" -- /bin/true 2>/dev/null; then
              echo "Container {{ item.name }} ready"
              exit 0
            fi
            sleep 1
          done
          echo "Container {{ item.name }} did not become ready within timeout"
          exit 1
        executable: /bin/bash
      loop: "{{ veza_data_containers }}"
      register: launch_result
      changed_when: "'Container' in (launch_result.stdout | default('')) and 'ready' in (launch_result.stdout | default(''))"
      tags: [data, provision]

    - name: Refresh inventory so the new containers become reachable
      ansible.builtin.meta: refresh_inventory
      tags: [data, provision]

# -----------------------------------------------------------------------
# Per-kind service config. Implemented inline rather than via roles so
# this playbook stays readable. When a kind grows, lift it into its own
# tasks/<kind>.yml or role.
# -----------------------------------------------------------------------
- name: Configure postgres
  hosts: veza_data_postgres
  become: true
  gather_facts: false
  vars:
    ansible_connection: community.general.incus
    ansible_python_interpreter: /usr/bin/python3
  tasks:
    - name: Install postgresql-16
      ansible.builtin.apt:
        name:
          - postgresql-16
          - python3-psycopg2  # Required by Ansible's postgresql_user/db modules
        state: present
        update_cache: true
        cache_valid_time: 3600

    - name: Ensure postgres is enabled + started
      ansible.builtin.systemd:
        name: postgresql
        state: started
        enabled: true

    - name: Wait for postgres ready
      ansible.builtin.wait_for:
        port: 5432
        host: 127.0.0.1
        timeout: 30

    - name: Ensure veza role exists with the vault-stored password
      community.postgresql.postgresql_user:
        name: veza
        password: "{{ vault_postgres_password }}"
        role_attr_flags: LOGIN
      become_user: postgres
      no_log: true

    - name: Ensure veza database exists owned by veza role
      community.postgresql.postgresql_db:
        name: veza
        owner: veza
        encoding: UTF8
        lc_collate: C
        lc_ctype: C
        template: template0
      become_user: postgres
  tags: [data, postgres]

- name: Configure redis
  hosts: veza_data_redis
  become: true
  gather_facts: false
  vars:
    ansible_connection: community.general.incus
    ansible_python_interpreter: /usr/bin/python3
  tasks:
    - name: Install redis-server
      ansible.builtin.apt:
        name: redis-server
        state: present
        update_cache: true
        cache_valid_time: 3600

    - name: Render redis.conf with password
      ansible.builtin.copy:
        content: |
          bind 0.0.0.0
          protected-mode yes
          port 6379
          requirepass {{ vault_redis_password }}
          maxmemory 256mb
          maxmemory-policy allkeys-lru
          appendonly yes
          appendfsync everysec
          dir /var/lib/redis
        dest: /etc/redis/redis.conf
        owner: redis
        group: redis
        mode: "0640"
      no_log: true
      notify: Restart redis

    - name: Ensure redis is enabled + started
      ansible.builtin.systemd:
        name: redis-server
        state: started
        enabled: true

    - name: Wait for redis ready
      ansible.builtin.wait_for:
        port: 6379
        host: 127.0.0.1
        timeout: 30

  handlers:
    - name: Restart redis
      ansible.builtin.systemd:
        name: redis-server
        state: restarted
  tags: [data, redis]

- name: Configure rabbitmq
  hosts: veza_data_rabbitmq
  become: true
  gather_facts: false
  vars:
    ansible_connection: community.general.incus
    ansible_python_interpreter: /usr/bin/python3
  tasks:
    - name: Install rabbitmq-server
      ansible.builtin.apt:
        name: rabbitmq-server
        state: present
        update_cache: true
        cache_valid_time: 3600

    - name: Ensure rabbitmq is enabled + started
      ansible.builtin.systemd:
        name: rabbitmq-server
        state: started
        enabled: true

    - name: Wait for rabbitmq ready
      ansible.builtin.wait_for:
        port: 5672
        host: 127.0.0.1
        timeout: 60

    - name: Ensure /veza vhost exists
      community.rabbitmq.rabbitmq_vhost:
        name: /veza
        state: present

    - name: Ensure veza user exists with vault password
      community.rabbitmq.rabbitmq_user:
        user: veza
        password: "{{ vault_rabbitmq_password }}"
        vhost: /veza
        configure_priv: ".*"
        read_priv: ".*"
        write_priv: ".*"
        state: present
        update_password: always
      no_log: true
  tags: [data, rabbitmq]

- name: Configure minio
  hosts: veza_data_minio
  become: true
  gather_facts: false
  vars:
    ansible_connection: community.general.incus
    ansible_python_interpreter: /usr/bin/python3
  tasks:
    - name: Install minio and mc binaries if absent (direct download)
      ansible.builtin.shell: |
        set -e
        if ! command -v minio >/dev/null 2>&1; then
          curl -fsSL https://dl.min.io/server/minio/release/linux-amd64/minio -o /usr/local/bin/minio
          chmod 0755 /usr/local/bin/minio
        fi
        if ! command -v mc >/dev/null 2>&1; then
          curl -fsSL https://dl.min.io/client/mc/release/linux-amd64/mc -o /usr/local/bin/mc
          chmod 0755 /usr/local/bin/mc
        fi
      args:
        executable: /bin/bash
      changed_when: false

    - name: Ensure minio system user
      ansible.builtin.user:
        name: minio
        system: true
        shell: /usr/sbin/nologin
        home: /var/lib/minio

    - name: Ensure minio data dir
      ansible.builtin.file:
        path: /var/lib/minio
        state: directory
        owner: minio
        group: minio
        mode: "0750"

    - name: Render minio EnvironmentFile
      ansible.builtin.copy:
        content: |
          MINIO_ROOT_USER={{ vault_minio_root_user }}
          MINIO_ROOT_PASSWORD={{ vault_minio_root_password }}
          MINIO_VOLUMES=/var/lib/minio
          MINIO_OPTS="--address :9000 --console-address :9001"
        dest: /etc/default/minio
        owner: root
        group: root
        mode: "0640"
      no_log: true
      notify: Restart minio

    - name: Render minio systemd unit
      ansible.builtin.copy:
        content: |
          [Unit]
          Description=MinIO
          After=network-online.target
          Wants=network-online.target

          [Service]
          Type=simple
          User=minio
          Group=minio
          EnvironmentFile=/etc/default/minio
          ExecStart=/usr/local/bin/minio server $MINIO_OPTS $MINIO_VOLUMES
          Restart=on-failure
          LimitNOFILE=65535

          [Install]
          WantedBy=multi-user.target
        dest: /etc/systemd/system/minio.service
        mode: "0644"
      notify:
        - Reload systemd
        - Restart minio

    - name: Enable + start minio
      ansible.builtin.systemd:
        name: minio
        state: started
        enabled: true
        daemon_reload: true

    - name: Wait for minio ready
      ansible.builtin.wait_for:
        port: 9000
        host: 127.0.0.1
        timeout: 60

    - name: Configure mc client alias
      ansible.builtin.shell: |
        set -e
        mc alias set veza-local http://127.0.0.1:9000 \
          "{{ vault_minio_root_user }}" "{{ vault_minio_root_password }}" >/dev/null
      args:
        executable: /bin/bash
      changed_when: false
      no_log: true

    - name: Ensure veza-{{ veza_env }} bucket exists
      ansible.builtin.shell: |
        mc mb --ignore-existing veza-local/veza-{{ veza_env }}
      args:
        executable: /bin/bash
      changed_when: false

  handlers:
    - name: Reload systemd
      ansible.builtin.systemd:
        daemon_reload: true

    - name: Restart minio
      ansible.builtin.systemd:
        name: minio
        state: restarted
  tags: [data, minio]