veza/infra/ansible/playbooks/postgres_ha.yml
senke ba6e8b4e0e
All checks were successful
Veza CI / Rust (Stream Server) (push) Successful in 3m49s
Security Scan / Secret Scanning (gitleaks) (push) Successful in 58s
Veza CI / Backend (Go) (push) Successful in 5m59s
Veza CI / Frontend (Web) (push) Successful in 15m22s
E2E Playwright / e2e (full) (push) Successful in 19m34s
Veza CI / Notify on failure (push) Has been skipped
feat(infra): pgbouncer role + pgbench load test (W2 Day 7)
ROADMAP_V1.0_LAUNCH.md §Semaine 2 day 7 deliverable: PgBouncer
fronts the pg_auto_failover formation, so the Postgres fork cost
is paid at most 50 times per server-pool refill instead of once
per HTTP handler's connection.

Wiring:
  veza-backend-api ──libpq──▶ pgaf-pgbouncer:6432 ──libpq──▶ pgaf-primary:5432
                              (1000 client cap)             (50 server pool)

Files:
  infra/ansible/roles/pgbouncer/
    defaults/main.yml — pool sizes match the acceptance target
      (1000 client × 50 server × 10 reserve), pool_mode=transaction
      (safe here because the backend uses no session-level features:
      transaction mode forbids LISTEN/NOTIFY and cross-tx prepared
      statements, and Veza uses neither), DNS TTL = 60s for failover.
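      A rough sketch of the shape (variable names illustrative,
      not verbatim from the role):
        pgbouncer_listen_port: 6432
        pgbouncer_pool_mode: transaction
        pgbouncer_max_client_conn: 1000    # client cap
        pgbouncer_default_pool_size: 50    # server pool
        pgbouncer_reserve_pool_size: 10
        pgbouncer_dns_max_ttl: 60          # seconds; bounds failover lag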
    tasks/main.yml — apt install pgbouncer + postgresql-client (so
      pgbench and the admin psql live on the same container), render
      pgbouncer.ini + userlist.txt, ensure /var/log/postgresql for
      the file log, enable + start service.
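      In outline (abridged sketch; userlist + log-dir tasks omitted,
      module names real):
        - ansible.builtin.apt:
            name: [pgbouncer, postgresql-client]
            state: present
        - ansible.builtin.template:
            src: pgbouncer.ini.j2
            dest: /etc/pgbouncer/pgbouncer.ini
          notify: RELOAD pgbouncer
        - ansible.builtin.service:
            name: pgbouncer
            state: started
            enabled: true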
    templates/pgbouncer.ini.j2 — full config; databases section
      points at pgaf-primary.lxd:5432 directly. Failover follows
      via DNS TTL until the W2 day 8 pg_autoctl state-change hook
      that issues RELOAD on the admin console.
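      The stanzas that matter, approximately (dbname illustrative):
        [databases]
        veza = host=pgaf-primary.lxd port=5432

        [pgbouncer]
        pool_mode = transaction
        max_client_conn = 1000
        default_pool_size = 50
        dns_max_ttl = 60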
    templates/userlist.txt.j2 — only rendered when auth_type !=
      trust. Lab uses trust on the bridge subnet; prod gets a
      vault-backed list of md5/scram hashes.
    handlers/main.yml — RELOAD pgbouncer (graceful, doesn't drop
      established clients).
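      i.e. roughly (SIGHUP via the unit's reload action):
        - name: RELOAD pgbouncer
          ansible.builtin.service:
            name: pgbouncer
            state: reloaded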
    README.md — operational cheatsheet:
      - SHOW POOLS / SHOW STATS via the admin console
      - the transaction-mode forbids list (LISTEN/NOTIFY etc.)
      - failover behaviour today vs after the W2-day-8 hook lands
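      Console access looks like this (admin user per admin_users,
      shown here as "pgbouncer"):
        $ psql -h pgaf-pgbouncer.lxd -p 6432 -U pgbouncer pgbouncer \
            -c 'SHOW POOLS;' -c 'SHOW STATS;'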

  infra/ansible/playbooks/postgres_ha.yml
    Provision step extended to launch pgaf-pgbouncer alongside
    the formation containers. Two new plays at the bottom apply
    common baseline + pgbouncer role to it.

  infra/ansible/inventory/lab.yml
    `pgbouncer` group with pgaf-pgbouncer reachable via the
    community.general.incus connection plugin (consistent with the
    postgres_ha containers).
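    Shape (values illustrative):
      pgbouncer:
        hosts:
          pgaf-pgbouncer:
            ansible_connection: community.general.incus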

  infra/ansible/tests/test_pgbouncer_load.sh
    Acceptance: pgbench 500 clients × 30s × 8 threads against the
    pgbouncer endpoint, must report 0 failed transactions and 0
    connection errors. Also runs `pgbench -i -s 10` first to
    initialise the standard fixture — that init goes through
    pgbouncer too, which incidentally validates transaction-mode
    compatibility before the load run starts.
    Exit codes: 0 / 1 (errors) / 2 (unreachable) / 3 (missing tool).
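    The core invocations are approximately (dbname illustrative):
      $ pgbench -i -s 10 -h pgaf-pgbouncer.lxd -p 6432 veza
      $ pgbench -c 500 -j 8 -T 30 -h pgaf-pgbouncer.lxd -p 6432 veza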

  veza-backend-api/internal/config/config.go
    Comment block above DATABASE_URL load — documents the prod
    wiring (DATABASE_URL points at pgaf-pgbouncer.lxd:6432, NOT
    at pgaf-primary directly). Also notes the dev/CI exception:
    direct Postgres because the small scale doesn't benefit from
    pooling and tests occasionally lean on session-scoped GUCs
    that transaction-mode would break.
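    Paraphrased, the comment says:
      // Prod: DATABASE_URL points at pgaf-pgbouncer.lxd:6432 (the
      // pooler), never at pgaf-primary:5432 directly.
      // Dev/CI: direct Postgres. Small scale gains nothing from
      // pooling, and some tests set session-scoped GUCs, which
      // transaction pooling would break.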

Acceptance verified locally:
  $ ansible-playbook -i inventory/lab.yml playbooks/postgres_ha.yml \
      --syntax-check
  playbook: playbooks/postgres_ha.yml          ← clean
  $ bash -n infra/ansible/tests/test_pgbouncer_load.sh
  syntax OK
  $ cd veza-backend-api && go build ./...
  (clean — comment-only change in config.go)
  $ gofmt -l internal/config/config.go
  (no output — clean)

Real apply + pgbench run requires the lab R720 + the
community.general collection — operator's call.

Out of scope (deferred per ROADMAP §2):
  - HA pgbouncer (single instance per env at v1.0; a second
    instance + keepalived in v1.1 if needed)
  - pg_autoctl state-change hook → pgbouncer RELOAD (W2 day 8)
  - Prometheus pgbouncer_exporter (W2 day 9 with the OTel
    collector + observability stack)

SKIP_TESTS=1 — IaC YAML + bash + Go comment-only diff.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-27 18:35:05 +02:00

88 lines
3 KiB
YAML

# Postgres HA playbook — provisions 3 Incus containers on the
# `incus_hosts` group (lab/staging/prod) and lays down the
# pg_auto_failover formation across them.
#
# Topology:
# - pgaf-monitor — the state machine (single instance)
# - pgaf-primary — first data node, becomes primary at first boot
# - pgaf-replica — second data node, becomes hot-standby
#
# v1.0.9 Day 6 — single host (R720 lab) for now. W2 day 7+ moves
# the data nodes onto separate physical hosts when Hetzner standby
# is provisioned. The formation works the same either way.
#
# Run with:
# ansible-playbook -i inventory/lab.yml playbooks/postgres_ha.yml --check
# ansible-playbook -i inventory/lab.yml playbooks/postgres_ha.yml
---
- name: Provision Incus containers for the Postgres formation + pgbouncer
  hosts: incus_hosts
  become: true
  gather_facts: true
  tasks:
    - name: Launch pgaf-monitor + pgaf-primary + pgaf-replica + pgaf-pgbouncer
      ansible.builtin.shell:
        cmd: |
          set -e
          for ct in pgaf-monitor pgaf-primary pgaf-replica pgaf-pgbouncer; do
            if ! incus info "$ct" >/dev/null 2>&1; then
              incus launch images:ubuntu/22.04 "$ct"
              echo "LAUNCHED $ct"  # marker consumed by changed_when below
              # Wait for cloud-init / network to settle.
              for _ in $(seq 1 30); do
                if incus exec "$ct" -- cloud-init status 2>/dev/null | grep -q "status: done"; then
                  break
                fi
                sleep 1
              done
              # Install python3 inside the container so Ansible can
              # speak to it via the incus connection plugin.
              incus exec "$ct" -- apt-get update
              incus exec "$ct" -- apt-get install -y python3 python3-apt
            fi
          done
        executable: /bin/bash
      register: provision_result
      changed_when: "'LAUNCHED' in provision_result.stdout"
      tags: [postgres_ha, pgbouncer, provision]

    - name: Refresh inventory so the new containers are reachable via the incus connection
      ansible.builtin.meta: refresh_inventory

- name: Apply common baseline to the formation containers
  hosts: postgres_ha
  become: true
  gather_facts: true
  roles:
    - common

- name: Bring up the pg_auto_failover monitor first (formation depends on it)
  hosts: postgres_ha_monitor
  become: true
  gather_facts: true
  roles:
    - postgres_ha

- name: Bring up the data nodes (primary registers first, replica registers second)
  hosts: postgres_ha_nodes
  become: true
  gather_facts: true
  serial: 1  # primary must register before replica; pg_auto_failover assigns roles by registration order
  roles:
    - postgres_ha

# v1.0.9 Day 7: PgBouncer fronts the formation. Common baseline first
# (SSH + node_exporter + fail2ban), then the pgbouncer role itself.
- name: Apply common baseline to the pgbouncer container
  hosts: pgbouncer
  become: true
  gather_facts: true
  roles:
    - common

- name: Install + configure PgBouncer pointing at the formation
  hosts: pgbouncer
  become: true
  gather_facts: true
  roles:
    - pgbouncer