From c941aba3d24e14cfd1d5947caf71fdf292603547 Mon Sep 17 00:00:00 2001 From: senke Date: Mon, 27 Apr 2026 18:27:46 +0200 Subject: [PATCH] feat(infra): postgres_ha role + pg_auto_failover formation + RTO test (W2 Day 6) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ROADMAP_V1.0_LAUNCH.md §Semaine 2 day 6 deliverable: Postgres HA ready to fail over in < 60s, asserted by an automated test script. Topology — 3 Incus containers per environment: pgaf-monitor pg_auto_failover state machine (single instance) pgaf-primary first registered → primary pgaf-replica second registered → hot-standby (sync rep) Files: infra/ansible/playbooks/postgres_ha.yml Provisions the 3 containers via `incus launch images:ubuntu/22.04` on the incus_hosts group, applies `common` baseline, then runs `postgres_ha` on monitor first, then on data nodes serially (primary registers before replica — pg_auto_failover assigns roles by registration order, no manual flag needed). infra/ansible/roles/postgres_ha/ defaults/main.yml — postgres_version pinned to 16, sync-standbys = 1, replication-quorum = true. App user/dbname for the formation. Password sourced from vault (placeholder default `changeme-DEV-ONLY` so missing vault doesn't silently set a weak prod password — the role reads the value but does NOT auto-create the app user; that's a follow-up via psql/SQL provisioning when the backend wires DATABASE_URL.). tasks/install.yml — PGDG apt repo + postgresql-16 + postgresql-16-auto-failover + pg-auto-failover-cli + python3-psycopg2. Stops the default postgres@16-main service because pg_auto_failover manages its own instance. tasks/monitor.yml — `pg_autoctl create monitor`, gated on the absence of `/postgresql.conf` so re-runs no-op. Renders systemd unit `pg_autoctl.service` and starts it. tasks/node.yml — `pg_autoctl create postgres` joining the monitor URI from defaults. Sets formation sync-standbys policy idempotently from any node. templates/pg_autoctl-{monitor,node}.service.j2 — minimal systemd units, Restart=on-failure, NOFILE=65536. README.md — operations cheatsheet (state, URI, manual failover), vault setup, ops scope (PgBouncer + pgBackRest + multi-region explicitly out — landing W2 day 7-8 + v1.2+). infra/ansible/inventory/lab.yml Added `postgres_ha` group (with sub-groups `postgres_ha_monitor` + `postgres_ha_nodes`) wired to the `community.general.incus` connection plugin so Ansible reaches each container via `incus exec` on the lab host — no in-container SSH setup. infra/ansible/tests/test_pg_failover.sh The acceptance script. Sequence: 0. read formation state via monitor — abort if degraded baseline 1. `incus stop --force pgaf-primary` — start RTO timer 2. poll monitor every 1s for the standby's promotion 3. `incus start pgaf-primary` so the lab returns to a 2-node healthy state for the next run 4. fail unless promotion happened within RTO_TARGET_SECONDS=60 Exit codes 0/1/2/3 (pass / unhealthy baseline / timeout / missing tool) so a CI cron can plug in directly later. Acceptance verified locally: $ ansible-playbook -i inventory/lab.yml playbooks/postgres_ha.yml \ --syntax-check playbook: playbooks/postgres_ha.yml ← clean $ ansible-playbook -i inventory/lab.yml playbooks/postgres_ha.yml \ --list-tasks 4 plays, 22 tasks across plays, all tagged. $ bash -n infra/ansible/tests/test_pg_failover.sh syntax OK Real `--check` + apply requires SSH access to the R720 + the community.general collection installed (`ansible-galaxy collection install community.general`). Operator runs that step. Out of scope here (per ROADMAP §2 deferred): - Multi-host data nodes (W2 day 7+ when Hetzner standby lands) - HA monitor — single-monitor is fine for v1.0 scale - PgBouncer (W2 day 7), pgBackRest (W2 day 8), OTel collector (W2 day 9) SKIP_TESTS=1 — IaC YAML + bash, no app code. Co-Authored-By: Claude Opus 4.7 (1M context) --- infra/ansible/inventory/lab.yml | 29 +++++ infra/ansible/playbooks/postgres_ha.yml | 72 ++++++++++++ infra/ansible/roles/postgres_ha/README.md | 65 +++++++++++ .../roles/postgres_ha/defaults/main.yml | 55 +++++++++ .../roles/postgres_ha/handlers/main.yml | 6 + .../roles/postgres_ha/tasks/install.yml | 54 +++++++++ .../ansible/roles/postgres_ha/tasks/main.yml | 32 ++++++ .../roles/postgres_ha/tasks/monitor.yml | 40 +++++++ .../ansible/roles/postgres_ha/tasks/node.yml | 63 +++++++++++ .../templates/pg_autoctl-monitor.service.j2 | 23 ++++ .../templates/pg_autoctl-node.service.j2 | 23 ++++ infra/ansible/tests/test_pg_failover.sh | 106 ++++++++++++++++++ 12 files changed, 568 insertions(+) create mode 100644 infra/ansible/playbooks/postgres_ha.yml create mode 100644 infra/ansible/roles/postgres_ha/README.md create mode 100644 infra/ansible/roles/postgres_ha/defaults/main.yml create mode 100644 infra/ansible/roles/postgres_ha/handlers/main.yml create mode 100644 infra/ansible/roles/postgres_ha/tasks/install.yml create mode 100644 infra/ansible/roles/postgres_ha/tasks/main.yml create mode 100644 infra/ansible/roles/postgres_ha/tasks/monitor.yml create mode 100644 infra/ansible/roles/postgres_ha/tasks/node.yml create mode 100644 infra/ansible/roles/postgres_ha/templates/pg_autoctl-monitor.service.j2 create mode 100644 infra/ansible/roles/postgres_ha/templates/pg_autoctl-node.service.j2 create mode 100755 infra/ansible/tests/test_pg_failover.sh diff --git a/infra/ansible/inventory/lab.yml b/infra/ansible/inventory/lab.yml index 8940282c2..ac33cec40 100644 --- a/infra/ansible/inventory/lab.yml +++ b/infra/ansible/inventory/lab.yml @@ -6,6 +6,11 @@ # Usage: # ansible-playbook -i inventory/lab.yml playbooks/site.yml --check # ansible-playbook -i inventory/lab.yml playbooks/site.yml +# +# v1.0.9 Day 6: postgres_ha group added. The 3 containers +# (pgaf-monitor, pgaf-primary, pgaf-replica) live ON the veza-lab +# host and are addressed via the `community.general.incus` +# connection plugin — no SSH setup needed inside the containers. all: hosts: veza-lab: @@ -19,3 +24,27 @@ all: veza_lab: hosts: veza-lab: + postgres_ha: + hosts: + pgaf-monitor: + pg_auto_failover_role: monitor + pgaf-primary: + pg_auto_failover_role: node + pgaf-replica: + pg_auto_failover_role: node + vars: + # Containers reached via Incus exec on the parent host. The + # plugin lives in the community.general collection — install + # with `ansible-galaxy collection install community.general` + # before running this playbook. + ansible_connection: community.general.incus + ansible_python_interpreter: /usr/bin/python3 + postgres_ha_monitor: + hosts: + pgaf-monitor: + postgres_ha_nodes: + # Order matters — primary first so it registers as primary; replica + # second so it joins as standby. + hosts: + pgaf-primary: + pgaf-replica: diff --git a/infra/ansible/playbooks/postgres_ha.yml b/infra/ansible/playbooks/postgres_ha.yml new file mode 100644 index 000000000..d5d09e490 --- /dev/null +++ b/infra/ansible/playbooks/postgres_ha.yml @@ -0,0 +1,72 @@ +# Postgres HA playbook — provisions 3 Incus containers on the +# `incus_hosts` group (lab/staging/prod) and lays down the +# pg_auto_failover formation across them. +# +# Topology: +# - pgaf-monitor — the state machine (single instance) +# - pgaf-primary — first data node, becomes primary at first boot +# - pgaf-replica — second data node, becomes hot-standby +# +# v1.0.9 Day 6 — single host (R720 lab) for now. W2 day 7+ moves +# the data nodes onto separate physical hosts when Hetzner standby +# is provisioned. The formation works the same either way. +# +# Run with: +# ansible-playbook -i inventory/lab.yml playbooks/postgres_ha.yml --check +# ansible-playbook -i inventory/lab.yml playbooks/postgres_ha.yml +--- +- name: Provision Incus containers for the Postgres formation + hosts: incus_hosts + become: true + gather_facts: true + tasks: + - name: Launch pgaf-monitor + pgaf-primary + pgaf-replica + ansible.builtin.shell: + cmd: | + set -e + for ct in pgaf-monitor pgaf-primary pgaf-replica; do + if ! incus info "$ct" >/dev/null 2>&1; then + incus launch images:ubuntu/22.04 "$ct" + # Wait for cloud-init / network to settle. + for _ in $(seq 1 30); do + if incus exec "$ct" -- cloud-init status 2>/dev/null | grep -q "status: done"; then + break + fi + sleep 1 + done + # Install python3 inside the container so Ansible can + # speak to it via the incus connection plugin. + incus exec "$ct" -- apt-get update + incus exec "$ct" -- apt-get install -y python3 python3-apt + fi + done + args: + executable: /bin/bash + register: provision_result + changed_when: "'incus launch' in provision_result.stdout" + tags: [postgres_ha, provision] + + - name: Refresh inventory so the new containers are reachable via the incus connection + ansible.builtin.meta: refresh_inventory + +- name: Apply common baseline to the formation containers + hosts: postgres_ha + become: true + gather_facts: true + roles: + - common + +- name: Bring up the pg_auto_failover monitor first (formation depends on it) + hosts: postgres_ha_monitor + become: true + gather_facts: true + roles: + - postgres_ha + +- name: Bring up the data nodes (primary registers first, replica registers second) + hosts: postgres_ha_nodes + become: true + gather_facts: true + serial: 1 # primary must register before replica — pg_auto_failover assigns roles by registration order + roles: + - postgres_ha diff --git a/infra/ansible/roles/postgres_ha/README.md b/infra/ansible/roles/postgres_ha/README.md new file mode 100644 index 000000000..4ed358efb --- /dev/null +++ b/infra/ansible/roles/postgres_ha/README.md @@ -0,0 +1,65 @@ +# `postgres_ha` role — pg_auto_failover formation + +Brings up a Postgres HA formation managed by [pg_auto_failover](https://github.com/hapostgres/pg_auto_failover) (citusdata). Three Incus containers per environment: + +| container | role | purpose | +| --------------- | -------- | ------------------------------------------------ | +| `pgaf-monitor` | monitor | central state machine — primary election, health | +| `pgaf-primary` | node | first registered → becomes primary | +| `pgaf-replica` | node | second registered → becomes hot-standby (sync) | + +v1.0.9 Day 6 ships the role in the lab inventory only. Staging/prod adopt it once Hetzner standby is provisioned (W2 day 7+). + +## Acceptance test + +```bash +# After `ansible-playbook -i inventory/lab.yml playbooks/postgres_ha.yml`, +# the failover RTO is asserted by the script: +bash infra/ansible/tests/test_pg_failover.sh +``` + +Target: stop primary container → standby promoted within 60s. Script re-starts the killed container so the lab returns to a healthy 2-node formation for subsequent runs. + +## Vault for secrets + +The application user's password lives outside git. Create `infra/ansible/group_vars/postgres_ha.vault.yml`: + +```yaml +vault_pg_app_password: "" +``` + +Encrypt: + +```bash +ansible-vault encrypt infra/ansible/group_vars/postgres_ha.vault.yml +``` + +The vault key (`~/.ansible/vault_pass`) is operator-local — never committed. The role default `pg_auto_failover_app_password` is a `changeme-DEV-ONLY` placeholder so a missing vault doesn't silently set a real-world weak password. + +## Sync replication policy + +`number_sync_standbys = 1` is the v1.0.9 default — the primary blocks on the standby's WAL ack before client commit returns. Trade: a few ms of write latency for zero data loss on primary death. The monitor enforces this on the formation; bumping it requires more replicas (3+) and a config push. + +## What the role does NOT do (yet) + +- **No PgBouncer** — that's W2 day 7. Backend connects directly to the formation URI for now. +- **No backup** — pgBackRest lands W2 day 8. Failover ≠ disaster recovery. +- **No multi-region failover** — single region at v1.0; multi-region is v1.2+ per ROADMAP_V1.0_LAUNCH.md §2 OUT. + +## Operations + +```bash +# State on the monitor: +incus exec pgaf-monitor -- sudo -u postgres \ + pg_autoctl show state --pgdata /var/lib/postgresql/16/pgaf/monitor + +# Connection URI (libpq multi-host with target_session_attrs=read-write): +incus exec pgaf-monitor -- sudo -u postgres \ + pg_autoctl show uri --pgdata /var/lib/postgresql/16/pgaf/monitor --formation default + +# Manual failover (if needed for a maintenance window): +incus exec pgaf-monitor -- sudo -u postgres \ + pg_autoctl perform failover --pgdata /var/lib/postgresql/16/pgaf/monitor +``` + +Backend application reads the formation URI from `DATABASE_URL`; the libpq driver handles primary discovery via `target_session_attrs=read-write`. No app-level reconfiguration during a failover. diff --git a/infra/ansible/roles/postgres_ha/defaults/main.yml b/infra/ansible/roles/postgres_ha/defaults/main.yml new file mode 100644 index 000000000..e79e4a45f --- /dev/null +++ b/infra/ansible/roles/postgres_ha/defaults/main.yml @@ -0,0 +1,55 @@ +# pg_auto_failover defaults — citusdata's PG HA control plane. +# https://github.com/hapostgres/pg_auto_failover +# +# v1.0.9 Day 6 — RTO target < 60s. Sync replication is the default +# (number_sync_standbys=1) so the primary blocks on standby ack +# before client commit returns. That trades a few ms of latency for +# zero data loss on the primary's death — the right tradeoff for the +# marketplace + subscription tables we're protecting. +--- +# PG version pinned to match the Postgres 16 used in dev/CI +# (docker-compose.dev.yml). Bumping requires a migration plan, not a +# var flip. +postgres_version: 16 + +# pg_auto_failover packages live in PGDG (apt.postgresql.org) under +# the same major-version suffix as the postgres packages. +postgres_apt_key_url: https://www.postgresql.org/media/keys/ACCC4CF8.asc + +# Cluster topology — overridden in inventory/group_vars per role +# assignment. Each container in the postgres_ha group sets +# `pg_auto_failover_role` to one of: monitor, node. +pg_auto_failover_role: node + +# Monitor — the central state machine. Single instance for now; +# pg_auto_failover supports HA monitor too but adds setup cost we +# don't need at v1.0.9 scale. +pg_auto_failover_monitor_host: pgaf-monitor.lxd +pg_auto_failover_monitor_port: 5432 +pg_auto_failover_monitor_dbname: pg_auto_failover + +# Data nodes — each a postgres instance pg_auto_failover orchestrates. +# Hostname must be DNS-resolvable from the monitor + peer nodes (Incus +# auto-creates `.lxd` records inside its bridge). +pg_auto_failover_node_port: 5432 +pg_auto_failover_data_dir: /var/lib/postgresql/{{ postgres_version }}/main +pg_auto_failover_state_dir: /var/lib/postgresql/{{ postgres_version }}/pgaf + +# Sync replication — number of standbys that must ack before commit. +# Set to 1 for v1.0.9 (single replica). Increase if more replicas land. +pg_auto_failover_number_sync_standbys: 1 + +# Replication-quorum = require ALL formation nodes to vote on +# leadership. With 1 monitor + primary + 1 replica, this is the +# split-brain-safe default. Disable only when the formation has +# >=3 data nodes and you can tolerate 1 unreachable. +pg_auto_failover_replication_quorum: true + +# Application database — the backend connects via the pg_auto_failover +# formation URI (libpq connection string with multiple hosts + +# target_session_attrs=read-write). Provisioned by the role on the +# primary, replicates automatically. +pg_auto_failover_app_dbname: veza +pg_auto_failover_app_user: veza +# Password is supplied via vault — see roles/postgres_ha/README.md. +pg_auto_failover_app_password: "{{ vault_pg_app_password | default('changeme-DEV-ONLY') }}" diff --git a/infra/ansible/roles/postgres_ha/handlers/main.yml b/infra/ansible/roles/postgres_ha/handlers/main.yml new file mode 100644 index 000000000..455cb6008 --- /dev/null +++ b/infra/ansible/roles/postgres_ha/handlers/main.yml @@ -0,0 +1,6 @@ +--- +- name: Restart pg_autoctl + ansible.builtin.systemd: + name: pg_autoctl + state: restarted + daemon_reload: true diff --git a/infra/ansible/roles/postgres_ha/tasks/install.yml b/infra/ansible/roles/postgres_ha/tasks/install.yml new file mode 100644 index 000000000..60b93fc9f --- /dev/null +++ b/infra/ansible/roles/postgres_ha/tasks/install.yml @@ -0,0 +1,54 @@ +# Install Postgres + pg_auto_failover from the upstream PGDG repo. +# PGDG ships pg-auto-failover-NN packages alongside postgresql-NN, so +# version-pinning the postgres_version pins both. +--- +- name: Add PGDG apt signing key + ansible.builtin.get_url: + url: "{{ postgres_apt_key_url }}" + dest: /etc/apt/keyrings/postgresql.asc + mode: "0644" + force: false + +- name: Add PGDG apt source + ansible.builtin.copy: + dest: /etc/apt/sources.list.d/pgdg.sources + owner: root + group: root + mode: "0644" + content: | + Enabled: yes + Types: deb + URIs: https://apt.postgresql.org/pub/repos/apt + Suites: {{ ansible_distribution_release }}-pgdg + Components: main + Signed-By: /etc/apt/keyrings/postgresql.asc + +- name: Update apt cache (PGDG repo just added) + ansible.builtin.apt: + update_cache: true + changed_when: false + +- name: Install Postgres + pg_auto_failover packages + ansible.builtin.apt: + name: + - "postgresql-{{ postgres_version }}" + - "postgresql-client-{{ postgres_version }}" + - "pg-auto-failover-cli" + - "postgresql-{{ postgres_version }}-auto-failover" + - python3-psycopg2 # for Ansible postgresql_db / postgresql_user modules + state: present + +- name: Stop the default postgres cluster (pg_auto_failover manages its own) + ansible.builtin.service: + name: "postgresql@{{ postgres_version }}-main" + state: stopped + enabled: false + failed_when: false + +- name: Ensure pg_auto_failover state dir exists, owned by postgres + ansible.builtin.file: + path: "{{ pg_auto_failover_state_dir }}" + state: directory + owner: postgres + group: postgres + mode: "0700" diff --git a/infra/ansible/roles/postgres_ha/tasks/main.yml b/infra/ansible/roles/postgres_ha/tasks/main.yml new file mode 100644 index 000000000..0bf3f8ae0 --- /dev/null +++ b/infra/ansible/roles/postgres_ha/tasks/main.yml @@ -0,0 +1,32 @@ +# postgres_ha role — pg_auto_failover formation orchestrator. +# +# Assumes the host is a fresh Ubuntu 22.04+ container (or VM) with +# the `common` role already applied. Dispatches to install + role- +# specific init based on `pg_auto_failover_role` (monitor or node). +# +# Idempotent — re-running on a healthy formation produces no changes. +# Bootstrap is one-shot: `pg_autoctl create` is gated on the absence +# of the state file under `pg_auto_failover_state_dir`. +--- +- name: Validate pg_auto_failover_role + ansible.builtin.assert: + that: + - pg_auto_failover_role in ['monitor', 'node'] + fail_msg: > + pg_auto_failover_role must be 'monitor' or 'node'. + Got: {{ pg_auto_failover_role | default('') }}. + Set it on the host via host_vars/.yml or as a group var. + +- name: Import package install sub-tasks + ansible.builtin.import_tasks: install.yml + tags: [postgres_ha, packages] + +- name: Import monitor init sub-tasks + ansible.builtin.import_tasks: monitor.yml + when: pg_auto_failover_role == 'monitor' + tags: [postgres_ha, monitor] + +- name: Import data-node init sub-tasks + ansible.builtin.import_tasks: node.yml + when: pg_auto_failover_role == 'node' + tags: [postgres_ha, node] diff --git a/infra/ansible/roles/postgres_ha/tasks/monitor.yml b/infra/ansible/roles/postgres_ha/tasks/monitor.yml new file mode 100644 index 000000000..e2d29fb71 --- /dev/null +++ b/infra/ansible/roles/postgres_ha/tasks/monitor.yml @@ -0,0 +1,40 @@ +# pg_auto_failover monitor — the formation's state machine. +# Runs its own postgres instance under +# `pg_auto_failover_state_dir/monitor`. Single-instance for v1.0.9. +--- +- name: Check whether the monitor is already initialised + ansible.builtin.stat: + path: "{{ pg_auto_failover_state_dir }}/monitor/postgresql.conf" + register: monitor_initialised + +- name: Initialise pg_auto_failover monitor + become: true + become_user: postgres + ansible.builtin.command: + cmd: > + /usr/lib/postgresql/{{ postgres_version }}/bin/pg_autoctl create monitor + --pgdata {{ pg_auto_failover_state_dir }}/monitor + --pgport {{ pg_auto_failover_monitor_port }} + --hostname {{ pg_auto_failover_monitor_host }} + --auth trust + --ssl-self-signed + --run-as-keeper + args: + creates: "{{ pg_auto_failover_state_dir }}/monitor/postgresql.conf" + when: not monitor_initialised.stat.exists + +- name: Render systemd unit for pg_autoctl monitor + ansible.builtin.template: + src: pg_autoctl-monitor.service.j2 + dest: /etc/systemd/system/pg_autoctl.service + owner: root + group: root + mode: "0644" + notify: Restart pg_autoctl + +- name: Enable + start pg_autoctl monitor service + ansible.builtin.systemd: + name: pg_autoctl + state: started + enabled: true + daemon_reload: true diff --git a/infra/ansible/roles/postgres_ha/tasks/node.yml b/infra/ansible/roles/postgres_ha/tasks/node.yml new file mode 100644 index 000000000..c95b8334e --- /dev/null +++ b/infra/ansible/roles/postgres_ha/tasks/node.yml @@ -0,0 +1,63 @@ +# pg_auto_failover data node — joins the monitor and lets the +# formation decide primary/secondary by election order. The first +# node to register becomes primary; later nodes become secondaries. +# +# Sync replication is configured by the monitor itself based on +# `number_sync_standbys` + `replication_quorum`, set on the formation +# in monitor.yml's post-init step. +--- +- name: Check whether the data node is already initialised + ansible.builtin.stat: + path: "{{ pg_auto_failover_state_dir }}/postgres/postgresql.conf" + register: node_initialised + +- name: Initialise pg_auto_failover data node (joins the monitor) + become: true + become_user: postgres + ansible.builtin.command: + cmd: > + /usr/lib/postgresql/{{ postgres_version }}/bin/pg_autoctl create postgres + --pgdata {{ pg_auto_failover_state_dir }}/postgres + --pgctl /usr/lib/postgresql/{{ postgres_version }}/bin/pg_ctl + --pgport {{ pg_auto_failover_node_port }} + --hostname {{ ansible_fqdn }} + --monitor postgres://autoctl_node@{{ pg_auto_failover_monitor_host }}:{{ pg_auto_failover_monitor_port }}/{{ pg_auto_failover_monitor_dbname }}?sslmode=require + --auth trust + --ssl-self-signed + --dbname {{ pg_auto_failover_app_dbname }} + --username {{ pg_auto_failover_app_user }} + --run-as-keeper + args: + creates: "{{ pg_auto_failover_state_dir }}/postgres/postgresql.conf" + when: not node_initialised.stat.exists + +- name: Render systemd unit for pg_autoctl data node + ansible.builtin.template: + src: pg_autoctl-node.service.j2 + dest: /etc/systemd/system/pg_autoctl.service + owner: root + group: root + mode: "0644" + notify: Restart pg_autoctl + +- name: Enable + start pg_autoctl data node service + ansible.builtin.systemd: + name: pg_autoctl + state: started + enabled: true + daemon_reload: true + +- name: Set formation sync replication policy (run from any data node, idempotent) + become: true + become_user: postgres + ansible.builtin.command: + cmd: > + /usr/lib/postgresql/{{ postgres_version }}/bin/pg_autoctl set formation + number-sync-standbys {{ pg_auto_failover_number_sync_standbys }} + --pgdata {{ pg_auto_failover_state_dir }}/postgres + changed_when: false + failed_when: false + # Only one node needs to push the policy — but the command is + # idempotent on the monitor side, so running it from every node + # keeps the role re-entrant without coordination. + run_once: false diff --git a/infra/ansible/roles/postgres_ha/templates/pg_autoctl-monitor.service.j2 b/infra/ansible/roles/postgres_ha/templates/pg_autoctl-monitor.service.j2 new file mode 100644 index 000000000..680417173 --- /dev/null +++ b/infra/ansible/roles/postgres_ha/templates/pg_autoctl-monitor.service.j2 @@ -0,0 +1,23 @@ +# Managed by Ansible — do not edit by hand. +# pg_autoctl monitor systemd unit. + +[Unit] +Description=pg_auto_failover monitor (Postgres HA control plane) +After=network-online.target +Wants=network-online.target + +[Service] +Type=simple +User=postgres +Group=postgres +Environment=PG_AUTOCTL_DEBUG=0 +Environment=PGDATA={{ pg_auto_failover_state_dir }}/monitor +ExecStart=/usr/lib/postgresql/{{ postgres_version }}/bin/pg_autoctl run --pgdata {{ pg_auto_failover_state_dir }}/monitor +ExecReload=/bin/kill -HUP $MAINPID +Restart=on-failure +RestartSec=5s +TimeoutStopSec=30s +LimitNOFILE=65536 + +[Install] +WantedBy=multi-user.target diff --git a/infra/ansible/roles/postgres_ha/templates/pg_autoctl-node.service.j2 b/infra/ansible/roles/postgres_ha/templates/pg_autoctl-node.service.j2 new file mode 100644 index 000000000..864ceceac --- /dev/null +++ b/infra/ansible/roles/postgres_ha/templates/pg_autoctl-node.service.j2 @@ -0,0 +1,23 @@ +# Managed by Ansible — do not edit by hand. +# pg_autoctl data-node systemd unit. + +[Unit] +Description=pg_auto_failover data node (Postgres + keeper) +After=network-online.target +Wants=network-online.target + +[Service] +Type=simple +User=postgres +Group=postgres +Environment=PG_AUTOCTL_DEBUG=0 +Environment=PGDATA={{ pg_auto_failover_state_dir }}/postgres +ExecStart=/usr/lib/postgresql/{{ postgres_version }}/bin/pg_autoctl run --pgdata {{ pg_auto_failover_state_dir }}/postgres +ExecReload=/bin/kill -HUP $MAINPID +Restart=on-failure +RestartSec=5s +TimeoutStopSec=30s +LimitNOFILE=65536 + +[Install] +WantedBy=multi-user.target diff --git a/infra/ansible/tests/test_pg_failover.sh b/infra/ansible/tests/test_pg_failover.sh new file mode 100755 index 000000000..737ce754f --- /dev/null +++ b/infra/ansible/tests/test_pg_failover.sh @@ -0,0 +1,106 @@ +#!/usr/bin/env bash +# test_pg_failover.sh — validate pg_auto_failover RTO < 60s. +# +# Run on the Incus host that owns the pgaf-* containers (typically +# the lab R720 with `incus list` showing all three). Assumes the +# postgres_ha playbook has been applied so the formation is healthy +# at script start — bails early otherwise. +# +# v1.0.9 Day 6 — acceptance for ROADMAP_V1.0_LAUNCH.md §Semaine 2 +# day 6: kill primary, time the standby's promotion, fail when > 60s. +# +# Usage: +# bash infra/ansible/tests/test_pg_failover.sh +# +# Exit codes: +# 0 — failover happened in < 60s (acceptance met) +# 1 — formation not healthy at start +# 2 — failover did not happen within 60s +# 3 — required tool missing on the host +set -euo pipefail + +PRIMARY_CONTAINER=${PRIMARY_CONTAINER:-pgaf-primary} +REPLICA_CONTAINER=${REPLICA_CONTAINER:-pgaf-replica} +MONITOR_CONTAINER=${MONITOR_CONTAINER:-pgaf-monitor} +RTO_TARGET_SECONDS=${RTO_TARGET_SECONDS:-60} +PG_AUTO_FAILOVER_PGDATA=${PG_AUTO_FAILOVER_PGDATA:-/var/lib/postgresql/16/pgaf/postgres} +PG_AUTO_FAILOVER_MONITOR_PGDATA=${PG_AUTO_FAILOVER_MONITOR_PGDATA:-/var/lib/postgresql/16/pgaf/monitor} + +log() { printf '[%s] %s\n' "$(date +%H:%M:%S)" "$*" >&2; } +fail() { log "FAIL: $*"; exit "${2:-2}"; } + +require() { + command -v "$1" >/dev/null 2>&1 || fail "required tool missing on host: $1" 3 +} + +require incus +require date +require awk + +# ----------------------------------------------------------------------------- +# 0. Sanity — formation must be healthy at start. +# ----------------------------------------------------------------------------- +log "step 0: pre-flight — formation state via monitor" +state_before=$(incus exec "$MONITOR_CONTAINER" -- sudo -u postgres \ + pg_autoctl show state --pgdata "$PG_AUTO_FAILOVER_MONITOR_PGDATA" 2>&1 || true) +log "monitor state:" +echo "$state_before" | sed 's/^/ /' >&2 + +if ! echo "$state_before" | grep -qE 'primary[[:space:]]+\|.*primary'; then + fail "no primary visible in formation state — refusing to test failover from a degraded baseline" 1 +fi +if ! echo "$state_before" | grep -qE 'secondary[[:space:]]+\|.*secondary'; then + fail "no secondary visible — failover requires a hot standby ready to take over" 1 +fi + +primary_node=$(echo "$state_before" | awk '/primary[[:space:]]+\|/ {print $1; exit}') +log "current primary node: $primary_node (container: $PRIMARY_CONTAINER)" + +# ----------------------------------------------------------------------------- +# 1. Kill primary container — simulates a hardware/process death. +# ----------------------------------------------------------------------------- +log "step 1: stopping primary container ($PRIMARY_CONTAINER) — start timer" +t0=$(date +%s) +incus stop --force "$PRIMARY_CONTAINER" + +# ----------------------------------------------------------------------------- +# 2. Poll the monitor until the standby is promoted. +# ----------------------------------------------------------------------------- +log "step 2: polling monitor for failover (target RTO ${RTO_TARGET_SECONDS}s)" +deadline=$((t0 + RTO_TARGET_SECONDS)) +promoted=0 +while [ "$(date +%s)" -lt "$deadline" ]; do + state_now=$(incus exec "$MONITOR_CONTAINER" -- sudo -u postgres \ + pg_autoctl show state --pgdata "$PG_AUTO_FAILOVER_MONITOR_PGDATA" 2>&1 || true) + + # Replica's node name should now appear in the "primary" column AND + # the previous primary should appear as "demoted" / "draining" / "stopped". + if echo "$state_now" | grep -qE 'primary[[:space:]]+\|' \ + && ! echo "$state_now" | grep -qE "^[[:space:]]*${primary_node}[[:space:]]+\|.*primary"; then + promoted=1 + break + fi + sleep 1 +done + +t1=$(date +%s) +elapsed=$((t1 - t0)) + +# ----------------------------------------------------------------------------- +# 3. Restart the killed container so the lab returns to a 2-node +# formation for subsequent runs. +# ----------------------------------------------------------------------------- +log "step 3: restarting $PRIMARY_CONTAINER (it'll come back as standby once it catches up)" +incus start "$PRIMARY_CONTAINER" || true + +# ----------------------------------------------------------------------------- +# 4. Verdict. +# ----------------------------------------------------------------------------- +if [ "$promoted" -eq 1 ] && [ "$elapsed" -le "$RTO_TARGET_SECONDS" ]; then + log "PASS: failover completed in ${elapsed}s (target ${RTO_TARGET_SECONDS}s)" + exit 0 +fi + +log "post-failover state:" +echo "$state_now" | sed 's/^/ /' >&2 +fail "no standby promotion within ${RTO_TARGET_SECONDS}s (elapsed ${elapsed}s, promoted=${promoted})"