# pg_auto_failover defaults — citusdata's PG HA control plane.
# https://github.com/hapostgres/pg_auto_failover
#
# v1.0.9 Day 6 — RTO target < 60s. Sync replication is the default
# (number_sync_standbys=1) so the primary blocks on standby ack
# before client commit returns. That trades a few ms of latency for
# zero data loss on the primary's death — the right tradeoff for the
# marketplace + subscription tables we're protecting.
---
# PG version pinned to match the Postgres 16 used in dev/CI
# (docker-compose.dev.yml). Bumping requires a migration plan, not a
# var flip.
postgres_version: 16

# pg_auto_failover packages live in PGDG (apt.postgresql.org) under
# the same major-version suffix as the postgres packages.
postgres_apt_key_url: https://www.postgresql.org/media/keys/ACCC4CF8.asc

# Cluster topology — overridden in inventory/group_vars per role
# assignment. Each container in the postgres_ha group sets
# `pg_auto_failover_role` to one of: monitor, node.
pg_auto_failover_role: node

# Monitor — the central state machine. Single instance for now;
# pg_auto_failover supports HA monitor too but adds setup cost we
# don't need at v1.0.9 scale.
pg_auto_failover_monitor_host: pgaf-monitor.lxd
pg_auto_failover_monitor_port: 5432
pg_auto_failover_monitor_dbname: pg_auto_failover

# Data nodes — each a postgres instance pg_auto_failover orchestrates.
# Hostname must be DNS-resolvable from the monitor + peer nodes (Incus
# auto-creates `.lxd` records inside its bridge).
pg_auto_failover_node_port: 5432
# Templated paths are quoted so the Jinja2 expansion can never be
# re-typed or mis-parsed by YAML before Ansible renders it.
pg_auto_failover_data_dir: "/var/lib/postgresql/{{ postgres_version }}/main"
pg_auto_failover_state_dir: "/var/lib/postgresql/{{ postgres_version }}/pgaf"

# Sync replication — number of standbys that must ack before commit.
# Set to 1 for v1.0.9 (single replica). Increase if more replicas land.
pg_auto_failover_number_sync_standbys: 1

# Replication-quorum = require ALL formation nodes to vote on
# leadership. With 1 monitor + primary + 1 replica, this is the
# split-brain-safe default. Disable only when the formation has
# >=3 data nodes and you can tolerate 1 unreachable.
pg_auto_failover_replication_quorum: true

# Application database — the backend connects via the pg_auto_failover
# formation URI (libpq connection string with multiple hosts +
# target_session_attrs=read-write). Provisioned by the role on the
# primary, replicates automatically.
pg_auto_failover_app_dbname: veza
pg_auto_failover_app_user: veza

# Password is supplied via vault — see roles/postgres_ha/README.md.
pg_auto_failover_app_password: "{{ vault_pg_app_password | default('changeme-DEV-ONLY') }}"