veza/infra/ansible/roles/postgres_ha/defaults/main.yml

# pg_auto_failover defaults — citusdata's PG HA control plane.
# https://github.com/hapostgres/pg_auto_failover
#
# v1.0.9 Day 6 — RTO target < 60s. Sync replication is the default
# (number_sync_standbys=1) so the primary blocks on standby ack
# before client commit returns. That trades a few ms of latency for
# zero data loss on the primary's death — the right tradeoff for the
# marketplace + subscription tables we're protecting.
---
# PG version pinned to match the Postgres 16 used in dev/CI
# (docker-compose.dev.yml). Bumping requires a migration plan, not a
# var flip.
postgres_version: 16

# pg_auto_failover packages live in PGDG (apt.postgresql.org) under
# the same major-version suffix as the postgres packages.
postgres_apt_key_url: https://www.postgresql.org/media/keys/ACCC4CF8.asc

# Cluster topology — overridden in inventory/group_vars per role
# assignment. Each container in the postgres_ha group sets
# `pg_auto_failover_role` to one of: monitor, node.
pg_auto_failover_role: node

# Monitor — the central state machine. Single instance for now;
# pg_auto_failover supports HA monitor too but adds setup cost we
# don't need at v1.0.9 scale.
pg_auto_failover_monitor_host: pgaf-monitor.lxd
pg_auto_failover_monitor_port: 5432
pg_auto_failover_monitor_dbname: pg_auto_failover

# Data nodes — each a postgres instance pg_auto_failover orchestrates.
# Hostname must be DNS-resolvable from the monitor + peer nodes (Incus
# auto-creates `<container>.lxd` records inside its bridge).
pg_auto_failover_node_port: 5432
pg_auto_failover_data_dir: /var/lib/postgresql/{{ postgres_version }}/main
pg_auto_failover_state_dir: /var/lib/postgresql/{{ postgres_version }}/pgaf

# Sync replication — number of standbys that must ack before commit.
# Set to 1 for v1.0.9 (single replica). Increase if more replicas land.
pg_auto_failover_number_sync_standbys: 1

# Replication-quorum = require ALL formation nodes to vote on
# leadership. With 1 monitor + primary + 1 replica, this is the
# split-brain-safe default. Disable only when the formation has
# >=3 data nodes and you can tolerate 1 unreachable.
pg_auto_failover_replication_quorum: true

# Application database — the backend connects via the pg_auto_failover
# formation URI (libpq connection string with multiple hosts +
# target_session_attrs=read-write). Provisioned by the role on the
# primary, replicates automatically.
pg_auto_failover_app_dbname: veza
pg_auto_failover_app_user: veza
# Password is supplied via vault — see roles/postgres_ha/README.md.
pg_auto_failover_app_password: "{{ vault_pg_app_password | default('changeme-DEV-ONLY') }}"