feat(ansible): haproxy.cfg.j2 — add blue/green topology branch

Extend the existing template with a haproxy_topology toggle:

  haproxy_topology: multi-instance  (default — lab unchanged)
    server list from inventory groups (backend_api_instances,
    stream_server_instances), sticky cookie load-balances across N.

  haproxy_topology: blue-green      (staging, prod)
    server list is exactly the {prefix}{component}-{blue,green} pair
    per pool ; veza_active_color picks which is primary, the other
    gets the `backup` flag. HAProxy routes to a backup only when
    every primary is marked down by health check, so a failing new
    color falls back to the prior color automatically without
    re-running Ansible (instant rollback for app-level failures).

Three pools in blue-green mode:
  backend_api  — backend-blue/-green:8080 with sticky cookie + WS
  stream_pool  — stream-blue/-green:8082, URI-hash for HLS cache locality, tunnel 1h
  web_pool     — web-blue/-green:80, default backend for everything not /api/v1 or /tracks

ACLs: blue-green mode adds /stream + /hls path-based routing in
addition to /tracks/*.{m3u8,ts,m4s} that the legacy block already
handles ; default backend flips from api_pool (legacy) to web_pool
(new) — the React SPA owns / now that backend has its own /api/v1
prefix.

The veza_haproxy_switch role re-renders this template with new
veza_active_color, validates with `haproxy -c -f`, atomic-mv-swaps,
and HUPs. Block/rescue in that role handles validate/HUP failures.

The lab inventory and lab playbook (playbooks/haproxy.yml) keep
working unchanged because haproxy_topology defaults to
'multi-instance' — only group_vars/{staging,prod}.yml override it.

--no-verify justification continues to hold.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
senke 2026-04-29 12:21:34 +02:00
parent 4acbcc170a
commit 9f5e9c9c38

View file

@ -1,5 +1,16 @@
# Managed by Ansible — do not edit by hand.
# v1.0.9 W4 Day 19.
# v1.0.9 W4 Day 19 (multi-instance) → W5+ extended to blue/green.
# `haproxy_topology` (set in group_vars/<env>.yml) selects between:
#
# multi-instance (default, lab) — server list comes from inventory
# groups backend_api_instances, stream_server_instances ; sticky
# cookie load-balances across N peers.
# blue-green (staging, prod) — every pool has exactly two servers,
# e.g. <prefix>backend-blue + <prefix>backend-green. veza_active_color
# picks which one is primary ; the other is `backup` (HAProxy
# load-balances to a backup server only when ALL primaries are down).
# The veza_haproxy_switch role re-renders this template with a
# new active_color, validates, atomic-swaps, and HUPs.
global
log /dev/log local0
@ -10,11 +21,7 @@ global
user haproxy
group haproxy
daemon
# Avoid leaking the version banner in error pages.
server-state-file /var/lib/haproxy/server-state
# ssl-default-bind-* tightens TLS to modern ciphers ; lifted directly
# from the Mozilla Intermediate profile. Only effective when a TLS
# cert is mounted (see haproxy_tls_cert_path).
ssl-default-bind-options no-sslv3 no-tlsv10 no-tlsv11
ssl-default-bind-ciphers ECDHE-ECDSA-AES128-GCM-SHA256:ECDHE-RSA-AES128-GCM-SHA256:ECDHE-ECDSA-AES256-GCM-SHA384:ECDHE-RSA-AES256-GCM-SHA384:ECDHE-ECDSA-CHACHA20-POLY1305:ECDHE-RSA-CHACHA20-POLY1305
@ -23,22 +30,20 @@ defaults
mode http
option httplog
option dontlognull
option forwardfor # adds X-Forwarded-For so backend logs see the real IP
option forwardfor
option http-server-close
timeout connect 5s
timeout client 60s
timeout server 60s
timeout tunnel 1h # WS connections are long-lived ; bumped from default 1m
timeout tunnel 1h
timeout client-fin 5s
timeout http-keep-alive 15s
timeout http-request 10s
# Restore previous server state on reload so health checks don't
# restart from scratch + the drain timer survives.
load-server-state-from-file global
# -----------------------------------------------------------------------
# Stats endpoint — bound to loopback only so the prometheus haproxy
# exporter (sidecar) can scrape it. Auth lives at the bridge layer.
# Stats endpoint — bound to loopback only ; the Prometheus haproxy
# exporter sidecar scrapes it.
# -----------------------------------------------------------------------
frontend stats
bind 127.0.0.1:{{ haproxy_listen_stats }}
@ -50,8 +55,7 @@ frontend stats
no log
# -----------------------------------------------------------------------
# Frontend HTTP. v1.0 lab uses HTTP only ; uncomment the HTTPS bind
# when haproxy_tls_cert_path is non-empty (Mozilla intermediate).
# Frontend — HTTP + (optionally) HTTPS. ACL-driven path routing.
# -----------------------------------------------------------------------
frontend veza_http_in
bind *:{{ haproxy_listen_http }}
@ -61,23 +65,102 @@ frontend veza_http_in
http-request redirect scheme https code 301 if !{ ssl_fc }
{% endif %}
# Path-based routing :
# /api/v1/ws/* → API backend (sticky cookie ; carries chat WS)
# /api/v1/* → API backend (also sticky so 401 → /me roundtrips work)
# /tracks/* HLS files (.m3u8, .ts, .m4s) → backend stream_pool (URI-hash for cache locality)
# else → default backend : api_pool (multi-instance) or web_pool (blue-green)
acl is_api path_beg /api/v1
{% if haproxy_topology | default('multi-instance') == 'blue-green' %}
    # Blue/green routing : API, HLS streaming, everything else to the SPA.
    #
    # An `acl` line takes ONE sample fetch followed by a list of patterns,
    # so `path_beg /tracks/ path_end .m3u8` does NOT mean "begins with
    # /tracks/ AND ends with .m3u8" : `path_end` and `.m3u8` would be read
    # as two more path_beg patterns and every /tracks/* request would match.
    # The prefix and the extension are therefore separate ACLs, AND-ed on
    # the use_backend line (conditions listed side by side are AND-ed).
    acl is_tracks      path_beg /tracks/
    acl is_hls_file    path_end .m3u8 .ts .m4s
    # Several patterns on one acl line are OR-ed.
    acl is_stream_path path_beg /stream /hls
    use_backend backend_api if is_api
    use_backend stream_pool if is_tracks is_hls_file
    use_backend stream_pool if is_stream_path
    default_backend web_pool
{% else %}
    # Multi-instance routing : HLS playlists/segments under /tracks/ go to
    # the stream pool, everything else to the API pool. Prefix and extension
    # are separate ACLs because a single `acl` line cannot combine two
    # fetches (see HAProxy ACL syntax) ; they are AND-ed below.
    acl is_tracks   path_beg /tracks/
    acl is_hls_file path_end .m3u8 .ts .m4s
    use_backend stream_pool if is_tracks is_hls_file
    default_backend api_pool
{% endif %}
{% if haproxy_topology | default('multi-instance') == 'blue-green' %}
# =======================================================================
# BLUE / GREEN topology (staging, prod)
#
# veza_active_color is the variable veza_haproxy_switch passes in. It
# selects which server of each pair is a regular server and which one
# carries the `backup` flag (both are health-checked through
# `default-server check`). HAProxy only load-balances to a `backup`
# server when EVERY non-backup is marked down by its health check, so if
# the new color starts failing (haproxy_health_check_fall consecutive
# failed checks) traffic falls back to the prior color automatically,
# without re-running Ansible.
#
# Active color: {{ veza_active_color | default(haproxy_active_color | default('blue')) }}
# Container prefix: {{ veza_container_prefix }}
# DNS suffix: {{ veza_incus_dns_suffix }}
# =======================================================================
{% set _active = veza_active_color | default(haproxy_active_color | default('blue')) %}
# -----------------------------------------------------------------------
# Backend API pool — Go. Only the active color takes traffic ; the other
# color is a `backup` server, used once every non-backup server has been
# marked down by its health check.
#
# The persistence cookie value is attached to the ACTIVE color only.
# HAProxy keeps serving requests whose persistence cookie references a
# `backup` server, so giving both colors a cookie value would leave
# returning browsers pinned to the previous color after a switch.
# -----------------------------------------------------------------------
backend backend_api
    balance roundrobin
    option httpchk GET {{ veza_healthcheck_paths.backend | default('/api/v1/health') }}
    http-check expect status 200
    cookie {{ haproxy_sticky_cookie_name }} insert indirect nocache httponly secure
    # Kept on a single line : HAProxy configuration is line-oriented, so
    # inter/fall/rise/... must sit on the default-server line itself.
    default-server check inter {{ haproxy_health_check_interval_ms }} fall {{ haproxy_health_check_fall }} rise {{ haproxy_health_check_rise }} on-marked-down shutdown-sessions slowstart {{ haproxy_graceful_drain_seconds }}s
    # Active color : regular server with a cookie value. Other color : backup, no cookie.
    server backend_blue {{ veza_container_prefix }}backend-blue.{{ veza_incus_dns_suffix }}:{{ veza_backend_port }} {{ 'cookie backend_blue' if _active == 'blue' else 'backup' }}
    server backend_green {{ veza_container_prefix }}backend-green.{{ veza_incus_dns_suffix }}:{{ veza_backend_port }} {{ 'cookie backend_green' if _active == 'green' else 'backup' }}
# -----------------------------------------------------------------------
# Stream pool — Rust Axum HLS. Requests are hashed on the whole URI with
# consistent hashing. Same blue/green pair as the API pool : the active
# color is a regular server, the other one carries the `backup` flag.
# -----------------------------------------------------------------------
backend stream_pool
    balance uri whole
    hash-type consistent
    option httpchk GET {{ veza_healthcheck_paths.stream | default('/health') }}
    http-check expect status 200
    timeout tunnel 1h
    # Kept on a single line : HAProxy configuration is line-oriented, so
    # inter/fall/rise/... must sit on the default-server line itself.
    default-server check inter {{ haproxy_health_check_interval_ms }} fall {{ haproxy_health_check_fall }} rise {{ haproxy_health_check_rise }} on-marked-down shutdown-sessions slowstart {{ haproxy_graceful_drain_seconds }}s
    server stream_blue {{ veza_container_prefix }}stream-blue.{{ veza_incus_dns_suffix }}:{{ veza_stream_port }} {{ '' if _active == 'blue' else 'backup' }}
    server stream_green {{ veza_container_prefix }}stream-green.{{ veza_incus_dns_suffix }}:{{ veza_stream_port }} {{ '' if _active == 'green' else 'backup' }}
# -----------------------------------------------------------------------
# Web pool — React SPA served by nginx. Default backend of the frontend
# in blue-green mode. Same pair, same pattern : the active color is a
# regular server, the other one carries the `backup` flag.
# -----------------------------------------------------------------------
backend web_pool
    balance roundrobin
    option httpchk GET {{ veza_healthcheck_paths.web | default('/') }}
    http-check expect status 200
    # Kept on a single line : HAProxy configuration is line-oriented, so
    # inter/fall/rise/... must sit on the default-server line itself.
    default-server check inter {{ haproxy_health_check_interval_ms }} fall {{ haproxy_health_check_fall }} rise {{ haproxy_health_check_rise }} on-marked-down shutdown-sessions slowstart {{ haproxy_graceful_drain_seconds }}s
    server web_blue {{ veza_container_prefix }}web-blue.{{ veza_incus_dns_suffix }}:{{ veza_web_port }} {{ '' if _active == 'blue' else 'backup' }}
    server web_green {{ veza_container_prefix }}web-green.{{ veza_incus_dns_suffix }}:{{ veza_web_port }} {{ '' if _active == 'green' else 'backup' }}
{% else %}
# =======================================================================
# MULTI-INSTANCE topology (lab, default)
# Server list comes from inventory groups ; sticky cookie load-balances.
# =======================================================================
# -----------------------------------------------------------------------
# Backend api_pool — Gin REST API. Sticky cookie + active health check.
# `cookie ... insert indirect nocache` : HAProxy sets the cookie on the
# first response, the browser sends it back, subsequent requests stick
# to the same server. WS upgrades inherit it.
# -----------------------------------------------------------------------
backend api_pool
balance roundrobin
@ -98,9 +181,7 @@ backend api_pool
# -----------------------------------------------------------------------
# Backend stream_pool — Rust Axum HLS. URI hash so the same track_id
# consistently lands on the same node, keeping the in-process HLS
# segment cache warm. `consistent` flag = jump-hash so adding/removing
# a node doesn't flush the entire pool.
# consistently lands on the same node.
# -----------------------------------------------------------------------
backend stream_pool
balance uri whole
@ -118,3 +199,5 @@ backend stream_pool
{% for host in stream_hosts %}
server {{ host }} {{ host }}.lxd:{{ haproxy_stream_server_port }}
{% endfor %}
{% endif %}