diff --git a/infra/ansible/roles/haproxy/templates/haproxy.cfg.j2 b/infra/ansible/roles/haproxy/templates/haproxy.cfg.j2
index 2f73e97e0..0fc206fd3 100644
--- a/infra/ansible/roles/haproxy/templates/haproxy.cfg.j2
+++ b/infra/ansible/roles/haproxy/templates/haproxy.cfg.j2
@@ -1,5 +1,16 @@
 # Managed by Ansible — do not edit by hand.
-# v1.0.9 W4 Day 19.
+# v1.0.9 W4 Day 19 (multi-instance) → W5+ extended to blue/green.
+# `haproxy_topology` (set in group_vars/<env>.yml) selects between:
+#
+#   multi-instance (default, lab) — server list comes from inventory
+#       groups backend_api_instances, stream_server_instances ; sticky
+#       cookie load-balances across N peers.
+#   blue-green (staging, prod)    — server list is exactly two:
+#       backend-blue + backend-green. veza_active_color
+#       picks which one is primary ; the other is `backup` (HAProxy
+#       routes to a backup server only when ALL primaries are down).
+#       The veza_haproxy_switch role re-renders this template with a
+#       new active_color, validates, atomic-swaps, and HUPs.
 
 global
     log /dev/log local0
@@ -10,11 +21,7 @@ global
     user haproxy
     group haproxy
     daemon
-    # Avoid leaking the version banner in error pages.
     server-state-file /var/lib/haproxy/server-state
-    # ssl-default-bind-* tightens TLS to modern ciphers ; lifted directly
-    # from the Mozilla Intermediate profile. Only effective when a TLS
-    # cert is mounted (see haproxy_tls_cert_path).
ssl-default-bind-options no-sslv3 no-tlsv10 no-tlsv11 ssl-default-bind-ciphers ECDHE-ECDSA-AES128-GCM-SHA256:ECDHE-RSA-AES128-GCM-SHA256:ECDHE-ECDSA-AES256-GCM-SHA384:ECDHE-RSA-AES256-GCM-SHA384:ECDHE-ECDSA-CHACHA20-POLY1305:ECDHE-RSA-CHACHA20-POLY1305 @@ -23,22 +30,20 @@ defaults mode http option httplog option dontlognull - option forwardfor # adds X-Forwarded-For so backend logs see the real IP + option forwardfor option http-server-close timeout connect 5s timeout client 60s timeout server 60s - timeout tunnel 1h # WS connections are long-lived ; bumped from default 1m + timeout tunnel 1h timeout client-fin 5s timeout http-keep-alive 15s timeout http-request 10s - # Restore previous server state on reload so health checks don't - # restart from scratch + the drain timer survives. load-server-state-from-file global # ----------------------------------------------------------------------- -# Stats endpoint — bound to loopback only so the prometheus haproxy -# exporter (sidecar) can scrape it. Auth lives at the bridge layer. +# Stats endpoint — bound to loopback only ; the Prometheus haproxy +# exporter sidecar scrapes it. # ----------------------------------------------------------------------- frontend stats bind 127.0.0.1:{{ haproxy_listen_stats }} @@ -50,8 +55,7 @@ frontend stats no log # ----------------------------------------------------------------------- -# Frontend HTTP. v1.0 lab uses HTTP only ; uncomment the HTTPS bind -# when haproxy_tls_cert_path is non-empty (Mozilla intermediate). +# Frontend — HTTP + (optionally) HTTPS. ACL-driven path routing. 
 # -----------------------------------------------------------------------
 frontend veza_http_in
     bind *:{{ haproxy_listen_http }}
@@ -61,23 +65,103 @@ frontend veza_http_in
     http-request redirect scheme https code 301 if !{ ssl_fc }
 {% endif %}
-    # Path-based routing :
-    #   /api/v1/ws/*  → backend api_pool (sticky cookie ; carries chat WS)
-    #   /api/v1/*     → backend api_pool (also sticky so 401 → /me roundtrips work)
-    #   /tracks/*/hls → backend stream_pool (URI-hash for cache locality)
-    #   else          → backend api_pool (default)
+    acl is_api path_beg /api/v1
+{% if haproxy_topology | default('multi-instance') == 'blue-green' %}
+    acl is_stream_seg_dir path_beg /tracks/
+    acl is_stream_seg_ext path_end .m3u8 .ts .m4s
+    # (one acl line cannot mix path_beg and path_end ; AND the pair above)
+    acl is_stream_path path_beg /stream
+    acl is_stream_path path_beg /hls
+    use_backend backend_api if is_api
+    use_backend stream_pool if is_stream_seg_dir is_stream_seg_ext
+    use_backend stream_pool if is_stream_path
+    default_backend web_pool
+{% else %}
     acl is_stream path_beg /tracks/ path_end .m3u8
     acl is_stream path_beg /tracks/ path_end .ts
     acl is_stream path_beg /tracks/ path_end .m4s
     use_backend stream_pool if is_stream
     default_backend api_pool
+{% endif %}
+
+{% if haproxy_topology | default('multi-instance') == 'blue-green' %}
+# =======================================================================
+# BLUE / GREEN topology (staging, prod)
+#
+# active_color is the variable veza_haproxy_switch passes in. It selects
+# which server gets `check` and which gets `check backup`. HAProxy only
+# routes to a `backup` server when EVERY non-backup is marked down by
+# its health check ; together with health-check fall=3 this gives us
+# instant rollback to the prior color if the new one starts failing
+# health checks (without re-running Ansible).
+#
+# Active color:     {{ veza_active_color | default(haproxy_active_color | default('blue')) }}
+# Container prefix: {{ veza_container_prefix }}
+# DNS suffix:       {{ veza_incus_dns_suffix }}
+# =======================================================================
+{% set _active = veza_active_color | default(haproxy_active_color | default('blue')) %}
+
+# -----------------------------------------------------------------------
+# Backend API pool — Go. Sticky cookie ; backup color sits idle.
+# -----------------------------------------------------------------------
+backend backend_api
+    balance roundrobin
+    option httpchk GET {{ veza_healthcheck_paths.backend | default('/api/v1/health') }}
+    http-check expect status 200
+    cookie {{ haproxy_sticky_cookie_name }} insert indirect nocache httponly secure
+    default-server check inter {{ haproxy_health_check_interval_ms }} fall {{ haproxy_health_check_fall }} rise {{ haproxy_health_check_rise }} on-marked-down shutdown-sessions slowstart {{ haproxy_graceful_drain_seconds }}s
+    # ^ one physical line on purpose : haproxy.cfg has no line
+    # continuation, and bare `inter` / `fall` / `rise` are not section
+    # keywords — split onto their own lines they fail `haproxy -c`.
+    # on-marked-down shutdown-sessions : drop live sessions of a dead color.
+    # slowstart : ramp a recovering server back to full weight gradually.
+    server backend_blue {{ veza_container_prefix }}backend-blue.{{ veza_incus_dns_suffix }}:{{ veza_backend_port }} cookie backend_blue {{ '' if _active == 'blue' else 'backup' }}
+    server backend_green {{ veza_container_prefix }}backend-green.{{ veza_incus_dns_suffix }}:{{ veza_backend_port }} cookie backend_green {{ '' if _active == 'green' else 'backup' }}
+
+# -----------------------------------------------------------------------
+# Stream pool — Rust Axum HLS. URI-hash for cache locality. Same
+# blue/green pair, same backup-flag pattern.
+# -----------------------------------------------------------------------
+backend stream_pool
+    balance uri whole
+    hash-type consistent
+    option httpchk GET {{ veza_healthcheck_paths.stream | default('/health') }}
+    http-check expect status 200
+    timeout tunnel 1h
+    default-server check inter {{ haproxy_health_check_interval_ms }} fall {{ haproxy_health_check_fall }} rise {{ haproxy_health_check_rise }} on-marked-down shutdown-sessions slowstart {{ haproxy_graceful_drain_seconds }}s
+    # ^ one physical line on purpose : haproxy.cfg has no line
+    # continuation, and bare `inter` / `fall` / `rise` are not section
+    # keywords — split onto their own lines they fail `haproxy -c`.
+    # on-marked-down shutdown-sessions : drop live sessions of a dead color.
+    # slowstart : ramp a recovering server back to full weight gradually.
+    server stream_blue {{ veza_container_prefix }}stream-blue.{{ veza_incus_dns_suffix }}:{{ veza_stream_port }} {{ '' if _active == 'blue' else 'backup' }}
+    server stream_green {{ veza_container_prefix }}stream-green.{{ veza_incus_dns_suffix }}:{{ veza_stream_port }} {{ '' if _active == 'green' else 'backup' }}
+
+# -----------------------------------------------------------------------
+# Web pool — React SPA served by nginx. Same pair, same pattern.
+# -----------------------------------------------------------------------
+backend web_pool
+    balance roundrobin
+    option httpchk GET {{ veza_healthcheck_paths.web | default('/') }}
+    http-check expect status 200
+    default-server check inter {{ haproxy_health_check_interval_ms }} fall {{ haproxy_health_check_fall }} rise {{ haproxy_health_check_rise }} on-marked-down shutdown-sessions slowstart {{ haproxy_graceful_drain_seconds }}s
+    # ^ one physical line on purpose : haproxy.cfg has no line
+    # continuation, and bare `inter` / `fall` / `rise` are not valid
+    # section keywords — split out they fail `haproxy -c` validation.
+    # on-marked-down shutdown-sessions : drop live sessions of a dead color.
+    # slowstart : ramp a recovering server back to full weight gradually.
+    server web_blue {{ veza_container_prefix }}web-blue.{{ veza_incus_dns_suffix }}:{{ veza_web_port }} {{ '' if _active == 'blue' else 'backup' }}
+    server web_green {{ veza_container_prefix }}web-green.{{ veza_incus_dns_suffix }}:{{ veza_web_port }} {{ '' if _active == 'green' else 'backup' }}
+
+{% else %}
+# =======================================================================
+# MULTI-INSTANCE topology (lab, default)
+# Server list comes from inventory groups ; sticky cookie load-balances.
+# ======================================================================= # ----------------------------------------------------------------------- # Backend api_pool — Gin REST API. Sticky cookie + active health check. -# `cookie ... insert indirect nocache` : HAProxy sets the cookie on the -# first response, the browser sends it back, subsequent requests stick -# to the same server. WS upgrades inherit it. # ----------------------------------------------------------------------- backend api_pool balance roundrobin @@ -98,9 +181,7 @@ backend api_pool # ----------------------------------------------------------------------- # Backend stream_pool — Rust Axum HLS. URI hash so the same track_id -# consistently lands on the same node, keeping the in-process HLS -# segment cache warm. `consistent` flag = jump-hash so adding/removing -# a node doesn't flush the entire pool. +# consistently lands on the same node. # ----------------------------------------------------------------------- backend stream_pool balance uri whole @@ -118,3 +199,5 @@ backend stream_pool {% for host in stream_hosts %} server {{ host }} {{ host }}.lxd:{{ haproxy_stream_server_port }} {% endfor %} + +{% endif %}