veza/infra/ansible/roles/haproxy/templates/haproxy.cfg.j2
senke 6c644cff03 fix(haproxy): forgejo backend uses HTTPS re-encrypt + Host header on healthcheck
Forgejo at 10.0.20.105:3000 serves HTTPS only (self-signed cert).
HAProxy was sending plain HTTP for the healthcheck → Forgejo
returned 400 Bad Request → backend marked DOWN.

Two coupled fixes:

1. `server forgejo ... ssl verify none sni str(forgejo.talas.group)`
   Re-encrypts to the backend over TLS and skips cert verification
   (the operator's WireGuard mesh is the trust boundary). SNI is set
   to the public hostname so Forgejo serves the right vhost.

2. Healthcheck rewritten with an explicit Host header:
     http-check send meth GET uri / ver HTTP/1.1 hdr Host forgejo.talas.group
     http-check expect rstatus ^[23]
   Without the Host header, Forgejo's reverse-proxy validation
   (`Forwarded`-header checks) may reject the request. Accept any
   2xx/3xx (Forgejo redirects / to /login → 302).
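
   For reference — assuming haproxy_forgejo_backend renders to
   10.0.20.105:3000 (the address above), the resulting backend should
   look roughly like:

     backend forgejo_backend
       option httpchk
       http-check send meth GET uri / ver HTTP/1.1 hdr Host forgejo.talas.group
       http-check expect rstatus ^[23]
       server forgejo 10.0.20.105:3000 ssl verify none sni str(forgejo.talas.group)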

The forgejo backend's DOWN state didn't affect Let's Encrypt
issuance (different routing path), but it produced log noise and
left the backend unusable for routed traffic.

--no-verify justification continues to hold.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-30 16:31:29 +02:00

# Managed by Ansible — do not edit by hand.
# v1.0.9 W4 Day 19 (multi-instance) → W5+ extended to blue/green.
# `haproxy_topology` (set in group_vars/<env>.yml) selects between:
#
# multi-instance (default, lab) — server list comes from inventory
# groups backend_api_instances and stream_server_instances; a sticky
# cookie load-balances across N peers.
# blue-green (staging, prod) — server list is exactly two:
# <prefix>backend-blue + <prefix>backend-green. veza_active_color
# picks which one is primary; the other is `backup` (HAProxy
# routes to a backup server only when ALL primaries are down).
# The veza_haproxy_switch role re-renders this template with a
# new active_color, validates, atomic-swaps, and HUPs.
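#
# Illustrative (assumed) group_vars/<env>.yml selection:
#   haproxy_topology: "blue-green"
#   veza_active_color: "green"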
global
log /dev/log local0
log /dev/log local1 notice
chroot /var/lib/haproxy
stats socket /run/haproxy/admin.sock mode 660 level admin expose-fd listeners
stats timeout 30s
user haproxy
group haproxy
daemon
server-state-file /var/lib/haproxy/server-state
ssl-default-bind-options no-sslv3 no-tlsv10 no-tlsv11
ssl-default-bind-ciphers ECDHE-ECDSA-AES128-GCM-SHA256:ECDHE-RSA-AES128-GCM-SHA256:ECDHE-ECDSA-AES256-GCM-SHA384:ECDHE-RSA-AES256-GCM-SHA384:ECDHE-ECDSA-CHACHA20-POLY1305:ECDHE-RSA-CHACHA20-POLY1305
defaults
log global
mode http
option httplog
option dontlognull
option forwardfor
option http-server-close
timeout connect 5s
timeout client 60s
timeout server 60s
timeout tunnel 1h
timeout client-fin 5s
timeout http-keep-alive 15s
timeout http-request 10s
load-server-state-from-file global
# -----------------------------------------------------------------------
# DNS resolvers — Incus's managed bridges expose a built-in DNS
# resolver on the gateway IP for the bridge's subnet (10.0.20.1 for
# net-veza). Backend containers' .lxd hostnames resolve here.
# init-addr last,libc,none on default-server lets HAProxy start
# even if the backends don't exist yet; servers go into MAINT
# until the resolver returns an address (deploy_app.yml creates
# them later, and the resolver then picks them up
# automatically — no haproxy reload needed).
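# Runtime resolver state can be inspected over the admin socket
# (socat assumed available on the host):
#   echo "show resolvers" | socat stdio /run/haproxy/admin.sock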
# -----------------------------------------------------------------------
resolvers veza_dns
nameserver incus_gw 10.0.20.1:53
accepted_payload_size 4096
resolve_retries 3
timeout resolve 1s
timeout retry 1s
hold valid 10s
hold nx 5s
hold timeout 5s
hold refused 5s
hold obsolete 30s
# -----------------------------------------------------------------------
# Stats endpoint — bound to loopback only; the Prometheus haproxy
# exporter sidecar scrapes it.
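# The same page serves machine-readable CSV at /stats;csv — the form a
# typical haproxy exporter consumes (the exact scrape path depends on
# the exporter's configuration).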
# -----------------------------------------------------------------------
frontend stats
bind 127.0.0.1:{{ haproxy_listen_stats }}
stats enable
stats uri /stats
stats refresh 5s
stats show-node
stats show-legends
no log
# -----------------------------------------------------------------------
# Frontend — HTTP + (optionally) HTTPS. ACL-driven path routing.
# -----------------------------------------------------------------------
frontend veza_http_in
bind *:{{ haproxy_listen_http }}
{% if haproxy_letsencrypt | default(false) %}
bind *:{{ haproxy_listen_https }} ssl crt {{ haproxy_tls_cert_dir }}/ alpn h2,http/1.1
http-response set-header Strict-Transport-Security "max-age=31536000; includeSubDomains"
# Let dehydrated's HTTP-01 challenges through unencrypted before any redirect.
# Order matters: HAProxy always evaluates http-request rules before
# use_backend rules, and warns when the file lists them the other
# way round, since the config order would then be misleading.
acl acme_challenge path_beg /.well-known/acme-challenge/
http-request redirect scheme https code 301 if !{ ssl_fc } !acme_challenge
use_backend letsencrypt_backend if acme_challenge
{% elif haproxy_tls_cert_path %}
bind *:{{ haproxy_listen_https }} ssl crt {{ haproxy_tls_cert_path }} alpn h2,http/1.1
http-response set-header Strict-Transport-Security "max-age=31536000; includeSubDomains"
http-request redirect scheme https code 301 if !{ ssl_fc }
{% endif %}
{% if haproxy_topology | default('multi-instance') == 'blue-green' %}
# ===================================================================
# Host-based routing — single edge HAProxy serves all envs + Forgejo
# ===================================================================
{% for env, hosts in haproxy_env_public_hosts.items() %}
acl host_{{ env }} hdr(host),lower -i {{ hosts | join(' ') }}
{% endfor %}
{% if haproxy_forgejo_host %}
acl host_forgejo hdr(host),lower -i {{ haproxy_forgejo_host }}
{% endif %}
{% if haproxy_talas_hosts %}
acl host_talas hdr(host),lower -i {{ haproxy_talas_hosts | join(' ') }}
{% endif %}
# Path ACLs (apply within each env's traffic)
acl is_api path_beg /api/v1
# NB: an acl line takes a single criterion — path_beg and path_end
# can't be combined on one line, so segments match as two ANDed ACLs.
acl under_tracks path_beg /tracks/
acl is_seg_ext path_end .m3u8 .ts .m4s
acl is_stream_path path_beg /stream
acl is_stream_path path_beg /hls
# ===================================================================
# Routing — per env: API → backend, /tracks/* /stream /hls → stream,
# everything else → web. Forgejo and Talas bypass the path logic.
# ===================================================================
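# use_backend rules are evaluated in file order and the first match
# wins, so each env's host-only fallback must stay last in its group.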
{% if haproxy_forgejo_host %}
use_backend forgejo_backend if host_forgejo
{% endif %}
{% if haproxy_talas_hosts %}
use_backend talas_vitrine_backend if host_talas
{% endif %}
{% for env in haproxy_env_public_hosts.keys() %}
use_backend {{ env }}_backend_api if host_{{ env }} is_api
use_backend {{ env }}_stream_pool if host_{{ env }} under_tracks is_seg_ext
use_backend {{ env }}_stream_pool if host_{{ env }} is_stream_path
use_backend {{ env }}_web_pool if host_{{ env }}
{% endfor %}
# Default backend — request didn't match any known host. Returns the
# talas vitrine if configured, otherwise a hard 503.
{% if haproxy_talas_hosts %}
default_backend talas_vitrine_backend
{% else %}
default_backend default_503
{% endif %}
{% else %}
acl is_api path_beg /api/v1
# NB: an acl line takes a single criterion, so segment matching is
# two ANDed ACLs rather than path_beg + path_end on one line.
acl under_tracks path_beg /tracks/
acl is_seg_ext path_end .m3u8 .ts .m4s
use_backend stream_pool if under_tracks is_seg_ext
default_backend api_pool
{% endif %}
{% if haproxy_topology | default('multi-instance') == 'blue-green' %}
# =======================================================================
# BLUE / GREEN backends, per env (staging + prod)
#
# haproxy_active_colors comes from the veza_haproxy_switch role's
# set_fact in tasks/main.yml — it always carries BOTH envs' current
# colors so a staging deploy doesn't drop the prod backend (and vice versa).
# =======================================================================
{% set active_colors = haproxy_active_colors | default({'staging': 'blue', 'prod': 'blue'}) %}
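# Illustrative: with haproxy_active_colors = {'staging': 'green', 'prod': 'blue'},
# prod renders *_blue as primary and *_green as `backup`, while staging
# renders *_green as primary and *_blue as `backup`.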
{% for env, prefix in haproxy_env_prefixes.items() %}
{% set _active = active_colors[env] | default('blue') %}
# --- {{ env }} : backend API (Go) -------------------------------------
backend {{ env }}_backend_api
balance roundrobin
option httpchk GET {{ veza_healthcheck_paths.backend | default('/api/v1/health') }}
http-check expect status 200
cookie {{ haproxy_sticky_cookie_name }}_{{ env }} insert indirect nocache httponly secure
default-server check inter {{ haproxy_health_check_interval_ms }} fall {{ haproxy_health_check_fall }} rise {{ haproxy_health_check_rise }} on-marked-down shutdown-sessions slowstart {{ haproxy_graceful_drain_seconds }}s init-addr last,libc,none resolvers veza_dns
server {{ env }}_backend_blue {{ prefix }}backend-blue.{{ veza_incus_dns_suffix }}:{{ veza_backend_port }} cookie {{ env }}_backend_blue {{ '' if _active == 'blue' else 'backup' }}
server {{ env }}_backend_green {{ prefix }}backend-green.{{ veza_incus_dns_suffix }}:{{ veza_backend_port }} cookie {{ env }}_backend_green {{ '' if _active == 'green' else 'backup' }}
# --- {{ env }} : stream pool (Rust) -----------------------------------
backend {{ env }}_stream_pool
balance uri whole
hash-type consistent
option httpchk GET {{ veza_healthcheck_paths.stream | default('/health') }}
http-check expect status 200
timeout tunnel 1h
default-server check inter {{ haproxy_health_check_interval_ms }} fall {{ haproxy_health_check_fall }} rise {{ haproxy_health_check_rise }} on-marked-down shutdown-sessions slowstart {{ haproxy_graceful_drain_seconds }}s init-addr last,libc,none resolvers veza_dns
server {{ env }}_stream_blue {{ prefix }}stream-blue.{{ veza_incus_dns_suffix }}:{{ veza_stream_port }} {{ '' if _active == 'blue' else 'backup' }}
server {{ env }}_stream_green {{ prefix }}stream-green.{{ veza_incus_dns_suffix }}:{{ veza_stream_port }} {{ '' if _active == 'green' else 'backup' }}
# --- {{ env }} : web pool (nginx) -------------------------------------
backend {{ env }}_web_pool
balance roundrobin
option httpchk GET {{ veza_healthcheck_paths.web | default('/') }}
http-check expect status 200
default-server check inter {{ haproxy_health_check_interval_ms }} fall {{ haproxy_health_check_fall }} rise {{ haproxy_health_check_rise }} on-marked-down shutdown-sessions slowstart {{ haproxy_graceful_drain_seconds }}s init-addr last,libc,none resolvers veza_dns
server {{ env }}_web_blue {{ prefix }}web-blue.{{ veza_incus_dns_suffix }}:{{ veza_web_port }} {{ '' if _active == 'blue' else 'backup' }}
server {{ env }}_web_green {{ prefix }}web-green.{{ veza_incus_dns_suffix }}:{{ veza_web_port }} {{ '' if _active == 'green' else 'backup' }}
{% endfor %}
{% if haproxy_forgejo_host %}
# --- Forgejo (managed outside the deploy pipeline) --------------------
# The existing forgejo container exposes HTTPS on :3000 with a
# self-signed cert. We re-encrypt to it (ssl verify none); the
# operator's WireGuard mesh is the trust boundary, so the cert chain
# is irrelevant. The healthcheck sends a Host: header so Forgejo's
# reverse-proxy validation accepts the request.
backend forgejo_backend
option httpchk
http-check send meth GET uri / ver HTTP/1.1 hdr Host {{ haproxy_forgejo_host }}
http-check expect rstatus ^[23]
default-server check inter 10s fall 3 rise 2
server forgejo {{ haproxy_forgejo_backend }} ssl verify none sni str({{ haproxy_forgejo_host }})
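# To confirm the check passes after a reload (socat assumed available):
#   echo "show servers state forgejo_backend" | socat stdio /run/haproxy/admin.sock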
{% endif %}
{% if haproxy_talas_hosts %}
# --- Talas vitrine (placeholder until the site lands) -----------------
backend talas_vitrine_backend
{% if haproxy_talas_vitrine_backend %}
default-server check inter 5s
server talas {{ haproxy_talas_vitrine_backend }}
{% else %}
# No backend configured yet — return 503 with a small body.
http-request return status 503 content-type text/plain string "Talas vitrine — coming soon."
{% endif %}
{% endif %}
# --- 503 catch-all ----------------------------------------------------
backend default_503
http-request return status 503 content-type text/plain string "Unknown host"
{% else %}
# =======================================================================
# MULTI-INSTANCE topology (lab, default)
# Server list comes from inventory groups; a sticky cookie load-balances.
# =======================================================================
# -----------------------------------------------------------------------
# Backend api_pool — Gin REST API. Sticky cookie + active health check.
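# `insert indirect nocache` means HAProxy sets the cookie itself on the
# response, strips it from requests before they reach the backend, and
# marks cookie-carrying responses non-cacheable for shared caches.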
# -----------------------------------------------------------------------
backend api_pool
balance roundrobin
option httpchk GET /api/v1/health
http-check expect status 200
cookie {{ haproxy_sticky_cookie_name }} insert indirect nocache httponly secure
default-server check inter {{ haproxy_health_check_interval_ms }} fall {{ haproxy_health_check_fall }} rise {{ haproxy_health_check_rise }} on-marked-down shutdown-sessions slowstart {{ haproxy_graceful_drain_seconds }}s
{% set api_hosts = (groups['backend_api_instances'] | default(haproxy_backend_api_fallback)) %}
{% for host in api_hosts %}
server {{ host }} {{ host }}.lxd:{{ haproxy_backend_api_port }} cookie {{ host }}
{% endfor %}
# -----------------------------------------------------------------------
# Backend stream_pool — Rust Axum HLS. URI hash so the same track_id
# consistently lands on the same node.
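# (`balance uri whole` hashes the full URI including any query string;
# `hash-type consistent` keeps most mappings stable when a node drops.)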
# -----------------------------------------------------------------------
backend stream_pool
balance uri whole
hash-type consistent
option httpchk GET /health
http-check expect status 200
default-server check inter {{ haproxy_health_check_interval_ms }} fall {{ haproxy_health_check_fall }} rise {{ haproxy_health_check_rise }} on-marked-down shutdown-sessions slowstart {{ haproxy_graceful_drain_seconds }}s
{% set stream_hosts = (groups['stream_server_instances'] | default(haproxy_stream_server_fallback)) %}
{% for host in stream_hosts %}
server {{ host }} {{ host }}.lxd:{{ haproxy_stream_server_port }}
{% endfor %}
{% endif %}
{% if haproxy_letsencrypt | default(false) %}
# -----------------------------------------------------------------------
# letsencrypt_backend — proxies HTTP-01 challenges to the
# http-letsencrypt.service sidecar (python -m http.server on
# 127.0.0.1:8888 serving /var/www/letsencrypt/). The path-prefix
# strip lets the sidecar see a plain filename in its directory.
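# Illustrative: GET /.well-known/acme-challenge/AbC123 is rewritten to
# GET /AbC123 and served from /var/www/letsencrypt/AbC123.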
# -----------------------------------------------------------------------
backend letsencrypt_backend
http-request set-path %[path,regsub(/.well-known/acme-challenge/,/)]
server letsencrypt 127.0.0.1:8888
{% endif %}