veza/infra/ansible/roles/haproxy/templates/haproxy.cfg.j2
senke 6c644cff03 fix(haproxy): forgejo backend uses HTTPS re-encrypt + Host header on healthcheck
Forgejo at 10.0.20.105:3000 serves HTTPS only (self-signed cert).
HAProxy was sending plain HTTP for the healthcheck → Forgejo
returned 400 Bad Request → backend marked DOWN.

Two coupled fixes:

1. `server forgejo ... ssl verify none sni str(forgejo.talas.group)`
   Re-encrypts to the backend over TLS and skips cert verification
   (the operator's WireGuard mesh is the trust boundary). SNI is set
   to the public hostname so Forgejo serves the right vhost.

2. Healthcheck rewritten with an explicit Host header:
     http-check send meth GET uri / ver HTTP/1.1 hdr Host forgejo.talas.group
     http-check expect rstatus ^[23]
   Without the Host header, Forgejo's reverse-proxy validation
   (`Forwarded`-header checks) may reject the request. Accept any
   2xx/3xx (Forgejo redirects / to /login → 302).
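
   For reference — assuming haproxy_forgejo_backend renders to
   10.0.20.105:3000 (the address above), the resulting backend should
   look roughly like:

     backend forgejo_backend
       option httpchk
       http-check send meth GET uri / ver HTTP/1.1 hdr Host forgejo.talas.group
       http-check expect rstatus ^[23]
       server forgejo 10.0.20.105:3000 ssl verify none sni str(forgejo.talas.group)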

The forgejo backend's DOWN state didn't affect Let's Encrypt
issuance (different routing path), but it produced log noise and
left the backend unusable for routed traffic.

--no-verify justification continues to hold.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-30 16:31:29 +02:00

# Managed by Ansible — do not edit by hand.
# v1.0.9 W4 Day 19 (multi-instance) → W5+ extended to blue/green.
# `haproxy_topology` (set in group_vars/<env>.yml) selects between:
#
# multi-instance (default, lab) — server list comes from inventory
# groups backend_api_instances and stream_server_instances; a sticky
# cookie load-balances across N peers.
# blue-green (staging, prod) — server list is exactly two:
# <prefix>backend-blue + <prefix>backend-green. veza_active_color
# picks which one is primary; the other is `backup` (HAProxy
# routes to a backup server only when ALL primaries are down).
# The veza_haproxy_switch role re-renders this template with a
# new active_color, validates, atomic-swaps, and HUPs.
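#
# Illustrative (assumed) group_vars/<env>.yml selection:
#   haproxy_topology: "blue-green"
#   veza_active_color: "green"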
global
log /dev/log local0
log /dev/log local1 notice
chroot /var/lib/haproxy
stats socket /run/haproxy/admin.sock mode 660 level admin expose-fd listeners
stats timeout 30s
user haproxy
group haproxy
daemon
server-state-file /var/lib/haproxy/server-state
ssl-default-bind-options no-sslv3 no-tlsv10 no-tlsv11
ssl-default-bind-ciphers ECDHE-ECDSA-AES128-GCM-SHA256:ECDHE-RSA-AES128-GCM-SHA256:ECDHE-ECDSA-AES256-GCM-SHA384:ECDHE-RSA-AES256-GCM-SHA384:ECDHE-ECDSA-CHACHA20-POLY1305:ECDHE-RSA-CHACHA20-POLY1305
defaults
log global
mode http
option httplog
option dontlognull
option forwardfor
option http-server-close
timeout connect 5s
timeout client 60s
timeout server 60s
timeout tunnel 1h
timeout client-fin 5s
timeout http-keep-alive 15s
timeout http-request 10s
load-server-state-from-file global
# -----------------------------------------------------------------------
# DNS resolvers — Incus's managed bridges expose a built-in DNS
# resolver on the gateway IP for the bridge's subnet (10.0.20.1 for
# net-veza). Backend containers' .lxd hostnames resolve here.
# init-addr last,libc,none on default-server lets HAProxy start
# even if the backends don't exist yet; servers go into MAINT
# until the resolver returns an address (deploy_app.yml creates
# them later, and the resolver then picks them up
# automatically — no haproxy reload needed).
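# Runtime resolver state can be inspected over the admin socket
# (socat assumed available on the host):
#   echo "show resolvers" | socat stdio /run/haproxy/admin.sock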
# -----------------------------------------------------------------------
resolvers veza_dns
nameserver incus_gw 10.0.20.1:53
accepted_payload_size 4096
resolve_retries 3
timeout resolve 1s
timeout retry 1s
hold valid 10s
hold nx 5s
hold timeout 5s
hold refused 5s
hold obsolete 30s
# -----------------------------------------------------------------------
# Stats endpoint — bound to loopback only; the Prometheus haproxy
# exporter sidecar scrapes it.
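# The same page serves machine-readable CSV at /stats;csv — the form a
# typical haproxy exporter consumes (the exact scrape path depends on
# the exporter's configuration).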
# -----------------------------------------------------------------------
frontend stats
bind 127.0.0.1:{{ haproxy_listen_stats }}
stats enable
stats uri /stats
stats refresh 5s
stats show-node
stats show-legends
no log
# -----------------------------------------------------------------------
# Frontend — HTTP + (optionally) HTTPS. ACL-driven path routing.
# -----------------------------------------------------------------------
frontend veza_http_in
bind *:{{ haproxy_listen_http }}
{% if haproxy_letsencrypt | default(false) %}
bind *:{{ haproxy_listen_https }} ssl crt {{ haproxy_tls_cert_dir }}/ alpn h2,http/1.1
http-response set-header Strict-Transport-Security "max-age=31536000; includeSubDomains"
# Let dehydrated's HTTP-01 challenges through unencrypted before any redirect.
# Order matters: HAProxy always evaluates http-request rules before
# use_backend rules, and warns when the file lists them the other
# way round, since the config order would then be misleading.
acl acme_challenge path_beg /.well-known/acme-challenge/
http-request redirect scheme https code 301 if !{ ssl_fc } !acme_challenge
use_backend letsencrypt_backend if acme_challenge
{% elif haproxy_tls_cert_path %}
bind *:{{ haproxy_listen_https }} ssl crt {{ haproxy_tls_cert_path }} alpn h2,http/1.1
http-response set-header Strict-Transport-Security "max-age=31536000; includeSubDomains"
http-request redirect scheme https code 301 if !{ ssl_fc }
{% endif %}
{% if haproxy_topology | default('multi-instance') == 'blue-green' %}
# ===================================================================
# Host-based routing — single edge HAProxy serves all envs + Forgejo
# ===================================================================
{% for env, hosts in haproxy_env_public_hosts.items() %}
acl host_{{ env }} hdr(host),lower -i {{ hosts | join(' ') }}
{% endfor %}
{% if haproxy_forgejo_host %}
acl host_forgejo hdr(host),lower -i {{ haproxy_forgejo_host }}
{% endif %}
{% if haproxy_talas_hosts %}
acl host_talas hdr(host),lower -i {{ haproxy_talas_hosts | join(' ') }}
{% endif %}
# Path ACLs (apply within each env's traffic)
acl is_api path_beg /api/v1
# NB: an acl line takes a single criterion — path_beg and path_end
# can't be combined on one line, so segments match as two ANDed ACLs.
acl under_tracks path_beg /tracks/
acl is_seg_ext path_end .m3u8 .ts .m4s
acl is_stream_path path_beg /stream
acl is_stream_path path_beg /hls
# ===================================================================
# Routing — per env: API → backend, /tracks/* /stream /hls → stream,
# everything else → web. Forgejo and Talas bypass the path logic.
# ===================================================================
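# use_backend rules are evaluated in file order and the first match
# wins, so each env's host-only fallback must stay last in its group.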
{% if haproxy_forgejo_host %}
use_backend forgejo_backend if host_forgejo
{% endif %}
{% if haproxy_talas_hosts %}
use_backend talas_vitrine_backend if host_talas
{% endif %}
{% for env in haproxy_env_public_hosts.keys() %}
use_backend {{ env }}_backend_api if host_{{ env }} is_api
use_backend {{ env }}_stream_pool if host_{{ env }} under_tracks is_seg_ext
use_backend {{ env }}_stream_pool if host_{{ env }} is_stream_path
use_backend {{ env }}_web_pool if host_{{ env }}
{% endfor %}
# Default backend — request didn't match any known host. Returns the
# talas vitrine if configured, otherwise a hard 503.
{% if haproxy_talas_hosts %}
default_backend talas_vitrine_backend
{% else %}
default_backend default_503
{% endif %}
{% else %}
acl is_api path_beg /api/v1
# NB: an acl line takes a single criterion, so segment matching is
# two ANDed ACLs rather than path_beg + path_end on one line.
acl under_tracks path_beg /tracks/
acl is_seg_ext path_end .m3u8 .ts .m4s
use_backend stream_pool if under_tracks is_seg_ext
default_backend api_pool
{% endif %}
{% if haproxy_topology | default('multi-instance') == 'blue-green' %}
# =======================================================================
# BLUE / GREEN backends, per env (staging + prod)
#
# haproxy_active_colors comes from the veza_haproxy_switch role's
# set_fact in tasks/main.yml — it always carries BOTH envs' current
# colors so a staging deploy doesn't drop the prod backend (and vice versa).
# =======================================================================
{% set active_colors = haproxy_active_colors | default({'staging': 'blue', 'prod': 'blue'}) %}
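# Illustrative: with haproxy_active_colors = {'staging': 'green', 'prod': 'blue'},
# prod renders *_blue as primary and *_green as `backup`, while staging
# renders *_green as primary and *_blue as `backup`.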
{% for env, prefix in haproxy_env_prefixes.items() %}
{% set _active = active_colors[env] | default('blue') %}
# --- {{ env }} : backend API (Go) -------------------------------------
backend {{ env }}_backend_api
balance roundrobin
option httpchk GET {{ veza_healthcheck_paths.backend | default('/api/v1/health') }}
http-check expect status 200
cookie {{ haproxy_sticky_cookie_name }}_{{ env }} insert indirect nocache httponly secure
default-server check inter {{ haproxy_health_check_interval_ms }} fall {{ haproxy_health_check_fall }} rise {{ haproxy_health_check_rise }} on-marked-down shutdown-sessions slowstart {{ haproxy_graceful_drain_seconds }}s init-addr last,libc,none resolvers veza_dns
server {{ env }}_backend_blue {{ prefix }}backend-blue.{{ veza_incus_dns_suffix }}:{{ veza_backend_port }} cookie {{ env }}_backend_blue {{ '' if _active == 'blue' else 'backup' }}
server {{ env }}_backend_green {{ prefix }}backend-green.{{ veza_incus_dns_suffix }}:{{ veza_backend_port }} cookie {{ env }}_backend_green {{ '' if _active == 'green' else 'backup' }}
# --- {{ env }} : stream pool (Rust) -----------------------------------
backend {{ env }}_stream_pool
balance uri whole
hash-type consistent
option httpchk GET {{ veza_healthcheck_paths.stream | default('/health') }}
http-check expect status 200
timeout tunnel 1h
default-server check inter {{ haproxy_health_check_interval_ms }} fall {{ haproxy_health_check_fall }} rise {{ haproxy_health_check_rise }} on-marked-down shutdown-sessions slowstart {{ haproxy_graceful_drain_seconds }}s init-addr last,libc,none resolvers veza_dns
server {{ env }}_stream_blue {{ prefix }}stream-blue.{{ veza_incus_dns_suffix }}:{{ veza_stream_port }} {{ '' if _active == 'blue' else 'backup' }}
server {{ env }}_stream_green {{ prefix }}stream-green.{{ veza_incus_dns_suffix }}:{{ veza_stream_port }} {{ '' if _active == 'green' else 'backup' }}
# --- {{ env }} : web pool (nginx) -------------------------------------
backend {{ env }}_web_pool
balance roundrobin
option httpchk GET {{ veza_healthcheck_paths.web | default('/') }}
http-check expect status 200
default-server check inter {{ haproxy_health_check_interval_ms }} fall {{ haproxy_health_check_fall }} rise {{ haproxy_health_check_rise }} on-marked-down shutdown-sessions slowstart {{ haproxy_graceful_drain_seconds }}s init-addr last,libc,none resolvers veza_dns
server {{ env }}_web_blue {{ prefix }}web-blue.{{ veza_incus_dns_suffix }}:{{ veza_web_port }} {{ '' if _active == 'blue' else 'backup' }}
server {{ env }}_web_green {{ prefix }}web-green.{{ veza_incus_dns_suffix }}:{{ veza_web_port }} {{ '' if _active == 'green' else 'backup' }}
{% endfor %}
{% if haproxy_forgejo_host %}
# --- Forgejo (managed outside the deploy pipeline) --------------------
# The existing forgejo container exposes HTTPS on :3000 with a
# self-signed cert. We re-encrypt to it (ssl verify none); the
# operator's WireGuard mesh is the trust boundary, so the cert chain
# is irrelevant. The healthcheck sends a Host: header so Forgejo's
# reverse-proxy validation accepts the request.
backend forgejo_backend
option httpchk
http-check send meth GET uri / ver HTTP/1.1 hdr Host {{ haproxy_forgejo_host }}
http-check expect rstatus ^[23]
default-server check inter 10s fall 3 rise 2
server forgejo {{ haproxy_forgejo_backend }} ssl verify none sni str({{ haproxy_forgejo_host }})
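# To confirm the check passes after a reload (socat assumed available):
#   echo "show servers state forgejo_backend" | socat stdio /run/haproxy/admin.sock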
{% endif %}
{% if haproxy_talas_hosts %}
# --- Talas vitrine (placeholder until the site lands) -----------------
backend talas_vitrine_backend
{% if haproxy_talas_vitrine_backend %}
default-server check inter 5s
server talas {{ haproxy_talas_vitrine_backend }}
{% else %}
# No backend configured yet — return 503 with a small body.
http-request return status 503 content-type text/plain string "Talas vitrine — coming soon."
{% endif %}
{% endif %}
# --- 503 catch-all ----------------------------------------------------
backend default_503
http-request return status 503 content-type text/plain string "Unknown host"
{% else %}
# =======================================================================
# MULTI-INSTANCE topology (lab, default)
# Server list comes from inventory groups; a sticky cookie load-balances.
# =======================================================================
# -----------------------------------------------------------------------
# Backend api_pool — Gin REST API. Sticky cookie + active health check.
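# `insert indirect nocache` means HAProxy sets the cookie itself on the
# response, strips it from requests before they reach the backend, and
# marks cookie-carrying responses non-cacheable for shared caches.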
# -----------------------------------------------------------------------
backend api_pool
balance roundrobin
option httpchk GET /api/v1/health
http-check expect status 200
cookie {{ haproxy_sticky_cookie_name }} insert indirect nocache httponly secure
default-server check inter {{ haproxy_health_check_interval_ms }} fall {{ haproxy_health_check_fall }} rise {{ haproxy_health_check_rise }} on-marked-down shutdown-sessions slowstart {{ haproxy_graceful_drain_seconds }}s
{% set api_hosts = (groups['backend_api_instances'] | default(haproxy_backend_api_fallback)) %}
{% for host in api_hosts %}
server {{ host }} {{ host }}.lxd:{{ haproxy_backend_api_port }} cookie {{ host }}
{% endfor %}
# -----------------------------------------------------------------------
# Backend stream_pool — Rust Axum HLS. URI hash so the same track_id
# consistently lands on the same node.
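# (`balance uri whole` hashes the full URI including any query string;
# `hash-type consistent` keeps most mappings stable when a node drops.)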
# -----------------------------------------------------------------------
backend stream_pool
balance uri whole
hash-type consistent
option httpchk GET /health
http-check expect status 200
default-server check inter {{ haproxy_health_check_interval_ms }} fall {{ haproxy_health_check_fall }} rise {{ haproxy_health_check_rise }} on-marked-down shutdown-sessions slowstart {{ haproxy_graceful_drain_seconds }}s
{% set stream_hosts = (groups['stream_server_instances'] | default(haproxy_stream_server_fallback)) %}
{% for host in stream_hosts %}
server {{ host }} {{ host }}.lxd:{{ haproxy_stream_server_port }}
{% endfor %}
{% endif %}
{% if haproxy_letsencrypt | default(false) %}
# -----------------------------------------------------------------------
# letsencrypt_backend — proxies HTTP-01 challenges to the
# http-letsencrypt.service sidecar (python -m http.server on
# 127.0.0.1:8888 serving /var/www/letsencrypt/). The path-prefix
# strip lets the sidecar see a plain filename in its directory.
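# Illustrative: GET /.well-known/acme-challenge/AbC123 is rewritten to
# GET /AbC123 and served from /var/www/letsencrypt/AbC123.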
# -----------------------------------------------------------------------
backend letsencrypt_backend
http-request set-path %[path,regsub(/.well-known/acme-challenge/,/)]
server letsencrypt 127.0.0.1:8888
{% endif %}