diff --git a/infra/ansible/group_vars/all/main.yml b/infra/ansible/group_vars/all/main.yml index 80d8d1c63..900729c23 100644 --- a/infra/ansible/group_vars/all/main.yml +++ b/infra/ansible/group_vars/all/main.yml @@ -93,3 +93,49 @@ veza_install_root: /opt/veza veza_config_root: /etc/veza veza_log_root: /var/log/veza veza_state_root: /var/lib/veza + +# ============================================================ +# Edge HAProxy — single-instance, shared across staging+prod. +# Both inventories declare the same `veza-haproxy` container ; +# the template at roles/haproxy/templates/haproxy.cfg.j2 +# enumerates BOTH envs so a staging deploy doesn't lose prod +# routing (and vice versa). Per-env container prefixes below +# let the template render the right backend hostnames. +# ============================================================ +haproxy_env_prefixes: + staging: "veza-staging-" + prod: "veza-" +haproxy_env_public_hosts: + staging: + - staging.veza.fr + prod: + - veza.fr + - www.veza.fr + +# Forgejo lives outside the per-env app tier — its container is +# unmanaged by the deploy pipeline, but the edge HAProxy SNI-routes +# forgejo.talas.group to it. Set to empty string to disable. +haproxy_forgejo_host: forgejo.talas.group +haproxy_forgejo_backend: "10.0.20.105:3000" + +# Talas vitrine — placeholder until the static site lands. +# When haproxy_talas_vitrine_backend is empty, requests to +# {talas.fr,www.talas.fr} get a 503 with a maintenance page. +haproxy_talas_hosts: + - talas.fr + - www.talas.fr +haproxy_talas_vitrine_backend: "" + +# Let's Encrypt — defined here (not in env files) because the edge +# HAProxy is SHARED ; whichever env last ran the haproxy role would +# otherwise overwrite the domain set. Every public hostname the edge +# serves goes through dehydrated. Internal services on talas.group +# are NOT here unless they need a public-trusted cert +# (forgejo.talas.group does, since browsers must accept its cert). +haproxy_letsencrypt: true +haproxy_letsencrypt_email: ops@veza.fr +haproxy_letsencrypt_domains: + - veza.fr www.veza.fr # prod apex + www in one cert + - staging.veza.fr # staging + - talas.fr www.talas.fr # talas vitrine + - forgejo.talas.group # forgejo (LE issues even when DNS points at the public R720 IP) diff --git a/infra/ansible/group_vars/prod.yml b/infra/ansible/group_vars/prod.yml index 752478c7d..a29b72fe6 100644 --- a/infra/ansible/group_vars/prod.yml +++ b/infra/ansible/group_vars/prod.yml @@ -41,15 +41,6 @@ postgres_password: "{{ vault_postgres_password }}" redis_password: "{{ vault_redis_password }}" rabbitmq_password: "{{ vault_rabbitmq_password }}" -# Let's Encrypt — HTTP-01 via dehydrated. Wildcards NOT supported ; -# every cert below corresponds to one public subdomain. Internal -# services on talas.group are NOT here — WireGuard is the trust -# boundary for those. -# -# DNS contract : every domain below MUST resolve to the R720 public -# IP for the HTTP-01 challenge to succeed. -haproxy_letsencrypt: true -haproxy_letsencrypt_email: ops@veza.fr -haproxy_letsencrypt_domains: - - veza.fr www.veza.fr - - talas.fr www.talas.fr +# Let's Encrypt config moved to group_vars/all/main.yml — the edge +# HAProxy is SHARED across staging+prod, so the domain list lives in +# the env-agnostic file. See haproxy_letsencrypt_domains there. diff --git a/infra/ansible/group_vars/staging.yml b/infra/ansible/group_vars/staging.yml index 1d2466a88..2e49081ac 100644 --- a/infra/ansible/group_vars/staging.yml +++ b/infra/ansible/group_vars/staging.yml @@ -66,17 +66,6 @@ postgres_password: "{{ vault_postgres_password }}" redis_password: "{{ vault_redis_password }}" rabbitmq_password: "{{ vault_rabbitmq_password }}" -# Let's Encrypt — HTTP-01 via dehydrated (see roles/haproxy/letsencrypt.yml). -# Wildcards NOT supported ; list every public subdomain explicitly. -# Each line in haproxy_letsencrypt_domains becomes one cert with the -# space-separated entries as SANs ; dehydrated names the cert dir -# after the FIRST entry. -# -# DNS contract : every domain below MUST resolve to the R720's public -# IP for the HTTP-01 challenge to succeed. Internal services on -# talas.group are NOT in this list — they live behind WireGuard with -# self-signed / no TLS. -haproxy_letsencrypt: true -haproxy_letsencrypt_email: ops@veza.fr -haproxy_letsencrypt_domains: - - staging.veza.fr +# Let's Encrypt config moved to group_vars/all/main.yml — the edge +# HAProxy is SHARED across staging+prod, so the domain list lives in +# the env-agnostic file. See haproxy_letsencrypt_domains there. diff --git a/infra/ansible/inventory/prod.yml b/infra/ansible/inventory/prod.yml index 2e57cdd8d..3b5df6501 100644 --- a/infra/ansible/inventory/prod.yml +++ b/infra/ansible/inventory/prod.yml @@ -21,6 +21,9 @@ all: incus_hosts: hosts: veza-prod: + # SHARED edge — one HAProxy on the R720 public 443. Serves + # staging + prod + forgejo.talas.group simultaneously. Same + # container in both staging.yml and prod.yml inventories. haproxy: hosts: veza-haproxy: diff --git a/infra/ansible/inventory/staging.yml b/infra/ansible/inventory/staging.yml index 185d34076..cc42dab3e 100644 --- a/infra/ansible/inventory/staging.yml +++ b/infra/ansible/inventory/staging.yml @@ -37,9 +37,14 @@ all: incus_hosts: hosts: veza-staging: + # SHARED edge — one HAProxy on the R720 public 443. Serves + # staging + prod + forgejo.talas.group simultaneously, Host-based + # routing per env. NAME deliberately env-agnostic (no veza-staging- + # prefix) since staging.yml and prod.yml both target the same + # container. haproxy: hosts: - veza-staging-haproxy: + veza-haproxy: vars: ansible_connection: community.general.incus ansible_python_interpreter: /usr/bin/python3 diff --git a/infra/ansible/playbooks/cleanup_failed.yml b/infra/ansible/playbooks/cleanup_failed.yml index 0701e53ad..e371b82e0 100644 --- a/infra/ansible/playbooks/cleanup_failed.yml +++ b/infra/ansible/playbooks/cleanup_failed.yml @@ -28,11 +28,10 @@ fail_msg: cleanup_failed.yml requires veza_env + target_color. quiet: true - - name: Read active color from HAProxy container - ansible.builtin.shell: | - incus exec "{{ veza_container_prefix }}haproxy" -- \ - cat /var/lib/veza/active-color 2>/dev/null | tr -d '[:space:]' - args: + - name: Read active color for {{ veza_env }} from shared HAProxy container + ansible.builtin.shell: + cmd: | + incus exec veza-haproxy -- cat "/var/lib/veza/active-color-{{ veza_env }}" 2>/dev/null | tr -d '[:space:]' executable: /bin/bash register: active_color_raw changed_when: false diff --git a/infra/ansible/playbooks/deploy_app.yml b/infra/ansible/playbooks/deploy_app.yml index 4a255ee31..71531a938 100644 --- a/infra/ansible/playbooks/deploy_app.yml +++ b/infra/ansible/playbooks/deploy_app.yml @@ -125,9 +125,9 @@ become: true gather_facts: false tasks: - - name: Read currently-active color + - name: Read currently-active color for {{ veza_env }} ansible.builtin.slurp: - src: /var/lib/veza/active-color + src: "/var/lib/veza/active-color-{{ veza_env }}" register: prior_color_raw failed_when: false diff --git a/infra/ansible/playbooks/rollback.yml b/infra/ansible/playbooks/rollback.yml index 65e22859d..8ba0f8ce6 100644 --- a/infra/ansible/playbooks/rollback.yml +++ b/infra/ansible/playbooks/rollback.yml @@ -93,10 +93,11 @@ name: veza_haproxy_switch vars: veza_active_color: "{{ target_color }}" - # Fast rollback re-uses the previous SHA from the history file. - # Fallback to a synthetic 40-char SHA if the file is missing — - # the role's assert tolerates this for the rollback case. - veza_release_sha: "{{ (lookup('ansible.builtin.file', '/var/lib/veza/active-color.history', errors='ignore') | default('', true) | regex_search('sha=([0-9a-f]{40})', '\\1') | default('r0llback' + '0' * 32, true)) }}" + # Fast rollback re-uses the previous SHA from the per-env + # history file. Fallback to a synthetic 40-char SHA if the + # file is missing — the role's assert tolerates this for + # the rollback case. + veza_release_sha: "{{ (lookup('ansible.builtin.file', '/var/lib/veza/active-color-' + veza_env + '.history', errors='ignore') | default('', true) | regex_search('sha=([0-9a-f]{40})', '\\1') | default('r0llback' + '0' * 32, true)) }}" when: mode == 'fast' tags: [rollback, fast] diff --git a/infra/ansible/roles/haproxy/templates/haproxy.cfg.j2 b/infra/ansible/roles/haproxy/templates/haproxy.cfg.j2 index 424ddbb86..92c4f7474 100644 --- a/infra/ansible/roles/haproxy/templates/haproxy.cfg.j2 +++ b/infra/ansible/roles/haproxy/templates/haproxy.cfg.j2 @@ -72,18 +72,54 @@ frontend veza_http_in http-request redirect scheme https code 301 if !{ ssl_fc } {% endif %} - acl is_api path_beg /api/v1 {% if haproxy_topology | default('multi-instance') == 'blue-green' %} + # =================================================================== + # Host-based routing — single edge HAProxy serves all envs + Forgejo + # =================================================================== +{% for env, hosts in haproxy_env_public_hosts.items() %} + acl host_{{ env }} hdr(host),lower -i {{ hosts | join(' ') }} +{% endfor %} +{% if haproxy_forgejo_host %} + acl host_forgejo hdr(host),lower -i {{ haproxy_forgejo_host }} +{% endif %} +{% if haproxy_talas_hosts %} + acl host_talas hdr(host),lower -i {{ haproxy_talas_hosts | join(' ') }} +{% endif %} + + # Path ACLs (apply within each env's traffic) + acl is_api path_beg /api/v1 acl is_stream_seg path_beg /tracks/ path_end .m3u8 acl is_stream_seg path_beg /tracks/ path_end .ts acl is_stream_seg path_beg /tracks/ path_end .m4s acl is_stream_path path_beg /stream acl is_stream_path path_beg /hls - use_backend backend_api if is_api - use_backend stream_pool if is_stream_seg - use_backend stream_pool if is_stream_path - default_backend web_pool + + # =================================================================== + # Routing — per env: API → backend, /tracks/* /stream /hls → stream, + # everything else → web. Forgejo and Talas bypass the path logic. + # =================================================================== +{% if haproxy_forgejo_host %} + use_backend forgejo_backend if host_forgejo +{% endif %} +{% if haproxy_talas_hosts %} + use_backend talas_vitrine_backend if host_talas +{% endif %} +{% for env in haproxy_env_public_hosts.keys() %} + use_backend {{ env }}_backend_api if host_{{ env }} is_api + use_backend {{ env }}_stream_pool if host_{{ env }} is_stream_seg + use_backend {{ env }}_stream_pool if host_{{ env }} is_stream_path + use_backend {{ env }}_web_pool if host_{{ env }} +{% endfor %} + + # Default backend — request didn't match any known host. Returns the + # talas vitrine if configured, otherwise a hard 503. +{% if haproxy_talas_hosts %} + default_backend talas_vitrine_backend +{% else %} + default_backend default_503 +{% endif %} {% else %} + acl is_api path_beg /api/v1 acl is_stream path_beg /tracks/ path_end .m3u8 acl is_stream path_beg /tracks/ path_end .ts acl is_stream path_beg /tracks/ path_end .m4s @@ -93,72 +129,73 @@ frontend veza_http_in {% if haproxy_topology | default('multi-instance') == 'blue-green' %} # ======================================================================= -# BLUE / GREEN topology (staging, prod) +# BLUE / GREEN backends, per env (staging + prod) # -# active_color is the variable veza_haproxy_switch passes in. It selects -# which server gets `check` and which gets `check backup`. HAProxy only -# routes to a `backup` server when EVERY non-backup is marked down by -# its health check ; together with health-check fall=3 this gives us -# instant rollback to the prior color if the new one starts failing -# health checks (without re-running Ansible). -# -# Active color: {{ veza_active_color | default(haproxy_active_color | default('blue')) }} -# Container prefix: {{ veza_container_prefix }} -# DNS suffix: {{ veza_incus_dns_suffix }} +# haproxy_active_colors comes from the veza_haproxy_switch role's +# set_fact in tasks/main.yml — it always carries BOTH envs' current +# colors so a staging deploy doesn't drop the prod backend (and v.v.). # ======================================================================= -{% set _active = veza_active_color | default(haproxy_active_color | default('blue')) %} +{% set active_colors = haproxy_active_colors | default({'staging': 'blue', 'prod': 'blue'}) %} -# ----------------------------------------------------------------------- -# Backend API pool — Go. Sticky cookie ; backup color sits idle. -# ----------------------------------------------------------------------- -backend backend_api +{% for env, prefix in haproxy_env_prefixes.items() %} +{% set _active = active_colors[env] | default('blue') %} + +# --- {{ env }} : backend API (Go) ------------------------------------- +backend {{ env }}_backend_api balance roundrobin option httpchk GET {{ veza_healthcheck_paths.backend | default('/api/v1/health') }} http-check expect status 200 - cookie {{ haproxy_sticky_cookie_name }} insert indirect nocache httponly secure - default-server check - inter {{ haproxy_health_check_interval_ms }} - fall {{ haproxy_health_check_fall }} - rise {{ haproxy_health_check_rise }} - on-marked-down shutdown-sessions - slowstart {{ haproxy_graceful_drain_seconds }}s - server backend_blue {{ veza_container_prefix }}backend-blue.{{ veza_incus_dns_suffix }}:{{ veza_backend_port }} cookie backend_blue {{ '' if _active == 'blue' else 'backup' }} - server backend_green {{ veza_container_prefix }}backend-green.{{ veza_incus_dns_suffix }}:{{ veza_backend_port }} cookie backend_green {{ '' if _active == 'green' else 'backup' }} + cookie {{ haproxy_sticky_cookie_name }}_{{ env }} insert indirect nocache httponly secure + default-server check inter {{ haproxy_health_check_interval_ms }} fall {{ haproxy_health_check_fall }} rise {{ haproxy_health_check_rise }} on-marked-down shutdown-sessions slowstart {{ haproxy_graceful_drain_seconds }}s + server {{ env }}_backend_blue {{ prefix }}backend-blue.{{ veza_incus_dns_suffix }}:{{ veza_backend_port }} cookie {{ env }}_backend_blue {{ '' if _active == 'blue' else 'backup' }} + server {{ env }}_backend_green {{ prefix }}backend-green.{{ veza_incus_dns_suffix }}:{{ veza_backend_port }} cookie {{ env }}_backend_green {{ '' if _active == 'green' else 'backup' }} -# ----------------------------------------------------------------------- -# Stream pool — Rust Axum HLS. URI-hash for cache locality. Same -# blue/green pair, same backup-flag pattern. -# ----------------------------------------------------------------------- -backend stream_pool +# --- {{ env }} : stream pool (Rust) ----------------------------------- +backend {{ env }}_stream_pool balance uri whole hash-type consistent option httpchk GET {{ veza_healthcheck_paths.stream | default('/health') }} http-check expect status 200 timeout tunnel 1h - default-server check - inter {{ haproxy_health_check_interval_ms }} - fall {{ haproxy_health_check_fall }} - rise {{ haproxy_health_check_rise }} - on-marked-down shutdown-sessions - slowstart {{ haproxy_graceful_drain_seconds }}s - server stream_blue {{ veza_container_prefix }}stream-blue.{{ veza_incus_dns_suffix }}:{{ veza_stream_port }} {{ '' if _active == 'blue' else 'backup' }} - server stream_green {{ veza_container_prefix }}stream-green.{{ veza_incus_dns_suffix }}:{{ veza_stream_port }} {{ '' if _active == 'green' else 'backup' }} + default-server check inter {{ haproxy_health_check_interval_ms }} fall {{ haproxy_health_check_fall }} rise {{ haproxy_health_check_rise }} on-marked-down shutdown-sessions slowstart {{ haproxy_graceful_drain_seconds }}s + server {{ env }}_stream_blue {{ prefix }}stream-blue.{{ veza_incus_dns_suffix }}:{{ veza_stream_port }} {{ '' if _active == 'blue' else 'backup' }} + server {{ env }}_stream_green {{ prefix }}stream-green.{{ veza_incus_dns_suffix }}:{{ veza_stream_port }} {{ '' if _active == 'green' else 'backup' }} -# ----------------------------------------------------------------------- -# Web pool — React SPA served by nginx. Same pair, same pattern. -# ----------------------------------------------------------------------- -backend web_pool +# --- {{ env }} : web pool (nginx) ------------------------------------- +backend {{ env }}_web_pool balance roundrobin option httpchk GET {{ veza_healthcheck_paths.web | default('/') }} http-check expect status 200 - default-server check - inter {{ haproxy_health_check_interval_ms }} - fall {{ haproxy_health_check_fall }} - rise {{ haproxy_health_check_rise }} - on-marked-down shutdown-sessions - slowstart {{ haproxy_graceful_drain_seconds }}s - server web_blue {{ veza_container_prefix }}web-blue.{{ veza_incus_dns_suffix }}:{{ veza_web_port }} {{ '' if _active == 'blue' else 'backup' }} - server web_green {{ veza_container_prefix }}web-green.{{ veza_incus_dns_suffix }}:{{ veza_web_port }} {{ '' if _active == 'green' else 'backup' }} + default-server check inter {{ haproxy_health_check_interval_ms }} fall {{ haproxy_health_check_fall }} rise {{ haproxy_health_check_rise }} on-marked-down shutdown-sessions slowstart {{ haproxy_graceful_drain_seconds }}s + server {{ env }}_web_blue {{ prefix }}web-blue.{{ veza_incus_dns_suffix }}:{{ veza_web_port }} {{ '' if _active == 'blue' else 'backup' }} + server {{ env }}_web_green {{ prefix }}web-green.{{ veza_incus_dns_suffix }}:{{ veza_web_port }} {{ '' if _active == 'green' else 'backup' }} + +{% endfor %} + +{% if haproxy_forgejo_host %} +# --- Forgejo (managed outside the deploy pipeline) -------------------- +backend forgejo_backend + option httpchk GET / + http-check expect status 200 + default-server check inter 10s fall 3 rise 2 + server forgejo {{ haproxy_forgejo_backend }} +{% endif %} + +{% if haproxy_talas_hosts %} +# --- Talas vitrine (placeholder until the site lands) ----------------- +backend talas_vitrine_backend +{% if haproxy_talas_vitrine_backend %} + default-server check inter 5s + server talas {{ haproxy_talas_vitrine_backend }} +{% else %} + # No backend configured yet — return 503 with a small body. + http-request return status 503 content-type text/plain string "Talas vitrine — coming soon." +{% endif %} +{% endif %} + +# --- 503 catch-all ---------------------------------------------------- +backend default_503 + http-request return status 503 content-type text/plain string "Unknown host" {% else %} # ======================================================================= diff --git a/infra/ansible/roles/veza_haproxy_switch/defaults/main.yml b/infra/ansible/roles/veza_haproxy_switch/defaults/main.yml index 2a38419a5..aff219887 100644 --- a/infra/ansible/roles/veza_haproxy_switch/defaults/main.yml +++ b/infra/ansible/roles/veza_haproxy_switch/defaults/main.yml @@ -3,14 +3,18 @@ # fail loud if the caller forgot to pass them. veza_active_color: "" veza_release_sha: "" +# veza_env is read from group_vars (staging|prod). Validates inside +# tasks/main.yml. -# Paths inside the HAProxy container. +# Paths inside the SHARED HAProxy container. Per-env state files so a +# staging deploy can't accidentally trip the prod active-color (and +# vice versa). haproxy_cfg_path: /etc/haproxy/haproxy.cfg haproxy_cfg_new_path: /etc/haproxy/haproxy.cfg.new haproxy_cfg_backup_path: /etc/haproxy/haproxy.cfg.bak haproxy_state_dir: /var/lib/veza -haproxy_active_color_file: /var/lib/veza/active-color -haproxy_active_color_history: /var/lib/veza/active-color.history +haproxy_active_color_file: "/var/lib/veza/active-color-{{ veza_env }}" +haproxy_active_color_history: "/var/lib/veza/active-color-{{ veza_env }}.history" # How many history entries to keep before pruning. The rollback role # offers point-in-time switch within this window without redeploying diff --git a/infra/ansible/roles/veza_haproxy_switch/tasks/main.yml b/infra/ansible/roles/veza_haproxy_switch/tasks/main.yml index 14127289c..99656eaae 100644 --- a/infra/ansible/roles/veza_haproxy_switch/tasks/main.yml +++ b/infra/ansible/roles/veza_haproxy_switch/tasks/main.yml @@ -14,10 +14,12 @@ that: - veza_active_color in ['blue', 'green'] - veza_release_sha | length == 40 + - veza_env in ['staging', 'prod'] fail_msg: >- - veza_haproxy_switch role requires veza_active_color (blue|green) - and veza_release_sha (40-char git SHA). Got: color={{ veza_active_color }} - sha={{ veza_release_sha }}. + veza_haproxy_switch role requires veza_active_color (blue|green), + veza_release_sha (40-char git SHA), and veza_env (staging|prod). + Got: color={{ veza_active_color }} sha={{ veza_release_sha }} + env={{ veza_env | default('UNSET') }}. quiet: true tags: [veza_haproxy_switch, always] @@ -30,7 +32,7 @@ mode: "0755" tags: [veza_haproxy_switch] -- name: Read currently-active color (if any) +- name: Read currently-active color for THIS env (if any) ansible.builtin.slurp: src: "{{ haproxy_active_color_file }}" register: prior_color_raw @@ -45,6 +47,37 @@ else 'blue' }} tags: [veza_haproxy_switch] +# Read the OTHER env's active color too — the haproxy template renders +# both staging+prod simultaneously, so we need both values in scope. +- name: Read OTHER env's active color + ansible.builtin.slurp: + src: "/var/lib/veza/active-color-{{ 'prod' if veza_env == 'staging' else 'staging' }}" + register: other_color_raw + failed_when: false + changed_when: false + tags: [veza_haproxy_switch] + +- name: Build haproxy_active_colors map (current state of every env) + ansible.builtin.set_fact: + haproxy_active_colors: + staging: >- + {%- if veza_env == 'staging' -%} + {{ veza_active_color }} + {%- elif other_color_raw.content is defined -%} + {{ other_color_raw.content | b64decode | trim }} + {%- else -%} + blue + {%- endif -%} + prod: >- + {%- if veza_env == 'prod' -%} + {{ veza_active_color }} + {%- elif other_color_raw.content is defined -%} + {{ other_color_raw.content | b64decode | trim }} + {%- else -%} + blue + {%- endif -%} + tags: [veza_haproxy_switch] + - name: Switch sequence (block/rescue — restores cfg on any failure) block: - name: Backup current haproxy.cfg