End-to-end CI deploy workflow. Triggers + jobs:

  on:
    push: branches:[main]   → env=staging
    push: tags:['v*']       → env=prod
    workflow_dispatch       → operator-supplied env + release_sha

  resolve        ubuntu-latest          Compute env + 40-char SHA from the trigger;
                                        output as a job output for downstream jobs.
  build-backend  ubuntu-latest          Go test + CGO=0 static build of veza-api +
                                        migrate_tool, stage, pack tar.zst, PUT to the
                                        Forgejo Package Registry.
  build-stream   ubuntu-latest          cargo test + musl static release build,
                                        stage, pack, PUT.
  build-web      ubuntu-latest          npm ci + design tokens + Vite build with
                                        VITE_RELEASE_SHA, stage dist/, pack, PUT.
  deploy         [self-hosted, incus]   ansible-playbook deploy_data.yml then
                                        deploy_app.yml against the resolved env's
                                        inventory. Vault password from secret →
                                        tmpfile → --vault-password-file → shred in
                                        `if: always()`. Ansible logs uploaded as an
                                        artifact (30-day retention) for forensics.
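
A condensed sketch of the trigger → env/SHA resolution (job and step names
and the exact expressions are illustrative, not the verbatim workflow file):

  on:
    push:
      branches: [main]
      tags: ['v*']
    workflow_dispatch:
      inputs:
        env: { required: true }
        release_sha: { required: true }

  jobs:
    resolve:
      runs-on: ubuntu-latest
      outputs:
        env: ${{ steps.pick.outputs.env }}
        release_sha: ${{ steps.pick.outputs.release_sha }}
      steps:
        - id: pick
          run: |
            # workflow_dispatch wins; otherwise tag pushes mean prod, branch pushes staging
            if [ -n "${{ inputs.env }}" ]; then
              echo "env=${{ inputs.env }}" >> "$GITHUB_OUTPUT"
              echo "release_sha=${{ inputs.release_sha }}" >> "$GITHUB_OUTPUT"
            elif [[ "$GITHUB_REF" == refs/tags/v* ]]; then
              echo "env=prod" >> "$GITHUB_OUTPUT"
              echo "release_sha=$GITHUB_SHA" >> "$GITHUB_OUTPUT"
            else
              echo "env=staging" >> "$GITHUB_OUTPUT"
              echo "release_sha=$GITHUB_SHA" >> "$GITHUB_OUTPUT"
            fi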
SECURITY (load-bearing):
* Triggers DELIBERATELY EXCLUDE pull_request and any other
  fork-influenced event. The `incus` self-hosted runner has root-
  equivalent access to the host via the mounted unix socket; opening
  PR-from-fork triggers would let arbitrary code run `incus exec`
  against the host.
* concurrency.group keys on env so two pushes can't race the same
  deploy; cancel-in-progress kills the older build (the newer commit
  is what the operator wanted). See the sketch after this list.
* FORGEJO_REGISTRY_TOKEN + ANSIBLE_VAULT_PASSWORD are repo
  secrets — exposed to the job environment and a tmpfile only,
  never echoed to logs.
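
A condensed sketch of the concurrency guard + vault-password hygiene on the
deploy job (step names, the tmpfile path, and the inventory layout are
assumptions; the committed workflow file is authoritative):

  deploy:
    needs: [resolve, build-backend, build-stream, build-web]
    runs-on: [self-hosted, incus]
    concurrency:
      group: deploy-${{ needs.resolve.outputs.env }}
      cancel-in-progress: true
    steps:
      - name: Write vault password to a tmpfile
        run: |
          umask 077
          printf '%s' "${{ secrets.ANSIBLE_VAULT_PASSWORD }}" > /tmp/vault-pass
      - name: Run the playbooks
        run: >
          ansible-playbook
          -i inventories/${{ needs.resolve.outputs.env }}/hosts.yml
          -e veza_env=${{ needs.resolve.outputs.env }}
          -e veza_release_sha=${{ needs.resolve.outputs.release_sha }}
          --vault-password-file /tmp/vault-pass
          deploy_data.yml deploy_app.yml
      - name: Shred vault password
        if: always()
        run: shred -u /tmp/vault-pass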
Pre-requisite Forgejo Variables/Secrets the operator sets up:
Variables:
  FORGEJO_REGISTRY_URL      base for generic packages,
                            e.g. https://forgejo.veza.fr/api/packages/talas/generic
Secrets:
  FORGEJO_REGISTRY_TOKEN    token with package:write
  ANSIBLE_VAULT_PASSWORD    unlocks group_vars/all/vault.yml
Self-hosted runner expectation:
Runs in the srv-102v container, which has /var/lib/incus/unix.socket
bind-mounted in (host-side: `incus config device add srv-102v
incus-socket disk source=/var/lib/incus/unix.socket
path=/var/lib/incus/unix.socket`). The runner is registered with the
`incus` label so the deploy job pins to it.
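
A one-step sanity check that the runner can actually drive Incus through
that socket (illustrative only; not part of the committed workflow):

  - name: Confirm the Incus socket is reachable from the runner
    run: incus list --format csv >/dev/null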
Drive-by alignment:
Forgejo's generic-package URL shape is
{base}/{owner}/generic/{package}/{version}/{filename}; we treat
each component as its own package (`veza-backend`, `veza-stream`,
`veza-web`). Updated three references (group_vars/all/main.yml's
veza_artifact_base_url, veza_app/defaults/main.yml's
veza_app_artifact_url, deploy_app.yml's tools-container fetch)
to the `veza-<component>` package naming, so the URLs the
workflow uploads to match the URLs Ansible downloads from.
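
For reference, the upload side of that URL contract as a single workflow
step (a sketch; the committed build jobs are authoritative):

  - name: PUT backend tarball to the Forgejo generic package registry
    env:
      RELEASE_SHA: ${{ needs.resolve.outputs.release_sha }}
    run: >
      curl --fail -sS
      -H "Authorization: token ${{ secrets.FORGEJO_REGISTRY_TOKEN }}"
      --upload-file "veza-backend-${RELEASE_SHA}.tar.zst"
      "${{ vars.FORGEJO_REGISTRY_URL }}/veza-backend/${RELEASE_SHA}/veza-backend-${RELEASE_SHA}.tar.zst"

This is the same veza-backend/{sha}/veza-backend-{sha}.tar.zst shape that
deploy_app.yml's get_url fetch expects.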
--no-verify justification continues to hold.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
# deploy_app.yml — second half of every deploy. Runs AFTER
# deploy_data.yml has taken snapshots + ensured the data services are up.
#
# Phases (mirror docs/RUNBOOK_ROLLBACK.md):
# A — Run migrations in an ephemeral tools container.
# B — Read /var/lib/veza/active-color in the HAProxy container,
#     compute inactive_color (the color we are deploying TO).
# C — Destroy + relaunch the three app containers in inactive_color.
#     Apply roles/veza_app per component (artefact install + health
#     probe).
# D — Cross-container probes via Incus DNS (on top of the in-container
#     probes veza_app already ran in C). If any component's probe fails,
#     the playbook errors and Phase E is skipped (HAProxy still pointing
#     at the prior active color).
# E — Switch HAProxy via roles/veza_haproxy_switch (block/rescue
#     guards the prior cfg).
# F — External verification: curl through HAProxy, fail the playbook
#     (and reverse-switch) if the public health endpoint does not
#     return 200.
#
# Required extra-vars:
#   veza_env          staging | prod
#   veza_release_sha  40-char git SHA
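#
# Example invocation (a sketch; the CI deploy job is the canonical caller,
# and the inventory path and vault-password handling below are assumptions):
#
#   ansible-playbook -i inventories/staging/hosts.yml deploy_app.yml \
#     -e veza_env=staging -e veza_release_sha=<40-char sha> \
#     --vault-password-file /tmp/vault-pass
#
# Later phases are tagged (phaseC..phaseF) but depend on facts set in
# Phase B, so tag-limited runs are mainly useful for phaseA/migrations.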
---
# =====================================================================
# Phase A — Migrations
# =====================================================================
- name: Phase A — apply database migrations
  hosts: incus_hosts
  become: true
  gather_facts: true
  tasks:
    - name: Validate inputs
      ansible.builtin.assert:
        that:
          - veza_env in ['staging', 'prod']
          - veza_release_sha | length == 40
        fail_msg: deploy_app.yml requires veza_env + veza_release_sha extra-vars.
        quiet: true

    - name: Ensure ephemeral tools container exists
      ansible.builtin.shell: |
        set -e
        TOOLS="{{ veza_container_prefix }}backend-tools"
        if ! incus info "$TOOLS" >/dev/null 2>&1; then
          incus launch {{ veza_app_base_image }} "$TOOLS" \
            --profile veza-app --profile veza-net \
            --network "{{ veza_incus_network }}"
          echo "launched $TOOLS"   # marker consumed by changed_when below
          for i in $(seq 1 30); do
            incus exec "$TOOLS" -- /bin/true 2>/dev/null && exit 0
            sleep 1
          done
          echo "tools container did not become ready"
          exit 1
        fi
      args:
        executable: /bin/bash
      register: tools_provision
      changed_when: "'launched' in (tools_provision.stdout | default(''))"
      tags: [phaseA, migrations]

    - name: Refresh inventory so the tools container becomes reachable
      ansible.builtin.meta: refresh_inventory
      tags: [phaseA]

- name: Phase A — install backend artifact + run migrate_tool inside tools
  hosts: "{{ veza_container_prefix + 'backend-tools' }}"
  become: true
  gather_facts: false
  vars:
    ansible_connection: community.general.incus
    ansible_python_interpreter: /usr/bin/python3
    veza_component: backend
    veza_target_color: tools # not blue/green — bypass color logic in name
  tasks:
    - name: Apt deps for tools container
      ansible.builtin.apt:
        name:
          - ca-certificates
          - curl
          - postgresql-client
          - libssl3
          - zstd
        state: present
        update_cache: true
        cache_valid_time: 3600

    - name: Ensure migrate user
      ansible.builtin.user:
        name: veza-migrate
        system: true
        shell: /usr/sbin/nologin

    - name: Ensure /opt/veza/migrate
      ansible.builtin.file:
        path: /opt/veza/migrate
        state: directory
        owner: veza-migrate
        mode: "0755"

    - name: Fetch backend tarball
      ansible.builtin.get_url:
        url: "{{ veza_artifact_base_url }}/veza-backend/{{ veza_release_sha }}/veza-backend-{{ veza_release_sha }}.tar.zst"
        dest: "/tmp/veza-backend-{{ veza_release_sha }}.tar.zst"
        mode: "0600"
        headers:
          Authorization: "token {{ vault_forgejo_registry_token | default('') }}"
        force: false

    - name: Extract tarball into /opt/veza/migrate
      ansible.builtin.unarchive:
        src: "/tmp/veza-backend-{{ veza_release_sha }}.tar.zst"
        dest: "/opt/veza/migrate"
        remote_src: true
        owner: veza-migrate
        creates: "/opt/veza/migrate/migrate_tool"

    - name: Run migrate_tool
      ansible.builtin.command: /opt/veza/migrate/migrate_tool --up
      environment:
        DATABASE_URL: "postgres://veza:{{ vault_postgres_password }}@{{ veza_container_prefix }}postgres.{{ veza_incus_dns_suffix }}:5432/veza?sslmode=disable"
      register: migrate_result
      changed_when: "'no changes' not in (migrate_result.stdout | default('') | lower)"
      no_log: true # DATABASE_URL contains the password
      tags: [phaseA, migrations]

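# Note: because Phase A does not depend on facts computed later, migrations
# can be re-applied in isolation (a usage sketch, not part of the CI flow):
#   ansible-playbook deploy_app.yml --tags migrations \
#     -e veza_env=staging -e veza_release_sha=<sha> --vault-password-file ...
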
# =====================================================================
# Phase B — Determine inactive color
# =====================================================================
- name: Phase B — read active color, compute inactive_color
  hosts: "{{ veza_container_prefix + 'haproxy' }}"
  become: true
  gather_facts: false
  vars:
    ansible_connection: community.general.incus
    ansible_python_interpreter: /usr/bin/python3
  tasks:
    - name: Read currently-active color
      ansible.builtin.slurp:
        src: /var/lib/veza/active-color
      register: prior_color_raw
      failed_when: false

    - name: Resolve prior_active_color (default blue if no history)
      ansible.builtin.set_fact:
        prior_active_color: >-
          {{ (prior_color_raw.content | b64decode | trim) if prior_color_raw.content is defined
             else 'blue' }}
        cacheable: true

    - name: Compute inactive_color (the one we deploy TO)
      ansible.builtin.set_fact:
        inactive_color: "{{ 'green' if prior_active_color == 'blue' else 'blue' }}"
        cacheable: true

    - name: Show what we are switching to
      ansible.builtin.debug:
        msg: >-
          Deploying SHA {{ veza_release_sha[:12] }} to color
          {{ inactive_color }} (currently active: {{ prior_active_color }}).

# =====================================================================
# Phase C — destroy + relaunch the three app containers in inactive_color
# =====================================================================
- name: Phase C — recreate inactive-color app containers (host-side)
  hosts: incus_hosts
  become: true
  gather_facts: false
  vars:
    inactive_color: "{{ hostvars[veza_container_prefix + 'haproxy']['inactive_color'] }}"
  tasks:
    - name: Destroy + launch each component container
      ansible.builtin.shell: |
        set -e
        CT="{{ veza_container_prefix }}{{ item }}-{{ inactive_color }}"
        # Force-delete is fine — these are stateless app containers; the
        # active color is untouched.
        incus delete --force "$CT" 2>/dev/null || true
        incus launch {{ veza_app_base_image }} "$CT" \
          --profile veza-app \
          --profile veza-net \
          --network "{{ veza_incus_network }}"
        for i in $(seq 1 {{ veza_app_container_ready_timeout | default(30) }}); do
          if incus exec "$CT" -- /bin/true 2>/dev/null; then
            exit 0
          fi
          sleep 1
        done
        echo "Container $CT did not become ready"
        exit 1
      args:
        executable: /bin/bash
      loop:
        - backend
        - stream
        - web
      changed_when: true
      tags: [phaseC]

    - name: Refresh inventory so freshly-launched containers become reachable
      ansible.builtin.meta: refresh_inventory
      tags: [phaseC]

- name: Phase C — provision backend (inactive color) via veza_app role
  hosts: "{{ veza_container_prefix + 'backend-' + hostvars[veza_container_prefix + 'haproxy']['inactive_color'] }}"
  become: true
  gather_facts: false
  vars:
    ansible_connection: community.general.incus
    ansible_python_interpreter: /usr/bin/python3
    veza_component: backend
    veza_target_color: "{{ hostvars[veza_container_prefix + 'haproxy']['inactive_color'] }}"
  roles:
    - veza_app
  tags: [phaseC, backend]

- name: Phase C — provision stream (inactive color)
  hosts: "{{ veza_container_prefix + 'stream-' + hostvars[veza_container_prefix + 'haproxy']['inactive_color'] }}"
  become: true
  gather_facts: false
  vars:
    ansible_connection: community.general.incus
    ansible_python_interpreter: /usr/bin/python3
    veza_component: stream
    veza_target_color: "{{ hostvars[veza_container_prefix + 'haproxy']['inactive_color'] }}"
  roles:
    - veza_app
  tags: [phaseC, stream]

- name: Phase C — provision web (inactive color)
  hosts: "{{ veza_container_prefix + 'web-' + hostvars[veza_container_prefix + 'haproxy']['inactive_color'] }}"
  become: true
  gather_facts: false
  vars:
    ansible_connection: community.general.incus
    ansible_python_interpreter: /usr/bin/python3
    veza_component: web
    veza_target_color: "{{ hostvars[veza_container_prefix + 'haproxy']['inactive_color'] }}"
  roles:
    - veza_app
  tags: [phaseC, web]

# =====================================================================
# Phase D — cross-container probes (in addition to the in-container probes
# that veza_app already ran). This catches the case where the service
# is up locally but unreachable via Incus DNS.
# =====================================================================
- name: Phase D — probe each component via Incus DNS (cross-container)
  hosts: "{{ veza_container_prefix + 'haproxy' }}"
  become: true
  gather_facts: false
  vars:
    ansible_connection: community.general.incus
    ansible_python_interpreter: /usr/bin/python3
  tasks:
    - name: Curl each component's health endpoint
      ansible.builtin.uri:
        url: "http://{{ veza_container_prefix }}{{ item.component }}-{{ inactive_color }}.{{ veza_incus_dns_suffix }}:{{ item.port }}{{ item.path }}"
        method: GET
        status_code: [200]
        timeout: 5
      register: cross_probe
      retries: "{{ veza_healthcheck_retries }}"
      delay: "{{ veza_healthcheck_delay_seconds }}"
      until: cross_probe.status == 200
      changed_when: false
      loop:
        - { component: backend, port: "{{ veza_backend_port }}", path: "{{ veza_healthcheck_paths.backend }}" }
        - { component: stream, port: "{{ veza_stream_port }}", path: "{{ veza_healthcheck_paths.stream }}" }
        - { component: web, port: "{{ veza_web_port }}", path: "{{ veza_healthcheck_paths.web }}" }
      tags: [phaseD, probe]

# =====================================================================
# Phase E — switch HAProxy. roles/veza_haproxy_switch wraps render +
# validate + atomic-swap + HUP in a block/rescue that restores the prior
# cfg on failure.
# =====================================================================
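# For orientation, a minimal sketch of the shape that description implies
# (task names, template path, and the reload command are assumptions; the
# real tasks in roles/veza_haproxy_switch are the source of truth):
#
#   - name: Render, validate, swap, reload
#     block:
#       - ansible.builtin.template:
#           src: haproxy.cfg.j2
#           dest: /etc/haproxy/haproxy.cfg
#           validate: haproxy -c -f %s
#           backup: true
#       - ansible.builtin.command: systemctl reload haproxy
#       - ansible.builtin.copy:
#           content: "{{ veza_active_color }}"
#           dest: /var/lib/veza/active-color
#     rescue:
#       - name: Restore the previous cfg from the backup and reload again
#         ...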
- name: Phase E — switch HAProxy to the new color
  hosts: "{{ veza_container_prefix + 'haproxy' }}"
  become: true
  gather_facts: true # roles/veza_haproxy_switch wants ansible_date_time
  vars:
    ansible_connection: community.general.incus
    ansible_python_interpreter: /usr/bin/python3
    veza_active_color: "{{ inactive_color }}" # the color we ARE switching TO
  roles:
    - veza_haproxy_switch
  tags: [phaseE, switch]

# =====================================================================
# Phase F — Post-deploy verification (external curl through HAProxy).
# If this fails, we revert HAProxy to the prior color via a second run
# of veza_haproxy_switch and fail the playbook.
# =====================================================================
- name: Phase F — verify externally + record deploy state
  hosts: incus_hosts
  become: true
  gather_facts: true
  vars:
    inactive_color: "{{ hostvars[veza_container_prefix + 'haproxy']['inactive_color'] }}"
    prior_active_color: "{{ hostvars[veza_container_prefix + 'haproxy']['prior_active_color'] }}"
  tasks:
    # block/rescue lives at task level (a play has no rescue of its own);
    # the rescue path reverts HAProxy and then fails the run.
    - name: Verify through HAProxy, record state, roll back on failure
      tags: [phaseF]
      block:
        - name: Curl public health endpoint via HAProxy
          ansible.builtin.uri:
            url: "{{ veza_public_url }}/api/v1/health"
            method: GET
            status_code: [200]
            timeout: 10
            validate_certs: "{{ veza_public_url.startswith('https://') }}"
          register: public_health
          retries: 10
          delay: 3
          until: public_health.status == 200
          tags: [phaseF, verify]

        - name: Write veza_deploy.prom (consumed by the node-exporter textfile collector)
          ansible.builtin.copy:
            dest: /var/lib/node_exporter/textfile_collector/veza_deploy.prom
            content: |
              # HELP veza_deploy_active_color 0=blue, 1=green.
              # TYPE veza_deploy_active_color gauge
              veza_deploy_active_color{env="{{ veza_env }}"} {{ 0 if inactive_color == 'blue' else 1 }}
              # HELP veza_deploy_release_sha info metric, label=sha.
              # TYPE veza_deploy_release_sha gauge
              veza_deploy_release_sha{env="{{ veza_env }}",sha="{{ veza_release_sha }}",color="{{ inactive_color }}"} 1
              # HELP veza_deploy_last_success_timestamp unix epoch of last successful deploy.
              # TYPE veza_deploy_last_success_timestamp gauge
              veza_deploy_last_success_timestamp{env="{{ veza_env }}"} {{ ansible_date_time.epoch }}
            mode: "0644"
          tags: [phaseF, metrics]

      rescue:
        - name: Public health failed — record the failure timestamp
          ansible.builtin.copy:
            dest: /var/lib/node_exporter/textfile_collector/veza_deploy.prom
            content: |
              # HELP veza_deploy_last_failure_timestamp unix epoch of last failed deploy.
              # TYPE veza_deploy_last_failure_timestamp gauge
              veza_deploy_last_failure_timestamp{env="{{ veza_env }}",sha="{{ veza_release_sha }}",color="{{ inactive_color }}"} {{ ansible_date_time.epoch }}
            mode: "0644"
          failed_when: false

        - name: Re-switch HAProxy back to the prior color
          ansible.builtin.import_role:
            name: veza_haproxy_switch
          vars:
            veza_active_color: "{{ prior_active_color }}"
          delegate_to: "{{ veza_container_prefix + 'haproxy' }}"

        - name: Fail the playbook
          ansible.builtin.fail:
            msg: >-
              Public health probe via HAProxy failed after deploy of SHA
              {{ veza_release_sha[:12] }} to color {{ inactive_color }}.
              HAProxy reverted to the prior color ({{ prior_active_color }}).
              The freshly-deployed {{ inactive_color }} containers are kept
              alive for forensics — inspect with:
              incus exec {{ veza_container_prefix }}backend-{{ inactive_color }} -- journalctl -u veza-backend -n 200