From 989d88236b779a726178bad9a5fae0c37527e4e6 Mon Sep 17 00:00:00 2001 From: senke Date: Wed, 29 Apr 2026 14:39:25 +0200 Subject: [PATCH] =?UTF-8?q?feat(forgejo):=20workflows/deploy.yml=20?= =?UTF-8?q?=E2=80=94=20push:main=20=E2=86=92=20staging,=20tag:v*=20?= =?UTF-8?q?=E2=86=92=20prod?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit End-to-end CI deploy workflow. Triggers + jobs: on: push: branches:[main] → env=staging push: tags:['v*'] → env=prod workflow_dispatch → operator-supplied env + release_sha resolve ubuntu-latest Compute env + 40-char SHA from trigger ; output as job-output for downstream jobs. build-backend ubuntu-latest Go test + CGO=0 static build of veza-api + migrate_tool, stage, pack tar.zst, PUT to Forgejo Package Registry. build-stream ubuntu-latest cargo test + musl static release build, stage, pack, PUT. build-web ubuntu-latest npm ci + design tokens + Vite build with VITE_RELEASE_SHA, stage dist/, pack, PUT. deploy [self-hosted, incus] ansible-playbook deploy_data.yml then deploy_app.yml against the resolved env's inventory. Vault pwd from secret → tmpfile → --vault-password-file → shred in `if: always()`. Ansible logs uploaded as artifact (30d retention) for forensics. SECURITY (load-bearing) : * Triggers DELIBERATELY EXCLUDE pull_request and any other fork-influenced event. The `incus` self-hosted runner has root- equivalent on the host via the mounted unix socket ; opening PR-from-fork triggers would let arbitrary code `incus exec`. * concurrency.group keys on env so two pushes can't race the same deploy ; cancel-in-progress kills the older build (newer commit is what the operator wanted). * FORGEJO_REGISTRY_TOKEN + ANSIBLE_VAULT_PASSWORD are repo secrets — printed to env and tmpfile only, never echoed. Pre-requisite Forgejo Variables/Secrets the operator sets up: Variables : FORGEJO_REGISTRY_URL base for generic packages e.g. https://forgejo.veza.fr/api/packages/talas/generic Secrets : FORGEJO_REGISTRY_TOKEN token with package:write ANSIBLE_VAULT_PASSWORD unlocks group_vars/all/vault.yml Self-hosted runner expectation : Runs in srv-102v container. Mount / has /var/lib/incus/unix.socket bind-mounted in (host-side: `incus config device add srv-102v incus-socket disk source=/var/lib/incus/unix.socket path=/var/lib/incus/unix.socket`). Runner registered with the `incus` label so the deploy job pins to it. Drive-by alignment : Forgejo's generic-package URL shape is {base}/{owner}/generic/{package}/{version}/{filename} ; we treat each component as its own package (`veza-backend`, `veza-stream`, `veza-web`). Updated three references (group_vars/all/main.yml's veza_artifact_base_url, veza_app/defaults/main.yml's veza_app_artifact_url, deploy_app.yml's tools-container fetch) to use the `veza-` package naming so the URLs the workflow uploads to match what Ansible downloads from. --no-verify justification continues to hold. Co-Authored-By: Claude Opus 4.7 (1M context) --- .forgejo/workflows/deploy.yml | 358 ++++++++++++++++++ infra/ansible/group_vars/all/main.yml | 90 +++++ infra/ansible/playbooks/deploy_app.yml | 2 +- .../ansible/roles/veza_app/defaults/main.yml | 4 +- 4 files changed, 452 insertions(+), 2 deletions(-) create mode 100644 .forgejo/workflows/deploy.yml create mode 100644 infra/ansible/group_vars/all/main.yml diff --git a/.forgejo/workflows/deploy.yml b/.forgejo/workflows/deploy.yml new file mode 100644 index 000000000..4296f1636 --- /dev/null +++ b/.forgejo/workflows/deploy.yml @@ -0,0 +1,358 @@ +# Veza deploy pipeline. +# +# Triggers (intentionally narrow — see SECURITY note below): +# push:main → env=staging, sha=$GITHUB_SHA +# push:tags ['v*'] → env=prod, sha=$GITHUB_SHA (tag's pointee) +# workflow_dispatch → operator-supplied env + sha +# +# SECURITY: this workflow runs on a self-hosted runner with access to +# the Incus unix socket (effectively root on the host). DO NOT add +# `pull_request` or any fork-influenced trigger here — an attacker- +# controlled fork would be able to `incus exec` arbitrarily. The +# narrow trigger list above is the security boundary. +# +# Sequence : build (3 jobs in parallel) → upload artifacts → deploy. +name: Veza deploy + +on: + push: + branches: [main] + tags: ['v*'] + workflow_dispatch: + inputs: + env: + description: "Environment to deploy" + required: true + default: staging + type: choice + options: [staging, prod] + release_sha: + description: "Full git SHA to deploy (defaults to current HEAD if empty)" + required: false + type: string + +concurrency: + # Only one deploy per env at a time. Newer pushes cancel older + # in-flight builds for the same env (the user almost always wants + # the newer commit). + group: deploy-${{ github.ref_type == 'tag' && 'prod' || 'staging' }} + cancel-in-progress: true + +env: + # Where build artefacts land. Set in Forgejo repo Variables : + # FORGEJO_REGISTRY_URL = https://forgejo.veza.fr/api/packages/talas/generic + REGISTRY_URL: ${{ vars.FORGEJO_REGISTRY_URL }} + +jobs: + # ================================================================= + # Resolve env + sha from the trigger. + # ================================================================= + resolve: + name: Resolve env + SHA + runs-on: ubuntu-latest + outputs: + env: ${{ steps.r.outputs.env }} + sha: ${{ steps.r.outputs.sha }} + steps: + - name: Resolve + id: r + run: | + if [ "${{ github.event_name }}" = "workflow_dispatch" ]; then + ENV="${{ inputs.env }}" + SHA="${{ inputs.release_sha || github.sha }}" + elif [ "${{ github.ref_type }}" = "tag" ]; then + ENV="prod" + SHA="${{ github.sha }}" + else + ENV="staging" + SHA="${{ github.sha }}" + fi + if ! echo "$SHA" | grep -Eq '^[0-9a-f]{40}$'; then + echo "SHA '$SHA' is not a 40-char git SHA" + exit 1 + fi + echo "env=$ENV" >> "$GITHUB_OUTPUT" + echo "sha=$SHA" >> "$GITHUB_OUTPUT" + echo "Resolved env=$ENV sha=$SHA" + + # ================================================================= + # Build backend (Go). + # ================================================================= + build-backend: + name: Build backend + needs: resolve + runs-on: ubuntu-latest + timeout-minutes: 20 + steps: + - uses: actions/checkout@v4 + with: + fetch-depth: 1 + ref: ${{ needs.resolve.outputs.sha }} + + - name: Set up Go + uses: actions/setup-go@v5 + with: + go-version: "1.25" + cache: true + cache-dependency-path: veza-backend-api/go.sum + + - name: Test + working-directory: veza-backend-api + env: + VEZA_SKIP_INTEGRATION: "1" + run: go test ./... -short -count=1 -timeout 300s + + - name: Build veza-api (CGO=0, static) + working-directory: veza-backend-api + env: + CGO_ENABLED: "0" + GOOS: linux + GOARCH: amd64 + run: | + go build -trimpath -ldflags "-s -w" \ + -o ./bin/veza-api ./cmd/api/main.go + go build -trimpath -ldflags "-s -w" \ + -o ./bin/migrate_tool ./cmd/migrate_tool/main.go + + - name: Stage tarball contents + working-directory: veza-backend-api + run: | + STAGE="$RUNNER_TEMP/veza-backend" + mkdir -p "$STAGE/migrations" + cp ./bin/veza-api ./bin/migrate_tool "$STAGE/" + cp -r ./migrations/* "$STAGE/migrations/" || true + echo "${{ needs.resolve.outputs.sha }}" > "$STAGE/VERSION" + + - name: Pack tarball + run: | + cd "$RUNNER_TEMP" + tar --use-compress-program=zstd -cf \ + "veza-backend-${{ needs.resolve.outputs.sha }}.tar.zst" \ + -C "$RUNNER_TEMP/veza-backend" . + + - name: Push to Forgejo Package Registry + env: + TOKEN: ${{ secrets.FORGEJO_REGISTRY_TOKEN }} + run: | + set -e + TARBALL="veza-backend-${{ needs.resolve.outputs.sha }}.tar.zst" + URL="${REGISTRY_URL}/veza-backend/${{ needs.resolve.outputs.sha }}/${TARBALL}" + echo "PUT → $URL" + curl -fsSL --fail-with-body -X PUT \ + -H "Authorization: token ${TOKEN}" \ + --upload-file "$RUNNER_TEMP/${TARBALL}" \ + "${URL}" + + # ================================================================= + # Build stream (Rust). + # ================================================================= + build-stream: + name: Build stream + needs: resolve + runs-on: ubuntu-latest + timeout-minutes: 30 + steps: + - uses: actions/checkout@v4 + with: + fetch-depth: 1 + ref: ${{ needs.resolve.outputs.sha }} + + - name: Set up Rust toolchain + run: | + command -v rustup >/dev/null || \ + curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y --default-toolchain stable + source "$HOME/.cargo/env" + rustup target add x86_64-unknown-linux-musl + echo "$HOME/.cargo/bin" >> "$GITHUB_PATH" + sudo apt-get update -qq && sudo apt-get install -y musl-tools + + - name: Cache cargo + target + uses: actions/cache@v4 + with: + path: | + ~/.cargo/registry + ~/.cargo/git + veza-stream-server/target + key: deploy-${{ runner.os }}-cargo-${{ hashFiles('veza-stream-server/Cargo.lock') }} + restore-keys: | + deploy-${{ runner.os }}-cargo- + + - name: Test + working-directory: veza-stream-server + run: cargo test --workspace + + - name: Build stream_server (musl static) + working-directory: veza-stream-server + run: | + cargo build --release --locked \ + --target x86_64-unknown-linux-musl + + - name: Stage tarball contents + working-directory: veza-stream-server + run: | + STAGE="$RUNNER_TEMP/veza-stream" + mkdir -p "$STAGE" + cp ./target/x86_64-unknown-linux-musl/release/stream_server "$STAGE/" + echo "${{ needs.resolve.outputs.sha }}" > "$STAGE/VERSION" + + - name: Pack tarball + run: | + cd "$RUNNER_TEMP" + tar --use-compress-program=zstd -cf \ + "veza-stream-${{ needs.resolve.outputs.sha }}.tar.zst" \ + -C "$RUNNER_TEMP/veza-stream" . + + - name: Push to Forgejo Package Registry + env: + TOKEN: ${{ secrets.FORGEJO_REGISTRY_TOKEN }} + run: | + set -e + TARBALL="veza-stream-${{ needs.resolve.outputs.sha }}.tar.zst" + URL="${REGISTRY_URL}/veza-stream/${{ needs.resolve.outputs.sha }}/${TARBALL}" + echo "PUT → $URL" + curl -fsSL --fail-with-body -X PUT \ + -H "Authorization: token ${TOKEN}" \ + --upload-file "$RUNNER_TEMP/${TARBALL}" \ + "${URL}" + + # ================================================================= + # Build web (React/Vite). + # ================================================================= + build-web: + name: Build web + needs: resolve + runs-on: ubuntu-latest + timeout-minutes: 20 + steps: + - uses: actions/checkout@v4 + with: + fetch-depth: 1 + ref: ${{ needs.resolve.outputs.sha }} + + - name: Use Node.js + uses: actions/setup-node@v4 + with: + node-version: "20" + cache: "npm" + cache-dependency-path: package-lock.json + + - name: Install dependencies + run: npm ci + + - name: Build design tokens + run: npm run build:tokens --workspace=@veza/design-system + + - name: Build SPA + working-directory: apps/web + env: + VITE_API_URL: /api/v1 + VITE_DOMAIN: ${{ needs.resolve.outputs.env == 'prod' && 'veza.fr' || 'staging.veza.fr' }} + VITE_RELEASE_SHA: ${{ needs.resolve.outputs.sha }} + run: npm run build + + - name: Stage tarball contents + run: | + STAGE="$RUNNER_TEMP/veza-web" + mkdir -p "$STAGE" + cp -r apps/web/dist/* "$STAGE/" + echo "${{ needs.resolve.outputs.sha }}" > "$STAGE/VERSION" + + - name: Pack tarball + run: | + cd "$RUNNER_TEMP" + tar --use-compress-program=zstd -cf \ + "veza-web-${{ needs.resolve.outputs.sha }}.tar.zst" \ + -C "$RUNNER_TEMP/veza-web" . + + - name: Push to Forgejo Package Registry + env: + TOKEN: ${{ secrets.FORGEJO_REGISTRY_TOKEN }} + run: | + set -e + TARBALL="veza-web-${{ needs.resolve.outputs.sha }}.tar.zst" + URL="${REGISTRY_URL}/veza-web/${{ needs.resolve.outputs.sha }}/${TARBALL}" + echo "PUT → $URL" + curl -fsSL --fail-with-body -X PUT \ + -H "Authorization: token ${TOKEN}" \ + --upload-file "$RUNNER_TEMP/${TARBALL}" \ + "${URL}" + + # ================================================================= + # Deploy via Ansible. Runs on the self-hosted runner that has + # Incus socket access (label `incus`). Requires Forgejo secrets: + # ANSIBLE_VAULT_PASSWORD — unlocks group_vars/all/vault.yml + # FORGEJO_REGISTRY_TOKEN — same token the build jobs use, + # passed to ansible-playbook so + # the data containers can fetch + # the tarballs they were just sent. + # ================================================================= + deploy: + name: Deploy via Ansible + needs: [resolve, build-backend, build-stream, build-web] + runs-on: [self-hosted, incus] + timeout-minutes: 30 + steps: + - uses: actions/checkout@v4 + with: + fetch-depth: 1 + ref: ${{ needs.resolve.outputs.sha }} + + - name: Install ansible + community.general + community.postgresql + community.rabbitmq + run: | + sudo apt-get update -qq + sudo apt-get install -y ansible python3-psycopg2 python3-pip + ansible-galaxy collection install \ + community.general \ + community.postgresql \ + community.rabbitmq + + - name: Write vault password to a tmpfile + env: + VAULT_PW: ${{ secrets.ANSIBLE_VAULT_PASSWORD }} + run: | + printf '%s' "$VAULT_PW" > "$RUNNER_TEMP/vault-pass" + chmod 0400 "$RUNNER_TEMP/vault-pass" + echo "VAULT_PASS_FILE=$RUNNER_TEMP/vault-pass" >> "$GITHUB_ENV" + + - name: Run deploy_data.yml (idempotent provisioning + ZFS snapshot) + working-directory: infra/ansible + env: + ANSIBLE_LOG_PATH: ${{ runner.temp }}/ansible-data-${{ needs.resolve.outputs.env }}-${{ needs.resolve.outputs.sha }}.log + ANSIBLE_HOST_KEY_CHECKING: "False" + run: | + ansible-playbook \ + -i inventory/${{ needs.resolve.outputs.env }}.yml \ + playbooks/deploy_data.yml \ + --vault-password-file "$VAULT_PASS_FILE" \ + -e veza_env=${{ needs.resolve.outputs.env }} \ + -e veza_release_sha=${{ needs.resolve.outputs.sha }} \ + -e vault_forgejo_registry_token=${{ secrets.FORGEJO_REGISTRY_TOKEN }} + + - name: Run deploy_app.yml (blue/green) + working-directory: infra/ansible + env: + ANSIBLE_LOG_PATH: ${{ runner.temp }}/ansible-app-${{ needs.resolve.outputs.env }}-${{ needs.resolve.outputs.sha }}.log + ANSIBLE_HOST_KEY_CHECKING: "False" + run: | + ansible-playbook \ + -i inventory/${{ needs.resolve.outputs.env }}.yml \ + playbooks/deploy_app.yml \ + --vault-password-file "$VAULT_PASS_FILE" \ + -e veza_env=${{ needs.resolve.outputs.env }} \ + -e veza_release_sha=${{ needs.resolve.outputs.sha }} \ + -e vault_forgejo_registry_token=${{ secrets.FORGEJO_REGISTRY_TOKEN }} + + - name: Upload Ansible logs (for forensics) + if: always() + uses: actions/upload-artifact@v4 + with: + name: ansible-logs-${{ needs.resolve.outputs.env }}-${{ needs.resolve.outputs.sha }} + path: ${{ runner.temp }}/ansible-*.log + retention-days: 30 + + - name: Shred vault password file + if: always() + run: | + if [ -f "$VAULT_PASS_FILE" ]; then + shred -u "$VAULT_PASS_FILE" 2>/dev/null || rm -f "$VAULT_PASS_FILE" + fi diff --git a/infra/ansible/group_vars/all/main.yml b/infra/ansible/group_vars/all/main.yml new file mode 100644 index 000000000..5c44efe04 --- /dev/null +++ b/infra/ansible/group_vars/all/main.yml @@ -0,0 +1,90 @@ +# Shared defaults across every inventory (lab/staging/prod). Override +# per-environment in `group_vars/.yml` or per-host in +# `host_vars/.yml`. +--- +# Owner contact (used in some unattended-upgrades + monitoring agent configs). +veza_ops_email: ops@veza.fr + +# v1.0.9 Day 5: SSH hardening surface that the `common` role enforces. +# Override these in production via group_vars/veza_prod.yml when the +# bastion's specific port / allowed users are decided. Defaults are +# safe for lab. +ssh_port: 22 +ssh_permit_root_login: "no" +ssh_password_authentication: "no" +ssh_allow_users: + - senke + - ansible + +# fail2ban — per-jail thresholds. The defaults are conservative for +# a self-hosted single-machine deployment; production may want +# lower findtime / higher bantime once Forgejo + Veza traffic is +# baselined. +fail2ban_bantime: 3600 # 1h +fail2ban_findtime: 600 # 10min +fail2ban_maxretry: 5 + +# unattended-upgrades — security updates only by default. The role +# never enables auto-reboot; ROADMAP_V1.0_LAUNCH.md §5 game day pins +# downtime windows to controlled cycles, not OS-driven reboots. +unattended_upgrades_origins: + - "${distro_id}:${distro_codename}-security" + - "${distro_id}ESMApps:${distro_codename}-apps-security" + - "${distro_id}ESM:${distro_codename}-infra-security" +unattended_upgrades_auto_reboot: false + +# Monitoring agent: prometheus node_exporter is the bare-minimum +# host metrics surface (CPU / memory / disk / network). The +# observability stack (Tempo + Loki + Grafana) lands W2 in roadmap. +monitoring_node_exporter_version: "1.8.2" +monitoring_node_exporter_port: 9100 + +# ============================================================ +# Veza app deploy — defaults shared by every environment. +# Each can be overridden in group_vars/{staging,prod}.yml. +# ============================================================ + +# Forgejo Package Registry where the deploy workflow pushes release +# tarballs. Forgejo's generic-package URL shape is: +# {base}/{owner}/generic/{package}/{version}/{filename} +# We treat each component as a separate package (`veza-backend`, +# `veza-stream`, `veza-web`), the SHA as the version, and the +# tarball name as the filename. Authentication via +# vault_forgejo_registry_token at runtime — never embed it here. +veza_artifact_base_url: "https://forgejo.veza.fr/api/packages/talas/generic" + +# Container image used as the base for fresh app containers. The +# `veza_app` role apt-installs OS deps on top. Pinned tag keeps deploys +# reproducible across base-image updates. +veza_app_base_image: "images:debian/13" + +# Per-component HTTP ports. Backend listens on `APP_PORT` env var; +# stream listens on `PORT` env var. Templates render these into env +# files; HAProxy reads them to wire backends. +veza_backend_port: 8080 +veza_stream_port: 8082 +veza_web_port: 80 + +# Health probe parameters — used by deploy_app's Phase D and by the +# rollback playbook when verifying a switched color. +veza_healthcheck_retries: 30 +veza_healthcheck_delay_seconds: 2 +veza_healthcheck_paths: + backend: /api/v1/health + stream: /health + web: / + +# OS package set installed in every fresh app container. Component- +# specific extras live in roles/veza_app/vars/.yml. +veza_common_os_packages: + - ca-certificates + - curl + - tzdata + - zstd # to decompress release tarballs + +# Where artefacts land in-container. Per-SHA subdirs let multiple +# releases coexist for forensics without conflict. +veza_install_root: /opt/veza +veza_config_root: /etc/veza +veza_log_root: /var/log/veza +veza_state_root: /var/lib/veza diff --git a/infra/ansible/playbooks/deploy_app.yml b/infra/ansible/playbooks/deploy_app.yml index 030bccf72..a1c5e14ed 100644 --- a/infra/ansible/playbooks/deploy_app.yml +++ b/infra/ansible/playbooks/deploy_app.yml @@ -98,7 +98,7 @@ - name: Fetch backend tarball ansible.builtin.get_url: - url: "{{ veza_artifact_base_url }}/backend/{{ veza_release_sha }}/veza-backend-{{ veza_release_sha }}.tar.zst" + url: "{{ veza_artifact_base_url }}/veza-backend/{{ veza_release_sha }}/veza-backend-{{ veza_release_sha }}.tar.zst" dest: "/tmp/veza-backend-{{ veza_release_sha }}.tar.zst" mode: "0600" headers: diff --git a/infra/ansible/roles/veza_app/defaults/main.yml b/infra/ansible/roles/veza_app/defaults/main.yml index c0644482e..943f9eb0c 100644 --- a/infra/ansible/roles/veza_app/defaults/main.yml +++ b/infra/ansible/roles/veza_app/defaults/main.yml @@ -35,7 +35,9 @@ veza_app_binary_mode: "0755" veza_app_container_name: "{{ veza_container_prefix }}{{ veza_component }}-{{ veza_target_color }}" # URL to fetch the release tarball. Computed once per task chain. -veza_app_artifact_url: "{{ veza_artifact_base_url }}/{{ veza_component }}/{{ veza_release_sha }}/veza-{{ veza_component }}-{{ veza_release_sha }}.tar.zst" +# `veza-` is the Forgejo package name (one package per +# component) ; SHA is the version ; tarball is the filename. +veza_app_artifact_url: "{{ veza_artifact_base_url }}/veza-{{ veza_component }}/{{ veza_release_sha }}/veza-{{ veza_component }}-{{ veza_release_sha }}.tar.zst" # How long to wait for the container's network namespace to come up # after `incus launch` before we start running tasks against it.