diff --git a/.github/workflows/cleanup-failed.yml b/.github/workflows/cleanup-failed.yml
new file mode 100644
index 000000000..9875deb40
--- /dev/null
+++ b/.github/workflows/cleanup-failed.yml
@@ -0,0 +1,84 @@
+# cleanup-failed.yml — workflow_dispatch only.
+#
+# Tears down the kept-alive failed-deploy color (the inactive one
+# that survived a Phase D / Phase F failure for forensics).
+# Operator triggers this once they have read the journalctl output.
+#
+# Hard safety in playbooks/cleanup_failed.yml: refuses to destroy
+# the currently-active color.
+name: Veza cleanup failed-deploy color
+
+on:
+  workflow_dispatch:
+    inputs:
+      env:
+        description: "Environment to clean up"
+        required: true
+        type: choice
+        options: [staging, prod]
+      color:
+        description: "Color to destroy (must NOT be the active one)"
+        required: true
+        type: choice
+        options: [blue, green]
+
+concurrency:
+  group: cleanup-${{ inputs.env }}
+  cancel-in-progress: false
+
+jobs:
+  cleanup:
+    name: Destroy ${{ inputs.color }} app containers in ${{ inputs.env }}
+    runs-on: [self-hosted, incus]
+    timeout-minutes: 10
+    steps:
+      - uses: actions/checkout@v4
+        with:
+          fetch-depth: 1
+
+      - name: Install ansible
+        run: |
+          sudo apt-get update -qq
+          sudo apt-get install -y ansible
+          ansible-galaxy collection install community.general
+
+      - name: Write vault password
+        env:
+          VAULT_PW: ${{ secrets.ANSIBLE_VAULT_PASSWORD }}
+        run: |
+          # umask first: the file must never exist with group/other bits.
+          umask 077
+          printf '%s' "$VAULT_PW" > "$RUNNER_TEMP/vault-pass"
+          chmod 0400 "$RUNNER_TEMP/vault-pass"
+          echo "VAULT_PASS_FILE=$RUNNER_TEMP/vault-pass" >> "$GITHUB_ENV"
+
+      - name: Run cleanup_failed.yml
+        working-directory: infra/ansible
+        env:
+          ANSIBLE_LOG_PATH: ${{ runner.temp }}/ansible-cleanup-${{ inputs.env }}-${{ inputs.color }}.log
+          ANSIBLE_HOST_KEY_CHECKING: "False"
+          # Inputs reach the shell as data via env, not as script text.
+          VEZA_ENV: ${{ inputs.env }}
+          TARGET_COLOR: ${{ inputs.color }}
+        run: |
+          ansible-playbook \
+            -i "inventory/${VEZA_ENV}.yml" \
+            playbooks/cleanup_failed.yml \
+            --vault-password-file "$VAULT_PASS_FILE" \
+            -e "veza_env=${VEZA_ENV}" \
+            -e "target_color=${TARGET_COLOR}"
+
+      - name: Upload Ansible log
+        if: always()
+        uses: actions/upload-artifact@v4
+        with:
+          name: ansible-cleanup-${{ inputs.env }}-${{ inputs.color }}
+          path: ${{ runner.temp }}/ansible-cleanup-*.log
+          retention-days: 30
+
+      - name: Shred vault password file
+        if: always()
+        run: |
+          if [ -f "$VAULT_PASS_FILE" ]; then
+            shred -u "$VAULT_PASS_FILE" 2>/dev/null || rm -f "$VAULT_PASS_FILE"
+          fi
diff --git a/.github/workflows/deploy.yml b/.github/workflows/deploy.yml
new file mode 100644
index 000000000..34fd376ec
--- /dev/null
+++ b/.github/workflows/deploy.yml
@@ -0,0 +1,387 @@
+# Veza deploy pipeline.
+#
+# Triggers (intentionally narrow — see SECURITY note below):
+#   workflow_dispatch → operator-supplied env + sha
+#   (push:main + tag:v* are commented OUT until provisioning is
+#    complete — see docs/RUNBOOK_DEPLOY_BOOTSTRAP.md. Re-enable
+#    once secrets/runner/vault are in place and a manual run via
+#    workflow_dispatch has been verified GREEN.)
+#
+# SECURITY: this workflow runs on a self-hosted runner with access to
+# the Incus unix socket (effectively root on the host). DO NOT add
+# `pull_request` or any fork-influenced trigger here — an attacker-
+# controlled fork would be able to `incus exec` arbitrarily. The
+# narrow trigger list above is the security boundary.
+#
+# Sequence : build (3 jobs in parallel) → upload artifacts → deploy.
+name: Veza deploy
+
+on:
+  # push:                       # GATED — uncomment after first
+  #   branches: [main]          # successful workflow_dispatch run
+  #   tags: ['v*']              # see RUNBOOK_DEPLOY_BOOTSTRAP.md
+  workflow_dispatch:
+    inputs:
+      env:
+        description: "Environment to deploy"
+        required: true
+        default: staging
+        type: choice
+        options: [staging, prod]
+      release_sha:
+        description: "Full git SHA to deploy (defaults to current HEAD if empty)"
+        required: false
+        type: string
+
+concurrency:
+  # Only one deploy per env at a time. Newer pushes cancel older
+  # in-flight builds for the same env (the user almost always wants
+  # the newer commit).
+  # Key on the resolved env: a dispatch always carries inputs.env;
+  # the tag/branch fallback only applies once push triggers return.
+  group: deploy-${{ inputs.env || (github.ref_type == 'tag' && 'prod' || 'staging') }}
+  cancel-in-progress: true
+
+env:
+  # Where build artefacts land. Set in Forgejo repo Variables :
+  #   FORGEJO_REGISTRY_URL = https://forgejo.veza.fr/api/packages/talas/generic
+  REGISTRY_URL: ${{ vars.FORGEJO_REGISTRY_URL }}
+
+jobs:
+  # =================================================================
+  # Resolve env + sha from the trigger.
+  # =================================================================
+  resolve:
+    name: Resolve env + SHA
+    runs-on: [self-hosted, incus]
+    timeout-minutes: 5
+    outputs:
+      env: ${{ steps.r.outputs.env }}
+      sha: ${{ steps.r.outputs.sha }}
+    steps:
+      - name: Resolve
+        id: r
+        env:
+          # Operator-supplied values reach the shell as data only.
+          EVENT_NAME: ${{ github.event_name }}
+          REF_TYPE: ${{ github.ref_type }}
+          DISPATCH_ENV: ${{ inputs.env }}
+          DISPATCH_SHA: ${{ inputs.release_sha }}
+          HEAD_SHA: ${{ github.sha }}
+        run: |
+          if [ "$EVENT_NAME" = "workflow_dispatch" ]; then
+            ENV="$DISPATCH_ENV"
+            SHA="${DISPATCH_SHA:-$HEAD_SHA}"
+          elif [ "$REF_TYPE" = "tag" ]; then
+            ENV="prod"
+            SHA="$HEAD_SHA"
+          else
+            ENV="staging"
+            SHA="$HEAD_SHA"
+          fi
+          case "$ENV" in
+            staging|prod) ;;
+            *)
+              echo "env '$ENV' is not one of: staging, prod"
+              exit 1
+              ;;
+          esac
+          # Length check: grep matches per line, so a multi-line value could pass.
+          if [ "${#SHA}" -ne 40 ] || ! printf '%s\n' "$SHA" | grep -Eq '^[0-9a-f]{40}$'; then
+            echo "SHA '$SHA' is not a 40-char git SHA"
+            exit 1
+          fi
+          echo "env=$ENV" >> "$GITHUB_OUTPUT"
+          echo "sha=$SHA" >> "$GITHUB_OUTPUT"
+          echo "Resolved env=$ENV sha=$SHA"
+
+  # =================================================================
+  # Build backend (Go).
+  # =================================================================
+  build-backend:
+    name: Build backend
+    needs: resolve
+    runs-on: [self-hosted, incus]
+    timeout-minutes: 20
+    steps:
+      - uses: actions/checkout@v4
+        with:
+          fetch-depth: 1
+          ref: ${{ needs.resolve.outputs.sha }}
+
+      - name: Set up Go
+        uses: actions/setup-go@v5
+        with:
+          go-version: "1.25"
+          cache: true
+          cache-dependency-path: veza-backend-api/go.sum
+
+      - name: Test
+        working-directory: veza-backend-api
+        env:
+          VEZA_SKIP_INTEGRATION: "1"
+        run: go test ./... -short -count=1 -timeout 300s
+
+      - name: Build veza-api (CGO=0, static)
+        working-directory: veza-backend-api
+        env:
+          CGO_ENABLED: "0"
+          GOOS: linux
+          GOARCH: amd64
+        run: |
+          go build -trimpath -ldflags "-s -w" \
+            -o ./bin/veza-api ./cmd/api/main.go
+          go build -trimpath -ldflags "-s -w" \
+            -o ./bin/migrate_tool ./cmd/migrate_tool/main.go
+
+      - name: Stage tarball contents
+        working-directory: veza-backend-api
+        run: |
+          STAGE="$RUNNER_TEMP/veza-backend"
+          mkdir -p "$STAGE/migrations"
+          cp ./bin/veza-api ./bin/migrate_tool "$STAGE/"
+          cp -r ./migrations/* "$STAGE/migrations/" || true
+          echo "${{ needs.resolve.outputs.sha }}" > "$STAGE/VERSION"
+
+      - name: Pack tarball
+        run: |
+          cd "$RUNNER_TEMP"
+          tar --use-compress-program=zstd -cf \
+            "veza-backend-${{ needs.resolve.outputs.sha }}.tar.zst" \
+            -C "$RUNNER_TEMP/veza-backend" .
+
+      - name: Push to Forgejo Package Registry
+        env:
+          TOKEN: ${{ secrets.FORGEJO_REGISTRY_TOKEN }}
+        run: |
+          set -e
+          TARBALL="veza-backend-${{ needs.resolve.outputs.sha }}.tar.zst"
+          URL="${REGISTRY_URL}/veza-backend/${{ needs.resolve.outputs.sha }}/${TARBALL}"
+          echo "PUT → $URL"
+          # No -f here: curl documents --fail and --fail-with-body as exclusive.
+          curl -sSL --fail-with-body -X PUT \
+            -H "Authorization: token ${TOKEN}" \
+            --upload-file "$RUNNER_TEMP/${TARBALL}" \
+            "${URL}"
+
+  # =================================================================
+  # Build stream (Rust).
+  # =================================================================
+  build-stream:
+    name: Build stream
+    needs: resolve
+    runs-on: [self-hosted, incus]
+    timeout-minutes: 30
+    steps:
+      - uses: actions/checkout@v4
+        with:
+          fetch-depth: 1
+          ref: ${{ needs.resolve.outputs.sha }}
+
+      - name: Set up Rust toolchain
+        run: |
+          command -v rustup >/dev/null || \
+            curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y --default-toolchain stable
+          source "$HOME/.cargo/env"
+          rustup target add x86_64-unknown-linux-musl
+          echo "$HOME/.cargo/bin" >> "$GITHUB_PATH"
+          sudo apt-get update -qq && sudo apt-get install -y musl-tools
+
+      - name: Cache cargo + target
+        uses: actions/cache@v4
+        with:
+          path: |
+            ~/.cargo/registry
+            ~/.cargo/git
+            veza-stream-server/target
+          key: deploy-${{ runner.os }}-cargo-${{ hashFiles('veza-stream-server/Cargo.lock') }}
+          restore-keys: |
+            deploy-${{ runner.os }}-cargo-
+
+      - name: Test
+        working-directory: veza-stream-server
+        run: cargo test --workspace
+
+      - name: Build stream_server (musl static)
+        working-directory: veza-stream-server
+        run: |
+          cargo build --release --locked \
+            --target x86_64-unknown-linux-musl
+
+      - name: Stage tarball contents
+        working-directory: veza-stream-server
+        run: |
+          STAGE="$RUNNER_TEMP/veza-stream"
+          mkdir -p "$STAGE"
+          cp ./target/x86_64-unknown-linux-musl/release/stream_server "$STAGE/"
+          echo "${{ needs.resolve.outputs.sha }}" > "$STAGE/VERSION"
+
+      - name: Pack tarball
+        run: |
+          cd "$RUNNER_TEMP"
+          tar --use-compress-program=zstd -cf \
+            "veza-stream-${{ needs.resolve.outputs.sha }}.tar.zst" \
+            -C "$RUNNER_TEMP/veza-stream" .
+
+      - name: Push to Forgejo Package Registry
+        env:
+          TOKEN: ${{ secrets.FORGEJO_REGISTRY_TOKEN }}
+        run: |
+          set -e
+          TARBALL="veza-stream-${{ needs.resolve.outputs.sha }}.tar.zst"
+          URL="${REGISTRY_URL}/veza-stream/${{ needs.resolve.outputs.sha }}/${TARBALL}"
+          echo "PUT → $URL"
+          # No -f here: curl documents --fail and --fail-with-body as exclusive.
+          curl -sSL --fail-with-body -X PUT \
+            -H "Authorization: token ${TOKEN}" \
+            --upload-file "$RUNNER_TEMP/${TARBALL}" \
+            "${URL}"
+
+  # =================================================================
+  # Build web (React/Vite).
+  # =================================================================
+  build-web:
+    name: Build web
+    needs: resolve
+    runs-on: [self-hosted, incus]
+    timeout-minutes: 20
+    steps:
+      - uses: actions/checkout@v4
+        with:
+          fetch-depth: 1
+          ref: ${{ needs.resolve.outputs.sha }}
+
+      - name: Use Node.js
+        uses: actions/setup-node@v4
+        with:
+          node-version: "20"
+          cache: "npm"
+          cache-dependency-path: package-lock.json
+
+      - name: Install dependencies
+        run: npm ci
+
+      - name: Build design tokens
+        run: npm run build:tokens --workspace=@veza/design-system
+
+      - name: Build SPA
+        working-directory: apps/web
+        env:
+          VITE_API_URL: /api/v1
+          VITE_DOMAIN: ${{ needs.resolve.outputs.env == 'prod' && 'veza.fr' || 'staging.veza.fr' }}
+          VITE_RELEASE_SHA: ${{ needs.resolve.outputs.sha }}
+        run: npm run build
+
+      - name: Stage tarball contents
+        run: |
+          STAGE="$RUNNER_TEMP/veza-web"
+          mkdir -p "$STAGE"
+          cp -r apps/web/dist/* "$STAGE/"
+          echo "${{ needs.resolve.outputs.sha }}" > "$STAGE/VERSION"
+
+      - name: Pack tarball
+        run: |
+          cd "$RUNNER_TEMP"
+          tar --use-compress-program=zstd -cf \
+            "veza-web-${{ needs.resolve.outputs.sha }}.tar.zst" \
+            -C "$RUNNER_TEMP/veza-web" .
+
+      - name: Push to Forgejo Package Registry
+        env:
+          TOKEN: ${{ secrets.FORGEJO_REGISTRY_TOKEN }}
+        run: |
+          set -e
+          TARBALL="veza-web-${{ needs.resolve.outputs.sha }}.tar.zst"
+          URL="${REGISTRY_URL}/veza-web/${{ needs.resolve.outputs.sha }}/${TARBALL}"
+          echo "PUT → $URL"
+          # No -f here: curl documents --fail and --fail-with-body as exclusive.
+          curl -sSL --fail-with-body -X PUT \
+            -H "Authorization: token ${TOKEN}" \
+            --upload-file "$RUNNER_TEMP/${TARBALL}" \
+            "${URL}"
+
+  # =================================================================
+  # Deploy via Ansible. Runs on the self-hosted runner that has
+  # Incus socket access (label `incus`). Requires Forgejo secrets:
+  #   ANSIBLE_VAULT_PASSWORD — unlocks group_vars/all/vault.yml
+  #   FORGEJO_REGISTRY_TOKEN — same token the build jobs use,
+  #                            passed to ansible-playbook so
+  #                            the data containers can fetch
+  #                            the tarballs they were just sent.
+  # =================================================================
+  deploy:
+    name: Deploy via Ansible
+    needs: [resolve, build-backend, build-stream, build-web]
+    runs-on: [self-hosted, incus]
+    timeout-minutes: 30
+    steps:
+      - uses: actions/checkout@v4
+        with:
+          fetch-depth: 1
+          ref: ${{ needs.resolve.outputs.sha }}
+
+      - name: Install ansible + community.general + community.postgresql + community.rabbitmq
+        run: |
+          sudo apt-get update -qq
+          sudo apt-get install -y ansible python3-psycopg2 python3-pip
+          ansible-galaxy collection install \
+            community.general \
+            community.postgresql \
+            community.rabbitmq
+
+      - name: Write vault password to a tmpfile
+        env:
+          VAULT_PW: ${{ secrets.ANSIBLE_VAULT_PASSWORD }}
+        run: |
+          # umask first: the file must never exist with group/other bits.
+          umask 077
+          printf '%s' "$VAULT_PW" > "$RUNNER_TEMP/vault-pass"
+          chmod 0400 "$RUNNER_TEMP/vault-pass"
+          echo "VAULT_PASS_FILE=$RUNNER_TEMP/vault-pass" >> "$GITHUB_ENV"
+
+      - name: Run deploy_data.yml (idempotent provisioning + ZFS snapshot)
+        working-directory: infra/ansible
+        env:
+          ANSIBLE_LOG_PATH: ${{ runner.temp }}/ansible-data-${{ needs.resolve.outputs.env }}-${{ needs.resolve.outputs.sha }}.log
+          ANSIBLE_HOST_KEY_CHECKING: "False"
+          # Token travels via env so it is never rendered into the script.
+          FORGEJO_REGISTRY_TOKEN: ${{ secrets.FORGEJO_REGISTRY_TOKEN }}
+        run: |
+          ansible-playbook \
+            -i inventory/${{ needs.resolve.outputs.env }}.yml \
+            playbooks/deploy_data.yml \
+            --vault-password-file "$VAULT_PASS_FILE" \
+            -e veza_env=${{ needs.resolve.outputs.env }} \
+            -e veza_release_sha=${{ needs.resolve.outputs.sha }} \
+            -e "vault_forgejo_registry_token=${FORGEJO_REGISTRY_TOKEN}"
+
+      - name: Run deploy_app.yml (blue/green)
+        working-directory: infra/ansible
+        env:
+          ANSIBLE_LOG_PATH: ${{ runner.temp }}/ansible-app-${{ needs.resolve.outputs.env }}-${{ needs.resolve.outputs.sha }}.log
+          ANSIBLE_HOST_KEY_CHECKING: "False"
+          # Token travels via env so it is never rendered into the script.
+          FORGEJO_REGISTRY_TOKEN: ${{ secrets.FORGEJO_REGISTRY_TOKEN }}
+        run: |
+          ansible-playbook \
+            -i inventory/${{ needs.resolve.outputs.env }}.yml \
+            playbooks/deploy_app.yml \
+            --vault-password-file "$VAULT_PASS_FILE" \
+            -e veza_env=${{ needs.resolve.outputs.env }} \
+            -e veza_release_sha=${{ needs.resolve.outputs.sha }} \
+            -e "vault_forgejo_registry_token=${FORGEJO_REGISTRY_TOKEN}"
+
+      - name: Upload Ansible logs (for forensics)
+        if: always()
+        uses: actions/upload-artifact@v4
+        with:
+          name: ansible-logs-${{ needs.resolve.outputs.env }}-${{ needs.resolve.outputs.sha }}
+          path: ${{ runner.temp }}/ansible-*.log
+          retention-days: 30
+
+      - name: Shred vault password file
+        if: always()
+        run: |
+          if [ -f "$VAULT_PASS_FILE" ]; then
+            shred -u "$VAULT_PASS_FILE" 2>/dev/null || rm -f "$VAULT_PASS_FILE"
+          fi
diff --git a/.github/workflows/rollback.yml b/.github/workflows/rollback.yml
new file mode 100644
index 000000000..039bbf88d
--- /dev/null
+++ b/.github/workflows/rollback.yml
@@ -0,0 +1,134 @@
+# rollback.yml — workflow_dispatch only.
+#
+# Two modes :
+#   fast — flip HAProxy back to the previous color. ~5s. Requires
+#          the target color's containers to still be alive
+#          (i.e., no later deploy has recycled them).
+#   full — re-run deploy_app.yml with a specific (older) release_sha.
+#          ~5-10min. The artefact must still be in the Forgejo
+#          registry (default retention 30 SHA per component).
+#
+# See docs/RUNBOOK_ROLLBACK.md for decision criteria.
+name: Veza rollback
+
+on:
+  workflow_dispatch:
+    inputs:
+      env:
+        description: "Environment to rollback"
+        required: true
+        type: choice
+        options: [staging, prod]
+      mode:
+        description: "Rollback mode"
+        required: true
+        type: choice
+        options: [fast, full]
+      target_color:
+        description: "(mode=fast only) color to flip back TO (the prior active one)"
+        required: false
+        type: choice
+        options: [blue, green]
+      release_sha:
+        description: "(mode=full only) 40-char SHA of the release to redeploy"
+        required: false
+        type: string
+
+concurrency:
+  group: rollback-${{ inputs.env }}
+  cancel-in-progress: false
+
+jobs:
+  rollback:
+    name: Rollback ${{ inputs.env }} (${{ inputs.mode }})
+    runs-on: [self-hosted, incus]
+    timeout-minutes: 30
+    steps:
+      - name: Validate inputs
+        env:
+          # Free-form inputs are read as data, not spliced into the script.
+          MODE: ${{ inputs.mode }}
+          TARGET_COLOR: ${{ inputs.target_color }}
+          RELEASE_SHA: ${{ inputs.release_sha }}
+        run: |
+          if [ "$MODE" = "fast" ] && [ -z "$TARGET_COLOR" ]; then
+            echo "mode=fast requires target_color"
+            exit 1
+          fi
+          if [ "$MODE" = "full" ]; then
+            if [ -z "$RELEASE_SHA" ]; then
+              echo "mode=full requires release_sha"
+              exit 1
+            fi
+            # Length check: grep matches per line, so a multi-line value could pass.
+            if [ "${#RELEASE_SHA}" -ne 40 ] || ! printf '%s\n' "$RELEASE_SHA" | grep -Eq '^[0-9a-f]{40}$'; then
+              echo "release_sha is not a 40-char git SHA"
+              exit 1
+            fi
+          fi
+
+      - uses: actions/checkout@v4
+        with:
+          fetch-depth: 1
+          ref: ${{ inputs.mode == 'full' && inputs.release_sha || github.ref }}
+
+      - name: Install ansible + collections
+        run: |
+          sudo apt-get update -qq
+          sudo apt-get install -y ansible python3-psycopg2
+          ansible-galaxy collection install \
+            community.general \
+            community.postgresql \
+            community.rabbitmq
+
+      - name: Write vault password
+        env:
+          VAULT_PW: ${{ secrets.ANSIBLE_VAULT_PASSWORD }}
+        run: |
+          # umask first: the file must never exist with group/other bits.
+          umask 077
+          printf '%s' "$VAULT_PW" > "$RUNNER_TEMP/vault-pass"
+          chmod 0400 "$RUNNER_TEMP/vault-pass"
+          echo "VAULT_PASS_FILE=$RUNNER_TEMP/vault-pass" >> "$GITHUB_ENV"
+
+      - name: Run rollback.yml
+        working-directory: infra/ansible
+        shell: bash
+        env:
+          ANSIBLE_LOG_PATH: ${{ runner.temp }}/ansible-rollback-${{ inputs.env }}-${{ inputs.mode }}.log
+          ANSIBLE_HOST_KEY_CHECKING: "False"
+          # Inputs and the registry token are passed as data via env.
+          VEZA_ENV: ${{ inputs.env }}
+          MODE: ${{ inputs.mode }}
+          TARGET_COLOR: ${{ inputs.target_color }}
+          RELEASE_SHA: ${{ inputs.release_sha }}
+          FORGEJO_REGISTRY_TOKEN: ${{ secrets.FORGEJO_REGISTRY_TOKEN }}
+        run: |
+          # Array, not a string: each -e value stays one argv entry.
+          EXTRA=(-e "veza_env=${VEZA_ENV}" -e "mode=${MODE}")
+          if [ "$MODE" = "fast" ]; then
+            EXTRA+=(-e "target_color=${TARGET_COLOR}")
+          else
+            EXTRA+=(-e "veza_release_sha=${RELEASE_SHA}")
+            EXTRA+=(-e "vault_forgejo_registry_token=${FORGEJO_REGISTRY_TOKEN}")
+          fi
+          ansible-playbook \
+            -i "inventory/${VEZA_ENV}.yml" \
+            playbooks/rollback.yml \
+            --vault-password-file "$VAULT_PASS_FILE" \
+            "${EXTRA[@]}"
+
+      - name: Upload Ansible log
+        if: always()
+        uses: actions/upload-artifact@v4
+        with:
+          name: ansible-rollback-${{ inputs.env }}-${{ inputs.mode }}
+          path: ${{ runner.temp }}/ansible-rollback-*.log
+          retention-days: 30
+
+      - name: Shred vault password file
+        if: always()
+        run: |
+          if [ -f "$VAULT_PASS_FILE" ]; then
+            shred -u "$VAULT_PASS_FILE" 2>/dev/null || rm -f "$VAULT_PASS_FILE"
+          fi