# veza/.github/workflows/e2e.yml

name: E2E Playwright

# v1.0.8 Batch C — Playwright E2E suite. Pull requests and pushes to main
# run the @critical subset (fast feedback); the nightly cron and manual
# dispatch run the full suite (deeper coverage).
# Uses the --ci seed flag (cmd/tools/seed --ci) for ~5s seeding instead
# of the ~60s minimal seed.

on:
    # PR gate: only pull requests targeting main.
    pull_request:
        branches:
            - main
    # Commit gate: every push that lands on main.
    push:
        branches:
            - main
    schedule:
        # Nightly run at 03:00 UTC, kept off the daytime runner pool.
        - cron: "0 3 * * *"
    # Manual trigger from the Actions UI / API.
    workflow_dispatch:

# Workflow-level environment: exported to every step of every job.
env:
    # Turns off TLS certificate verification for git.
    # NOTE(review): presumably required because the forge serves a
    # certificate the runner does not trust — confirm, and prefer
    # installing the CA over disabling verification.
    GIT_SSL_NO_VERIFY: "true"
    # Turns off TLS certificate verification for Node.js processes.
    # NOTE(review): same caveat as GIT_SSL_NO_VERIFY.
    NODE_TLS_REJECT_UNAUTHORIZED: "0"
    # Forces playwright.config.ts:141,155 to spawn fresh backend + Vite
    # instead of reusing whatever is on the runner.
    CI: "true"
    # Falls back to a CI-only dev key if the Forgejo secret is unset.
    # Used at the "Build + start backend API" step.
    JWT_SECRET: ${{ secrets.E2E_JWT_SECRET || 'ci-dev-jwt-secret-32-chars-min-padding!!' }}

jobs:
    # ===========================================================================
    # Job: e2e — single job that selects the test scope per trigger.
    #   - pull_request / push to main  → @critical only (20min timeout)
    #   - cron / workflow_dispatch     → full suite (45min timeout)
    # ===========================================================================
    e2e:
        # Scope matrix:
        #   - pull_request    → @critical (PR gate, ~5-10min)
        #   - push to main    → @critical (commit gate, dev velocity priority)
        #   - schedule (cron) → full suite (nightly coverage)
        #   - workflow_dispatch → full (manual broad sweep)
        # Push was previously running the full suite (~1h30 pre-perf, ~15-20min
        # post-perf). The dev velocity cost was unjustifiable for the
        # incremental coverage over the @critical scope, especially while the
        # full suite carries pre-existing fixme'd tests. Cron picks up the
        # rest on a 24h cadence.
        #
        # The display name embeds the selected scope so the two variants are
        # distinguishable in the run list. It uses the same event test as the
        # `if:` conditions on the "Run E2E" steps — keep them in sync.
        name: e2e (${{ (github.event_name == 'pull_request' || github.event_name == 'push') && '@critical' || 'full' }})
        runs-on: ubuntu-latest
        # 20 minutes for the @critical scope, 45 minutes for the full suite.
        timeout-minutes: ${{ (github.event_name == 'pull_request' || github.event_name == 'push') && 20 || 45 }}

        # Service containers are managed by act_runner: spawned on the job
        # network with healthchecks, torn down at the end. This replaces
        # the previous `docker compose up -d` pattern which relied on
        # docker socket sharing + host port mappings — fragile (port
        # collisions across concurrent jobs, manual cleanup, double-DinD,
        # whole compose file validated even when only 3 services are
        # needed). Service hostnames (`postgres`, `redis`, `rabbitmq`)
        # resolve from the job container on standard ports.
        services:
            # PostgreSQL 16, reached by the steps through DATABASE_URL.
            postgres:
                image: postgres:16-alpine
                env:
                    POSTGRES_USER: veza
                    # Same expression as the password embedded in the
                    # job-level DATABASE_URL, so server and client cannot
                    # drift apart when the E2E_DB_PASSWORD secret is set.
                    POSTGRES_PASSWORD: ${{ secrets.E2E_DB_PASSWORD || 'devpassword' }}
                    POSTGRES_DB: veza
                # Healthcheck: probe every 5s, up to 10 times.
                options: >-
                    --health-cmd "pg_isready -U veza"
                    --health-interval 5s
                    --health-timeout 3s
                    --health-retries 10
            redis:
                # No-auth redis for CI: act_runner services don't support a
                # `command:` field, and the redis:7-alpine entrypoint does
                # NOT read REDIS_ARGS (verified empirically) — so passing
                # --requirepass via env doesn't work. The dev/prod password
                # policy (REM-023) is enforced via docker-compose.yml only;
                # the CI service network is ephemeral and isolated, so
                # dropping auth here is acceptable.
                image: redis:7-alpine
                # Healthcheck: probe every 5s, up to 10 times.
                options: >-
                    --health-cmd "redis-cli ping"
                    --health-interval 5s
                    --health-timeout 3s
                    --health-retries 10
            # RabbitMQ with the management plugin image. The default
            # user/password below match the fallback RABBITMQ_URL in the
            # job-level env.
            rabbitmq:
                image: rabbitmq:3-management-alpine
                env:
                    RABBITMQ_DEFAULT_USER: veza
                    RABBITMQ_DEFAULT_PASS: devpassword
                # Healthcheck: probe every 10s, up to 10 times (slower
                # cadence than postgres/redis).
                options: >-
                    --health-cmd "rabbitmq-diagnostics -q check_port_connectivity"
                    --health-interval 10s
                    --health-timeout 5s
                    --health-retries 10

        # Service hostnames + standard ports — no host-port mapping needed.
        # Job-level variables: visible to every step below.
        env:
            # The password embedded here must match the postgres service's
            # POSTGRES_PASSWORD, otherwise clients using this URL cannot
            # authenticate. sslmode=disable: the service container is plain TCP.
            DATABASE_URL: postgresql://veza:${{ secrets.E2E_DB_PASSWORD || 'devpassword' }}@postgres:5432/veza?sslmode=disable
            # No credentials: the CI redis service runs without auth.
            REDIS_URL: redis://redis:6379
            # The fallback carries the rabbitmq service's default user/password.
            # NOTE(review): an E2E_RABBITMQ_URL override replaces the whole
            # URL, so it must either carry credentials the service accepts or
            # point at a different broker — confirm intended use.
            RABBITMQ_URL: ${{ secrets.E2E_RABBITMQ_URL || 'amqp://veza:devpassword@rabbitmq:5672/' }}

        steps:
            # Third-party actions are pinned to full commit SHAs; the trailing
            # comment names the release tag for each SHA.
            - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2

            # Node 20, with the npm cache keyed on the root package-lock.json.
            - name: Set up Node
              uses: actions/setup-node@1d0ff469b7ec7b3cb9d8673fde0c81c44821de2a # v4.2.0
              with:
                  node-version: "20"
                  cache: "npm"
                  cache-dependency-path: package-lock.json

            # Go 1.25, with the Go cache keyed on the backend module's go.sum.
            - name: Set up Go
              uses: actions/setup-go@f111f3307d8850f501ac008e886eec1fd1932a34 # v5.3.0
              with:
                  go-version: "1.25"
                  cache: true
                  cache-dependency-path: veza-backend-api/go.sum

            # Lockfile-exact install at the repository root. Later steps read
            # node_modules (e.g. to resolve the installed Playwright version).
            - name: Install dependencies
              run: npm ci

            # Sprint 2 design-system migrated to Style Dictionary; the
            # generated tokens live in packages/design-system/dist/
            # (gitignored). The Playwright-spawned Vite imports them via
            # `@veza/design-system/tokens-generated`, so dist/ MUST exist
            # before vite starts.
            - name: Build design tokens
              run: npm run build:tokens --workspace=@veza/design-system

            # Playwright tests reach the frontend via http://veza.fr:5174,
            # which the browsers resolve via /etc/hosts. Without this entry
            # the navigation step times out.
            # NOTE(review): the "Run E2E" steps set PLAYWRIGHT_BASE_URL to
            # http://localhost:5174, not veza.fr — confirm which host the
            # tests actually navigate to.
            - name: Add veza.fr to hosts
              run: echo "127.0.0.1 veza.fr" | sudo tee -a /etc/hosts

            # Runs the repository's key and certificate generation scripts
            # from the repo root, in this order.
            # NOTE(review): presumably these write the JWT key pair and TLS
            # certificate that the backend loads at startup — confirm against
            # the scripts, which are not visible from this workflow.
            - name: Generate dev JWT keys + SSL cert
              run: |
                  ./scripts/generate-jwt-keys.sh
                  ./scripts/generate-ssl-cert.sh

            # Apply the schema before any data is written. Runs from the
            # backend module directory so the relative Go paths resolve.
            - name: Run database migrations
              working-directory: veza-backend-api
              run: go run cmd/migrate_tool/main.go

            # Populate the freshly migrated database using the seed tool's
            # --ci mode.
            - name: Seed database (CI mode — 5 test accounts + minimal fixtures)
              working-directory: veza-backend-api
              run: go run ./cmd/tools/seed --ci

            # Builds the API binary, launches it in the background (it keeps
            # running for the following steps) and blocks until
            # /api/v1/health reports ok. Every failure path prints the tail
            # of the backend log before exiting non-zero.
            - name: Build + start backend API
              env:
                  APP_ENV: test
                  APP_PORT: "18080"
                  COOKIE_SECURE: "false"
                  CORS_ALLOWED_ORIGINS: http://veza.fr:5174,http://localhost:5174
                  DISABLE_RATE_LIMIT_FOR_TESTS: "true"
                  RATE_LIMIT_LIMIT: "10000"
                  RATE_LIMIT_WINDOW: "60"
                  ACCOUNT_LOCKOUT_EXEMPT_EMAILS: "user@veza.music,artist@veza.music,admin@veza.music,mod@veza.music,new@veza.music"
              run: |
                  cd veza-backend-api
                  go build -o veza-api ./cmd/api/main.go
                  ./veza-api > /tmp/backend.log 2>&1 &
                  BACKEND_PID=$!

                  # Shared by every failure path below.
                  dump_backend_log() {
                    echo "--- /tmp/backend.log (last 200 lines) ---"
                    tail -n 200 /tmp/backend.log
                  }

                  # Poll up to 30 times, 1s apart, each probe capped at 2s —
                  # beats a fixed sleep on a cold start. `ready` records
                  # whether a probe ever succeeded so that running out of
                  # attempts is reported as such.
                  ready=false
                  for attempt in $(seq 1 30); do
                    if curl -sf -m 2 http://localhost:18080/api/v1/health > /tmp/health.json 2>/dev/null; then
                      ready=true
                      break
                    fi
                    if ! kill -0 "$BACKEND_PID" 2>/dev/null; then
                      echo "::error::backend process died before becoming reachable"
                      dump_backend_log
                      exit 1
                    fi
                    sleep 1
                  done

                  if [ "$ready" != "true" ]; then
                    echo "::error::backend did not return a successful /api/v1/health response after 30 attempts"
                    dump_backend_log
                    exit 1
                  fi

                  # Always print the response body so debugging doesn't
                  # require re-running with extra logging. Artifact upload
                  # is broken under Forgejo (GHES not supported), so the
                  # log step output is our only diagnostic channel.
                  echo "--- /api/v1/health response ---"
                  cat /tmp/health.json
                  echo

                  # The /api/v1/health envelope is the standard veza response
                  # shape: {"success": true, "data": {"status": "ok"}}. Earlier
                  # versions of this check used `.status == "ok"` at the root,
                  # which silently misses the actual ok signal nested under
                  # `.data`. The misread surfaced as "backend health is not ok"
                  # despite a 200 + valid body — wasted a CI cycle.
                  if ! jq -e '.data.status == "ok"' /tmp/health.json >/dev/null; then
                    echo "::error::backend health is not ok"
                    dump_backend_log
                    exit 1
                  fi
                  echo "Backend healthy"

            # Cache the Playwright browser binaries between runs.
            # Chromium download is ~150MB and adds 30-60s to every cold
            # run. The cache key embeds the @playwright/test version that
            # `npm ci` installed from package-lock.json, so a Playwright
            # bump invalidates the cache automatically.
            - name: Resolve Playwright version
              id: playwright-version
              run: |
                PV=$(node -p "require('./node_modules/@playwright/test/package.json').version")
                # Quote the output file path: it is runner-provided and must
                # not be word-split.
                echo "version=$PV" >> "$GITHUB_OUTPUT"

            # `cache-hit` is 'true' only on an exact `key` match; a restore
            # through `restore-keys` leaves it false, which sends the next
            # step down the full-install path.
            - name: Cache Playwright browsers
              id: playwright-cache
              uses: actions/cache@1bd1e32a3bdc45362d1e726936510720a7c30a57 # v4.2.0
              with:
                path: ~/.cache/ms-playwright
                key: playwright-${{ runner.os }}-${{ steps.playwright-version.outputs.version }}-chromium
                restore-keys: |
                  playwright-${{ runner.os }}-${{ steps.playwright-version.outputs.version }}-

            - name: Install Playwright browsers
              # Exact cache hit: the browser binaries are already on disk, so
              # only the OS packages (apt-get sweep) are installed.
              # Anything else: download Chromium and install its OS packages.
              env:
                BROWSER_CACHE_HIT: ${{ steps.playwright-cache.outputs.cache-hit }}
              run: |
                if [ "$BROWSER_CACHE_HIT" != "true" ]; then
                  npx playwright install --with-deps chromium
                else
                  npx playwright install-deps chromium
                fi

            # Exactly one of the two "Run E2E" steps executes per run; their
            # `if:` conditions partition the four triggers declared under
            # `on:`. Both steps share the same env block — keep them in sync.
            # NOTE(review): the VITE_* / PORT values are presumably consumed
            # by the Vite dev server that Playwright spawns — confirm against
            # playwright.config.ts.

            # Fast gate: pull requests and pushes run the @critical subset.
            - name: Run E2E (@critical — PR + push)
              if: github.event_name == 'pull_request' || github.event_name == 'push'
              env:
                  PORT: "5174"
                  VITE_API_URL: "/api/v1"
                  VITE_DOMAIN: veza.fr
                  # Must match APP_PORT of the "Build + start backend API" step.
                  VITE_BACKEND_PORT: "18080"
                  PLAYWRIGHT_BASE_URL: "http://localhost:5174"
              run: npm run e2e:critical

            # Broad sweep: the nightly cron and manual dispatch run everything.
            - name: Run E2E (full — cron / workflow_dispatch)
              if: github.event_name == 'schedule' || github.event_name == 'workflow_dispatch'
              env:
                  PORT: "5174"
                  VITE_API_URL: "/api/v1"
                  VITE_DOMAIN: veza.fr
                  # Must match APP_PORT of the "Build + start backend API" step.
                  VITE_BACKEND_PORT: "18080"
                  PLAYWRIGHT_BASE_URL: "http://localhost:5174"
              run: npm run e2e

            # Failure diagnostics. The backend step notes that artifact upload
            # is broken under Forgejo, which leaves the step output as the only
            # reliable channel — so the backend log is printed first, and the
            # uploads below are best-effort.
            - name: Print backend log
              if: failure()
              run: |
                  if [ -f /tmp/backend.log ]; then
                    echo "--- /tmp/backend.log (last 200 lines) ---"
                    tail -n 200 /tmp/backend.log
                  else
                    echo "no backend log found at /tmp/backend.log"
                  fi

            - name: Upload Playwright report
              if: failure()
              # Best effort: an upload error must not add a second red step
              # on top of the real failure.
              continue-on-error: true
              uses: actions/upload-artifact@65c4c4a1ddee5b72f698fdd19549f0f0fb45cf08 # v4.6.0
              with:
                  # run_id + run_attempt keep names unique across re-runs.
                  name: playwright-report-${{ github.run_id }}-${{ github.run_attempt }}
                  path: |
                      tests/e2e/playwright-report/
                      tests/e2e/test-results/
                  retention-days: 7

            - name: Upload backend log
              if: failure()
              # Best effort, same reasoning as the Playwright report upload.
              continue-on-error: true
              uses: actions/upload-artifact@65c4c4a1ddee5b72f698fdd19549f0f0fb45cf08 # v4.6.0
              with:
                  name: backend-log-${{ github.run_id }}-${{ github.run_attempt }}
                  path: /tmp/backend.log
                  retention-days: 7