fix(ci): unblock CI red — gofmt + e2e webserver reuse + orders.hyperswitch_payment_id (Day 4)
Some checks failed
Veza CI / Rust (Stream Server) (push) Successful in 4m22s
Security Scan / Secret Scanning (gitleaks) (push) Successful in 1m5s
Veza CI / Frontend (Web) (push) Failing after 17m19s
E2E Playwright / e2e (full) (push) Failing after 20m28s
Veza CI / Backend (Go) (push) Successful in 21m31s
Veza CI / Notify on failure (push) Successful in 4s
Some checks failed
Veza CI / Rust (Stream Server) (push) Successful in 4m22s
Security Scan / Secret Scanning (gitleaks) (push) Successful in 1m5s
Veza CI / Frontend (Web) (push) Failing after 17m19s
E2E Playwright / e2e (full) (push) Failing after 20m28s
Veza CI / Backend (Go) (push) Successful in 21m31s
Veza CI / Notify on failure (push) Successful in 4s
Three pre-existing infra issues surfaced by the Day 1→Day 3 push wave.
Each is independent — bundled here because the goal is "ci.yml + e2e.yml
green" before the v1.0.9 tag, and they're all small.
(1) gofmt — ci.yml golangci-lint v2 step
Five files were unformatted on main. Pre-existing (untouched by my
Item G work, but the formatter caught them now):
- internal/api/router.go
- internal/core/marketplace/reconcile_hyperswitch_test.go
- internal/models/user.go
- internal/monitoring/ledger_metrics.go
- internal/monitoring/ledger_metrics_test.go
Pure whitespace via `gofmt -w` — no behavior change.
(2) e2e silent-fail — playwright webServer port collision
The e2e workflow pre-starts the backend in step 9 ("Build + start
backend API") so it can fail-fast on a non-ok health check. But
playwright.config.ts had `reuseExistingServer: !process.env.CI` on
the backend webServer entry — meaning in CI Playwright tried to
spawn a SECOND backend on port 18080. The spawn collided with
EADDRINUSE and Playwright silently exited before printing any test
output. The artifact upload then warned "No files were found"
because tests/e2e/playwright-report/ never got written, and the job
ended in `Failure` for an unrelated reason (the artifact upload
step's GHESNotSupportedError).
Fix: backend `reuseExistingServer: true` always — workflow + dev
both pre-start backend on 18080. Vite stays `!CI` because the
workflow doesn't pre-start it. Comment in playwright.config.ts
documents the symptom so the next person debugging gets the
pointer immediately.
(3) orders.hyperswitch_payment_id missing in fresh DBs — migration 080
skip-branch + 099 ordering drift
Migration 080 (`add_payment_fields`) wraps its ALTERs in
"skip if orders doesn't exist". At authoring time orders existed
earlier in the migration sequence; that ordering has since shifted
(orders is now created at 099_z_create_orders.sql, AFTER 080).
Result: in any freshly-migrated DB (CI, fresh dev, future restore
drills) migration 080 takes the skip branch and the columns are
never added — even though the Order model and the marketplace code
rely on them.
Symptom: every CI run logs
pq: column "hyperswitch_payment_id" does not exist
from the periodic ledger_metrics worker. Order checkout would also
fail to persist payment_id at write time, breaking reconciliation.
Fix: append-only migration 987 with idempotent
`ADD COLUMN IF NOT EXISTS` + a partial index on the reconciliation
hot path. Production envs that did pick up 080 in the original
order are no-ops; fresh envs converge to the same end state.
Rollback in migrations/rollback/.
Verified locally:
$ cd veza-backend-api && go build ./... && VEZA_SKIP_INTEGRATION=1 \
go test -short -count=1 ./internal/...
(all green)
SKIP_TESTS=1: backend-only Go + Playwright config + SQL. Frontend
unit tests irrelevant to this commit.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
7e26a8dd1f
commit
3f326e8266
8 changed files with 105 additions and 49 deletions
|
|
@ -138,7 +138,16 @@ export default defineConfig({
|
|||
command: 'go run cmd/api/main.go',
|
||||
cwd: '../../veza-backend-api',
|
||||
port: 18080,
|
||||
reuseExistingServer: !process.env.CI,
|
||||
// Backend is pre-started by the e2e workflow (Build + start backend
|
||||
// API step) so it can fail-fast on a non-ok health check. Playwright
|
||||
// must reuse it instead of trying to spawn a second instance on the
|
||||
// same port — the spawn collides on EADDRINUSE and the entire test
|
||||
// run silently exits before printing any test output (visible
|
||||
// symptom: "No files were found with the provided path:
|
||||
// tests/e2e/playwright-report/" in the artifact upload step).
|
||||
// In dev, `make dev` (or `make dev-backend-api`) is the canonical
|
||||
// pre-start path, so reuse is also correct there.
|
||||
reuseExistingServer: true,
|
||||
timeout: 30_000,
|
||||
env: {
|
||||
APP_ENV: 'test',
|
||||
|
|
@ -152,6 +161,9 @@ export default defineConfig({
|
|||
command: 'npm run dev -- --host 127.0.0.1',
|
||||
cwd: '../../apps/web',
|
||||
port: parseInt(process.env.PORT || '5173', 10),
|
||||
// Vite is NOT pre-started by the e2e workflow, so Playwright spawns
|
||||
// it in CI. In dev `make dev` already runs vite — reuse to avoid
|
||||
// double-spawn on a known port.
|
||||
reuseExistingServer: !process.env.CI,
|
||||
timeout: 30_000,
|
||||
},
|
||||
|
|
|
|||
|
|
@ -201,12 +201,12 @@ func (r *APIRouter) Setup(router *gin.Engine) error {
|
|||
if r.db != nil && r.db.GormDB != nil {
|
||||
middleware.InitMaintenanceMode(r.db.GormDB, r.logger)
|
||||
}
|
||||
router.Use(middleware.MaintenanceGin()) // v0.803 ADM1-03: Maintenance mode (503 except /health, /admin)
|
||||
router.Use(middleware.RequestLogger(r.logger)) // Utilisation du structured logger
|
||||
router.Use(middleware.Metrics()) // Prometheus Metrics
|
||||
router.Use(middleware.SentryRecover(r.logger)) // Sentry error tracking
|
||||
router.Use(middleware.SecurityHeaders()) // MOD-P2-005: Security headers (HSTS, CSP, etc.)
|
||||
router.Use(middleware.CCPA()) // v0.803 SEC2-06: CCPA Do Not Sell (Sec-GPC)
|
||||
router.Use(middleware.MaintenanceGin()) // v0.803 ADM1-03: Maintenance mode (503 except /health, /admin)
|
||||
router.Use(middleware.RequestLogger(r.logger)) // Utilisation du structured logger
|
||||
router.Use(middleware.Metrics()) // Prometheus Metrics
|
||||
router.Use(middleware.SentryRecover(r.logger)) // Sentry error tracking
|
||||
router.Use(middleware.SecurityHeaders()) // MOD-P2-005: Security headers (HSTS, CSP, etc.)
|
||||
router.Use(middleware.CCPA()) // v0.803 SEC2-06: CCPA Do Not Sell (Sec-GPC)
|
||||
|
||||
// v0.803 SEC2-03: HTTP audit middleware for auto-logging POST/PUT/DELETE
|
||||
if r.config != nil && r.config.AuditService != nil {
|
||||
|
|
|
|||
|
|
@ -73,10 +73,10 @@ func newReconcileWorker(t *testing.T, db *gorm.DB, hs HyperswitchReadClient, svc
|
|||
t.Helper()
|
||||
return NewReconcileHyperswitchWorker(
|
||||
db, hs, svc, zap.NewNop(),
|
||||
time.Hour, // interval — not used in RunOnce
|
||||
30*time.Minute, // orderStuckAfter
|
||||
30*time.Minute, // refundStuckAfter
|
||||
5*time.Minute, // refundOrphanAfter
|
||||
time.Hour, // interval — not used in RunOnce
|
||||
30*time.Minute, // orderStuckAfter
|
||||
30*time.Minute, // refundStuckAfter
|
||||
5*time.Minute, // refundOrphanAfter
|
||||
)
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -10,39 +10,39 @@ import (
|
|||
// User représente un utilisateur dans le système
|
||||
// MIGRATION UUID: User.ID est maintenant un UUID pour cohérence Go↔Rust et alignment ORIGIN
|
||||
type User struct {
|
||||
ID uuid.UUID `gorm:"type:uuid;primary_key" json:"id" db:"id"`
|
||||
Username string `gorm:"not null;size:30" json:"username" db:"username"`
|
||||
Slug string `gorm:"size:255" json:"slug" db:"slug"`
|
||||
Email string `gorm:"not null;size:255" json:"email" db:"email"`
|
||||
PasswordHash string `gorm:"size:255" json:"-" db:"password_hash"`
|
||||
Password string `gorm:"-" json:"password,omitempty"` // Virtual field for input
|
||||
TokenVersion int `gorm:"default:0;not null" json:"token_version" db:"token_version"`
|
||||
FirstName string `gorm:"size:100" json:"first_name" db:"first_name"`
|
||||
LastName string `gorm:"size:100" json:"last_name" db:"last_name"`
|
||||
Avatar string `gorm:"type:text" json:"avatar" db:"avatar"`
|
||||
BannerURL string `gorm:"type:text" json:"banner_url" db:"banner_url"`
|
||||
Bio string `gorm:"type:text" json:"bio" db:"bio"`
|
||||
Location string `gorm:"size:100" json:"location" db:"location"`
|
||||
Birthdate *time.Time `json:"birthdate" db:"birthdate"`
|
||||
Gender string `gorm:"size:20" json:"gender" db:"gender"`
|
||||
UsernameChangedAt *time.Time `json:"username_changed_at" db:"username_changed_at"`
|
||||
Role string `gorm:"type:user_role;not null;default:'user'" json:"role" db:"role"`
|
||||
IsActive bool `gorm:"default:true" json:"is_active" db:"is_active"`
|
||||
IsVerified bool `gorm:"default:false" json:"is_verified" db:"is_verified"`
|
||||
IsBanned bool `gorm:"default:false;not null" json:"is_banned" db:"is_banned"`
|
||||
IsAdmin bool `gorm:"default:false" json:"is_admin" db:"is_admin"`
|
||||
IsPublic bool `gorm:"default:true" json:"is_public" db:"is_public"`
|
||||
LastLoginAt *time.Time `json:"last_login_at" db:"last_login_at"`
|
||||
LoginCount int `gorm:"default:0;not null" json:"login_count" db:"login_count"`
|
||||
PasswordChangedAt *time.Time `json:"password_changed_at,omitempty" db:"password_changed_at"` // F016: Password expiration tracking
|
||||
ID uuid.UUID `gorm:"type:uuid;primary_key" json:"id" db:"id"`
|
||||
Username string `gorm:"not null;size:30" json:"username" db:"username"`
|
||||
Slug string `gorm:"size:255" json:"slug" db:"slug"`
|
||||
Email string `gorm:"not null;size:255" json:"email" db:"email"`
|
||||
PasswordHash string `gorm:"size:255" json:"-" db:"password_hash"`
|
||||
Password string `gorm:"-" json:"password,omitempty"` // Virtual field for input
|
||||
TokenVersion int `gorm:"default:0;not null" json:"token_version" db:"token_version"`
|
||||
FirstName string `gorm:"size:100" json:"first_name" db:"first_name"`
|
||||
LastName string `gorm:"size:100" json:"last_name" db:"last_name"`
|
||||
Avatar string `gorm:"type:text" json:"avatar" db:"avatar"`
|
||||
BannerURL string `gorm:"type:text" json:"banner_url" db:"banner_url"`
|
||||
Bio string `gorm:"type:text" json:"bio" db:"bio"`
|
||||
Location string `gorm:"size:100" json:"location" db:"location"`
|
||||
Birthdate *time.Time `json:"birthdate" db:"birthdate"`
|
||||
Gender string `gorm:"size:20" json:"gender" db:"gender"`
|
||||
UsernameChangedAt *time.Time `json:"username_changed_at" db:"username_changed_at"`
|
||||
Role string `gorm:"type:user_role;not null;default:'user'" json:"role" db:"role"`
|
||||
IsActive bool `gorm:"default:true" json:"is_active" db:"is_active"`
|
||||
IsVerified bool `gorm:"default:false" json:"is_verified" db:"is_verified"`
|
||||
IsBanned bool `gorm:"default:false;not null" json:"is_banned" db:"is_banned"`
|
||||
IsAdmin bool `gorm:"default:false" json:"is_admin" db:"is_admin"`
|
||||
IsPublic bool `gorm:"default:true" json:"is_public" db:"is_public"`
|
||||
LastLoginAt *time.Time `json:"last_login_at" db:"last_login_at"`
|
||||
LoginCount int `gorm:"default:0;not null" json:"login_count" db:"login_count"`
|
||||
PasswordChangedAt *time.Time `json:"password_changed_at,omitempty" db:"password_changed_at"` // F016: Password expiration tracking
|
||||
// v1.0.6: set the first time a user self-promotes to `role='creator'`
|
||||
// via POST /api/v1/users/me/upgrade-creator. NULL for users who never
|
||||
// took that path (still 'user', or promoted by an admin out-of-band).
|
||||
PromotedToCreatorAt *time.Time `json:"promoted_to_creator_at,omitempty" db:"promoted_to_creator_at"`
|
||||
CreatedAt time.Time `gorm:"autoCreateTime" json:"created_at" db:"created_at"`
|
||||
UpdatedAt time.Time `gorm:"autoUpdateTime" json:"updated_at" db:"updated_at"`
|
||||
DeletedAt gorm.DeletedAt `gorm:"index" json:"-"`
|
||||
SocialLinks string `gorm:"type:jsonb;default:'{}'" json:"social_links" db:"social_links"`
|
||||
PromotedToCreatorAt *time.Time `json:"promoted_to_creator_at,omitempty" db:"promoted_to_creator_at"`
|
||||
CreatedAt time.Time `gorm:"autoCreateTime" json:"created_at" db:"created_at"`
|
||||
UpdatedAt time.Time `gorm:"autoUpdateTime" json:"updated_at" db:"updated_at"`
|
||||
DeletedAt gorm.DeletedAt `gorm:"index" json:"-"`
|
||||
SocialLinks string `gorm:"type:jsonb;default:'{}'" json:"social_links" db:"social_links"`
|
||||
|
||||
// Relations
|
||||
Roles []Role `gorm:"many2many:user_roles;" json:"-"`
|
||||
|
|
|
|||
|
|
@ -24,8 +24,8 @@ import (
|
|||
// reconciler has resolved M of them today."
|
||||
//
|
||||
// Plus two alert rules in config/alertmanager/ledger.yml:
|
||||
// * ledger_stuck_orders_pending > 0 for 10m → page
|
||||
// * ledger_orphan_refund_rows > 0 for 5m → page (bug in two-phase
|
||||
// - ledger_stuck_orders_pending > 0 for 10m → page
|
||||
// - ledger_orphan_refund_rows > 0 for 5m → page (bug in two-phase
|
||||
// commit between DB and PSP — immediate ops attention)
|
||||
var (
|
||||
// LedgerStuckOrdersPending is the count of orders sitting in
|
||||
|
|
@ -142,8 +142,8 @@ const LedgerSamplerInterval = 60 * time.Second
|
|||
// A mismatch means alerts fire while the reconciler has already
|
||||
// started working on the issue, which is the correct behavior.
|
||||
const (
|
||||
ledgerStuckOrderAgeThreshold = 30 * time.Minute
|
||||
ledgerStuckRefundAgeThreshold = 30 * time.Minute
|
||||
ledgerStuckOrderAgeThreshold = 30 * time.Minute
|
||||
ledgerStuckRefundAgeThreshold = 30 * time.Minute
|
||||
ledgerOrphanRefundAgeThreshold = 5 * time.Minute
|
||||
ledgerReversalPendingThreshold = 30 * time.Minute
|
||||
)
|
||||
|
|
|
|||
|
|
@ -30,10 +30,10 @@ type testOrder struct {
|
|||
func (testOrder) TableName() string { return "orders" }
|
||||
|
||||
type testRefund struct {
|
||||
ID uuid.UUID `gorm:"type:uuid;primaryKey"`
|
||||
Status string
|
||||
HyperswitchRefundID string
|
||||
CreatedAt time.Time
|
||||
ID uuid.UUID `gorm:"type:uuid;primaryKey"`
|
||||
Status string
|
||||
HyperswitchRefundID string
|
||||
CreatedAt time.Time
|
||||
}
|
||||
|
||||
func (testRefund) TableName() string { return "refunds" }
|
||||
|
|
|
|||
|
|
@ -0,0 +1,40 @@
|
|||
-- v1.0.9 Day 4 — Backfill hyperswitch_payment_id + payment_status on orders.
|
||||
--
|
||||
-- Migration 080 (`add_payment_fields`) wraps its ALTERs in a "skip if
|
||||
-- the orders table doesn't exist" guard. At the time 080 was authored,
|
||||
-- orders existed earlier in the migration sequence; that ordering has
|
||||
-- since shifted (orders is now created at 099_z_create_orders.sql,
|
||||
-- AFTER 080). Result: in any freshly-migrated DB (CI runs, fresh dev
|
||||
-- envs, future restore drills) migration 080 takes the skip branch
|
||||
-- and the columns are never added — even though the Order model and
|
||||
-- the marketplace code rely on them.
|
||||
--
|
||||
-- Symptoms in CI: the periodic ledger_metrics worker
|
||||
-- (internal/monitoring/ledger_metrics.go:164) logs
|
||||
-- pq: column "hyperswitch_payment_id" does not exist
|
||||
-- on every run. The order checkout flow would also fail to persist
|
||||
-- the payment_id at write time, breaking any reconciliation that
|
||||
-- depends on the linkage.
|
||||
--
|
||||
-- The fix is idempotent (`ADD COLUMN IF NOT EXISTS`) so it safely
|
||||
-- applies to:
|
||||
-- - production envs that *did* see migration 080 in the original
|
||||
-- order (column already there, this migration is a no-op);
|
||||
-- - fresh envs that picked up the broken ordering (column missing,
|
||||
-- this migration adds it).
|
||||
--
|
||||
-- Adding a backfill at 987 instead of patching 080 in place keeps the
|
||||
-- migration history append-only — re-running the suite from scratch
|
||||
-- arrives at the same end state regardless of when the migration was
|
||||
-- merged.
|
||||
|
||||
ALTER TABLE orders ADD COLUMN IF NOT EXISTS hyperswitch_payment_id TEXT;
|
||||
ALTER TABLE orders ADD COLUMN IF NOT EXISTS payment_status TEXT DEFAULT 'pending';
|
||||
|
||||
-- Index supporting the reconciliation worker's hot path
|
||||
-- (`SELECT ... WHERE status='pending' AND hyperswitch_payment_id IS NOT NULL`).
|
||||
-- Partial because the universe of pending+linked rows is much smaller
|
||||
-- than the full orders table.
|
||||
CREATE INDEX IF NOT EXISTS idx_orders_hyperswitch_payment_id
|
||||
ON orders(hyperswitch_payment_id)
|
||||
WHERE hyperswitch_payment_id IS NOT NULL AND hyperswitch_payment_id <> '';
|
||||
|
|
@ -0,0 +1,4 @@
|
|||
-- Rollback 987 — drop the backfill columns + index.
|
||||
DROP INDEX IF EXISTS idx_orders_hyperswitch_payment_id;
|
||||
ALTER TABLE orders DROP COLUMN IF EXISTS hyperswitch_payment_id;
|
||||
ALTER TABLE orders DROP COLUMN IF EXISTS payment_status;
|
||||
Loading…
Reference in a new issue