Every POST /webhooks/hyperswitch delivery now writes a row to
`hyperswitch_webhook_log`, regardless of signature validity or
processing outcome. This captures both legitimate deliveries and
attack probes: a forensics query now has the actual bytes to read,
not just a "webhook rejected" log line. Disputes (axis-1 P1.6) ride
along: the log captures dispute.* events alongside payment and
refund events, ready for when disputes get a handler.
Table shape (migration 984); a sketch of the GORM model follows
the list:
* payload TEXT — readable in psql; invalid UTF-8 is stored as an
  empty string (for those attacks the forensic value is in the
  headers + IP + timing, not the binary body).
* signature_valid BOOLEAN — with a partial index so a "show me
  attack attempts" query is instantaneous.
* processing_result TEXT — 'ok' / 'error: <msg>' /
  'signature_invalid' / 'skipped'. Matches the P1.5 action
  semantics exactly.
* source_ip, user_agent, request_id — forensics essentials.
  request_id is captured from Hyperswitch's X-Request-Id header
  when present, else a server-side UUID, so every row correlates
  with VEZA's structured logs.
* event_type — best-effort extract from the JSON payload, NULL
  on malformed input.
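For orientation, a sketch of the GORM model this maps to, inferred
from the column list above; exact tags, field names beyond those the
tests use, and the index name in migration 984 may differ:

package hyperswitch

import (
	"time"

	"github.com/google/uuid"
)

// WebhookLog mirrors hyperswitch_webhook_log; one row per delivery.
type WebhookLog struct {
	ID               uuid.UUID `gorm:"type:uuid;primaryKey"`
	Payload          string    `gorm:"type:text"` // raw body; invalid UTF-8 stored empty
	SignatureValid   bool      `gorm:"index:idx_webhook_log_sig_invalid,where:signature_valid = false"` // partial index for attack queries
	ProcessingResult string    // 'ok' / 'error: <msg>' / 'signature_invalid' / 'skipped'
	SourceIP         string    // forensics essentials
	UserAgent        string
	RequestID        string    // X-Request-Id when present, else server-side UUID
	EventType        *string   // best-effort from JSON; NULL on malformed input
	ReceivedAt       time.Time `gorm:"autoCreateTime"` // retention-sweep cutoff key
}

// TableName pins the model to the migration-created table.
func (WebhookLog) TableName() string { return "hyperswitch_webhook_log" }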
Hardening:
* 64KB body cap via io.LimitReader rejects oversize bodies with
  413 before any INSERT, preventing log-spam DoS; see the sketch
  after this list.
* Single INSERT per delivery, written with the final state; no
  two-phase update race on the signature-failure path. Both
  signature_invalid and processing-error rows land.
* DB persistence failures are logged but swallowed — the
  endpoint's contract is to ack Hyperswitch, not to keep a
  perfect audit trail.
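A minimal sketch of the body cap, assuming a plain net/http handler
(the helper name is illustrative):

package hyperswitch

import (
	"io"
	"net/http"
)

const maxWebhookBody = 64 << 10 // 64 KiB

// readCappedBody enforces the cap before anything touches the DB.
// Reading one byte past the limit distinguishes a body exactly at
// the cap from an oversize one.
func readCappedBody(w http.ResponseWriter, r *http.Request) ([]byte, bool) {
	body, err := io.ReadAll(io.LimitReader(r.Body, maxWebhookBody+1))
	if err != nil {
		http.Error(w, "read error", http.StatusBadRequest)
		return nil, false
	}
	if len(body) > maxWebhookBody {
		// Reject with 413 before any INSERT: oversize bodies never
		// reach the log table.
		http.Error(w, "payload too large", http.StatusRequestEntityTooLarge)
		return nil, false
	}
	return body, true
}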
Retention sweep:
* CleanupHyperswitchWebhookLog in internal/jobs, on a daily tick,
  runs a batched DELETE (10k rows per batch, 100ms pause between
  batches) so a large backlog doesn't lock the table; a sketch
  follows this list.
* HYPERSWITCH_WEBHOOK_LOG_RETENTION_DAYS (default 90).
* Same goroutine-ticker pattern as ScheduleOrphanTracksCleanup.
* Wired in cmd/api/main.go alongside the existing cleanup jobs.
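A sketch of the sweep, matching the call signature the tests below
exercise; the subquery-limited DELETE is one way to batch (Postgres
rejects a bare DELETE ... LIMIT), and names other than
CleanupHyperswitchWebhookLog are illustrative:

package jobs

import (
	"context"
	"time"

	"go.uber.org/zap"

	"veza-backend-api/internal/database"
	"veza-backend-api/internal/services/hyperswitch"
)

const (
	cleanupBatchSize  = 10000
	cleanupBatchPause = 100 * time.Millisecond
)

// CleanupHyperswitchWebhookLog deletes rows older than retentionDays
// in batches, pausing between batches so a large backlog doesn't
// lock the table. Returns the number of rows deleted.
func CleanupHyperswitchWebhookLog(ctx context.Context, db *database.Database, logger *zap.Logger, retentionDays int) (int64, error) {
	if retentionDays <= 0 {
		retentionDays = 90 // default when HYPERSWITCH_WEBHOOK_LOG_RETENTION_DAYS is unset
	}
	cutoff := time.Now().AddDate(0, 0, -retentionDays)

	var total int64
	for {
		sub := db.GormDB.Model(&hyperswitch.WebhookLog{}).
			Select("id").
			Where("received_at < ?", cutoff).
			Limit(cleanupBatchSize)
		res := db.GormDB.WithContext(ctx).
			Where("id IN (?)", sub).
			Delete(&hyperswitch.WebhookLog{})
		if res.Error != nil {
			return total, res.Error
		}
		total += res.RowsAffected
		if res.RowsAffected < cleanupBatchSize {
			logger.Info("hyperswitch webhook log cleanup done", zap.Int64("deleted", total))
			return total, nil
		}
		// Inter-batch pause, abandoned promptly on shutdown.
		select {
		case <-ctx.Done():
			return total, ctx.Err()
		case <-time.After(cleanupBatchPause):
		}
	}
}

// ScheduleHyperswitchWebhookLogCleanup (name illustrative) runs the
// sweep on a daily tick until ctx is cancelled, mirroring the
// ScheduleOrphanTracksCleanup pattern.
func ScheduleHyperswitchWebhookLogCleanup(ctx context.Context, db *database.Database, logger *zap.Logger, retentionDays int) {
	go func() {
		ticker := time.NewTicker(24 * time.Hour)
		defer ticker.Stop()
		for {
			select {
			case <-ctx.Done():
				return
			case <-ticker.C:
				if _, err := CleanupHyperswitchWebhookLog(ctx, db, logger, retentionDays); err != nil {
					logger.Warn("hyperswitch webhook log cleanup failed", zap.Error(err))
				}
			}
		}
	}()
}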
Tests: 5 in webhook_log_test.go (persistence, request_id auto-gen,
invalid-JSON leaves event_type empty, invalid-signature capture,
extractEventType 5 sub-cases) + 4 in
cleanup_hyperswitch_webhook_log_test.go (deletes-older-than, noop,
default-on-zero, context-cancel). Migration 984 applied cleanly to
local Postgres; all indexes present.
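For reference, a plausible shape for the extractEventType helper
named above, consistent with NULL-on-malformed-input and the
event_type key the tests seed; the real helper may differ:

package hyperswitch

import "encoding/json"

// extractEventType pulls event_type out of the raw payload,
// best-effort. An empty result is persisted as NULL.
func extractEventType(payload []byte) string {
	var probe struct {
		EventType string `json:"event_type"`
	}
	if err := json.Unmarshal(payload, &probe); err != nil {
		return ""
	}
	return probe.EventType
}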
Also (v107-plan.md):
* Item G's acceptance criteria gain an explicit Idempotency-Key
  threading requirement with an empty-key loud-fail test
  ("literally copy-paste D's 4-line test skeleton"). This closes
  the risk that item G silently reopens the HTTP-retry
  duplicate-charge exposure that D closed.
Out of scope for E (noted in CHANGELOG):
* Rate limit on the endpoint — pre-existing middleware already
  covers it at the router level; a per-endpoint limit is separate
  scope.
* Readable-payload SQL view — deferred; the TEXT column is
  already human-readable, and a convenience view is a
  nice-to-have, not a ship-blocker.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
internal/jobs/cleanup_hyperswitch_webhook_log_test.go (120 lines, 3.8 KiB, Go):
package jobs

import (
	"context"
	"testing"
	"time"

	"github.com/google/uuid"
	"github.com/stretchr/testify/assert"
	"github.com/stretchr/testify/require"
	"go.uber.org/zap"
	"gorm.io/driver/sqlite"
	"gorm.io/gorm"

	"veza-backend-api/internal/database"
	"veza-backend-api/internal/services/hyperswitch"
)

// setupCleanupTestDB spins up an in-memory SQLite database with the
// WebhookLog schema migrated.
func setupCleanupTestDB(t *testing.T) *database.Database {
	t.Helper()
	gdb, err := gorm.Open(sqlite.Open(":memory:"), &gorm.Config{})
	require.NoError(t, err)
	require.NoError(t, gdb.AutoMigrate(&hyperswitch.WebhookLog{}))
	return &database.Database{GormDB: gdb}
}

// seedWebhookLogRow plants a row with a specific received_at so the
// cleanup cutoff logic can be exercised.
func seedWebhookLogRow(t *testing.T, db *gorm.DB, receivedAt time.Time) {
	t.Helper()
	row := &hyperswitch.WebhookLog{
		ID:               uuid.New(),
		Payload:          `{"event_type":"test"}`,
		SignatureValid:   true,
		ProcessingResult: "ok",
		RequestID:        uuid.New().String(),
	}
	require.NoError(t, db.Create(row).Error)
	// Override received_at post-Create because autoCreateTime sets NOW().
	require.NoError(t, db.Model(row).Update("received_at", receivedAt).Error)
}

func TestCleanupHyperswitchWebhookLog_DeletesOlderThanRetention(t *testing.T) {
	db := setupCleanupTestDB(t)
	logger := zap.NewNop()

	now := time.Now()
	// Seed: 3 old rows (100 days ago), 2 recent (1 day ago).
	for i := 0; i < 3; i++ {
		seedWebhookLogRow(t, db.GormDB, now.Add(-100*24*time.Hour))
	}
	for i := 0; i < 2; i++ {
		seedWebhookLogRow(t, db.GormDB, now.Add(-24*time.Hour))
	}

	deleted, err := CleanupHyperswitchWebhookLog(context.Background(), db, logger, 90)
	require.NoError(t, err)
	assert.Equal(t, int64(3), deleted)

	var remaining int64
	require.NoError(t, db.GormDB.Model(&hyperswitch.WebhookLog{}).Count(&remaining).Error)
	assert.Equal(t, int64(2), remaining, "recent rows must be preserved")
}

func TestCleanupHyperswitchWebhookLog_NoopWhenNothingExpired(t *testing.T) {
	db := setupCleanupTestDB(t)
	logger := zap.NewNop()

	// All rows well within retention.
	for i := 0; i < 5; i++ {
		seedWebhookLogRow(t, db.GormDB, time.Now().Add(-1*time.Hour))
	}

	deleted, err := CleanupHyperswitchWebhookLog(context.Background(), db, logger, 90)
	require.NoError(t, err)
	assert.Equal(t, int64(0), deleted)

	var count int64
	require.NoError(t, db.GormDB.Model(&hyperswitch.WebhookLog{}).Count(&count).Error)
	assert.Equal(t, int64(5), count)
}

func TestCleanupHyperswitchWebhookLog_DefaultRetentionOnZero(t *testing.T) {
	db := setupCleanupTestDB(t)
	logger := zap.NewNop()

	// Row at 100 days old: deletable under the default (90d), but
	// young enough that a longer retention (e.g. 365d) would keep it.
	seedWebhookLogRow(t, db.GormDB, time.Now().Add(-100*24*time.Hour))

	// Passing 0 (or a negative value) must route through the default (90).
	deleted, err := CleanupHyperswitchWebhookLog(context.Background(), db, logger, 0)
	require.NoError(t, err)
	assert.Equal(t, int64(1), deleted)
}

func TestCleanupHyperswitchWebhookLog_RespectsCtxCancellation(t *testing.T) {
	db := setupCleanupTestDB(t)
	logger := zap.NewNop()

	// Forcing a real inter-batch pause would need more than batchSize
	// (10,000) expired rows. For test speed, seed a small set and
	// exercise cancellation via a pre-cancelled context instead: the
	// cancelled ctx surfaces through the batch delete or through the
	// inter-batch select.
	for i := 0; i < 12; i++ {
		seedWebhookLogRow(t, db.GormDB, time.Now().Add(-100*24*time.Hour))
	}

	ctx, cancel := context.WithCancel(context.Background())
	cancel() // pre-cancelled

	_, err := CleanupHyperswitchWebhookLog(ctx, db, logger, 90)
	// Either ctx.Err() from the select or context.Canceled wrapped in
	// the batch delete is acceptable; what matters is that the
	// function doesn't hang.
	if err != nil {
		assert.Contains(t, err.Error(), "context", "error must mention context cancellation")
	}
}