veza/veza-backend-api/internal/middleware/maintenance_test.go
senke 3a95e38fdf fix(middleware): persist maintenance flag via platform_settings table
The maintenance toggle lived in a package-level `bool` inside
`middleware/maintenance.go`. Flipping it via `PUT /admin/maintenance`
only updated the pod handling that request — the other N-1 pods stayed
open for traffic. In practice this meant deploys-in-progress or
incident playbooks silently failed to put the fleet into maintenance.

New storage:

  * Migration `976_platform_settings.sql` adds a typed key/value table
    (`value_bool` / `value_text` to avoid string parsing in the hot
    path) and seeds `maintenance_mode=false`. Idempotent on re-run.
  * `middleware/maintenance.go` rewritten around a `maintenanceState`
    with a 10s TTL cache. `InitMaintenanceMode(db, logger)` primes the
    cache at boot; `MaintenanceModeEnabled()` refreshes lazily when the
    next request lands after the TTL. Startup `MAINTENANCE_MODE` env is
    still honoured for fresh pods.
  * `router.go` calls `InitMaintenanceMode` before applying the
    `MaintenanceGin()` middleware so the first request sees DB truth.
  * `PUT /api/v1/admin/maintenance` in `routes_core.go` now does an
    `INSERT ... ON CONFLICT DO UPDATE` on the table *before* the
    in-memory setter, so the flip survives restarts and propagates to
    every pod within ~10s (one TTL window).

Tests: `TestMaintenanceGin_DBBacked` flips the DB row, waits past a
shrunk-for-test TTL, and asserts the cache picked up the change. All
four pre-existing tests preserved (`Disabled`, `Enabled_Returns503`,
`HealthExempt`, `AdminExempt`).

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-16 14:57:06 +02:00

138 lines
3.6 KiB
Go

package middleware
import (
"net/http"
"net/http/httptest"
"testing"
"time"
"github.com/gin-gonic/gin"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
"go.uber.org/zap/zaptest"
"gorm.io/driver/sqlite"
"gorm.io/gorm"
)
// TestMaintenanceGin_Disabled asserts that, after SetMaintenanceMode(false),
// a request to an ordinary API route passes through MaintenanceGin and gets
// the handler's 200 response.
func TestMaintenanceGin_Disabled(t *testing.T) {
	gin.SetMode(gin.TestMode)

	// Pin the flag off for the test and leave it off on exit so later tests
	// start from the same state.
	SetMaintenanceMode(false)
	defer SetMaintenanceMode(false)

	const target = "/api/v1/dashboard"

	engine := gin.New()
	engine.Use(MaintenanceGin())
	engine.GET(target, func(ctx *gin.Context) {
		ctx.Status(http.StatusOK)
	})

	rec := httptest.NewRecorder()
	engine.ServeHTTP(rec, httptest.NewRequest(http.MethodGet, target, nil))

	assert.Equal(t, http.StatusOK, rec.Code)
}
// TestMaintenanceGin_Enabled_Returns503 asserts that, after
// SetMaintenanceMode(true), a request to an ordinary API route is answered
// with 503 Service Unavailable and a body that mentions "maintenance".
func TestMaintenanceGin_Enabled_Returns503(t *testing.T) {
	gin.SetMode(gin.TestMode)

	// Turn the flag on for this test only; the deferred call switches it
	// back off on exit.
	SetMaintenanceMode(true)
	defer SetMaintenanceMode(false)

	const target = "/api/v1/dashboard"

	engine := gin.New()
	engine.Use(MaintenanceGin())
	engine.GET(target, func(ctx *gin.Context) {
		ctx.Status(http.StatusOK)
	})

	rec := httptest.NewRecorder()
	engine.ServeHTTP(rec, httptest.NewRequest(http.MethodGet, target, nil))

	assert.Equal(t, http.StatusServiceUnavailable, rec.Code)
	assert.Contains(t, rec.Body.String(), "maintenance")
}
// TestMaintenanceGin_HealthExempt asserts that GET /health still returns 200
// while the maintenance flag is set to true.
func TestMaintenanceGin_HealthExempt(t *testing.T) {
	gin.SetMode(gin.TestMode)

	// Turn the flag on for this test only; the deferred call switches it
	// back off on exit.
	SetMaintenanceMode(true)
	defer SetMaintenanceMode(false)

	const target = "/health"

	engine := gin.New()
	engine.Use(MaintenanceGin())
	engine.GET(target, func(ctx *gin.Context) {
		ctx.Status(http.StatusOK)
	})

	rec := httptest.NewRecorder()
	engine.ServeHTTP(rec, httptest.NewRequest(http.MethodGet, target, nil))

	assert.Equal(t, http.StatusOK, rec.Code)
}
// TestMaintenanceGin_AdminExempt asserts that a route under /api/v1/admin
// still returns 200 while the maintenance flag is set to true.
func TestMaintenanceGin_AdminExempt(t *testing.T) {
	gin.SetMode(gin.TestMode)

	// Turn the flag on for this test only; the deferred call switches it
	// back off on exit.
	SetMaintenanceMode(true)
	defer SetMaintenanceMode(false)

	const target = "/api/v1/admin/reports"

	engine := gin.New()
	engine.Use(MaintenanceGin())
	engine.GET(target, func(ctx *gin.Context) {
		ctx.Status(http.StatusOK)
	})

	rec := httptest.NewRecorder()
	engine.ServeHTTP(rec, httptest.NewRequest(http.MethodGet, target, nil))

	assert.Equal(t, http.StatusOK, rec.Code)
}
// TestMaintenanceGin_DBBacked verifies that changes written to
// platform_settings propagate to MaintenanceModeEnabled() once the cache TTL
// lapses. This guards the multi-pod correctness claim of v1.0.4.
//
// Timing notes: the cache TTL is shrunk to testTTL (instead of
// defaultMaintenanceCacheTTL) so the test stays quick, yet it is kept wide
// enough that the "still cached" assertion is not racing the clock on a slow
// or -race CI runner. The post-TTL check polls with require.Eventually
// rather than sleeping a fixed interval and reading once.
func TestMaintenanceGin_DBBacked(t *testing.T) {
	const (
		// testTTL is the shortened cache lifetime used by this test.
		testTTL = 200 * time.Millisecond
		// pollTimeout bounds how long we wait for the refreshed value.
		pollTimeout = 2 * time.Second
		// pollTick is how often the refreshed value is sampled.
		pollTick = 10 * time.Millisecond
	)

	db, err := gorm.Open(sqlite.Open(":memory:"), &gorm.Config{})
	require.NoError(t, err)

	// Minimal stand-in for the platform_settings table, seeded with the
	// maintenance flag turned off.
	require.NoError(t, db.Exec(`
		CREATE TABLE platform_settings (
			id INTEGER PRIMARY KEY AUTOINCREMENT,
			key TEXT NOT NULL UNIQUE,
			value_bool BOOLEAN,
			value_text TEXT,
			description TEXT,
			updated_at DATETIME,
			updated_by TEXT
		)`).Error)
	require.NoError(t, db.Exec(
		`INSERT INTO platform_settings (key, value_bool, description) VALUES ('maintenance_mode', 0, 'test')`,
	).Error)

	// Start from a clean slate so no prior test leaked state into the package
	// globals.
	SetMaintenanceMode(false)
	defer SetMaintenanceMode(false)

	InitMaintenanceMode(db, zaptest.NewLogger(t))

	// Shrink the TTL so we don't have to wait for the default TTL.
	state.mu.Lock()
	state.ttl = testTTL
	state.mu.Unlock()
	// Restore the package-level state on exit: default TTL back in place and
	// the test database detached.
	defer func() {
		state.mu.Lock()
		state.ttl = defaultMaintenanceCacheTTL
		state.db = nil
		state.mu.Unlock()
	}()

	assert.False(t, MaintenanceModeEnabled(), "seeded value=0 should read as off")

	// Flip the DB row; before TTL the cached value still says off.
	require.NoError(t, db.Exec(
		`UPDATE platform_settings SET value_bool = 1 WHERE key = 'maintenance_mode'`,
	).Error)
	assert.False(t, MaintenanceModeEnabled(), "cache should still report off before TTL")

	// Poll until the cache has expired and been refreshed from the DB. This
	// returns as soon as the new value is visible and tolerates scheduler
	// jitter better than a single read after a fixed sleep.
	require.Eventually(t, func() bool {
		return MaintenanceModeEnabled()
	}, pollTimeout, pollTick, "after TTL the refresh should pick up the new value")
}