The maintenance toggle lived in a package-level `bool` inside
`middleware/maintenance.go`. Flipping it via `PUT /admin/maintenance`
only updated the pod handling that request — the other N-1 pods stayed
open for traffic. In practice this meant deploys-in-progress or
incident playbooks silently failed to put the fleet into maintenance.
New storage:
* Migration `976_platform_settings.sql` adds a typed key/value table
(`value_bool` / `value_text` to avoid string parsing in the hot
path) and seeds `maintenance_mode=false`. Idempotent on re-run.
* `middleware/maintenance.go` rewritten around a `maintenanceState`
with a 10s TTL cache. `InitMaintenanceMode(db, logger)` primes the
cache at boot; `MaintenanceModeEnabled()` refreshes lazily when the
next request lands after the TTL. Startup `MAINTENANCE_MODE` env is
still honoured for fresh pods.
* `router.go` calls `InitMaintenanceMode` before applying the
`MaintenanceGin()` middleware so the first request sees DB truth.
* `PUT /api/v1/admin/maintenance` in `routes_core.go` now does an
`INSERT ... ON CONFLICT DO UPDATE` on the table *before* the
in-memory setter, so the flip survives restarts and propagates to
every pod within ~10s (one TTL window).
Tests: `TestMaintenanceGin_DBBacked` flips the DB row, waits past a
shrunk-for-test TTL, and asserts the cache picked up the change. All
four pre-existing tests are preserved (`Disabled`, `Enabled_Returns503`,
`HealthExempt`, `AdminExempt`).
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
138 lines
3.6 KiB
Go
138 lines
3.6 KiB
Go
package middleware
|
|
|
|
import (
|
|
"net/http"
|
|
"net/http/httptest"
|
|
"testing"
|
|
"time"
|
|
|
|
"github.com/gin-gonic/gin"
|
|
"github.com/stretchr/testify/assert"
|
|
"github.com/stretchr/testify/require"
|
|
"go.uber.org/zap/zaptest"
|
|
"gorm.io/driver/sqlite"
|
|
"gorm.io/gorm"
|
|
)
|
|
|
|
func TestMaintenanceGin_Disabled(t *testing.T) {
|
|
gin.SetMode(gin.TestMode)
|
|
SetMaintenanceMode(false)
|
|
defer SetMaintenanceMode(false)
|
|
|
|
router := gin.New()
|
|
router.Use(MaintenanceGin())
|
|
router.GET("/api/v1/dashboard", func(c *gin.Context) {
|
|
c.Status(http.StatusOK)
|
|
})
|
|
|
|
req := httptest.NewRequest("GET", "/api/v1/dashboard", nil)
|
|
w := httptest.NewRecorder()
|
|
router.ServeHTTP(w, req)
|
|
|
|
assert.Equal(t, http.StatusOK, w.Code)
|
|
}
|
|
|
|
func TestMaintenanceGin_Enabled_Returns503(t *testing.T) {
|
|
gin.SetMode(gin.TestMode)
|
|
SetMaintenanceMode(true)
|
|
defer SetMaintenanceMode(false)
|
|
|
|
router := gin.New()
|
|
router.Use(MaintenanceGin())
|
|
router.GET("/api/v1/dashboard", func(c *gin.Context) {
|
|
c.Status(http.StatusOK)
|
|
})
|
|
|
|
req := httptest.NewRequest("GET", "/api/v1/dashboard", nil)
|
|
w := httptest.NewRecorder()
|
|
router.ServeHTTP(w, req)
|
|
|
|
assert.Equal(t, http.StatusServiceUnavailable, w.Code)
|
|
assert.Contains(t, w.Body.String(), "maintenance")
|
|
}
|
|
|
|
func TestMaintenanceGin_HealthExempt(t *testing.T) {
|
|
gin.SetMode(gin.TestMode)
|
|
SetMaintenanceMode(true)
|
|
defer SetMaintenanceMode(false)
|
|
|
|
router := gin.New()
|
|
router.Use(MaintenanceGin())
|
|
router.GET("/health", func(c *gin.Context) {
|
|
c.Status(http.StatusOK)
|
|
})
|
|
|
|
req := httptest.NewRequest("GET", "/health", nil)
|
|
w := httptest.NewRecorder()
|
|
router.ServeHTTP(w, req)
|
|
|
|
assert.Equal(t, http.StatusOK, w.Code)
|
|
}
|
|
|
|
func TestMaintenanceGin_AdminExempt(t *testing.T) {
|
|
gin.SetMode(gin.TestMode)
|
|
SetMaintenanceMode(true)
|
|
defer SetMaintenanceMode(false)
|
|
|
|
router := gin.New()
|
|
router.Use(MaintenanceGin())
|
|
router.GET("/api/v1/admin/reports", func(c *gin.Context) {
|
|
c.Status(http.StatusOK)
|
|
})
|
|
|
|
req := httptest.NewRequest("GET", "/api/v1/admin/reports", nil)
|
|
w := httptest.NewRecorder()
|
|
router.ServeHTTP(w, req)
|
|
|
|
assert.Equal(t, http.StatusOK, w.Code)
|
|
}
|
|
|
|
// TestMaintenanceGin_DBBacked verifies that changes written to
|
|
// platform_settings propagate to MaintenanceModeEnabled() once the cache TTL
|
|
// lapses. This guards the multi-pod correctness claim of v1.0.4.
|
|
func TestMaintenanceGin_DBBacked(t *testing.T) {
|
|
db, err := gorm.Open(sqlite.Open(":memory:"), &gorm.Config{})
|
|
require.NoError(t, err)
|
|
|
|
require.NoError(t, db.Exec(`
|
|
CREATE TABLE platform_settings (
|
|
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
|
key TEXT NOT NULL UNIQUE,
|
|
value_bool BOOLEAN,
|
|
value_text TEXT,
|
|
description TEXT,
|
|
updated_at DATETIME,
|
|
updated_by TEXT
|
|
)`).Error)
|
|
require.NoError(t, db.Exec(
|
|
`INSERT INTO platform_settings (key, value_bool, description) VALUES ('maintenance_mode', 0, 'test')`,
|
|
).Error)
|
|
|
|
// Start from a clean slate so no prior test leaked state into the package
|
|
// globals.
|
|
SetMaintenanceMode(false)
|
|
defer SetMaintenanceMode(false)
|
|
|
|
InitMaintenanceMode(db, zaptest.NewLogger(t))
|
|
// Shrink the TTL so we don't have to sleep 10s.
|
|
state.mu.Lock()
|
|
state.ttl = 50 * time.Millisecond
|
|
state.mu.Unlock()
|
|
defer func() {
|
|
state.mu.Lock()
|
|
state.ttl = defaultMaintenanceCacheTTL
|
|
state.db = nil
|
|
state.mu.Unlock()
|
|
}()
|
|
|
|
assert.False(t, MaintenanceModeEnabled(), "seeded value=0 should read as off")
|
|
|
|
// Flip the DB row; before TTL the cached value still says off.
|
|
require.NoError(t, db.Exec(
|
|
`UPDATE platform_settings SET value_bool = 1 WHERE key = 'maintenance_mode'`,
|
|
).Error)
|
|
assert.False(t, MaintenanceModeEnabled(), "cache should still report off before TTL")
|
|
|
|
time.Sleep(70 * time.Millisecond)
|
|
assert.True(t, MaintenanceModeEnabled(), "after TTL the refresh should pick up the new value")
|
|
}
|