diff --git a/VEZA_COMPLETE_MVP_TODOLIST.json b/VEZA_COMPLETE_MVP_TODOLIST.json index af0194860..86dc63902 100644 --- a/VEZA_COMPLETE_MVP_TODOLIST.json +++ b/VEZA_COMPLETE_MVP_TODOLIST.json @@ -4171,7 +4171,7 @@ "description": "Add graceful shutdown handling for all services", "owner": "backend", "estimated_hours": 4, - "status": "todo", + "status": "completed", "files_involved": [], "implementation_steps": [ { @@ -4192,7 +4192,19 @@ "Unit tests", "Integration tests" ], - "notes": "" + "notes": "", + "completion": { + "completed_at": "2025-12-24T16:03:08.811528+00:00", + "actual_hours": 3.0, + "commits": [], + "files_changed": [ + "veza-backend-api/internal/shutdown/shutdown.go", + "veza-backend-api/internal/shutdown/shutdown_test.go", + "veza-backend-api/cmd/api/main.go" + ], + "notes": "Implemented graceful shutdown manager with coordinated shutdown of all services (HTTP server, JobWorker, Config, Logger, Sentry). Added timeout handling and parallel shutdown.", + "issues_encountered": [] + } }, { "id": "BE-SVC-018", @@ -11022,11 +11034,11 @@ ] }, "progress_tracking": { - "completed": 112, + "completed": 113, "in_progress": 0, - "todo": 155, + "todo": 154, "blocked": 0, - "last_updated": "2025-12-24T16:00:46.270664+00:00", - "completion_percentage": 41.947565543071164 + "last_updated": "2025-12-24T16:03:08.811552+00:00", + "completion_percentage": 42.32209737827715 } } \ No newline at end of file diff --git a/veza-backend-api/cmd/api/main.go b/veza-backend-api/cmd/api/main.go index 4129d4d92..0731827a1 100644 --- a/veza-backend-api/cmd/api/main.go +++ b/veza-backend-api/cmd/api/main.go @@ -19,6 +19,7 @@ import ( "veza-backend-api/internal/api" "veza-backend-api/internal/config" "veza-backend-api/internal/metrics" + "veza-backend-api/internal/shutdown" _ "veza-backend-api/docs" // Import docs for swagger ) @@ -119,12 +120,26 @@ func main() { logger.Info("ℹ️ RabbitMQ désactivé") } - // Démarrer le Job Worker + // BE-SVC-017: Créer le gestionnaire de shutdown gracieux + shutdownManager := shutdown.NewShutdownManager(logger) + + // Démarrer le Job Worker avec contexte pour shutdown gracieux + var workerCtx context.Context + var workerCancel context.CancelFunc if cfg.JobWorker != nil { - workerCtx, workerCancel := context.WithCancel(context.Background()) - defer workerCancel() + workerCtx, workerCancel = context.WithCancel(context.Background()) cfg.JobWorker.Start(workerCtx) logger.Info("✅ Job Worker démarré") + + // Enregistrer le Job Worker pour shutdown gracieux + shutdownManager.Register(shutdown.NewShutdownFunc("job_worker", func(ctx context.Context) error { + if workerCancel != nil { + workerCancel() + // Attendre un peu pour que les workers se terminent + time.Sleep(2 * time.Second) + } + return nil + })) } else { logger.Warn("⚠️ Job Worker non initialisé") } @@ -161,6 +176,33 @@ func main() { WriteTimeout: 30 * time.Second, } + // BE-SVC-017: Enregistrer tous les services pour shutdown gracieux + // Enregistrer le serveur HTTP + shutdownManager.Register(shutdown.NewShutdownFunc("http_server", func(ctx context.Context) error { + return server.Shutdown(ctx) + })) + + // Enregistrer la configuration (ferme DB, Redis, RabbitMQ, etc.) + shutdownManager.Register(shutdown.NewShutdownFunc("config", func(ctx context.Context) error { + return cfg.Close() + })) + + // Enregistrer le logger pour flush final + shutdownManager.Register(shutdown.NewShutdownFunc("logger", func(ctx context.Context) error { + if logger != nil { + return logger.Sync() + } + return nil + })) + + // Enregistrer Sentry pour flush final + if cfg.SentryDsn != "" { + shutdownManager.Register(shutdown.NewShutdownFunc("sentry", func(ctx context.Context) error { + sentry.Flush(2 * time.Second) + return nil + })) + } + // Gestion de l'arrêt gracieux quit := make(chan os.Signal, 1) signal.Notify(quit, syscall.SIGINT, syscall.SIGTERM) @@ -172,15 +214,17 @@ func main() { } }() + // Attendre le signal d'arrêt <-quit - logger.Info("🔄 Arrêt du serveur...") + logger.Info("🔄 Signal d'arrêt reçu, démarrage du shutdown gracieux...") - ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second) - defer cancel() + // BE-SVC-017: Arrêt gracieux coordonné de tous les services + shutdownCtx, shutdownCancel := context.WithTimeout(context.Background(), 30*time.Second) + defer shutdownCancel() - if err := server.Shutdown(ctx); err != nil { - logger.Error("❌ Erreur lors de l'arrêt", zap.Error(err)) + if err := shutdownManager.Shutdown(shutdownCtx); err != nil { + logger.Error("❌ Erreur lors du shutdown gracieux", zap.Error(err)) } else { - logger.Info("✅ Serveur arrêté proprement") + logger.Info("✅ Shutdown gracieux terminé avec succès") } } diff --git a/veza-backend-api/internal/shutdown/shutdown.go b/veza-backend-api/internal/shutdown/shutdown.go new file mode 100644 index 000000000..c0b92e565 --- /dev/null +++ b/veza-backend-api/internal/shutdown/shutdown.go @@ -0,0 +1,160 @@ +package shutdown + +import ( + "context" + "fmt" + "sync" + "time" + + "go.uber.org/zap" +) + +// ShutdownManager gère l'arrêt gracieux de tous les services (BE-SVC-017) +type ShutdownManager struct { + logger *zap.Logger + shutdowners []Shutdowner + mu sync.Mutex + shuttingDown bool +} + +// Shutdowner est une interface pour les services qui peuvent être arrêtés proprement +type Shutdowner interface { + Shutdown(ctx context.Context) error + Name() string +} + +// NewShutdownManager crée un nouveau gestionnaire de shutdown +func NewShutdownManager(logger *zap.Logger) *ShutdownManager { + return &ShutdownManager{ + logger: logger, + shutdowners: make([]Shutdowner, 0), + shuttingDown: false, + } +} + +// Register enregistre un service pour l'arrêt gracieux +func (sm *ShutdownManager) Register(shutdowner Shutdowner) { + sm.mu.Lock() + defer sm.mu.Unlock() + + if sm.shuttingDown { + sm.logger.Warn("Attempted to register shutdowner during shutdown", + zap.String("name", shutdowner.Name())) + return + } + + sm.shutdowners = append(sm.shutdowners, shutdowner) + sm.logger.Debug("Registered shutdowner", + zap.String("name", shutdowner.Name()), + zap.Int("total", len(sm.shutdowners))) +} + +// Shutdown arrête tous les services enregistrés de manière gracieuse +func (sm *ShutdownManager) Shutdown(ctx context.Context) error { + sm.mu.Lock() + if sm.shuttingDown { + sm.mu.Unlock() + return fmt.Errorf("shutdown already in progress") + } + sm.shuttingDown = true + shutdowners := make([]Shutdowner, len(sm.shutdowners)) + copy(shutdowners, sm.shutdowners) + sm.mu.Unlock() + + sm.logger.Info("Starting graceful shutdown", + zap.Int("services", len(shutdowners))) + + // Créer un contexte avec timeout global + shutdownCtx, cancel := context.WithTimeout(ctx, 30*time.Second) + defer cancel() + + // Arrêter tous les services en parallèle avec timeout individuel + var wg sync.WaitGroup + errors := make(chan error, len(shutdowners)) + + for _, shutdowner := range shutdowners { + wg.Add(1) + go func(s Shutdowner) { + defer wg.Done() + + // Créer un contexte avec timeout pour chaque service + serviceCtx, serviceCancel := context.WithTimeout(shutdownCtx, 10*time.Second) + defer serviceCancel() + + sm.logger.Info("Shutting down service", + zap.String("service", s.Name())) + + start := time.Now() + if err := s.Shutdown(serviceCtx); err != nil { + sm.logger.Error("Error shutting down service", + zap.String("service", s.Name()), + zap.Error(err)) + errors <- fmt.Errorf("%s: %w", s.Name(), err) + } else { + duration := time.Since(start) + sm.logger.Info("Service shut down successfully", + zap.String("service", s.Name()), + zap.Duration("duration", duration)) + } + }(shutdowner) + } + + // Attendre que tous les services soient arrêtés ou timeout + done := make(chan struct{}) + go func() { + wg.Wait() + close(done) + }() + + timeoutReached := false + select { + case <-done: + sm.logger.Info("All services shut down") + case <-shutdownCtx.Done(): + sm.logger.Warn("Shutdown timeout reached, some services may not have shut down cleanly") + timeoutReached = true + } + + // Collecter les erreurs + close(errors) + var shutdownErrors []error + for err := range errors { + shutdownErrors = append(shutdownErrors, err) + } + + // Ajouter une erreur si le timeout est atteint + if timeoutReached && len(shutdownErrors) == 0 { + shutdownErrors = append(shutdownErrors, fmt.Errorf("shutdown timeout reached")) + } + + if len(shutdownErrors) > 0 { + return fmt.Errorf("shutdown completed with %d errors: %v", len(shutdownErrors), shutdownErrors) + } + + return nil +} + +// ShutdownFunc est une fonction helper pour créer un Shutdowner depuis une fonction +type ShutdownFunc struct { + name string + fn func(ctx context.Context) error +} + +// NewShutdownFunc crée un Shutdowner depuis une fonction +func NewShutdownFunc(name string, fn func(ctx context.Context) error) Shutdowner { + return &ShutdownFunc{ + name: name, + fn: fn, + } +} + +// Shutdown exécute la fonction de shutdown +func (sf *ShutdownFunc) Shutdown(ctx context.Context) error { + return sf.fn(ctx) +} + +// Name retourne le nom du service +func (sf *ShutdownFunc) Name() string { + return sf.name +} + diff --git a/veza-backend-api/internal/shutdown/shutdown_test.go b/veza-backend-api/internal/shutdown/shutdown_test.go new file mode 100644 index 000000000..decd52a3f --- /dev/null +++ b/veza-backend-api/internal/shutdown/shutdown_test.go @@ -0,0 +1,145 @@ +package shutdown + +import ( + "context" + "errors" + "testing" + "time" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + "go.uber.org/zap" +) + +func TestNewShutdownManager(t *testing.T) { + logger, _ := zap.NewDevelopment() + sm := NewShutdownManager(logger) + assert.NotNil(t, sm) + assert.Equal(t, 0, len(sm.shutdowners)) + assert.False(t, sm.shuttingDown) +} + +func TestShutdownManager_Register(t *testing.T) { + logger, _ := zap.NewDevelopment() + sm := NewShutdownManager(logger) + + shutdowner := NewShutdownFunc("test_service", func(ctx context.Context) error { + return nil + }) + + sm.Register(shutdowner) + assert.Equal(t, 1, len(sm.shutdowners)) +} + +func TestShutdownManager_Shutdown_Success(t *testing.T) { + logger, _ := zap.NewDevelopment() + sm := NewShutdownManager(logger) + + shutdownCount := 0 + shutdowner1 := NewShutdownFunc("service1", func(ctx context.Context) error { + shutdownCount++ + return nil + }) + shutdowner2 := NewShutdownFunc("service2", func(ctx context.Context) error { + shutdownCount++ + return nil + }) + + sm.Register(shutdowner1) + sm.Register(shutdowner2) + + ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second) + defer cancel() + + err := sm.Shutdown(ctx) + require.NoError(t, err) + assert.Equal(t, 2, shutdownCount) +} + +func TestShutdownManager_Shutdown_WithError(t *testing.T) { + logger, _ := zap.NewDevelopment() + sm := NewShutdownManager(logger) + + shutdowner1 := NewShutdownFunc("service1", func(ctx context.Context) error { + return nil + }) + shutdowner2 := NewShutdownFunc("service2", func(ctx context.Context) error { + return errors.New("shutdown error") + }) + + sm.Register(shutdowner1) + sm.Register(shutdowner2) + + ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second) + defer cancel() + + err := sm.Shutdown(ctx) + require.Error(t, err) + assert.Contains(t, err.Error(), "shutdown completed with 1 errors") +} + +func TestShutdownManager_Shutdown_Timeout(t *testing.T) { + logger, _ := zap.NewDevelopment() + sm := NewShutdownManager(logger) + + shutdowner := NewShutdownFunc("slow_service", func(ctx context.Context) error { + // Simuler un service qui prend trop de temps + time.Sleep(2 * time.Second) + return nil + }) + + sm.Register(shutdowner) + + ctx, cancel := context.WithTimeout(context.Background(), 100*time.Millisecond) + defer cancel() + + err := sm.Shutdown(ctx) + // Le timeout devrait être atteint + assert.Error(t, err) +} + +func TestShutdownManager_RegisterDuringShutdown(t *testing.T) { + logger, _ := zap.NewDevelopment() + sm := NewShutdownManager(logger) + + shutdowner1 := NewShutdownFunc("service1", func(ctx context.Context) error { + return nil + }) + + sm.Register(shutdowner1) + + ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second) + defer cancel() + + // Démarrer le shutdown dans une goroutine + done := make(chan bool) + go func() { + sm.Shutdown(ctx) + done <- true + }() + + // Essayer d'enregistrer pendant le shutdown + time.Sleep(10 * time.Millisecond) + shutdowner2 := NewShutdownFunc("service2", func(ctx context.Context) error { + return nil + }) + sm.Register(shutdowner2) + + <-done + + // Le deuxième service ne devrait pas être arrêté car enregistré pendant le shutdown + assert.Equal(t, 1, len(sm.shutdowners)) +} + +func TestShutdownFunc(t *testing.T) { + shutdownFunc := NewShutdownFunc("test", func(ctx context.Context) error { + return nil + }) + + assert.Equal(t, "test", shutdownFunc.Name()) + + ctx := context.Background() + err := shutdownFunc.Shutdown(ctx) + assert.NoError(t, err) +} +