veza/veza-backend-api/internal/eventbus/rabbitmq.go
senke 4b4770f06e fix(eventbus): log RabbitMQ publish failures instead of silent drop
Sixth item of the v1.0.6 backlog. `RabbitMQEventBus.Publish` returned the
broker error but did not log it. Callers that wrap Publish in
fire-and-forget (`_ = eb.Publish(...)`) lost events with zero trace —
during an RMQ outage the backend would quietly shed work and operators
only noticed via downstream symptoms (missing notifications, stuck
async jobs, etc.).

Changes
  * `Publish` now emits a structured ERROR with the exchange,
    routing_key, payload_bytes, content_type, and message_id on every
    broker failure. The function still returns the error so call-sites
    that actually check it keep working exactly as before.
  * The pre-existing "EventBus disabled" warning is kept but upgraded
    with payload_bytes so dashboards can quantify drops when RMQ is
    intentionally off (tests, dev without docker-compose --profile).
  * `infrastructure/eventbus/rabbitmq.go:PublishEvent` (the newer,
    event-sourcing variant) already had this pattern — this commit
    brings the legacy path in line.

Tests
  * 2 new tests in `rabbitmq_test.go`:
      - disabled bus emits a single WARN with structured context and
        returns EventBusUnavailableError
      - nil logger path stays panic-free (legacy callers construct
        bus without a logger)
  * Broker-side failure path (closed channel) is not unit-tested here
    because amqp091-go types don't expose a mockable channel without
    spinning up a real RMQ — covered by the existing integration test
    in `internal/integration/e2e_test.go`.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-16 20:50:51 +02:00

179 lines
5.7 KiB
Go

package eventbus
import (
	"context"
	"fmt"
	"net/url"
	"time"

	amqp "github.com/rabbitmq/amqp091-go"
	"go.uber.org/zap"
)
// RabbitMQConfig holds the settings used to set up the RabbitMQ EventBus.
type RabbitMQConfig struct {
	// URL is the connection string handed to amqp.Dial.
	URL string
	// MaxRetries is the number of connection attempts. A zero value is
	// replaced by 1 in NewRabbitMQEventBusWithRetry.
	MaxRetries int
	// RetryInterval is the pause between two connection attempts. A zero
	// value is replaced by 5 seconds in NewRabbitMQEventBusWithRetry.
	RetryInterval time.Duration
	// Enable switches the EventBus on; when false the EventBus is disabled.
	Enable bool
}
// EventBusUnavailableError is returned when the EventBus is disabled or not
// available.
type EventBusUnavailableError struct {
	// Msg is the text returned verbatim by Error.
	Msg string
}

// Error implements the error interface by returning Msg unchanged.
func (e *EventBusUnavailableError) Error() string {
	return e.Msg
}
// RabbitMQEventBus wraps a RabbitMQ connection and channel and exposes the
// publish / consume / close / health operations built on them.
type RabbitMQEventBus struct {
	// conn is the broker connection; it is left nil on a disabled bus.
	conn *amqp.Connection
	// channel is the channel opened on conn that Publish and Consume use.
	channel *amqp.Channel
	// config is the configuration the bus was built from.
	config *RabbitMQConfig
	// logger receives the bus's log output. Publish and Consume check it for
	// nil before logging.
	logger *zap.Logger
	// IsEnabled reports whether the EventBus is active.
	IsEnabled bool
}
// NewRabbitMQEventBusWithRetry connects to RabbitMQ, retrying when the dial
// fails.
//
// When cfg.Enable is false no connection is attempted and a disabled bus
// (IsEnabled == false) is returned with a nil error. Otherwise amqp.Dial is
// tried up to cfg.MaxRetries times, sleeping cfg.RetryInterval between two
// attempts. Defaults are written back into cfg: a non-positive MaxRetries
// becomes 1 and a zero RetryInterval becomes 5 seconds.
//
// A nil logger is replaced by a no-op logger, so the returned bus always
// carries a usable logger.
//
// Errors:
//   - a plain error when cfg is nil, or when a channel cannot be opened on a
//     freshly established connection (the connection is closed and no further
//     attempt is made);
//   - *EventBusUnavailableError when every dial attempt failed.
func NewRabbitMQEventBusWithRetry(cfg *RabbitMQConfig, logger *zap.Logger) (*RabbitMQEventBus, error) {
	if cfg == nil {
		return nil, fmt.Errorf("nil RabbitMQ config")
	}
	if logger == nil {
		logger = zap.NewNop()
	}

	if !cfg.Enable {
		logger.Info("📴 EventBus RabbitMQ désactivé par configuration.")
		return &RabbitMQEventBus{config: cfg, logger: logger, IsEnabled: false}, nil
	}

	// A negative MaxRetries used to skip the loop entirely and report a
	// connection failure with a nil cause; treat it like 0.
	if cfg.MaxRetries <= 0 {
		cfg.MaxRetries = 1
	}
	if cfg.RetryInterval == 0 {
		cfg.RetryInterval = 5 * time.Second
	}

	// AMQP URLs usually embed credentials (amqp://user:pass@host), so only a
	// redacted copy is ever written to the log.
	loggedURL := redactAMQPURL(cfg.URL)

	var err error
	for i := 0; i < cfg.MaxRetries; i++ {
		logger.Info("🔄 Tentative de connexion à RabbitMQ",
			zap.Int("attempt", i+1),
			zap.Int("max_attempts", cfg.MaxRetries),
			zap.String("url", loggedURL))

		var conn *amqp.Connection
		conn, err = amqp.Dial(cfg.URL)
		if err == nil {
			logger.Info("✅ Connexion à RabbitMQ établie avec succès.")
			channel, chErr := conn.Channel()
			if chErr != nil {
				// Best-effort cleanup: the channel error is the one worth reporting.
				_ = conn.Close()
				return nil, fmt.Errorf("failed to open RabbitMQ channel: %w", chErr)
			}
			return &RabbitMQEventBus{conn: conn, channel: channel, config: cfg, logger: logger, IsEnabled: true}, nil
		}

		logger.Warn("❌ Échec de connexion à RabbitMQ",
			zap.Error(err),
			zap.Int("attempt", i+1),
			zap.Int("max_attempts", cfg.MaxRetries))

		// No point sleeping after the last attempt.
		if i < cfg.MaxRetries-1 {
			logger.Info("🔄 Nouvelle tentative de connexion RabbitMQ dans quelques secondes...",
				zap.Duration("interval", cfg.RetryInterval))
			time.Sleep(cfg.RetryInterval)
		}
	}

	// Every attempt failed; the caller decides between degraded mode and a
	// fatal exit based on the returned error.
	logger.Error("❌ Échec de connexion à RabbitMQ après toutes les tentatives.",
		zap.Int("max_attempts", cfg.MaxRetries),
		zap.Error(err))
	return nil, &EventBusUnavailableError{Msg: fmt.Sprintf("failed to connect to RabbitMQ after %d attempts: %v", cfg.MaxRetries, err)}
}

// redactAMQPURL returns raw with any password masked so it is safe to log.
// A string that does not parse as a URL is replaced by a fixed placeholder
// rather than echoed, because it may still contain credentials.
func redactAMQPURL(raw string) string {
	parsed, err := url.Parse(raw)
	if err != nil {
		return "<unparseable RabbitMQ URL>"
	}
	return parsed.Redacted()
}
// Publish sends msg to the given RabbitMQ exchange with the given routing key.
//
// v1.0.6: silent-drop fix — every failure path emits a structured log so
// callers that wrap Publish in fire-and-forget (`_ = eb.Publish(...)`) can't
// lose events without a trace. The error is still returned so call-sites that
// do care get the exact failure for their own handling.
//
// Returns:
//   - *EventBusUnavailableError when the bus is disabled, or when it is marked
//     enabled but has no channel (IsEnabled is an exported field, so a bus can
//     be enabled without ever having been connected);
//   - the error from the channel's PublishWithContext, unchanged, when the
//     publish itself fails;
//   - nil on success.
//
// Logging is skipped entirely when the bus has no logger.
func (eb *RabbitMQEventBus) Publish(ctx context.Context, exchange, routingKey string, mandatory, immediate bool, msg amqp.Publishing) error {
	if !eb.IsEnabled {
		if eb.logger != nil {
			eb.logger.Warn("EventBus disabled — message dropped",
				zap.String("exchange", exchange),
				zap.String("routing_key", routingKey),
				zap.Int("payload_bytes", len(msg.Body)),
			)
		}
		return &EventBusUnavailableError{Msg: "EventBus is disabled"}
	}

	// Calling a method on a nil channel would panic; report it as an
	// unavailable bus instead, matching the nil checks in Close and Health.
	if eb.channel == nil {
		if eb.logger != nil {
			eb.logger.Error("RabbitMQ channel not initialized — message dropped",
				zap.String("exchange", exchange),
				zap.String("routing_key", routingKey),
				zap.Int("payload_bytes", len(msg.Body)),
			)
		}
		return &EventBusUnavailableError{Msg: "EventBus channel is not initialized"}
	}

	err := eb.channel.PublishWithContext(ctx, exchange, routingKey, mandatory, immediate, msg)
	if err == nil {
		return nil
	}

	if eb.logger != nil {
		// ERROR level: the broker is reachable in config but rejected the
		// publish. Could be a dropped connection, channel close, flow
		// control, or a missing exchange. Operators should page on this.
		eb.logger.Error("RabbitMQ publish failed — message not delivered",
			zap.String("exchange", exchange),
			zap.String("routing_key", routingKey),
			zap.Int("payload_bytes", len(msg.Body)),
			zap.String("content_type", msg.ContentType),
			zap.String("message_id", msg.MessageId),
			zap.Error(err),
		)
	}
	return err
}
// Consume starts a RabbitMQ consumer on queue and returns its delivery
// channel. All arguments are forwarded unchanged to the underlying channel's
// Consume method.
//
// Returns a nil delivery channel and *EventBusUnavailableError when the bus is
// disabled, or when it is marked enabled but has no channel (IsEnabled is an
// exported field, so a bus can be enabled without ever having been connected).
// Otherwise the result of the underlying Consume call is returned as is.
//
// Logging is skipped entirely when the bus has no logger.
func (eb *RabbitMQEventBus) Consume(queue, consumer string, autoAck, exclusive, noLocal, noWait bool, args amqp.Table) (<-chan amqp.Delivery, error) {
	if !eb.IsEnabled {
		if eb.logger != nil {
			eb.logger.Warn("⚠️ Tentative de consommation sur EventBus désactivé",
				zap.String("queue", queue),
				zap.String("consumer", consumer))
		}
		return nil, &EventBusUnavailableError{Msg: "EventBus is disabled"}
	}

	// Calling a method on a nil channel would panic; report it as an
	// unavailable bus instead, matching the nil checks in Close and Health.
	if eb.channel == nil {
		if eb.logger != nil {
			eb.logger.Error("RabbitMQ channel not initialized — cannot consume",
				zap.String("queue", queue),
				zap.String("consumer", consumer))
		}
		return nil, &EventBusUnavailableError{Msg: "EventBus channel is not initialized"}
	}

	return eb.channel.Consume(queue, consumer, autoAck, exclusive, noLocal, noWait, args)
}
// Close closes the RabbitMQ channel and then the connection.
//
// It is a no-op returning nil on a disabled bus. Both closes are always
// attempted, even if the first one fails; when at least one fails, a single
// error listing every failure is returned. The success message is logged only
// when both closes succeeded and the bus has a logger.
func (eb *RabbitMQEventBus) Close() error {
	if !eb.IsEnabled {
		return nil
	}

	var errs []error
	if eb.channel != nil {
		if err := eb.channel.Close(); err != nil {
			errs = append(errs, fmt.Errorf("failed to close RabbitMQ channel: %w", err))
		}
	}
	if eb.conn != nil {
		if err := eb.conn.Close(); err != nil {
			errs = append(errs, fmt.Errorf("failed to close RabbitMQ connection: %w", err))
		}
	}
	if len(errs) > 0 {
		return fmt.Errorf("errors closing RabbitMQ: %v", errs)
	}

	// Publish and Consume tolerate a bus built without a logger; Close must
	// not be the one method that panics on it.
	if eb.logger != nil {
		eb.logger.Info("🔌 Connexion RabbitMQ fermée.")
	}
	return nil
}
// Health checks the state of the RabbitMQ connection.
//
// It returns an error when the bus is disabled, when there is no connection
// or the connection is closed, or when a temporary channel cannot be opened
// on the connection. It returns nil once that temporary channel has been
// opened; the channel is closed again before returning.
func (eb *RabbitMQEventBus) Health() error {
	switch {
	case !eb.IsEnabled:
		return fmt.Errorf("RabbitMQ EventBus est désactivé")
	case eb.conn == nil || eb.conn.IsClosed():
		return fmt.Errorf("connexion RabbitMQ non établie ou fermée")
	}

	// Opening a throwaway channel exercises the connection end to end.
	probe, openErr := eb.conn.Channel()
	if openErr != nil {
		return fmt.Errorf("impossible d'ouvrir un canal RabbitMQ: %w", openErr)
	}

	// The probe has served its purpose; its close result is not reported.
	_ = probe.Close()
	return nil
}