Backend Go: - Remplacement complet des anciennes migrations par la base V1 alignée sur ORIGIN. - Durcissement global du parsing JSON (BindAndValidateJSON + RespondWithAppError). - Sécurisation de config.go, CORS, statuts de santé et monitoring. - Implémentation des transactions P0 (RBAC, duplication de playlists, social toggles). - Ajout d’un job worker structuré (emails, analytics, thumbnails) + tests associés. - Nouvelle doc backend : AUDIT_CONFIG, BACKEND_CONFIG, AUTH_PASSWORD_RESET, JOB_WORKER_*. Chat server (Rust): - Refonte du pipeline JWT + sécurité, audit et rate limiting avancé. - Implémentation complète du cycle de message (read receipts, delivered, edit/delete, typing). - Nettoyage des panics, gestion d’erreurs robuste, logs structurés. - Migrations chat alignées sur le schéma UUID et nouvelles features. Stream server (Rust): - Refonte du moteur de streaming (encoding pipeline + HLS) et des modules core. - Transactions P0 pour les jobs et segments, garanties d’atomicité. - Documentation détaillée de la pipeline (AUDIT_STREAM_*, DESIGN_STREAM_PIPELINE, TRANSACTIONS_P0_IMPLEMENTATION). Documentation & audits: - TRIAGE.md et AUDIT_STABILITY.md à jour avec l’état réel des 3 services. - Cartographie complète des migrations et des transactions (DB_MIGRATIONS_*, DB_TRANSACTION_PLAN, AUDIT_DB_TRANSACTIONS, TRANSACTION_TESTS_PHASE3). - Scripts de reset et de cleanup pour la lab DB et la V1. Ce commit fige l’ensemble du travail de stabilisation P0 (UUID, backend, chat et stream) avant les phases suivantes (Coherence Guardian, WS hardening, etc.).
258 lines
6.7 KiB
Go
258 lines
6.7 KiB
Go
package monitoring
|
|
|
|
import (
	"strconv"
	"time"

	"github.com/prometheus/client_golang/prometheus"
	"github.com/prometheus/client_golang/prometheus/promauto"
)
|
|
|
|
// Métriques Prometheus custom pour l'application Veza
|
|
|
|
var (
|
|
// HTTP Requests Metrics
|
|
HTTPRequestsTotal = promauto.NewCounterVec(
|
|
prometheus.CounterOpts{
|
|
Name: "veza_http_requests_total",
|
|
Help: "Total number of HTTP requests",
|
|
},
|
|
[]string{"method", "endpoint", "status"},
|
|
)
|
|
|
|
HTTPRequestDuration = promauto.NewHistogramVec(
|
|
prometheus.HistogramOpts{
|
|
Name: "veza_http_request_duration_seconds",
|
|
Help: "HTTP request duration in seconds",
|
|
Buckets: []float64{0.001, 0.005, 0.01, 0.05, 0.1, 0.5, 1.0, 5.0},
|
|
},
|
|
[]string{"method", "endpoint"},
|
|
)
|
|
|
|
// Authentication Metrics
|
|
AuthLoginAttempts = promauto.NewCounterVec(
|
|
prometheus.CounterOpts{
|
|
Name: "veza_auth_login_attempts_total",
|
|
Help: "Total number of login attempts",
|
|
},
|
|
[]string{"success"},
|
|
)
|
|
|
|
AuthSessionActive = promauto.NewGauge(
|
|
prometheus.GaugeOpts{
|
|
Name: "veza_auth_sessions_active",
|
|
Help: "Number of active sessions",
|
|
},
|
|
)
|
|
|
|
// Database Metrics
|
|
DatabaseQueryDuration = promauto.NewHistogramVec(
|
|
prometheus.HistogramOpts{
|
|
Name: "veza_database_query_duration_seconds",
|
|
Help: "Database query duration in seconds",
|
|
Buckets: []float64{0.001, 0.005, 0.01, 0.05, 0.1, 0.5, 1.0},
|
|
},
|
|
[]string{"operation", "table"},
|
|
)
|
|
|
|
DatabaseConnectionsActive = promauto.NewGauge(
|
|
prometheus.GaugeOpts{
|
|
Name: "veza_database_connections_active",
|
|
Help: "Number of active database connections",
|
|
},
|
|
)
|
|
|
|
DatabaseQueryErrors = promauto.NewCounterVec(
|
|
prometheus.CounterOpts{
|
|
Name: "veza_database_query_errors_total",
|
|
Help: "Total number of database query errors",
|
|
},
|
|
[]string{"operation", "error_type"},
|
|
)
|
|
|
|
// File Upload Metrics
|
|
FileUploadsTotal = promauto.NewCounterVec(
|
|
prometheus.CounterOpts{
|
|
Name: "veza_file_uploads_total",
|
|
Help: "Total number of file uploads",
|
|
},
|
|
[]string{"type", "status"},
|
|
)
|
|
|
|
FileUploadSize = promauto.NewHistogramVec(
|
|
prometheus.HistogramOpts{
|
|
Name: "veza_file_upload_size_bytes",
|
|
Help: "File upload size in bytes",
|
|
Buckets: prometheus.ExponentialBuckets(1024, 2, 15), // 1KB to 32MB
|
|
},
|
|
[]string{"type"},
|
|
)
|
|
|
|
// Rate Limiting Metrics
|
|
RateLimitHitsTotal = promauto.NewCounterVec(
|
|
prometheus.CounterOpts{
|
|
Name: "veza_rate_limit_hits_total",
|
|
Help: "Total number of rate limit hits",
|
|
},
|
|
[]string{"endpoint", "limit_type"},
|
|
)
|
|
|
|
// Active Users Metrics
|
|
ActiveUsers = promauto.NewGauge(
|
|
prometheus.GaugeOpts{
|
|
Name: "veza_active_users",
|
|
Help: "Number of active users",
|
|
},
|
|
)
|
|
|
|
// WebSocket Metrics
|
|
WebSocketConnectionsActive = promauto.NewGauge(
|
|
prometheus.GaugeOpts{
|
|
Name: "veza_websocket_connections_active",
|
|
Help: "Number of active WebSocket connections",
|
|
},
|
|
)
|
|
|
|
WebSocketMessagesTotal = promauto.NewCounterVec(
|
|
prometheus.CounterOpts{
|
|
Name: "veza_websocket_messages_total",
|
|
Help: "Total number of WebSocket messages",
|
|
},
|
|
[]string{"type", "status"},
|
|
)
|
|
|
|
// Cache Metrics
|
|
CacheHitsTotal = promauto.NewCounterVec(
|
|
prometheus.CounterOpts{
|
|
Name: "veza_cache_hits_total",
|
|
Help: "Total number of cache hits",
|
|
},
|
|
[]string{"cache_type"},
|
|
)
|
|
|
|
CacheMissesTotal = promauto.NewCounterVec(
|
|
prometheus.CounterOpts{
|
|
Name: "veza_cache_misses_total",
|
|
Help: "Total number of cache misses",
|
|
},
|
|
[]string{"cache_type"},
|
|
)
|
|
|
|
// Error Metrics
|
|
ErrorsTotal = promauto.NewCounterVec(
|
|
prometheus.CounterOpts{
|
|
Name: "veza_errors_total",
|
|
Help: "Total number of errors",
|
|
},
|
|
[]string{"type", "severity"},
|
|
)
|
|
|
|
// Health Check Metrics
|
|
HealthCheckDuration = promauto.NewHistogramVec(
|
|
prometheus.HistogramOpts{
|
|
Name: "veza_health_check_duration_ms",
|
|
Help: "Health check duration in milliseconds",
|
|
Buckets: []float64{1, 5, 10, 25, 50, 100, 250, 500, 1000},
|
|
},
|
|
[]string{"service"}, // database, redis, chat_server, stream_server
|
|
)
|
|
|
|
HealthCheckStatus = promauto.NewGaugeVec(
|
|
prometheus.GaugeOpts{
|
|
Name: "veza_health_check_status",
|
|
Help: "Health check status (1=ok, 0.5=slow, 0=error)",
|
|
},
|
|
[]string{"service"},
|
|
)
|
|
)
|
|
|
|
// Middleware pour enregistrer les métriques HTTP
|
|
func HTTPMetricsMiddleware(endpoint string, duration time.Duration, statusCode int, method string) {
|
|
status := string(rune(statusCode / 100)) // '2', '4', '5'
|
|
|
|
HTTPRequestsTotal.WithLabelValues(method, endpoint, status).Inc()
|
|
HTTPRequestDuration.WithLabelValues(method, endpoint).Observe(duration.Seconds())
|
|
}
|
|
|
|
// Enregistrer une tentative de login
|
|
func RecordLoginAttempt(success bool) {
|
|
status := "failure"
|
|
if success {
|
|
status = "success"
|
|
}
|
|
AuthLoginAttempts.WithLabelValues(status).Inc()
|
|
}
|
|
|
|
// Mettre à jour le nombre de sessions actives
|
|
func UpdateActiveSessions(count int) {
|
|
AuthSessionActive.Set(float64(count))
|
|
}
|
|
|
|
// Enregistrer une requête database
|
|
func RecordDatabaseQuery(operation, table string, duration time.Duration) {
|
|
DatabaseQueryDuration.WithLabelValues(operation, table).Observe(duration.Seconds())
|
|
}
|
|
|
|
// Enregistrer une erreur de database
|
|
func RecordDatabaseError(operation, errorType string) {
|
|
DatabaseQueryErrors.WithLabelValues(operation, errorType).Inc()
|
|
}
|
|
|
|
// Enregistrer un upload de fichier
|
|
func RecordFileUpload(fileType, status string, sizeBytes int64) {
|
|
FileUploadsTotal.WithLabelValues(fileType, status).Inc()
|
|
FileUploadSize.WithLabelValues(fileType).Observe(float64(sizeBytes))
|
|
}
|
|
|
|
// Enregistrer un hit de rate limit
|
|
func RecordRateLimitHit(endpoint, limitType string) {
|
|
RateLimitHitsTotal.WithLabelValues(endpoint, limitType).Inc()
|
|
}
|
|
|
|
// Mettre à jour le nombre d'utilisateurs actifs
|
|
func UpdateActiveUsers(count int) {
|
|
ActiveUsers.Set(float64(count))
|
|
}
|
|
|
|
// Enregistrer une connexion WebSocket
|
|
func UpdateWebSocketConnections(count int) {
|
|
WebSocketConnectionsActive.Set(float64(count))
|
|
}
|
|
|
|
// Enregistrer un message WebSocket
|
|
func RecordWebSocketMessage(messageType, status string) {
|
|
WebSocketMessagesTotal.WithLabelValues(messageType, status).Inc()
|
|
}
|
|
|
|
// Enregistrer un cache hit
|
|
func RecordCacheHit(cacheType string) {
|
|
CacheHitsTotal.WithLabelValues(cacheType).Inc()
|
|
}
|
|
|
|
// Enregistrer un cache miss
|
|
func RecordCacheMiss(cacheType string) {
|
|
CacheMissesTotal.WithLabelValues(cacheType).Inc()
|
|
}
|
|
|
|
// Enregistrer une erreur
|
|
func RecordError(errorType, severity string) {
|
|
ErrorsTotal.WithLabelValues(errorType, severity).Inc()
|
|
}
|
|
|
|
// Enregistrer un health check
|
|
func RecordHealthCheck(service string, durationMs float64, status string) {
|
|
HealthCheckDuration.WithLabelValues(service).Observe(durationMs)
|
|
|
|
// Convertir le status en valeur numérique pour la gauge
|
|
var statusValue float64
|
|
switch status {
|
|
case "ok":
|
|
statusValue = 1.0
|
|
case "slow":
|
|
statusValue = 0.5
|
|
case "error":
|
|
statusValue = 0.0
|
|
default:
|
|
statusValue = 0.0
|
|
}
|
|
HealthCheckStatus.WithLabelValues(service).Set(statusValue)
|
|
}
|