veza/veza-chat-server/src/prometheus_metrics.rs
2025-12-03 20:33:26 +01:00

365 lines
15 KiB
Rust

//! Prometheus metrics for the chat server.
//! Revised version updated for the newer `metrics` crate API.
use crate::config::PrometheusConfig;
use crate::error::Result;
use axum::{extract::State, http::StatusCode, response::Response, routing::get, Router};
use metrics::{counter, gauge, histogram, describe_counter, describe_gauge, describe_histogram, Unit, Counter, Gauge, Histogram};
use metrics_exporter_prometheus::{PrometheusBuilder, PrometheusHandle};
use std::sync::Arc;
use std::time::Duration;
use tracing::{debug, info};
/// Holds one handle per Prometheus metric recorded by the chat server.
///
/// Every field is filled in by [`PrometheusMetrics::new`] from the `gauge!`,
/// `counter!` or `histogram!` macro. Unless stated otherwise, the metric is
/// registered under the same name as the field.
pub struct PrometheusMetrics {
// --- WebSocket connections ---
/// Number of active WebSocket connections.
pub websocket_connections: Gauge,
/// Total number of WebSocket connections.
pub websocket_connections_total: Counter,
/// Total number of WebSocket disconnections.
pub websocket_disconnections_total: Counter,
// --- Messages ---
/// Total number of messages sent.
pub messages_sent_total: Counter,
/// Total number of messages received.
pub messages_received_total: Counter,
/// Message processing duration; registered as `message_processing_duration_seconds`.
pub message_processing_duration: Histogram,
/// Message size in bytes.
pub message_size_bytes: Histogram,
// --- Rooms ---
/// Number of active rooms.
pub active_rooms: Gauge,
/// Total number of rooms created.
pub rooms_created_total: Counter,
/// Total number of rooms deleted.
pub rooms_deleted_total: Counter,
// --- Errors ---
/// Total number of errors.
pub errors_total: Counter,
/// Total number of authentication failures.
pub authentication_failures_total: Counter,
/// Total number of triggered rate limits.
pub rate_limits_triggered_total: Counter,
// --- Performance ---
/// Duration of HTTP requests, in seconds.
pub request_duration_seconds: Histogram,
/// Duration of database operations, in seconds.
pub database_operation_duration_seconds: Histogram,
/// Cache hit ratio (described with `Unit::Percent`).
pub cache_hit_ratio: Gauge,
// --- System ---
/// Memory usage in bytes.
pub memory_usage_bytes: Gauge,
/// CPU usage as a percentage.
pub cpu_usage_percent: Gauge,
/// Uptime in seconds.
pub uptime_seconds: Gauge,
// --- Security ---
/// Total number of security events.
pub security_events_total: Counter,
/// Total number of JWT tokens issued.
pub jwt_tokens_issued_total: Counter,
/// Total number of JWT tokens revoked.
pub jwt_tokens_revoked_total: Counter,
// --- Cache ---
/// Total number of cache operations.
pub cache_operations_total: Counter,
/// Total number of cache hits.
pub cache_hits_total: Counter,
/// Total number of cache misses.
pub cache_misses_total: Counter,
// --- Database ---
/// Number of active database connections.
pub database_connections_active: Gauge,
/// Total number of database operations.
pub database_operations_total: Counter,
/// Total number of database errors.
pub database_errors_total: Counter,
// --- Moderation ---
/// Total number of moderation actions.
pub moderation_actions_total: Counter,
/// Total amount of filtered content.
pub content_filtered_total: Counter,
/// Total number of banned users.
pub users_banned_total: Counter,
// --- User presence ---
/// Number of users online.
pub users_online: Gauge,
/// Number of users marked away.
pub users_away: Gauge,
/// Number of users marked busy.
pub users_busy: Gauge,
/// Number of users offline.
pub users_offline: Gauge,
}
impl PrometheusMetrics {
/// Declares the unit and help text of every metric via the `describe_*!` macros.
///
/// Covers the counters, gauges and histograms whose handles are stored on
/// [`PrometheusMetrics`]. The help texts are passed through as written (French).
///
/// NOTE(review): the `describe_*!` macros presumably act on whichever recorder is
/// installed at call time — confirm a recorder is installed before this runs.
///
/// # Errors
///
/// Never fails as written; the function always returns `Ok(())`.
pub fn init_metrics() -> Result<()> {
// Counter descriptions
describe_counter!("websocket_connections_total", Unit::Count, "Total des connexions WebSocket");
describe_counter!("websocket_disconnections_total", Unit::Count, "Total des déconnexions WebSocket");
describe_counter!("messages_sent_total", Unit::Count, "Total des messages envoyés");
describe_counter!("messages_received_total", Unit::Count, "Total des messages reçus");
describe_counter!("rooms_created_total", Unit::Count, "Total des salons créés");
describe_counter!("rooms_deleted_total", Unit::Count, "Total des salons supprimés");
describe_counter!("errors_total", Unit::Count, "Total des erreurs");
describe_counter!("authentication_failures_total", Unit::Count, "Total des échecs d'authentification");
describe_counter!("rate_limits_triggered_total", Unit::Count, "Total des rate limits déclenchés");
describe_counter!("security_events_total", Unit::Count, "Total des événements de sécurité");
describe_counter!("jwt_tokens_issued_total", Unit::Count, "Total des tokens JWT émis");
describe_counter!("jwt_tokens_revoked_total", Unit::Count, "Total des tokens JWT révoqués");
describe_counter!("cache_operations_total", Unit::Count, "Total des opérations de cache");
describe_counter!("cache_hits_total", Unit::Count, "Total des hits de cache");
describe_counter!("cache_misses_total", Unit::Count, "Total des misses de cache");
describe_counter!("database_operations_total", Unit::Count, "Total des opérations de base de données");
describe_counter!("database_errors_total", Unit::Count, "Total des erreurs de base de données");
describe_counter!("moderation_actions_total", Unit::Count, "Total des actions de modération");
describe_counter!("content_filtered_total", Unit::Count, "Total du contenu filtré");
describe_counter!("users_banned_total", Unit::Count, "Total des utilisateurs bannis");
// Gauge descriptions
describe_gauge!("websocket_connections", Unit::Count, "Nombre de connexions WebSocket actives");
describe_gauge!("active_rooms", Unit::Count, "Nombre de salons actifs");
describe_gauge!("cache_hit_ratio", Unit::Percent, "Ratio de hits du cache");
describe_gauge!("memory_usage_bytes", Unit::Bytes, "Utilisation mémoire en bytes");
describe_gauge!("cpu_usage_percent", Unit::Percent, "Utilisation CPU en pourcentage");
describe_gauge!("uptime_seconds", Unit::Seconds, "Temps de fonctionnement en secondes");
describe_gauge!("database_connections_active", Unit::Count, "Connexions de base de données actives");
describe_gauge!("users_online", Unit::Count, "Utilisateurs en ligne");
describe_gauge!("users_away", Unit::Count, "Utilisateurs absents");
describe_gauge!("users_busy", Unit::Count, "Utilisateurs occupés");
describe_gauge!("users_offline", Unit::Count, "Utilisateurs hors ligne");
// Histogram descriptions
describe_histogram!("message_processing_duration_seconds", Unit::Seconds, "Durée de traitement des messages");
describe_histogram!("message_size_bytes", Unit::Bytes, "Taille des messages en bytes");
describe_histogram!("request_duration_seconds", Unit::Seconds, "Durée des requêtes HTTP");
describe_histogram!("database_operation_duration_seconds", Unit::Seconds, "Durée des opérations de base de données");
Ok(())
}
/// Crée une nouvelle instance des métriques
pub fn new(config: &PrometheusConfig) -> Result<Self> {
// Initialiser les descriptions
Self::init_metrics()?;
// Configurer l'export Prometheus
PrometheusBuilder::new()
.with_http_listener(config.bind_addr)
.install()
.map_err(|e| crate::error::ChatError::internal_error(format!("Failed to install Prometheus exporter: {e}")))?;
info!("📊 Métriques Prometheus configurées");
// Enregistrement des métriques
let websocket_connections = gauge!("websocket_connections");
let websocket_connections_total = counter!("websocket_connections_total");
let websocket_disconnections_total = counter!("websocket_disconnections_total");
let messages_sent_total = counter!("messages_sent_total");
let messages_received_total = counter!("messages_received_total");
let message_processing_duration = histogram!("message_processing_duration_seconds");
let message_size_bytes = histogram!("message_size_bytes");
let active_rooms = gauge!("active_rooms");
let rooms_created_total = counter!("rooms_created_total");
let rooms_deleted_total = counter!("rooms_deleted_total");
let errors_total = counter!("errors_total");
let authentication_failures_total = counter!("authentication_failures_total");
let rate_limits_triggered_total = counter!("rate_limits_triggered_total");
let request_duration_seconds = histogram!("request_duration_seconds");
let database_operation_duration_seconds = histogram!("database_operation_duration_seconds");
let cache_hit_ratio = gauge!("cache_hit_ratio");
let memory_usage_bytes = gauge!("memory_usage_bytes");
let cpu_usage_percent = gauge!("cpu_usage_percent");
let uptime_seconds = gauge!("uptime_seconds");
let security_events_total = counter!("security_events_total");
let jwt_tokens_issued_total = counter!("jwt_tokens_issued_total");
let jwt_tokens_revoked_total = counter!("jwt_tokens_revoked_total");
let cache_operations_total = counter!("cache_operations_total");
let cache_hits_total = counter!("cache_hits_total");
let cache_misses_total = counter!("cache_misses_total");
let database_connections_active = gauge!("database_connections_active");
let database_operations_total = counter!("database_operations_total");
let database_errors_total = counter!("database_errors_total");
let moderation_actions_total = counter!("moderation_actions_total");
let content_filtered_total = counter!("content_filtered_total");
let users_banned_total = counter!("users_banned_total");
let users_online = gauge!("users_online");
let users_away = gauge!("users_away");
let users_busy = gauge!("users_busy");
let users_offline = gauge!("users_offline");
Ok(Self {
websocket_connections,
websocket_connections_total,
websocket_disconnections_total,
messages_sent_total,
messages_received_total,
message_processing_duration,
message_size_bytes,
active_rooms,
rooms_created_total,
rooms_deleted_total,
errors_total,
authentication_failures_total,
rate_limits_triggered_total,
request_duration_seconds,
database_operation_duration_seconds,
cache_hit_ratio,
memory_usage_bytes,
cpu_usage_percent,
uptime_seconds,
security_events_total,
jwt_tokens_issued_total,
jwt_tokens_revoked_total,
cache_operations_total,
cache_hits_total,
cache_misses_total,
database_connections_active,
database_operations_total,
database_errors_total,
moderation_actions_total,
content_filtered_total,
users_banned_total,
users_online,
users_away,
users_busy,
users_offline,
})
}
/// Records a newly opened WebSocket connection.
///
/// Adds one to the connection counter and raises the active-connection gauge by one.
pub fn record_websocket_connection(&self) {
    self.websocket_connections_total.increment(1);
    self.websocket_connections.increment(1.0);
}
/// Records a closed WebSocket connection.
///
/// Adds one to the disconnection counter and lowers the active-connection gauge by one.
pub fn record_websocket_disconnection(&self) {
    self.websocket_disconnections_total.increment(1);
    self.websocket_connections.decrement(1.0);
}
/// Records an outgoing message.
///
/// Observes `size_bytes` in the message-size histogram and adds one to the
/// sent-messages counter.
pub fn record_message_sent(&self, size_bytes: u64) {
    let payload_len = size_bytes as f64;
    self.message_size_bytes.record(payload_len);
    self.messages_sent_total.increment(1);
}
/// Records an incoming message.
///
/// Observes `size_bytes` in the message-size histogram (shared with sent
/// messages) and adds one to the received-messages counter.
pub fn record_message_received(&self, size_bytes: u64) {
    let payload_len = size_bytes as f64;
    self.message_size_bytes.record(payload_len);
    self.messages_received_total.increment(1);
}
/// Observes how long a message took to process.
///
/// `duration` is converted to fractional seconds before being recorded in the
/// message-processing histogram.
pub fn record_message_processing_duration(&self, duration: Duration) {
    let elapsed_secs = duration.as_secs_f64();
    self.message_processing_duration.record(elapsed_secs);
}
/// Records the creation of a room.
///
/// Adds one to the rooms-created counter and raises the active-rooms gauge by one.
///
/// `room_type` is currently ignored: the metric is recorded without labels.
pub fn record_room_created(&self, room_type: &str) {
    // Explicitly acknowledge the unused argument so the build stays warning-free.
    let _ = room_type;
    self.rooms_created_total.increment(1);
    self.update_active_rooms(1);
}
/// Records the deletion of a room.
///
/// Adds one to the rooms-deleted counter and lowers the active-rooms gauge by one.
///
/// `room_type` is currently ignored: the metric is recorded without labels.
pub fn record_room_deleted(&self, room_type: &str) {
    // Explicitly acknowledge the unused argument so the build stays warning-free.
    let _ = room_type;
    self.rooms_deleted_total.increment(1);
    self.update_active_rooms(-1);
}
/// Adjusts the active-rooms gauge by `delta`.
///
/// A positive `delta` increments the gauge; zero or a negative `delta`
/// decrements it by the absolute value (so zero is a no-op decrement).
pub fn update_active_rooms(&self, delta: i32) {
    // `unsigned_abs` avoids the overflow that negating `i32::MIN` would cause,
    // and `f64::from(u32)` is a lossless conversion.
    let magnitude = f64::from(delta.unsigned_abs());
    if delta > 0 {
        self.active_rooms.increment(magnitude);
    } else {
        self.active_rooms.decrement(magnitude);
    }
}
/// Records one error.
///
/// Adds one to the errors counter. `error_type` is currently ignored: the
/// metric is recorded without labels.
pub fn record_error(&self, error_type: &str) {
    // Explicitly acknowledge the unused argument so the build stays warning-free.
    let _ = error_type;
    self.errors_total.increment(1);
}
/// Records one failed authentication attempt by adding one to its counter.
pub fn record_authentication_failure(&self) {
    let failures = &self.authentication_failures_total;
    failures.increment(1);
}
/// Records one triggered rate limit by adding one to its counter.
pub fn record_rate_limit_triggered(&self) {
    let triggered = &self.rate_limits_triggered_total;
    triggered.increment(1);
}
/// Observes the duration of an HTTP request.
///
/// `duration` is recorded, in fractional seconds, in the request-duration
/// histogram. `method`, `endpoint` and `status_code` are currently ignored: the
/// metric is recorded without labels.
pub fn record_request_duration(&self, duration: Duration, method: &str, endpoint: &str, status_code: u16) {
    // Explicitly acknowledge the unused arguments so the build stays warning-free.
    let _ = (method, endpoint, status_code);
    self.request_duration_seconds.record(duration.as_secs_f64());
}
/// Observes the duration of a database operation.
///
/// `duration` is recorded, in fractional seconds, in the database-operation
/// histogram. `operation` and `table` are currently ignored: the metric is
/// recorded without labels.
pub fn record_database_operation_duration(&self, duration: Duration, operation: &str, table: &str) {
    // Explicitly acknowledge the unused arguments so the build stays warning-free.
    let _ = (operation, table);
    self.database_operation_duration_seconds.record(duration.as_secs_f64());
}
/// Records one cache operation.
///
/// Adds one to the cache-operations counter. `operation_type` is currently
/// ignored: the metric is recorded without labels.
pub fn record_cache_operation(&self, operation_type: &str) {
    // Explicitly acknowledge the unused argument so the build stays warning-free.
    let _ = operation_type;
    self.cache_operations_total.increment(1);
}
/// Records one database operation.
///
/// Adds one to the database-operations counter. `operation_type` is currently
/// ignored: the metric is recorded without labels.
pub fn record_database_operation(&self, operation_type: &str) {
    // Explicitly acknowledge the unused argument so the build stays warning-free.
    let _ = operation_type;
    self.database_operations_total.increment(1);
}
/// Records one database error.
///
/// Adds one to the database-errors counter. `error_type` is currently ignored:
/// the metric is recorded without labels.
pub fn record_database_error(&self, error_type: &str) {
    // Explicitly acknowledge the unused argument so the build stays warning-free.
    let _ = error_type;
    self.database_errors_total.increment(1);
}
/// Records one moderation action.
///
/// Adds one to the moderation-actions counter. `action_type` is currently
/// ignored: the metric is recorded without labels.
pub fn record_moderation_action(&self, action_type: &str) {
    // Explicitly acknowledge the unused argument so the build stays warning-free.
    let _ = action_type;
    self.moderation_actions_total.increment(1);
}
/// Records one piece of filtered content.
///
/// Adds one to the content-filtered counter. `filter_type` is currently
/// ignored: the metric is recorded without labels.
pub fn record_content_filtered(&self, filter_type: &str) {
    // Explicitly acknowledge the unused argument so the build stays warning-free.
    let _ = filter_type;
    self.content_filtered_total.increment(1);
}
/// Records one banned user.
///
/// Adds one to the users-banned counter. `reason` is currently ignored: the
/// metric is recorded without labels.
pub fn record_user_banned(&self, reason: &str) {
    // Explicitly acknowledge the unused argument so the build stays warning-free.
    let _ = reason;
    self.users_banned_total.increment(1);
}
/// Logs a file upload at debug level.
///
/// This is a placeholder: no metric is updated, only the upload size is
/// logged. `file_type` is currently ignored.
pub fn record_file_upload(&self, file_type: &str, size_bytes: u64) {
    // Explicitly acknowledge the unused argument so the build stays warning-free.
    let _ = file_type;
    debug!("File upload recorded: {} bytes", size_bytes);
}
/// Logs a failed file upload at debug level.
///
/// `error_type` is interpolated into the log line; no metric is updated here.
pub fn record_file_upload_failed(&self, error_type: &str) {
// Placeholder: only a debug log line is emitted for now.
debug!("File upload failed: {}", error_type);
}
/// Logs a webhook call at debug level.
///
/// `webhook_type` and `status_code` are interpolated into the log line; no
/// metric is updated here.
pub fn record_webhook_call(&self, webhook_type: &str, status_code: u16) {
// Placeholder: only a debug log line is emitted for now.
debug!("Webhook call recorded: {} - {}", webhook_type, status_code);
}
/// Hook for refreshing the system metrics.
///
/// As written this only emits a debug log line; the `memory_usage_bytes`,
/// `cpu_usage_percent` and `uptime_seconds` gauges are not touched here.
pub fn update_system_metrics(&self) {
// Placeholder: no system values are sampled yet.
debug!("System metrics updated");
}
}
/// Axum handler for the Prometheus route.
///
/// Always answers `200 OK` with a fixed informational body; the shared metrics
/// state is extracted but never read.
///
/// NOTE(review): the `Content-Type` announces the Prometheus text format, yet
/// the body is a plain sentence rather than rendered metrics — confirm whether
/// scrapers are expected to hit this route or the exporter's own listener.
pub async fn prometheus_handler(State(_metrics): State<Arc<PrometheusMetrics>>) -> Response<String> {
    let body = "Metrics available at /metrics".to_string();
    let builder = Response::builder()
        .status(StatusCode::OK)
        .header("Content-Type", "text/plain; version=0.0.4; charset=utf-8");
    // Status and header are static and valid, so building cannot fail here.
    builder.body(body).unwrap()
}
/// Builds the router that serves `GET /metrics` through [`prometheus_handler`].
///
/// `metrics` is attached as the router state handed to the handler.
pub fn create_prometheus_router(metrics: Arc<PrometheusMetrics>) -> Router {
    let routes = Router::new().route("/metrics", get(prometheus_handler));
    routes.with_state(metrics)
}