//! Prometheus metrics for the chat server.
//!
//! Corrected version using the new `metrics` API.
|
|
|
|
use crate::config::PrometheusConfig;
|
|
use crate::error::Result;
|
|
use axum::{extract::State, http::StatusCode, response::Response, routing::get, Router};
|
|
use metrics::{counter, gauge, histogram, describe_counter, describe_gauge, describe_histogram, Unit, Counter, Gauge, Histogram};
|
|
use metrics_exporter_prometheus::{PrometheusBuilder, PrometheusHandle};
|
|
use std::sync::Arc;
|
|
use std::time::Duration;
|
|
use tracing::{debug, info};
|
|
|
|
/// Gestionnaire des métriques Prometheus
|
|
pub struct PrometheusMetrics {
|
|
|
|
/// Métriques de connexions WebSocket
|
|
pub websocket_connections: Gauge,
|
|
pub websocket_connections_total: Counter,
|
|
pub websocket_disconnections_total: Counter,
|
|
|
|
/// Métriques de messages
|
|
pub messages_sent_total: Counter,
|
|
pub messages_received_total: Counter,
|
|
pub message_processing_duration: Histogram,
|
|
pub message_size_bytes: Histogram,
|
|
|
|
/// Métriques de salons
|
|
pub active_rooms: Gauge,
|
|
pub rooms_created_total: Counter,
|
|
pub rooms_deleted_total: Counter,
|
|
|
|
/// Métriques d'erreurs
|
|
pub errors_total: Counter,
|
|
pub authentication_failures_total: Counter,
|
|
pub rate_limits_triggered_total: Counter,
|
|
|
|
/// Métriques de performance
|
|
pub request_duration_seconds: Histogram,
|
|
pub database_operation_duration_seconds: Histogram,
|
|
pub cache_hit_ratio: Gauge,
|
|
|
|
/// Métriques système
|
|
pub memory_usage_bytes: Gauge,
|
|
pub cpu_usage_percent: Gauge,
|
|
pub uptime_seconds: Gauge,
|
|
|
|
/// Métriques de sécurité
|
|
pub security_events_total: Counter,
|
|
pub jwt_tokens_issued_total: Counter,
|
|
pub jwt_tokens_revoked_total: Counter,
|
|
|
|
/// Métriques de cache
|
|
pub cache_operations_total: Counter,
|
|
pub cache_hits_total: Counter,
|
|
pub cache_misses_total: Counter,
|
|
|
|
/// Métriques de base de données
|
|
pub database_connections_active: Gauge,
|
|
pub database_operations_total: Counter,
|
|
pub database_errors_total: Counter,
|
|
|
|
/// Métriques de modération
|
|
pub moderation_actions_total: Counter,
|
|
pub content_filtered_total: Counter,
|
|
pub users_banned_total: Counter,
|
|
|
|
/// Métriques d'utilisateurs
|
|
pub users_online: Gauge,
|
|
pub users_away: Gauge,
|
|
pub users_busy: Gauge,
|
|
pub users_offline: Gauge,
|
|
}
|
|
|
|
impl PrometheusMetrics {
|
|
/// Initialise les métriques avec les descriptions
|
|
pub fn init_metrics() -> Result<()> {
|
|
// Descriptions des compteurs
|
|
describe_counter!("websocket_connections_total", Unit::Count, "Total des connexions WebSocket");
|
|
describe_counter!("websocket_disconnections_total", Unit::Count, "Total des déconnexions WebSocket");
|
|
describe_counter!("messages_sent_total", Unit::Count, "Total des messages envoyés");
|
|
describe_counter!("messages_received_total", Unit::Count, "Total des messages reçus");
|
|
describe_counter!("rooms_created_total", Unit::Count, "Total des salons créés");
|
|
describe_counter!("rooms_deleted_total", Unit::Count, "Total des salons supprimés");
|
|
describe_counter!("errors_total", Unit::Count, "Total des erreurs");
|
|
describe_counter!("authentication_failures_total", Unit::Count, "Total des échecs d'authentification");
|
|
describe_counter!("rate_limits_triggered_total", Unit::Count, "Total des rate limits déclenchés");
|
|
describe_counter!("security_events_total", Unit::Count, "Total des événements de sécurité");
|
|
describe_counter!("jwt_tokens_issued_total", Unit::Count, "Total des tokens JWT émis");
|
|
describe_counter!("jwt_tokens_revoked_total", Unit::Count, "Total des tokens JWT révoqués");
|
|
describe_counter!("cache_operations_total", Unit::Count, "Total des opérations de cache");
|
|
describe_counter!("cache_hits_total", Unit::Count, "Total des hits de cache");
|
|
describe_counter!("cache_misses_total", Unit::Count, "Total des misses de cache");
|
|
describe_counter!("database_operations_total", Unit::Count, "Total des opérations de base de données");
|
|
describe_counter!("database_errors_total", Unit::Count, "Total des erreurs de base de données");
|
|
describe_counter!("moderation_actions_total", Unit::Count, "Total des actions de modération");
|
|
describe_counter!("content_filtered_total", Unit::Count, "Total du contenu filtré");
|
|
describe_counter!("users_banned_total", Unit::Count, "Total des utilisateurs bannis");
|
|
|
|
// Descriptions des jauges
|
|
describe_gauge!("websocket_connections", Unit::Count, "Nombre de connexions WebSocket actives");
|
|
describe_gauge!("active_rooms", Unit::Count, "Nombre de salons actifs");
|
|
describe_gauge!("cache_hit_ratio", Unit::Percent, "Ratio de hits du cache");
|
|
describe_gauge!("memory_usage_bytes", Unit::Bytes, "Utilisation mémoire en bytes");
|
|
describe_gauge!("cpu_usage_percent", Unit::Percent, "Utilisation CPU en pourcentage");
|
|
describe_gauge!("uptime_seconds", Unit::Seconds, "Temps de fonctionnement en secondes");
|
|
describe_gauge!("database_connections_active", Unit::Count, "Connexions de base de données actives");
|
|
describe_gauge!("users_online", Unit::Count, "Utilisateurs en ligne");
|
|
describe_gauge!("users_away", Unit::Count, "Utilisateurs absents");
|
|
describe_gauge!("users_busy", Unit::Count, "Utilisateurs occupés");
|
|
describe_gauge!("users_offline", Unit::Count, "Utilisateurs hors ligne");
|
|
|
|
// Descriptions des histogrammes
|
|
describe_histogram!("message_processing_duration_seconds", Unit::Seconds, "Durée de traitement des messages");
|
|
describe_histogram!("message_size_bytes", Unit::Bytes, "Taille des messages en bytes");
|
|
describe_histogram!("request_duration_seconds", Unit::Seconds, "Durée des requêtes HTTP");
|
|
describe_histogram!("database_operation_duration_seconds", Unit::Seconds, "Durée des opérations de base de données");
|
|
|
|
Ok(())
|
|
}
|
|
|
|
/// Crée une nouvelle instance des métriques
|
|
pub fn new(config: &PrometheusConfig) -> Result<Self> {
|
|
// Initialiser les descriptions
|
|
Self::init_metrics()?;
|
|
|
|
// Configurer l'export Prometheus
|
|
PrometheusBuilder::new()
|
|
.with_http_listener(config.bind_addr)
|
|
.install()
|
|
.map_err(|e| crate::error::ChatError::internal_error(format!("Failed to install Prometheus exporter: {e}")))?;
|
|
|
|
info!("📊 Métriques Prometheus configurées");
|
|
|
|
// Enregistrement des métriques
|
|
let websocket_connections = gauge!("websocket_connections");
|
|
let websocket_connections_total = counter!("websocket_connections_total");
|
|
let websocket_disconnections_total = counter!("websocket_disconnections_total");
|
|
|
|
let messages_sent_total = counter!("messages_sent_total");
|
|
let messages_received_total = counter!("messages_received_total");
|
|
let message_processing_duration = histogram!("message_processing_duration_seconds");
|
|
let message_size_bytes = histogram!("message_size_bytes");
|
|
|
|
let active_rooms = gauge!("active_rooms");
|
|
let rooms_created_total = counter!("rooms_created_total");
|
|
let rooms_deleted_total = counter!("rooms_deleted_total");
|
|
|
|
let errors_total = counter!("errors_total");
|
|
let authentication_failures_total = counter!("authentication_failures_total");
|
|
let rate_limits_triggered_total = counter!("rate_limits_triggered_total");
|
|
|
|
let request_duration_seconds = histogram!("request_duration_seconds");
|
|
let database_operation_duration_seconds = histogram!("database_operation_duration_seconds");
|
|
let cache_hit_ratio = gauge!("cache_hit_ratio");
|
|
|
|
let memory_usage_bytes = gauge!("memory_usage_bytes");
|
|
let cpu_usage_percent = gauge!("cpu_usage_percent");
|
|
let uptime_seconds = gauge!("uptime_seconds");
|
|
|
|
let security_events_total = counter!("security_events_total");
|
|
let jwt_tokens_issued_total = counter!("jwt_tokens_issued_total");
|
|
let jwt_tokens_revoked_total = counter!("jwt_tokens_revoked_total");
|
|
|
|
let cache_operations_total = counter!("cache_operations_total");
|
|
let cache_hits_total = counter!("cache_hits_total");
|
|
let cache_misses_total = counter!("cache_misses_total");
|
|
|
|
let database_connections_active = gauge!("database_connections_active");
|
|
let database_operations_total = counter!("database_operations_total");
|
|
let database_errors_total = counter!("database_errors_total");
|
|
|
|
let moderation_actions_total = counter!("moderation_actions_total");
|
|
let content_filtered_total = counter!("content_filtered_total");
|
|
let users_banned_total = counter!("users_banned_total");
|
|
|
|
let users_online = gauge!("users_online");
|
|
let users_away = gauge!("users_away");
|
|
let users_busy = gauge!("users_busy");
|
|
let users_offline = gauge!("users_offline");
|
|
|
|
Ok(Self {
|
|
websocket_connections,
|
|
websocket_connections_total,
|
|
websocket_disconnections_total,
|
|
messages_sent_total,
|
|
messages_received_total,
|
|
message_processing_duration,
|
|
message_size_bytes,
|
|
active_rooms,
|
|
rooms_created_total,
|
|
rooms_deleted_total,
|
|
errors_total,
|
|
authentication_failures_total,
|
|
rate_limits_triggered_total,
|
|
request_duration_seconds,
|
|
database_operation_duration_seconds,
|
|
cache_hit_ratio,
|
|
memory_usage_bytes,
|
|
cpu_usage_percent,
|
|
uptime_seconds,
|
|
security_events_total,
|
|
jwt_tokens_issued_total,
|
|
jwt_tokens_revoked_total,
|
|
cache_operations_total,
|
|
cache_hits_total,
|
|
cache_misses_total,
|
|
database_connections_active,
|
|
database_operations_total,
|
|
database_errors_total,
|
|
moderation_actions_total,
|
|
content_filtered_total,
|
|
users_banned_total,
|
|
users_online,
|
|
users_away,
|
|
users_busy,
|
|
users_offline,
|
|
})
|
|
}
|
|
|
|
/// Enregistre une connexion WebSocket
|
|
pub fn record_websocket_connection(&self) {
|
|
self.websocket_connections.increment(1.0);
|
|
self.websocket_connections_total.increment(1);
|
|
}
|
|
|
|
/// Enregistre une déconnexion WebSocket
|
|
pub fn record_websocket_disconnection(&self) {
|
|
self.websocket_connections.decrement(1.0);
|
|
self.websocket_disconnections_total.increment(1);
|
|
}
|
|
|
|
/// Enregistre un message envoyé
|
|
pub fn record_message_sent(&self, size_bytes: u64) {
|
|
self.messages_sent_total.increment(1);
|
|
self.message_size_bytes.record(size_bytes as f64);
|
|
}
|
|
|
|
/// Enregistre un message reçu
|
|
pub fn record_message_received(&self, size_bytes: u64) {
|
|
self.messages_received_total.increment(1);
|
|
self.message_size_bytes.record(size_bytes as f64);
|
|
}
|
|
|
|
/// Enregistre la durée de traitement d'un message
|
|
pub fn record_message_processing_duration(&self, duration: Duration) {
|
|
self.message_processing_duration.record(duration.as_secs_f64());
|
|
}
|
|
|
|
/// Enregistre la création d'un salon
|
|
pub fn record_room_created(&self, room_type: &str) {
|
|
self.rooms_created_total.increment(1);
|
|
self.update_active_rooms(1);
|
|
}
|
|
|
|
/// Enregistre la suppression d'un salon
|
|
pub fn record_room_deleted(&self, room_type: &str) {
|
|
self.rooms_deleted_total.increment(1);
|
|
self.update_active_rooms(-1);
|
|
}
|
|
|
|
/// Met à jour le nombre de salons actifs
|
|
pub fn update_active_rooms(&self, delta: i32) {
|
|
if delta > 0 {
|
|
self.active_rooms.increment(delta as f64);
|
|
} else {
|
|
self.active_rooms.decrement((-delta) as f64);
|
|
}
|
|
}
|
|
|
|
/// Enregistre une erreur
|
|
pub fn record_error(&self, error_type: &str) {
|
|
self.errors_total.increment(1);
|
|
}
|
|
|
|
/// Enregistre un échec d'authentification
|
|
pub fn record_authentication_failure(&self) {
|
|
self.authentication_failures_total.increment(1);
|
|
}
|
|
|
|
/// Enregistre un rate limit déclenché
|
|
pub fn record_rate_limit_triggered(&self) {
|
|
self.rate_limits_triggered_total.increment(1);
|
|
}
|
|
|
|
/// Enregistre la durée d'une requête HTTP
|
|
pub fn record_request_duration(&self, duration: Duration, method: &str, endpoint: &str, status_code: u16) {
|
|
self.request_duration_seconds.record(duration.as_secs_f64());
|
|
}
|
|
|
|
/// Enregistre la durée d'une opération de base de données
|
|
pub fn record_database_operation_duration(&self, duration: Duration, operation: &str, table: &str) {
|
|
self.database_operation_duration_seconds.record(duration.as_secs_f64());
|
|
}
|
|
|
|
/// Enregistre une opération de cache
|
|
pub fn record_cache_operation(&self, operation_type: &str) {
|
|
self.cache_operations_total.increment(1);
|
|
}
|
|
|
|
/// Enregistre une opération de base de données
|
|
pub fn record_database_operation(&self, operation_type: &str) {
|
|
self.database_operations_total.increment(1);
|
|
}
|
|
|
|
/// Enregistre une erreur de base de données
|
|
pub fn record_database_error(&self, error_type: &str) {
|
|
self.database_errors_total.increment(1);
|
|
}
|
|
|
|
/// Enregistre une action de modération
|
|
pub fn record_moderation_action(&self, action_type: &str) {
|
|
self.moderation_actions_total.increment(1);
|
|
}
|
|
|
|
/// Enregistre du contenu filtré
|
|
pub fn record_content_filtered(&self, filter_type: &str) {
|
|
self.content_filtered_total.increment(1);
|
|
}
|
|
|
|
/// Enregistre un utilisateur banni
|
|
pub fn record_user_banned(&self, reason: &str) {
|
|
self.users_banned_total.increment(1);
|
|
}
|
|
|
|
/// Enregistre un upload de fichier
|
|
pub fn record_file_upload(&self, file_type: &str, size_bytes: u64) {
|
|
// Implementation pour les uploads de fichiers
|
|
debug!("File upload recorded: {} bytes", size_bytes);
|
|
}
|
|
|
|
/// Enregistre un échec d'upload de fichier
|
|
pub fn record_file_upload_failed(&self, error_type: &str) {
|
|
// Implementation pour les échecs d'upload
|
|
debug!("File upload failed: {}", error_type);
|
|
}
|
|
|
|
/// Enregistre un appel webhook
|
|
pub fn record_webhook_call(&self, webhook_type: &str, status_code: u16) {
|
|
// Implementation pour les webhooks
|
|
debug!("Webhook call recorded: {} - {}", webhook_type, status_code);
|
|
}
|
|
|
|
/// Met à jour les métriques système
|
|
pub fn update_system_metrics(&self) {
|
|
// Implementation pour les métriques système
|
|
debug!("System metrics updated");
|
|
}
|
|
}
|
|
|
|
/// Handler pour l'endpoint Prometheus
|
|
pub async fn prometheus_handler(State(_metrics): State<Arc<PrometheusMetrics>>) -> Response<String> {
|
|
// Le handle Prometheus gère automatiquement l'export
|
|
Response::builder()
|
|
.status(StatusCode::OK)
|
|
.header("Content-Type", "text/plain; version=0.0.4; charset=utf-8")
|
|
.body("Metrics available at /metrics".to_string())
|
|
.unwrap()
|
|
}
|
|
|
|
/// Crée le routeur Prometheus
|
|
pub fn create_prometheus_router(metrics: Arc<PrometheusMetrics>) -> Router {
|
|
Router::new()
|
|
.route("/metrics", get(prometheus_handler))
|
|
.with_state(metrics)
|
|
}
|