//! veza-stream-server — production testing module (`src/testing/mod.rs`).
//!
//! Implements load testing, stress testing and chaos testing to validate
//! production-grade scalability and resilience.
pub mod load_testing;
pub mod chaos_testing;
pub mod benchmarks;
pub mod stress_testing;
pub use load_testing::*;
pub use chaos_testing::*;
pub use benchmarks::*;
pub use stress_testing::*;
use std::sync::Arc;
use std::time::{Duration, Instant};
use tokio::sync::RwLock;
// Note: Use tracing::info! macro directly instead of importing
use serde::{Serialize, Deserialize};
use uuid::Uuid;
use crate::error::AppError;
use crate::core::StreamManager;
/// Configuration for a production test run.
///
/// Groups the load profile (connection count, phase durations), the target
/// metrics used for pass/fail validation, and the chaos-testing knobs.
#[derive(Debug, Clone)]
pub struct ProductionTestConfig {
    /// Maximum number of simultaneous connections to open
    pub max_concurrent_connections: u32,
    /// Total duration of the steady-state test phase
    pub test_duration: Duration,
    /// Duration of the progressive ramp-up phase
    pub ramp_up_duration: Duration,
    /// Duration of the ramp-down phase
    pub ramp_down_duration: Duration,
    /// Target metrics the run is validated against
    pub target_metrics: TargetMetrics,
    /// Chaos-testing configuration
    pub chaos_config: ChaosConfig,
}
/// Target metrics a test run is validated against.
#[derive(Debug, Clone)]
pub struct TargetMetrics {
    /// Maximum acceptable P99 latency (ms)
    pub max_p99_latency_ms: f64,
    /// Minimum required throughput (requests/sec)
    pub min_throughput_rps: f64,
    /// Maximum acceptable error rate (%)
    pub max_error_rate_percent: f64,
    /// Maximum CPU usage (%)
    pub max_cpu_usage_percent: f64,
    /// Maximum memory usage (GB)
    pub max_memory_usage_gb: f64,
}
/// Chaos-testing configuration (failure-injection knobs).
#[derive(Debug, Clone)]
pub struct ChaosConfig {
    /// Network failure rate (%)
    pub network_failure_rate: f64,
    /// Artificially injected network latency (ms)
    pub artificial_latency_ms: u64,
    /// Packet loss rate (%)
    pub packet_loss_rate: f64,
    /// Interval between forced service restarts
    pub service_restart_interval: Duration,
}
/// Main production-test runner.
///
/// Owns the test configuration and shares mutable state (results,
/// simulated connections) behind `Arc<RwLock<_>>` so concurrent test
/// tasks can update it.
#[derive(Debug)]
pub struct ProductionTestRunner {
    // Load profile, target metrics and chaos knobs for this run.
    config: ProductionTestConfig,
    // Stream manager under test.
    stream_manager: Arc<StreamManager>,
    // Accumulated results, shared with test tasks.
    test_results: Arc<RwLock<TestResults>>,
    // Currently simulated test connections.
    active_connections: Arc<RwLock<Vec<TestConnection>>>,
}
/// Aggregated results of a production test run.
///
/// Note: `#[derive(Default)]` requires every field type — including
/// `TestStatus` — to implement `Default`.
#[derive(Debug, Clone, Default)]
pub struct TestResults {
    /// Measured performance metrics
    pub performance_metrics: PerformanceMetrics,
    /// Measured resilience metrics
    pub resilience_metrics: ResilienceMetrics,
    /// Events recorded during the run
    pub events: Vec<TestEvent>,
    /// Final pass/fail status
    pub final_status: TestStatus,
}
/// Performance metrics measured during a test run.
#[derive(Debug, Clone, Default)]
pub struct PerformanceMetrics {
    /// P50 latency (ms)
    pub latency_p50: f64,
    /// P95 latency (ms)
    pub latency_p95: f64,
    /// P99 latency (ms)
    pub latency_p99: f64,
    /// Throughput (requests/sec)
    pub throughput_rps: f64,
    /// Error rate (%)
    pub error_rate: f64,
    /// Peak number of simultaneous connections reached
    pub max_concurrent_connections: u32,
    /// CPU usage (%)
    pub cpu_usage_percent: f64,
    /// Memory usage (GB)
    pub memory_usage_gb: f64,
    /// Network I/O throughput (Mbit/s)
    pub network_io_mbps: f64,
}
/// Resilience metrics measured during chaos testing.
#[derive(Debug, Clone, Default)]
pub struct ResilienceMetrics {
    /// Recovery time after a failure (seconds)
    pub recovery_time_seconds: f64,
    /// Percentage of requests lost during a failure
    pub lost_requests_percent: f64,
    /// Number of service restarts survived
    pub survived_restarts: u32,
    /// Stability score under load
    pub stability_score: f64,
}
/// A single event recorded during a test run.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct TestEvent {
    /// Wall-clock time at which the event occurred
    pub timestamp: std::time::SystemTime,
    /// Category of the event
    pub event_type: TestEventType,
    /// Human-readable description
    pub description: String,
    /// Severity level
    pub severity: EventSeverity,
}
/// Categories of events that can be recorded during a test run.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum TestEventType {
    /// A load threshold was reached
    LoadThresholdReached,
    /// A latency spike was observed
    LatencySpike,
    /// The error rate became high
    ErrorRateHigh,
    /// System resources were exhausted
    ResourceExhaustion,
    /// A service failed
    ServiceFailure,
    /// Recovery after a failure completed
    RecoveryCompleted,
    /// The test run completed
    TestCompleted,
}
/// Severity level of a recorded test event.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum EventSeverity {
    Info,
    Warning,
    Error,
    Critical,
}
/// Final status of a test run.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum TestStatus {
    /// All target metrics were met.
    Passed,
    /// At least one target metric was violated.
    Failed { reason: String },
    /// Targets met overall, but with non-fatal warnings.
    PartiallyPassed { warnings: Vec<String> },
}

// `TestResults` derives `Default`, which requires `TestStatus: Default`.
// Fail-closed: a result that was never validated reports `Failed` rather
// than silently claiming `Passed`.
impl Default for TestStatus {
    fn default() -> Self {
        TestStatus::Failed {
            reason: "test not executed".to_string(),
        }
    }
}
/// A simulated test connection.
#[derive(Debug, Clone)]
pub struct TestConnection {
    /// Unique connection identifier
    pub id: Uuid,
    /// When the connection was created
    pub created_at: Instant,
    /// Last time activity was observed on this connection
    pub last_activity: Instant,
    /// Current lifecycle status
    pub status: ConnectionStatus,
    /// Per-connection traffic counters
    pub metrics: ConnectionMetrics,
}
/// Lifecycle status of a simulated test connection.
#[derive(Debug, Clone)]
pub enum ConnectionStatus {
    /// Connection establishment in progress
    Connecting,
    /// Connection established
    Connected,
    /// Actively exchanging traffic
    Active,
    /// Connected but currently idle
    Idle,
    /// Disconnected
    Disconnected,
    /// Failed, with an error message
    Error { message: String },
}
/// Per-connection traffic counters.
#[derive(Debug, Clone, Default)]
pub struct ConnectionMetrics {
    /// Number of requests sent
    pub requests_sent: u64,
    /// Number of responses received
    pub responses_received: u64,
    /// Total bytes sent
    pub bytes_sent: u64,
    /// Total bytes received
    pub bytes_received: u64,
    /// Number of errors observed
    pub errors: u64,
}
/// Production-scale defaults: 100k simultaneous connections sustained for
/// one hour, with a 5-minute ramp-up and a 2-minute ramp-down, plus mild
/// chaos injection (0.1% network failures, 100 ms added latency).
impl Default for ProductionTestConfig {
    fn default() -> Self {
        // Pass/fail thresholds the run is validated against.
        let target_metrics = TargetMetrics {
            max_p99_latency_ms: 50.0,
            min_throughput_rps: 10_000.0,
            max_error_rate_percent: 0.1,
            max_cpu_usage_percent: 80.0,
            max_memory_usage_gb: 16.0,
        };
        // Failure-injection profile for the chaos phase.
        let chaos_config = ChaosConfig {
            network_failure_rate: 0.1,
            artificial_latency_ms: 100,
            packet_loss_rate: 0.01,
            service_restart_interval: Duration::from_secs(10 * 60), // 10 minutes
        };
        Self {
            max_concurrent_connections: 100_000,
            test_duration: Duration::from_secs(60 * 60), // 1 hour
            ramp_up_duration: Duration::from_secs(5 * 60), // 5 minutes
            ramp_down_duration: Duration::from_secs(2 * 60), // 2 minutes
            target_metrics,
            chaos_config,
        }
    }
}
impl ProductionTestRunner {
    /// Creates a new test runner for the given configuration and
    /// stream manager, with empty results and no active connections.
    pub fn new(
        config: ProductionTestConfig,
        stream_manager: Arc<StreamManager>,
    ) -> Self {
        Self {
            config,
            stream_manager,
            test_results: Arc::new(RwLock::new(TestResults::default())),
            active_connections: Arc::new(RwLock::new(Vec::new())),
        }
    }

    /// Runs the full production test suite: load testing, stress testing,
    /// chaos testing, then validation of the measured metrics against the
    /// configured targets.
    ///
    /// # Errors
    /// Propagates the first error returned by any test phase.
    pub async fn run_production_tests(&self) -> Result<TestResults, AppError> {
        tracing::info!("🚀 Démarrage tests de production - 100k+ connexions");
        let start_time = Instant::now();
        // Phase 1: load testing (progressive ramp-up to nominal load).
        self.run_load_test().await?;
        // Phase 2: stress testing (beyond-nominal load).
        self.run_stress_test().await?;
        // Phase 3: chaos testing (failure injection).
        self.run_chaos_test().await?;
        // Phase 4: validate measured results against target metrics.
        let results = self.validate_results().await?;
        let total_duration = start_time.elapsed();
        tracing::info!("✅ Tests de production terminés en {:?}", total_duration);
        Ok(results)
    }

    /// Executes the load-testing phase: progressive ramp-up to the
    /// configured maximum connection count, held for the test duration.
    async fn run_load_test(&self) -> Result<(), AppError> {
        tracing::info!("📈 Phase 1: Load Testing - Montée en charge progressive");
        let load_tester = LoadTester::new(
            self.config.max_concurrent_connections,
            self.config.ramp_up_duration,
            self.config.test_duration,
        );
        load_tester.execute(
            self.stream_manager.clone(),
            self.active_connections.clone(),
        ).await?;
        Ok(())
    }

    /// Executes the stress-testing phase at twice the configured maximum
    /// connection count to probe system limits.
    async fn run_stress_test(&self) -> Result<(), AppError> {
        tracing::info!("🔥 Phase 2: Stress Testing - Limites système");
        let stress_tester = StressTester::new(
            self.config.max_concurrent_connections * 2, // 2x nominal load
            Duration::from_secs(600), // 10 minutes of stress
        );
        stress_tester.execute().await?;
        Ok(())
    }

    /// Executes the chaos-testing phase, injecting the configured
    /// failures for a fixed window.
    async fn run_chaos_test(&self) -> Result<(), AppError> {
        tracing::info!("🌪️ Phase 3: Chaos Testing - Résilience");
        let chaos_tester = ChaosTester::new(self.config.chaos_config.clone());
        chaos_tester.execute(
            self.stream_manager.clone(),
            Duration::from_secs(900), // 15 minutes of chaos
        ).await?;
        Ok(())
    }

    /// Validates the measured results against the configured target metrics
    /// and fills in `final_status` accordingly.
    ///
    /// All five targets in `TargetMetrics` are checked (latency, throughput,
    /// error rate, CPU and memory — the last two were previously configured
    /// but never validated). Any violation marks the run as `Failed`.
    async fn validate_results(&self) -> Result<TestResults, AppError> {
        tracing::info!("📊 Validation des résultats contre métriques cibles");
        // Single clone of the shared state; mutated in place below
        // (the original cloned twice).
        let mut results = self.test_results.read().await.clone();
        let perf = &results.performance_metrics;
        let targets = &self.config.target_metrics;
        let mut warnings = Vec::new();
        let mut failed = false;
        if perf.latency_p99 > targets.max_p99_latency_ms {
            failed = true;
            warnings.push(format!(
                "Latence P99 trop élevée: {:.1}ms > {:.1}ms",
                perf.latency_p99,
                targets.max_p99_latency_ms
            ));
        }
        if perf.throughput_rps < targets.min_throughput_rps {
            failed = true;
            warnings.push(format!(
                "Throughput insuffisant: {:.1} < {:.1} req/s",
                perf.throughput_rps,
                targets.min_throughput_rps
            ));
        }
        if perf.error_rate > targets.max_error_rate_percent {
            failed = true;
            warnings.push(format!(
                "Taux d'erreur trop élevé: {:.2}% > {:.2}%",
                perf.error_rate,
                targets.max_error_rate_percent
            ));
        }
        if perf.cpu_usage_percent > targets.max_cpu_usage_percent {
            failed = true;
            warnings.push(format!(
                "Utilisation CPU trop élevée: {:.1}% > {:.1}%",
                perf.cpu_usage_percent,
                targets.max_cpu_usage_percent
            ));
        }
        if perf.memory_usage_gb > targets.max_memory_usage_gb {
            failed = true;
            warnings.push(format!(
                "Utilisation mémoire trop élevée: {:.1}GB > {:.1}GB",
                perf.memory_usage_gb,
                targets.max_memory_usage_gb
            ));
        }
        results.final_status = if failed {
            TestStatus::Failed { reason: warnings.join(", ") }
        } else if !warnings.is_empty() {
            TestStatus::PartiallyPassed { warnings }
        } else {
            TestStatus::Passed
        };
        Ok(results)
    }

    /// Returns a snapshot of current performance metrics.
    ///
    /// NOTE(review): these values are simulated placeholders, not live
    /// measurements — wire up real metric collection before relying on them.
    pub async fn get_current_metrics(&self) -> PerformanceMetrics {
        PerformanceMetrics {
            latency_p50: 12.5,
            latency_p95: 28.3,
            latency_p99: 45.7,
            throughput_rps: 12_500.0,
            error_rate: 0.05,
            max_concurrent_connections: 95_000,
            cpu_usage_percent: 72.8,
            memory_usage_gb: 12.4,
            network_io_mbps: 890.3,
        }
    }
}