veza/veza-stream-server/scripts/health_check_production.sh
2025-12-03 20:36:56 +01:00

248 lines
7.9 KiB
Bash
Raw Blame History

This file contains invisible Unicode characters

This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/bin/bash
# === PRODUCTION HEALTH CHECK SCRIPT ===
# Checks the overall health of the Rust modules running in production.
# Usage: ./scripts/health_check_production.sh
# Strict mode: stop on an unhandled command failure, on use of an unset
# variable, and when any stage of a pipeline fails.
set -o errexit -o nounset -o pipefail
# Configuration: the Kubernetes namespace and the names of the two workloads
# that the checks below query. Declared readonly so that no later statement
# can silently retarget the checks.
readonly NAMESPACE="veza-production"
readonly SERVICE_NAME="veza-stream-server"
readonly CHAT_SERVICE="veza-chat-server"
# Colors: ANSI escape sequences stored as literal backslash text; the logging
# helpers expand them at print time.
readonly RED='\033[0;31m'
readonly GREEN='\033[0;32m'
readonly YELLOW='\033[1;33m'
readonly BLUE='\033[0;34m'
readonly NC='\033[0m' # reset ("no color")
# Functions
# Logging helpers: print the message ($1) on stdout wrapped in a color code.
# The color variables hold literal backslash escapes, so they are expanded
# through %b, while the message goes through %s and is printed verbatim
# (echo -e would also interpret any backslash sequence inside the message).
log_info() { printf '%b %s%b\n' "${BLUE}" "$1" "${NC}"; }
log_success() { printf '%b%s%b\n' "${GREEN}" "$1" "${NC}"; }
log_warning() { printf '%b⚠️ %s%b\n' "${YELLOW}" "$1" "${NC}"; }
log_error() { printf '%b%s%b\n' "${RED}" "$1" "${NC}"; }
# Tracking variables: running tallies updated by every check and read by the
# final report.
TOTAL_CHECKS=0 PASSED_CHECKS=0 FAILED_CHECKS=0
# Run one named check and record its outcome in the global counters.
# Arguments:
#   $1 - human-readable name of the check, used in the log lines
#   $2 - command line given as a single string; it is executed with eval and
#        everything it prints (stdout and stderr) is discarded
# Globals written: TOTAL_CHECKS, and either PASSED_CHECKS or FAILED_CHECKS
# Returns: 0 when the command succeeds, 1 otherwise
run_check() {
  local check_name=$1 check_command=$2

  TOTAL_CHECKS=$((TOTAL_CHECKS + 1))
  log_info "Test: $check_name"

  # Failure path first; the success path falls through below.
  if ! eval "$check_command" > /dev/null 2>&1; then
    log_error "$check_name - ÉCHEC"
    FAILED_CHECKS=$((FAILED_CHECKS + 1))
    return 1
  fi

  log_success "$check_name - OK"
  PASSED_CHECKS=$((PASSED_CHECKS + 1))
  return 0
}
# Checks
# Run `kubectl cluster-info` as a check; the outcome is recorded in the global
# counters by run_check.
check_cluster_connection() {
  # run_check returns non-zero when the check fails. Left unguarded, `set -e`
  # would end the script right here, before the remaining checks and the
  # final report. The failure is already counted, so the status is dropped.
  run_check "Connexion cluster Kubernetes" "kubectl cluster-info" || true
}
# Check that the namespace named by $NAMESPACE can be fetched with kubectl;
# the outcome is recorded in the global counters by run_check.
check_namespace() {
  # run_check returns non-zero when the check fails. Left unguarded, `set -e`
  # would end the script here instead of continuing to the final report. The
  # failure is already counted, so the status is dropped.
  run_check "Namespace $NAMESPACE" "kubectl get namespace $NAMESPACE" || true
}
# Compare the ready and desired replica counts of one deployment and record
# the outcome in the global counters.
# Arguments:
#   $1 - label used in the log line
#   $2 - deployment name
# Globals: NAMESPACE (read); TOTAL_CHECKS, PASSED_CHECKS, FAILED_CHECKS (written)
_check_deployment_replicas() {
  local label=$1 deployment=$2
  local ready desired

  ready=$(kubectl get deployment "$deployment" -n "$NAMESPACE" \
    -o jsonpath='{.status.readyReplicas}' 2> /dev/null || echo "0")
  desired=$(kubectl get deployment "$deployment" -n "$NAMESPACE" \
    -o jsonpath='{.spec.replicas}' 2> /dev/null || echo "0")

  # The jsonpath output can be empty even though kubectl succeeded (for
  # example when the field is absent from the object), in which case the
  # `|| echo "0"` fallback does not fire. Normalize anything that is not a
  # plain integer to 0 so the numeric tests below never see a bad operand.
  [[ "$ready" =~ ^[0-9]+$ ]] || ready=0
  [[ "$desired" =~ ^[0-9]+$ ]] || desired=0

  TOTAL_CHECKS=$((TOTAL_CHECKS + 1))
  # Healthy only when at least one replica is wanted and all of them are ready.
  if [ "$ready" -eq "$desired" ] && [ "$desired" -gt 0 ]; then
    log_success "$label: $ready/$desired pods prêts"
    PASSED_CHECKS=$((PASSED_CHECKS + 1))
  else
    log_error "$label: $ready/$desired pods prêts"
    FAILED_CHECKS=$((FAILED_CHECKS + 1))
  fi
}

# Check that the stream and chat deployments have all desired pods ready.
# Each deployment counts as one check.
check_deployments() {
  log_info "=== Vérification des déploiements ==="
  _check_deployment_replicas "Stream Server" "$SERVICE_NAME"
  _check_deployment_replicas "Chat Server" "$CHAT_SERVICE"
}
# Check that the Service objects of the stream and chat servers exist in
# $NAMESPACE. Each lookup is one check, recorded by run_check.
check_services() {
  log_info "=== Vérification des services ==="
  # run_check returns non-zero on failure; left unguarded, `set -e` would stop
  # the script at the first missing service, skipping the second lookup and
  # the final report. The failure is already counted, so the status is dropped.
  run_check "Service Stream Server" "kubectl get service $SERVICE_NAME -n $NAMESPACE" || true
  run_check "Service Chat Server" "kubectl get service $CHAT_SERVICE -n $NAMESPACE" || true
}
# Probe the /health endpoint of one service and record the outcome in the
# global counters.
# Arguments:
#   $1 - label used in the log lines
#   $2 - service name whose clusterIP is looked up in $NAMESPACE
#   $3 - name given to the temporary curl pod
# Globals: NAMESPACE (read); TOTAL_CHECKS, PASSED_CHECKS, FAILED_CHECKS (written)
_probe_health_endpoint() {
  local label=$1 service=$2 pod_name=$3
  local ip

  ip=$(kubectl get service "$service" -n "$NAMESPACE" \
    -o jsonpath='{.spec.clusterIP}' 2> /dev/null || echo "")

  TOTAL_CHECKS=$((TOTAL_CHECKS + 1))

  # No usable address: either the lookup produced nothing, or the service is
  # headless and reports the literal clusterIP "None". Starting a pod just to
  # curl http://None:8080 would only produce a slower, less specific failure.
  if [[ -z "$ip" || "$ip" == "None" ]]; then
    log_error "$label IP non trouvée"
    FAILED_CHECKS=$((FAILED_CHECKS + 1))
    return 0
  fi

  # One-shot pod that runs curl against the service and is removed afterwards
  # (--rm); curl -f turns HTTP error responses into a non-zero exit status and
  # -m 10 caps the request at ten seconds.
  if kubectl run "$pod_name" --rm -i --restart=Never --image=curlimages/curl -- \
    curl -f -m 10 "http://${ip}:8080/health" &> /dev/null; then
    log_success "$label health endpoint"
    PASSED_CHECKS=$((PASSED_CHECKS + 1))
  else
    log_error "$label health endpoint"
    FAILED_CHECKS=$((FAILED_CHECKS + 1))
  fi
}

# Probe the health endpoints of the stream and chat servers from inside the
# cluster. Each server counts as one check.
check_health_endpoints() {
  log_info "=== Tests des endpoints de santé ==="
  _probe_health_endpoint "Stream Server" "$SERVICE_NAME" health-test-stream
  _probe_health_endpoint "Chat Server" "$CHAT_SERVICE" health-test-chat
}
# Succeed only when at least one pod matching the label selector ($1) in
# $NAMESPACE is in the Running phase.
_has_running_pods() {
  local selector=$1 pods
  pods=$(kubectl get pods -n "$NAMESPACE" -l "$selector" \
    --field-selector=status.phase=Running -o name 2> /dev/null) || return 1
  [[ -n "$pods" ]]
}

# Check that PostgreSQL and Redis pods (labels app=postgres / app=redis) are
# present and running in $NAMESPACE. Each one counts as one check.
check_database_connectivity() {
  log_info "=== Vérification connectivité base de données ==="
  # A bare `kubectl get pods -l ...` exits 0 even when no pod matches, so it
  # can never fail as a check; _has_running_pods inspects the actual result.
  # `|| true`: run_check returns non-zero on failure, which under `set -e`
  # would end the script before the final report; the failure is already
  # counted, so the status is dropped.
  run_check "Service PostgreSQL" "_has_running_pods app=postgres" || true
  run_check "Service Redis" "_has_running_pods app=redis" || true
}
# Check that the stream server exposes its metrics endpoint (port 9090, path
# /metrics) by curling it from a temporary pod. Counts as one check.
# Globals: NAMESPACE, SERVICE_NAME (read);
#          TOTAL_CHECKS, PASSED_CHECKS, FAILED_CHECKS (written)
check_metrics() {
  log_info "=== Vérification des métriques ==="

  # Prometheus metrics
  local stream_ip
  stream_ip=$(kubectl get service "$SERVICE_NAME" -n "$NAMESPACE" \
    -o jsonpath='{.spec.clusterIP}' 2> /dev/null || echo "")

  TOTAL_CHECKS=$((TOTAL_CHECKS + 1))

  # No usable address: either the lookup produced nothing, or the service is
  # headless and reports the literal clusterIP "None".
  if [[ -z "$stream_ip" || "$stream_ip" == "None" ]]; then
    log_error "IP service non trouvée pour métriques"
    FAILED_CHECKS=$((FAILED_CHECKS + 1))
    return 0
  fi

  # One-shot pod removed after the request (--rm); curl -f maps HTTP error
  # responses to a non-zero exit status and -m 10 caps the request at 10 s.
  if kubectl run metrics-test --rm -i --restart=Never --image=curlimages/curl -- \
    curl -f -m 10 "http://${stream_ip}:9090/metrics" &> /dev/null; then
    log_success "Métriques Prometheus disponibles"
    PASSED_CHECKS=$((PASSED_CHECKS + 1))
  else
    log_error "Métriques Prometheus indisponibles"
    FAILED_CHECKS=$((FAILED_CHECKS + 1))
  fi
}
# Print CPU/memory usage (`kubectl top`) for the stream pods, the chat pods and
# the nodes. Purely informational: nothing is added to the check counters, and
# a failing `kubectl top` only produces a warning.
check_resource_usage() {
  local -r unavailable="Metrics server non disponible"

  log_info "=== Utilisation des ressources ==="

  # Expansions are quoted so each value reaches kubectl as exactly one argument.
  echo "Pods Stream Server:"
  kubectl top pods -n "$NAMESPACE" -l "app=$SERVICE_NAME" 2> /dev/null \
    || log_warning "$unavailable"

  echo "Pods Chat Server:"
  kubectl top pods -n "$NAMESPACE" -l "app=$CHAT_SERVICE" 2> /dev/null \
    || log_warning "$unavailable"

  echo "Utilisation des nœuds:"
  kubectl top nodes 2> /dev/null || log_warning "$unavailable"
}
# Print the last 10 log lines of the stream and chat server pods. Purely
# informational: nothing is added to the check counters, and a failing
# `kubectl logs` only produces a warning.
check_recent_logs() {
  local -r unavailable="Logs non disponibles"

  log_info "=== Vérification des logs récents ==="

  # Expansions are quoted so each value reaches kubectl as exactly one argument.
  echo "Logs Stream Server (dernières 10 lignes):"
  kubectl logs -n "$NAMESPACE" -l "app=$SERVICE_NAME" --tail=10 2> /dev/null \
    || log_warning "$unavailable"

  echo "Logs Chat Server (dernières 10 lignes):"
  kubectl logs -n "$NAMESPACE" -l "app=$CHAT_SERVICE" --tail=10 2> /dev/null \
    || log_warning "$unavailable"
}
# Final report
# Print the summary of all recorded checks and terminate the script.
# Globals read: NAMESPACE, TOTAL_CHECKS, PASSED_CHECKS, FAILED_CHECKS
# Exit status (this function never returns):
#   0 - HEALTHY:  no check failed
#   1 - WARNING:  some checks failed, but at least 80% passed
#   2 - CRITICAL: fewer than 80% of the checks passed
#   3 - UNKNOWN:  no check was recorded at all
generate_report() {
  echo ""
  echo "========================================"
  echo " RAPPORT DE SANTÉ"
  echo "========================================"
  echo "Date: $(date)"
  echo "Namespace: $NAMESPACE"
  echo ""
  echo "Résultats:"
  echo " Total des tests: $TOTAL_CHECKS"
  echo " Tests réussis: $PASSED_CHECKS"
  echo " Tests échoués: $FAILED_CHECKS"
  echo ""

  # With zero recorded checks the percentage below would divide by zero, and
  # "no failures" would not mean the system is healthy; report it separately.
  if [ "$TOTAL_CHECKS" -eq 0 ]; then
    log_error "AUCUN TEST EXÉCUTÉ"
    echo "Status: UNKNOWN"
    exit 3
  fi

  local success_rate
  success_rate=$((PASSED_CHECKS * 100 / TOTAL_CHECKS)) # integer percentage

  if [ "$FAILED_CHECKS" -eq 0 ]; then
    log_success "🎉 SYSTÈME EN PARFAITE SANTÉ ($success_rate%)"
    echo "Status: HEALTHY"
    exit 0
  elif [ "$success_rate" -ge 80 ]; then
    log_warning "⚠️ SYSTÈME OPÉRATIONNEL AVEC ALERTES ($success_rate%)"
    echo "Status: WARNING"
    exit 1
  else
    log_error "🚨 SYSTÈME EN ÉTAT CRITIQUE ($success_rate%)"
    echo "Status: CRITICAL"
    exit 2
  fi
}
# Main
# Entry point: print the banner, run every check in a fixed order, then hand
# over to generate_report, which prints the summary and exits the script.
main() {
  local step
  # Checks in execution order; the report comes last.
  local -a steps=(
    check_cluster_connection
    check_namespace
    check_deployments
    check_services
    check_health_endpoints
    check_database_connectivity
    check_metrics
    check_resource_usage
    check_recent_logs
  )

  printf '%s\n' \
    "🩺 VÉRIFICATION SANTÉ PRODUCTION VEZA" \
    "=====================================" \
    ""

  for step in "${steps[@]}"; do
    "$step"
  done

  generate_report
}
# Execution: run main, forwarding every command-line argument unchanged.
main "$@"