// veza/apps/web/src/utils/sanitize.ts

/**
 * XSS Protection utilities
 * Sanitise et valide le contenu utilisateur pour prévenir les attaques XSS
 */

/**
 * Options accepted by the HTML sanitisation helpers.
 * Every field is optional; `sanitizeHTML` merges them over `DEFAULT_OPTIONS`.
 */
export interface SanitizeOptions {
  /** Tag names (compared in lower case) that may remain in the content. */
  allowedTags?: Array<string>
  /** Permitted attribute names, keyed by lower-case tag name. */
  allowedAttributes?: Record<string, Array<string>>
  /** URL schemes, written without the trailing colon, that are accepted. */
  allowedSchemes?: Array<string>
  /** When truthy, tags absent from `allowedTags` are removed. */
  stripUnknownTags?: boolean
  /** When truthy, tag pairs containing only whitespace are removed. */
  stripEmptyTags?: boolean
}

/**
 * Baseline sanitisation settings.
 * `sanitizeHTML` uses these for every option the caller does not supply.
 */
const DEFAULT_OPTIONS: SanitizeOptions = {
  allowedTags: [
    // Text-level and generic containers
    'p', 'br', 'strong', 'em', 'u', 'i', 'b', 'span', 'div',
    // Headings
    'h1', 'h2', 'h3', 'h4', 'h5', 'h6',
    // Lists, quotations and code
    'ul', 'ol', 'li', 'blockquote', 'pre', 'code',
    // Links and images
    'a', 'img',
  ],
  allowedAttributes: {
    a: ['href', 'title', 'target'],
    img: ['src', 'alt', 'title', 'width', 'height'],
    span: ['class'],
    div: ['class'],
    p: ['class'],
    pre: ['class'],
    code: ['class'],
  },
  allowedSchemes: ['http', 'https', 'mailto'],
  stripUnknownTags: true,
  stripEmptyTags: true,
}

/**
 * Regular expressions for content treated as dangerous.
 *
 * `sanitizeHTML` applies them in array order and replaces every match with an
 * empty string, so the order of the entries is significant. Every pattern
 * carries the `g` and `i` flags.
 *
 * NOTE(review): the two `on…=` handler patterns are not limited to the inside
 * of a tag, so they also match ordinary text (e.g. `money=5` loses `oney=5`).
 * Confirm whether that is acceptable for plain-text content.
 */
const DANGEROUS_PATTERNS = [
  // Script elements and script-capable URL schemes
  /<script[^>]*>[\s\S]*?<\/script>/gi,
  /javascript:/gi,
  /vbscript:/gi,
  /data:text\/html/gi,
  /data:application\/javascript/gi,

  // Inline event handlers: quoted value first, then unquoted value
  /on\w+\s*=\s*["'][^"']*["']/gi,
  /on\w+\s*=\s*[^>\s]+/gi,

  // Dangerous CSS constructs
  /expression\s*\(/gi,
  /url\s*\(\s*javascript:/gi,

  // Dangerous elements: embedded content and form controls
  /<iframe[^>]*>[\s\S]*?<\/iframe>/gi,
  /<object[^>]*>[\s\S]*?<\/object>/gi,
  /<embed[^>]*>/gi,
  /<applet[^>]*>[\s\S]*?<\/applet>/gi,
  /<form[^>]*>[\s\S]*?<\/form>/gi,
  /<input[^>]*>/gi,
  /<textarea[^>]*>[\s\S]*?<\/textarea>/gi,
  /<select[^>]*>[\s\S]*?<\/select>/gi,
  /<button[^>]*>[\s\S]*?<\/button>/gi,

  // Document-level elements: meta, link and style
  /<meta[^>]*>/gi,
  /<link[^>]*>/gi,
  /<style[^>]*>[\s\S]*?<\/style>/gi,
]

/**
 * Case-insensitive patterns for URL schemes treated as suspicious.
 *
 * Each pattern is `<scheme>:` with the `i` flag and no anchoring, so it
 * matches that text anywhere in the tested string, not only at the start.
 */
const SUSPICIOUS_URL_PATTERNS = [
  'javascript', 'vbscript', 'data', 'file', 'ftp', 'gopher',
  'jar', 'ldap', 'ldaps', 'magnet', 'news', 'nntp',
  'sftp', 'smb', 'ssh', 'telnet', 'tftp', 'view-source',
].map(scheme => new RegExp(`${scheme}:`, 'i'))

/**
 * Sanitises user-supplied HTML to prevent XSS.
 *
 * Steps, in order:
 * 1. remove every match of `DANGEROUS_PATTERNS`;
 * 2. remove tags not in `allowedTags` (when `stripUnknownTags` is truthy);
 * 3. filter attributes against `allowedAttributes`;
 * 4. drop http(s) URLs whose scheme is not in `allowedSchemes`;
 * 5. remove empty tag pairs (when `stripEmptyTags` is truthy);
 * 6. escape the remaining HTML special characters.
 *
 * NOTE(review): step 6 escapes `<`, `>`, quotes and `/` across the whole
 * string, so tags kept by steps 2-5 are returned as escaped text rather than
 * live markup. Confirm with callers that this is the intended output.
 *
 * @param content - Raw, untrusted HTML.
 * @param options - Overrides for `DEFAULT_OPTIONS`; a field that is missing or
 *   `undefined` falls back to its default.
 * @returns The sanitised, HTML-escaped string.
 */
export function sanitizeHTML(content: string, options: SanitizeOptions = {}): string {
  // Resolve each option on its own: a plain object spread would let an explicit
  // `undefined` override the default and make the helpers below throw.
  const allowedTags = options.allowedTags ?? DEFAULT_OPTIONS.allowedTags ?? []
  const allowedAttributes = options.allowedAttributes ?? DEFAULT_OPTIONS.allowedAttributes ?? {}
  const allowedSchemes = options.allowedSchemes ?? DEFAULT_OPTIONS.allowedSchemes ?? []
  const shouldStripUnknownTags = options.stripUnknownTags ?? DEFAULT_OPTIONS.stripUnknownTags
  const shouldStripEmptyTags = options.stripEmptyTags ?? DEFAULT_OPTIONS.stripEmptyTags

  let sanitized = content

  // Remove dangerous patterns, in list order
  for (const pattern of DANGEROUS_PATTERNS) {
    sanitized = sanitized.replace(pattern, '')
  }

  // Remove tags that are not allowed
  if (shouldStripUnknownTags) {
    sanitized = stripUnknownTags(sanitized, allowedTags)
  }

  // Filter attributes
  sanitized = sanitizeAttributes(sanitized, allowedAttributes)

  // Validate URLs
  sanitized = validateURLs(sanitized, allowedSchemes)

  // Remove empty tags
  if (shouldStripEmptyTags) {
    sanitized = stripEmptyTags(sanitized)
  }

  // Escape the remaining HTML special characters
  return escapeHTML(sanitized)
}

/**
 * Removes every opening or closing tag whose name is not in `allowedTags`.
 *
 * Only the tag itself is removed; text between an opening and closing tag is
 * kept. The removal is repeated until the string stops changing, because
 * deleting one tag can splice the surrounding characters into a new tag
 * (`<<script>script>` becomes `<script>` after a single pass).
 *
 * @param content - HTML fragment to filter.
 * @param allowedTags - Lower-case tag names to keep.
 * @returns The fragment without disallowed tags.
 */
function stripUnknownTags(content: string, allowedTags: string[]): string {
  const tagPattern = /<\/?([a-zA-Z][a-zA-Z0-9]*)[^>]*>/g

  let previous: string
  let current = content
  do {
    previous = current
    current = previous.replace(tagPattern, (match: string, tagName: string) =>
      allowedTags.includes(tagName.toLowerCase()) ? match : ''
    )
    // Each changing pass strictly shortens the string, so the loop terminates.
  } while (current !== previous)

  return current
}

/**
 * Rebuilds the attribute list of every opening tag from an allow-list.
 *
 * For each opening tag, only `name=value` attributes whose lower-cased name is
 * listed for that tag in `allowedAttributes` are kept, and they are re-emitted
 * as `name="value"`. Everything else inside the tag is dropped: disallowed
 * attributes (quoted or unquoted), valueless attributes, stray text and a
 * trailing `/`. `href` and `src` values must additionally pass `isValidURL`.
 * Tags with no entry in `allowedAttributes` lose all attributes. Closing tags
 * are not touched.
 *
 * @param content - HTML fragment to process.
 * @param allowedAttributes - Permitted attribute names keyed by lower-case tag name.
 * @returns The fragment with filtered attributes.
 */
function sanitizeAttributes(content: string, allowedAttributes: Record<string, string[]>): string {
  const tagPattern = /<([a-zA-Z][a-zA-Z0-9]*)([^>]*)>/g

  return content.replace(tagPattern, (_match: string, tagName: string, attributes: string) => {
    const tagKey = tagName.toLowerCase()
    // Own-property lookup only: a tag named `constructor` must not resolve to
    // a member inherited from Object.prototype.
    const allowedAttrs = Object.prototype.hasOwnProperty.call(allowedAttributes, tagKey)
      ? allowedAttributes[tagKey]
      : undefined
    if (!allowedAttrs) {
      return `<${tagName}>`
    }

    // name = "double-quoted" | 'single-quoted' | unquoted.
    // Created per tag so the global regex always starts at lastIndex 0.
    const attrPattern = /([^\s"'<>\/=]+)\s*=\s*(?:"([^"]*)"|'([^']*)'|([^\s"'<>`]+))/g
    const kept: string[] = []
    let attr: RegExpExecArray | null
    while ((attr = attrPattern.exec(attributes)) !== null) {
      const [, attrName = '', doubleQuoted, singleQuoted, unquoted] = attr
      const attrValue = doubleQuoted ?? singleQuoted ?? unquoted ?? ''
      const lowerName = attrName.toLowerCase()

      if (!allowedAttrs.includes(lowerName)) {
        continue
      }
      // URLs in href and src must be valid and safe
      if ((lowerName === 'href' || lowerName === 'src') && !isValidURL(attrValue)) {
        continue
      }
      // A single-quoted source value may contain `"`; encode it so the
      // re-emitted double-quoted value cannot be broken out of.
      kept.push(`${attrName}="${attrValue.replace(/"/g, '&quot;')}"`)
    }

    return kept.length > 0 ? `<${tagName} ${kept.join(' ')}>` : `<${tagName}>`
  })
}

/**
 * Removes http(s) URLs whose scheme is not allowed or that cannot be parsed.
 *
 * Only text starting with `http://` or `https://` is examined; the match is
 * case-insensitive because URL schemes are case-insensitive (`HTTP://…` is the
 * same scheme as `http://…`). Other text is returned unchanged.
 *
 * @param content - Text or HTML that may contain URLs.
 * @param allowedSchemes - Accepted schemes without the trailing colon (e.g. `https`).
 * @returns The content with rejected URLs replaced by an empty string.
 */
function validateURLs(content: string, allowedSchemes: string[]): string {
  const urlPattern = /(https?:\/\/[^\s<>"']+)/gi
  // `URL.protocol` is always lower case, so compare against lower-cased entries.
  const permittedSchemes = allowedSchemes.map(scheme => scheme.toLowerCase())

  return content.replace(urlPattern, (url: string) => {
    try {
      // `protocol` includes the trailing colon; strip it before comparing.
      const scheme = new URL(url).protocol.slice(0, -1)
      if (permittedSchemes.includes(scheme)) {
        return url
      }
    } catch {
      // Unparseable URL: fall through and remove it.
    }
    return ''
  })
}

/**
 * Reports whether a URL is parseable, uses an accepted protocol and matches
 * none of `SUSPICIOUS_URL_PATTERNS`.
 *
 * The URL must be absolute, because it is parsed with `new URL(url)` and no
 * base. Accepted protocols are `http:`, `https:` and `mailto:`. The suspicious
 * patterns are tested against the raw input string.
 *
 * @param url - Candidate URL.
 * @returns `true` when every check passes, otherwise `false`.
 */
function isValidURL(url: string): boolean {
  let parsed: URL
  try {
    parsed = new URL(url)
  } catch {
    // Not a parseable absolute URL
    return false
  }

  // Protocol check
  const acceptedProtocols = ['http:', 'https:', 'mailto:']
  if (!acceptedProtocols.includes(parsed.protocol)) {
    return false
  }

  // Suspicious-pattern check
  for (const pattern of SUSPICIOUS_URL_PATTERNS) {
    if (pattern.test(url)) {
      return false
    }
  }
  return true
}

/**
 * Removes tag pairs that contain nothing but whitespace.
 *
 * The removal is repeated until the string stops changing, so wrappers that
 * become empty once their empty children are removed are stripped as well
 * (`<div><span></span></div>`). The lookahead after the tag name keeps the
 * regex from backtracking to a shorter name and pairing mismatched tags such
 * as `<pre></p>`.
 *
 * @param content - HTML fragment to clean.
 * @returns The fragment without empty tag pairs.
 */
function stripEmptyTags(content: string): string {
  const emptyPairPattern = /<([a-zA-Z][a-zA-Z0-9]*)(?=[\s/>])[^>]*>\s*<\/\1>/g

  let previous: string
  let current = content
  do {
    previous = current
    current = previous.replace(emptyPairPattern, '')
    // Each changing pass strictly shortens the string, so the loop terminates.
  } while (current !== previous)

  return current
}

/**
 * Replaces the HTML special characters `&`, `<`, `>`, `"`, `'` and `/` with
 * their entity equivalents. All other characters are returned unchanged.
 *
 * @param content - Text to escape.
 * @returns The escaped text.
 */
function escapeHTML(content: string): string {
  const toEntity = (char: string): string => {
    switch (char) {
      case '&':
        return '&amp;'
      case '<':
        return '&lt;'
      case '>':
        return '&gt;'
      case '"':
        return '&quot;'
      case "'":
        return '&#x27;'
      case '/':
        return '&#x2F;'
      default:
        return char
    }
  }

  return content.replace(/[&<>"'/]/g, toEntity)
}

/**
 * Sanitises a chat message with a stricter configuration than the default:
 * a small set of inline formatting tags, `class` on `span` only, and the
 * `http`/`https` schemes.
 *
 * @param message - Raw chat message.
 * @returns The result of `sanitizeHTML` with the chat configuration.
 */
export function sanitizeChatMessage(message: string): string {
  return sanitizeHTML(message, {
    allowedTags: ['p', 'br', 'strong', 'em', 'u', 'i', 'b', 'span'],
    allowedAttributes: { span: ['class'] },
    allowedSchemes: ['http', 'https'],
    stripUnknownTags: true,
    stripEmptyTags: true,
  })
}

/**
 * Sanitises usernames and other plain-text fields.
 *
 * The input is trimmed and then passed through `escapeHTML`, so no markup is
 * preserved.
 *
 * @param input - Raw field value.
 * @returns The trimmed, HTML-escaped value.
 */
export function sanitizeTextInput(input: string): string {
  const trimmed = input.trim()
  return escapeHTML(trimmed)
}

/**
 * Validates a user-supplied URL and returns its normalised form.
 *
 * The URL must parse with `new URL(url)` (so it must be absolute), use the
 * `http:` or `https:` protocol, and match none of `SUSPICIOUS_URL_PATTERNS`,
 * which are tested against the raw input string.
 *
 * @param url - Candidate URL.
 * @returns The serialised URL, or `null` when any check fails.
 */
export function sanitizeURL(url: string): string | null {
  let parsed: URL
  try {
    parsed = new URL(url)
  } catch {
    return null
  }

  // Only HTTP and HTTPS are accepted
  const usesWebProtocol = parsed.protocol === 'http:' || parsed.protocol === 'https:'
  if (!usesWebProtocol) {
    return null
  }

  // Reject anything matching a suspicious pattern
  for (const pattern of SUSPICIOUS_URL_PATTERNS) {
    if (pattern.test(url)) {
      return null
    }
  }

  return parsed.toString()
}

/**
 * Normalises and validates an email address.
 *
 * The input is trimmed and lower-cased, then checked against a simple
 * `local@domain.tld` pattern (TLD of at least two letters).
 *
 * @param email - Raw email input.
 * @returns The normalised address, or `null` when it does not match.
 */
export function sanitizeEmail(email: string): string | null {
  const normalized = email.trim().toLowerCase()
  const matchesEmailShape = /^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}$/.test(normalized)
  return matchesEmailShape ? normalized : null
}

/**
 * Checks a password against the security rules below and collects one
 * (French, user-facing) message per rule that fails.
 *
 * Rules, in reporting order: at least 12 characters, an uppercase letter, a
 * lowercase letter, a digit, a special character, and no common weak pattern
 * (a character repeated four or more times, `123456`, `password`, `qwerty`).
 *
 * @param password - Candidate password.
 * @returns `isValid` is `true` when `errors` is empty.
 */
export function validatePassword(password: string): { isValid: boolean; errors: string[] } {
  const hasWeakPattern = [
    /(.)\1{3,}/, // same character repeated
    /123456/, // numeric sequence
    /password/i, // the word "password"
    /qwerty/i, // the word "qwerty"
  ].some(pattern => pattern.test(password))

  // Each entry pairs "rule is violated" with the message to report for it.
  const checks: Array<[boolean, string]> = [
    [password.length < 12, 'Le mot de passe doit contenir au moins 12 caractères'],
    [!/[A-Z]/.test(password), 'Le mot de passe doit contenir au moins une majuscule'],
    [!/[a-z]/.test(password), 'Le mot de passe doit contenir au moins une minuscule'],
    [!/[0-9]/.test(password), 'Le mot de passe doit contenir au moins un chiffre'],
    [
      !/[!@#$%^&*()_+\-=[\]{};':"\\|,.<>/?]/.test(password),
      'Le mot de passe doit contenir au moins un caractère spécial',
    ],
    [hasWeakPattern, 'Le mot de passe contient des patterns trop communs'],
  ]

  const errors = checks.filter(([violated]) => violated).map(([, message]) => message)

  return { isValid: errors.length === 0, errors }
}

/**
 * Hook-style accessor that bundles this module's sanitisation helpers.
 *
 * It calls no React APIs and holds no state; each call returns a new object
 * referencing the same module-level functions.
 *
 * @returns An object exposing the sanitisation and validation helpers.
 */
export function useSanitization() {
  const helpers = {
    sanitizeHTML,
    sanitizeChatMessage,
    sanitizeTextInput,
    sanitizeURL,
    sanitizeEmail,
    validatePassword,
  }
  return helpers
}