package utils import ( "html" "regexp" "strings" "unicode" ) // BE-SEC-009: Input sanitization to prevent XSS and injection attacks // SanitizeInput sanitizes user input to prevent XSS and injection attacks // It performs the following operations: // 1. HTML escape special characters // 2. Remove control characters (except newlines and tabs) // 3. Trim whitespace // 4. Remove dangerous URL schemes (javascript:, data:, vbscript:, etc.) // 5. Limit length to prevent DoS func SanitizeInput(input string, maxLength int) string { if input == "" { return "" } // Default max length if not specified if maxLength <= 0 { maxLength = 10000 } // Step 1: HTML escape to prevent XSS cleaned := html.EscapeString(input) // Step 2: Remove dangerous URL schemes (case-insensitive) dangerousSchemes := regexp.MustCompile(`(?i)(javascript|data|vbscript|file|about):`) cleaned = dangerousSchemes.ReplaceAllString(cleaned, "") // Step 3: Remove control characters except newline (\n), carriage return (\r), and tab (\t) cleaned = strings.Map(func(r rune) rune { if r == '\n' || r == '\r' || r == '\t' { return r } if unicode.IsControl(r) { return -1 } return r }, cleaned) // Step 4: Trim whitespace cleaned = strings.TrimSpace(cleaned) // Step 5: Limit length if len(cleaned) > maxLength { cleaned = cleaned[:maxLength] } return cleaned } // SanitizeText sanitizes text input (for usernames, titles, descriptions, etc.) 
// It is more permissive than SanitizeInput: of all control characters only NUL
// and SUB are removed. The input is HTML-escaped, dangerous URL scheme prefixes
// (javascript:, data:, vbscript:, file:, about:) are deleted wherever they
// occur, surrounding whitespace is trimmed, and the result is limited to
// maxLength bytes (5000 when maxLength <= 0). Truncation never splits a UTF-8
// character or an HTML entity produced by the escaping step.
func SanitizeText(input string, maxLength int) string {
	if input == "" {
		return ""
	}
	if maxLength <= 0 {
		maxLength = 5000
	}

	// HTML escape to prevent XSS.
	cleaned := html.EscapeString(input)

	// NUL and SUB are removed before schemes are looked for, so they cannot be
	// used to hide one ("java\x00script:").
	cleaned = strings.ReplaceAll(cleaned, "\x00", "")
	cleaned = strings.ReplaceAll(cleaned, "\x1a", "")

	cleaned = stripSchemes(cleaned)
	cleaned = strings.TrimSpace(cleaned)
	return truncateEscaped(cleaned, maxLength)
}

// SanitizeHTML removes dangerous tags and attributes from HTML content while
// leaving the remaining markup in place (nothing is escaped on the normal
// path).
//
// It removes:
//   - script, iframe, object, embed and style elements, including their
//     content, as well as lone opening or closing tags of those elements;
//   - event handler attributes (onclick, onerror, ...), quoted or unquoted;
//   - href/src attributes whose value starts with a dangerous URL scheme.
//
// The removal is repeated until the text stops changing, so markup that only
// becomes dangerous after an inner fragment was deleted is removed too. Input
// that is still changing after maxHTMLPasses passes is treated as hostile and
// HTML-escaped as a whole. The result is limited to maxLength bytes (50000
// when maxLength <= 0) without splitting a UTF-8 character or leaving a
// half-cut tag at the end.
//
// NOTE(review): this is pattern matching, not HTML parsing, so it is a
// best-effort filter (entity-encoded schemes, for example, are not decoded).
// Content that must be safe to render should go through a real HTML sanitizer.
func SanitizeHTML(input string, maxLength int) string {
	if input == "" {
		return ""
	}
	if maxLength <= 0 {
		maxLength = 50000
	}

	cleaned := input
	stable := false
	for pass := 0; pass < maxHTMLPasses; pass++ {
		next := stripHTMLOnce(cleaned)
		if next == cleaned {
			stable = true
			break
		}
		cleaned = next
	}
	if !stable {
		// Bounded work: give up on stripping and neutralize everything.
		cleaned = html.EscapeString(cleaned)
	}

	if len(cleaned) <= maxLength {
		return cleaned
	}
	cleaned = truncateAtRune(cleaned, maxLength)

	// A '<' with no '>' after it is a tag the cut split in two; drop it so the
	// surrounding page cannot complete it.
	if open := strings.LastIndexByte(cleaned, '<'); open > strings.LastIndexByte(cleaned, '>') {
		cleaned = cleaned[:open]
	}
	return cleaned
}

// SanitizeURL sanitizes a URL to prevent XSS and injection.
//
// All control characters are removed first (a URL has no use for them and
// they can be used to hide a scheme), surrounding whitespace is trimmed, and
// leading dangerous schemes (javascript:, data:, vbscript:, file:, about:) are
// stripped repeatedly until none is left at the start. The result is limited
// to 2048 bytes without splitting a UTF-8 character.
func SanitizeURL(input string) string {
	if input == "" {
		return ""
	}

	cleaned := strings.TrimSpace(dropControlChars(input))

	// Stripping one scheme may expose another ("javascript:javascript:..."),
	// possibly behind whitespace, so repeat until the prefix is clean.
	for {
		loc := leadingSchemePattern.FindStringIndex(cleaned)
		if loc == nil {
			break
		}
		cleaned = strings.TrimSpace(cleaned[loc[1]:])
	}

	return truncateAtRune(cleaned, 2048)
}

// SanitizeEmail sanitizes an email address: it lowercases the input, removes
// control characters, trims surrounding whitespace and limits the result to
// 320 bytes without splitting a UTF-8 character. It does not validate that the
// result is a well-formed address.
func SanitizeEmail(input string) string {
	if input == "" {
		return ""
	}

	// Control characters are removed before trimming so that whitespace hidden
	// behind one is trimmed as well.
	cleaned := strings.TrimSpace(dropControlChars(strings.ToLower(input)))

	return truncateAtRune(cleaned, 320)
}

// SanitizeUsername sanitizes a username: it removes control characters and
// anything that looks like an HTML tag, trims surrounding whitespace and
// limits the result to 50 bytes without splitting a UTF-8 character.
func SanitizeUsername(input string) string {
	if input == "" {
		return ""
	}

	cleaned := dropControlChars(input)
	cleaned = htmlTagPattern.ReplaceAllString(cleaned, "")

	// Trim last: removing a tag can expose whitespace ("<b> bob </b>").
	cleaned = strings.TrimSpace(cleaned)

	return truncateAtRune(cleaned, 50)
}

const (
	// dangerousSchemes is the regexp alternation of URL schemes that are
	// stripped from text, URLs and href/src attributes.
	dangerousSchemes = `javascript|data|vbscript|file|about`

	// schemeWindow is the number of trailing bytes inspected when checking
	// whether a buffer ends in a dangerous scheme. The longest scheme prefix
	// ("javascript:") is far shorter, even with multi-byte case-fold variants.
	schemeWindow = 32

	// maxHTMLPasses bounds the work SanitizeHTML does on input that keeps
	// reassembling dangerous markup. Ordinary input is stable after two passes.
	maxHTMLPasses = 8
)

// Patterns are compiled once at package initialization rather than per call.
var (
	// trailingSchemePattern matches a dangerous scheme prefix at the very end
	// of the text.
	trailingSchemePattern = regexp.MustCompile(`(?i)(?:` + dangerousSchemes + `):\z`)

	// leadingSchemePattern matches a dangerous scheme prefix at the very start
	// of the text.
	leadingSchemePattern = regexp.MustCompile(`(?i)^(?:` + dangerousSchemes + `):`)

	// blockedElementPattern matches a blocked element together with its
	// content. The s flag lets the content span several lines.
	blockedElementPattern = regexp.MustCompile(`(?is)` +
		`<script\b[^>]*>.*?</script\b[^>]*>` +
		`|<iframe\b[^>]*>.*?</iframe\b[^>]*>` +
		`|<object\b[^>]*>.*?</object\b[^>]*>` +
		`|<embed\b[^>]*>.*?</embed\b[^>]*>` +
		`|<style\b[^>]*>.*?</style\b[^>]*>`)

	// blockedTagPattern matches a lone opening or closing tag of a blocked
	// element, including one that is not terminated before the end of the text.
	blockedTagPattern = regexp.MustCompile(`(?i)</?(?:script|iframe|object|embed|style)\b[^>]*(?:>|\z)`)

	// eventHandlerPattern matches an on* attribute that starts the text or
	// follows whitespace, '/', or the closing quote of the previous attribute.
	// That quote is captured so it can be put back. The unquoted value form
	// never consumes a quote, so it cannot unbalance surrounding quoting.
	eventHandlerPattern = regexp.MustCompile(`(?i)(?:^|(["'])|[\s/])[\s/]*on\w+\s*=\s*(?:"[^"]*"|'[^']*'|[^\s>"']*)`)

	// schemeAttrPattern matches an href/src attribute whose value starts with
	// a dangerous scheme, optionally preceded by spaces or control characters
	// inside the quotes.
	schemeAttrPattern = regexp.MustCompile(`(?i)(?:href|src)\s*=\s*(?:` +
		`"[\x00-\x20]*(?:` + dangerousSchemes + `):[^"]*"` +
		`|'[\x00-\x20]*(?:` + dangerousSchemes + `):[^']*'` +
		`|(?:` + dangerousSchemes + `):[^\s>]*)`)

	// htmlTagPattern matches anything shaped like an HTML tag.
	htmlTagPattern = regexp.MustCompile(`<[^>]*>`)
)

// stripSchemes deletes every dangerous URL scheme prefix from s.
//
// The text is copied colon by colon into an output buffer; whenever the buffer
// ends in a scheme prefix, that prefix is dropped from the buffer. Checking the
// buffer rather than the input means a prefix that is only formed after an
// earlier deletion ("javajavascript:script:") is removed as well, in a single
// pass over s.
func stripSchemes(s string) string {
	if !strings.Contains(s, ":") {
		return s
	}

	kept := make([]byte, 0, len(s))
	for {
		colon := strings.IndexByte(s, ':')
		if colon < 0 {
			kept = append(kept, s...)
			break
		}
		kept = append(kept, s[:colon+1]...)
		s = s[colon+1:]

		tail := kept
		if len(tail) > schemeWindow {
			tail = tail[len(tail)-schemeWindow:]
		}
		if m := trailingSchemePattern.FindIndex(tail); m != nil {
			kept = kept[:len(kept)-(m[1]-m[0])]
		}
	}
	return string(kept)
}

// stripHTMLOnce applies every SanitizeHTML removal pattern to s exactly once.
func stripHTMLOnce(s string) string {
	s = blockedElementPattern.ReplaceAllString(s, "")
	s = blockedTagPattern.ReplaceAllString(s, "")
	s = eventHandlerPattern.ReplaceAllString(s, "${1}")
	s = schemeAttrPattern.ReplaceAllString(s, "")
	return s
}

// dropControlChars returns s without any Unicode control characters.
func dropControlChars(s string) string {
	return strings.Map(func(r rune) rune {
		if unicode.IsControl(r) {
			return -1
		}
		return r
	}, s)
}

// truncateAtRune returns s cut to at most limit bytes without splitting a
// UTF-8 character.
func truncateAtRune(s string, limit int) string {
	if len(s) <= limit {
		return s
	}
	// Ranging over a string yields the byte offset of every character, so the
	// largest offset not beyond limit is a safe place to cut.
	end := 0
	for i := range s {
		if i > limit {
			break
		}
		end = i
	}
	return s[:end]
}

// truncateEscaped cuts HTML-escaped text to at most limit bytes without
// splitting a UTF-8 character or an HTML entity, then trims whitespace the cut
// may have exposed. Text that already fits is returned unchanged.
func truncateEscaped(s string, limit int) string {
	if len(s) <= limit {
		return s
	}
	s = truncateAtRune(s, limit)

	// After escaping, every '&' starts an entity that ends in ';'. A final '&'
	// with no ';' after it is an entity that the cut split in two.
	if amp := strings.LastIndexByte(s, '&'); amp >= 0 && !strings.Contains(s[amp:], ";") {
		s = s[:amp]
	}
	return strings.TrimSpace(s)
}