veza/veza-backend-api/internal/utils/sanitizer.go
senke d3bcfd8e60 [BE-SEC-009] be-sec: Implement input sanitization
- Created comprehensive sanitization utility functions
- SanitizeInput, SanitizeText, SanitizeHTML, SanitizeURL, SanitizeEmail, SanitizeUsername
- Applied sanitization to profile handler (username, bio, names, search)
- Applied sanitization to social posts content
- Applied sanitization to comment content
- Applied sanitization to playlist titles and descriptions
- All functions prevent XSS via HTML escaping and remove dangerous URL schemes
- Removes control characters and limits input length to prevent DoS
2025-12-24 12:15:25 +01:00

211 lines
5.2 KiB
Go

package utils
import (
"html"
"regexp"
"strings"
"unicode"
)
// BE-SEC-009: Input sanitization to prevent XSS and injection attacks
// sanitizeInputSchemeRe matches the URL schemes that SanitizeInput strips,
// case-insensitively. It is compiled once at package initialization rather
// than on every call.
var sanitizeInputSchemeRe = regexp.MustCompile(`(?i)(javascript|data|vbscript|file|about):`)

// sanitizeInputMaxSchemePasses bounds how often SanitizeInput re-runs scheme
// stripping, so that adversarially nested input cannot cause quadratic work.
const sanitizeInputMaxSchemePasses = 8

// SanitizeInput sanitizes user input to prevent XSS and injection attacks.
//
// It performs the following operations, in this order:
//  1. Remove control characters (newline, carriage return and tab are kept).
//  2. HTML-escape special characters.
//  3. Remove dangerous URL schemes (javascript:, data:, vbscript:, file:,
//     about:), repeating until none is left.
//  4. Trim surrounding whitespace.
//  5. Limit the result to maxLength bytes (10000 when maxLength <= 0).
//
// The returned string is HTML-escaped and is never longer than maxLength
// bytes. Truncation never splits a UTF-8 sequence or an escaped entity, so
// the result may be a few bytes shorter than maxLength.
//
// Note that scheme stripping is purely textual: ordinary prose such as
// "data: 5" loses its "data:" prefix as well.
func SanitizeInput(input string, maxLength int) string {
	if input == "" {
		return ""
	}
	// Default max length if not specified.
	if maxLength <= 0 {
		maxLength = 10000
	}

	// Control characters go first: removing them after the scheme check would
	// let "java\x00script:" collapse into "javascript:" unnoticed.
	cleaned := strings.Map(func(r rune) rune {
		switch {
		case r == '\n' || r == '\r' || r == '\t':
			return r
		case unicode.IsControl(r):
			return -1
		}
		return r
	}, input)

	cleaned = html.EscapeString(cleaned)

	// Deleting a match can splice a new scheme together
	// ("javajavascript:script:" -> "javascript:"), so repeat until stable.
	for pass := 0; sanitizeInputSchemeRe.MatchString(cleaned); pass++ {
		if pass == sanitizeInputMaxSchemePasses {
			// Still matching after several passes: the input is deliberately
			// nested. Fail closed by dropping every colon, which guarantees
			// that no "scheme:" sequence can remain.
			cleaned = strings.ReplaceAll(cleaned, ":", "")
			break
		}
		cleaned = sanitizeInputSchemeRe.ReplaceAllString(cleaned, "")
	}

	cleaned = strings.TrimSpace(cleaned)

	if len(cleaned) > maxLength {
		cut := maxLength
		// Back up to a rune boundary: bytes of the form 10xxxxxx are UTF-8
		// continuation bytes and must not start the discarded tail.
		for cut > 0 && cleaned[cut]&0xC0 == 0x80 {
			cut--
		}
		cleaned = cleaned[:cut]
		// Every '&' in the escaped text starts an entity that ends in ';'.
		// A trailing '&' without ';' is an entity cut in half; drop it.
		if amp := strings.LastIndexByte(cleaned, '&'); amp >= 0 && strings.IndexByte(cleaned[amp:], ';') < 0 {
			cleaned = cleaned[:amp]
		}
	}
	return cleaned
}
// sanitizeTextSchemeRe matches the URL schemes that SanitizeText strips,
// case-insensitively. It is compiled once at package initialization rather
// than on every call.
var sanitizeTextSchemeRe = regexp.MustCompile(`(?i)(javascript|data|vbscript|file|about):`)

// sanitizeTextMaxSchemePasses bounds how often SanitizeText re-runs scheme
// stripping, so that adversarially nested input cannot cause quadratic work.
const sanitizeTextMaxSchemePasses = 8

// SanitizeText sanitizes text input (usernames, titles, descriptions, etc.).
//
// It is more permissive than a full control-character filter: only the NUL
// and SUB (0x1A) bytes are removed. The text is then HTML-escaped, stripped
// of dangerous URL schemes (javascript:, data:, vbscript:, file:, about:),
// trimmed, and limited to maxLength bytes (5000 when maxLength <= 0).
//
// The returned string is HTML-escaped and never longer than maxLength bytes.
// Truncation never splits a UTF-8 sequence or an escaped entity, so the
// result may be a few bytes shorter than maxLength.
func SanitizeText(input string, maxLength int) string {
	if input == "" {
		return ""
	}
	if maxLength <= 0 {
		maxLength = 5000
	}

	// NUL/SUB are removed before the scheme check; doing it afterwards would
	// let "java\x00script:" collapse into "javascript:" unnoticed.
	cleaned := strings.ReplaceAll(input, "\x00", "")
	cleaned = strings.ReplaceAll(cleaned, "\x1a", "") // SUB character

	cleaned = html.EscapeString(cleaned)

	// Deleting a match can splice a new scheme together
	// ("jajavascript:vascript:" -> "javascript:"), so repeat until stable.
	for pass := 0; sanitizeTextSchemeRe.MatchString(cleaned); pass++ {
		if pass == sanitizeTextMaxSchemePasses {
			// Still matching after several passes: the input is deliberately
			// nested. Fail closed by dropping every colon, which guarantees
			// that no "scheme:" sequence can remain.
			cleaned = strings.ReplaceAll(cleaned, ":", "")
			break
		}
		cleaned = sanitizeTextSchemeRe.ReplaceAllString(cleaned, "")
	}

	cleaned = strings.TrimSpace(cleaned)

	if len(cleaned) > maxLength {
		cut := maxLength
		// Back up to a rune boundary: bytes of the form 10xxxxxx are UTF-8
		// continuation bytes and must not start the discarded tail.
		for cut > 0 && cleaned[cut]&0xC0 == 0x80 {
			cut--
		}
		cleaned = cleaned[:cut]
		// Every '&' in the escaped text starts an entity that ends in ';'.
		// A trailing '&' without ';' is an entity cut in half; drop it.
		if amp := strings.LastIndexByte(cleaned, '&'); amp >= 0 && strings.IndexByte(cleaned[amp:], ';') < 0 {
			cleaned = cleaned[:amp]
		}
	}
	return cleaned
}
// Patterns used by SanitizeHTML, compiled once at package initialization.
var (
	// sanitizeHTMLElementRes match dangerous elements together with their
	// content. Go's regexp package (RE2 syntax) has no backreferences, so
	// each tag gets its own pattern. The "s" flag lets "." span newlines so
	// multi-line blocks are matched too.
	sanitizeHTMLElementRes = []*regexp.Regexp{
		regexp.MustCompile(`(?is)<script[^>]*>.*?</script[^>]*>`),
		regexp.MustCompile(`(?is)<iframe[^>]*>.*?</iframe[^>]*>`),
		regexp.MustCompile(`(?is)<object[^>]*>.*?</object[^>]*>`),
		regexp.MustCompile(`(?is)<embed[^>]*>.*?</embed[^>]*>`),
		regexp.MustCompile(`(?is)<style[^>]*>.*?</style[^>]*>`),
	}

	// sanitizeHTMLStrayTagRe matches unpaired opening or closing tags of the
	// same elements (for example an unclosed <script> or a void <embed>).
	sanitizeHTMLStrayTagRe = regexp.MustCompile(`(?i)</?(?:script|iframe|object|embed|style)[^>]*>`)

	// sanitizeHTMLEventAttrRe matches on* event-handler attributes with a
	// double-quoted, single-quoted or unquoted value. Group 1 captures a
	// preceding quote or slash so that it can be put back.
	sanitizeHTMLEventAttrRe = regexp.MustCompile(`(?i)(?:^|\s+|(["'/]))on\w+\s*=\s*(?:"[^"]*"|'[^']*'|[^\s>]+)`)

	// sanitizeHTMLSchemeAttrRe matches href/src attributes whose quoted or
	// unquoted value starts with a dangerous URL scheme.
	sanitizeHTMLSchemeAttrRe = regexp.MustCompile(`(?i)(?:href|src)\s*=\s*(?:"\s*(?:javascript|data|vbscript|file|about):[^"]*"|'\s*(?:javascript|data|vbscript|file|about):[^']*'|(?:javascript|data|vbscript|file|about):[^\s>]*)`)
)

// sanitizeHTMLMaxPasses bounds how often SanitizeHTML re-runs its filters, so
// that adversarially nested input cannot cause quadratic work.
const sanitizeHTMLMaxPasses = 8

// SanitizeHTML sanitizes HTML content by removing dangerous tags and
// attributes while leaving other markup in place.
//
// It removes script, iframe, object, embed and style elements (paired or
// stray), on* event-handler attributes, and href/src attributes whose value
// starts with javascript:, data:, vbscript:, file: or about:. The filters are
// repeated until the text stops changing, because deleting one match can
// splice a new one together ("<scr<script></script>ipt>"). Input that is
// still changing after sanitizeHTMLMaxPasses passes is treated as hostile and
// fully HTML-escaped instead.
//
// The result is limited to maxLength bytes (50000 when maxLength <= 0) and is
// never cut in the middle of a UTF-8 sequence.
//
// NOTE(security): this is a best-effort, regex-based denylist. It does not
// cover every vector (for example entity-encoded schemes or URL-bearing
// attributes other than href/src). For untrusted rich text, a parser-based
// allowlist sanitizer is the safer choice.
func SanitizeHTML(input string, maxLength int) string {
	if input == "" {
		return ""
	}
	if maxLength <= 0 {
		maxLength = 50000
	}

	cleaned := input
	for pass := 0; ; pass++ {
		if pass == sanitizeHTMLMaxPasses {
			// Fail closed: escaping everything cannot leave live markup.
			cleaned = html.EscapeString(cleaned)
			break
		}

		next := cleaned
		for _, re := range sanitizeHTMLElementRes {
			next = re.ReplaceAllString(next, "")
		}
		next = sanitizeHTMLStrayTagRe.ReplaceAllString(next, "")
		// "${1}" restores the quote or slash that preceded the attribute.
		next = sanitizeHTMLEventAttrRe.ReplaceAllString(next, "${1}")
		next = sanitizeHTMLSchemeAttrRe.ReplaceAllString(next, "")

		if next == cleaned {
			break
		}
		cleaned = next
	}

	if len(cleaned) > maxLength {
		cut := maxLength
		// Back up to a rune boundary: bytes of the form 10xxxxxx are UTF-8
		// continuation bytes and must not start the discarded tail.
		for cut > 0 && cleaned[cut]&0xC0 == 0x80 {
			cut--
		}
		cleaned = cleaned[:cut]
	}
	return cleaned
}
// sanitizeURLSchemeRe matches a dangerous scheme at the very start of a URL,
// case-insensitively. It is compiled once at package initialization rather
// than on every call.
var sanitizeURLSchemeRe = regexp.MustCompile(`(?i)^(javascript|data|vbscript|file|about):`)

// SanitizeURL sanitizes a URL to prevent XSS and injection.
//
// It removes all control characters (including tab and newline, which
// browsers ignore inside URLs, so "java\tscript:" would otherwise slip
// through), trims surrounding whitespace, and strips any leading
// javascript:, data:, vbscript:, file: or about: scheme. Trimming and
// stripping repeat until the URL no longer starts with such a scheme, so
// stacked prefixes like "javascript: data:..." are removed completely.
//
// The result is limited to 2048 bytes and is never cut in the middle of a
// UTF-8 sequence. The function does not otherwise validate the URL.
func SanitizeURL(input string) string {
	if input == "" {
		return ""
	}

	cleaned := strings.Map(func(r rune) rune {
		if unicode.IsControl(r) {
			return -1
		}
		return r
	}, input)

	// Re-slicing instead of replacing keeps this loop linear in the input
	// size even for long runs of repeated schemes.
	for {
		cleaned = strings.TrimSpace(cleaned)
		loc := sanitizeURLSchemeRe.FindStringIndex(cleaned)
		if loc == nil {
			break
		}
		cleaned = cleaned[loc[1]:]
	}

	if len(cleaned) > 2048 {
		cut := 2048
		// Back up to a rune boundary: bytes of the form 10xxxxxx are UTF-8
		// continuation bytes and must not start the discarded tail.
		for cut > 0 && cleaned[cut]&0xC0 == 0x80 {
			cut--
		}
		cleaned = cleaned[:cut]
	}
	return cleaned
}
// SanitizeEmail sanitizes an email address.
//
// It removes control characters, trims surrounding whitespace, lower-cases
// the address, and limits the result to 320 bytes without cutting a UTF-8
// sequence in half. It does not check that the result is a valid address.
func SanitizeEmail(input string) string {
	if input == "" {
		return ""
	}

	// Control characters are removed before trimming; otherwise whitespace
	// hidden behind one (e.g. "\x00 a@b.c") would survive the trim.
	cleaned := strings.Map(func(r rune) rune {
		if unicode.IsControl(r) {
			return -1
		}
		return r
	}, input)
	cleaned = strings.ToLower(strings.TrimSpace(cleaned))

	// 320 bytes is the commonly cited upper bound for an address:
	// a 64-byte local part, "@", and a 255-byte domain.
	if len(cleaned) > 320 {
		cut := 320
		// Back up to a rune boundary: bytes of the form 10xxxxxx are UTF-8
		// continuation bytes and must not start the discarded tail.
		for cut > 0 && cleaned[cut]&0xC0 == 0x80 {
			cut--
		}
		cleaned = cleaned[:cut]
	}
	return cleaned
}
// sanitizeUsernameTagRe matches a complete HTML tag. It is compiled once at
// package initialization rather than on every call.
var sanitizeUsernameTagRe = regexp.MustCompile(`<[^>]*>`)

// SanitizeUsername sanitizes a username.
//
// It removes control characters, complete HTML tags and any leftover angle
// brackets, trims surrounding whitespace, and limits the result to 50 bytes
// without cutting a UTF-8 sequence in half.
//
// The result is not HTML-escaped: quotes and ampersands are kept as-is.
func SanitizeUsername(input string) string {
	if input == "" {
		return ""
	}

	cleaned := strings.Map(func(r rune) rune {
		if unicode.IsControl(r) {
			return -1
		}
		return r
	}, input)

	cleaned = sanitizeUsernameTagRe.ReplaceAllString(cleaned, "")
	// The tag pattern needs a closing '>', so an unterminated tag such as
	// "<img src=x onerror=..." would survive it. Drop stray brackets too.
	cleaned = strings.ReplaceAll(cleaned, "<", "")
	cleaned = strings.ReplaceAll(cleaned, ">", "")

	// Trim last so that whitespace exposed by the removals above goes too.
	cleaned = strings.TrimSpace(cleaned)

	if len(cleaned) > 50 {
		cut := 50
		// Back up to a rune boundary: bytes of the form 10xxxxxx are UTF-8
		// continuation bytes and must not start the discarded tail.
		for cut > 0 && cleaned[cut]&0xC0 == 0x80 {
			cut--
		}
		cleaned = cleaned[:cut]
	}
	return cleaned
}