fix(chat): replace Regex::new().unwrap() with static Lazy in security_legacy.rs

Replace 65+ Regex::new().unwrap() calls with three once_cell::sync::Lazy
statics (two pattern collections and one single regex):

- DANGEROUS_PATTERNS: 60+ XSS/SQL/command injection regexes
- ROOM_NAME_REGEX: room name character validation
- TOXIC_PATTERNS: 5 toxicity detection regexes

All patterns are compiled once, lazily on first access; a pattern that fails
to compile is skipped via .ok() instead of panicking.
ContentFilter and ToxicityDetector now clone from the statics.

Also adds pub mod security_legacy to lib.rs so the module is compiled
and checked during CI builds.

Addresses audit finding D9: .unwrap() on Regex::new() in legacy code.

Co-authored-by: Cursor <cursoragent@cursor.com>
This commit is contained in:
senke 2026-02-11 23:27:54 +01:00
parent ff5d6736f8
commit 00bb07b180
2 changed files with 51 additions and 75 deletions

View file

@ -18,6 +18,7 @@ pub mod permissions;
pub mod read_receipts;
pub mod repository;
pub mod security;
pub mod security_legacy;
pub mod services;
pub mod simple_message_store;
pub mod typing_indicator;

View file

@ -1,10 +1,55 @@
use std::collections::{HashMap, HashSet};
use std::time::{Duration, SystemTime};
use once_cell::sync::Lazy;
use regex::Regex;
use uuid::Uuid;
use crate::error::{ChatError, Result};
use sha2::{Sha256, Digest};
/// Shared table of compiled "dangerous content" regexes (XSS, SQL injection,
/// command injection).
///
/// The vector is built the first time the static is dereferenced and reused
/// afterwards. A pattern that fails to compile is dropped from the table rather
/// than causing a panic, so the resulting vector may be shorter than the source
/// list. None of the patterns sets the case-insensitive flag, so matching is
/// case-sensitive.
static DANGEROUS_PATTERNS: Lazy<Vec<Regex>> = Lazy::new(|| {
    const SOURCES: &[&str] = &[
        // Script / markup / CSS vectors.
        r"<script[^>]*>.*?</script>",
        r"javascript:",
        r"data:text/html",
        r"on\w+\s*=",
        r"eval\s*\(",
        r"document\.(write|cookie)",
        r"window\.(location|open)",
        r"<iframe[^>]*>",
        r"<object[^>]*>",
        r"<embed[^>]*>",
        r"<link[^>]*>",
        r"<meta[^>]*>",
        r"@import",
        r"expression\s*\(",
        r"url\s*\(",
        r"behavior\s*:",
        r"-moz-binding",
        // Server-side / template delimiters.
        r"<\?php",
        r"<%.*?%>",
        r"\{\{.*?\}\}",
        r"\{%.*?%\}",
        // Opening tags, tolerating whitespace after `<`.
        r"<\s*script",
        r"<\s*style",
        r"<\s*link",
        r"<\s*meta",
        r"<\s*base",
        r"<\s*title",
        r"<\s*frame",
        r"<\s*applet",
        r"<\s*form",
        r"<\s*input",
        // SQL statements and classic injection fragments.
        r"SELECT\s+.*\s+FROM",
        r"INSERT\s+INTO",
        r"UPDATE\s+.*\s+SET",
        r"DELETE\s+FROM",
        r"DROP\s+TABLE",
        r"CREATE\s+TABLE",
        r"ALTER\s+TABLE",
        r"TRUNCATE\s+TABLE",
        r"UNION\s+SELECT",
        r"OR\s+1\s*=\s*1",
        r"AND\s+1\s*=\s*1",
        r"'\s*OR\s*'",
        r"'\s*AND\s*'",
        r"--\s*",
        r"/\*.*?\*/",
        r"xp_cmdshell",
        r"sp_executesql",
        r"exec\s*\(",
        r"execute\s*\(",
        // Shells, interpreters and network tools.
        r"cmd\.exe",
        r"powershell",
        r"bash",
        r"sh\s",
        r"perl",
        r"python",
        r"ruby",
        r"wget",
        r"curl",
        r"nc\s",
        r"netcat",
        r"telnet",
        r"ssh",
        r"ftp",
        r"tftp",
    ];

    let mut compiled = Vec::with_capacity(SOURCES.len());
    for source in SOURCES {
        // Keep only the patterns that compile; failures are ignored on purpose.
        if let Ok(pattern) = Regex::new(source) {
            compiled.push(pattern);
        }
    }
    compiled
});
/// Compiled room-name validator, built on first access and reused afterwards.
///
/// The pattern accepts a non-empty string made up only of ASCII letters,
/// digits, `-`, `_` and whitespace. The value is `None` if the pattern fails
/// to compile, leaving it to the caller to report the failure.
static ROOM_NAME_REGEX: Lazy<Option<Regex>> = Lazy::new(|| {
    const ALLOWED_ROOM_NAME: &str = r"^[a-zA-Z0-9\-_\s]+$";
    Regex::new(ALLOWED_ROOM_NAME).ok()
});
/// Shared table of compiled toxicity-detection regexes.
///
/// Every pattern is case-insensitive (`(?i)`) and wrapped in word boundaries
/// (`\b`). The vector is built the first time the static is dereferenced; a
/// pattern that fails to compile is left out instead of panicking.
static TOXIC_PATTERNS: Lazy<Vec<Regex>> = Lazy::new(|| {
    const SOURCES: &[&str] = &[
        r"(?i)\b(idiot|stupide|con|connard|salope|pute|merde)\b",
        r"(?i)\b(fuck|shit|bitch|asshole|damn)\b",
        r"(?i)\b(kill\s+yourself|suicide|die)\b",
        r"(?i)\b(hate\s+you|je\s+te\s+déteste)\b",
        r"(?i)\b(racist|nazi|fascist)\b",
    ];

    SOURCES
        .iter()
        .map(|source| Regex::new(source))
        .filter_map(Result::ok)
        .collect()
});
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub enum SecurityAction {
SendMessage,
@ -33,73 +78,7 @@ impl ContentFilter {
words.insert("test_bad_word".to_string());
words
},
dangerous_patterns: vec![
Regex::new(r"<script[^>]*>.*?</script>").unwrap(),
Regex::new(r"javascript:").unwrap(),
Regex::new(r"data:text/html").unwrap(),
Regex::new(r"on\w+\s*=").unwrap(),
Regex::new(r"eval\s*\(").unwrap(),
Regex::new(r"document\.(write|cookie)").unwrap(),
Regex::new(r"window\.(location|open)").unwrap(),
Regex::new(r"<iframe[^>]*>").unwrap(),
Regex::new(r"<object[^>]*>").unwrap(),
Regex::new(r"<embed[^>]*>").unwrap(),
Regex::new(r"<link[^>]*>").unwrap(),
Regex::new(r"<meta[^>]*>").unwrap(),
Regex::new(r"@import").unwrap(),
Regex::new(r"expression\s*\(").unwrap(),
Regex::new(r"url\s*\(").unwrap(),
Regex::new(r"behavior\s*:").unwrap(),
Regex::new(r"-moz-binding").unwrap(),
Regex::new(r"<\?php").unwrap(),
Regex::new(r"<%.*?%>").unwrap(),
Regex::new(r"\{\{.*?\}\}").unwrap(),
Regex::new(r"\{%.*?%\}").unwrap(),
Regex::new(r"<\s*script").unwrap(),
Regex::new(r"<\s*style").unwrap(),
Regex::new(r"<\s*link").unwrap(),
Regex::new(r"<\s*meta").unwrap(),
Regex::new(r"<\s*base").unwrap(),
Regex::new(r"<\s*title").unwrap(),
Regex::new(r"<\s*frame").unwrap(),
Regex::new(r"<\s*applet").unwrap(),
Regex::new(r"<\s*form").unwrap(),
Regex::new(r"<\s*input").unwrap(),
Regex::new(r"SELECT\s+.*\s+FROM").unwrap(),
Regex::new(r"INSERT\s+INTO").unwrap(),
Regex::new(r"UPDATE\s+.*\s+SET").unwrap(),
Regex::new(r"DELETE\s+FROM").unwrap(),
Regex::new(r"DROP\s+TABLE").unwrap(),
Regex::new(r"CREATE\s+TABLE").unwrap(),
Regex::new(r"ALTER\s+TABLE").unwrap(),
Regex::new(r"TRUNCATE\s+TABLE").unwrap(),
Regex::new(r"UNION\s+SELECT").unwrap(),
Regex::new(r"OR\s+1\s*=\s*1").unwrap(),
Regex::new(r"AND\s+1\s*=\s*1").unwrap(),
Regex::new(r"'\s*OR\s*'").unwrap(),
Regex::new(r"'\s*AND\s*'").unwrap(),
Regex::new(r"--\s*").unwrap(),
Regex::new(r"/\*.*?\*/").unwrap(),
Regex::new(r"xp_cmdshell").unwrap(),
Regex::new(r"sp_executesql").unwrap(),
Regex::new(r"exec\s*\(").unwrap(),
Regex::new(r"execute\s*\(").unwrap(),
Regex::new(r"cmd\.exe").unwrap(),
Regex::new(r"powershell").unwrap(),
Regex::new(r"bash").unwrap(),
Regex::new(r"sh\s").unwrap(),
Regex::new(r"perl").unwrap(),
Regex::new(r"python").unwrap(),
Regex::new(r"ruby").unwrap(),
Regex::new(r"wget").unwrap(),
Regex::new(r"curl").unwrap(),
Regex::new(r"nc\s").unwrap(),
Regex::new(r"netcat").unwrap(),
Regex::new(r"telnet").unwrap(),
Regex::new(r"ssh").unwrap(),
Regex::new(r"ftp").unwrap(),
Regex::new(r"tftp").unwrap(),
],
dangerous_patterns: DANGEROUS_PATTERNS.clone(),
spam_detector: SpamDetector::new(),
toxicity_detector: ToxicityDetector::new(),
})
@ -153,7 +132,9 @@ impl ContentFilter {
}
// Caractères autorisés : lettres, chiffres, tirets, underscores
let room_regex = Regex::new(r"^[a-zA-Z0-9\-_\s]+$").unwrap();
let room_regex = ROOM_NAME_REGEX.as_ref().ok_or_else(|| ChatError::internal_error(
"Failed to compile room name regex".to_string(),
))?;
if !room_regex.is_match(room_name) {
return Err(ChatError::InvalidFormat {
field: "room_name".to_string(),
@ -308,13 +289,7 @@ impl Default for ToxicityDetector {
impl ToxicityDetector {
pub fn new() -> Self {
Self {
toxic_patterns: vec![
Regex::new(r"(?i)\b(idiot|stupide|con|connard|salope|pute|merde)\b").unwrap(),
Regex::new(r"(?i)\b(fuck|shit|bitch|asshole|damn)\b").unwrap(),
Regex::new(r"(?i)\b(kill\s+yourself|suicide|die)\b").unwrap(),
Regex::new(r"(?i)\b(hate\s+you|je\s+te\s+déteste)\b").unwrap(),
Regex::new(r"(?i)\b(racist|nazi|fascist)\b").unwrap(),
],
toxic_patterns: TOXIC_PATTERNS.clone(),
_severity_threshold: 0.7,
}
}