diff --git a/relay-event-normalization/src/regexes.rs b/relay-event-normalization/src/regexes.rs index 86b459e9f1..6ab9518e50 100644 --- a/relay-event-normalization/src/regexes.rs +++ b/relay-event-normalization/src/regexes.rs @@ -9,51 +9,52 @@ pub static TRANSACTION_NAME_NORMALIZER_REGEX: Lazy = Lazy::new(|| { Regex::new( r"(?x) (?P[^/\\]* - \b[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}\b + (?-u:\b)[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}(?-u:\b) [^/\\]*) | (?P[^/\\]* - \b[0-9a-fA-F]{40}\b + (?-u:\b)[0-9a-fA-F]{40}(?-u:\b) [^/\\]*) | (?P[^/\\]* - \b[0-9a-fA-F]{32}\b + (?-u:\b)[0-9a-fA-F]{32}(?-u:\b) [^/\\]*) | (?P[^/\\]* (?: - (?:\d{4}-[01]\d-[0-3]\dT[0-2]\d:[0-5]\d:[0-5]\d\.\d+([+-][0-2]\d:[0-5]\d|Z))| - (?:\d{4}-[01]\d-[0-3]\dT[0-2]\d:[0-5]\d:[0-5]\d([+-][0-2]\d:[0-5]\d|Z))| - (?:\d{4}-[01]\d-[0-3]\dT[0-2]\d:[0-5]\d([+-][0-2]\d:[0-5]\d|Z)) + (?:[0-9]{4}-[01][0-9]-[0-3][0-9]T[0-2][0-9]:[0-5][0-9]:[0-5][0-9]\.[0-9]+([+-][0-2][0-9]:[0-5][0-9]|Z))| + (?:[0-9]{4}-[01][0-9]-[0-3][0-9]T[0-2][0-9]:[0-5][0-9]:[0-5][0-9]([+-][0-2][0-9]:[0-5][0-9]|Z))| + (?:[0-9]{4}-[01][0-9]-[0-3][0-9]T[0-2][0-9]:[0-5][0-9]([+-][0-2][0-9]:[0-5][0-9]|Z)) ) | (?: - \b(?:(Sun|Mon|Tue|Wed|Thu|Fri|Sat)\s+)? - (?:Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)\s+ - (?:[\d]{1,2})\s+ - (?:[\d]{2}:[\d]{2}:[\d]{2})\s+ - [\d]{4} + (?-u:\b)(?:(Sun|Mon|Tue|Wed|Thu|Fri|Sat)(?-u:\s)+)? + (?:Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)(?-u:\s)+ + (?:[0-9]{1,2})(?-u:\s)+ + (?:[0-9]{2}:[0-9]{2}:[0-9]{2})(?-u:\s)+ + [0-9]{4} ) | (?: - \b(?:(Sun|Mon|Tue|Wed|Thu|Fri|Sat),\s+)? - (?:0[1-9]|[1-2]?[\d]|3[01])\s+ - (?:Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)\s+ - (?:19[\d]{2}|[2-9][\d]{3})\s+ - (?:2[0-3]|[0-1][\d]):([0-5][\d]) - (?::(60|[0-5][\d]))?\s+ - (?:[-\+][\d]{2}[0-5][\d]|(?:UT|GMT|(?:E|C|M|P)(?:ST|DT)|[A-IK-Z])) + (?-u:\b)(?:(Sun|Mon|Tue|Wed|Thu|Fri|Sat),(?-u:\s)+)? + (?:0[1-9]|[1-2]?[0-9]|3[01])(?-u:\s)+ + (?:Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)(?-u:\s)+ + (?:19[0-9]{2}|[2-9][0-9]{3})(?-u:\s)+ + (?:2[0-3]|[0-1][0-9]):([0-5][0-9]) + (?::(60|[0-5][0-9]))?(?-u:\s)+ + (?:[-\+][0-9]{2}[0-5][0-9]|(?:UT|GMT|(?:E|C|M|P)(?:ST|DT)|[A-IK-Z])) ) [^/\\]*) | (?P[^/\\]* - \b0[xX][0-9a-fA-F]+\b + (?-u:\b)0[xX][0-9a-fA-F]+(?-u:\b) [^/\\]*) | (?:^|[/\\]) (?P - (:?[^%/\\]|%[0-9a-fA-F]{2})*\d{2,} + (:?[^%/\\]|%[0-9a-fA-F]{2})*[0-9]{2,} [^/\\]*)", ) .unwrap() }); /// Captures initial all-caps words as redis command, the rest as arguments. -pub static REDIS_COMMAND_REGEX: Lazy = - Lazy::new(|| Regex::new(r"\s*(?P[A-Z]+(\s+[A-Z]+)*\b)(?P.+)?").unwrap()); +pub static REDIS_COMMAND_REGEX: Lazy = Lazy::new(|| { + Regex::new(r"(?-u:\s)*(?P[A-Z]+((?-u:\s)+[A-Z]+)*(?-u:\b))(?P.+)?").unwrap() +}); /// Regex with multiple capture groups for resource tokens we should scrub. /// @@ -65,32 +66,32 @@ pub static REDIS_COMMAND_REGEX: Lazy = /// pub static RESOURCE_NORMALIZER_REGEX: Lazy = Lazy::new(|| { Regex::new( - r"(?xi) + r"(?x) # UUIDs. - (?P[a-f0-9]{8}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{12}) | + (?P[a-fA-F0-9]{8}-[a-fA-F0-9]{4}-[a-fA-F0-9]{4}-[a-fA-F0-9]{4}-[a-fA-F0-9]{12}) | # Version strings. (?P(v[0-9]+(?:\.[0-9]+)*)) | # Hexadecimal strings with more than 5 digits. - (?P[a-f0-9]{5}[a-f0-9]+) | + (?P[a-fA-F0-9]{5}[a-fA-F0-9]+) | # Integer IDs with more than one digit. - (?P\d\d+) + (?P[0-9][0-9]+) ", ) .unwrap() }); pub static DB_SQL_TRANSACTION_CORE_DATA_REGEX: Lazy = - Lazy::new(|| Regex::new(r"(?P\d+)").unwrap()); + Lazy::new(|| Regex::new(r"(?P[0-9]+)").unwrap()); pub static DB_SUPABASE_REGEX: Lazy = Lazy::new(|| { Regex::new( - r"(?xi) + r"(?x) # UUIDs. - (?P[a-f0-9]{8}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{12}) | + (?P[a-fA-F0-9]{8}-[a-fA-F0-9]{4}-[a-fA-F0-9]{4}-[a-fA-F0-9]{4}-[a-fA-F0-9]{12}) | # Hexadecimal strings with more than 5 digits. - (?P[a-f0-9]{5}[a-f0-9]+) | + (?P[a-fA-F0-9]{5}[a-fA-F0-9]+) | # Integer IDs with more than one digit. - (?P\d\d+) + (?P[0-9][0-9]+) ", ) .unwrap() @@ -98,11 +99,11 @@ pub static DB_SUPABASE_REGEX: Lazy = Lazy::new(|| { pub static FUNCTION_NORMALIZER_REGEX: Lazy = Lazy::new(|| { Regex::new( - r"(?xi) + r"(?x) # UUIDs. - (?P[a-f0-9]{8}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{12}) | + (?P[a-fA-F0-9]{8}-[a-fA-F0-9]{4}-[a-fA-F0-9]{4}-[a-fA-F0-9]{4}-[a-fA-F0-9]{12}) | # Hexadecimal strings with more than 5 digits. - (?P[a-f0-9]{5}[a-f0-9]+) + (?P[a-fA-F0-9]{5}[a-fA-F0-9]+) ", ) .unwrap()