Skip to content

Commit

Permalink
fix: make sure negated code points above U+FFFF are forbidden in .NET
Browse files Browse the repository at this point in the history
  • Loading branch information
Aloso committed Nov 2, 2023
1 parent 36c6dea commit c8cef90
Show file tree
Hide file tree
Showing 8 changed files with 53 additions and 13 deletions.
11 changes: 10 additions & 1 deletion pomsky-lib/src/diagnose/compile_error.rs
Original file line number Diff line number Diff line change
Expand Up @@ -233,6 +233,7 @@ impl core::fmt::Display for UnsupportedError {
#[non_exhaustive]
pub(crate) enum IllegalNegationKind {
Literal(String),
DotNetChar(char),
Unescaped,
Grapheme,
Dot,
Expand All @@ -248,7 +249,15 @@ impl core::fmt::Display for IllegalNegationKind {
fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
let s = match self {
IllegalNegationKind::Literal(s) => {
return write!(f, "String literal {s:?} can't be negated")
return write!(f, "String literal {s:?} can't be negated");
}
&IllegalNegationKind::DotNetChar(c) => {
return write!(
f,
"Code point {c:?} (U+{:X}) can't be negated in the .NET flavor, because it is \
above U+FFFF, and is therefore incorrectly treated as two code points by .NET.",
c as u32
);
}
IllegalNegationKind::Unescaped => "An inline regex",
IllegalNegationKind::Grapheme => "A grapheme",
Expand Down
22 changes: 13 additions & 9 deletions pomsky-lib/src/diagnose/help.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ use pomsky_syntax::{
Span,
};

use super::CompileErrorKind;
use super::{CompileErrorKind, IllegalNegationKind};

pub(super) fn get_parser_help(
kind: &ParseErrorKind,
Expand Down Expand Up @@ -165,14 +165,18 @@ pub(super) fn get_compiler_help(
CompileErrorKind::NegativeShorthandInAsciiMode | CompileErrorKind::UnicodeInAsciiMode => {
Some(format!("Enable Unicode, e.g. `(enable unicode; {slice})`"))
}
CompileErrorKind::IllegalNegation { .. } => Some(
"Only the following expressions can be negated:\n\
- character sets\n\
- string literals and alternations that match exactly one code point\n\
- lookarounds\n\
- the `%` word boundary"
.to_string(),
),
CompileErrorKind::IllegalNegation { kind }
if !matches!(kind, IllegalNegationKind::DotNetChar(_)) =>
{
Some(
"Only the following expressions can be negated:\n\
- character sets\n\
- string literals and alternations that match exactly one code point\n\
- lookarounds\n\
- the `%` word boundary"
.to_string(),
)
}

_ => None,
}
Expand Down
5 changes: 4 additions & 1 deletion pomsky-lib/src/exprs/rule.rs
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,10 @@ impl<'i> RuleExt<'i> for Rule<'i> {
Rule::Recursion(r) => r.compile(options, state),
Rule::Negation(n) => {
let span = n.rule.span();
let regex = n.rule.compile(options, state).and_then(|r| r.negate(n.not_span))?;
let regex = n
.rule
.compile(options, state)
.and_then(|r| r.negate(n.not_span, options.flavor))?;
if let Regex::CharSet(char_set) = &regex {
check_char_class_empty(char_set, span)?;
}
Expand Down
10 changes: 8 additions & 2 deletions pomsky-lib/src/regex/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -131,7 +131,7 @@ impl RegexProperty {
}

impl<'i> Regex<'i> {
pub(crate) fn negate(self, not_span: Span) -> CompileResult<'i> {
pub(crate) fn negate(self, not_span: Span, flavor: RegexFlavor) -> CompileResult<'i> {
match self {
Regex::Literal(l) => {
let mut iter = l.chars();
Expand All @@ -141,6 +141,12 @@ impl<'i> Regex<'i> {
}
.at(not_span));
};
if flavor == RegexFlavor::DotNet && c.len_utf16() > 1 {
return Err(CompileErrorKind::IllegalNegation {
kind: IllegalNegationKind::DotNetChar(c),
}
.at(not_span));
}
Ok(Regex::CharSet(RegexCharSet::new(vec![RegexCharSetItem::Char(c)]).negate()))
}
Regex::Char(c) => {
Expand All @@ -166,7 +172,7 @@ impl<'i> Regex<'i> {
Regex::Group(mut g)
if matches!(g.kind, RegexGroupKind::Normal) && g.parts.len() == 1 =>
{
g.parts.pop().unwrap().negate(not_span)
g.parts.pop().unwrap().negate(not_span, flavor)
}

Regex::Unescaped(_)
Expand Down
4 changes: 4 additions & 0 deletions pomsky-lib/tests/testcases/negation/large_codepoint.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
#! flavor=Pcre
!U+10330
-----
[^\x{10330}]
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
#! flavor=Pcre
![U+10330]
-----
[^\x{10330}]
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
#! expect=error, flavor=DotNet
![U+10330]
-----
ERROR: Code point '𐌰' (U+10330) can't be negated in the .NET flavor, because it is above U+FFFF, and is therefore incorrectly treated as two code points by .NET.
SPAN: 0..1
5 changes: 5 additions & 0 deletions pomsky-lib/tests/testcases/negation/large_codepoint_net.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
#! expect=error, flavor=DotNet
!U+10330
-----
ERROR: Code point '𐌰' (U+10330) can't be negated in the .NET flavor, because it is above U+FFFF, and is therefore incorrectly treated as two code points by .NET.
SPAN: 0..1

0 comments on commit c8cef90

Please sign in to comment.