From 1a40a1feef53d2bb617ac8b588600325927f1aab Mon Sep 17 00:00:00 2001 From: iorate <682043+iorate@users.noreply.github.com> Date: Sun, 25 Aug 2024 22:48:58 +0900 Subject: [PATCH] fix: allow escape sequence in regexp class --- src/scripts/ruleset/parser.js | 2 +- src/scripts/ruleset/ruleset.grammar | 2 +- src/scripts/ruleset/ruleset.test.ts | 27 +++++++++++++++++++++++++++ 3 files changed, 29 insertions(+), 2 deletions(-) diff --git a/src/scripts/ruleset/parser.js b/src/scripts/ruleset/parser.js index 7f822515c..eff5b7261 100644 --- a/src/scripts/ruleset/parser.js +++ b/src/scripts/ruleset/parser.js @@ -13,7 +13,7 @@ export const parser = LRParser.deserialize({ ], skippedNodes: [0,1], repeatNodeCount: 1, - tokenData: ">e~RlXY!yYZ#X]^!ypq!yqr#^rs#cst$jtu%Rvw%xwx%}xy'Pyz'Uz{'Z!P!Q(i!Q!['t![!]:_!^!_:p!_!`;}!b!c<[!c!k=S!k!l=m!l!}=S#Q#R>Y#R#S%g#T#]=S#]#^=m#^#o=S#p#q>`~#ORn~XY!y]^!ypq!y~#^Ov~~#cOf~~#fWOY#cZr#crs$Os#O#c#O#P$T#P;'S#c;'S;=`$d<%lO#c~$TO_~~$WSOY#cZ;'S#c;'S;=`$d<%lO#c~$gP;=`<%l#c~$oSP~OY$jZ;'S$j;'S;=`${<%lO$j~%OP;=`<%l$jR%US!_!`%b!c!}%g#R#S%g#T#o%gQ%gO^QP%lSXP!Q![%g!c!}%g#R#S%g#T#o%g~%}Oh~~&QWOY%}Zw%}wx$Ox#O%}#O#P&j#P;'S%};'S;=`&y<%lO%}~&mSOY%}Z;'S%};'S;=`&y<%lO%}~&|P;=`<%l%}~'UO[~~'ZOd~V'bQpPsS!O!P'h!_!`%bS'kR!Q!['t!c!}'t#T#o'tS'yTsS}!O(Y!O!P'h!Q!['t!c!}'t#T#o'tS(]S}!O(Y!Q!['t!c!}'t#T#o'tV(nutSOY+RZq+Rqr-Urt+Rtu-Uuv/nvw-Uwx-Uxy-Uyz-Uz{-U{|-U|}-U}!O-U!O!P-U!P!Q3z!Q![-U![!]-U!]!^-U!^!_+R!_!`-U!`!a+R!a!b-U!b!c-U!c!}-U!}#O6W#O#P,o#P#Q-U#Q#R+R#R#S-U#S#T+R#T#o-U#o#r+R#r#s-U#s;'S+R;'S;=`-O<%lO+RR+UXOY+RZ!P+R!P!Q+q!Q!}+R!}#O,S#O#P,o#P;'S+R;'S;=`-O<%lO+RR+vSbR#]#^+q#a#b+q#g#h+q#i#j+qR,VUOY,SZ#O,S#P#Q+R#Q;'S,S;'S;=`,i<%lO,SR,lP;=`<%l,SR,rSOY+RZ;'S+R;'S;=`-O<%lO+RR-RP;=`<%l+RV-ZutSOY+RZq+Rqr-Urt+Rtu-Uuv/nvw-Uwx-Uxy-Uyz-Uz{-U{|-U|}-U}!O-U!O!P-U!P!Q1l!Q![-U![!]-U!]!^-U!^!_+R!_!`-U!`!a+R!a!b-U!b!c-U!c!}-U!}#O6W#O#P,o#P#Q-U#Q#R+R#R#S-U#S#T+R#T#o-U#o#r+R#r#s-U#s;'S+R;'S;=`-O<%lO+RV/q^OY+RZ!P+R!P!Q+q!Q![0m![!c+R!c!i0m!i!}+R!}#O,S#O#P,o#P#T+R#T#Z0m#Z;'S+R;'S;=`-O<%lO+RV0p^OY+RZ!P+R!P!Q+q!Q![-U![!c+R!c!i-U!i!}+R!}#O,S#O#P,o#P#T+R#T#Z-U#Z;'S+R;'S;=`-O<%lO+RV1sqbRtSqr3ztu3zuv5nvw3zwx3zxy3zyz3zz{3z{|3z|}3z}!O3z!O!P3z!P!Q3z!Q![3z![!]3z!]!^3z!_!`3z!a!b3z!b!c3z!c!}3z!}#O3z#P#Q3z#R#S3z#T#]3z#]#^1l#^#a3z#a#b1l#b#g3z#g#h1l#h#i3z#i#j1l#j#o3z#r#s3zS4PitSqr3ztu3zuv5nvw3zwx3zxy3zyz3zz{3z{|3z|}3z}!O3z!O!P3z!P!Q3z!Q![3z![!]3z!]!^3z!_!`3z!a!b3z!b!c3z!c!}3z!}#O3z#P#Q3z#R#S3z#T#o3z#r#s3zS5qR!Q![5z!c!i5z#T#Z5zS5}R!Q![3z!c!i3z#T#Z3zV6]ttSOY,SZq,Sqr6Wrt,Stu6Wuv8mvw6Wwx6Wxy6Wyz6Wz{6W{|6W|}6W}!O6W!O!P6W!P!Q6W!Q![6W![!]6W!]!^6W!^!_,S!_!`6W!`!a,S!a!b6W!b!c6W!c!}6W!}#O6W#P#Q-U#Q#R,S#R#S6W#S#T,S#T#o6W#o#r,S#r#s6W#s;'S,S;'S;=`,i<%lO,SV8p[OY,SZ!Q,S!Q![9f![!c,S!c!i9f!i#O,S#P#Q+R#Q#T,S#T#Z9f#Z;'S,S;'S;=`,i<%lO,SV9i[OY,SZ!Q,S!Q![6W![!c,S!c!i6W!i#O,S#P#Q+R#Q#T,S#T#Z6W#Z;'S,S;'S;=`,i<%lO,S~:bP!P!Q:e~:hP!P!Q:k~:pOr~~:sP#T#U:v~:yP#`#a:|~;PP#`#a;S~;VP#R#S;Y~;]P#i#j;`~;cP#f#g;f~;iP#`#a;l~;oP#g#h;r~;uP!`!a;x~;}Oo~~]P!_!`%b~>eOj~", + tokenData: "?Q~RlXY!yYZ#X]^!ypq!yqr#^rs#cst$jtu%Rvw%xwx%}xy'Pyz'Uz{'Z!P!Q(i!Q!['t![!]:z!^!_;]!_!`Y!l!}=o#Q#R>u#R#S%g#T#]=o#]#^>Y#^#o=o#p#q>{~#ORn~XY!y]^!ypq!y~#^Ov~~#cOf~~#fWOY#cZr#crs$Os#O#c#O#P$T#P;'S#c;'S;=`$d<%lO#c~$TO_~~$WSOY#cZ;'S#c;'S;=`$d<%lO#c~$gP;=`<%l#c~$oSP~OY$jZ;'S$j;'S;=`${<%lO$j~%OP;=`<%l$jR%US!_!`%b!c!}%g#R#S%g#T#o%gQ%gO^QP%lSXP!Q![%g!c!}%g#R#S%g#T#o%g~%}Oh~~&QWOY%}Zw%}wx$Ox#O%}#O#P&j#P;'S%};'S;=`&y<%lO%}~&mSOY%}Z;'S%};'S;=`&y<%lO%}~&|P;=`<%l%}~'UO[~~'ZOd~V'bQpPsS!O!P'h!_!`%bS'kR!Q!['t!c!}'t#T#o'tS'yTsS}!O(Y!O!P'h!Q!['t!c!}'t#T#o'tS(]S}!O(Y!Q!['t!c!}'t#T#o'tV(nutSOY+RZq+Rqr-hrt+Rtu-huv0Qvw-hwx-hxy-hyz-hz{-h{|-h|}-h}!O-h!O!P-h!P!Q4^!Q![-h![!]-h!]!^-h!^!_+R!_!`-h!`!a+R!a!b-h!b!c-h!c!}-h!}#O6j#O#P-R#P#Q-h#Q#R+R#R#S-h#S#T+R#T#o-h#o#r+R#r#s-h#s;'S+R;'S;=`-b<%lO+RR+UXOY+RZ!P+R!P!Q+q!Q!}+R!}#O,S#O#P-R#P;'S+R;'S;=`-b<%lO+RR+vSbR#]#^+q#a#b+q#g#h+q#i#j+qR,VVOY,SZ#O,S#O#P,l#P#Q+R#Q;'S,S;'S;=`,{<%lO,SR,oSOY,SZ;'S,S;'S;=`,{<%lO,SR-OP;=`<%l,SR-USOY+RZ;'S+R;'S;=`-b<%lO+RR-eP;=`<%l+RV-mutSOY+RZq+Rqr-hrt+Rtu-huv0Qvw-hwx-hxy-hyz-hz{-h{|-h|}-h}!O-h!O!P-h!P!Q2O!Q![-h![!]-h!]!^-h!^!_+R!_!`-h!`!a+R!a!b-h!b!c-h!c!}-h!}#O6j#O#P-R#P#Q-h#Q#R+R#R#S-h#S#T+R#T#o-h#o#r+R#r#s-h#s;'S+R;'S;=`-b<%lO+RV0T^OY+RZ!P+R!P!Q+q!Q![1P![!c+R!c!i1P!i!}+R!}#O,S#O#P-R#P#T+R#T#Z1P#Z;'S+R;'S;=`-b<%lO+RV1S^OY+RZ!P+R!P!Q+q!Q![-h![!c+R!c!i-h!i!}+R!}#O,S#O#P-R#P#T+R#T#Z-h#Z;'S+R;'S;=`-b<%lO+RV2VqbRtSqr4^tu4^uv6Qvw4^wx4^xy4^yz4^z{4^{|4^|}4^}!O4^!O!P4^!P!Q4^!Q![4^![!]4^!]!^4^!_!`4^!a!b4^!b!c4^!c!}4^!}#O4^#P#Q4^#R#S4^#T#]4^#]#^2O#^#a4^#a#b2O#b#g4^#g#h2O#h#i4^#i#j2O#j#o4^#r#s4^S4citSqr4^tu4^uv6Qvw4^wx4^xy4^yz4^z{4^{|4^|}4^}!O4^!O!P4^!P!Q4^!Q![4^![!]4^!]!^4^!_!`4^!a!b4^!b!c4^!c!}4^!}#O4^#P#Q4^#R#S4^#T#o4^#r#s4^S6TR!Q![6^!c!i6^#T#Z6^S6aR!Q![4^!c!i4^#T#Z4^V6outSOY,SZq,Sqr6jrt,Stu6juv9Svw6jwx6jxy6jyz6jz{6j{|6j|}6j}!O6j!O!P6j!P!Q6j!Q![6j![!]6j!]!^6j!^!_,S!_!`6j!`!a,S!a!b6j!b!c6j!c!}6j!}#O6j#O#P,l#P#Q-h#Q#R,S#R#S6j#S#T,S#T#o6j#o#r,S#r#s6j#s;'S,S;'S;=`,{<%lO,SV9V]OY,SZ!Q,S!Q![:O![!c,S!c!i:O!i#O,S#O#P,l#P#Q+R#Q#T,S#T#Z:O#Z;'S,S;'S;=`,{<%lO,SV:R]OY,SZ!Q,S!Q![6j![!c,S!c!i6j!i#O,S#O#P,l#P#Q+R#Q#T,S#T#Z6j#Z;'S,S;'S;=`,{<%lO,S~:}P!P!Q;Q~;TP!P!Q;W~;]Or~~;`P#T#U;c~;fP#`#a;i~;lP#`#a;o~;rP#R#S;u~;xP#i#j;{~cU`QXPsS}!O(Y!O!P'h!Q![=o!c!}=o#R#S%g#T#o=oQ>xP!_!`%b~?QOj~", tokenizers: [0, 1, 2], topRules: {"Ruleset":[0,2]}, specialized: [{term: 9, get: (value) => spec_Identifier[value] || -1}], diff --git a/src/scripts/ruleset/ruleset.grammar b/src/scripts/ruleset/ruleset.grammar index 76613b871..aa07834e5 100644 --- a/src/scripts/ruleset/ruleset.grammar +++ b/src/scripts/ruleset/ruleset.grammar @@ -70,7 +70,7 @@ expression[@isGroup=Expression] { RegExp { "/" regExpPattern "/" regExpFlags? } regExpPattern { (regExpEscape | "[" regExpClassContent* "]" | regExpContent)+ } regExpEscape { "\\" ![\n] } - regExpClassContent { ![\]\\\n] } + regExpClassContent { ![\]\\\n] | "\\" ![\n] } regExpContent { ![/\\[\n] } regExpFlags { $[imsu]+ } diff --git a/src/scripts/ruleset/ruleset.test.ts b/src/scripts/ruleset/ruleset.test.ts index 2ad80cbc8..fce33677d 100644 --- a/src/scripts/ruleset/ruleset.test.ts +++ b/src/scripts/ruleset/ruleset.test.ts @@ -367,6 +367,33 @@ test("Ruleset", async (t) => { !ruleset.test({ url: "http://example.com/", title: "foo bar \\xA" }), ); } + // Regular expression Literals + // Escape sequence in class characters + // https://github.com/iorate/ublacklist/issues/527 + { + const ruleset = new Ruleset(String.raw`title=~/[\u3040-\u309F]/`); + assert.ok( + ruleset.test({ + url: "http://example.com/", + title: "ひらがな", + }), + ); + assert.ok( + !ruleset.test({ + url: "http://example.com/", + title: "カタカナ", + }), + ); + } + { + const ruleset = new Ruleset(String.raw`title=~/[\u3040-\u309G]/`); + assert.ok( + !ruleset.test({ + url: "http://example.com/", + title: "ひらがな", + }), + ); + } }); await t.test("Complex expressions", () => {