// Many thanks for this post which made this migration much easier.
// https://mathiasbynens.be/notes/css-escapes

// A CSS hex escape is made of at most this many hex digits.
const MAX_HEX_DIGITS = 6;

// Emitted in place of escapes that do not map to a usable code point.
const REPLACEMENT_CHARACTER = '\uFFFD';

const CONTAINS_ESCAPE = /\\/;

/**
 * Tells whether a UTF-16 code unit is an ASCII hex digit ([0-9a-fA-F]).
 *
 * @param {number} code code unit, or NaN when read past the end of a string
 * @returns {boolean}
 */
function isHexDigit (code) {
    return (
        (code >= 48 && code <= 57) || // 0-9
        (code >= 65 && code <= 70) || // A-F
        (code >= 97 && code <= 102) // a-f
    );
}

/**
 * Tells whether a UTF-16 code unit is CSS whitespace: tab, line feed,
 * form feed, carriage return or space.
 * https://drafts.csswg.org/css-syntax/#whitespace
 *
 * @param {number} code code unit, or NaN when read past the end of a string
 * @returns {boolean}
 */
function isWhitespace (code) {
    return code === 9 || code === 10 || code === 12 || code === 13 || code === 32;
}

/**
 * Reads a CSS hex escape from the start of `str` (the text that follows the
 * backslash): one to six hex digits, optionally followed by a single
 * whitespace character that terminates the escape and is consumed with it.
 * https://drafts.csswg.org/css-syntax/#consume-escaped-code-point
 *
 * @param {string} str
 * @returns {[string, number]|undefined} the decoded character and the number
 *   of code units consumed from `str`, or undefined when `str` does not start
 *   with a hex digit
 */
function gobbleHex (str) {
    let digitCount = 0;
    // charCodeAt returns NaN past the end of the string, which fails every
    // range comparison, so no separate bounds check is needed.
    while (digitCount < MAX_HEX_DIGITS && isHexDigit(str.charCodeAt(digitCount))) {
        digitCount++;
    }

    if (digitCount === 0) {
        return undefined;
    }

    const codePoint = parseInt(str.slice(0, digitCount), 16);

    // The terminating whitespace is looked up after the digits have been
    // counted so that it is also consumed after a full six-digit escape.
    const consumed = digitCount + (isWhitespace(str.charCodeAt(digitCount)) ? 1 : 0);

    const isSurrogate = codePoint >= 0xD800 && codePoint <= 0xDFFF;
    // Add special case for
    // "If this number is zero, or is for a surrogate, or is greater than the maximum allowed code point"
    // https://drafts.csswg.org/css-syntax/#maximum-allowed-code-point
    if (isSurrogate || codePoint === 0x0000 || codePoint > 0x10FFFF) {
        return [REPLACEMENT_CHARACTER, consumed];
    }

    return [String.fromCodePoint(codePoint), consumed];
}

/**
 * Removes CSS escaping from `str`.
 *
 * - `\` followed by hex digits becomes the matching character (see gobbleHex).
 * - `\\` becomes a single `\`.
 * - A `\` that ends the string is kept.
 * - Any other `\` is dropped and the character after it is kept.
 *
 * @param {string} str
 * @returns {string} the unescaped text; `str` itself is returned untouched
 *   when it contains no backslash
 */
export default function unesc (str) {
    if (!CONTAINS_ESCAPE.test(str)) {
        return str;
    }

    let ret = '';

    for (let i = 0; i < str.length; i++) {
        if (str[i] !== '\\') {
            ret += str[i];
            continue;
        }

        // Hand over enough text for six hex digits plus the whitespace that
        // may terminate them.
        const gobbled = gobbleHex(str.slice(i + 1, i + 1 + MAX_HEX_DIGITS + 1));
        if (gobbled !== undefined) {
            const [character, consumed] = gobbled;
            ret += character;
            i += consumed;
            continue;
        }

        // Retain a pair of \\ if double escaped `\\\\`
        // https://github.com/postcss/postcss-selector-parser/commit/268c9a7656fb53f543dc620aa5b73a30ec3ff20e
        if (str[i + 1] === '\\') {
            ret += '\\';
            i++;
            continue;
        }

        // if \\ is at the end of the string retain it
        // https://github.com/postcss/postcss-selector-parser/commit/01a6b346e3612ce1ab20219acc26abdc259ccefb
        if (str.length === i + 1) {
            ret += str[i];
        }
        // Otherwise the backslash is dropped; the escaped character is
        // appended by the next iteration.
    }

    return ret;
}