From cffeadf33124a77e942dadec4cc919d3f70dd336 Mon Sep 17 00:00:00 2001
From: hanshenrik
Date: Wed, 21 Feb 2024 13:45:38 +0100
Subject: [PATCH] DOMXPath::quote(string $str): string

Method to quote strings in XPath, similar to PDO::quote() /
mysqli::real_escape_string.

Sample usage:
$xp->query("//span[contains(text()," . $xp->quote($string) . ")]")

The algorithm is derived from Robert Rossney's research into XPath quoting
published at https://stackoverflow.com/a/1352556/1067003 (but using an
improved implementation I wrote myself, originally for
https://github.com/chrome-php/chrome/pull/575 )
---
 ext/dom/php_dom.stub.php          |  2 +
 ext/dom/php_dom_arginfo.h         | 12 ++++-
 ext/dom/tests/DOMXPath_quote.phpt | 81 ++++++++++++++++++++++++++++++
 ext/dom/xpath.c                   | 86 ++++++++++++++++++++++++++++++++
 4 files changed, 180 insertions(+), 1 deletion(-)
 create mode 100644 ext/dom/tests/DOMXPath_quote.phpt

diff --git a/ext/dom/php_dom.stub.php b/ext/dom/php_dom.stub.php
index bda09872694a9..8a6e87676f2eb 100644
--- a/ext/dom/php_dom.stub.php
+++ b/ext/dom/php_dom.stub.php
@@ -934,6 +934,8 @@ public function registerNamespace(string $prefix, string $namespace): bool {}
         public function registerPhpFunctions(string|array|null $restrict = null): void {}
 
         public function registerPhpFunctionNS(string $namespaceURI, string $name, callable $callable): void {}
+
+        public static function quote(string $str): string {}
     }
 #endif
 
diff --git a/ext/dom/php_dom_arginfo.h b/ext/dom/php_dom_arginfo.h
index 5646bde7867ec..95cc8e245478e 100644
--- a/ext/dom/php_dom_arginfo.h
+++ b/ext/dom/php_dom_arginfo.h
@@ -1,5 +1,5 @@
 /* This is a generated file, edit the .stub.php file instead.
- * Stub hash: 184308dfd1a133145d170c467e7600a12b14e327 */
+ * Stub hash: 498e4aa2e670454b78808215e8efaedb2ce7d251 */
 
 ZEND_BEGIN_ARG_WITH_RETURN_OBJ_INFO_EX(arginfo_dom_import_simplexml, 0, 1, DOMElement, 0)
 	ZEND_ARG_TYPE_INFO(0, node, IS_OBJECT, 0)
@@ -459,6 +459,12 @@ ZEND_BEGIN_ARG_WITH_RETURN_TYPE_INFO_EX(arginfo_class_DOMXPath_registerPhpFuncti
 ZEND_END_ARG_INFO()
 #endif
 
+#if defined(LIBXML_XPATH_ENABLED)
+ZEND_BEGIN_ARG_WITH_RETURN_TYPE_INFO_EX(arginfo_class_DOMXPath_quote, 0, 1, IS_STRING, 0)
+	ZEND_ARG_TYPE_INFO(0, str, IS_STRING, 0)
+ZEND_END_ARG_INFO()
+#endif
+
 ZEND_BEGIN_ARG_INFO_EX(arginfo_class_DOM_Document_createAttribute, 0, 0, 1)
 	ZEND_ARG_TYPE_INFO(0, localName, IS_STRING, 0)
 ZEND_END_ARG_INFO()
@@ -748,6 +754,9 @@ ZEND_METHOD(DOMXPath, registerPhpFunctions);
 #if defined(LIBXML_XPATH_ENABLED)
 ZEND_METHOD(DOMXPath, registerPhpFunctionNS);
 #endif
+#if defined(LIBXML_XPATH_ENABLED)
+ZEND_METHOD(DOMXPath, quote);
+#endif
 ZEND_METHOD(DOM_Document, createAttribute);
 ZEND_METHOD(DOM_Document, createAttributeNS);
 ZEND_METHOD(DOM_Document, createCDATASection);
@@ -1002,6 +1011,7 @@ static const zend_function_entry class_DOMXPath_methods[] = {
 	ZEND_ME(DOMXPath, registerNamespace, arginfo_class_DOMXPath_registerNamespace, ZEND_ACC_PUBLIC)
 	ZEND_ME(DOMXPath, registerPhpFunctions, arginfo_class_DOMXPath_registerPhpFunctions, ZEND_ACC_PUBLIC)
 	ZEND_ME(DOMXPath, registerPhpFunctionNS, arginfo_class_DOMXPath_registerPhpFunctionNS, ZEND_ACC_PUBLIC)
+	ZEND_ME(DOMXPath, quote, arginfo_class_DOMXPath_quote, ZEND_ACC_PUBLIC|ZEND_ACC_STATIC)
 	ZEND_FE_END
 };
 #endif
diff --git a/ext/dom/tests/DOMXPath_quote.phpt b/ext/dom/tests/DOMXPath_quote.phpt
new file mode 100644
index 0000000000000..4998ae9ddef66
--- /dev/null
+++ b/ext/dom/tests/DOMXPath_quote.phpt
@@ -0,0 +1,81 @@
+--TEST--
+Test DOMXPath::quote with various inputs
+--EXTENSIONS--
+dom
+--SKIPIF--
+<?php if (!class_exists('DOMXPath')) die('skip DOMXPath not available'); ?>
+--FILE--
+<?php
+/**
+ * Userland reference implementation of DOMXPath::quote(); its result is dumped
+ * next to the native one when a case fails.
+ *
+ * Example: $xp->query("//span[contains(text()," . $xp->quote($string) . ")]")
+ *
+ * @param string $string string to quote.
+ * @return string quoted string.
+ */
+function UserlandDOMXPathQuote(string $string): string
+{
+    if (!str_contains($string, "'")) {
+        return "'" . $string . "'";
+    }
+    if (!str_contains($string, '"')) {
+        return '"' . $string . '"';
+    }
+    // The string contains both kinds of quotes: split it into runs that each
+    // avoid one kind and join the quoted runs with concat(), e.g.:
+    //    'foo"bar => concat("'foo",'"bar')
+    $parts = [];
+    while ($string !== '') {
+        $bytesUntilSingleQuote = strcspn($string, "'");
+        $bytesUntilDoubleQuote = strcspn($string, '"');
+        $quoteMethod = ($bytesUntilSingleQuote > $bytesUntilDoubleQuote) ? "'" : '"';
+        $bytesUntilQuote = max($bytesUntilSingleQuote, $bytesUntilDoubleQuote);
+        $parts[] = $quoteMethod . substr($string, 0, $bytesUntilQuote) . $quoteMethod;
+        $string = substr($string, $bytesUntilQuote);
+    }
+    return 'concat(' . implode(',', $parts) . ')';
+}
+
+$tests = [
+    '' => "''", // empty string
+    'foo' => "'foo'", // no quotes
+    '"foo' => '\'"foo\'', // double quotes only
+    '\'foo' => '"\'foo"', // single quotes only
+    '\'foo"bar' => 'concat("\'foo",\'"bar\')', // both; double quotes in mid-string
+    '\'foo"bar"baz' => 'concat("\'foo",\'"bar"baz\')', // multiple double quotes in mid-string
+    '\'foo"' => 'concat("\'foo",\'"\')', // string ends with double quotes
+    '\'foo""' => 'concat("\'foo",\'""\')', // string ends with run of double quotes
+    '"\'foo' => 'concat(\'"\',"\'foo")', // string begins with double quotes
+    '""\'foo' => 'concat(\'""\',"\'foo")', // string begins with run of double quotes
+    '\'foo""bar' => 'concat("\'foo",\'""bar\')', // run of double quotes in mid-string
+];
+
+foreach ($tests as $input => $expected) {
+    $result = DOMXPath::quote($input);
+    if ($result === $expected) {
+        echo "Pass: {$input} => {$result}\n";
+    } else {
+        echo 'Fail: ';
+        var_dump([
+            'input' => $input,
+            'expected' => $expected,
+            'result' => $result,
+            'userland_implementation_result' => UserlandDOMXPathQuote($input),
+        ]);
+    }
+}
+?>
+--EXPECT--
+Pass:  => ''
+Pass: foo => 'foo'
+Pass: "foo => '"foo'
+Pass: 'foo => "'foo"
+Pass: 'foo"bar => concat("'foo",'"bar')
+Pass: 'foo"bar"baz => concat("'foo",'"bar"baz')
+Pass: 'foo" => concat("'foo",'"')
+Pass: 'foo"" => concat("'foo",'""')
+Pass: "'foo => concat('"',"'foo")
+Pass: ""'foo => concat('""',"'foo")
+Pass: 'foo""bar => concat("'foo",'""bar')
diff --git a/ext/dom/xpath.c b/ext/dom/xpath.c
index 272541c61a9c1..a5991233b0c75 100644
--- a/ext/dom/xpath.c
+++ b/ext/dom/xpath.c
@@ -446,6 +446,92 @@ PHP_METHOD(DOMXPath, registerPhpFunctionNS)
 	);
 }
 
+/* Measures the next concat() argument for DOMXPath::quote(): the longest prefix
+ * of [ptr, end) that is free of at least one of the two quote characters. The
+ * character it is free of can safely delimit it; that character is stored in
+ * *delimiter and the prefix length in bytes is returned. */
+static size_t dom_xpath_quote_segment(const char *ptr, const char *end, char *delimiter)
+{
+	const size_t remaining = (size_t) (end - ptr);
+	const char *single_quote = memchr(ptr, '\'', remaining);
+	const char *double_quote = memchr(ptr, '"', remaining);
+	const size_t until_single = single_quote ? (size_t) (single_quote - ptr) : remaining;
+	const size_t until_double = double_quote ? (size_t) (double_quote - ptr) : remaining;
+
+	if (until_single > until_double) {
+		*delimiter = '\'';
+		return until_single;
+	}
+	*delimiter = '"';
+	return until_double;
+}
+
+/* {{{ Quotes a string so it can be embedded as a literal in an XPath expression */
+PHP_METHOD(DOMXPath, quote)
+{
+	const char *input;
+	size_t input_len;
+
+	if (zend_parse_parameters(ZEND_NUM_ARGS(), "s", &input, &input_len) == FAILURE) {
+		RETURN_THROWS();
+	}
+
+	const bool has_single_quote = memchr(input, '\'', input_len) != NULL;
+	if (!has_single_quote || memchr(input, '"', input_len) == NULL) {
+		/* At most one kind of quote occurs: delimit the input with the other kind. */
+		const char delimiter = has_single_quote ? '"' : '\'';
+		zend_string *quoted = zend_string_safe_alloc(1, input_len, 2, false);
+
+		ZSTR_VAL(quoted)[0] = delimiter;
+		memcpy(ZSTR_VAL(quoted) + 1, input, input_len);
+		ZSTR_VAL(quoted)[input_len + 1] = delimiter;
+		ZSTR_VAL(quoted)[input_len + 2] = '\0';
+		RETURN_NEW_STR(quoted);
+	}
+
+	/* XPath 1.0 string literals have no escape sequences, so input containing
+	 * both kinds of quotes is emitted as concat() over segments that each avoid
+	 * one kind, e.g. 'foo"bar => concat("'foo",'"bar'). */
+	static const char prefix[] = "concat(";
+	const size_t prefix_len = sizeof(prefix) - 1;
+	const char *const end = input + input_len;
+	const char *ptr;
+	char delimiter;
+
+	/* First pass: compute the exact length so the result is allocated once.
+	 * Each segment costs its bytes, two delimiters and a trailing ',' or ')'. */
+	size_t output_len = prefix_len;
+	for (ptr = input; ptr < end;) {
+		const size_t segment_len = dom_xpath_quote_segment(ptr, end, &delimiter);
+		ZEND_ASSERT(segment_len > 0);
+		output_len += segment_len + 3;
+		ptr += segment_len;
+	}
+
+	/* Second pass: write directly into the zend_string that is returned, so no
+	 * temporary buffer is left behind. */
+	zend_string *quoted = zend_string_alloc(output_len, false);
+	char *out = ZSTR_VAL(quoted);
+
+	memcpy(out, prefix, prefix_len);
+	out += prefix_len;
+	for (ptr = input; ptr < end;) {
+		const size_t segment_len = dom_xpath_quote_segment(ptr, end, &delimiter);
+
+		*out++ = delimiter;
+		memcpy(out, ptr, segment_len);
+		out += segment_len;
+		*out++ = delimiter;
+		*out++ = ',';
+		ptr += segment_len;
+	}
+	out[-1] = ')'; /* the last separator becomes the closing parenthesis */
+	*out = '\0';
+	ZEND_ASSERT((size_t) (out - ZSTR_VAL(quoted)) == output_len);
+	RETURN_NEW_STR(quoted);
+}
+/* }}} */
+
 #endif /* LIBXML_XPATH_ENABLED */
 
 #endif