From 1730af162a66aee420cdda35f0359188ec0ce859 Mon Sep 17 00:00:00 2001 From: hanshenrik Date: Wed, 21 Feb 2024 13:45:38 +0100 Subject: [PATCH 01/19] DOMXPath::quote(string $str): string method to quote strings in XPath, similar to PDO::quote() / mysqli::real_escape_string sample usage: $xp->query("//span[contains(text()," . $xp->quote($string) . ")]") the algorithm is derived from Robert Rossney's research into XPath quoting published at https://stackoverflow.com/a/1352556/1067003 (but using an improved implementation I wrote myself, originally for https://github.com/chrome-php/chrome/pull/575 ) --- ext/dom/php_dom.stub.php | 2 + ext/dom/php_dom_arginfo.h | 12 ++++- ext/dom/tests/DOMXPath_quote.phpt | 84 +++++++++++++++++++++++++++++ ext/dom/xpath.c | 87 +++++++++++++++++++++++++++++++ 4 files changed, 184 insertions(+), 1 deletion(-) create mode 100644 ext/dom/tests/DOMXPath_quote.phpt diff --git a/ext/dom/php_dom.stub.php b/ext/dom/php_dom.stub.php index bda09872694a9..8a6e87676f2eb 100644 --- a/ext/dom/php_dom.stub.php +++ b/ext/dom/php_dom.stub.php @@ -934,6 +934,8 @@ public function registerNamespace(string $prefix, string $namespace): bool {} public function registerPhpFunctions(string|array|null $restrict = null): void {} public function registerPhpFunctionNS(string $namespaceURI, string $name, callable $callable): void {} + + public static function quote(string $str): string {} } #endif diff --git a/ext/dom/php_dom_arginfo.h b/ext/dom/php_dom_arginfo.h index 5646bde7867ec..95cc8e245478e 100644 --- a/ext/dom/php_dom_arginfo.h +++ b/ext/dom/php_dom_arginfo.h @@ -1,5 +1,5 @@ /* This is a generated file, edit the .stub.php file instead. - * Stub hash: 184308dfd1a133145d170c467e7600a12b14e327 */ + * Stub hash: 498e4aa2e670454b78808215e8efaedb2ce7d251 */ ZEND_BEGIN_ARG_WITH_RETURN_OBJ_INFO_EX(arginfo_dom_import_simplexml, 0, 1, DOMElement, 0) ZEND_ARG_TYPE_INFO(0, node, IS_OBJECT, 0) @@ -459,6 +459,12 @@ ZEND_BEGIN_ARG_WITH_RETURN_TYPE_INFO_EX(arginfo_class_DOMXPath_registerPhpFuncti ZEND_END_ARG_INFO() #endif +#if defined(LIBXML_XPATH_ENABLED) +ZEND_BEGIN_ARG_WITH_RETURN_TYPE_INFO_EX(arginfo_class_DOMXPath_quote, 0, 1, IS_STRING, 0) + ZEND_ARG_TYPE_INFO(0, str, IS_STRING, 0) +ZEND_END_ARG_INFO() +#endif + ZEND_BEGIN_ARG_INFO_EX(arginfo_class_DOM_Document_createAttribute, 0, 0, 1) ZEND_ARG_TYPE_INFO(0, localName, IS_STRING, 0) ZEND_END_ARG_INFO() @@ -748,6 +754,9 @@ ZEND_METHOD(DOMXPath, registerPhpFunctions); #if defined(LIBXML_XPATH_ENABLED) ZEND_METHOD(DOMXPath, registerPhpFunctionNS); #endif +#if defined(LIBXML_XPATH_ENABLED) +ZEND_METHOD(DOMXPath, quote); +#endif ZEND_METHOD(DOM_Document, createAttribute); ZEND_METHOD(DOM_Document, createAttributeNS); ZEND_METHOD(DOM_Document, createCDATASection); @@ -1002,6 +1011,7 @@ static const zend_function_entry class_DOMXPath_methods[] = { ZEND_ME(DOMXPath, registerNamespace, arginfo_class_DOMXPath_registerNamespace, ZEND_ACC_PUBLIC) ZEND_ME(DOMXPath, registerPhpFunctions, arginfo_class_DOMXPath_registerPhpFunctions, ZEND_ACC_PUBLIC) ZEND_ME(DOMXPath, registerPhpFunctionNS, arginfo_class_DOMXPath_registerPhpFunctionNS, ZEND_ACC_PUBLIC) + ZEND_ME(DOMXPath, quote, arginfo_class_DOMXPath_quote, ZEND_ACC_PUBLIC|ZEND_ACC_STATIC) ZEND_FE_END }; #endif diff --git a/ext/dom/tests/DOMXPath_quote.phpt b/ext/dom/tests/DOMXPath_quote.phpt new file mode 100644 index 0000000000000..8bdc1bdb14cd4 --- /dev/null +++ b/ext/dom/tests/DOMXPath_quote.phpt @@ -0,0 +1,84 @@ +--TEST-- +Test DOMXPath::quote with various inputs +--SKIPIF-- + +--FILE-- +query("//span[contains(text()," . $xp->quote($string) . ")]") + * + * @param string $string string to quote. + * @return string quoted string. + */ +function UserlandDOMXPathQuote(string $string): string +{ + if (false === \strpos($string, '\'')) { + return '\'' . $string . '\''; + } + if (false === \strpos($string, '"')) { + return '"' . $string . '"'; + } + // if the string contains both single and double quotes, construct an + // expression that concatenates all non-double-quote substrings with + // the quotes, e.g.: + // 'foo'"bar => concat("'foo'", '"bar") + $sb = []; + while ($string !== '') { + $bytesUntilSingleQuote = \strcspn($string, '\''); + $bytesUntilDoubleQuote = \strcspn($string, '"'); + $quoteMethod = ($bytesUntilSingleQuote > $bytesUntilDoubleQuote) ? "'" : '"'; + $bytesUntilQuote = \max($bytesUntilSingleQuote, $bytesUntilDoubleQuote); + $sb[] = $quoteMethod . \substr($string, 0, $bytesUntilQuote) . $quoteMethod; + $string = \substr($string, $bytesUntilQuote); + } + $sb = \implode(',', $sb); + return 'concat(' . $sb . ')'; +} + + + +$tests = [ + 'foo' => "'foo'", // no quotes + '"foo' => '\'"foo\'', // double quotes only + '\'foo' => '"\'foo"', // single quotes only + '\'foo"bar' => 'concat("\'foo",\'"bar\')', // both; double quotes in mid-string + '\'foo"bar"baz' => 'concat("\'foo",\'"bar"baz\')', // multiple double quotes in mid-string + '\'foo"' => 'concat("\'foo",\'"\')', // string ends with double quotes + '\'foo""' => 'concat("\'foo",\'""\')', // string ends with run of double quotes + '"\'foo' => 'concat(\'"\',"\'foo")', // string begins with double quotes + '""\'foo' => 'concat(\'""\',"\'foo")', // string begins with run of double quotes + '\'foo""bar' => 'concat("\'foo",\'""bar\')', // run of double quotes in mid-string +]; + +foreach ($tests as $input => $expected) { + $result = $xpath->quote($input); + if ($result === $expected) { + echo "Pass: {$input} => {$result}\n"; + } else { + echo 'Fail: '; + var_dump([ + 'input' => $input, + 'expected' => $expected, + 'result' => $result, + 'userland_implementation_result' => UserlandDOMXPathQuote($input), + ]); + } +} +?> +--EXPECT-- +Pass: foo => 'foo' +Pass: "foo => '"foo' +Pass: 'foo => "'foo" +Pass: 'foo"bar => concat("'foo",'"bar') +Pass: 'foo"bar"baz => concat("'foo",'"bar"baz') +Pass: 'foo" => concat("'foo",'"') +Pass: 'foo"" => concat("'foo",'""') +Pass: "'foo => concat('"',"'foo") +Pass: ""'foo => concat('""',"'foo") +Pass: 'foo""bar => concat("'foo",'""bar') \ No newline at end of file diff --git a/ext/dom/xpath.c b/ext/dom/xpath.c index 272541c61a9c1..d3a9755af1773 100644 --- a/ext/dom/xpath.c +++ b/ext/dom/xpath.c @@ -446,6 +446,93 @@ PHP_METHOD(DOMXPath, registerPhpFunctionNS) ); } +/* {{{ */ +PHP_METHOD(DOMXPath, quote) { + char *input; + size_t input_len; + char *output; + size_t output_len = 0; + + if (zend_parse_parameters(ZEND_NUM_ARGS(), "s", &input, &input_len) == + FAILURE) { + RETURN_THROWS(); + } + if (memchr(input, '\'', input_len) == NULL) { + output_len = input_len + 2; + output = emalloc(output_len); + output[0] = '\''; + memcpy(output + 1, input, input_len); + output[output_len - 1] = '\''; + } else if (memchr(input, '"', input_len) == NULL) { + output_len = input_len + 2; + output = emalloc(output_len); + output[0] = '"'; + memcpy(output + 1, input, input_len); + output[output_len - 1] = '"'; + } else { + // need to do the concat() trick published by Robert Rossney at https://stackoverflow.com/a/1352556/1067003 + // first lets calculate the length (probably faster than repeated reallocs) + output_len = strlen("concat("); + size_t i; + for (size_t i = 0; i < input_len; ++i) { + uintptr_t bytesUntilSingleQuote = + (uintptr_t)memchr(input + i, '\'', input_len - i); + if (bytesUntilSingleQuote == 0) { + bytesUntilSingleQuote = input_len - i; + } else { + bytesUntilSingleQuote = bytesUntilSingleQuote - (uintptr_t)(input + i); + } + uintptr_t bytesUntilDoubleQuote = + (uintptr_t)memchr(input + i, '"', input_len - i); + if (bytesUntilDoubleQuote == 0) { + bytesUntilDoubleQuote = input_len - i; + } else { + bytesUntilDoubleQuote = bytesUntilDoubleQuote - (uintptr_t)(input + i); + } + const size_t bytesUntilQuote = + (bytesUntilSingleQuote > bytesUntilDoubleQuote) + ? bytesUntilSingleQuote + : bytesUntilDoubleQuote; + i += bytesUntilQuote - 1; + output_len += 1 + bytesUntilQuote + 1 + 1; // "bytesUntilQuote"[,)] + } + output = emalloc(output_len); + size_t outputPos = strlen("concat("); + memcpy(output, "concat(", outputPos); + for (size_t i = 0; i < input_len; ++i) { + uintptr_t bytesUntilSingleQuote = + (uintptr_t)memchr(input + i, '\'', input_len - i); + if (bytesUntilSingleQuote == 0) { + bytesUntilSingleQuote = input_len - i; + } else { + bytesUntilSingleQuote = bytesUntilSingleQuote - (uintptr_t)(input + i); + } + uintptr_t bytesUntilDoubleQuote = + (uintptr_t)memchr(input + i, '"', input_len - i); + if (bytesUntilDoubleQuote == 0) { + bytesUntilDoubleQuote = input_len - i; + } else { + bytesUntilDoubleQuote = bytesUntilDoubleQuote - (uintptr_t)(input + i); + } + const size_t bytesUntilQuote = + (bytesUntilSingleQuote > bytesUntilDoubleQuote) + ? bytesUntilSingleQuote + : bytesUntilDoubleQuote; + const char quoteMethod = + (bytesUntilSingleQuote > bytesUntilDoubleQuote) ? '\'' : '"'; + output[outputPos++] = quoteMethod; + memcpy(output + outputPos, input + i, bytesUntilQuote); + outputPos += bytesUntilQuote; + output[outputPos++] = quoteMethod; + i += bytesUntilQuote - 1; + output[outputPos++] = ','; + } + output[outputPos - 1] = ')'; + } + RETVAL_STRINGL(output, output_len); +} +/* }}} */ + #endif /* LIBXML_XPATH_ENABLED */ #endif From bedc0d026682481c7f98c141dbd6ef3d5648568e Mon Sep 17 00:00:00 2001 From: hanshenrik Date: Wed, 21 Feb 2024 14:09:19 +0100 Subject: [PATCH 02/19] oops --- ext/dom/xpath.c | 1 - 1 file changed, 1 deletion(-) diff --git a/ext/dom/xpath.c b/ext/dom/xpath.c index d3a9755af1773..10deb37275ac4 100644 --- a/ext/dom/xpath.c +++ b/ext/dom/xpath.c @@ -473,7 +473,6 @@ PHP_METHOD(DOMXPath, quote) { // need to do the concat() trick published by Robert Rossney at https://stackoverflow.com/a/1352556/1067003 // first lets calculate the length (probably faster than repeated reallocs) output_len = strlen("concat("); - size_t i; for (size_t i = 0; i < input_len; ++i) { uintptr_t bytesUntilSingleQuote = (uintptr_t)memchr(input + i, '\'', input_len - i); From 779bfe147821f0a9e3c44e44859b7b5d94c239e4 Mon Sep 17 00:00:00 2001 From: hanshenrik Date: Wed, 21 Feb 2024 14:58:34 +0100 Subject: [PATCH 03/19] efree --- ext/dom/xpath.c | 1 + 1 file changed, 1 insertion(+) diff --git a/ext/dom/xpath.c b/ext/dom/xpath.c index 10deb37275ac4..39f22b4ac2be6 100644 --- a/ext/dom/xpath.c +++ b/ext/dom/xpath.c @@ -529,6 +529,7 @@ PHP_METHOD(DOMXPath, quote) { output[outputPos - 1] = ')'; } RETVAL_STRINGL(output, output_len); + efree(output); // todo: directly return a string and avoid the copy, probably possible but idk how (reviewers may want to check this) } /* }}} */ From 9925f4a059fb6d6bc856c409f1892443b77356eb Mon Sep 17 00:00:00 2001 From: hanshenrik Date: Wed, 21 Feb 2024 16:31:20 +0100 Subject: [PATCH 04/19] eol --- ext/dom/tests/DOMXPath_quote.phpt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ext/dom/tests/DOMXPath_quote.phpt b/ext/dom/tests/DOMXPath_quote.phpt index 8bdc1bdb14cd4..c2aeb027bf7cd 100644 --- a/ext/dom/tests/DOMXPath_quote.phpt +++ b/ext/dom/tests/DOMXPath_quote.phpt @@ -81,4 +81,4 @@ Pass: 'foo" => concat("'foo",'"') Pass: 'foo"" => concat("'foo",'""') Pass: "'foo => concat('"',"'foo") Pass: ""'foo => concat('""',"'foo") -Pass: 'foo""bar => concat("'foo",'""bar') \ No newline at end of file +Pass: 'foo""bar => concat("'foo",'""bar') From 74404a6bd04ea874512ac429da1f8cf7574dff71 Mon Sep 17 00:00:00 2001 From: hanshenrik Date: Wed, 21 Feb 2024 17:29:31 +0100 Subject: [PATCH 05/19] use smart_str api may be slightly slower but easier to read/maintain --- ext/dom/xpath.c | 54 ++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 53 insertions(+), 1 deletion(-) diff --git a/ext/dom/xpath.c b/ext/dom/xpath.c index 39f22b4ac2be6..7b096bacd820c 100644 --- a/ext/dom/xpath.c +++ b/ext/dom/xpath.c @@ -29,7 +29,7 @@ /* * class DOMXPath */ - +#define LIBXML_XPATH_ENABLED #ifdef LIBXML_XPATH_ENABLED void dom_xpath_objects_free_storage(zend_object *object) @@ -447,6 +447,7 @@ PHP_METHOD(DOMXPath, registerPhpFunctionNS) } /* {{{ */ +#if 0 PHP_METHOD(DOMXPath, quote) { char *input; size_t input_len; @@ -531,6 +532,57 @@ PHP_METHOD(DOMXPath, quote) { RETVAL_STRINGL(output, output_len); efree(output); // todo: directly return a string and avoid the copy, probably possible but idk how (reviewers may want to check this) } +#endif +PHP_METHOD(DOMXPath, quote) { + char *input; + size_t input_len; + smart_str output = {0}; + + if (zend_parse_parameters(ZEND_NUM_ARGS(), "s", &input, &input_len) == + FAILURE) { + RETURN_THROWS(); + } + if (memchr(input, '\'', input_len) == NULL) { + smart_str_appendc(&output, '\''); + smart_str_appendl(&output, input, input_len); + smart_str_appendc(&output, '\''); + } else if (memchr(input, '"', input_len) == NULL) { + smart_str_appendc(&output, '"'); + smart_str_appendl(&output, input, input_len); + smart_str_appendc(&output, '"'); + } else { + smart_str_appends(&output, "concat("); + for (size_t i = 0; i < input_len; ++i) { + uintptr_t bytesUntilSingleQuote = + (uintptr_t)memchr(input + i, '\'', input_len - i); + if (bytesUntilSingleQuote == 0) { + bytesUntilSingleQuote = input_len - i; + } else { + bytesUntilSingleQuote = bytesUntilSingleQuote - (uintptr_t)(input + i); + } + uintptr_t bytesUntilDoubleQuote = + (uintptr_t)memchr(input + i, '"', input_len - i); + if (bytesUntilDoubleQuote == 0) { + bytesUntilDoubleQuote = input_len - i; + } else { + bytesUntilDoubleQuote = bytesUntilDoubleQuote - (uintptr_t)(input + i); + } + const size_t bytesUntilQuote = + (bytesUntilSingleQuote > bytesUntilDoubleQuote) + ? bytesUntilSingleQuote + : bytesUntilDoubleQuote; + const char quoteMethod = + (bytesUntilSingleQuote > bytesUntilDoubleQuote) ? '\'' : '"'; + smart_str_appendc(&output, quoteMethod); + smart_str_appendl(&output, input + i, bytesUntilQuote); + smart_str_appendc(&output, quoteMethod); + i += bytesUntilQuote - 1; + smart_str_appends(&output, ","); + } + output.s->val[output.s->len -1 ] = ')'; // is there a smart_str function for this? (probably not) + } + RETURN_STR(smart_str_extract(&output)); +} /* }}} */ #endif /* LIBXML_XPATH_ENABLED */ From c17264e8888e03d07705695eb3736496096265e2 Mon Sep 17 00:00:00 2001 From: hanshenrik Date: Wed, 21 Feb 2024 17:30:43 +0100 Subject: [PATCH 06/19] cleanup --- ext/dom/xpath.c | 86 ------------------------------------------------- 1 file changed, 86 deletions(-) diff --git a/ext/dom/xpath.c b/ext/dom/xpath.c index 7b096bacd820c..ce2a1b4dd4777 100644 --- a/ext/dom/xpath.c +++ b/ext/dom/xpath.c @@ -447,92 +447,6 @@ PHP_METHOD(DOMXPath, registerPhpFunctionNS) } /* {{{ */ -#if 0 -PHP_METHOD(DOMXPath, quote) { - char *input; - size_t input_len; - char *output; - size_t output_len = 0; - - if (zend_parse_parameters(ZEND_NUM_ARGS(), "s", &input, &input_len) == - FAILURE) { - RETURN_THROWS(); - } - if (memchr(input, '\'', input_len) == NULL) { - output_len = input_len + 2; - output = emalloc(output_len); - output[0] = '\''; - memcpy(output + 1, input, input_len); - output[output_len - 1] = '\''; - } else if (memchr(input, '"', input_len) == NULL) { - output_len = input_len + 2; - output = emalloc(output_len); - output[0] = '"'; - memcpy(output + 1, input, input_len); - output[output_len - 1] = '"'; - } else { - // need to do the concat() trick published by Robert Rossney at https://stackoverflow.com/a/1352556/1067003 - // first lets calculate the length (probably faster than repeated reallocs) - output_len = strlen("concat("); - for (size_t i = 0; i < input_len; ++i) { - uintptr_t bytesUntilSingleQuote = - (uintptr_t)memchr(input + i, '\'', input_len - i); - if (bytesUntilSingleQuote == 0) { - bytesUntilSingleQuote = input_len - i; - } else { - bytesUntilSingleQuote = bytesUntilSingleQuote - (uintptr_t)(input + i); - } - uintptr_t bytesUntilDoubleQuote = - (uintptr_t)memchr(input + i, '"', input_len - i); - if (bytesUntilDoubleQuote == 0) { - bytesUntilDoubleQuote = input_len - i; - } else { - bytesUntilDoubleQuote = bytesUntilDoubleQuote - (uintptr_t)(input + i); - } - const size_t bytesUntilQuote = - (bytesUntilSingleQuote > bytesUntilDoubleQuote) - ? bytesUntilSingleQuote - : bytesUntilDoubleQuote; - i += bytesUntilQuote - 1; - output_len += 1 + bytesUntilQuote + 1 + 1; // "bytesUntilQuote"[,)] - } - output = emalloc(output_len); - size_t outputPos = strlen("concat("); - memcpy(output, "concat(", outputPos); - for (size_t i = 0; i < input_len; ++i) { - uintptr_t bytesUntilSingleQuote = - (uintptr_t)memchr(input + i, '\'', input_len - i); - if (bytesUntilSingleQuote == 0) { - bytesUntilSingleQuote = input_len - i; - } else { - bytesUntilSingleQuote = bytesUntilSingleQuote - (uintptr_t)(input + i); - } - uintptr_t bytesUntilDoubleQuote = - (uintptr_t)memchr(input + i, '"', input_len - i); - if (bytesUntilDoubleQuote == 0) { - bytesUntilDoubleQuote = input_len - i; - } else { - bytesUntilDoubleQuote = bytesUntilDoubleQuote - (uintptr_t)(input + i); - } - const size_t bytesUntilQuote = - (bytesUntilSingleQuote > bytesUntilDoubleQuote) - ? bytesUntilSingleQuote - : bytesUntilDoubleQuote; - const char quoteMethod = - (bytesUntilSingleQuote > bytesUntilDoubleQuote) ? '\'' : '"'; - output[outputPos++] = quoteMethod; - memcpy(output + outputPos, input + i, bytesUntilQuote); - outputPos += bytesUntilQuote; - output[outputPos++] = quoteMethod; - i += bytesUntilQuote - 1; - output[outputPos++] = ','; - } - output[outputPos - 1] = ')'; - } - RETVAL_STRINGL(output, output_len); - efree(output); // todo: directly return a string and avoid the copy, probably possible but idk how (reviewers may want to check this) -} -#endif PHP_METHOD(DOMXPath, quote) { char *input; size_t input_len; From b85a768169ccce6806741030da8afe1440b3c647 Mon Sep 17 00:00:00 2001 From: hanshenrik Date: Wed, 21 Feb 2024 17:32:12 +0100 Subject: [PATCH 07/19] credits --- ext/dom/xpath.c | 1 + 1 file changed, 1 insertion(+) diff --git a/ext/dom/xpath.c b/ext/dom/xpath.c index ce2a1b4dd4777..a1450a7c5f094 100644 --- a/ext/dom/xpath.c +++ b/ext/dom/xpath.c @@ -465,6 +465,7 @@ PHP_METHOD(DOMXPath, quote) { smart_str_appendl(&output, input, input_len); smart_str_appendc(&output, '"'); } else { + // need to use the concat() trick published by Robert Rossney at https://stackoverflow.com/a/1352556/1067003 smart_str_appends(&output, "concat("); for (size_t i = 0; i < input_len; ++i) { uintptr_t bytesUntilSingleQuote = From 7ad4c910b3363d6c3f3729d6e6892c305fcaab78 Mon Sep 17 00:00:00 2001 From: hanshenrik Date: Wed, 21 Feb 2024 17:32:43 +0100 Subject: [PATCH 08/19] cleanup --- ext/dom/xpath.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ext/dom/xpath.c b/ext/dom/xpath.c index a1450a7c5f094..315814fb41bb6 100644 --- a/ext/dom/xpath.c +++ b/ext/dom/xpath.c @@ -29,7 +29,7 @@ /* * class DOMXPath */ -#define LIBXML_XPATH_ENABLED + #ifdef LIBXML_XPATH_ENABLED void dom_xpath_objects_free_storage(zend_object *object) From e3fb678d3214c9064b1f4effe65f1b7fff3c8f81 Mon Sep 17 00:00:00 2001 From: hanshenrik Date: Wed, 21 Feb 2024 17:46:22 +0100 Subject: [PATCH 09/19] micro optimize --- ext/dom/xpath.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ext/dom/xpath.c b/ext/dom/xpath.c index 315814fb41bb6..088fce8a4d42e 100644 --- a/ext/dom/xpath.c +++ b/ext/dom/xpath.c @@ -467,7 +467,7 @@ PHP_METHOD(DOMXPath, quote) { } else { // need to use the concat() trick published by Robert Rossney at https://stackoverflow.com/a/1352556/1067003 smart_str_appends(&output, "concat("); - for (size_t i = 0; i < input_len; ++i) { + for (size_t i = 0; i < input_len;) { uintptr_t bytesUntilSingleQuote = (uintptr_t)memchr(input + i, '\'', input_len - i); if (bytesUntilSingleQuote == 0) { @@ -491,7 +491,7 @@ PHP_METHOD(DOMXPath, quote) { smart_str_appendc(&output, quoteMethod); smart_str_appendl(&output, input + i, bytesUntilQuote); smart_str_appendc(&output, quoteMethod); - i += bytesUntilQuote - 1; + i += bytesUntilQuote; smart_str_appends(&output, ","); } output.s->val[output.s->len -1 ] = ')'; // is there a smart_str function for this? (probably not) From 62a6eedee35d3dd5b6ebbd1b41540e9cef73bbb3 Mon Sep 17 00:00:00 2001 From: hanshenrik Date: Wed, 21 Feb 2024 18:06:23 +0100 Subject: [PATCH 10/19] derp --- ext/dom/xpath.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ext/dom/xpath.c b/ext/dom/xpath.c index 088fce8a4d42e..a5c764f2080f8 100644 --- a/ext/dom/xpath.c +++ b/ext/dom/xpath.c @@ -492,7 +492,7 @@ PHP_METHOD(DOMXPath, quote) { smart_str_appendl(&output, input + i, bytesUntilQuote); smart_str_appendc(&output, quoteMethod); i += bytesUntilQuote; - smart_str_appends(&output, ","); + smart_str_appendc(&output, ','); } output.s->val[output.s->len -1 ] = ')'; // is there a smart_str function for this? (probably not) } From 307a9bea67a41ac53da7015fbbed119557d1e800 Mon Sep 17 00:00:00 2001 From: divinity76 Date: Wed, 21 Feb 2024 23:57:06 +0100 Subject: [PATCH 11/19] Update ext/dom/tests/DOMXPath_quote.phpt Co-authored-by: Niels Dossche <7771979+nielsdos@users.noreply.github.com> --- ext/dom/tests/DOMXPath_quote.phpt | 2 ++ 1 file changed, 2 insertions(+) diff --git a/ext/dom/tests/DOMXPath_quote.phpt b/ext/dom/tests/DOMXPath_quote.phpt index c2aeb027bf7cd..40856cac6a54f 100644 --- a/ext/dom/tests/DOMXPath_quote.phpt +++ b/ext/dom/tests/DOMXPath_quote.phpt @@ -1,5 +1,7 @@ --TEST-- Test DOMXPath::quote with various inputs +--EXTENSIONS-- +dom --SKIPIF-- --FILE-- From 641f01fba6438cef894073dd7c612ecb57819042 Mon Sep 17 00:00:00 2001 From: divinity76 Date: Thu, 22 Feb 2024 00:04:50 +0100 Subject: [PATCH 12/19] emptystring test --- ext/dom/tests/DOMXPath_quote.phpt | 2 ++ 1 file changed, 2 insertions(+) diff --git a/ext/dom/tests/DOMXPath_quote.phpt b/ext/dom/tests/DOMXPath_quote.phpt index 40856cac6a54f..f0ca165c1e8fa 100644 --- a/ext/dom/tests/DOMXPath_quote.phpt +++ b/ext/dom/tests/DOMXPath_quote.phpt @@ -46,6 +46,7 @@ function UserlandDOMXPathQuote(string $string): string $tests = [ + '' => "''", // empty string 'foo' => "'foo'", // no quotes '"foo' => '\'"foo\'', // double quotes only '\'foo' => '"\'foo"', // single quotes only @@ -74,6 +75,7 @@ foreach ($tests as $input => $expected) { } ?> --EXPECT-- +Pass: => '' Pass: foo => 'foo' Pass: "foo => '"foo' Pass: 'foo => "'foo" From 1d935fdf2e73e69c37204ac521656ba123ce8d18 Mon Sep 17 00:00:00 2001 From: divinity76 Date: Thu, 22 Feb 2024 00:13:33 +0100 Subject: [PATCH 13/19] Update ext/dom/xpath.c Co-authored-by: Niels Dossche <7771979+nielsdos@users.noreply.github.com> --- ext/dom/xpath.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ext/dom/xpath.c b/ext/dom/xpath.c index a5c764f2080f8..60a978cb56565 100644 --- a/ext/dom/xpath.c +++ b/ext/dom/xpath.c @@ -448,7 +448,7 @@ PHP_METHOD(DOMXPath, registerPhpFunctionNS) /* {{{ */ PHP_METHOD(DOMXPath, quote) { - char *input; + const char *input; size_t input_len; smart_str output = {0}; From e9d499090e2fb58ebafb3badb68ccecd1f8ffef7 Mon Sep 17 00:00:00 2001 From: hanshenrik Date: Thu, 22 Feb 2024 00:38:07 +0100 Subject: [PATCH 14/19] micro-optimize requested in PR feedback :o --- ext/dom/xpath.c | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/ext/dom/xpath.c b/ext/dom/xpath.c index 60a978cb56565..e55ed33848e4e 100644 --- a/ext/dom/xpath.c +++ b/ext/dom/xpath.c @@ -450,23 +450,27 @@ PHP_METHOD(DOMXPath, registerPhpFunctionNS) PHP_METHOD(DOMXPath, quote) { const char *input; size_t input_len; - smart_str output = {0}; if (zend_parse_parameters(ZEND_NUM_ARGS(), "s", &input, &input_len) == FAILURE) { RETURN_THROWS(); } if (memchr(input, '\'', input_len) == NULL) { - smart_str_appendc(&output, '\''); - smart_str_appendl(&output, input, input_len); - smart_str_appendc(&output, '\''); + zend_string *output = zend_string_alloc(input_len + 2, 0); + output->val[0] = '\''; + memcpy(output->val + 1, input, input_len); + output->val[input_len + 1] = '\''; + RETURN_STR(output); } else if (memchr(input, '"', input_len) == NULL) { - smart_str_appendc(&output, '"'); - smart_str_appendl(&output, input, input_len); - smart_str_appendc(&output, '"'); + zend_string *output = zend_string_alloc(input_len + 2, 0); + output->val[0] = '"'; + memcpy(output->val + 1, input, input_len); + output->val[input_len + 1] = '"'; + RETURN_STR(output); } else { + smart_str output = {0}; // need to use the concat() trick published by Robert Rossney at https://stackoverflow.com/a/1352556/1067003 - smart_str_appends(&output, "concat("); + smart_str_appendl(&output, "concat(", 7); for (size_t i = 0; i < input_len;) { uintptr_t bytesUntilSingleQuote = (uintptr_t)memchr(input + i, '\'', input_len - i); @@ -495,8 +499,8 @@ PHP_METHOD(DOMXPath, quote) { smart_str_appendc(&output, ','); } output.s->val[output.s->len -1 ] = ')'; // is there a smart_str function for this? (probably not) + RETURN_STR(smart_str_extract(&output)); } - RETURN_STR(smart_str_extract(&output)); } /* }}} */ From f90a244738083f85d7dc1f1f8312ee4d4e75b0a6 Mon Sep 17 00:00:00 2001 From: divinity76 Date: Thu, 22 Feb 2024 00:42:48 +0100 Subject: [PATCH 15/19] Update ext/dom/xpath.c Co-authored-by: Niels Dossche <7771979+nielsdos@users.noreply.github.com> --- ext/dom/xpath.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/ext/dom/xpath.c b/ext/dom/xpath.c index e55ed33848e4e..a4f2cd5c08234 100644 --- a/ext/dom/xpath.c +++ b/ext/dom/xpath.c @@ -486,10 +486,7 @@ PHP_METHOD(DOMXPath, quote) { } else { bytesUntilDoubleQuote = bytesUntilDoubleQuote - (uintptr_t)(input + i); } - const size_t bytesUntilQuote = - (bytesUntilSingleQuote > bytesUntilDoubleQuote) - ? bytesUntilSingleQuote - : bytesUntilDoubleQuote; + const size_t bytesUntilQuote = MAX(bytesUntilSingleQuote, bytesUntilDoubleQuote); const char quoteMethod = (bytesUntilSingleQuote > bytesUntilDoubleQuote) ? '\'' : '"'; smart_str_appendc(&output, quoteMethod); From a20e67b5b4c79d0c51b78df5cc4482010376f577 Mon Sep 17 00:00:00 2001 From: divinity76 Date: Thu, 22 Feb 2024 00:43:25 +0100 Subject: [PATCH 16/19] Update ext/dom/xpath.c Co-authored-by: Niels Dossche <7771979+nielsdos@users.noreply.github.com> --- ext/dom/xpath.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ext/dom/xpath.c b/ext/dom/xpath.c index a4f2cd5c08234..d36d92ee035e6 100644 --- a/ext/dom/xpath.c +++ b/ext/dom/xpath.c @@ -495,7 +495,7 @@ PHP_METHOD(DOMXPath, quote) { i += bytesUntilQuote; smart_str_appendc(&output, ','); } - output.s->val[output.s->len -1 ] = ')'; // is there a smart_str function for this? (probably not) + output.s->val[output.s->len - 1] = ')'; RETURN_STR(smart_str_extract(&output)); } } From ce9b73772298e1d57dbddab17b07d71f6ef4a0e6 Mon Sep 17 00:00:00 2001 From: hanshenrik Date: Thu, 22 Feb 2024 01:26:38 +0100 Subject: [PATCH 17/19] better loop --- ext/dom/xpath.c | 40 +++++++++++++++------------------------- 1 file changed, 15 insertions(+), 25 deletions(-) diff --git a/ext/dom/xpath.c b/ext/dom/xpath.c index d36d92ee035e6..b09c92e8b233b 100644 --- a/ext/dom/xpath.c +++ b/ext/dom/xpath.c @@ -450,19 +450,17 @@ PHP_METHOD(DOMXPath, registerPhpFunctionNS) PHP_METHOD(DOMXPath, quote) { const char *input; size_t input_len; - - if (zend_parse_parameters(ZEND_NUM_ARGS(), "s", &input, &input_len) == - FAILURE) { + if (zend_parse_parameters(ZEND_NUM_ARGS(), "s", &input, &input_len) == FAILURE) { RETURN_THROWS(); } if (memchr(input, '\'', input_len) == NULL) { - zend_string *output = zend_string_alloc(input_len + 2, 0); + zend_string *const output = zend_string_alloc(input_len + 2, 0); output->val[0] = '\''; memcpy(output->val + 1, input, input_len); output->val[input_len + 1] = '\''; RETURN_STR(output); } else if (memchr(input, '"', input_len) == NULL) { - zend_string *output = zend_string_alloc(input_len + 2, 0); + zend_string *const output = zend_string_alloc(input_len + 2, 0); output->val[0] = '"'; memcpy(output->val + 1, input, input_len); output->val[input_len + 1] = '"'; @@ -471,30 +469,22 @@ PHP_METHOD(DOMXPath, quote) { smart_str output = {0}; // need to use the concat() trick published by Robert Rossney at https://stackoverflow.com/a/1352556/1067003 smart_str_appendl(&output, "concat(", 7); - for (size_t i = 0; i < input_len;) { - uintptr_t bytesUntilSingleQuote = - (uintptr_t)memchr(input + i, '\'', input_len - i); - if (bytesUntilSingleQuote == 0) { - bytesUntilSingleQuote = input_len - i; - } else { - bytesUntilSingleQuote = bytesUntilSingleQuote - (uintptr_t)(input + i); - } - uintptr_t bytesUntilDoubleQuote = - (uintptr_t)memchr(input + i, '"', input_len - i); - if (bytesUntilDoubleQuote == 0) { - bytesUntilDoubleQuote = input_len - i; - } else { - bytesUntilDoubleQuote = bytesUntilDoubleQuote - (uintptr_t)(input + i); - } - const size_t bytesUntilQuote = MAX(bytesUntilSingleQuote, bytesUntilDoubleQuote); - const char quoteMethod = - (bytesUntilSingleQuote > bytesUntilDoubleQuote) ? '\'' : '"'; + const char *ptr = input; + const char *const end = input + input_len; + while (ptr < end) { + const char *const single_quote_ptr = memchr(ptr, '\'', end - ptr); + const char *const double_quote_ptr = memchr(ptr, '"', end - ptr); + const size_t distance_to_single_quote = single_quote_ptr ? single_quote_ptr - ptr : end - ptr; + const size_t distance_to_double_quote = double_quote_ptr ? double_quote_ptr - ptr : end - ptr; + const size_t bytesUntilQuote = MAX(distance_to_single_quote, distance_to_double_quote); + const char quoteMethod = (distance_to_single_quote > distance_to_double_quote) ? '\'' : '"'; smart_str_appendc(&output, quoteMethod); - smart_str_appendl(&output, input + i, bytesUntilQuote); + smart_str_appendl(&output, ptr, bytesUntilQuote); smart_str_appendc(&output, quoteMethod); - i += bytesUntilQuote; + ptr += bytesUntilQuote; smart_str_appendc(&output, ','); } + ZEND_ASSERT(ptr == end); output.s->val[output.s->len - 1] = ')'; RETURN_STR(smart_str_extract(&output)); } From d930979e48e5c8c493b1f38d14e466f400edd093 Mon Sep 17 00:00:00 2001 From: hanshenrik Date: Thu, 22 Feb 2024 01:37:18 +0100 Subject: [PATCH 18/19] snakes on a plane --- ext/dom/xpath.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/ext/dom/xpath.c b/ext/dom/xpath.c index b09c92e8b233b..12cff592f3ab4 100644 --- a/ext/dom/xpath.c +++ b/ext/dom/xpath.c @@ -476,12 +476,12 @@ PHP_METHOD(DOMXPath, quote) { const char *const double_quote_ptr = memchr(ptr, '"', end - ptr); const size_t distance_to_single_quote = single_quote_ptr ? single_quote_ptr - ptr : end - ptr; const size_t distance_to_double_quote = double_quote_ptr ? double_quote_ptr - ptr : end - ptr; - const size_t bytesUntilQuote = MAX(distance_to_single_quote, distance_to_double_quote); - const char quoteMethod = (distance_to_single_quote > distance_to_double_quote) ? '\'' : '"'; - smart_str_appendc(&output, quoteMethod); - smart_str_appendl(&output, ptr, bytesUntilQuote); - smart_str_appendc(&output, quoteMethod); - ptr += bytesUntilQuote; + const size_t bytes_until_quote = MAX(distance_to_single_quote, distance_to_double_quote); + const char quote_method = (distance_to_single_quote > distance_to_double_quote) ? '\'' : '"'; + smart_str_appendc(&output, quote_method); + smart_str_appendl(&output, ptr, bytes_until_quote); + smart_str_appendc(&output, quote_method); + ptr += bytes_until_quote; smart_str_appendc(&output, ','); } ZEND_ASSERT(ptr == end); From 7c478a19bce51a474b10d7d75e0e1a02b6212e14 Mon Sep 17 00:00:00 2001 From: hanshenrik Date: Thu, 22 Feb 2024 10:17:03 +0100 Subject: [PATCH 19/19] safe_alloc + null terminated --- ext/dom/xpath.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/ext/dom/xpath.c b/ext/dom/xpath.c index 12cff592f3ab4..168316ca289e2 100644 --- a/ext/dom/xpath.c +++ b/ext/dom/xpath.c @@ -454,16 +454,18 @@ PHP_METHOD(DOMXPath, quote) { RETURN_THROWS(); } if (memchr(input, '\'', input_len) == NULL) { - zend_string *const output = zend_string_alloc(input_len + 2, 0); + zend_string *const output = zend_string_safe_alloc(1, input_len, 2, false); output->val[0] = '\''; memcpy(output->val + 1, input, input_len); output->val[input_len + 1] = '\''; + output->val[input_len + 2] = '\0'; RETURN_STR(output); } else if (memchr(input, '"', input_len) == NULL) { - zend_string *const output = zend_string_alloc(input_len + 2, 0); + zend_string *const output = zend_string_safe_alloc(1, input_len, 2, false); output->val[0] = '"'; memcpy(output->val + 1, input, input_len); output->val[input_len + 1] = '"'; + output->val[input_len + 2] = '\0'; RETURN_STR(output); } else { smart_str output = {0};