Skip to content

Commit

Permalink
DOMXPath::quote(string $str): string
Browse files Browse the repository at this point in the history
Adds a method to quote strings for safe use in XPath expressions,
similar to PDO::quote() / mysqli::real_escape_string.

sample usage: $xp->query("//span[contains(text()," . $xp->quote($string) . ")]")

The algorithm is derived from Robert Rossney's research into XPath quoting, published at https://stackoverflow.com/a/1352556/1067003
(but using an improved implementation I wrote myself, originally for chrome-php/chrome#575).
  • Loading branch information
divinity76 committed Feb 21, 2024
1 parent 7ed26c0 commit 1730af1
Show file tree
Hide file tree
Showing 4 changed files with 184 additions and 1 deletion.
2 changes: 2 additions & 0 deletions ext/dom/php_dom.stub.php
Original file line number Diff line number Diff line change
Expand Up @@ -934,6 +934,8 @@ public function registerNamespace(string $prefix, string $namespace): bool {}
public function registerPhpFunctions(string|array|null $restrict = null): void {}

public function registerPhpFunctionNS(string $namespaceURI, string $name, callable $callable): void {}

/**
 * Quotes a string so it can be embedded as a string literal in an XPath expression.
 *
 * The result is the input wrapped in single quotes, or in double quotes when the
 * input contains a single quote, or a concat(...) expression when it contains both.
 */
public static function quote(string $str): string {}
}
#endif

Expand Down
12 changes: 11 additions & 1 deletion ext/dom/php_dom_arginfo.h

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

84 changes: 84 additions & 0 deletions ext/dom/tests/DOMXPath_quote.phpt
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
--TEST--
Test DOMXPath::quote with various inputs
--SKIPIF--
<?php if (!class_exists('DOMXPath')) die('skip DOMXPath not available.'); ?>
--FILE--
<?php
// An empty document is enough here: the test never loads or queries any XML,
// it only needs a DOMXPath instance on which to call quote().
$dom = new DOMDocument();
$xpath = new DOMXPath($dom);


/**
 * Pure-PHP reference implementation of XPath string quoting.
 *
 * Produces an XPath expression that evaluates to exactly $string:
 *  - no single quote in the input: the input wrapped in single quotes;
 *  - single quotes but no double quote: the input wrapped in double quotes;
 *  - both kinds present: a concat(...) call whose arguments are literals,
 *    each delimited by the quote character it does not contain.
 *
 * Example: $xp->query("//span[contains(text()," . $xp->quote($string) . ")]")
 *
 * @param string $string string to quote.
 * @return string quoted string.
 */
function UserlandDOMXPathQuote(string $string): string
{
    if (!\str_contains($string, "'")) {
        return "'" . $string . "'";
    }
    if (!\str_contains($string, '"')) {
        return '"' . $string . '"';
    }

    // Both quote characters occur. Repeatedly cut off the longest prefix that
    // avoids one of them and delimit it with that one, e.g.:
    //   'foo"bar => concat("'foo",'"bar')
    $literals = [];
    for ($rest = $string; $rest !== ''; $rest = \substr($rest, $take)) {
        $toSingle = \strcspn($rest, "'");
        $toDouble = \strcspn($rest, '"');
        if ($toSingle > $toDouble) {
            // The single quote is further away, so it is free to act as delimiter.
            $delimiter = "'";
            $take = $toSingle;
        } else {
            $delimiter = '"';
            $take = $toDouble;
        }
        $literals[] = $delimiter . \substr($rest, 0, $take) . $delimiter;
    }

    return 'concat(' . \implode(',', $literals) . ')';
}



// Table of inputs mapped to the exact XPath expression quote() must return.
$expectations = [
    // Inputs containing at most one kind of quote need a single literal.
    'foo' => "'foo'",
    '"foo' => '\'"foo\'',
    '\'foo' => '"\'foo"',
    // Inputs containing both kinds of quote need a concat() of literals.
    '\'foo"bar' => 'concat("\'foo",\'"bar\')', // double quote in the middle
    '\'foo"bar"baz' => 'concat("\'foo",\'"bar"baz\')', // several double quotes in the middle
    '\'foo"' => 'concat("\'foo",\'"\')', // trailing double quote
    '\'foo""' => 'concat("\'foo",\'""\')', // trailing run of double quotes
    '"\'foo' => 'concat(\'"\',"\'foo")', // leading double quote
    '""\'foo' => 'concat(\'""\',"\'foo")', // leading run of double quotes
    '\'foo""bar' => 'concat("\'foo",\'""bar\')', // run of double quotes in the middle
];

foreach ($expectations as $given => $wanted) {
    $actual = $xpath->quote($given);
    if ($actual !== $wanted) {
        // On a mismatch, also show what the userland reference produces to ease debugging.
        echo 'Fail: ';
        var_dump([
            'input' => $given,
            'expected' => $wanted,
            'result' => $actual,
            'userland_implementation_result' => UserlandDOMXPathQuote($given),
        ]);
        continue;
    }
    echo 'Pass: ' . $given . ' => ' . $actual . "\n";
}
?>
--EXPECT--
Pass: foo => 'foo'
Pass: "foo => '"foo'
Pass: 'foo => "'foo"
Pass: 'foo"bar => concat("'foo",'"bar')
Pass: 'foo"bar"baz => concat("'foo",'"bar"baz')
Pass: 'foo" => concat("'foo",'"')
Pass: 'foo"" => concat("'foo",'""')
Pass: "'foo => concat('"',"'foo")
Pass: ""'foo => concat('""',"'foo")
Pass: 'foo""bar => concat("'foo",'""bar')
87 changes: 87 additions & 0 deletions ext/dom/xpath.c
Original file line number Diff line number Diff line change
Expand Up @@ -446,6 +446,93 @@ PHP_METHOD(DOMXPath, registerPhpFunctionNS)
);
}

/* {{{ */
PHP_METHOD(DOMXPath, quote) {
char *input;
size_t input_len;
char *output;
size_t output_len = 0;

if (zend_parse_parameters(ZEND_NUM_ARGS(), "s", &input, &input_len) ==
FAILURE) {
RETURN_THROWS();
}
if (memchr(input, '\'', input_len) == NULL) {
output_len = input_len + 2;
output = emalloc(output_len);
output[0] = '\'';
memcpy(output + 1, input, input_len);
output[output_len - 1] = '\'';
} else if (memchr(input, '"', input_len) == NULL) {
output_len = input_len + 2;
output = emalloc(output_len);
output[0] = '"';
memcpy(output + 1, input, input_len);
output[output_len - 1] = '"';
} else {
// need to do the concat() trick published by Robert Rossney at https://stackoverflow.com/a/1352556/1067003
// first lets calculate the length (probably faster than repeated reallocs)
output_len = strlen("concat(");
size_t i;
for (size_t i = 0; i < input_len; ++i) {
uintptr_t bytesUntilSingleQuote =
(uintptr_t)memchr(input + i, '\'', input_len - i);
if (bytesUntilSingleQuote == 0) {
bytesUntilSingleQuote = input_len - i;
} else {
bytesUntilSingleQuote = bytesUntilSingleQuote - (uintptr_t)(input + i);
}
uintptr_t bytesUntilDoubleQuote =
(uintptr_t)memchr(input + i, '"', input_len - i);
if (bytesUntilDoubleQuote == 0) {
bytesUntilDoubleQuote = input_len - i;
} else {
bytesUntilDoubleQuote = bytesUntilDoubleQuote - (uintptr_t)(input + i);
}
const size_t bytesUntilQuote =
(bytesUntilSingleQuote > bytesUntilDoubleQuote)
? bytesUntilSingleQuote
: bytesUntilDoubleQuote;
i += bytesUntilQuote - 1;
output_len += 1 + bytesUntilQuote + 1 + 1; // "bytesUntilQuote"[,)]
}
output = emalloc(output_len);
size_t outputPos = strlen("concat(");
memcpy(output, "concat(", outputPos);
for (size_t i = 0; i < input_len; ++i) {
uintptr_t bytesUntilSingleQuote =
(uintptr_t)memchr(input + i, '\'', input_len - i);
if (bytesUntilSingleQuote == 0) {
bytesUntilSingleQuote = input_len - i;
} else {
bytesUntilSingleQuote = bytesUntilSingleQuote - (uintptr_t)(input + i);
}
uintptr_t bytesUntilDoubleQuote =
(uintptr_t)memchr(input + i, '"', input_len - i);
if (bytesUntilDoubleQuote == 0) {
bytesUntilDoubleQuote = input_len - i;
} else {
bytesUntilDoubleQuote = bytesUntilDoubleQuote - (uintptr_t)(input + i);
}
const size_t bytesUntilQuote =
(bytesUntilSingleQuote > bytesUntilDoubleQuote)
? bytesUntilSingleQuote
: bytesUntilDoubleQuote;
const char quoteMethod =
(bytesUntilSingleQuote > bytesUntilDoubleQuote) ? '\'' : '"';
output[outputPos++] = quoteMethod;
memcpy(output + outputPos, input + i, bytesUntilQuote);
outputPos += bytesUntilQuote;
output[outputPos++] = quoteMethod;
i += bytesUntilQuote - 1;
output[outputPos++] = ',';
}
output[outputPos - 1] = ')';
}
RETVAL_STRINGL(output, output_len);
}
/* }}} */

#endif /* LIBXML_XPATH_ENABLED */

#endif

0 comments on commit 1730af1

Please sign in to comment.