Skip to content

Commit

Permalink
fix: correctly handle message formatting for long truncated UTF8 strings
Browse files Browse the repository at this point in the history
  • Loading branch information
danog authored Jun 6, 2023
1 parent 861c3b9 commit 0a8f37b
Show file tree
Hide file tree
Showing 9 changed files with 189 additions and 7 deletions.
9 changes: 9 additions & 0 deletions .github/workflows/tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -42,3 +42,12 @@ jobs:

- name: Running integration tests
run: php vendor/bin/phpunit --testsuite=integration

- name: Running unit tests (no mb_strcut)
run: php -d disable_functions=mb_strcut vendor/bin/phpunit --testsuite=unit

- name: Running functional tests (no mb_strcut)
run: php -d disable_functions=mb_strcut vendor/bin/phpunit --testsuite=functional

- name: Running integration tests (no mb_strcut)
run: php -d disable_functions=mb_strcut vendor/bin/phpunit --testsuite=integration
51 changes: 51 additions & 0 deletions src/Utility/String/StringCutter.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
<?php

declare(strict_types=1);

namespace CuyZ\Valinor\Utility\String;

use function substr;

/**
 * Truncates strings to a maximum number of bytes without ever splitting a
 * multi-byte UTF-8 character in the middle.
 *
 * @internal
 */
final class StringCutter
{
    /**
     * Returns the leading part of `$s` that fits in `$length` bytes, cut on a
     * UTF-8 character boundary.
     *
     * Delegates to `mb_strcut` when that function is available and to
     * {@see self::cutPolyfill()} otherwise.
     *
     * @param string $s      String to truncate.
     * @param int    $length Maximum size of the result, in bytes.
     */
    public static function cut(string $s, int $length): string
    {
        if (function_exists('mb_strcut')) {
            // The encoding is pinned so that this branch agrees with the
            // polyfill (which only understands UTF-8) regardless of the
            // configured `mb_internal_encoding()`.
            return mb_strcut($s, 0, $length, 'UTF-8');
        }

        return self::cutPolyfill($s, $length);
    }

    /**
     * Pure-PHP replacement for `mb_strcut($s, 0, $length)` on UTF-8 input.
     *
     * The string is first cut byte-wise; if that leaves an incomplete
     * multi-byte sequence at the end, the incomplete sequence is removed.
     * A trailing sequence that is not well-formed (for instance stray
     * continuation bytes) is removed the same way.
     *
     * @param string $s      String to truncate.
     * @param int    $length Maximum size of the result, in bytes.
     */
    public static function cutPolyfill(string $s, int $length): string
    {
        $s = substr($s, 0, $length);

        // Nothing left to inspect: reading `$s[-1]` on an empty string would
        // raise an "Uninitialized string offset" warning.
        if ($s === '') {
            return '';
        }

        $cur = strlen($s) - 1;

        // U+0000 - U+007F: the last byte is a complete single-byte character.
        if ((ord($s[$cur]) & 0b1000_0000) === 0) {
            return $s;
        }

        // Walk backwards over the trailing continuation bytes (10xxxxxx).
        // Stopping at offset 0 keeps the index valid; a continuation byte
        // found there can never be a valid lead byte, so the sequence is
        // rejected below either way.
        $cnt = 0;
        while ($cur > 0 && (ord($s[$cur]) & 0b1100_0000) === 0b1000_0000) {
            ++$cnt;
            --$cur;
        }

        $lead = ord($s[$cur]);

        // The trailing sequence is complete only when the lead byte announces
        // exactly the number of continuation bytes that follow it.
        $isComplete = match ($cnt) {
            // U+0080 - U+07FF
            1 => ($lead & 0b1110_0000) === 0b1100_0000,
            // U+0800 - U+FFFF
            2 => ($lead & 0b1111_0000) === 0b1110_0000,
            // U+10000 - U+10FFFF
            3 => ($lead & 0b1111_1000) === 0b1111_0000,
            default => false,
        };

        return $isComplete ? $s : substr($s, 0, $cur);
    }
}
2 changes: 1 addition & 1 deletion src/Utility/String/StringFormatter.php
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ public static function for(HasParameters $message): string
private static function formatWithIntl(string $locale, string $body, array $parameters): string
{
return MessageFormatter::formatMessage($locale, $body, $parameters)
?: throw new StringFormatterError($body);
?: throw new StringFormatterError($body, intl_get_error_message());
}

/**
Expand Down
7 changes: 5 additions & 2 deletions src/Utility/String/StringFormatterError.php
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,11 @@
/** @internal */
final class StringFormatterError extends RuntimeException
{
public function __construct(string $body)
public function __construct(string $body, string $message = '')
{
parent::__construct("Message formatter error using `$body`.", 1652901203);
if ($message !== '') {
$message = ": $message";
}
parent::__construct("Message formatter error using `$body`$message.", 1652901203);
}
}
5 changes: 3 additions & 2 deletions src/Utility/ValueDumper.php
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
namespace CuyZ\Valinor\Utility;

use BackedEnum;
use CuyZ\Valinor\Utility\String\StringCutter;
use DateTimeInterface;
use UnitEnum;

Expand Down Expand Up @@ -112,11 +113,11 @@ private static function crop(string $string): string
return $string;
}

$string = substr($string, 0, self::MAX_STRING_LENGTH + 1);
$string = StringCutter::cut($string, self::MAX_STRING_LENGTH + 1);

for ($i = strlen($string) - 1; $i > 10; $i--) {
if ($string[$i] === ' ') {
return substr($string, 0, $i) . '';
return StringCutter::cut($string, $i) . '';
}
}

Expand Down
12 changes: 12 additions & 0 deletions tests/Integration/Mapping/Other/StrictMappingTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -192,6 +192,18 @@ public function test_invalid_union_value_throws_exception(): void
}
}

/**
 * A long multi-byte value that matches none of the union's types must be
 * reported with the value cropped on a character boundary, so the resulting
 * message stays valid UTF-8.
 */
public function test_invalid_utf8_union_value_throws_exception(): void
{
    try {
        (new MapperBuilder())->mapper()->map('bool|int|float', '🦄🦄🦄🦄🦄🦄🦄🦄🦄🦄🦄🦄🦄🦄🦄🦄🦄🦄🦄🦄🦄🦄🦄🦄🦄🦄🦄🦄🦄🦄🦄🦄🦄🦄🦄🦄🦄🦄🦄🦄🦄🦄🦄🦄🦄🦄🦄🦄🦄🦄🦄🦄🦄🦄🦄');

        // Without this the test would pass silently if mapping ever stopped
        // raising an error for this input.
        self::fail('No mapping error was thrown for an invalid union value.');
    } catch (MappingError $exception) {
        $error = $exception->node()->messages()[0];

        self::assertSame('1607027306', $error->code());
        self::assertSame("Value '🦄🦄🦄🦄🦄🦄🦄🦄🦄🦄🦄🦄…' does not match any of `bool`, `int`, `float`.", (string)$error);
    }
}

public function test_null_in_union_value_throws_exception(): void
{
try {
Expand Down
105 changes: 105 additions & 0 deletions tests/Unit/Utility/String/StringCutterTest.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,105 @@
<?php

declare(strict_types=1);

namespace CuyZ\Valinor\Tests\Unit\Utility\String;

use CuyZ\Valinor\Utility\String\StringCutter;
use PHPUnit\Framework\TestCase;

/**
 * Exercises the pure-PHP fallback of StringCutter directly, so the behaviour
 * is covered whether or not `mb_strcut` is available at runtime.
 */
final class StringCutterTest extends TestCase
{
    /**
     * @dataProvider mb_strcut_polyfill_data_provider
     */
    public function test_mb_strcut_polyfill(string $base, int $length, string $expected): void
    {
        $cut = StringCutter::cutPolyfill($base, $length);

        self::assertSame($expected, $cut);
    }

    /**
     * Each dataset cuts a string either exactly after a multi-byte character
     * ("not cut") or somewhere inside it ("cut"), for every UTF-8 sequence
     * length.
     *
     * Declared static because PHPUnit 10 deprecates non-static data
     * providers; static providers are accepted by earlier versions as well.
     *
     * @return iterable<string, array{base: string, length: int, expected: string}>
     */
    public static function mb_strcut_polyfill_data_provider(): iterable
    {
        yield '1 byte' => [
            'base' => 'foobar',
            'length' => 3,
            'expected' => 'foo',
        ];

        yield '2 bytes not cut' => [
            'base' => "foo\u{07FF}bar",
            'length' => 5,
            'expected' => "foo\u{07FF}",
        ];

        yield '2 bytes cut' => [
            'base' => "foo\u{07FF}",
            'length' => 4,
            'expected' => 'foo',
        ];

        yield '3 bytes not cut' => [
            'base' => "foo\u{FFFF}bar",
            'length' => 6,
            'expected' => "foo\u{FFFF}",
        ];

        yield '3 bytes cut' => [
            'base' => "foo\u{FFFF}bar",
            'length' => 5,
            'expected' => 'foo',
        ];

        yield '4 bytes not cut #1' => [
            'base' => "foo\u{10FFFD}bar",
            'length' => 7,
            'expected' => "foo\u{10FFFD}",
        ];

        yield '4 bytes cut #1' => [
            'base' => "foo\u{10FFFD}bar",
            'length' => 6,
            'expected' => 'foo',
        ];

        yield '4 bytes not cut #2' => [
            'base' => "foo\u{90000}bar",
            'length' => 7,
            'expected' => "foo\u{90000}",
        ];

        yield '4 bytes not cut #3' => [
            'base' => "foo\u{40000}bar",
            'length' => 7,
            'expected' => "foo\u{40000}",
        ];

        yield '4 bytes not cut #4' => [
            'base' => "foo🦄bar",
            'length' => 7,
            'expected' => "foo🦄",
        ];

        yield '4 bytes cut #4' => [
            'base' => "foo🦄bar",
            'length' => 6,
            'expected' => 'foo',
        ];
    }

    public function test_invalid_utf8(): void
    {
        // Invalid UTF-8 sequences are trimmed when they sit at the end of the
        // string (really just an edge case we shouldn't care about).

        // U+07FF is a two-byte character: `$trailer` is its lone continuation
        // byte, `substr($base, 0, 1)` its lone lead byte.
        $base = "\u{07FF}";
        $trailer = substr($base, 1);
        self::assertSame('', StringCutter::cutPolyfill($trailer, 10));
        self::assertSame('', StringCutter::cutPolyfill($base . $trailer, 10));
        self::assertSame('', StringCutter::cutPolyfill($base . $trailer . $trailer, 10));
        self::assertSame('', StringCutter::cutPolyfill($base . $trailer . $trailer . $trailer, 10));

        self::assertSame('', StringCutter::cutPolyfill(substr($base, 0, 1), 10));
    }
}
4 changes: 2 additions & 2 deletions tests/Unit/Utility/String/StringFormatterTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ final class StringFormatterTest extends TestCase
public function test_wrong_intl_format_throws_exception(): void
{
$this->expectException(StringFormatterError::class);
$this->expectExceptionMessage('Message formatter error using `some {wrong.format}`.');
$this->expectExceptionMessage('Message formatter error using `some {wrong.format}`: pattern syntax error (parse error at offset 6, after "some {", before or at "wrong.format}"): U_PATTERN_SYNTAX_ERROR.');
$this->expectExceptionCode(1652901203);

StringFormatter::format('en', 'some {wrong.format}', []);
Expand All @@ -26,7 +26,7 @@ public function test_wrong_intl_format_throws_exception(): void
public function test_wrong_message_body_format_throws_exception(): void
{
$this->expectException(StringFormatterError::class);
$this->expectExceptionMessage('Message formatter error using `some message with {invalid format}`.');
$this->expectExceptionMessage('Message formatter error using `some message with {invalid format}`: pattern syntax error (parse error at offset 19, after " message with {", before or at "invalid format}"): U_PATTERN_SYNTAX_ERROR.');
$this->expectExceptionCode(1652901203);

StringFormatter::format('en', 'some message with {invalid format}');
Expand Down
1 change: 1 addition & 0 deletions tests/Unit/Utility/ValueDumperTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ public function dump_value_returns_correct_signature_data_provider(): array
'string with both quotes' => ['"foo\'bar"', '\'"foo\\\'bar"\''],
'string with exact max length' => ['Lorem ipsum dolor sit amet, consectetur adipiscing', "'Lorem ipsum dolor sit amet, consectetur adipiscing'"],
'string cropped' => ['Lorem ipsum dolor sit amet, consectetur adipiscing elit.', "'Lorem ipsum dolor sit amet, consectetur adipiscing…'"],
'utf8 string cropped' => ['🦄🦄🦄🦄🦄🦄🦄🦄🦄🦄🦄🦄🦄🦄🦄🦄🦄🦄🦄🦄🦄🦄🦄🦄🦄🦄🦄🦄🦄🦄🦄🦄🦄🦄🦄🦄', "'🦄🦄🦄🦄🦄🦄🦄🦄🦄🦄🦄🦄…'"],
'string cropped only after threshold' => ['Lorem12345 ipsumdolorsitamet,consecteturadipiscingelit.Curabitur', "'Lorem12345 ipsumdolorsitamet,consecteturadipiscinge…'"],
'string without space cropped' => ['Loremipsumdolorsitamet,consecteturadipiscingelit.Curabitur',"'Loremipsumdolorsitamet,consecteturadipiscingelit.Cu…'"],
'date' => [new DateTimeImmutable('@1648733888'), '2022/03/31 13:38:08'],
Expand Down

0 comments on commit 0a8f37b

Please sign in to comment.