forked from CuyZ/Valinor
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
fix: correctly handle message formatting for long truncated UTF8 strings
- Loading branch information
Showing
9 changed files
with
189 additions
and
7 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,51 @@ | ||
<?php | ||
|
||
declare(strict_types=1); | ||
|
||
namespace CuyZ\Valinor\Utility\String; | ||
|
||
use function substr; | ||
|
||
/**
 * Truncates a string to a maximum number of bytes without leaving a
 * partially cut multibyte UTF-8 sequence at the end of the result.
 *
 * @internal
 */
final class StringCutter
{
    /**
     * Returns at most `$length` leading bytes of `$s`.
     *
     * Delegates to `mb_strcut()` when that function exists, and to
     * {@see self::cutPolyfill()} otherwise.
     *
     * @param string $s      The string to truncate.
     * @param int    $length Maximum size of the result, in bytes.
     */
    public static function cut(string $s, int $length): string
    {
        if (function_exists('mb_strcut')) {
            // The encoding is passed explicitly so that the result does not
            // depend on the configured `mb_internal_encoding()`.
            return mb_strcut($s, 0, $length, 'UTF-8');
        }

        return self::cutPolyfill($s, $length);
    }

    /**
     * Byte-wise truncation that removes an incomplete UTF-8 sequence left at
     * the end of the string by the cut.
     *
     * For malformed input ending with stray continuation bytes, the result
     * is cut at the first non-continuation byte found while scanning
     * backwards, unless that byte is a lead byte announcing exactly the
     * number of continuation bytes that were counted.
     *
     * @param string $s      The string to truncate.
     * @param int    $length Maximum size of the result, in bytes.
     */
    public static function cutPolyfill(string $s, int $length): string
    {
        $s = substr($s, 0, $length);

        // Nothing is left to inspect; reading the last byte of an empty
        // string would raise an "Uninitialized string offset" warning.
        if ($s === '') {
            return '';
        }

        $cur = strlen($s) - 1;

        // U+0000 - U+007F
        if ((ord($s[$cur]) & 0b1000_0000) === 0) {
            return $s;
        }

        // Walk backwards over the trailing continuation bytes (`10xxxxxx`),
        // counting them, until another kind of byte or the start is reached.
        $cnt = 0;
        while ((ord($s[$cur]) & 0b1100_0000) === 0b1000_0000) {
            ++$cnt;
            if ($cur === 0) {
                // @infection-ignore-all // Causes infinite loop
                break;
            }
            --$cur;
        }

        assert($cur >= 0);

        $lead = ord($s[$cur]);

        // The string is kept as is only when the byte at `$cur` is a lead
        // byte matching the number of continuation bytes that follow it;
        // otherwise everything from `$cur` onwards is dropped.
        return match (true) {
            // U+0080 - U+07FF
            $cnt === 1 && ($lead & 0b1110_0000) === 0b1100_0000,
            // U+0800 - U+FFFF
            $cnt === 2 && ($lead & 0b1111_0000) === 0b1110_0000,
            // U+10000 - U+10FFFF
            $cnt === 3 && ($lead & 0b1111_1000) === 0b1111_0000 => $s,
            default => substr($s, 0, $cur),
        };
    }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,105 @@ | ||
<?php | ||
|
||
declare(strict_types=1); | ||
|
||
namespace CuyZ\Valinor\Tests\Unit\Utility\String; | ||
|
||
use CuyZ\Valinor\Utility\String\StringCutter; | ||
use PHPUnit\Framework\TestCase; | ||
|
||
/**
 * Exercises `StringCutter::cutPolyfill()` with UTF-8 sequences of every
 * length, cut both on and inside a character boundary, plus a few malformed
 * inputs.
 */
final class StringCutterTest extends TestCase
{
    /**
     * @dataProvider mb_strcut_polyfill_data_provider
     */
    public function test_mb_strcut_polyfill(string $base, int $length, string $expected): void
    {
        $cut = StringCutter::cutPolyfill($base, $length);

        self::assertSame($expected, $cut);
    }

    /**
     * Datasets made of the input string, the byte length to cut at and the
     * expected result.
     *
     * Declared static because recent PHPUnit versions no longer accept
     * non-static data providers.
     *
     * @return iterable<string, array{base: string, length: int, expected: string}>
     */
    public static function mb_strcut_polyfill_data_provider(): iterable
    {
        yield '1 byte' => [
            'base' => 'foobar',
            'length' => 3,
            'expected' => 'foo',
        ];

        yield '2 bytes not cut' => [
            'base' => "foo\u{07FF}bar",
            'length' => 5,
            'expected' => "foo\u{07FF}",
        ];

        yield '2 bytes cut' => [
            'base' => "foo\u{07FF}",
            'length' => 4,
            'expected' => 'foo',
        ];

        yield '3 bytes not cut' => [
            'base' => "foo\u{FFFF}bar",
            'length' => 6,
            'expected' => "foo\u{FFFF}",
        ];

        yield '3 bytes cut' => [
            'base' => "foo\u{FFFF}bar",
            'length' => 5,
            'expected' => 'foo',
        ];

        yield '4 bytes not cut #1' => [
            'base' => "foo\u{10FFFD}bar",
            'length' => 7,
            'expected' => "foo\u{10FFFD}",
        ];

        yield '4 bytes cut #1' => [
            'base' => "foo\u{10FFFD}bar",
            'length' => 6,
            'expected' => 'foo',
        ];

        yield '4 bytes not cut #2' => [
            'base' => "foo\u{90000}bar",
            'length' => 7,
            'expected' => "foo\u{90000}",
        ];

        yield '4 bytes not cut #3' => [
            'base' => "foo\u{40000}bar",
            'length' => 7,
            'expected' => "foo\u{40000}",
        ];

        yield '4 bytes #4' => [
            'base' => "foo🦄bar",
            'length' => 7,
            'expected' => "foo🦄",
        ];

        yield '4 bytes cut #4' => [
            'base' => "foo🦄bar",
            'length' => 6,
            'expected' => 'foo',
        ];
    }

    public function test_invalid_utf8(): void
    {
        // Invalid utf8 values are trimmed, if present at the end of the string
        // (really just an edge case we shouldn't care about)

        // `$trailer` is the lone continuation byte of a two-byte character.
        $base = "\u{07FF}";
        $trailer = substr($base, 1);
        self::assertSame('', StringCutter::cutPolyfill($trailer, 10));
        self::assertSame('', StringCutter::cutPolyfill($base . $trailer, 10));
        self::assertSame('', StringCutter::cutPolyfill($base . $trailer . $trailer, 10));
        self::assertSame('', StringCutter::cutPolyfill($base . $trailer . $trailer . $trailer, 10));

        // A lone lead byte without its continuation byte is dropped as well.
        self::assertSame('', StringCutter::cutPolyfill(substr($base, 0, 1), 10));
    }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters