Skip to content

Commit

Permalink
utils: Add utf8_check function
Browse files Browse the repository at this point in the history
Add 'utf8_check' function to check if a given string is utf8 encoded.

Signed-off-by: James Roy <rruuaanng@outlook.com>
  • Loading branch information
rruuaanng committed Nov 2, 2024
1 parent 2f23313 commit ed28e46
Show file tree
Hide file tree
Showing 3 changed files with 53 additions and 0 deletions.
10 changes: 10 additions & 0 deletions include/zephyr/sys/util.h
Original file line number Diff line number Diff line change
Expand Up @@ -685,6 +685,16 @@ char *utf8_trunc(char *utf8_str);
*/
char *utf8_lcpy(char *dst, const char *src, size_t n);

/**
 * @brief Checks if the given string @p str is UTF-8 encoded.
 *
 * @param str Target string
 * @param len The length of string @p str, in bytes
 *
 * @return true if @p str is UTF-8 encoded, and false otherwise
 *         (including when @p len is negative).
 */
bool utf8_check(const uint8_t *str, int len);

/*
 * Integer log2 helpers: each evaluates to the bit index of the most
 * significant set bit of x, derived from the compiler's count-leading-zeros
 * builtins.
 * NOTE(review): __builtin_clz()/__builtin_clzll() are undefined for a zero
 * argument, so x is presumably expected to be non-zero -- confirm with callers.
 */
/* Variant using __builtin_clz() and a 32-bit width. */
#define __z_log2d(x) (32 - __builtin_clz(x) - 1)
/* Variant using __builtin_clzll() and a 64-bit width. */
#define __z_log2q(x) (64 - __builtin_clzll(x) - 1)
/* Picks the 64-bit variant when the type of x is wider than 4 bytes. */
#define __z_log2(x) (sizeof(__typeof__(x)) > 4 ? __z_log2q(x) : __z_log2d(x))
Expand Down
34 changes: 34 additions & 0 deletions lib/utils/utf8.c
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
#include <errno.h>
#include <stdint.h>
#include <string.h>
#include <zephyr/sys/__assert.h>
#include <zephyr/sys/util.h>

#define ASCII_CHAR 0x7F
#define SEQUENCE_FIRST_MASK 0xC0
Expand Down Expand Up @@ -79,3 +80,36 @@ char *utf8_lcpy(char *dst, const char *src, size_t n)

return dst;
}

/*
 * Check whether the first len bytes of str are a well-formed UTF-8 sequence.
 *
 * Validation follows RFC 3629: every multi-byte sequence must have the right
 * number of continuation bytes (10xxxxxx), must not be an overlong encoding,
 * must not encode a UTF-16 surrogate (U+D800..U+DFFF) and must not exceed
 * U+10FFFF. NUL bytes inside the buffer are treated as ordinary ASCII.
 *
 * str: buffer to validate; may be NULL only when len is 0.
 * len: number of bytes to examine.
 *
 * Returns true if the buffer is valid UTF-8 (an empty buffer is valid).
 * Returns false otherwise; for a negative len, or a NULL str with a non-zero
 * len, errno is additionally set to EINVAL.
 */
bool utf8_check(const uint8_t *str, int len)
{
	int i = 0;

	if (len < 0 || (str == NULL && len > 0)) {
		errno = EINVAL;
		return false;
	}

	while (i < len) {
		const uint8_t lead = str[i];
		/* Allowed range of the byte right after the lead byte. */
		uint8_t lo = 0x80;
		uint8_t hi = 0xBF;
		int nbyte;

		if (lead <= 0x7F) {
			/* Single-byte (ASCII) character. */
			i++;
			continue;
		}

		if (lead >= 0xC2 && lead <= 0xDF) {
			/* 0xC0/0xC1 would be overlong encodings of ASCII. */
			nbyte = 2;
		} else if (lead >= 0xE0 && lead <= 0xEF) {
			nbyte = 3;
			if (lead == 0xE0) {
				/* Below 0xA0 would be an overlong encoding. */
				lo = 0xA0;
			} else if (lead == 0xED) {
				/* Above 0x9F would encode a surrogate. */
				hi = 0x9F;
			}
		} else if (lead >= 0xF0 && lead <= 0xF4) {
			nbyte = 4;
			if (lead == 0xF0) {
				/* Below 0x90 would be an overlong encoding. */
				lo = 0x90;
			} else if (lead == 0xF4) {
				/* Above 0x8F would exceed U+10FFFF. */
				hi = 0x8F;
			}
		} else {
			/* Stray continuation byte or invalid lead byte. */
			return false;
		}

		/* Compare against the remainder so i + nbyte cannot overflow. */
		if (nbyte > len - i) {
			return false;
		}

		if (str[i + 1] < lo || str[i + 1] > hi) {
			return false;
		}

		/* Remaining bytes must all be plain continuation bytes. */
		for (int k = 2; k < nbyte; k++) {
			if ((str[i + k] & 0xC0) != 0x80) {
				return false;
			}
		}

		i += nbyte;
	}

	return true;
}
9 changes: 9 additions & 0 deletions tests/unit/util/main.c
Original file line number Diff line number Diff line change
Expand Up @@ -867,6 +867,15 @@ ZTEST(util, test_utf8_lcpy_truncated)
zassert_str_equal(dest_str, expected_result, "Failed to copy");
}

/* Exercise utf8_check() with valid, truncated and invalid-argument inputs. */
ZTEST(util, test_utf8_check)
{
	uint8_t s1[] = "€€€";
	uint8_t s2[] = "\xe2";
	uint8_t s3[] = "hello";
	uint8_t s4[] = "\xff";

	/*
	 * Lengths come from sizeof (minus the terminator) rather than strlen():
	 * strlen() takes a const char *, so passing a uint8_t array to it is an
	 * incompatible pointer type.
	 */
	zassert_true(utf8_check(s1, (int)sizeof(s1) - 1), "Failed to check");
	zassert_false(utf8_check(s2, (int)sizeof(s2) - 1), "Failed to check");
	zassert_true(utf8_check(s3, (int)sizeof(s3) - 1), "Failed to check");
	zassert_false(utf8_check(s4, (int)sizeof(s4) - 1), "Failed to check");

	/* An empty range is valid; a negative length is rejected. */
	zassert_true(utf8_check(s1, 0), "Failed to check");
	zassert_false(utf8_check(s1, -1), "Failed to check");
}

ZTEST(util, test_utf8_lcpy_not_truncated)
{
/* dest_str size is based on storing 3 * € plus the null terminator */
Expand Down

0 comments on commit ed28e46

Please sign in to comment.