Skip to content

Commit

Permalink
utils: Add utf8_check function
Browse files Browse the repository at this point in the history
Add the 'utf8_check' function, which checks whether a given string is UTF-8 encoded.

Signed-off-by: James Roy <rruuaanng@outlook.com>
  • Loading branch information
rruuaanng committed Nov 3, 2024
1 parent 2f23313 commit fbec8b3
Show file tree
Hide file tree
Showing 3 changed files with 56 additions and 0 deletions.
10 changes: 10 additions & 0 deletions include/zephyr/sys/util.h
Original file line number Diff line number Diff line change
Expand Up @@ -685,6 +685,16 @@ char *utf8_trunc(char *utf8_str);
*/
char *utf8_lcpy(char *dst, const char *src, size_t n);

/**
 * @brief Checks if the given string @p str is UTF-8 encoded.
 *
 * The string must be NUL-terminated; its length is measured in bytes,
 * not counting the terminator.
 *
 * @param str Target string (NUL-terminated)
 * @param maxlen Maximum allowed length of @p str in bytes; a longer string
 *               is rejected
 *
 * @return true if @p str is UTF-8 encoded and is not longer than @p maxlen
 *         bytes, or false otherwise.
 */
bool utf8_check(const char *str, size_t maxlen);

/*
 * Index of the highest set bit of x (i.e. floor(log2(x))), computed from the
 * leading-zero count. Assumes the operand of __builtin_clz() is 32 bits wide.
 * NOTE(review): __builtin_clz(0) is undefined per the GCC documentation, so
 * x is presumably required to be non-zero - confirm against callers.
 */
#define __z_log2d(x) (32 - __builtin_clz(x) - 1)
/* Same computation for a 64-bit operand, using __builtin_clzll(). */
#define __z_log2q(x) (64 - __builtin_clzll(x) - 1)
/* Selects the 64-bit variant when x is wider than 4 bytes, else the 32-bit one. */
#define __z_log2(x) (sizeof(__typeof__(x)) > 4 ? __z_log2q(x) : __z_log2d(x))
Expand Down
37 changes: 37 additions & 0 deletions lib/utils/utf8.c
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
#include <stdint.h>
#include <string.h>
#include <zephyr/sys/__assert.h>
#include <zephyr/sys/util.h>

#define ASCII_CHAR 0x7F
#define SEQUENCE_FIRST_MASK 0xC0
Expand Down Expand Up @@ -79,3 +80,39 @@ char *utf8_lcpy(char *dst, const char *src, size_t n)

return dst;
}

/*
 * Check whether a NUL-terminated string is well-formed UTF-8.
 *
 * str:    string to validate; a NULL pointer is rejected.
 * maxlen: maximum allowed length of str in bytes, excluding the terminator.
 *
 * Returns true when str is at most maxlen bytes long and every byte sequence
 * in it is well-formed UTF-8 (RFC 3629): correct lead byte, the right number
 * of continuation bytes, no overlong encodings, no UTF-16 surrogates and no
 * code point above U+10FFFF. Returns false otherwise.
 *
 * At most maxlen + 1 bytes of str are ever read.
 */
bool utf8_check(const char *str, size_t maxlen)
{
	const unsigned char *buf = (const unsigned char *)str;
	size_t len = 0;
	size_t i = 0;

	if (str == NULL) {
		return false;
	}

	/* Bounded length scan: never look further than the byte at maxlen. */
	while (len < maxlen && buf[len] != '\0') {
		len++;
	}

	/* No terminator within maxlen bytes: the string is too long. */
	if (buf[len] != '\0') {
		return false;
	}

	while (i < len) {
		const unsigned char lead = buf[i];
		/* Allowed range of the byte that follows the lead byte. */
		unsigned char lo = 0x80;
		unsigned char hi = 0xBF;
		size_t nbyte;

		if (lead <= 0x7F) {
			/* Plain ASCII */
			i++;
			continue;
		}

		if (lead >= 0xC2 && lead <= 0xDF) {
			/* 0xC0 and 0xC1 would only encode overlong ASCII */
			nbyte = 2;
		} else if (lead >= 0xE0 && lead <= 0xEF) {
			nbyte = 3;
			if (lead == 0xE0) {
				/* Below 0xA0 is an overlong encoding */
				lo = 0xA0;
			} else if (lead == 0xED) {
				/* Above 0x9F encodes a UTF-16 surrogate */
				hi = 0x9F;
			}
		} else if (lead >= 0xF0 && lead <= 0xF4) {
			nbyte = 4;
			if (lead == 0xF0) {
				/* Below 0x90 is an overlong encoding */
				lo = 0x90;
			} else if (lead == 0xF4) {
				/* Above 0x8F exceeds U+10FFFF */
				hi = 0x8F;
			}
		} else {
			/* Stray continuation byte or invalid lead byte */
			return false;
		}

		/* Sequence is cut off by the end of the string. */
		if (nbyte > len - i) {
			return false;
		}

		if (buf[i + 1] < lo || buf[i + 1] > hi) {
			return false;
		}

		/* Remaining bytes must have the form 10xxxxxx. */
		for (size_t k = 2; k < nbyte; k++) {
			if ((buf[i + k] & 0xC0) != 0x80) {
				return false;
			}
		}

		i += nbyte;
	}

	return true;
}
9 changes: 9 additions & 0 deletions tests/unit/util/main.c
Original file line number Diff line number Diff line change
Expand Up @@ -867,6 +867,15 @@ ZTEST(util, test_utf8_lcpy_truncated)
zassert_str_equal(dest_str, expected_result, "Failed to copy");
}

/* Verify utf8_check() on a well-formed string and on a truncated sequence. */
ZTEST(util, test_utf8_check)
{
	/* Three complete 3-byte sequences (euro sign): well-formed. */
	static const char s1[] = "€€€";
	/* Lead byte of a 3-byte sequence with no continuation bytes: malformed. */
	static const char s2[] = "\xe2";

	zassert_true(utf8_check(s1, strlen(s1)), "Failed to check");
	zassert_false(utf8_check(s2, strlen(s2)), "Failed to check");
}

ZTEST(util, test_utf8_lcpy_not_truncated)
{
/* dest_str size is based on storing 3 * € plus the null terminator */
Expand Down

0 comments on commit fbec8b3

Please sign in to comment.