forked from hzeller/upnp-display
-
Notifications
You must be signed in to change notification settings - Fork 0
/
utf8.h
69 lines (66 loc) · 2.18 KB
/
utf8.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
// -*- c++ -*-
// This file is part of UPnP LCD Display
//
// Copyright (C) 2013 Henner Zeller <h.zeller@acm.org>
//
// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation; either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
#ifndef UPNP_DISPLAY_UTF8_H
#define UPNP_DISPLAY_UTF8_H
#include <stddef.h>
#include <stdint.h>
// Utility function that reads one UTF-8 encoded codepoint from a byte
// iterator and advances the iterator past the bytes it consumed.
//
// Besides the 1- to 4-byte forms, the historic 5- and 6-byte forms are
// decoded as well. Overlong encodings and surrogate values are not rejected.
//
// Malformed input is handled defensively:
//  - a byte that cannot start a sequence (stray continuation byte, 0xFE,
//    0xFF) consumes exactly that one byte and yields U+FFFD.
//  - a sequence that is cut short yields U+FFFD; the byte that is not a
//    continuation byte is left unconsumed, so a terminating NUL or the next
//    lead byte is never skipped.
//
// There is no end iterator, so the caller must guarantee that `it` is
// dereferenceable on entry and that the sequence is followed by a readable
// byte (e.g. a NUL terminator) if it might be truncated.
template <typename byte_iterator>
uint32_t utf8_next_codepoint(byte_iterator &it) {
  const uint32_t kReplacementChar = 0xFFFD;

  // Go through uint8_t so that iterators over (signed) char do not
  // sign-extend bytes >= 0x80.
  const uint32_t lead = static_cast<uint8_t>(*it);
  ++it;
  if (lead < 0x80) {
    return lead;  // Plain ASCII.
  }

  // The lead byte tells how many continuation bytes follow and carries
  // the most significant payload bits.
  int trailing;
  uint32_t cp;
  if ((lead & 0xE0) == 0xC0) {
    trailing = 1;
    cp = lead & 0x1F;
  } else if ((lead & 0xF0) == 0xE0) {
    trailing = 2;
    cp = lead & 0x0F;
  } else if ((lead & 0xF8) == 0xF0) {
    trailing = 3;
    cp = lead & 0x07;
  } else if ((lead & 0xFC) == 0xF8) {
    trailing = 4;
    cp = lead & 0x03;
  } else if ((lead & 0xFE) == 0xFC) {
    trailing = 5;
    cp = lead & 0x01;
  } else {
    return kReplacementChar;  // Not a valid lead byte; only it is consumed.
  }

  for (; trailing > 0; --trailing) {
    const uint32_t next = static_cast<uint8_t>(*it);
    if ((next & 0xC0) != 0x80) {
      return kReplacementChar;  // Truncated; leave `it` on the odd byte.
    }
    cp = (cp << 6) | (next & 0x3F);  // Each continuation adds 6 bits.
    ++it;
  }
  return cp;
}
// Counts the UTF-8 encoded characters in the byte range [begin, end).
//
// Every lead byte counts as one character; the continuation bytes that
// belong to it are skipped. The walk checks against `end` before every
// read, so a truncated or otherwise malformed sequence can never move the
// iterator past `end`. A stray continuation byte or a truncated sequence
// counts as one character.
template <typename byte_iterator>
size_t utf8_character_count(const byte_iterator &begin,
                            const byte_iterator &end) {
  size_t result = 0;
  byte_iterator it = begin;
  while (it != end) {
    // Go through uint8_t so (signed) char bytes >= 0x80 are not negative.
    const uint32_t lead = static_cast<uint8_t>(*it);
    ++it;
    ++result;

    // Number of continuation bytes announced by the lead byte.
    int trailing = 0;
    if ((lead & 0xE0) == 0xC0) {
      trailing = 1;
    } else if ((lead & 0xF0) == 0xE0) {
      trailing = 2;
    } else if ((lead & 0xF8) == 0xF0) {
      trailing = 3;
    } else if ((lead & 0xFC) == 0xF8) {
      trailing = 4;
    } else if ((lead & 0xFE) == 0xFC) {
      trailing = 5;
    }

    // Skip only genuine continuation bytes, and never beyond `end`.
    for (; trailing > 0 && it != end; --trailing) {
      if ((static_cast<uint8_t>(*it) & 0xC0) != 0x80) {
        break;
      }
      ++it;
    }
  }
  return result;
}
#endif // UPNP_DISPLAY_UTF8_H