From f15cf682b310331a8fdafa69133c006659a48ddc Mon Sep 17 00:00:00 2001 From: Robin Leroy Date: Wed, 25 Oct 2023 12:30:08 +0200 Subject: [PATCH 1/5] UnicodeData.txt from L2/23-208 --- unicodetools/data/ucd/dev/UnicodeData.txt | 3 +++ 1 file changed, 3 insertions(+) diff --git a/unicodetools/data/ucd/dev/UnicodeData.txt b/unicodetools/data/ucd/dev/UnicodeData.txt index 20da9b7d8..31593e75a 100644 --- a/unicodetools/data/ucd/dev/UnicodeData.txt +++ b/unicodetools/data/ucd/dev/UnicodeData.txt @@ -1,3 +1,6 @@ +1AD6;COMBINING VERTICAL-LINE-ACUTE-GRAVE;Mn;230;NSM;;;;;N;;;;; +1AD7;COMBINING VERTICAL-LINE-GRAVE-ACUTE;Mn;230;NSM;;;;;N;;;;; +1AD8;COMBINING MACRON-ACUTE-GRAVE;Mn;230;NSM;;;;;N;;;;; 0000;;Cc;0;BN;;;;;N;NULL;;;; 0001;;Cc;0;BN;;;;;N;START OF HEADING;;;; 0002;;Cc;0;BN;;;;;N;START OF TEXT;;;; From 57d2659dffcb3efe28b509b34b955cdb902ff044 Mon Sep 17 00:00:00 2001 From: Robin Leroy Date: Wed, 25 Oct 2023 12:34:59 +0200 Subject: [PATCH 2/5] LB=CM --- unicodetools/data/ucd/dev/LineBreak.txt | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/unicodetools/data/ucd/dev/LineBreak.txt b/unicodetools/data/ucd/dev/LineBreak.txt index daea5c0ac..42d1491a8 100644 --- a/unicodetools/data/ucd/dev/LineBreak.txt +++ b/unicodetools/data/ucd/dev/LineBreak.txt @@ -1,5 +1,5 @@ # LineBreak-16.0.0.txt -# Date: 2023-10-24, 21:03:44 GMT +# Date: 2023-10-25, 10:34:14 GMT # © 2023 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use, see https://www.unicode.org/terms_of_use.html @@ -777,6 +777,7 @@ 1AB0..1ABD ; CM # Mn [14] COMBINING DOUBLED CIRCUMFLEX ACCENT..COMBINING PARENTHESES BELOW 1ABE ; CM # Me COMBINING PARENTHESES OVERLAY 1ABF..1ACE ; CM # Mn [16] COMBINING LATIN SMALL LETTER W BELOW..COMBINING LATIN SMALL LETTER INSULAR T +1AD6..1AD8 ; CM # Mn [3] COMBINING VERTICAL-LINE-ACUTE-GRAVE..COMBINING MACRON-ACUTE-GRAVE 1B00..1B03 ; CM # Mn [4] BALINESE SIGN ULU RICEM..BALINESE SIGN SURANG 1B04 ; CM # Mc BALINESE SIGN BISAH 1B05..1B33 ; AK # Lo [47] BALINESE LETTER AKARA..BALINESE LETTER HA From c7f72aedee2cf945200d68e78083e4bbbb32cb69 Mon Sep 17 00:00:00 2001 From: Robin Leroy Date: Wed, 25 Oct 2023 12:39:56 +0200 Subject: [PATCH 3/5] Scripts.txt --- unicodetools/data/ucd/dev/Scripts.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/unicodetools/data/ucd/dev/Scripts.txt b/unicodetools/data/ucd/dev/Scripts.txt index 376dac297..5f8d9a08d 100644 --- a/unicodetools/data/ucd/dev/Scripts.txt +++ b/unicodetools/data/ucd/dev/Scripts.txt @@ -1,3 +1,4 @@ +1AD6..1AD8; Inherited # Scripts-16.0.0.txt # Date: 2023-10-24, 21:04:04 GMT # © 2023 Unicode®, Inc. From c20c43bdbea03148d0f167873942b4df47a7271e Mon Sep 17 00:00:00 2001 From: Robin Leroy Date: Wed, 25 Oct 2023 12:40:32 +0200 Subject: [PATCH 4/5] Diacritics are diacritics. --- unicodetools/data/ucd/dev/PropList.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/unicodetools/data/ucd/dev/PropList.txt b/unicodetools/data/ucd/dev/PropList.txt index a77d2d267..5f8b4a467 100644 --- a/unicodetools/data/ucd/dev/PropList.txt +++ b/unicodetools/data/ucd/dev/PropList.txt @@ -1,3 +1,4 @@ +1AD6..1AD8; Diacritic # PropList-16.0.0.txt # Date: 2023-10-24, 21:03:48 GMT # © 2023 Unicode®, Inc. From 0c4e789753b70f1ad8ddd7adbe1981f28ea80438 Mon Sep 17 00:00:00 2001 From: Robin Leroy Date: Wed, 25 Oct 2023 12:42:20 +0200 Subject: [PATCH 5/5] Regenerate UCD --- unicodetools/data/ucd/dev/DerivedAge.txt | 5 +++-- .../data/ucd/dev/DerivedCoreProperties.txt | 17 +++++++++++------ unicodetools/data/ucd/dev/EastAsianWidth.txt | 3 ++- unicodetools/data/ucd/dev/NormalizationTest.txt | 8 +++++++- unicodetools/data/ucd/dev/PropList.txt | 6 +++--- unicodetools/data/ucd/dev/Scripts.txt | 6 +++--- unicodetools/data/ucd/dev/UnicodeData.txt | 6 +++--- .../data/ucd/dev/VerticalOrientation.txt | 3 ++- .../ucd/dev/auxiliary/GraphemeBreakProperty.txt | 5 +++-- .../ucd/dev/auxiliary/SentenceBreakProperty.txt | 5 +++-- .../ucd/dev/auxiliary/WordBreakProperty.txt | 5 +++-- .../data/ucd/dev/extracted/DerivedBidiClass.txt | 9 +++++---- .../ucd/dev/extracted/DerivedCombiningClass.txt | 9 +++++---- .../ucd/dev/extracted/DerivedEastAsianWidth.txt | 5 +++-- .../dev/extracted/DerivedGeneralCategory.txt | 10 ++++++---- .../ucd/dev/extracted/DerivedJoiningType.txt | 5 +++-- .../data/ucd/dev/extracted/DerivedLineBreak.txt | 9 +++++---- .../data/ucd/dev/extracted/DerivedName.txt | 7 +++++-- 18 files changed, 75 insertions(+), 48 deletions(-) diff --git a/unicodetools/data/ucd/dev/DerivedAge.txt b/unicodetools/data/ucd/dev/DerivedAge.txt index a0de787d6..320717b18 100644 --- a/unicodetools/data/ucd/dev/DerivedAge.txt +++ b/unicodetools/data/ucd/dev/DerivedAge.txt @@ -1,5 +1,5 @@ # DerivedAge-16.0.0.txt -# Date: 2023-10-24, 21:03:11 GMT +# Date: 2023-10-25, 10:41:01 GMT # © 2023 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use, see https://www.unicode.org/terms_of_use.html @@ -2012,6 +2012,7 @@ FDFE..FDFF ; 14.0 # [2] ARABIC LIGATURE SUBHAANAHU WA TAAALAA..ARABIC LIGAT 0897 ; 16.0 # ARABIC PEPET 0C5C ; 16.0 # TELUGU ARCHAIC SHRII 0CDC ; 16.0 # KANNADA ARCHAIC SHRII +1AD6..1AD8 ; 16.0 # [3] COMBINING VERTICAL-LINE-ACUTE-GRAVE..COMBINING MACRON-ACUTE-GRAVE 1C89..1C8A ; 16.0 # [2] CYRILLIC CAPITAL LETTER TJE..CYRILLIC SMALL LETTER TJE A7CB..A7CD ; 16.0 # [3] LATIN CAPITAL LETTER RAMS HORN..LATIN SMALL LETTER S WITH DIAGONAL STROKE 105C0..105F3 ; 16.0 # [52] TODHRI LETTER A..TODHRI LETTER OO @@ -2028,6 +2029,6 @@ A7CB..A7CD ; 16.0 # [3] LATIN CAPITAL LETTER RAMS HORN..LATIN SMALL LETTER 1E5FF ; 16.0 # OL ONAL ABBREVIATION SIGN 1F8B2 ; 16.0 # RIGHTWARDS ARROW WITH LOWER HOOK -# Total code points: 246 +# Total code points: 249 # EOF diff --git a/unicodetools/data/ucd/dev/DerivedCoreProperties.txt b/unicodetools/data/ucd/dev/DerivedCoreProperties.txt index 5d2157ef5..271f86d00 100644 --- a/unicodetools/data/ucd/dev/DerivedCoreProperties.txt +++ b/unicodetools/data/ucd/dev/DerivedCoreProperties.txt @@ -1,5 +1,5 @@ # DerivedCoreProperties-16.0.0.txt -# Date: 2023-10-24, 21:03:37 GMT +# Date: 2023-10-25, 10:41:33 GMT # © 2023 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use, see https://www.unicode.org/terms_of_use.html @@ -3168,6 +3168,7 @@ FF41..FF5A ; Cased # L& [26] FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH LATIN 1AB0..1ABD ; Case_Ignorable # Mn [14] COMBINING DOUBLED CIRCUMFLEX ACCENT..COMBINING PARENTHESES BELOW 1ABE ; Case_Ignorable # Me COMBINING PARENTHESES OVERLAY 1ABF..1ACE ; Case_Ignorable # Mn [16] COMBINING LATIN SMALL LETTER W BELOW..COMBINING LATIN SMALL LETTER INSULAR T +1AD6..1AD8 ; Case_Ignorable # Mn [3] COMBINING VERTICAL-LINE-ACUTE-GRAVE..COMBINING MACRON-ACUTE-GRAVE 1B00..1B03 ; Case_Ignorable # Mn [4] BALINESE SIGN ULU RICEM..BALINESE SIGN SURANG 1B34 ; Case_Ignorable # Mn BALINESE SIGN REREKAN 1B36..1B3A ; Case_Ignorable # Mn [5] BALINESE VOWEL SIGN ULU..BALINESE VOWEL SIGN RA REPA @@ -3467,7 +3468,7 @@ E0001 ; Case_Ignorable # Cf LANGUAGE TAG E0020..E007F ; Case_Ignorable # Cf [96] TAG SPACE..CANCEL TAG E0100..E01EF ; Case_Ignorable # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 2718 +# Total code points: 2721 # ================================================ @@ -7403,6 +7404,7 @@ FFDA..FFDC ; ID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 1AA7 ; ID_Continue # Lm TAI THAM SIGN MAI YAMOK 1AB0..1ABD ; ID_Continue # Mn [14] COMBINING DOUBLED CIRCUMFLEX ACCENT..COMBINING PARENTHESES BELOW 1ABF..1ACE ; ID_Continue # Mn [16] COMBINING LATIN SMALL LETTER W BELOW..COMBINING LATIN SMALL LETTER INSULAR T +1AD6..1AD8 ; ID_Continue # Mn [3] COMBINING VERTICAL-LINE-ACUTE-GRAVE..COMBINING MACRON-ACUTE-GRAVE 1B00..1B03 ; ID_Continue # Mn [4] BALINESE SIGN ULU RICEM..BALINESE SIGN SURANG 1B04 ; ID_Continue # Mc BALINESE SIGN BISAH 1B05..1B33 ; ID_Continue # Lo [47] BALINESE LETTER AKARA..BALINESE LETTER HA @@ -8283,7 +8285,7 @@ FFDA..FFDC ; ID_Continue # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HAN 31350..323AF ; ID_Continue # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF E0100..E01EF ; ID_Continue # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 140344 +# Total code points: 140347 # ================================================ @@ -9541,6 +9543,7 @@ FFDA..FFDC ; XID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGU 1AA7 ; XID_Continue # Lm TAI THAM SIGN MAI YAMOK 1AB0..1ABD ; XID_Continue # Mn [14] COMBINING DOUBLED CIRCUMFLEX ACCENT..COMBINING PARENTHESES BELOW 1ABF..1ACE ; XID_Continue # Mn [16] COMBINING LATIN SMALL LETTER W BELOW..COMBINING LATIN SMALL LETTER INSULAR T +1AD6..1AD8 ; XID_Continue # Mn [3] COMBINING VERTICAL-LINE-ACUTE-GRAVE..COMBINING MACRON-ACUTE-GRAVE 1B00..1B03 ; XID_Continue # Mn [4] BALINESE SIGN ULU RICEM..BALINESE SIGN SURANG 1B04 ; XID_Continue # Mc BALINESE SIGN BISAH 1B05..1B33 ; XID_Continue # Lo [47] BALINESE LETTER AKARA..BALINESE LETTER HA @@ -10426,7 +10429,7 @@ FFDA..FFDC ; XID_Continue # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HA 31350..323AF ; XID_Continue # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF E0100..E01EF ; XID_Continue # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 140325 +# Total code points: 140328 # ================================================ @@ -10643,6 +10646,7 @@ E01F0..E0FFF ; Default_Ignorable_Code_Point # Cn [3600] ..;Cc;0;BN;;;;;N;NULL;;;; 0001;;Cc;0;BN;;;;;N;START OF HEADING;;;; 0002;;Cc;0;BN;;;;;N;START OF TEXT;;;; @@ -6142,6 +6139,9 @@ 1ACC;COMBINING LATIN SMALL LETTER INSULAR G;Mn;230;NSM;;;;;N;;;;; 1ACD;COMBINING LATIN SMALL LETTER INSULAR R;Mn;230;NSM;;;;;N;;;;; 1ACE;COMBINING LATIN SMALL LETTER INSULAR T;Mn;230;NSM;;;;;N;;;;; +1AD6;COMBINING VERTICAL-LINE-ACUTE-GRAVE;Mn;230;NSM;;;;;N;;;;; +1AD7;COMBINING VERTICAL-LINE-GRAVE-ACUTE;Mn;230;NSM;;;;;N;;;;; +1AD8;COMBINING MACRON-ACUTE-GRAVE;Mn;230;NSM;;;;;N;;;;; 1B00;BALINESE SIGN ULU RICEM;Mn;0;NSM;;;;;N;;;;; 1B01;BALINESE SIGN ULU CANDRA;Mn;0;NSM;;;;;N;;;;; 1B02;BALINESE SIGN CECEK;Mn;0;NSM;;;;;N;;;;; diff --git a/unicodetools/data/ucd/dev/VerticalOrientation.txt b/unicodetools/data/ucd/dev/VerticalOrientation.txt index 08b73924c..48cda4a7b 100644 --- a/unicodetools/data/ucd/dev/VerticalOrientation.txt +++ b/unicodetools/data/ucd/dev/VerticalOrientation.txt @@ -1,5 +1,5 @@ # VerticalOrientation-16.0.0.txt -# Date: 2023-10-24, 21:04:08 GMT +# Date: 2023-10-25, 10:42:19 GMT # © 2023 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use, see https://www.unicode.org/terms_of_use.html @@ -741,6 +741,7 @@ 1AB0..1ABD ; R # Mn [14] COMBINING DOUBLED CIRCUMFLEX ACCENT..COMBINING PARENTHESES BELOW 1ABE ; R # Me COMBINING PARENTHESES OVERLAY 1ABF..1ACE ; R # Mn [16] COMBINING LATIN SMALL LETTER W BELOW..COMBINING LATIN SMALL LETTER INSULAR T +1AD6..1AD8 ; R # Mn [3] COMBINING VERTICAL-LINE-ACUTE-GRAVE..COMBINING MACRON-ACUTE-GRAVE 1B00..1B03 ; R # Mn [4] BALINESE SIGN ULU RICEM..BALINESE SIGN SURANG 1B04 ; R # Mc BALINESE SIGN BISAH 1B05..1B33 ; R # Lo [47] BALINESE LETTER AKARA..BALINESE LETTER HA diff --git a/unicodetools/data/ucd/dev/auxiliary/GraphemeBreakProperty.txt b/unicodetools/data/ucd/dev/auxiliary/GraphemeBreakProperty.txt index 058e3bb76..d37cb4a3b 100644 --- a/unicodetools/data/ucd/dev/auxiliary/GraphemeBreakProperty.txt +++ b/unicodetools/data/ucd/dev/auxiliary/GraphemeBreakProperty.txt @@ -1,5 +1,5 @@ # GraphemeBreakProperty-16.0.0.txt -# Date: 2023-10-24, 21:03:44 GMT +# Date: 2023-10-25, 10:41:44 GMT # © 2023 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use, see https://www.unicode.org/terms_of_use.html @@ -238,6 +238,7 @@ E01F0..E0FFF ; Control # Cn [3600] .. 1AB0..1ABD ; Extend # Mn [14] COMBINING DOUBLED CIRCUMFLEX ACCENT..COMBINING PARENTHESES BELOW 1ABE ; Extend # Me COMBINING PARENTHESES OVERLAY 1ABF..1ACE ; Extend # Mn [16] COMBINING LATIN SMALL LETTER W BELOW..COMBINING LATIN SMALL LETTER INSULAR T +1AD6..1AD8 ; Extend # Mn [3] COMBINING VERTICAL-LINE-ACUTE-GRAVE..COMBINING MACRON-ACUTE-GRAVE 1B00..1B03 ; Extend # Mn [4] BALINESE SIGN ULU RICEM..BALINESE SIGN SURANG 1B34 ; Extend # Mn BALINESE SIGN REREKAN 1B35 ; Extend # Mc BALINESE VOWEL SIGN TEDUNG @@ -461,7 +462,7 @@ FF9E..FF9F ; Extend # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDT E0020..E007F ; Extend # Cf [96] TAG SPACE..CANCEL TAG E0100..E01EF ; Extend # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 2139 +# Total code points: 2142 # ================================================ diff --git a/unicodetools/data/ucd/dev/auxiliary/SentenceBreakProperty.txt b/unicodetools/data/ucd/dev/auxiliary/SentenceBreakProperty.txt index c1fd23e22..72cf1eae2 100644 --- a/unicodetools/data/ucd/dev/auxiliary/SentenceBreakProperty.txt +++ b/unicodetools/data/ucd/dev/auxiliary/SentenceBreakProperty.txt @@ -1,5 +1,5 @@ # SentenceBreakProperty-16.0.0.txt -# Date: 2023-10-24, 21:04:05 GMT +# Date: 2023-10-25, 10:42:16 GMT # © 2023 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use, see https://www.unicode.org/terms_of_use.html @@ -248,6 +248,7 @@ 1AB0..1ABD ; Extend # Mn [14] COMBINING DOUBLED CIRCUMFLEX ACCENT..COMBINING PARENTHESES BELOW 1ABE ; Extend # Me COMBINING PARENTHESES OVERLAY 1ABF..1ACE ; Extend # Mn [16] COMBINING LATIN SMALL LETTER W BELOW..COMBINING LATIN SMALL LETTER INSULAR T +1AD6..1AD8 ; Extend # Mn [3] COMBINING VERTICAL-LINE-ACUTE-GRAVE..COMBINING MACRON-ACUTE-GRAVE 1B00..1B03 ; Extend # Mn [4] BALINESE SIGN ULU RICEM..BALINESE SIGN SURANG 1B04 ; Extend # Mc BALINESE SIGN BISAH 1B34 ; Extend # Mn BALINESE SIGN REREKAN @@ -569,7 +570,7 @@ FF9E..FF9F ; Extend # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDT E0020..E007F ; Extend # Cf [96] TAG SPACE..CANCEL TAG E0100..E01EF ; Extend # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 2559 +# Total code points: 2562 # ================================================ diff --git a/unicodetools/data/ucd/dev/auxiliary/WordBreakProperty.txt b/unicodetools/data/ucd/dev/auxiliary/WordBreakProperty.txt index 62f655d99..5aada2779 100644 --- a/unicodetools/data/ucd/dev/auxiliary/WordBreakProperty.txt +++ b/unicodetools/data/ucd/dev/auxiliary/WordBreakProperty.txt @@ -1,5 +1,5 @@ # WordBreakProperty-16.0.0.txt -# Date: 2023-10-24, 21:04:08 GMT +# Date: 2023-10-25, 10:42:19 GMT # © 2023 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use, see https://www.unicode.org/terms_of_use.html @@ -284,6 +284,7 @@ FB46..FB4F ; Hebrew_Letter # Lo [10] HEBREW LETTER TSADI WITH DAGESH..HEBREW 1AB0..1ABD ; Extend # Mn [14] COMBINING DOUBLED CIRCUMFLEX ACCENT..COMBINING PARENTHESES BELOW 1ABE ; Extend # Me COMBINING PARENTHESES OVERLAY 1ABF..1ACE ; Extend # Mn [16] COMBINING LATIN SMALL LETTER W BELOW..COMBINING LATIN SMALL LETTER INSULAR T +1AD6..1AD8 ; Extend # Mn [3] COMBINING VERTICAL-LINE-ACUTE-GRAVE..COMBINING MACRON-ACUTE-GRAVE 1B00..1B03 ; Extend # Mn [4] BALINESE SIGN ULU RICEM..BALINESE SIGN SURANG 1B04 ; Extend # Mc BALINESE SIGN BISAH 1B34 ; Extend # Mn BALINESE SIGN REREKAN @@ -606,7 +607,7 @@ FF9E..FF9F ; Extend # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDT E0020..E007F ; Extend # Cf [96] TAG SPACE..CANCEL TAG E0100..E01EF ; Extend # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 2563 +# Total code points: 2566 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedBidiClass.txt b/unicodetools/data/ucd/dev/extracted/DerivedBidiClass.txt index b3216b731..2153b3671 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedBidiClass.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedBidiClass.txt @@ -1,5 +1,5 @@ # DerivedBidiClass-16.0.0.txt -# Date: 2023-10-24, 21:03:35 GMT +# Date: 2023-10-25, 10:41:29 GMT # © 2023 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use, see https://www.unicode.org/terms_of_use.html @@ -1192,8 +1192,8 @@ FFDA..FFDC ; L # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL LETTER F0000..FFFFD ; L # Co [65534] .. 100000..10FFFD; L # Co [65534] .. -# The above property value applies to 820290 code points not listed here. -# Total code points: 1096260 +# The above property value applies to 820287 code points not listed here. +# Total code points: 1096257 # ================================================ @@ -2161,6 +2161,7 @@ FFFFE..FFFFF ; BN # Cn [2] .. 1AB0..1ABD ; NSM # Mn [14] COMBINING DOUBLED CIRCUMFLEX ACCENT..COMBINING PARENTHESES BELOW 1ABE ; NSM # Me COMBINING PARENTHESES OVERLAY 1ABF..1ACE ; NSM # Mn [16] COMBINING LATIN SMALL LETTER W BELOW..COMBINING LATIN SMALL LETTER INSULAR T +1AD6..1AD8 ; NSM # Mn [3] COMBINING VERTICAL-LINE-ACUTE-GRAVE..COMBINING MACRON-ACUTE-GRAVE 1B00..1B03 ; NSM # Mn [4] BALINESE SIGN ULU RICEM..BALINESE SIGN SURANG 1B34 ; NSM # Mn BALINESE SIGN REREKAN 1B36..1B3A ; NSM # Mn [5] BALINESE VOWEL SIGN ULU..BALINESE VOWEL SIGN RA REPA @@ -2370,7 +2371,7 @@ FE20..FE2F ; NSM # Mn [16] COMBINING LIGATURE LEFT HALF..COMBINING CYRILLIC 1E944..1E94A ; NSM # Mn [7] ADLAM ALIF LENGTHENER..ADLAM NUKTA E0100..E01EF ; NSM # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 2002 +# Total code points: 2005 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedCombiningClass.txt b/unicodetools/data/ucd/dev/extracted/DerivedCombiningClass.txt index c3ef6239a..5aa31afd5 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedCombiningClass.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedCombiningClass.txt @@ -1,5 +1,5 @@ # DerivedCombiningClass-16.0.0.txt -# Date: 2023-10-24, 21:03:36 GMT +# Date: 2023-10-25, 10:41:32 GMT # © 2023 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use, see https://www.unicode.org/terms_of_use.html @@ -2026,8 +2026,8 @@ E0100..E01EF ; 0 # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 F0000..FFFFD ; 0 # Co [65534] .. 100000..10FFFD; 0 # Co [65534] .. -# The above property value applies to 826520 code points not listed here. -# Total code points: 1113182 +# The above property value applies to 826517 code points not listed here. +# Total code points: 1113179 # ================================================ @@ -2699,6 +2699,7 @@ FE27..FE2D ; 220 # Mn [7] COMBINING LIGATURE LEFT HALF BELOW..COMBINING CON 1AC1..1AC2 ; 230 # Mn [2] COMBINING LEFT PARENTHESIS ABOVE LEFT..COMBINING RIGHT PARENTHESIS ABOVE RIGHT 1AC5..1AC9 ; 230 # Mn [5] COMBINING SQUARE BRACKETS ABOVE..COMBINING DOUBLE PLUS SIGN ABOVE 1ACB..1ACE ; 230 # Mn [4] COMBINING TRIPLE ACUTE ACCENT..COMBINING LATIN SMALL LETTER INSULAR T +1AD6..1AD8 ; 230 # Mn [3] COMBINING VERTICAL-LINE-ACUTE-GRAVE..COMBINING MACRON-ACUTE-GRAVE 1B6B ; 230 # Mn BALINESE MUSICAL SYMBOL COMBINING TEGEH 1B6D..1B73 ; 230 # Mn [7] BALINESE MUSICAL SYMBOL COMBINING KEMPUL..BALINESE MUSICAL SYMBOL COMBINING GONG 1CD0..1CD2 ; 230 # Mn [3] VEDIC TONE KARSHANA..VEDIC TONE PRENKHA @@ -2765,7 +2766,7 @@ FE2E..FE2F ; 230 # Mn [2] COMBINING CYRILLIC TITLO LEFT HALF..COMBINING CYR 1E5EE ; 230 # Mn OL ONAL SIGN MU 1E944..1E949 ; 230 # Mn [6] ADLAM ALIF LENGTHENER..ADLAM GEMINATE CONSONANT MODIFIER -# Total code points: 517 +# Total code points: 520 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedEastAsianWidth.txt b/unicodetools/data/ucd/dev/extracted/DerivedEastAsianWidth.txt index b9c329c1d..2bf32ed06 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedEastAsianWidth.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedEastAsianWidth.txt @@ -1,5 +1,5 @@ # DerivedEastAsianWidth-16.0.0.txt -# Date: 2023-10-24, 21:03:39 GMT +# Date: 2023-10-25, 10:41:36 GMT # © 2023 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use, see https://www.unicode.org/terms_of_use.html @@ -692,6 +692,7 @@ 1AB0..1ABD ; N # Mn [14] COMBINING DOUBLED CIRCUMFLEX ACCENT..COMBINING PARENTHESES BELOW 1ABE ; N # Me COMBINING PARENTHESES OVERLAY 1ABF..1ACE ; N # Mn [16] COMBINING LATIN SMALL LETTER W BELOW..COMBINING LATIN SMALL LETTER INSULAR T +1AD6..1AD8 ; N # Mn [3] COMBINING VERTICAL-LINE-ACUTE-GRAVE..COMBINING MACRON-ACUTE-GRAVE 1B00..1B03 ; N # Mn [4] BALINESE SIGN ULU RICEM..BALINESE SIGN SURANG 1B04 ; N # Mc BALINESE SIGN BISAH 1B05..1B33 ; N # Lo [47] BALINESE LETTER AKARA..BALINESE LETTER HA @@ -2065,7 +2066,7 @@ FFFC ; N # So OBJECT REPLACEMENT CHARACTER E0001 ; N # Cf LANGUAGE TAG E0020..E007F ; N # Cf [96] TAG SPACE..CANCEL TAG -# The above property value applies to 766038 code points not listed here. +# The above property value applies to 766035 code points not listed here. # Total code points: 792618 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedGeneralCategory.txt b/unicodetools/data/ucd/dev/extracted/DerivedGeneralCategory.txt index 2a5ac8419..435b4f263 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedGeneralCategory.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedGeneralCategory.txt @@ -1,5 +1,5 @@ # DerivedGeneralCategory-16.0.0.txt -# Date: 2023-10-24, 21:03:39 GMT +# Date: 2023-10-25, 10:41:37 GMT # © 2023 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use, see https://www.unicode.org/terms_of_use.html @@ -228,7 +228,8 @@ 1A8A..1A8F ; Cn # [6] .. 1A9A..1A9F ; Cn # [6] .. 1AAE..1AAF ; Cn # [2] .. -1ACF..1AFF ; Cn # [49] .. +1ACF..1AD5 ; Cn # [7] .. +1AD9..1AFF ; Cn # [39] .. 1B4D..1B4F ; Cn # [3] .. 1B7F ; Cn # 1BF4..1BFB ; Cn # [8] .. @@ -734,7 +735,7 @@ E01F0..EFFFF ; Cn # [65040] .. FFFFE..FFFFF ; Cn # [2] .. 10FFFE..10FFFF; Cn # [2] .. -# Total code points: 824472 +# Total code points: 824469 # ================================================ @@ -2829,6 +2830,7 @@ FFDA..FFDC ; Lo # [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL LETTER I 1A7F ; Mn # TAI THAM COMBINING CRYPTOGRAMMIC DOT 1AB0..1ABD ; Mn # [14] COMBINING DOUBLED CIRCUMFLEX ACCENT..COMBINING PARENTHESES BELOW 1ABF..1ACE ; Mn # [16] COMBINING LATIN SMALL LETTER W BELOW..COMBINING LATIN SMALL LETTER INSULAR T +1AD6..1AD8 ; Mn # [3] COMBINING VERTICAL-LINE-ACUTE-GRAVE..COMBINING MACRON-ACUTE-GRAVE 1B00..1B03 ; Mn # [4] BALINESE SIGN ULU RICEM..BALINESE SIGN SURANG 1B34 ; Mn # BALINESE SIGN REREKAN 1B36..1B3A ; Mn # [5] BALINESE VOWEL SIGN ULU..BALINESE VOWEL SIGN RA REPA @@ -3035,7 +3037,7 @@ FE20..FE2F ; Mn # [16] COMBINING LIGATURE LEFT HALF..COMBINING CYRILLIC TITL 1E944..1E94A ; Mn # [7] ADLAM ALIF LENGTHENER..ADLAM NUKTA E0100..E01EF ; Mn # [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 1994 +# Total code points: 1997 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedJoiningType.txt b/unicodetools/data/ucd/dev/extracted/DerivedJoiningType.txt index 4ae6df297..be7fad71d 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedJoiningType.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedJoiningType.txt @@ -1,5 +1,5 @@ # DerivedJoiningType-16.0.0.txt -# Date: 2023-10-24, 21:03:40 GMT +# Date: 2023-10-25, 10:41:39 GMT # © 2023 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use, see https://www.unicode.org/terms_of_use.html @@ -350,6 +350,7 @@ A872 ; L # Lo PHAGS-PA SUPERFIXED LETTER RA 1AB0..1ABD ; T # Mn [14] COMBINING DOUBLED CIRCUMFLEX ACCENT..COMBINING PARENTHESES BELOW 1ABE ; T # Me COMBINING PARENTHESES OVERLAY 1ABF..1ACE ; T # Mn [16] COMBINING LATIN SMALL LETTER W BELOW..COMBINING LATIN SMALL LETTER INSULAR T +1AD6..1AD8 ; T # Mn [3] COMBINING VERTICAL-LINE-ACUTE-GRAVE..COMBINING MACRON-ACUTE-GRAVE 1B00..1B03 ; T # Mn [4] BALINESE SIGN ULU RICEM..BALINESE SIGN SURANG 1B34 ; T # Mn BALINESE SIGN REREKAN 1B36..1B3A ; T # Mn [5] BALINESE VOWEL SIGN ULU..BALINESE VOWEL SIGN RA REPA @@ -572,6 +573,6 @@ E0001 ; T # Cf LANGUAGE TAG E0020..E007F ; T # Cf [96] TAG SPACE..CANCEL TAG E0100..E01EF ; T # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 2159 +# Total code points: 2162 # EOF diff --git a/unicodetools/data/ucd/dev/extracted/DerivedLineBreak.txt b/unicodetools/data/ucd/dev/extracted/DerivedLineBreak.txt index 0581bb420..3683efe20 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedLineBreak.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedLineBreak.txt @@ -1,5 +1,5 @@ # DerivedLineBreak-16.0.0.txt -# Date: 2023-10-24, 21:03:41 GMT +# Date: 2023-10-25, 10:41:40 GMT # © 2023 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use, see https://www.unicode.org/terms_of_use.html @@ -68,8 +68,8 @@ E000..F8FF ; XX # Co [6400] .. F0000..FFFFD ; XX # Co [65534] .. 100000..10FFFD; XX # Co [65534] .. -# The above property value applies to 762480 code points not listed here. -# Total code points: 899948 +# The above property value applies to 762477 code points not listed here. +# Total code points: 899945 # ================================================ @@ -2046,6 +2046,7 @@ FE19 ; IN # Po PRESENTATION FORM FOR VERTICAL HORIZONTAL ELLIPSIS 1AB0..1ABD ; CM # Mn [14] COMBINING DOUBLED CIRCUMFLEX ACCENT..COMBINING PARENTHESES BELOW 1ABE ; CM # Me COMBINING PARENTHESES OVERLAY 1ABF..1ACE ; CM # Mn [16] COMBINING LATIN SMALL LETTER W BELOW..COMBINING LATIN SMALL LETTER INSULAR T +1AD6..1AD8 ; CM # Mn [3] COMBINING VERTICAL-LINE-ACUTE-GRAVE..COMBINING MACRON-ACUTE-GRAVE 1B00..1B03 ; CM # Mn [4] BALINESE SIGN ULU RICEM..BALINESE SIGN SURANG 1B04 ; CM # Mc BALINESE SIGN BISAH 1B34 ; CM # Mn BALINESE SIGN REREKAN @@ -2358,7 +2359,7 @@ E0001 ; CM # Cf LANGUAGE TAG E0020..E007F ; CM # Cf [96] TAG SPACE..CANCEL TAG E0100..E01EF ; CM # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 2438 +# Total code points: 2441 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedName.txt b/unicodetools/data/ucd/dev/extracted/DerivedName.txt index b9437c06f..aee4694f9 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedName.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedName.txt @@ -1,5 +1,5 @@ # DerivedName-16.0.0.txt -# Date: 2023-10-24, 21:03:41 GMT +# Date: 2023-10-25, 10:41:40 GMT # © 2023 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use, see https://www.unicode.org/terms_of_use.html @@ -6114,6 +6114,9 @@ 1ACC ; COMBINING LATIN SMALL LETTER INSULAR G 1ACD ; COMBINING LATIN SMALL LETTER INSULAR R 1ACE ; COMBINING LATIN SMALL LETTER INSULAR T +1AD6 ; COMBINING VERTICAL-LINE-ACUTE-GRAVE +1AD7 ; COMBINING VERTICAL-LINE-GRAVE-ACUTE +1AD8 ; COMBINING MACRON-ACUTE-GRAVE 1B00 ; BALINESE SIGN ULU RICEM 1B01 ; BALINESE SIGN ULU CANDRA 1B02 ; BALINESE SIGN CECEK @@ -44422,6 +44425,6 @@ E01ED ; VARIATION SELECTOR-254 E01EE ; VARIATION SELECTOR-255 E01EF ; VARIATION SELECTOR-256 -# Total code points: 150059 +# Total code points: 150062 # EOF