Skip to content

Commit

Permalink
Unicode 15.1 (#795)
Browse files Browse the repository at this point in the history
  • Loading branch information
burgerrg authored Jan 24, 2024
1 parent 2f51156 commit 0fbd922
Show file tree
Hide file tree
Showing 17 changed files with 1,804 additions and 1,291 deletions.
3 changes: 2 additions & 1 deletion mats/5_4.ms
Original file line number Diff line number Diff line change
Expand Up @@ -1327,7 +1327,7 @@
(if (not (= n (length chars)))
(begin (display " but expected ")
(write n)
(display " in Unicode 15.0")))
(display " in Unicode 15.1")))
(newline)
0)
(define-syntax test
Expand Down Expand Up @@ -1459,6 +1459,7 @@
(test cat1 (char-general-category #\a) => 'Ll)
(test cat2 (char-general-category #\space) => 'Zs)
(test cat3 (char-general-category (integer->char #x10FFFF)) => 'Cn)
(test cat4 (char-general-category #\x31EF) => 'So) ; Unicode 15.1

(test alpha1 (char-alphabetic? #\a) => #t)
(test numer1 (char-numeric? #\1) => #t)
Expand Down
5 changes: 5 additions & 0 deletions release_notes/release_notes.stex
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 +113,11 @@ Online versions of both books can be found at
%-----------------------------------------------------------------------------
\section{Functionality Changes}\label{section:functionality}

\subsection{Unicode 15.1 support (9.9.9)}

The character sets, character classes, and word-breaking algorithms for character, string,
and Unicode-related bytevector operations have now been updated to Unicode 15.1.

\subsection{New supported platforms and portable bytecode (9.9.9)}

AArch64 (64-bit Arm), RV64G (64-bit RISC-V), and LoongArch64 architectures are
Expand Down
16 changes: 11 additions & 5 deletions unicode/Makefile
Original file line number Diff line number Diff line change
@@ -1,6 +1,11 @@
Scheme=../bin/scheme
Scheme=../bin/zuo .. run

doit: unicode-char-cases.ss unicode-charinfo.ss
doit:
$(MAKE) update
$(MAKE) unicode-char-cases.ss unicode-charinfo.ss

update:
./get-UNIDATA

unicode-char-cases.ss: extract-char-cases.ss extract-common.ss unicode-data.ss
echo | $(Scheme) -q extract-char-cases.ss
Expand All @@ -12,10 +17,11 @@ unicode-char-cases.ss: \
UNIDATA/CompositionExclusions.txt\
UNIDATA/UnicodeData.txt\
UNIDATA/CaseFolding.txt\
UNIDATA/SpecialCasing.txt
UNIDATA/SpecialCasing.txt\
UNIDATA/GraphemeBreakProperty.txt

unicode-charinfo.ss: \
UNIDATA/UnicodeData.txt\
UNIDATA/WordBreakProperty.txt\
UNIDATA/PropList.txt

UNIDATA/PropList.txt\
UNIDATA/emoji-data.txt
16 changes: 2 additions & 14 deletions unicode/ReadMe
Original file line number Diff line number Diff line change
@@ -1,14 +1,2 @@
To rebuild unicode-char-cases.ss and unicode-charinfo.ss, download into
./UNIDATA the following files:

http://www.unicode.org/Public/UCD/latest/ucd/CompositionExclusions.txt
http://www.unicode.org/Public/UCD/latest/ucd/UnicodeData.txt
http://www.unicode.org/Public/UCD/latest/ucd/CaseFolding.txt
http://www.unicode.org/Public/UCD/latest/ucd/SpecialCasing.txt
http://www.unicode.org/Public/UCD/latest/ucd/auxiliary/WordBreakProperty.txt
http://www.unicode.org/Public/UCD/latest/ucd/PropList.txt
http://www.unicode.org/Public/UCD/latest/ucd/NormalizationTest.txt
http://www.unicode.org/Public/UCD/latest/ucd/auxiliary/GraphemeBreakProperty.txt
http://www.unicode.org/Public/UCD/latest/ucd/emoji/emoji-data.txt

Then run 'make'.
To download the latest Unicode files and rebuild unicode-char-cases.ss and
unicode-charinfo.ss, run 'make'.
9 changes: 6 additions & 3 deletions unicode/UNIDATA/CaseFolding.txt
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# CaseFolding-15.0.0.txt
# Date: 2022-02-02, 23:35:35 GMT
# © 2022 Unicode®, Inc.
# CaseFolding-15.1.0.txt
# Date: 2023-05-12, 21:53:10 GMT
# © 2023 Unicode®, Inc.
# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
# For terms of use, see https://www.unicode.org/terms_of_use.html
#
Expand Down Expand Up @@ -929,6 +929,7 @@
1FCC; S; 1FC3; # GREEK CAPITAL LETTER ETA WITH PROSGEGRAMMENI
1FD2; F; 03B9 0308 0300; # GREEK SMALL LETTER IOTA WITH DIALYTIKA AND VARIA
1FD3; F; 03B9 0308 0301; # GREEK SMALL LETTER IOTA WITH DIALYTIKA AND OXIA
1FD3; S; 0390; # GREEK SMALL LETTER IOTA WITH DIALYTIKA AND OXIA
1FD6; F; 03B9 0342; # GREEK SMALL LETTER IOTA WITH PERISPOMENI
1FD7; F; 03B9 0308 0342; # GREEK SMALL LETTER IOTA WITH DIALYTIKA AND PERISPOMENI
1FD8; C; 1FD0; # GREEK CAPITAL LETTER IOTA WITH VRACHY
Expand All @@ -937,6 +938,7 @@
1FDB; C; 1F77; # GREEK CAPITAL LETTER IOTA WITH OXIA
1FE2; F; 03C5 0308 0300; # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND VARIA
1FE3; F; 03C5 0308 0301; # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND OXIA
1FE3; S; 03B0; # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND OXIA
1FE4; F; 03C1 0313; # GREEK SMALL LETTER RHO WITH PSILI
1FE6; F; 03C5 0342; # GREEK SMALL LETTER UPSILON WITH PERISPOMENI
1FE7; F; 03C5 0308 0342; # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND PERISPOMENI
Expand Down Expand Up @@ -1328,6 +1330,7 @@ FB02; F; 0066 006C; # LATIN SMALL LIGATURE FL
FB03; F; 0066 0066 0069; # LATIN SMALL LIGATURE FFI
FB04; F; 0066 0066 006C; # LATIN SMALL LIGATURE FFL
FB05; F; 0073 0074; # LATIN SMALL LIGATURE LONG S T
FB05; S; FB06; # LATIN SMALL LIGATURE LONG S T
FB06; F; 0073 0074; # LATIN SMALL LIGATURE ST
FB13; F; 0574 0576; # ARMENIAN SMALL LIGATURE MEN NOW
FB14; F; 0574 0565; # ARMENIAN SMALL LIGATURE MEN ECH
Expand Down
6 changes: 3 additions & 3 deletions unicode/UNIDATA/CompositionExclusions.txt
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# CompositionExclusions-15.0.0.txt
# Date: 2022-05-03, 18:50:00 GMT [KW, LI]
# © 2022 Unicode®, Inc.
# CompositionExclusions-15.1.0.txt
# Date: 2023-01-05
# © 2023 Unicode®, Inc.
# For terms of use, see https://www.unicode.org/terms_of_use.html
#
# Unicode Character Database
Expand Down
Loading

0 comments on commit 0fbd922

Please sign in to comment.