X-Git-Url: https://gitweb.factorcode.org/gitweb.cgi?p=factor.git;a=blobdiff_plain;f=basis%2Funicode%2FUCD%2Fauxiliary%2FWordBreakProperty.txt;h=6f868d21f352e9c39ce593ae5313df465aea03cd;hp=73cd069dcb24aca6b7ffce96dbbb4f255a3ec0e7;hb=e3f197c3bbd776e9bb83d7fa8598687a8842d0b6;hpb=631f909b7c6544e3391bdecb4139e7e2015ae69e diff --git a/basis/unicode/UCD/auxiliary/WordBreakProperty.txt b/basis/unicode/UCD/auxiliary/WordBreakProperty.txt index 73cd069dcb..6f868d21f3 100644 --- a/basis/unicode/UCD/auxiliary/WordBreakProperty.txt +++ b/basis/unicode/UCD/auxiliary/WordBreakProperty.txt @@ -1,11 +1,11 @@ -# WordBreakProperty-14.0.0.txt -# Date: 2021-07-10, 00:35:32 GMT -# © 2021 Unicode®, Inc. +# WordBreakProperty-15.0.0.txt +# Date: 2022-04-27, 02:41:26 GMT +# © 2022 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. -# For terms of use, see http://www.unicode.org/terms_of_use.html +# For terms of use, see https://www.unicode.org/terms_of_use.html # # Unicode Character Database -# For documentation, see http://www.unicode.org/reports/tr44/ +# For documentation, see https://www.unicode.org/reports/tr44/ # ================================================ @@ -180,6 +180,7 @@ FB46..FB4F ; Hebrew_Letter # Lo [10] HEBREW LETTER TSADI WITH DAGESH..HEBREW 0CCC..0CCD ; Extend # Mn [2] KANNADA VOWEL SIGN AU..KANNADA SIGN VIRAMA 0CD5..0CD6 ; Extend # Mc [2] KANNADA LENGTH MARK..KANNADA AI LENGTH MARK 0CE2..0CE3 ; Extend # Mn [2] KANNADA VOWEL SIGN VOCALIC L..KANNADA VOWEL SIGN VOCALIC LL +0CF3 ; Extend # Mc KANNADA SIGN COMBINING ANUSVARA ABOVE RIGHT 0D00..0D01 ; Extend # Mn [2] MALAYALAM SIGN COMBINING ANUSVARA ABOVE..MALAYALAM SIGN CANDRABINDU 0D02..0D03 ; Extend # Mc [2] MALAYALAM SIGN ANUSVARA..MALAYALAM SIGN VISARGA 0D3B..0D3C ; Extend # Mn [2] MALAYALAM SIGN VERTICAL BAR VIRAMA..MALAYALAM SIGN CIRCULAR VIRAMA @@ -203,7 +204,7 @@ FB46..FB4F ; Hebrew_Letter # Lo [10] HEBREW LETTER TSADI WITH DAGESH..HEBREW 0E47..0E4E ; Extend # Mn [8] THAI CHARACTER MAITAIKHU..THAI CHARACTER YAMAKKAN 0EB1 ; Extend # Mn LAO VOWEL SIGN MAI KAN 0EB4..0EBC ; Extend # Mn [9] LAO VOWEL SIGN I..LAO SEMIVOWEL SIGN LO -0EC8..0ECD ; Extend # Mn [6] LAO TONE MAI EK..LAO NIGGAHITA +0EC8..0ECE ; Extend # Mn [7] LAO TONE MAI EK..LAO YAMAKKAN 0F18..0F19 ; Extend # Mn [2] TIBETAN ASTROLOGICAL SIGN -KHYUD PA..TIBETAN ASTROLOGICAL SIGN SDONG TSHUGS 0F35 ; Extend # Mn TIBETAN MARK NGAS BZUNG NYI ZLA 0F37 ; Extend # Mn TIBETAN MARK NGAS BZUNG SGOR RTAGS @@ -407,6 +408,7 @@ FF9E..FF9F ; Extend # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDT 10AE5..10AE6 ; Extend # Mn [2] MANICHAEAN ABBREVIATION MARK ABOVE..MANICHAEAN ABBREVIATION MARK BELOW 10D24..10D27 ; Extend # Mn [4] HANIFI ROHINGYA SIGN HARBAHAY..HANIFI ROHINGYA SIGN TASSI 10EAB..10EAC ; Extend # Mn [2] YEZIDI COMBINING HAMZA MARK..YEZIDI COMBINING MADDA MARK +10EFD..10EFF ; Extend # Mn [3] ARABIC SMALL LOW WORD SAKTA..ARABIC SMALL LOW WORD MADDA 10F46..10F50 ; Extend # Mn [11] SOGDIAN COMBINING DOT BELOW..SOGDIAN COMBINING STROKE BELOW 10F82..10F85 ; Extend # Mn [4] OLD UYGHUR COMBINING DOT ABOVE..OLD UYGHUR COMBINING TWO DOTS BELOW 11000 ; Extend # Mc BRAHMI SIGN CANDRABINDU @@ -443,6 +445,7 @@ FF9E..FF9F ; Extend # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDT 11235 ; Extend # Mc KHOJKI SIGN VIRAMA 11236..11237 ; Extend # Mn [2] KHOJKI SIGN NUKTA..KHOJKI SIGN SHADDA 1123E ; Extend # Mn KHOJKI SIGN SUKUN +11241 ; Extend # Mn KHOJKI VOWEL SIGN VOCALIC R 112DF ; Extend # Mn KHUDAWADI SIGN ANUSVARA 112E0..112E2 ; Extend # Mc [3] KHUDAWADI VOWEL SIGN AA..KHUDAWADI VOWEL SIGN II 112E3..112EA ; Extend # Mn [8] KHUDAWADI VOWEL SIGN U..KHUDAWADI SIGN VIRAMA @@ -552,6 +555,16 @@ FF9E..FF9F ; Extend # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDT 11D97 ; Extend # Mn GUNJALA GONDI VIRAMA 11EF3..11EF4 ; Extend # Mn [2] MAKASAR VOWEL SIGN I..MAKASAR VOWEL SIGN U 11EF5..11EF6 ; Extend # Mc [2] MAKASAR VOWEL SIGN E..MAKASAR VOWEL SIGN O +11F00..11F01 ; Extend # Mn [2] KAWI SIGN CANDRABINDU..KAWI SIGN ANUSVARA +11F03 ; Extend # Mc KAWI SIGN VISARGA +11F34..11F35 ; Extend # Mc [2] KAWI VOWEL SIGN AA..KAWI VOWEL SIGN ALTERNATE AA +11F36..11F3A ; Extend # Mn [5] KAWI VOWEL SIGN I..KAWI VOWEL SIGN VOCALIC R +11F3E..11F3F ; Extend # Mc [2] KAWI VOWEL SIGN E..KAWI VOWEL SIGN AI +11F40 ; Extend # Mn KAWI VOWEL SIGN EU +11F41 ; Extend # Mc KAWI SIGN KILLER +11F42 ; Extend # Mn KAWI CONJOINER +13440 ; Extend # Mn EGYPTIAN HIEROGLYPH MIRROR HORIZONTALLY +13447..13455 ; Extend # Mn [15] EGYPTIAN HIEROGLYPH MODIFIER DAMAGED AT TOP START..EGYPTIAN HIEROGLYPH MODIFIER DAMAGED 16AF0..16AF4 ; Extend # Mn [5] BASSA VAH COMBINING HIGH TONE..BASSA VAH COMBINING HIGH-LOW TONE 16B30..16B36 ; Extend # Mn [7] PAHAWH HMONG MARK CIM TUB..PAHAWH HMONG MARK CIM TAUM 16F4F ; Extend # Mn MIAO SIGN CONSONANT MODIFIER BAR @@ -580,16 +593,18 @@ FF9E..FF9F ; Extend # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDT 1E01B..1E021 ; Extend # Mn [7] COMBINING GLAGOLITIC LETTER SHTA..COMBINING GLAGOLITIC LETTER YATI 1E023..1E024 ; Extend # Mn [2] COMBINING GLAGOLITIC LETTER YU..COMBINING GLAGOLITIC LETTER SMALL YUS 1E026..1E02A ; Extend # Mn [5] COMBINING GLAGOLITIC LETTER YO..COMBINING GLAGOLITIC LETTER FITA +1E08F ; Extend # Mn COMBINING CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I 1E130..1E136 ; Extend # Mn [7] NYIAKENG PUACHUE HMONG TONE-B..NYIAKENG PUACHUE HMONG TONE-D 1E2AE ; Extend # Mn TOTO SIGN RISING TONE 1E2EC..1E2EF ; Extend # Mn [4] WANCHO TONE TUP..WANCHO TONE KOINI +1E4EC..1E4EF ; Extend # Mn [4] NAG MUNDARI SIGN MUHOR..NAG MUNDARI SIGN SUTUH 1E8D0..1E8D6 ; Extend # Mn [7] MENDE KIKAKUI COMBINING NUMBER TEENS..MENDE KIKAKUI COMBINING NUMBER MILLIONS 1E944..1E94A ; Extend # Mn [7] ADLAM ALIF LENGTHENER..ADLAM NUKTA 1F3FB..1F3FF ; Extend # Sk [5] EMOJI MODIFIER FITZPATRICK TYPE-1-2..EMOJI MODIFIER FITZPATRICK TYPE-6 E0020..E007F ; Extend # Cf [96] TAG SPACE..CANCEL TAG E0100..E01EF ; Extend # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 2512 +# Total code points: 2554 # ================================================ @@ -615,12 +630,12 @@ FEFF ; Format # Cf ZERO WIDTH NO-BREAK SPACE FFF9..FFFB ; Format # Cf [3] INTERLINEAR ANNOTATION ANCHOR..INTERLINEAR ANNOTATION TERMINATOR 110BD ; Format # Cf KAITHI NUMBER SIGN 110CD ; Format # Cf KAITHI NUMBER SIGN ABOVE -13430..13438 ; Format # Cf [9] EGYPTIAN HIEROGLYPH VERTICAL JOINER..EGYPTIAN HIEROGLYPH END SEGMENT +13430..1343F ; Format # Cf [16] EGYPTIAN HIEROGLYPH VERTICAL JOINER..EGYPTIAN HIEROGLYPH END WALLED ENCLOSURE 1BCA0..1BCA3 ; Format # Cf [4] SHORTHAND FORMAT LETTER OVERLAP..SHORTHAND FORMAT UP STEP 1D173..1D17A ; Format # Cf [8] MUSICAL SYMBOL BEGIN BEAM..MUSICAL SYMBOL END PHRASE E0001 ; Format # Cf LANGUAGE TAG -# Total code points: 64 +# Total code points: 71 # ================================================ @@ -641,9 +656,10 @@ FF71..FF9D ; Katakana # Lo [45] HALFWIDTH KATAKANA LETTER A..HALFWIDTH KATAK 1AFFD..1AFFE ; Katakana # Lm [2] KATAKANA LETTER MINNAN NASALIZED TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-8 1B000 ; Katakana # Lo KATAKANA LETTER ARCHAIC E 1B120..1B122 ; Katakana # Lo [3] KATAKANA LETTER ARCHAIC YI..KATAKANA LETTER ARCHAIC WU +1B155 ; Katakana # Lo KATAKANA LETTER SMALL KO 1B164..1B167 ; Katakana # Lo [4] KATAKANA LETTER SMALL WI..KATAKANA LETTER SMALL N -# Total code points: 330 +# Total code points: 331 # ================================================ @@ -1127,6 +1143,7 @@ FFDA..FFDC ; ALetter # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 111DC ; ALetter # Lo SHARADA HEADSTROKE 11200..11211 ; ALetter # Lo [18] KHOJKI LETTER A..KHOJKI LETTER JJA 11213..1122B ; ALetter # Lo [25] KHOJKI LETTER NYA..KHOJKI LETTER LLA +1123F..11240 ; ALetter # Lo [2] KHOJKI LETTER QA..KHOJKI LETTER SHORT I 11280..11286 ; ALetter # Lo [7] MULTANI LETTER A..MULTANI LETTER GA 11288 ; ALetter # Lo MULTANI LETTER GHA 1128A..1128D ; ALetter # Lo [4] MULTANI LETTER CA..MULTANI LETTER JJA @@ -1187,12 +1204,16 @@ FFDA..FFDC ; ALetter # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 11D6A..11D89 ; ALetter # Lo [32] GUNJALA GONDI LETTER OO..GUNJALA GONDI LETTER SA 11D98 ; ALetter # Lo GUNJALA GONDI OM 11EE0..11EF2 ; ALetter # Lo [19] MAKASAR LETTER KA..MAKASAR ANGKA +11F02 ; ALetter # Lo KAWI SIGN REPHA +11F04..11F10 ; ALetter # Lo [13] KAWI LETTER A..KAWI LETTER O +11F12..11F33 ; ALetter # Lo [34] KAWI LETTER KA..KAWI LETTER JNYA 11FB0 ; ALetter # Lo LISU LETTER YHA 12000..12399 ; ALetter # Lo [922] CUNEIFORM SIGN A..CUNEIFORM SIGN U U 12400..1246E ; ALetter # Nl [111] CUNEIFORM NUMERIC SIGN TWO ASH..CUNEIFORM NUMERIC SIGN NINE U VARIANT FORM 12480..12543 ; ALetter # Lo [196] CUNEIFORM SIGN AB TIMES NUN TENU..CUNEIFORM SIGN ZU5 TIMES THREE DISH TENU 12F90..12FF0 ; ALetter # Lo [97] CYPRO-MINOAN SIGN CM001..CYPRO-MINOAN SIGN CM114 -13000..1342E ; ALetter # Lo [1071] EGYPTIAN HIEROGLYPH A001..EGYPTIAN HIEROGLYPH AA032 +13000..1342F ; ALetter # Lo [1072] EGYPTIAN HIEROGLYPH A001..EGYPTIAN HIEROGLYPH V011D +13441..13446 ; ALetter # Lo [6] EGYPTIAN HIEROGLYPH FULL BLANK..EGYPTIAN HIEROGLYPH WIDE LOST SIGN 14400..14646 ; ALetter # Lo [583] ANATOLIAN HIEROGLYPH A001..ANATOLIAN HIEROGLYPH A530 16800..16A38 ; ALetter # Lo [569] BAMUM LETTER PHASE-A NGKUE MFON..BAMUM LETTER PHASE-F VUEQ 16A40..16A5E ; ALetter # Lo [31] MRO LETTER TA..MRO LETTER TEK @@ -1245,11 +1266,15 @@ FFDA..FFDC ; ALetter # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 1DF00..1DF09 ; ALetter # L& [10] LATIN SMALL LETTER FENG DIGRAPH WITH TRILL..LATIN SMALL LETTER T WITH HOOK AND RETROFLEX HOOK 1DF0A ; ALetter # Lo LATIN LETTER RETROFLEX CLICK WITH RETROFLEX HOOK 1DF0B..1DF1E ; ALetter # L& [20] LATIN SMALL LETTER ESH WITH DOUBLE BAR..LATIN SMALL LETTER S WITH CURL +1DF25..1DF2A ; ALetter # L& [6] LATIN SMALL LETTER D WITH MID-HEIGHT LEFT HOOK..LATIN SMALL LETTER T WITH MID-HEIGHT LEFT HOOK +1E030..1E06D ; ALetter # Lm [62] MODIFIER LETTER CYRILLIC SMALL A..MODIFIER LETTER CYRILLIC SMALL STRAIGHT U WITH STROKE 1E100..1E12C ; ALetter # Lo [45] NYIAKENG PUACHUE HMONG LETTER MA..NYIAKENG PUACHUE HMONG LETTER W 1E137..1E13D ; ALetter # Lm [7] NYIAKENG PUACHUE HMONG SIGN FOR PERSON..NYIAKENG PUACHUE HMONG SYLLABLE LENGTHENER 1E14E ; ALetter # Lo NYIAKENG PUACHUE HMONG LOGOGRAM NYAJ 1E290..1E2AD ; ALetter # Lo [30] TOTO LETTER PA..TOTO LETTER A 1E2C0..1E2EB ; ALetter # Lo [44] WANCHO LETTER AA..WANCHO LETTER YIH +1E4D0..1E4EA ; ALetter # Lo [27] NAG MUNDARI LETTER O..NAG MUNDARI LETTER ELL +1E4EB ; ALetter # Lm NAG MUNDARI SIGN OJOD 1E7E0..1E7E6 ; ALetter # Lo [7] ETHIOPIC SYLLABLE HHYA..ETHIOPIC SYLLABLE HHYO 1E7E8..1E7EB ; ALetter # Lo [4] ETHIOPIC SYLLABLE GURAGE HHWA..ETHIOPIC SYLLABLE HHWE 1E7ED..1E7EE ; ALetter # Lo [2] ETHIOPIC SYLLABLE GURAGE MWI..ETHIOPIC SYLLABLE GURAGE MWEE @@ -1294,7 +1319,7 @@ FFDA..FFDC ; ALetter # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 1F150..1F169 ; ALetter # So [26] NEGATIVE CIRCLED LATIN CAPITAL LETTER A..NEGATIVE CIRCLED LATIN CAPITAL LETTER Z 1F170..1F189 ; ALetter # So [26] NEGATIVE SQUARED LATIN CAPITAL LETTER A..NEGATIVE SQUARED LATIN CAPITAL LETTER Z -# Total code points: 29336 +# Total code points: 29489 # ================================================ @@ -1398,16 +1423,18 @@ FF10..FF19 ; Numeric # Nd [10] FULLWIDTH DIGIT ZERO..FULLWIDTH DIGIT NINE 11C50..11C59 ; Numeric # Nd [10] BHAIKSUKI DIGIT ZERO..BHAIKSUKI DIGIT NINE 11D50..11D59 ; Numeric # Nd [10] MASARAM GONDI DIGIT ZERO..MASARAM GONDI DIGIT NINE 11DA0..11DA9 ; Numeric # Nd [10] GUNJALA GONDI DIGIT ZERO..GUNJALA GONDI DIGIT NINE +11F50..11F59 ; Numeric # Nd [10] KAWI DIGIT ZERO..KAWI DIGIT NINE 16A60..16A69 ; Numeric # Nd [10] MRO DIGIT ZERO..MRO DIGIT NINE 16AC0..16AC9 ; Numeric # Nd [10] TANGSA DIGIT ZERO..TANGSA DIGIT NINE 16B50..16B59 ; Numeric # Nd [10] PAHAWH HMONG DIGIT ZERO..PAHAWH HMONG DIGIT NINE 1D7CE..1D7FF ; Numeric # Nd [50] MATHEMATICAL BOLD DIGIT ZERO..MATHEMATICAL MONOSPACE DIGIT NINE 1E140..1E149 ; Numeric # Nd [10] NYIAKENG PUACHUE HMONG DIGIT ZERO..NYIAKENG PUACHUE HMONG DIGIT NINE 1E2F0..1E2F9 ; Numeric # Nd [10] WANCHO DIGIT ZERO..WANCHO DIGIT NINE +1E4F0..1E4F9 ; Numeric # Nd [10] NAG MUNDARI DIGIT ZERO..NAG MUNDARI DIGIT NINE 1E950..1E959 ; Numeric # Nd [10] ADLAM DIGIT ZERO..ADLAM DIGIT NINE 1FBF0..1FBF9 ; Numeric # Nd [10] SEGMENTED DIGIT ZERO..SEGMENTED DIGIT NINE -# Total code points: 661 +# Total code points: 681 # ================================================