X-Git-Url: https://gitweb.factorcode.org/gitweb.cgi?p=factor.git;a=blobdiff_plain;f=basis%2Funicode%2FUCD%2FIndicSyllabicCategory.txt;h=7843c175abb5238b615a3c640bc35a1115a69bdf;hp=23b86372aea9636e51ded90b9e04822af2713f7e;hb=e3f197c3bbd776e9bb83d7fa8598687a8842d0b6;hpb=631f909b7c6544e3391bdecb4139e7e2015ae69e diff --git a/basis/unicode/UCD/IndicSyllabicCategory.txt b/basis/unicode/UCD/IndicSyllabicCategory.txt index 23b86372ae..7843c175ab 100644 --- a/basis/unicode/UCD/IndicSyllabicCategory.txt +++ b/basis/unicode/UCD/IndicSyllabicCategory.txt @@ -1,11 +1,11 @@ -# IndicSyllabicCategory-14.0.0.txt -# Date: 2021-05-22, 01:01:00 GMT [KW, RP] -# © 2021 Unicode®, Inc. +# IndicSyllabicCategory-15.0.0.txt +# Date: 2022-05-26, 02:18:00 GMT [KW, RP] +# © 2022 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. -# For terms of use, see http://www.unicode.org/terms_of_use.html +# For terms of use, see https://www.unicode.org/terms_of_use.html # # For documentation, see UAX #44: Unicode Character Database, -# at http://www.unicode.org/reports/tr44/ +# at https://www.unicode.org/reports/tr44/ # # This file defines the following property: # @@ -37,7 +37,7 @@ # # Ahom, Balinese, Batak, Bengali, Bhaiksuki, Brahmi, Buginese, Buhid, # Chakma, Cham, Devanagari, Dives Akuru, Dogra, Grantha, Gujarati, -# Gunjala Gondi, Gurmukhi, Hanunoo, Javanese, Kaithi, Kannada, +# Gunjala Gondi, Gurmukhi, Hanunoo, Javanese, Kaithi, Kannada, Kawi, # Kayah Li, Kharoshthi, Khmer, Khojki, Khudawadi, Lao, Lepcha, Limbu, # Mahajani, Makasar, Malayalam, Marchen, Masaram Gondi, Meetei Mayek, # Modi, Multani, Myanmar, Nandinagari, Newa, New Tai Lue, Oriya, @@ -84,6 +84,7 @@ 0C80 ; Bindu # Lo KANNADA SIGN SPACING CANDRABINDU 0C81 ; Bindu # Mn KANNADA SIGN CANDRABINDU 0C82 ; Bindu # Mc KANNADA SIGN ANUSVARA +0CF3 ; Bindu # Mc KANNADA SIGN COMBINING ANUSVARA ABOVE RIGHT 0D00..0D01 ; Bindu # Mn [2] MALAYALAM SIGN COMBINING ANUSVARA ABOVE..MALAYALAM SIGN CANDRABINDU 0D02 ; Bindu # Mc MALAYALAM SIGN ANUSVARA 0D04 ; Bindu # Lo MALAYALAM LETTER VEDIC ANUSVARA @@ -133,6 +134,7 @@ A980..A981 ; Bindu # Mn [2] JAVANESE SIGN PANYANGGA..JAVANESE SIGN CECAK 11CB5..11CB6 ; Bindu # Mn [2] MARCHEN SIGN ANUSVARA..MARCHEN SIGN CANDRABINDU 11D40 ; Bindu # Mn MASARAM GONDI SIGN ANUSVARA 11D95 ; Bindu # Mn GUNJALA GONDI SIGN ANUSVARA +11F00..11F01 ; Bindu # Mn [2] KAWI SIGN CANDRABINDU..KAWI SIGN ANUSVARA # ================================================ @@ -179,6 +181,7 @@ AAF5 ; Visarga # Mc MEETEI MAYEK VOWEL SIGN VISARGA 11C3E ; Visarga # Mc BHAIKSUKI SIGN VISARGA 11D41 ; Visarga # Mn MASARAM GONDI SIGN VISARGA 11D96 ; Visarga # Mc GUNJALA GONDI SIGN VISARGA +11F03 ; Visarga # Mc KAWI SIGN VISARGA # ================================================ @@ -320,6 +323,7 @@ ABED ; Pure_Killer # Mn MEETEI MAYEK APUN IYEK 1193D ; Pure_Killer # Mc DIVES AKURU SIGN HALANTA 11A34 ; Pure_Killer # Mn ZANABAZAR SQUARE SIGN VIRAMA 11D44 ; Pure_Killer # Mn MASARAM GONDI SIGN HALANTA +11F41 ; Pure_Killer # Mc KAWI SIGN KILLER # ================================================ @@ -346,6 +350,7 @@ AAF6 ; Invisible_Stacker # Mn MEETEI MAYEK VIRAMA 11A99 ; Invisible_Stacker # Mn SOYOMBO SUBJOINER 11D45 ; Invisible_Stacker # Mn MASARAM GONDI VIRAMA 11D97 ; Invisible_Stacker # Mn GUNJALA GONDI VIRAMA +11F42 ; Invisible_Stacker # Mn KAWI CONJOINER # ================================================ @@ -416,6 +421,7 @@ ABD1 ; Vowel_Independent # Lo MEETEI MAYEK LETTER ATIYA 11103..11106 ; Vowel_Independent # Lo [4] CHAKMA LETTER AA..CHAKMA LETTER E 11183..11190 ; Vowel_Independent # Lo [14] SHARADA LETTER A..SHARADA LETTER AU 11200..11207 ; Vowel_Independent # Lo [8] KHOJKI LETTER A..KHOJKI LETTER AU +11240 ; Vowel_Independent # Lo KHOJKI LETTER SHORT I 11280..11283 ; Vowel_Independent # Lo [4] MULTANI LETTER A..MULTANI LETTER E 112B0..112B9 ; Vowel_Independent # Lo [10] KHUDAWADI LETTER A..KHUDAWADI LETTER AU 11305..1130C ; Vowel_Independent # Lo [8] GRANTHA LETTER A..GRANTHA LETTER VOCALIC L @@ -443,6 +449,7 @@ ABD1 ; Vowel_Independent # Lo MEETEI MAYEK LETTER ATIYA 11D60..11D65 ; Vowel_Independent # Lo [6] GUNJALA GONDI LETTER A..GUNJALA GONDI LETTER UU 11D67..11D68 ; Vowel_Independent # Lo [2] GUNJALA GONDI LETTER EE..GUNJALA GONDI LETTER AI 11D6A..11D6B ; Vowel_Independent # Lo [2] GUNJALA GONDI LETTER OO..GUNJALA GONDI LETTER AU +11F04..11F10 ; Vowel_Independent # Lo [13] KAWI LETTER A..KAWI LETTER O # ================================================ @@ -638,6 +645,7 @@ ABE9..ABEA ; Vowel_Dependent # Mc [2] MEETEI MAYEK VOWEL SIGN CHEINAP..MEET 1122C..1122E ; Vowel_Dependent # Mc [3] KHOJKI VOWEL SIGN AA..KHOJKI VOWEL SIGN II 1122F..11231 ; Vowel_Dependent # Mn [3] KHOJKI VOWEL SIGN U..KHOJKI VOWEL SIGN AI 11232..11233 ; Vowel_Dependent # Mc [2] KHOJKI VOWEL SIGN O..KHOJKI VOWEL SIGN AU +11241 ; Vowel_Dependent # Mn KHOJKI VOWEL SIGN VOCALIC R 112E0..112E2 ; Vowel_Dependent # Mc [3] KHUDAWADI VOWEL SIGN AA..KHUDAWADI VOWEL SIGN II 112E3..112E8 ; Vowel_Dependent # Mn [6] KHUDAWADI VOWEL SIGN U..KHUDAWADI VOWEL SIGN AU 1133E..1133F ; Vowel_Dependent # Mc [2] GRANTHA VOWEL SIGN AA..GRANTHA VOWEL SIGN I @@ -700,6 +708,10 @@ ABE9..ABEA ; Vowel_Dependent # Mc [2] MEETEI MAYEK VOWEL SIGN CHEINAP..MEET 11D93..11D94 ; Vowel_Dependent # Mc [2] GUNJALA GONDI VOWEL SIGN OO..GUNJALA GONDI VOWEL SIGN AU 11EF3..11EF4 ; Vowel_Dependent # Mn [2] MAKASAR VOWEL SIGN I..MAKASAR VOWEL SIGN U 11EF5..11EF6 ; Vowel_Dependent # Mc [2] MAKASAR VOWEL SIGN E..MAKASAR VOWEL SIGN O +11F34..11F35 ; Vowel_Dependent # Mc [2] KAWI VOWEL SIGN AA..KAWI VOWEL SIGN ALTERNATE AA +11F36..11F3A ; Vowel_Dependent # Mn [5] KAWI VOWEL SIGN I..KAWI VOWEL SIGN VOCALIC R +11F3E..11F3F ; Vowel_Dependent # Mc [2] KAWI VOWEL SIGN E..KAWI VOWEL SIGN AI +11F40 ; Vowel_Dependent # Mn KAWI VOWEL SIGN EU # ================================================ @@ -878,6 +890,7 @@ ABD2..ABDA ; Consonant # Lo [9] MEETEI MAYEK LETTER GOK..MEETEI MAYEK LETTE 11191..111B2 ; Consonant # Lo [34] SHARADA LETTER KA..SHARADA LETTER HA 11208..11211 ; Consonant # Lo [10] KHOJKI LETTER KA..KHOJKI LETTER JJA 11213..1122B ; Consonant # Lo [25] KHOJKI LETTER NYA..KHOJKI LETTER LLA +1123F ; Consonant # Lo KHOJKI LETTER QA 11284..11286 ; Consonant # Lo [3] MULTANI LETTER KA..MULTANI LETTER GA 11288 ; Consonant # Lo MULTANI LETTER GHA 1128A..1128D ; Consonant # Lo [4] MULTANI LETTER CA..MULTANI LETTER JJA @@ -908,6 +921,7 @@ ABD2..ABDA ; Consonant # Lo [9] MEETEI MAYEK LETTER GOK..MEETEI MAYEK LETTE 11D0C..11D30 ; Consonant # Lo [37] MASARAM GONDI LETTER KA..MASARAM GONDI LETTER TRA 11D6C..11D89 ; Consonant # Lo [30] GUNJALA GONDI LETTER YA..GUNJALA GONDI LETTER SA 11EE0..11EF1 ; Consonant # Lo [18] MAKASAR LETTER KA..MAKASAR LETTER A +11F12..11F33 ; Consonant # Lo [34] KAWI LETTER KA..KAWI LETTER JNYA # ================================================ @@ -963,6 +977,7 @@ ABD2..ABDA ; Consonant # Lo [9] MEETEI MAYEK LETTER GOK..MEETEI MAYEK LETTE 0D4E ; Consonant_Preceding_Repha # Lo MALAYALAM LETTER DOT REPH 11941 ; Consonant_Preceding_Repha # Lo DIVES AKURU INITIAL RA 11D46 ; Consonant_Preceding_Repha # Lo MASARAM GONDI REPHA +11F02 ; Consonant_Preceding_Repha # Lo KAWI SIGN REPHA # ================================================ @@ -1132,10 +1147,14 @@ ABEC ; Tone_Mark # Mc MEETEI MAYEK LUM IYEK # Indic_Syllabic_Category=Gemination_Mark # Gemination Mark (doubling of the preceding or following consonant) +# +# U+0A71 GURMUKHI ADDAK precedes the consonant it geminates, while the +# others follow the consonant they geminate. # [Not derivable] 0A71 ; Gemination_Mark # Mn GURMUKHI ADDAK +0AFB ; Gemination_Mark # Mn GUJARATI SIGN SHADDA 11237 ; Gemination_Mark # Mn KHOJKI SIGN SHADDA 11A98 ; Gemination_Mark # Mn SOYOMBO GEMINATION MARK @@ -1149,7 +1168,8 @@ ABEC ; Tone_Mark # Mc MEETEI MAYEK LUM IYEK 0951..0952 ; Cantillation_Mark # Mn [2] DEVANAGARI STRESS SIGN UDATTA..DEVANAGARI STRESS SIGN ANUDATTA 0A51 ; Cantillation_Mark # Mn GURMUKHI SIGN UDAAT -0AFA..0AFC ; Cantillation_Mark # Mn [3] GUJARATI SIGN SUKUN..GUJARATI SIGN MADDAH +0AFA ; Cantillation_Mark # Mn GUJARATI SIGN SUKUN +0AFC ; Cantillation_Mark # Mn GUJARATI SIGN MADDAH 1CD0..1CD2 ; Cantillation_Mark # Mn [3] VEDIC TONE KARSHANA..VEDIC TONE PRENKHA 1CD4..1CE0 ; Cantillation_Mark # Mn [13] VEDIC SIGN YAJURVEDIC MIDLINE SVARITA..VEDIC TONE RIGVEDIC KASHMIRI INDEPENDENT SVARITA 1CE1 ; Cantillation_Mark # Mc VEDIC TONE ATHARVAVEDIC INDEPENDENT SVARITA @@ -1177,12 +1197,13 @@ A8E0..A8F1 ; Cantillation_Mark # Mn [18] COMBINING DEVANAGARI DIGIT ZERO..CO # Indic_Syllabic_Category=Syllable_Modifier # Syllable Modifier (miscellaneous combining characters that modify -# something in the orthographic syllable they succeed) +# something in the orthographic syllable they succeed or appear in) # [Not derivable] 00B2..00B3 ; Syllable_Modifier # No [2] SUPERSCRIPT TWO..SUPERSCRIPT THREE 09FE ; Syllable_Modifier # Mn BENGALI SANDHI MARK +0ECE ; Syllable_Modifier # Mn LAO YAMAKKAN 0F35 ; Syllable_Modifier # Mn TIBETAN MARK NGAS BZUNG NYI ZLA 0F37 ; Syllable_Modifier # Mn TIBETAN MARK NGAS BZUNG SGOR RTAGS 0FC6 ; Syllable_Modifier # Mn TIBETAN SYMBOL PADMA GDAN @@ -1304,14 +1325,20 @@ ABF0..ABF9 ; Number # Nd [10] MEETEI MAYEK DIGIT ZERO..MEETEI MAYEK DIGIT NI 11C5A..11C6C ; Number # No [19] BHAIKSUKI NUMBER ONE..BHAIKSUKI HUNDREDS UNIT MARK 11D50..11D59 ; Number # Nd [10] MASARAM GONDI DIGIT ZERO..MASARAM GONDI DIGIT NINE 11DA0..11DA9 ; Number # Nd [10] GUNJALA GONDI DIGIT ZERO..GUNJALA GONDI DIGIT NINE +11F50..11F59 ; Number # Nd [10] KAWI DIGIT ZERO..KAWI DIGIT NINE # ================================================ # Indic_Syllabic_Category=Brahmi_Joining_Number -# Brahmi Joining Number (similar to Number in that in can be used as -# vowel-holders like Consonant_Placeholder, but may also be joined by -# a Number_Joiner of the same script, e.g. in Brahmi) +# Brahmi Joining Number (may be joined by a Number_Joiner of the same +# script, e.g. in Brahmi) +# +# Note: These are different from Numbers, in the way that there is no known +# evidence of Brahmi Joining Numbers taking vowels or subjoined consonants. +# Until such evidence is found, implementations may assume that Brahmi +# Joining Numbers only participate in shaping with other Brahmi Joining +# Numbers. # [Not derivable]