1 # CompositionExclusions-12.1.0.txt
2 # Date: 2019-03-08, 23:59:00 GMT [KW, LI]
3 # © 2019 Unicode®, Inc.
4 # For terms of use, see http://www.unicode.org/terms_of_use.html
6 # Unicode Character Database
7 # For documentation, see http://www.unicode.org/reports/tr44/
9 # This file lists the characters for the Composition Exclusion Table
10 # defined in UAX #15, Unicode Normalization Forms.
12 # This file is a normative contributory data file in the
13 # Unicode Character Database.
15 # For more information, see
16 # http://www.unicode.org/unicode/reports/tr15/#Primary_Exclusion_List_Table
18 # For a full derivation of composition exclusions, see the derived property
19 # Full_Composition_Exclusion in DerivedNormalizationProps.txt
22 # ================================================
23 # (1) Script Specifics
25 # This list of characters cannot be derived from the UnicodeData.txt file.
26 # ================================================
28 0958 # DEVANAGARI LETTER QA
29 0959 # DEVANAGARI LETTER KHHA
30 095A # DEVANAGARI LETTER GHHA
31 095B # DEVANAGARI LETTER ZA
32 095C # DEVANAGARI LETTER DDDHA
33 095D # DEVANAGARI LETTER RHA
34 095E # DEVANAGARI LETTER FA
35 095F # DEVANAGARI LETTER YYA
36 09DC # BENGALI LETTER RRA
37 09DD # BENGALI LETTER RHA
38 09DF # BENGALI LETTER YYA
39 0A33 # GURMUKHI LETTER LLA
40 0A36 # GURMUKHI LETTER SHA
41 0A59 # GURMUKHI LETTER KHHA
42 0A5A # GURMUKHI LETTER GHHA
43 0A5B # GURMUKHI LETTER ZA
44 0A5E # GURMUKHI LETTER FA
45 0B5C # ORIYA LETTER RRA
46 0B5D # ORIYA LETTER RHA
47 0F43 # TIBETAN LETTER GHA
48 0F4D # TIBETAN LETTER DDHA
49 0F52 # TIBETAN LETTER DHA
50 0F57 # TIBETAN LETTER BHA
51 0F5C # TIBETAN LETTER DZHA
52 0F69 # TIBETAN LETTER KSSA
53 0F76 # TIBETAN VOWEL SIGN VOCALIC R
54 0F78 # TIBETAN VOWEL SIGN VOCALIC L
55 0F93 # TIBETAN SUBJOINED LETTER GHA
56 0F9D # TIBETAN SUBJOINED LETTER DDHA
57 0FA2 # TIBETAN SUBJOINED LETTER DHA
58 0FA7 # TIBETAN SUBJOINED LETTER BHA
59 0FAC # TIBETAN SUBJOINED LETTER DZHA
60 0FB9 # TIBETAN SUBJOINED LETTER KSSA
61 FB1D # HEBREW LETTER YOD WITH HIRIQ
62 FB1F # HEBREW LIGATURE YIDDISH YOD YOD PATAH
63 FB2A # HEBREW LETTER SHIN WITH SHIN DOT
64 FB2B # HEBREW LETTER SHIN WITH SIN DOT
65 FB2C # HEBREW LETTER SHIN WITH DAGESH AND SHIN DOT
66 FB2D # HEBREW LETTER SHIN WITH DAGESH AND SIN DOT
67 FB2E # HEBREW LETTER ALEF WITH PATAH
68 FB2F # HEBREW LETTER ALEF WITH QAMATS
69 FB30 # HEBREW LETTER ALEF WITH MAPIQ
70 FB31 # HEBREW LETTER BET WITH DAGESH
71 FB32 # HEBREW LETTER GIMEL WITH DAGESH
72 FB33 # HEBREW LETTER DALET WITH DAGESH
73 FB34 # HEBREW LETTER HE WITH MAPIQ
74 FB35 # HEBREW LETTER VAV WITH DAGESH
75 FB36 # HEBREW LETTER ZAYIN WITH DAGESH
76 FB38 # HEBREW LETTER TET WITH DAGESH
77 FB39 # HEBREW LETTER YOD WITH DAGESH
78 FB3A # HEBREW LETTER FINAL KAF WITH DAGESH
79 FB3B # HEBREW LETTER KAF WITH DAGESH
80 FB3C # HEBREW LETTER LAMED WITH DAGESH
81 FB3E # HEBREW LETTER MEM WITH DAGESH
82 FB40 # HEBREW LETTER NUN WITH DAGESH
83 FB41 # HEBREW LETTER SAMEKH WITH DAGESH
84 FB43 # HEBREW LETTER FINAL PE WITH DAGESH
85 FB44 # HEBREW LETTER PE WITH DAGESH
86 FB46 # HEBREW LETTER TSADI WITH DAGESH
87 FB47 # HEBREW LETTER QOF WITH DAGESH
88 FB48 # HEBREW LETTER RESH WITH DAGESH
89 FB49 # HEBREW LETTER SHIN WITH DAGESH
90 FB4A # HEBREW LETTER TAV WITH DAGESH
91 FB4B # HEBREW LETTER VAV WITH HOLAM
92 FB4C # HEBREW LETTER BET WITH RAFE
93 FB4D # HEBREW LETTER KAF WITH RAFE
94 FB4E # HEBREW LETTER PE WITH RAFE
96 # Total code points: 67
98 # ================================================
99 # (2) Post Composition Version precomposed characters
101 # These characters cannot be derived solely from the UnicodeData.txt file
102 # in this version of Unicode.
104 # Note that characters added to the standard after the
105 # Composition Version and which have canonical decomposition mappings
106 # are not automatically added to this list of Post Composition
107 # Version precomposed characters.
108 # ================================================
110 2ADC # FORKING
111 1D15E # MUSICAL SYMBOL HALF NOTE
112 1D15F # MUSICAL SYMBOL QUARTER NOTE
113 1D160 # MUSICAL SYMBOL EIGHTH NOTE
114 1D161 # MUSICAL SYMBOL SIXTEENTH NOTE
115 1D162 # MUSICAL SYMBOL THIRTY-SECOND NOTE
116 1D163 # MUSICAL SYMBOL SIXTY-FOURTH NOTE
117 1D164 # MUSICAL SYMBOL ONE HUNDRED TWENTY-EIGHTH NOTE
118 1D1BB # MUSICAL SYMBOL MINIMA
119 1D1BC # MUSICAL SYMBOL MINIMA BLACK
120 1D1BD # MUSICAL SYMBOL SEMIMINIMA WHITE
121 1D1BE # MUSICAL SYMBOL SEMIMINIMA BLACK
122 1D1BF # MUSICAL SYMBOL FUSA WHITE
123 1D1C0 # MUSICAL SYMBOL FUSA BLACK
125 # Total code points: 14
127 # ================================================
128 # (3) Singleton Decompositions
130 # These characters can be derived from the UnicodeData.txt file
131 # by including all canonically decomposable characters whose
132 # canonical decomposition consists of a single character.
134 # These characters are simply quoted here for reference.
135 # See also Full_Composition_Exclusion in DerivedNormalizationProps.txt
136 # ================================================
138 # 0340..0341 [2] COMBINING GRAVE TONE MARK..COMBINING ACUTE TONE MARK
139 # 0343 COMBINING GREEK KORONIS
140 # 0374 GREEK NUMERAL SIGN
141 # 037E GREEK QUESTION MARK
142 # 0387 GREEK ANO TELEIA
143 # 1F71 GREEK SMALL LETTER ALPHA WITH OXIA
144 # 1F73 GREEK SMALL LETTER EPSILON WITH OXIA
145 # 1F75 GREEK SMALL LETTER ETA WITH OXIA
146 # 1F77 GREEK SMALL LETTER IOTA WITH OXIA
147 # 1F79 GREEK SMALL LETTER OMICRON WITH OXIA
148 # 1F7B GREEK SMALL LETTER UPSILON WITH OXIA
149 # 1F7D GREEK SMALL LETTER OMEGA WITH OXIA
150 # 1FBB GREEK CAPITAL LETTER ALPHA WITH OXIA
151 # 1FBE GREEK PROSGEGRAMMENI
152 # 1FC9 GREEK CAPITAL LETTER EPSILON WITH OXIA
153 # 1FCB GREEK CAPITAL LETTER ETA WITH OXIA
154 # 1FD3 GREEK SMALL LETTER IOTA WITH DIALYTIKA AND OXIA
155 # 1FDB GREEK CAPITAL LETTER IOTA WITH OXIA
156 # 1FE3 GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND OXIA
157 # 1FEB GREEK CAPITAL LETTER UPSILON WITH OXIA
158 # 1FEE..1FEF [2] GREEK DIALYTIKA AND OXIA..GREEK VARIA
159 # 1FF9 GREEK CAPITAL LETTER OMICRON WITH OXIA
160 # 1FFB GREEK CAPITAL LETTER OMEGA WITH OXIA
161 # 1FFD GREEK OXIA
162 # 2000..2001 [2] EN QUAD..EM QUAD
163 # 2126 OHM SIGN
164 # 212A..212B [2] KELVIN SIGN..ANGSTROM SIGN
165 # 2329 LEFT-POINTING ANGLE BRACKET
166 # 232A RIGHT-POINTING ANGLE BRACKET
167 # F900..FA0D [270] CJK COMPATIBILITY IDEOGRAPH-F900..CJK COMPATIBILITY IDEOGRAPH-FA0D
168 # FA10 CJK COMPATIBILITY IDEOGRAPH-FA10
169 # FA12 CJK COMPATIBILITY IDEOGRAPH-FA12
170 # FA15..FA1E [10] CJK COMPATIBILITY IDEOGRAPH-FA15..CJK COMPATIBILITY IDEOGRAPH-FA1E
171 # FA20 CJK COMPATIBILITY IDEOGRAPH-FA20
172 # FA22 CJK COMPATIBILITY IDEOGRAPH-FA22
173 # FA25..FA26 [2] CJK COMPATIBILITY IDEOGRAPH-FA25..CJK COMPATIBILITY IDEOGRAPH-FA26
174 # FA2A..FA6D [68] CJK COMPATIBILITY IDEOGRAPH-FA2A..CJK COMPATIBILITY IDEOGRAPH-FA6D
175 # FA70..FAD9 [106] CJK COMPATIBILITY IDEOGRAPH-FA70..CJK COMPATIBILITY IDEOGRAPH-FAD9
176 # 2F800..2FA1D [542] CJK COMPATIBILITY IDEOGRAPH-2F800..CJK COMPATIBILITY IDEOGRAPH-2FA1D
178 # Total code points: 1035
180 # ================================================
181 # (4) Non-Starter Decompositions
183 # These characters can be derived from the UnicodeData.txt file
184 # by including each expanding canonical decomposition
185 # (i.e., those which canonically decompose to a sequence
186 # of characters instead of a single character), such that:
188 # A. The character is not a Starter.
190 # OR (inclusive)
192 # B. The character's canonical decomposition begins
193 # with a character that is not a Starter.
195 # Note that a "Starter" is any character with a zero combining class.
197 # These characters are simply quoted here for reference.
198 # See also Full_Composition_Exclusion in DerivedNormalizationProps.txt
199 # ================================================
201 # 0344 COMBINING GREEK DIALYTIKA TONOS
202 # 0F73 TIBETAN VOWEL SIGN II
203 # 0F75 TIBETAN VOWEL SIGN UU
204 # 0F81 TIBETAN VOWEL SIGN REVERSED II
206 # Total code points: 4