1 ! Copyright (C) 2008 Daniel Ehrenberg.
2 ! See http://factorcode.org/license.txt for BSD license.
3 USING: sequences namespaces make unicode.data kernel math arrays
4 locals sorting.insertion accessors ;
7 ! Conjoining Jamo behavior
9 : hangul-base HEX: ac00 ; inline
10 : hangul-end HEX: D7AF ; inline
11 : initial-base HEX: 1100 ; inline
12 : medial-base HEX: 1161 ; inline
13 : final-base HEX: 11a7 ; inline
15 : initial-count 19 ; inline
16 : medial-count 21 ; inline
17 : final-count 28 ; inline
19 : hangul? ( ch -- ? ) hangul-base hangul-end ?between? ;
20 : jamo? ( ch -- ? ) HEX: 1100 HEX: 11FF ?between? ;
22 ! These numbers come from UAX 29
23 : initial? ( ch -- ? )
24 dup HEX: 1100 HEX: 1159 ?between? [ ] [ HEX: 115F = ] ?if ;
25 : medial? ( ch -- ? ) HEX: 1160 HEX: 11A2 ?between? ;
26 : final? ( ch -- ? ) HEX: 11A8 HEX: 11F9 ?between? ;
28 : hangul>jamo ( hangul -- jamo-string )
29 hangul-base - final-count /mod final-base +
31 medial-count /mod medial-base +
32 [ initial-base + ] dip
34 dup final-base = [ drop 2array ] [ 3array ] if ;
36 : jamo>hangul ( initial medial final -- hangul )
38 [ initial-base - medial-count * ] dip
39 medial-base - + final-count *
40 ] dip final-base - + hangul-base + ;
42 ! Normalization -- Decomposition
44 : reorder-slice ( string start -- slice done? )
45 2dup swap [ non-starter? not ] find-from drop
46 [ [ over length ] unless* rot <slice> ] keep not ;
48 : reorder-next ( string i -- new-i done? )
49 over [ non-starter? ] find-from drop [
51 [ dup [ combining-class ] insertion-sort to>> ] dip
54 : reorder-loop ( string start -- )
55 dupd reorder-next [ 2drop ] [ reorder-loop ] if ;
57 : reorder ( string -- )
60 : reorder-back ( string i -- )
61 over [ non-starter? not ] find-last-from drop ?1+ reorder-next 2drop ;
63 :: decompose ( string quot -- decomposed )
64 ! When there are 8 and 32-bit strings, this'll be
65 ! equivalent to clone on 8 and the contents of the last
66 ! main quotation on 32.
67 string [ 127 < ] all? [ string ] [
70 dup hangul? [ hangul>jamo % ]
71 [ dup quot call [ % ] [ , ] ?if ] if
77 : nfd ( string -- string )
78 [ canonical-entry ] decompose ;
80 : nfkd ( string -- string )
81 [ compatibility-entry ] decompose ;
83 : string-append ( s1 s2 -- string )
84 ! This could be more optimized,
85 ! but in practice, it'll almost always just be append
87 0 over ?nth non-starter?
88 [ length dupd reorder-back ] [ drop ] if ;
90 ! Normalization -- Composition
96 : get-str ( i -- ch ) ind get + main-str get ?nth ;
97 : current ( -- ch ) 0 get-str ;
100 : initial-medial? ( -- ? )
101 current initial? [ 1 get-str medial? ] [ f ] if ;
107 current to current to current jamo>hangul , ;
110 current to current 0 jamo>hangul , ;
112 : compose-jamo ( -- )
114 --final? [ imf, ] [ im, ] if
115 ] when to current jamo? [ compose-jamo ] when ;
117 : pass-combining ( -- )
118 current non-starter? [ current , to pass-combining ] when ;
120 : try-compose ( last-class char current-class -- )
121 swapd = [ after get push ] [
122 char get over combine-chars
123 [ nip char set ] [ after get push ] if*
126 : compose-iter ( n -- )
128 dup combining-class dup
129 [ [ try-compose ] keep to compose-iter ] [ 3drop ] if
133 after [ dup empty? [ drop SBUF" " clone ] unless ] change ;
137 dup jamo? [ drop compose-jamo ] [
138 char set to ?new-after
140 char get , after get %
145 : compose ( str -- comp )
149 SBUF" " clone after set
150 pass-combining (compose)
153 : nfc ( string -- nfc )
156 : nfkc ( string -- nfkc )