1 ! Copyright (C) 2008 Daniel Ehrenberg.
2 ! See https://factorcode.org/license.txt for BSD license.
3 USING: accessors arrays ascii combinators
4 combinators.short-circuit hints kernel make math
5 math.order sbufs sequences sorting.insertion strings
10 ! Conjoining Jamo behavior
12 CONSTANT: hangul-base 0xac00
13 CONSTANT: hangul-end 0xD7AF
14 CONSTANT: initial-base 0x1100
15 CONSTANT: medial-base 0x1161
16 CONSTANT: final-base 0x11a7
18 CONSTANT: initial-count 19
19 CONSTANT: medial-count 21
20 CONSTANT: final-count 28
22 : ?between? ( n/f from to -- ? )
23 pick [ between? ] [ 3drop f ] if ; inline
25 : hangul? ( ch -- ? ) hangul-base hangul-end ?between? ; inline
26 : jamo? ( ch -- ? ) 0x1100 0x11FF ?between? ; inline
28 ! These numbers come from UAX 29
29 : initial? ( ch -- ? )
30 dup 0x1100 0x1159 ?between? [ ] [ 0x115F = ] ?if-old ; inline
31 : medial? ( ch -- ? ) 0x1160 0x11A2 ?between? ; inline
32 : final? ( ch -- ? ) 0x11A8 0x11F9 ?between? ; inline
34 : hangul>jamo ( hangul -- jamo-string )
35 hangul-base - final-count /mod final-base +
37 medial-count /mod medial-base +
38 [ initial-base + ] dip
40 dup final-base = [ drop 2array ] [ 3array ] if ;
42 : jamo>hangul ( initial medial final -- hangul )
44 [ initial-base - medial-count * ] dip
45 medial-base - + final-count *
46 ] dip final-base - + hangul-base + ;
48 ! Normalization -- Decomposition
50 : reorder-slice ( string start -- slice done? )
51 2dup swap [ non-starter? not ] find-from drop
52 [ [ over length ] unless* rot <slice> ] keep not ; inline
54 : reorder-next ( string i -- new-i done? )
55 over [ non-starter? ] find-from drop [
57 [ dup [ combining-class ] insertion-sort to>> ] dip
58 ] [ length t ] if* ; inline
60 : reorder-loop ( string start -- )
61 dupd reorder-next [ 2drop ] [ reorder-loop ] if ; inline recursive
63 : reorder ( string -- )
66 : reorder-back ( string i -- )
67 over [ non-starter? not ] find-last-from drop ?1+ reorder-next 2drop ;
69 :: decompose ( string quot -- decomposed )
70 string length <sbuf> :> out
72 >fixnum dup ascii? [ out push ] [
73 dup hangul? [ hangul>jamo out push-all ]
74 [ dup quot call [ out push-all ] [ out push ] ?if-old ] if
77 out "" like dup reorder ; inline
79 : with-string ( str quot -- str )
80 over aux>> [ call ] [ drop ] if ; inline
82 : (nfd) ( string -- nfd )
83 [ canonical-entry ] decompose ;
87 : (nfkd) ( string -- nfkd )
88 [ compatibility-entry ] decompose ;
90 HINTS: (nfkd) string ;
92 ! Normalization -- Composition
94 : initial-medial? ( str i -- ? )
95 { [ swap nth initial? ] [ 1 + swap ?nth medial? ] } 2&& ;
97 : --final? ( str i -- ? )
98 2 + swap ?nth final? ;
100 : imf% ( str i -- str i )
101 [ tail-slice first3 jamo>hangul , ]
104 : im% ( str i -- str i )
105 [ tail-slice first2 final-base jamo>hangul , ]
108 : compose-jamo ( str i -- str i )
109 2dup initial-medial? [
110 2dup --final? [ imf% ] [ im% ] if
111 ] [ 2dup swap nth , 1 + ] if ;
113 : pass-combining ( str -- str i )
114 dup [ non-starter? not ] find drop
115 [ dup length ] unless*
118 TUPLE: compose-state i str char after last-class ;
120 : get-str ( state i -- ch )
121 swap [ i>> + ] [ str>> ] bi ?nth ; inline
122 : current ( state -- ch ) 0 get-str ; inline
123 : to ( state -- state ) [ 1 + ] change-i ; inline
124 : push-after ( ch state -- state ) [ ?push ] change-after ; inline
126 :: try-compose ( state new-char current-class -- state )
127 state last-class>> current-class =
128 [ new-char state push-after ] [
129 state char>> new-char combine-chars
130 [ state swap >>char ] [
131 new-char state push-after
132 current-class >>last-class
138 : try-noncombining ( state char -- state )
139 [ drop ] [ [ char>> ] dip combine-chars ] 2bi
140 [ >>char to f >>last-class compose-iter ] when* ; inline recursive
142 : compose-iter ( state -- state )
144 dup combining-class {
148 [ drop ] [ try-noncombining ] if ] }
149 [ try-compose to compose-iter ]
151 ] when* ; inline recursive
153 : compose-combining ( ch str i -- str i )
159 { [ char>> , ] [ after>> % ] [ str>> ] [ i>> ] } cleave ; inline
161 :: (compose) ( str i -- )
163 dup jamo? [ drop str i compose-jamo ] [
164 i 1 + str ?nth combining-class
165 [ str i 1 + compose-combining ] [ , str i 1 + ] if
167 ] when* ; inline recursive
169 : combine ( str -- comp )
170 [ pass-combining (compose) ] "" make ;
172 HINTS: combine string ;