1 USING: sequences namespaces make unicode.data kernel math arrays
2 locals sorting.insertion accessors ;
5 ! Conjoining Jamo behavior
7 : hangul-base HEX: ac00 ; inline
8 : hangul-end HEX: D7AF ; inline
9 : initial-base HEX: 1100 ; inline
10 : medial-base HEX: 1161 ; inline
11 : final-base HEX: 11a7 ; inline
13 : initial-count 19 ; inline
14 : medial-count 21 ; inline
15 : final-count 28 ; inline
17 : hangul? ( ch -- ? ) hangul-base hangul-end ?between? ;
18 : jamo? ( ch -- ? ) HEX: 1100 HEX: 11FF ?between? ;
20 ! These numbers come from UAX 29
21 : initial? ( ch -- ? )
22 dup HEX: 1100 HEX: 1159 ?between? [ ] [ HEX: 115F = ] ?if ;
23 : medial? ( ch -- ? ) HEX: 1160 HEX: 11A2 ?between? ;
24 : final? ( ch -- ? ) HEX: 11A8 HEX: 11F9 ?between? ;
26 : hangul>jamo ( hangul -- jamo-string )
27 hangul-base - final-count /mod final-base +
28 >r medial-count /mod medial-base +
29 >r initial-base + r> r>
30 dup final-base = [ drop 2array ] [ 3array ] if ;
32 : jamo>hangul ( initial medial final -- hangul )
33 >r >r initial-base - medial-count *
34 r> medial-base - + final-count *
35 r> final-base - + hangul-base + ;
37 ! Normalization -- Decomposition
39 : reorder-slice ( string start -- slice done? )
40 2dup swap [ non-starter? not ] find-from drop
41 [ [ over length ] unless* rot <slice> ] keep not ;
43 : reorder-next ( string i -- new-i done? )
44 over [ non-starter? ] find-from drop [
46 >r dup [ combining-class ] insertion-sort to>> r>
49 : reorder-loop ( string start -- )
50 dupd reorder-next [ 2drop ] [ reorder-loop ] if ;
52 : reorder ( string -- )
55 : reorder-back ( string i -- )
56 over [ non-starter? not ] find-last-from drop ?1+ reorder-next 2drop ;
58 :: decompose ( string quot -- decomposed )
59 ! When there are 8 and 32-bit strings, this'll be
60 ! equivalent to clone on 8 and the contents of the last
61 ! main quotation on 32.
62 string [ 127 < ] all? [ string ] [
65 dup hangul? [ hangul>jamo % ]
66 [ dup quot call [ % ] [ , ] ?if ] if
72 : nfd ( string -- string )
73 [ canonical-entry ] decompose ;
75 : nfkd ( string -- string )
76 [ compatibility-entry ] decompose ;
78 : string-append ( s1 s2 -- string )
79 ! This could be more optimized,
80 ! but in practice, it'll almost always just be append
82 0 over ?nth non-starter?
83 [ length dupd reorder-back ] [ drop ] if ;
85 ! Normalization -- Composition
91 : get-str ( i -- ch ) ind get + main-str get ?nth ;
92 : current ( -- ch ) 0 get-str ;
95 : initial-medial? ( -- ? )
96 current initial? [ 1 get-str medial? ] [ f ] if ;
102 current to current to current jamo>hangul , ;
105 current to current 0 jamo>hangul , ;
107 : compose-jamo ( -- )
109 --final? [ imf, ] [ im, ] if
110 ] when to current jamo? [ compose-jamo ] when ;
112 : pass-combining ( -- )
113 current non-starter? [ current , to pass-combining ] when ;
115 : try-compose ( last-class char current-class -- )
116 swapd = [ after get push ] [
117 char get over combine-chars
118 [ nip char set ] [ after get push ] if*
121 : compose-iter ( n -- )
123 dup combining-class dup
124 [ [ try-compose ] keep to compose-iter ] [ 3drop ] if
128 after [ dup empty? [ drop SBUF" " clone ] unless ] change ;
132 dup jamo? [ drop compose-jamo ] [
133 char set to ?new-after
135 char get , after get %
140 : compose ( str -- comp )
144 SBUF" " clone after set
145 pass-combining (compose)
148 : nfc ( string -- nfc )
151 : nfkc ( string -- nfkc )