1 USING: combinators.short-circuit unicode.categories kernel math combinators splitting
2 sequences math.parser io.files io assocs arrays namespaces
3 math.ranges unicode.normalize values io.encodings.ascii
4 unicode.syntax unicode.data compiler.units alien.syntax sets ;
7 C-ENUM: Any L V T Extend Control CR LF graphemes ;
9 : jamo-class ( ch -- class )
10 dup initial? [ drop L ]
11 [ dup medial? [ drop V ] [ final? T Any ? ] if ] if ;
13 CATEGORY: grapheme-control Zl Zp Cc Cf ;
14 : control-class ( ch -- class )
18 { HEX: 200C [ Extend ] }
19 { HEX: 200D [ Extend ] }
23 CATEGORY: (extend) Me Mn ;
25 { [ (extend)? ] [ "Other_Grapheme_Extend" property? ] } 1|| ;
27 : grapheme-class ( ch -- class )
29 { [ dup jamo? ] [ jamo-class ] }
30 { [ dup grapheme-control? ] [ control-class ] }
31 { [ extend? ] [ Extend ] }
35 : init-grapheme-table ( -- table )
36 graphemes [ graphemes f <array> ] replicate ;
40 : finish-table ( -- table )
41 table get [ [ 1 = ] map ] map ;
43 : set-table ( class1 class2 val -- )
44 -rot table get nth [ swap or ] change-nth ;
46 : connect ( class1 class2 -- ) 1 set-table ;
47 : disconnect ( class1 class2 -- ) 0 set-table ;
49 : connect-before ( class classes -- )
50 [ connect ] with each ;
52 : connect-after ( classes class -- )
53 [ connect ] curry each ;
55 : break-around ( classes1 classes2 -- )
56 [ [ 2dup disconnect swap disconnect ] with each ] curry each ;
58 : make-grapheme-table ( -- )
60 Control CR LF 3array graphemes break-around
61 L L V 2array connect-before
62 V V T 2array connect-before
64 graphemes Extend connect-after ;
68 : grapheme-break? ( class1 class2 -- ? )
69 grapheme-table nth nth not ;
71 : chars ( i str n -- str[i] str[i+n] )
72 swap >r dupd + r> [ ?nth ] curry bi@ ;
74 : find-index ( seq quot -- i ) find drop ; inline
75 : find-last-index ( seq quot -- i ) find-last drop ; inline
77 : first-grapheme ( str -- i )
78 unclip-slice grapheme-class over
79 [ grapheme-class tuck grapheme-break? ] find-index
80 nip swap length or 1+ ;
82 : (>graphemes) ( str -- )
84 dup first-grapheme cut-slice
88 : >graphemes ( str -- graphemes )
89 [ (>graphemes) ] { } make ;
91 : string-reverse ( str -- rts )
92 >graphemes reverse concat ;
94 : last-grapheme ( str -- i )
95 unclip-last-slice grapheme-class swap
96 [ grapheme-class dup rot grapheme-break? ] find-last-index ?1+ nip ;
98 init-grapheme-table table
99 [ make-grapheme-table finish-table ] with-variable
100 \ grapheme-table set-value