1 USING: unicode kernel math const combinators splitting
2 sequences math.parser io.files io assocs arrays namespaces
6 ENUM: Any L V T Extend Control CR LF graphemes ;
8 : jamo-class ( ch -- class )
9 dup initial? [ drop L ]
10 [ dup medial? [ drop V ] [ final? T Any ? ] if ] if ;
12 CATEGORY: grapheme-control Zl Zp Cc Cf ;
13 : control-class ( ch -- class )
17 { HEX: 200C [ Extend ] }
18 { HEX: 200D [ Extend ] }
22 : trim-blank ( str -- newstr )
23 dup [ blank? not ] find-last 1+* head ;
25 : process-other-extend ( lines -- set )
26 [ "#" split1 drop ";" split1 drop trim-blank ] map
28 [ ".." split1 [ dup ] unless* [ hex> ] 2apply range ] map
31 : other-extend-lines ( -- lines )
32 "extra/unicode/PropList.txt" resource-path <file-reader> lines ;
36 other-extend-lines process-other-extend
37 \ other-extend define-value ; parsing
40 CATEGORY: (extend) Me Mn ;
42 [ (extend)? ] [ other-extend key? ] either ;
44 : grapheme-class ( ch -- class )
46 { [ dup jamo? ] [ jamo-class ] }
47 { [ dup grapheme-control? ] [ control-class ] }
48 { [ extend? ] [ Extend ] }
52 : init-grapheme-table ( -- table )
53 graphemes [ drop graphemes f <array> ] map ;
57 : finish-table ( -- table )
58 table get [ [ 1 = ] map ] map ;
60 : set-table ( class1 class2 val -- )
61 -rot table get nth [ swap or ] change-nth ;
63 : connect ( class1 class2 -- ) 1 set-table ;
64 : disconnect ( class1 class2 -- ) 0 set-table ;
66 : connect-before ( class classes -- )
67 [ connect ] curry* each ;
69 : connect-after ( classes class -- )
70 [ connect ] curry each ;
72 : break-around ( classes1 classes2 -- )
73 [ [ 2dup disconnect swap disconnect ] curry* each ] curry each ;
75 : make-grapheme-table ( -- )
77 { Control CR LF } graphemes break-around
78 L { L V } connect-before
79 V { V T } connect-before
81 graphemes Extend connect-after ;
85 init-grapheme-table table
86 [ make-grapheme-table finish-table ] with-variable
87 \ grapheme-table define-value ; parsing
90 : grapheme-break? ( class1 class2 -- ? )
91 grapheme-table nth nth not ;
93 : chars ( i str n -- str[i] str[i+n] )
94 swap >r dupd + r> [ ?nth ] curry 2apply ;
96 : next-grapheme-step ( i str -- i+1 str prev-class )
97 2dup nth grapheme-class >r >r 1+ r> r> ;
99 : (next-grapheme) ( i str prev-class -- next-i )
100 3dup drop bounds-check? [
101 >r next-grapheme-step r> over grapheme-break?
102 [ 2drop 1- ] [ (next-grapheme) ] if
105 : next-grapheme ( i str -- next-i )
106 next-grapheme-step (next-grapheme) ;
108 : (>graphemes) ( i str -- )
110 dupd [ next-grapheme ] keep
111 [ subseq , ] 2keep (>graphemes)
113 : >graphemes ( str -- graphemes )
114 [ 0 swap (>graphemes) ] { } make* ;
116 : string-reverse ( str -- rts )
117 >graphemes reverse concat ;
119 : prev-grapheme-step ( i str -- i-1 str prev-class )
120 2dup nth grapheme-class >r >r 1- r> r> ;
122 : (prev-grapheme) ( i str next-class -- prev-i )
124 >r prev-grapheme-step r> dupd grapheme-break?
125 [ 2drop 1- ] [ (prev-grapheme) ] if
128 : prev-grapheme ( i str -- prev-i )
129 prev-grapheme-step (prev-grapheme) ;