1 USING: unicode.categories kernel math combinators splitting
2 sequences math.parser io.files io assocs arrays namespaces
3 math.ranges unicode.normalize unicode.syntax.backend
4 unicode.syntax unicode.data compiler.units alien.syntax io.encodings.ascii ;
7 C-ENUM: Any L V T Extend Control CR LF graphemes ;
9 : jamo-class ( ch -- class )
10 dup initial? [ drop L ]
11 [ dup medial? [ drop V ] [ final? T Any ? ] if ] if ;
13 CATEGORY: grapheme-control Zl Zp Cc Cf ;
14 : control-class ( ch -- class )
18 { HEX: 200C [ Extend ] }
19 { HEX: 200D [ Extend ] }
23 : trim-blank ( str -- newstr )
24 [ blank? ] right-trim ;
26 : process-other-extend ( lines -- set )
27 [ "#" split1 drop ";" split1 drop trim-blank ] map
29 [ ".." split1 [ dup ] unless* [ hex> ] bi@ [a,b] ] map
30 concat [ dup ] H{ } map>assoc ;
32 : other-extend-lines ( -- lines )
33 "extra/unicode/PropList.txt" resource-path ascii file-lines ;
37 CATEGORY: (extend) Me Mn ;
39 dup (extend)? [ ] [ other-extend key? ] ?if ;
41 : grapheme-class ( ch -- class )
43 { [ dup jamo? ] [ jamo-class ] }
44 { [ dup grapheme-control? ] [ control-class ] }
45 { [ extend? ] [ Extend ] }
49 : init-grapheme-table ( -- table )
50 graphemes [ drop graphemes f <array> ] map ;
54 : finish-table ( -- table )
55 table get [ [ 1 = ] map ] map ;
57 : set-table ( class1 class2 val -- )
58 -rot table get nth [ swap or ] change-nth ;
60 : connect ( class1 class2 -- ) 1 set-table ;
61 : disconnect ( class1 class2 -- ) 0 set-table ;
63 : connect-before ( class classes -- )
64 [ connect ] with each ;
66 : connect-after ( classes class -- )
67 [ connect ] curry each ;
69 : break-around ( classes1 classes2 -- )
70 [ [ 2dup disconnect swap disconnect ] with each ] curry each ;
72 : make-grapheme-table ( -- )
74 Control CR LF 3array graphemes break-around
75 L L V 2array connect-before
76 V V T 2array connect-before
78 graphemes Extend connect-after ;
82 : grapheme-break? ( class1 class2 -- ? )
83 grapheme-table nth nth not ;
85 : chars ( i str n -- str[i] str[i+n] )
86 swap >r dupd + r> [ ?nth ] curry bi@ ;
88 : find-index ( seq quot -- i ) find drop ; inline
89 : find-last-index ( seq quot -- i ) find-last drop ; inline
91 : first-grapheme ( str -- i )
92 unclip-slice grapheme-class over
93 [ grapheme-class tuck grapheme-break? ] find-index
94 nip swap length or 1+ ;
96 : (>graphemes) ( str -- )
98 dup first-grapheme cut-slice
102 : >graphemes ( str -- graphemes )
103 [ (>graphemes) ] { } make ;
105 : string-reverse ( str -- rts )
106 >graphemes reverse concat ;
108 : unclip-last-slice ( seq -- beginning last )
109 dup 1 head-slice* swap peek ;
111 : last-grapheme ( str -- i )
112 unclip-last-slice grapheme-class swap
113 [ grapheme-class dup rot grapheme-break? ] find-last-index ?1+ nip ;
116 other-extend-lines process-other-extend \ other-extend set-value
118 init-grapheme-table table
119 [ make-grapheme-table finish-table ] with-variable
120 \ grapheme-table set-value
121 ] with-compilation-unit