2 USING: accessors arrays assocs combinators.short-circuit fry
3 hints interval-maps kernel math math.order sequences sorting
4 strings unicode.breaks.private unicode.case.private
5 unicode.categories unicode.collation unicode.collation.private
6 unicode.data unicode.data.private unicode.normalize.private
11 CATEGORY: blank Zs Zl Zp | "\r\n\t" member? ;
13 CATEGORY: letter Ll | "Other_Lowercase" property? ;
15 CATEGORY: LETTER Lu | "Other_Uppercase" property? ;
17 CATEGORY: Letter Lu Ll Lt Lm Lo Nl ;
19 CATEGORY: digit Nd Nl No ;
21 CATEGORY-NOT: printable Cc Cf Cs Co Cn ;
23 CATEGORY: alpha Lu Ll Lt Lm Lo Nd Nl No | "Other_Alphabetic" property? ;
25 CATEGORY: control Cc ;
27 CATEGORY-NOT: uncased Lu Ll Lt Lm Mn Me ;
29 CATEGORY-NOT: character Cn ;
31 CATEGORY: math Sm | "Other_Math" property? ;
33 : script-of ( char -- script )
34 script-table interval-at ;
36 : name>char ( name -- char ) name-map at ; inline
38 : char>name ( char -- name ) name-map value-at ; inline
40 : ch>lower ( ch -- lower ) simple-lower ?at drop ; inline
42 : ch>upper ( ch -- upper ) simple-upper ?at drop ; inline
44 : ch>title ( ch -- title ) simple-title ?at drop ; inline
46 : first-grapheme ( str -- i )
47 unclip-slice grapheme-class over
48 [ grapheme-class [ nip ] [ grapheme-break? ] 2bi ] find drop
49 nip swap length or 1 + ;
51 : first-grapheme-from ( start str -- i )
52 over tail-slice first-grapheme + ;
54 : last-grapheme ( str -- i )
55 unclip-last-slice grapheme-class swap
56 [ grapheme-class dup rot grapheme-break? ] find-last drop ?1+ nip ;
58 : last-grapheme-from ( end str -- i )
59 swap head-slice last-grapheme ;
63 : >pieces ( str quot: ( str -- i ) -- graphemes )
64 [ dup empty? not ] swap '[ dup @ cut-slice swap ] produce nip ; inline
68 : >graphemes ( str -- graphemes )
69 [ first-grapheme ] >pieces ;
71 : string-reverse ( str -- rts )
72 >graphemes reverse! concat ;
74 : first-word ( str -- i )
75 [ [ length ] [ first word-break-prop ] bi ] keep
76 1 swap dup '[ _ word-break-next ] find-index-from
79 : >words ( str -- words )
80 [ first-word ] >pieces ;
84 : nth-next ( i str -- str[i-1] str[i] )
85 [ [ 1 - ] keep ] dip '[ _ nth ] bi@ ;
89 : word-break-at? ( i str -- ? )
94 [ nth-next [ word-break-prop ] dip ] 2keep
99 : first-word-from ( start str -- i )
100 over tail-slice first-word + ;
102 : last-word ( str -- i )
103 [ length iota ] keep '[ _ word-break-at? ] find-last drop 0 or ;
105 : last-word-from ( end str -- i )
106 swap head-slice last-word ;
108 : >lower ( string -- lower )
109 locale>lower final-sigma
110 [ lower>> ] [ ch>lower ] map-case ;
112 HINTS: >lower string ;
114 : >upper ( string -- upper )
116 [ upper>> ] [ ch>upper ] map-case ;
118 HINTS: >upper string ;
122 : (>title) ( string -- title )
124 [ title>> ] [ ch>title ] map-case ; inline
128 : capitalize ( string -- title )
129 unclip-slice 1string [ >lower ] [ (>title) ] bi*
130 "" prepend-as ; inline
132 : >title ( string -- title )
133 final-sigma >words [ capitalize ] map! concat ;
135 HINTS: >title string ;
137 : >case-fold ( string -- fold )
140 : lower? ( string -- ? ) dup >lower sequence= ;
142 : upper? ( string -- ? ) dup >upper sequence= ;
144 : title? ( string -- ? ) dup >title sequence= ;
146 : case-fold? ( string -- ? ) dup >case-fold sequence= ;
148 : nfd ( string -- nfd )
149 [ (nfd) ] with-string ;
151 : nfkd ( string -- nfkd )
152 [ (nfkd) ] with-string ;
154 : string-append ( s1 s2 -- string )
156 0 over ?nth non-starter?
157 [ length dupd reorder-back ] [ drop ] if ;
159 HINTS: string-append string string ;
161 : nfc ( string -- nfc )
162 [ (nfd) combine ] with-string ;
164 : nfkc ( string -- nfkc )
165 [ (nfkd) combine ] with-string ;
167 : collation-key ( string -- key )
168 nfd string>graphemes graphemes>weights
169 filter-ignorable weights>bytes ;
173 : insensitive= ( str1 str2 levels-removed -- ? )
175 [ collation-key ] dip
176 [ [ 0 = not ] trim-tail but-last ] times
181 : primary= ( str1 str2 -- ? )
184 : secondary= ( str1 str2 -- ? )
187 : tertiary= ( str1 str2 -- ? )
190 : quaternary= ( str1 str2 -- ? )
193 : w/collation-key ( str -- {str,key} )
194 [ collation-key ] keep 2array ;
196 : sort-strings ( strings -- sorted )
197 [ w/collation-key ] map natural-sort values ;
199 : string<=> ( str1 str2 -- <=> )
200 [ w/collation-key ] compare ;