1 USING: kernel unicode.data sequences sequences.next namespaces
2 unicode.normalize math unicode.categories combinators
3 assocs strings splitting ;
6 : at-default ( key assoc -- value/key ) over >r at r> or ;
8 : ch>lower ( ch -- lower ) simple-lower at-default ;
9 : ch>upper ( ch -- upper ) simple-upper at-default ;
10 : ch>title ( ch -- title ) simple-title at-default ;
12 SYMBOL: locale ! Just casing locale, or overall?
15 locale get { "tr" "az" } member? ;
17 : lithuanian? ( -- ? ) locale get "lt" = ;
19 : dot-over ( -- ch ) HEX: 307 ;
21 : lithuanian-ch>upper ( ? next ch -- ? )
23 [ swap dot-over = over "ij" member? and swap , ] if ;
25 : lithuanian>upper ( string -- lower )
26 [ f swap [ lithuanian-ch>upper ] each-next drop ] "" make ;
28 : mark-above? ( ch -- ? )
29 combining-class 230 = ;
31 : lithuanian-ch>lower ( next ch -- )
32 ! This fails to add a dot above in certain edge cases
33 ! where there is a non-above combining mark before an above one
35 dup , "IJ" member? swap mark-above? and [ dot-over , ] when ;
37 : lithuanian>lower ( string -- lower )
38 [ [ lithuanian-ch>lower ] each-next ] "" make ;
40 : turk-ch>upper ( ch -- )
42 [ drop CHAR: I , dot-over , ] [ , ] if ;
44 : turk>upper ( string -- upper-i )
45 [ [ turk-ch>upper ] each ] "" make ;
47 : turk-ch>lower ( ? next ch -- ? )
49 { [ rot ] [ 2drop f ] }
52 dup CHAR: i HEX: 131 ? ,
57 : turk>lower ( string -- lower-i )
58 [ f swap [ turk-ch>lower ] each-next drop ] "" make ;
60 : word-boundary ( prev char -- new ? )
61 dup non-starter? [ drop dup ] when
64 : sigma-map ( string -- string )
66 swap [ uncased? ] keep not or
67 [ drop HEX: 3C2 ] when
70 : final-sigma ( string -- string )
71 HEX: 3A3 over member? [ sigma-map ] when ;
73 : map-case ( string string-quot char-quot -- case )
77 rot dup special-casing at
79 [ -rot nip call , ] ?if
84 : >lower ( string -- lower )
85 i-dot? [ turk>lower ] when
86 final-sigma [ code-point-lower ] [ ch>lower ] map-case ;
88 : >upper ( string -- upper )
89 i-dot? [ turk>upper ] when
90 [ code-point-upper ] [ ch>upper ] map-case ;
92 : >title ( string -- title )
95 [ tuck word-boundary swapd
96 [ code-point-title ] [ code-point-lower ] if ]
97 [ tuck word-boundary swapd
98 [ ch>title ] [ ch>lower ] if ]
101 : >case-fold ( string -- fold )
104 : lower? ( string -- ? )
106 : upper? ( string -- ? )
108 : title? ( string -- ? )
110 : case-fold? ( string -- ? )