USING: combinators.short-circuit unicode.categories kernel math
combinators splitting sequences math.parser io.files io assocs
arrays namespaces make math.ranges unicode.normalize.private values
-io.encodings.ascii unicode.syntax unicode.data compiler.units
+io.encodings.ascii unicode.syntax unicode.data compiler.units fry
alien.syntax sets accessors interval-maps memoize locals words ;
IN: unicode.breaks
<PRIVATE
-:: (>pieces) ( str quot -- )
- str [
- dup quot call cut-slice
- swap , quot (>pieces)
- ] unless-empty ; inline recursive
-
-: >pieces ( str quot -- graphemes )
- [ (>pieces) ] { } make ; inline
+! Splits str into consecutive slices. While the remaining string is
+! non-empty, quot is called on it to get a cut index i; cut-slice splits
+! there, the head slice is collected and the tail becomes the new
+! remaining string. The final nip drops the leftover (empty) remainder.
+: >pieces ( str quot: ( str -- i ) -- graphemes )
+ [ dup empty? not ] swap '[ dup @ cut-slice swap ]
+ [ ] produce nip ; inline
PRIVATE>
\ >lower must-infer
\ >title must-infer
-[ "Hello How Are You? I'M Good" ] [ "hEllo how ARE yOU? I'm good" >title ] unit-test
+! The letter after the apostrophe in "I'm" is expected to stay lowercase.
+[ "Hello How Are You? I'm Good" ] [ "hEllo how ARE yOU? I'm good" >title ] unit-test
[ "FUSS" ] [ "Fu\u0000DF" >upper ] unit-test
[ "\u0003C3\u0003C2" ] [ "\u0003A3\u0003A3" >lower ] unit-test
[ t ] [ "hello how are you?" lower? ] unit-test
! See http://factorcode.org/license.txt for BSD license.
USING: unicode.data sequences sequences.next namespaces make
unicode.normalize math unicode.categories combinators
-assocs strings splitting kernel accessors ;
+assocs strings splitting kernel accessors unicode.breaks ;
IN: unicode.case
<PRIVATE
[ [ % ] compose ] [ [ , ] compose ] bi* ?if
] 2curry each
] "" make ; inline
+
+! Lowercases string by handing lower>> and ch>lower to map-case.
+! No i-dot? or final-sigma preprocessing happens here; >lower does that
+! before calling this word.
+: (>lower) ( string -- lower )
+ [ lower>> ] [ ch>lower ] map-case ;
+
+! Converts every element of string to title case by handing title>> and
+! ch>title to map-case. It does not look at word boundaries; title-word
+! applies it to the first element of a word only.
+: (>title) ( string -- title )
+ [ title>> ] [ ch>title ] map-case ;
+
+! Uppercases string by handing upper>> and ch>upper to map-case.
+! No i-dot? preprocessing happens here; >upper does that before calling
+! this word.
+: (>upper) ( string -- upper )
+ [ upper>> ] [ ch>upper ] map-case ;
+
+! Title-cases a single word: the first element is split off with unclip,
+! turned into a one-element string and converted with (>title); the rest
+! is converted with (>lower); prepend puts the title-cased head in front.
+! unclip is applied unconditionally, so the word is assumed non-empty.
+: title-word ( string -- title )
+ unclip 1string [ (>lower) ] [ (>title) ] bi* prepend ;
+
PRIVATE>
+
+! Lowercases string: turk>lower is applied first when i-dot? is true,
+! then final-sigma, then the (>lower) mapping.
: >lower ( string -- lower )
i-dot? [ turk>lower ] when
- final-sigma [ lower>> ] [ ch>lower ] map-case ;
+ final-sigma (>lower) ;
+! Uppercases string: turk>upper is applied first when i-dot? is true,
+! then the (>upper) mapping.
: >upper ( string -- upper )
- i-dot? [ turk>upper ] when
- [ upper>> ] [ ch>upper ] map-case ;
+ i-dot? [ turk>upper ] when (>upper) ;
+! Title-cases string: after final-sigma, >words splits it into words, each
+! word is converted by title-word, and the pieces are joined back together.
+! Joining with "" (instead of concat) keeps the result a string even when
+! >words yields no words, e.g. for empty input, where concat returns { }.
: >title ( string -- title )
- final-sigma
- CHAR: \s swap
- [ tuck word-boundary swapd
- [ title>> ] [ lower>> ] if ]
- [ tuck word-boundary swapd
- [ ch>title ] [ ch>lower ] if ]
- map-case nip ;
+ final-sigma >words [ title-word ] map "" join ;
+! Case-folds string by uppercasing it and then lowercasing the result.
: >case-fold ( string -- fold )
>upper >lower ;
USING: io io.files splitting grouping unicode.collation\r
sequences kernel io.encodings.utf8 math.parser math.order\r
-tools.test assocs io.streams.null words ;\r
+tools.test assocs words ;\r
IN: unicode.collation.tests\r
\r
: parse-test ( -- strings )\r
unit-test\r
\r
parse-test 2 <clumps>\r
-[ [ test-two ] assoc-each ] with-null-writer\r
+! Runs test-two on each 2-element clump of the parsed test strings.\r
+[ test-two ] assoc-each\r