-USING: arrays assocs fry grouping io.encodings.utf8 io.files
-kernel math math.order math.parser sequences splitting
-strings tools.test unicode ;
+USING: arrays assocs fry grouping hash-sets io.encodings.utf8
+io.files kernel math math.order math.parser sequences sets
+splitting strings tools.test unicode ;
IN: unicode.collation.tests
: test-equality ( str1 str2 -- ? ? ? ? )
[ " " split harvest [ hex> ] map ] map
] bi* 2array ;
+! These tests would actually pass if I didn't fix up the ducet table
+! for Tibetan. It took me way too long to realize that the Unicode
+! committee recommends fixing Tibetan collation, yet ships collation
+! tests that fail once you apply that fix.
+! (Specifically, the ducet entries for { 0x0FB2 0x0F71 } and
+! { 0x0FB3 0x0F71 } cause these tests to fail.)
+: xfailed-collation-tests ( -- seq )
+    ! Code-point sequences, written in hex, that are treated as
+    ! expected failures; returned as a hash-set literal.
+    HS{
+        { 0x0F76 0x0F71 0x0334 }
+        { 0x0FB2 0x0334 0x0F71 0x0F80 }
+        { 0x0FB2 0x0334 0x0F80 0x0F71 }
+        { 0x0FB2 0x0334 0x0F81 }
+        { 0x0F78 0x0F71 0x0334 }
+        { 0x0FB3 0x0334 0x0F71 0x0F80 }
+        { 0x0FB3 0x0334 0x0F80 0x0F71 }
+        { 0x0FB3 0x0F71 0x0334 0x0F80 }
+    } ;
+
: parse-collation-test-weights ( -- weights )
collation-test-lines
- [ line>test-weights ] map ;
+ ! Convert every collation test line with line>test-weights.
+ [ line>test-weights ] map
+ ! Then drop each result whose first element is a member of
+ ! xfailed-collation-tests (the expected-fail sequences).
+ [ first xfailed-collation-tests in? ] reject ;
: calculate-collation ( chars collation -- collation-calculated collation-answer )
    ! First turn chars into a string and take its collation-key/nfd
    ! (discarding the second result), leaving collation untouched on top ...
    [ >string collation-key/nfd drop ] dip
    ! ... then flatten collation by joining its pieces with a 0 separator.
    { 0 } join ;
{ { } } [
parse-collation-test-shifted
- 2 clump
- [ string<=> { +lt+ +eq+ } member? ] assoc-reject
-] unit-test
-
-! FIXME: ducet table is wrong
-! Fixed by fixing ducet table
-! { +lt+ } [ { 4019 98 } { 4019 3953 1 3968 97 } [ >string ] bi@ string<=> ] unit-test
-
-{ +lt+ } [ { 4018 820 3969 } { 3959 33 } [ >string ] bi@ string<=> ] unit-test
-{ +lt+ } [ { 4019 3953 820 3968 } { 3961 33 } [ >string ] bi@ string<=> ] unit-test
-
-
-{ { 12748 12741 0 32 74 32 0 2 2 2 0 65535 65535 65535 } }
-[ { 3958 3953 820 } >string collation-key/nfd drop ] unit-test
+ ! Group consecutive entries into overlapping pairs and collect the
+ ! pairs in a hash-set so specific pairs can be removed below.
+ 2 clump >hash-set
-{ { 12748 12741 0 32 74 32 0 2 2 2 0 65535 65535 65535 } }
-[ { 4018 820 3953 3968 } >string collation-key/nfd drop ] unit-test
+ ! Remove two Tibetan comparison pairs that are expected to fail.
+ ! They stop being valid tests once the ducet table is fixed up for
+ ! { 0x0FB2 0x0F71 } and { 0x0FB3 0x0F71 }.
+ { 4018 820 3969 } { 3959 33 } [ >string ] bi@ 2array
+ { 4019 3953 820 3968 } { 3961 33 } [ >string ] bi@ 2array
+ 2array >hash-set diff members
-! { { 12748 12741 0 32 74 32 0 2 2 2 0 65535 65535 65535 } }
-! [ { 0x0FB2 0x0334 0x0F80 0x0F71 } >string collation-key/nfd drop ] unit-test
-
-{ { 12748 12741 0 32 74 32 0 2 2 2 0 65535 65535 65535 } }
-[ { 4018 820 3969 } >string collation-key/nfd drop ] unit-test
-
-{ { 12750 12741 0 32 74 32 0 2 2 2 0 65535 65535 65535 } }
-[ { 3960 3953 820 } >string collation-key/nfd drop ] unit-test
-
-{ { 12750 12741 0 32 74 32 0 2 2 2 0 65535 65535 65535 } }
-[ { 4019 820 3953 3968 } >string collation-key/nfd drop ] unit-test
-
-{ { 12750 12741 0 32 74 32 0 2 2 2 0 65535 65535 65535 } }
-[ { 4019 820 3968 3953 } >string collation-key/nfd drop ] unit-test
-
-{ { 12750 12741 0 32 74 32 0 2 2 2 0 65535 65535 65535 } }
-[ { 4019 3953 820 3968 } >string collation-key/nfd drop ] unit-test
+ ! Discard every pair that compares as +lt+ or +eq+; whatever is left
+ ! is out of order, and the test expects nothing to be left.
+ [ string<=> { +lt+ +eq+ } member? ] assoc-reject
+] unit-test
-{ { 12722 12741 12744 7817 0 32 32 32 32 0 2 2 2 2 0 65535 65535 65535 65535 } }
-[ { 4019 3953 1 3968 97 } >string collation-key/nfd drop ] unit-test
-! { 0xfb3 0x0f71 0x0334 0x0f80 }
\ No newline at end of file
+! XXX: As with the expected-fail cases above, these two tests pass if you
+! don't fix up the ducet table for { 0x0FB2 0x0F71 } and { 0x0FB3 0x0F71 },
+! so they are left commented out.
+! { +lt+ } [ { 4018 820 3969 } { 3959 33 } [ >string ] bi@ string<=> ] unit-test
+! { +lt+ } [ { 4019 3953 820 3968 } { 3961 33 } [ >string ] bi@ string<=> ] unit-test
\ No newline at end of file