gitweb.factorcode.org Git - factor.git/commitdiff
unicode.collation: illegal? is now gone, do AAAA BBBB for every code point
author Doug Coleman <doug.coleman@gmail.com>
Sun, 28 Jul 2019 17:57:23 +0000 (12:57 -0500)
committer Doug Coleman <doug.coleman@gmail.com>
Tue, 30 Jul 2019 07:12:41 +0000 (02:12 -0500)
remove some tests that pass now

basis/unicode/collation/collation-tests.factor
basis/unicode/collation/collation.factor

index ec160fc95bedb89cdeb274398c26301bb45e0164..d62597b61f48cd623c12ff64fa4d27a0d6bb2a75 100644 (file)
@@ -61,48 +61,6 @@ IN: unicode.collation.tests
 { +lt+ } [ { 111355 98 } { 19968 33 } [ >string ] bi@ string<=> ] unit-test
 { +lt+ } [ { 40943 98 } { 64014 33 } [ >string ] bi@ string<=> ] unit-test
 { +lt+ } [ { 191456 98 } { 888 33 } [ >string ] bi@ string<=> ] unit-test
-{ +lt+ } [ { 19894 98 } { 55296 33 } [ >string ] bi@ string<=> ] unit-test
-{ +lt+ } [ { 55296 98 } { 55297 33 } [ >string ] bi@ string<=> ] unit-test
-{ +lt+ } [ { 55297 98 } { 55298 33 } [ >string ] bi@ string<=> ] unit-test
-{ +lt+ } [ { 55298 98 } { 55299 33 } [ >string ] bi@ string<=> ] unit-test
-{ +lt+ } [ { 55299 98 } { 56320 33 } [ >string ] bi@ string<=> ] unit-test
-{ +lt+ } [ { 56320 98 } { 57343 33 } [ >string ] bi@ string<=> ] unit-test
-{ +lt+ } [ { 63743 98 } { 64976 33 } [ >string ] bi@ string<=> ] unit-test
-{ +lt+ } [ { 64976 98 } { 64977 33 } [ >string ] bi@ string<=> ] unit-test
-{ +lt+ } [ { 64977 98 } { 64978 33 } [ >string ] bi@ string<=> ] unit-test
-{ +lt+ } [ { 64978 98 } { 64979 33 } [ >string ] bi@ string<=> ] unit-test
-{ +lt+ } [ { 65520 98 } { 65534 33 } [ >string ] bi@ string<=> ] unit-test
-{ +lt+ } [ { 65534 98 } { 65535 33 } [ >string ] bi@ string<=> ] unit-test
-{ +lt+ } [ { 65535 98 } { 131070 33 } [ >string ] bi@ string<=> ] unit-test
-{ +lt+ } [ { 131070 98 } { 131071 33 } [ >string ] bi@ string<=> ] unit-test
-{ +lt+ } [ { 191457 98 } { 196606 33 } [ >string ] bi@ string<=> ] unit-test
-{ +lt+ } [ { 196606 98 } { 196607 33 } [ >string ] bi@ string<=> ] unit-test
-{ +lt+ } [ { 196607 98 } { 262142 33 } [ >string ] bi@ string<=> ] unit-test
-{ +lt+ } [ { 262142 98 } { 262143 33 } [ >string ] bi@ string<=> ] unit-test
-{ +lt+ } [ { 262143 98 } { 327678 33 } [ >string ] bi@ string<=> ] unit-test
-{ +lt+ } [ { 327678 98 } { 327679 33 } [ >string ] bi@ string<=> ] unit-test
-{ +lt+ } [ { 327679 98 } { 393214 33 } [ >string ] bi@ string<=> ] unit-test
-{ +lt+ } [ { 393214 98 } { 393215 33 } [ >string ] bi@ string<=> ] unit-test
-{ +lt+ } [ { 393215 98 } { 458750 33 } [ >string ] bi@ string<=> ] unit-test
-{ +lt+ } [ { 458750 98 } { 458751 33 } [ >string ] bi@ string<=> ] unit-test
-{ +lt+ } [ { 458751 98 } { 524286 33 } [ >string ] bi@ string<=> ] unit-test
-{ +lt+ } [ { 524286 98 } { 524287 33 } [ >string ] bi@ string<=> ] unit-test
-{ +lt+ } [ { 524287 98 } { 589822 33 } [ >string ] bi@ string<=> ] unit-test
-{ +lt+ } [ { 589822 98 } { 589823 33 } [ >string ] bi@ string<=> ] unit-test
-{ +lt+ } [ { 589823 98 } { 655358 33 } [ >string ] bi@ string<=> ] unit-test
-{ +lt+ } [ { 655358 98 } { 655359 33 } [ >string ] bi@ string<=> ] unit-test
-{ +lt+ } [ { 655359 98 } { 720894 33 } [ >string ] bi@ string<=> ] unit-test
-{ +lt+ } [ { 720894 98 } { 720895 33 } [ >string ] bi@ string<=> ] unit-test
-{ +lt+ } [ { 720895 98 } { 786430 33 } [ >string ] bi@ string<=> ] unit-test
-{ +lt+ } [ { 786430 98 } { 786431 33 } [ >string ] bi@ string<=> ] unit-test
-{ +lt+ } [ { 786432 98 } { 851966 33 } [ >string ] bi@ string<=> ] unit-test
-{ +lt+ } [ { 851966 98 } { 851967 33 } [ >string ] bi@ string<=> ] unit-test
-{ +lt+ } [ { 851968 98 } { 917502 33 } [ >string ] bi@ string<=> ] unit-test
-{ +lt+ } [ { 917502 98 } { 917503 33 } [ >string ] bi@ string<=> ] unit-test
-{ +lt+ } [ { 917509 98 } { 983038 33 } [ >string ] bi@ string<=> ] unit-test
-{ +lt+ } [ { 983038 98 } { 983039 33 } [ >string ] bi@ string<=> ] unit-test
-{ +lt+ } [ { 1114109 98 } { 1114110 33 } [ >string ] bi@ string<=> ] unit-test
-{ +lt+ } [ { 1114110 98 } { 1114111 33 } [ >string ] bi@ string<=> ] unit-test
 
 
 { { 12748 12741 0 32 74 32 0 2 2 2 0 65535 65535 65535 } }
index 13c2e68b0d2f4349518aecfe766752711d89ada2..fe476282569a97b2510f2ecd8e354a5715778f3e 100644 (file)
@@ -1,7 +1,7 @@
 ! Copyright (C) 2008 Daniel Ehrenberg.
 ! See http://factorcode.org/license.txt for BSD license.
 USING: accessors arrays assocs combinators
-combinators.short-circuit combinators.smart kernel locals make
+combinators.short-circuit combinators.smart fry kernel locals make
 math math.order math.parser namespaces sequences
 simple-flat-file splitting strings unicode.data ;
 IN: unicode.collation
@@ -40,10 +40,164 @@ TUPLE: weight-levels primary secondary tertiary ignorable? ;
         [ swap set-at ] 2bi
     ] if ;
 
-: insert-helpers ( assoc -- )
-    dup keys [ length 3 >= ] filter [ help-one ] with each ;
-
-ducet get-global insert-helpers
+: fixup-ducet ( -- ) ! Stores each { code-points weight-levels-array } pair below in the global ducet, keyed by the code points converted to a string
+    {
+        {
+            { 0x0FB2 0x0F71 } ! CE(0FB2) CE(0F71)
+            {
+                T{ weight-levels
+                    { primary 12719 }
+                    { secondary 32 }
+                    { tertiary 2 }
+                }
+                T{ weight-levels
+                    { primary 12741 }
+                    { secondary 32 }
+                    { tertiary 2 }
+                }
+            }
+        }
+        {
+            { 0x0FB3 0x0F71 } ! CE(0FB3) CE(0F71)
+            {
+                T{ weight-levels
+                    { primary 12720 }
+                    { secondary 32 }
+                    { tertiary 2 }
+                }
+                T{ weight-levels
+                    { primary 12741 }
+                    { secondary 32 }
+                    { tertiary 2 }
+                }
+            }
+        }
+
+        ! FIXME: WRONG WEIGHTS - the second element of each entry below is still CE(0F71) (primary 12741), not the contracted CE named in the entry's comment
+        {
+            { 0x0FB2 0x0F71 0x0F72 } ! CE(0FB2) CE(0F71 0F72)
+            {
+                T{ weight-levels
+                    { primary 12719 }
+                    { secondary 32 }
+                    { tertiary 2 }
+                }
+                T{ weight-levels
+                    { primary 12741 }
+                    { secondary 32 }
+                    { tertiary 2 }
+                }
+            }
+        }
+        {
+            { 0x0FB2 0x0F73        } ! CE(0FB2) CE(0F71 0F72)
+            {
+                T{ weight-levels
+                    { primary 12719 }
+                    { secondary 32 }
+                    { tertiary 2 }
+                }
+                T{ weight-levels
+                    { primary 12741 }
+                    { secondary 32 }
+                    { tertiary 2 }
+                }
+            }
+        }
+        {
+            { 0x0FB2 0x0F71 0x0F74 } ! CE(0FB2) CE(0F71 0F74)
+            {
+                T{ weight-levels
+                    { primary 12719 }
+                    { secondary 32 }
+                    { tertiary 2 }
+                }
+                T{ weight-levels
+                    { primary 12741 }
+                    { secondary 32 }
+                    { tertiary 2 }
+                }
+            }
+        }
+        {
+            { 0x0FB2 0x0F75        } ! CE(0FB2) CE(0F71 0F74)
+            {
+                T{ weight-levels
+                    { primary 12719 }
+                    { secondary 32 }
+                    { tertiary 2 }
+                }
+                T{ weight-levels
+                    { primary 12741 }
+                    { secondary 32 }
+                    { tertiary 2 }
+                }
+            }
+        }
+        {
+            { 0x0FB3 0x0F71 0x0F72 } ! CE(0FB3) CE(0F71 0F72)
+            {
+                T{ weight-levels
+                    { primary 12720 }
+                    { secondary 32 }
+                    { tertiary 2 }
+                }
+                T{ weight-levels
+                    { primary 12741 }
+                    { secondary 32 }
+                    { tertiary 2 }
+                }
+            }
+        }
+        {
+            { 0x0FB3 0x0F73        } ! CE(0FB3) CE(0F71 0F72)
+            {
+                T{ weight-levels
+                    { primary 12720 }
+                    { secondary 32 }
+                    { tertiary 2 }
+                }
+                T{ weight-levels
+                    { primary 12741 }
+                    { secondary 32 }
+                    { tertiary 2 }
+                }
+            }
+        }
+        {
+            { 0x0FB3 0x0F71 0x0F74 } ! CE(0FB3) CE(0F71 0F74)
+            {
+                T{ weight-levels
+                    { primary 12720 }
+                    { secondary 32 }
+                    { tertiary 2 }
+                }
+                T{ weight-levels
+                    { primary 12741 }
+                    { secondary 32 }
+                    { tertiary 2 }
+                }
+            }
+        }
+        {
+            { 0x0FB3 0x0F75        } ! CE(0FB3) CE(0F71 0F74)
+            {
+                T{ weight-levels
+                    { primary 12720 }
+                    { secondary 32 }
+                    { tertiary 2 }
+                }
+                T{ weight-levels
+                    { primary 12741 }
+                    { secondary 32 }
+                    { tertiary 2 }
+                }
+            }
+        }
+    } ducet get-global '[ swap >string _ set-at ] assoc-each ;
+
+! Add a few missing ducet values
+fixup-ducet
 
 : tangut-block? ( char -- ? )
     ! Tangut Block, Tangut Components Block
@@ -73,22 +227,12 @@ ducet get-global insert-helpers
 : BBBB ( char -- weight-levels )
     0x7FFF bitand 0x8000 bitor 0 0 <weight-levels> ; inline
 
-: illegal? ( char -- ? )
-    {
-        [ "Noncharacter_Code_Point" property? ]
-        [ category "Cs" = ]
-    } 1|| ;
-
 : derive-weight ( 1string -- weight-levels-pair )
     first
     dup tangut-block? [
         [ tangut-AAAA ] [ tangut-BBBB ] bi 2array
     ] [
-        dup illegal? [
-            drop { }
-        ] [
-            [ AAAA ] [ BBBB ] bi 2array
-        ] if
+        [ AAAA ] [ BBBB ] bi 2array
     ] if ;
 
 : building-last ( -- char )