basis/unicode/normalize/normalize-tests.factor

   1 USING: arrays assocs combinators combinators.short-circuit
   2 grouping io.encodings.utf8 io.files io.streams.null kernel
   3 locals math math.parser quotations random sequences splitting
   4 splitting.extras strings tools.test unicode
   5 unicode.normalize.private ;
   6 IN: unicode.normalize.tests
   7
   8 { "ab\u000323\u000302cd" } [ "ab\u000302" "\u000323cd" string-append ] unit-test
   9
  10 { "ab\u00064b\u000347\u00034e\u00034d\u000346" }
  11 [ "ab\u000346\u000347\u00064b\u00034e\u00034d" dup reorder ] unit-test
  12
  13 { "hello" "hello" } [ "hello" [ nfd ] keep nfkd ] unit-test
  14
  15 { "\u00FB012\u002075\u00017F\u000323\u000307" "fi25s\u000323\u000307" }
  16 [ "\u00FB012\u002075\u001E9B\u000323" [ nfd ] keep nfkd ] unit-test
  17
  18 { "\u001E69" "s\u000323\u000307" } [ "\u001E69" [ nfc ] keep nfd ] unit-test
  19 { "\u001E0D\u000307" } [ "\u001E0B\u000323" nfc ] unit-test
  20
  21 { 54620 } [ 4370 4449 4523 jamo>hangul ] unit-test
  22 { 4370 4449 4523 } [ 54620 hangul>jamo first3 ] unit-test
  23 { t } [ 54620 hangul? ] unit-test
  24 { f } [ 0 hangul? ] unit-test
  25 { "\u001112\u001161\u0011ab" } [ "\u00d55c" nfd ] unit-test
  26 { "\u00d55c" } [ "\u001112\u001161\u0011ab" nfc ] unit-test
  27
  28 ! Could use simple-flat-file after some cleanup
  29 : parse-normalization-tests ( -- tests )
  30     "vocab:unicode/UCD/NormalizationTest.txt" utf8 file-lines
  31     [ "#" head? ] reject
  32     [ "@" head? ] split*-when
  33     2 <groups> [ first2 [ first ] dip 2array ] map
  34     values [
  35         [
  36             "#@" split first [ CHAR: \s = ] trim-tail ";" split harvest
  37             [ words [ hex> ] "" map-as ] map
  38         ] map
  39     ] map concat ;
  40
  41 :: check-normalization-test? ( test spec quot -- ? )
  42     spec [
  43         [
  44             [ 1 - test nth ] bi@ quot call( str -- str' ) =
  45         ] with all?
  46     ] assoc-all? ;
  47
  48 { { } } [
  49     parse-normalization-tests [
  50         {
  51             [ { { 2 { 1 2 3 } } { 4 { 4 5 } } } [ nfc ] check-normalization-test? ]
  52             [ { { 3 { 1 2 3 } } { 5 { 4 5 } } } [ nfd ] check-normalization-test? ]
  53             [ { { 4 { 1 2 3 4 5 } } } [ nfkc ] check-normalization-test? ]
  54             [ { { 5 { 1 2 3 4 5 } } } [ nfkd ] check-normalization-test? ]
  55         } 1&&
  56     ] reject
  57 ] unit-test
  58
  59 { { 4018 820 3953 3968 } }
  60 [ { 3958 3953 820 } >string nfd >array ] unit-test