basis/unicode/breaks/breaks-tests.factor

   1 USING: tools.test unicode sequences math kernel splitting
   2 unicode.categories io.pathnames io.encodings.utf8 io.files
   3 strings quotations math.parser locals ;
   4 IN: unicode.breaks.tests
   5
   ! string-reverse works on grapheme clusters rather than code points:
   ! the three Hangul jamo (U+1112 U+1161 U+11AB), "A" followed by the
   ! combining grave accent U+0300, and the CR LF pair each keep their
   ! internal order while the clusters themselves are reversed.
   { "\u001112\u001161\u0011abA\u000300a\r\r\n" } [
       "\r\n\raA\u000300\u001112\u001161\u0011ab" string-reverse
   ] unit-test

   ! Plain ASCII: every character is its own cluster.
   { "dcba" } [ "abcd" string-reverse ] unit-test

   ! Cut the string just before its last grapheme ("a"); the last grapheme
   ! of what remains ("A" + U+0300) is expected at index 3, right after the
   ! three jamo.
   { 3 } [
       "\u001112\u001161\u0011abA\u000300a"
       dup last-grapheme head last-grapheme
   ] unit-test

   ! Grapheme boundaries around index 2 of an ASCII string.
   { 3 } [ 2 "hello" first-grapheme-from ] unit-test
   { 1 } [ 2 "hello" last-grapheme-from ] unit-test

   ! Word boundaries around index 2 (inside "what", indices 0-4) ...
   { 4 } [ 2 "what am I saying" first-word-from ] unit-test
   { 0 } [ 2 "what am I saying" last-word-from ] unit-test

   ! ... and around index 11 (inside "saying", indices 10-16).
   { 16 } [ 11 "what am I saying" first-word-from ] unit-test
   { 10 } [ 11 "what am I saying" last-word-from ] unit-test
  19
  ! ( -- filename )
  ! Vocab-relative path of the grapheme break test data that is fed to
  ! parse-test-file further down in this file.
  CONSTANT: grapheme-break-test
      "vocab:unicode/breaks/GraphemeBreakTest.txt"
  22
  ! ( -- filename )
  ! Vocab-relative path of the word break test data that is fed to
  ! parse-test-file further down in this file.
  CONSTANT: word-break-test
      "vocab:unicode/breaks/WordBreakTest.txt"
  25
  ! Keep only the text before the first "#"; a line without "#" is
  ! returned whole.
  : strip-test-comment ( line -- line' )
      "#" split1 drop ;

  ! Convert one "×"-separated run of hexadecimal code points into a
  ! string. Fields for which hex> yields f (such as an empty field) and
  ! fields that parse to 0 are dropped.
  : parse-test-segment ( str -- string )
      "×" split
      [ [ blank? ] trim hex> ] map
      [ { f 0 } member? ] reject
      >string ;

  ! Split a line on "÷" and parse every piece, discarding pieces that end
  ! up as empty strings. In the Unicode break test files "÷" marks a break
  ! and "×" marks the absence of one, so each surviving string is one
  ! expected segment.
  : parse-test-line ( line -- segments )
      "÷" split [ parse-test-segment ] map harvest ;

  ! Read a UTF-8 test data file and return one sequence of expected
  ! segments per non-empty, non-comment line.
  : parse-test-file ( file-name -- tests )
      utf8 file-lines
      [ strip-test-comment ] map harvest
      [ parse-test-line ] map ;
  38
  ! Register one unit test per element of tests.
  !   tests - sequence whose elements are sequences of expected segments
  !   quot  - segmenter to check; it is called on the concatenation of the
  !           expected segments
  ! For each element the expectation is the element itself (wrapped with
  ! 1quotation) and the tested quotation concatenates the segments, runs
  ! quot on the result and coerces every produced piece with "" like so it
  ! can be compared against the expected strings.
  :: test ( tests quot -- )
      tests [| expected |
          expected 1quotation
          expected concat [ quot call [ "" like ] map ] curry
          unit-test
      ] each ;
  44
  ! Register one unit test per element of tests, checking that >graphemes
  ! splits the concatenation of each element's segments back into exactly
  ! those segments.
  !   tests - sequence whose elements are sequences of expected graphemes
  ! This is the generic `test` word defined above, specialised to
  ! >graphemes; delegating keeps a single copy of the test-building logic.
  : grapheme-test ( tests -- )
      [ >graphemes ] test ;
  50
  ! Run the data-driven tests: every parsed line of each data file becomes
  ! one unit test of the corresponding segmenter.
  grapheme-break-test parse-test-file
  [ >graphemes ] test

  word-break-test parse-test-file
  [ >words ] test

  ! word-break-at? queried at every index 0..5 of "as df": a break is
  ! expected at 0, 2, 3 and 5, and none at 1 and 4.
  { { t f t t f t } } [
      6 <iota> [ "as df" word-break-at? ] map
  ] unit-test