gitweb.factorcode.org Git - factor.git/blobdiff - core/io/encodings/utf8/utf8-tests.factor
use radix literals
[factor.git] / core / io / encodings / utf8 / utf8-tests.factor
index 44d087038514af3a5d874e06f728cf6654aa7a6d..f3e444e777913175c8f442fa0ee5b476b839b6b0 100644 (file)
@@ -1,23 +1,43 @@
-USING: io.encodings.utf8 tools.test sbufs kernel io io.encodings
-sequences strings arrays unicode ;
+USING: io.encodings.utf8 tools.test io.encodings.string strings arrays
+bootstrap.unicode kernel sequences ;
+IN: io.encodings.utf8.tests
 
 : decode-utf8-w/stream ( array -- newarray )
-    >sbuf dup reverse-here utf8 <decoding> contents ;
+    utf8 decode >array ;
 
 : encode-utf8-w/stream ( array -- newarray )
-    SBUF" " clone tuck utf8 <encoding> stream-write >array ;
+    >string utf8 encode >array ;
 
-[ { CHAR: replacement-character } ] [ { BIN: 11110101 BIN: 10111111 BIN: 10000000 BIN: 11111111 } decode-utf8-w/stream >array ] unit-test
-
-[ { BIN: 101111111000000111111 } ] [ { BIN: 11110101 BIN: 10111111 BIN: 10000000 BIN: 10111111 } decode-utf8-w/stream >array ] unit-test
+[ { CHAR: replacement-character } ] [ { 0b11110,101 0b10,111111 0b10,000000 0b11111111 } decode-utf8-w/stream ] unit-test
 
 [ "x" ] [ "x" decode-utf8-w/stream >string ] unit-test
 
-[ { BIN: 11111000000 } ] [ { BIN: 11011111 BIN: 10000000 } decode-utf8-w/stream >array ] unit-test
+[ { 0b11111000000 } ] [ { 0b110,11111 0b10,000000 } decode-utf8-w/stream >array ] unit-test
+
+[ { CHAR: replacement-character } ] [ { 0b10000000 } decode-utf8-w/stream ] unit-test
+
+[ { 0b1111000000111111 } ] [ { 0b1110,1111 0b10,000000 0b10,111111 } decode-utf8-w/stream >array ] unit-test
+
+[ { 0b11110,101 0b10,111111 0b10,000000 0b10,111111 0b1110,1111 0b10,000000 0b10,111111 0b110,11111 0b10,000000 CHAR: x } ]
+[ { 0b101111111000000111111 0b1111000000111111 0b11111000000 CHAR: x } encode-utf8-w/stream ] unit-test
+
+[ 3 ] [ 1 "日本語" >utf8-index ] unit-test
+[ 3 ] [ 9 "日本語" utf8-index> ] unit-test
+
+[ 3 ] [ 2 "lápis" >utf8-index ] unit-test
+
+[ V{ } ] [ 100000 iota [ [ code-point-length ] [ 1string utf8 encode length ] bi = not ] filter ] unit-test
 
-[ { CHAR: replacement-character } ] [ { BIN: 10000000 } decode-utf8-w/stream >array ] unit-test
+[ { CHAR: replacement-character } ] [ { 0b110,00000 0b10,000000 } decode-utf8-w/stream ] unit-test
+[ { CHAR: replacement-character } ] [ { 0b110,00001 0b10,111111 } decode-utf8-w/stream ] unit-test
+[ { 0x80 } ] [ { 0b110,00010 0b10,000000 } decode-utf8-w/stream ] unit-test
 
-[ { BIN: 1111000000111111 } ] [ { BIN: 11101111 BIN: 10000000 BIN: 10111111 } decode-utf8-w/stream >array ] unit-test
+[ { CHAR: replacement-character } ] [ { 0b1110,0000 0b10,000000 0b10,000000 } decode-utf8-w/stream ] unit-test
+[ { CHAR: replacement-character } ] [ { 0b1110,0000 0b10,011111 0b10,111111 } decode-utf8-w/stream ] unit-test
+[ { 0x800 } ] [ { 0b1110,0000 0b10,100000 0b10,000000 } decode-utf8-w/stream ] unit-test
 
-[ { BIN: 11110101 BIN: 10111111 BIN: 10000000 BIN: 10111111 BIN: 11101111 BIN: 10000000 BIN: 10111111 BIN: 11011111 BIN: 10000000 CHAR: x } ]
-[ { BIN: 101111111000000111111 BIN: 1111000000111111 BIN: 11111000000 CHAR: x } encode-utf8-w/stream ] unit-test
+[ { CHAR: replacement-character } ] [ { 0b11110,000 0b10,000000 0b10,000000 0b10,000000 } decode-utf8-w/stream ] unit-test
+[ { CHAR: replacement-character } ] [ { 0b11110,000 0b10,001111 0b10,111111 0b10,111111 } decode-utf8-w/stream ] unit-test
+[ { CHAR: replacement-character } ] [ { 0b11110,100 0b10,010000 0b10,000000 0b10,000000 } decode-utf8-w/stream ] unit-test
+[ { 0x10000 } ] [ { 0b11110,000 0b10,010000 0b10,000000 0b10,000000 } decode-utf8-w/stream ] unit-test
+[ { 0x10FFFF } ] [ { 0b11110,100 0b10,001111 0b10,111111 0b10,111111 } decode-utf8-w/stream ] unit-test