USING: io.encodings.utf8 tools.test io.encodings.string strings arrays
-bootstrap.unicode ;
+bootstrap.unicode kernel sequences ;
IN: io.encodings.utf8.tests
: decode-utf8-w/stream ( array -- newarray )
! Unit tests for the UTF-8 index words and code-point-length.
!  - With "日本語": 1 >utf8-index is expected to give 3, and
!    9 utf8-index> is expected to give 3.
!  - With "lápis": 2 >utf8-index is expected to give 3.
!  - For every integer below 100000, code-point-length is compared with the
!    length of the UTF-8 encoding of the one-character string built from that
!    integer; the filter keeps mismatches, and the test expects none (V{ }).
[ 3 ] [ 1 "日本語" >utf8-index ] unit-test
[ 3 ] [ 9 "日本語" utf8-index> ] unit-test
+
+[ 3 ] [ 2 "lápis" >utf8-index ] unit-test
+
+[ V{ } ] [ 100000 [ [ code-point-length ] [ 1string utf8 encode length ] bi = not ] filter ] unit-test
PRIVATE>
: code-point-length ( n -- x )
- log2 {
+ next-power-of-2 log2 {
{ [ dup 0 7 between? ] [ 1 ] }
{ [ dup 8 11 between? ] [ 2 ] }
{ [ dup 12 16 between? ] [ 3 ] }
code-point-offsets [ <= ] with find drop ;
! >utf8-index ( n string -- n' )
! Applies code-point-offsets to the string and returns the element at
! index n of the resulting sequence.
! NOTE(review): code-point-offsets is defined outside this view; presumably
! it yields the UTF-8 byte offset of each code point -- confirm.
: >utf8-index ( n string -- n' )
- code-point-offsets nth ;
\ No newline at end of file
+ code-point-offsets nth ;