>r swap start-decoding r>
decode-read-loop ;
-GENERIC: init-decoding ( stream encoding -- decoded-stream )
-
: <decoding> ( stream decoding-class -- decoded-stream )
- construct-empty init-decoding <line-reader> ;
-
-GENERIC: init-encoding ( stream encoding -- encoded-stream )
+ construct-delegate <line-reader> ;
: <encoding> ( stream encoding-class -- encoded-stream )
- construct-empty init-encoding <plain-writer> ;
+ construct-delegate <plain-writer> ;
GENERIC: encode-string ( string encoding -- byte-array )
M: tuple-class encode-string construct-empty encode-string ;
MIXIN: encoding-stream
-M: encoding-stream init-decoding ( stream encoding-stream -- encoding-stream )
- tuck set-delegate ;
-
-M: encoding-stream init-encoding ( stream encoding-stream -- encoding-stream )
- tuck set-delegate ;
-
M: encoding-stream stream-read1 1 swap stream-read ;
M: encoding-stream stream-read
[ encode-string ] keep delegate stream-write ;
M: encoding-stream dispose delegate dispose ;
+
+GENERIC: underlying-stream ( encoded-stream -- delegate )
+M: encoding-stream underlying-stream delegate ;
+
+GENERIC: set-underlying-stream ( new-underlying stream -- )
+M: encoding-stream set-underlying-stream set-delegate ;
+
+: set-encoding ( encoding stream -- ) ! This doesn't work now
+ [ underlying-stream swap construct-delegate ] keep
+ set-underlying-stream ;
-USING: tools.test io.utf16 arrays unicode.syntax ;
+USING: kernel tools.test io.encodings.utf16 arrays sbufs sequences io.encodings
+io unicode.syntax ;
-[ { CHAR: x } ] [ { 0 CHAR: x } decode-utf16be >array ] unit-test
-[ { HEX: 1D11E } ] [ { HEX: D8 HEX: 34 HEX: DD HEX: 1E } decode-utf16be >array ] unit-test
-[ { UNICHAR: replacement-character } ] [ { BIN: 11011111 CHAR: q } decode-utf16be >array ] unit-test
-[ { UNICHAR: replacement-character } ] [ { BIN: 11011011 CHAR: x BIN: 11011011 CHAR: x } decode-utf16be >array ] unit-test
+: decode-w/stream ( array encoding -- newarray )
+ >r >sbuf dup reverse-here r> <decoding> contents >array ;
-[ B{ 0 120 216 52 221 30 } ] [ { CHAR: x HEX: 1d11e } encode-utf16be ] unit-test
+: encode-w/stream ( array encoding -- newarray )
+ >r SBUF" " clone tuck r> <encoding> stream-write >array ;
-[ { CHAR: x } ] [ { CHAR: x 0 } decode-utf16le >array ] unit-test
-[ { 119070 } ] [ { HEX: 34 HEX: D8 HEX: 1E HEX: DD } decode-utf16le >array ] unit-test
-[ { UNICHAR: replacement-character } ] [ { 0 BIN: 11011111 } decode-utf16le >array ] unit-test
-[ { UNICHAR: replacement-character } ] [ { 0 BIN: 11011011 0 0 } decode-utf16le >array ] unit-test
+[ { CHAR: x } ] [ { 0 CHAR: x } utf16be decode-w/stream ] unit-test
+[ { HEX: 1D11E } ] [ { HEX: D8 HEX: 34 HEX: DD HEX: 1E } utf16be decode-w/stream ] unit-test
+[ { UNICHAR: replacement-character } ] [ { BIN: 11011111 CHAR: q } utf16be decode-w/stream ] unit-test
+[ { UNICHAR: replacement-character } ] [ { BIN: 11011011 CHAR: x BIN: 11011011 CHAR: x } utf16be decode-w/stream ] unit-test
-[ B{ 120 0 52 216 30 221 } ] [ { CHAR: x HEX: 1d11e } encode-utf16le ] unit-test
+[ { 0 120 216 52 221 30 } ] [ { CHAR: x HEX: 1d11e } utf16be encode-w/stream ] unit-test
+
+[ { CHAR: x } ] [ { CHAR: x 0 } utf16le decode-w/stream ] unit-test
+[ { 119070 } ] [ { HEX: 34 HEX: D8 HEX: 1E HEX: DD } utf16le decode-w/stream ] unit-test
+[ { UNICHAR: replacement-character } ] [ { 0 BIN: 11011111 } utf16le decode-w/stream ] unit-test
+[ { UNICHAR: replacement-character } ] [ { 0 BIN: 11011011 0 0 } utf16le decode-w/stream ] unit-test
+[ { 119070 } ] [ { HEX: 34 HEX: D8 HEX: 1E HEX: DD } utf16le decode-w/stream ] unit-test
+
+[ { 120 0 52 216 30 221 } ] [ { CHAR: x HEX: 1d11e } utf16le encode-w/stream ] unit-test
+
+[ { CHAR: x } ] [ { HEX: ff HEX: fe CHAR: x 0 } utf16 decode-w/stream ] unit-test
+[ { CHAR: x } ] [ { HEX: fe HEX: ff 0 CHAR: x } utf16 decode-w/stream ] unit-test
+
+[ { HEX: ff HEX: fe 120 0 52 216 30 221 } ] [ { CHAR: x HEX: 1d11e } utf16 encode-w/stream ] unit-test
! Copyright (C) 2006, 2007 Daniel Ehrenberg.
! See http://factorcode.org/license.txt for BSD license.
USING: math kernel sequences sbufs vectors namespaces io.binary
-io.encodings combinators splitting ;
-IN: io.utf16
+io.encodings combinators splitting io byte-arrays ;
+IN: io.encodings.utf16
SYMBOL: double
SYMBOL: quad1
: encode-utf16 ( str -- seq )
encode-utf16le bom-le swap append ;
-: utf16le? ( seq1 -- seq2 ? ) bom-le ?head ;
+: start-utf16le? ( seq1 -- seq2 ? ) bom-le ?head ;
-: utf16be? ( seq1 -- seq2 ? ) bom-be ?head ;
+: start-utf16be? ( seq1 -- seq2 ? ) bom-be ?head ;
: decode-utf16 ( seq -- str )
{
- { [ utf16le? ] [ decode-utf16le ] }
- { [ utf16be? ] [ decode-utf16be ] }
+ { [ start-utf16le? ] [ decode-utf16le ] }
+ { [ start-utf16be? ] [ decode-utf16be ] }
{ [ t ] [ decode-error ] }
} cond ;
TUPLE: utf16le ;
-: <utf16le> utf16le construct-delegate ;
INSTANCE: utf16le encoding-stream
M: utf16le encode-string drop encode-utf16le ;
M: utf16le decode-step drop decode-utf16le-step ;
TUPLE: utf16be ;
-: <utf16be> utf16be construct-delegate ;
INSTANCE: utf16be encoding-stream
M: utf16be encode-string drop encode-utf16be ;
-M: utf16le decode-step drop decode-utf16be-step ;
+M: utf16be decode-step drop decode-utf16be-step ;
+
+TUPLE: utf16 encoding ;
+INSTANCE: utf16 encoding-stream
+M: utf16 underlying-stream delegate dup delegate [ ] [ ] ?if ; ! necessary?
+M: utf16 set-underlying-stream delegate set-delegate ; ! necessary?
+
+M: utf16 encode-string
+ >r encode-utf16le r>
+ dup utf16-encoding [ drop ]
+ [ t swap set-utf16-encoding bom-le swap append ] if ;
+
+: bom>le/be ( bom -- le/be )
+ dup bom-le sequence= [ drop utf16le ] [
+ bom-be sequence= [ utf16be ] [ decode-error ] if
+ ] if ;
+
+: read-bom ( utf16 -- encoding )
+ 2 over delegate stream-read bom>le/be construct-empty
+ [ swap set-utf16-encoding ] keep ;
+
+M: utf16 decode-step
+ ! inefficient: checks if bom is done many times
+ ! This should transform itself into utf16be or utf16le after reading BOM
+ dup utf16-encoding [ ] [ read-bom ] ?if decode-step ;
-USING: io.encodings.utf8 tools.test sbufs kernel io
+USING: io.encodings.utf8 tools.test sbufs kernel io io.encodings
sequences strings arrays unicode.syntax ;
: decode-utf8-w/stream ( array -- newarray )
- >sbuf dup reverse-here <utf8> contents >array ;
+ >sbuf dup reverse-here utf8 <decoding> contents ;
: encode-utf8-w/stream ( array -- newarray )
- SBUF" " clone tuck <utf8> write >array ;
+ SBUF" " clone tuck utf8 <encoding> stream-write >array ;
[ { UNICHAR: replacement-character } ] [ { BIN: 11110101 BIN: 10111111 BIN: 10000000 BIN: 11111111 } decode-utf8-w/stream >array ] unit-test
[ { BIN: 1111000000111111 } ] [ { BIN: 11101111 BIN: 10000000 BIN: 10111111 } decode-utf8-w/stream >array ] unit-test
-[ B{ BIN: 11110101 BIN: 10111111 BIN: 10000000 BIN: 10111111 BIN: 11101111 BIN: 10000000 BIN: 10111111 BIN: 11011111 BIN: 10000000 CHAR: x } ]
-[ { BIN: 101111111000000111111 BIN: 1111000000111111 BIN: 11111000000 CHAR: x } encode-utf8 ] unit-test
+[ { BIN: 11110101 BIN: 10111111 BIN: 10000000 BIN: 10111111 BIN: 11101111 BIN: 10000000 BIN: 10111111 BIN: 11011111 BIN: 10000000 CHAR: x } ]
+[ { BIN: 101111111000000111111 BIN: 1111000000111111 BIN: 11111000000 CHAR: x } encode-utf8-w/stream ] unit-test
! Interface for streams
TUPLE: utf8 ;
-: <utf8> utf8 construct-delegate ;
INSTANCE: utf8 encoding-stream
M: utf8 encode-string drop encode-utf8 ;