! Copyright (C) 2006, 2009 Daniel Ehrenberg. ! See http://factorcode.org/license.txt for BSD license. USING: accessors byte-arrays combinators io io.binary io.encodings kernel math math.private namespaces sbufs sequences sequences.private splitting strings.private vectors ; IN: io.encodings.utf16 SINGLETON: utf16be SINGLETON: utf16le SINGLETON: utf16 ERROR: missing-bom ; utf16be ( char stream -- ) over 0xFFFF > [ [ 0x10000 - ] dip [ [ encode-first ] dip stream-write2 ] [ [ encode-second ] dip stream-write2 ] 2bi ] [ [ h>b/b swap ] dip stream-write2 ] if ; inline M: utf16be encode-char ( char stream encoding -- ) drop char>utf16be ; : char>utf16le ( char stream -- ) over 0xFFFF > [ [ 0x10000 - ] dip [ [ encode-first swap ] dip stream-write2 ] [ [ encode-second swap ] dip stream-write2 ] 2bi ] [ [ h>b/b ] dip stream-write2 ] if ; inline M: utf16le encode-char ( char stream encoding -- ) drop char>utf16le ; : ascii-char>utf16-byte-array ( off n byte-array string -- ) [ over ] dip string-nth-fast -rot [ 2 fixnum*fast rot fixnum+fast ] dip set-nth-unsafe ; inline : ascii-string>utf16-byte-array ( off string -- byte-array ) [ length >fixnum [ iota ] [ 2 fixnum*fast ] bi ] keep [ [ ascii-char>utf16-byte-array ] 2curry with each ] 2keep drop ; inline : ascii-string>utf16le ( string stream -- ) [ 0 swap ascii-string>utf16-byte-array ] dip stream-write ; inline : ascii-string>utf16be ( string stream -- ) [ 1 swap ascii-string>utf16-byte-array ] dip stream-write ; inline M: utf16le encode-string drop over aux>> [ [ char>utf16le ] curry each ] [ ascii-string>utf16le ] if ; M: utf16be encode-string drop over aux>> [ [ char>utf16be ] curry each ] [ ascii-string>utf16be ] if ; M: utf16le guess-encoded-length drop 2 * ; inline M: utf16le guess-decoded-length drop 2 /i ; inline M: utf16be guess-encoded-length drop 2 * ; inline M: utf16be guess-decoded-length drop 2 /i ; inline ! UTF-16 CONSTANT: bom-le B{ 0xff 0xfe } CONSTANT: bom-be B{ 0xfe 0xff } : bom>le/be ( bom -- le/be ) dup bom-le sequence= [ drop utf16le ] [ bom-be sequence= [ utf16be ] [ missing-bom ] if ] if ; M: utf16 ( stream utf16 -- decoder ) drop 2 over stream-read bom>le/be ; M: utf16 ( stream utf16 -- encoder ) drop bom-le over stream-write utf16le ; PRIVATE>