! Copyright (C) 2005, 2009 Daniel Ehrenberg
! See http://factorcode.org/license.txt for BSD license.
USING: kernel namespaces xml.name io.encodings.utf8 xml.elements
io.encodings.utf16 xml.tokenize xml.state math ascii sequences
io.encodings.string io.encodings combinators accessors
xml.data io.encodings.iana ;
IN: xml.autoencoding

! Re-decodes the stream of the object stored in the `spot` variable
! using the given encoding: the stream slot is replaced by the result
! of `re-decode` on ( stream encoding ), and the object left behind by
! `change-stream` is dropped.
: decode-stream ( encoding -- )
    spot get [ swap re-decode ] change-stream drop ;

! Produces a tag from `str` by running parse-name-starting, middle-tag
! and end-tag in sequence. All three are defined outside this file.
! Not called by any other definition visible in this file.
: continue-make-tag ( str -- tag )
    parse-name-starting middle-tag end-tag ;

! Switches the input stream to utf16le, requires the literal "?\0" to
! come next (via `expect`), then runs `check` and parses with `instruct`.
: start-utf16le ( -- tag )
    utf16le decode-stream
    "?\0" expect
    check instruct ;

! True when bits 7..6 of `ch` are binary 10: the value is shifted right
! by six, masked to two bits and compared with 2. This is the bit
! pattern of a UTF-8 continuation byte.
! Not called by any other definition visible in this file.
: 10xxxxxx? ( ch -- ? )
    -6 shift 3 bitand 2 = ;

! NOTE(review): this definition has no stack-effect declaration, and
! `prolog-encoding` (called by instruct-encoding) is not defined
! anywhere in the visible text. Text appears to be missing around
! here -- confirm against the upstream source before relying on it.
! As written: if the top of the stack equals "UTF-16" it is dropped;
! otherwise it is looked up with name>encoding and, when the lookup
! yields a non-f value, the stream is re-decoded with that encoding.
: start>
    dup "UTF-16" =
    [ drop ] [ name>encoding [ decode-stream ] when* ] if ;

! For a prolog, delegates to prolog-encoding; for anything else the
! value is dropped and the stream is re-decoded as utf8.
: instruct-encoding ( instruct/prolog -- )
    dup prolog?
    [ prolog-encoding ]
    [ drop utf8 decode-stream ] if ;

! Runs `check`, re-decodes the stream as utf8, then calls `next` twice.
: go-utf8 ( -- )
    check utf8 decode-stream next next ;

! Dispatches on the value returned by get-next:
!   0         calls `next` twice, then start-utf16le
!   CHAR: ?   switches to utf8, parses with `instruct`, then applies
!             instruct-encoding to a copy of the result
!   CHAR: !   switches to utf8, then parses with `direct`
!   otherwise the default branch, which begins with `check`
! NOTE(review): the visible text stops inside the default branch, so
! the rest of this definition cannot be described from here.
: start< ( -- tag )
    ! What if first letter of processing instruction is non-ASCII?
    get-next {
        { 0 [ next next start-utf16le ] }
        { CHAR: ? [ go-utf8 instruct dup instruct-encoding ] }
        { CHAR: ! [ go-utf8 direct ] }
        [ check start