1 ! Copyright (C) 2009 Daniel Ehrenberg.
2 ! See http://factorcode.org/license.txt for BSD license.
3 USING: math kernel io.encodings combinators io io.encodings.utf16
4 generalizations sequences ;
15 : 4spin ( a b c d -- b c d a )
20 : stream-read4 ( stream -- a b c d )
28 : with-replacement ( _ _ _ ch quot -- new-ch )
29 [ 3drop replacement-char ] if* ; inline
31 : >char ( d c b a -- abcd )
42 M: utf32be decode-char
43 drop stream-read4 4spin
44 [ >char ] [ 3drop f ] if* ;
46 M: utf32le decode-char
47 drop stream-read4 4 npick
48 [ >char ] [ 2drop 2drop f ] if ;
52 : split-off ( ab -- b a )
53 [ HEX: FF bitand ] [ -8 shift ] bi ;
55 : char> ( abcd -- d b c a )
56 split-off split-off split-off ;
58 : stream-write4 ( d c b a stream -- )
66 M: utf32be encode-char
67 drop [ char> ] dip stream-write4 ;
69 M: utf32le encode-char
70 drop [ char> 4spin ] dip stream-write4 ;
74 : bom-le B{ HEX: ff HEX: fe 0 0 } ; inline
76 : bom-be B{ 0 0 HEX: fe HEX: ff } ; inline
78 : bom>le/be ( bom -- le/be )
79 dup bom-le sequence= [ drop utf32le ] [
80 bom-be sequence= [ utf32be ] [ missing-bom ] if
83 M: utf32 <decoder> ( stream utf32 -- decoder )
84 drop 4 over stream-read bom>le/be <decoder> ;
86 M: utf32 <encoder> ( stream utf32 -- encoder )
87 drop bom-le over stream-write utf32le <encoder> ;