1 ! Copyright (C) 2008, 2010 Daniel Ehrenberg, Slava Pestov.
2 ! See http://factorcode.org/license.txt for BSD license.
3 USING: accessors combinators destructors io io.streams.plain
4 kernel math namespaces sbufs sequences sequences.private
8 ! The encoding descriptor protocol
10 GENERIC: guess-encoded-length ( string-length encoding -- byte-length )
11 GENERIC: guess-decoded-length ( byte-length encoding -- string-length )
13 M: object guess-decoded-length drop ; inline
14 M: object guess-encoded-length drop ; inline
16 GENERIC: decode-char ( stream encoding -- char/f )
18 GENERIC: encode-char ( char stream encoding -- )
20 GENERIC: encode-string ( string stream encoding -- )
22 M: object encode-string [ encode-char ] 2curry each ; inline
24 GENERIC: <decoder> ( stream encoding -- newstream )
26 CONSTANT: replacement-char 0xfffd
28 TUPLE: decoder { stream read-only } { code read-only } { cr boolean } ;
29 INSTANCE: decoder input-stream
33 GENERIC: <encoder> ( stream encoding -- newstream )
35 TUPLE: encoder { stream read-only } { code read-only } ;
36 INSTANCE: encoder output-stream
42 M: object <decoder> f decoder boa ; inline
46 : cr+ ( stream -- ) t >>cr drop ; inline
48 : cr- ( stream -- ) f >>cr drop ; inline
50 : >decoder< ( decoder -- stream encoding )
51 [ stream>> ] [ code>> ] bi ; inline
53 M: decoder stream-element-type
54 drop +character+ ; inline
56 : (read1) ( decoder -- ch )
57 >decoder< decode-char ; inline
59 : fix-cr ( decoder c -- c' )
62 dup CHAR: \n eq? [ drop (read1) ] [ nip ] if
65 M: decoder stream-read1 ( decoder -- ch )
66 dup (read1) fix-cr ; inline
68 : (read-first) ( n buf decoder -- buf stream encoding n c )
69 [ rot [ >decoder< ] dip 2over decode-char ]
70 [ swap fix-cr ] bi ; inline
72 : (store-read) ( buf stream encoding n c i -- buf stream encoding n )
73 [ rot [ set-nth-unsafe ] keep ] 2curry 3dip ; inline
75 : (finish-read) ( buf stream encoding n i -- i )
78 : (read-next) ( stream encoding n i -- stream encoding n i c )
79 [ 2dup decode-char ] 2dip rot ; inline
81 : (read-rest) ( buf stream encoding n i -- count )
82 2dup = [ (finish-read) ] [
84 swap [ (store-read) ] [ 1 + ] bi (read-rest)
85 ] [ (finish-read) ] if*
86 ] if ; inline recursive
88 M: decoder stream-read-unsafe
89 pick 0 = [ 3drop 0 ] [
93 ] [ 2drop 2drop 0 ] if*
96 M: decoder stream-contents
97 (stream-contents-by-element) ;
99 : line-ends/eof ( stream str -- str ) f like swap cr- ; inline
101 : line-ends\r ( stream str -- str ) swap cr+ ; inline
103 : line-ends\n ( stream str -- str )
104 over cr>> over empty? and
105 [ drop dup cr- stream-readln ] [ swap cr- ] if ; inline
107 : handle-readln ( stream str ch -- str )
109 { f [ line-ends/eof ] }
110 { CHAR: \r [ line-ends\r ] }
111 { CHAR: \n [ line-ends\n ] }
114 ! If the stop? branch is taken convert the sbuf to a string
115 ! If sep is present, returns ``string sep'' (string can be "")
116 ! If sep is f, returns ``string f'' or ``f f''
117 : read-until-loop ( buf quot: ( -- char stop? ) -- string/f sep/f )
119 [ nip [ "" like ] dip [ f like f ] unless* ]
120 [ pick push read-until-loop ] if ; inline recursive
122 : (read-until) ( quot -- string/f sep/f )
123 [ 100 <sbuf> ] dip read-until-loop ; inline
125 : decoder-read-until ( seps stream encoding -- string/f sep/f )
126 [ decode-char dup [ dup rot member? ] [ 2drop f t ] if ] 3curry
129 M: decoder stream-read-until >decoder< decoder-read-until ;
131 : decoder-readln ( stream encoding -- string/f sep/f )
132 [ decode-char dup [ dup "\r\n" member? ] [ drop f t ] if ] 2curry
135 M: decoder stream-readln dup >decoder< decoder-readln handle-readln ;
137 M: decoder dispose stream>> dispose ;
140 M: object <encoder> encoder boa ; inline
142 : >encoder< ( encoder -- stream encoding )
143 [ stream>> ] [ code>> ] bi ; inline
145 M: encoder stream-element-type
146 drop +character+ ; inline
148 M: encoder stream-write1
149 >encoder< encode-char ; inline
151 M: encoder stream-write
152 >encoder< encode-string ; inline
154 M: encoder dispose stream>> dispose ; inline
156 M: encoder stream-flush stream>> stream-flush ; inline
158 INSTANCE: encoder plain-writer
161 GENERIC# re-encode 1 ( stream encoding -- newstream )
163 M: object re-encode <encoder> ;
165 M: encoder re-encode [ stream>> ] dip re-encode ;
167 : encode-output ( encoding -- )
168 output-stream [ swap re-encode ] change ;
170 : with-encoded-output ( encoding quot -- )
171 [ [ output-stream get ] dip re-encode ] dip
172 with-output-stream* ; inline
174 GENERIC# re-decode 1 ( stream encoding -- newstream )
176 M: object re-decode <decoder> ;
178 M: decoder re-decode [ stream>> ] dip re-decode ;
180 : decode-input ( encoding -- )
181 input-stream [ swap re-decode ] change ;
183 : with-decoded-input ( encoding quot -- )
184 [ [ input-stream get ] dip re-decode ] dip
185 with-input-stream* ; inline