1 ! Copyright (C) 2008, 2010 Daniel Ehrenberg, Slava Pestov.
2 ! See http://factorcode.org/license.txt for BSD license.
3 USING: accessors combinators destructors io io.streams.plain
4 kernel math namespaces sbufs sequences sequences.private
8 ! The encoding descriptor protocol
10 GENERIC: decode-char ( stream encoding -- char/f )
12 GENERIC: encode-char ( char stream encoding -- )
14 GENERIC: encode-string ( string stream encoding -- )
16 M: object encode-string [ encode-char ] 2curry each ; inline
18 GENERIC: <decoder> ( stream encoding -- newstream )
20 CONSTANT: replacement-char HEX: fffd
22 TUPLE: decoder stream code cr ;
26 GENERIC: <encoder> ( stream encoding -- newstream )
28 TUPLE: encoder stream code ;
34 M: object <decoder> f decoder boa ;
38 : cr+ ( stream -- ) t >>cr drop ; inline
40 : cr- ( stream -- ) f >>cr drop ; inline
42 : >decoder< ( decoder -- stream encoding )
43 [ stream>> ] [ code>> ] bi ; inline
45 : fix-read1 ( stream char -- char )
53 M: decoder stream-element-type
56 M: decoder stream-tell stream>> stream-tell ;
58 M: decoder stream-seek stream>> stream-seek ;
60 M: decoder stream-read1
61 dup >decoder< decode-char fix-read1 ;
63 : fix-read ( stream string -- string )
67 over stream-read1 [ suffix ] when*
71 ! If we read the entire buffer, chars-read is f
72 ! If we hit EOF while reading, chars-read indicates how many chars were read
73 : (read) ( chars-requested quot -- chars-read/f string )
76 over [ swapd set-nth-unsafe f ] [ 3drop t ] if
77 ] curry compose find-integer
80 : finish-read ( n/f string -- string/f )
82 { [ dup zero? ] [ 2drop f ] }
83 { [ dup not ] [ drop ] }
87 M: decoder stream-read
92 [ >decoder< [ decode-char ] 2curry (read) finish-read ] 2bi
96 M: decoder stream-read-partial stream-read ;
98 : line-ends/eof ( stream str -- str ) f like swap cr- ; inline
100 : line-ends\r ( stream str -- str ) swap cr+ ; inline
102 : line-ends\n ( stream str -- str )
103 over cr>> over empty? and
104 [ drop dup cr- stream-readln ] [ swap cr- ] if ; inline
106 : handle-readln ( stream str ch -- str )
108 { f [ line-ends/eof ] }
109 { CHAR: \r [ line-ends\r ] }
110 { CHAR: \n [ line-ends\n ] }
113 ! If the stop? branch is taken convert the sbuf to a string
114 ! If sep is present, returns ``string sep'' (string can be "")
115 ! If sep is f, returns ``string f'' or ``f f''
116 : read-until-loop ( buf quot: ( -- char stop? ) -- string/f sep/f )
118 [ nip [ "" like ] dip [ f like f ] unless* ]
119 [ pick push read-until-loop ] if ; inline recursive
121 : (read-until) ( quot -- string/f sep/f )
122 [ 100 <sbuf> ] dip read-until-loop ; inline
124 : decoder-read-until ( seps stream encoding -- string/f sep/f )
125 [ decode-char dup [ dup rot member? ] [ 2drop f t ] if ] 3curry
128 M: decoder stream-read-until >decoder< decoder-read-until ;
130 : decoder-readln ( stream encoding -- string/f sep/f )
131 [ decode-char dup [ dup "\r\n" member? ] [ drop f t ] if ] 2curry
134 M: decoder stream-readln dup >decoder< decoder-readln handle-readln ;
136 M: decoder dispose stream>> dispose ;
139 M: object <encoder> encoder boa ;
141 : >encoder< ( encoder -- stream encoding )
142 [ stream>> ] [ code>> ] bi ; inline
144 M: encoder stream-element-type
147 M: encoder stream-write1
148 >encoder< encode-char ;
150 M: encoder stream-write
151 >encoder< encode-string ;
153 M: encoder dispose stream>> dispose ;
155 M: encoder stream-flush stream>> stream-flush ;
157 INSTANCE: encoder plain-writer
160 GENERIC# re-encode 1 ( stream encoding -- newstream )
162 M: object re-encode <encoder> ;
164 M: encoder re-encode [ stream>> ] dip re-encode ;
166 : encode-output ( encoding -- )
167 output-stream [ swap re-encode ] change ;
169 : with-encoded-output ( encoding quot -- )
170 [ [ output-stream get ] dip re-encode ] dip
171 with-output-stream* ; inline
173 GENERIC# re-decode 1 ( stream encoding -- newstream )
175 M: object re-decode <decoder> ;
177 M: decoder re-decode [ stream>> ] dip re-decode ;
179 : decode-input ( encoding -- )
180 input-stream [ swap re-decode ] change ;
182 : with-decoded-input ( encoding quot -- )
183 [ [ input-stream get ] dip re-decode ] dip
184 with-input-stream* ; inline