1 ! Copyright (C) 2008, 2010 Daniel Ehrenberg, Slava Pestov.
2 ! See http://factorcode.org/license.txt for BSD license.
3 USING: accessors byte-arrays combinators destructors io
4 io.streams.plain kernel kernel.private math namespaces sbufs
5 sequences sequences.private splitting strings strings.private ;
8 ! The encoding descriptor protocol
10 GENERIC: guess-encoded-length ( string-length encoding -- byte-length )
11 GENERIC: guess-decoded-length ( byte-length encoding -- string-length )
13 M: object guess-decoded-length drop ; inline
14 M: object guess-encoded-length drop ; inline
16 GENERIC: decode-char ( stream encoding -- char/f )
18 GENERIC: decode-until ( seps stream encoding -- string/f sep/f )
22 ! If the stop? branch is taken convert the sbuf to a string
23 ! If sep is present, returns ``string sep'' (string can be "")
24 ! If sep is f, returns ``string f'' or ``f f''
25 : read-until-loop ( buf quot: ( -- char stop? ) -- string/f sep/f )
27 [ nip [ "" like ] dip [ f like f ] unless* ]
28 [ pick push read-until-loop ] if ; inline recursive
32 : (decode-until) ( seps stream encoding -- string/f sep/f )
33 [ decode-char dup ] 2curry swap [ dupd member? ] curry
34 [ [ drop f t ] if ] curry compose
35 [ 100 <sbuf> ] dip read-until-loop ; inline
37 M: object decode-until (decode-until) ;
39 CONSTANT: replacement-char 0xfffd
43 : string>byte-array-fast ( string -- byte-array )
44 { string } declare ! aux>> must be f
45 [ length ] keep over (byte-array) [
47 [ [ string-nth-fast ] 2keep drop ]
48 [ set-nth-unsafe ] bi*
52 : byte-array>string-fast ( byte-array -- string )
53 { byte-array } declare
54 [ length ] keep over 0 <string> [
56 [ [ nth-unsafe ] 2keep drop ]
59 [ set-string-nth-fast ]
60 [ [ drop replacement-char ] 2dip set-string-nth-slow ]
64 ] keep dup reset-string-hashcode ;
68 GENERIC: encode-char ( char stream encoding -- )
70 GENERIC: encode-string ( string stream encoding -- )
72 M: object encode-string [ encode-char ] 2curry each ; inline
74 GENERIC: <decoder> ( stream encoding -- newstream )
76 TUPLE: decoder { stream read-only } { code read-only } { cr boolean } ;
77 INSTANCE: decoder input-stream
81 GENERIC: <encoder> ( stream encoding -- newstream )
83 TUPLE: encoder { stream read-only } { code read-only } ;
84 INSTANCE: encoder output-stream
90 M: object <decoder> f decoder boa ; inline
94 : cr+ ( stream -- ) t >>cr drop ; inline
96 : cr- ( stream -- ) f >>cr drop ; inline
98 : >decoder< ( decoder -- stream encoding )
99 [ stream>> ] [ code>> ] bi ; inline
101 M: decoder stream-element-type
102 drop +character+ ; inline
104 : (read1) ( decoder -- ch )
105 >decoder< decode-char ; inline
107 : fix-cr ( decoder c -- c' )
109 over cr- dup CHAR: \n eq? [ drop (read1) ] [ nip ] if
110 ] [ nip ] if ; inline
112 M: decoder stream-read1 ( decoder -- ch )
113 dup (read1) fix-cr ; inline
115 : (read-first) ( n buf decoder -- buf stream encoding n c )
116 [ rot [ >decoder< ] dip 2over decode-char ]
117 [ swap fix-cr ] bi ; inline
119 : (store-read) ( buf stream encoding n c i -- buf stream encoding n )
120 [ rot [ set-nth-unsafe ] keep ] 2curry 3dip ; inline
122 : (finish-read) ( buf stream encoding n i -- i )
125 : (read-next) ( stream encoding n i -- stream encoding n i c )
126 [ 2dup decode-char ] 2dip rot ; inline
128 : (read-rest) ( buf stream encoding n i -- count )
129 2dup = [ (finish-read) ] [
131 swap [ (store-read) ] [ 1 + ] bi (read-rest)
132 ] [ (finish-read) ] if*
133 ] if ; inline recursive
135 M: decoder stream-read-unsafe
136 pick 0 = [ 3drop 0 ] [
143 M: decoder stream-contents*
144 (stream-contents-by-element) ; inline
146 : line-ends/eof ( stream str -- str ) f like swap cr- ; inline
148 : line-ends\r ( stream str -- str ) swap cr+ ; inline
150 : line-ends\n ( stream str -- str )
152 over cr- [ stream-readln ] [ nip ] if-empty
153 ] [ nip ] if ; inline
155 : handle-readln ( stream str ch -- str )
157 { f [ line-ends/eof ] }
158 { CHAR: \r [ line-ends\r ] }
159 { CHAR: \n [ line-ends\n ] }
162 M: decoder stream-read-until
165 >decoder< decode-until
168 2drop stream-read-until
173 first-unsafe CHAR: \n = [ [ rest ] dip ] when
177 >decoder< decode-until
180 M: decoder stream-readln
181 "\r\n" over >decoder< decode-until handle-readln ;
183 M: decoder dispose stream>> dispose ;
186 M: object <encoder> encoder boa ; inline
188 : >encoder< ( encoder -- stream encoding )
189 [ stream>> ] [ code>> ] bi ; inline
191 M: encoder stream-element-type
192 drop +character+ ; inline
194 M: encoder stream-write1
195 >encoder< encode-char ; inline
197 M: encoder stream-write
198 >encoder< encode-string ; inline
200 M: encoder dispose stream>> dispose ; inline
202 M: encoder stream-flush stream>> stream-flush ; inline
204 INSTANCE: encoder plain-writer
208 GENERIC#: re-encode 1 ( stream encoding -- newstream )
210 M: object re-encode <encoder> ;
212 M: encoder re-encode [ stream>> ] dip re-encode ;
214 : encode-output ( encoding -- )
215 output-stream [ swap re-encode ] change ;
217 : with-encoded-output ( encoding quot -- )
218 [ [ output-stream get ] dip re-encode ] dip
219 with-output-stream* ; inline
221 GENERIC#: re-decode 1 ( stream encoding -- newstream )
223 M: object re-decode <decoder> ;
225 M: decoder re-decode [ stream>> ] dip re-decode ;
227 : decode-input ( encoding -- )
228 input-stream [ swap re-decode ] change ;
230 : with-decoded-input ( encoding quot -- )
231 [ [ input-stream get ] dip re-decode ] dip
232 with-input-stream* ; inline