1 ! Copyright (C) 2006 Adam Langley and Chris Double.
2 ! Adam Langley was the original author of this work.
4 ! Chris Double modified it to fix bugs and get it working
5 ! correctly under the latest versions of Factor.
7 ! See http://factorcode.org/license.txt for BSD license.
9 USING: namespaces sequences kernel math io math.functions
10 io.binary strings classes words sbufs classes.tuple arrays
11 vectors byte-arrays quotations hashtables assocs help.syntax
12 help.markup splitting io.streams.byte-array io.encodings.string
13 io.encodings.utf8 io.encodings.binary combinators accessors
14 locals prettyprint compiler.units sequences.private
15 classes.tuple.private ;
! Generic word that takes one object and emits its serialized form.
! The M: methods in this file supply the encoding for each class.
18 GENERIC: (serialize) ( obj -- )
22 ! Variable holding an assoc of objects already serialized
! An id wrapper hashes like the object it wraps: the wrapped object is
! extracted with obj>> and hashcode* is applied to it.
29 M: id hashcode* obj>> hashcode* ;
! Two id wrappers are equal only when the objects they wrap are eq?
! (the same object, not merely equal content). If the other operand is
! not an id, both operands are dropped and the result is f.
31 M: id equal? over id? [ [ obj>> ] bi@ eq? ] [ 2drop f ] if ;
33 : add-object ( obj -- )
34 #! Add an object to the sequence of already serialized
36 serialized get [ assoc-size swap <id> ] keep set-at ;
! Wraps obj in an id and looks that wrapper up in the assoc stored in
! the serialized variable, leaving whatever `at` returns on the stack.
! NOTE(review): presumably f when obj has not been recorded yet --
! confirm against the behaviour of `at` for a missing key.
38 : object-id ( obj -- id )
39 #! Return the id of an already serialized object
40 <id> serialized get at ;
42 ! Numbers are serialized as follows:
44 ! 1<=x<=126 => B{ x | 0x80 }
45 ! x>=127 => B{ length(x) x[0] x[1] ... }
46 ! x>2^1024 => B{ 0xff length(x) x[0] x[1] ... }
47 ! The last case is needed because a very large number would
48 ! otherwise be confused with a small number.
49 : serialize-cell ( n -- )
50 dup zero? [ drop 0 write1 ] [
65 : deserialize-cell ( -- n )
67 { [ dup HEX: ff = ] [ drop deserialize-cell read be> ] }
68 { [ dup HEX: 80 >= ] [ HEX: 80 bitxor ] }
72 : serialize-shared ( obj quot -- )
75 [ CHAR: o write1 serialize-cell drop ]
78 M: f (serialize) ( obj -- )
81 M: integer (serialize) ( obj -- )
85 dup 0 < [ neg CHAR: m ] [ CHAR: p ] if write1
89 M: float (serialize) ( obj -- )
91 double>bits serialize-cell ;
93 M: complex (serialize) ( obj -- )
95 [ real-part (serialize) ]
96 [ imaginary-part (serialize) ] bi ;
98 M: ratio (serialize) ( obj -- )
100 [ numerator (serialize) ]
101 [ denominator (serialize) ] bi ;
103 : serialize-seq ( obj code -- )
107 [ length serialize-cell ]
108 [ [ (serialize) ] each ] tri
109 ] curry serialize-shared ;
111 M: tuple (serialize) ( obj -- )
114 [ class (serialize) ]
116 [ tuple>array rest (serialize) ]
! Arrays are written by serialize-seq using the type code CHAR: a,
! which is the code deserialize* maps to deserialize-array.
120 M: array (serialize) ( obj -- )
121 CHAR: a serialize-seq ;
123 M: quotation (serialize) ( obj -- )
126 [ >array (serialize) ] [ add-object ] bi
129 M: hashtable (serialize) ( obj -- )
132 [ add-object ] [ >alist (serialize) ] bi
135 M: byte-array (serialize) ( obj -- )
139 [ length serialize-cell ]
143 M: string (serialize) ( obj -- )
149 [ length serialize-cell ]
! Discards the word on the stack and writes the single byte CHAR: t.
! The word method calls this when the word being serialized is t.
154 : serialize-true ( word -- )
155 drop CHAR: t write1 ;
157 : serialize-gensym ( word -- )
161 [ def>> (serialize) ]
162 [ props>> (serialize) ]
166 : serialize-word ( word -- )
168 [ name>> (serialize) ]
169 [ vocabulary>> (serialize) ]
172 M: word (serialize) ( obj -- )
174 { [ dup t eq? ] [ serialize-true ] }
175 { [ dup vocabulary>> not ] [ serialize-gensym ] }
179 M: wrapper (serialize) ( obj -- )
181 wrapped>> (serialize) ;
! Forward declaration: the deserialize-* words that follow call
! (deserialize) before its definition appears later in the file.
183 DEFER: (deserialize) ( -- obj )
! Appends obj to the sequence held in the deserialized variable so that
! deserialize-unknown can later fetch it again by position.
187 : intern-object ( obj -- )
188 deserialized get push ;
190 : deserialize-false ( -- f )
193 : deserialize-true ( -- f )
196 : deserialize-positive-integer ( -- number )
! Reads a magnitude with deserialize-positive-integer and negates it.
! Selected by the type code CHAR: m in deserialize*.
199 : deserialize-negative-integer ( -- number )
200 deserialize-positive-integer neg ;
202 : deserialize-zero ( -- number )
! Reads an integer with deserialize-cell and reinterprets its bits as a
! float via bits>double. This mirrors the float method of (serialize),
! which writes double>bits with serialize-cell.
205 : deserialize-float ( -- float )
206 deserialize-cell bits>double ;
! Reads two objects with (deserialize) and divides the first by the
! second. The ratio method of (serialize) writes the numerator and then
! the denominator, so the operands arrive in that order.
208 : deserialize-ratio ( -- ratio )
209 (deserialize) (deserialize) / ;
! Reads two objects with (deserialize) and combines them with rect>.
! The complex method of (serialize) writes the real part and then the
! imaginary part, so the operands arrive in that order.
211 : deserialize-complex ( -- complex )
212 (deserialize) (deserialize) rect> ;
! Reads a count with deserialize-cell, reads that many elements from
! the current input stream and decodes them as UTF-8.
! Unlike deserialize-string, it does not call intern-object.
214 : (deserialize-string) ( -- string )
215 deserialize-cell read utf8 decode ;
! Reads a string with (deserialize-string), records it with
! intern-object and leaves the string on the stack.
217 : deserialize-string ( -- string )
218 (deserialize-string) dup intern-object ;
220 : deserialize-word ( -- word )
221 (deserialize) (deserialize) 2dup lookup
224 "Unknown word: " -rot
225 2array unparse append throw
228 : deserialize-gensym ( -- word )
231 [ (deserialize) define ]
232 [ (deserialize) >>props drop ]
! Reads one object with (deserialize) and wraps it with <wrapper>.
! This mirrors the wrapper method of (serialize), which writes wrapped>>.
236 : deserialize-wrapper ( -- wrapper )
237 (deserialize) <wrapper> ;
239 :: (deserialize-seq) ( exemplar quot -- seq )
240 deserialize-cell exemplar new-sequence
242 [ dup [ drop quot call ] change-each ] bi ; inline
! Builds an array through (deserialize-seq), with { } as the exemplar
! and (deserialize) as the quotation that produces each element.
244 : deserialize-array ( -- array )
245 { } [ (deserialize) ] (deserialize-seq) ;
! Reads one object with (deserialize), converts it with >quotation,
! records the result with intern-object and leaves it on the stack.
! NOTE(review): the stack effect names the output `array`, but the
! value left on the stack is the result of >quotation.
247 : deserialize-quotation ( -- array )
248 (deserialize) >quotation dup intern-object ;
! Builds a byte array through (deserialize-seq), with B{ } as the
! exemplar and read1 as the quotation that produces each element.
250 : deserialize-byte-array ( -- byte-array )
251 B{ } [ read1 ] (deserialize-seq) ;
253 : deserialize-hashtable ( -- hashtable )
256 [ (deserialize) update ]
! Pairs each element of seq with its index (0 up to the length of seq)
! and stores it at that index in tuple with set-array-nth.
! NOTE(review): set-array-nth presumably comes from sequences.private
! and performs no bounds check -- confirm tuple has enough slots.
259 : copy-seq-to-tuple ( seq tuple -- )
260 [ dup length ] dip [ set-array-nth ] curry 2each ;
262 : deserialize-tuple ( -- array )
263 #! Ugly because we have to intern the tuple before reading
269 [ [ copy-seq-to-tuple ] keep ] bi*
! Reads an index with deserialize-cell and fetches the entry at that
! position from the sequence held in the deserialized variable.
! Selected by the type code CHAR: o in deserialize*.
272 : deserialize-unknown ( -- object )
273 deserialize-cell deserialized get nth ;
275 : deserialize* ( -- object ? )
278 { CHAR: A [ deserialize-byte-array ] }
279 { CHAR: F [ deserialize-float ] }
280 { CHAR: T [ deserialize-tuple ] }
281 { CHAR: W [ deserialize-wrapper ] }
282 { CHAR: a [ deserialize-array ] }
283 { CHAR: c [ deserialize-complex ] }
284 { CHAR: h [ deserialize-hashtable ] }
285 { CHAR: m [ deserialize-negative-integer ] }
286 { CHAR: n [ deserialize-false ] }
287 { CHAR: t [ deserialize-true ] }
288 { CHAR: o [ deserialize-unknown ] }
289 { CHAR: p [ deserialize-positive-integer ] }
290 { CHAR: q [ deserialize-quotation ] }
291 { CHAR: r [ deserialize-ratio ] }
292 { CHAR: s [ deserialize-string ] }
293 { CHAR: w [ deserialize-word ] }
294 { CHAR: G [ deserialize-word ] }
295 { CHAR: z [ deserialize-zero ] }
! Reads one object with deserialize*. When the flag it returns is
! false, the string "End of stream" is thrown as an error.
301 : (deserialize) ( -- obj )
302 deserialize* [ "End of stream" throw ] unless ;
! Entry point for reading one object. Binds the deserialized variable
! to a fresh clone of V{ } for the duration of the (deserialize) call,
! so each top-level call starts with an empty list of recorded objects.
306 : deserialize ( -- obj )
307 V{ } clone deserialized
308 [ (deserialize) ] with-variable ;
! Entry point for writing one object. Binds the serialized variable to
! a fresh clone of H{ } for the duration of the (serialize) call, so
! each top-level call starts with an empty table of recorded objects.
310 : serialize ( obj -- )
311 H{ } clone serialized [ (serialize) ] with-variable ;
! Runs deserialize inside with-byte-reader using the binary encoding,
! so the object is read from bytes.
313 : bytes>object ( bytes -- obj )
314 binary [ deserialize ] with-byte-reader ;
! Runs serialize inside with-byte-writer using the binary encoding and
! returns the bytes that were written.
316 : object>bytes ( obj -- bytes )
317 binary [ serialize ] with-byte-writer ;