1 ! Copyright (C) 2009 Doug Coleman.
2 ! See http://factorcode.org/license.txt for BSD license.
3 USING: accessors assocs bitstreams byte-vectors combinators io
4 io.encodings.binary io.streams.byte-array kernel math sequences
8 CONSTANT: clear-code 256
9 CONSTANT: end-of-information 257
11 TUPLE: lzw input output end-of-input? table count k omega omega-k #bits
16 ERROR: index-too-big n ;
18 : lzw-bit-width ( n -- n' )
20 { [ dup 510 <= ] [ drop 9 ] }
21 { [ dup 1022 <= ] [ drop 10 ] }
22 { [ dup 2046 <= ] [ drop 11 ] }
23 { [ dup 4094 <= ] [ drop 12 ] }
27 : lzw-bit-width-compress ( lzw -- n )
28 count>> lzw-bit-width ;
30 : lzw-bit-width-uncompress ( lzw -- n )
31 table>> length lzw-bit-width ;
33 : initial-compress-table ( -- assoc )
34 258 iota [ [ 1vector ] keep ] H{ } map>assoc ;
36 : initial-uncompress-table ( -- seq )
37 258 iota [ 1vector ] V{ } map-as ;
39 : reset-lzw ( lzw -- lzw )
45 : reset-lzw-compress ( lzw -- lzw )
47 initial-compress-table >>table reset-lzw ;
49 : reset-lzw-uncompress ( lzw -- lzw )
50 initial-uncompress-table >>table reset-lzw ;
52 : <lzw-compress> ( input -- obj )
55 binary <byte-writer> <bitstream-writer> >>output
58 : <lzw-uncompress> ( input -- obj )
62 reset-lzw-uncompress ;
64 : push-k ( lzw -- lzw )
67 [ omega>> clone [ push ] keep ] tri >>omega-k ;
69 : omega-k-in-table? ( lzw -- ? )
70 [ omega-k>> ] [ table>> ] bi key? ;
74 : write-output ( lzw -- )
76 [ omega>> ] [ table>> ] bi at* [ not-in-table ] unless
78 [ lzw-bit-width-compress ]
79 [ output>> write-bits ] bi
82 : omega-k>omega ( lzw -- lzw )
83 dup omega-k>> clone >>omega ;
85 : k>omega ( lzw -- lzw )
86 dup k>> 1vector >>omega ;
88 : add-omega-k ( lzw -- )
89 [ [ 1+ ] change-count count>> ]
91 [ table>> ] tri set-at ;
93 : lzw-compress-char ( lzw k -- )
94 >>k push-k dup omega-k-in-table? [
102 : (lzw-compress-chars) ( lzw -- )
103 dup lzw-bit-width-compress table-full = [
106 dup input>> stream-read1
107 [ [ lzw-compress-char ] [ drop (lzw-compress-chars) ] 2bi ]
108 [ t >>end-of-input? drop ] if*
111 : lzw-compress-chars ( lzw -- )
113 ! [ [ clear-code lzw-compress-char ] [ drop ] bi ] ! reset-lzw-compress drop ] bi ]
116 [ lzw-bit-width-compress ]
117 [ output>> write-bits ] bi
119 [ (lzw-compress-chars) ]
122 [ lzw-bit-width-compress ]
123 [ output>> write-bits ] tri
126 [ end-of-information ] dip
127 [ lzw-bit-width-compress ]
128 [ output>> write-bits ] bi
131 } cleave dup end-of-input?>> [ drop ] [ lzw-compress-chars ] if ;
133 : lzw-compress ( byte-array -- seq )
134 binary <byte-reader> <lzw-compress>
135 [ lzw-compress-chars ] [ output>> stream>> ] bi ;
137 : lookup-old-code ( lzw -- vector )
138 [ old-code>> ] [ table>> ] bi nth ;
140 : lookup-code ( lzw -- vector )
141 [ code>> ] [ table>> ] bi nth ;
143 : code-in-table? ( lzw -- ? )
144 [ code>> ] [ table>> length ] bi < ;
146 : code>old-code ( lzw -- lzw )
147 dup code>> >>old-code ;
149 : write-code ( lzw -- )
150 [ lookup-code ] [ output>> ] bi push-all ;
152 : add-to-table ( seq lzw -- ) table>> push ;
154 : lzw-read ( lzw -- lzw n )
155 [ ] [ lzw-bit-width-uncompress ] [ input>> ] tri read-bits 2drop ;
157 DEFER: lzw-uncompress-char
158 : handle-clear-code ( lzw -- )
160 lzw-read dup end-of-information = [
169 : handle-uncompress-code ( lzw -- lzw )
175 [ lookup-code first ] bi suffix
176 ] [ add-to-table ] bi
177 ] [ code>old-code ] tri
180 [ lookup-old-code dup first suffix ] keep
181 [ output>> push-all ] [ add-to-table ] 2bi
182 ] [ code>old-code ] bi
185 : lzw-uncompress-char ( lzw -- )
188 dup code>> end-of-information = [
191 dup code>> clear-code = [
194 handle-uncompress-code
202 : lzw-uncompress ( seq -- byte-array )
203 binary <byte-reader> <bitstream-reader>
204 <lzw-uncompress> [ lzw-uncompress-char ] [ output>> ] bi ;