! Copyright (C) 2009 Doug Coleman.
! See http://factorcode.org/license.txt for BSD license.
-USING: accessors assocs bitstreams byte-vectors combinators io
-io.encodings.binary io.streams.byte-array kernel math sequences
-vectors ;
+USING: accessors combinators kernel math math.order
+sequences vectors ;
+QUALIFIED-WITH: bitstreams bs
IN: compression.lzw
-CONSTANT: clear-code 256
-CONSTANT: end-of-information 257
+TUPLE: lzw
+input
+output
+table
+code
+old-code
+initial-code-size
+code-size
+clear-code
+end-of-information-code ;
-TUPLE: lzw input output end-of-input? table count k omega omega-k #bits
-code old-code ;
+TUPLE: tiff-lzw < lzw ;
+TUPLE: gif-lzw < lzw ;
-SYMBOL: table-full
-
-ERROR: index-too-big n ;
-
-: lzw-bit-width ( n -- n' )
- {
- { [ dup 510 <= ] [ drop 9 ] }
- { [ dup 1022 <= ] [ drop 10 ] }
- { [ dup 2046 <= ] [ drop 11 ] }
- { [ dup 4094 <= ] [ drop 12 ] }
- [ drop table-full ]
- } cond ;
-
-: lzw-bit-width-compress ( lzw -- n )
- count>> lzw-bit-width ;
-
-: lzw-bit-width-uncompress ( lzw -- n )
- table>> length lzw-bit-width ;
-
-: initial-compress-table ( -- assoc )
- 258 iota [ [ 1vector ] keep ] H{ } map>assoc ;
-
-: initial-uncompress-table ( -- seq )
- 258 iota [ 1vector ] V{ } map-as ;
-
-: reset-lzw ( lzw -- lzw )
- 257 >>count
- V{ } clone >>omega
- V{ } clone >>omega-k
- 9 >>#bits ;
-
-: reset-lzw-compress ( lzw -- lzw )
- f >>k
- initial-compress-table >>table reset-lzw ;
+: initial-uncompress-table ( size -- seq )
+ <iota> [ 1vector ] V{ } map-as ;
: reset-lzw-uncompress ( lzw -- lzw )
- initial-uncompress-table >>table reset-lzw ;
+ dup end-of-information-code>> 1 + initial-uncompress-table >>table
+ dup initial-code-size>> >>code-size ;
-: <lzw-compress> ( input -- obj )
- lzw new
- swap >>input
- binary <byte-writer> <bitstream-writer> >>output
- reset-lzw-compress ;
+ERROR: code-size-zero ;
-: <lzw-uncompress> ( input -- obj )
- lzw new
+: <lzw-uncompress> ( input code-size class -- obj )
+ new
+ swap [ code-size-zero ] when-zero >>code-size
+ dup code-size>> >>initial-code-size
+ dup code-size>> 1 - 2^ >>clear-code
+ dup clear-code>> 1 + >>end-of-information-code
swap >>input
BV{ } clone >>output
reset-lzw-uncompress ;
-: push-k ( lzw -- lzw )
- [ ]
- [ k>> ]
- [ omega>> clone [ push ] keep ] tri >>omega-k ;
-
-: omega-k-in-table? ( lzw -- ? )
- [ omega-k>> ] [ table>> ] bi key? ;
-
-ERROR: not-in-table ;
-
-: write-output ( lzw -- )
- [
- [ omega>> ] [ table>> ] bi at* [ not-in-table ] unless
- ] [
- [ lzw-bit-width-compress ]
- [ output>> write-bits ] bi
- ] bi ;
-
-: omega-k>omega ( lzw -- lzw )
- dup omega-k>> clone >>omega ;
-
-: k>omega ( lzw -- lzw )
- dup k>> 1vector >>omega ;
-
-: add-omega-k ( lzw -- )
- [ [ 1+ ] change-count count>> ]
- [ omega-k>> clone ]
- [ table>> ] tri set-at ;
-
-: lzw-compress-char ( lzw k -- )
- >>k push-k dup omega-k-in-table? [
- omega-k>omega drop
- ] [
- [ write-output ]
- [ add-omega-k ]
- [ k>omega drop ] tri
- ] if ;
-
-: (lzw-compress-chars) ( lzw -- )
- dup lzw-bit-width-compress table-full = [
- drop
- ] [
- dup input>> stream-read1
- [ [ lzw-compress-char ] [ drop (lzw-compress-chars) ] 2bi ]
- [ t >>end-of-input? drop ] if*
- ] if ;
-
-: lzw-compress-chars ( lzw -- )
- {
- ! [ [ clear-code lzw-compress-char ] [ drop ] bi ] ! reset-lzw-compress drop ] bi ]
- [
- [ clear-code ] dip
- [ lzw-bit-width-compress ]
- [ output>> write-bits ] bi
- ]
- [ (lzw-compress-chars) ]
- [
- [ k>> ]
- [ lzw-bit-width-compress ]
- [ output>> write-bits ] tri
- ]
- [
- [ end-of-information ] dip
- [ lzw-bit-width-compress ]
- [ output>> write-bits ] bi
- ]
- [ ]
- } cleave dup end-of-input?>> [ drop ] [ lzw-compress-chars ] if ;
-
-: lzw-compress ( byte-array -- seq )
- binary <byte-reader> <lzw-compress>
- [ lzw-compress-chars ] [ output>> stream>> ] bi ;
-
: lookup-old-code ( lzw -- vector )
[ old-code>> ] [ table>> ] bi nth ;
: write-code ( lzw -- )
[ lookup-code ] [ output>> ] bi push-all ;
-: add-to-table ( seq lzw -- ) table>> push ;
+GENERIC: code-space-full? ( lzw -- ? )
+
+: size-and-limit ( lzw -- m n ) [ table>> length ] [ code-size>> 2^ ] bi ;
+
+M: tiff-lzw code-space-full? size-and-limit 1 - = ;
+M: gif-lzw code-space-full? size-and-limit = ;
+
+GENERIC: increment-code-size ( lzw -- lzw )
+
+M: lzw increment-code-size [ 1 + ] change-code-size ;
+M: gif-lzw increment-code-size [ 1 + 12 min ] change-code-size ;
+
+: maybe-increment-code-size ( lzw -- lzw )
+ dup code-space-full? [ increment-code-size ] when ;
+
+: add-to-table ( seq lzw -- )
+ [ table>> push ]
+ [ maybe-increment-code-size 2drop ] 2bi ;
: lzw-read ( lzw -- lzw n )
- [ ] [ lzw-bit-width-uncompress ] [ input>> ] tri read-bits 2drop ;
+ [ ] [ code-size>> ] [ input>> ] tri bs:read ;
+
+: end-of-information? ( lzw code -- ? ) swap end-of-information-code>> = ;
+: clear-code? ( lzw code -- ? ) swap clear-code>> = ;
+
+DEFER: handle-clear-code
+: lzw-process-next-code ( lzw quot: ( lzw code -- ) -- )
+ [ lzw-read ] dip {
+ { [ 2over end-of-information? ] [ 3drop ] }
+ { [ 2over clear-code? ] [ 2drop handle-clear-code ] }
+ [ call( lzw code -- ) ]
+ } cond ; inline
DEFER: lzw-uncompress-char
: handle-clear-code ( lzw -- )
reset-lzw-uncompress
- lzw-read dup end-of-information = [
- 2drop
- ] [
+ [
>>code
[ write-code ]
[ code>old-code ] bi
lzw-uncompress-char
- ] if ;
+ ] lzw-process-next-code ;
: handle-uncompress-code ( lzw -- lzw )
dup code-in-table? [
[ output>> push-all ] [ add-to-table ] 2bi
] [ code>old-code ] bi
] if ;
-
+
: lzw-uncompress-char ( lzw -- )
- lzw-read [
- >>code
- dup code>> end-of-information = [
- drop
- ] [
- dup code>> clear-code = [
- handle-clear-code
- ] [
- handle-uncompress-code
- lzw-uncompress-char
- ] if
- ] if
- ] [
- drop
- ] if* ;
+ [ >>code handle-uncompress-code lzw-uncompress-char ]
+ lzw-process-next-code ;
+
+: lzw-uncompress ( bitstream code-size class -- byte-array )
+ <lzw-uncompress>
+ [ lzw-uncompress-char ] [ output>> ] bi ;
+
+: tiff-lzw-uncompress ( seq -- byte-array )
+ bs:<msb0-bit-reader> 9 tiff-lzw lzw-uncompress ;
-: lzw-uncompress ( seq -- byte-array )
- binary <byte-reader> <bitstream-reader>
- <lzw-uncompress> [ lzw-uncompress-char ] [ output>> ] bi ;
+: gif-lzw-uncompress ( seq code-size -- byte-array )
+ [ bs:<lsb0-bit-reader> ] dip gif-lzw lzw-uncompress ;