1 ! Copyright (C) 2009, 2020 Marc Fauconneau, Abtin Molavi, and Jacob Fischer.
2 ! See https://factorcode.org/license.txt for BSD license.
3 USING: accessors arrays assocs bit-arrays bitstreams combinators
4 hashtables heaps kernel math math.bits math.order namespaces
5 sequences sorting vectors ;
6 QUALIFIED-WITH: bitstreams bs
7 IN: compression.huffman
19 : <huffman-code> ( -- huffman-code )
20 0 0 0 huffman-code boa ; inline
22 : next-size ( huffman-code -- )
24 [ 2 * ] change-code drop ; inline
26 : next-code ( huffman-code -- )
27 [ 1 + ] change-code drop ; inline
29 :: all-patterns ( huffman-code n -- seq )
30 n log2 huffman-code size>> - :> free-bits
32 [ free-bits 2^ <iota> [ huffman-code code>> free-bits 2^ * + ] map ]
33 [ huffman-code code>> free-bits neg 2^ /i 1array ] if ;
35 :: huffman-each ( ... tdesc quot: ( ... huffman-code -- ... ) -- ... )
36 <huffman-code> :> code
40 [ code value<< code clone quot call code next-code ] each
43 : update-reverse-table ( huffman-code n table -- )
45 [ nip '[ _ swap _ set-at ] each ] 3bi ;
47 :: reverse-table ( tdesc n -- rtable )
48 n f <array> <enumerated> :> table
49 tdesc [ n table update-reverse-table ] huffman-each
53 { code maybe{ fixnum } }
54 { left maybe{ huffman-tree } }
55 { right maybe{ huffman-tree } } ;
57 : <huffman-tree> ( code left right -- huffman-tree )
60 : <huffman-internal> ( left right -- huffman-tree )
61 huffman-tree new swap >>left swap >>right ;
63 : leaf? ( huff-tree -- ? )
64 [ left>> not ] [ right>> not ] bi and ;
66 : gen-leaves ( lit-seq -- leaves )
67 [ huffman-tree new swap >>code ] map ;
69 : build-leaf-table ( leaves -- )
70 dup empty? [ drop ] [ dup first leaf-table get inc-at rest build-leaf-table ] if ;
72 : insert-leaves ( -- ) leaf-table get unzip swap zip node-heap get heap-push-all ;
75 node-heap get heap-pop node-heap get heap-pop swap [ + ] dip pick <huffman-internal> swap node-heap get heap-push drop ;
77 : build-tree ( lit-seq -- heap )
78 gen-leaves build-leaf-table insert-leaves [ node-heap get heap-size 1 > ] [ combine-two ] while node-heap get ;
80 ! Walks down a huffman tree and outputs a dictionary of codes
81 : (generate-codes) ( huff-tree -- code-dict )
83 { [ dup leaf? ] [ code>> ?{ } swap H{ } clone ?set-at ] }
84 { [ dup left>> not ] [ right>> (generate-codes) [ ?{ t } prepend ] assoc-map ] }
85 { [ dup right>> not ] [ left>> (generate-codes) [ ?{ f } prepend ] assoc-map ] }
87 [ left>> (generate-codes) [ ?{ f } prepend ] assoc-map ]
88 [ right>> (generate-codes) [ ?{ t } prepend ] assoc-map ] bi assoc-union!
92 : generate-codes ( lit-seq -- code-dict )
95 [ H{ } clone leaf-table set
96 <min-heap> node-heap set
97 build-tree heap-pop swap (generate-codes) nip ]
101 ! Ordering of codes that is useful for generating canonical codes.
102 ! Sort by length, then lexicographically.
103 :: <==> ( b1 b2 -- <=> )
105 { [ b1 second length b2 second length < ] [ +lt+ ] }
106 { [ b2 second length b1 second length < ] [ +gt+ ] }
107 { [ b1 first b2 first < ] [ +lt+ ] }
108 { [ b2 first b1 first < ] [ +gt+ ] }
112 : sort-values! ( obj -- sortedseq )
113 >alist [ <==> ] sort-with ;
115 : get-next-code ( code current -- next )
116 [ reverse bit-array>integer 1 + ] [ length ] bi <bits> >bit-array reverse dup length pick length swap - [ f ] replicate append nip ;
118 ! Does most of the work of converting a collection of codes to canonical ones.
119 : (canonize-codes) ( current codes -- codes )
120 dup empty? [ 2drop V{ } clone ] [ dup first pick get-next-code dup pick 1 tail (canonize-codes) ?push 2nip ] if ;
122 ! Basically a wrapper for the above recursive helper
123 : canonize-codes ( codes -- codes )
124 [ V{ } clone ] [ dup first length <bit-array> dup pick 1 tail (canonize-codes) ?push nip reverse ] if-empty ;
126 :: length-limit-codes ( max-len old-codes -- new-codes )
127 old-codes [ length ] assoc-map [ dup length max-len < [ drop max-len ] when ] assoc-map ;
131 TUPLE: huffman-decoder
135 { bits/level fixnum } ;
137 : <huffman-decoder> ( bs tdesc -- huffman-decoder )
142 dup [ tdesc>> ] [ bits/level>> 2^ ] bi reverse-table >>rtable ; inline
144 : read1-huff ( huffman-decoder -- elt )
145 16 over [ bs>> bs:peek ] [ rtable>> nth ] bi
146 [ size>> swap bs>> bs:seek ] [ value>> ] bi ; inline
148 : reverse-bits ( value bits -- value' )
149 [ integer>bit-array ] dip
150 f pad-tail reverse bit-array>integer ; inline
152 : read1-huff2 ( huffman-decoder -- elt )
153 16 over [ bs>> bs:peek 16 reverse-bits ] [ rtable>> nth ] bi
154 [ size>> swap bs>> bs:seek ] [ value>> ] bi ; inline
156 ! Outputs a dictionary of canonical codes
157 : generate-canonical-codes ( lit-seq -- code-dict )
158 generate-codes sort-values! unzip canonize-codes zip ;