]> gitweb.factorcode.org Git - factor.git/blob - extra/http2/hpack/huffman/huffman.factor
3344c923e380730426ce382759946658b34172f8
[factor.git] / extra / http2 / hpack / huffman / huffman.factor
1 USING: accessors arrays assocs bit-arrays http2.hpack
2 io.encodings.string io.encodings.utf8 kernel literals locals
3 make math sequences ;
4
5 IN: http2.hpack.huffman
6
7 <<
8 ! Table contents from RFC 7541 Appendix B
9 CONSTANT: huffman-table {
10             {     0x1ff8  13 }
11             {   0x7fffd8  23 }
12             {  0xfffffe2  28 }
13             {  0xfffffe3  28 }
14             {  0xfffffe4  28 }
15             {  0xfffffe5  28 }
16             {  0xfffffe6  28 }
17             {  0xfffffe7  28 }
18             {  0xfffffe8  28 }
19             {   0xffffea  24 }
20             { 0x3ffffffc  30 }
21             {  0xfffffe9  28 }
22             {  0xfffffea  28 }
23             { 0x3ffffffd  30 }
24             {  0xfffffeb  28 }
25             {  0xfffffec  28 }
26             {  0xfffffed  28 }
27             {  0xfffffee  28 }
28             {  0xfffffef  28 }
29             {  0xffffff0  28 }
30             {  0xffffff1  28 }
31             {  0xffffff2  28 }
32             { 0x3ffffffe  30 }
33             {  0xffffff3  28 }
34             {  0xffffff4  28 }
35             {  0xffffff5  28 }
36             {  0xffffff6  28 }
37             {  0xffffff7  28 }
38             {  0xffffff8  28 }
39             {  0xffffff9  28 }
40             {  0xffffffa  28 }
41             {  0xffffffb  28 }
42             {       0x14   6 }
43             {      0x3f8  10 }
44             {      0x3f9  10 }
45             {      0xffa  12 }
46             {     0x1ff9  13 }
47             {       0x15   6 }
48             {       0xf8   8 }
49             {      0x7fa  11 }
50             {      0x3fa  10 }
51             {      0x3fb  10 }
52             {       0xf9   8 }
53             {      0x7fb  11 }
54             {       0xfa   8 }
55             {       0x16   6 }
56             {       0x17   6 }
57             {       0x18   6 }
58             {        0x0   5 }
59             {        0x1   5 }
60             {        0x2   5 }
61             {       0x19   6 }
62             {       0x1a   6 }
63             {       0x1b   6 }
64             {       0x1c   6 }
65             {       0x1d   6 }
66             {       0x1e   6 }
67             {       0x1f   6 }
68             {       0x5c   7 }
69             {       0xfb   8 }
70             {     0x7ffc  15 }
71             {       0x20   6 }
72             {      0xffb  12 }
73             {      0x3fc  10 }
74             {     0x1ffa  13 }
75             {       0x21   6 }
76             {       0x5d   7 }
77             {       0x5e   7 }
78             {       0x5f   7 }
79             {       0x60   7 }
80             {       0x61   7 }
81             {       0x62   7 }
82             {       0x63   7 }
83             {       0x64   7 }
84             {       0x65   7 }
85             {       0x66   7 }
86             {       0x67   7 }
87             {       0x68   7 }
88             {       0x69   7 }
89             {       0x6a   7 }
90             {       0x6b   7 }
91             {       0x6c   7 }
92             {       0x6d   7 }
93             {       0x6e   7 }
94             {       0x6f   7 }
95             {       0x70   7 }
96             {       0x71   7 }
97             {       0x72   7 }
98             {       0xfc   8 }
99             {       0x73   7 }
100             {       0xfd   8 }
101             {     0x1ffb  13 }
102             {    0x7fff0  19 }
103             {     0x1ffc  13 }
104             {     0x3ffc  14 }
105             {       0x22   6 }
106             {     0x7ffd  15 }
107             {        0x3   5 }
108             {       0x23   6 }
109             {        0x4   5 }
110             {       0x24   6 }
111             {        0x5   5 }
112             {       0x25   6 }
113             {       0x26   6 }
114             {       0x27   6 }
115             {        0x6   5 }
116             {       0x74   7 }
117             {       0x75   7 }
118             {       0x28   6 }
119             {       0x29   6 }
120             {       0x2a   6 }
121             {        0x7   5 }
122             {       0x2b   6 }
123             {       0x76   7 }
124             {       0x2c   6 }
125             {        0x8   5 }
126             {        0x9   5 }
127             {       0x2d   6 }
128             {       0x77   7 }
129             {       0x78   7 }
130             {       0x79   7 }
131             {       0x7a   7 }
132             {       0x7b   7 }
133             {     0x7ffe  15 }
134             {      0x7fc  11 }
135             {     0x3ffd  14 }
136             {     0x1ffd  13 }
137             {  0xffffffc  28 }
138             {    0xfffe6  20 }
139             {   0x3fffd2  22 }
140             {    0xfffe7  20 }
141             {    0xfffe8  20 }
142             {   0x3fffd3  22 }
143             {   0x3fffd4  22 }
144             {   0x3fffd5  22 }
145             {   0x7fffd9  23 }
146             {   0x3fffd6  22 }
147             {   0x7fffda  23 }
148             {   0x7fffdb  23 }
149             {   0x7fffdc  23 }
150             {   0x7fffdd  23 }
151             {   0x7fffde  23 }
152             {   0xffffeb  24 }
153             {   0x7fffdf  23 }
154             {   0xffffec  24 }
155             {   0xffffed  24 }
156             {   0x3fffd7  22 }
157             {   0x7fffe0  23 }
158             {   0xffffee  24 }
159             {   0x7fffe1  23 }
160             {   0x7fffe2  23 }
161             {   0x7fffe3  23 }
162             {   0x7fffe4  23 }
163             {   0x1fffdc  21 }
164             {   0x3fffd8  22 }
165             {   0x7fffe5  23 }
166             {   0x3fffd9  22 }
167             {   0x7fffe6  23 }
168             {   0x7fffe7  23 }
169             {   0xffffef  24 }
170             {   0x3fffda  22 }
171             {   0x1fffdd  21 }
172             {    0xfffe9  20 }
173             {   0x3fffdb  22 }
174             {   0x3fffdc  22 }
175             {   0x7fffe8  23 }
176             {   0x7fffe9  23 }
177             {   0x1fffde  21 }
178             {   0x7fffea  23 }
179             {   0x3fffdd  22 }
180             {   0x3fffde  22 }
181             {   0xfffff0  24 }
182             {   0x1fffdf  21 }
183             {   0x3fffdf  22 }
184             {   0x7fffeb  23 }
185             {   0x7fffec  23 }
186             {   0x1fffe0  21 }
187             {   0x1fffe1  21 }
188             {   0x3fffe0  22 }
189             {   0x1fffe2  21 }
190             {   0x7fffed  23 }
191             {   0x3fffe1  22 }
192             {   0x7fffee  23 }
193             {   0x7fffef  23 }
194             {    0xfffea  20 }
195             {   0x3fffe2  22 }
196             {   0x3fffe3  22 }
197             {   0x3fffe4  22 }
198             {   0x7ffff0  23 }
199             {   0x3fffe5  22 }
200             {   0x3fffe6  22 }
201             {   0x7ffff1  23 }
202             {  0x3ffffe0  26 }
203             {  0x3ffffe1  26 }
204             {    0xfffeb  20 }
205             {    0x7fff1  19 }
206             {   0x3fffe7  22 }
207             {   0x7ffff2  23 }
208             {   0x3fffe8  22 }
209             {  0x1ffffec  25 }
210             {  0x3ffffe2  26 }
211             {  0x3ffffe3  26 }
212             {  0x3ffffe4  26 }
213             {  0x7ffffde  27 }
214             {  0x7ffffdf  27 }
215             {  0x3ffffe5  26 }
216             {   0xfffff1  24 }
217             {  0x1ffffed  25 }
218             {    0x7fff2  19 }
219             {   0x1fffe3  21 }
220             {  0x3ffffe6  26 }
221             {  0x7ffffe0  27 }
222             {  0x7ffffe1  27 }
223             {  0x3ffffe7  26 }
224             {  0x7ffffe2  27 }
225             {   0xfffff2  24 }
226             {   0x1fffe4  21 }
227             {   0x1fffe5  21 }
228             {  0x3ffffe8  26 }
229             {  0x3ffffe9  26 }
230             {  0xffffffd  28 }
231             {  0x7ffffe3  27 }
232             {  0x7ffffe4  27 }
233             {  0x7ffffe5  27 }
234             {    0xfffec  20 }
235             {   0xfffff3  24 }
236             {    0xfffed  20 }
237             {   0x1fffe6  21 }
238             {   0x3fffe9  22 }
239             {   0x1fffe7  21 }
240             {   0x1fffe8  21 }
241             {   0x7ffff3  23 }
242             {   0x3fffea  22 }
243             {   0x3fffeb  22 }
244             {  0x1ffffee  25 }
245             {  0x1ffffef  25 }
246             {   0xfffff4  24 }
247             {   0xfffff5  24 }
248             {  0x3ffffea  26 }
249             {   0x7ffff4  23 }
250             {  0x3ffffeb  26 }
251             {  0x7ffffe6  27 }
252             {  0x3ffffec  26 }
253             {  0x3ffffed  26 }
254             {  0x7ffffe7  27 }
255             {  0x7ffffe8  27 }
256             {  0x7ffffe9  27 }
257             {  0x7ffffea  27 }
258             {  0x7ffffeb  27 }
259             {  0xffffffe  28 }
260             {  0x7ffffec  27 }
261             {  0x7ffffed  27 }
262             {  0x7ffffee  27 }
263             {  0x7ffffef  27 }
264             {  0x7fffff0  27 }
265             {  0x3ffffee  26 }
266             { 0x3fffffff  30 }
267         }
268
269 :: R2, ( n -- ) n ,     n 2 64 * + ,     n 1 64 * + ,     n 3 64 * + , ;
270 :: R4, ( n -- ) n R2,   n 2 16 * + R2,   n 1 16 * + R2,   n 3 16 * + R2, ;
271 :: R6, ( n -- ) n R4,   n 2 4 * + R4,    n 1 4 * + R4,    n 3 4 * + R4, ;
272 >>
273
274 ! The codes for each entry in the huffman table
275 CONSTANT: huffman-encode-table $[
276     huffman-table [
277         [ integer>bit-array ] dip f pad-tail reverse
278     ] { } assoc>map
279 ]
280
281 CONSTANT: EOS 256
282
283 CONSTANT: bit-reverse-table $[
284     [ 0 R6, 2 R6, 1 R6, 3 R6, ] B{ } make
285 ]
286
287 : reverse-bits ( byte-array -- byte-array' )
288     [ bit-reverse-table nth ] B{ } map-as ;
289
290 : byte-array>bit-array ( byte-array -- bit-array )
291     [ length 8 * ] [ bit-array boa ] bi ;
292
293 ! converts a byte array/vector/sequence to a bit array, with
294 ! each byte in descending order, such that the most significant
295 ! bit of the first byte is the first bit in the sequence.
296 : bytes-to-bits ( bytes -- bits )
297     reverse-bits byte-array>bit-array ;
298
299 ! most significant bit first.
300 : bits-to-bytes ( bits -- bytes )
301     underlying>> reverse-bits ;
302
303 ERROR: hpack-huffman-error message ;
304
305 ! probably inefficient, but it works.
306 ! just loops over the bits, adding each bit to the current code and searching for
307 ! the current code, adding the corresponding symbol if the code
308 ! is found in the table.
309 :: huffman-decode ( bytes -- string )
310     BV{ } clone :> byte-vector
311     0 0 ! current code and length
312     bytes bytes-to-bits [
313         [ 2 * ] 2dip 1 0 ? swap 1 [ + ] 2bi@ 
314         2dup 2array huffman-table index
315         [
316             dup EOS = [ "End of Stream in huffman encoded string" hpack-huffman-error ] when
317             byte-vector push 2drop 0 0
318         ] when*
319     ] each
320
321     7 > [ "Padding is too long in huffman encoded string" hpack-huffman-error ] when
322
323     EOS huffman-table nth first integer>bit-array
324     swap integer>bit-array tail?
325     [ "Padding is not the most significant bits of the End of Stream code in huffman encoded string" hpack-huffman-error ] unless
326
327     byte-vector utf8 decode ;
328
329 : huffman-encode ( string -- bytes )
330     [ huffman-encode-table nth ] { } map-as concat
331     EOS huffman-encode-table nth over length neg 8 rem head
332     append bits-to-bytes ;
333