--- /dev/null
+USING: compression.gzip compression.inflate tools.test ;
+
+{ B{
+ 1 255 255 255 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+ 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 122 121 94 119
+ 239 237 227 88 16 16 10 5 16 17 26 172 3 20 19 245 22 54 55
+ 70 245 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+ 0 0 0 0 0 0 0 0 0 0 139 138 112 127 12 6 234 132 254 250 9
+ 24 16 19 38 182 25 27 40 154 2 240 239 235 25 0 0 0 0 0 0 0
+ 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+ 163 163 154 57 223 218 192 128 6 4 39 87 13 9 22 63 245 239
+ 239 242 240 240 242 243 4 17 17 25 21 0 0 0 0 0 0 0 0 0 0 0
+ 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 223 219
+ 197 140 26 21 26 221 108 117 136 170 0 0 0 0 0 0 0 194 148
+ 147 138 6 4 4 5 4 33 176 175 161 5 80 81 95 251 0 0 0 0 0 0
+ 0 0 0 0 0 0 0 0 0 0 122 121 105 33 246 246 234 80 241 240
+ 226 77 28 25 4 58 29 30 68 108 0 0 0 0 0 0 0 0 0 0 0 0 108
+ 109 118 250 2 24 24 39 230 225 221 203 107 0 0 0 0 0 0 0 0
+ 0 0 0 0 0 0 0 0 103 102 80 101 249 245 214 208 13 6 240 142
+ 44 37 29 65 11 13 22 250 11 15 30 180 0 0 0 0 0 0 0 0 0 0 0
+ 0 0 0 0 0 1 200 201 196 1 208 195 176 132 224 223 207 50
+ 253 6 15 181 251 253 0 6 240 241 239 77 14 10 246 64 33 24
+ 13 0 7 252 20 0 247 1 249 0 241 253 1 205 129 132 173 52
+ 124 123 107 32 17 16 6 15 115 117 143 209 0 0 0 0 1 255 255
+ 255 0 0 0 0 0 128 118 95 119 221 222 204 136 1 3 0 0 22 27
+ 35 0 249 239 239 0 30 22 3 0 247 4 18 0 250 248 247 0 29 26
+ 31 222 239 249 6 164 241 241 230 48 19 19 28 209 29 30 35
+ 154 87 88 109 228 1 255 255 255 0 0 0 0 0 0 0 0 0 136 136
+ 116 39 227 224 218 110 245 245 242 61 238 238 237 36 11 1
+ 254 9 32 37 20 213 7 14 40 151 2 0 246 36 6 8 20 210 8 8 5
+ 4 33 32 41 184 10 11 17 232 69 70 80 251 0 255 255 255 0
+ 255 255 255 0 255 255 255 0 255 255 255 0 255 255 255 0 255
+ 255 255 0 107 104 82 144 88 81 34 255 162 159 134 122 255
+ 255 255 0 255 255 255 0 255 255 255 0 255 255 255 0 195 194
+ 184 2 255 255 255 0 255 255 255 0 0 255 255 255 0 255 255
+ 255 0 255 255 255 0 255 255 255 0 255 255 255 0 174 171 167
+ 15 102 99 63 233 132 129 99 133 255 255 255 0 255 255 255 0
+ 255 255 255 0 255 255 255 0 255 255 255 0 255 255 255 0 255
+ 255 255 0 255 255 255 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+ 119 119 116 4 240 239 217 143 28 28 30 228 34 36 45 232 0 0
+ 0 0 0 0 0 0 38 38 38 4 28 28 28 2 0 0 0 0 0 0 0 0 0 0 0 0 0
+ 0 0 0 4 0 0 0 0 0 0 0 0 33 33 33 3 38 38 38 9 243 243 243
+ 252 14 12 44 24 233 235 4 89 250 251 216 126 92 91 76 241 8
+ 9 21 235 69 69 70 2 250 250 249 214 0 0 0 223 0 0 0 0 0 0 0
+ 0 0 0 0 0 2 0 0 0 0 0 0 0 0 247 247 247 255 25 25 25 11 45
+ 46 48 26 239 239 251 219 3 4 1 114 233 236 1 254 21 21 20
+ 113 12 11 2 54 1 2 2 215 206 206 206 230 0 0 0 0 0 0 0 0 0
+ 0 0 0 0 0 0 0 1 255 255 255 0 0 0 0 0 0 0 0 0 0 0 0 0 46 46
+ 47 8 56 56 49 70 23 21 9 145 237 239 248 180 247 247 2 148
+ 225 225 224 234 241 241 240 248 205 205 205 247 0 0 0 0 0 0
+ 0 0 0 0 0 0 0 0 0 0 0 255 255 255 0 255 255 255 0 255 255
+ 255 0 255 255 255 0 255 255 255 0 255 255 255 0 107 106 96
+ 75 90 89 73 75 255 255 255 0 255 255 255 0 255 255 255 0
+ 255 255 255 0 255 255 255 0 255 255 255 0 255 255 255 0 255
+ 255 255 0
+ }
+}
+[ B{
+ 1 255 255 255 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+ 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 122 121 94 119
+ 239 237 227 88 16 16 10 5 16 17 26 172 3 20 19 245 22 54 55
+ 70 245 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+ 0 0 0 0 0 0 0 0 0 0 139 138 112 127 12 6 234 132 254 250 9
+ 24 16 19 38 182 25 27 40 154 2 240 239 235 25 0 0 0 0 0 0 0
+ 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+ 163 163 154 57 223 218 192 128 6 4 39 87 13 9 22 63 245 239
+ 239 242 240 240 242 243 4 17 17 25 21 0 0 0 0 0 0 0 0 0 0 0
+ 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 223 219
+ 197 140 26 21 26 221 108 117 136 170 0 0 0 0 0 0 0 194 148
+ 147 138 6 4 4 5 4 33 176 175 161 5 80 81 95 251 0 0 0 0 0 0
+ 0 0 0 0 0 0 0 0 0 0 122 121 105 33 246 246 234 80 241 240
+ 226 77 28 25 4 58 29 30 68 108 0 0 0 0 0 0 0 0 0 0 0 0 108
+ 109 118 250 2 24 24 39 230 225 221 203 107 0 0 0 0 0 0 0 0
+ 0 0 0 0 0 0 0 0 103 102 80 101 249 245 214 208 13 6 240 142
+ 44 37 29 65 11 13 22 250 11 15 30 180 0 0 0 0 0 0 0 0 0 0 0
+ 0 0 0 0 0 1 200 201 196 1 208 195 176 132 224 223 207 50
+ 253 6 15 181 251 253 0 6 240 241 239 77 14 10 246 64 33 24
+ 13 0 7 252 20 0 247 1 249 0 241 253 1 205 129 132 173 52
+ 124 123 107 32 17 16 6 15 115 117 143 209 0 0 0 0 1 255 255
+ 255 0 0 0 0 0 128 118 95 119 221 222 204 136 1 3 0 0 22 27
+ 35 0 249 239 239 0 30 22 3 0 247 4 18 0 250 248 247 0 29 26
+ 31 222 239 249 6 164 241 241 230 48 19 19 28 209 29 30 35
+ 154 87 88 109 228 1 255 255 255 0 0 0 0 0 0 0 0 0 136 136
+ 116 39 227 224 218 110 245 245 242 61 238 238 237 36 11 1
+ 254 9 32 37 20 213 7 14 40 151 2 0 246 36 6 8 20 210 8 8 5
+ 4 33 32 41 184 10 11 17 232 69 70 80 251 0 255 255 255 0
+ 255 255 255 0 255 255 255 0 255 255 255 0 255 255 255 0 255
+ 255 255 0 107 104 82 144 88 81 34 255 162 159 134 122 255
+ 255 255 0 255 255 255 0 255 255 255 0 255 255 255 0 195 194
+ 184 2 255 255 255 0 255 255 255 0 0 255 255 255 0 255 255
+ 255 0 255 255 255 0 255 255 255 0 255 255 255 0 174 171 167
+ 15 102 99 63 233 132 129 99 133 255 255 255 0 255 255 255 0
+ 255 255 255 0 255 255 255 0 255 255 255 0 255 255 255 0 255
+ 255 255 0 255 255 255 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+ 119 119 116 4 240 239 217 143 28 28 30 228 34 36 45 232 0 0
+ 0 0 0 0 0 0 38 38 38 4 28 28 28 2 0 0 0 0 0 0 0 0 0 0 0 0 0
+ 0 0 0 4 0 0 0 0 0 0 0 0 33 33 33 3 38 38 38 9 243 243 243
+ 252 14 12 44 24 233 235 4 89 250 251 216 126 92 91 76 241 8
+ 9 21 235 69 69 70 2 250 250 249 214 0 0 0 223 0 0 0 0 0 0 0
+ 0 0 0 0 0 2 0 0 0 0 0 0 0 0 247 247 247 255 25 25 25 11 45
+ 46 48 26 239 239 251 219 3 4 1 114 233 236 1 254 21 21 20
+ 113 12 11 2 54 1 2 2 215 206 206 206 230 0 0 0 0 0 0 0 0 0
+ 0 0 0 0 0 0 0 1 255 255 255 0 0 0 0 0 0 0 0 0 0 0 0 0 46 46
+ 47 8 56 56 49 70 23 21 9 145 237 239 248 180 247 247 2 148
+ 225 225 224 234 241 241 240 248 205 205 205 247 0 0 0 0 0 0
+ 0 0 0 0 0 0 0 0 0 0 0 255 255 255 0 255 255 255 0 255 255
+ 255 0 255 255 255 0 255 255 255 0 255 255 255 0 107 106 96
+ 75 90 89 73 75 255 255 255 0 255 255 255 0 255 255 255 0
+ 255 255 255 0 255 255 255 0 255 255 255 0 255 255 255 0 255
+ 255 255 0
+ }
+ compress-dynamic gzip-inflate
+] unit-test
+
+{ B{
+ 1 255 255 255 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+ 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 122 121 94 119
+ 239 237 227 88 16 16 10 5 16 17 26 172 3 20 19 245 22 54 55
+ 70 245 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+ 0 0 0 0 0 0 0 0 0 0 139 138 112 127 12 6 234 132 254 250 9
+ 24 16 19 38 182 25 27 40 154 2 240 239 235 25 0 0 0 0 0 0 0
+ 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+ 163 163 154 57 223 218 192 128 6 4 39 87 13 9 22 63 245 239
+ 239 242 240 240 242 243 4 17 17 25 21 0 0 0 0 0 0 0 0 0 0 0
+ 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 223 219
+ 197 140 26 21 26 221 108 117 136 170 0 0 0 0 0 0 0 194 148
+ 147 138 6 4 4 5 4 33 176 175 161 5 80 81 95 251 0 0 0 0 0 0
+ 0 0 0 0 0 0 0 0 0 0 122 121 105 33 246 246 234 80 241 240
+ 226 77 28 25 4 58 29 30 68 108 0 0 0 0 0 0 0 0 0 0 0 0 108
+ 109 118 250 2 24 24 39 230 225 221 203 107 0 0 0 0 0 0 0 0
+ 0 0 0 0 0 0 0 0 103 102 80 101 249 245 214 208 13 6 240 142
+ 44 37 29 65 11 13 22 250 11 15 30 180 0 0 0 0 0 0 0 0 0 0 0
+ 0 0 0 0 0 1 200 201 196 1 208 195 176 132 224 223 207 50
+ 253 6 15 181 251 253 0 6 240 241 239 77 14 10 246 64 33 24
+ 13 0 7 252 20 0 247 1 249 0 241 253 1 205 129 132 173 52
+ 124 123 107 32 17 16 6 15 115 117 143 209 0 0 0 0 1 255 255
+ 255 0 0 0 0 0 128 118 95 119 221 222 204 136 1 3 0 0 22 27
+ 35 0 249 239 239 0 30 22 3 0 247 4 18 0 250 248 247 0 29 26
+ 31 222 239 249 6 164 241 241 230 48 19 19 28 209 29 30 35
+ 154 87 88 109 228 1 255 255 255 0 0 0 0 0 0 0 0 0 136 136
+ 116 39 227 224 218 110 245 245 242 61 238 238 237 36 11 1
+ 254 9 32 37 20 213 7 14 40 151 2 0 246 36 6 8 20 210 8 8 5
+ 4 33 32 41 184 10 11 17 232 69 70 80 251 0 255 255 255 0
+ 255 255 255 0 255 255 255 0 255 255 255 0 255 255 255 0 255
+ 255 255 0 107 104 82 144 88 81 34 255 162 159 134 122 255
+ 255 255 0 255 255 255 0 255 255 255 0 255 255 255 0 195 194
+ 184 2 255 255 255 0 255 255 255 0 0 255 255 255 0 255 255
+ 255 0 255 255 255 0 255 255 255 0 255 255 255 0 174 171 167
+ 15 102 99 63 233 132 129 99 133 255 255 255 0 255 255 255 0
+ 255 255 255 0 255 255 255 0 255 255 255 0 255 255 255 0 255
+ 255 255 0 255 255 255 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+ 119 119 116 4 240 239 217 143 28 28 30 228 34 36 45 232 0 0
+ 0 0 0 0 0 0 38 38 38 4 28 28 28 2 0 0 0 0 0 0 0 0 0 0 0 0 0
+ 0 0 0 4 0 0 0 0 0 0 0 0 33 33 33 3 38 38 38 9 243 243 243
+ 252 14 12 44 24 233 235 4 89 250 251 216 126 92 91 76 241 8
+ 9 21 235 69 69 70 2 250 250 249 214 0 0 0 223 0 0 0 0 0 0 0
+ 0 0 0 0 0 2 0 0 0 0 0 0 0 0 247 247 247 255 25 25 25 11 45
+ 46 48 26 239 239 251 219 3 4 1 114 233 236 1 254 21 21 20
+ 113 12 11 2 54 1 2 2 215 206 206 206 230 0 0 0 0 0 0 0 0 0
+ 0 0 0 0 0 0 0 1 255 255 255 0 0 0 0 0 0 0 0 0 0 0 0 0 46 46
+ 47 8 56 56 49 70 23 21 9 145 237 239 248 180 247 247 2 148
+ 225 225 224 234 241 241 240 248 205 205 205 247 0 0 0 0 0 0
+ 0 0 0 0 0 0 0 0 0 0 0 255 255 255 0 255 255 255 0 255 255
+ 255 0 255 255 255 0 255 255 255 0 255 255 255 0 107 106 96
+ 75 90 89 73 75 255 255 255 0 255 255 255 0 255 255 255 0
+ 255 255 255 0 255 255 255 0 255 255 255 0 255 255 255 0 255
+ 255 255 0
+ }
+}
+[ B{
+ 1 255 255 255 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+ 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 122 121 94 119
+ 239 237 227 88 16 16 10 5 16 17 26 172 3 20 19 245 22 54 55
+ 70 245 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+ 0 0 0 0 0 0 0 0 0 0 139 138 112 127 12 6 234 132 254 250 9
+ 24 16 19 38 182 25 27 40 154 2 240 239 235 25 0 0 0 0 0 0 0
+ 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+ 163 163 154 57 223 218 192 128 6 4 39 87 13 9 22 63 245 239
+ 239 242 240 240 242 243 4 17 17 25 21 0 0 0 0 0 0 0 0 0 0 0
+ 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 223 219
+ 197 140 26 21 26 221 108 117 136 170 0 0 0 0 0 0 0 194 148
+ 147 138 6 4 4 5 4 33 176 175 161 5 80 81 95 251 0 0 0 0 0 0
+ 0 0 0 0 0 0 0 0 0 0 122 121 105 33 246 246 234 80 241 240
+ 226 77 28 25 4 58 29 30 68 108 0 0 0 0 0 0 0 0 0 0 0 0 108
+ 109 118 250 2 24 24 39 230 225 221 203 107 0 0 0 0 0 0 0 0
+ 0 0 0 0 0 0 0 0 103 102 80 101 249 245 214 208 13 6 240 142
+ 44 37 29 65 11 13 22 250 11 15 30 180 0 0 0 0 0 0 0 0 0 0 0
+ 0 0 0 0 0 1 200 201 196 1 208 195 176 132 224 223 207 50
+ 253 6 15 181 251 253 0 6 240 241 239 77 14 10 246 64 33 24
+ 13 0 7 252 20 0 247 1 249 0 241 253 1 205 129 132 173 52
+ 124 123 107 32 17 16 6 15 115 117 143 209 0 0 0 0 1 255 255
+ 255 0 0 0 0 0 128 118 95 119 221 222 204 136 1 3 0 0 22 27
+ 35 0 249 239 239 0 30 22 3 0 247 4 18 0 250 248 247 0 29 26
+ 31 222 239 249 6 164 241 241 230 48 19 19 28 209 29 30 35
+ 154 87 88 109 228 1 255 255 255 0 0 0 0 0 0 0 0 0 136 136
+ 116 39 227 224 218 110 245 245 242 61 238 238 237 36 11 1
+ 254 9 32 37 20 213 7 14 40 151 2 0 246 36 6 8 20 210 8 8 5
+ 4 33 32 41 184 10 11 17 232 69 70 80 251 0 255 255 255 0
+ 255 255 255 0 255 255 255 0 255 255 255 0 255 255 255 0 255
+ 255 255 0 107 104 82 144 88 81 34 255 162 159 134 122 255
+ 255 255 0 255 255 255 0 255 255 255 0 255 255 255 0 195 194
+ 184 2 255 255 255 0 255 255 255 0 0 255 255 255 0 255 255
+ 255 0 255 255 255 0 255 255 255 0 255 255 255 0 174 171 167
+ 15 102 99 63 233 132 129 99 133 255 255 255 0 255 255 255 0
+ 255 255 255 0 255 255 255 0 255 255 255 0 255 255 255 0 255
+ 255 255 0 255 255 255 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+ 119 119 116 4 240 239 217 143 28 28 30 228 34 36 45 232 0 0
+ 0 0 0 0 0 0 38 38 38 4 28 28 28 2 0 0 0 0 0 0 0 0 0 0 0 0 0
+ 0 0 0 4 0 0 0 0 0 0 0 0 33 33 33 3 38 38 38 9 243 243 243
+ 252 14 12 44 24 233 235 4 89 250 251 216 126 92 91 76 241 8
+ 9 21 235 69 69 70 2 250 250 249 214 0 0 0 223 0 0 0 0 0 0 0
+ 0 0 0 0 0 2 0 0 0 0 0 0 0 0 247 247 247 255 25 25 25 11 45
+ 46 48 26 239 239 251 219 3 4 1 114 233 236 1 254 21 21 20
+ 113 12 11 2 54 1 2 2 215 206 206 206 230 0 0 0 0 0 0 0 0 0
+ 0 0 0 0 0 0 0 1 255 255 255 0 0 0 0 0 0 0 0 0 0 0 0 0 46 46
+ 47 8 56 56 49 70 23 21 9 145 237 239 248 180 247 247 2 148
+ 225 225 224 234 241 241 240 248 205 205 205 247 0 0 0 0 0 0
+ 0 0 0 0 0 0 0 0 0 0 0 255 255 255 0 255 255 255 0 255 255
+ 255 0 255 255 255 0 255 255 255 0 255 255 255 0 107 106 96
+ 75 90 89 73 75 255 255 255 0 255 255 255 0 255 255 255 0
+ 255 255 255 0 255 255 255 0 255 255 255 0 255 255 255 0 255
+ 255 255 0
+ }
+ compress-fixed gzip-inflate
+] unit-test
--- /dev/null
+! Copyright (C) 2020 Jacob Fischer, Abtin Molavi.
+! See http://factorcode.org/license.txt for BSD license.
+USING: accessors arrays assocs bit-arrays byte-arrays
+combinators compression.huffman fry kernel literals locals make
+math math.bits math.order math.ranges namespaces sequences
+sequences.deep splitting vectors ;
+IN: compression.gzip
+
+<PRIVATE
+
+SYMBOL: lit-dict
+SYMBOL: dist-dict
+SYMBOL: lit-vec
+
+! LZ77 compression
+
+:: longest-prefix ( ind seq -- start end )
+ ind dup ind + seq length min [a..b]
+ seq ind head-slice '[
+ ind swap seq <slice> _ subseq-start
+ ] map-find-last ;
+
+:: create-pair ( ind seq -- array )
+ ind seq longest-prefix :> ( start end )
+ end ind - :> n
+ n 3 <
+ [ ind seq nth ]
+ [ n ind start - 2array ]
+ if ;
+
+: sum-vec ( vec -- n )
+ [ dup array? [ first ] [ drop 1 ] if ] map-sum ;
+
+:: compress-lz77 ( seq -- vec )
+ 0 seq create-pair seq length <vector> ?push [ dup sum-vec seq length < ] [ dup sum-vec seq create-pair swap ?push ] while ;
+
+: gzip-header ( -- header )
+ { 31 139 8 0 0 0 255 } >byte-array ;
+
+! Huffman Coding
+
+! Fixed Huffman table encoding specified in section 3.2.5 of RFC 1951
+: length-to-code ( length -- code )
+ {
+ { [ dup 11 < ] [ 254 + ] }
+ { [ dup 19 < ] [ [ 11 - 2 /i 265 + ] [ 11 - 2 mod 1 <bits> >bit-array ] bi 2array ] }
+ { [ dup 35 < ] [ [ 19 - 4 /i 269 + ] [ 19 - 4 mod 2 <bits> >bit-array ] bi 2array ] }
+ { [ dup 67 < ] [ [ 35 - 8 /i 273 + ] [ 35 - 8 mod 3 <bits> >bit-array ] bi 2array ] }
+ { [ dup 131 < ] [ [ 67 - 16 /i 277 + ] [ 67 - 16 mod 4 <bits> >bit-array ] bi 2array ] }
+ { [ dup 258 < ] [ [ 131 - 32 /i 281 + ] [ 131 - 32 mod 5 <bits> >bit-array ] bi 2array ] }
+ [ drop 285 ]
+ }
+ cond ;
+
+: dist-to-code ( dist -- code )
+ {
+ { [ dup 5 < ] [ -1 + ] }
+ { [ dup 9 < ] [ [ 5 - 2 /i 4 + ] [ 5 - 2 mod 1 <bits> >bit-array ] bi 2array ] }
+ { [ dup 17 < ] [ [ 9 - 4 /i 6 + ] [ 9 - 4 mod 2 <bits> >bit-array ] bi 2array ] }
+ { [ dup 33 < ] [ [ 17 - 8 /i 8 + ] [ 17 - 8 mod 3 <bits> >bit-array ] bi 2array ] }
+ { [ dup 65 < ] [ [ 33 - 16 /i 10 + ] [ 33 - 16 mod 4 <bits> >bit-array ] bi 2array ] }
+ { [ dup 129 < ] [ [ 65 - 32 /i 12 + ] [ 65 - 32 mod 5 <bits> >bit-array ] bi 2array ] }
+ { [ dup 257 < ] [ [ 129 - 64 /i 14 + ] [ 129 - 64 mod 6 <bits> >bit-array ] bi 2array ] }
+ { [ dup 513 < ] [ [ 257 - 128 /i 16 + ] [ 257 - 128 mod 7 <bits> >bit-array ] bi 2array ] }
+ { [ dup 1025 < ] [ [ 513 - 256 /i 18 + ] [ 513 - 256 mod 8 <bits> >bit-array ] bi 2array ] }
+ { [ dup 2049 < ] [ [ 1025 - 512 /i 20 + ] [ 1025 - 512 mod 9 <bits> >bit-array ] bi 2array ] }
+ { [ dup 4097 < ] [ [ 2049 - 1024 /i 22 + ] [ 2049 - 1024 mod 10 <bits> >bit-array ] bi 2array ] }
+ { [ dup 8193 < ] [ [ 4097 - 2048 /i 24 + ] [ 4097 - 2048 mod 11 <bits> >bit-array ] bi 2array ] }
+ { [ dup 16385 < ] [ [ 8193 - 4096 /i 26 + ] [ 8193 - 4096 mod 12 <bits> >bit-array ] bi 2array ] }
+ [ [ 8193 - 4096 /i 28 + ] [ 8193 - 4096 mod 13 <bits> >bit-array ] bi 2array ]
+ }
+ cond ;
+
+ ! Words for transforming our vector of (length, distance) pairs and bytes into literals using above table
+: pair-to-code ( pr -- code )
+ [ first length-to-code ] [ second dist-to-code ] bi 2array ;
+
+: vec-to-lits ( vec -- vec )
+ [ dup array? [ pair-to-code ] [ ] if ] map ;
+
+! Words for using the fixed Huffman code to map literals to bit arrays
+! This is the table in section 3.2.6
+: (lit-to-bits) ( lit -- bitarr )
+ {
+ { [ dup 144 < ] [ 48 + 8 <bits> >bit-array reverse ] }
+ { [ dup 256 < ] [ 144 - 400 + 9 <bits> >bit-array reverse ] }
+ { [ dup 280 < ] [ 256 - 7 <bits> >bit-array reverse ] }
+ [ 280 - 192 + 8 <bits> >bit-array reverse ]
+ }
+ cond ;
+
+! Gluing codes with their extra bits
+
+: dist-to-bits ( dist -- bits )
+ dup array? [ [ first 5 <bits> >bit-array reverse ] [ second ] bi 2array ] [ 5 <bits> >bit-array reverse ] if ;
+
+: lit-to-bits ( lit -- bits )
+ dup array? [ [ first (lit-to-bits) ] [ second ] bi 2array ] [ (lit-to-bits) ] if ;
+
+: pair-to-bits ( l,d -- bits )
+ [ first lit-to-bits ] [ second dist-to-bits ] bi 2array ;
+
+: vec-to-bits ( vec -- bitarr )
+ [ dup array? [ pair-to-bits ] [ (lit-to-bits) ] if ] map ;
+
+
+! fixed huffman compression function
+: (compress-fixed) ( bytes -- bits )
+ compress-lz77 vec-to-lits vec-to-bits ;
+
+! Dynamic Huffman
+
+! using distance code 31 to represent no distance code for particular elements because it cannot occur
+: dists ( vec -- seq )
+ [ dup array? [ second dup array? [ first ] when ] [ drop 31 ] if ] map 31 swap remove ;
+
+: len-lits ( vec -- seq )
+ [ dup array? [ first ] when dup array? [ first ] when ] map ;
+
+! Given an lz77 compressed block, constructs the huffman code tables
+: build-dicts ( vec -- lit-dict dist-dict )
+ [ len-lits generate-canonical-codes ]
+ [ dists generate-canonical-codes ] bi ;
+
+
+! Use the given dictionary to replace the element with its code
+:: replace-one ( ele code-dict -- new-ele )
+ ele array? [ ele first code-dict at ele second 2array ] [ ele code-dict at ] if ;
+
+! replace both elements of a length distance pair with their codes
+: replace-pair ( pair -- new-pair )
+ [ first lit-dict get replace-one ] [ second dist-dict get replace-one ] bi 2array ;
+
+! Replace all vector elements with their codes
+: vec-to-codes ( vec -- new-vec )
+ [ dup array? [ replace-pair ] [ lit-dict get replace-one ] if ] map ;
+
+! Dictionary encoding
+: lit-code-lens ( -- len-seq )
+ 285 [0..b] [ lit-dict get at length ] map [ zero? ] trim-tail ;
+
+: dist-code-lens ( -- len-seq )
+ 31 [0..b] [ dist-dict get at length ] map [ zero? ] trim-tail ;
+
+:: replace-0-single ( m len-seq -- new-len-seq )
+ m 11 < [ len-seq m 0 <array> 17 m 3 - 3 <bits> >bit-array 2array 1array replace ]
+ [ len-seq m 0 <array> 18 m 11 - 7 <bits> >bit-array 2array 1array replace ]
+ if ;
+
+:: replace-0-range ( range len-seq -- new-len-seq )
+ range empty? [ len-seq ] [ range first range 1 tail len-seq replace-0-range replace-0-single ] if ;
+
+: replace-0 ( len-seq -- new-len-seq )
+ 2 139 (a..b) swap replace-0-range ;
+
+:: replace-runs ( n len-seq -- new-len-seq )
+ len-seq 7 n <array> { n { 16 ?{ t t } } } replace
+ 6 n <array> { n { 16 ?{ f t } } } replace
+ 5 n <array> { n { 16 ?{ t f } } } replace
+ 4 n <array> { n { 16 ?{ f f } } } replace ;
+
+:: replace-all-runs ( range len-seq -- new-len-seq )
+ range empty? [ len-seq ] [ range first range 1 tail len-seq replace-all-runs replace-runs ] if ;
+
+: run-free-lit ( -- len-seq )
+ 0 285 [a..b] lit-code-lens replace-0 replace-all-runs ;
+
+: run-free-dist ( -- len-seq )
+ 0 31 [a..b] dist-code-lens replace-0 replace-all-runs ;
+
+: run-free-codes ( -- len-seq )
+ run-free-lit run-free-dist append ;
+
+: code-len-dict ( -- code-dict )
+ run-free-codes [ dup array? [ first ] when ] map generate-canonical-codes ;
+
+: compressed-lens ( -- len-seq )
+ run-free-codes [ dup array? [ [ first code-len-dict at ] [ second ] bi 2array ] [ code-len-dict at ] if ] map ;
+
+CONSTANT: clen-shuffle { 16 17 18 0 8 7 9 6 10 5 11 4 12 3 13 2 14 1 15 }
+
+: clen-seq ( -- len-seq )
+ clen-shuffle [ code-len-dict at length ] map [ zero? ] trim-tail ;
+
+: clen-bits ( -- bit-arr )
+ clen-seq [ 3 <bits> >bit-array ] map ;
+
+: h-lit ( -- bit-arr )
+ lit-code-lens length 257 - 5 <bits> >bit-array ;
+
+: h-dist ( -- bit-arr )
+ dist-code-lens length 1 - 5 <bits> >bit-array ;
+
+: h-clen ( -- bit-arr )
+ clen-seq length 4 - 4 <bits> >bit-array ;
+
+: dynamic-headers ( -- bit-arr-seq )
+ ?{ f t } h-lit h-dist h-clen 4array concat ;
+
+TUPLE: deflate-block
+ { headers bit-array }
+ { clen array }
+ { compressed-lens array }
+ { compressed-data vector } ;
+
+! Compresses a block with dynamic huffman compression, outputting a nested array structure
+: (compress-dynamic) ( lit-seq -- bit-arr-seq )
+ [ dup compress-lz77 vec-to-lits { 256 } append lit-vec set
+ lit-vec get build-dicts
+ dist-dict set
+ lit-dict set
+ lit-code-lens supremum 16 < clen-seq supremum 8 < and
+ [ drop dynamic-headers clen-bits compressed-lens
+ lit-vec get vec-to-codes deflate-block boa ]
+ [ halves [ (compress-dynamic) ] bi@ 2array ] if
+ ] with-scope ;
+
+
+: flatten-single ( ele -- bits )
+ dup array? [ concat ] when ;
+
+: flatten-lens ( compressed-lens -- bits )
+ [ flatten-single ] map concat ;
+
+: flatten-pair ( pair -- bits )
+ dup array? [ [ first flatten-single ] [ second flatten-single ] bi append ] when ;
+
+: flatten-block ( bit-arr-seq -- byte-array )
+ { [ headers>> ] [ clen>> concat ] [ compressed-lens>> flatten-lens ] [ compressed-data>> [ flatten-pair ] map concat ] } cleave 4array concat ;
+
+: flatten-blocks ( blocks -- byte-array )
+ [ flatten-block ] map unclip-last [ [ ?{ f } prepend ] map ] dip ?{ t } prepend suffix concat ;
+
+PRIVATE>
+
+: compress-dynamic ( byte-array -- byte-array )
+ (compress-dynamic) [ deflate-block? ] deep-filter flatten-blocks underlying>> gzip-header prepend B{ 0 0 } append ;
+
+: compress-fixed ( byte-array -- byte-array )
+ (compress-fixed) [ flatten-pair ] map concat ?{ t t f } prepend underlying>> gzip-header prepend B{ 0 0 } append ;
-! Copyright (C) 2009 Marc Fauconneau.
+! Copyright (C) 2009, 2020 Marc Fauconneau, Abtin Molavi, and Jacob Fischer.
! See http://factorcode.org/license.txt for BSD license.
-USING: accessors arrays assocs fry
-hashtables io kernel locals math math.order math.parser
-math.ranges multiline sequences bitstreams bit-arrays ;
-IN: compression.huffman
-
+USING: accessors arrays assocs bit-arrays bitstreams combinators
+fry hashtables heaps io kernel locals math math.bits math.order
+math.parser math.ranges multiline namespaces sequences sorting
+vectors ;
QUALIFIED-WITH: bitstreams bs
+IN: compression.huffman
<PRIVATE
+SYMBOL: leaf-table
+SYMBOL: node-heap
+
TUPLE: huffman-code
{ value fixnum }
{ size fixnum }
tdesc [ n table update-reverse-table ] huffman-each
table seq>> ;
+TUPLE: huffman-tree
+ { code maybe{ fixnum } }
+ { left maybe{ huffman-tree } }
+ { right maybe{ huffman-tree } } ;
+
+: <huffman-tree> ( code left right -- huffman-tree )
+ huffman-tree boa ;
+
+: <huffman-internal> ( left right -- huffman-tree )
+ huffman-tree new swap >>left swap >>right ;
+
+: leaf? ( huff-tree -- ? )
+ [ left>> not ] [ right>> not ] bi and ;
+
+: gen-leaves ( lit-seq -- leaves )
+ [ huffman-tree new swap >>code ] map ;
+
+: build-leaf-table ( leaves -- )
+ dup empty? [ drop ] [ dup first leaf-table get inc-at rest build-leaf-table ] if ;
+
+: insert-leaves ( -- ) leaf-table get unzip swap zip node-heap get heap-push-all ;
+
+: combine-two ( -- )
+ node-heap get heap-pop node-heap get heap-pop swap [ + ] dip pick <huffman-internal> swap node-heap get heap-push drop ;
+
+: build-tree ( lit-seq -- heap )
+ gen-leaves build-leaf-table insert-leaves [ node-heap get heap-size 1 > ] [ combine-two ] while node-heap get ;
+
+! Walks down a huffman tree and outputs a dictionary of codes
+: (generate-codes) ( huff-tree -- code-dict )
+ {
+ { [ dup leaf? ] [ code>> ?{ } swap H{ } clone ?set-at ] }
+ { [ dup left>> not ] [ right>> (generate-codes) [ ?{ t } prepend ] assoc-map ] }
+ { [ dup right>> not ] [ left>> (generate-codes) [ ?{ f } prepend ] assoc-map ] }
+ [
+ [ left>> (generate-codes) [ ?{ f } prepend ] assoc-map ]
+ [ right>> (generate-codes) [ ?{ t } prepend ] assoc-map ] bi assoc-union!
+ ]
+ } cond ;
+
+: generate-codes ( lit-seq -- code-dict )
+ [
+ [ H{ } clone ]
+ [ H{ } clone leaf-table set
+ <min-heap> node-heap set
+ build-tree heap-pop swap (generate-codes) nip ]
+ if-empty
+ ] with-scope ;
+
+! Ordering of codes that is useful for generating canonical codes.
+! Sort by length, then lexicographically.
+:: <==> ( b1 b2 -- <=> )
+ {
+ { [ b1 second length b2 second length < ] [ +lt+ ] }
+ { [ b2 second length b1 second length < ] [ +gt+ ] }
+ { [ b1 first b2 first < ] [ +lt+ ] }
+ { [ b2 first b1 first < ] [ +gt+ ] }
+ [ +eq+ ]
+ } cond ;
+
+: sort-values! ( obj -- sortedseq )
+ >alist [ <==> ] sort ;
+
+: get-next-code ( code current -- next )
+ [ reverse bit-array>integer 1 + ] [ length ] bi <bits> >bit-array reverse dup length pick length swap - [ f ] replicate append nip ;
+
+! Does most of the work of converting a collection of codes to canonical ones.
+: (canonize-codes) ( current codes -- codes )
+ dup empty? [ 2drop V{ } clone ] [ dup first pick get-next-code dup pick 1 tail (canonize-codes) ?push 2nip ] if ;
+
+! Basically a wrapper for the above recursive helper
+: canonize-codes ( codes -- codes )
+ [ V{ } clone ] [ dup first length <bit-array> dup pick 1 tail (canonize-codes) ?push nip reverse ] if-empty ;
+
+:: length-limit-codes ( max-len old-codes -- new-codes )
+ old-codes [ length ] assoc-map [ dup length max-len < [ drop max-len ] when ] assoc-map ;
+
PRIVATE>
TUPLE: huffman-decoder
: read1-huff2 ( huffman-decoder -- elt )
16 over [ bs>> bs:peek 16 reverse-bits ] [ rtable>> nth ] bi
[ size>> swap bs>> bs:seek ] [ value>> ] bi ; inline
+
+! Outputs a dictionary of canonical codes
+: generate-canonical-codes ( lit-seq -- code-dict )
+ generate-codes sort-values! unzip canonize-codes zip ;
-! Copyright (C) 2009 Marc Fauconneau.
+! Copyright (C) 2009, 2020 Marc Fauconneau, Abtin Molavi, Jacob Fischer.
! See http://factorcode.org/license.txt for BSD license.
USING: accessors arrays assocs byte-vectors combinators
combinators.smart compression.huffman fry hashtables io.binary
ERROR: bad-zlib-data ;
ERROR: bad-zlib-header ;
+
:: check-zlib-header ( data -- )
16 data bs:peek 2 >le be> 31 mod ! checksum
0 assert=
2 data bs:seek ! compression level; ignore
;
+
+: read-until-terminated ( data -- data )
+ [ dup 8 swap bs:read 0 = ] [ ] until ;
+
+:: interpret-flag ( flg data -- )
+ 27 data bs:seek
+ flg first 1 = [ 8 data bs:read data bs:seek ] when
+ flg second 1 = [ data read-until-terminated drop ] when
+ flg fourth 1 = [ data read-until-terminated drop ] when
+ flg second 1 = [ 1 data bs:read drop ] when ;
+
+:: check-gzip-header ( data -- )
+ 8 data bs:read 31 assert= ! ID 1
+ 8 data bs:read 139 assert= ! ID 2
+ 8 data bs:read 8 assert= ! compression method: deflate
+ 1 data bs:seek ! ignore textbit
+ 1 data bs:read 1 data bs:read 1 data bs:read 1 data bs:read 4array data interpret-flag
+ ;
+
+
CONSTANT: clen-shuffle { 16 17 18 0 8 7 9 6 10 5 11 4 12 3 13 2 14 1 15 }
: get-table ( values size -- table )
bs:<lsb0-bit-reader>
[ check-zlib-header ] [ inflate-loop ] bi
inflate-lz77 ;
+
+: gzip-inflate ( bytes -- bytes )
+ bs:<lsb0-bit-reader>
+ [ check-gzip-header ] [ inflate-loop ] bi
+ inflate-lz77 ;
\ No newline at end of file