From c8132b43297574549df26ffc590800d270edaf8b Mon Sep 17 00:00:00 2001 From: razetime Date: Sun, 30 Oct 2022 23:10:02 +0530 Subject: [PATCH] add bzip3 compression --- extra/compression/bzip3/bzip3-docs.factor | 39 +++++++ extra/compression/bzip3/bzip3.factor | 47 ++++++++ extra/compression/bzip3/ffi/ffi-docs.factor | 120 ++++++++++++++++++++ extra/compression/bzip3/ffi/ffi.factor | 73 ++++++++++++ 4 files changed, 279 insertions(+) create mode 100644 extra/compression/bzip3/bzip3-docs.factor create mode 100644 extra/compression/bzip3/bzip3.factor create mode 100644 extra/compression/bzip3/ffi/ffi-docs.factor create mode 100644 extra/compression/bzip3/ffi/ffi.factor diff --git a/extra/compression/bzip3/bzip3-docs.factor b/extra/compression/bzip3/bzip3-docs.factor new file mode 100644 index 0000000000..30cec8a7d8 --- /dev/null +++ b/extra/compression/bzip3/bzip3-docs.factor @@ -0,0 +1,39 @@ +! Copyright (C) 2022 Raghu Ranganathan. +! See http://factorcode.org/license.txt for BSD license. +USING: byte-arrays help.markup help.syntax kernel strings ; +IN: compression.bzip3 + +HELP: compress +{ $values byte-array: byte-array block-size/f: POSTPONE: integer/f byte-array': byte-array } +{ $description Takes a { $link "byte-array" } and a block size, and pushes the byte array compressed with bzip3. } ; + +HELP: decompress +{ $values byte-array: byte-array byte-array': byte-array } +{ $description Takes a valid bzip3 compressed { $link "byte-array" } , and pushes its decompressed form. } ; + +HELP: internal-error +{ $values msg: object } +{ $description Throws an { $link internal-error } error. } +{ $error-description A bzip3 internal error. Error type is indicated in { $snippet "msg" } . } ; + +HELP: invalid-block-size +{ $values size: object } +{ $description Throws an { $link invalid-block-size } error. } +{ $error-description Occurs if the given block size for compression is not between 65 KiB and 511 MiB. 
} ; + +HELP: version +{ $values c-string: string } +{ $description Pushes the version info of the bzip3 release installed on your system. } ; + +ARTICLE: "compression.bzip3" "Compressing data with bzip3" +The { $vocab-link "compression.bzip3" } vocabulary can compress and decompress binary data with the help of the +{ $url "https://github.com/kspalaiologos/bzip3" "bzip3" } library. All data is represented in the form of { $link "byte-arrays" } . + +bzip3 is best used with text or code, and hence the { $vocab-link "io.encodings" } vocabularies, specifically +{ $link "io.encodings.string" } , { $vocab-link "io.encodings.utf8" } and { $vocab-link "io.encodings.ascii" } , will be of help. +$nl +If you are an experienced user and would like to use the low-level API of bzip3, the { $link "compression.bzip3.ffi" } vocabulary +exposes the C bindings, which allow for better performance via threading and other customizations. To use the imported +functions, you will need the { $vocab-link "alien" } vocabulary. 
; + +ABOUT: "compression.bzip3" diff --git a/extra/compression/bzip3/bzip3.factor b/extra/compression/bzip3/bzip3.factor new file mode 100644 index 0000000000..139239c58e --- /dev/null +++ b/extra/compression/bzip3/bzip3.factor @@ -0,0 +1,47 @@ +USING: alien alien.libraries alien.c-types alien.data alien.syntax + kernel io.encodings.string byte-arrays sequences combinators syntax + compression.bzip3.ffi locals math math.order summary pair-rocket ; +IN: compression.bzip3 + +ERROR: invalid-block-size size ; +M: invalid-block-size summary drop "Block size must be between 65 KiB and 511 MiB" ; +ERROR: internal-error msg ; +M: internal-error summary drop "bzip3: Internal Error" ; + [ "BZ3_ERR_OUT_OF_BOUNDS" ] + -2 => [ "BZ3_ERR_BWT" ] + -3 => [ "BZ3_ERR_CRC" ] + -4 => [ "BZ3_ERR_MALFORMED_HEADER" ] + -5 => [ "BZ3_ERR_TRUNCATED_DATA" ] + -6 => [ "BZ3_ERR_DATA_TOO_BIG" ] + -7 => [ "BZ3_ERR_INIT" ] + [ drop "UNDEFINED_ERR" ] + } case internal-error +; + +: KiB ( b -- kib ) 1024 * ; +: MiB ( b -- mib ) 1024 * 1024 * ; +: validate-block-size ( b -- b ) dup 65 KiB 511 MiB between? + [ invalid-block-size ] unless ; +PRIVATE> + +ALIAS: version bz3_version +:: compress ( byte-array block-size/f -- byte-array' ) + byte-array length :> in-size + in-size bz3_bound :> out-size + out-size :> out + block-size/f [ dsize ] unless* validate-block-size + byte-array out in-size out-size size_t bz3_compress + dup 0 = [ drop out ] [ throw-internal-error ] if +; + +:: decompress ( byte-array -- byte-array' ) + byte-array length :> in-size + in-size bz3_bound :> out-size + out-size :> out + byte-array out in-size out-size size_t bz3_decompress + dup 0 = [ drop out ] [ throw-internal-error ] if +; diff --git a/extra/compression/bzip3/ffi/ffi-docs.factor b/extra/compression/bzip3/ffi/ffi-docs.factor new file mode 100644 index 0000000000..86a1ca728a --- /dev/null +++ b/extra/compression/bzip3/ffi/ffi-docs.factor @@ -0,0 +1,120 @@ +! Copyright (C) 2022 Raghu Ranganathan. +! 
See http://factorcode.org/license.txt for BSD license. +USING: help.markup help.syntax kernel math ; +IN: compression.bzip3.ffi + +HELP: bz3_bound +{ $values + { "input_size" object } + { "size_t" object } +} +{ $description "" } ; + +HELP: bz3_compress +{ $values + { "block_size" object } { "in" object } { "out" object } { "in_size" object } { "out_size" object } + { "int" object } +} +{ $description Available in the high level API. Usage of { $link "compression.bzip3.compress" } is encouraged. } ; + +HELP: bz3_decode_block +{ $values + { "state" object } { "buffer" object } { "size" object } { "orig_size" object } + { "int32_t" object } +} +{ $description "" } ; + +HELP: bz3_decode_blocks +{ $values + { "states[]" object } { "buffers[]" object } { "sizes[]" object } { "orig_sizes[]" object } { "n" integer } +} +{ $description "" } ; + +HELP: bz3_decompress +{ $values + { "in" object } { "out" object } { "in_size" object } { "out_size" object } + { "int" object } +} +{ $description Available in the high level API. Usage of { $link "compression.bzip3.decompress" } is encouraged. 
} ;
+
+HELP: bz3_encode_block
+{ $values
+    { "state" object } { "buffer" object } { "size" object }
+    { "int32_t" object }
+}
+{ $description "" } ;
+
+HELP: bz3_encode_blocks
+{ $values
+    { "states[]" object } { "buffers[]" object } { "sizes[]" object } { "n" integer }
+}
+{ $description "" } ;
+
+HELP: bz3_free
+{ $values
+    { "state" object }
+}
+{ $description "" } ;
+
+HELP: bz3_last_error
+{ $values
+    { "state" object }
+    { "int8_t" object }
+}
+{ $description "" } ;
+
+HELP: bz3_new
+{ $values
+    { "block_size" object }
+    { "bz3_state*" object }
+}
+{ $description "" } ;
+
+HELP: bz3_state
+{ $class-description "" } ;
+
+HELP: bz3_strerror
+{ $values
+    { "state" object }
+    { "c-string" object }
+}
+{ $description "" } ;
+
+HELP: bz3_version
+{ $values
+    { "c-string" object }
+}
+{ $description "" } ;
+
+HELP: s16
+{ $var-description "" } ;
+
+HELP: s32
+{ $var-description "" } ;
+
+HELP: s8
+{ $var-description "" } ;
+
+HELP: state
+{ $class-description "" } ;
+
+HELP: u16
+{ $var-description "" } ;
+
+HELP: u32
+{ $var-description "" } ;
+
+HELP: u64
+{ $var-description "" } ;
+
+HELP: u8
+{ $var-description "" } ;
+
+ARTICLE: "compression.bzip3.ffi" "compression.bzip3.ffi"
+The documentation in this vocabulary is only a high-level overview.
+
+For up-to-date details, consult your local installation of { $snippet "libbz3.h" } , or read it at
+{ $url "https://github.com/kspalaiologos/bzip3/blob/master/include/libbz3.h" } .
+;
+
+ABOUT: "compression.bzip3.ffi"
diff --git a/extra/compression/bzip3/ffi/ffi.factor b/extra/compression/bzip3/ffi/ffi.factor
new file mode 100644
index 0000000000..215edcedc2
--- /dev/null
+++ b/extra/compression/bzip3/ffi/ffi.factor
@@ -0,0 +1,73 @@
+! Copyright (C) 2022 Raghu Ranganathan.
+! See http://factorcode.org/license.txt for BSD license.
+
+! Makes use of Kamila Szewczyk's bzip3 library.
+! See https://github.com/kspalaiologos/bzip3/blob/master/include/libbz3.h for the API specifics.
+USING: alien alien.libraries alien.c-types alien.syntax
+    classes.struct combinators system words ;
+IN: compression.bzip3.ffi
+
+! Register the "bzip3" library at parse time under its per-OS file name, using the cdecl ABI.
+<< "bzip3" {
+    { [ os windows? ] [ "bzip3.dll" ] }
+    { [ os macosx? ] [ "libbzip3.dylib" ] }
+    { [ os unix? ] [ "libbzip3.so" ] }
+} cond cdecl add-library >>
+
+LIBRARY: bzip3
+
+! Short aliases for the fixed-width integer types, as used by the C structs quoted below.
+TYPEDEF: uint8_t u8
+TYPEDEF: uint16_t u16
+TYPEDEF: uint32_t u32
+TYPEDEF: uint64_t u64
+TYPEDEF: int8_t s8
+TYPEDEF: int16_t s16
+TYPEDEF: int32_t s32
+
+! typedef struct {
+!     /* Input/output. */
+!     u8 *in_queue, *out_queue;
+!     s32 input_ptr, output_ptr, input_max;
+!     /* C0, C1 - used for making the initial prediction, C2 used for an APM with a slightly low
+!        learning rate (6) and 512 contexts. kanzi merges C0 and C1, uses slightly different
+!        counter initialisation code and prediction code which from my tests tends to be suboptimal. */
+!     u16 C0[256], C1[256][256], C2[512][17];
+! } state;
+STRUCT: state
+    { in_queue u8* } { out_queue u8* }
+    { input_ptr s32 } { output_ptr s32 } { input_max s32 }
+    { C0 u16[256] } { C1 u16[256][256] } { C2 u16[512][17] } ;
+
+! struct bz3_state {
+!     u8 * swap_buffer;
+!     s32 block_size;
+!     s32 *sais_array, *lzp_lut;
+!     state * cm_state;
+!     s8 last_error;
+! };
+STRUCT: bz3_state
+    { swap_buffer u8* }
+    { block_size s32 }
+    { sais_array s32* } { lzp_lut s32* }
+    { cm_state state* }
+    { last_error s8 } ;
+
+! GENERAL APIs (see libbz3.h for the exact semantics of each function)
+FUNCTION: c-string bz3_version ( )
+FUNCTION: int8_t bz3_last_error ( bz3_state* state )
+FUNCTION: c-string bz3_strerror ( bz3_state* state )
+FUNCTION: bz3_state* bz3_new ( int32_t block_size )
+FUNCTION: void bz3_free ( bz3_state* state )
+FUNCTION: size_t bz3_bound ( size_t input_size )
+
+! HIGH LEVEL APIs: whole-buffer calls taking in/out pointers and sizes; wrapped by compression.bzip3
+FUNCTION: int bz3_compress ( uint32_t block_size, uint8_t* in, uint8_t* out, size_t in_size, size_t* out_size )
+FUNCTION: int bz3_decompress ( uint8_t* in, uint8_t* out, size_t in_size, size_t* out_size )
+
+! LOW LEVEL APIs: per-block calls that operate on an explicit bz3_state (or arrays of them)
+FUNCTION: int32_t bz3_encode_block ( bz3_state* state, uint8_t* buffer, int32_t size )
+FUNCTION: int32_t bz3_decode_block ( bz3_state* state, uint8_t* buffer, int32_t size, int32_t orig_size )
+FUNCTION: void bz3_encode_blocks ( bz3_state* states[], uint8_t* buffers[], int32_t sizes[], int32_t n )
+FUNCTION: void bz3_decode_blocks ( bz3_state* states[], uint8_t* buffers[], int32_t sizes[], int32_t orig_sizes[], int32_t n )
+
-- 
2.34.1