Skip to content

Commit

Permalink
Added: BZ3_ERR_DATA_SIZE_TOO_SMALL when bz3_decode_block is called wi…
Browse files Browse the repository at this point in the history
…th insufficient buffer.
  • Loading branch information
Sewer56 committed Dec 14, 2024
1 parent 269133b commit 0deefe0
Show file tree
Hide file tree
Showing 3 changed files with 71 additions and 31 deletions.
18 changes: 14 additions & 4 deletions include/libbz3.h
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@ extern "C" {
#define BZ3_ERR_TRUNCATED_DATA -5
#define BZ3_ERR_DATA_TOO_BIG -6
#define BZ3_ERR_INIT -7
#define BZ3_ERR_ORIG_SIZE_TOO_SMALL -8

struct bz3_state;

Expand Down Expand Up @@ -173,12 +174,21 @@ BZIP3_API int32_t bz3_encode_block(struct bz3_state * state, uint8_t * buffer, i

/**
* @brief Decode a single block.
* `buffer' must be able to hold at least `bz3_bound(orig_size)' bytes. The size must not exceed the block size
* associated with the state.
*
* `buffer' must be able to hold at least `bz3_bound(orig_size)' bytes
* in order to ensure decompression will succeed for all possible bzip3 blocks.
*
* In most (but not all) cases, a buffer of `orig_size` bytes is sufficient.
* If it is not sufficient, you must allocate a buffer of size `bz3_bound(orig_size)` temporarily.
*
* If `buffer_size` is too small, -1 is returned and the state's last error is set to `BZ3_ERR_ORIG_SIZE_TOO_SMALL`.
* The size must not exceed the block size associated with the state.
*
* @param buffer_size The size of the buffer at `buffer'
* @param size The size of the compressed data in `buffer'
* @param orig_size The original size of the data before compression.
*/
BZIP3_API int32_t bz3_decode_block(struct bz3_state * state, uint8_t * buffer, int32_t size, int32_t orig_size);
BZIP3_API int32_t bz3_decode_block(struct bz3_state * state, uint8_t * buffer, size_t buffer_size, int32_t size, int32_t orig_size);

/**
* @brief Encode `n' blocks, all in parallel.
Expand All @@ -196,7 +206,7 @@ BZIP3_API void bz3_encode_blocks(struct bz3_state * states[], uint8_t * buffers[
* @brief Decode `n' blocks, all in parallel.
* Same specifics as `bz3_encode_blocks', but doesn't overwrite `sizes'.
*/
BZIP3_API void bz3_decode_blocks(struct bz3_state * states[], uint8_t * buffers[], int32_t sizes[],
BZIP3_API void bz3_decode_blocks(struct bz3_state * states[], uint8_t * buffers[], size_t buffer_sizes[], int32_t sizes[],
int32_t orig_sizes[], int32_t n);

#ifdef __cplusplus
Expand Down
64 changes: 45 additions & 19 deletions src/libbz3.c
Original file line number Diff line number Diff line change
Expand Up @@ -489,6 +489,8 @@ BZIP3_API const char * bz3_strerror(struct bz3_state * state) {
return "Truncated data";
case BZ3_ERR_DATA_TOO_BIG:
return "Too much data";
case BZ3_ERR_ORIG_SIZE_TOO_SMALL:
return "Size of buffer `buffer_size` passed to the block decoder (bz3_decode_block) is too small. See function docs for details.";
default:
return "Unknown error";
}
Expand Down Expand Up @@ -615,7 +617,7 @@ BZIP3_API s32 bz3_encode_block(struct bz3_state * state, u8 * buffer, s32 data_s
return data_size + overhead * 4 + 1;
}

BZIP3_API s32 bz3_decode_block(struct bz3_state * state, u8 * buffer, s32 data_size, s32 orig_size) {
BZIP3_API s32 bz3_decode_block(struct bz3_state * state, u8 * buffer, size_t buffer_size, s32 data_size, s32 orig_size) {
// Read the header.
u32 crc32 = read_neutral_s32(buffer);
s32 bwt_idx = read_neutral_s32(buffer + 4);
Expand Down Expand Up @@ -662,6 +664,34 @@ BZIP3_API s32 bz3_decode_block(struct bz3_state * state, u8 * buffer, s32 data_s
return -1;
}

// Size that undoing BWT+BCM should decompress into.
s32 size_before_bwt;

if (model & 2)
size_before_bwt = lzp_size;
else if (model & 4)
size_before_bwt = rle_size;
else
size_before_bwt = orig_size;

// Note(sewer): It's technically valid within the spec to create a bzip3 block
// where the size after LZP/RLE is larger than the original input. Some earlier encoders
// even (mistakenly?) were able to do this.
//
// SAFETY: Data passed to the BWT+BCM step can be one of the following:
// - original data
// - original data + LZP
// - original data + RLE
// - original data + RLE + LZP
//
// We must ensure the buffer (`buffer_size` bytes) is large enough to store the data at every step of the way
// when we walk backwards from undoing BWT+BCM. The size required may be stored in either `lzp_size`,
// `rle_size` OR `orig_size`. The check below compares `lzp_size` and `rle_size` against `buffer_size`.
if ((lzp_size > buffer_size) || (rle_size > buffer_size)) {
state->last_error = BZ3_ERR_ORIG_SIZE_TOO_SMALL;
return -1;
}

// Decode the data.
u8 *b1 = buffer, *b2 = state->swap_buffer;

Expand All @@ -670,32 +700,25 @@ BZIP3_API s32 bz3_decode_block(struct bz3_state * state, u8 * buffer, s32 data_s
state->cm_state->input_ptr = 0;
state->cm_state->input_max = data_size;

s32 size_src;

if (model & 2)
size_src = lzp_size;
else if (model & 4)
size_src = rle_size;
else
size_src = orig_size;

decode_bytes(state->cm_state, b2, size_src);
decode_bytes(state->cm_state, b2, size_before_bwt);
swap(b1, b2);

if (bwt_idx > size_src) {
if (bwt_idx > size_before_bwt) {
state->last_error = BZ3_ERR_MALFORMED_HEADER;
return -1;
}

// Undo BWT
memset(state->sais_array, 0, sizeof(s32) * BWT_BOUND(state->block_size));
memset(b2, 0, size_src);
if (libsais_unbwt(b1, b2, state->sais_array, size_src, NULL, bwt_idx) < 0) {
memset(b2, 0, size_before_bwt); // buffer b2, swap b1
if (libsais_unbwt(b1, b2, state->sais_array, size_before_bwt, NULL, bwt_idx) < 0) {
state->last_error = BZ3_ERR_BWT;
return -1;
}
swap(b1, b2);

s32 size_src = size_before_bwt;

// Undo LZP
if (model & 2) {
size_src = lzp_decompress(b1, b2, lzp_size, bz3_bound(state->block_size), state->lzp_lut);
Expand All @@ -706,7 +729,7 @@ BZIP3_API s32 bz3_decode_block(struct bz3_state * state, u8 * buffer, s32 data_s
swap(b1, b2);
}

if (model & 4) {
if (model & 4) {
int err = mrled(b1, b2, orig_size, size_src);
if (err) {
state->last_error = BZ3_ERR_CRC;
Expand Down Expand Up @@ -748,6 +771,7 @@ typedef struct {
// Per-block argument bundle handed to bz3_init_decode_thread; every field is
// forwarded unchanged to bz3_decode_block.
typedef struct {
// Decoder state used for this block.
struct bz3_state * state;
// Buffer passed to bz3_decode_block as `buffer`.
u8 * buffer;
// Size of `buffer`, passed to bz3_decode_block as `buffer_size`.
size_t buffer_size;
// Passed to bz3_decode_block as the compressed data size.
s32 size;
// Passed to bz3_decode_block as `orig_size`.
s32 orig_size;
} decode_thread_msg;
Expand All @@ -761,7 +785,7 @@ static void * bz3_init_encode_thread(void * _msg) {

static void * bz3_init_decode_thread(void * _msg) {
decode_thread_msg * msg = _msg;
bz3_decode_block(msg->state, msg->buffer, msg->size, msg->orig_size);
bz3_decode_block(msg->state, msg->buffer, msg->buffer_size, msg->size, msg->orig_size);
pthread_exit(NULL);
return NULL; // unreachable
}
Expand All @@ -779,12 +803,13 @@ BZIP3_API void bz3_encode_blocks(struct bz3_state * states[], u8 * buffers[], s3
for (s32 i = 0; i < n; i++) sizes[i] = messages[i].size;
}

BZIP3_API void bz3_decode_blocks(struct bz3_state * states[], u8 * buffers[], s32 sizes[], s32 orig_sizes[], s32 n) {
BZIP3_API void bz3_decode_blocks(struct bz3_state * states[], u8 * buffers[], size_t buffer_sizes[], s32 sizes[], s32 orig_sizes[], s32 n) {
decode_thread_msg messages[n];
pthread_t threads[n];
for (s32 i = 0; i < n; i++) {
messages[i].state = states[i];
messages[i].buffer = buffers[i];
messages[i].buffer_size = buffer_sizes[i];
messages[i].size = sizes[i];
messages[i].orig_size = orig_sizes[i];
pthread_create(&threads[i], NULL, bz3_init_decode_thread, &messages[i]);
Expand Down Expand Up @@ -868,7 +893,8 @@ BZIP3_API int bz3_decompress(const uint8_t * in, uint8_t * out, size_t in_size,
struct bz3_state * state = bz3_new(block_size);
if (!state) return BZ3_ERR_INIT;

u8 * compression_buf = malloc(bz3_bound(block_size));
size_t compression_buf_size = bz3_bound(block_size);
u8 * compression_buf = malloc(compression_buf_size);
if (!compression_buf) {
bz3_free(state);
return BZ3_ERR_INIT;
Expand Down Expand Up @@ -899,7 +925,7 @@ BZIP3_API int bz3_decompress(const uint8_t * in, uint8_t * out, size_t in_size,
return BZ3_ERR_DATA_TOO_BIG;
}
memcpy(compression_buf, in + 8, size);
bz3_decode_block(state, compression_buf, size, orig_size);
bz3_decode_block(state, compression_buf, compression_buf_size, size, orig_size);
if (bz3_last_error(state) != BZ3_OK) {
s8 last_error = state->last_error;
bz3_free(state);
Expand Down
20 changes: 12 additions & 8 deletions src/main.c
Original file line number Diff line number Diff line change
Expand Up @@ -229,7 +229,8 @@ static int process(FILE * input_des, FILE * output_des, int mode, int block_size
return 1;
}

u8 * buffer = malloc(bz3_bound(block_size));
size_t buffer_size = bz3_bound(block_size);
u8 * buffer = malloc(buffer_size);

if (!buffer) {
fprintf(stderr, "Failed to allocate memory.\n");
Expand Down Expand Up @@ -272,7 +273,7 @@ static int process(FILE * input_des, FILE * output_des, int mode, int block_size
}
xread_noeof(buffer, 1, new_size, input_des);
bytes_read += 8 + new_size;
if (bz3_decode_block(state, buffer, new_size, old_size) == -1) {
if (bz3_decode_block(state, buffer, buffer_size, new_size, old_size) == -1) {
fprintf(stderr, "Failed to decode a block: %s\n", bz3_strerror(state));
return 1;
}
Expand All @@ -294,7 +295,7 @@ static int process(FILE * input_des, FILE * output_des, int mode, int block_size
}
xread_noeof(buffer, 1, new_size, input_des);
bytes_read += 8 + new_size;
if (bz3_decode_block(state, buffer, new_size, old_size) == -1) {
if (bz3_decode_block(state, buffer, buffer_size, new_size, old_size) == -1) {
fprintf(stderr, "Writing invalid block: %s\n", bz3_strerror(state));
}
xwrite(buffer, old_size, 1, output_des);
Expand All @@ -315,7 +316,7 @@ static int process(FILE * input_des, FILE * output_des, int mode, int block_size
xread_noeof(buffer, 1, new_size, input_des);
bytes_read += 8 + new_size;
bytes_written += old_size;
if (bz3_decode_block(state, buffer, new_size, old_size) == -1) {
if (bz3_decode_block(state, buffer, buffer_size, new_size, old_size) == -1) {
fprintf(stderr, "Failed to decode a block: %s\n", bz3_strerror(state));
return 1;
}
Expand All @@ -335,14 +336,17 @@ static int process(FILE * input_des, FILE * output_des, int mode, int block_size
struct bz3_state * states[workers];
u8 * buffers[workers];
s32 sizes[workers];
size_t buffer_sizes[workers];
s32 old_sizes[workers];
for (s32 i = 0; i < workers; i++) {
states[i] = bz3_new(block_size);
if (states[i] == NULL) {
fprintf(stderr, "Failed to create a block encoder state.\n");
return 1;
}
buffers[i] = malloc(block_size + block_size / 50 + 32);
size_t buffer_size = bz3_bound(block_size);
buffer_sizes[i] = buffer_size;
buffers[i] = malloc(buffer_size);
if (!buffers[i]) {
fprintf(stderr, "Failed to allocate memory.\n");
return 1;
Expand Down Expand Up @@ -393,7 +397,7 @@ static int process(FILE * input_des, FILE * output_des, int mode, int block_size
xread_noeof(buffers[i], 1, sizes[i], input_des);
bytes_read += 8 + sizes[i];
}
bz3_decode_blocks(states, buffers, sizes, old_sizes, i);
bz3_decode_blocks(states, buffers, buffer_sizes, sizes, old_sizes, i);
for (s32 j = 0; j < i; j++) {
if (bz3_last_error(states[j]) != BZ3_OK) {
fprintf(stderr, "Failed to decode data: %s\n", bz3_strerror(states[j]));
Expand Down Expand Up @@ -421,7 +425,7 @@ static int process(FILE * input_des, FILE * output_des, int mode, int block_size
xread_noeof(buffers[i], 1, sizes[i], input_des);
bytes_read += 8 + sizes[i];
}
bz3_decode_blocks(states, buffers, sizes, old_sizes, i);
bz3_decode_blocks(states, buffers, buffer_sizes, sizes, old_sizes, i);
for (s32 j = 0; j < i; j++) {
if (bz3_last_error(states[j]) != BZ3_OK) {
fprintf(stderr, "Writing invalid block: %s\n", bz3_strerror(states[j]));
Expand Down Expand Up @@ -449,7 +453,7 @@ static int process(FILE * input_des, FILE * output_des, int mode, int block_size
bytes_read += 8 + sizes[i];
bytes_written += old_sizes[i];
}
bz3_decode_blocks(states, buffers, sizes, old_sizes, i);
bz3_decode_blocks(states, buffers, buffer_sizes, sizes, old_sizes, i);
for (s32 j = 0; j < i; j++) {
if (bz3_last_error(states[j]) != BZ3_OK) {
fprintf(stderr, "Failed to decode data: %s\n", bz3_strerror(states[j]));
Expand Down

0 comments on commit 0deefe0

Please sign in to comment.