diff --git a/benches/decode_all.rs b/benches/decode_all.rs
index e7f89cd2..43998678 100644
--- a/benches/decode_all.rs
+++ b/benches/decode_all.rs
@@ -3,12 +3,12 @@ use ruzstd::decoding::FrameDecoder;
 
 fn criterion_benchmark(c: &mut Criterion) {
     let mut fr = FrameDecoder::new();
-    let mut target_slice = &mut vec![0u8; 1024 * 1024 * 200];
+    let target_slice = &mut vec![0u8; 1024 * 1024 * 200];
     let src = include_bytes!("../decodecorpus_files/z000033.zst");
 
     c.bench_function("decode_all_slice", |b| {
         b.iter(|| {
-            fr.decode_all(src, &mut target_slice).unwrap();
+            fr.decode_all(src, target_slice).unwrap();
         })
     });
 }
diff --git a/src/bin/zstd.rs b/src/bin/zstd.rs
index bdd80bd7..f44d1912 100644
--- a/src/bin/zstd.rs
+++ b/src/bin/zstd.rs
@@ -68,8 +68,8 @@ fn decompress(flags: &[String], file_paths: &[String]) {
                 })) => {
                     eprintln!("Found a skippable frame with magic number: {magic_num} and size: {skip_size}");
                     tracker.file_pos = f.stream_position().unwrap();
-                    tracker.file_pos += skip_size as u64;
-                    f.seek(SeekFrom::Current(skip_size as i64)).unwrap();
+                    tracker.file_pos += u64::from(skip_size);
+                    f.seek(SeekFrom::Current(i64::from(skip_size))).unwrap();
                     continue;
                 }
                 other => other.unwrap(),
diff --git a/src/blocks/block.rs b/src/blocks/block.rs
index 1dbf4e8c..4776f8e3 100644
--- a/src/blocks/block.rs
+++ b/src/blocks/block.rs
@@ -34,10 +34,10 @@ pub struct BlockHeader {
     pub last_block: bool,
     pub block_type: BlockType,
     /// The size of the decompressed data. If the block type
-    /// is [BlockType::Reserved] or [BlockType::Compressed],
+    /// is [`BlockType::Reserved`] or [`BlockType::Compressed`],
     /// this value is set to zero and should not be referenced.
     pub decompressed_size: u32,
-    /// The size of the block. If the block is [BlockType::RLE],
+    /// The size of the block. If the block is [`BlockType::RLE`],
     /// this value will be 1.
     pub content_size: u32,
 }
diff --git a/src/blocks/literals_section.rs b/src/blocks/literals_section.rs
index 83917918..e1641f46 100644
--- a/src/blocks/literals_section.rs
+++ b/src/blocks/literals_section.rs
@@ -7,20 +7,20 @@ use crate::decoding::errors::LiteralsSectionParseError;
 ///
 /// This is the first of those two sections. A literal is just any arbitrary data, and it is copied by the sequences section
 pub struct LiteralsSection {
-    /// - If this block is of type [LiteralsSectionType::Raw], then the data is `regenerated_bytes`
+    /// - If this block is of type [`LiteralsSectionType::Raw`], then the data is `regenerated_bytes`
     /// bytes long, and it contains the raw literals data to be used during the second section,
     /// the sequences section.
-    /// - If this block is of type [LiteralsSectionType::RLE],
+    /// - If this block is of type [`LiteralsSectionType::RLE`],
     /// then the literal consists of a single byte repeated `regenerated_size` times.
-    /// - For types [LiteralsSectionType::Compressed] or [LiteralsSectionType::Treeless],
+    /// - For types [`LiteralsSectionType::Compressed`] or [`LiteralsSectionType::Treeless`],
     /// then this is the size of the decompressed data.
     pub regenerated_size: u32,
-    /// - For types [LiteralsSectionType::Raw] and [LiteralsSectionType::RLE], this value is not present.
-    /// - For types [LiteralsSectionType::Compressed] and [LiteralsSectionType::Treeless], this value will
+    /// - For types [`LiteralsSectionType::Raw`] and [`LiteralsSectionType::RLE`], this value is not present.
+    /// - For types [`LiteralsSectionType::Compressed`] and [`LiteralsSectionType::Treeless`], this value will
     /// be set to the size of the compressed data.
     pub compressed_size: Option<u32>,
     /// This value will be either 1 stream or 4 streams if the literal is of type
-    /// [LiteralsSectionType::Compressed] or [LiteralsSectionType::Treeless], and it
+    /// [`LiteralsSectionType::Compressed`] or [`LiteralsSectionType::Treeless`], and it
     /// is not used for RLE or uncompressed literals.
     pub num_streams: Option<u8>,
     /// The type of the literal section.
@@ -31,7 +31,7 @@ pub struct LiteralsSection {
 pub enum LiteralsSectionType {
     /// Literals are stored uncompressed.
     Raw,
-    /// Literals consist of a single byte value repeated [LiteralsSection::regenerated_size] times.
+    /// Literals consist of a single byte value repeated [`LiteralsSection::regenerated_size`] times.
     #[allow(clippy::upper_case_acronyms)]
     RLE,
     /// This is a standard Huffman-compressed block, starting with a Huffman tree description.
@@ -39,7 +39,7 @@ pub enum LiteralsSectionType {
     /// description.
     Compressed,
     /// This is a Huffman-compressed block,
-    /// using the Huffman tree from the previous [LiteralsSectionType::Compressed] block
+    /// using the Huffman tree from the previous [`LiteralsSectionType::Compressed`] block
     /// in the sequence. If this mode is triggered without any previous Huffman-tables in the
     /// frame (or dictionary), it should be treated as data corruption.
     Treeless,
@@ -52,7 +52,7 @@ impl Default for LiteralsSection {
     }
 }
 
 impl LiteralsSection {
-    /// Create a new [LiteralsSection].
+    /// Create a new [`LiteralsSection`].
     pub fn new() -> LiteralsSection {
         LiteralsSection {
             regenerated_size: 0,
@@ -63,7 +63,7 @@ impl LiteralsSection {
     }
 
     /// Given the first byte of a header, determine the size of the whole header, from 1 to 5 bytes.
-    pub fn header_bytes_needed(&self, first_byte: u8) -> Result<u8, LiteralsSectionParseError> {
+    pub fn header_bytes_needed(first_byte: u8) -> Result<u8, LiteralsSectionParseError> {
         let ls_type: LiteralsSectionType = Self::section_type(first_byte)?;
         let size_format = (first_byte >> 2) & 0x3;
         match ls_type {
@@ -84,7 +84,7 @@ impl LiteralsSection {
                     // regenerated_size uses 20 bits
                     Ok(3)
                 }
-                _ => panic!(
+                _ => unreachable!(
                     "This is a bug in the program. There should only be values between 0..3"
                 ),
             }
@@ -105,7 +105,7 @@ impl LiteralsSection {
                     Ok(5)
                 }
 
-                _ => panic!(
+                _ => unreachable!(
                     "This is a bug in the program. There should only be values between 0..3"
                 ),
             }
@@ -120,7 +120,7 @@ impl LiteralsSection {
         self.ls_type = Self::section_type(block_type)?;
         let size_format = br.get_bits(2)? as u8;
 
-        let byte_needed = self.header_bytes_needed(raw[0])?;
+        let byte_needed = Self::header_bytes_needed(raw[0])?;
         if raw.len() < byte_needed as usize {
             return Err(LiteralsSectionParseError::NotEnoughBytes {
                 have: raw.len(),
diff --git a/src/blocks/sequence_section.rs b/src/blocks/sequence_section.rs
index 8d871fad..def2890a 100644
--- a/src/blocks/sequence_section.rs
+++ b/src/blocks/sequence_section.rs
@@ -63,7 +63,7 @@ pub enum ModeType {
 }
 
 impl CompressionModes {
-    /// Deserialize a two bit mode value into a [ModeType]
+    /// Deserialize a two-bit mode value into a [`ModeType`]
     pub fn decode_mode(m: u8) -> ModeType {
         match m {
             0 => ModeType::Predefined,
@@ -96,7 +96,7 @@ impl Default for SequencesHeader {
     }
 }
 
 impl SequencesHeader {
-    /// Create a new [SequencesHeader].
+    /// Create a new [`SequencesHeader`].
     pub fn new() -> SequencesHeader {
         SequencesHeader {
             num_sequences: 0,
diff --git a/src/decoding/bit_reader.rs b/src/decoding/bit_reader.rs
index 2fba6dbe..af4e461f 100644
--- a/src/decoding/bit_reader.rs
+++ b/src/decoding/bit_reader.rs
@@ -50,9 +50,7 @@ impl<'s> BitReader<'s> {
     }
 
     pub fn return_bits(&mut self, n: usize) {
-        if n > self.idx {
-            panic!("Cant return this many bits");
-        }
+        assert!(n <= self.idx, "can't return this many bits");
         self.idx -= n;
     }
diff --git a/src/decoding/bit_reader_reverse.rs b/src/decoding/bit_reader_reverse.rs
index b6a1de5c..3179f735 100644
--- a/src/decoding/bit_reader_reverse.rs
+++ b/src/decoding/bit_reader_reverse.rs
@@ -146,7 +146,7 @@ impl<'s> BitReaderReversed<'s> {
         debug_assert!(self.bits_consumed <= 64);
     }
 
-    /// Same as calling get_bits three times but slightly more performant
+    /// Same as calling `get_bits` three times but slightly more performant
     #[inline(always)]
     pub fn get_bits_triple(&mut self, n1: u8, n2: u8, n3: u8) -> (u64, u64, u64) {
         let sum = n1 + n2 + n3;
diff --git a/src/decoding/block_decoder.rs b/src/decoding/block_decoder.rs
index aef15e32..f0158bdd 100644
--- a/src/decoding/block_decoder.rs
+++ b/src/decoding/block_decoder.rs
@@ -26,7 +26,7 @@ enum DecoderState {
     Failed, //TODO put "self.internal_state = DecoderState::Failed;" everywhere an unresolvable error occurs
 }
 
-/// Create a new [BlockDecoder].
+/// Create a new [`BlockDecoder`].
 pub fn new() -> BlockDecoder {
     BlockDecoder {
         internal_state: DecoderState::ReadyToDecodeNextHeader,
@@ -113,7 +113,7 @@ impl BlockDecoder {
             }
 
             BlockType::Compressed => {
-                self.decompress_block(header, workspace, source)?;
+                Self::decompress_block(header, workspace, source)?;
 
                 self.internal_state = DecoderState::ReadyToDecodeNextHeader;
                 Ok(u64::from(header.content_size))
@@ -122,7 +122,6 @@ impl BlockDecoder {
     }
 
     fn decompress_block(
-        &mut self,
         header: &BlockHeader,
         workspace: &mut DecoderScratch, //reuse this as often as possible. Not only if the trees are reused but also reuse the allocations when building new trees
         mut source: impl Read,
diff --git a/src/decoding/decodebuffer.rs b/src/decoding/decodebuffer.rs
index 46c4f333..2e162dfa 100644
--- a/src/decoding/decodebuffer.rs
+++ b/src/decoding/decodebuffer.rs
@@ -180,8 +180,8 @@ impl DecodeBuffer {
         self.buffer.len()
     }
 
-    /// Drain as much as possible while retaining enough so that decoding si still possible with the required window_size
-    /// At best call only if can_drain_to_window_size reports a 'high' number of bytes to reduce allocations
+    /// Drain as much as possible while retaining enough so that decoding is still possible with the required `window_size`
+    /// At best call only if `can_drain_to_window_size` reports a 'high' number of bytes to reduce allocations
     pub fn drain_to_window_size(&mut self) -> Option<Vec<u8>> {
         //TODO investigate if it is possible to return the std::vec::Drain iterator directly without collecting here
         match self.can_drain_to_window_size() {
@@ -238,7 +238,7 @@ impl DecodeBuffer {
         Ok(amount)
     }
 
-    /// Semantics of write_bytes:
+    /// Semantics of `write_bytes`:
     /// Should dump as many of the provided bytes as possible to whatever sink until no bytes are left or an error is encountered
     /// Return how many bytes have actually been dumped to the sink.
     fn drain_to(
@@ -302,7 +302,7 @@ impl DecodeBuffer {
     }
 }
 
-/// Like Write::write_all but returns partial write length even on error
+/// Like `Write::write_all` but returns partial write length even on error
 fn write_all_bytes(mut sink: impl Write, buf: &[u8]) -> (usize, Result<(), Error>) {
     let mut written = 0;
     while written < buf.len() {
diff --git a/src/decoding/dictionary.rs b/src/decoding/dictionary.rs
index f0f7b7ad..422dc6a3 100644
--- a/src/decoding/dictionary.rs
+++ b/src/decoding/dictionary.rs
@@ -23,10 +23,10 @@ pub struct Dictionary {
     /// to compress or decompress,
     /// so it can be referenced in sequence commands.
     /// As long as the amount of data decoded from this frame is less than or
-    /// equal to Window_Size, sequence commands may specify offsets longer than
+    /// equal to `Window_Size`, sequence commands may specify offsets longer than
     /// the total length of decoded output so far to reference back to the
-    /// dictionary, even parts of the dictionary with offsets larger than Window_Size.
-    /// After the total output has surpassed Window_Size however,
+    /// dictionary, even parts of the dictionary with offsets larger than `Window_Size`.
+    /// After the total output has surpassed `Window_Size` however,
     /// this is no longer allowed and the dictionary is no longer accessible
     pub dict_content: Vec<u8>,
     /// The 3 most recent offsets are stored so that they can be used
@@ -41,7 +41,7 @@ pub const MAGIC_NUM: [u8; 4] = [0x37, 0xA4, 0x30, 0xEC];
 
 impl Dictionary {
     /// Parses the dictionary from `raw` and set the tables
-    /// it returns the dict_id for checking with the frame's `dict_id``
+    /// it returns the `dict_id` for checking with the frame's `dict_id`
     pub fn decode_dict(raw: &[u8]) -> Result<Dictionary, DictionaryDecodeError> {
         let mut new_dict = Dictionary {
             id: 0,
diff --git a/src/decoding/frame.rs b/src/decoding/frame.rs
index 278d814e..5076f7b8 100644
--- a/src/decoding/frame.rs
+++ b/src/decoding/frame.rs
@@ -96,7 +96,7 @@ impl FrameDescriptor {
     /// This is a 2 bit flag, specifying if the `Frame_Content_Size` field is present
     /// within the header. It notates the number of bytes used by `Frame_Content_size`
     ///
-    /// When this value is is 0, `FCS_Field_Size` depends on Single_Segment_flag.
+    /// When this value is 0, `FCS_Field_Size` depends on `Single_Segment_flag`.
     /// If the `Single_Segment_flag` field is set in the frame header descriptor,
     /// the size of the `Frame_Content_Size` field of the header is 1 byte.
     /// Otherwise, `FCS_Field_Size` is 0, and the `Frame_Content_Size` is not provided.
@@ -230,7 +230,7 @@ pub fn read_frame_header(mut r: impl Read) -> Result<(Frame, u8), ReadFrameHeaderError> {
 
     #[allow(clippy::needless_range_loop)]
     for i in 0..dict_id_len {
-        dict_id += (buf[i] as u32) << (8 * i);
+        dict_id += u32::from(buf[i]) << (8 * i);
     }
     if dict_id != 0 {
         frame_header.dict_id = Some(dict_id);
@@ -248,7 +248,7 @@ pub fn read_frame_header(mut r: impl Read) -> Result<(Frame, u8), ReadFrameHeaderError> {
 
     #[allow(clippy::needless_range_loop)]
     for i in 0..fcs_len {
-        fcs += (fcs_buf[i] as u64) << (8 * i);
+        fcs += u64::from(fcs_buf[i]) << (8 * i);
     }
     if fcs_len == 2 {
         fcs += 256;
diff --git a/src/decoding/frame_decoder.rs b/src/decoding/frame_decoder.rs
index 6c230e6b..f2bfc4c3 100644
--- a/src/decoding/frame_decoder.rs
+++ b/src/decoding/frame_decoder.rs
@@ -20,8 +20,8 @@ use core::convert::TryInto;
 /// over how many bytes/blocks will be decoded at a time (so you don't have to decode a 10GB file into memory all at once).
 /// It reads bytes as needed from a provided source and can be read from to collect partial results.
 ///
-/// If you want to just read the whole frame with an `io::Read` without having to deal with manually calling [FrameDecoder::decode_blocks]
-/// you can use the provided [crate::decoding::StreamingDecoder] wich wraps this FrameDecoder.
+/// If you want to just read the whole frame with an `io::Read` without having to deal with manually calling [`FrameDecoder::decode_blocks`]
+/// you can use the provided [`crate::decoding::StreamingDecoder`] which wraps this `FrameDecoder`.
 ///
 /// Workflow is as follows:
 /// ```
@@ -134,7 +134,7 @@ impl Default for FrameDecoder {
 
 impl FrameDecoder {
     /// This will create a new decoder without allocating anything yet.
-    /// init()/reset() will allocate all needed buffers if it is the first time this decoder is used
+    /// `init()/reset()` will allocate all needed buffers if it is the first time this decoder is used
     /// else they just reset these buffers with not further allocations
     pub fn new() -> FrameDecoder {
         FrameDecoder {
@@ -143,22 +143,22 @@ impl FrameDecoder {
         }
     }
 
-    /// init() will allocate all needed buffers if it is the first time this decoder is used
+    /// `init()` will allocate all needed buffers if it is the first time this decoder is used
     /// else they just reset these buffers with not further allocations
     ///
-    /// Note that all bytes currently in the decodebuffer from any previous frame will be lost. Collect them with collect()/collect_to_writer()
+    /// Note that all bytes currently in the decodebuffer from any previous frame will be lost. Collect them with `collect()/collect_to_writer()`
     ///
-    /// equivalent to reset()
+    /// equivalent to `reset()`
     pub fn init(&mut self, source: impl Read) -> Result<(), FrameDecoderError> {
         self.reset(source)
     }
 
-    /// reset() will allocate all needed buffers if it is the first time this decoder is used
+    /// `reset()` will allocate all needed buffers if it is the first time this decoder is used
     /// else they just reset these buffers with not further allocations
     ///
-    /// Note that all bytes currently in the decodebuffer from any previous frame will be lost. Collect them with collect()/collect_to_writer()
+    /// Note that all bytes currently in the decodebuffer from any previous frame will be lost. Collect them with `collect()/collect_to_writer()`
     ///
-    /// equivalent to init()
+    /// equivalent to `init()`
     pub fn reset(&mut self, source: impl Read) -> Result<(), FrameDecoderError> {
         use FrameDecoderError as err;
         let state = match &mut self.state {
@@ -166,10 +166,7 @@ impl FrameDecoder {
                 s.reset(source)?;
                 s
             }
-            None => {
-                self.state = Some(FrameDecoderState::new(source)?);
-                self.state.as_mut().unwrap()
-            }
+            None => self.state.insert(FrameDecoderState::new(source)?),
         };
         if let Some(dict_id) = state.frame.header.dictionary_id() {
             let dict = self
@@ -182,7 +179,7 @@ impl FrameDecoder {
         Ok(())
     }
 
-    /// Add a dict to the FrameDecoder that can be used when needed. The FrameDecoder uses the appropriate one dynamically
+    /// Add a dict to the `FrameDecoder` that can be used when needed. The `FrameDecoder` uses the appropriate one dynamically
     pub fn add_dict(&mut self, dict: Dictionary) -> Result<(), FrameDecoderError> {
         self.dicts.insert(dict.id, dict);
         Ok(())
@@ -223,7 +220,7 @@ impl FrameDecoder {
     }
 
     /// Returns the checksum that was calculated while decoding.
-    /// Only a sensible value after all decoded bytes have been collected/read from the FrameDecoder
+    /// Only a sensible value after all decoded bytes have been collected/read from the `FrameDecoder`
     #[cfg(feature = "hash")]
     pub fn get_calculated_checksum(&self) -> Option<u64> {
         use core::hash::Hasher;
@@ -248,7 +245,7 @@ impl FrameDecoder {
 
     /// Whether the current frames last block has been decoded yet
     /// If this returns true you can call the drain* functions to get all content
-    /// (the read() function will drain automatically if this returns true)
+    /// (the `read()` function will drain automatically if this returns true)
     pub fn is_finished(&self) -> bool {
         let state = match &self.state {
             None => return true,
@@ -344,8 +341,8 @@ impl FrameDecoder {
         Ok(state.frame_finished)
     }
 
-    /// Collect bytes and retain window_size bytes while decoding is still going on.
-    /// After decoding of the frame (is_finished() == true) has finished it will collect all remaining bytes
+    /// Collect bytes and retain `window_size` bytes while decoding is still going on.
+    /// After decoding of the frame (`is_finished()` == true) has finished it will collect all remaining bytes
     pub fn collect(&mut self) -> Option<Vec<u8>> {
         let finished = self.is_finished();
         let state = self.state.as_mut()?;
@@ -356,8 +353,8 @@ impl FrameDecoder {
         }
     }
 
-    /// Collect bytes and retain window_size bytes while decoding is still going on.
-    /// After decoding of the frame (is_finished() == true) has finished it will collect all remaining bytes
+    /// Collect bytes and retain `window_size` bytes while decoding is still going on.
+    /// After decoding of the frame (`is_finished()` == true) has finished it will collect all remaining bytes
    pub fn collect_to_writer(&mut self, w: impl Write) -> Result<usize, Error> {
         let finished = self.is_finished();
         let state = match &mut self.state {
@@ -372,8 +369,8 @@ impl FrameDecoder {
     }
 
     /// How many bytes can currently be collected from the decodebuffer, while decoding is going on this will be lower than the actual decodbuffer size
-    /// because window_size bytes need to be retained for decoding.
-    /// After decoding of the frame (is_finished() == true) has finished it will report all remaining bytes
+    /// because `window_size` bytes need to be retained for decoding.
+    /// After decoding of the frame (`is_finished()` == true) has finished it will report all remaining bytes
     pub fn can_collect(&self) -> usize {
         let finished = self.is_finished();
         let state = match &self.state {
@@ -394,14 +391,14 @@ impl FrameDecoder {
     /// Decodes as many blocks as possible from the source slice and reads from the decodebuffer into the target slice
     /// The source slice may contain only parts of a frame but must contain at least one full block to make progress
     ///
-    /// By all means use decode_blocks if you have a io.Reader available. This is just for compatibility with other decompressors
+    /// By all means use `decode_blocks` if you have an io.Reader available. This is just for compatibility with other decompressors
     /// which try to serve an old-style c api
     ///
    /// Returns (read, written), if read == 0 then the source did not contain a full block and further calls with the same
     /// input will not make any progress!
     ///
     /// Note that no kind of block can be bigger than 128kb.
-    /// So to be safe use at least 128*1024 (max block content size) + 3 (block_header size) + 18 (max frame_header size) bytes as your source buffer
+    /// So to be safe use at least 128*1024 (max block content size) + 3 (`block_header` size) + 18 (max `frame_header` size) bytes as your source buffer
     ///
     /// You may call this function with an empty source after all bytes have been decoded. This is equivalent to just call decoder.read(&mut target)
     pub fn decode_from_to(
@@ -424,10 +421,7 @@ impl FrameDecoder {
 
         //pseudo block to scope "state" so we can borrow self again after the block
         {
-            let state = match &mut self.state {
-                Some(s) => s,
-                None => panic!("Bug in library"),
-            };
+            let state = self.state.as_mut().unwrap();
             let mut block_dec = decoding::block_decoder::new();
 
             if state.frame.header.descriptor.content_checksum_flag()
@@ -514,7 +508,7 @@ impl FrameDecoder {
         let mut total_bytes_written = 0;
         while !input.is_empty() {
             match self.init(&mut input) {
-                Ok(_) => {}
+                Ok(()) => {}
                 Err(FrameDecoderError::ReadFrameHeaderError(
                     crate::decoding::errors::ReadFrameHeaderError::SkipFrame { length, .. },
                 )) => {
@@ -578,8 +572,8 @@ impl FrameDecoder {
     }
 }
 
-/// Read bytes from the decode_buffer that are no longer needed. While the frame is not yet finished
-/// this will retain window_size bytes, else it will drain it completely
+/// Read bytes from the `decode_buffer` that are no longer needed. While the frame is not yet finished
+/// this will retain `window_size` bytes, else it will drain it completely
 impl Read for FrameDecoder {
     fn read(&mut self, target: &mut [u8]) -> Result<usize, Error> {
         let state = match &mut self.state {
diff --git a/src/decoding/literals_section_decoder.rs b/src/decoding/literals_section_decoder.rs
index 7ccb21c3..e987cb52 100644
--- a/src/decoding/literals_section_decoder.rs
+++ b/src/decoding/literals_section_decoder.rs
@@ -1,4 +1,4 @@
-//! This module contains the decompress_literals function, used to take a
+//! This module contains the `decompress_literals` function, used to take a
 //! parsed literals header and a source and decompress it.
 use super::super::blocks::literals_section::{LiteralsSection, LiteralsSectionType};
diff --git a/src/decoding/ringbuffer.rs b/src/decoding/ringbuffer.rs
index 53796e51..fbeb74c4 100644
--- a/src/decoding/ringbuffer.rs
+++ b/src/decoding/ringbuffer.rs
@@ -264,14 +264,13 @@ impl RingBuffer {
     /// Copies elements from the provided range to the end of the buffer.
     #[allow(dead_code)]
     pub fn extend_from_within(&mut self, start: usize, len: usize) {
-        if start + len > self.len() {
-            panic!(
-                "Calls to this functions must respect start ({}) + len ({}) <= self.len() ({})!",
-                start,
-                len,
-                self.len()
-            );
-        }
+        assert!(
+            start + len <= self.len(),
+            "Calls to this function must respect start ({}) + len ({}) <= self.len() ({})!",
+            start,
+            len,
+            self.len()
+        );
 
         self.reserve(len);
 
@@ -286,7 +285,7 @@ impl RingBuffer {
     ///
     /// SAFETY:
     /// For this to be safe two requirements need to hold:
-    /// 1. start + len <= self.len() so we do not copy uninitialised memory
+    /// 1. start + len <= `self.len()` so we do not copy uninitialised memory
     /// 2. More then len reserved space so we do not write out-of-bounds
     #[warn(unsafe_op_in_unsafe_fn)]
     pub unsafe fn extend_from_within_unchecked(&mut self, start: usize, len: usize) {
@@ -463,11 +462,11 @@ impl RingBuffer {
     }
 
     #[allow(dead_code)]
-    /// This function is functionally the same as [RingBuffer::extend_from_within_unchecked],
+    /// This function is functionally the same as [`RingBuffer::extend_from_within_unchecked`],
     /// but it does not contain any branching operations.
     ///
     /// SAFETY:
-    /// Needs start + len <= self.len()
+    /// Needs start + len <= `self.len()`
     /// And more then len reserved space
     pub unsafe fn extend_from_within_unchecked_branchless(&mut self, start: usize, len: usize) {
         // data slices in raw parts
@@ -539,7 +538,7 @@ impl Drop for RingBuffer {
     }
 }
 
-/// Similar to ptr::copy_nonoverlapping
+/// Similar to `ptr::copy_nonoverlapping`
 ///
 /// But it might overshoot the desired copy length if deemed useful
 ///
@@ -551,7 +550,7 @@ impl Drop for RingBuffer {
 ///
 /// The chunk size is not part of the contract and may change depending on the target platform.
 ///
-/// If that isn't possible we just fall back to ptr::copy_nonoverlapping
+/// If that isn't possible we just fall back to `ptr::copy_nonoverlapping`
 #[inline(always)]
 unsafe fn copy_bytes_overshooting(
     src: (*const u8, usize),
diff --git a/src/decoding/sequence_section_decoder.rs b/src/decoding/sequence_section_decoder.rs
index 645341b1..b1bf7d9d 100644
--- a/src/decoding/sequence_section_decoder.rs
+++ b/src/decoding/sequence_section_decoder.rs
@@ -411,10 +411,10 @@ fn maybe_update_fse_tables(
 
 // The default Literal Length decoding table uses an accuracy logarithm of 6 bits.
 const LL_DEFAULT_ACC_LOG: u8 = 6;
-/// If [ModeType::Predefined] is selected for a symbol type, its FSE decoding
+/// If [`ModeType::Predefined`] is selected for a symbol type, its FSE decoding
 /// table is generated using a predefined distribution table.
 ///
-/// https://github.com/facebook/zstd/blob/dev/doc/zstd_compression_format.md#literals-length
+/// <https://github.com/facebook/zstd/blob/dev/doc/zstd_compression_format.md#literals-length>
 const LITERALS_LENGTH_DEFAULT_DISTRIBUTION: [i32; 36] = [
     4, 3, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 2, 1, 1, 1, 1,
     1, -1, -1, -1, -1,
@@ -422,10 +422,10 @@ const LITERALS_LENGTH_DEFAULT_DISTRIBUTION: [i32; 36] = [
 
 // The default Match Length decoding table uses an accuracy logarithm of 6 bits.
 const ML_DEFAULT_ACC_LOG: u8 = 6;
-/// If [ModeType::Predefined] is selected for a symbol type, its FSE decoding
+/// If [`ModeType::Predefined`] is selected for a symbol type, its FSE decoding
 /// table is generated using a predefined distribution table.
 ///
-/// https://github.com/facebook/zstd/blob/dev/doc/zstd_compression_format.md#match-length
+/// <https://github.com/facebook/zstd/blob/dev/doc/zstd_compression_format.md#match-length>
 const MATCH_LENGTH_DEFAULT_DISTRIBUTION: [i32; 53] = [
     1, 4, 3, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, -1, -1, -1, -1, -1, -1, -1,
@@ -433,10 +433,10 @@ const MATCH_LENGTH_DEFAULT_DISTRIBUTION: [i32; 53] = [
 
 // The default Match Length decoding table uses an accuracy logarithm of 5 bits.
 const OF_DEFAULT_ACC_LOG: u8 = 5;
-/// If [ModeType::Predefined] is selected for a symbol type, its FSE decoding
+/// If [`ModeType::Predefined`] is selected for a symbol type, its FSE decoding
 /// table is generated using a predefined distribution table.
 ///
-/// https://github.com/facebook/zstd/blob/dev/doc/zstd_compression_format.md#match-length
+/// <https://github.com/facebook/zstd/blob/dev/doc/zstd_compression_format.md#match-length>
 const OFFSET_DEFAULT_DISTRIBUTION: [i32; 29] = [
     1, 1, 1, 1, 1, 1, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, -1, -1, -1, -1, -1,
 ];
diff --git a/src/decoding/streaming_decoder.rs b/src/decoding/streaming_decoder.rs
index cc3fbf26..925d5c43 100644
--- a/src/decoding/streaming_decoder.rs
+++ b/src/decoding/streaming_decoder.rs
@@ -1,4 +1,4 @@
-//! The [StreamingDecoder] wraps a [FrameDecoder] and provides a Read impl that decodes data when necessary
+//! The [`StreamingDecoder`] wraps a [`FrameDecoder`] and provides a Read impl that decodes data when necessary
 
 use core::borrow::BorrowMut;
 
@@ -12,17 +12,17 @@ use crate::io::{Error, ErrorKind, Read};
 /// `io::Read::read_to_end` / `io::Read::read_exact` or passing this to another library / module as a source for the decoded content
 ///
 /// If you need more control over how decompression takes place, you can use
-/// the lower level [FrameDecoder], which allows for greater control over how
+/// the lower level [`FrameDecoder`], which allows for greater control over how
 /// decompression takes place but the implementor must call
-/// [FrameDecoder::decode_blocks] repeatedly to decode the entire frame.
+/// [`FrameDecoder::decode_blocks`] repeatedly to decode the entire frame.
 ///
 /// ## Caveat
-/// [StreamingDecoder] expects the underlying stream to only contain a single frame,
+/// [`StreamingDecoder`] expects the underlying stream to only contain a single frame,
 /// yet the specification states that a single archive may contain multiple frames.
 ///
 /// To decode all the frames in a finite stream, the calling code needs to recreate
 /// the instance of the decoder and handle
-/// [crate::decoding::errors::ReadFrameHeaderError::SkipFrame]
+/// [`crate::decoding::errors::ReadFrameHeaderError::SkipFrame`]
 /// errors by skipping forward the `length` amount of bytes, see
 ///
 /// ```no_run
@@ -86,7 +86,7 @@ impl<READ: Read, DEC: BorrowMut<FrameDecoder>> StreamingDecoder<READ, DEC> {
         self.source
     }
 
-    /// Destructures this object into both the inner reader and [FrameDecoder].
+    /// Destructures this object into both the inner reader and [`FrameDecoder`].
     pub fn into_parts(self) -> (READ, DEC)
     where
         READ: Sized,
@@ -94,7 +94,7 @@ impl<READ: Read, DEC: BorrowMut<FrameDecoder>> StreamingDecoder<READ, DEC> {
         (self.source, self.decoder)
     }
 
-    /// Destructures this object into the inner [FrameDecoder].
+    /// Destructures this object into the inner [`FrameDecoder`].
     pub fn into_frame_decoder(self) -> DEC {
         self.decoder
     }
diff --git a/src/encoding/bit_writer.rs b/src/encoding/bit_writer.rs
index fb809926..9460b4f1 100644
--- a/src/encoding/bit_writer.rs
+++ b/src/encoding/bit_writer.rs
@@ -1,4 +1,4 @@
-//! Use [BitWriter] to write an arbitrary amount of bits into a buffer.
+//! Use [`BitWriter`] to write an arbitrary amount of bits into a buffer.
 use alloc::vec::Vec;
 
 /// An interface for writing an arbitrary number of bits into a buffer. Write new bits into the buffer with `write_bits`, and
@@ -7,7 +7,7 @@ use alloc::vec::Vec;
 pub(crate) struct BitWriter<V: AsMut<Vec<u8>>> {
     /// The buffer that's filled with bits
     output: V,
-    /// holds a partially filled byte which gets put in outpu when it's fill with a write_bits call
+    /// holds a partially filled byte which gets put in output when it's full with a `write_bits` call
     partial: u64,
     bits_in_partial: usize,
     /// The index pointing to the next unoccupied bit. Effectively just
@@ -52,7 +52,7 @@ impl<V: AsMut<Vec<u8>>> BitWriter<V> {
         self.output.as_mut().resize(index / 8, 0);
     }
 
-    /// Change the bits at the index. `bits` contains the ǹum_bits` new bits that should be written
+    /// Change the bits at the index. `bits` contains the `num_bits` new bits that should be written
     /// Instead of the current content. `bits` *MUST* only contain zeroes in the upper bits outside of the `0..num_bits` range.
     pub fn change_bits(&mut self, idx: usize, bits: impl Into<u64>, num_bits: usize) {
         self.change_bits_64(idx, bits.into(), num_bits);
     }
@@ -103,9 +103,11 @@ impl<V: AsMut<Vec<u8>>> BitWriter<V> {
 
     /// Simply append bytes to the buffer. Only works if the buffer was already byte aligned
     pub fn append_bytes(&mut self, data: &[u8]) {
-        if self.misaligned() != 0 {
-            panic!("Don't append bytes when writer is misaligned")
-        }
+        assert!(
+            self.misaligned() == 0,
+            "Don't append bytes when writer is misaligned"
+        );
+
         self.flush();
         self.output.as_mut().extend_from_slice(data);
         self.bit_idx += data.len() * 8;
@@ -193,9 +195,7 @@ impl<V: AsMut<Vec<u8>>> BitWriter<V> {
     /// This function consumes the writer, so it cannot be used after
     /// dumping
     pub fn dump(mut self) -> V {
-        if self.misaligned() != 0 {
-            panic!("`dump` was called on a bit writer but an even number of bytes weren't written into the buffer. Was: {}", self.index())
-        }
+        assert!(self.misaligned() == 0, "`dump` was called on a bit writer but an even number of bytes weren't written into the buffer. Was: {}", self.index());
         self.flush();
         debug_assert_eq!(self.partial, 0);
         self.output
diff --git a/src/encoding/block_header.rs b/src/encoding/block_header.rs
index cdfa7dc4..83ec0ac1 100644
--- a/src/encoding/block_header.rs
+++ b/src/encoding/block_header.rs
@@ -31,7 +31,7 @@ impl BlockHeader {
         };
         let mut block_header = self.block_size << 3;
         block_header |= encoded_block_type << 1;
-        block_header |= self.last_block as u32;
+        block_header |= u32::from(self.last_block);
         output.extend_from_slice(&block_header.to_le_bytes()[0..3]);
     }
 }
diff --git a/src/encoding/blocks/mod.rs b/src/encoding/blocks/mod.rs
index 671ff6fb..e895464b 100644
--- a/src/encoding/blocks/mod.rs
+++ b/src/encoding/blocks/mod.rs
@@ -1,4 +1,4 @@
-//! After Magic_Number and Frame_Header, there are some number of blocks. Each frame must have at least one block,
+//! After `Magic_Number` and `Frame_Header`, there are some number of blocks. Each frame must have at least one block,
 //! but there is no upper limit on the number of blocks per frame.
 //!
 //! There are a few different kinds of blocks, and implementations for those kinds are
diff --git a/src/encoding/frame_compressor.rs b/src/encoding/frame_compressor.rs
index 519bd52a..844e725e 100644
--- a/src/encoding/frame_compressor.rs
+++ b/src/encoding/frame_compressor.rs
@@ -13,7 +13,7 @@ use crate::io::{Read, Write};
 /// Blocks cannot be larger than 128KB in size.
 const MAX_BLOCK_SIZE: usize = 128 * 1024 - 20;
 
-/// An interface for compressing arbitrary data with the ZStandard compression algorithm.
+/// An interface for compressing arbitrary data with the `ZStandard` compression algorithm.
 ///
 /// `FrameCompressor` will generally be used by:
 /// 1. Initializing a compressor by providing a buffer of data using `FrameCompressor::new()`
@@ -62,30 +62,29 @@ impl<R: Read, W: Write, M: Matcher> FrameCompressor<R, W, M> {
         }
     }
 
-    /// Before calling [FrameCompressor::compress] you need to set the source
+    /// Before calling [`FrameCompressor::compress`] you need to set the source
     pub fn set_source(&mut self, uncompressed_data: R) -> Option<R> {
         self.uncompressed_data.replace(uncompressed_data)
     }
 
-    /// Before calling [FrameCompressor::compress] you need to set the drain
+    /// Before calling [`FrameCompressor::compress`] you need to set the drain
     pub fn set_drain(&mut self, compressed_data: W) -> Option<W> {
         self.compressed_data.replace(compressed_data)
     }
 
     /// Compress the uncompressed data from the provided source as one Zstd frame and write it to the provided drain
     ///
-    /// This will repeatedly call [Read::read] on the source to fill up blocks until the source returns 0 on the read call.
-    /// Also [Write::write_all] will be called on the drain after each block has been encoded.
+    /// This will repeatedly call [`Read::read`] on the source to fill up blocks until the source returns 0 on the read call.
+    /// Also [`Write::write_all`] will be called on the drain after each block has been encoded.
     ///
     /// To avoid endlessly encoding from a potentially endless source (like a network socket) you can use the
-    /// [Read::take] function
+    /// [`Read::take`] function
     pub fn compress(&mut self) {
         self.match_generator.reset(self.compression_level);
         let source = self.uncompressed_data.as_mut().unwrap();
         let drain = self.compressed_data.as_mut().unwrap();
 
         let mut output = Vec::with_capacity(1024 * 130);
-        let output = &mut output;
         let header = FrameHeader {
             frame_content_size: None,
             single_segment: false,
@@ -93,7 +92,7 @@ impl<R: Read, W: Write, M: Matcher> FrameCompressor<R, W, M> {
             dictionary_id: None,
             window_size: Some(self.match_generator.window_size()),
         };
-        header.serialize(output);
+        header.serialize(&mut output);
 
         loop {
             let mut uncompressed_data = self.match_generator.get_next_space();
@@ -111,7 +110,7 @@ impl<R: Read, W: Write, M: Matcher> FrameCompressor<R, W, M> {
                     break 'read_loop;
                 }
             }
-            uncompressed_data.resize(read_bytes, 0);
+            uncompressed_data.truncate(read_bytes);
 
             // Special handling is needed for compression of a totally empty file (why you'd want to do that, I don't know)
             if uncompressed_data.is_empty() {
@@ -121,8 +120,8 @@ impl<R: Read, W: Write, M: Matcher> FrameCompressor<R, W, M> {
                     block_size: 0,
                 };
                 // Write the header, then the block
-                header.serialize(output);
-                drain.write_all(output).unwrap();
+                header.serialize(&mut output);
+                drain.write_all(&output).unwrap();
                 output.clear();
                 break;
             }
@@ -135,7 +134,7 @@ impl<R: Read, W: Write, M: Matcher> FrameCompressor<R, W, M> {
                     block_size: read_bytes.try_into().unwrap(),
                 };
                 // Write the header, then the block
-                header.serialize(output);
+                header.serialize(&mut output);
                 output.extend_from_slice(&uncompressed_data);
             }
             CompressionLevel::Fastest => {
@@ -149,7 +148,7 @@ impl<R: Read, W: Write, M: Matcher> FrameCompressor<R, W, M> {
                         block_size: read_bytes.try_into().unwrap(),
                     };
                     // Write the header, then the block
-                    header.serialize(output);
+                    header.serialize(&mut output);
                     output.push(rle_byte);
                 } else {
                     let mut compressed = Vec::new();
@@ -162,7 +161,7 @@ impl<R: Read, W: Write, M: Matcher> FrameCompressor<R, W, M> {
                         block_size: read_bytes.try_into().unwrap(),
                     };
                     // Write the header, then the block
-                    header.serialize(output);
+                    header.serialize(&mut output);
                     output.extend_from_slice(self.match_generator.get_last_space());
                 } else {
                     let header = BlockHeader {
@@ -171,7 +170,7 @@ impl<R: Read, W: Write, M: Matcher> FrameCompressor<R, W, M> {
                         block_size: (compressed.len()).try_into().unwrap(),
                     };
                     // Write the header, then the block
-                    header.serialize(output);
+                    header.serialize(&mut output);
                     output.extend(compressed);
                 }
             }
@@ -180,7 +179,7 @@ impl<R: Read, W: Write, M: Matcher> FrameCompressor<R, W, M> {
                 unimplemented!();
             }
         }
-        drain.write_all(output).unwrap();
+        drain.write_all(&output).unwrap();
         output.clear();
         if last_block {
             break;
@@ -218,13 +217,13 @@ impl<R: Read, W: Write, M: Matcher> FrameCompressor<R, W, M> {
         self.compressed_data.take()
     }
 
-    /// Before calling [FrameCompressor::compress] you can replace the matcher
+    /// Before calling [`FrameCompressor::compress`] you can replace the matcher
     pub fn replace_matcher(&mut self, mut match_generator: M) -> M {
         core::mem::swap(&mut match_generator, &mut self.match_generator);
         match_generator
     }
 
-    /// Before calling [FrameCompressor::compress] you can replace the compression level
+    /// Before calling [`FrameCompressor::compress`] you can replace the compression level
     pub fn set_compression_level(
         &mut self,
         compression_level: CompressionLevel,
diff --git a/src/encoding/frame_header.rs b/src/encoding/frame_header.rs
index 72ef4061..b0337100 100644
--- a/src/encoding/frame_header.rs
+++ b/src/encoding/frame_header.rs
@@ -1,4 +1,6 @@
 //! Utilities and representations for a frame header.
+use core::array;
+
 use crate::decoding::frame;
 use crate::encoding::{
     bit_writer::BitWriter,
@@ -60,7 +62,7 @@ impl FrameHeader {
 
     /// Generate a serialized frame header descriptor for the frame header.
     ///
-    /// https://github.com/facebook/zstd/blob/dev/doc/zstd_compression_format.md#frame_header_descriptor
+    /// <https://github.com/facebook/zstd/blob/dev/doc/zstd_compression_format.md#frame_header_descriptor>
     fn descriptor(&self) -> u8 {
         let mut bw = BitWriter::new();
         // A frame header starts with a frame header descriptor.
@@ -149,14 +151,17 @@ impl FrameHeader {
 ///
 /// > When FCS_Field_Size is 1, 4 or 8 bytes, the value is read directly. When FCS_Field_Size is 2, the offset of 256 is added.
 ///
-/// https://github.com/facebook/zstd/blob/dev/doc/zstd_compression_format.md#frame_content_size
-fn minify_val_fcs(val: u64) -> Vec<u8> {
+/// <https://github.com/facebook/zstd/blob/dev/doc/zstd_compression_format.md#frame_content_size>
+fn minify_val_fcs(val: u64) -> impl Iterator<Item = u8> {
     let new_size = find_min_size(val);
     let mut val = val;
     if new_size == 2 {
         val -= 256;
     }
-    val.to_le_bytes()[0..new_size].to_vec()
+
+    // TODO: switch to `.into_iter()` when switching to 2021 edition
+    #[allow(deprecated)]
+    array::IntoIter::new(val.to_le_bytes()).take(new_size)
 }
 
 #[cfg(test)]
diff --git a/src/encoding/match_generator.rs b/src/encoding/match_generator.rs
index 54a59494..ec11f897 100644
--- a/src/encoding/match_generator.rs
+++ b/src/encoding/match_generator.rs
@@ -23,8 +23,8 @@ pub struct MatchGeneratorDriver {
 }
 
 impl MatchGeneratorDriver {
-    /// slice_size says how big the slices should be that are allocated to work with
-    /// max_slices_in_window says how many slices should at most be used while looking for matches
+    /// `slice_size` says how big the slices should be that are allocated to work with
+    /// `max_slices_in_window` says how many slices should at most be used while looking for matches
     pub(crate) fn new(slice_size: usize, max_slices_in_window: usize) -> Self {
         Self {
             vec_pool: Vec::new(),
@@ -128,11 +128,11 @@ impl SuffixStore {
 
     #[inline(always)]
     fn key(&self, suffix: &[u8]) -> usize {
-        let s0 = suffix[0] as u64;
-        let s1 = suffix[1] as u64;
-        let s2 = suffix[2] as u64;
-        let s3 = suffix[3] as u64;
-        let s4 = suffix[4] as u64;
+        let s0 = u64::from(suffix[0]);
+        let s1 = u64::from(suffix[1]);
+        let s2 = u64::from(suffix[2]);
+        let s3 = u64::from(suffix[3]);
+        let s4 = u64::from(suffix[4]);
 
         const POLY: u64 = 0xCF3BCCDCABu64;
 
@@ -173,7 +173,7 @@ pub(crate) struct MatchGenerator {
 }
 
 impl MatchGenerator {
-    /// max_size defines how many bytes will be used at most in the window used for matching
+    /// `max_size` defines how many bytes will be used at most in the window used for matching
     fn new(max_size: usize) -> Self {
         Self {
             max_window_size: max_size,
@@ -198,8 +198,8 @@ impl MatchGenerator {
     }
 
     /// Processes bytes in the current window until either a match is found or no more matches can be found
-    /// * If a match is found handle_sequence is called with the Triple variant
-    /// * If no more matches can be found but there are bytes still left handle_sequence is called with the Literals variant
+    /// * If a match is found `handle_sequence` is called with the Triple variant
+    /// * If no more matches can be found but there are bytes still left `handle_sequence` is called with the Literals variant
     /// * If no more matches can be found and no more bytes are left this returns false
     fn next_sequence(&mut self, mut handle_sequence: impl for<'a> FnMut(Sequence<'a>)) -> bool {
         loop {
@@ -348,7 +348,7 @@ impl MatchGenerator {
         self.concat_window.extend_from_slice(&data);
 
         if let Some(last_len) = self.window.last().map(|last| last.data.len()) {
-            for entry in self.window.iter_mut() {
+            for entry in &mut self.window {
                 entry.base_offset += last_len;
             }
         }
diff --git a/src/encoding/util.rs b/src/encoding/util.rs
index 920b1a4c..e9702042 100644
--- a/src/encoding/util.rs
+++ b/src/encoding/util.rs
@@ -1,4 +1,4 @@
-use alloc::vec::Vec;
+use core::array;
 
 /// Returns the minimum number of bytes needed to represent this value, as
 /// either 1, 2, 4, or 8 bytes. A value of 0 will still return one byte.
@@ -24,16 +24,18 @@ pub fn find_min_size(val: u64) -> usize {
 /// Returned vector will be 1, 2, 4, or 8 bytes in length. Zero is represented as 1 byte.
 ///
 /// Operates in **little-endian**.
-pub fn minify_val(val: u64) -> Vec<u8> {
+pub fn minify_val(val: u64) -> impl Iterator<Item = u8> {
     let new_size = find_min_size(val);
-    val.to_le_bytes()[0..new_size].to_vec()
+    // TODO: switch to `.into_iter()` when switching to 2021 edition
+    #[allow(deprecated)]
+    array::IntoIter::new(val.to_le_bytes()).take(new_size)
 }
 
 #[cfg(test)]
 mod tests {
     use super::find_min_size;
     use super::minify_val;
-    use alloc::vec;
+    use alloc::{vec, vec::Vec};
 
     #[test]
     fn min_size_detection() {
@@ -48,12 +50,15 @@ mod tests {
 
     #[test]
     fn bytes_minified() {
-        assert_eq!(minify_val(0), vec![0]);
-        assert_eq!(minify_val(0xff), vec![0xff]);
-        assert_eq!(minify_val(0xff_ff), vec![0xff, 0xff]);
-        assert_eq!(minify_val(0xff_ff_ff_ff), vec![0xff, 0xff, 0xff, 0xff]);
+        assert_eq!(minify_val(0).collect::<Vec<u8>>(), vec![0]);
+        assert_eq!(minify_val(0xff).collect::<Vec<u8>>(), vec![0xff]);
+        assert_eq!(minify_val(0xff_ff).collect::<Vec<u8>>(), vec![0xff, 0xff]);
         assert_eq!(
-            minify_val(0xffff_ffff_ffff_ffff),
+            minify_val(0xff_ff_ff_ff).collect::<Vec<u8>>(),
+            vec![0xff, 0xff, 0xff, 0xff]
+        );
+        assert_eq!(
+            minify_val(0xffff_ffff_ffff_ffff).collect::<Vec<u8>>(),
             vec![0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff]
         );
     }
diff --git a/src/fse/fse_decoder.rs b/src/fse/fse_decoder.rs
index 083d7756..1a131496 100644
--- a/src/fse/fse_decoder.rs
+++ b/src/fse/fse_decoder.rs
@@ -9,7 +9,7 @@ use alloc::vec::Vec;
 ///
 #[derive(Debug)]
 pub struct FSETable {
-    /// The maximum symbol in the table (inclusive). Limits the probabilities length to max_symbol + 1.
+    /// The maximum symbol in the table (inclusive). Limits the probabilities length to `max_symbol` + 1.
     max_symbol: u8,
     /// The actual table containing the decoded symbol and the compression data
     /// connected to that symbol.
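A quick aside before the encoder changes: the `minify_val`/`minify_val_fcs` rewrites above return `impl Iterator<Item = u8>` instead of `Vec<u8>` so header serialization no longer allocates a temporary vector, and the deprecated `array::IntoIter::new` constructor is needed because on the 2018 edition `.into_iter()` on an array auto-refs and yields `&u8`. A minimal standalone sketch of the idea (the simplified `find_min_size` and the demo `main` are ours, not part of the patch):

```rust
use core::array;

// Same contract as the crate's helper: 1, 2, 4, or 8 bytes; 0 still needs one byte.
fn find_min_size(val: u64) -> usize {
    match val {
        0..=0xff => 1,
        0x100..=0xffff => 2,
        0x1_0000..=0xffff_ffff => 4,
        _ => 8,
    }
}

// Little-endian minification without an intermediate Vec.
fn minify_val(val: u64) -> impl Iterator<Item = u8> {
    let new_size = find_min_size(val);
    // On the 2018 edition, `val.to_le_bytes().into_iter()` would yield `&u8`,
    // so the deprecated constructor is used until the crate moves to 2021.
    #[allow(deprecated)]
    array::IntoIter::new(val.to_le_bytes()).take(new_size)
}

fn main() {
    assert_eq!(minify_val(0xff_ff).collect::<Vec<u8>>(), vec![0xff, 0xff]);
    // Nothing is allocated unless the caller actually collects.
    assert_eq!(minify_val(0).count(), 1);
}
```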
diff --git a/src/fse/fse_encoder.rs b/src/fse/fse_encoder.rs
index a821b1e5..79cf78cf 100644
--- a/src/fse/fse_encoder.rs
+++ b/src/fse/fse_encoder.rs
@@ -27,7 +27,7 @@ impl<V: AsMut<Vec<u8>>> FSEEncoder<'_, V> {
         self.write_table();
 
         let mut state = self.table.start_state(data[data.len() - 1]);
-        for x in data[0..data.len() - 1].iter().rev().copied() {
+        for &x in data[0..data.len() - 1].iter().rev() {
             let next = self.table.next_state(x, state.index);
             let diff = state.index - next.baseline;
             self.writer.write_bits(diff as u64, next.num_bits as usize);
@@ -165,7 +165,7 @@ impl<V: AsMut<Vec<u8>>> FSEEncoder<'_, V> {
 pub struct FSETable {
     /// Indexed by symbol
     pub(super) states: [SymbolStates; 256],
-    /// Sum of all states.states.len()
+    /// Sum of all `states.states.len()`
     pub(crate) table_size: usize,
 }
 
@@ -237,7 +237,7 @@ fn build_table_from_counts(counts: &[usize], max_log: u8, avoid_0_numbit: bool)
 
     // shift all probabilities down so that the lowest are 1
     min_count -= 1;
-    for prob in probs.iter_mut() {
+    for prob in &mut probs {
         if *prob > 0 {
             *prob -= min_count as i32;
         }
     }
diff --git a/src/huff0/huff0_encoder.rs b/src/huff0/huff0_encoder.rs
index dfb3c65d..6ecc0d10 100644
--- a/src/huff0/huff0_encoder.rs
+++ b/src/huff0/huff0_encoder.rs
@@ -1,5 +1,5 @@
 use alloc::vec::Vec;
-use core::cmp::Ordering;
+use core::{cmp::Ordering, convert::TryFrom};
 
 use crate::{
     encoding::bit_writer::BitWriter,
@@ -66,14 +66,14 @@ impl<V: AsMut<Vec<u8>>> HuffmanEncoder<'_, V> {
         Self::encode_stream(&self.table, self.writer, src4);
 
         // Sanity check, if this doesn't hold we produce a broken stream
-        assert!(size1 <= u16::MAX as usize);
-        assert!(size2 <= u16::MAX as usize);
-        assert!(size3 <= u16::MAX as usize);
+        let size1 = u16::try_from(size1).unwrap();
+        let size2 = u16::try_from(size2).unwrap();
+        let size3 = u16::try_from(size3).unwrap();
 
         // Update the jumptable with the real sizes
-        self.writer.change_bits(size_idx, size1 as u16, 16);
-        self.writer.change_bits(size_idx + 16, size2 as u16, 16);
-        self.writer.change_bits(size_idx + 32, size3 as u16, 16);
+        self.writer.change_bits(size_idx, size1, 16);
+        self.writer.change_bits(size_idx + 16, size2, 16);
+        self.writer.change_bits(size_idx + 32, size3, 16);
     }
 
     /// Encode one stream and pad it to fill the last byte
@@ -128,7 +128,7 @@ impl<V: AsMut<Vec<u8>>> HuffmanEncoder<'_, V> {
         self.writer.write_bits(weights.len() as u8 + 127, 8);
         let pairs = weights.chunks_exact(2);
         let remainder = pairs.remainder();
-        for pair in pairs.into_iter() {
+        for pair in pairs {
             let weight1 = pair[0];
             let weight2 = pair[1];
             assert!(weight1 < 16);
@@ -218,16 +218,14 @@ impl HuffmanTable {
 
         // Determine the number of bits needed for codes with the lowest weight
        let weight_sum = sorted.iter().map(|e| 1 << (e.weight - 1)).sum::<usize>();
-        if !weight_sum.is_power_of_two() {
-            panic!("This is an internal error");
-        }
+        assert!(weight_sum.is_power_of_two(), "This is an internal error");
         let max_num_bits = highest_bit_set(weight_sum) - 1; // this is a log_2 of a clean power of two
 
         // Starting at the symbols with the lowest weight we update the placeholders in the table
         let mut current_code = 0;
         let mut current_weight = 0;
         let mut current_num_bits = 0;
-        for entry in sorted.iter() {
+        for entry in &sorted {
             // If the entry isn't the same weight as the last one we need to change a few things
             if current_weight != entry.weight {
                 // The code shifts by the difference of the weights to allow for enough unique values
@@ -317,7 +315,7 @@ fn distribute_weights(amount: usize) -> Vec<usize> {
     weights
 }
 
-/// Sometimes distribute_weights generates weights that require too many bits to encode
+/// Sometimes `distribute_weights` generates weights that require too many bits to encode
 /// This redistributes the weights to have less variance by raising the lower weights while still maintaining the
 /// required attributes of the weight distribution
 fn redistribute_weights(weights: &mut [usize], max_num_bits: usize) {
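A pattern worth calling out across this patch (`zstd.rs`, `frame.rs`, `block_header.rs`, `match_generator.rs`, and the huff0 jumptable above): lossless widenings move from `as` to `From`, and possibly-lossy narrowings move to `TryFrom` so an overflow fails loudly instead of silently wrapping. A small standalone sketch (the values are illustrative, not from the codebase):

```rust
use core::convert::TryFrom;

fn main() {
    // Widening can never lose bits; `u64::from` documents that intent.
    let skip_size: u32 = 300_000;
    assert_eq!(u64::from(skip_size), 300_000u64);

    // Narrowing with `as` silently wraps: 70_000 % 65_536 == 4_464 ...
    let stream_size: usize = 70_000;
    assert_eq!(stream_size as u16, 4_464);
    // ... while `try_from` surfaces the overflow, which is what the huff0
    // stream-size sanity check now relies on.
    assert!(u16::try_from(stream_size).is_err());
}
```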