Drop some unsafes - the compiler now optimizes equivalent safe code #43
Changes from 9 commits
@@ -65,8 +65,6 @@
//! }
//! ```

#![cfg_attr(feature = "unstable", feature(core))]

extern crate adler32;

use std::cmp;

@@ -182,16 +180,6 @@ struct BitStream<'a> {
    state: BitState,
}

// Use this instead of triggering a panic (that will unwind).
#[cfg(feature = "unstable")]
fn abort() -> ! {
    unsafe { ::std::intrinsics::abort() }
}
#[cfg(not(feature = "unstable"))]
fn abort() -> ! {
    panic!()
}

#[cfg(debug)]
macro_rules! debug { ($($x:tt)*) => (println!($($x)*)) }
#[cfg(not(debug))]

@@ -224,10 +212,7 @@ impl<'a> BitStream<'a> {
            return false;
        }
        if n > 8 && self.state.n < n {
            if n > 16 {
                // HACK(eddyb) in place of a static assert.
                abort();
            }
            assert!(n <= 16);
            if !self.use_byte() {
                return false;
            }

@@ -248,10 +233,7 @@ impl<'a> BitStream<'a> {
    }

    fn take(&mut self, n: u8) -> Option<u8> {
        if n > 8 {
            // HACK(eddyb) in place of a static assert.
            abort();
        }
        assert!(n <= 8);
        self.take16(n).map(|v: u16| v as u8)
    }

@@ -404,7 +386,7 @@ impl CodeLengthReader {
                        self.result.push(0);
                    }
                }
                _ => abort(),
                _ => panic!(),
            }
        }
        Ok(true)

@@ -625,6 +607,9 @@ impl InflateStream {
    fn run_len_dist(&mut self, len: u16, dist: u16) -> Result<Option<u16>, String> {
        debug!("RLE -{}; {} (cap={} len={})", dist, len,
               self.buffer.capacity(), self.buffer.len());
        if dist < 1 {
            return Err("invalid run length in stream".to_owned());
        }
        let buffer_size = self.buffer.capacity() as u16;
        let len = if self.pos < dist {
            // Handle copying from ahead, until we hit the end reading.

@@ -638,26 +623,13 @@ impl InflateStream {
                return Err("run length distance is bigger than the window size".to_owned());
            }
            let forward = buffer_size - dist;
            // assert for unsafe code:
            if pos_end + forward > self.buffer.len() as u16 {
                return Err("invalid run length in stream".to_owned());
            }
            unsafe {
                // HACK(eddyb) avoid bound checks, LLVM can't optimize these.
                let buffer = self.buffer.as_mut_ptr();
                let dst_end = buffer.offset(pos_end as isize);
                let mut dst = buffer.offset(self.pos as isize);
                let mut src = dst.offset(forward as isize);
                while dst < dst_end {
                    *dst = *src;
                    dst = dst.offset(1);
                    src = src.offset(1);
                }
            for i in self.pos as usize..pos_end as usize {
                self.buffer[i] = self.buffer[i + forward as usize];
            }
            // for i in self.pos as usize..pos_end as usize {
            //     self.buffer[i] = self.buffer[i + forward as usize]
            // }
            //
            self.pos = pos_end;
            left
        } else {

@@ -671,32 +643,18 @@ impl InflateStream {
                (buffer_size, Some(pos_end - buffer_size))
            };

            if self.pos < dist && pos_end > self.pos {
                return Err("invalid run length in stream".to_owned());
            }

            if self.buffer.len() < pos_end as usize {
                unsafe {
                    self.buffer.set_len(pos_end as usize);
Collaborator

I wonder if
Collaborator

That is, replacing it with:

```rust
for i in self.pos as usize..self.buffer.len().min(pos_end as usize) {
    self.buffer[i] = self.buffer[i - dist as usize];
}
assert!(pos_end as usize <= self.buffer.capacity());
while self.buffer.len() < pos_end as usize {
    let x = self.buffer[self.buffer.len() - dist as usize];
    self.buffer.push(x);
}
```

One interesting question would be whether we are even validating that
Member (Author)

I am still iterating on this part, so I didn't include it in this PR. I will try this and report the performance impact. Thanks!

I'm also going to try extend_from_slice just to see how that works. Probably poorly, because the slice belongs to the same buffer, and since the vector might be reallocated the slice could be invalidated, so I'd have to clone it. Now if I had a fixed-size vector that was guaranteed not to reallocate, that might have been faster than
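As an aside on the borrow point above, here is a minimal sketch (hypothetical helper, not code from this crate) of why extending a Vec from a slice of itself forces an extra copy:

```rust
// Sketch only: `extend_from_slice` cannot read from the same Vec it is
// extending, so the window has to be copied out first.
fn append_from_self(buffer: &mut Vec<u8>, start: usize, len: usize) {
    // buffer.extend_from_slice(&buffer[start..start + len]);
    // ^ rejected by the borrow checker: `buffer` would be borrowed mutably
    //   and immutably at once (and could reallocate mid-copy).
    let window = buffer[start..start + len].to_vec(); // the extra clone mentioned above
    buffer.extend_from_slice(&window);
}

fn main() {
    let mut buffer = vec![1u8, 2, 3, 4];
    append_from_self(&mut buffer, 1, 2);
    assert_eq!(buffer, [1, 2, 3, 4, 2, 3]);
}
```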
Collaborator

You'd need a different loop structure with something like
Member (Author)

Sadly, the code you've suggested incurs a 9-11% performance penalty on decompressing entire files, depending on the file. I have tweaked it a bit and got it down to an 8-10% penalty; here's the code:

```rust
let upper_bound = self.buffer.len().min(pos_end as usize);
for i in self.pos as usize..upper_bound {
    self.buffer[i] = self.buffer[i - dist as usize];
}
assert!(pos_end as usize <= self.buffer.capacity());
let initial_buffer_len = self.buffer.len();
for i in initial_buffer_len..pos_end as usize {
    let x = self.buffer[i - dist as usize];
    self.buffer.push(x);
}
```

Presence or absence of assert() has no effect (in this code, I haven't tested the variant with

I also got 10% performance overhead simply by replacing
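For context, a sketch of the two ways of growing the output buffer that this thread keeps comparing: the unsafe `set_len` visible in the diff above, and the safe `Vec::resize` used in the prototypes below, whose zero-fill is the "safe memory initialization" cost discussed later. The helper names are illustrative only, and the sketch assumes the caller has already reserved at least `pos_end` bytes of capacity (which is `set_len`'s precondition):

```rust
// Illustrative comparison, not code from the crate.
fn grow_unsafe(buffer: &mut Vec<u8>, pos_end: usize) {
    unsafe {
        // Old approach: bump the length without touching the bytes; they are
        // only written later by the run-length copy.
        buffer.set_len(pos_end);
    }
}

fn grow_safe(buffer: &mut Vec<u8>, pos_end: usize) {
    // Safe approach from the prototypes below: zero-fill the new region,
    // which costs an extra memset.
    buffer.resize(pos_end, 0u8);
}

fn main() {
    let mut a: Vec<u8> = Vec::with_capacity(16);
    let mut b: Vec<u8> = Vec::with_capacity(16);
    grow_unsafe(&mut a, 8);
    grow_safe(&mut b, 8);
    assert_eq!(a.len(), b.len());
}
```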
Member (Author)

Thank you for investigating! This is probably the single best response to me filing a security issue I've ever seen, and I didn't even have a proof of concept this time.

As for the commit message - function

Also, here's my memcpy-based prototype:

```rust
if self.buffer.len() < pos_end as usize {
    self.buffer.resize(pos_end as usize, 0u8);
}
fill_slice_with_subslice(&mut self.buffer, (self.pos as usize - dist as usize, self.pos as usize), (self.pos as usize, pos_end as usize));

fn fill_slice_with_subslice(slice: &mut [u8], (source_from, source_to): (usize, usize), (dest_from, dest_to): (usize, usize)) {
    let (source, destination) = if dest_from >= source_from { slice.split_at_mut(dest_from) } else { slice.split_at_mut(source_from) };
    let source = &mut source[source_from..source_to];
    let destination = &mut destination[..dest_to - dest_from];
    for i in (0..(destination.len() / source.len())).map(|x| x * source.len()) {
        destination[i..source.len() + i].copy_from_slice(&source);
    }
}
```

It fails some tests and I have been trying to understand why to no avail, so I'm afraid I won't be able to complete it. I'm afraid I'm not familiar with assembler or LLVM IR, so I will not be able to inspect it in any meaningful way. Sorry. I will benchmark your suggested changes with
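A hedged aside on the prototype above - one plausible reading of the failure, not confirmed in this thread: the loop iterates only over whole source-sized chunks, so when the destination length is not a multiple of the source length the tail bytes are never written. The later revision adds an explicit `remaining_chunk` copy for exactly that tail. A standalone illustration:

```rust
// Standalone illustration: the full-chunks-only loop leaves the tail unwritten
// when the destination length is not a multiple of the source length.
fn main() {
    let source = [1u8, 2, 3];
    let mut destination = [0u8; 8]; // 8 is not a multiple of 3
    for i in (0..(destination.len() / source.len())).map(|x| x * source.len()) {
        destination[i..source.len() + i].copy_from_slice(&source);
    }
    // Two full chunks were copied; the final 2 bytes were never written.
    assert_eq!(destination, [1, 2, 3, 1, 2, 3, 0, 0]);
}
```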
Collaborator

Note that I don't mean I wouldn't have done the

But anyway,
Member (Author)

I've conjured up a function based on

```rust
if self.buffer.len() < pos_end as usize {
    self.buffer.resize(pos_end as usize, 0u8);
}
fill_slice_with_subslice(&mut self.buffer, (self.pos as usize - dist as usize, self.pos as usize), (self.pos as usize, pos_end as usize));

fn fill_slice_with_subslice(slice: &mut [u8], (source_from, source_to): (usize, usize), (dest_from, dest_to): (usize, usize)) {
    let (source, destination) = slice.split_at_mut(dest_from); // TODO: allow destination to be lower than source
    let source = &source[source_from..source_to];
    let destination = &mut destination[..(dest_to - dest_from)];
    let mut offset = 0;
    while offset + source.len() < destination.len() {
        destination[offset..source.len() + offset].copy_from_slice(&source);
        offset += source.len();
    }
    let remaining_chunk = destination.len() - offset;
    &mut destination[offset..].copy_from_slice(&source[..remaining_chunk]);
}
```

It offsets some of the costs of safe memory initialization, so that switching to safe initialization would only create 5% overhead instead of 10%. If we switch the loop above to something like this too, then we'd have an entirely safe crate with the same performance as before. I have a branch with all changes from this PR plus the optimized loop: https://github.com/Shnatsel/inflate/tree/safe-with-optimized-loop

Sadly, this function still fails one test - the line with its invocation causes an integer overflow on test "issue_30_realworld", i.e.
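One hedged guess at where the overflow could come from, not confirmed in the thread: the only arithmetic on the invoking line is `self.pos as usize - dist as usize`, which underflows in debug builds whenever `dist` exceeds `pos`. A minimal sketch of a checked variant, with illustrative names and error text:

```rust
// Hedged guess only - a guard for the subtraction that feeds the copy helper.
fn source_start(pos: u16, dist: u16) -> Result<usize, String> {
    (pos as usize)
        .checked_sub(dist as usize)
        .ok_or_else(|| "run length distance reaches before the start of the output".to_owned())
}

fn main() {
    assert_eq!(source_start(10, 3), Ok(7));
    assert!(source_start(2, 5).is_err()); // the case that would overflow in a debug build
}
```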
Collaborator

I still think this code could be much simpler if it was duplicated between forward and backward, just like the old code - in fact, it would be very similar to the old code, just doing more than one element at a time. Also, is there a benefit to always using the same source subslice, or is it just as efficient / more efficient to always copy the last
Member (Author)

Always copying the last

The code I ended up with looks like this:

```rust
let (source, destination) = (&mut self.buffer).split_at_mut(self.pos as usize);
let source = &source[source.len() - dist as usize..];
let mut offset = 0;
while offset + source.len() < destination.len() {
    destination[offset..source.len() + offset].copy_from_slice(&source);
    offset += source.len();
}
let remaining_chunk = destination.len() - offset;
&mut destination[offset..].copy_from_slice(&source[..remaining_chunk]);
```

Which is a bit more readable. This nets a 3% to 7% performance improvement. Surprisingly, putting the same code in the other copying loop in this function actually hurts performance by 1% on my samples.

I've also tried an iterator-based version, which is concise but as slow as copying byte-by-byte:

```rust
let (source, destination) = (&mut self.buffer).split_at_mut(self.pos as usize);
let source = &source[source.len() - dist as usize..];
for (d, s) in destination.chunks_mut(dist as usize).zip(source.chunks(dist as usize).cycle()) {
    let d_len = d.len(); // last chunk has a size lower than we've specified
    d.copy_from_slice(&s[..d_len]);
}
```

However, I've realized that this function can return pretty much any garbage and tests won't fail. Since this creates regression potential, optimizing copying falls out of scope of this PR.
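Since the tests admittedly would not catch garbage output here, a sketch of the kind of equivalence check that would (hypothetical test, not part of the crate; `chunked_copy` mirrors the iterator-based snippet above, simplified so the destination is exactly the run being produced, and `naive_copy` is the byte-by-byte LZ77 reference):

```rust
// Hypothetical equivalence check between the chunked copy and the naive copy.
fn chunked_copy(buffer: &mut Vec<u8>, pos: usize, pos_end: usize, dist: usize) {
    buffer.resize(pos_end, 0);
    let (history, destination) = buffer.split_at_mut(pos);
    let source = &history[history.len() - dist..];
    for (d, s) in destination.chunks_mut(dist).zip(source.chunks(dist).cycle()) {
        let d_len = d.len(); // the last chunk may be shorter
        d.copy_from_slice(&s[..d_len]);
    }
}

fn naive_copy(buffer: &mut Vec<u8>, pos: usize, pos_end: usize, dist: usize) {
    buffer.resize(pos_end, 0);
    for i in pos..pos_end {
        buffer[i] = buffer[i - dist];
    }
}

fn main() {
    let history: Vec<u8> = (0u8..20).collect();
    for dist in 1..=history.len() {
        let (mut a, mut b) = (history.clone(), history.clone());
        let pos = history.len();
        let pos_end = pos + 13; // arbitrary run length, deliberately not a multiple of dist
        chunked_copy(&mut a, pos, pos_end, dist);
        naive_copy(&mut b, pos, pos_end, dist);
        assert_eq!(a, b, "mismatch for dist = {}", dist);
    }
    println!("chunked copy matches the byte-by-byte reference for every dist");
}
```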
                }
            }

            // assert for unsafe code:
            if self.pos < dist && pos_end > self.pos {
                return Err("invalid run length in stream".to_owned());
            }
            unsafe {
                // HACK(eddyb) avoid bound checks, LLVM can't optimize these.
                let buffer = self.buffer.as_mut_ptr();
                let dst_end = buffer.offset(pos_end as isize);
                let mut dst = buffer.offset(self.pos as isize);
                let mut src = dst.offset(-(dist as isize));
                while dst < dst_end {
                    *dst = *src;
                    dst = dst.offset(1);
                    src = src.offset(1);
                }
            for i in self.pos as usize..pos_end as usize {
                self.buffer[i] = self.buffer[i - dist as usize];
            }
            // for i in self.pos as usize..pos_end as usize {
            //     self.buffer[i] = self.buffer[i - dist as usize]
            // }
            //
            self.pos = pos_end;
            Ok(left)
        }
|
@@ -713,9 +671,7 @@ impl InflateStream {
        if (self.pos as usize) < self.buffer.len() {
            self.buffer[self.pos as usize] = b;
        } else {
            if (self.pos as usize) != self.buffer.len() {
                abort();
            }
            assert_eq!(self.pos as usize, self.buffer.len());
            self.buffer.push(b);
        }
        self.pos += 1;

Is this needed at all? I think all it did was disable unstable by default.