From 81e2d7c83c059be8db92111326103b5adac87e6c Mon Sep 17 00:00:00 2001 From: Steven Bosnick Date: Sat, 2 May 2020 12:09:05 -0400 Subject: [PATCH 1/9] Add comment for some use of unsafe in HeaderValue Document the sound use of unsafe in HeaderValue::to_str(). --- src/header/value.rs | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/header/value.rs b/src/header/value.rs index 4a765aff..6269d819 100644 --- a/src/header/value.rs +++ b/src/header/value.rs @@ -232,6 +232,9 @@ impl HeaderValue { } } + // Safety: the loop above checks that each byte is_valid_ascii() and + // the postcondtion on is_valid_ascii() implies that each byte is + // valid single-byte UTF-8. unsafe { Ok(str::from_utf8_unchecked(bytes)) } } @@ -550,6 +553,8 @@ mod try_from_header_name_tests { } } +// Postcondition: if the return is true then b is a visible ascii byte +// which implies that it is a valid single-byte UTF-8 codepoint. fn is_visible_ascii(b: u8) -> bool { b >= 32 && b < 127 || b == b'\t' } From 5f11ac4c0a8dcf47aade65a1f7e83e92b81f10b0 Mon Sep 17 00:00:00 2001 From: Steven Bosnick Date: Sat, 2 May 2020 16:29:43 -0400 Subject: [PATCH 2/9] Add addtional tests for HeaderValue Debug impl The tests inlcude variation on the placement of unicode codepoints that encode to multi-byte UTF-8 and an instance of a [u8] with invalid UTF-8 that is still a valid HeaderValue. --- src/header/value.rs | 49 ++++++++++++++++++++++++++++----------------- 1 file changed, 31 insertions(+), 18 deletions(-) diff --git a/src/header/value.rs b/src/header/value.rs index 6269d819..ef38d5d6 100644 --- a/src/header/value.rs +++ b/src/header/value.rs @@ -745,26 +745,39 @@ impl<'a> PartialOrd for &'a str { } } -#[test] -fn test_try_from() { - HeaderValue::try_from(vec![127]).unwrap_err(); -} +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_try_from() { + HeaderValue::try_from(vec![127]).unwrap_err(); + } -#[test] -fn test_debug() { - let cases = &[ - ("hello", "\"hello\""), - ("hello \"world\"", "\"hello \\\"world\\\"\""), - ("\u{7FFF}hello", "\"\\xe7\\xbf\\xbfhello\""), - ]; + #[test] + fn test_debug() { + let cases = &[ + // Note: Unicode codepoint U+7FFF is encoded as e7, bf, bf in UTF-8 + ("hello", "\"hello\""), + ("hello \"world\"", "\"hello \\\"world\\\"\""), + ("\u{7FFF}hello", "\"\\xe7\\xbf\\xbfhello\""), + ("hell\u{7FFF}o", "\"hell\\xe7\\xbf\\xbfo\""), + ("hello\u{7FFF}", "\"hello\\xe7\\xbf\\xbf\""), + ]; + + for &(value, expected) in cases { + let val = HeaderValue::from_bytes(value.as_bytes()).unwrap(); + let actual = format!("{:?}", val); + assert_eq!(expected, actual); + } - for &(value, expected) in cases { - let val = HeaderValue::from_bytes(value.as_bytes()).unwrap(); + // test invalid UTF-8 + let val = HeaderValue::from_bytes(b"\xC0").unwrap(); let actual = format!("{:?}", val); - assert_eq!(expected, actual); - } + assert_eq!("\"\\xc0\"", actual); - let mut sensitive = HeaderValue::from_static("password"); - sensitive.set_sensitive(true); - assert_eq!("Sensitive", format!("{:?}", sensitive)); + let mut sensitive = HeaderValue::from_static("password"); + sensitive.set_sensitive(true); + assert_eq!("Sensitive", format!("{:?}", sensitive)); + } } From 992fdccedbf50b33968cf563de175331d5382856 Mon Sep 17 00:00:00 2001 From: Steven Bosnick Date: Sat, 2 May 2020 17:18:33 -0400 Subject: [PATCH 3/9] Restructure Debug impl for HeaderValue The new code uses Iterator::try_fold() instead of external iteration to review the bytes in the HeaderValue and separate the printing of visible ascii bytes printing other bytes (and special casing b'"'). This new implementation may behave slightly different for a HeaderValue of length usize::MAX, but such a HeaderValue is likely to exhaust memory before the Debug impl comes into play (which also makes it hard to test this hypothetical edge case). --- src/header/value.rs | 35 +++++++++++++++++------------------ 1 file changed, 17 insertions(+), 18 deletions(-) diff --git a/src/header/value.rs b/src/header/value.rs index ef38d5d6..16611e66 100644 --- a/src/header/value.rs +++ b/src/header/value.rs @@ -344,23 +344,27 @@ impl fmt::Debug for HeaderValue { f.write_str("Sensitive") } else { f.write_str("\"")?; - let mut from = 0; let bytes = self.as_bytes(); - for (i, &b) in bytes.iter().enumerate() { - if !is_visible_ascii(b) || b == b'"' { - if from != i { - f.write_str(unsafe { str::from_utf8_unchecked(&bytes[from..i]) })?; - } + + let range = bytes.iter().try_fold(0..0, |range, &b| { + if is_visible_ascii(b) && b != b'"' { + Ok(range.start..range.end+1) + } else { + let text_run = unsafe { str::from_utf8_unchecked(&bytes[range.clone()]) }; + f.write_str(text_run)?; + if b == b'"' { f.write_str("\\\"")?; } else { write!(f, "\\x{:x}", b)?; } - from = i + 1; + + Ok(range.end+1..range.end+1) } - } + })?; - f.write_str(unsafe { str::from_utf8_unchecked(&bytes[from..]) })?; + let text_run = unsafe { str::from_utf8_unchecked(&bytes[range]) }; + f.write_str(text_run)?; f.write_str("\"") } } @@ -767,17 +771,12 @@ mod tests { for &(value, expected) in cases { let val = HeaderValue::from_bytes(value.as_bytes()).unwrap(); - let actual = format!("{:?}", val); - assert_eq!(expected, actual); - } + let actual = format!("{:?}", val); assert_eq!(expected, actual); } // test invalid UTF-8 - let val = HeaderValue::from_bytes(b"\xC0").unwrap(); - let actual = format!("{:?}", val); + let val = HeaderValue::from_bytes(b"\xC0").unwrap(); let actual = format!("{:?}", val); assert_eq!("\"\\xc0\"", actual); - let mut sensitive = HeaderValue::from_static("password"); - sensitive.set_sensitive(true); - assert_eq!("Sensitive", format!("{:?}", sensitive)); - } + let mut sensitive = HeaderValue::from_static("password"); sensitive.set_sensitive(true); + assert_eq!("Sensitive", format!("{:?}", sensitive)); } } From af5b44d211e7790d2ab29bf55280cd3f39267033 Mon Sep 17 00:00:00 2001 From: Steven Bosnick Date: Sat, 2 May 2020 21:52:02 -0400 Subject: [PATCH 4/9] Add comments about the use of unsafe in HeaderValue The comments describe the invariants that make the use of unsafe sound. --- src/header/value.rs | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/src/header/value.rs b/src/header/value.rs index 16611e66..d1e76059 100644 --- a/src/header/value.rs +++ b/src/header/value.rs @@ -346,10 +346,27 @@ impl fmt::Debug for HeaderValue { f.write_str("\"")?; let bytes = self.as_bytes(); + // Invariant1: bytes[range] is valid UTF-8 (for both instanses of range). + // Invariant2: In the try_fold() closure bytes[range.end] == b. + // + // bytes[0..0] is empty so the starting value of range trivally satisfies + // the invariant 1. The first invocaton of the closure has range.end == 0 + // and b == bytes[0], satisfying the second invariant. + // + // On the nth invocation of the closure b = bytes[range.end]. The end of + // range returned from the closure is range.end+1. On the n+1th invocation + // of the closure b will be the next byte of bytes which is bytes[range.end+1]. + // The second invariant follows by induction. let range = bytes.iter().try_fold(0..0, |range, &b| { if is_visible_ascii(b) && b != b'"' { + // Invariant: By the invariant bytes[range] is valid UTF-8. + // bytes[range.end] == b and the postcondition on + // is_visible_ascii() means that b (and hence bytes[range.end]) + // is valid, single-byte UTF-8 so bytes[range.start..range.end+1] + // is valid UTF-8 and invariant 1 is satified. Ok(range.start..range.end+1) } else { + // Safety: By invariant 1 bytes[range] is valid UTF-8 let text_run = unsafe { str::from_utf8_unchecked(&bytes[range.clone()]) }; f.write_str(text_run)?; @@ -359,10 +376,13 @@ impl fmt::Debug for HeaderValue { write!(f, "\\x{:x}", b)?; } + // Invariant: The range is empty so invariant 1 is trivally + // satified. Ok(range.end+1..range.end+1) } })?; + // Safety: By invariant 1 bytes[range] is valid UTF-8. let text_run = unsafe { str::from_utf8_unchecked(&bytes[range]) }; f.write_str(text_run)?; f.write_str("\"") From 6b4c85a17b915394bfae6e1e0661cc60cf64ff9f Mon Sep 17 00:00:00 2001 From: Steven Bosnick Date: Sat, 2 May 2020 22:02:58 -0400 Subject: [PATCH 5/9] Simpify the Debug impl for HeaderValue The simplification removes the need for one of the invariants that described how the use of unsafe is sound. --- src/header/value.rs | 22 ++++++++-------------- 1 file changed, 8 insertions(+), 14 deletions(-) diff --git a/src/header/value.rs b/src/header/value.rs index d1e76059..c79cd04b 100644 --- a/src/header/value.rs +++ b/src/header/value.rs @@ -346,24 +346,18 @@ impl fmt::Debug for HeaderValue { f.write_str("\"")?; let bytes = self.as_bytes(); - // Invariant1: bytes[range] is valid UTF-8 (for both instanses of range). - // Invariant2: In the try_fold() closure bytes[range.end] == b. + // Invariant: bytes[range] is valid UTF-8 (for both instanses of range). // // bytes[0..0] is empty so the starting value of range trivally satisfies - // the invariant 1. The first invocaton of the closure has range.end == 0 - // and b == bytes[0], satisfying the second invariant. - // - // On the nth invocation of the closure b = bytes[range.end]. The end of - // range returned from the closure is range.end+1. On the n+1th invocation - // of the closure b will be the next byte of bytes which is bytes[range.end+1]. - // The second invariant follows by induction. - let range = bytes.iter().try_fold(0..0, |range, &b| { + // the invariant. + let range = bytes.iter().try_fold(0..0, |range, _| { + let b = bytes[range.end]; if is_visible_ascii(b) && b != b'"' { // Invariant: By the invariant bytes[range] is valid UTF-8. - // bytes[range.end] == b and the postcondition on - // is_visible_ascii() means that b (and hence bytes[range.end]) - // is valid, single-byte UTF-8 so bytes[range.start..range.end+1] - // is valid UTF-8 and invariant 1 is satified. + // The postcondition on is_visible_ascii() means that b (and + // hence bytes[range.end]) is valid, single-byte UTF-8 so + // bytes[range.start..range.end+1] is valid UTF-8 and the + // invariant is satified. Ok(range.start..range.end+1) } else { // Safety: By invariant 1 bytes[range] is valid UTF-8 From 0a0024c5188f7eb62e8fe5ee916dc176844c805b Mon Sep 17 00:00:00 2001 From: Steven Bosnick Date: Sat, 2 May 2020 22:09:12 -0400 Subject: [PATCH 6/9] Add TODO marker for a HeaderValue function HeaderValue::from_maybe_shared_unchecked() is currently an unsafe function but it could be made a safe function without intoducing a soundness hole because it does not do anything directly that depends on `unsafe` and it isn't protecting any invariants of HeaderValue that are relied upon later when using `unsafe`. from_maybe_shared_unchecked() can accept bytes as a HeaderValue that are invalid bytes for what is described in RFC 7230 (section 3.2) but all such bytes would still be valid to convert directly to a str since they are all valid single-byte UTF-8. --- src/header/value.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/src/header/value.rs b/src/header/value.rs index c79cd04b..11afe7d9 100644 --- a/src/header/value.rs +++ b/src/header/value.rs @@ -166,6 +166,7 @@ impl HeaderValue { /// /// This function does NOT validate that illegal bytes are not contained /// within the buffer. + // TODO: Consider removing the 'unsafe' from this function. pub unsafe fn from_maybe_shared_unchecked(src: T) -> HeaderValue where T: AsRef<[u8]> + 'static, From 6364f795fb860e959d7133688f2e797be148ae57 Mon Sep 17 00:00:00 2001 From: Steven Bosnick Date: Sun, 24 May 2020 09:58:36 -0400 Subject: [PATCH 7/9] Remove unsafe from one HeaderValue function. HeaderValue::from_maybe_shared_unchecked had been marked unsafe but this was to indicate caller responsibility for api constraints and not caller responsibility for avoiding undefined behaviour. This change removes the use of unsafe for this purpose and instead uses a more explict doc comment to draw the caller's attention to the restrictions (as well as the continued use of the "_unchecked" suffix. This is not a breaking change (despite affecting the public API) because removing "unsafe" from a function still allows existing uses to compile. This use of "unsafe" was not protecting an invariant on the internal structre of HeaderValue that was relined on in unsafe blocks elsewhere in the module to ensure the absence of undefined behaviour. All (other) unsafe blocks in this module rely on local checking to ensure they avoid undefined behaviour. --- src/header/value.rs | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/header/value.rs b/src/header/value.rs index 11afe7d9..cdda7375 100644 --- a/src/header/value.rs +++ b/src/header/value.rs @@ -165,9 +165,10 @@ impl HeaderValue { /// Convert a `Bytes` directly into a `HeaderValue` without validating. /// /// This function does NOT validate that illegal bytes are not contained - /// within the buffer. - // TODO: Consider removing the 'unsafe' from this function. - pub unsafe fn from_maybe_shared_unchecked(src: T) -> HeaderValue + /// within the buffer. It is the caller's responsibility to ensure that + /// the bytes in `src` are all between 32 and 255 (inclusive), excluding + /// byte 127 (DEL). + pub fn from_maybe_shared_unchecked(src: T) -> HeaderValue where T: AsRef<[u8]> + 'static, { From 2328ec5662ad7f4d14084e8b3caa76387884665d Mon Sep 17 00:00:00 2001 From: Steven Bosnick Date: Sun, 24 May 2020 10:12:36 -0400 Subject: [PATCH 8/9] Correct comments in mod header::value Some of the comments describing the invariants necessary to ensure the sound use of "unsafe" refered to an older version of the reasoning that had used 2 invariants. This change makes them refer to "the invariant" instead of "invariant 1". --- src/header/value.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/header/value.rs b/src/header/value.rs index cdda7375..7c4673c8 100644 --- a/src/header/value.rs +++ b/src/header/value.rs @@ -362,7 +362,7 @@ impl fmt::Debug for HeaderValue { // invariant is satified. Ok(range.start..range.end+1) } else { - // Safety: By invariant 1 bytes[range] is valid UTF-8 + // Safety: By the invariant bytes[range] is valid UTF-8 let text_run = unsafe { str::from_utf8_unchecked(&bytes[range.clone()]) }; f.write_str(text_run)?; @@ -372,13 +372,13 @@ impl fmt::Debug for HeaderValue { write!(f, "\\x{:x}", b)?; } - // Invariant: The range is empty so invariant 1 is trivally + // Invariant: The range is empty so the invariant is trivally // satified. Ok(range.end+1..range.end+1) } })?; - // Safety: By invariant 1 bytes[range] is valid UTF-8. + // Safety: By the invariant bytes[range] is valid UTF-8. let text_run = unsafe { str::from_utf8_unchecked(&bytes[range]) }; f.write_str(text_run)?; f.write_str("\"") From f3c9e80de0b2ddf5692d25ef95fcbe8356f4a527 Mon Sep 17 00:00:00 2001 From: Steven Bosnick Date: Sun, 24 May 2020 10:36:22 -0400 Subject: [PATCH 9/9] Fix the "Build Status" indicator in README.md The "Build Status" markup badge had not been updated to refect the change of CI systems from TravisCI to GitHub Actions. --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index efaa1000..a54b9303 100644 --- a/README.md +++ b/README.md @@ -2,7 +2,7 @@ A general purpose library of common HTTP types -[![Build Status](https://travis-ci.org/hyperium/http.svg?branch=master)](https://travis-ci.org/hyperium/http) +![Build Status](https://github.com/hyperium/http/workflows/CI/badge.svg) [![Crates.io](https://img.shields.io/crates/v/http.svg)](https://crates.io/crates/http) [![Documentation](https://docs.rs/http/badge.svg)][dox]