From 86e5c15e8a6fefe77e74e06438c39302a3b6c172 Mon Sep 17 00:00:00 2001 From: Steven Bosnick Date: Thu, 16 Apr 2020 22:14:50 -0400 Subject: [PATCH 1/7] Add tests for Uri::try_from() for &[u8] The tests include an example of invalid UTF-8 which is an error. --- src/uri/tests.rs | 38 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 38 insertions(+) diff --git a/src/uri/tests.rs b/src/uri/tests.rs index 719cb94e..4739a917 100644 --- a/src/uri/tests.rs +++ b/src/uri/tests.rs @@ -1,4 +1,5 @@ use std::str::FromStr; +use std::convert::TryFrom; use super::{ErrorKind, InvalidUri, Port, Uri, URI_CHARS}; @@ -517,3 +518,40 @@ fn test_partial_eq_path_with_terminating_questionmark() { assert_eq!(uri, a); } + +#[test] +fn test_uri_from_u8_slice() { + let uri = Uri::try_from(b"http://example.com".as_ref()).expect("conversion"); + + assert_eq!(uri, "http://example.com"); +} + +#[test] +fn test_uri_from_u8_slice_error() { + fn err(s: &[u8]) { + Uri::try_from(s).unwrap_err(); + } + + err(b"http://"); + err(b"htt:p//host"); + err(b"hyper.rs/"); + err(b"hyper.rs?key=val"); + err(b"?key=val"); + err(b"localhost/"); + err(b"localhost?key=val"); + err(b"\0"); + err(b"http://[::1"); + err(b"http://::1]"); + err(b"localhost:8080:3030"); + err(b"@"); + err(b"http://username:password@/wut"); + + // illegal queries + err(b"/?foo\rbar"); + err(b"/?foo\nbar"); + err(b"/?<"); + err(b"/?>"); + + // invalid UTF-8 + err(&[0xc0]) +} From ce01f23823c825b84a4d773757f5fa5b6e74d0ad Mon Sep 17 00:00:00 2001 From: Steven Bosnick Date: Thu, 16 Apr 2020 23:00:01 -0400 Subject: [PATCH 2/7] Add another test for an invalid Uri from &[u8] This test targets and invalid UTF-8 byte after valid bytes for a uri scheme. --- src/uri/tests.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/uri/tests.rs b/src/uri/tests.rs index 4739a917..74b59d13 100644 --- a/src/uri/tests.rs +++ b/src/uri/tests.rs @@ -553,5 +553,6 @@ fn test_uri_from_u8_slice_error() { err(b"/?>"); // invalid UTF-8 - err(&[0xc0]) + err(&[0xc0]); + err(&[b'h', b't', b't', b'p', b':', b'/', b'/', 0xc0]); } From 012680139221249ef71bdf0a37f8f2069afabd5d Mon Sep 17 00:00:00 2001 From: Steven Bosnick Date: Fri, 24 Apr 2020 09:07:27 -0400 Subject: [PATCH 3/7] Add more test of invalid UTF-8 --- src/uri/tests.rs | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/src/uri/tests.rs b/src/uri/tests.rs index 74b59d13..35dd1dc7 100644 --- a/src/uri/tests.rs +++ b/src/uri/tests.rs @@ -555,4 +555,14 @@ fn test_uri_from_u8_slice_error() { // invalid UTF-8 err(&[0xc0]); err(&[b'h', b't', b't', b'p', b':', b'/', b'/', 0xc0]); + err(b"http://example.com/\0xc0"); + err(b"http://example.com/path?\0xc0"); +} + +#[test] +fn test_uri_with_invalid_fragment_is_valid() { + let uri_bytes = b"http://example.com/path?query#\0xc0"; + let uri = Uri::try_from(uri_bytes.as_ref()).expect("conversion error"); + + assert_eq!(uri, "http://example.com/path?query"); } From b5fcf4a7470edc4b9d1a20f3ad88e3ee8f5752e1 Mon Sep 17 00:00:00 2001 From: Steven Bosnick Date: Fri, 24 Apr 2020 21:22:27 -0400 Subject: [PATCH 4/7] Refactor parse_full() in uri/mod The refactoring simplifies the handling of the authority component of a Uri by eliminating code duplication. --- src/uri/mod.rs | 29 +++++++++++------------------ 1 file changed, 11 insertions(+), 18 deletions(-) diff --git a/src/uri/mod.rs b/src/uri/mod.rs index 716ea1ad..ea0a7bd6 100644 --- a/src/uri/mod.rs +++ b/src/uri/mod.rs @@ -785,12 +785,14 @@ impl From for Parts { } } +// parse_full() parses a Uri that includes more than just a path. It +// expects that at least one of the scheme or authority will be present +// as well. fn parse_full(mut s: Bytes) -> Result { // Parse the scheme let scheme = match Scheme2::parse(&s[..])? { Scheme2::None => Scheme2::None, Scheme2::Standard(p) => { - // TODO: use truncate let _ = s.split_to(p.len() + 3); Scheme2::Standard(p) } @@ -798,8 +800,8 @@ fn parse_full(mut s: Bytes) -> Result { // Grab the protocol let mut scheme = s.split_to(n + 3); - // Strip ://, TODO: truncate - let _ = scheme.split_off(n); + // Strip :// + scheme.truncate(n); // Allocate the ByteStr let val = unsafe { ByteStr::from_utf8_unchecked(scheme) }; @@ -813,24 +815,15 @@ fn parse_full(mut s: Bytes) -> Result { let authority_end = Authority::parse(&s[..])?; if scheme.is_none() { + // Path is not allowed if there is no scheme. if authority_end != s.len() { return Err(ErrorKind::InvalidFormat.into()); } - - let authority = Authority { - data: unsafe { ByteStr::from_utf8_unchecked(s) }, - }; - - return Ok(Uri { - scheme: scheme.into(), - authority: authority, - path_and_query: PathAndQuery::empty(), - }); - } - - // Authority is required when absolute - if authority_end == 0 { - return Err(ErrorKind::InvalidFormat.into()); + } else { + // Authority is required when absolute + if authority_end == 0 { + return Err(ErrorKind::InvalidFormat.into()); + } } let authority = s.split_to(authority_end); From 2850d83855e141d166cd6e6b3b982e271eef61a6 Mon Sep 17 00:00:00 2001 From: Steven Bosnick Date: Sat, 25 Apr 2020 11:36:40 -0400 Subject: [PATCH 5/7] Add comments to uses of unsafe in uri/mod.rs The two uses of unsafe rely on postconditions in other functions which are also documented to show this reliance. --- src/uri/mod.rs | 6 ++++++ src/uri/scheme.rs | 9 +++++++++ 2 files changed, 15 insertions(+) diff --git a/src/uri/mod.rs b/src/uri/mod.rs index ea0a7bd6..38df0287 100644 --- a/src/uri/mod.rs +++ b/src/uri/mod.rs @@ -804,6 +804,8 @@ fn parse_full(mut s: Bytes) -> Result { scheme.truncate(n); // Allocate the ByteStr + // Safety: the postcondition on Scheme2::parse() means that + // s[0..n+3] is valid UTF-8. scheme is a subslice of s[0..n+3]. let val = unsafe { ByteStr::from_utf8_unchecked(scheme) }; Scheme2::Other(Box::new(val)) @@ -828,6 +830,10 @@ fn parse_full(mut s: Bytes) -> Result { let authority = s.split_to(authority_end); let authority = Authority { + // Safety: The postcondition on Authority::parse() means that + // s[0..authority_end] is valid UTF-8 after that call. The call + // to s.split_to() means that authority here is what s[0..authority_end] + // was after the call to Authority::parse(). data: unsafe { ByteStr::from_utf8_unchecked(authority) }, }; diff --git a/src/uri/scheme.rs b/src/uri/scheme.rs index 5bbab11e..be881c33 100644 --- a/src/uri/scheme.rs +++ b/src/uri/scheme.rs @@ -253,6 +253,7 @@ impl Scheme2 { } } + // Postcondition: On Ok(Scheme2::Other(n)) return, s[0..n+3] is valid UTF-8 pub(super) fn parse(s: &[u8]) -> Result, InvalidUri> { if s.len() >= 7 { // Check for HTTP @@ -270,6 +271,9 @@ impl Scheme2 { } if s.len() > 3 { + // The only Ok(Scheme2::Option(n)) return from this function is an + // early exit from this loop. This loop checks each byte in s against + // SCHEME_CHARS until until one of the early exit conditions. for i in 0..s.len() { let b = s[i]; @@ -290,10 +294,15 @@ impl Scheme2 { } // Return scheme + // Postcondition: Every byte in s[0..i] has matched the + // _ arm so is valid UTF-8. s[i..i+3] matches "://" which + // is also valid UTF-8. Thus s[0..i+3] is valid UTF-8. return Ok(Scheme2::Other(i)); } // Invald scheme character, abort 0 => break, + // Valid scheme character: imples that b is a valid, single + // byte UTF-8 codepoint. _ => {} } } From c7bc09ba09dfac5603fd6984ab823284ca63369d Mon Sep 17 00:00:00 2001 From: Steven Bosnick Date: Sat, 25 Apr 2020 15:01:41 -0400 Subject: [PATCH 6/7] Fix build error on nightly The nightly build was failing because of a know regression concerning the use of the new "unused_braces" lint in doc tests. --- src/lib.rs | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/lib.rs b/src/lib.rs index 1c967231..58a076d0 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -160,6 +160,9 @@ #![deny(warnings, missing_docs, missing_debug_implementations)] +// Nightly Regression https://github.com/rust-lang/rust/issues/70814 +#![allow(unknown_lints, unused_braces)] + #[cfg(test)] #[macro_use] extern crate doc_comment; From 8e6758e9529dd7b6a542f7ebf4d12e70fc18e492 Mon Sep 17 00:00:00 2001 From: Steven Bosnick Date: Sun, 3 May 2020 10:28:39 -0400 Subject: [PATCH 7/7] Revert "Fix build error on nightly" This reverts commit c7bc09ba09dfac5603fd6984ab823284ca63369d. --- src/lib.rs | 3 --- 1 file changed, 3 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 58a076d0..1c967231 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -160,9 +160,6 @@ #![deny(warnings, missing_docs, missing_debug_implementations)] -// Nightly Regression https://github.com/rust-lang/rust/issues/70814 -#![allow(unknown_lints, unused_braces)] - #[cfg(test)] #[macro_use] extern crate doc_comment;