@@ -77,10 +77,13 @@ impl<'a> TryFrom<&'a [u8]> for Scheme {
7777 None => Err ( ErrorKind :: InvalidScheme . into ( ) ) ,
7878 Standard ( p) => Ok ( Standard ( p) . into ( ) ) ,
7979 Other ( _) => {
80- // Unsafe: parse_exact already checks for a strict subset of UTF-8
81- Ok ( Other ( Box :: new ( unsafe {
82- ByteStr :: from_utf8_unchecked ( Bytes :: copy_from_slice ( s) )
83- } ) ) . into ( ) )
80+ let bytes = Bytes :: copy_from_slice ( s) ;
81+
82+ // SAFETY: the postcondition of parse_exact() guarantees that s is valid
83+ // UTF-8, and bytes is a copy of s, so bytes is valid UTF-8 as well.
84+ let string = unsafe { ByteStr :: from_utf8_unchecked ( bytes) } ;
85+
86+ Ok ( Other ( Box :: new ( string) ) . into ( ) )
8487 }
8588 }
8689 }
@@ -195,6 +198,12 @@ const MAX_SCHEME_LEN: usize = 64;
195198
196199// scheme = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
197200//
201+ // SCHEME_CHARS is a table of valid characters in the scheme part of a URI. The
202+ // entry for an invalid character is 0. The entry for a valid character is the
203+ // character's own byte value (e.g. the entry for 43 is b'+' because b'+' ==
204+ // 43u8). An important property of this table is that every entry above index
205+ // 127 is invalid. Every valid entry is therefore a single-byte UTF-8 code
206+ // point (ASCII), so a slice consisting only of valid entries is valid UTF-8.
198207const SCHEME_CHARS : [ u8 ; 256 ] = [
199208 // 0 1 2 3 4 5 6 7 8 9
200209 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , // x
@@ -226,6 +235,7 @@ const SCHEME_CHARS: [u8; 256] = [
226235] ;
227236
228237impl Scheme2 < usize > {
238+ // Postcondition: whenever this returns Ok, s is valid UTF-8.
229239 fn parse_exact ( s : & [ u8 ] ) -> Result < Scheme2 < ( ) > , InvalidUri > {
230240 match s {
231241 b"http" => Ok ( Protocol :: Http . into ( ) ) ,
@@ -235,6 +245,8 @@ impl Scheme2<usize> {
235245 return Err ( ErrorKind :: SchemeTooLong . into ( ) ) ;
236246 }
237247
248+ // Check that each byte in s is a valid SCHEME_CHARS entry, which
249+ // implies that it is a valid single-byte UTF-8 code point.
238250 for & b in s {
239251 match SCHEME_CHARS [ b as usize ] {
240252 b':' => {
0 commit comments