Skip to content

Commit 311ceb8

Browse files
committed
Host parsing rules.
1 parent 15418b4 commit 311ceb8

File tree

4 files changed

+131
-33
lines changed

4 files changed

+131
-33
lines changed

src/host.rs

Lines changed: 3 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -63,13 +63,10 @@ impl<'de> ::serde::Deserialize<'de> for HostInternal {
6363
}
6464
}
6565

66-
impl<S> From<Host<S>> for HostInternal
67-
where
68-
S: ToString,
69-
{
70-
fn from(host: Host<S>) -> HostInternal {
66+
impl From<Host<String>> for HostInternal {
67+
fn from(host: Host<String>) -> HostInternal {
7168
match host {
72-
Host::Domain(ref s) if s.to_string().is_empty() => HostInternal::None,
69+
Host::Domain(ref s) if s.is_empty() => HostInternal::None,
7370
Host::Domain(_) => HostInternal::Domain,
7471
Host::Ipv4(address) => HostInternal::Ipv4(address),
7572
Host::Ipv6(address) => HostInternal::Ipv6(address),

src/lib.rs

Lines changed: 18 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -692,7 +692,7 @@ impl Url {
692692
/// ```
693693
#[inline]
694694
pub fn cannot_be_a_base(&self) -> bool {
695-
!self.slice(self.path_start..).starts_with('/')
695+
!self.slice(self.scheme_end + 1..).starts_with('/')
696696
}
697697

698698
/// Return the username for this URL (typically the empty string)
@@ -1643,10 +1643,25 @@ impl Url {
16431643
if host == "" && SchemeType::from(self.scheme()).is_special() {
16441644
return Err(ParseError::EmptyHost);
16451645
}
1646+
let mut host_substr = host;
1647+
// Otherwise, if c is U+003A (:) and the [] flag is unset, then
1648+
if !host.starts_with('[') || !host.ends_with(']') {
1649+
match host.find(':') {
1650+
Some(0) => {
1651+
// If buffer is the empty string, validation error, return failure.
1652+
return Err(ParseError::InvalidDomainCharacter);
1653+
}
1654+
// Let host be the result of host parsing buffer
1655+
Some(colon_index) => {
1656+
host_substr = &host[..colon_index];
1657+
}
1658+
None => {}
1659+
}
1660+
}
16461661
if SchemeType::from(self.scheme()).is_special() {
1647-
self.set_host_internal(Host::parse(host)?, None)
1662+
self.set_host_internal(Host::parse(host_substr)?, None);
16481663
} else {
1649-
self.set_host_internal(Host::parse_opaque(host)?, None)
1664+
self.set_host_internal(Host::parse_opaque(host_substr)?, None);
16501665
}
16511666
} else if self.has_host() {
16521667
if SchemeType::from(self.scheme()).is_special() {

src/parser.rs

Lines changed: 68 additions & 19 deletions
Original file line number | Diff line number | Diff line change
@@ -156,7 +156,7 @@ impl fmt::Display for SyntaxViolation {
156156
}
157157
}
158158

159-
#[derive(Copy, Clone)]
159+
#[derive(Copy, Clone, PartialEq)]
160160
pub enum SchemeType {
161161
File,
162162
SpecialNotFile,
@@ -217,7 +217,7 @@ impl<'i> Input<'i> {
217217
pub fn with_log(original_input: &'i str, vfn: Option<&dyn Fn(SyntaxViolation)>) -> Self {
218218
let input = original_input.trim_matches(c0_control_or_space);
219219
if let Some(vfn) = vfn {
220-
if input.len() < original_input.len() {
220+
if input.len() != original_input.len() {
221221
vfn(SyntaxViolation::C0SpaceIgnored)
222222
}
223223
if input.chars().any(|c| matches!(c, '\t' | '\n' | '\r')) {
@@ -858,11 +858,13 @@ impl<'a> Parser<'a> {
858858
self.serialization.push('/');
859859
self.serialization.push('/');
860860
// authority state
861+
let before_authority = self.serialization.len();
861862
let (username_end, remaining) = self.parse_userinfo(input, scheme_type)?;
863+
let has_authority = before_authority != self.serialization.len();
862864
// host state
863865
let host_start = to_u32(self.serialization.len())?;
864866
let (host_end, host, port, remaining) =
865-
self.parse_host_and_port(remaining, scheme_end, scheme_type)?;
867+
self.parse_host_and_port(remaining, scheme_end, scheme_type, has_authority)?;
866868
// path state
867869
let path_start = to_u32(self.serialization.len())?;
868870
let remaining = self.parse_path_start(scheme_type, &mut true, remaining);
@@ -906,7 +908,18 @@ impl<'a> Parser<'a> {
906908
}
907909
let (mut userinfo_char_count, remaining) = match last_at {
908910
None => return Ok((to_u32(self.serialization.len())?, input)),
909-
Some((0, remaining)) => return Ok((to_u32(self.serialization.len())?, remaining)),
911+
Some((0, remaining)) => {
912+
// Otherwise, if one of the following is true
913+
// c is the EOF code point, U+002F (/), U+003F (?), or U+0023 (#)
914+
// url is special and c is U+005C (\)
915+
// If @ flag is set and buffer is the empty string, validation error, return failure.
916+
if let (Some(c), _) = remaining.split_first() {
917+
if c == '/' || c == '?' || c == '#' || scheme_type.is_special() && c == '\\' {
918+
return Err(ParseError::EmptyHost);
919+
}
920+
}
921+
return Ok((to_u32(self.serialization.len())?, remaining));
922+
}
910923
Some(x) => x,
911924
};
912925

@@ -948,10 +961,26 @@ impl<'a> Parser<'a> {
948961
input: Input<'i>,
949962
scheme_end: u32,
950963
scheme_type: SchemeType,
964+
has_authority: bool,
951965
) -> ParseResult<(u32, HostInternal, Option<u16>, Input<'i>)> {
952966
let (host, remaining) = Parser::parse_host(input, scheme_type)?;
953967
write!(&mut self.serialization, "{}", host).unwrap();
954968
let host_end = to_u32(self.serialization.len())?;
969+
if let Host::Domain(h) = &host {
970+
if h.is_empty() {
971+
// Port with an empty host
972+
if remaining.starts_with(":") {
973+
return Err(ParseError::EmptyHost);
974+
}
975+
if scheme_type.is_special() {
976+
return Err(ParseError::EmptyHost);
977+
}
978+
if !scheme_type.is_special() && has_authority {
979+
return Err(ParseError::EmptyHost);
980+
}
981+
}
982+
};
983+
955984
let (port, remaining) = if let Some(remaining) = remaining.split_prefix(':') {
956985
let scheme = || default_port(&self.serialization[..scheme_end as usize]);
957986
Parser::parse_port(remaining, scheme, self.context)?
@@ -1018,10 +1047,41 @@ impl<'a> Parser<'a> {
10181047
Ok((host, input))
10191048
}
10201049

1021-
pub(crate) fn parse_file_host<'i>(
1050+
pub fn get_file_host<'i>(input: Input<'i>) -> ParseResult<(Host<String>, Input)> {
1051+
let (_, host_str, remaining) = Parser::file_host(input)?;
1052+
let host = match Host::parse(&host_str)? {
1053+
Host::Domain(ref d) if d == "localhost" => Host::Domain("".to_string()),
1054+
host => host,
1055+
};
1056+
Ok((host, remaining))
1057+
}
1058+
1059+
fn parse_file_host<'i>(
10221060
&mut self,
10231061
input: Input<'i>,
10241062
) -> ParseResult<(bool, HostInternal, Input<'i>)> {
1063+
let has_host;
1064+
let (_, host_str, remaining) = Parser::file_host(input)?;
1065+
let host = if host_str.is_empty() {
1066+
has_host = false;
1067+
HostInternal::None
1068+
} else {
1069+
match Host::parse(&host_str)? {
1070+
Host::Domain(ref d) if d == "localhost" => {
1071+
has_host = false;
1072+
HostInternal::None
1073+
}
1074+
host => {
1075+
write!(&mut self.serialization, "{}", host).unwrap();
1076+
has_host = true;
1077+
host.into()
1078+
}
1079+
}
1080+
};
1081+
Ok((has_host, host, remaining))
1082+
}
1083+
1084+
pub fn file_host<'i>(input: Input<'i>) -> ParseResult<(bool, String, Input<'i>)> {
10251085
// Undo the Input abstraction here to avoid allocating in the common case
10261086
// where the host part of the input does not contain any tab or newline
10271087
let input_str = input.chars.as_str();
@@ -1050,20 +1110,9 @@ impl<'a> Parser<'a> {
10501110
}
10511111
}
10521112
if is_windows_drive_letter(host_str) {
1053-
return Ok((false, HostInternal::None, input));
1113+
return Ok((false, "".to_string(), input));
10541114
}
1055-
let host = if host_str.is_empty() {
1056-
HostInternal::None
1057-
} else {
1058-
match Host::parse(host_str)? {
1059-
Host::Domain(ref d) if d == "localhost" => HostInternal::None,
1060-
host => {
1061-
write!(&mut self.serialization, "{}", host).unwrap();
1062-
host.into()
1063-
}
1064-
}
1065-
};
1066-
Ok((true, host, remaining))
1115+
Ok((true, host_str.to_string(), remaining))
10671116
}
10681117

10691118
pub fn parse_port<P>(
@@ -1503,7 +1552,7 @@ fn c0_control_or_space(ch: char) -> bool {
15031552

15041553
/// https://infra.spec.whatwg.org/#ascii-tab-or-newline
15051554
#[inline]
1506-
pub fn ascii_tab_or_new_line(ch: char) -> bool {
1555+
fn ascii_tab_or_new_line(ch: char) -> bool {
15071556
matches!(ch, '\t' | '\r' | '\n')
15081557
}
15091558

src/quirks.rs

Lines changed: 42 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -12,6 +12,8 @@
1212
//! you probably want to use `Url` method instead.
1313
1414
use parser::{default_port, Context, Input, Parser, SchemeType};
15+
use std::cell::RefCell;
16+
use SyntaxViolation;
1517
use {idna, Host, ParseError, Position, Url};
1618

1719
/// https://url.spec.whatwg.org/#dom-url-domaintoascii
@@ -110,14 +112,23 @@ pub fn set_host(url: &mut Url, new_host: &str) -> Result<(), ()> {
110112
let opt_port;
111113
{
112114
let scheme = url.scheme();
113-
let result = Parser::parse_host(Input::new(new_host), SchemeType::from(scheme));
115+
let scheme_type = SchemeType::from(scheme);
116+
let result = if scheme_type == SchemeType::File {
117+
Parser::get_file_host(input)
118+
} else {
119+
Parser::parse_host(input, scheme_type)
120+
};
114121
match result {
115122
Ok((h, remaining)) => {
116123
host = h;
117124
opt_port = if let Some(remaining) = remaining.split_prefix(':') {
118-
Parser::parse_port(remaining, || default_port(scheme), Context::Setter)
119-
.ok()
120-
.map(|(port, _remaining)| port)
125+
if remaining.is_empty() {
126+
None
127+
} else {
128+
Parser::parse_port(remaining, || default_port(scheme), Context::Setter)
129+
.ok()
130+
.map(|(port, _remaining)| port)
131+
}
121132
} else {
122133
None
123134
};
@@ -154,8 +165,30 @@ pub fn set_hostname(url: &mut Url, new_hostname: &str) -> Result<(), ()> {
154165
if url.cannot_be_a_base() {
155166
return Err(());
156167
}
157-
let result = Parser::parse_host(Input::new(new_hostname), SchemeType::from(url.scheme()));
168+
// Host parsing rules are strict,
169+
// We don't want to trim the input
170+
let input = Input::no_trim(new_hostname);
171+
let scheme_type = SchemeType::from(url.scheme());
172+
let result = if scheme_type == SchemeType::File {
173+
Parser::get_file_host(input)
174+
} else {
175+
Parser::parse_host(input, scheme_type)
176+
};
158177
if let Ok((host, _remaining)) = result {
178+
if let Host::Domain(h) = &host {
179+
if h.is_empty() {
180+
// Empty host on special not file url
181+
if SchemeType::from(url.scheme()) == SchemeType::SpecialNotFile
182+
// Port with an empty host
183+
||!port(&url).is_empty()
184+
// Empty host on a URL that includes credentials
185+
|| !url.username().is_empty()
186+
|| !url.password().unwrap_or(&"").is_empty()
187+
{
188+
return Err(());
189+
}
190+
}
191+
}
159192
url.set_host_internal(host, None);
160193
Ok(())
161194
} else {
@@ -209,6 +242,10 @@ pub fn set_pathname(url: &mut Url, new_pathname: &str) {
209242
&& Some('\\') == new_pathname.chars().nth(0)
210243
{
211244
url.set_path(new_pathname)
245+
} else {
246+
let mut path_to_set = String::from("/");
247+
path_to_set.push_str(new_pathname);
248+
url.set_path(&path_to_set)
212249
}
213250
}
214251

0 commit comments

Comments
 (0)