Skip to content

Commit 27b2d75

Browse files
authored
feat: host parsing (#9)
* merge: resolve merge issues
* add: Host/Hostname structs in python
* add: python code documentation
* add: python tests for new APIs
* add: python typing information
1 parent 2845cb8 commit 27b2d75

File tree

6 files changed

+500
-55
lines changed

6 files changed

+500
-55
lines changed

Cargo.lock

Lines changed: 2 additions & 2 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

faup/src/grammar.pest

Lines changed: 8 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -7,13 +7,14 @@ userinfo = ${ username ~ (":" ~ password)? ~ "@" }
77

88
// this rule is used to check if the hostname is actually
99
// a valid IPv4 address, since the hostname rule also matches any IPv4 address
10-
ipv4 = ${ SOI ~ (ASCII_DIGIT{1, 3} ~ "."){3} ~ ASCII_DIGIT{1, 3} ~ EOI }
11-
host = ${ ipv6 | hostname }
12-
domain_part = ${ (!(":" | "?" | "/" | "#" | "." | WHITE_SPACE) ~ ANY)+ }
13-
tld = ${ (!(":" | "?" | "/" | "#" | WHITE_SPACE) ~ ANY)+ }
14-
hostname = ${ (((domain_part ~ ".")+ ~ tld) | domain_part) }
15-
16-
ipv6 = ${ "[" ~ (ASCII_HEX_DIGIT{,4} ~ ":"){2, 7} ~ ASCII_HEX_DIGIT{,4} ~ "]" }
10+
ipv4 = ${ SOI ~ (ASCII_DIGIT{1, 3} ~ "."){3} ~ ASCII_DIGIT{1, 3} ~ EOI }
11+
host = ${ "[" ~ ipv6 ~ "]" | hostname }
12+
checked_host = _{ SOI ~ (ipv6 | hostname) ~ EOI }
13+
domain_part = ${ (!(":" | "?" | "/" | "#" | "." | WHITE_SPACE) ~ ANY)+ }
14+
tld = ${ (!(":" | "?" | "/" | "#" | "." | WHITE_SPACE) ~ ANY)+ }
15+
hostname = ${ (((domain_part ~ ".")+ ~ tld) | domain_part) }
16+
17+
ipv6 = ${ (ASCII_HEX_DIGIT{,4} ~ ":"){2, 7} ~ ASCII_HEX_DIGIT{,4} }
1718

1819
encoded_char = ${ "%" ~ ASCII_DIGIT{2} }
1920

faup/src/lib.rs

Lines changed: 128 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -133,10 +133,20 @@ pub enum Error {
133133
InvalidIPv4,
134134
#[error("invalid ipv6 address")]
135135
InvalidIPv6,
136+
#[error("invalid host")]
137+
InvalidHost,
138+
#[error("{0}")]
139+
Other(String),
136140
#[error("parser error: {0}")]
137141
Parse(#[from] Box<pest::error::Error<Rule>>),
138142
}
139143

144+
impl Error {
145+
fn other<S: AsRef<str>>(s: S) -> Self {
146+
Error::Other(s.as_ref().to_string())
147+
}
148+
}
149+
140150
#[derive(Parser)]
141151
#[grammar = "grammar.pest"]
142152
pub(crate) struct UrlParser;
@@ -395,14 +405,96 @@ impl fmt::Display for Host<'_> {
395405
}
396406
}
397407

398-
impl<'url> Host<'url> {
408+
impl<'host> Host<'host> {
399409
fn into_owned<'owned>(self) -> Host<'owned> {
400410
match self {
401411
Host::Hostname(h) => Host::Hostname(h.into_owned()),
402412
Host::Ip(ip) => Host::Ip(ip),
403413
}
404414
}
405415

416+
#[inline(always)]
417+
fn from_pair(host_pair: Pair<'host, Rule>) -> Result<Self, Error> {
418+
match host_pair.as_rule() {
419+
Rule::hostname => {
420+
if let Ok(ipv4) =
421+
UrlParser::parse(Rule::ipv4, host_pair.as_str()).map(|p| p.as_str())
422+
{
423+
Ok(Ipv4Addr::from_str(ipv4)
424+
.map(IpAddr::from)
425+
.map(Host::Ip)
426+
.map_err(|_| Error::InvalidIPv4)?)
427+
} else {
428+
Ok(Host::Hostname(Hostname::from_str(host_pair.as_str())))
429+
}
430+
}
431+
432+
Rule::ipv6 => Ok(Ipv6Addr::from_str(
433+
host_pair.as_str().trim_matches(|c| c == '[' || c == ']'),
434+
)
435+
.map(IpAddr::from)
436+
.map(Host::Ip)
437+
.map_err(|_| Error::InvalidIPv6)?),
438+
_ => Err(Error::other(format!(
439+
"unexpected parsing rule: {:?}",
440+
host_pair.as_rule()
441+
))),
442+
}
443+
}
444+
445+
/// Parses a string into a `Host` enum.
446+
///
447+
/// This function expects the input string to be a URL host, which can be either
448+
/// an IPv4 address, an IPv6 address, or a hostname.
449+
///
450+
/// # Arguments
451+
///
452+
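/// * `host` - A string slice that holds the host to parse (e.g., `"example.com"`, `"127.0.0.1"`, `"::1"`). IPv6 addresses must be given without surrounding brackets: `"[::1]"` is rejected with [`Error::InvalidHost`].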
453+
///
454+
/// # Returns
455+
///
456+
/// * `Result<Host, Error>` - A [`Host`] enum if parsing is successful, or an [`Error`] if parsing fails.
457+
///
458+
/// # Examples
459+
///
460+
/// ```
461+
/// use faup_rs::Host;
462+
///
463+
/// // Parse an IPv4 address
464+
/// let host = Host::parse("127.0.0.1").unwrap();
465+
/// assert!(matches!(host, Host::Ip(std::net::IpAddr::V4(_))));
466+
///
467+
/// // Parse an IPv6 address
468+
/// let host = Host::parse("::1").unwrap();
469+
/// assert!(matches!(host, Host::Ip(std::net::IpAddr::V6(_))));
470+
///
471+
/// // Parse a hostname
472+
/// let host = Host::parse("example.com").unwrap();
473+
/// assert!(matches!(host, Host::Hostname(_)));
474+
///
475+
/// // Parse a hostname with a subdomain
476+
/// let host = Host::parse("sub.example.com").unwrap();
477+
/// assert!(matches!(host, Host::Hostname(_)));
478+
///
479+
/// // Parse a hostname with a custom TLD
480+
/// let host = Host::parse("example.b32.i2p").unwrap();
481+
/// assert!(matches!(host, Host::Hostname(_)));
482+
///
483+
/// // Attempt to parse an invalid host
484+
/// let result = Host::parse("invalid..host");
485+
/// assert!(matches!(result, Err(faup_rs::Error::InvalidHost)));
486+
/// ```
487+
#[inline]
488+
pub fn parse(host: &'host str) -> Result<Self, Error> {
489+
Self::from_pair(
490+
UrlParser::parse(Rule::checked_host, host)
491+
.map_err(|_| Error::InvalidHost)?
492+
.next()
493+
// this should not panic, as the parser guarantees that at least one pair exists
494+
.expect("expecting host pair"),
495+
)
496+
}
497+
406498
/// Returns the hostname component if this is a `Host::Hostname` variant.
407499
///
408500
/// # Returns
@@ -595,34 +687,7 @@ impl<'url> Url<'url> {
595687
Rule::host => {
596688
// cannot panic: guaranteed by the parser
597689
let host_pair = p.into_inner().next().unwrap();
598-
match host_pair.as_rule() {
599-
Rule::hostname => {
600-
if let Ok(ipv4) =
601-
UrlParser::parse(Rule::ipv4, host_pair.as_str()).map(|p| p.as_str())
602-
{
603-
host = Some(
604-
Ipv4Addr::from_str(ipv4)
605-
.map(IpAddr::from)
606-
.map(Host::Ip)
607-
.map_err(|_| Error::InvalidIPv4)?,
608-
);
609-
} else {
610-
host = Some(Host::Hostname(Hostname::from_str(host_pair.as_str())))
611-
}
612-
}
613-
614-
Rule::ipv6 => {
615-
host = Some(
616-
Ipv6Addr::from_str(
617-
host_pair.as_str().trim_matches(|c| c == '[' || c == ']'),
618-
)
619-
.map(IpAddr::from)
620-
.map(Host::Ip)
621-
.map_err(|_| Error::InvalidIPv6)?,
622-
);
623-
}
624-
_ => {}
625-
}
690+
host = Some(Host::from_pair(host_pair)?)
626691
}
627692
Rule::port => {
628693
port = Some(u16::from_str(p.as_str()).map_err(|_| Error::InvalidPort)?)
@@ -1255,4 +1320,38 @@ mod tests {
12551320
let url = Url::parse("https://example.com/../../..some/directory/traversal/../").unwrap();
12561321
assert_eq!(url.path(), Some("/../../..some/directory/traversal/../"));
12571322
}
1323+
1324+
#[test]
1325+
fn test_host_from_str() {
1326+
// Valid IPv4
1327+
let host = Host::parse("127.0.0.1").unwrap();
1328+
assert!(matches!(host, Host::Ip(std::net::IpAddr::V4(_))));
1329+
1330+
// Valid IPv6
1331+
let host = Host::parse("::1").unwrap();
1332+
assert!(matches!(host, Host::Ip(std::net::IpAddr::V6(_))));
1333+
1334+
let host = Host::parse("[::1]");
1335+
assert!(matches!(host, Err(Error::InvalidHost)));
1336+
1337+
// Invalid IPv6
1338+
let result = Host::parse("::::");
1339+
assert!(matches!(result, Err(Error::InvalidIPv6)));
1340+
1341+
// Valid hostname
1342+
let host = Host::parse("example.com").unwrap();
1343+
assert!(matches!(host, Host::Hostname(_)));
1344+
1345+
// Hostname with subdomain
1346+
let host = Host::parse("sub.example.com").unwrap();
1347+
assert!(matches!(host, Host::Hostname(_)));
1348+
1349+
// Hostname with custom TLD
1350+
let host = Host::parse("example.b32.i2p").unwrap();
1351+
assert!(matches!(host, Host::Hostname(_)));
1352+
1353+
// Invalid hostname (empty label between two consecutive dots)
1354+
let result = Host::parse("example..com");
1355+
assert!(matches!(result, Err(Error::InvalidHost)));
1356+
}
12581357
}

python/pyfaup.pyi

Lines changed: 53 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,42 @@
1+
from typing import Optional
2+
3+
class Hostname:
4+
hostname: str
5+
subdomain: Optional[str]
6+
domain: Optional[str]
7+
suffix: Optional[str]
8+
9+
def __init__(self, hn: str) -> None:
10+
...
11+
12+
def __str__(self) -> str:
13+
...
14+
15+
class Host:
16+
def __init__(self, s: str) -> None:
17+
...
18+
19+
def try_into_hostname(self) -> Hostname:
20+
...
21+
22+
def try_into_ip(self) -> str:
23+
...
24+
25+
def is_hostname(self) -> bool:
26+
...
27+
28+
def is_ipv4(self) -> bool:
29+
...
30+
31+
def is_ipv6(self) -> bool:
32+
...
33+
34+
def is_ip_addr(self) -> bool:
35+
...
36+
37+
def __str__(self) -> str:
38+
...
39+
140
class FaupCompat:
241

342
url: bytes
@@ -41,21 +80,22 @@ class FaupCompat:
4180
def get_port(self) -> int | None:
4281
...
4382

44-
4583
class Url:
46-
4784
orig: str
4885
scheme: str
49-
username: str | None
50-
password: str | None
86+
username: Optional[str]
87+
password: Optional[str]
5188
host: str
52-
subdomain: str | None
53-
domain: str | None
54-
suffix: str | None
55-
port: int | None
56-
path: str | None
57-
query: str | None
58-
fragment: str | None
59-
60-
def __init__(self, url: str | None = None) -> None:
89+
subdomain: Optional[str]
90+
domain: Optional[str]
91+
suffix: Optional[str]
92+
port: Optional[int]
93+
path: Optional[str]
94+
query: Optional[str]
95+
fragment: Optional[str]
96+
97+
def __init__(self, url: str) -> None:
6198
...
99+
100+
def __str__(self) -> str:
101+
...

0 commit comments

Comments
 (0)