Skip to content

Commit 8c8949d

Browse files
committed
Documentation improvements
1 parent ff722f9 commit 8c8949d

File tree

1 file changed

+54
-29
lines changed

1 file changed

+54
-29
lines changed

src/lib.rs

Lines changed: 54 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -4,34 +4,6 @@ use std::str::Chars;
44
use regex::Regex;
55
use url::Url;
66

7-
/// Defines how URL normalization will work. This struct offers reasonable defaults, as well as a fluent interface for building normalization.
8-
///
9-
/// Construct an empty [`Options`] object and provide a query parameter:
10-
///
11-
/// ```
12-
/// # use urlnorm::*;
13-
/// let options = Options::new().with_ignored_query_params(["fbclid"]);
14-
/// ```
15-
///
16-
/// Construct a default [`Options`] object and modify the query parameters:
17-
///
18-
/// ```
19-
/// # use urlnorm::*;
20-
/// let options = Options::default().with_ignored_query_params(["fbclid"]);
21-
/// ```
22-
pub struct Options {
23-
/// Query parameters to ignore. These are wrapped in the regular expression beginning and end-of-string markers (ie: `^...$`).
24-
pub ignored_query_params: Vec<String>,
25-
/// Host prefixes to trim. These match only at the start of the URL's host, and repeated matches will be removed.
26-
pub trimmed_host_prefixes: Vec<String>,
27-
/// Path extensions to trim. These match only at the end of the path, and an end-of-string marker (`$`) is added to the patterns
28-
/// automatically.
29-
pub trimmed_path_extension_suffixes: Vec<String>,
30-
/// Specifies the maximum length of a path extension to remove. Some paths may contain periods that signify identify or have some
31-
/// other meaning than marking a file extension.
32-
pub path_extension_length: usize,
33-
}
34-
357
/// Default query parameters that are ignored.
368
const DEFAULT_IGNORED_QUERY_PARAMS: [&str; 15] = [
379
"utm_source",
@@ -64,6 +36,53 @@ const DEFAULT_WWW_PREFIX: &str = r#"(?x)
6436
/// By default, trim extensions that look like .html, .html5, etc.
6537
const DEFAULT_EXTENSION_SUFFIX: &str = "[a-zA-Z]+[0-9]?$";
6638

39+
/// Defines how URL normalization will work. This struct offers reasonable defaults, as well as a fluent interface for customizing normalization.
40+
///
41+
/// Construct an empty [`Options`] object and provide a query parameter:
42+
///
43+
/// ```
44+
/// # use urlnorm::*;
45+
/// let options = Options::new().with_ignored_query_params(["fbclid"]);
46+
/// ```
47+
///
48+
/// Construct a default [`Options`] object and modify the query parameters:
49+
///
50+
/// ```
51+
/// # use urlnorm::*;
52+
/// let options = Options::default().with_ignored_query_params(["fbclid"]);
53+
/// ```
54+
///
55+
/// And once you've constructed the [`Options`] object, you can [`Options::compile`] it
56+
/// to a [`UrlNormalizer`]. This may fail if the regular expressions fail to compile.
57+
///
58+
/// ```
59+
/// # use urlnorm::*;
60+
/// let options: Options = Options::default().with_ignored_query_params(["fbclid"]);
61+
/// let normalizer: UrlNormalizer = options.compile().expect("Failed to compile");
62+
/// ```
63+
///
64+
/// In most cases, however, you'll want to just use [`UrlNormalizer::default()`] and can skip [`Options`] entirely. The
65+
/// default [`UrlNormalizer`] is also infallible:
66+
///
67+
/// ```
68+
/// # use url::Url;
69+
/// # use urlnorm::*;
70+
/// let normalizer = UrlNormalizer::default();
71+
/// let s = normalizer.compute_normalization_string(&Url::parse("http://google.com").unwrap());
72+
/// ```
73+
pub struct Options {
74+
/// Query parameters to ignore. These are wrapped in the regular expression beginning and end-of-string markers (i.e., `^...$`).
75+
pub ignored_query_params: Vec<String>,
76+
/// Host prefixes to trim. These match only at the start of the URL's host, and repeated matches will be removed.
77+
pub trimmed_host_prefixes: Vec<String>,
78+
/// Path extensions to trim. These match only at the end of the path, and an end-of-string marker (`$`) is added to the patterns
79+
/// automatically.
80+
pub trimmed_path_extension_suffixes: Vec<String>,
81+
/// Specifies the maximum length of a path extension to remove. Some paths may contain periods that signify identity or have some
82+
/// other meaning than marking a file extension.
83+
pub path_extension_length: usize,
84+
}
85+
6786
impl Default for Options {
6887
fn default() -> Self {
6988
let new = Self::new();
@@ -125,6 +144,7 @@ impl Options {
125144
})
126145
}
127146

147+
/// Replaces the ignored query parameters.
128148
pub fn with_ignored_query_params<S: AsRef<str>, I: IntoIterator<Item = S>>(
129149
mut self,
130150
iter: I,
@@ -133,6 +153,7 @@ impl Options {
133153
self
134154
}
135155

156+
/// Replaces the trimmed host prefixes.
136157
pub fn with_trimmed_host_prefixes<S: AsRef<str>, I: IntoIterator<Item = S>>(
137158
mut self,
138159
iter: I,
@@ -141,6 +162,7 @@ impl Options {
141162
self
142163
}
143164

165+
/// Replaces the trimmed path extensions.
144166
pub fn with_trimmed_path_extension_suffixes<S: AsRef<str>, I: IntoIterator<Item = S>>(
145167
mut self,
146168
iter: I,
@@ -150,6 +172,7 @@ impl Options {
150172
self
151173
}
152174

175+
/// Replaces the path extension length.
153176
pub fn with_path_extension_length(mut self, path_extension_length: usize) -> Self {
154177
self.path_extension_length = path_extension_length;
155178
self
@@ -466,8 +489,10 @@ mod test {
466489
#[case("https://google.com/?page=1", "https://google.com/?page=2")]
467490
#[case("https://google.com/?page=%31", "https://google.com/?page=%32")]
468491
#[case("https://amazon.com/product/ref=a", "https://amazon.com/product/ref=b")]
469-
// Slightly modified query string param
492+
// Negative case: slightly modified query string param
470493
#[case("http://x.com?xfbclid=foo", "http://x.com?xfbclid=basdf")]
494+
// Negative case: long extension
495+
#[case("http://x.com/file.html12345", "http://x.com/file.html12346")]
471496
// Examples of real URLs that should not be normalized together
472497
#[case("http://arxiv.org/abs/1405.0126", "http://arxiv.org/abs/1405.0351")]
473498
#[case(

0 commit comments

Comments
 (0)