Skip to content

Commit 4d39298

Browse files
authored
Merge pull request #428 from brave/code_structure_2
Code structure follow up
2 parents c9a5b2f + 22dcb7e commit 4d39298

File tree

5 files changed

+427
-363
lines changed

5 files changed

+427
-363
lines changed

src/filters/abstract_network.rs

+256
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,256 @@
1+
use memchr::memrchr as find_char_reverse;
2+
3+
use super::network::NetworkFilterError;
4+
5+
use once_cell::sync::Lazy;
6+
use regex::Regex;
7+
8+
/// For now, only support `$removeparam` with simple alphanumeric/dash/underscore patterns.
///
/// The expression is anchored at both ends, so the entire option value must consist of one or
/// more ASCII letters, digits, `_` or `-`. It is wrapped in `Lazy`, so compilation is deferred
/// until the first access; the `unwrap` can only fail if the constant pattern itself is invalid.
9+
static VALID_PARAM: Lazy<Regex> = Lazy::new(|| Regex::new(r"^[a-zA-Z0-9_\-]+$").unwrap());
10+
11+
/// Anchor token that may appear at the very start of a network filter pattern.
///
/// `Debug`, `PartialEq` and `Eq` are derived so that parsed filters can be inspected and compared
/// directly (for example with `assert_eq!`).
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub(crate) enum NetworkFilterLeftAnchor {
    /// A `||` token, which represents a match to the start of a domain or subdomain segment.
    DoublePipe,
    /// A `|` token, which represents a match to the exact start of the URL.
    SinglePipe,
}
18+
19+
/// Anchor token that may appear at the very end of a network filter pattern.
///
/// `Debug`, `PartialEq` and `Eq` are derived so that parsed filters can be inspected and compared
/// directly (for example with `assert_eq!`).
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub(crate) enum NetworkFilterRightAnchor {
    /// A `|` token, which represents a match to the exact end of the URL.
    SinglePipe,
}
24+
25+
/// Pattern for a network filter, describing what URLs to match against.
26+
#[derive(Clone)]
27+
pub(crate) struct NetworkFilterPattern {
28+
pub(crate) left_anchor: Option<NetworkFilterLeftAnchor>,
29+
pub(crate) pattern: String,
30+
pub(crate) right_anchor: Option<NetworkFilterRightAnchor>,
31+
}
32+
33+
/// Any option that appears on the right side of a network filter as initiated by a `$` character.
///
/// All `bool` arguments below are `true` if the option stands alone, or `false` if the option is
/// negated using a prepended `~`.
///
/// `Debug`, `PartialEq` and `Eq` are derived so that parsed option lists can be inspected and
/// compared directly (for example with `assert_eq!`).
#[derive(Clone, Debug, PartialEq, Eq)]
pub(crate) enum NetworkFilterOption {
    /// `domain=` / `from=`: a `|`-separated list stored as `(included, domain)` pairs, where
    /// `included` is `false` for entries that were written with a leading `~`.
    Domain(Vec<(bool, String)>),
    /// `badfilter`.
    Badfilter,
    /// `important`.
    Important,
    /// `match-case`.
    MatchCase,
    /// `third-party` / `3p`.
    ThirdParty(bool),
    /// `first-party` / `1p`.
    FirstParty(bool),
    /// `tag=<value>`.
    Tag(String),
    /// `redirect=<resource>`; the resource name is never empty.
    Redirect(String),
    /// `redirect-rule=<resource>`; the resource name is never empty.
    RedirectRule(String),
    /// `csp` or `csp=<value>`; `None` when no value was supplied.
    Csp(Option<String>),
    /// `removeparam=<name>`.
    Removeparam(String),
    /// `generichide` / `ghide`.
    Generichide,
    /// `document` / `doc`.
    Document,
    /// `image`.
    Image(bool),
    /// `media`.
    Media(bool),
    /// `object` / `object-subrequest`.
    Object(bool),
    /// `other`.
    Other(bool),
    /// `ping` / `beacon`.
    Ping(bool),
    /// `script`.
    Script(bool),
    /// `stylesheet` / `css`.
    Stylesheet(bool),
    /// `subdocument` / `frame`.
    Subdocument(bool),
    /// `xmlhttprequest` / `xhr`.
    XmlHttpRequest(bool),
    /// `websocket`.
    Websocket(bool),
    /// `font`.
    Font(bool),
}

impl NetworkFilterOption {
    /// Returns `true` for the options that name a request content type (`Document`, `Image`,
    /// `Media`, `Object`, `Other`, `Ping`, `Script`, `Stylesheet`, `Subdocument`,
    /// `XmlHttpRequest`, `Websocket`, `Font`), regardless of whether the option is negated.
    pub fn is_content_type(&self) -> bool {
        matches!(
            self,
            Self::Document
                | Self::Image(..)
                | Self::Media(..)
                | Self::Object(..)
                | Self::Other(..)
                | Self::Ping(..)
                | Self::Script(..)
                | Self::Stylesheet(..)
                | Self::Subdocument(..)
                | Self::XmlHttpRequest(..)
                | Self::Websocket(..)
                | Self::Font(..)
        )
    }

    /// Returns `true` for `Redirect` and `RedirectRule`, and `false` for every other option.
    pub fn is_redirection(&self) -> bool {
        matches!(self, Self::Redirect(..) | Self::RedirectRule(..))
    }
}
87+
88+
/// Abstract syntax representation of a network filter. This representation can fully specify the
89+
/// string representation of a filter as written, with the exception of aliased options like `1p`
90+
/// or `ghide`. This allows separation of concerns between parsing and interpretation.
91+
pub(crate) struct AbstractNetworkFilter {
92+
pub(crate) exception: bool,
93+
pub(crate) pattern: NetworkFilterPattern,
94+
pub(crate) options: Option<Vec<NetworkFilterOption>>,
95+
}
96+
97+
impl AbstractNetworkFilter {
98+
pub(crate) fn parse(line: &str) -> Result<Self, NetworkFilterError> {
99+
let mut filter_index_start: usize = 0;
100+
let mut filter_index_end: usize = line.len();
101+
102+
let mut exception = false;
103+
if line.starts_with("@@") {
104+
filter_index_start += 2;
105+
exception = true;
106+
}
107+
108+
let maybe_options_index: Option<usize> = find_char_reverse(b'$', line.as_bytes());
109+
110+
let mut options = None;
111+
if let Some(options_index) = maybe_options_index {
112+
filter_index_end = options_index;
113+
114+
// slicing here is safe; the first byte after '$' will be a character boundary
115+
let raw_options = &line[filter_index_end + 1..];
116+
117+
options = Some(parse_filter_options(raw_options)?);
118+
}
119+
120+
let left_anchor = if line[filter_index_start..].starts_with("||") {
121+
filter_index_start += 2;
122+
Some(NetworkFilterLeftAnchor::DoublePipe)
123+
} else if line[filter_index_start..].starts_with('|') {
124+
filter_index_start += 1;
125+
Some(NetworkFilterLeftAnchor::SinglePipe)
126+
} else {
127+
None
128+
};
129+
130+
let right_anchor = if filter_index_end > 0
131+
&& filter_index_end > filter_index_start
132+
&& line[..filter_index_end].ends_with('|')
133+
{
134+
filter_index_end -= 1;
135+
Some(NetworkFilterRightAnchor::SinglePipe)
136+
} else {
137+
None
138+
};
139+
140+
let pattern = &line[filter_index_start..filter_index_end];
141+
142+
Ok(AbstractNetworkFilter {
143+
exception,
144+
pattern: NetworkFilterPattern {
145+
left_anchor,
146+
pattern: pattern.to_string(),
147+
right_anchor,
148+
},
149+
options,
150+
})
151+
}
152+
}
153+
154+
fn parse_filter_options(raw_options: &str) -> Result<Vec<NetworkFilterOption>, NetworkFilterError> {
155+
let mut result = vec![];
156+
157+
for raw_option in raw_options.split(',') {
158+
// Check for negation: ~option
159+
let negation = raw_option.starts_with('~');
160+
let maybe_negated_option = raw_option.trim_start_matches('~');
161+
162+
// Check for options: option=value1|value2
163+
let mut option_and_values = maybe_negated_option.splitn(2, '=');
164+
let (option, value) = (
165+
option_and_values.next().unwrap(),
166+
option_and_values.next().unwrap_or_default(),
167+
);
168+
169+
result.push(match (option, negation) {
170+
("domain", _) | ("from", _) => {
171+
let domains: Vec<(bool, String)> = value
172+
.split('|')
173+
.map(|domain| {
174+
if let Some(negated_domain) = domain.strip_prefix('~') {
175+
(false, negated_domain.to_string())
176+
} else {
177+
(true, domain.to_string())
178+
}
179+
})
180+
.filter(|(_, d)| !(d.starts_with('/') && d.ends_with('/')))
181+
.collect();
182+
if domains.is_empty() {
183+
return Err(NetworkFilterError::NoSupportedDomains);
184+
}
185+
NetworkFilterOption::Domain(domains)
186+
}
187+
("badfilter", true) => return Err(NetworkFilterError::NegatedBadFilter),
188+
("badfilter", false) => NetworkFilterOption::Badfilter,
189+
("important", true) => return Err(NetworkFilterError::NegatedImportant),
190+
("important", false) => NetworkFilterOption::Important,
191+
("match-case", true) => return Err(NetworkFilterError::NegatedOptionMatchCase),
192+
("match-case", false) => NetworkFilterOption::MatchCase,
193+
("third-party", negated) | ("3p", negated) => NetworkFilterOption::ThirdParty(!negated),
194+
("first-party", negated) | ("1p", negated) => NetworkFilterOption::FirstParty(!negated),
195+
("tag", true) => return Err(NetworkFilterError::NegatedTag),
196+
("tag", false) => NetworkFilterOption::Tag(String::from(value)),
197+
("redirect", true) => return Err(NetworkFilterError::NegatedRedirection),
198+
("redirect", false) => {
199+
// Ignore this filter if no redirection resource is specified
200+
if value.is_empty() {
201+
return Err(NetworkFilterError::EmptyRedirection);
202+
}
203+
204+
NetworkFilterOption::Redirect(String::from(value))
205+
}
206+
("redirect-rule", true) => return Err(NetworkFilterError::NegatedRedirection),
207+
("redirect-rule", false) => {
208+
if value.is_empty() {
209+
return Err(NetworkFilterError::EmptyRedirection);
210+
}
211+
212+
NetworkFilterOption::RedirectRule(String::from(value))
213+
}
214+
("csp", _) => NetworkFilterOption::Csp(if !value.is_empty() {
215+
Some(String::from(value))
216+
} else {
217+
None
218+
}),
219+
("removeparam", true) => return Err(NetworkFilterError::NegatedRemoveparam),
220+
("removeparam", false) => {
221+
if value.is_empty() {
222+
return Err(NetworkFilterError::EmptyRemoveparam);
223+
}
224+
if !VALID_PARAM.is_match(value) {
225+
return Err(NetworkFilterError::RemoveparamRegexUnsupported);
226+
}
227+
NetworkFilterOption::Removeparam(String::from(value))
228+
}
229+
("generichide", true) | ("ghide", true) => {
230+
return Err(NetworkFilterError::NegatedGenericHide)
231+
}
232+
("generichide", false) | ("ghide", false) => NetworkFilterOption::Generichide,
233+
("document", true) | ("doc", true) => return Err(NetworkFilterError::NegatedDocument),
234+
("document", false) | ("doc", false) => NetworkFilterOption::Document,
235+
("image", negated) => NetworkFilterOption::Image(!negated),
236+
("media", negated) => NetworkFilterOption::Media(!negated),
237+
("object", negated) | ("object-subrequest", negated) => {
238+
NetworkFilterOption::Object(!negated)
239+
}
240+
("other", negated) => NetworkFilterOption::Other(!negated),
241+
("ping", negated) | ("beacon", negated) => NetworkFilterOption::Ping(!negated),
242+
("script", negated) => NetworkFilterOption::Script(!negated),
243+
("stylesheet", negated) | ("css", negated) => NetworkFilterOption::Stylesheet(!negated),
244+
("subdocument", negated) | ("frame", negated) => {
245+
NetworkFilterOption::Subdocument(!negated)
246+
}
247+
("xmlhttprequest", negated) | ("xhr", negated) => {
248+
NetworkFilterOption::XmlHttpRequest(!negated)
249+
}
250+
("websocket", negated) => NetworkFilterOption::Websocket(!negated),
251+
("font", negated) => NetworkFilterOption::Font(!negated),
252+
(_, _) => return Err(NetworkFilterError::UnrecognisedOption),
253+
});
254+
}
255+
Ok(result)
256+
}

src/filters/mod.rs

+1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
//! Contains representations and standalone behaviors of individual filter rules.
22
3+
mod abstract_network;
34
pub mod cosmetic;
45
pub mod network;

0 commit comments

Comments
 (0)