|
| 1 | +use memchr::memrchr as find_char_reverse; |
| 2 | + |
| 3 | +use super::network::NetworkFilterError; |
| 4 | + |
| 5 | +use once_cell::sync::Lazy; |
| 6 | +use regex::Regex; |
| 7 | + |
| 8 | +/// For now, only support `$removeparam` with simple alphanumeric/dash/underscore patterns. |
| 9 | +static VALID_PARAM: Lazy<Regex> = Lazy::new(|| Regex::new(r"^[a-zA-Z0-9_\-]+$").unwrap()); |
| 10 | + |
/// Anchor token that may open a network filter pattern.
#[derive(Clone, Copy)]
pub(crate) enum NetworkFilterLeftAnchor {
    /// `||`: the pattern must match at the start of a domain or subdomain segment.
    DoublePipe,
    /// `|`: the pattern must match at the exact start of the URL.
    SinglePipe,
}
| 18 | + |
/// Anchor token that may close a network filter pattern.
#[derive(Clone, Copy)]
pub(crate) enum NetworkFilterRightAnchor {
    /// `|`: the pattern must match at the exact end of the URL.
    SinglePipe,
}
| 24 | + |
| 25 | +/// Pattern for a network filter, describing what URLs to match against. |
| 26 | +#[derive(Clone)] |
| 27 | +pub(crate) struct NetworkFilterPattern { |
| 28 | + pub(crate) left_anchor: Option<NetworkFilterLeftAnchor>, |
| 29 | + pub(crate) pattern: String, |
| 30 | + pub(crate) right_anchor: Option<NetworkFilterRightAnchor>, |
| 31 | +} |
| 32 | + |
/// A single option from the right-hand side of a network filter, i.e. one of the
/// comma-separated entries following the `$` character.
///
/// For every variant carrying a plain `bool`, the value is `true` when the option was written
/// on its own and `false` when it was negated with a leading `~`.
#[derive(Clone)]
pub(crate) enum NetworkFilterOption {
    /// `domain=` / `from=`: `|`-separated domains; the `bool` is `false` for `~`-prefixed entries.
    Domain(Vec<(bool, String)>),
    /// `badfilter`
    Badfilter,
    /// `important`
    Important,
    /// `match-case`
    MatchCase,
    /// `third-party` / `3p`
    ThirdParty(bool),
    /// `first-party` / `1p`
    FirstParty(bool),
    /// `tag=<value>`
    Tag(String),
    /// `redirect=<value>`
    Redirect(String),
    /// `redirect-rule=<value>`
    RedirectRule(String),
    /// `csp` with an optional `=<value>`; `None` when no value was given.
    Csp(Option<String>),
    /// `removeparam=<value>`
    Removeparam(String),
    /// `generichide` / `ghide`
    Generichide,

    // Content-type options.
    /// `document` / `doc`
    Document,
    /// `image`
    Image(bool),
    /// `media`
    Media(bool),
    /// `object` / `object-subrequest`
    Object(bool),
    /// `other`
    Other(bool),
    /// `ping` / `beacon`
    Ping(bool),
    /// `script`
    Script(bool),
    /// `stylesheet` / `css`
    Stylesheet(bool),
    /// `subdocument` / `frame`
    Subdocument(bool),
    /// `xmlhttprequest` / `xhr`
    XmlHttpRequest(bool),
    /// `websocket`
    Websocket(bool),
    /// `font`
    Font(bool),
}
| 63 | + |
| 64 | +impl NetworkFilterOption { |
| 65 | + pub fn is_content_type(&self) -> bool { |
| 66 | + matches!( |
| 67 | + self, |
| 68 | + Self::Document |
| 69 | + | Self::Image(..) |
| 70 | + | Self::Media(..) |
| 71 | + | Self::Object(..) |
| 72 | + | Self::Other(..) |
| 73 | + | Self::Ping(..) |
| 74 | + | Self::Script(..) |
| 75 | + | Self::Stylesheet(..) |
| 76 | + | Self::Subdocument(..) |
| 77 | + | Self::XmlHttpRequest(..) |
| 78 | + | Self::Websocket(..) |
| 79 | + | Self::Font(..) |
| 80 | + ) |
| 81 | + } |
| 82 | + |
| 83 | + pub fn is_redirection(&self) -> bool { |
| 84 | + matches!(self, Self::Redirect(..) | Self::RedirectRule(..)) |
| 85 | + } |
| 86 | +} |
| 87 | + |
| 88 | +/// Abstract syntax representation of a network filter. This representation can fully specify the |
| 89 | +/// string representation of a filter as written, with the exception of aliased options like `1p` |
| 90 | +/// or `ghide`. This allows separation of concerns between parsing and interpretation. |
| 91 | +pub(crate) struct AbstractNetworkFilter { |
| 92 | + pub(crate) exception: bool, |
| 93 | + pub(crate) pattern: NetworkFilterPattern, |
| 94 | + pub(crate) options: Option<Vec<NetworkFilterOption>>, |
| 95 | +} |
| 96 | + |
| 97 | +impl AbstractNetworkFilter { |
| 98 | + pub(crate) fn parse(line: &str) -> Result<Self, NetworkFilterError> { |
| 99 | + let mut filter_index_start: usize = 0; |
| 100 | + let mut filter_index_end: usize = line.len(); |
| 101 | + |
| 102 | + let mut exception = false; |
| 103 | + if line.starts_with("@@") { |
| 104 | + filter_index_start += 2; |
| 105 | + exception = true; |
| 106 | + } |
| 107 | + |
| 108 | + let maybe_options_index: Option<usize> = find_char_reverse(b'$', line.as_bytes()); |
| 109 | + |
| 110 | + let mut options = None; |
| 111 | + if let Some(options_index) = maybe_options_index { |
| 112 | + filter_index_end = options_index; |
| 113 | + |
| 114 | + // slicing here is safe; the first byte after '$' will be a character boundary |
| 115 | + let raw_options = &line[filter_index_end + 1..]; |
| 116 | + |
| 117 | + options = Some(parse_filter_options(raw_options)?); |
| 118 | + } |
| 119 | + |
| 120 | + let left_anchor = if line[filter_index_start..].starts_with("||") { |
| 121 | + filter_index_start += 2; |
| 122 | + Some(NetworkFilterLeftAnchor::DoublePipe) |
| 123 | + } else if line[filter_index_start..].starts_with('|') { |
| 124 | + filter_index_start += 1; |
| 125 | + Some(NetworkFilterLeftAnchor::SinglePipe) |
| 126 | + } else { |
| 127 | + None |
| 128 | + }; |
| 129 | + |
| 130 | + let right_anchor = if filter_index_end > 0 |
| 131 | + && filter_index_end > filter_index_start |
| 132 | + && line[..filter_index_end].ends_with('|') |
| 133 | + { |
| 134 | + filter_index_end -= 1; |
| 135 | + Some(NetworkFilterRightAnchor::SinglePipe) |
| 136 | + } else { |
| 137 | + None |
| 138 | + }; |
| 139 | + |
| 140 | + let pattern = &line[filter_index_start..filter_index_end]; |
| 141 | + |
| 142 | + Ok(AbstractNetworkFilter { |
| 143 | + exception, |
| 144 | + pattern: NetworkFilterPattern { |
| 145 | + left_anchor, |
| 146 | + pattern: pattern.to_string(), |
| 147 | + right_anchor, |
| 148 | + }, |
| 149 | + options, |
| 150 | + }) |
| 151 | + } |
| 152 | +} |
| 153 | + |
| 154 | +fn parse_filter_options(raw_options: &str) -> Result<Vec<NetworkFilterOption>, NetworkFilterError> { |
| 155 | + let mut result = vec![]; |
| 156 | + |
| 157 | + for raw_option in raw_options.split(',') { |
| 158 | + // Check for negation: ~option |
| 159 | + let negation = raw_option.starts_with('~'); |
| 160 | + let maybe_negated_option = raw_option.trim_start_matches('~'); |
| 161 | + |
| 162 | + // Check for options: option=value1|value2 |
| 163 | + let mut option_and_values = maybe_negated_option.splitn(2, '='); |
| 164 | + let (option, value) = ( |
| 165 | + option_and_values.next().unwrap(), |
| 166 | + option_and_values.next().unwrap_or_default(), |
| 167 | + ); |
| 168 | + |
| 169 | + result.push(match (option, negation) { |
| 170 | + ("domain", _) | ("from", _) => { |
| 171 | + let domains: Vec<(bool, String)> = value |
| 172 | + .split('|') |
| 173 | + .map(|domain| { |
| 174 | + if let Some(negated_domain) = domain.strip_prefix('~') { |
| 175 | + (false, negated_domain.to_string()) |
| 176 | + } else { |
| 177 | + (true, domain.to_string()) |
| 178 | + } |
| 179 | + }) |
| 180 | + .filter(|(_, d)| !(d.starts_with('/') && d.ends_with('/'))) |
| 181 | + .collect(); |
| 182 | + if domains.is_empty() { |
| 183 | + return Err(NetworkFilterError::NoSupportedDomains); |
| 184 | + } |
| 185 | + NetworkFilterOption::Domain(domains) |
| 186 | + } |
| 187 | + ("badfilter", true) => return Err(NetworkFilterError::NegatedBadFilter), |
| 188 | + ("badfilter", false) => NetworkFilterOption::Badfilter, |
| 189 | + ("important", true) => return Err(NetworkFilterError::NegatedImportant), |
| 190 | + ("important", false) => NetworkFilterOption::Important, |
| 191 | + ("match-case", true) => return Err(NetworkFilterError::NegatedOptionMatchCase), |
| 192 | + ("match-case", false) => NetworkFilterOption::MatchCase, |
| 193 | + ("third-party", negated) | ("3p", negated) => NetworkFilterOption::ThirdParty(!negated), |
| 194 | + ("first-party", negated) | ("1p", negated) => NetworkFilterOption::FirstParty(!negated), |
| 195 | + ("tag", true) => return Err(NetworkFilterError::NegatedTag), |
| 196 | + ("tag", false) => NetworkFilterOption::Tag(String::from(value)), |
| 197 | + ("redirect", true) => return Err(NetworkFilterError::NegatedRedirection), |
| 198 | + ("redirect", false) => { |
| 199 | + // Ignore this filter if no redirection resource is specified |
| 200 | + if value.is_empty() { |
| 201 | + return Err(NetworkFilterError::EmptyRedirection); |
| 202 | + } |
| 203 | + |
| 204 | + NetworkFilterOption::Redirect(String::from(value)) |
| 205 | + } |
| 206 | + ("redirect-rule", true) => return Err(NetworkFilterError::NegatedRedirection), |
| 207 | + ("redirect-rule", false) => { |
| 208 | + if value.is_empty() { |
| 209 | + return Err(NetworkFilterError::EmptyRedirection); |
| 210 | + } |
| 211 | + |
| 212 | + NetworkFilterOption::RedirectRule(String::from(value)) |
| 213 | + } |
| 214 | + ("csp", _) => NetworkFilterOption::Csp(if !value.is_empty() { |
| 215 | + Some(String::from(value)) |
| 216 | + } else { |
| 217 | + None |
| 218 | + }), |
| 219 | + ("removeparam", true) => return Err(NetworkFilterError::NegatedRemoveparam), |
| 220 | + ("removeparam", false) => { |
| 221 | + if value.is_empty() { |
| 222 | + return Err(NetworkFilterError::EmptyRemoveparam); |
| 223 | + } |
| 224 | + if !VALID_PARAM.is_match(value) { |
| 225 | + return Err(NetworkFilterError::RemoveparamRegexUnsupported); |
| 226 | + } |
| 227 | + NetworkFilterOption::Removeparam(String::from(value)) |
| 228 | + } |
| 229 | + ("generichide", true) | ("ghide", true) => { |
| 230 | + return Err(NetworkFilterError::NegatedGenericHide) |
| 231 | + } |
| 232 | + ("generichide", false) | ("ghide", false) => NetworkFilterOption::Generichide, |
| 233 | + ("document", true) | ("doc", true) => return Err(NetworkFilterError::NegatedDocument), |
| 234 | + ("document", false) | ("doc", false) => NetworkFilterOption::Document, |
| 235 | + ("image", negated) => NetworkFilterOption::Image(!negated), |
| 236 | + ("media", negated) => NetworkFilterOption::Media(!negated), |
| 237 | + ("object", negated) | ("object-subrequest", negated) => { |
| 238 | + NetworkFilterOption::Object(!negated) |
| 239 | + } |
| 240 | + ("other", negated) => NetworkFilterOption::Other(!negated), |
| 241 | + ("ping", negated) | ("beacon", negated) => NetworkFilterOption::Ping(!negated), |
| 242 | + ("script", negated) => NetworkFilterOption::Script(!negated), |
| 243 | + ("stylesheet", negated) | ("css", negated) => NetworkFilterOption::Stylesheet(!negated), |
| 244 | + ("subdocument", negated) | ("frame", negated) => { |
| 245 | + NetworkFilterOption::Subdocument(!negated) |
| 246 | + } |
| 247 | + ("xmlhttprequest", negated) | ("xhr", negated) => { |
| 248 | + NetworkFilterOption::XmlHttpRequest(!negated) |
| 249 | + } |
| 250 | + ("websocket", negated) => NetworkFilterOption::Websocket(!negated), |
| 251 | + ("font", negated) => NetworkFilterOption::Font(!negated), |
| 252 | + (_, _) => return Err(NetworkFilterError::UnrecognisedOption), |
| 253 | + }); |
| 254 | + } |
| 255 | + Ok(result) |
| 256 | +} |
0 commit comments