diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index c4f6a800..9ee9d7b0 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -30,6 +30,9 @@ jobs: - name: Cargo build 'adblock' package run: cargo build --all-features --all-targets + - name: Cargo build 'adblock' package (default features) + run: cargo build --all-targets + - name: Cargo build 'adblock' package (no default features) run: cargo build --no-default-features --all-targets @@ -61,6 +64,9 @@ jobs: - name: Cargo test 'adblock' package run: cargo test --all-features --tests --no-fail-fast + - name: Cargo test 'adblock' package (default features) + run: cargo test --tests --no-fail-fast + - name: Cargo test 'adblock' package (no default features) run: cargo test --no-default-features --features embedded-domain-resolver,full-regex-handling --tests --no-fail-fast @@ -79,7 +85,7 @@ jobs: # This hackily checks that the filter is working. # If this check fails, something might have been renamed inadvertantly. echo "Ensure that '$TEST_NAME_FILTER' still matches exactly 2 tests." - cargo test --all-features --test live --no-fail-fast -- --ignored "$TEST_NAME_FILTER" --list | grep "2 tests, 0 benchmarks" + cargo test --test live --no-fail-fast -- --ignored "$TEST_NAME_FILTER" --list | grep "2 tests, 0 benchmarks" # Now run the tests - cargo test --all-features --test live --no-fail-fast -- --ignored "$TEST_NAME_FILTER" + cargo test --test live --no-fail-fast -- --ignored "$TEST_NAME_FILTER" diff --git a/.github/workflows/perf-ci.yml b/.github/workflows/perf-ci.yml index f9f9d096..97419dfa 100644 --- a/.github/workflows/perf-ci.yml +++ b/.github/workflows/perf-ci.yml @@ -26,16 +26,16 @@ jobs: uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4 - name: Bench network filter matching - run: cargo bench --bench bench_matching rule-match-browserlike/brave-list -- --output-format bencher | tee -a output.txt + run: cargo bench --bench bench_matching --features flatbuffers rule-match-browserlike/brave-list -- --output-format bencher | tee -a output.txt - name: Bench first request matching delay - run: cargo bench --bench bench_matching rule-match-first-request -- --output-format bencher | tee -a output.txt + run: cargo bench --bench bench_matching --features flatbuffers rule-match-first-request -- --output-format bencher | tee -a output.txt - name: Bench startup speed - run: cargo bench --bench bench_rules blocker_new/brave-list -- --output-format bencher | tee -a output.txt + run: cargo bench --bench bench_rules --features flatbuffers blocker_new/brave-list -- --output-format bencher | tee -a output.txt - name: Bench memory usage - run: cargo bench --bench bench_memory -- --output-format bencher | tee -a output.txt + run: cargo bench --bench bench_memory --features flatbuffers -- --output-format bencher | tee -a output.txt - name: Store benchmark result uses: benchmark-action/github-action-benchmark@d48d326b4ca9ba73ca0cd0d59f108f9e02a381c7 # v1.20.4 diff --git a/Cargo.lock b/Cargo.lock index ca6c7e06..09324318 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -12,6 +12,7 @@ dependencies = [ "criterion", "cssparser", "csv", + "flatbuffers", "futures", "idna", "itertools 0.13.0", @@ -417,6 +418,16 @@ version = "1.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5443807d6dff69373d433ab9ef5378ad8df50ca6298caf15de6e52e24aaf54d5" +[[package]] +name = "flatbuffers" +version = "24.12.23" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4f1baf0dbf96932ec9a3038d57900329c015b0bfb7b63d904f3bc27e2b02a096" +dependencies = [ + "bitflags", + "rustc_version", +] + [[package]] name = "fnv" version = "1.0.7" diff --git a/Cargo.toml b/Cargo.toml index 3f3f7adf..77e932fe 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -41,6 +41,7 @@ cssparser = { version = "0.28", optional = true } selectors = { version = "0.23", optional = true } serde_json = "1.0" thiserror = "1.0" +flatbuffers = "24.12.23" [dev-dependencies] criterion = "0.5" @@ -55,6 +56,9 @@ sha2 = "0.9" [lib] bench = false +[profile.bench] +debug = true + [[bench]] name = "bench_regex" harness = false @@ -98,3 +102,4 @@ css-validation = ["cssparser", "selectors"] content-blocking = [] embedded-domain-resolver = ["addr"] # Requires setting an external domain resolver if disabled. resource-assembler = [] +flatbuffers = [] diff --git a/benches/bench_cosmetic_matching.rs b/benches/bench_cosmetic_matching.rs index f3a295d7..b4c64d65 100644 --- a/benches/bench_cosmetic_matching.rs +++ b/benches/bench_cosmetic_matching.rs @@ -57,13 +57,7 @@ fn by_classes_ids(c: &mut Criterion) { let (_, cosmetic_filters) = parse_filters(&rules, false, FilterFormat::Standard); let cfcache = CosmeticFilterCache::from_rules(cosmetic_filters); let exceptions = Default::default(); - b.iter(|| { - cfcache.hidden_class_id_selectors( - &["ad"], - &["ad"], - &exceptions, - ) - }) + b.iter(|| cfcache.hidden_class_id_selectors(&["ad"], &["ad"], &exceptions)) }); group.bench_function("many lists", move |b| { let rules = rules_from_lists(&[ @@ -75,13 +69,7 @@ fn by_classes_ids(c: &mut Criterion) { let (_, cosmetic_filters) = parse_filters(&rules, false, FilterFormat::Standard); let cfcache = CosmeticFilterCache::from_rules(cosmetic_filters); let exceptions = Default::default(); - b.iter(|| { - cfcache.hidden_class_id_selectors( - &["ad"], - &["ad"], - &exceptions, - ) - }) + b.iter(|| cfcache.hidden_class_id_selectors(&["ad"], &["ad"], &exceptions)) }); group.bench_function("many matching classes and ids", move |b| { let rules = rules_from_lists(&[ diff --git a/benches/bench_matching.rs b/benches/bench_matching.rs index 29ba112e..5efb08a9 100644 --- a/benches/bench_matching.rs +++ b/benches/bench_matching.rs @@ -2,11 +2,11 @@ use criterion::*; use serde::{Deserialize, Serialize}; -use adblock::Engine; use adblock::blocker::{Blocker, BlockerOptions}; use adblock::request::Request; use adblock::resources::ResourceStorage; use adblock::url_parser::parse_url; +use adblock::{Engine, Serialize as _}; #[path = "../tests/test_utils.rs"] mod test_utils; @@ -36,7 +36,7 @@ fn load_requests() -> Vec { reqs } -fn get_blocker(rules: impl IntoIterator>) -> Blocker { +fn get_blocker(rules: impl IntoIterator>) -> Blocker { let (network_filters, _) = adblock::lists::parse_filters(rules, false, Default::default()); let blocker_options = BlockerOptions { @@ -61,7 +61,11 @@ fn bench_rule_matching(engine: &Engine, requests: &Vec) -> (u32, u3 (matches, passes) } -fn bench_matching_only(blocker: &Blocker, resources: &ResourceStorage, requests: &Vec) -> (u32, u32) { +fn bench_matching_only( + blocker: &Blocker, + resources: &ResourceStorage, + requests: &Vec, +) -> (u32, u32) { let mut matches = 0; let mut passes = 0; requests.iter().for_each(|parsed| { @@ -78,10 +82,7 @@ fn bench_matching_only(blocker: &Blocker, resources: &ResourceStorage, requests: type ParsedRequest = (String, String, String, String, bool); -fn bench_rule_matching_browserlike( - blocker: &Engine, - requests: &Vec, -) -> (u32, u32) { +fn bench_rule_matching_browserlike(blocker: &Engine, requests: &Vec) -> (u32, u32) { let mut matches = 0; let mut passes = 0; requests.iter().for_each( @@ -141,9 +142,7 @@ fn rule_match(c: &mut Criterion) { fn rule_match_parsed_el(c: &mut Criterion) { let mut group = c.benchmark_group("rule-match-parsed"); - let rules = rules_from_lists(&[ - "data/easylist.to/easylist/easylist.txt", - ]); + let rules = rules_from_lists(&["data/easylist.to/easylist/easylist.txt"]); let requests = load_requests(); let requests_parsed: Vec<_> = requests .into_iter() @@ -221,9 +220,7 @@ fn serialization(c: &mut Criterion) { b.iter(|| assert!(engine.serialize_raw().unwrap().len() > 0)) }); group.bench_function("el", move |b| { - let full_rules = rules_from_lists(&[ - "data/easylist.to/easylist/easylist.txt", - ]); + let full_rules = rules_from_lists(&["data/easylist.to/easylist/easylist.txt"]); let engine = Engine::from_rules(full_rules, Default::default()); b.iter(|| assert!(engine.serialize_raw().unwrap().len() > 0)) @@ -258,9 +255,7 @@ fn deserialization(c: &mut Criterion) { }) }); group.bench_function("el", move |b| { - let full_rules = rules_from_lists(&[ - "data/easylist.to/easylist/easylist.txt", - ]); + let full_rules = rules_from_lists(&["data/easylist.to/easylist/easylist.txt"]); let engine = Engine::from_rules(full_rules, Default::default()); let serialized = engine.serialize_raw().unwrap(); @@ -294,9 +289,7 @@ fn rule_match_browserlike_comparable(c: &mut Criterion) { group.throughput(Throughput::Elements(requests_len)); group.sample_size(20); - fn requests_parsed( - requests: &[TestRequest], - ) -> Vec<(String, String, String, String, bool)> { + fn requests_parsed(requests: &[TestRequest]) -> Vec<(String, String, String, String, bool)> { requests .iter() .map(|r| { @@ -354,10 +347,10 @@ fn rule_match_browserlike_comparable(c: &mut Criterion) { b.iter(|| bench_rule_matching_browserlike(&engine, &requests)) }); group.bench_function("brave-list", |b| { - let rules = rules_from_lists(&["data/brave/brave-main-list.txt"]); - let engine = Engine::from_rules_parametrised(rules, Default::default(), false, true); - b.iter(|| bench_rule_matching_browserlike(&engine, &requests)) - }); + let rules = rules_from_lists(&["data/brave/brave-main-list.txt"]); + let engine = Engine::from_rules_parametrised(rules, Default::default(), false, true); + b.iter(|| bench_rule_matching_browserlike(&engine, &requests)) + }); group.finish(); } @@ -376,21 +369,20 @@ fn rule_match_first_request(c: &mut Criterion) { )]; group.bench_function("brave-list", |b| { - b.iter_custom( - |iters| { - let mut total_time = std::time::Duration::ZERO; - for _ in 0..iters { - let rules = rules_from_lists(&["data/brave/brave-main-list.txt"]); - let engine = Engine::from_rules_parametrised(rules, Default::default(), false, true); - - // Measure only the matching time, skip setup and destruction - let start_time = std::time::Instant::now(); - bench_rule_matching_browserlike(&engine, &requests); - total_time += start_time.elapsed(); - } - total_time + b.iter_custom(|iters| { + let mut total_time = std::time::Duration::ZERO; + for _ in 0..iters { + let rules = rules_from_lists(&["data/brave/brave-main-list.txt"]); + let engine = + Engine::from_rules_parametrised(rules, Default::default(), false, true); + + // Measure only the matching time, skip setup and destruction + let start_time = std::time::Instant::now(); + bench_rule_matching_browserlike(&engine, &requests); + total_time += start_time.elapsed(); } - ) + total_time + }) }); group.finish(); diff --git a/benches/bench_memory.rs b/benches/bench_memory.rs index 83ddc825..7efd034a 100644 --- a/benches/bench_memory.rs +++ b/benches/bench_memory.rs @@ -4,12 +4,12 @@ * You can obtain one at https://mozilla.org/MPL/2.0/. */ use criterion::*; +use serde::{Deserialize, Serialize}; use std::alloc::{GlobalAlloc, Layout, System}; use std::sync::atomic::{AtomicUsize, Ordering}; -use serde::{Deserialize, Serialize}; -use adblock::Engine; use adblock::request::Request; +use adblock::Engine; #[path = "../tests/test_utils.rs"] mod test_utils; @@ -110,15 +110,15 @@ fn bench_memory_usage(c: &mut Criterion) { let mut result = 0; b.iter_custom(|iters| { for _ in 0..iters { - ALLOCATOR.reset(); - let rules = rules_from_lists(&["data/brave/brave-main-list.txt"]); - let engine = Engine::from_rules(rules, Default::default()); + ALLOCATOR.reset(); + let rules = rules_from_lists(&["data/brave/brave-main-list.txt"]); + let engine = Engine::from_rules(rules, Default::default()); - noise += 1; // add some noise to make criterion happy - result += ALLOCATOR.current_usage() + noise; + noise += 1; // add some noise to make criterion happy + result += ALLOCATOR.current_usage() + noise; - // Prevent engine from being optimized - criterion::black_box(&engine); + // Prevent engine from being optimized + criterion::black_box(&engine); } // Return the memory usage as a Duration @@ -134,15 +134,15 @@ fn bench_memory_usage(c: &mut Criterion) { let rules = rules_from_lists(&["data/brave/brave-main-list.txt"]); let engine = Engine::from_rules(rules, Default::default()); - for request in first_1000_requests.clone() { - criterion::black_box(engine.check_network_request(&request.into())); - } + for request in first_1000_requests.clone() { + criterion::black_box(engine.check_network_request(&request.into())); + } - noise += 1; // add some noise to make criterion happy - result += ALLOCATOR.current_usage() + noise; + noise += 1; // add some noise to make criterion happy + result += ALLOCATOR.current_usage() + noise; - // Prevent engine from being optimized - criterion::black_box(&engine); + // Prevent engine from being optimized + criterion::black_box(&engine); } // Return the memory usage as a Duration diff --git a/benches/bench_redirect_performance.rs b/benches/bench_redirect_performance.rs index fa562c5a..c9088dc3 100644 --- a/benches/bench_redirect_performance.rs +++ b/benches/bench_redirect_performance.rs @@ -2,6 +2,7 @@ use criterion::*; use tokio::runtime::Runtime; use adblock::blocker::{Blocker, BlockerOptions}; +use adblock::filters::network::NetworkFilterMaskHelper; use adblock::filters::network::{NetworkFilter, NetworkFilterMask}; use adblock::request::Request; use adblock::resources::ResourceStorage; @@ -66,9 +67,7 @@ fn get_redirect_rules() -> Vec { .into_iter() .filter(NetworkFilter::is_redirect) .filter(NetworkFilter::also_block_redirect) - .filter(|rule| { - rule.modifier_option.as_ref().unwrap() != "none" - }) + .filter(|rule| rule.modifier_option.as_ref().unwrap() != "none") .enumerate() .map(|(index, mut rule)| { rule.mask.insert(NetworkFilterMask::IS_LEFT_ANCHOR); @@ -102,8 +101,8 @@ fn build_resources_for_filters(#[allow(unused)] filters: &[NetworkFilter]) -> Re #[cfg(feature = "resource-assembler")] { - use std::path::Path; use adblock::resources::resource_assembler::assemble_web_accessible_resources; + use std::path::Path; let mut resource_data = assemble_web_accessible_resources( Path::new("data/test/fake-uBO-files/web_accessible_resources"), @@ -116,16 +115,14 @@ fn build_resources_for_filters(#[allow(unused)] filters: &[NetworkFilter]) -> Re )), ); - resource_data - .into_iter() - .for_each(|resource| { - let _res = resources.add_resource(resource); - }); + resource_data.into_iter().for_each(|resource| { + let _res = resources.add_resource(resource); + }); } #[cfg(not(feature = "resource-assembler"))] { - use adblock::resources::{Resource, ResourceType, MimeType}; + use adblock::resources::{MimeType, Resource, ResourceType}; filters .iter() @@ -213,11 +210,7 @@ pub fn build_custom_requests(rules: Vec) -> Vec { let source_url = format!("https://{}", source_hostname); - Request::new( - &url, - &source_url, - raw_type, - ).unwrap() + Request::new(&url, &source_url, raw_type).unwrap() }) .collect::>() } @@ -225,7 +218,12 @@ pub fn build_custom_requests(rules: Vec) -> Vec { fn bench_fn(blocker: &Blocker, resources: &ResourceStorage, requests: &[Request]) { requests.iter().for_each(|request| { let block_result = blocker.check(&request, &resources); - assert!(block_result.redirect.is_some(), "{:?}, {:?}", request, block_result); + assert!( + block_result.redirect.is_some(), + "{:?}, {:?}", + request, + block_result + ); }); } diff --git a/benches/bench_rules.rs b/benches/bench_rules.rs index 0bc106f8..603350f7 100644 --- a/benches/bench_rules.rs +++ b/benches/bench_rules.rs @@ -7,11 +7,8 @@ use adblock::blocker::{Blocker, BlockerOptions}; mod test_utils; use test_utils::rules_from_lists; -static DEFAULT_LISTS: Lazy> = Lazy::new(|| { - rules_from_lists(&[ - "data/easylist.to/easylist/easylist.txt", - ]).collect() -}); +static DEFAULT_LISTS: Lazy> = + Lazy::new(|| rules_from_lists(&["data/easylist.to/easylist/easylist.txt"]).collect()); fn bench_string_hashing(filters: &Vec) -> adblock::utils::Hash { let mut dummy: adblock::utils::Hash = 0; @@ -81,7 +78,7 @@ fn list_parse(c: &mut Criterion) { group.finish(); } -fn get_blocker(rules: impl IntoIterator>) -> Blocker { +fn get_blocker(rules: impl IntoIterator>) -> Blocker { let (network_filters, _) = adblock::lists::parse_filters(rules, false, Default::default()); let blocker_options = BlockerOptions { @@ -100,13 +97,14 @@ fn blocker_new(c: &mut Criterion) { let easylist_rules: Vec<_> = rules_from_lists(&[ "data/easylist.to/easylist/easylist.txt", "data/easylist.to/easylist/easyprivacy.txt", - ]).collect(); - let brave_list_rules: Vec<_> = rules_from_lists(&[ - "data/brave/brave-main-list.txt", - ]).collect(); + ]) + .collect(); + let brave_list_rules: Vec<_> = rules_from_lists(&["data/brave/brave-main-list.txt"]).collect(); group.bench_function("el+ep", move |b| b.iter(|| get_blocker(&easylist_rules))); - group.bench_function("brave-list", move |b| b.iter(|| get_blocker(&brave_list_rules))); + group.bench_function("brave-list", move |b| { + b.iter(|| get_blocker(&brave_list_rules)) + }); group.finish(); } diff --git a/examples/deserialization.rs b/examples/deserialization.rs index 4d780405..6d7e3739 100644 --- a/examples/deserialization.rs +++ b/examples/deserialization.rs @@ -1,7 +1,4 @@ -use adblock::{ - Engine, - request::Request, -}; +use adblock::{request::Request, Engine, Serialize}; use serde::Deserialize; diff --git a/examples/example.rs b/examples/example.rs index fa6d94d9..20d07a91 100644 --- a/examples/example.rs +++ b/examples/example.rs @@ -1,16 +1,11 @@ use adblock::{ - Engine, lists::{FilterSet, ParseOptions}, request::Request, + Engine, }; fn main() { - let rules = vec![ - String::from("-advertisement-icon."), - String::from("-advertisement-management/"), - String::from("-advertisement."), - String::from("-advertisement/script."), - ]; + let rules = vec![String::from("||yandex.*/clck/$~ping")]; let debug_info = true; let mut filter_set = FilterSet::new(debug_info); @@ -19,10 +14,11 @@ fn main() { let engine = Engine::from_filter_set(filter_set, true); let request = Request::new( - "http://example.com/-advertisement-icon.", - "http://example.com/helloworld", - "image", - ).unwrap(); + "https://yandex.ru/clck/counter", + "https://www.yandex.ru/", + "other", + ) + .unwrap(); let blocker_result = engine.check_network_request(&request); println!("Blocker result: {:?}", blocker_result); diff --git a/examples/generate-dat.rs b/examples/generate-dat.rs index f28bf4b8..324edf15 100644 --- a/examples/generate-dat.rs +++ b/examples/generate-dat.rs @@ -1,7 +1,4 @@ -use adblock::{ - Engine, - request::Request, -}; +use adblock::{request::Request, Engine, Serialize}; use std::fs::File; use std::io::prelude::*; @@ -20,12 +17,10 @@ fn main() { let request = Request::new( "https://platform.twitter.com/widgets.js", "https://fmarier.github.io/brave-testing/social-widgets.html", - "script" - ).unwrap(); - assert!(engine - .check_network_request(&request) - .exception - .is_some()); + "script", + ) + .unwrap(); + assert!(engine.check_network_request(&request).exception.is_some()); let serialized = engine.serialize_raw().expect("Could not serialize!"); // Write to file diff --git a/examples/use-dat.rs b/examples/use-dat.rs index 33f0a92b..8240b864 100644 --- a/examples/use-dat.rs +++ b/examples/use-dat.rs @@ -1,7 +1,4 @@ -use adblock::{ - Engine, - request::Request, -}; +use adblock::{request::Request, Engine, Serialize}; use std::fs::File; use std::io::prelude::*; @@ -23,7 +20,8 @@ fn main() { "https://platform.twitter.com/widgets.js", "https://fmarier.github.io/brave-testing/social-widgets.html", "script", - ).unwrap(); + ) + .unwrap(); let checked = engine.check_network_request(&request); assert!(checked.filter.is_some()); assert!(checked.exception.is_some()); diff --git a/js/Cargo.lock b/js/Cargo.lock index 226e3bc2..efeb2304 100644 --- a/js/Cargo.lock +++ b/js/Cargo.lock @@ -10,6 +10,7 @@ dependencies = [ "base64", "bitflags", "cssparser", + "flatbuffers", "idna", "itertools", "lifeguard", @@ -163,6 +164,16 @@ version = "1.5.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5527cfe0d098f36e3f8839852688e63c8fff1c90b2b405aef730615f9a7bcf7b" +[[package]] +name = "flatbuffers" +version = "24.12.23" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4f1baf0dbf96932ec9a3038d57900329c015b0bfb7b63d904f3bc27e2b02a096" +dependencies = [ + "bitflags", + "rustc_version", +] + [[package]] name = "form_urlencoded" version = "1.2.1" diff --git a/js/src/lib.rs b/js/src/lib.rs index 77639e4c..9d194d41 100644 --- a/js/src/lib.rs +++ b/js/src/lib.rs @@ -5,6 +5,7 @@ use std::cell::RefCell; use std::sync::Mutex; use std::path::Path; use adblock::Engine as EngineInternal; +use adblock::Serialize as SerializeInternal; use adblock::lists::{RuleTypes, FilterFormat, FilterListMetadata, FilterSet as FilterSetInternal, ParseOptions}; use adblock::resources::Resource; use adblock::resources::resource_assembler::assemble_web_accessible_resources; diff --git a/src/blocker.rs b/src/blocker.rs index 888c97fc..0ab700e7 100644 --- a/src/blocker.rs +++ b/src/blocker.rs @@ -2,22 +2,21 @@ use memchr::{memchr as find_char, memrchr as find_char_reverse}; use once_cell::sync::Lazy; +use serde::Serialize; +use std::collections::HashSet; use std::ops::DerefMut; -use serde::{Deserialize, Serialize}; -use std::sync::Arc; -use std::collections::{HashMap, HashSet}; use thiserror::Error; -#[cfg(feature = "object-pooling")] -use lifeguard::Pool; +use crate::filters::network::{NetworkFilter, NetworkFilterMaskHelper}; +pub(crate) use crate::network_filter_list::NetworkFilterListTrait; + +#[allow(unused_imports)] +pub(crate) use crate::network_filter_list::NetworkFilterList; -use crate::filters::network::{NetworkFilter, NetworkMatchable}; use crate::regex_manager::{RegexManager, RegexManagerDiscardPolicy}; use crate::request::Request; -use crate::utils::{fast_hash, Hash}; -use crate::optimizer; use crate::resources::ResourceStorage; -use crate::utils; +use crate::utils::Hash; /// Options used when constructing a [`Blocker`]. pub struct BlockerOptions { @@ -88,37 +87,29 @@ pub enum BlockerError { FilterExists, } -#[cfg(feature = "object-pooling")] -pub(crate) struct TokenPool { - pub pool: Pool> -} - -#[cfg(feature = "object-pooling")] -impl Default for TokenPool { - fn default() -> TokenPool { - TokenPool { - pool: lifeguard::pool() - .with(lifeguard::StartingSize(1)) - .with(lifeguard::Supplier(|| Vec::with_capacity(utils::TOKENS_BUFFER_SIZE))) - .build() - } - } -} - // only check for tags in tagged and exception rule buckets, // pass empty set for the rest static NO_TAGS: Lazy> = Lazy::new(HashSet::new); +#[cfg(feature = "flatbuffers")] +pub type Blocker = GenericBlocker; + +#[cfg(not(feature = "flatbuffers"))] +pub type Blocker = GenericBlocker; + /// Stores network filters for efficient querying. -pub struct Blocker { - pub(crate) csp: NetworkFilterList, - pub(crate) exceptions: NetworkFilterList, - pub(crate) importants: NetworkFilterList, - pub(crate) redirects: NetworkFilterList, - pub(crate) removeparam: NetworkFilterList, - pub(crate) filters_tagged: NetworkFilterList, - pub(crate) filters: NetworkFilterList, - pub(crate) generic_hide: NetworkFilterList, +pub struct GenericBlocker +where + NetworkFilterListType: NetworkFilterListTrait, +{ + pub(crate) csp: NetworkFilterListType, + pub(crate) exceptions: NetworkFilterListType, + pub(crate) importants: NetworkFilterListType, + pub(crate) redirects: NetworkFilterListType, + pub(crate) removeparam: NetworkFilterListType, + pub(crate) filters_tagged: NetworkFilterListType, + pub(crate) filters: NetworkFilterListType, + pub(crate) generic_hide: NetworkFilterListType, // Enabled tags are not serialized - when deserializing, tags of the existing // instance (the one we are recreating lists into) are maintained @@ -127,10 +118,6 @@ pub struct Blocker { pub(crate) enable_optimizations: bool, - // Not serialized - #[cfg(feature = "object-pooling")] - pub(crate) pool: TokenPool, - // Not serialized #[cfg(feature = "unsync-regex-caching")] pub(crate) regex_manager: std::cell::RefCell, @@ -138,7 +125,10 @@ pub struct Blocker { pub(crate) regex_manager: std::sync::Mutex, } -impl Blocker { +impl GenericBlocker +where + NetworkFilterListType: NetworkFilterListTrait, +{ /// Decide if a network request (usually from WebRequest API) should be /// blocked, redirected or allowed. pub fn check(&self, request: &Request, resources: &ResourceStorage) -> BlockerResult { @@ -165,24 +155,9 @@ impl Blocker { pub fn check_generic_hide(&self, hostname_request: &Request) -> bool { let mut regex_manager = self.borrow_regex_manager(); - let mut request_tokens; - #[cfg(feature = "object-pooling")] - { - request_tokens = self.pool.pool.new(); - } - #[cfg(not(feature = "object-pooling"))] - { - request_tokens = Vec::with_capacity(utils::TOKENS_BUFFER_SIZE); - } - hostname_request.get_tokens(&mut request_tokens); self.generic_hide - .check( - hostname_request, - &request_tokens, - &HashSet::new(), - &mut regex_manager, - ) + .check(hostname_request, &HashSet::new(), &mut regex_manager) .is_some() } @@ -198,17 +173,6 @@ impl Blocker { return BlockerResult::default(); } - let mut request_tokens; - #[cfg(feature = "object-pooling")] - { - request_tokens = self.pool.pool.new(); - } - #[cfg(not(feature = "object-pooling"))] - { - request_tokens = Vec::with_capacity(utils::TOKENS_BUFFER_SIZE); - } - request.get_tokens(&mut request_tokens); - // Check the filters in the following order: // 1. $important (not subject to exceptions) // 2. redirection ($redirect=resource) @@ -216,30 +180,13 @@ impl Blocker { // 4. exceptions - if any non-important match of forced // Always check important filters - let important_filter = self.importants.check( - request, - &request_tokens, - &NO_TAGS, - &mut regex_manager, - ); + let important_filter = self.importants.check(request, &NO_TAGS, &mut regex_manager); // only check the rest of the rules if not previously matched let filter = if important_filter.is_none() && !matched_rule { self.filters_tagged - .check( - request, - &request_tokens, - &self.tags_enabled, - &mut regex_manager, - ) - .or_else(|| { - self.filters.check( - request, - &request_tokens, - &NO_TAGS, - &mut regex_manager, - ) - }) + .check(request, &self.tags_enabled, &mut regex_manager) + .or_else(|| self.filters.check(request, &NO_TAGS, &mut regex_manager)) } else { important_filter }; @@ -247,32 +194,20 @@ impl Blocker { let exception = match filter.as_ref() { // if no other rule matches, only check exceptions if forced to None if matched_rule || force_check_exceptions => { - self.exceptions.check( - request, - &request_tokens, - &self.tags_enabled, - &mut regex_manager, - ) + self.exceptions + .check(request, &self.tags_enabled, &mut regex_manager) } None => None, // If matched an important filter, exceptions don't atter Some(f) if f.is_important() => None, - Some(_) => { - self.exceptions.check( - request, - &request_tokens, - &self.tags_enabled, - &mut regex_manager, - ) - } + Some(_) => self + .exceptions + .check(request, &self.tags_enabled, &mut regex_manager), }; - let redirect_filters = self.redirects.check_all( - request, - &request_tokens, - &NO_TAGS, - regex_manager.deref_mut(), - ); + let redirect_filters = + self.redirects + .check_all(request, &NO_TAGS, regex_manager.deref_mut()); // Extract the highest priority redirect directive. // 1. Exceptions - can bail immediately if found @@ -292,17 +227,18 @@ impl Blocker { if let Some(redirect) = redirect_filter.modifier_option.as_ref() { if !exceptions.contains(&redirect) { // parse redirect + priority - let (resource, priority) = if let Some(idx) = find_char_reverse(b':', redirect.as_bytes()) { - let priority_str = &redirect[idx + 1..]; - let resource = &redirect[..idx]; - if let Ok(priority) = priority_str.parse::() { - (resource, priority) + let (resource, priority) = + if let Some(idx) = find_char_reverse(b':', redirect.as_bytes()) { + let priority_str = &redirect[idx + 1..]; + let resource = &redirect[..idx]; + if let Ok(priority) = priority_str.parse::() { + (resource, priority) + } else { + (&redirect[..], 0) + } } else { (&redirect[..], 0) - } - } else { - (&redirect[..], 0) - }; + }; if let Some((_, p1)) = resource_and_priority { if priority > p1 { resource_and_priority = Some((resource, priority)); @@ -327,17 +263,16 @@ impl Blocker { }) }); - let important = filter.is_some() && filter.as_ref().map(|f| f.is_important()).unwrap_or_else(|| false); + let important = filter.is_some() + && filter + .as_ref() + .map(|f| f.is_important()) + .unwrap_or_else(|| false); let rewritten_url = if important { None } else { - Self::apply_removeparam( - &self.removeparam, - request, - &request_tokens, - regex_manager.deref_mut(), - ) + Self::apply_removeparam(&self.removeparam, request, regex_manager.deref_mut()) }; // If something has already matched before but we don't know what, still return a match @@ -353,9 +288,8 @@ impl Blocker { } fn apply_removeparam( - removeparam_filters: &NetworkFilterList, + removeparam_filters: &NetworkFilterListType, request: &Request, - request_tokens: &[Hash], regex_manager: &mut RegexManager, ) -> Option { /// Represents an `&`-separated argument from a URL query parameter string @@ -399,7 +333,7 @@ impl Blocker { .map(|param| (param, true)) .collect(); - let filters = removeparam_filters.check_all(request, request_tokens, &NO_TAGS, regex_manager); + let filters = removeparam_filters.check_all(request, &NO_TAGS, regex_manager); let mut rewrite = false; for removeparam_filter in filters { if let Some(removeparam) = &removeparam_filter.modifier_option { @@ -414,13 +348,24 @@ impl Blocker { } } if rewrite { - let p = itertools::join(params.into_iter().filter(|(_, include)| *include).map(|(param, _)| param.to_string()), "&"); + let p = itertools::join( + params + .into_iter() + .filter(|(_, include)| *include) + .map(|(param, _)| param.to_string()), + "&", + ); let new_param_str = if p.is_empty() { String::from("") } else { format!("?{}", p) }; - Some(format!("{}{}{}", &url[0..i], new_param_str, &url[hash_index..])) + Some(format!( + "{}{}{}", + &url[0..i], + new_param_str, + &url[hash_index..] + )) } else { None } @@ -434,50 +379,38 @@ impl Blocker { pub fn get_csp_directives(&self, request: &Request) -> Option { use crate::request::RequestType; - if request.request_type != RequestType::Document && request.request_type != RequestType::Subdocument { + if request.request_type != RequestType::Document + && request.request_type != RequestType::Subdocument + { return None; } - let mut request_tokens; let mut regex_manager = self.borrow_regex_manager(); - #[cfg(feature = "object-pooling")] - { - request_tokens = self.pool.pool.new(); - } - #[cfg(not(feature = "object-pooling"))] - { - request_tokens = Vec::with_capacity(utils::TOKENS_BUFFER_SIZE); - } - request.get_tokens(&mut request_tokens); - - let filters = self.csp.check_all( - request, - &request_tokens, - &self.tags_enabled, - &mut regex_manager, - ); + let filters = self + .csp + .check_all(request, &self.tags_enabled, &mut regex_manager); if filters.is_empty() { return None; } - let mut disabled_directives: HashSet<&str> = HashSet::new(); - let mut enabled_directives: HashSet<&str> = HashSet::new(); + let mut disabled_directives: HashSet = HashSet::new(); + let mut enabled_directives: HashSet = HashSet::new(); for filter in filters { if filter.is_exception() { if filter.is_csp() { - if let Some(csp_directive) = &filter.modifier_option { + if let Some(csp_directive) = filter.modifier_option { disabled_directives.insert(csp_directive); } else { // Exception filters with empty `csp` options will disable all CSP // injections for matching pages. - return None + return None; } } } else if filter.is_csp() { - if let Some(csp_directive) = &filter.modifier_option { + if let Some(csp_directive) = filter.modifier_option { enabled_directives.insert(csp_directive); } } @@ -486,7 +419,7 @@ impl Blocker { let mut remaining_directives = enabled_directives.difference(&disabled_directives); let mut merged = if let Some(directive) = remaining_directives.next() { - String::from(*directive) + directive.to_string() } else { return None; }; @@ -499,7 +432,7 @@ impl Blocker { Some(merged) } - pub fn new(network_filters: Vec, options: &BlockerOptions) -> Blocker { + pub fn new(network_filters: Vec, options: &BlockerOptions) -> Self { // Capacity of filter subsets estimated based on counts in EasyList and EasyPrivacy - if necessary // the Vectors will grow beyond the pre-set capacity, but it is more efficient to allocate all at once // $csp= @@ -530,7 +463,10 @@ impl Blocker { badfilters.push(filter); } } - let badfilter_ids: HashSet = badfilters.iter().map(|f| f.get_id_without_badfilter()).collect(); + let badfilter_ids: HashSet = badfilters + .iter() + .map(|f| f.get_id_without_badfilter()) + .collect(); for filter in network_filters { // skip any bad filters let filter_id = filter.get_id(); @@ -557,7 +493,9 @@ impl Blocker { // `tag` + `redirect` is unsupported for now. tagged_filters_all.push(filter); } else { - if (filter.is_redirect() && filter.also_block_redirect()) || !filter.is_redirect() { + if (filter.is_redirect() && filter.also_block_redirect()) + || !filter.is_redirect() + { filters.push(filter); } } @@ -566,25 +504,23 @@ impl Blocker { tagged_filters_all.shrink_to_fit(); - Blocker { - csp: NetworkFilterList::new(csp, options.enable_optimizations), - exceptions: NetworkFilterList::new(exceptions, options.enable_optimizations), - importants: NetworkFilterList::new(importants, options.enable_optimizations), - redirects: NetworkFilterList::new(redirects, options.enable_optimizations), + Self { + csp: NetworkFilterListType::new(csp, options.enable_optimizations), + exceptions: NetworkFilterListType::new(exceptions, options.enable_optimizations), + importants: NetworkFilterListType::new(importants, options.enable_optimizations), + redirects: NetworkFilterListType::new(redirects, options.enable_optimizations), // Don't optimize removeparam, since it can fuse filters without respecting distinct // queryparam values - removeparam: NetworkFilterList::new(removeparam, false), - filters_tagged: NetworkFilterList::new(Vec::new(), options.enable_optimizations), - filters: NetworkFilterList::new(filters, options.enable_optimizations), - generic_hide: NetworkFilterList::new(generic_hide, options.enable_optimizations), + removeparam: NetworkFilterListType::new(removeparam, false), + filters_tagged: NetworkFilterListType::new(Vec::new(), options.enable_optimizations), + filters: NetworkFilterListType::new(filters, options.enable_optimizations), + generic_hide: NetworkFilterListType::new(generic_hide, options.enable_optimizations), // Tags special case for enabling/disabling them dynamically tags_enabled: HashSet::new(), tagged_filters_all, // Options enable_optimizations: options.enable_optimizations, - #[cfg(feature = "object-pooling")] - pool: TokenPool::default(), regex_manager: Default::default(), } } @@ -674,7 +610,10 @@ impl Blocker { } pub fn enable_tags(&mut self, tags: &[&str]) { - let tag_set: HashSet = tags.iter().map(|&t| String::from(t)).collect::>() + let tag_set: HashSet = tags + .iter() + .map(|&t| String::from(t)) + .collect::>() .union(&self.tags_enabled) .cloned() .collect(); @@ -682,7 +621,8 @@ impl Blocker { } pub fn disable_tags(&mut self, tags: &[&str]) { - let tag_set: HashSet = self.tags_enabled + let tag_set: HashSet = self + .tags_enabled .difference(&tags.iter().map(|&t| String::from(t)).collect()) .cloned() .collect(); @@ -691,21 +631,20 @@ impl Blocker { fn tags_with_set(&mut self, tags_enabled: HashSet) { self.tags_enabled = tags_enabled; - let filters: Vec = self.tagged_filters_all.iter() + let filters: Vec = self + .tagged_filters_all + .iter() .filter(|n| n.tag.is_some() && self.tags_enabled.contains(n.tag.as_ref().unwrap())) .cloned() .collect(); - self.filters_tagged = NetworkFilterList::new(filters, self.enable_optimizations); + self.filters_tagged = NetworkFilterListType::new(filters, self.enable_optimizations); } pub fn tags_enabled(&self) -> Vec { self.tags_enabled.iter().cloned().collect() } - pub fn set_regex_discard_policy( - &self, - new_discard_policy: RegexManagerDiscardPolicy - ) { + pub fn set_regex_discard_policy(&self, new_discard_policy: RegexManagerDiscardPolicy) { let mut regex_manager = self.borrow_regex_manager(); regex_manager.set_discard_policy(new_discard_policy); } @@ -723,1613 +662,6 @@ impl Blocker { } } -#[derive(Serialize, Deserialize, Default)] -pub(crate) struct NetworkFilterList { - #[serde(serialize_with = "crate::data_format::utils::stabilize_hashmap_serialization")] - pub(crate) filter_map: HashMap>>, -} - -impl NetworkFilterList { - pub fn new(filters: Vec, optimize: bool) -> NetworkFilterList { - // Compute tokens for all filters - let filter_tokens: Vec<_> = filters - .into_iter() - .map(|filter| { - let tokens = filter.get_tokens(); - (Arc::new(filter), tokens) - }) - .collect(); - // compute the tokens' frequency histogram - let (total_number_of_tokens, tokens_histogram) = token_histogram(&filter_tokens); - - // Build a HashMap of tokens to Network Filters (held through Arc, Atomic Reference Counter) - let mut filter_map = HashMap::with_capacity(filter_tokens.len()); - { - for (filter_pointer, multi_tokens) in filter_tokens { - for tokens in multi_tokens { - let mut best_token: Hash = 0; - let mut min_count = total_number_of_tokens + 1; - for token in tokens { - match tokens_histogram.get(&token) { - None => { - min_count = 0; - best_token = token - } - Some(&count) if count < min_count => { - min_count = count; - best_token = token - } - _ => {} - } - } - insert_dup(&mut filter_map, best_token, Arc::clone(&filter_pointer)); - } - } - } - - let mut self_ = NetworkFilterList { - filter_map, - }; - - if optimize { - self_.optimize(); - } else { - self_.filter_map.shrink_to_fit(); - } - - self_ - } - - pub fn optimize(&mut self) { - let mut optimized_map = HashMap::with_capacity(self.filter_map.len()); - for (key, filters) in self.filter_map.drain() { - let mut unoptimized: Vec = Vec::with_capacity(filters.len()); - let mut unoptimizable: Vec> = Vec::with_capacity(filters.len()); - for f in filters { - match Arc::try_unwrap(f) { - Ok(f) => unoptimized.push(f), - Err(af) => unoptimizable.push(af) - } - } - - let mut optimized: Vec<_> = if unoptimized.len() > 1 { - optimizer::optimize(unoptimized).into_iter().map(Arc::new).collect() - } else { - // nothing to optimize - unoptimized.into_iter().map(Arc::new).collect() - }; - - optimized.append(&mut unoptimizable); - optimized.shrink_to_fit(); - optimized_map.insert(key, optimized); - } - - // won't mutate anymore, shrink to fit items - optimized_map.shrink_to_fit(); - - self.filter_map = optimized_map; - } - - pub fn add_filter(&mut self, filter: NetworkFilter) { - let filter_tokens = filter.get_tokens(); - let total_rules = vec_hashmap_len(&self.filter_map); - let filter_pointer = Arc::new(filter); - - for tokens in filter_tokens { - let mut best_token: Hash = 0; - let mut min_count = total_rules + 1; - for token in tokens { - match self.filter_map.get(&token) { - None => { - min_count = 0; - best_token = token - } - Some(filters) if filters.len() < min_count => { - min_count = filters.len(); - best_token = token - } - _ => {} - } - } - - insert_dup(&mut self.filter_map, best_token, Arc::clone(&filter_pointer)); - } - } - - /// This may not work if the list has been optimized. - pub fn filter_exists(&self, filter: &NetworkFilter) -> bool { - let mut tokens: Vec<_> = filter.get_tokens().into_iter().flatten().collect(); - - if tokens.is_empty() { - tokens.push(0) - } - - for token in tokens { - if let Some(filters) = self.filter_map.get(&token) { - for saved_filter in filters { - if saved_filter.id == filter.id { - return true; - } - } - } - } - - false - } - - /// Returns the first found filter, if any, that matches the given request. The backing storage - /// has a non-deterministic order, so this should be used for any category of filters where a - /// match from each would be functionally equivalent. For example, if two different exception - /// filters match a certain request, it doesn't matter _which_ one is matched - the request - /// will be excepted either way. - pub fn check( - &self, - request: &Request, - request_tokens: &[Hash], - active_tags: &HashSet, - regex_manager: &mut RegexManager, - ) -> Option<&NetworkFilter> { - if self.filter_map.is_empty() { - return None; - } - - if let Some(source_hostname_hashes) = request.source_hostname_hashes.as_ref() { - for token in source_hostname_hashes { - if let Some(filter_bucket) = self.filter_map.get(token) { - for filter in filter_bucket { - // if matched, also needs to be tagged with an active tag (or not tagged at all) - if filter.matches(request, regex_manager) - && filter - .tag - .as_ref() - .map(|t| active_tags.contains(t)) - .unwrap_or(true) - { - return Some(filter); - } - } - } - } - } - - for token in request_tokens { - if let Some(filter_bucket) = self.filter_map.get(token) { - for filter in filter_bucket { - // if matched, also needs to be tagged with an active tag (or not tagged at all) - if filter.matches(request, regex_manager) && filter.tag.as_ref().map(|t| active_tags.contains(t)).unwrap_or(true) { - return Some(filter); - } - } - } - } - - None - } - - /// Returns _all_ filters that match the given request. This should be used for any category of - /// filters where a match from each may carry unique information. For example, if two different - /// `$csp` filters match a certain request, they may each carry a distinct CSP directive, and - /// each directive should be combined for the final result. - pub fn check_all( - &self, - request: &Request, - request_tokens: &[Hash], - active_tags: &HashSet, - regex_manager: &mut RegexManager, - ) -> Vec<&NetworkFilter> { - let mut filters: Vec<&NetworkFilter> = vec![]; - - if self.filter_map.is_empty() { - return filters; - } - - if let Some(source_hostname_hashes) = request.source_hostname_hashes.as_ref() { - for token in source_hostname_hashes { - if let Some(filter_bucket) = self.filter_map.get(token) { - for filter in filter_bucket { - // if matched, also needs to be tagged with an active tag (or not tagged at all) - if filter.matches(request, regex_manager) && filter.tag.as_ref().map(|t| active_tags.contains(t)).unwrap_or(true) { - filters.push(filter); - } - } - } - } - } - - for token in request_tokens { - if let Some(filter_bucket) = self.filter_map.get(token) { - for filter in filter_bucket { - // if matched, also needs to be tagged with an active tag (or not tagged at all) - if filter.matches(request, regex_manager) && filter.tag.as_ref().map(|t| active_tags.contains(t)).unwrap_or(true) { - filters.push(filter); - } - } - } - } - - filters - } -} - -/// Inserts a value into the `Vec` under the specified key in the `HashMap`. The entry will be -/// created if it does not exist. If it already exists, it will be inserted in the `Vec` in a -/// sorted order. -fn insert_dup(map: &mut HashMap, H>, k: K, v: V) -where - K: std::cmp::Ord + std::hash::Hash, - V: PartialOrd, -{ - let entry = map.entry(k).or_insert_with(Vec::new); - - match entry.binary_search_by(|f| f.partial_cmp(&v).unwrap_or(std::cmp::Ordering::Equal)) { - Ok(_pos) => (), // Can occur if the exact same rule is inserted twice. No reason to add anything. - Err(slot) => entry.insert(slot, v), - } -} - -fn vec_hashmap_len(map: &HashMap, H>) -> usize { - let mut size = 0usize; - for (_, val) in map.iter() { - size += val.len(); - } - size -} - -fn token_histogram(filter_tokens: &[(T, Vec>)]) -> (u32, HashMap) { - let mut tokens_histogram: HashMap = HashMap::new(); - let mut number_of_tokens = 0; - for (_, tokens) in filter_tokens.iter() { - for tg in tokens { - for t in tg { - *tokens_histogram.entry(*t).or_insert(0) += 1; - number_of_tokens += 1; - } - } - } - - for bad_token in ["http", "https", "www", "com"].iter() { - tokens_histogram.insert(fast_hash(bad_token), number_of_tokens); - } - - (number_of_tokens, tokens_histogram) -} - #[cfg(test)] -mod tests { - use super::*; - - #[test] - fn insert_dup_works() { - let mut dup_map: HashMap> = HashMap::new(); - - // inserts into empty - insert_dup(&mut dup_map, 1, String::from("foo")); - assert_eq!(dup_map.get(&1), Some(&vec![String::from("foo")])); - - // adds item - insert_dup(&mut dup_map, 1, String::from("bar")); - assert_eq!( - dup_map.get(&1), - Some(&vec![String::from("bar"), String::from("foo")]) - ); - - // inserts into another key item - insert_dup(&mut dup_map, 123, String::from("baz")); - assert_eq!(dup_map.get(&123), Some(&vec![String::from("baz")])); - assert_eq!( - dup_map.get(&1), - Some(&vec![String::from("bar"), String::from("foo")]) - ); - } - - #[test] - fn token_histogram_works() { - // handle the case of just 1 token - { - let tokens = vec![(0, vec![vec![111]])]; - let (total_tokens, histogram) = token_histogram(&tokens); - assert_eq!(total_tokens, 1); - assert_eq!(histogram.get(&111), Some(&1)); - // include bad tokens - assert_eq!(histogram.get(&fast_hash("http")), Some(&1)); - assert_eq!(histogram.get(&fast_hash("www")), Some(&1)); - } - - // handle the case of repeating tokens - { - let tokens = vec![(0, vec![vec![111]]), (1, vec![vec![111]])]; - let (total_tokens, histogram) = token_histogram(&tokens); - assert_eq!(total_tokens, 2); - assert_eq!(histogram.get(&111), Some(&2)); - // include bad tokens - assert_eq!(histogram.get(&fast_hash("http")), Some(&2)); - assert_eq!(histogram.get(&fast_hash("www")), Some(&2)); - } - - // handle the different token set sizes - { - let tokens = vec![ - (0, vec![vec![111, 123, 132]]), - (1, vec![vec![111], vec![123], vec![132]]), - (2, vec![vec![111, 123], vec![132]]), - (3, vec![vec![111, 111], vec![111]]), - ]; - let (total_tokens, histogram) = token_histogram(&tokens); - assert_eq!(total_tokens, 12); - assert_eq!(histogram.get(&111), Some(&6)); - assert_eq!(histogram.get(&123), Some(&3)); - assert_eq!(histogram.get(&132), Some(&3)); - // include bad tokens - assert_eq!(histogram.get(&fast_hash("http")), Some(&12)); - assert_eq!(histogram.get(&fast_hash("www")), Some(&12)); - } - } - - #[test] - fn network_filter_list_new_works() { - { - let filters = ["||foo.com"]; - let network_filters: Vec<_> = filters - .into_iter() - .map(|f| NetworkFilter::parse(&f, true, Default::default())) - .filter_map(Result::ok) - .collect(); - let filter_list = NetworkFilterList::new(network_filters, false); - let maybe_matching_filter = filter_list.filter_map.get(&fast_hash("foo")); - assert!(maybe_matching_filter.is_some(), "Expected filter not found"); - } - // choses least frequent token - { - let filters = ["||foo.com", "||bar.com/foo"]; - let network_filters: Vec<_> = filters - .into_iter() - .map(|f| NetworkFilter::parse(&f, true, Default::default())) - .filter_map(Result::ok) - .collect(); - let filter_list = NetworkFilterList::new(network_filters, false); - assert_eq!( - filter_list.filter_map.get(&fast_hash("bar")).unwrap().len(), - 1 - ); - assert_eq!( - filter_list.filter_map.get(&fast_hash("foo")).unwrap().len(), - 1 - ); - } - // choses blacklisted token when no other choice - { - let filters = ["||foo.com", "||foo.com/bar", "||www"]; - let network_filters: Vec<_> = filters - .into_iter() - .map(|f| NetworkFilter::parse(&f, true, Default::default())) - .filter_map(Result::ok) - .collect(); - let filter_list = NetworkFilterList::new(network_filters, false); - assert!( - filter_list.filter_map.get(&fast_hash("www")).is_some(), - "Filter matching {} not found", - "www" - ); - assert_eq!( - filter_list.filter_map.get(&fast_hash("www")).unwrap().len(), - 1 - ); - } - // uses domain as token when only one domain - { - let filters = ["||foo.com", "||foo.com$domain=bar.com"]; - let network_filters: Vec<_> = filters - .into_iter() - .map(|f| NetworkFilter::parse(&f, true, Default::default())) - .filter_map(Result::ok) - .collect(); - let filter_list = NetworkFilterList::new(network_filters, false); - assert!( - filter_list.filter_map.get(&fast_hash("bar.com")).is_some(), - "Filter matching {} not found", - "bar.com" - ); - assert_eq!( - filter_list - .filter_map - .get(&fast_hash("bar.com")) - .unwrap() - .len(), - 1 - ); - } - // dispatches filter to multiple buckets per domain options if no token in main part - { - let filters = ["foo*$domain=bar.com|baz.com"]; - let network_filters: Vec<_> = filters - .into_iter() - .map(|f| NetworkFilter::parse(&f, true, Default::default())) - .filter_map(Result::ok) - .collect(); - let filter_list = NetworkFilterList::new(network_filters, false); - assert_eq!(filter_list.filter_map.len(), 2); - assert!( - filter_list.filter_map.get(&fast_hash("bar.com")).is_some(), - "Filter matching {} not found", - "bar.com" - ); - assert_eq!( - filter_list - .filter_map - .get(&fast_hash("bar.com")) - .unwrap() - .len(), - 1 - ); - assert!( - filter_list.filter_map.get(&fast_hash("baz.com")).is_some(), - "Filter matching {} not found", - "baz.com" - ); - assert_eq!( - filter_list - .filter_map - .get(&fast_hash("baz.com")) - .unwrap() - .len(), - 1 - ); - } - } - - fn test_requests_filters(filters: impl IntoIterator>, requests: &[(Request, bool)]) { - let network_filters: Vec<_> = filters - .into_iter() - .map(|f| NetworkFilter::parse(&f.as_ref(), true, Default::default())) - .filter_map(Result::ok) - .collect(); - let filter_list = NetworkFilterList::new(network_filters, false); - let mut regex_manager = RegexManager::default(); - - requests.into_iter().for_each(|(req, expected_result)| { - let mut tokens = Vec::new(); - req.get_tokens(&mut tokens); - let matched_rule = - filter_list.check(&req, &tokens, &HashSet::new(), &mut regex_manager); - if *expected_result { - assert!(matched_rule.is_some(), "Expected match for {}", req.url); - } else { - assert!(matched_rule.is_none(), "Expected no match for {}, matched with {}", req.url, matched_rule.unwrap().to_string()); - } - }); - } - - #[test] - fn network_filter_list_check_works_plain_filter() { - // includes cases with fall back to 0 bucket (no tokens from a rule) - let filters = [ - "foo", - "-foo-", - "&fo.o=+_-", - "foo/bar/baz", - "com/bar/baz", - "https://bar.com/bar/baz", - ]; - - let url_results = [ - ("https://bar.com/foo", true), - ("https://bar.com/baz/foo", true), - ("https://bar.com/q=foo/baz", true), - ("https://foo.com", true), - ("https://bar.com/baz/42-foo-q", true), - ("https://bar.com?baz=42&fo.o=+_-", true), - ("https://bar.com/foo/bar/baz", true), - ("https://bar.com/bar/baz", true), - ]; - - let request_expectations: Vec<_> = url_results - .into_iter() - .map(|(url, expected_result)| { - let request = Request::new(url, "https://example.com", "other").unwrap(); - (request, expected_result) - }) - .collect(); - - test_requests_filters(&filters, &request_expectations); - } - - #[test] - fn network_filter_list_check_works_hostname_anchor() { - let filters = [ - "||foo.com", - "||bar.com/bar", - "||coo.baz.", - "||foo.bar.com^", - "||foo.baz^", - ]; - - let url_results = [ - ("https://foo.com/bar", true), - ("https://bar.com/bar", true), - ("https://baz.com/bar", false), - ("https://baz.foo.com/bar", true), - ("https://coo.baz.com/bar", true), - ("https://foo.bar.com/bar", true), - ("https://foo.baz.com/bar", false), - ("https://baz.com", false), - ("https://foo-bar.baz.com/bar", false), - ("https://foo.de", false), - ("https://bar.foo.de", false), - ]; - - let request_expectations: Vec<_> = url_results - .into_iter() - .map(|(url, expected_result)| { - let request = Request::new(url, "https://example.com", "other").unwrap(); - (request, expected_result) - }) - .collect(); - - test_requests_filters(&filters, &request_expectations); - } - - #[test] - fn network_filter_list_check_works_unicode() { - let filters = [ - "||firstrowsports.li/frame/", - "||fırstrowsports.eu/pu/", - "||atđhe.net/pu/", - ]; - - let url_results = [ - ("https://firstrowsports.li/frame/bar", true), - ("https://secondrowsports.li/frame/bar", false), - ("https://fırstrowsports.eu/pu/foo", true), - ("https://xn--frstrowsports-39b.eu/pu/foo", true), - ("https://atđhe.net/pu/foo", true), - ("https://xn--athe-1ua.net/pu/foo", true), - ]; - - let request_expectations: Vec<_> = url_results - .into_iter() - .map(|(url, expected_result)| { - let request = Request::new(url, "https://example.com", "other").unwrap(); - (request, expected_result) - }).collect(); - - test_requests_filters(&filters, &request_expectations); - } - - #[test] - fn network_filter_list_check_works_regex_escaping() { - let filters = [ - r#"/^https?:\/\/.*(bitly|bit)\.(com|ly)\/.*/$domain=123movies.com|1337x.to"#, - r#"/\:\/\/data.*\.com\/[a-zA-Z0-9]{30,}/$third-party,xmlhttprequest"# - ]; - - let url_results = [ - ( - Request::new("https://bit.ly/bar/", "http://123movies.com", "").unwrap(), - true, - ), - ( - Request::new( - "https://data.foo.com/9VjjrjU9Or2aqkb8PDiqTBnULPgeI48WmYEHkYer", - "http://123movies.com", - "xmlhttprequest", - ) - .unwrap(), - true, - ), - ]; - - let request_expectations: Vec<_> = url_results - .into_iter() - .map(|(request, expected_result)| (request, expected_result)) - .collect(); - - test_requests_filters(&filters, &request_expectations); - } -} - -#[cfg(test)] -mod blocker_tests { - - use super::*; - use crate::lists::parse_filters; - use crate::resources::Resource; - use crate::request::Request; - use std::collections::HashSet; - use std::iter::FromIterator; - - #[test] - fn single_slash() { - let filters = [ - "/|", - ]; - - let (network_filters, _) = parse_filters(filters, true, Default::default()); - - let blocker_options = BlockerOptions { - enable_optimizations: true, - }; - - let blocker = Blocker::new(network_filters, &blocker_options); - - let request = Request::new("https://example.com/test/", "https://example.com", "xmlhttprequest").unwrap(); - assert!(blocker.check(&request, &Default::default()).matched); - - let request = Request::new("https://example.com/test", "https://example.com", "xmlhttprequest").unwrap(); - assert!(!blocker.check(&request, &Default::default()).matched); - } - - fn test_requests_filters(filters: impl IntoIterator>, requests: &[(Request, bool)]) { - let (network_filters, _) = parse_filters(filters, true, Default::default()); - - let blocker_options: BlockerOptions = BlockerOptions { - enable_optimizations: false, // optimizations will reduce number of rules - }; - - let blocker = Blocker::new(network_filters, &blocker_options); - - requests.iter().for_each(|(req, expected_result)| { - let matched_rule = blocker.check(&req, &Default::default()); - if *expected_result { - assert!(matched_rule.matched, "Expected match for {}", req.url); - } else { - assert!(!matched_rule.matched, "Expected no match for {}, matched with {:?}", req.url, matched_rule.filter); - } - }); - } - - #[test] - fn redirect_blocking_exception() { - let filters = [ - "||imdb-video.media-imdb.com$media,redirect=noop-0.1s.mp3", - "@@||imdb-video.media-imdb.com^$domain=imdb.com", - ]; - - let request = Request::new("https://imdb-video.media-imdb.com/kBOeI88k1o23eNAi", "https://www.imdb.com/video/13", "media").unwrap(); - - let (network_filters, _) = parse_filters(&filters, true, Default::default()); - - let blocker_options: BlockerOptions = BlockerOptions { - enable_optimizations: false, - }; - - let blocker = Blocker::new(network_filters, &blocker_options); - let mut resources = ResourceStorage::default(); - - resources.add_resource( - Resource::simple("noop-0.1s.mp3", crate::resources::MimeType::AudioMp3, "mp3"), - ).unwrap(); - - let matched_rule = blocker.check(&request, &resources); - assert_eq!(matched_rule.matched, false); - assert_eq!(matched_rule.important, false); - assert_eq!(matched_rule.redirect, Some("data:audio/mp3;base64,bXAz".to_string())); - assert_eq!(matched_rule.exception, Some("@@||imdb-video.media-imdb.com^$domain=imdb.com".to_string())); - } - - #[test] - fn redirect_exception() { - let filters = [ - "||imdb-video.media-imdb.com$media,redirect=noop-0.1s.mp3", - "@@||imdb-video.media-imdb.com^$domain=imdb.com,redirect=noop-0.1s.mp3", - ]; - - let request = Request::new("https://imdb-video.media-imdb.com/kBOeI88k1o23eNAi", "https://www.imdb.com/video/13", "media").unwrap(); - - let (network_filters, _) = parse_filters(&filters, true, Default::default()); - - let blocker_options: BlockerOptions = BlockerOptions { - enable_optimizations: false, - }; - - let blocker = Blocker::new(network_filters, &blocker_options); - let mut resources = ResourceStorage::default(); - - resources.add_resource( - Resource::simple("noop-0.1s.mp3", crate::resources::MimeType::AudioMp3, "mp3"), - ).unwrap(); - - let matched_rule = blocker.check(&request, &resources); - assert_eq!(matched_rule.matched, false); - assert_eq!(matched_rule.important, false); - assert_eq!(matched_rule.redirect, None); - assert_eq!(matched_rule.exception, Some("@@||imdb-video.media-imdb.com^$domain=imdb.com,redirect=noop-0.1s.mp3".to_string())); - } - - #[test] - fn redirect_rule_redirection() { - let filters = [ - "||doubleclick.net^", - "||www3.doubleclick.net^$xmlhttprequest,redirect-rule=noop.txt,domain=lineups.fun", - ]; - - let request = Request::new("https://www3.doubleclick.net", "https://lineups.fun", "xhr").unwrap(); - - let (network_filters, _) = parse_filters(&filters, true, Default::default()); - - let blocker_options: BlockerOptions = BlockerOptions { - enable_optimizations: false, - }; - - let blocker = Blocker::new(network_filters, &blocker_options); - let mut resources = ResourceStorage::default(); - - resources.add_resource(Resource::simple("noop.txt", crate::resources::MimeType::TextPlain, "noop")).unwrap(); - - let matched_rule = blocker.check(&request, &resources); - assert_eq!(matched_rule.matched, true); - assert_eq!(matched_rule.important, false); - assert_eq!(matched_rule.redirect, Some("data:text/plain;base64,bm9vcA==".to_string())); - assert_eq!(matched_rule.exception, None); - } - - #[test] - fn badfilter_does_not_match() { - let filters = ["||foo.com$badfilter"]; - let url_results = [ - ( - Request::new("https://foo.com", "https://bar.com", "image").unwrap(), - false, - ), - ]; - - let request_expectations: Vec<_> = url_results - .into_iter() - .map(|(request, expected_result)| (request, expected_result)) - .collect(); - - test_requests_filters(&filters, &request_expectations); - } - - #[test] - fn badfilter_cancels_with_same_id() { - let filters = [ - "||foo.com$domain=bar.com|foo.com,badfilter", - "||foo.com$domain=foo.com|bar.com", - ]; - let url_results = [ - ( - Request::new("https://foo.com", "https://bar.com", "image").unwrap(), - false, - ), - ]; - - let request_expectations: Vec<_> = url_results - .into_iter() - .map(|(request, expected_result)| (request, expected_result)) - .collect(); - - test_requests_filters(&filters, &request_expectations); - } - - #[test] - fn badfilter_does_not_cancel_similar_filter() { - let filters = [ - "||foo.com$domain=bar.com|foo.com,badfilter", - "||foo.com$domain=foo.com|bar.com,image", - ]; - let url_results = [ - ( - Request::new("https://foo.com", "https://bar.com", "image").unwrap(), - true, - ), - ]; - - let request_expectations: Vec<_> = url_results - .into_iter() - .map(|(request, expected_result)| (request, expected_result)) - .collect(); - - test_requests_filters(&filters, &request_expectations); - } - - #[test] - fn hostname_regex_filter_works() { - let filters = [ - "||alimc*.top^$domain=letv.com", - "||aa*.top^$domain=letv.com", - ]; - let url_results = [ - (Request::new("https://r.alimc1.top/test.js", "https://minisite.letv.com/", "script").unwrap(), true), - (Request::new("https://www.baidu.com/test.js", "https://minisite.letv.com/", "script").unwrap(), false), - (Request::new("https://r.aabb.top/test.js", "https://example.com/", "script").unwrap(), false), - (Request::new("https://r.aabb.top/test.js", "https://minisite.letv.com/", "script").unwrap(), true), - ]; - - let (network_filters, _) = parse_filters(&filters, true, Default::default()); - - let blocker_options = BlockerOptions { - enable_optimizations: false, // optimizations will reduce number of rules - }; - - let blocker = Blocker::new(network_filters, &blocker_options); - let resources = ResourceStorage::default(); - - url_results.into_iter().for_each(|(req, expected_result)| { - let matched_rule = blocker.check(&req, &resources); - if expected_result { - assert!(matched_rule.matched, "Expected match for {}", req.url); - } else { - assert!(!matched_rule.matched, "Expected no match for {}, matched with {:?}", req.url, matched_rule.filter); - } - }); - } - - #[test] - fn get_csp_directives() { - let filters = [ - "$csp=script-src 'self' * 'unsafe-inline',domain=thepiratebay.vip|pirateproxy.live|thehiddenbay.com|downloadpirate.com|thepiratebay10.org|kickass.vip|pirateproxy.app|ukpass.co|prox.icu|pirateproxy.life", - "$csp=worker-src 'none',domain=pirateproxy.live|thehiddenbay.com|tpb.party|thepiratebay.org|thepiratebay.vip|thepiratebay10.org|flashx.cc|vidoza.co|vidoza.net", - "||1337x.to^$csp=script-src 'self' 'unsafe-inline'", - "@@^no-csp^$csp=script-src 'self' 'unsafe-inline'", - "^duplicated-directive^$csp=worker-src 'none'", - "@@^disable-all^$csp", - "^first-party-only^$csp=script-src 'none',1p", - ]; - - let (network_filters, _) = parse_filters(&filters, true, Default::default()); - - let blocker_options = BlockerOptions { - enable_optimizations: false, - }; - - let blocker = Blocker::new(network_filters, &blocker_options); - - { // No directives should be returned for requests that are not `document` or `subdocument` content types. - assert_eq!(blocker.get_csp_directives(&Request::new("https://pirateproxy.live/static/custom_ads.js", "https://pirateproxy.live", "script").unwrap()), None); - assert_eq!(blocker.get_csp_directives(&Request::new("https://pirateproxy.live/static/custom_ads.js", "https://pirateproxy.live", "image").unwrap()), None); - assert_eq!(blocker.get_csp_directives(&Request::new("https://pirateproxy.live/static/custom_ads.js", "https://pirateproxy.live", "object").unwrap()), None); - } - { // A single directive should be returned if only one match is present in the engine, for both document and subdocument types - assert_eq!(blocker.get_csp_directives(&Request::new("https://example.com", "https://vidoza.co", "document").unwrap()), Some(String::from("worker-src 'none'"))); - assert_eq!(blocker.get_csp_directives(&Request::new("https://example.com", "https://vidoza.net", "subdocument").unwrap()), Some(String::from("worker-src 'none'"))); - } - { // Multiple merged directives should be returned if more than one match is present in the engine - let possible_results = [ - Some(String::from("script-src 'self' * 'unsafe-inline',worker-src 'none'")), - Some(String::from("worker-src 'none',script-src 'self' * 'unsafe-inline'")), - ]; - assert!(possible_results.contains(&blocker.get_csp_directives(&Request::new("https://example.com", "https://pirateproxy.live", "document").unwrap()))); - assert!(possible_results.contains(&blocker.get_csp_directives(&Request::new("https://example.com", "https://pirateproxy.live", "subdocument").unwrap()))); - } - { // A directive with an exception should not be returned - assert_eq!(blocker.get_csp_directives(&Request::new("https://1337x.to", "https://1337x.to", "document").unwrap()), Some(String::from("script-src 'self' 'unsafe-inline'"))); - assert_eq!(blocker.get_csp_directives(&Request::new("https://1337x.to/no-csp", "https://1337x.to", "subdocument").unwrap()), None); - } - { // Multiple identical directives should only appear in the output once - assert_eq!(blocker.get_csp_directives(&Request::new("https://example.com/duplicated-directive", "https://flashx.cc", "document").unwrap()), Some(String::from("worker-src 'none'"))); - assert_eq!(blocker.get_csp_directives(&Request::new("https://example.com/duplicated-directive", "https://flashx.cc", "subdocument").unwrap()), Some(String::from("worker-src 'none'"))); - } - { // A CSP exception with no corresponding directive should disable all CSP injections for the page - assert_eq!(blocker.get_csp_directives(&Request::new("https://1337x.to/duplicated-directive/disable-all", "https://thepiratebay10.org", "document").unwrap()), None); - assert_eq!(blocker.get_csp_directives(&Request::new("https://1337x.to/duplicated-directive/disable-all", "https://thepiratebay10.org", "document").unwrap()), None); - } - { // A CSP exception with a partyness modifier should only match where the modifier applies - assert_eq!(blocker.get_csp_directives(&Request::new("htps://github.com/first-party-only", "https://example.com", "subdocument").unwrap()), None); - assert_eq!(blocker.get_csp_directives(&Request::new("https://example.com/first-party-only", "https://example.com", "document").unwrap()), Some(String::from("script-src 'none'"))); - } - } - - #[test] - fn test_removeparam() { - let filters = [ - "||example.com^$removeparam=test", - "*$removeparam=fbclid", - "/script.js$redirect-rule=noopjs", - "^block^$important", - "$removeparam=testCase,~xhr", - ]; - - let (network_filters, _) = parse_filters(&filters, true, Default::default()); - - let blocker_options = BlockerOptions { - enable_optimizations: true, - }; - - let blocker = Blocker::new(network_filters, &blocker_options); - let mut resources = ResourceStorage::default(); - - resources.add_resource(Resource::simple("noopjs", crate::resources::MimeType::ApplicationJavascript, "(() => {})()")).unwrap(); - - let result = blocker.check(&Request::new("https://example.com?q=1&test=2#blue", "https://antonok.com", "xhr").unwrap(), &resources); - assert_eq!(result.rewritten_url, Some("https://example.com?q=1#blue".into())); - assert!(!result.matched); - - let result = blocker.check(&Request::new("https://example.com?test=2&q=1#blue", "https://antonok.com", "xhr").unwrap(), &resources); - assert_eq!(result.rewritten_url, Some("https://example.com?q=1#blue".into())); - assert!(!result.matched); - - let result = blocker.check(&Request::new("https://example.com?test=2#blue", "https://antonok.com", "xhr").unwrap(), &resources); - assert_eq!(result.rewritten_url, Some("https://example.com#blue".into())); - assert!(!result.matched); - - let result = blocker.check(&Request::new("https://example.com?q=1#blue", "https://antonok.com", "xhr").unwrap(), &resources); - assert_eq!(result.rewritten_url, None); - assert!(!result.matched); - - let result = blocker.check(&Request::new("https://example.com?q=1&test=2", "https://antonok.com", "xhr").unwrap(), &resources); - assert_eq!(result.rewritten_url, Some("https://example.com?q=1".into())); - assert!(!result.matched); - - let result = blocker.check(&Request::new("https://example.com?test=2&q=1", "https://antonok.com", "xhr").unwrap(), &resources); - assert_eq!(result.rewritten_url, Some("https://example.com?q=1".into())); - assert!(!result.matched); - - let result = blocker.check(&Request::new("https://example.com?test=2", "https://antonok.com", "xhr").unwrap(), &resources); - assert_eq!(result.rewritten_url, Some("https://example.com".into())); - assert!(!result.matched); - - let result = blocker.check(&Request::new("https://example.com?test=2", "https://antonok.com", "image").unwrap(), &resources); - assert_eq!(result.rewritten_url, None); - assert!(!result.matched); - - let result = blocker.check(&Request::new("https://example.com?q=1", "https://antonok.com", "xhr").unwrap(), &resources); - assert_eq!(result.rewritten_url, None); - assert!(!result.matched); - - let result = blocker.check(&Request::new("https://example.com?q=fbclid", "https://antonok.com", "xhr").unwrap(), &resources); - assert_eq!(result.rewritten_url, None); - assert!(!result.matched); - - let result = blocker.check(&Request::new("https://example.com?fbclid=10938&q=1&test=2", "https://antonok.com", "xhr").unwrap(), &resources); - assert_eq!(result.rewritten_url, Some("https://example.com?q=1".into())); - assert!(!result.matched); - - let result = blocker.check(&Request::new("https://test.com?fbclid=10938&q=1&test=2", "https://antonok.com", "xhr").unwrap(), &resources); - assert_eq!(result.rewritten_url, Some("https://test.com?q=1&test=2".into())); - assert!(!result.matched); - - let result = blocker.check(&Request::new("https://example.com?q1=1&q2=2&q3=3&test=2&q4=4&q5=5&fbclid=39", "https://antonok.com", "xhr").unwrap(), &resources); - assert_eq!(result.rewritten_url, Some("https://example.com?q1=1&q2=2&q3=3&q4=4&q5=5".into())); - assert!(!result.matched); - - let result = blocker.check(&Request::new("https://example.com?q1=1&q1=2&test=2&test=3", "https://antonok.com", "xhr").unwrap(), &resources); - assert_eq!(result.rewritten_url, Some("https://example.com?q1=1&q1=2".into())); - assert!(!result.matched); - - let result = blocker.check(&Request::new("https://example.com/script.js?test=2#blue", "https://antonok.com", "xhr").unwrap(), &resources); - assert_eq!(result.rewritten_url, Some("https://example.com/script.js#blue".into())); - assert_eq!(result.redirect, Some("data:application/javascript;base64,KCgpID0+IHt9KSgp".into())); - assert!(!result.matched); - - let result = blocker.check(&Request::new("https://example.com/block/script.js?test=2", "https://antonok.com", "xhr").unwrap(), &resources); - assert_eq!(result.rewritten_url, None); - assert_eq!(result.redirect, Some("data:application/javascript;base64,KCgpID0+IHt9KSgp".into())); - assert!(result.matched); - - let result = blocker.check(&Request::new("https://example.com/Path/?Test=ABC&testcase=AbC&testCase=aBc", "https://antonok.com", "xhr").unwrap(), &resources); - assert_eq!(result.rewritten_url, None); - assert!(!result.matched); - - let result = blocker.check(&Request::new("https://example.com/Path/?Test=ABC&testcase=AbC&testCase=aBc", "https://antonok.com", "image").unwrap(), &resources); - assert_eq!(result.rewritten_url, None); - assert!(!result.matched); - - let result = blocker.check(&Request::new("https://example.com/Path/?Test=ABC&testcase=AbC&testCase=aBc", "https://antonok.com", "subdocument").unwrap(), &resources); - assert_eq!(result.rewritten_url, Some("https://example.com/Path/?Test=ABC&testcase=AbC".into())); - assert!(!result.matched); - - let result = blocker.check(&Request::new("https://example.com/Path/?Test=ABC&testcase=AbC&testCase=aBc", "https://antonok.com", "document").unwrap(), &resources); - assert_eq!(result.rewritten_url, Some("https://example.com/Path/?Test=ABC&testcase=AbC".into())); - assert!(!result.matched); - - let result = blocker.check(&Request::new("https://example.com?Test=ABC?123&test=3#&test=4#b", "https://antonok.com", "document").unwrap(), &resources); - assert_eq!(result.rewritten_url, Some("https://example.com?Test=ABC?123#&test=4#b".into())); - assert!(!result.matched); - - let result = blocker.check(&Request::new("https://example.com?Test=ABC&testCase=5", "https://antonok.com", "document").unwrap(), &resources); - assert_eq!(result.rewritten_url, Some("https://example.com?Test=ABC".into())); - assert!(!result.matched); - - let result = blocker.check(&Request::new("https://example.com?Test=ABC&testCase=5", "https://antonok.com", "image").unwrap(), &resources); - assert_eq!(result.rewritten_url, None); - assert!(!result.matched); - } - - /// Tests ported from the previous query parameter stripping logic in brave-core - #[test] - fn removeparam_brave_core_tests() { - let testcases = [ - // (original url, expected url after filtering) - ("https://example.com/?fbclid=1234", "https://example.com/"), - ("https://example.com/?fbclid=1234&", "https://example.com/"), - ("https://example.com/?&fbclid=1234", "https://example.com/"), - ("https://example.com/?gclid=1234", "https://example.com/"), - ("https://example.com/?fbclid=0&gclid=1&msclkid=a&mc_eid=a1", - "https://example.com/"), - ("https://example.com/?fbclid=&foo=1&bar=2&gclid=abc", - "https://example.com/?fbclid=&foo=1&bar=2"), - ("https://example.com/?fbclid=&foo=1&gclid=1234&bar=2", - "https://example.com/?fbclid=&foo=1&bar=2"), - ("http://u:p@example.com/path/file.html?foo=1&fbclid=abcd#fragment", - "http://u:p@example.com/path/file.html?foo=1#fragment"), - ("https://example.com/?__s=1234-abcd", "https://example.com/"), - // Obscure edge cases that break most parsers: - ("https://example.com/?fbclid&foo&&gclid=2&bar=&%20", - "https://example.com/?fbclid&foo&&bar=&%20"), - ("https://example.com/?fbclid=1&1==2&=msclkid&foo=bar&&a=b=c&", - "https://example.com/?1==2&=msclkid&foo=bar&&a=b=c&"), - ("https://example.com/?fbclid=1&=2&?foo=yes&bar=2+", - "https://example.com/?=2&?foo=yes&bar=2+"), - ("https://example.com/?fbclid=1&a+b+c=some%20thing&1%202=3+4", - "https://example.com/?a+b+c=some%20thing&1%202=3+4"), - // Conditional query parameter stripping - /*("https://example.com/?mkt_tok=123&foo=bar", - "https://example.com/?foo=bar"),*/ - ]; - - let filters = [ - "fbclid", "gclid", "msclkid", "mc_eid", - "dclid", - "oly_anon_id", "oly_enc_id", - "_openstat", - "vero_conv", "vero_id", - "wickedid", - "yclid", - "__s", - "rb_clickid", - "s_cid", - "ml_subscriber", "ml_subscriber_hash", - "twclid", - "gbraid", "wbraid", - "_hsenc", "__hssc", "__hstc", "__hsfp", "hsCtaTracking", - "oft_id", "oft_k", "oft_lk", "oft_d", "oft_c", "oft_ck", "oft_ids", - "oft_sk", - "ss_email_id", - "bsft_uid", "bsft_clkid", - "vgo_ee", - "igshid", - ].iter().map(|s| format!("*$removeparam={}", s)).collect::>(); - - let (network_filters, _) = parse_filters(&filters, true, Default::default()); - - let blocker_options = BlockerOptions { - enable_optimizations: true, - }; - - let blocker = Blocker::new(network_filters, &blocker_options); - let resources = ResourceStorage::default(); - - for (original, expected) in testcases.into_iter() { - let result = blocker.check(&Request::new(original, "https://example.net", "xhr").unwrap(), &resources); - let expected = if original == expected { - None - } else { - Some(expected.to_string()) - }; - assert_eq!(expected, result.rewritten_url, "Filtering parameters on {} failed", original); - } - } - -#[test] -fn test_removeparam_same_tokens() { - let filters = [ - "$removeparam=example1_", - "$removeparam=example1-", - ]; - - let (network_filters, _) = parse_filters(&filters, true, Default::default()); - - let blocker_options = BlockerOptions { - enable_optimizations: true, - }; - - let blocker = Blocker::new(network_filters, &blocker_options); - - let result = blocker.check(&Request::new("https://example.com?example1_=1&example1-=2", "https://example.com", "xhr").unwrap(), &Default::default()); - assert_eq!(result.rewritten_url, Some("https://example.com".into())); - assert!(!result.matched); -} - - #[test] - fn test_redirect_priority() { - let filters = [ - ".txt^$redirect-rule=a", - "||example.com^$redirect-rule=b:10", - "/text$redirect-rule=c:20", - "@@^excepta^$redirect-rule=a", - "@@^exceptb10^$redirect-rule=b:10", - "@@^exceptc20^$redirect-rule=c:20", - ]; - - let (network_filters, _) = parse_filters(&filters, true, Default::default()); - - let blocker_options = BlockerOptions { - enable_optimizations: true, - }; - - let blocker = Blocker::new(network_filters, &blocker_options); - let mut resources = ResourceStorage::default(); - fn add_simple_resource(resources: &mut ResourceStorage, identifier: &str) -> Option { - resources.add_resource(Resource::simple(identifier, crate::resources::MimeType::TextPlain, identifier)).unwrap(); - Some(format!("data:text/plain;base64,{}", base64::encode(identifier))) - } - let a_redirect = add_simple_resource(&mut resources, "a"); - let b_redirect = add_simple_resource(&mut resources, "b"); - let c_redirect = add_simple_resource(&mut resources, "c"); - - let result = blocker.check(&Request::new("https://example.net/test", "https://example.com", "xmlhttprequest").unwrap(), &resources); - assert_eq!(result.redirect, None); - assert!(!result.matched); - - let result = blocker.check(&Request::new("https://example.net/test.txt", "https://example.com", "xmlhttprequest").unwrap(), &resources); - assert_eq!(result.redirect, a_redirect); - assert!(!result.matched); - - let result = blocker.check(&Request::new("https://example.com/test.txt", "https://example.com", "xmlhttprequest").unwrap(), &resources); - assert_eq!(result.redirect, b_redirect); - assert!(!result.matched); - - let result = blocker.check(&Request::new("https://example.com/text.txt", "https://example.com", "xmlhttprequest").unwrap(), &resources); - assert_eq!(result.redirect, c_redirect); - assert!(!result.matched); - - let result = blocker.check(&Request::new("https://example.com/exceptc20/text.txt", "https://example.com", "xmlhttprequest").unwrap(), &resources); - assert_eq!(result.redirect, b_redirect); - assert!(!result.matched); - - let result = blocker.check(&Request::new("https://example.com/exceptb10/text.txt", "https://example.com", "xmlhttprequest").unwrap(), &resources); - assert_eq!(result.redirect, c_redirect); - assert!(!result.matched); - - let result = blocker.check(&Request::new("https://example.com/exceptc20/exceptb10/text.txt", "https://example.com", "xmlhttprequest").unwrap(), &resources); - assert_eq!(result.redirect, a_redirect); - assert!(!result.matched); - - let result = blocker.check(&Request::new("https://example.com/exceptc20/exceptb10/excepta/text.txt", "https://example.com", "xmlhttprequest").unwrap(), &resources); - assert_eq!(result.redirect, None); - assert!(!result.matched); - - let result = blocker.check(&Request::new("https://example.com/exceptc20/exceptb10/text", "https://example.com", "xmlhttprequest").unwrap(), &resources); - assert_eq!(result.redirect, None); - assert!(!result.matched); - } - - #[test] - fn tags_enable_works() { - let filters = [ - "adv$tag=stuff", - "somelongpath/test$tag=stuff", - "||brianbondy.com/$tag=brian", - "||brave.com$tag=brian", - ]; - let url_results = [ - ("http://example.com/advert.html", true), - ("http://example.com/somelongpath/test/2.html", true), - ("https://brianbondy.com/about", false), - ("https://brave.com/about", false), - ]; - - let request_expectations: Vec<_> = url_results - .into_iter() - .map(|(url, expected_result)| { - let request = Request::new(url, "https://example.com", "other").unwrap(); - (request, expected_result) - }).collect(); - - let (network_filters, _) = parse_filters(&filters, true, Default::default()); - - let blocker_options: BlockerOptions = BlockerOptions { - enable_optimizations: false, // optimizations will reduce number of rules - }; - - let mut blocker = Blocker::new(network_filters, &blocker_options); - let resources = Default::default(); - blocker.enable_tags(&["stuff"]); - assert_eq!(blocker.tags_enabled, HashSet::from_iter([String::from("stuff")].into_iter())); - assert_eq!(vec_hashmap_len(&blocker.filters_tagged.filter_map), 2); - - request_expectations.into_iter().for_each(|(req, expected_result)| { - let matched_rule = blocker.check(&req, &resources); - if expected_result { - assert!(matched_rule.matched, "Expected match for {}", req.url); - } else { - assert!(!matched_rule.matched, "Expected no match for {}, matched with {:?}", req.url, matched_rule.filter); - } - }); - } - - #[test] - fn tags_enable_adds_tags() { - let filters = [ - "adv$tag=stuff", - "somelongpath/test$tag=stuff", - "||brianbondy.com/$tag=brian", - "||brave.com$tag=brian", - ]; - let url_results = [ - ("http://example.com/advert.html", true), - ("http://example.com/somelongpath/test/2.html", true), - ("https://brianbondy.com/about", true), - ("https://brave.com/about", true), - ]; - - let request_expectations: Vec<_> = url_results - .into_iter() - .map(|(url, expected_result)| { - let request = Request::new(url, "https://example.com", "other").unwrap(); - (request, expected_result) - }).collect(); - - let (network_filters, _) = parse_filters(&filters, true, Default::default()); - - let blocker_options: BlockerOptions = BlockerOptions { - enable_optimizations: false, // optimizations will reduce number of rules - }; - - let mut blocker = Blocker::new(network_filters, &blocker_options); - let resources = Default::default(); - blocker.enable_tags(&["stuff"]); - blocker.enable_tags(&["brian"]); - assert_eq!(blocker.tags_enabled, HashSet::from_iter([String::from("brian"), String::from("stuff")].into_iter())); - assert_eq!(vec_hashmap_len(&blocker.filters_tagged.filter_map), 4); - - request_expectations.into_iter().for_each(|(req, expected_result)| { - let matched_rule = blocker.check(&req, &resources); - if expected_result { - assert!(matched_rule.matched, "Expected match for {}", req.url); - } else { - assert!(!matched_rule.matched, "Expected no match for {}, matched with {:?}", req.url, matched_rule.filter); - } - }); - } - - #[test] - fn tags_disable_works() { - let filters = [ - "adv$tag=stuff", - "somelongpath/test$tag=stuff", - "||brianbondy.com/$tag=brian", - "||brave.com$tag=brian", - ]; - let url_results = [ - ("http://example.com/advert.html", false), - ("http://example.com/somelongpath/test/2.html", false), - ("https://brianbondy.com/about", true), - ("https://brave.com/about", true), - ]; - - let request_expectations: Vec<_> = url_results - .into_iter() - .map(|(url, expected_result)| { - let request = Request::new(url, "https://example.com", "other").unwrap(); - (request, expected_result) - }).collect(); - - let (network_filters, _) = parse_filters(&filters, true, Default::default()); - - let blocker_options: BlockerOptions = BlockerOptions { - enable_optimizations: false, // optimizations will reduce number of rules - }; - - let mut blocker = Blocker::new(network_filters, &blocker_options); - let resources = Default::default(); - blocker.enable_tags(&["brian", "stuff"]); - assert_eq!(blocker.tags_enabled, HashSet::from_iter([String::from("brian"), String::from("stuff")].into_iter())); - assert_eq!(vec_hashmap_len(&blocker.filters_tagged.filter_map), 4); - blocker.disable_tags(&["stuff"]); - assert_eq!(blocker.tags_enabled, HashSet::from_iter([String::from("brian")].into_iter())); - assert_eq!(vec_hashmap_len(&blocker.filters_tagged.filter_map), 2); - - request_expectations.into_iter().for_each(|(req, expected_result)| { - let matched_rule = blocker.check(&req, &resources); - if expected_result { - assert!(matched_rule.matched, "Expected match for {}", req.url); - } else { - assert!(!matched_rule.matched, "Expected no match for {}, matched with {:?}", req.url, matched_rule.filter); - } - }); - } - - #[test] - fn filter_add_badfilter_error() { - let blocker_options: BlockerOptions = BlockerOptions { - enable_optimizations: false, - }; - - let mut blocker = Blocker::new(Vec::new(), &blocker_options); - - let filter = NetworkFilter::parse("adv$badfilter", true, Default::default()).unwrap(); - let added = blocker.add_filter(filter); - assert!(added.is_err()); - assert_eq!(added.err().unwrap(), BlockerError::BadFilterAddUnsupported); - } - - #[test] - #[ignore] - fn filter_add_twice_handling_error() { - { - // Not allow filter to be added twice hwn the engine is not optimised - let blocker_options: BlockerOptions = BlockerOptions { - enable_optimizations: false, - }; - - let mut blocker = Blocker::new(Vec::new(), &blocker_options); - - let filter = NetworkFilter::parse("adv", true, Default::default()).unwrap(); - blocker.add_filter(filter.clone()).unwrap(); - assert!(blocker.filter_exists(&filter), "Expected filter to be inserted"); - let added = blocker.add_filter(filter); - assert!(added.is_err(), "Expected repeated insertion to fail"); - assert_eq!(added.err().unwrap(), BlockerError::FilterExists, "Expected specific error on repeated insertion fail"); - } - { - // Allow filter to be added twice when the engine is optimised - let blocker_options: BlockerOptions = BlockerOptions { - enable_optimizations: true, - }; - - let mut blocker = Blocker::new(Vec::new(), &blocker_options); - - let filter = NetworkFilter::parse("adv", true, Default::default()).unwrap(); - blocker.add_filter(filter.clone()).unwrap(); - let added = blocker.add_filter(filter); - assert!(added.is_ok()); - } - } - - #[test] - fn filter_add_tagged() { - // Allow filter to be added twice when the engine is optimised - let blocker_options: BlockerOptions = BlockerOptions { - enable_optimizations: true, - }; - - let mut blocker = Blocker::new(Vec::new(), &blocker_options); - let resources = Default::default(); - blocker.enable_tags(&["brian"]); - - blocker.add_filter(NetworkFilter::parse("adv$tag=stuff", true, Default::default()).unwrap()).unwrap(); - blocker.add_filter(NetworkFilter::parse("somelongpath/test$tag=stuff", true, Default::default()).unwrap()).unwrap(); - blocker.add_filter(NetworkFilter::parse("||brianbondy.com/$tag=brian", true, Default::default()).unwrap()).unwrap(); - blocker.add_filter(NetworkFilter::parse("||brave.com$tag=brian", true, Default::default()).unwrap()).unwrap(); - - let url_results = [ - ("http://example.com/advert.html", false), - ("http://example.com/somelongpath/test/2.html", false), - ("https://brianbondy.com/about", true), - ("https://brave.com/about", true), - ]; - - let request_expectations: Vec<_> = url_results - .into_iter() - .map(|(url, expected_result)| { - let request = Request::new(url, "https://example.com", "other").unwrap(); - (request, expected_result) - }).collect(); - - request_expectations.into_iter().for_each(|(req, expected_result)| { - let matched_rule = blocker.check(&req, &resources); - if expected_result { - assert!(matched_rule.matched, "Expected match for {}", req.url); - } else { - assert!(!matched_rule.matched, "Expected no match for {}, matched with {:?}", req.url, matched_rule.filter); - } - }); - } - - #[test] - fn exception_force_check() { - let blocker_options: BlockerOptions = BlockerOptions { - enable_optimizations: true, - }; - - let mut blocker = Blocker::new(Vec::new(), &blocker_options); - let resources = Default::default(); - - blocker.add_filter(NetworkFilter::parse("@@*ad_banner.png", true, Default::default()).unwrap()).unwrap(); - - let request = Request::new("http://example.com/ad_banner.png", "https://example.com", "other").unwrap(); - - let matched_rule = blocker.check_parameterised(&request, &resources, false, true); - assert!(!matched_rule.matched); - assert!(matched_rule.exception.is_some()); - } - - #[test] - fn generichide() { - let blocker_options: BlockerOptions = BlockerOptions { - enable_optimizations: true, - }; - - let mut blocker = Blocker::new(Vec::new(), &blocker_options); - - blocker.add_filter(NetworkFilter::parse("@@||example.com$generichide", true, Default::default()).unwrap()).unwrap(); - - assert!(blocker.check_generic_hide(&Request::new("https://example.com", "https://example.com", "other").unwrap())); - } -} - -#[cfg(test)] -mod placeholder_string_tests { - /// If this changes, be sure to update the documentation for [`BlockerResult`] as well. - #[test] - fn test_constant_placeholder_string() { - let mut filter_set = crate::lists::FilterSet::new(false); - filter_set.add_filter("||example.com^", Default::default()).unwrap(); - let engine = crate::Engine::from_filter_set(filter_set, true); - let block = engine.check_network_request(&crate::request::Request::new("https://example.com", "https://example.com", "document").unwrap()); - assert_eq!(block.filter, Some("NetworkFilter".to_string())); - } -} - -#[cfg(test)] -mod legacy_rule_parsing_tests { - use crate::test_utils::rules_from_lists; - use crate::lists::{parse_filters, FilterFormat, ParseOptions}; - use crate::blocker::{Blocker, BlockerOptions}; - use crate::blocker::vec_hashmap_len; - - struct ListCounts { - pub filters: usize, - pub cosmetic_filters: usize, - pub exceptions: usize, - pub duplicates: usize, - } - - impl std::ops::Add for ListCounts { - type Output = ListCounts; - - fn add(self, other: ListCounts) -> Self::Output { - ListCounts { - filters: self.filters + other.filters, - cosmetic_filters: self.cosmetic_filters + other.cosmetic_filters, - exceptions: self.exceptions + other.exceptions, - duplicates: 0, // Don't bother trying to calculate - lists could have cross-duplicated entries - } - } - } - - // number of expected EasyList cosmetic rules from old engine is 31144, but is incorrect as it skips a few particularly long rules that are nevertheless valid - // easyList = { 24478, 31144, 0, 5589 }; - // not handling (and not including) filters with the following options: - // - $popup - // - $elemhide - // difference from original counts caused by not handling document/subdocument options and possibly miscounting on the blocker side. - // Printing all non-cosmetic, non-html, non-comment/-empty rules and ones with no unsupported options yields 29142 items - // This engine also handles 3 rules that old one does not - const EASY_LIST: ListCounts = ListCounts { - filters: 35597, // 36259 - 662 exceptions - cosmetic_filters: if cfg!(feature = "css-validation") { 23072 } else { 23080 }, - exceptions: 662, - duplicates: 0 - }; - // easyPrivacy = { 11817, 0, 0, 1020 }; - // differences in counts explained by hashset size underreporting as detailed in the next two cases - const EASY_PRIVACY: ListCounts = ListCounts { - filters: 52278, // 52998 - 720 exceptions - cosmetic_filters: 21, - exceptions: 720, - duplicates: 2 - }; - // ublockUnbreak = { 4, 8, 0, 94 }; - // differences in counts explained by client.hostAnchoredExceptionHashSet->GetSize() underreporting when compared to client.numHostAnchoredExceptionFilters - const UBLOCK_UNBREAK: ListCounts = ListCounts { filters: 4, cosmetic_filters: 8, exceptions: 98, duplicates: 0 }; - // braveUnbreak = { 31, 0, 0, 4 }; - // differences in counts explained by client.hostAnchoredHashSet->GetSize() underreporting when compared to client.numHostAnchoredFilters - const BRAVE_UNBREAK: ListCounts = ListCounts { filters: 32, cosmetic_filters: 0, exceptions: 4, duplicates: 0 }; - // disconnectSimpleMalware = { 2450, 0, 0, 0 }; - const DISCONNECT_SIMPLE_MALWARE: ListCounts = ListCounts { filters: 2450, cosmetic_filters: 0, exceptions: 0, duplicates: 0 }; - // spam404MainBlacklist = { 5629, 166, 0, 0 }; - const SPAM_404_MAIN_BLACKLIST: ListCounts = ListCounts { filters: 5629, cosmetic_filters: 166, exceptions: 0, duplicates: 0 }; - const MALWARE_DOMAIN_LIST: ListCounts = ListCounts { filters: 1104, cosmetic_filters: 0, exceptions: 0, duplicates: 3 }; - const MALWARE_DOMAINS: ListCounts = ListCounts { filters: 26853, cosmetic_filters: 0, exceptions: 0, duplicates: 48 }; - - fn check_list_counts(rule_lists: impl IntoIterator>, format: FilterFormat, expectation: ListCounts) { - let rules = rules_from_lists(rule_lists); - - let (network_filters, cosmetic_filters) = parse_filters(rules, true, ParseOptions { format, ..Default::default() }); - - assert_eq!( - (network_filters.len(), - network_filters.iter().filter(|f| f.is_exception()).count(), - cosmetic_filters.len()), - (expectation.filters + expectation.exceptions, - expectation.exceptions, - expectation.cosmetic_filters), - "Number of collected filters does not match expectation"); - - let blocker_options = BlockerOptions { - enable_optimizations: false, // optimizations will reduce number of rules - }; - - let blocker = Blocker::new(network_filters, &blocker_options); - - // Some filters in the filter_map are pointed at by multiple tokens, increasing the total number of items - assert!(vec_hashmap_len(&blocker.exceptions.filter_map) + vec_hashmap_len(&blocker.generic_hide.filter_map) - >= expectation.exceptions, "Number of collected exceptions does not match expectation"); - - assert!(vec_hashmap_len(&blocker.filters.filter_map) + - vec_hashmap_len(&blocker.importants.filter_map) + - vec_hashmap_len(&blocker.redirects.filter_map) + - vec_hashmap_len(&blocker.redirects.filter_map) + - vec_hashmap_len(&blocker.csp.filter_map) >= - expectation.filters - expectation.duplicates, "Number of collected network filters does not match expectation"); - } - - #[test] - fn parse_easylist() { - check_list_counts(["./data/easylist.to/easylist/easylist.txt"], FilterFormat::Standard, EASY_LIST); - } - - #[test] - fn parse_easyprivacy() { - check_list_counts(["./data/easylist.to/easylist/easyprivacy.txt"], FilterFormat::Standard, EASY_PRIVACY); - } - - #[test] - fn parse_ublock_unbreak() { - check_list_counts(["./data/test/ublock-unbreak.txt"], FilterFormat::Standard, UBLOCK_UNBREAK); - } - - #[test] - fn parse_brave_unbreak() { - check_list_counts(["./data/test/brave-unbreak.txt"], FilterFormat::Standard, BRAVE_UNBREAK); - } - - #[test] - fn parse_brave_disconnect_simple_malware() { - check_list_counts(["./data/test/disconnect-simple-malware.txt"], FilterFormat::Standard, DISCONNECT_SIMPLE_MALWARE); - } - - #[test] - fn parse_spam404_main_blacklist() { - check_list_counts(["./data/test/spam404-main-blacklist.txt"], FilterFormat::Standard, SPAM_404_MAIN_BLACKLIST); - } - - #[test] - fn parse_malware_domain_list() { - check_list_counts(["./data/test/malwaredomainlist.txt"], FilterFormat::Hosts, MALWARE_DOMAIN_LIST); - } - - #[test] - fn parse_malware_domain_list_just_hosts() { - check_list_counts(["./data/test/malwaredomainlist_justhosts.txt"], FilterFormat::Hosts, MALWARE_DOMAIN_LIST); - } - - #[test] - fn parse_malware_domains() { - check_list_counts(["./data/test/malwaredomains.txt"], FilterFormat::Hosts, MALWARE_DOMAINS); - } - - #[test] - fn parse_multilist() { - let expectation = EASY_LIST + EASY_PRIVACY + UBLOCK_UNBREAK + BRAVE_UNBREAK; - check_list_counts( - [ - "./data/easylist.to/easylist/easylist.txt", - "./data/easylist.to/easylist/easyprivacy.txt", - "./data/test/ublock-unbreak.txt", - "./data/test/brave-unbreak.txt", - ], - FilterFormat::Standard, - expectation, - ) - } - - #[test] - fn parse_malware_multilist() { - let expectation = SPAM_404_MAIN_BLACKLIST + DISCONNECT_SIMPLE_MALWARE; - check_list_counts( - [ - "./data/test/spam404-main-blacklist.txt", - "./data/test/disconnect-simple-malware.txt", - ], - FilterFormat::Standard, - expectation, - ) - } - - #[test] - fn parse_hosts_formats() { - let mut expectation = MALWARE_DOMAIN_LIST + MALWARE_DOMAINS; - expectation.duplicates = 69; - check_list_counts( - [ - "./data/test/malwaredomainlist.txt", - "./data/test/malwaredomains.txt", - ], - FilterFormat::Hosts, - expectation, - ) - } -} +#[path = "../tests/unit/blocker.rs"] +mod unit_tests; diff --git a/src/content_blocking.rs b/src/content_blocking.rs index ecbf5dd9..f661aa88 100644 --- a/src/content_blocking.rs +++ b/src/content_blocking.rs @@ -1,7 +1,7 @@ //! Transforms filter rules into content blocking syntax used on iOS and MacOS. use crate::filters::cosmetic::CosmeticFilter; -use crate::filters::network::{NetworkFilter, NetworkFilterMask}; +use crate::filters::network::{NetworkFilter, NetworkFilterMask, NetworkFilterMaskHelper}; use crate::lists::ParsedFilter; use memchr::{memchr as find_char, memmem}; @@ -46,12 +46,32 @@ pub struct CbRule { impl CbRule { /// If this returns false, the rule will not compile and should not be used. fn is_ascii(&self) -> bool { - self.action.selector.iter().all(|s| s.is_ascii()) && - self.trigger.url_filter.is_ascii() && - self.trigger.if_domain.iter().flatten().all(|d| d.is_ascii()) && - self.trigger.unless_domain.iter().flatten().all(|d| d.is_ascii()) && - self.trigger.if_top_url.iter().flatten().all(|d| d.is_ascii()) && - self.trigger.unless_top_url.iter().flatten().all(|d| d.is_ascii()) + self.action.selector.iter().all(|s| s.is_ascii()) + && self.trigger.url_filter.is_ascii() + && self + .trigger + .if_domain + .iter() + .flatten() + .all(|d| d.is_ascii()) + && self + .trigger + .unless_domain + .iter() + .flatten() + .all(|d| d.is_ascii()) + && self + .trigger + .if_top_url + .iter() + .flatten() + .all(|d| d.is_ascii()) + && self + .trigger + .unless_top_url + .iter() + .flatten() + .all(|d| d.is_ascii()) } } @@ -385,47 +405,53 @@ impl TryFrom for CbRuleEquivalent { let escaped_special_chars = SPECIAL_CHARS.replace_all(&hostname, r##"\$1"##); format!("^[^:]+:(//)?([^/]+\\.)?{}", escaped_special_chars) } - (crate::filters::network::FilterPart::Empty, None) => { - if v.mask.contains(NetworkFilterMask::FROM_HTTP | NetworkFilterMask::FROM_HTTPS) { - "^https?://" - } else if v.mask.contains(NetworkFilterMask::FROM_HTTP) { - "^http://" - } else if v.mask.contains(NetworkFilterMask::FROM_HTTPS) { - "^https://" - } else if v.mask.contains(NetworkFilterMask::FROM_WEBSOCKET) { - "^wss?://" - } else { - unreachable!("Invalid scheme information"); - }.to_string() + (crate::filters::network::FilterPart::Empty, None) => if v + .mask + .contains(NetworkFilterMask::FROM_HTTP | NetworkFilterMask::FROM_HTTPS) + { + "^https?://" + } else if v.mask.contains(NetworkFilterMask::FROM_HTTP) { + "^http://" + } else if v.mask.contains(NetworkFilterMask::FROM_HTTPS) { + "^https://" + } else if v.mask.contains(NetworkFilterMask::FROM_WEBSOCKET) { + "^wss?://" + } else { + unreachable!("Invalid scheme information"); } + .to_string(), }; - let (if_domain, unless_domain) = if v.opt_domains.is_some() || v.opt_not_domains.is_some() { + let (if_domain, unless_domain) = if v.opt_domains.is_some() + || v.opt_not_domains.is_some() + { let mut if_domain = vec![]; let mut unless_domain = vec![]; // Unwraps are okay here - any rules with opt_domains or opt_not_domains must have // an options section delimited by a '$' character, followed by a `domain=` option. let opts = &raw_line[find_char(b'$', raw_line.as_bytes()).unwrap() + "$".len()..]; - let domain_start_index = if let Some(index) = memmem::find(opts.as_bytes(), b"domain=") { - index - } else { - return Err(CbRuleCreationFailure::FromNotSupported); - }; - let domains_start = - &opts[domain_start_index + "domain=".len()..]; + let domain_start_index = + if let Some(index) = memmem::find(opts.as_bytes(), b"domain=") { + index + } else { + return Err(CbRuleCreationFailure::FromNotSupported); + }; + let domains_start = &opts[domain_start_index + "domain=".len()..]; let domains = if let Some(comma) = find_char(b',', domains_start.as_bytes()) { &domains_start[..comma] } else { domains_start - }.split('|'); + } + .split('|'); domains.for_each(|domain| { - let (collection, domain) = if let Some(domain_stripped) = domain.strip_prefix('~') { - (&mut unless_domain, domain_stripped) - } else { - (&mut if_domain, domain) - }; + let (collection, domain) = + if let Some(domain_stripped) = domain.strip_prefix('~') { + (&mut unless_domain, domain_stripped) + } else { + (&mut if_domain, domain) + }; let lowercase = domain.to_lowercase(); let normalized_domain = if lowercase.is_ascii() { @@ -645,786 +671,5 @@ impl TryFrom for CbRule { } #[cfg(test)] -mod ab2cb_tests { - use super::*; - - fn test_from_abp(abp_rule: &str, cb: &str) { - let filter = crate::lists::parse_filter(abp_rule, true, Default::default()) - .expect("Rule under test could not be parsed"); - assert_eq!( - CbRuleEquivalent::try_from(filter) - .unwrap() - .into_iter() - .collect::>(), - serde_json::from_str::>(cb) - .expect("content blocking rule under test could not be deserialized") - ); - } - - #[test] - fn ad_tests() { - test_from_abp( - "&ad_box_", - r####"[{ - "action": { - "type": "block" - }, - "trigger": { - "url-filter": "&ad_box_" - } - }]"####, - ); - test_from_abp( - "&ad_channel=", - r####"[{ - "action": { - "type": "block" - }, - "trigger": { - "url-filter": "&ad_channel=" - } - }]"####, - ); - test_from_abp( - "+advertorial.", - r####"[{ - "action": { - "type": "block" - }, - "trigger": { - "url-filter": "\\+advertorial\\." - } - }]"####, - ); - test_from_abp( - "&prvtof=*&poru=", - r####"[{ - "action": { - "type": "block" - }, - "trigger": { - "url-filter": "&prvtof=.*&poru=" - } - }]"####, - ); - test_from_abp( - "-ad-180x150px.", - r####"[{ - "action": { - "type": "block" - }, - "trigger": { - "url-filter": "-ad-180x150px\\." - } - }]"####, - ); - test_from_abp( - "://findnsave.*.*/api/groupon.json?", - r####"[{ - "action": { - "type": "block" - }, - "trigger": { - "url-filter": "://findnsave\\..*\\..*/api/groupon\\.json\\?" - } - }]"####, - ); - test_from_abp( - "|https://$script,third-party,domain=tamilrockers.ws", - r####"[{ - "action": { - "type": "block" - }, - "trigger": { - "if-domain": ["*tamilrockers.ws"], - "load-type": ["third-party"], - "resource-type": ["script"], - "url-filter": "^https://" - } - }]"####, - ); - test_from_abp("||com/banners/$image,object,subdocument,domain=~pingdom.com|~thetvdb.com|~tooltrucks.com", r####"[{ - "action": { - "type": "block" - }, - "trigger": { - "url-filter": "^[^:]+:(//)?([^/]+\\.)?com/banners/", - "unless-domain": [ - "*pingdom.com", - "*thetvdb.com", - "*tooltrucks.com" - ], - "resource-type": [ - "image" - ] - } - }, { - "trigger": { - "url-filter": "^[^:]+:(//)?([^/]+\\.)?com/banners/", - "unless-domain": [ - "*pingdom.com", - "*thetvdb.com", - "*tooltrucks.com" - ], - "resource-type": [ - "document" - ], - "load-type": [ - "third-party" - ] - }, - "action": { - "type": "block" - } - }]"####); - test_from_abp( - "$image,third-party,xmlhttprequest,domain=rd.com", - r####"[{ - "action": { - "type": "block" - }, - "trigger": { - "url-filter": "^https?://", - "if-domain": [ - "*rd.com" - ], - "resource-type": [ - "image", - "raw" - ], - "load-type": [ - "third-party" - ] - } - }]"####, - ); - test_from_abp( - "|https://r.i.ua^", - r####"[{ - "action": { - "type": "block" - }, - "trigger": { - "url-filter": "^https://r\\.i\\.ua" - } - }]"####, - ); - test_from_abp( - "|ws://$domain=4shared.com", - r####"[{ - "action": { - "type": "block" - }, - "trigger": { - "url-filter": "^wss?://", - "if-domain": [ - "*4shared.com" - ] - } - }]"####, - ); - } - - #[test] - fn element_hiding_tests() { - test_from_abp( - "###A9AdsMiddleBoxTop", - r####"[{ - "action": { - "type": "css-display-none", - "selector": "#A9AdsMiddleBoxTop" - }, - "trigger": { - "url-filter": ".*" - } - }]"####, - ); - test_from_abp( - "thedailygreen.com#@##AD_banner", - r####"[{ - "action": { - "type": "css-display-none", - "selector": "#AD_banner" - }, - "trigger": { - "url-filter": ".*", - "unless-domain": [ - "thedailygreen.com" - ] - } - }]"####, - ); - test_from_abp( - "sprouts.com,tbns.com.au#@##AdImage", - r####"[{ - "action": { - "type": "css-display-none", - "selector": "#AdImage" - }, - "trigger": { - "url-filter": ".*", - "unless-domain": [ - "sprouts.com", - "tbns.com.au" - ] - } - }]"####, - ); - test_from_abp( - r#"santander.co.uk#@#a[href^="http://ad-emea.doubleclick.net/"]"#, - r####"[{ - "action": { - "type": "css-display-none", - "selector": "a[href^=\"http://ad-emea.doubleclick.net/\"]" - }, - "trigger": { - "url-filter": ".*", - "unless-domain": [ - "santander.co.uk" - ] - } - }]"####, - ); - test_from_abp( - "search.safefinder.com,search.snapdo.com###ABottomD", - r####"[{ - "action": { - "type": "css-display-none", - "selector": "#ABottomD" - }, - "trigger": { - "url-filter": ".*", - "if-domain": [ - "search.safefinder.com", - "search.snapdo.com" - ] - } - }]"####, - ); - test_from_abp( - r#"tweakguides.com###adbar > br + p[style="text-align: center"] + p[style="text-align: center"]"#, - r####"[{ - "action": { - "type": "css-display-none", - "selector": "#adbar > br + p[style=\"text-align: center\"] + p[style=\"text-align: center\"]" - }, - "trigger": { - "url-filter": ".*", - "if-domain": [ - "tweakguides.com" - ] - } - }]"####, - ); - } - - /* TODO - `$popup` is currently unsupported by NetworkFilter - #[test] - fn popup_tests() { - test_from_abp("||admngronline.com^$popup,third-party", r####"[{ - "action": { - "type": "block" - }, - "trigger": { - "url-filter": "^https?://admngronline\\.com(?:[\\x00-\\x24\\x26-\\x2C\\x2F\\x3A-\\x40\\x5B-\\x5E\\x60\\x7B-\\x7F]|$)", - "load-type": [ - "third-party" - ], - "resource-type": [ - "popup" - ] - } - }]"####); - test_from_abp("||bet365.com^*affiliate=$popup", r####"[{ - "action": { - "type": "block" - }, - "trigger": { - "url-filter": "^https?://bet365\\.com(?:[\\x00-\\x24\\x26-\\x2C\\x2F\\x3A-\\x40\\x5B-\\x5E\\x60\\x7B-\\x7F]|$).*affiliate=", - "resource-type": [ - "popup" - ] - } - }]"####); - } - */ - - #[test] - fn third_party() { - test_from_abp( - "||007-gateway.com^$third-party", - r####"[{ - "action": { - "type": "block" - }, - "trigger": { - "url-filter": "^[^:]+:(//)?([^/]+\\.)?007-gateway\\.com", - "load-type": [ - "third-party" - ] - } - }]"####, - ); - test_from_abp( - "||allestörungen.at^$third-party", - r####"[{ - "action": { - "type": "block" - }, - "trigger": { - "url-filter": "^[^:]+:(//)?([^/]+\\.)?xn--allestrungen-9ib\\.at", - "load-type": [ - "third-party" - ] - } - }]"####, - ); - test_from_abp( - "||anet*.tradedoubler.com^$third-party", - r####"[{ - "action": { - "type": "block" - }, - "trigger": { - "url-filter": "^[^:]+:(//)?([^/]+\\.)?anet.*\\.tradedoubler\\.com", - "load-type": [ - "third-party" - ] - } - }]"####, - ); - test_from_abp("||doubleclick.net^$third-party,domain=3news.co.nz|92q.com|abc-7.com|addictinggames.com|allbusiness.com|allthingsd.com|bizjournals.com|bloomberg.com|bnn.ca|boom92houston.com|boom945.com|boomphilly.com|break.com|cbc.ca|cbs19.tv|cbs3springfield.com|cbsatlanta.com|cbslocal.com|complex.com|dailymail.co.uk|darkhorizons.com|doubleviking.com|euronews.com|extratv.com|fandango.com|fox19.com|fox5vegas.com|gorillanation.com|hawaiinewsnow.com|hellobeautiful.com|hiphopnc.com|hot1041stl.com|hothiphopdetroit.com|hotspotatl.com|hulu.com|imdb.com|indiatimes.com|indyhiphop.com|ipowerrichmond.com|joblo.com|kcra.com|kctv5.com|ketv.com|koat.com|koco.com|kolotv.com|kpho.com|kptv.com|ksat.com|ksbw.com|ksfy.com|ksl.com|kypost.com|kysdc.com|live5news.com|livestation.com|livestream.com|metro.us|metronews.ca|miamiherald.com|my9nj.com|myboom1029.com|mycolumbusmagic.com|mycolumbuspower.com|myfoxdetroit.com|myfoxorlando.com|myfoxphilly.com|myfoxphoenix.com|myfoxtampabay.com|nbcrightnow.com|neatorama.com|necn.com|neopets.com|news.com.au|news4jax.com|newsone.com|nintendoeverything.com|oldschoolcincy.com|own3d.tv|pagesuite-professional.co.uk|pandora.com|player.theplatform.com|ps3news.com|radio.com|radionowindy.com|rottentomatoes.com|sbsun.com|shacknews.com|sk-gaming.com|ted.com|thebeatdfw.com|theboxhouston.com|theglobeandmail.com|timesnow.tv|tv2.no|twitch.tv|universalsports.com|ustream.tv|wapt.com|washingtonpost.com|wate.com|wbaltv.com|wcvb.com|wdrb.com|wdsu.com|wflx.com|wfmz.com|wfsb.com|wgal.com|whdh.com|wired.com|wisn.com|wiznation.com|wlky.com|wlns.com|wlwt.com|wmur.com|wnem.com|wowt.com|wral.com|wsj.com|wsmv.com|wsvn.com|wtae.com|wthr.com|wxii12.com|wyff4.com|yahoo.com|youtube.com|zhiphopcleveland.com", r####"[{ - "action": { - "type": "block" - }, - "trigger": { - "url-filter": "^[^:]+:(//)?([^/]+\\.)?doubleclick\\.net", - "load-type": [ - "third-party" - ], - "if-domain": [ - "*3news.co.nz", - "*92q.com", - "*abc-7.com", - "*addictinggames.com", - "*allbusiness.com", - "*allthingsd.com", - "*bizjournals.com", - "*bloomberg.com", - "*bnn.ca", - "*boom92houston.com", - "*boom945.com", - "*boomphilly.com", - "*break.com", - "*cbc.ca", - "*cbs19.tv", - "*cbs3springfield.com", - "*cbsatlanta.com", - "*cbslocal.com", - "*complex.com", - "*dailymail.co.uk", - "*darkhorizons.com", - "*doubleviking.com", - "*euronews.com", - "*extratv.com", - "*fandango.com", - "*fox19.com", - "*fox5vegas.com", - "*gorillanation.com", - "*hawaiinewsnow.com", - "*hellobeautiful.com", - "*hiphopnc.com", - "*hot1041stl.com", - "*hothiphopdetroit.com", - "*hotspotatl.com", - "*hulu.com", - "*imdb.com", - "*indiatimes.com", - "*indyhiphop.com", - "*ipowerrichmond.com", - "*joblo.com", - "*kcra.com", - "*kctv5.com", - "*ketv.com", - "*koat.com", - "*koco.com", - "*kolotv.com", - "*kpho.com", - "*kptv.com", - "*ksat.com", - "*ksbw.com", - "*ksfy.com", - "*ksl.com", - "*kypost.com", - "*kysdc.com", - "*live5news.com", - "*livestation.com", - "*livestream.com", - "*metro.us", - "*metronews.ca", - "*miamiherald.com", - "*my9nj.com", - "*myboom1029.com", - "*mycolumbusmagic.com", - "*mycolumbuspower.com", - "*myfoxdetroit.com", - "*myfoxorlando.com", - "*myfoxphilly.com", - "*myfoxphoenix.com", - "*myfoxtampabay.com", - "*nbcrightnow.com", - "*neatorama.com", - "*necn.com", - "*neopets.com", - "*news.com.au", - "*news4jax.com", - "*newsone.com", - "*nintendoeverything.com", - "*oldschoolcincy.com", - "*own3d.tv", - "*pagesuite-professional.co.uk", - "*pandora.com", - "*player.theplatform.com", - "*ps3news.com", - "*radio.com", - "*radionowindy.com", - "*rottentomatoes.com", - "*sbsun.com", - "*shacknews.com", - "*sk-gaming.com", - "*ted.com", - "*thebeatdfw.com", - "*theboxhouston.com", - "*theglobeandmail.com", - "*timesnow.tv", - "*tv2.no", - "*twitch.tv", - "*universalsports.com", - "*ustream.tv", - "*wapt.com", - "*washingtonpost.com", - "*wate.com", - "*wbaltv.com", - "*wcvb.com", - "*wdrb.com", - "*wdsu.com", - "*wflx.com", - "*wfmz.com", - "*wfsb.com", - "*wgal.com", - "*whdh.com", - "*wired.com", - "*wisn.com", - "*wiznation.com", - "*wlky.com", - "*wlns.com", - "*wlwt.com", - "*wmur.com", - "*wnem.com", - "*wowt.com", - "*wral.com", - "*wsj.com", - "*wsmv.com", - "*wsvn.com", - "*wtae.com", - "*wthr.com", - "*wxii12.com", - "*wyff4.com", - "*yahoo.com", - "*youtube.com", - "*zhiphopcleveland.com" - ] - } - }]"####); - test_from_abp("||dt00.net^$third-party,domain=~marketgid.com|~marketgid.ru|~marketgid.ua|~mgid.com|~thechive.com", r####"[{ - "action": { - "type": "block" - }, - "trigger": { - "url-filter": "^[^:]+:(//)?([^/]+\\.)?dt00\\.net", - "load-type": [ - "third-party" - ], - "unless-domain": [ - "*marketgid.com", - "*marketgid.ru", - "*marketgid.ua", - "*mgid.com", - "*thechive.com" - ] - } - }]"####); - test_from_abp("||amazonaws.com/newscloud-production/*/backgrounds/$domain=crescent-news.com|daily-jeff.com|recordpub.com|state-journal.com|the-daily-record.com|the-review.com|times-gazette.com", r####"[{ - "action": { - "type": "block" - }, - "trigger": { - "url-filter": "^[^:]+:(//)?([^/]+\\.)?amazonaws\\.com/newscloud-production/.*/backgrounds/", - "if-domain": [ - "*crescent-news.com", - "*daily-jeff.com", - "*recordpub.com", - "*state-journal.com", - "*the-daily-record.com", - "*the-review.com", - "*times-gazette.com" - ] - } - }]"####); - test_from_abp( - "||d1noellhv8fksc.cloudfront.net^", - r####"[{ - "action": { - "type": "block" - }, - "trigger": { - "url-filter": "^[^:]+:(//)?([^/]+\\.)?d1noellhv8fksc\\.cloudfront\\.net" - } - }]"####, - ); - } - - #[test] - fn whitelist() { - test_from_abp( - "@@||google.com/recaptcha/$domain=mediafire.com", - r####"[{ - "action": { - "type": "ignore-previous-rules" - }, - "trigger": { - "url-filter": "^[^:]+:(//)?([^/]+\\.)?google\\.com/recaptcha/", - "if-domain": [ - "*mediafire.com" - ] - } - }]"####, - ); - test_from_abp( - "@@||ad4.liverail.com/?compressed|$domain=majorleaguegaming.com|pbs.org|wikihow.com", - r####"[{ - "action": { - "type": "ignore-previous-rules" - }, - "trigger": { - "url-filter": "^[^:]+:(//)?([^/]+\\.)?ad4\\.liverail\\.com/\\?compressed$", - "if-domain": [ - "*majorleaguegaming.com", - "*pbs.org", - "*wikihow.com" - ] - } - }]"####, - ); - test_from_abp( - "@@||googletagservices.com/tag/js/gpt.js$domain=allestoringen.nl|allestörungen.at", - r####"[{ - "action": { - "type": "ignore-previous-rules" - }, - "trigger": { - "url-filter": "^[^:]+:(//)?([^/]+\\.)?googletagservices\\.com/tag/js/gpt\\.js", - "if-domain": [ - "*allestoringen.nl", - "*xn--allestrungen-9ib.at" - ] - } - }]"####, - ); - test_from_abp( - "@@||advertising.autotrader.co.uk^$~third-party", - r####"[{ - "action": { - "type": "ignore-previous-rules" - }, - "trigger": { - "load-type": [ - "first-party" - ], - "url-filter": "^[^:]+:(//)?([^/]+\\.)?advertising\\.autotrader\\.co\\.uk" - } - }]"####, - ); - test_from_abp( - "@@||advertising.racingpost.com^$image,script,stylesheet,~third-party,xmlhttprequest", - r####"[{ - "action": { - "type": "ignore-previous-rules" - }, - "trigger": { - "load-type": [ - "first-party" - ], - "url-filter": "^[^:]+:(//)?([^/]+\\.)?advertising\\.racingpost\\.com", - "resource-type": [ - "image", - "style-sheet", - "script", - "raw" - ] - } - }]"####, - ); - } - - #[test] - fn test_ignore_previous_fp_documents() { - assert_eq!( - vec![ignore_previous_fp_documents()], - serde_json::from_str::>( - r####"[{ - "trigger":{ - "url-filter":".*", - "resource-type":["document"], - "load-type":["first-party"] - }, - "action":{"type":"ignore-previous-rules"} - }]"#### - ) - .expect("content blocking rule under test could not be deserialized") - ); - } - - #[test] - fn escape_literal_backslashes() { - test_from_abp( - r#"||gamer.no/?module=Tumedia\DFProxy\Modules^"#, - r####"[{ - "action": { - "type": "block" - }, - "trigger": { - "url-filter": "^[^:]+:(//)?([^/]+\\.)?gamer\\.no/\\?module=tumedia\\\\dfproxy\\\\modules" - } - }]"####, - ); - } -} - -#[cfg(test)] -mod filterset_tests { - use crate::lists::{FilterSet, ParseOptions, RuleTypes}; - - const FILTER_LIST: &[&str] = &[ - "||example.com^$script", - "||test.net^$image,third-party", - "/trackme.js^$script", - "example.com##.ad-banner", - "##.ad-640x480", - "##p.sponsored", - ]; - - #[test] - fn convert_all_rules() -> Result<(), ()> { - let mut set = FilterSet::new(true); - set.add_filters(FILTER_LIST, Default::default()); - - let (cb_rules, used_rules) = set.into_content_blocking()?; - assert_eq!(used_rules, FILTER_LIST); - - // All 6 rules plus `ignore_previous_fp_documents()` - assert_eq!(cb_rules.len(), 7); - - Ok(()) - } - - #[test] - fn convert_network_only() -> Result<(), ()> { - let parse_opts = ParseOptions { - rule_types: RuleTypes::NetworkOnly, - ..Default::default() - }; - - let mut set = FilterSet::new(true); - set.add_filters(FILTER_LIST, parse_opts); - - let (cb_rules, used_rules) = set.into_content_blocking()?; - assert_eq!(used_rules, &FILTER_LIST[0..3]); - - // 3 network rules plus `ignore_previous_fp_documents()` - assert_eq!(cb_rules.len(), 4); - - Ok(()) - } - - #[test] - fn convert_cosmetic_only() -> Result<(), ()> { - let parse_opts = ParseOptions { - rule_types: RuleTypes::CosmeticOnly, - ..Default::default() - }; - - let mut set = FilterSet::new(true); - set.add_filters(FILTER_LIST, parse_opts); - - let (cb_rules, used_rules) = set.into_content_blocking()?; - assert_eq!(used_rules, &FILTER_LIST[3..6]); - - // 3 cosmetic rules only - assert_eq!(cb_rules.len(), 3); - - Ok(()) - } - - #[test] - fn ignore_unsupported_rules() -> Result<(), ()> { - let mut set = FilterSet::new(true); - set.add_filters(FILTER_LIST, Default::default()); - set.add_filters([ - // unicode characters - "||rgmechanics.info/uploads/660х90_", - "||insaattrendy.com/Upload/bükerbanner*.jpg", - // from domain - "/siropu/am/core.min.js$script,important,from=~audi-sport.net|~hifiwigwam.com", - // leading zero-width space - r#"​##a[href^="https://www.g2fame.com/"] > img"#, - ], Default::default()); - - let (cb_rules, used_rules) = set.into_content_blocking()?; - assert_eq!(used_rules, FILTER_LIST); - - // All 6 rules plus `ignore_previous_fp_documents()` - assert_eq!(cb_rules.len(), 7); - - Ok(()) - } - - #[test] - fn punycode_if_domains() -> Result<(), ()> { - let list = [ - "smskaraborg.se,örnsköldsviksgymnasium.se,mojligheternashusab.se##.env-modal-dialog__backdrop", - ]; - let mut set = FilterSet::new(true); - set.add_filters(&list, Default::default()); - - let (cb_rules, used_rules) = set.into_content_blocking()?; - assert_eq!(used_rules, list); - - assert_eq!(cb_rules.len(), 1); - assert!(cb_rules[0].trigger.if_domain.is_some()); - assert_eq!(cb_rules[0].trigger.if_domain.as_ref().unwrap(), &["smskaraborg.se", "xn--rnskldsviksgymnasium-29be.se", "mojligheternashusab.se"]); - - Ok(()) - } - - #[test] - fn convert_cosmetic_filter_locations() -> Result<(), ()> { - let list = [ - r"/^dizipal\d+\.com$/##.web", - r"/^example\d+\.com$/,test.net,b.*##.ad", - ]; - let mut set = FilterSet::new(true); - set.add_filters(&list, Default::default()); - - let (cb_rules, used_rules) = set.into_content_blocking()?; - assert_eq!(used_rules.len(), 1); - assert_eq!(cb_rules.len(), 1); - assert!(cb_rules[0].trigger.if_domain.is_some()); - assert_eq!( - cb_rules[0].trigger.if_domain.as_ref().unwrap(), - &["test.net"] - ); - - Ok(()) - } -} +#[path = "../tests/unit/content_blocking.rs"] +mod unit_tests; diff --git a/src/cosmetic_filter_cache.rs b/src/cosmetic_filter_cache.rs index 489b1bd9..a64d9583 100644 --- a/src/cosmetic_filter_cache.rs +++ b/src/cosmetic_filter_cache.rs @@ -8,11 +8,10 @@ //! cosmetic filters and allows them to be queried efficiently at runtime for any which may be //! relevant to a particular page. +#![allow(dead_code)] + use crate::filters::cosmetic::{ - CosmeticFilter, - CosmeticFilterAction, - CosmeticFilterMask, - CosmeticFilterOperator, + CosmeticFilter, CosmeticFilterAction, CosmeticFilterMask, CosmeticFilterOperator, }; use crate::resources::{PermissionMask, ResourceStorage}; use crate::utils::Hash; @@ -130,7 +129,10 @@ impl CosmeticFilterCache { /// Add a filter, assuming it has already been determined to be a generic rule fn add_generic_filter(&mut self, rule: CosmeticFilter) { - let selector = rule.plain_css_selector().expect("Procedural cosmetic filters cannot be generic").to_string(); + let selector = rule + .plain_css_selector() + .expect("Procedural cosmetic filters cannot be generic") + .to_string(); if selector.starts_with('.') { if let Some(key) = key_from_selector(&selector) { assert!(key.starts_with('.')); @@ -184,8 +186,8 @@ impl CosmeticFilterCache { /// stateless with regard to active page sessions. pub fn hidden_class_id_selectors( &self, - classes: impl IntoIterator>, - ids: impl IntoIterator>, + classes: impl IntoIterator>, + ids: impl IntoIterator>, exceptions: &HashSet, ) -> Vec { let mut selectors = vec![]; @@ -198,7 +200,12 @@ impl CosmeticFilterCache { selectors.push(format!(".{}", class)); } if let Some(bucket) = self.complex_class_rules.get(class) { - selectors.extend(bucket.iter().filter(|sel| !exceptions.contains(*sel)).map(|s| s.to_owned())); + selectors.extend( + bucket + .iter() + .filter(|sel| !exceptions.contains(*sel)) + .map(|s| s.to_owned()), + ); } }); ids.into_iter().for_each(|id| { @@ -207,7 +214,12 @@ impl CosmeticFilterCache { selectors.push(format!("#{}", id)); } if let Some(bucket) = self.complex_id_rules.get(id) { - selectors.extend(bucket.iter().filter(|sel| !exceptions.contains(*sel)).map(|s| s.to_owned())); + selectors.extend( + bucket + .iter() + .filter(|sel| !exceptions.contains(*sel)) + .map(|s| s.to_owned()), + ); } }); @@ -242,26 +254,50 @@ impl CosmeticFilterCache { let mut except_all_scripts = false; - let hashes: Vec<&Hash> = request_entities.iter().chain(request_hostnames.iter()).collect(); - - fn populate_set(hash: &Hash, source_bin: &HostnameFilterBin, dest_set: &mut HashSet) { + let hashes: Vec<&Hash> = request_entities + .iter() + .chain(request_hostnames.iter()) + .collect(); + + fn populate_set( + hash: &Hash, + source_bin: &HostnameFilterBin, + dest_set: &mut HashSet, + ) { if let Some(s) = source_bin.get(hash) { - s.iter().for_each(|s| { dest_set.insert(s.to_owned()); }); + s.iter().for_each(|s| { + dest_set.insert(s.to_owned()); + }); } } for hash in hashes.iter() { - populate_set(hash, &self.specific_rules.hide, &mut specific_hide_selectors); - populate_set(hash, &self.specific_rules.procedural_action, &mut procedural_actions); + populate_set( + hash, + &self.specific_rules.hide, + &mut specific_hide_selectors, + ); + populate_set( + hash, + &self.specific_rules.procedural_action, + &mut procedural_actions, + ); // special behavior: `script_injections` doesn't have to own the strings yet, since the // scripts need to be fetched and templated later if let Some(s) = self.specific_rules.inject_script.get(hash) { s.iter().for_each(|(s, mask)| { - script_injections.entry(s).and_modify(|entry| *entry |= *mask).or_insert(*mask); + script_injections + .entry(s) + .and_modify(|entry| *entry |= *mask) + .or_insert(*mask); }); } } - fn prune_set(hash: &Hash, source_bin: &HostnameFilterBin, dest_set: &mut HashSet) { + fn prune_set( + hash: &Hash, + source_bin: &HostnameFilterBin, + dest_set: &mut HashSet, + ) { if let Some(s) = source_bin.get(hash) { s.iter().for_each(|s| { dest_set.remove(s); @@ -276,7 +312,11 @@ impl CosmeticFilterCache { exceptions.insert(s.to_owned()); }); } - prune_set(hash, &self.specific_rules.procedural_action_exception, &mut procedural_actions); + prune_set( + hash, + &self.specific_rules.procedural_action_exception, + &mut procedural_actions, + ); // same logic but not using prune_set since strings are unowned, (see above) if let Some(s) = self.specific_rules.uninject_script.get(hash) { for s in s { @@ -402,8 +442,13 @@ impl ProceduralOrActionFilter { /// Returns `(selector, style)` if the filter can be expressed in pure CSS. pub fn as_css(&self) -> Option<(String, String)> { match (&self.selector[..], &self.action) { - ([CosmeticFilterOperator::CssSelector(selector)], None) => Some((selector.to_string(), "display: none !important".to_string())), - ([CosmeticFilterOperator::CssSelector(selector)], Some(CosmeticFilterAction::Style(style))) => Some((selector.to_string(), style.to_string())), + ([CosmeticFilterOperator::CssSelector(selector)], None) => { + Some((selector.to_string(), "display: none !important".to_string())) + } + ( + [CosmeticFilterOperator::CssSelector(selector)], + Some(CosmeticFilterAction::Style(style)), + ) => Some((selector.to_string(), style.to_string())), _ => None, } } @@ -424,22 +469,27 @@ impl HostnameRuleDb { let unhide = rule.mask.contains(CosmeticFilterMask::UNHIDE); let script_inject = rule.mask.contains(CosmeticFilterMask::SCRIPT_INJECT); - let kind = match (script_inject, rule.plain_css_selector().map(|s| s.to_string()), rule.action) { + let kind = match ( + script_inject, + rule.plain_css_selector().map(|s| s.to_string()), + rule.action, + ) { (false, Some(selector), None) => Hide(selector), (true, Some(selector), None) => InjectScript((selector, rule.permission)), - (false, selector, action) => ProceduralOrAction(serde_json::to_string(&ProceduralOrActionFilter { - selector: selector.map(|selector| vec![CosmeticFilterOperator::CssSelector(selector)]).unwrap_or(rule.selector), - action - }).unwrap()), + (false, selector, action) => ProceduralOrAction( + serde_json::to_string(&ProceduralOrActionFilter { + selector: selector + .map(|selector| vec![CosmeticFilterOperator::CssSelector(selector)]) + .unwrap_or(rule.selector), + action, + }) + .unwrap(), + ), (true, _, Some(_)) => return, // script injection with action - shouldn't be possible (true, None, _) => return, // script injection without plain CSS selector - shouldn't be possible }; - let kind = if unhide { - kind.negated() - } else { - kind - }; + let kind = if unhide { kind.negated() } else { kind }; let tokens_to_insert = std::iter::empty() .chain(rule.hostnames.unwrap_or(Vec::new())) @@ -514,7 +564,8 @@ fn key_from_selector(selector: &str) -> Option { static RE_PLAIN_SELECTOR: Lazy = Lazy::new(|| Regex::new(r"^[#.][\w\\-]+").unwrap()); static RE_PLAIN_SELECTOR_ESCAPED: Lazy = Lazy::new(|| Regex::new(r"^[#.](?:\\[0-9A-Fa-f]+ |\\.|\w|-)+").unwrap()); - static RE_ESCAPE_SEQUENCE: Lazy = Lazy::new(|| Regex::new(r"\\([0-9A-Fa-f]+ |.)").unwrap()); + static RE_ESCAPE_SEQUENCE: Lazy = + Lazy::new(|| Regex::new(r"\\([0-9A-Fa-f]+ |.)").unwrap()); // If there are no escape characters in the selector, just take the first class or id token. let mat = RE_PLAIN_SELECTOR.find(selector); @@ -541,15 +592,15 @@ fn key_from_selector(selector: &str) -> Option { beginning = location.end(); // Unwrap is safe because there is a capture group specified in the regex let capture = capture.get(1).unwrap().as_str(); - if capture.chars().count() == 1 { // Check number of unicode characters rather than byte length + if capture.chars().count() == 1 { + // Check number of unicode characters rather than byte length key += capture; } else { // This u32 conversion can overflow let codepoint = u32::from_str_radix(&capture[..capture.len() - 1], 16).ok()?; // Not all u32s are valid Unicode codepoints - key += &core::char::from_u32(codepoint)? - .to_string(); + key += &core::char::from_u32(codepoint)?.to_string(); } } Some(key + &escaped[beginning..]) @@ -559,617 +610,5 @@ fn key_from_selector(selector: &str) -> Option { } #[cfg(test)] -mod key_from_selector_tests { - use super::key_from_selector; - - #[test] - fn no_escapes() { - assert_eq!(key_from_selector(r#"#selector"#).unwrap(), "#selector"); - assert_eq!(key_from_selector(r#"#ad-box[href="https://popads.net"]"#).unwrap(), "#ad-box"); - assert_eq!(key_from_selector(r#".p"#).unwrap(), ".p"); - assert_eq!(key_from_selector(r#".ad #ad.adblockblock"#).unwrap(), ".ad"); - assert_eq!(key_from_selector(r#"#container.contained"#).unwrap(), "#container"); - } - - #[test] - fn escaped_characters() { - assert_eq!(key_from_selector(r"#Meebo\:AdElement\.Root").unwrap(), "#Meebo:AdElement.Root"); - assert_eq!(key_from_selector(r"#\ Banner\ Ad\ -\ 590\ x\ 90").unwrap(), "# Banner Ad - 590 x 90"); - assert_eq!(key_from_selector(r"#\ rek").unwrap(), "# rek"); - assert_eq!(key_from_selector(r#"#\:rr .nH[role="main"] .mq:first-child"#).unwrap(), "#:rr"); - assert_eq!(key_from_selector(r#"#adspot-300x600\,300x250-pos-1"#).unwrap(), "#adspot-300x600,300x250-pos-1"); - assert_eq!(key_from_selector(r#"#adv_\'146\'"#).unwrap(), "#adv_\'146\'"); - assert_eq!(key_from_selector(r#"#oas-mpu-left\<\/div\>"#).unwrap(), "#oas-mpu-left"); - assert_eq!(key_from_selector(r#".Trsp\(op\).Trsdu\(3s\)"#).unwrap(), ".Trsp(op)"); - } - - #[test] - fn escape_codes() { - assert_eq!(key_from_selector(r#"#\5f _mom_ad_12"#).unwrap(), "#__mom_ad_12"); - assert_eq!(key_from_selector(r#"#\5f _nq__hh[style="display:block!important"]"#).unwrap(), "#__nq__hh"); - assert_eq!(key_from_selector(r#"#\31 000-014-ros"#).unwrap(), "#1000-014-ros"); - assert_eq!(key_from_selector(r#"#\33 00X250ad"#).unwrap(), "#300X250ad"); - assert_eq!(key_from_selector(r#"#\5f _fixme"#).unwrap(), "#__fixme"); - assert_eq!(key_from_selector(r#"#\37 28ad"#).unwrap(), "#728ad"); - } - - #[test] - fn bad_escapes() { - assert!(key_from_selector(r#"#\5ffffffffff overflows"#).is_none()); - assert!(key_from_selector(r#"#\5fffffff is_too_large"#).is_none()); - } -} - -#[cfg(test)] -mod cosmetic_cache_tests { - use super::*; - use crate::resources::Resource; - - fn cache_from_rules(rules: Vec<&str>) -> CosmeticFilterCache { - let parsed_rules = rules - .iter() - .map(|r| CosmeticFilter::parse(r, false, Default::default()).unwrap()) - .collect::>(); - - CosmeticFilterCache::from_rules(parsed_rules) - } - - #[test] - fn exceptions() { - let cfcache = cache_from_rules(vec!["~example.com##.item", "sub.example.com#@#.item2"]); - let resources = ResourceStorage::default(); - - let out = cfcache.hostname_cosmetic_resources(&resources, "test.com", false); - let mut expected = UrlSpecificResources::empty(); - assert_eq!(out, expected); - - let out = cfcache.hostname_cosmetic_resources(&resources, "example.com", false); - expected.exceptions.insert(".item".into()); - assert_eq!(out, expected); - - let out = cfcache.hostname_cosmetic_resources(&resources, "sub.example.com", false); - expected.exceptions.insert(".item2".into()); - assert_eq!(out, expected); - } - - #[test] - fn exceptions2() { - let cfcache = cache_from_rules(vec!["example.com,~sub.example.com##.item"]); - let resources = ResourceStorage::default(); - - let out = cfcache.hostname_cosmetic_resources(&resources, "test.com", false); - let mut expected = UrlSpecificResources::empty(); - assert_eq!(out, expected); - - let out = cfcache.hostname_cosmetic_resources(&resources, "example.com", false); - expected.hide_selectors.insert(".item".to_owned()); - assert_eq!(out, expected); - - let out = cfcache.hostname_cosmetic_resources(&resources, "sub.example.com", false); - let mut expected = UrlSpecificResources::empty(); - expected.exceptions.insert(".item".into()); - assert_eq!(out, expected); - } - - #[test] - fn style_exceptions() { - let cfcache = cache_from_rules(vec![ - "example.com,~sub.example.com##.element:style(background: #fff)", - "sub.test.example.com#@#.element:style(background: #fff)", - "a1.sub.example.com##.element", - "a2.sub.example.com##.element:style(background: #000)", - "a3.example.com##.element:style(background: #000)", - ]); - let resources = ResourceStorage::default(); - - let out = cfcache.hostname_cosmetic_resources(&resources, "sub.example.com", false); - let mut expected = UrlSpecificResources::empty(); - assert_eq!(out, expected); - - let out = cfcache.hostname_cosmetic_resources(&resources, "sub.test.example.com", false); - assert_eq!(out, expected); - - let out = cfcache.hostname_cosmetic_resources(&resources, "a1.sub.example.com", false); - expected.hide_selectors.insert(".element".to_owned()); - assert_eq!(out, expected); - - let out = cfcache.hostname_cosmetic_resources(&resources, "test.example.com", false); - expected.hide_selectors.clear(); - expected - .procedural_actions - .insert(serde_json::to_string(&ProceduralOrActionFilter::from_css(".element".to_string(), "background: #fff".to_string())).unwrap()); - assert_eq!(out, expected); - - let out = cfcache.hostname_cosmetic_resources(&resources, "a2.sub.example.com", false); - expected.procedural_actions.clear(); - expected - .procedural_actions - .insert(serde_json::to_string(&ProceduralOrActionFilter::from_css(".element".to_string(), "background: #000".to_string())).unwrap()); - assert_eq!(out, expected); - - let out = cfcache.hostname_cosmetic_resources(&resources, "a3.example.com", false); - expected.procedural_actions.clear(); - expected - .procedural_actions - .insert(serde_json::to_string(&ProceduralOrActionFilter::from_css(".element".to_string(), "background: #000".to_string())).unwrap()); - expected - .procedural_actions - .insert(serde_json::to_string(&ProceduralOrActionFilter::from_css(".element".to_string(), "background: #fff".to_string())).unwrap()); - assert_eq!(out, expected); - } - - #[test] - fn script_exceptions() { - use crate::resources::{MimeType, ResourceType}; - - let cfcache = cache_from_rules(vec![ - "example.com,~sub.example.com##+js(set-constant.js, atob, trueFunc)", - "sub.test.example.com#@#+js(set-constant.js, atob, trueFunc)", - "cosmetic.net##+js(nowebrtc.js)", - "g.cosmetic.net##+js(window.open-defuser.js)", - "c.g.cosmetic.net#@#+js(nowebrtc.js)", - "d.g.cosmetic.net#@#+js()", - ]); - let resources = ResourceStorage::from_resources([ - Resource { - name: "set-constant.js".into(), - aliases: vec![], - kind: ResourceType::Template, - content: base64::encode("set-constant.js, {{1}}, {{2}}"), - dependencies: vec![], - permission: Default::default(), - }, - Resource::simple("nowebrtc.js", MimeType::ApplicationJavascript, "nowebrtc.js"), - Resource::simple("window.open-defuser.js", MimeType::ApplicationJavascript, "window.open-defuser.js"), - ]); - - let out = cfcache.hostname_cosmetic_resources(&resources, "sub.example.com", false); - let mut expected = UrlSpecificResources::empty(); - assert_eq!(out, expected); - - let out = cfcache.hostname_cosmetic_resources(&resources, "sub.test.example.com", false); - assert_eq!(out, expected); - - let out = cfcache.hostname_cosmetic_resources(&resources, "test.example.com", false); - expected.injected_script = - "try {\nset-constant.js, atob, trueFunc\n} catch ( e ) { }\n".to_owned(); - assert_eq!(out, expected); - - let out = cfcache.hostname_cosmetic_resources(&resources, "cosmetic.net", false); - expected.injected_script = "try {\nnowebrtc.js\n} catch ( e ) { }\n".to_owned(); - assert_eq!(out, expected); - - let out = cfcache.hostname_cosmetic_resources(&resources, "g.cosmetic.net", false); - expected.injected_script = "try {\nnowebrtc.js\n} catch ( e ) { }\ntry {\nwindow.open-defuser.js\n} catch ( e ) { }\n".to_owned(); - // order is non-deterministic - if out != expected { - expected.injected_script = "try {\nwindow.open-defuser.js\n} catch ( e ) { }\ntry {\nnowebrtc.js\n} catch ( e ) { }\n".to_owned(); - assert_eq!(out, expected); - } - - let out = cfcache.hostname_cosmetic_resources(&resources, "c.g.cosmetic.net", false); - expected.injected_script = "try {\nwindow.open-defuser.js\n} catch ( e ) { }\n".to_owned(); - assert_eq!(out, expected); - - let out = cfcache.hostname_cosmetic_resources(&resources, "d.g.cosmetic.net", false); - expected.injected_script = "".to_owned(); - assert_eq!(out, expected); - } - - #[test] - fn remove_exceptions() { - let cfcache = cache_from_rules(vec![ - "example.com,~sub.example.com##.element:remove()", - "sub.test.example.com#@#.element:remove()", - "a1.sub.example.com##.element", - "a2.sub.example.com##.element:remove()", - "a3.example.com##.element:remove()", - ]); - let resources = ResourceStorage::default(); - - let out = cfcache.hostname_cosmetic_resources(&resources, "sub.example.com", false); - let mut expected = UrlSpecificResources::empty(); - assert_eq!(out, expected); - - let out = cfcache.hostname_cosmetic_resources(&resources, "sub.test.example.com", false); - assert_eq!(out, expected); - - let out = cfcache.hostname_cosmetic_resources(&resources, "a1.sub.example.com", false); - expected.hide_selectors.insert(".element".to_owned()); - assert_eq!(out, expected); - - let out = cfcache.hostname_cosmetic_resources(&resources, "test.example.com", false); - expected.hide_selectors.clear(); - expected.procedural_actions - .insert(serde_json::to_string(&ProceduralOrActionFilter { - selector: vec![CosmeticFilterOperator::CssSelector(".element".to_string())], - action: Some(CosmeticFilterAction::Remove), - }).unwrap()); - assert_eq!(out, expected); - - let out = cfcache.hostname_cosmetic_resources(&resources, "a2.sub.example.com", false); - expected.procedural_actions.clear(); - assert_eq!(out, expected); - - let out = cfcache.hostname_cosmetic_resources(&resources, "a3.example.com", false); - expected.procedural_actions.clear(); - expected.procedural_actions - .insert(serde_json::to_string(&ProceduralOrActionFilter { - selector: vec![CosmeticFilterOperator::CssSelector(".element".to_string())], - action: Some(CosmeticFilterAction::Remove), - }).unwrap()); - assert_eq!(out, expected); - } - - #[test] - fn remove_attr_exceptions() { - let cfcache = cache_from_rules(vec![ - "example.com,~sub.example.com##.element:remove-attr(style)", - "sub.test.example.com#@#.element:remove-attr(style)", - "a1.sub.example.com##.element", - "a2.sub.example.com##.element:remove-attr(src)", - "a3.example.com##.element:remove-attr(src)", - ]); - let resources = ResourceStorage::default(); - - let out = cfcache.hostname_cosmetic_resources(&resources, "sub.example.com", false); - let mut expected = UrlSpecificResources::empty(); - assert_eq!(out, expected); - - let out = cfcache.hostname_cosmetic_resources(&resources, "sub.test.example.com", false); - assert_eq!(out, expected); - - let out = cfcache.hostname_cosmetic_resources(&resources, "a1.sub.example.com", false); - expected.hide_selectors.insert(".element".to_owned()); - assert_eq!(out, expected); - - let out = cfcache.hostname_cosmetic_resources(&resources, "test.example.com", false); - expected.hide_selectors.clear(); - expected - .procedural_actions - .insert(serde_json::to_string(&ProceduralOrActionFilter { - selector: vec![CosmeticFilterOperator::CssSelector(".element".to_string())], - action: Some(CosmeticFilterAction::RemoveAttr("style".to_string())), - }).unwrap()); - assert_eq!(out, expected); - - let out = cfcache.hostname_cosmetic_resources(&resources, "a2.sub.example.com", false); - expected.procedural_actions.clear(); - expected - .procedural_actions - .insert(serde_json::to_string(&ProceduralOrActionFilter { - selector: vec![CosmeticFilterOperator::CssSelector(".element".to_string())], - action: Some(CosmeticFilterAction::RemoveAttr("src".to_string())), - }).unwrap()); - assert_eq!(out, expected); - - let out = cfcache.hostname_cosmetic_resources(&resources, "a3.example.com", false); - expected.procedural_actions.clear(); - expected - .procedural_actions - .insert(serde_json::to_string(&ProceduralOrActionFilter { - selector: vec![CosmeticFilterOperator::CssSelector(".element".to_string())], - action: Some(CosmeticFilterAction::RemoveAttr("src".to_string())), - }).unwrap()); - expected - .procedural_actions - .insert(serde_json::to_string(&ProceduralOrActionFilter { - selector: vec![CosmeticFilterOperator::CssSelector(".element".to_string())], - action: Some(CosmeticFilterAction::RemoveAttr("style".to_string())), - }).unwrap()); - assert_eq!(out, expected); - } - - #[test] - fn remove_class_exceptions() { - let cfcache = cache_from_rules(vec![ - "example.com,~sub.example.com##.element:remove-class(overlay)", - "sub.test.example.com#@#.element:remove-class(overlay)", - "a1.sub.example.com##.element", - "a2.sub.example.com##.element:remove-class(banner)", - "a3.example.com##.element:remove-class(banner)", - ]); - let resources = ResourceStorage::default(); - - let out = cfcache.hostname_cosmetic_resources(&resources, "sub.example.com", false); - let mut expected = UrlSpecificResources::empty(); - assert_eq!(out, expected); - - let out = cfcache.hostname_cosmetic_resources(&resources, "sub.test.example.com", false); - assert_eq!(out, expected); - - let out = cfcache.hostname_cosmetic_resources(&resources, "a1.sub.example.com", false); - expected.hide_selectors.insert(".element".to_owned()); - assert_eq!(out, expected); - - let out = cfcache.hostname_cosmetic_resources(&resources, "test.example.com", false); - expected.hide_selectors.clear(); - expected - .procedural_actions - .insert(serde_json::to_string(&ProceduralOrActionFilter { - selector: vec![CosmeticFilterOperator::CssSelector(".element".to_string())], - action: Some(CosmeticFilterAction::RemoveClass("overlay".to_string())), - }).unwrap()); - assert_eq!(out, expected); - - let out = cfcache.hostname_cosmetic_resources(&resources, "a2.sub.example.com", false); - expected.procedural_actions.clear(); - expected - .procedural_actions - .insert(serde_json::to_string(&ProceduralOrActionFilter { - selector: vec![CosmeticFilterOperator::CssSelector(".element".to_string())], - action: Some(CosmeticFilterAction::RemoveClass("banner".to_string())), - }).unwrap()); - assert_eq!(out, expected); - - let out = cfcache.hostname_cosmetic_resources(&resources, "a3.example.com", false); - expected.procedural_actions.clear(); - expected - .procedural_actions - .insert(serde_json::to_string(&ProceduralOrActionFilter { - selector: vec![CosmeticFilterOperator::CssSelector(".element".to_string())], - action: Some(CosmeticFilterAction::RemoveClass("banner".to_string())), - }).unwrap()); - expected - .procedural_actions - .insert(serde_json::to_string(&ProceduralOrActionFilter { - selector: vec![CosmeticFilterOperator::CssSelector(".element".to_string())], - action: Some(CosmeticFilterAction::RemoveClass("overlay".to_string())), - }).unwrap()); - assert_eq!(out, expected); - } - - #[test] - #[cfg(feature = "css-validation")] - fn procedural_actions() { - let cfcache = cache_from_rules(vec![ - "example.com##div:has(video):remove()", - "example.com##div:has-text(Ad):remove()", - "example.com##div:has-text(Sponsored) > p", - "example.com##div:has-text(Cookie) > p:remove-class(overlay)", - ]); - let resources = ResourceStorage::default(); - - let out = cfcache.hostname_cosmetic_resources(&resources, "example.com", false); - let mut expected = UrlSpecificResources::empty(); - expected - .procedural_actions - .insert(serde_json::to_string(&ProceduralOrActionFilter { - selector: vec![CosmeticFilterOperator::CssSelector("div:has(video)".to_string())], - action: Some(CosmeticFilterAction::Remove), - }).unwrap()); - expected - .procedural_actions - .insert(serde_json::to_string(&ProceduralOrActionFilter { - selector: vec![CosmeticFilterOperator::CssSelector("div".to_string()), CosmeticFilterOperator::HasText("Ad".to_string())], - action: Some(CosmeticFilterAction::Remove), - }).unwrap()); - expected - .procedural_actions - .insert(serde_json::to_string(&ProceduralOrActionFilter { - selector: vec![CosmeticFilterOperator::CssSelector("div".to_string()), CosmeticFilterOperator::HasText("Cookie".to_string()), CosmeticFilterOperator::CssSelector(" > p".to_string())], - action: Some(CosmeticFilterAction::RemoveClass("overlay".to_string())), - }).unwrap()); - expected - .procedural_actions - .insert(serde_json::to_string(&ProceduralOrActionFilter { - selector: vec![CosmeticFilterOperator::CssSelector("div".to_string()), CosmeticFilterOperator::HasText("Sponsored".to_string()), CosmeticFilterOperator::CssSelector(" > p".to_string())], - action: None, - }).unwrap()); - assert_eq!(out, expected); - } - - /// Avoid impossible type inference for type parameter `impl AsRef` - const EMPTY: &[&str] = &[]; - - #[test] - fn matching_hidden_class_id_selectors() { - let rules = [ - "##.a-class", - "###simple-id", - "##.a-class .with .children", - "##.children .including #simple-id", - "##a.a-class", - ]; - let cfcache = CosmeticFilterCache::from_rules( - rules - .iter() - .map(|r| CosmeticFilter::parse(r, false, Default::default()).unwrap()) - .collect::>(), - ); - - let out = cfcache.hidden_class_id_selectors(["with"], EMPTY, &HashSet::default()); - assert_eq!(out, Vec::::new()); - - let out = cfcache.hidden_class_id_selectors(EMPTY, ["with"], &HashSet::default()); - assert_eq!(out, Vec::::new()); - - let out = cfcache.hidden_class_id_selectors(EMPTY, ["a-class"], &HashSet::default()); - assert_eq!(out, Vec::::new()); - - let out = - cfcache.hidden_class_id_selectors(["simple-id"], EMPTY, &HashSet::default()); - assert_eq!(out, Vec::::new()); - - let out = cfcache.hidden_class_id_selectors(["a-class"], EMPTY, &HashSet::default()); - assert_eq!(out, [".a-class", ".a-class .with .children"]); - - let out = cfcache.hidden_class_id_selectors( - ["children", "a-class"], - EMPTY, - &HashSet::default(), - ); - assert_eq!( - out, - [ - ".children .including #simple-id", - ".a-class", - ".a-class .with .children", - ] - ); - - let out = - cfcache.hidden_class_id_selectors(EMPTY, ["simple-id"], &HashSet::default()); - assert_eq!(out, ["#simple-id"]); - - let out = cfcache.hidden_class_id_selectors( - ["children", "a-class"], - ["simple-id"], - &HashSet::default(), - ); - assert_eq!( - out, - [ - ".children .including #simple-id", - ".a-class", - ".a-class .with .children", - "#simple-id", - ] - ); - } - - #[test] - fn class_id_exceptions() { - let rules = vec![ - "##.a-class", - "###simple-id", - "##.a-class .with .children", - "##.children .including #simple-id", - "##a.a-class", - "example.*#@#.a-class", - "~test.com###test-element", - ]; - let cfcache = CosmeticFilterCache::from_rules( - rules - .iter() - .map(|r| CosmeticFilter::parse(r, false, Default::default()).unwrap()) - .collect::>(), - ); - let resources = ResourceStorage::default(); - let exceptions = cfcache - .hostname_cosmetic_resources(&resources, "example.co.uk", false) - .exceptions; - - let out = cfcache.hidden_class_id_selectors(["a-class"], EMPTY, &exceptions); - assert_eq!(out, [".a-class .with .children"]); - - let out = cfcache.hidden_class_id_selectors( - ["children", "a-class"], - ["simple-id"], - &exceptions, - ); - assert_eq!( - out, - [ - ".children .including #simple-id", - ".a-class .with .children", - "#simple-id", - ] - ); - - let out = cfcache.hidden_class_id_selectors(EMPTY, ["test-element"], &exceptions); - assert_eq!(out, ["#test-element"]); - - let exceptions = cfcache - .hostname_cosmetic_resources(&resources, "a1.test.com", false) - .exceptions; - - let out = cfcache.hidden_class_id_selectors(["a-class"], EMPTY, &exceptions); - assert_eq!(out, [".a-class", ".a-class .with .children"]); - - let out = cfcache.hidden_class_id_selectors( - ["children", "a-class"], - ["simple-id"], - &exceptions, - ); - assert_eq!( - out, - [ - ".children .including #simple-id", - ".a-class", - ".a-class .with .children", - "#simple-id", - ] - ); - - let out = cfcache.hidden_class_id_selectors(EMPTY, ["test-element"], &exceptions); - assert_eq!(out, Vec::::new()); - } - - #[test] - fn misc_generic_exceptions() { - let rules = vec![ - "##a[href=\"bad.com\"]", - "##div > p", - "##a[href=\"notbad.com\"]", - "example.com#@#div > p", - "~example.com##a[href=\"notbad.com\"]", - ]; - let cfcache = CosmeticFilterCache::from_rules( - rules - .iter() - .map(|r| CosmeticFilter::parse(r, false, Default::default()).unwrap()) - .collect::>(), - ); - let resources = ResourceStorage::default(); - - let hide_selectors = cfcache - .hostname_cosmetic_resources(&resources, "test.com", false) - .hide_selectors; - let mut expected_hides = HashSet::new(); - expected_hides.insert("a[href=\"bad.com\"]".to_owned()); - expected_hides.insert("div > p".to_owned()); - expected_hides.insert("a[href=\"notbad.com\"]".to_owned()); - assert_eq!(hide_selectors, expected_hides); - - let hide_selectors = cfcache - .hostname_cosmetic_resources(&resources, "example.com", false) - .hide_selectors; - let mut expected_hides = HashSet::new(); - expected_hides.insert("a[href=\"bad.com\"]".to_owned()); - assert_eq!(hide_selectors, expected_hides); - } - - #[test] - fn apply_to_tld() { - use crate::resources::ResourceType; - - // toolforge.org and github.io are examples of TLDs with multiple segments. These rules - // should still be parsed correctly and applied on corresponding subdomains. - let rules = vec![ - "toolforge.org##+js(abort-on-property-read, noAdBlockers)", - "github.io##div.adToBlock", - ]; - let cfcache = CosmeticFilterCache::from_rules( - rules - .iter() - .map(|r| CosmeticFilter::parse(r, false, Default::default()).unwrap()) - .collect::>(), - ); - let resources = ResourceStorage::from_resources([ - Resource { - name: "abort-on-property-read.js".into(), - aliases: vec!["aopr".to_string()], - kind: ResourceType::Template, - content: base64::encode("abort-on-property-read.js, {{1}}"), - dependencies: vec![], - permission: Default::default(), - } - ]); - - let injected_script = cfcache - .hostname_cosmetic_resources(&resources, "antonok.toolforge.org", false) - .injected_script; - assert_eq!( - injected_script, - "try {\nabort-on-property-read.js, noAdBlockers\n} catch ( e ) { }\n" - ); - - let hide_selectors = cfcache - .hostname_cosmetic_resources(&resources, "antonok.github.io", false) - .hide_selectors; - let mut expected_hides = HashSet::new(); - expected_hides.insert("div.adToBlock".to_owned()); - assert_eq!(hide_selectors, expected_hides); - } -} +#[path = "../tests/unit/cosmetic_filter_cache.rs"] +mod unit_tests; diff --git a/src/data_format/mod.rs b/src/data_format/mod.rs index e9f26ace..e2a8d028 100644 --- a/src/data_format/mod.rs +++ b/src/data_format/mod.rs @@ -5,13 +5,17 @@ //! serialization/deserialization implementations and can automatically dispatch to the appropriate //! one. +#![allow(dead_code)] + mod v0; pub(crate) mod utils; -use crate::blocker::Blocker; +use crate::blocker::GenericBlocker; use crate::cosmetic_filter_cache::CosmeticFilterCache; +type Blocker = GenericBlocker; + /// Newer formats start with this magic byte sequence. /// Calculated as the leading 4 bytes of `echo -n 'brave/adblock-rust' | sha512sum`. const ADBLOCK_RUST_DAT_MAGIC: [u8; 4] = [0xd1, 0xd9, 0x3a, 0xaf]; diff --git a/src/data_format/v0.rs b/src/data_format/v0.rs index 9ac099c4..8ac593c7 100644 --- a/src/data_format/v0.rs +++ b/src/data_format/v0.rs @@ -9,14 +9,16 @@ use std::collections::{HashMap, HashSet}; use rmp_serde as rmps; use serde::{Deserialize, Serialize}; -use crate::blocker::{Blocker, NetworkFilterList}; +use crate::blocker::{GenericBlocker, NetworkFilterList}; use crate::cosmetic_filter_cache::{CosmeticFilterCache, HostnameRuleDb, ProceduralOrActionFilter}; -use crate::filters::network::NetworkFilter; +use crate::filters::network::{NetworkFilter, NetworkFilterMaskHelper}; use crate::utils::Hash; use super::utils::{stabilize_hashmap_serialization, stabilize_hashset_serialization}; use super::{DeserializationError, SerializationError}; +type Blocker = GenericBlocker; + /// Each variant describes a single rule that is specific to a particular hostname. #[derive(Clone, Debug, Deserialize, Serialize)] enum LegacySpecificFilterType { @@ -61,8 +63,12 @@ impl From<&HostnameRuleDb> for LegacyHostnameRuleDb { for (hash, bin) in v.uninject_script.0.iter() { for f in bin { db.entry(*hash) - .and_modify(|v| v.push(LegacySpecificFilterType::UnhideScriptInject(f.to_owned()))) - .or_insert_with(|| vec![LegacySpecificFilterType::UnhideScriptInject(f.to_owned())]); + .and_modify(|v| { + v.push(LegacySpecificFilterType::UnhideScriptInject(f.to_owned())) + }) + .or_insert_with(|| { + vec![LegacySpecificFilterType::UnhideScriptInject(f.to_owned())] + }); } } for (hash, bin) in v.procedural_action.0.iter() { @@ -71,8 +77,15 @@ impl From<&HostnameRuleDb> for LegacyHostnameRuleDb { Ok(f) => { if let Some((selector, style)) = f.as_css() { db.entry(*hash) - .and_modify(|v| v.push(LegacySpecificFilterType::Style(selector.clone(), style.clone()))) - .or_insert_with(|| vec![LegacySpecificFilterType::Style(selector, style)]); + .and_modify(|v| { + v.push(LegacySpecificFilterType::Style( + selector.clone(), + style.clone(), + )) + }) + .or_insert_with(|| { + vec![LegacySpecificFilterType::Style(selector, style)] + }); } } _ => (), @@ -85,17 +98,25 @@ impl From<&HostnameRuleDb> for LegacyHostnameRuleDb { Ok(f) => { if let Some((selector, style)) = f.as_css() { db.entry(*hash) - .and_modify(|v| v.push(LegacySpecificFilterType::UnhideStyle(selector.to_owned(), style.to_owned()))) - .or_insert_with(|| vec![LegacySpecificFilterType::UnhideStyle(selector.to_owned(), style.to_owned())]); + .and_modify(|v| { + v.push(LegacySpecificFilterType::UnhideStyle( + selector.to_owned(), + style.to_owned(), + )) + }) + .or_insert_with(|| { + vec![LegacySpecificFilterType::UnhideStyle( + selector.to_owned(), + style.to_owned(), + )] + }); } } _ => (), } } } - LegacyHostnameRuleDb { - db, - } + LegacyHostnameRuleDb { db } } } @@ -115,10 +136,22 @@ impl Into for LegacyHostnameRuleDb { match rule { LegacySpecificFilterType::Hide(s) => hide.insert(&hash, s), LegacySpecificFilterType::Unhide(s) => unhide.insert(&hash, s), - LegacySpecificFilterType::Style(s, st) => procedural_action.insert_procedural_action_filter(&hash, &ProceduralOrActionFilter::from_css(s, st)), - LegacySpecificFilterType::UnhideStyle(s, st) => procedural_action_exception.insert_procedural_action_filter(&hash, &ProceduralOrActionFilter::from_css(s, st)), - LegacySpecificFilterType::ScriptInject(s) => inject_script.insert(&hash, (s, Default::default())), - LegacySpecificFilterType::UnhideScriptInject(s) => uninject_script.insert(&hash, s), + LegacySpecificFilterType::Style(s, st) => procedural_action + .insert_procedural_action_filter( + &hash, + &ProceduralOrActionFilter::from_css(s, st), + ), + LegacySpecificFilterType::UnhideStyle(s, st) => procedural_action_exception + .insert_procedural_action_filter( + &hash, + &ProceduralOrActionFilter::from_css(s, st), + ), + LegacySpecificFilterType::ScriptInject(s) => { + inject_script.insert(&hash, (s, Default::default())) + } + LegacySpecificFilterType::UnhideScriptInject(s) => { + uninject_script.insert(&hash, s) + } } } } @@ -445,7 +478,8 @@ impl From for (Blocker, CosmeticFilterCache) { let mut specific_rules: HostnameRuleDb = v.specific_rules.into(); specific_rules.procedural_action = HostnameFilterBin(v.procedural_action); - specific_rules.procedural_action_exception = HostnameFilterBin(v.procedural_action_exception); + specific_rules.procedural_action_exception = + HostnameFilterBin(v.procedural_action_exception); ( Blocker { @@ -463,8 +497,6 @@ impl From for (Blocker, CosmeticFilterCache) { enable_optimizations: v.enable_optimizations, - #[cfg(feature = "object-pooling")] - pool: Default::default(), regex_manager: Default::default(), }, CosmeticFilterCache { diff --git a/src/engine.rs b/src/engine.rs index d17cad99..fef0cbf8 100644 --- a/src/engine.rs +++ b/src/engine.rs @@ -7,6 +7,9 @@ use crate::regex_manager::RegexManagerDiscardPolicy; use crate::request::Request; use crate::resources::{Resource, ResourceStorage}; +#[allow(unused_imports)] +pub use crate::engine_serializer::Serialize; + use std::collections::HashSet; /// Drives high-level blocking logic and is responsible for loading filter lists into an optimized @@ -43,8 +46,8 @@ use std::collections::HashSet; /// [`Engine::hidden_class_id_selectors`] on an ongoing basis to determine additional elements that /// should be hidden dynamically. pub struct Engine { - blocker: Blocker, - cosmetic_cache: CosmeticFilterCache, + pub(crate) blocker: Blocker, + pub(crate) cosmetic_cache: CosmeticFilterCache, resources: ResourceStorage, } @@ -73,18 +76,29 @@ impl Engine { } /// Loads rules in a single format, enabling optimizations and discarding debug information. - pub fn from_rules(rules: impl IntoIterator>, opts: ParseOptions) -> Self { + pub fn from_rules( + rules: impl IntoIterator>, + opts: ParseOptions, + ) -> Self { let mut filter_set = FilterSet::new(false); filter_set.add_filters(rules, opts); Self::from_filter_set(filter_set, true) } /// Loads rules, enabling optimizations and including debug information. - pub fn from_rules_debug(rules: impl IntoIterator>, opts: ParseOptions) -> Self { + pub fn from_rules_debug( + rules: impl IntoIterator>, + opts: ParseOptions, + ) -> Self { Self::from_rules_parametrised(rules, opts, true, true) } - pub fn from_rules_parametrised(filter_rules: impl IntoIterator>, opts: ParseOptions, debug: bool, optimize: bool) -> Self { + pub fn from_rules_parametrised( + filter_rules: impl IntoIterator>, + opts: ParseOptions, + debug: bool, + optimize: bool, + ) -> Self { let mut filter_set = FilterSet::new(debug); filter_set.add_filters(filter_rules, opts); Self::from_filter_set(filter_set, optimize) @@ -93,7 +107,11 @@ impl Engine { /// Loads rules from the given `FilterSet`. It is recommended to use a `FilterSet` when adding /// rules from multiple sources. pub fn from_filter_set(set: FilterSet, optimize: bool) -> Self { - let FilterSet { network_filters, cosmetic_filters, .. } = set; + let FilterSet { + network_filters, + cosmetic_filters, + .. + } = set; let blocker_options = BlockerOptions { enable_optimizations: optimize, @@ -106,28 +124,6 @@ impl Engine { } } - /// Serializes the `Engine` into a binary format so that it can be quickly reloaded later. - pub fn serialize_raw(&self) -> Result, crate::data_format::SerializationError> { - use crate::data_format::SerializeFormat; - - let serialize_format = SerializeFormat::build(&self.blocker, &self.cosmetic_cache); - - serialize_format.serialize() - } - - /// Deserialize the `Engine` from the binary format generated by `Engine::serialize_raw`. The - /// method will automatically select the correct deserialization implementation. - pub fn deserialize(&mut self, serialized: &[u8]) -> Result<(), crate::data_format::DeserializationError> { - use crate::data_format::DeserializeFormat; - let current_tags = self.blocker.tags_enabled(); - let deserialize_format = DeserializeFormat::deserialize(serialized)?; - let (blocker, cosmetic_cache) = deserialize_format.build(); - self.blocker = blocker; - self.blocker.use_tags(¤t_tags.iter().map(|s| &**s).collect::>()); - self.cosmetic_cache = cosmetic_cache; - Ok(()) - } - /// Check if a request for a network resource from `url`, of type `request_type`, initiated by /// `source_url`, should be blocked. pub fn check_network_request(&self, request: &Request) -> BlockerResult { @@ -140,17 +136,19 @@ impl Engine { previously_matched_rule: bool, force_check_exceptions: bool, ) -> BlockerResult { - self.blocker.check_parameterised(request, &self.resources, previously_matched_rule, force_check_exceptions) + self.blocker.check_parameterised( + request, + &self.resources, + previously_matched_rule, + force_check_exceptions, + ) } /// Returns a string containing any additional CSP directives that should be added to this /// request's response. Only applies to document and subdocument requests. /// /// If multiple policies are present from different rules, they will be joined by commas. - pub fn get_csp_directives( - &self, - request: &Request, - ) -> Option { + pub fn get_csp_directives(&self, request: &Request) -> Option { self.blocker.get_csp_directives(request) } @@ -187,12 +185,15 @@ impl Engine { } /// Sets this engine's resources to be _only_ the ones provided in `resources`. - pub fn use_resources(&mut self, resources: impl IntoIterator) { + pub fn use_resources(&mut self, resources: impl IntoIterator) { self.resources = ResourceStorage::from_resources(resources); } /// Sets this engine's resources to additionally include `resource`. - pub fn add_resource(&mut self, resource: Resource) -> Result<(), crate::resources::AddResourceError> { + pub fn add_resource( + &mut self, + resource: Resource, + ) -> Result<(), crate::resources::AddResourceError> { self.resources.add_resource(resource) } @@ -204,8 +205,14 @@ impl Engine { /// corresponding rules are not excepted. /// /// `exceptions` should be passed directly from `UrlSpecificResources`. - pub fn hidden_class_id_selectors(&self, classes: impl IntoIterator>, ids: impl IntoIterator>, exceptions: &HashSet) -> Vec { - self.cosmetic_cache.hidden_class_id_selectors(classes, ids, exceptions) + pub fn hidden_class_id_selectors( + &self, + classes: impl IntoIterator>, + ids: impl IntoIterator>, + exceptions: &HashSet, + ) -> Vec { + self.cosmetic_cache + .hidden_class_id_selectors(classes, ids, exceptions) } /// Returns a set of cosmetic filter resources required for a particular url. Once this has @@ -220,13 +227,14 @@ impl Engine { }; let generichide = self.blocker.check_generic_hide(&request); - self.cosmetic_cache.hostname_cosmetic_resources(&self.resources, &request.hostname, generichide) + self.cosmetic_cache.hostname_cosmetic_resources( + &self.resources, + &request.hostname, + generichide, + ) } - pub fn set_regex_discard_policy( - &mut self, - new_discard_policy: RegexManagerDiscardPolicy - ) { + pub fn set_regex_discard_policy(&mut self, new_discard_policy: RegexManagerDiscardPolicy) { self.blocker.set_regex_discard_policy(new_discard_policy); } @@ -252,642 +260,5 @@ fn _assertions() { } #[cfg(test)] -mod tests { - use super::*; - use crate::resources::MimeType; - use crate::lists::FilterFormat; - - #[test] - fn tags_enable_adds_tags() { - let filters = [ - "adv$tag=stuff", - "somelongpath/test$tag=stuff", - "||brianbondy.com/$tag=brian", - "||brave.com$tag=brian", - ]; - let url_results = [ - ("http://example.com/advert.html", true), - ("http://example.com/somelongpath/test/2.html", true), - ("https://brianbondy.com/about", true), - ("https://brave.com/about", true), - ]; - - let mut engine = Engine::from_rules(&filters, Default::default()); - engine.enable_tags(&["stuff"]); - engine.enable_tags(&["brian"]); - - url_results.into_iter().for_each(|(url, expected_result)| { - let request = Request::new(&url, "", "").unwrap(); - let matched_rule = engine.check_network_request(&request); - if expected_result { - assert!(matched_rule.matched, "Expected match for {}", url); - } else { - assert!(!matched_rule.matched, "Expected no match for {}, matched with {:?}", url, matched_rule.filter); - } - }); - } - - #[test] - fn tags_disable_works() { - let filters = [ - "adv$tag=stuff", - "somelongpath/test$tag=stuff", - "||brianbondy.com/$tag=brian", - "||brave.com$tag=brian", - ]; - let url_results = [ - ("http://example.com/advert.html", false), - ("http://example.com/somelongpath/test/2.html", false), - ("https://brianbondy.com/about", true), - ("https://brave.com/about", true), - ]; - - let mut engine = Engine::from_rules(&filters, Default::default()); - engine.enable_tags(&["brian", "stuff"]); - engine.disable_tags(&["stuff"]); - - url_results.into_iter().for_each(|(url, expected_result)| { - let request = Request::new(&url, "", "").unwrap(); - let matched_rule = engine.check_network_request(&request); - if expected_result { - assert!(matched_rule.matched, "Expected match for {}", url); - } else { - assert!(!matched_rule.matched, "Expected no match for {}, matched with {:?}", url, matched_rule.filter); - } - }); - } - - #[test] - fn exception_tags_inactive_by_default() { - let filters = [ - "adv", - "||brianbondy.com/$tag=brian", - "@@||brianbondy.com/$tag=brian", - ]; - let url_results = [ - ("http://example.com/advert.html", true), - ("https://brianbondy.com/about", false), - ("https://brianbondy.com/advert", true), - ]; - - let engine = Engine::from_rules(&filters, Default::default()); - - url_results.into_iter().for_each(|(url, expected_result)| { - let request = Request::new(&url, "", "").unwrap(); - let matched_rule = engine.check_network_request(&request); - if expected_result { - assert!(matched_rule.matched, "Expected match for {}", url); - } else { - assert!(!matched_rule.matched, "Expected no match for {}, matched with {:?}", url, matched_rule.filter); - } - }); - } - - #[test] - fn exception_tags_works() { - let filters = [ - "adv", - "||brianbondy.com/$tag=brian", - "@@||brianbondy.com/$tag=brian", - ]; - let url_results = [ - ("http://example.com/advert.html", true), - ("https://brianbondy.com/about", false), - ("https://brianbondy.com/advert", false), - ]; - - let mut engine = Engine::from_rules(&filters, Default::default()); - engine.enable_tags(&["brian", "stuff"]); - - url_results.into_iter().for_each(|(url, expected_result)| { - let request = Request::new(&url, "", "").unwrap(); - let matched_rule = engine.check_network_request(&request); - if expected_result { - assert!(matched_rule.matched, "Expected match for {}", url); - } else { - assert!(!matched_rule.matched, "Expected no match for {}, matched with {:?}", url, matched_rule.filter); - } - }); - } - - #[test] - fn serialization_retains_tags() { - let filters = [ - "adv$tag=stuff", - "somelongpath/test$tag=stuff", - "||brianbondy.com/$tag=brian", - "||brave.com$tag=brian", - ]; - let url_results = [ - ("http://example.com/advert.html", true), - ("http://example.com/somelongpath/test/2.html", true), - ("https://brianbondy.com/about", false), - ("https://brave.com/about", false), - ]; - - let mut engine = Engine::from_rules(&filters, Default::default()); - engine.enable_tags(&["stuff"]); - engine.enable_tags(&["brian"]); - let serialized = engine.serialize_raw().unwrap(); - let mut deserialized_engine = Engine::default(); - deserialized_engine.enable_tags(&["stuff"]); - deserialized_engine.deserialize(&serialized).unwrap(); - - url_results.into_iter().for_each(|(url, expected_result)| { - let request = Request::new(&url, "", "").unwrap(); - let matched_rule = deserialized_engine.check_network_request(&request); - if expected_result { - assert!(matched_rule.matched, "Expected match for {}", url); - } else { - assert!(!matched_rule.matched, "Expected no match for {}, matched with {:?}", url, matched_rule.filter); - } - }); - } - - #[test] - fn deserialization_backwards_compatible_plain() { - // deserialization_generate_simple(); - // assert!(false); - // converted from the legacy compressed format - let serialized = [209, 217, 58, 175, 0, 220, 0, 17, 145, 128, 145, 128, 145, 128, 145, 128, - 145, 128, 145, 129, 207, 202, 167, 36, 217, 43, 56, 97, 176, 145, 157, 145, 206, 0, 3, - 31, 255, 129, 1, 169, 97, 100, 45, 98, 97, 110, 110, 101, 114, 192, 192, 192, 192, 192, - 192, 192, 192, 207, 186, 136, 69, 13, 115, 187, 170, 226, 192, 192, 145, 128, 144, 195, - 145, 128, 144, 144, 128, 128, 145, 128, 144, 145, 128]; - let mut deserialized_engine = Engine::default(); - deserialized_engine.deserialize(&serialized).unwrap(); - - let url = "http://example.com/ad-banner.gif"; - let request = Request::new(&url, "", "").unwrap(); - let matched_rule = deserialized_engine.check_network_request(&request); - assert!(matched_rule.matched, "Expected match for {}", url); - } - - #[test] - fn deserialization_backwards_compatible_tags() { - // deserialization_generate_tags(); - // assert!(false); - // converted from the legacy compressed format - let serialized = [209, 217, 58, 175, 0, 220, 0, 17, 145, 128, 145, 128, 145, 128, 145, 128, - 145, 128, 145, 128, 145, 128, 145, 157, 145, 206, 0, 3, 31, 255, 129, 1, 169, 97, 100, - 45, 98, 97, 110, 110, 101, 114, 192, 192, 192, 192, 192, 192, 163, 97, 98, 99, 192, - 207, 126, 212, 53, 83, 113, 159, 143, 134, 192, 192, 195, 145, 128, 144, 144, 128, 128, - 145, 128, 144, 145, 128]; - let mut deserialized_engine = Engine::default(); - - deserialized_engine.enable_tags(&[]); - deserialized_engine.deserialize(&serialized).unwrap(); - let url = "http://example.com/ad-banner.gif"; - let request = Request::new(&url, "", "").unwrap(); - let matched_rule = deserialized_engine.check_network_request(&request); - assert!(!matched_rule.matched, "Expected NO match for {}", url); - - deserialized_engine.enable_tags(&["abc"]); - deserialized_engine.deserialize(&serialized).unwrap(); - - let url = "http://example.com/ad-banner.gif"; - let request = Request::new(&url, "", "").unwrap(); - let matched_rule = deserialized_engine.check_network_request(&request); - assert!(matched_rule.matched, "Expected match for {}", url); - } - - #[test] - fn deserialization_generate_simple() { - let mut engine = Engine::from_rules(&[ - "ad-banner", - ], Default::default()); - let serialized = engine.serialize_raw().unwrap(); - println!("Engine serialized: {:?}", serialized); - engine.deserialize(&serialized).unwrap(); - } - - #[test] - fn deserialization_generate_tags() { - let mut engine = Engine::from_rules(&[ - "ad-banner$tag=abc", - ], Default::default()); - engine.use_tags(&["abc"]); - let serialized = engine.serialize_raw().unwrap(); - println!("Engine serialized: {:?}", serialized); - engine.deserialize(&serialized).unwrap(); - } - - #[test] - fn deserialization_generate_resources() { - let mut engine = Engine::from_rules(&[ - "ad-banner$redirect=nooptext", - ], Default::default()); - - engine.use_resources([ - Resource::simple("nooptext", MimeType::TextPlain, ""), - Resource::simple("noopcss", MimeType::TextCss, ""), - ]); - - let serialized = engine.serialize_raw().unwrap(); - println!("Engine serialized: {:?}", serialized); - engine.deserialize(&serialized).unwrap(); - } - - #[test] - fn redirect_resource_insertion_works() { - let mut engine = Engine::from_rules(&[ - "ad-banner$redirect=nooptext", - "script.js$redirect=noop.js", - ], Default::default()); - - let script = r#" -(function() { - ; -})(); - - "#; - let mut resources = [ - Resource::simple("nooptext", MimeType::TextPlain, ""), - Resource::simple("noopjs", MimeType::ApplicationJavascript, script), - ]; - resources[1].aliases.push("noop.js".to_string()); - engine.use_resources(resources); - - let url = "http://example.com/ad-banner.gif"; - let request = Request::new(url, "", "").unwrap(); - let matched_rule = engine.check_network_request(&request); - assert!(matched_rule.matched, "Expected match for {}", url); - assert_eq!(matched_rule.redirect, Some("data:text/plain;base64,".to_owned()), "Expected redirect to contain resource"); - - let url = "http://example.com/script.js"; - let request = Request::new(url, "", "").unwrap(); - let matched_rule = engine.check_network_request(&request); - assert!(matched_rule.matched, "Expected match for {}", url); - assert_eq!(matched_rule.redirect, Some(format!("data:application/javascript;base64,{}", base64::encode(format!("{}", script)))), "Expected redirect to contain resource"); - } - - #[test] - fn document() { - let filters = [ - "||example.com$document", - "@@||sub.example.com$document", - ]; - - let engine = Engine::from_rules_debug(&filters, Default::default()); - - assert!(engine.check_network_request(&Request::new("https://example.com", "https://example.com", "document").unwrap()).matched); - assert!(!engine.check_network_request(&Request::new("https://example.com", "https://example.com", "script").unwrap()).matched); - assert!(engine.check_network_request(&Request::new("https://sub.example.com", "https://sub.example.com", "document").unwrap()).exception.is_some()); - } - - #[test] - fn implicit_all() { - { - let engine = Engine::from_rules_debug(["||example.com^"], Default::default()); - assert!(engine.check_network_request(&Request::new("https://example.com", "https://example.com", "document").unwrap()).matched); - } - { - let engine = Engine::from_rules_debug(["||example.com^$first-party"], Default::default()); - assert!(engine.check_network_request(&Request::new("https://example.com", "https://example.com", "document").unwrap()).matched); - } - { - let engine = Engine::from_rules_debug(["||example.com^$script"], Default::default()); - assert!(!engine.check_network_request(&Request::new("https://example.com", "https://example.com", "document").unwrap()).matched); - } - { - let engine = Engine::from_rules_debug(["||example.com^$~script"], Default::default()); - assert!(!engine.check_network_request(&Request::new("https://example.com", "https://example.com", "document").unwrap()).matched); - } - { - let engine = Engine::from_rules_debug(["||example.com^$document", "@@||example.com^$generichide"], Default::default()); - assert!(engine.check_network_request(&Request::new("https://example.com", "https://example.com", "document").unwrap()).matched); - } - { - let engine = Engine::from_rules_debug(["example.com"], ParseOptions { format: FilterFormat::Hosts, ..Default::default() }); - assert!(engine.check_network_request(&Request::new("https://example.com", "https://example.com", "document").unwrap()).matched); - } - { - let engine = Engine::from_rules_debug(["||example.com/path"], Default::default()); - assert!(!engine.check_network_request(&Request::new("https://example.com/path", "https://example.com/path", "document").unwrap()).matched); - } - { - let engine = Engine::from_rules_debug(["||example.com/path^"], Default::default()); - assert!(!engine.check_network_request(&Request::new("https://example.com/path", "https://example.com/path", "document").unwrap()).matched); - } - } - - #[test] - fn generichide() { - let filters = [ - "##.donotblock", - "##a[href=\"generic.com\"]", - - "@@||example.com$generichide", - "example.com##.block", - - "@@||example2.com/test.html$generichide", - "example2.com##.block", - ]; - let url_results = [ - ("https://example.com", vec![".block"], true), - ("https://example.com/test.html", vec![".block"], true), - ("https://example2.com", vec![".block", "a[href=\"generic.com\"]"], false), - ("https://example2.com/test.html", vec![".block"], true), - ]; - - let engine = Engine::from_rules(&filters, Default::default()); - - url_results.into_iter().for_each(|(url, expected_result, expected_generichide)| { - let result = engine.url_cosmetic_resources(url); - assert_eq!(result.hide_selectors, expected_result.iter().map(|s| s.to_string()).collect::>()); - assert_eq!(result.generichide, expected_generichide); - }); - } - - #[test] - fn important_redirect() { - let mut filter_set = FilterSet::new(true); - filter_set.add_filters([ - "||addthis.com^$important,3p,domain=~missingkids.com|~missingkids.org|~sainsburys.jobs|~sitecore.com|~amd.com", - "||addthis.com/*/addthis_widget.js$script,redirect=addthis.com/addthis_widget.js", - ], Default::default()); - let mut engine = Engine::from_filter_set(filter_set, false); - - engine.add_resource( - Resource::simple("addthis.com/addthis_widget.js", MimeType::ApplicationJavascript, "window.addthis = undefined"), - ).unwrap(); - - let request = Request::new("https://s7.addthis.com/js/250/addthis_widget.js?pub=resto", "https://www.rhmodern.com/catalog/product/product.jsp?productId=prod14970086&categoryId=cat7150028", "script").unwrap(); - let result = engine.check_network_request(&request); - - assert!(result.redirect.is_some()); - } - - #[test] - fn check_match_case_regex_filtering() { - { - // match case without regex is discarded - let engine = Engine::from_rules_debug(["ad.png$match-case"], Default::default()); - let request = Request::new("https://example.com/ad.png", "https://example.com", "image").unwrap(); - assert!(!engine.check_network_request(&request).matched); - } - { - // /^https:\/\/[0-9a-z]{3,}\.[-a-z]{10,}\.(?:li[fv]e|top|xyz)\/[a-z]{8}\/\?utm_campaign=\w{40,}/$doc,match-case,domain=life|live|top|xyz - let engine = Engine::from_rules_debug([r#"/^https:\/\/[0-9a-z]{3,}\.[-a-z]{10,}\.(?:li[fv]e|top|xyz)\/[a-z]{8}\/\?utm_campaign=\w{40,}/$doc,match-case,domain=life|live|top|xyz"#], Default::default()); - let request = Request::new("https://www.exampleaaa.xyz/testtest/?utm_campaign=aaaaaaaaaabbbbbbbbbbccccccccccdddddddddd", "https://www.exampleaaa.xyz/testtest/?utm_campaign=aaaaaaaaaabbbbbbbbbbccccccccccdddddddddd", "document").unwrap(); - assert!(engine.check_network_request(&request).matched); - } - // fails - because of non-supported look around operator in rust regex https://github.com/rust-lang/regex/issues/127#issuecomment-154713666 - /*{ - // /^https?:\/\/((?!www)[a-z]{3,}|\d{2})?\.?[-0-9a-z]{6,}\.[a-z]{2,6}\/(?:[a-z]{6,8}\/)?\/?\?u=[0-9a-z]{7}&o=[0-9a-z]{7}/$doc,frame,match-case,domain=buzz|com|de|fun|guru|info|life|live|mobi|online|pw|site|space|top|us|xyz - let engine = Engine::from_rules_debug([r#"/^https?:\/\/((?!www)[a-z]{3,}|\d{2})?\.?[-0-9a-z]{6,}\.[a-z]{2,6}\/(?:[a-z]{6,8}\/)?\/?\?u=[0-9a-z]{7}&o=[0-9a-z]{7}/$doc,frame,match-case,domain=buzz|com|de|fun|guru|info|life|live|mobi|online|pw|site|space|top|us|xyz"#], Default::default()); - let request = Request::new("https://example.com/aaaaaa/?u=aaaaaaa&o=bbbbbbb", - "https://example.com/aaaaaa/?u=aaaaaaa&o=bbbbbbb", - "document").unwrap(); - assert!(engine.check_network_request(&request).matched); - }*/ - // fails - because of non-supported look around operator in rust regex https://github.com/rust-lang/regex/issues/127#issuecomment-154713666 - /*{ - // /^https:\/\/(?:www\d\.)?[-a-z]{6,}\.(?:com|info|net|org)\/(?=[-_a-zA-Z]{0,42}\d)(?=[-_0-9a-z]{0,42}[A-Z])[-_0-9a-zA-Z]{43}\/\?cid=[-_0-9a-zA-Z]{16,36}(?:&qs\d=\S+)?&sid=[_0-9a-f]{1,32}$/$doc,match-case,domain=com|info|net|org - let engine = Engine::from_rules_debug([r#"/^https:\/\/(?:www\d\.)?[-a-z]{6,}\.(?:com|info|net|org)\/(?=[-_a-zA-Z]{0,42}\d)(?=[-_0-9a-z]{0,42}[A-Z])[-_0-9a-zA-Z]{43}\/\?cid=[-_0-9a-zA-Z]{16,36}(?:&qs\d=\S+)?&sid=[_0-9a-f]{1,32}$/$doc,match-case,domain=com|info|net|org"#], Default::default()); - let request = Request::new("https://www3.example.com/aaaaaaaaaabbbbbbbbbbccccccccccddddddddddAA5/?cid=aaaaaaaaaabbbbbb&qs5=\n&sid=a", - "https://www3.example.com/aaaaaaaaaabbbbbbbbbbccccccccccddddddddddAA5/?cid=aaaaaaaaaabbbbbb&qs5=\n&sid=a", - "document").unwrap(); - assert!(engine.check_network_request(&request).matched); - }*/ - // fails - because of non-supported look around operator in rust regex https://github.com/rust-lang/regex/issues/127#issuecomment-154713666 - /*{ - // /^https:\/\/(?:www\d\.)?[-a-z]{6,}\.(?:com|info|net|org)\/(?=[-_a-zA-Z]{0,42}\d)(?=[-_0-9a-z]{0,42}[A-Z])[-_0-9a-zA-Z]{43}\/\?sid=[_0-9a-f]{1,32}(?:&qs\d=\S+)?&cid=[-_0-9a-zA-Z]{16,36}$/$doc,match-case,domain=com|info|net|org - let engine = Engine::from_rules_debug([r#"/^https:\/\/(?:www\d\.)?[-a-z]{6,}\.(?:com|info|net|org)\/(?=[-_a-zA-Z]{0,42}\d)(?=[-_0-9a-z]{0,42}[A-Z])[-_0-9a-zA-Z]{43}\/\?cid=[-_0-9a-zA-Z]{16,36}(?:&qs\d=\S+)?&sid=[_0-9a-f]{1,32}$/$doc,match-case,domain=com|info|net|org"#], Default::default()); - let request = Request::new("https://www3.example.com/aaaaaaaaaabbbbbbbbbbccccccccccddddddddddAA5/?sid=1&qs1=\n&cid=aaaaaaaaaabbbbbb", - "https://www3.example.com/aaaaaaaaaabbbbbbbbbbccccccccccddddddddddAA5/?sid=1&qs1=\n&cid=aaaaaaaaaabbbbbb", - "document").unwrap(); - assert!(engine.check_network_request(&request).matched); - }*/ - { - // /^http:\/\/[a-z]{5}\.[a-z]{5}\.com\/[a-z]{10}\.apk$/$doc,match-case,domain=com - let engine = Engine::from_rules_debug([r#"/^http:\/\/[a-z]{5}\.[a-z]{5}\.com\/[a-z]{10}\.apk$/$doc,match-case,domain=com"#], Default::default()); - let request = Request::new("http://abcde.abcde.com/aaaaabbbbb.apk", "http://abcde.abcde.com/aaaaabbbbb.apk", "document").unwrap(); - assert!(engine.check_network_request(&request).matched); - } - // fails - because of non-supported look around operator in rust regex https://github.com/rust-lang/regex/issues/127#issuecomment-154713666 - /*{ - // /\/[A-Z]\/[-0-9a-z]{5,}\.com\/(?:[0-9a-f]{2}\/){3}[0-9a-f]{32}\.js$/$script,1p,match-case - let engine = Engine::from_rules_debug([r#"/\/[A-Z]\/[-0-9a-z]{5,}\.com\/(?:[0-9a-f]{2}\/){3}[0-9a-f]{32}\.js$/$script,1p,match-case"#], Default::default()); - let request = Request::new("/A/aaaaa.com/aa/bb/cc/aaaaaaaabbbbbbbbccccccccdddddddd.js", - "/A/aaaaa.com/aa/bb/cc/aaaaaaaabbbbbbbbccccccccdddddddd.js", - "script").unwrap(); - assert!(engine.check_network_request(&request).matched); - }*/ - // fails - because of non-supported look around operator in rust regex https://github.com/rust-lang/regex/issues/127#issuecomment-154713666 - /*{ - // /^https?:\/\/(?:[a-z]{2}\.)?[0-9a-z]{7,16}\.com\/[a-z](?=[a-z]{0,25}[0-9A-Z])[0-9a-zA-Z]{3,26}\/(?:[1-5]\d{4}|[3-9]\d{3})\??(?:_=\d+|v=\d)?$/$frame,script,xhr,popup,3p,match-case - let engine = Engine::from_rules_debug([r#"/^https?:\/\/(?:[a-z]{2}\.)?[0-9a-z]{7,16}\.com\/[a-z](?=[a-z]{0,25}[0-9A-Z])[0-9a-zA-Z]{3,26}\/(?:[1-5]\d{4}|[3-9]\d{3})\??(?:_=\d+|v=\d)?$/$frame,script,xhr,popup,3p,match-case"#], Default::default()); - let request = Request::new("https://aa.example.com/aAaaa/12222", - "https://aa.example.net/aAaaa/12222", - "frame").unwrap(); - assert!(engine.check_network_request(&request).matched); - }*/ - // fails - because of non-supported look around operator in rust regex https://github.com/rust-lang/regex/issues/127#issuecomment-154713666 - /*{ - // /^https?:\/\/(?:[a-z]{2}\.)?[0-9a-z]{7,16}\.website\/[a-z](?=[a-z]{0,25}[0-9A-Z])[0-9a-zA-Z]{3,26}\/(?:[1-5]\d{4}|[3-9]\d{3})\??(?:_=\d+|v=\d)?$/$frame,script,xhr,popup,3p,match-case - let engine = Engine::from_rules_debug([r#"/^https?:\/\/(?:[a-z]{2}\.)?[0-9a-z]{7,16}\.website\/[a-z](?=[a-z]{0,25}[0-9A-Z])[0-9a-zA-Z]{3,26}\/(?:[1-5]\d{4}|[3-9]\d{3})\??(?:_=\d+|v=\d)?$/$frame,script,xhr,popup,3p,match-case"#], Default::default()); - let request = Request::new("https://aa.example.website/aAaaa/12222", - "https://aa.example.website/aAaaa/12222", - "frame").unwrap(); - assert!(engine.check_network_request(&request).matched); - }*/ - // fails - because of non-supported look around operator in rust regex https://github.com/rust-lang/regex/issues/127#issuecomment-154713666 - /*{ - // /^https?:\/\/[a-z]{8,15}\.top(\/(?:\d{1,5}|0NaN|articles?|browse|index|movie|news|pages?|static|view|web|wiki)){1,4}(?:\.html|\/)$/$frame,3p,match-case - let engine = Engine::from_rules_debug([r#"/^https?:\/\/[a-z]{8,15}\.top(\/(?:\d{1,5}|0NaN|articles?|browse|index|movie|news|pages?|static|view|web|wiki)){1,4}(?:\.html|\/)$/$frame,3p,match-case"#], Default::default()); - let request = Request::new("https://examples.top/articles.html", - "https://examples.top/articles.html", - "frame").unwrap(); - assert!(engine.check_network_request(&request).matched); - }*/ - { - // /^https?:\/\/[a-z]{8,15}\.top\/[a-z]{4,}\.json$/$xhr,3p,match-case - let engine = Engine::from_rules_debug([r#"/^https?:\/\/[a-z]{8,15}\.top\/[a-z]{4,}\.json$/$xhr,3p,match-case"#], Default::default()); - let request = Request::new("https://examples.top/abcd.json", "https://examples.com/abcd.json", "xhr").unwrap(); - assert!(engine.check_network_request(&request).matched); - } - // fails - inferring unescaped `$` inside regex pattern - /*{ - // /^https?:\/\/[a-z]{8,15}\.top\/[-a-z]{4,}\.css\?aHR0c[\/0-9a-zA-Z]{33,}=?=?$/$css,3p,match-case - let engine = Engine::from_rules_debug([r#"/^https?:\/\/[a-z]{8,15}\.top\/[-a-z]{4,}\.css\?aHR0c[\/0-9a-zA-Z]{33,}=?=?$/$css,3p,match-case"#], Default::default()); - let request = Request::new("https://examples.top/abcd.css?aHR0c/aaaaaaaaaaAAAAAAAAAA000000000012==", - "https://examples.com/abcd.css?aHR0c/aaaaaaaaaaAAAAAAAAAA000000000012==", - "stylesheet").unwrap(); - assert!(engine.check_network_request(&request).matched); - }*/ - // fails - inferring unescaped `$` inside regex pattern - /*{ - // /^https?:\/\/[a-z]{8,15}\.top\/[a-z]{4,}\.png\?aHR0c[\/0-9a-zA-Z]{33,}=?=?$/$image,3p,match-case - let engine = Engine::from_rules_debug([r#"/^https?:\/\/[a-z]{8,15}\.top\/[a-z]{4,}\.png\?aHR0c[\/0-9a-zA-Z]{33,}=?=?$/$image,3p,match-case"#], Default::default()); - let request = Request::new("https://examples.top/abcd.png?aHR0c/aaaaaaaaaaAAAAAAAAAA000000000012==", - "https://examples.com/abcd.png?aHR0c/aaaaaaaaaaAAAAAAAAAA000000000012==", - "image").unwrap(); - assert!(engine.check_network_request(&request).matched); - }*/ - // fails - because of non-supported look around operator in rust regex https://github.com/rust-lang/regex/issues/127#issuecomment-154713666 - /*{ - // /^https?:\/\/[a-z]{8,15}\.xyz(\/(?:\d{1,5}|0NaN|articles?|browse|index|movie|news|pages?|static|view|web|wiki)){1,4}(?:\.html|\/)$/$frame,3p,match-case - let engine = Engine::from_rules_debug([r#"/^https?:\/\/[a-z]{8,15}\.xyz(\/(?:\d{1,5}|0NaN|articles?|browse|index|movie|news|pages?|static|view|web|wiki)){1,4}(?:\.html|\/)$/$frame,3p,match-case"#], Default::default()); - let request = Request::new("https://examples.xyz/articles.html", - "https://examples.xyz/articles.html", - "frame").unwrap(); - assert!(engine.check_network_request(&request).matched); - }*/ - { - // /^https?:\/\/cdn\.[a-z]{4,6}\.xyz\/app\.js$/$script,3p,match-case - let engine = Engine::from_rules_debug([r#"/^https?:\/\/cdn\.[a-z]{4,6}\.xyz\/app\.js$/$script,3p,match-case"#], Default::default()); - let request = Request::new("https://cdn.abcde.xyz/app.js", - "https://cdn.abcde.com/app.js", - "script").unwrap(); - assert!(engine.check_network_request(&request).matched); - } - // fails - because of non-supported look around operator in rust regex https://github.com/rust-lang/regex/issues/127#issuecomment-154713666 - /*{ - // /^https:\/\/a\.[-0-9a-z]{4,16}\.(?:club|com?|cyou|info|net|ru|site|top?|xxx|xyz)\/(?=[a-z]{0,6}[0-9A-Z])[0-9a-zA-Z]{7}\.js$/$script,match-case - let engine = Engine::from_rules_debug([r#"/^https:\/\/a\.[-0-9a-z]{4,16}\.(?:club|com?|cyou|info|net|ru|site|top?|xxx|xyz)\/(?=[a-z]{0,6}[0-9A-Z])[0-9a-zA-Z]{7}\.js$/$script,match-case"#], Default::default()); - let request = Request::new("https://a.abcd.club/aaaaaaA.js", - "https://a.abcd.club/aaaaaaA.js", - "script").unwrap(); - assert!(engine.check_network_request(&request).matched); - }*/ - { - // /^https:\/\/cdn\.jsdelivr\.net\/npm\/[-a-z_]{4,22}@latest\/dist\/script\.min\.js$/$script,3p,match-case - let engine = Engine::from_rules_debug([r#"/^https:\/\/cdn\.jsdelivr\.net\/npm\/[-a-z_]{4,22}@latest\/dist\/script\.min\.js$/$script,3p,match-case"#], Default::default()); - let request = Request::new("https://cdn.jsdelivr.net/npm/abcd@latest/dist/script.min.js", - "https://cdn.jsdelivr.com/npm/abcd@latest/dist/script.min.js", - "script").unwrap(); - assert!(engine.check_network_request(&request).matched); - } - // fails - inferring unescaped `$` inside regex pattern - /*{ - // /^https?:\/\/[-.0-9a-z]+\/script\.js$/$script,1p,strict3p,match-case - let engine = Engine::from_rules_debug([r#"/^https?:\/\/[-.0-9a-z]+\/script\.js$/$script,1p,strict3p,match-case"#], Default::default()); - let request = Request::new("https://www.example.com/script.js", - "https://www.abc.com/script.js", - "script").unwrap(); - assert!(engine.check_network_request(&request).matched); - }*/ - // fails - unicode not supported in network filter - /*{ - let engine = Engine::from_rules_debug([r#"/tesT߶/$domain=example.com"#], Default::default()); - let request = Request::new("https://example.com/tesT߶", - "https://example.com", - "script").unwrap(); - assert!(engine.check_network_request(&request).matched); - }*/ - // fails - unicode not supported in network filter - /*{ - let engine = Engine::from_rules_debug([r#"/tesT߶/$domain=example.com"#], Default::default()); - let request = Request::new("https://example-tesT߶.com/tesT", - "https://example.com", - "script").unwrap(); - assert!(engine.check_network_request(&request).matched); - }*/ - } - - #[test] - fn scriptlet_permissions() { - use crate::resources::{PermissionMask, ResourceType}; - const UBO_PERM: PermissionMask = PermissionMask::from_bits(0b00000001); - const BRAVE_PERM: PermissionMask = PermissionMask::from_bits(0b00000011); - - let resources = [ - Resource::simple("refresh-defuser.js", MimeType::ApplicationJavascript, "refresh-defuser"), - Resource { - name: "trusted-set-cookie.js".to_string(), - aliases: vec![], - kind: ResourceType::Mime(MimeType::ApplicationJavascript), - content: base64::encode("trusted-set-cookie"), - dependencies: vec![], - permission: UBO_PERM, - }, - Resource { - name: "brave-fix.js".to_string(), - aliases: vec![], - kind: ResourceType::Mime(MimeType::ApplicationJavascript), - content: base64::encode("brave-fix"), - dependencies: vec![], - permission: BRAVE_PERM, - }, - ]; - - let mut filter_set = FilterSet::new(false); - filter_set.add_filters([ - "sub1.example.com##+js(refresh-defuser)", - "sub2.example.com##+js(trusted-set-cookie)", - "sub3.example.com##+js(brave-fix)" - ], Default::default()); - filter_set.add_filters([ - "sub4.example.com##+js(refresh-defuser)", - "sub5.example.com##+js(trusted-set-cookie)", - "sub6.example.com##+js(brave-fix)" - ], ParseOptions { - permissions: UBO_PERM, - ..Default::default() - }); - filter_set.add_filters([ - "sub7.example.com##+js(refresh-defuser)", - "sub8.example.com##+js(trusted-set-cookie)", - "sub9.example.com##+js(brave-fix)" - ], ParseOptions { - permissions: BRAVE_PERM, - ..Default::default() - }); - - let mut engine = Engine::from_filter_set(filter_set, true); - engine.use_resources(resources); - - fn wrap_try(scriptlet_content: &str) -> String { - format!("try {{\n{}\n}} catch ( e ) {{ }}\n", scriptlet_content) - } - - assert_eq!(engine.url_cosmetic_resources("https://sub1.example.com").injected_script, wrap_try("refresh-defuser")); - assert_eq!(engine.url_cosmetic_resources("https://sub2.example.com").injected_script, ""); - assert_eq!(engine.url_cosmetic_resources("https://sub3.example.com").injected_script, ""); - - assert_eq!(engine.url_cosmetic_resources("https://sub4.example.com").injected_script, wrap_try("refresh-defuser")); - assert_eq!(engine.url_cosmetic_resources("https://sub5.example.com").injected_script, wrap_try("trusted-set-cookie")); - assert_eq!(engine.url_cosmetic_resources("https://sub6.example.com").injected_script, ""); - - assert_eq!(engine.url_cosmetic_resources("https://sub7.example.com").injected_script, wrap_try("refresh-defuser")); - assert_eq!(engine.url_cosmetic_resources("https://sub8.example.com").injected_script, wrap_try("trusted-set-cookie")); - assert_eq!(engine.url_cosmetic_resources("https://sub9.example.com").injected_script, wrap_try("brave-fix")); - } - - #[test] - fn quoted_scriptlet_args() { - use crate::resources::{MimeType, ResourceType}; - - let resources = [ - Resource { - name: "trusted-set-local-storage-item.js".into(), - aliases: vec![], - kind: ResourceType::Mime(MimeType::ApplicationJavascript), - content: base64::encode("function trustedSetLocalStorageItem(key = '', value = '') { setLocalStorageItemFn('local', true, key, value); }"), - dependencies: vec![], - permission: Default::default(), - }, - ]; - - let mut filter_set = FilterSet::new(false); - filter_set.add_filters([ - r#"dailymail.co.uk##+js(trusted-set-local-storage-item, mol.ads.cmp.tcf.cache, '{"getTCData":{"cmpId":27,"cmpVersion":3,"gdprApplies":true,"tcfPolicyVersion":2,"tcString":"CPyz5QAPyz5QAAbADCENC6CgAAAAAAAAAAwIAAASjAJINW4gCLMscGaQEIoEAIgjCQggUAAFAILRAQAODgp2VgE6MIkAAAUARABAhwAQAQCAAASABCAAJAAwQAAAiAQAAAAQCAAAMCAILACgAAAABANAhRCgAECQAyIAIpTAgKgSCAFsKAAADJCQCAKgMAKARGgEACIIARGAAACwMAgBICFggABMQbBAAMACAESoBoCTEwBACDQFgBkADLAGzAPsA_ACAAEFAIwASYAp8BaAFpAOqAfIBDoCJgEiAKRAXIAyMBk4DlAI_gSKEQEwBkADLAGzAPsA_ACAAEYAJMAU8A6oB8gEOgJEAUiAuQBkYDJwHKAR_AkU.f_gAAagAAAAA","eventStatus":"useractioncomplete","cmpStatus":"loaded","isServiceSpecific":true,"useNonStandardStacks":false,"publisherCC":"GB","purposeOneTreatment":false,"addtlConsent":"1~","acmVersion":2,"molGvlVersion":"186.gb.web","nrvString":"1~","nrvVersion":1,"repromptVersion":5},"getStoredRepromptVersion":5,"hasUserConsentedToAll":false,"hasUserDissentedToAll":true,"getConsentDegree":"no","getValidTCData":{"cmpId":27,"cmpVersion":3,"gdprApplies":true,"tcfPolicyVersion":2,"tcString":"CPyz5QAPyz5QAAbADCENC6CgAAAAAAAAAAwIAAASjAJINW4gCLMscGaQEIoEAIgjCQggUAAFAILRAQAODgp2VgE6MIkAAAUARABAhwAQAQCAAASABCAAJAAwQAAAiAQAAAAQCAAAMCAILACgAAAABANAhRCgAECQAyIAIpTAgKgSCAFsKAAADJCQCAKgMAKARGgEACIIARGAAACwMAgBICFggABMQbBAAMACAESoBoCTEwBACDQFgBkADLAGzAPsA_ACAAEFAIwASYAp8BaAFpAOqAfIBDoCJgEiAKRAXIAyMBk4DlAI_gSKEQEwBkADLAGzAPsA_ACAAEYAJMAU8A6oB8gEOgJEAUiAuQBkYDJwHKAR_AkU.f_gAAagAAAAA","listenerId":1,"eventStatus":"useractioncomplete","cmpStatus":"loaded","isServiceSpecific":true,"useNonStandardStacks":false,"publisherCC":"GB","purposeOneTreatment":false,"addtlConsent":"1~","acmVersion":2,"molGvlVersion":"186.gb.web","nrvString":"1~","nrvVersion":1,"repromptVersion":5}}')"#, - // invalid - unclosed quoted arg - r#"example.com##+js(trusted-set-local-storage-item, "test)"#, - // invalid - closing quote does not surround the argument - r#"example.com##+js(trusted-set-local-storage-item, "test"test, 3)"#, - ], Default::default()); - - let mut engine = Engine::from_filter_set(filter_set, true); - engine.use_resources(resources); - - assert_eq!(engine.url_cosmetic_resources("https://dailymail.co.uk").injected_script, r#"function trustedSetLocalStorageItem(key = '', value = '') { setLocalStorageItemFn('local', true, key, value); } -try { -trustedSetLocalStorageItem("mol.ads.cmp.tcf.cache", "{\"getTCData\":{\"cmpId\":27,\"cmpVersion\":3,\"gdprApplies\":true,\"tcfPolicyVersion\":2,\"tcString\":\"CPyz5QAPyz5QAAbADCENC6CgAAAAAAAAAAwIAAASjAJINW4gCLMscGaQEIoEAIgjCQggUAAFAILRAQAODgp2VgE6MIkAAAUARABAhwAQAQCAAASABCAAJAAwQAAAiAQAAAAQCAAAMCAILACgAAAABANAhRCgAECQAyIAIpTAgKgSCAFsKAAADJCQCAKgMAKARGgEACIIARGAAACwMAgBICFggABMQbBAAMACAESoBoCTEwBACDQFgBkADLAGzAPsA_ACAAEFAIwASYAp8BaAFpAOqAfIBDoCJgEiAKRAXIAyMBk4DlAI_gSKEQEwBkADLAGzAPsA_ACAAEYAJMAU8A6oB8gEOgJEAUiAuQBkYDJwHKAR_AkU.f_gAAagAAAAA\",\"eventStatus\":\"useractioncomplete\",\"cmpStatus\":\"loaded\",\"isServiceSpecific\":true,\"useNonStandardStacks\":false,\"publisherCC\":\"GB\",\"purposeOneTreatment\":false,\"addtlConsent\":\"1~\",\"acmVersion\":2,\"molGvlVersion\":\"186.gb.web\",\"nrvString\":\"1~\",\"nrvVersion\":1,\"repromptVersion\":5},\"getStoredRepromptVersion\":5,\"hasUserConsentedToAll\":false,\"hasUserDissentedToAll\":true,\"getConsentDegree\":\"no\",\"getValidTCData\":{\"cmpId\":27,\"cmpVersion\":3,\"gdprApplies\":true,\"tcfPolicyVersion\":2,\"tcString\":\"CPyz5QAPyz5QAAbADCENC6CgAAAAAAAAAAwIAAASjAJINW4gCLMscGaQEIoEAIgjCQggUAAFAILRAQAODgp2VgE6MIkAAAUARABAhwAQAQCAAASABCAAJAAwQAAAiAQAAAAQCAAAMCAILACgAAAABANAhRCgAECQAyIAIpTAgKgSCAFsKAAADJCQCAKgMAKARGgEACIIARGAAACwMAgBICFggABMQbBAAMACAESoBoCTEwBACDQFgBkADLAGzAPsA_ACAAEFAIwASYAp8BaAFpAOqAfIBDoCJgEiAKRAXIAyMBk4DlAI_gSKEQEwBkADLAGzAPsA_ACAAEYAJMAU8A6oB8gEOgJEAUiAuQBkYDJwHKAR_AkU.f_gAAagAAAAA\",\"listenerId\":1,\"eventStatus\":\"useractioncomplete\",\"cmpStatus\":\"loaded\",\"isServiceSpecific\":true,\"useNonStandardStacks\":false,\"publisherCC\":\"GB\",\"purposeOneTreatment\":false,\"addtlConsent\":\"1~\",\"acmVersion\":2,\"molGvlVersion\":\"186.gb.web\",\"nrvString\":\"1~\",\"nrvVersion\":1,\"repromptVersion\":5}}") -} catch ( e ) { } -"#.to_owned()); - - assert_eq!(engine.url_cosmetic_resources("https://example.com").injected_script, ""); - } -} +#[path = "../tests/unit/engine.rs"] +mod unit_tests; diff --git a/src/engine_serializer.rs b/src/engine_serializer.rs new file mode 100644 index 00000000..94f8ab30 --- /dev/null +++ b/src/engine_serializer.rs @@ -0,0 +1,76 @@ +use crate::engine::Engine; + +#[derive(Debug)] +pub enum SerializeError { + #[cfg(not(feature = "flatbuffers"))] + DataFormatError(crate::data_format::SerializationError), + + #[cfg(feature = "flatbuffers")] + FlatbuffersError(), +} + +#[derive(Debug)] +pub enum DeserializeError { + #[cfg(not(feature = "flatbuffers"))] + DataFormatError(crate::data_format::DeserializationError), + + #[cfg(feature = "flatbuffers")] + FlatbuffersError(), +} + +#[cfg(not(feature = "flatbuffers"))] +impl From for SerializeError { + fn from(value: crate::data_format::SerializationError) -> Self { + SerializeError::DataFormatError(value) + } +} + +#[cfg(not(feature = "flatbuffers"))] +impl From for DeserializeError { + fn from(value: crate::data_format::DeserializationError) -> Self { + DeserializeError::DataFormatError(value) + } +} + +pub trait Serialize { + fn serialize_raw(&self) -> Result, SerializeError>; + fn deserialize(&mut self, serialized: &[u8]) -> Result<(), DeserializeError>; +} + +#[cfg(feature = "flatbuffers")] +impl Serialize for Engine { + fn serialize_raw(&self) -> Result, SerializeError> { + Err(SerializeError::FlatbuffersError()) + } + + fn deserialize(&mut self, _serialized: &[u8]) -> Result<(), DeserializeError> { + Err(DeserializeError::FlatbuffersError()) + } +} + +#[cfg(not(feature = "flatbuffers"))] +impl Serialize for Engine { + /// Serializes the `Engine` into a binary format so that it can be quickly reloaded later. + fn serialize_raw(&self) -> Result, SerializeError> { + use crate::data_format::SerializeFormat; + + let serialize_format = SerializeFormat::build(&self.blocker, &self.cosmetic_cache); + + let result = serialize_format.serialize()?; + Ok(result) + } + + /// Deserialize the `Engine` from the binary format generated by `Engine::serialize_raw`. The + /// method will automatically select the correct deserialization implementation. + fn deserialize(&mut self, serialized: &[u8]) -> Result<(), DeserializeError> { + use crate::data_format::DeserializeFormat; + let current_tags = self.blocker.tags_enabled(); + let deserialize_format = DeserializeFormat::deserialize(serialized)?; + let (blocker, cosmetic_cache) = deserialize_format.build(); + self.blocker = blocker; + self.blocker + .use_tags(¤t_tags.iter().map(|s| &**s).collect::>()); + self.cosmetic_cache = cosmetic_cache; + Ok(()) + } +} diff --git a/src/filters/abstract_network.rs b/src/filters/abstract_network.rs new file mode 100644 index 00000000..6a1d55ef --- /dev/null +++ b/src/filters/abstract_network.rs @@ -0,0 +1,256 @@ +use memchr::memrchr as find_char_reverse; + +use super::network::NetworkFilterError; + +use once_cell::sync::Lazy; +use regex::Regex; + +/// For now, only support `$removeparam` with simple alphanumeric/dash/underscore patterns. +static VALID_PARAM: Lazy = Lazy::new(|| Regex::new(r"^[a-zA-Z0-9_\-]+$").unwrap()); + +#[derive(Clone, Copy)] +pub(crate) enum NetworkFilterLeftAnchor { + /// A `||` token, which represents a match to the start of a domain or subdomain segment. + DoublePipe, + /// A `|` token, which represents a match to the exact start of the URL. + SinglePipe, +} + +#[derive(Clone, Copy)] +pub(crate) enum NetworkFilterRightAnchor { + /// A `|` token, which represents a match to the exact end of the URL. + SinglePipe, +} + +/// Pattern for a network filter, describing what URLs to match against. +#[derive(Clone)] +pub(crate) struct NetworkFilterPattern { + pub(crate) left_anchor: Option, + pub(crate) pattern: String, + pub(crate) right_anchor: Option, +} + +/// Any option that appears on the right side of a network filter as initiated by a `$` character. +/// All `bool` arguments below are `true` if the option stands alone, or `false` if the option is +/// negated using a prepended `~`. +#[derive(Clone)] +pub(crate) enum NetworkFilterOption { + Domain(Vec<(bool, String)>), + Badfilter, + Important, + MatchCase, + ThirdParty(bool), + FirstParty(bool), + Tag(String), + Redirect(String), + RedirectRule(String), + Csp(Option), + Removeparam(String), + Generichide, + Document, + Image(bool), + Media(bool), + Object(bool), + Other(bool), + Ping(bool), + Script(bool), + Stylesheet(bool), + Subdocument(bool), + XmlHttpRequest(bool), + Websocket(bool), + Font(bool), +} + +impl NetworkFilterOption { + pub fn is_content_type(&self) -> bool { + matches!( + self, + Self::Document + | Self::Image(..) + | Self::Media(..) + | Self::Object(..) + | Self::Other(..) + | Self::Ping(..) + | Self::Script(..) + | Self::Stylesheet(..) + | Self::Subdocument(..) + | Self::XmlHttpRequest(..) + | Self::Websocket(..) + | Self::Font(..) + ) + } + + pub fn is_redirection(&self) -> bool { + matches!(self, Self::Redirect(..) | Self::RedirectRule(..)) + } +} + +/// Abstract syntax representation of a network filter. This representation can fully specify the +/// string representation of a filter as written, with the exception of aliased options like `1p` +/// or `ghide`. This allows separation of concerns between parsing and interpretation. +pub(crate) struct AbstractNetworkFilter { + pub(crate) exception: bool, + pub(crate) pattern: NetworkFilterPattern, + pub(crate) options: Option>, +} + +impl AbstractNetworkFilter { + pub(crate) fn parse(line: &str) -> Result { + let mut filter_index_start: usize = 0; + let mut filter_index_end: usize = line.len(); + + let mut exception = false; + if line.starts_with("@@") { + filter_index_start += 2; + exception = true; + } + + let maybe_options_index: Option = find_char_reverse(b'$', line.as_bytes()); + + let mut options = None; + if let Some(options_index) = maybe_options_index { + filter_index_end = options_index; + + // slicing here is safe; the first byte after '$' will be a character boundary + let raw_options = &line[filter_index_end + 1..]; + + options = Some(parse_filter_options(raw_options)?); + } + + let left_anchor = if line[filter_index_start..].starts_with("||") { + filter_index_start += 2; + Some(NetworkFilterLeftAnchor::DoublePipe) + } else if line[filter_index_start..].starts_with('|') { + filter_index_start += 1; + Some(NetworkFilterLeftAnchor::SinglePipe) + } else { + None + }; + + let right_anchor = if filter_index_end > 0 + && filter_index_end > filter_index_start + && line[..filter_index_end].ends_with('|') + { + filter_index_end -= 1; + Some(NetworkFilterRightAnchor::SinglePipe) + } else { + None + }; + + let pattern = &line[filter_index_start..filter_index_end]; + + Ok(AbstractNetworkFilter { + exception, + pattern: NetworkFilterPattern { + left_anchor, + pattern: pattern.to_string(), + right_anchor, + }, + options, + }) + } +} + +fn parse_filter_options(raw_options: &str) -> Result, NetworkFilterError> { + let mut result = vec![]; + + for raw_option in raw_options.split(',') { + // Check for negation: ~option + let negation = raw_option.starts_with('~'); + let maybe_negated_option = raw_option.trim_start_matches('~'); + + // Check for options: option=value1|value2 + let mut option_and_values = maybe_negated_option.splitn(2, '='); + let (option, value) = ( + option_and_values.next().unwrap(), + option_and_values.next().unwrap_or_default(), + ); + + result.push(match (option, negation) { + ("domain", _) | ("from", _) => { + let domains: Vec<(bool, String)> = value + .split('|') + .map(|domain| { + if let Some(negated_domain) = domain.strip_prefix('~') { + (false, negated_domain.to_string()) + } else { + (true, domain.to_string()) + } + }) + .filter(|(_, d)| !(d.starts_with('/') && d.ends_with('/'))) + .collect(); + if domains.is_empty() { + return Err(NetworkFilterError::NoSupportedDomains); + } + NetworkFilterOption::Domain(domains) + } + ("badfilter", true) => return Err(NetworkFilterError::NegatedBadFilter), + ("badfilter", false) => NetworkFilterOption::Badfilter, + ("important", true) => return Err(NetworkFilterError::NegatedImportant), + ("important", false) => NetworkFilterOption::Important, + ("match-case", true) => return Err(NetworkFilterError::NegatedOptionMatchCase), + ("match-case", false) => NetworkFilterOption::MatchCase, + ("third-party", negated) | ("3p", negated) => NetworkFilterOption::ThirdParty(!negated), + ("first-party", negated) | ("1p", negated) => NetworkFilterOption::FirstParty(!negated), + ("tag", true) => return Err(NetworkFilterError::NegatedTag), + ("tag", false) => NetworkFilterOption::Tag(String::from(value)), + ("redirect", true) => return Err(NetworkFilterError::NegatedRedirection), + ("redirect", false) => { + // Ignore this filter if no redirection resource is specified + if value.is_empty() { + return Err(NetworkFilterError::EmptyRedirection); + } + + NetworkFilterOption::Redirect(String::from(value)) + } + ("redirect-rule", true) => return Err(NetworkFilterError::NegatedRedirection), + ("redirect-rule", false) => { + if value.is_empty() { + return Err(NetworkFilterError::EmptyRedirection); + } + + NetworkFilterOption::RedirectRule(String::from(value)) + } + ("csp", _) => NetworkFilterOption::Csp(if !value.is_empty() { + Some(String::from(value)) + } else { + None + }), + ("removeparam", true) => return Err(NetworkFilterError::NegatedRemoveparam), + ("removeparam", false) => { + if value.is_empty() { + return Err(NetworkFilterError::EmptyRemoveparam); + } + if !VALID_PARAM.is_match(value) { + return Err(NetworkFilterError::RemoveparamRegexUnsupported); + } + NetworkFilterOption::Removeparam(String::from(value)) + } + ("generichide", true) | ("ghide", true) => { + return Err(NetworkFilterError::NegatedGenericHide) + } + ("generichide", false) | ("ghide", false) => NetworkFilterOption::Generichide, + ("document", true) | ("doc", true) => return Err(NetworkFilterError::NegatedDocument), + ("document", false) | ("doc", false) => NetworkFilterOption::Document, + ("image", negated) => NetworkFilterOption::Image(!negated), + ("media", negated) => NetworkFilterOption::Media(!negated), + ("object", negated) | ("object-subrequest", negated) => { + NetworkFilterOption::Object(!negated) + } + ("other", negated) => NetworkFilterOption::Other(!negated), + ("ping", negated) | ("beacon", negated) => NetworkFilterOption::Ping(!negated), + ("script", negated) => NetworkFilterOption::Script(!negated), + ("stylesheet", negated) | ("css", negated) => NetworkFilterOption::Stylesheet(!negated), + ("subdocument", negated) | ("frame", negated) => { + NetworkFilterOption::Subdocument(!negated) + } + ("xmlhttprequest", negated) | ("xhr", negated) => { + NetworkFilterOption::XmlHttpRequest(!negated) + } + ("websocket", negated) => NetworkFilterOption::Websocket(!negated), + ("font", negated) => NetworkFilterOption::Font(!negated), + (_, _) => return Err(NetworkFilterError::UnrecognisedOption), + }); + } + Ok(result) +} diff --git a/src/filters/cosmetic.rs b/src/filters/cosmetic.rs index d0f961e0..e0b5743d 100644 --- a/src/filters/cosmetic.rs +++ b/src/filters/cosmetic.rs @@ -47,7 +47,7 @@ pub enum CosmeticFilterError { /// Refer to #[derive(PartialEq, Debug, Clone, Serialize, Deserialize)] #[serde(tag = "type", content = "arg")] -#[serde(rename_all="kebab-case")] +#[serde(rename_all = "kebab-case")] pub enum CosmeticFilterAction { /// Rules with a remove action, e.g. `example.com##.ad:remove()`. /// @@ -131,7 +131,7 @@ pub struct CosmeticFilter { /// have one or more procedural operators. #[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] #[serde(tag = "type", content = "arg")] -#[serde(rename_all="kebab-case")] +#[serde(rename_all = "kebab-case")] pub enum CosmeticFilterOperator { CssSelector(String), HasText(String), @@ -297,7 +297,10 @@ impl CosmeticFilter { const REMOVE_TOKEN: &str = ":remove()"; - const PAIRS: &[(&[u8], fn(&str) -> Result)] = &[ + const PAIRS: &[( + &[u8], + fn(&str) -> Result, + )] = &[ (STYLE_TOKEN, CosmeticFilterAction::new_style), (REMOVE_ATTR_TOKEN, CosmeticFilterAction::new_remove_attr), (REMOVE_CLASS_TOKEN, CosmeticFilterAction::new_remove_class), @@ -343,14 +346,18 @@ impl CosmeticFilter { } match &self.selector[0] { CosmeticFilterOperator::CssSelector(s) => Some(s), - _ => None + _ => None, } } /// Parse the rule in `line` into a `CosmeticFilter`. If `debug` is true, the original rule /// will be reported in the resulting `CosmeticFilter` struct as well. Use `permission` to /// manage the filter's access to scriptlet resources for `+js(...)` injections. - pub fn parse(line: &str, debug: bool, permission: PermissionMask) -> Result { + pub fn parse( + line: &str, + debug: bool, + permission: PermissionMask, + ) -> Result { let mut mask = CosmeticFilterMask::NONE; if let Some(sharp_index) = find_char(b'#', line.as_bytes()) { let after_sharp_index = sharp_index + 1; @@ -425,7 +432,9 @@ impl CosmeticFilter { mask |= CosmeticFilterMask::SCRIPT_INJECT; ( // TODO: overloading `CssSelector` here is not ideal. - vec![CosmeticFilterOperator::CssSelector(String::from(&line[suffix_start_index + 4..line.len() - 1]))], + vec![CosmeticFilterOperator::CssSelector(String::from( + &line[suffix_start_index + 4..line.len() - 1], + ))], None, ) } else { @@ -574,8 +583,13 @@ pub(crate) fn get_hostname_hashes_from_labels(hostname: &str, domain: &str) -> V mod css_validation { use super::{CosmeticFilterError, CosmeticFilterOperator}; - pub fn validate_css_selector(selector: &str, _accept_abp_selectors: bool) -> Result, CosmeticFilterError> { - Ok(vec![CosmeticFilterOperator::CssSelector(selector.to_string())]) + pub fn validate_css_selector( + selector: &str, + _accept_abp_selectors: bool, + ) -> Result, CosmeticFilterError> { + Ok(vec![CosmeticFilterOperator::CssSelector( + selector.to_string(), + )]) } pub fn is_valid_css_style(_style: &str) -> bool { @@ -586,10 +600,10 @@ mod css_validation { #[cfg(feature = "css-validation")] mod css_validation { //! Methods for validating CSS selectors and style rules extracted from cosmetic filter rules. + use super::{CosmeticFilterError, CosmeticFilterOperator}; use core::fmt::{Result as FmtResult, Write}; use cssparser::{CowRcStr, ParseError, Parser, ParserInput, SourceLocation, ToCss, Token}; use selectors::parser::SelectorParseErrorKind; - use super::{CosmeticFilterError, CosmeticFilterOperator}; /// Returns a validated canonical CSS selector for the given input, or nothing if one can't be /// determined. @@ -599,14 +613,19 @@ mod css_validation { /// /// In addition to normalizing formatting, this function will remove unsupported procedural /// selectors and convert others to canonical representations (i.e. `:-abp-has` -> `:has`). - pub fn validate_css_selector(selector: &str, accept_abp_selectors: bool) -> Result, CosmeticFilterError> { + pub fn validate_css_selector( + selector: &str, + accept_abp_selectors: bool, + ) -> Result, CosmeticFilterError> { use once_cell::sync::Lazy; use regex::Regex; static RE_SIMPLE_SELECTOR: Lazy = Lazy::new(|| Regex::new(r"^[#.]?[A-Za-z_][\w-]*$").unwrap()); if RE_SIMPLE_SELECTOR.is_match(selector) { - return Ok(vec![CosmeticFilterOperator::CssSelector(selector.to_string())]); + return Ok(vec![CosmeticFilterOperator::CssSelector( + selector.to_string(), + )]); } // Use `mock-stylesheet-marker` where uBO uses `color: red` since we have control over the @@ -621,9 +640,7 @@ mod css_validation { }, ); - let prelude = rule_list_parser - .next() - .and_then(|r| r.ok()); + let prelude = rule_list_parser.next().and_then(|r| r.ok()); // There should only be one rule if rule_list_parser.next().is_some() { @@ -665,7 +682,9 @@ mod css_validation { if !prelude.0.iter().any(|s| has_procedural_operator(s)) { // There are no procedural filters, so all selectors use standard CSS. // It's ok to return that as a "single" selector. - return Ok(vec![CosmeticFilterOperator::CssSelector(prelude.to_css_string())]); + return Ok(vec![CosmeticFilterOperator::CssSelector( + prelude.to_css_string(), + )]); } if prelude.0.len() != 1 { @@ -719,19 +738,26 @@ mod css_validation { SelectorsPart::Component(Component::NonTSPseudoClass(c)) => { if let Some(procedural_operator) = c.to_procedural_operator() { if !pending_css_selector.is_empty() { - output.push(CosmeticFilterOperator::CssSelector(pending_css_selector)); + output.push(CosmeticFilterOperator::CssSelector( + pending_css_selector, + )); pending_css_selector = String::new(); } output.push(procedural_operator); } else { - c.to_css(&mut pending_css_selector).map_err(|_| CosmeticFilterError::InvalidCssSelector)?; + c.to_css(&mut pending_css_selector) + .map_err(|_| CosmeticFilterError::InvalidCssSelector)?; } } SelectorsPart::Component(other) => { - other.to_css(&mut pending_css_selector).map_err(|_| CosmeticFilterError::InvalidCssSelector)?; + other + .to_css(&mut pending_css_selector) + .map_err(|_| CosmeticFilterError::InvalidCssSelector)?; } SelectorsPart::Combinator(combinator) => { - combinator.to_css(&mut pending_css_selector).map_err(|_| CosmeticFilterError::InvalidCssSelector)?; + combinator + .to_css(&mut pending_css_selector) + .map_err(|_| CosmeticFilterError::InvalidCssSelector)?; } } } @@ -1096,12 +1122,24 @@ mod css_validation { fn to_procedural_operator(&self) -> Option { match self { NonTSPseudoClass::HasText(a) => Some(CosmeticFilterOperator::HasText(a.to_owned())), - NonTSPseudoClass::MatchesAttr(a) => Some(CosmeticFilterOperator::MatchesAttr(a.to_owned())), - NonTSPseudoClass::MatchesCss(a) => Some(CosmeticFilterOperator::MatchesCss(a.to_owned())), - NonTSPseudoClass::MatchesCssBefore(a) => Some(CosmeticFilterOperator::MatchesCssBefore(a.to_owned())), - NonTSPseudoClass::MatchesCssAfter(a) => Some(CosmeticFilterOperator::MatchesCssAfter(a.to_owned())), - NonTSPseudoClass::MatchesPath(a) => Some(CosmeticFilterOperator::MatchesPath(a.to_owned())), - NonTSPseudoClass::MinTextLength(a) => Some(CosmeticFilterOperator::MinTextLength(a.to_owned())), + NonTSPseudoClass::MatchesAttr(a) => { + Some(CosmeticFilterOperator::MatchesAttr(a.to_owned())) + } + NonTSPseudoClass::MatchesCss(a) => { + Some(CosmeticFilterOperator::MatchesCss(a.to_owned())) + } + NonTSPseudoClass::MatchesCssBefore(a) => { + Some(CosmeticFilterOperator::MatchesCssBefore(a.to_owned())) + } + NonTSPseudoClass::MatchesCssAfter(a) => { + Some(CosmeticFilterOperator::MatchesCssAfter(a.to_owned())) + } + NonTSPseudoClass::MatchesPath(a) => { + Some(CosmeticFilterOperator::MatchesPath(a.to_owned())) + } + NonTSPseudoClass::MinTextLength(a) => { + Some(CosmeticFilterOperator::MinTextLength(a.to_owned())) + } NonTSPseudoClass::Upward(a) => Some(CosmeticFilterOperator::Upward(a.to_owned())), NonTSPseudoClass::Xpath(a) => Some(CosmeticFilterOperator::Xpath(a.to_owned())), _ => None, @@ -1131,1088 +1169,8 @@ mod css_validation { Ok(()) } } - - #[test] - fn bad_selector_inputs() { - assert!(validate_css_selector(r#"rm -rf ./*"#, false).is_err()); - assert!(validate_css_selector(r#"javascript:alert("All pseudo-classes are valid")"#, false).is_ok()); - assert!(validate_css_selector(r#"javascript:alert("But opening comments are still forbidden" /*)"#, false).is_err()); - assert!(validate_css_selector(r#"This is not a CSS selector."#, false).is_err()); - assert!(validate_css_selector(r#"./malware.sh"#, false).is_err()); - assert!(validate_css_selector(r#"https://safesite.ru"#, false).is_err()); - assert!(validate_css_selector(r#"(function(){var e=60;return String.fromCharCode(e.charCodeAt(0))})();"#, false).is_err()); - assert!(validate_css_selector(r#"#!/usr/bin/sh"#, false).is_err()); - assert!(validate_css_selector(r#"input,input/*"#, false).is_err()); - // Accept a closing comment within a string. It should still be impossible to create an - // opening comment to match it. - assert!(validate_css_selector(r#"input[x="*/{}*{background:url(https://hackvertor.co.uk/images/logo.gif)}"]"#, false).is_ok()); - } - - #[test] - fn escaped_quote_in_tag_name() { - assert_eq!(validate_css_selector(r#"head\""#, false), Ok(vec![CosmeticFilterOperator::CssSelector(r#"head\""#.to_string())])); - } -} - -#[cfg(test)] -mod parse_tests { - use super::*; - - /// An easily modified summary of a `CosmeticFilter` rule to be used in tests. - #[derive(Debug, PartialEq)] - struct CosmeticFilterBreakdown { - entities: Option>, - hostnames: Option>, - not_entities: Option>, - not_hostnames: Option>, - selector: SelectorType, - action: Option, - - unhide: bool, - script_inject: bool, - } - - impl From<&CosmeticFilter> for CosmeticFilterBreakdown { - fn from(filter: &CosmeticFilter) -> CosmeticFilterBreakdown { - CosmeticFilterBreakdown { - entities: filter.entities.as_ref().cloned(), - hostnames: filter.hostnames.as_ref().cloned(), - not_entities: filter.not_entities.as_ref().cloned(), - not_hostnames: filter.not_hostnames.as_ref().cloned(), - selector: SelectorType::from(filter), - action: filter.action.as_ref().cloned(), - - unhide: filter.mask.contains(CosmeticFilterMask::UNHIDE), - script_inject: filter.mask.contains(CosmeticFilterMask::SCRIPT_INJECT), - } - } - } - - impl From for CosmeticFilterBreakdown { - fn from(filter: CosmeticFilter) -> CosmeticFilterBreakdown { - (&filter).into() - } - } - - impl Default for CosmeticFilterBreakdown { - fn default() -> Self { - CosmeticFilterBreakdown { - entities: None, - hostnames: None, - not_entities: None, - not_hostnames: None, - selector: SelectorType::PlainCss(String::from("")), - action: None, - - unhide: false, - script_inject: false, - } - } - } - - #[derive(Debug, PartialEq)] - enum SelectorType { - PlainCss(String), - Procedural(Vec), - } - - impl From<&CosmeticFilter> for SelectorType { - fn from(v: &CosmeticFilter) -> Self { - if let Some(selector) = v.plain_css_selector() { - Self::PlainCss(selector.to_string()) - } else { - Self::Procedural(v.selector.clone()) - } - } - } - - fn parse_cf(rule: &str) -> Result { - CosmeticFilter::parse(rule, false, Default::default()) - } - - /// Asserts that `rule` parses into a `CosmeticFilter` equivalent to the summary provided by - /// `expected`. - fn check_parse_result(rule: &str, expected: CosmeticFilterBreakdown) { - let filter: CosmeticFilterBreakdown = parse_cf(rule).unwrap().into(); - assert_eq!(expected, filter); - } - - #[test] - fn simple_selectors() { - check_parse_result( - "##div.popup", - CosmeticFilterBreakdown { - selector: SelectorType::PlainCss("div.popup".to_string()), - ..Default::default() - }, - ); - check_parse_result( - "###selector", - CosmeticFilterBreakdown { - selector: SelectorType::PlainCss("#selector".to_string()), - ..Default::default() - }, - ); - check_parse_result( - "##.selector", - CosmeticFilterBreakdown { - selector: SelectorType::PlainCss(".selector".to_string()), - ..Default::default() - }, - ); - check_parse_result( - "##a[href=\"foo.com\"]", - CosmeticFilterBreakdown { - selector: SelectorType::PlainCss("a[href=\"foo.com\"]".to_string()), - ..Default::default() - }, - ); - check_parse_result( - "##[href=\"foo.com\"]", - CosmeticFilterBreakdown { - selector: SelectorType::PlainCss("[href=\"foo.com\"]".to_string()), - ..Default::default() - }, - ); - } - - /// Produces a sorted vec of the hashes of all the given domains. - /// - /// For convenience, the return value is wrapped in a `Some()` to be consumed by a - /// `CosmeticFilterBreakdown`. - fn sort_hash_domains(domains: Vec<&str>) -> Option> { - let mut hashes: Vec<_> = domains.iter().map(|d| crate::utils::fast_hash(d)).collect(); - hashes.sort(); - Some(hashes) - } - - #[test] - fn hostnames() { - check_parse_result( - r#"u00p.com##div[class^="adv-box"]"#, - CosmeticFilterBreakdown { - selector: SelectorType::PlainCss(r#"div[class^="adv-box"]"#.to_string()), - hostnames: sort_hash_domains(vec!["u00p.com"]), - ..Default::default() - }, - ); - check_parse_result( - r#"distractify.com##div[class*="AdInArticle"]"#, - CosmeticFilterBreakdown { - selector: SelectorType::PlainCss(r#"div[class*="AdInArticle"]"#.to_string()), - hostnames: sort_hash_domains(vec!["distractify.com"]), - ..Default::default() - }, - ); - check_parse_result( - r#"soundtrackcollector.com,the-numbers.com##a[href^="http://affiliates.allposters.com/"]"#, - CosmeticFilterBreakdown { - selector: SelectorType::PlainCss(r#"a[href^="http://affiliates.allposters.com/"]"#.to_string()), - hostnames: sort_hash_domains(vec!["soundtrackcollector.com", "the-numbers.com"]), - ..Default::default() - }, - ); - check_parse_result( - r#"thelocal.at,thelocal.ch,thelocal.de,thelocal.dk,thelocal.es,thelocal.fr,thelocal.it,thelocal.no,thelocal.se##div[class*="-widget"]"#, - CosmeticFilterBreakdown { - selector: SelectorType::PlainCss(r#"div[class*="-widget"]"#.to_string()), - hostnames: sort_hash_domains(vec![ - "thelocal.at", - "thelocal.ch", - "thelocal.de", - "thelocal.dk", - "thelocal.es", - "thelocal.fr", - "thelocal.it", - "thelocal.no", - "thelocal.se", - ]), - ..Default::default() - }, - ); - check_parse_result( - r#"base64decode.org,base64encode.org,beautifyjson.org,minifyjson.org,numgen.org,pdfmrg.com,pdfspl.com,prettifycss.com,pwdgen.org,strlength.com,strreverse.com,uglifyjs.net,urldecoder.org##div[class^="banner_"]"#, - CosmeticFilterBreakdown { - selector: SelectorType::PlainCss(r#"div[class^="banner_"]"#.to_string()), - hostnames: sort_hash_domains(vec![ - "base64decode.org", - "base64encode.org", - "beautifyjson.org", - "minifyjson.org", - "numgen.org", - "pdfmrg.com", - "pdfspl.com", - "prettifycss.com", - "pwdgen.org", - "strlength.com", - "strreverse.com", - "uglifyjs.net", - "urldecoder.org", - ]), - ..Default::default() - }, - ); - check_parse_result( - r#"adforum.com,alliednews.com,americustimesrecorder.com,andovertownsman.com,athensreview.com,batesvilleheraldtribune.com,bdtonline.com,channel24.pk,chickashanews.com,claremoreprogress.com,cleburnetimesreview.com,clintonherald.com,commercejournal.com,commercial-news.com,coopercrier.com,cordeledispatch.com,corsicanadailysun.com,crossville-chronicle.com,cullmantimes.com,dailyiowegian.com,dailyitem.com,daltondailycitizen.com,derrynews.com,duncanbanner.com,eagletribune.com,edmondsun.com,effinghamdailynews.com,enewscourier.com,enidnews.com,farmtalknewspaper.com,fayettetribune.com,flasharcade.com,flashgames247.com,flyergroup.com,foxsportsasia.com,gainesvilleregister.com,gloucestertimes.com,goshennews.com,greensburgdailynews.com,heraldbanner.com,heraldbulletin.com,hgazette.com,homemagonline.com,itemonline.com,jacksonvilleprogress.com,jerusalemonline.com,joplinglobe.com,journal-times.com,journalexpress.net,kexp.org,kokomotribune.com,lockportjournal.com,mankatofreepress.com,mcalesternews.com,mccrearyrecord.com,mcleansborotimesleader.com,meadvilletribune.com,meridianstar.com,mineralwellsindex.com,montgomery-herald.com,mooreamerican.com,moultrieobserver.com,muskogeephoenix.com,ncnewsonline.com,newburyportnews.com,newsaegis.com,newsandtribune.com,niagara-gazette.com,njeffersonnews.com,normantranscript.com,opposingviews.com,orangeleader.com,oskaloosa.com,ottumwacourier.com,outlookmoney.com,palestineherald.com,panews.com,paulsvalleydailydemocrat.com,pellachronicle.com,pharostribune.com,pressrepublican.com,pryordailytimes.com,randolphguide.com,record-eagle.com,register-herald.com,register-news.com,reporter.net,rockwallheraldbanner.com,roysecityheraldbanner.com,rushvillerepublican.com,salemnews.com,sentinel-echo.com,sharonherald.com,shelbyvilledailyunion.com,siteslike.com,standardmedia.co.ke,starbeacon.com,stwnewspress.com,suwanneedemocrat.com,tahlequahdailypress.com,theadanews.com,theawesomer.com,thedailystar.com,thelandonline.com,themoreheadnews.com,thesnaponline.com,tiftongazette.com,times-news.com,timesenterprise.com,timessentinel.com,timeswv.com,tonawanda-news.com,tribdem.com,tribstar.com,unionrecorder.com,valdostadailytimes.com,washtimesherald.com,waurikademocrat.com,wcoutlook.com,weatherforddemocrat.com,woodwardnews.net,wrestlinginc.com##div[style="width:300px; height:250px;"]"#, - CosmeticFilterBreakdown { - selector: SelectorType::PlainCss(r#"div[style="width:300px; height:250px;"]"#.to_string()), - hostnames: sort_hash_domains(vec![ - "adforum.com", - "alliednews.com", - "americustimesrecorder.com", - "andovertownsman.com", - "athensreview.com", - "batesvilleheraldtribune.com", - "bdtonline.com", - "channel24.pk", - "chickashanews.com", - "claremoreprogress.com", - "cleburnetimesreview.com", - "clintonherald.com", - "commercejournal.com", - "commercial-news.com", - "coopercrier.com", - "cordeledispatch.com", - "corsicanadailysun.com", - "crossville-chronicle.com", - "cullmantimes.com", - "dailyiowegian.com", - "dailyitem.com", - "daltondailycitizen.com", - "derrynews.com", - "duncanbanner.com", - "eagletribune.com", - "edmondsun.com", - "effinghamdailynews.com", - "enewscourier.com", - "enidnews.com", - "farmtalknewspaper.com", - "fayettetribune.com", - "flasharcade.com", - "flashgames247.com", - "flyergroup.com", - "foxsportsasia.com", - "gainesvilleregister.com", - "gloucestertimes.com", - "goshennews.com", - "greensburgdailynews.com", - "heraldbanner.com", - "heraldbulletin.com", - "hgazette.com", - "homemagonline.com", - "itemonline.com", - "jacksonvilleprogress.com", - "jerusalemonline.com", - "joplinglobe.com", - "journal-times.com", - "journalexpress.net", - "kexp.org", - "kokomotribune.com", - "lockportjournal.com", - "mankatofreepress.com", - "mcalesternews.com", - "mccrearyrecord.com", - "mcleansborotimesleader.com", - "meadvilletribune.com", - "meridianstar.com", - "mineralwellsindex.com", - "montgomery-herald.com", - "mooreamerican.com", - "moultrieobserver.com", - "muskogeephoenix.com", - "ncnewsonline.com", - "newburyportnews.com", - "newsaegis.com", - "newsandtribune.com", - "niagara-gazette.com", - "njeffersonnews.com", - "normantranscript.com", - "opposingviews.com", - "orangeleader.com", - "oskaloosa.com", - "ottumwacourier.com", - "outlookmoney.com", - "palestineherald.com", - "panews.com", - "paulsvalleydailydemocrat.com", - "pellachronicle.com", - "pharostribune.com", - "pressrepublican.com", - "pryordailytimes.com", - "randolphguide.com", - "record-eagle.com", - "register-herald.com", - "register-news.com", - "reporter.net", - "rockwallheraldbanner.com", - "roysecityheraldbanner.com", - "rushvillerepublican.com", - "salemnews.com", - "sentinel-echo.com", - "sharonherald.com", - "shelbyvilledailyunion.com", - "siteslike.com", - "standardmedia.co.ke", - "starbeacon.com", - "stwnewspress.com", - "suwanneedemocrat.com", - "tahlequahdailypress.com", - "theadanews.com", - "theawesomer.com", - "thedailystar.com", - "thelandonline.com", - "themoreheadnews.com", - "thesnaponline.com", - "tiftongazette.com", - "times-news.com", - "timesenterprise.com", - "timessentinel.com", - "timeswv.com", - "tonawanda-news.com", - "tribdem.com", - "tribstar.com", - "unionrecorder.com", - "valdostadailytimes.com", - "washtimesherald.com", - "waurikademocrat.com", - "wcoutlook.com", - "weatherforddemocrat.com", - "woodwardnews.net", - "wrestlinginc.com", - ]), - ..Default::default() - }, - ); - } - - #[test] - fn href() { - check_parse_result( - r#"##a[href$="/vghd.shtml"]"#, - CosmeticFilterBreakdown { - selector: SelectorType::PlainCss(r#"a[href$="/vghd.shtml"]"#.to_string()), - ..Default::default() - }, - ); - check_parse_result( - r#"##a[href*=".adk2x.com/"]"#, - CosmeticFilterBreakdown { - selector: SelectorType::PlainCss(r#"a[href*=".adk2x.com/"]"#.to_string()), - ..Default::default() - }, - ); - check_parse_result( - r#"##a[href^="//40ceexln7929.com/"]"#, - CosmeticFilterBreakdown { - selector: SelectorType::PlainCss(r#"a[href^="//40ceexln7929.com/"]"#.to_string()), - ..Default::default() - }, - ); - check_parse_result( - r#"##a[href*=".trust.zone"]"#, - CosmeticFilterBreakdown { - selector: SelectorType::PlainCss(r#"a[href*=".trust.zone"]"#.to_string()), - ..Default::default() - }, - ); - check_parse_result( - r#"tf2maps.net##a[href="http://forums.tf2maps.net/payments.php"]"#, - CosmeticFilterBreakdown { - selector: SelectorType::PlainCss(r#"a[href="http://forums.tf2maps.net/payments.php"]"#.to_string()), - hostnames: sort_hash_domains(vec!["tf2maps.net"]), - ..Default::default() - }, - ); - check_parse_result( - r#"rarbg.to,rarbg.unblockall.org,rarbgaccess.org,rarbgmirror.com,rarbgmirror.org,rarbgmirror.xyz,rarbgproxy.com,rarbgproxy.org,rarbgunblock.com##a[href][target="_blank"] > button"#, - CosmeticFilterBreakdown { - selector: SelectorType::PlainCss(r#"a[href][target="_blank"] > button"#.to_string()), - hostnames: sort_hash_domains(vec![ - "rarbg.to", - "rarbg.unblockall.org", - "rarbgaccess.org", - "rarbgmirror.com", - "rarbgmirror.org", - "rarbgmirror.xyz", - "rarbgproxy.com", - "rarbgproxy.org", - "rarbgunblock.com", - ]), - ..Default::default() - }, - ); - } - - #[test] - fn injected_scripts() { - check_parse_result( - r#"hentaifr.net,jeu.info,tuxboard.com,xstory-fr.com##+js(goyavelab-defuser.js)"#, - CosmeticFilterBreakdown { - selector: SelectorType::PlainCss(r#"goyavelab-defuser.js"#.to_string()), - hostnames: sort_hash_domains(vec![ - "hentaifr.net", - "jeu.info", - "tuxboard.com", - "xstory-fr.com", - ]), - script_inject: true, - ..Default::default() - }, - ); - check_parse_result( - r#"haus-garten-test.de,sozialversicherung-kompetent.de##+js(set-constant.js, Object.keys, trueFunc)"#, - CosmeticFilterBreakdown { - selector: SelectorType::PlainCss(r#"set-constant.js, Object.keys, trueFunc"#.to_string()), - hostnames: sort_hash_domains(vec![ - "haus-garten-test.de", - "sozialversicherung-kompetent.de", - ]), - script_inject: true, - ..Default::default() - }, - ); - check_parse_result( - r#"airliners.de,auszeit.bio,autorevue.at,clever-tanken.de,fanfiktion.de,finya.de,frag-mutti.de,frustfrei-lernen.de,fussballdaten.de,gameswelt.*,liga3-online.de,lz.de,mt.de,psychic.de,rimondo.com,spielen.de,weltfussball.at,weristdeinfreund.de##+js(abort-current-inline-script.js, Number.isNaN)"#, - CosmeticFilterBreakdown { - selector: SelectorType::PlainCss(r#"abort-current-inline-script.js, Number.isNaN"#.to_string()), - hostnames: sort_hash_domains(vec![ - "airliners.de", - "auszeit.bio", - "autorevue.at", - "clever-tanken.de", - "fanfiktion.de", - "finya.de", - "frag-mutti.de", - "frustfrei-lernen.de", - "fussballdaten.de", - "liga3-online.de", - "lz.de", - "mt.de", - "psychic.de", - "rimondo.com", - "spielen.de", - "weltfussball.at", - "weristdeinfreund.de", - ]), - entities: sort_hash_domains(vec!["gameswelt"]), - script_inject: true, - ..Default::default() - }, - ); - check_parse_result( - r#"prad.de##+js(abort-on-property-read.js, document.cookie)"#, - CosmeticFilterBreakdown { - selector: SelectorType::PlainCss(r#"abort-on-property-read.js, document.cookie"#.to_string()), - hostnames: sort_hash_domains(vec!["prad.de"]), - script_inject: true, - ..Default::default() - }, - ); - check_parse_result( - r#"computerbild.de##+js(abort-on-property-read.js, Date.prototype.toUTCString)"#, - CosmeticFilterBreakdown { - selector: SelectorType::PlainCss(r#"abort-on-property-read.js, Date.prototype.toUTCString"#.to_string()), - hostnames: sort_hash_domains(vec!["computerbild.de"]), - script_inject: true, - ..Default::default() - }, - ); - check_parse_result( - r#"computerbild.de##+js(setTimeout-defuser.js, ())return)"#, - CosmeticFilterBreakdown { - selector: SelectorType::PlainCss(r#"setTimeout-defuser.js, ())return"#.to_string()), - hostnames: sort_hash_domains(vec!["computerbild.de"]), - script_inject: true, - ..Default::default() - }, - ); - } - - #[test] - fn entities() { - check_parse_result( - r#"monova.*##+js(nowebrtc.js)"#, - CosmeticFilterBreakdown { - selector: SelectorType::PlainCss(r#"nowebrtc.js"#.to_string()), - entities: sort_hash_domains(vec!["monova"]), - script_inject: true, - ..Default::default() - }, - ); - check_parse_result( - r#"monova.*##tr.success.desktop"#, - CosmeticFilterBreakdown { - selector: SelectorType::PlainCss(r#"tr.success.desktop"#.to_string()), - entities: sort_hash_domains(vec!["monova"]), - ..Default::default() - }, - ); - check_parse_result( - r#"monova.*#@#script + [class] > [class]:first-child"#, - CosmeticFilterBreakdown { - selector: SelectorType::PlainCss(r#"script + [class] > [class]:first-child"#.to_string()), - entities: sort_hash_domains(vec!["monova"]), - unhide: true, - ..Default::default() - }, - ); - check_parse_result( - r#"adshort.im,adsrt.*#@#[id*="ScriptRoot"]"#, - CosmeticFilterBreakdown { - selector: SelectorType::PlainCss(r#"[id*="ScriptRoot"]"#.to_string()), - hostnames: sort_hash_domains(vec!["adshort.im"]), - entities: sort_hash_domains(vec!["adsrt"]), - unhide: true, - ..Default::default() - }, - ); - check_parse_result( - r#"downloadsource.*##.date:not(dt):style(display: block !important;)"#, - CosmeticFilterBreakdown { - selector: SelectorType::PlainCss(r#".date:not(dt)"#.to_string()), - entities: sort_hash_domains(vec!["downloadsource"]), - action: Some(CosmeticFilterAction::Style("display: block !important;".into())), - ..Default::default() - }, - ); - } - - #[test] - fn styles() { - check_parse_result( - r#"chip.de##.video-wrapper > video[style]:style(display:block!important;padding-top:0!important;)"#, - CosmeticFilterBreakdown { - selector: SelectorType::PlainCss(r#".video-wrapper > video[style]"#.to_string()), - hostnames: sort_hash_domains(vec!["chip.de"]), - action: Some(CosmeticFilterAction::Style("display:block!important;padding-top:0!important;".into())), - ..Default::default() - }, - ); - check_parse_result( - r#"allmusic.com##.advertising.medium-rectangle:style(min-height: 1px !important;)"#, - CosmeticFilterBreakdown { - selector: SelectorType::PlainCss(r#".advertising.medium-rectangle"#.to_string()), - hostnames: sort_hash_domains(vec!["allmusic.com"]), - action: Some(CosmeticFilterAction::Style("min-height: 1px !important;".into())), - ..Default::default() - }, - ); - #[cfg(feature = "css-validation")] - check_parse_result( - r#"quora.com##.signup_wall_prevent_scroll .SiteHeader,.signup_wall_prevent_scroll .LoggedOutFooter,.signup_wall_prevent_scroll .ContentWrapper:style(filter: none !important;)"#, - CosmeticFilterBreakdown { - selector: SelectorType::PlainCss(r#".signup_wall_prevent_scroll .SiteHeader, .signup_wall_prevent_scroll .LoggedOutFooter, .signup_wall_prevent_scroll .ContentWrapper"#.to_string()), - hostnames: sort_hash_domains(vec!["quora.com"]), - action: Some(CosmeticFilterAction::Style("filter: none !important;".into())), - ..Default::default() - } - ); - check_parse_result( - r#"imdb.com##body#styleguide-v2:style(background-color: #e3e2dd !important; background-image: none !important;)"#, - CosmeticFilterBreakdown { - selector: SelectorType::PlainCss(r#"body#styleguide-v2"#.to_string()), - hostnames: sort_hash_domains(vec!["imdb.com"]), - action: Some(CosmeticFilterAction::Style("background-color: #e3e2dd !important; background-image: none !important;".into())), - ..Default::default() - }, - ); - check_parse_result( - r#"streamcloud.eu###login > div[style^="width"]:style(display: block !important)"#, - CosmeticFilterBreakdown { - selector: SelectorType::PlainCss(r#"#login > div[style^="width"]"#.to_string()), - hostnames: sort_hash_domains(vec!["streamcloud.eu"]), - action: Some(CosmeticFilterAction::Style("display: block !important".into())), - ..Default::default() - }, - ); - check_parse_result( - r#"moonbit.co.in,moondoge.co.in,moonliteco.in##[src^="//coinad.com/ads/"]:style(visibility: collapse !important)"#, - CosmeticFilterBreakdown { - selector: SelectorType::PlainCss(r#"[src^="//coinad.com/ads/"]"#.to_string()), - hostnames: sort_hash_domains(vec![ - "moonbit.co.in", - "moondoge.co.in", - "moonliteco.in", - ]), - action: Some(CosmeticFilterAction::Style("visibility: collapse !important".into())), - ..Default::default() - }, - ); - } - - #[test] - fn unicode() { - check_parse_result( - "###неделя", - CosmeticFilterBreakdown { - selector: SelectorType::PlainCss("#неделя".to_string()), - ..Default::default() - }, - ); - check_parse_result( - "неlloworlд.com#@##week", - CosmeticFilterBreakdown { - selector: SelectorType::PlainCss("#week".to_string()), - hostnames: sort_hash_domains(vec!["xn--lloworl-5ggb3f.com"]), - unhide: true, - ..Default::default() - } - ); - } - - /// As of writing, these procedural filters with multiple comma-separated selectors aren't - /// fully supported by uBO. Here, they are treated as parsing errors. - #[test] - #[cfg(feature = "css-validation")] - fn multi_selector_procedural_filters() { - assert!(parse_cf("example.com##h1:has-text(Example Domain),p:has-text(More)").is_err()); - assert!(parse_cf("example.com##h1,p:has-text(ill)").is_err()); - assert!(parse_cf("example.com##h1:has-text(om),p").is_err()); - } - - #[test] - #[cfg(feature = "css-validation")] - fn procedural_operators() { - /// Check against simple `example.com` domains. Domain parsing is well-handled by other - /// tests, but procedural filters cannot be generic. - fn check_procedural(raw: &str, expected_selectors: Vec) { - check_parse_result( - &format!("example.com##{}", raw), - CosmeticFilterBreakdown { - selector: SelectorType::Procedural(expected_selectors), - hostnames: sort_hash_domains(vec![ - "example.com", - ]), - ..Default::default() - } - ); - } - check_procedural( - ".items:has-text(Sponsored)", - vec![ - CosmeticFilterOperator::CssSelector(".items".to_string()), - CosmeticFilterOperator::HasText("Sponsored".to_string()), - ], - ); - check_procedural( - "div.items:has(p):has-text(Sponsored)", - vec![ - CosmeticFilterOperator::CssSelector("div.items:has(p)".to_string()), - CosmeticFilterOperator::HasText("Sponsored".to_string()), - ], - ); - check_procedural( - "div.items:has-text(Sponsored):has(p)", - vec![ - CosmeticFilterOperator::CssSelector("div.items".to_string()), - CosmeticFilterOperator::HasText("Sponsored".to_string()), - CosmeticFilterOperator::CssSelector(":has(p)".to_string()), - ], - ); - check_procedural( - ".items:has-text(Sponsored) .container", - vec![ - CosmeticFilterOperator::CssSelector(".items".to_string()), - CosmeticFilterOperator::HasText("Sponsored".to_string()), - CosmeticFilterOperator::CssSelector(" .container".to_string()), - ], - ); - check_procedural( - ".items:has-text(Sponsored) > .container", - vec![ - CosmeticFilterOperator::CssSelector(".items".to_string()), - CosmeticFilterOperator::HasText("Sponsored".to_string()), - CosmeticFilterOperator::CssSelector(" > .container".to_string()), - ], - ); - check_procedural( - ".items:has-text(Sponsored) + .container:has-text(Ad) ~ div", - vec![ - CosmeticFilterOperator::CssSelector(".items".to_string()), - CosmeticFilterOperator::HasText("Sponsored".to_string()), - CosmeticFilterOperator::CssSelector(" + .container".to_string()), - CosmeticFilterOperator::HasText("Ad".to_string()), - CosmeticFilterOperator::CssSelector(" ~ div".to_string()), - ], - ); - } - - #[test] - #[cfg(feature = "css-validation")] - fn unsupported() { - assert!(parse_cf("yandex.*##.serp-item:if(:scope > div.organic div.organic__subtitle:matches-css-after(content: /[Рр]еклама/))").is_err()); - assert!(parse_cf(r#"facebook.com,facebookcorewwwi.onion##.ego_column:if(a[href^="/campaign/landing"])"#).is_err()); - assert!(parse_cf(r#"readcomiconline.to##^script:has-text(this[atob)"#).is_err()); - assert!(parse_cf("##").is_err()); - assert!(parse_cf("").is_err()); - - // `:has` was previously limited to procedural filtering, but is now a native CSS feature. - assert!(parse_cf(r#"thedailywtf.com##.article-body > div:has(a[href*="utm_medium"])"#).is_ok()); - - // `:has-text` and `:xpath` are now supported procedural filters - assert!(parse_cf("twitter.com##article:has-text(/Promoted|Gesponsert|Реклама|Promocionado/):xpath(../..)").is_ok()); - - // generic procedural filters are not supported - assert!(parse_cf("##.t-rec > .t886:has-text(cookies)").is_err()); - } - - #[test] - fn hidden_generic() { - let rule = parse_cf("##.selector").unwrap(); - assert!(rule.hidden_generic_rule().is_none()); - - let rule = parse_cf("test.com##.selector").unwrap(); - assert!(rule.hidden_generic_rule().is_none()); - - let rule = parse_cf("test.*##.selector").unwrap(); - assert!(rule.hidden_generic_rule().is_none()); - - let rule = parse_cf("test.com,~a.test.com##.selector").unwrap(); - assert!(rule.hidden_generic_rule().is_none()); - - let rule = parse_cf("test.*,~a.test.com##.selector").unwrap(); - assert!(rule.hidden_generic_rule().is_none()); - - let rule = parse_cf("test.*,~a.test.*##.selector").unwrap(); - assert!(rule.hidden_generic_rule().is_none()); - - let rule = parse_cf("test.com#@#.selector").unwrap(); - assert!(rule.hidden_generic_rule().is_none()); - - let rule = parse_cf("~test.com##.selector").unwrap(); - assert_eq!( - CosmeticFilterBreakdown::from(rule.hidden_generic_rule().unwrap()), - parse_cf("##.selector").unwrap().into(), - ); - - let rule = parse_cf("~test.*##.selector").unwrap(); - assert_eq!( - CosmeticFilterBreakdown::from(rule.hidden_generic_rule().unwrap()), - parse_cf("##.selector").unwrap().into(), - ); - - let rule = parse_cf("~test.*,~a.test.*##.selector").unwrap(); - assert_eq!( - CosmeticFilterBreakdown::from(rule.hidden_generic_rule().unwrap()), - parse_cf("##.selector").unwrap().into(), - ); - - let rule = parse_cf("test.com##.selector:style(border-radius: 13px)").unwrap(); - assert!(rule.hidden_generic_rule().is_none()); - - let rule = parse_cf("test.*##.selector:style(border-radius: 13px)").unwrap(); - assert!(rule.hidden_generic_rule().is_none()); - - let rule = parse_cf("~test.com##.selector:style(border-radius: 13px)").unwrap(); - assert!(rule.hidden_generic_rule().is_none()); - - let rule = parse_cf("~test.*##.selector:style(border-radius: 13px)").unwrap(); - assert!(rule.hidden_generic_rule().is_none()); - - let rule = parse_cf("test.com#@#.selector:style(border-radius: 13px)").unwrap(); - assert!(rule.hidden_generic_rule().is_none()); - - let rule = parse_cf("test.com##+js(nowebrtc.js)").unwrap(); - assert!(rule.hidden_generic_rule().is_none()); - - let rule = parse_cf("test.*##+js(nowebrtc.js)").unwrap(); - assert!(rule.hidden_generic_rule().is_none()); - - let rule = parse_cf("~test.com##+js(nowebrtc.js)").unwrap(); - assert!(rule.hidden_generic_rule().is_none()); - - let rule = parse_cf("~test.*##+js(nowebrtc.js)").unwrap(); - assert!(rule.hidden_generic_rule().is_none()); - - let rule = parse_cf("test.com#@#+js(nowebrtc.js)").unwrap(); - assert!(rule.hidden_generic_rule().is_none()); - } } #[cfg(test)] -mod util_tests { - use super::*; - use crate::utils::fast_hash; - - #[test] - fn label_hashing() { - assert_eq!(get_hashes_from_labels("foo.bar.baz", 11, 11), vec![fast_hash("baz"), fast_hash("bar.baz"), fast_hash("foo.bar.baz")]); - assert_eq!(get_hashes_from_labels("foo.bar.baz.com", 15, 8), vec![fast_hash("baz.com"), fast_hash("bar.baz.com"), fast_hash("foo.bar.baz.com")]); - assert_eq!(get_hashes_from_labels("foo.bar.baz.com", 11, 11), vec![fast_hash("baz"), fast_hash("bar.baz"), fast_hash("foo.bar.baz")]); - assert_eq!(get_hashes_from_labels("foo.bar.baz.com", 11, 8), vec![fast_hash("baz"), fast_hash("bar.baz"), fast_hash("foo.bar.baz")]); - } - - #[test] - fn without_public_suffix() { - assert_eq!(get_hostname_without_public_suffix("", ""), None); - assert_eq!(get_hostname_without_public_suffix("com", ""), None); - assert_eq!(get_hostname_without_public_suffix("com", "com"), None); - assert_eq!(get_hostname_without_public_suffix("foo.com", "foo.com"), Some(("foo", "com"))); - assert_eq!(get_hostname_without_public_suffix("foo.bar.com", "bar.com"), Some(("foo.bar", "com"))); - assert_eq!(get_hostname_without_public_suffix("test.github.io", "test.github.io"), Some(("test", "github.io"))); - } -} - -#[cfg(test)] -mod matching_tests { - use super::*; - use crate::utils::bin_lookup; - - trait MatchByStr { - fn matches(&self, request_entities: &[Hash], request_hostnames: &[Hash]) -> bool; - fn matches_str(&self, hostname: &str, domain: &str) -> bool; - } - - impl MatchByStr for CosmeticFilter { - /// `hostname` and `domain` should be specified as, e.g. "subdomain.domain.com" and - /// "domain.com", respectively. This function will panic if the specified `domain` is - /// longer than the specified `hostname`. - fn matches_str(&self, hostname: &str, domain: &str) -> bool { - debug_assert!(hostname.len() >= domain.len()); - - let request_entities = get_entity_hashes_from_labels(hostname, domain); - - let request_hostnames = get_hostname_hashes_from_labels(hostname, domain); - - self.matches(&request_entities[..], &request_hostnames[..]) - } - - /// Check whether this rule applies to content from the hostname and domain corresponding to - /// the provided hash lists. - /// - /// See the `matches_str` test function for an example of how to convert hostnames and - /// domains into the appropriate hash lists. - fn matches(&self, request_entities: &[Hash], request_hostnames: &[Hash]) -> bool { - let has_hostname_constraint = self.has_hostname_constraint(); - if !has_hostname_constraint { - return true; - } - if request_entities.is_empty() - && request_hostnames.is_empty() - && has_hostname_constraint - { - return false; - } - - if let Some(ref filter_not_hostnames) = self.not_hostnames { - if request_hostnames - .iter() - .any(|hash| bin_lookup(filter_not_hostnames, *hash)) - { - return false; - } - } - - if let Some(ref filter_not_entities) = self.not_entities { - if request_entities - .iter() - .any(|hash| bin_lookup(filter_not_entities, *hash)) - { - return false; - } - } - - if self.hostnames.is_some() || self.entities.is_some() { - if let Some(ref filter_hostnames) = self.hostnames { - if request_hostnames - .iter() - .any(|hash| bin_lookup(filter_hostnames, *hash)) - { - return true; - } - } - - if let Some(ref filter_entities) = self.entities { - if request_entities - .iter() - .any(|hash| bin_lookup(filter_entities, *hash)) - { - return true; - } - } - - return false; - } - - true - } - } - - fn parse_cf(rule: &str) -> Result { - CosmeticFilter::parse(rule, false, Default::default()) - } - - #[test] - fn generic_filter() { - let rule = parse_cf("##.selector").unwrap(); - assert!(rule.matches_str("foo.com", "foo.com")); - } - - #[test] - fn single_domain() { - let rule = parse_cf("foo.com##.selector").unwrap(); - assert!(rule.matches_str("foo.com", "foo.com")); - assert!(!rule.matches_str("bar.com", "bar.com")); - } - - #[test] - fn multiple_domains() { - let rule = parse_cf("foo.com,test.com##.selector").unwrap(); - assert!(rule.matches_str("foo.com", "foo.com")); - assert!(rule.matches_str("test.com", "test.com")); - assert!(!rule.matches_str("bar.com", "bar.com")); - } - - #[test] - fn subdomain() { - let rule = parse_cf("foo.com,test.com##.selector").unwrap(); - assert!(rule.matches_str("sub.foo.com", "foo.com")); - assert!(rule.matches_str("sub.test.com", "test.com")); - - let rule = parse_cf("foo.com,sub.test.com##.selector").unwrap(); - assert!(rule.matches_str("sub.test.com", "test.com")); - assert!(!rule.matches_str("test.com", "test.com")); - assert!(!rule.matches_str("com", "com")); - } - - #[test] - fn entity() { - let rule = parse_cf("foo.com,sub.test.*##.selector").unwrap(); - assert!(rule.matches_str("foo.com", "foo.com")); - assert!(rule.matches_str("bar.foo.com", "foo.com")); - assert!(rule.matches_str("sub.test.com", "test.com")); - assert!(rule.matches_str("sub.test.fr", "test.fr")); - assert!(!rule.matches_str("sub.test.evil.biz", "evil.biz")); - - let rule = parse_cf("foo.*##.selector").unwrap(); - assert!(rule.matches_str("foo.co.uk", "foo.co.uk")); - assert!(rule.matches_str("bar.foo.co.uk", "foo.co.uk")); - assert!(rule.matches_str("baz.bar.foo.co.uk", "foo.co.uk")); - assert!(!rule.matches_str("foo.evil.biz", "evil.biz")); - } - - #[test] - fn nonmatching() { - let rule = parse_cf("foo.*##.selector").unwrap(); - assert!(!rule.matches_str("foo.bar.com", "bar.com")); - assert!(!rule.matches_str("bar-foo.com", "bar-foo.com")); - } - - #[test] - fn entity_negations() { - let rule = parse_cf("~foo.*##.selector").unwrap(); - assert!(!rule.matches_str("foo.com", "foo.com")); - assert!(rule.matches_str("foo.evil.biz", "evil.biz")); - - let rule = parse_cf("~foo.*,~bar.*##.selector").unwrap(); - assert!(rule.matches_str("baz.com", "baz.com")); - assert!(!rule.matches_str("foo.com", "foo.com")); - assert!(!rule.matches_str("sub.foo.com", "foo.com")); - assert!(!rule.matches_str("bar.com", "bar.com")); - assert!(!rule.matches_str("sub.bar.com", "bar.com")); - } - - #[test] - fn hostname_negations() { - let rule = parse_cf("~foo.com##.selector").unwrap(); - assert!(!rule.matches_str("foo.com", "foo.com")); - assert!(!rule.matches_str("bar.foo.com", "foo.com")); - assert!(rule.matches_str("foo.com.bar", "com.bar")); - assert!(rule.matches_str("foo.co.uk", "foo.co.uk")); - - let rule = parse_cf("~foo.com,~foo.de,~bar.com##.selector").unwrap(); - assert!(!rule.matches_str("foo.com", "foo.com")); - assert!(!rule.matches_str("sub.foo.com", "foo.com")); - assert!(!rule.matches_str("foo.de", "foo.de")); - assert!(!rule.matches_str("sub.foo.de", "foo.de")); - assert!(!rule.matches_str("bar.com", "bar.com")); - assert!(!rule.matches_str("sub.bar.com", "bar.com")); - assert!(rule.matches_str("bar.de", "bar.de")); - assert!(rule.matches_str("sub.bar.de", "bar.de")); - } - - #[test] - fn entity_with_suffix_exception() { - let rule = parse_cf("foo.*,~foo.com##.selector").unwrap(); - assert!(!rule.matches_str("foo.com", "foo.com")); - assert!(!rule.matches_str("sub.foo.com", "foo.com")); - assert!(rule.matches_str("foo.de", "foo.de")); - assert!(rule.matches_str("sub.foo.de", "foo.de")); - } - - #[test] - fn entity_with_subdomain_exception() { - let rule = parse_cf("foo.*,~sub.foo.*##.selector").unwrap(); - assert!(rule.matches_str("foo.com", "foo.com")); - assert!(rule.matches_str("foo.de", "foo.de")); - assert!(!rule.matches_str("sub.foo.com", "foo.com")); - assert!(!rule.matches_str("bar.com", "bar.com")); - assert!(rule.matches_str("sub2.foo.com", "foo.com")); - } - - #[test] - fn no_domain_provided() { - let rule = parse_cf("foo.*##.selector").unwrap(); - assert!(!rule.matches_str("foo.com", "")); - } - - #[test] - fn no_hostname_provided() { - let rule = parse_cf("domain.com##.selector").unwrap(); - assert!(!rule.matches_str("", "")); - let rule = parse_cf("domain.*##.selector").unwrap(); - assert!(!rule.matches_str("", "")); - let rule = parse_cf("~domain.*##.selector").unwrap(); - assert!(!rule.matches_str("", "")); - let rule = parse_cf("~domain.com##.selector").unwrap(); - assert!(!rule.matches_str("", "")); - } - - #[test] - fn respects_etld() { - let rule = parse_cf("github.io##.selector").unwrap(); - assert!(rule.matches_str("test.github.io", "github.io")); - } - - #[test] - fn multiple_selectors() { - assert!(parse_cf("youtube.com##.masthead-ad-control,.ad-div,.pyv-afc-ads-container").is_ok()); - assert!(parse_cf("m.economictimes.com###appBanner,#stickyBanner").is_ok()); - assert!(parse_cf("googledrivelinks.com###wpsafe-generate, #wpsafe-link:style(display: block !important;)").is_ok()); - } - - #[test] - fn actions() { - assert!(parse_cf("example.com###adBanner:style(background: transparent)").is_ok()); - assert!(parse_cf("example.com###adBanner:remove()").is_ok()); - assert!(parse_cf("example.com###adBanner:remove-attr(style)").is_ok()); - assert!(parse_cf("example.com###adBanner:remove-class(src)").is_ok()); - } - - #[test] - fn zero_width_space() { - assert!(parse_cf(r#"​##a[href^="https://www.g2fame.com/"] > img"#).is_err()); - } - - #[test] - fn adg_regex() { - assert!(parse_cf(r"/^dizipal\d+\.com$/##.web").is_err()); - // Filter is still salvageable if at least one location is supported - assert!(parse_cf(r"/^dizipal\d+\.com,test.net$/##.web").is_ok()); - } - - #[test] - #[cfg(feature = "css-validation")] - fn abp_has_conversion() { - let rule = parse_cf("imgur.com#?#div.Gallery-Sidebar-PostContainer:-abp-has(div.promoted-hover)").unwrap(); - assert_eq!(rule.plain_css_selector(), Some("div.Gallery-Sidebar-PostContainer:has(div.promoted-hover)")); - let rule = parse_cf(r##"webtools.fineaty.com#?#div[class*=" hidden-"]:-abp-has(.adsbygoogle)"##).unwrap(); - assert_eq!(rule.plain_css_selector(), Some(r#"div[class*=" hidden-"]:has(.adsbygoogle)"#)); - let rule = parse_cf(r##"facebook.com,facebookcorewwwi.onion#?#._6y8t:-abp-has(a[href="/ads/about/?entry_product=ad_preferences"])"##).unwrap(); - assert_eq!(rule.plain_css_selector(), Some(r#"._6y8t:has(a[href="/ads/about/?entry_product=ad_preferences"])"#)); - let rule = parse_cf(r##"mtgarena.pro#?##root > div > div:-abp-has(> .vm-placement)"##).unwrap(); - assert_eq!(rule.plain_css_selector(), Some(r#"#root > div > div:has(> .vm-placement)"#)); - // Error without `#?#`: - assert!(parse_cf(r##"mtgarena.pro###root > div > div:-abp-has(> .vm-placement)"##).is_err()); - } -} +#[path = "../../tests/unit/filters/cosmetic.rs"] +mod unit_tests; diff --git a/src/filters/fb_network.rs b/src/filters/fb_network.rs new file mode 100644 index 00000000..d1e617d6 --- /dev/null +++ b/src/filters/fb_network.rs @@ -0,0 +1,307 @@ +use std::collections::HashMap; +use std::vec; + +use flatbuffers::WIPOffset; + +use crate::filters::network::{ + NetworkFilter, NetworkFilterMask, NetworkFilterMaskHelper, NetworkMatchable, +}; +use crate::network_filter_list::FlatNetworkFilterList; +use crate::regex_manager::RegexManager; +use crate::request::Request; +use crate::utils::Hash; + +#[allow(dead_code, unused_imports, unsafe_code)] +#[path = "../flat/fb_network_filter_generated.rs"] +pub mod flat; +use flat::fb; + +pub struct FlatNetworkFiltersListBuilder<'a> { + builder: flatbuffers::FlatBufferBuilder<'a>, + filters: Vec>>, + + unique_domains: Vec, + unique_domains_map: HashMap, +} + +impl<'a> FlatNetworkFiltersListBuilder<'a> { + pub fn new() -> Self { + Self { + builder: flatbuffers::FlatBufferBuilder::new(), + filters: vec![], + unique_domains: vec![], + unique_domains_map: HashMap::new(), + } + } + + fn get_or_insert(&mut self, h: &Hash) -> u16 { + if let Some(&index) = self.unique_domains_map.get(h) { + return index; + } + let index = self.unique_domains.len() as u16; + self.unique_domains.push(*h); + self.unique_domains_map.insert(*h, index); + return index; + } + + pub fn add(&mut self, network_filter: &NetworkFilter) -> u32 { + let opt_domains = network_filter.opt_domains.as_ref().map(|v| { + let mut o: Vec = v.iter().map(|x| self.get_or_insert(x)).collect(); + + o.sort_unstable(); + o.dedup(); + self.builder.create_vector(&o) + }); + + let opt_not_domains = network_filter.opt_not_domains.as_ref().map(|v| { + let mut o: Vec = v.iter().map(|x| self.get_or_insert(x)).collect(); + o.sort_unstable(); + o.dedup(); + self.builder.create_vector(&o) + }); + + let modifier_option = network_filter + .modifier_option + .as_ref() + .map(|s| self.builder.create_string(&s)); + + let hostname = network_filter + .hostname + .as_ref() + .map(|s| self.builder.create_string(&s)); + + let tag = network_filter + .tag + .as_ref() + .map(|s| self.builder.create_string(&s)); + + let patterns = if network_filter.filter.iter().len() > 0 { + let offsets: Vec> = network_filter + .filter + .iter() + .map(|s| self.builder.create_string(s)) + .collect(); + Some(self.builder.create_vector(&offsets)) + } else { + None + }; + + let raw_line = network_filter + .raw_line + .as_ref() + .map(|v| self.builder.create_string(v.as_str())); + + let filter = fb::NetworkFilter::create( + &mut self.builder, + &fb::NetworkFilterArgs { + mask: network_filter.mask.bits(), + patterns: patterns, + modifier_option: modifier_option, + opt_domains: opt_domains, + opt_not_domains: opt_not_domains, + hostname: hostname, + tag: tag, + raw_line: raw_line, + }, + ); + + self.filters.push(filter); + u32::try_from(self.filters.len() - 1).expect("< u32::MAX") + } + + pub fn finish(&mut self) -> Vec { + let filters = self.builder.create_vector(&self.filters); + + let unique_domains = self.builder.create_vector(&self.unique_domains); + + let storage = fb::NetworkFilterList::create( + &mut self.builder, + &&fb::NetworkFilterListArgs { + network_filters: Some(filters), + unique_domains_hashes: Some(unique_domains), + }, + ); + self.builder.finish(storage, None); + + let binary = Vec::from(self.builder.finished_data()); + binary + } +} +pub struct FlatPatterns<'a> { + patterns: Option>>, +} + +impl<'a> FlatPatterns<'a> { + #[inline(always)] + pub fn new( + patterns: Option>>, + ) -> Self { + Self { patterns } + } + + #[inline(always)] + pub fn iter(&self) -> FlatPatternsIterator { + FlatPatternsIterator { + patterns: self, + len: self.patterns.map_or(0, |d| d.len()), + index: 0, + } + } +} + +pub struct FlatPatternsIterator<'a> { + patterns: &'a FlatPatterns<'a>, + len: usize, + index: usize, +} + +impl<'a> Iterator for FlatPatternsIterator<'a> { + type Item = &'a str; + + #[inline(always)] + fn next(&mut self) -> Option { + self.patterns.patterns.map_or(None, |fi| { + if self.index < self.len { + self.index += 1; + Some(fi.get(self.index - 1)) + } else { + None + } + }) + } +} + +// Implement ExactSizeIterator for FilterPartIterator +impl<'a> ExactSizeIterator for FlatPatternsIterator<'a> { + #[inline(always)] + fn len(&self) -> usize { + self.len + } +} + +pub struct FlatNetworkFilter<'a> { + key: u64, + owner: &'a FlatNetworkFilterList, + fb_filter: &'a fb::NetworkFilter<'a>, + + pub mask: NetworkFilterMask, +} + +impl<'a> FlatNetworkFilter<'a> { + #[inline(always)] + pub fn new( + filter: &'a fb::NetworkFilter<'a>, + index: u32, + owner: &'a FlatNetworkFilterList, + ) -> Self { + let list_address: *const FlatNetworkFilterList = owner as *const FlatNetworkFilterList; + + Self { + fb_filter: filter, + key: index as u64 | (((list_address) as u64) << 32), + mask: unsafe { NetworkFilterMask::from_bits_unchecked(filter.mask()) }, + owner: owner, + } + } + + #[inline(always)] + pub fn tag(&self) -> Option<&'a str> { + self.fb_filter.tag() + } + + #[inline(always)] + pub fn modifier_option(&self) -> Option { + self.fb_filter.modifier_option().map(|o| o.to_string()) + } + + #[inline(always)] + pub fn include_domains(&self) -> Option<&[u16]> { + self.fb_filter.opt_domains().map(|data| { + let bytes = data.bytes(); + unsafe { + std::slice::from_raw_parts( + bytes.as_ptr() as *const u16, + bytes.len() / std::mem::size_of::(), + ) + } + }) + } + + #[inline(always)] + pub fn exclude_domains(&self) -> Option<&[u16]> { + self.fb_filter.opt_not_domains().map(|data| { + let bytes = data.bytes(); + unsafe { + std::slice::from_raw_parts( + bytes.as_ptr() as *const u16, + bytes.len() / std::mem::size_of::(), + ) + } + }) + } + + #[inline(always)] + pub fn hostname(&self) -> Option<&'a str> { + if self.mask.is_hostname_anchor() { + self.fb_filter.hostname() + } else { + None + } + } + + #[inline(always)] + pub fn patterns(&self) -> FlatPatterns { + FlatPatterns::new(self.fb_filter.patterns()) + } + + #[inline(always)] + pub fn raw_line(&self) -> Option { + self.fb_filter.raw_line().map(|v| v.to_string()) + } +} + +impl<'a> NetworkFilterMaskHelper for FlatNetworkFilter<'a> { + #[inline] + fn has_flag(&self, v: NetworkFilterMask) -> bool { + self.mask.contains(v) + } +} + +impl<'a> NetworkMatchable for FlatNetworkFilter<'a> { + fn matches(&self, request: &Request, regex_manager: &mut RegexManager) -> bool { + use crate::filters::network_matchers::{ + check_excluded_domains_mapped, check_included_domains_mapped, check_options, + check_pattern, + }; + if !check_options(self.mask, request) { + return false; + } + if !check_included_domains_mapped( + self.include_domains(), + request, + &self.owner.domain_hashes_mapping, + ) { + return false; + } + if !check_excluded_domains_mapped( + self.exclude_domains(), + request, + &self.owner.domain_hashes_mapping, + ) { + return false; + } + check_pattern( + self.mask, + self.patterns().iter(), + self.hostname(), + self.key, + request, + regex_manager, + ) + } + + #[cfg(test)] + fn matches_test(&self, request: &Request) -> bool { + self.matches(request, &mut RegexManager::default()) + } +} diff --git a/src/filters/mod.rs b/src/filters/mod.rs index 7b0f52ce..91042d96 100644 --- a/src/filters/mod.rs +++ b/src/filters/mod.rs @@ -1,4 +1,8 @@ //! Contains representations and standalone behaviors of individual filter rules. pub mod cosmetic; +pub mod fb_network; pub mod network; + +mod abstract_network; +mod network_matchers; diff --git a/src/filters/network.rs b/src/filters/network.rs index 6dd2e822..a2fdc337 100644 --- a/src/filters/network.rs +++ b/src/filters/network.rs @@ -1,17 +1,17 @@ //! Filters that take effect at the network request level, including blocking and response //! modification. -use memchr::{memchr as find_char, memmem, memrchr as find_char_reverse}; +use memchr::memchr as find_char; use once_cell::sync::Lazy; -use regex::{ - bytes::Regex as BytesRegex, bytes::RegexBuilder as BytesRegexBuilder, - bytes::RegexSet as BytesRegexSet, bytes::RegexSetBuilder as BytesRegexSetBuilder, Regex, -}; +use regex::Regex; use serde::{Deserialize, Serialize}; use thiserror::Error; use std::fmt; +use crate::filters::abstract_network::{ + AbstractNetworkFilter, NetworkFilterLeftAnchor, NetworkFilterOption, NetworkFilterRightAnchor, +}; use crate::lists::ParseOptions; use crate::regex_manager::RegexManager; use crate::request; @@ -150,6 +150,124 @@ bitflags::bitflags! { const NONE = 0; } } +pub trait NetworkFilterMaskHelper { + fn has_flag(&self, v: NetworkFilterMask) -> bool; + + #[inline] + fn is_exception(&self) -> bool { + self.has_flag(NetworkFilterMask::IS_EXCEPTION) + } + + #[inline] + fn is_hostname_anchor(&self) -> bool { + self.has_flag(NetworkFilterMask::IS_HOSTNAME_ANCHOR) + } + + #[inline] + fn is_right_anchor(&self) -> bool { + self.has_flag(NetworkFilterMask::IS_RIGHT_ANCHOR) + } + + #[inline] + fn is_left_anchor(&self) -> bool { + self.has_flag(NetworkFilterMask::IS_LEFT_ANCHOR) + } + + #[inline] + fn match_case(&self) -> bool { + self.has_flag(NetworkFilterMask::MATCH_CASE) + } + + #[inline] + fn is_important(&self) -> bool { + self.has_flag(NetworkFilterMask::IS_IMPORTANT) + } + + #[inline] + fn is_redirect(&self) -> bool { + self.has_flag(NetworkFilterMask::IS_REDIRECT) + } + + #[inline] + fn is_removeparam(&self) -> bool { + self.has_flag(NetworkFilterMask::IS_REMOVEPARAM) + } + + #[inline] + fn also_block_redirect(&self) -> bool { + self.has_flag(NetworkFilterMask::ALSO_BLOCK_REDIRECT) + } + + #[inline] + fn is_badfilter(&self) -> bool { + self.has_flag(NetworkFilterMask::BAD_FILTER) + } + + #[inline] + fn is_generic_hide(&self) -> bool { + self.has_flag(NetworkFilterMask::GENERIC_HIDE) + } + + #[inline] + fn is_regex(&self) -> bool { + self.has_flag(NetworkFilterMask::IS_REGEX) + } + + #[inline] + fn is_complete_regex(&self) -> bool { + self.has_flag(NetworkFilterMask::IS_COMPLETE_REGEX) + } + + #[inline] + fn is_plain(&self) -> bool { + !self.is_regex() + } + + #[inline] + fn is_csp(&self) -> bool { + self.has_flag(NetworkFilterMask::IS_CSP) + } + + #[inline] + fn third_party(&self) -> bool { + self.has_flag(NetworkFilterMask::THIRD_PARTY) + } + + #[inline] + fn first_party(&self) -> bool { + self.has_flag(NetworkFilterMask::FIRST_PARTY) + } + + #[inline] + fn for_http(&self) -> bool { + self.has_flag(NetworkFilterMask::FROM_HTTP) + } + + #[inline] + fn for_https(&self) -> bool { + self.has_flag(NetworkFilterMask::FROM_HTTPS) + } + + #[inline] + fn check_cpt_allowed(&self, cpt: &request::RequestType) -> bool { + match NetworkFilterMask::from(cpt) { + // TODO this is not ideal, but required to allow regexed exception rules without an + // explicit `$document` option to apply uBO-style. + // See also: https://github.com/uBlockOrigin/uBlock-issues/issues/1501 + NetworkFilterMask::FROM_DOCUMENT => { + self.has_flag(NetworkFilterMask::FROM_DOCUMENT) || self.is_exception() + } + mask => self.has_flag(mask), + } + } +} + +impl NetworkFilterMaskHelper for NetworkFilterMask { + #[inline] + fn has_flag(&self, v: NetworkFilterMask) -> bool { + self.contains(v) + } +} impl fmt::Display for NetworkFilterMask { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { @@ -181,47 +299,56 @@ impl From<&request::RequestType> for NetworkFilterMask { } } -#[derive(Debug, Clone)] -pub enum CompiledRegex { - Compiled(BytesRegex), - CompiledSet(BytesRegexSet), - MatchAll, - RegexParsingError(regex::Error), +#[derive(Debug, Clone, Serialize, Deserialize)] +pub enum FilterPart { + Empty, + Simple(String), + AnyOf(Vec), } -impl CompiledRegex { - pub fn is_match(&self, pattern: &str) -> bool { - match &self { - CompiledRegex::MatchAll => true, // simple case for matching everything, e.g. for empty filter - CompiledRegex::RegexParsingError(_e) => false, // no match if regex didn't even compile - CompiledRegex::Compiled(r) => r.is_match(pattern.as_bytes()), - CompiledRegex::CompiledSet(r) => { - // let matches: Vec<_> = r.matches(pattern).into_iter().collect(); - // println!("Matching {} against RegexSet: {:?}", pattern, matches); - r.is_match(pattern.as_bytes()) +pub struct FilterPartIterator<'a> { + filter_part: &'a FilterPart, + index: usize, +} + +impl<'a> Iterator for FilterPartIterator<'a> { + type Item = &'a str; + + fn next(&mut self) -> Option { + match self.filter_part { + FilterPart::Empty => None, + FilterPart::Simple(s) => { + if self.index == 0 { + self.index += 1; + Some(s.as_str()) + } else { + None + } + } + FilterPart::AnyOf(vec) => { + if self.index < vec.len() { + let result = Some(vec[self.index].as_str()); + self.index += 1; + result + } else { + None + } } } } } -impl fmt::Display for CompiledRegex { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - match &self { - CompiledRegex::MatchAll => write!(f, ".*"), // simple case for matching everything, e.g. for empty filter - CompiledRegex::RegexParsingError(_e) => write!(f, "ERROR"), // no match if regex didn't even compile - CompiledRegex::Compiled(r) => write!(f, "{}", r.as_str()), - CompiledRegex::CompiledSet(r) => write!(f, "{}", r.patterns().join(" | ")), +// Implement ExactSizeIterator for FilterPartIterator +impl<'a> ExactSizeIterator for FilterPartIterator<'a> { + fn len(&self) -> usize { + match self.filter_part { + FilterPart::Empty => 0, + FilterPart::Simple(_) => 1, + FilterPart::AnyOf(vec) => vec.len(), } } } -#[derive(Debug, Clone, Serialize, Deserialize)] -pub enum FilterPart { - Empty, - Simple(String), - AnyOf(Vec), -} - impl FilterPart { pub fn string_view(&self) -> Option { match &self { @@ -230,240 +357,15 @@ impl FilterPart { FilterPart::AnyOf(s) => Some(s.join("|")), } } -} - -#[derive(Clone, Copy)] -enum NetworkFilterLeftAnchor { - /// A `||` token, which represents a match to the start of a domain or subdomain segment. - DoublePipe, - /// A `|` token, which represents a match to the exact start of the URL. - SinglePipe, -} - -#[derive(Clone, Copy)] -enum NetworkFilterRightAnchor { - /// A `|` token, which represents a match to the exact end of the URL. - SinglePipe, -} - -/// Pattern for a network filter, describing what URLs to match against. -#[derive(Clone)] -struct NetworkFilterPattern { - left_anchor: Option, - pattern: String, - right_anchor: Option, -} - -/// Any option that appears on the right side of a network filter as initiated by a `$` character. -/// All `bool` arguments below are `true` if the option stands alone, or `false` if the option is -/// negated using a prepended `~`. -#[derive(Clone)] -enum NetworkFilterOption { - Domain(Vec<(bool, String)>), - Badfilter, - Important, - MatchCase, - ThirdParty(bool), - FirstParty(bool), - Tag(String), - Redirect(String), - RedirectRule(String), - Csp(Option), - Removeparam(String), - Generichide, - Document, - Image(bool), - Media(bool), - Object(bool), - Other(bool), - Ping(bool), - Script(bool), - Stylesheet(bool), - Subdocument(bool), - XmlHttpRequest(bool), - Websocket(bool), - Font(bool), -} - -impl NetworkFilterOption { - pub fn is_content_type(&self) -> bool { - matches!(self, Self::Document - | Self::Image(..) - | Self::Media(..) - | Self::Object(..) - | Self::Other(..) - | Self::Ping(..) - | Self::Script(..) - | Self::Stylesheet(..) - | Self::Subdocument(..) - | Self::XmlHttpRequest(..) - | Self::Websocket(..) - | Self::Font(..)) - } - - pub fn is_redirection(&self) -> bool { - matches!(self, Self::Redirect(..) | Self::RedirectRule(..)) - } -} - -/// Abstract syntax representation of a network filter. This representation can fully specify the -/// string representation of a filter as written, with the exception of aliased options like `1p` -/// or `ghide`. This allows separation of concerns between parsing and interpretation. -struct AbstractNetworkFilter { - exception: bool, - pattern: NetworkFilterPattern, - options: Option>, -} - -impl AbstractNetworkFilter { - fn parse(line: &str) -> Result { - let mut filter_index_start: usize = 0; - let mut filter_index_end: usize = line.len(); - - let mut exception = false; - if line.starts_with("@@") { - filter_index_start += 2; - exception = true; - } - - let maybe_options_index: Option = find_char_reverse(b'$', line.as_bytes()); - - let mut options = None; - if let Some(options_index) = maybe_options_index { - filter_index_end = options_index; - // slicing here is safe; the first byte after '$' will be a character boundary - let raw_options = &line[filter_index_end + 1..]; - - options = Some(parse_filter_options(raw_options)?); + pub fn iter(&self) -> FilterPartIterator { + FilterPartIterator { + filter_part: self, + index: 0, } - - let left_anchor = if line[filter_index_start..].starts_with("||") { - filter_index_start += 2; - Some(NetworkFilterLeftAnchor::DoublePipe) - } else if line[filter_index_start..].starts_with('|') { - filter_index_start += 1; - Some(NetworkFilterLeftAnchor::SinglePipe) - } else { - None - }; - - let right_anchor = if filter_index_end > 0 && filter_index_end > filter_index_start && line[..filter_index_end].ends_with('|') { - filter_index_end -= 1; - Some(NetworkFilterRightAnchor::SinglePipe) - } else { - None - }; - - let pattern = &line[filter_index_start..filter_index_end]; - - Ok(AbstractNetworkFilter { - exception, - pattern: NetworkFilterPattern { - left_anchor, - pattern: pattern.to_string(), - right_anchor, - }, - options, - }) - } -} - -fn parse_filter_options(raw_options: &str) -> Result, NetworkFilterError> { - let mut result = vec![]; - - for raw_option in raw_options.split(',') { - // Check for negation: ~option - let negation = raw_option.starts_with('~'); - let maybe_negated_option = raw_option.trim_start_matches('~'); - - // Check for options: option=value1|value2 - let mut option_and_values = maybe_negated_option.splitn(2, '='); - let (option, value) = ( - option_and_values.next().unwrap(), - option_and_values.next().unwrap_or_default(), - ); - - result.push(match (option, negation) { - ("domain", _) | ("from", _) => { - let domains: Vec<(bool, String)> = value.split('|').map(|domain| { - if let Some(negated_domain) = domain.strip_prefix('~') { - (false, negated_domain.to_string()) - } else { - (true, domain.to_string()) - } - }) - .filter(|(_, d)| !(d.starts_with('/') && d.ends_with('/'))) - .collect(); - if domains.is_empty() { - return Err(NetworkFilterError::NoSupportedDomains); - } - NetworkFilterOption::Domain(domains) - } - ("badfilter", true) => return Err(NetworkFilterError::NegatedBadFilter), - ("badfilter", false) => NetworkFilterOption::Badfilter, - ("important", true) => return Err(NetworkFilterError::NegatedImportant), - ("important", false) => NetworkFilterOption::Important, - ("match-case", true) => return Err(NetworkFilterError::NegatedOptionMatchCase), - ("match-case", false) => NetworkFilterOption::MatchCase, - ("third-party", negated) | ("3p", negated) => NetworkFilterOption::ThirdParty(!negated), - ("first-party", negated) | ("1p", negated) => NetworkFilterOption::FirstParty(!negated), - ("tag", true) => return Err(NetworkFilterError::NegatedTag), - ("tag", false) => NetworkFilterOption::Tag(String::from(value)), - ("redirect", true) => return Err(NetworkFilterError::NegatedRedirection), - ("redirect", false) => { - // Ignore this filter if no redirection resource is specified - if value.is_empty() { - return Err(NetworkFilterError::EmptyRedirection); - } - - NetworkFilterOption::Redirect(String::from(value)) - } - ("redirect-rule", true) => return Err(NetworkFilterError::NegatedRedirection), - ("redirect-rule", false) => { - if value.is_empty() { - return Err(NetworkFilterError::EmptyRedirection); - } - - NetworkFilterOption::RedirectRule(String::from(value)) - } - ("csp", _) => NetworkFilterOption::Csp(if !value.is_empty() { - Some(String::from(value)) - } else { - None - }), - ("removeparam", true) => return Err(NetworkFilterError::NegatedRemoveparam), - ("removeparam", false) => { - if value.is_empty() { - return Err(NetworkFilterError::EmptyRemoveparam); - } - if !VALID_PARAM.is_match(value) { - return Err(NetworkFilterError::RemoveparamRegexUnsupported); - } - NetworkFilterOption::Removeparam(String::from(value)) - } - ("generichide", true) | ("ghide", true) => return Err(NetworkFilterError::NegatedGenericHide), - ("generichide", false) | ("ghide", false) => NetworkFilterOption::Generichide, - ("document", true) | ("doc", true) => return Err(NetworkFilterError::NegatedDocument), - ("document", false) | ("doc", false) => NetworkFilterOption::Document, - ("image", negated) => NetworkFilterOption::Image(!negated), - ("media", negated) => NetworkFilterOption::Media(!negated), - ("object", negated) | ("object-subrequest", negated) => NetworkFilterOption::Object(!negated), - ("other", negated) => NetworkFilterOption::Other(!negated), - ("ping", negated) | ("beacon", negated) => NetworkFilterOption::Ping(!negated), - ("script", negated) => NetworkFilterOption::Script(!negated), - ("stylesheet", negated) | ("css", negated) => NetworkFilterOption::Stylesheet(!negated), - ("subdocument", negated) | ("frame", negated) => NetworkFilterOption::Subdocument(!negated), - ("xmlhttprequest", negated) | ("xhr", negated) => NetworkFilterOption::XmlHttpRequest(!negated), - ("websocket", negated) => NetworkFilterOption::Websocket(!negated), - ("font", negated) => NetworkFilterOption::Font(!negated), - (_, _) => return Err(NetworkFilterError::UnrecognisedOption), - }); } - Ok(result) } - #[derive(Debug, Clone, Serialize, Deserialize)] pub struct NetworkFilter { pub mask: NetworkFilterMask, @@ -513,7 +415,8 @@ fn validate_options(options: &[NetworkFilterOption]) -> Result<(), NetworkFilter modifier_options += 1; } else if option.is_content_type() { has_content_type = true; - } else if option.is_redirection() || matches!(option, NetworkFilterOption::Removeparam(..)) { + } else if option.is_redirection() || matches!(option, NetworkFilterOption::Removeparam(..)) + { modifier_options += 1; } } @@ -528,6 +431,10 @@ fn validate_options(options: &[NetworkFilterOption]) -> Result<(), NetworkFilter } impl NetworkFilter { + pub fn key(&self) -> u64 { + (self as *const Self) as u64 + } + pub fn parse(line: &str, debug: bool, _opts: ParseOptions) -> Result { let parsed = AbstractNetworkFilter::parse(line)?; @@ -590,20 +497,30 @@ impl NetworkFilter { if !opt_domains_array.is_empty() { opt_domains_array.sort_unstable(); - opt_domains_union = Some(opt_domains_array.iter().fold(0, |acc, x| acc | x)); + opt_domains_union = + Some(opt_domains_array.iter().fold(0, |acc, x| acc | x)); opt_domains = Some(opt_domains_array); } if !opt_not_domains_array.is_empty() { opt_not_domains_array.sort_unstable(); - opt_not_domains_union = Some(opt_not_domains_array.iter().fold(0, |acc, x| acc | x)); + opt_not_domains_union = + Some(opt_not_domains_array.iter().fold(0, |acc, x| acc | x)); opt_not_domains = Some(opt_not_domains_array); } } NetworkFilterOption::Badfilter => mask.set(NetworkFilterMask::BAD_FILTER, true), - NetworkFilterOption::Important => mask.set(NetworkFilterMask::IS_IMPORTANT, true), + NetworkFilterOption::Important => { + mask.set(NetworkFilterMask::IS_IMPORTANT, true) + } NetworkFilterOption::MatchCase => mask.set(NetworkFilterMask::MATCH_CASE, true), - NetworkFilterOption::ThirdParty(false) | NetworkFilterOption::FirstParty(true) => mask.set(NetworkFilterMask::THIRD_PARTY, false), - NetworkFilterOption::ThirdParty(true) | NetworkFilterOption::FirstParty(false) => mask.set(NetworkFilterMask::FIRST_PARTY, false), + NetworkFilterOption::ThirdParty(false) + | NetworkFilterOption::FirstParty(true) => { + mask.set(NetworkFilterMask::THIRD_PARTY, false) + } + NetworkFilterOption::ThirdParty(true) + | NetworkFilterOption::FirstParty(false) => { + mask.set(NetworkFilterMask::FIRST_PARTY, false) + } NetworkFilterOption::Tag(value) => tag = Some(value), NetworkFilterOption::Redirect(value) => { mask.set(NetworkFilterMask::IS_REDIRECT, true); @@ -626,18 +543,34 @@ impl NetworkFilter { mask.set(NetworkFilterMask::FROM_DOCUMENT, true); modifier_option = value; } - NetworkFilterOption::Generichide => mask.set(NetworkFilterMask::GENERIC_HIDE, true), - NetworkFilterOption::Document => cpt_mask_positive.set(NetworkFilterMask::FROM_DOCUMENT, true), + NetworkFilterOption::Generichide => { + mask.set(NetworkFilterMask::GENERIC_HIDE, true) + } + NetworkFilterOption::Document => { + cpt_mask_positive.set(NetworkFilterMask::FROM_DOCUMENT, true) + } NetworkFilterOption::Image(enabled) => apply_content_type!(FROM_IMAGE, enabled), NetworkFilterOption::Media(enabled) => apply_content_type!(FROM_MEDIA, enabled), - NetworkFilterOption::Object(enabled) => apply_content_type!(FROM_OBJECT, enabled), + NetworkFilterOption::Object(enabled) => { + apply_content_type!(FROM_OBJECT, enabled) + } NetworkFilterOption::Other(enabled) => apply_content_type!(FROM_OTHER, enabled), NetworkFilterOption::Ping(enabled) => apply_content_type!(FROM_PING, enabled), - NetworkFilterOption::Script(enabled) => apply_content_type!(FROM_SCRIPT, enabled), - NetworkFilterOption::Stylesheet(enabled) => apply_content_type!(FROM_STYLESHEET, enabled), - NetworkFilterOption::Subdocument(enabled) => apply_content_type!(FROM_SUBDOCUMENT, enabled), - NetworkFilterOption::XmlHttpRequest(enabled) => apply_content_type!(FROM_XMLHTTPREQUEST, enabled), - NetworkFilterOption::Websocket(enabled) => apply_content_type!(FROM_WEBSOCKET, enabled), + NetworkFilterOption::Script(enabled) => { + apply_content_type!(FROM_SCRIPT, enabled) + } + NetworkFilterOption::Stylesheet(enabled) => { + apply_content_type!(FROM_STYLESHEET, enabled) + } + NetworkFilterOption::Subdocument(enabled) => { + apply_content_type!(FROM_SUBDOCUMENT, enabled) + } + NetworkFilterOption::XmlHttpRequest(enabled) => { + apply_content_type!(FROM_XMLHTTPREQUEST, enabled) + } + NetworkFilterOption::Websocket(enabled) => { + apply_content_type!(FROM_WEBSOCKET, enabled) + } NetworkFilterOption::Font(enabled) => apply_content_type!(FROM_FONT, enabled), } }); @@ -650,7 +583,9 @@ impl NetworkFilter { // // This doesn't apply to removeparam filters. if !mask.contains(NetworkFilterMask::IS_REMOVEPARAM) - && (cpt_mask_negative & NetworkFilterMask::FROM_NETWORK_TYPES) != NetworkFilterMask::NONE { + && (cpt_mask_negative & NetworkFilterMask::FROM_NETWORK_TYPES) + != NetworkFilterMask::NONE + { mask |= NetworkFilterMask::FROM_NETWORK_TYPES; } // If no positive types were set, then the filter should apply to all network types. @@ -666,8 +601,12 @@ impl NetworkFilter { } match parsed.pattern.left_anchor { - Some(NetworkFilterLeftAnchor::DoublePipe) => mask.set(NetworkFilterMask::IS_HOSTNAME_ANCHOR, true), - Some(NetworkFilterLeftAnchor::SinglePipe) => mask.set(NetworkFilterMask::IS_LEFT_ANCHOR, true), + Some(NetworkFilterLeftAnchor::DoublePipe) => { + mask.set(NetworkFilterMask::IS_HOSTNAME_ANCHOR, true) + } + Some(NetworkFilterLeftAnchor::SinglePipe) => { + mask.set(NetworkFilterMask::IS_LEFT_ANCHOR, true) + } None => (), } @@ -715,7 +654,9 @@ impl NetworkFilter { // somewhere. // If the first separator is a wildcard, included in in hostname - if first_separator_start < pattern.len() && pattern[first_separator_start..=first_separator_start].starts_with('*') { + if first_separator_start < pattern.len() + && pattern[first_separator_start..=first_separator_start].starts_with('*') + { mask.set(NetworkFilterMask::IS_HOSTNAME_REGEX, true); } @@ -744,9 +685,7 @@ impl NetworkFilter { let slash_index = find_char(b'/', pattern.as_bytes()); slash_index .map(|i| { - hostname = Some(String::from( - &pattern[..i], - )); + hostname = Some(String::from(&pattern[..i])); filter_index_start += i; mask.set(NetworkFilterMask::IS_LEFT_ANCHOR, true); }) @@ -759,14 +698,12 @@ impl NetworkFilter { } // Remove trailing '*' - if filter_index_end > filter_index_start && pattern.ends_with('*') - { + if filter_index_end > filter_index_start && pattern.ends_with('*') { filter_index_end -= 1; } // Remove leading '*' if the filter is not hostname anchored. - if filter_index_end > filter_index_start && pattern[filter_index_start..].starts_with('*') - { + if filter_index_end > filter_index_start && pattern[filter_index_start..].starts_with('*') { mask.set(NetworkFilterMask::IS_LEFT_ANCHOR, false); filter_index_start += 1; } @@ -807,10 +744,7 @@ impl NetworkFilter { let filter: Option = if filter_index_end > filter_index_start { let filter_str = &pattern[filter_index_start..filter_index_end]; - mask.set( - NetworkFilterMask::IS_REGEX, - check_is_regex(filter_str), - ); + mask.set(NetworkFilterMask::IS_REGEX, check_is_regex(filter_str)); if mask.contains(NetworkFilterMask::MATCH_CASE) { Some(String::from(filter_str)) } else { @@ -822,21 +756,24 @@ impl NetworkFilter { // TODO: ignore hostname anchor is not hostname provided - let hostname_decoded = hostname.map(|host| { - let hostname_normalised = if mask.contains(NetworkFilterMask::IS_HOSTNAME_ANCHOR) { - host.trim_start_matches("www.") - } else { - &host - }; + let hostname_decoded = hostname + .map(|host| { + let hostname_normalised = if mask.contains(NetworkFilterMask::IS_HOSTNAME_ANCHOR) { + host.trim_start_matches("www.") + } else { + &host + }; - let lowercase = hostname_normalised.to_lowercase(); - let hostname = if lowercase.is_ascii() { - lowercase - } else { - idna::domain_to_ascii(&lowercase).map_err(|_| NetworkFilterError::PunycodeError)? - }; - Ok(hostname) - }).transpose(); + let lowercase = hostname_normalised.to_lowercase(); + let hostname = if lowercase.is_ascii() { + lowercase + } else { + idna::domain_to_ascii(&lowercase) + .map_err(|_| NetworkFilterError::PunycodeError)? + }; + Ok(hostname) + }) + .transpose(); if mask.contains(NetworkFilterMask::GENERIC_HIDE) && !parsed.exception { return Err(NetworkFilterError::GenericHideWithoutException); @@ -856,12 +793,13 @@ impl NetworkFilter { // filter, which isn't saved in Brave unless running with filter lists compiled in "debug" // mode. Instead, we apply the implicit document matching more strictly, only for hostname // filters of the form `||example.com^`. - if (cpt_mask_positive & NetworkFilterMask::FROM_ALL_TYPES).is_empty() && - (cpt_mask_negative & NetworkFilterMask::FROM_ALL_TYPES).is_empty() && - mask.contains(NetworkFilterMask::IS_HOSTNAME_ANCHOR) && - mask.contains(NetworkFilterMask::IS_RIGHT_ANCHOR) && - !end_url_anchor && - !mask.contains(NetworkFilterMask::IS_REMOVEPARAM) { + if (cpt_mask_positive & NetworkFilterMask::FROM_ALL_TYPES).is_empty() + && (cpt_mask_negative & NetworkFilterMask::FROM_ALL_TYPES).is_empty() + && mask.contains(NetworkFilterMask::IS_HOSTNAME_ANCHOR) + && mask.contains(NetworkFilterMask::IS_RIGHT_ANCHOR) + && !end_url_anchor + && !mask.contains(NetworkFilterMask::IS_REMOVEPARAM) + { mask |= NetworkFilterMask::FROM_ALL_TYPES; } // Finally, apply any explicitly negated request types @@ -894,13 +832,17 @@ impl NetworkFilter { /// emulate the behavior of hosts-style blocking. pub fn parse_hosts_style(hostname: &str, debug: bool) -> Result { // Make sure the hostname doesn't contain any invalid characters - static INVALID_CHARS: Lazy = Lazy::new(|| Regex::new("[/^*!?$&(){}\\[\\]+=~`\\s|@,'\"><:;]").unwrap()); + static INVALID_CHARS: Lazy = + Lazy::new(|| Regex::new("[/^*!?$&(){}\\[\\]+=~`\\s|@,'\"><:;]").unwrap()); if INVALID_CHARS.is_match(hostname) { return Err(NetworkFilterError::FilterParseError); } // This shouldn't be used to block an entire TLD, and the hostname shouldn't end with a dot - if find_char(b'.', hostname.as_bytes()).is_none() || (hostname.starts_with('.') && find_char(b'.', hostname[1..].as_bytes()).is_none()) || hostname.ends_with('.') { + if find_char(b'.', hostname.as_bytes()).is_none() + || (hostname.starts_with('.') && find_char(b'.', hostname[1..].as_bytes()).is_none()) + || hostname.ends_with('.') + { return Err(NetworkFilterError::FilterParseError); } @@ -912,7 +854,10 @@ impl NetworkFilter { if normalized_host.is_ascii() { hostname.push_str(normalized_host); } else { - hostname.push_str(&idna::domain_to_ascii(normalized_host).map_err(|_| NetworkFilterError::PunycodeError)?); + hostname.push_str( + &idna::domain_to_ascii(normalized_host) + .map_err(|_| NetworkFilterError::PunycodeError)?, + ); } hostname.push('^'); @@ -967,10 +912,10 @@ impl NetworkFilter { (self.is_plain() || self.is_regex()) && !self.is_right_anchor(); let skip_first_token = self.is_right_anchor(); - let mut filter_tokens = + let filter_tokens = utils::tokenize_filter(f, skip_first_token, skip_last_token); - tokens.append(&mut filter_tokens); + tokens.extend(&filter_tokens); } } FilterPart::AnyOf(_) => (), // across AnyOf set of filters no single token is guaranteed to match to a request @@ -979,17 +924,17 @@ impl NetworkFilter { // Append tokens from hostname, if any if !self.mask.contains(NetworkFilterMask::IS_HOSTNAME_REGEX) { - if let Some(hostname) = self.hostname.as_ref() { - let mut hostname_tokens = utils::tokenize(hostname); - tokens.append(&mut hostname_tokens); + if let Some(hostname) = self.hostname.as_ref() { + let hostname_tokens = utils::tokenize(hostname); + tokens.extend(&hostname_tokens); } } if tokens.is_empty() && self.mask.contains(NetworkFilterMask::IS_REMOVEPARAM) { if let Some(removeparam) = &self.modifier_option { if VALID_PARAM.is_match(removeparam) { - let mut param_tokens = utils::tokenize(&removeparam.to_ascii_lowercase()); - tokens.append(&mut param_tokens); + let param_tokens = utils::tokenize(&removeparam.to_ascii_lowercase()); + tokens.extend(¶m_tokens); } } } @@ -1014,91 +959,11 @@ impl NetworkFilter { vec![tokens] } } +} - pub fn is_exception(&self) -> bool { - self.mask.contains(NetworkFilterMask::IS_EXCEPTION) - } - - pub fn is_hostname_anchor(&self) -> bool { - self.mask.contains(NetworkFilterMask::IS_HOSTNAME_ANCHOR) - } - - pub fn is_right_anchor(&self) -> bool { - self.mask.contains(NetworkFilterMask::IS_RIGHT_ANCHOR) - } - - pub fn is_left_anchor(&self) -> bool { - self.mask.contains(NetworkFilterMask::IS_LEFT_ANCHOR) - } - - fn match_case(&self) -> bool { - self.mask.contains(NetworkFilterMask::MATCH_CASE) - } - - pub fn is_important(&self) -> bool { - self.mask.contains(NetworkFilterMask::IS_IMPORTANT) - } - - pub fn is_redirect(&self) -> bool { - self.mask.contains(NetworkFilterMask::IS_REDIRECT) - } - - pub fn is_removeparam(&self) -> bool { - self.mask.contains(NetworkFilterMask::IS_REMOVEPARAM) - } - - pub fn also_block_redirect(&self) -> bool { - self.mask.contains(NetworkFilterMask::ALSO_BLOCK_REDIRECT) - } - - pub fn is_badfilter(&self) -> bool { - self.mask.contains(NetworkFilterMask::BAD_FILTER) - } - - pub fn is_generic_hide(&self) -> bool { - self.mask.contains(NetworkFilterMask::GENERIC_HIDE) - } - - pub fn is_regex(&self) -> bool { - self.mask.contains(NetworkFilterMask::IS_REGEX) - } - - pub fn is_complete_regex(&self) -> bool { - self.mask.contains(NetworkFilterMask::IS_COMPLETE_REGEX) - } - - fn is_plain(&self) -> bool { - !self.is_regex() - } - - pub fn is_csp(&self) -> bool { - self.mask.contains(NetworkFilterMask::IS_CSP) - } - - fn third_party(&self) -> bool { - self.mask.contains(NetworkFilterMask::THIRD_PARTY) - } - - fn first_party(&self) -> bool { - self.mask.contains(NetworkFilterMask::FIRST_PARTY) - } - - fn for_http(&self) -> bool { - self.mask.contains(NetworkFilterMask::FROM_HTTP) - } - - fn for_https(&self) -> bool { - self.mask.contains(NetworkFilterMask::FROM_HTTPS) - } - - fn check_cpt_allowed(&self, cpt: &request::RequestType) -> bool { - match NetworkFilterMask::from(cpt) { - // TODO this is not ideal, but required to allow regexed exception rules without an - // explicit `$document` option to apply uBO-style. - // See also: https://github.com/uBlockOrigin/uBlock-issues/issues/1501 - NetworkFilterMask::FROM_DOCUMENT => self.mask.contains(NetworkFilterMask::FROM_DOCUMENT) || self.is_exception(), - mask => self.mask.contains(mask), - } +impl NetworkFilterMaskHelper for NetworkFilter { + fn has_flag(&self, v: NetworkFilterMask) -> bool { + self.mask.contains(v) } } @@ -1120,7 +985,20 @@ pub trait NetworkMatchable { impl NetworkMatchable for NetworkFilter { fn matches(&self, request: &request::Request, regex_manager: &mut RegexManager) -> bool { - check_options(self, request) && check_pattern(self, request, regex_manager) + use crate::filters::network_matchers::{ + check_excluded_domains, check_included_domains, check_options, check_pattern, + }; + check_options(self.mask, request) + && check_included_domains(self.opt_domains.as_deref(), request) + && check_excluded_domains(self.opt_not_domains.as_deref(), request) + && check_pattern( + self.mask, + self.filter.iter(), + self.hostname.as_deref(), + self.key(), + request, + regex_manager, + ) } #[cfg(test)] @@ -1179,80 +1057,6 @@ fn compute_filter_id( hash } -/// Compiles a filter pattern to a regex. This is only performed *lazily* for -/// filters containing at least a * or ^ symbol. Because Regexes are expansive, -/// we try to convert some patterns to plain filters. -#[allow(clippy::trivial_regex)] -pub(crate) fn compile_regex( - filter: &FilterPart, - is_right_anchor: bool, - is_left_anchor: bool, - is_complete_regex: bool, -) -> CompiledRegex { - // Escape special regex characters: |.$+?{}()[]\ - static SPECIAL_RE: Lazy = Lazy::new(|| Regex::new(r"([\|\.\$\+\?\{\}\(\)\[\]])").unwrap()); - // * can match anything - static WILDCARD_RE: Lazy = Lazy::new(|| Regex::new(r"\*").unwrap()); - // ^ can match any separator or the end of the pattern - static ANCHOR_RE: Lazy = Lazy::new(|| Regex::new(r"\^(.)").unwrap()); - // ^ can match any separator or the end of the pattern - static ANCHOR_RE_EOL: Lazy = Lazy::new(|| Regex::new(r"\^$").unwrap()); - - let filters: Vec = match filter { - FilterPart::Empty => vec![], - FilterPart::Simple(s) => vec![s.clone()], - FilterPart::AnyOf(f) => f.clone(), - }; - - let mut escaped_patterns = Vec::with_capacity(filters.len()); - for filter_str in filters { - // If any filter is empty, the entire set matches anything - if filter_str.is_empty() { - return CompiledRegex::MatchAll; - } - if is_complete_regex { - // unescape unrecognised escaping sequences, otherwise a normal regex - let unescaped = filter_str[1..filter_str.len() - 1] - .replace("\\/", "/") - .replace("\\:", ":"); - - escaped_patterns.push(unescaped); - } else { - let repl = SPECIAL_RE.replace_all(&filter_str, "\\$1"); - let repl = WILDCARD_RE.replace_all(&repl, ".*"); - // in adblock rules, '^' is a separator. - // The separator character is anything but a letter, a digit, or one of the following: _ - . % - let repl = ANCHOR_RE.replace_all(&repl, "(?:[^\\w\\d\\._%-])$1"); - let repl = ANCHOR_RE_EOL.replace_all(&repl, "(?:[^\\w\\d\\._%-]|$)"); - - // Should match start or end of url - let left_anchor = if is_left_anchor { "^" } else { "" }; - let right_anchor = if is_right_anchor { "$" } else { "" }; - let filter = format!("{}{}{}", left_anchor, repl, right_anchor); - - escaped_patterns.push(filter); - } - } - - if escaped_patterns.is_empty() { - CompiledRegex::MatchAll - } else if escaped_patterns.len() == 1 { - let pattern = &escaped_patterns[0]; - match BytesRegexBuilder::new(pattern).unicode(false).build() { - Ok(compiled) => CompiledRegex::Compiled(compiled), - Err(e) => { - // println!("Regex parsing failed ({:?})", e); - CompiledRegex::RegexParsingError(e) - } - } - } else { - match BytesRegexSetBuilder::new(escaped_patterns).unicode(false).build() { - Ok(compiled) => CompiledRegex::CompiledSet(compiled), - Err(e) => CompiledRegex::RegexParsingError(e), - } - } -} - /// Check if the sub-string contained between the indices start and end is a /// regex filter (it contains a '*' or '^' char). Here we are limited by the /// capability of javascript to check the presence of a pattern between two @@ -1264,2171 +1068,6 @@ fn check_is_regex(filter: &str) -> bool { start_index.is_some() || separator_index.is_some() } -/// Handle hostname anchored filters, given 'hostname' from ||hostname and -/// request's hostname, check if there is a match. This is tricky because -/// filters authors rely and different assumption. We can have prefix of suffix -/// matches of anchor. -fn is_anchored_by_hostname(filter_hostname: &str, hostname: &str, wildcard_filter_hostname: bool) -> bool { - let filter_hostname_len = filter_hostname.len(); - // Corner-case, if `filterHostname` is empty, then it's a match - if filter_hostname_len == 0 { - return true; - } - let hostname_len = hostname.len(); - - if filter_hostname_len > hostname_len { - // `filterHostname` cannot be longer than actual hostname - false - } else if filter_hostname_len == hostname_len { - // If they have the same len(), they should be equal - filter_hostname == hostname - } else if let Some(match_index) = memmem::find(hostname.as_bytes(), filter_hostname.as_bytes()) { - if match_index == 0 { - // `filter_hostname` is a prefix of `hostname` and needs to match full a label. - // - // Examples (filter_hostname, hostname): - // * (foo, foo.com) - // * (sub.foo, sub.foo.com) - wildcard_filter_hostname || filter_hostname.ends_with('.') || hostname[filter_hostname_len..].starts_with('.') - } else if match_index == hostname_len - filter_hostname_len { - // `filter_hostname` is a suffix of `hostname`. - // - // Examples (filter_hostname, hostname): - // * (foo.com, sub.foo.com) - // * (com, foo.com) - filter_hostname.starts_with('.') || hostname[match_index - 1..].starts_with('.') - } else { - // `filter_hostname` is infix of `hostname` and needs match full labels - (wildcard_filter_hostname || filter_hostname.ends_with('.') || hostname[filter_hostname_len..].starts_with('.')) - && (filter_hostname.starts_with('.') || hostname[match_index - 1..].starts_with('.')) - } - } - else { - // No match - false - } -} - -fn get_url_after_hostname<'a>(url: &'a str, hostname: &str) -> &'a str { - let start = - memmem::find(url.as_bytes(), hostname.as_bytes()).unwrap_or(url.len() - hostname.len()); - &url[start + hostname.len()..] -} - -// --------------------------------------------------------------------------- -// Filter matching -// --------------------------------------------------------------------------- - -// pattern -fn check_pattern_plain_filter_filter(filter: &NetworkFilter, request: &request::Request) -> bool { - let request_url = request.get_url(filter.match_case()); - match &filter.filter { - FilterPart::Empty => true, - FilterPart::Simple(f) => memmem::find(request_url.as_bytes(), f.as_bytes()).is_some(), - FilterPart::AnyOf(filters) => { - for f in filters { - if memmem::find(request_url.as_bytes(), f.as_bytes()).is_some() { - return true; - } - } - false - } - } -} - -// pattern| -fn check_pattern_right_anchor_filter(filter: &NetworkFilter, request: &request::Request) -> bool { - let request_url = request.get_url(filter.match_case()); - match &filter.filter { - FilterPart::Empty => true, - FilterPart::Simple(f) => request_url.ends_with(f), - FilterPart::AnyOf(filters) => { - for f in filters { - if request_url.ends_with(f) { - return true; - } - } - false - } - } -} - -// |pattern -fn check_pattern_left_anchor_filter(filter: &NetworkFilter, request: &request::Request) -> bool { - let request_url = request.get_url(filter.match_case()); - match &filter.filter { - FilterPart::Empty => true, - FilterPart::Simple(f) => request_url.starts_with(f), - FilterPart::AnyOf(filters) => { - for f in filters { - if request_url.starts_with(f) { - return true; - } - } - false - } - } -} - -// |pattern| -fn check_pattern_left_right_anchor_filter( - filter: &NetworkFilter, - request: &request::Request, -) -> bool { - let request_url = request.get_url(filter.match_case()); - match &filter.filter { - FilterPart::Empty => true, - FilterPart::Simple(f) => &request_url == f, - FilterPart::AnyOf(filters) => { - for f in filters { - if &request_url == f { - return true; - } - } - false - } - } -} - -// pattern*^ -fn check_pattern_regex_filter_at( - filter: &NetworkFilter, - request: &request::Request, - start_from: usize, - regex_manager: &mut RegexManager, -) -> bool { - let request_url = request.get_url(filter.match_case()); - regex_manager.matches(filter, &request_url[start_from..]) -} - -fn check_pattern_regex_filter( - filter: &NetworkFilter, - request: &request::Request, - regex_manager: &mut RegexManager, -) -> bool { - check_pattern_regex_filter_at(filter, request, 0, regex_manager) -} - -// ||pattern*^ -fn check_pattern_hostname_anchor_regex_filter( - filter: &NetworkFilter, - request: &request::Request, - regex_manager: &mut RegexManager, -) -> bool { - let request_url = request.get_url(filter.match_case()); - filter - .hostname - .as_ref() - .map(|hostname| { - if is_anchored_by_hostname(hostname, &request.hostname, filter.mask.contains(NetworkFilterMask::IS_HOSTNAME_REGEX)) { - check_pattern_regex_filter_at( - filter, - request, - memmem::find(request_url.as_bytes(), hostname.as_bytes()).unwrap_or_default() - + hostname.len(), - regex_manager, - ) - } else { - false - } - }) - .unwrap_or_else(|| unreachable!()) // no match if filter has no hostname - should be unreachable -} - -// ||pattern| -fn check_pattern_hostname_right_anchor_filter( - filter: &NetworkFilter, - request: &request::Request, -) -> bool { - filter - .hostname - .as_ref() - .map(|hostname| { - if is_anchored_by_hostname(hostname, &request.hostname, filter.mask.contains(NetworkFilterMask::IS_HOSTNAME_REGEX)) { - match &filter.filter { - // In this specific case it means that the specified hostname should match - // at the end of the hostname of the request. This allows to prevent false - // positive like ||foo.bar which would match https://foo.bar.baz where - // ||foo.bar^ would not. - FilterPart::Empty => { - request.hostname.len() == hostname.len() // if lengths are equal, hostname equality is implied by anchoring check - || request.hostname.ends_with(hostname) - } - _ => check_pattern_right_anchor_filter(filter, request), - } - } else { - false - } - }) - .unwrap_or_else(|| unreachable!()) // no match if filter has no hostname - should be unreachable -} - -// |||pattern| -fn check_pattern_hostname_left_right_anchor_filter( - filter: &NetworkFilter, - request: &request::Request, -) -> bool { - // Since this is not a regex, the filter pattern must follow the hostname - // with nothing in between. So we extract the part of the URL following - // after hostname and will perform the matching on it. - - filter - .hostname - .as_ref() - .map(|hostname| { - if is_anchored_by_hostname(hostname, &request.hostname, filter.mask.contains(NetworkFilterMask::IS_HOSTNAME_REGEX)) { - let request_url = request.get_url(filter.match_case()); - match &filter.filter { - // if no filter, we have a match - FilterPart::Empty => true, - // Since it must follow immediatly after the hostname and be a suffix of - // the URL, we conclude that filter must be equal to the part of the - // url following the hostname. - FilterPart::Simple(f) => get_url_after_hostname(&request_url, hostname) == f, - FilterPart::AnyOf(filters) => { - let url_after_hostname = get_url_after_hostname(&request_url, hostname); - for f in filters { - if url_after_hostname == f { - return true; - } - } - false - } - } - } else { - false - } - }) - .unwrap_or_else(|| unreachable!()) // no match if filter has no hostname - should be unreachable -} - -// ||pattern + left-anchor => This means that a plain pattern needs to appear -// exactly after the hostname, with nothing in between. -fn check_pattern_hostname_left_anchor_filter( - filter: &NetworkFilter, - request: &request::Request, -) -> bool { - filter - .hostname - .as_ref() - .map(|hostname| { - if is_anchored_by_hostname(hostname, &request.hostname, filter.mask.contains(NetworkFilterMask::IS_HOSTNAME_REGEX)) { - let request_url = request.get_url(filter.match_case()); - match &filter.filter { - // if no filter, we have a match - FilterPart::Empty => true, - // Since this is not a regex, the filter pattern must follow the hostname - // with nothing in between. So we extract the part of the URL following - // after hostname and will perform the matching on it. - FilterPart::Simple(f) => get_url_after_hostname(&request_url, hostname).starts_with(f), - FilterPart::AnyOf(filters) => { - let url_after_hostname = get_url_after_hostname(&request_url, hostname); - for f in filters { - if url_after_hostname.starts_with(f) { - return true; - } - } - false - } - } - } else { - false - } - }) - .unwrap_or_else(|| unreachable!()) // no match if filter has no hostname - should be unreachable -} - -// ||pattern -fn check_pattern_hostname_anchor_filter( - filter: &NetworkFilter, - request: &request::Request, -) -> bool { - filter - .hostname - .as_ref() - .map(|hostname| { - if is_anchored_by_hostname(hostname, &request.hostname, filter.mask.contains(NetworkFilterMask::IS_HOSTNAME_REGEX)) { - let request_url = request.get_url(filter.match_case()); - match &filter.filter { - // if no filter, we have a match - FilterPart::Empty => true, - // Filter hostname does not necessarily have to be a full, proper hostname, part of it can be lumped together with the URL - FilterPart::Simple(f) => get_url_after_hostname(&request_url, hostname) - .contains(f), - FilterPart::AnyOf(filters) => { - let url_after_hostname = get_url_after_hostname(&request_url, hostname); - for f in filters { - if url_after_hostname.contains(f) { - return true; - } - } - false - } - } - } else { - false - } - }) - .unwrap_or_else(|| unreachable!()) // no match if filter has no hostname - should be unreachable -} - -/// Efficiently checks if a certain network filter matches against a network -/// request. -fn check_pattern( - filter: &NetworkFilter, - request: &request::Request, - regex_manager: &mut RegexManager, -) -> bool { - if filter.is_hostname_anchor() { - if filter.is_regex() { - check_pattern_hostname_anchor_regex_filter(filter, request, regex_manager) - } else if filter.is_right_anchor() && filter.is_left_anchor() { - check_pattern_hostname_left_right_anchor_filter(filter, request) - } else if filter.is_right_anchor() { - check_pattern_hostname_right_anchor_filter(filter, request) - } else if filter.is_left_anchor() { - check_pattern_hostname_left_anchor_filter(filter, request) - } else { - check_pattern_hostname_anchor_filter(filter, request) - } - } else if filter.is_regex() || filter.is_complete_regex() { - check_pattern_regex_filter(filter, request, regex_manager) - } else if filter.is_left_anchor() && filter.is_right_anchor() { - check_pattern_left_right_anchor_filter(filter, request) - } else if filter.is_left_anchor() { - check_pattern_left_anchor_filter(filter, request) - } else if filter.is_right_anchor() { - check_pattern_right_anchor_filter(filter, request) - } else { - check_pattern_plain_filter_filter(filter, request) - } -} - -fn check_options(filter: &NetworkFilter, request: &request::Request) -> bool { - // Bad filter never matches - if filter.is_badfilter() { - return false; - } - // We first discard requests based on type, protocol and party. This is really - // cheap and should be done first. - if !filter.check_cpt_allowed(&request.request_type) - || (request.is_https && !filter.for_https()) - || (request.is_http && !filter.for_http()) - || (!filter.first_party() && !request.is_third_party) - || (!filter.third_party() && request.is_third_party) - { - return false; - } - - // Source URL must be among these domains to match - if let Some(included_domains) = filter.opt_domains.as_ref() { - if let Some(source_hashes) = request.source_hostname_hashes.as_ref() { - // If the union of included domains is recorded - if let Some(included_domains_union) = filter.opt_domains_union { - // If there isn't any source hash that matches the union, there's no match at all - if source_hashes.iter().all(|h| h & included_domains_union != *h) { - return false - } - } - if source_hashes.iter().all(|h| !utils::bin_lookup(included_domains, *h)) { - return false - } - } - } - - if let Some(excluded_domains) = filter.opt_not_domains.as_ref() { - if let Some(source_hashes) = request.source_hostname_hashes.as_ref() { - // If the union of excluded domains is recorded - if let Some(excluded_domains_union) = filter.opt_not_domains_union { - // If there's any source hash that matches the union, check the actual values - if source_hashes.iter().any(|h| (h & excluded_domains_union == *h) && utils::bin_lookup(excluded_domains, *h)) { - return false - } - } else if source_hashes.iter().any(|h| utils::bin_lookup(excluded_domains, *h)) { - return false - } - } - } - - true -} - #[cfg(test)] -mod parse_tests { - use super::*; - - #[derive(Debug, PartialEq)] - struct NetworkFilterBreakdown { - filter: Option, - hostname: Option, - opt_domains: Option>, - opt_not_domains: Option>, - modifier_option: Option, - - // filter type - is_exception: bool, - is_hostname_anchor: bool, - is_right_anchor: bool, - is_left_anchor: bool, - is_regex: bool, - is_csp: bool, - is_plain: bool, - is_important: bool, - - // Options - first_party: bool, - from_network_types: bool, - from_font: bool, - from_image: bool, - from_media: bool, - from_object: bool, - from_other: bool, - from_ping: bool, - from_script: bool, - from_stylesheet: bool, - from_subdocument: bool, - from_websocket: bool, - from_xml_http_request: bool, - from_document: bool, - match_case: bool, - third_party: bool, - } - - impl From<&NetworkFilter> for NetworkFilterBreakdown { - fn from(filter: &NetworkFilter) -> NetworkFilterBreakdown { - NetworkFilterBreakdown { - filter: filter.filter.string_view(), - hostname: filter.hostname.as_ref().cloned(), - opt_domains: filter.opt_domains.as_ref().cloned(), - opt_not_domains: filter.opt_not_domains.as_ref().cloned(), - modifier_option: filter.modifier_option.as_ref().cloned(), - - // filter type - is_exception: filter.is_exception(), - is_hostname_anchor: filter.is_hostname_anchor(), - is_right_anchor: filter.is_right_anchor(), - is_left_anchor: filter.is_left_anchor(), - is_regex: filter.is_regex(), - is_csp: filter.is_csp(), - is_plain: filter.is_plain(), - is_important: filter.is_important(), - - // Options - first_party: filter.first_party(), - from_network_types: filter.mask.contains(NetworkFilterMask::FROM_NETWORK_TYPES), - from_font: filter.mask.contains(NetworkFilterMask::FROM_FONT), - from_image: filter.mask.contains(NetworkFilterMask::FROM_IMAGE), - from_media: filter.mask.contains(NetworkFilterMask::FROM_MEDIA), - from_object: filter.mask.contains(NetworkFilterMask::FROM_OBJECT), - from_other: filter.mask.contains(NetworkFilterMask::FROM_OTHER), - from_ping: filter.mask.contains(NetworkFilterMask::FROM_PING), - from_script: filter.mask.contains(NetworkFilterMask::FROM_SCRIPT), - from_stylesheet: filter.mask.contains(NetworkFilterMask::FROM_STYLESHEET), - from_subdocument: filter.mask.contains(NetworkFilterMask::FROM_SUBDOCUMENT), - from_websocket: filter.mask.contains(NetworkFilterMask::FROM_WEBSOCKET), - from_xml_http_request: filter.mask.contains(NetworkFilterMask::FROM_XMLHTTPREQUEST), - from_document: filter.mask.contains(NetworkFilterMask::FROM_DOCUMENT), - match_case: filter.match_case(), - third_party: filter.third_party(), - } - } - } - - fn default_network_filter_breakdown() -> NetworkFilterBreakdown { - NetworkFilterBreakdown { - filter: None, - hostname: None, - opt_domains: None, - opt_not_domains: None, - modifier_option: None, - - // filter type - is_exception: false, - is_hostname_anchor: false, - is_right_anchor: false, - is_left_anchor: false, - is_regex: false, - is_csp: false, - is_plain: false, - is_important: false, - - // Options - first_party: true, - from_network_types: true, - from_font: true, - from_image: true, - from_media: true, - from_object: true, - from_other: true, - from_ping: true, - from_script: true, - from_stylesheet: true, - from_subdocument: true, - from_websocket: true, - from_xml_http_request: true, - from_document: false, - match_case: false, - third_party: true, - } - } - - #[test] - // pattern - fn parses_plain_pattern() { - { - let filter = NetworkFilter::parse("ads", true, Default::default()).unwrap(); - let mut defaults = default_network_filter_breakdown(); - defaults.filter = Some(String::from("ads")); - defaults.is_plain = true; - assert_eq!(defaults, NetworkFilterBreakdown::from(&filter)) - } - { - let filter = NetworkFilter::parse("/ads/foo-", true, Default::default()).unwrap(); - let mut defaults = default_network_filter_breakdown(); - defaults.filter = Some(String::from("/ads/foo-")); - defaults.is_plain = true; - assert_eq!(defaults, NetworkFilterBreakdown::from(&filter)) - } - { - let filter = NetworkFilter::parse("/ads/foo-$important", true, Default::default()).unwrap(); - let mut defaults = default_network_filter_breakdown(); - defaults.filter = Some(String::from("/ads/foo-")); - defaults.is_plain = true; - defaults.is_important = true; - assert_eq!(defaults, NetworkFilterBreakdown::from(&filter)) - } - { - let filter = NetworkFilter::parse("foo.com/ads$important", true, Default::default()).unwrap(); - let mut defaults = default_network_filter_breakdown(); - defaults.filter = Some(String::from("foo.com/ads")); - defaults.is_plain = true; - defaults.is_important = true; - assert_eq!(defaults, NetworkFilterBreakdown::from(&filter)) - } - } - - #[test] - // ||pattern - fn parses_hostname_anchor_pattern() { - { - let filter = NetworkFilter::parse("||foo.com", true, Default::default()).unwrap(); - let mut defaults = default_network_filter_breakdown(); - defaults.filter = None; - defaults.hostname = Some(String::from("foo.com")); - defaults.is_plain = true; - defaults.is_hostname_anchor = true; - assert_eq!(defaults, NetworkFilterBreakdown::from(&filter)) - } - { - let filter = NetworkFilter::parse("||foo.com$important", true, Default::default()).unwrap(); - let mut defaults = default_network_filter_breakdown(); - defaults.filter = None; - defaults.hostname = Some(String::from("foo.com")); - defaults.is_plain = true; - defaults.is_hostname_anchor = true; - defaults.is_important = true; - assert_eq!(defaults, NetworkFilterBreakdown::from(&filter)) - } - { - let filter = NetworkFilter::parse("||foo.com/bar/baz$important", true, Default::default()).unwrap(); - let mut defaults = default_network_filter_breakdown(); - defaults.hostname = Some(String::from("foo.com")); - defaults.filter = Some(String::from("/bar/baz")); - defaults.is_plain = true; - defaults.is_hostname_anchor = true; - defaults.is_important = true; - defaults.is_left_anchor = true; - assert_eq!(defaults, NetworkFilterBreakdown::from(&filter)) - } - } - - #[test] - // ||pattern| - fn parses_hostname_right_anchor_pattern() { - { - let filter = NetworkFilter::parse("||foo.com|", true, Default::default()).unwrap(); - let mut defaults = default_network_filter_breakdown(); - defaults.hostname = Some(String::from("foo.com")); - defaults.filter = None; - defaults.is_plain = true; - defaults.is_right_anchor = true; - defaults.is_hostname_anchor = true; - assert_eq!(defaults, NetworkFilterBreakdown::from(&filter)) - } - { - let filter = NetworkFilter::parse("||foo.com|$important", true, Default::default()).unwrap(); - let mut defaults = default_network_filter_breakdown(); - defaults.hostname = Some(String::from("foo.com")); - defaults.filter = None; - defaults.is_plain = true; - defaults.is_important = true; - defaults.is_right_anchor = true; - defaults.is_hostname_anchor = true; - assert_eq!(defaults, NetworkFilterBreakdown::from(&filter)) - } - { - let filter = NetworkFilter::parse("||foo.com/bar/baz|$important", true, Default::default()).unwrap(); - let mut defaults = default_network_filter_breakdown(); - defaults.hostname = Some(String::from("foo.com")); - defaults.filter = Some(String::from("/bar/baz")); - defaults.is_plain = true; - defaults.is_important = true; - defaults.is_left_anchor = true; - defaults.is_right_anchor = true; - defaults.is_hostname_anchor = true; - assert_eq!(defaults, NetworkFilterBreakdown::from(&filter)) - } - { - let filter = NetworkFilter::parse("||foo.com^bar/*baz|$important", true, Default::default()).unwrap(); - let mut defaults = default_network_filter_breakdown(); - defaults.hostname = Some(String::from("foo.com")); - defaults.filter = Some(String::from("^bar/*baz")); - defaults.is_important = true; - defaults.is_left_anchor = true; - defaults.is_right_anchor = true; - defaults.is_hostname_anchor = true; - defaults.is_regex = true; - assert_eq!(defaults, NetworkFilterBreakdown::from(&filter)) - } - } - - #[test] - // |pattern - fn parses_left_anchor_pattern() { - { - let filter = NetworkFilter::parse("|foo.com", true, Default::default()).unwrap(); - let mut defaults = default_network_filter_breakdown(); - defaults.filter = Some(String::from("foo.com")); - defaults.is_plain = true; - defaults.is_left_anchor = true; - assert_eq!(defaults, NetworkFilterBreakdown::from(&filter)) - } - { - let filter = NetworkFilter::parse("|foo.com/bar/baz", true, Default::default()).unwrap(); - let mut defaults = default_network_filter_breakdown(); - defaults.filter = Some(String::from("foo.com/bar/baz")); - defaults.is_plain = true; - defaults.is_left_anchor = true; - assert_eq!(defaults, NetworkFilterBreakdown::from(&filter)) - } - { - let filter = NetworkFilter::parse("|foo.com^bar/*baz", true, Default::default()).unwrap(); - let mut defaults = default_network_filter_breakdown(); - defaults.filter = Some(String::from("foo.com^bar/*baz")); - defaults.is_regex = true; - defaults.is_left_anchor = true; - assert_eq!(defaults, NetworkFilterBreakdown::from(&filter)) - } - } - - #[test] - // |pattern| - fn parses_left_right_anchor_pattern() { - { - let filter = NetworkFilter::parse("|foo.com|", true, Default::default()).unwrap(); - - let mut defaults = default_network_filter_breakdown(); - defaults.filter = Some(String::from("foo.com")); - defaults.is_plain = true; - defaults.is_right_anchor = true; - defaults.is_left_anchor = true; - assert_eq!(defaults, NetworkFilterBreakdown::from(&filter)) - } - { - let filter = NetworkFilter::parse("|foo.com/bar|", true, Default::default()).unwrap(); - - let mut defaults = default_network_filter_breakdown(); - defaults.filter = Some(String::from("foo.com/bar")); - defaults.is_plain = true; - defaults.is_right_anchor = true; - defaults.is_left_anchor = true; - assert_eq!(defaults, NetworkFilterBreakdown::from(&filter)) - } - { - let filter = NetworkFilter::parse("|foo.com*bar^|", true, Default::default()).unwrap(); - - let mut defaults = default_network_filter_breakdown(); - defaults.filter = Some(String::from("foo.com*bar^")); - defaults.is_regex = true; - defaults.is_right_anchor = true; - defaults.is_left_anchor = true; - assert_eq!(defaults, NetworkFilterBreakdown::from(&filter)) - } - } - - #[test] - // ||regexp - fn parses_hostname_anchor_regex_pattern() { - { - let filter = NetworkFilter::parse("||foo.com*bar^", true, Default::default()).unwrap(); - let mut defaults = default_network_filter_breakdown(); - defaults.hostname = Some(String::from("foo.com")); - defaults.filter = Some(String::from("bar^")); - defaults.is_hostname_anchor = true; - defaults.is_regex = true; - defaults.is_plain = false; - assert_eq!(defaults, NetworkFilterBreakdown::from(&filter)) - } - { - let filter = NetworkFilter::parse("||foo.com^bar*/baz^", true, Default::default()).unwrap(); - let mut defaults = default_network_filter_breakdown(); - defaults.hostname = Some(String::from("foo.com")); - defaults.filter = Some(String::from("^bar*/baz^")); - defaults.is_hostname_anchor = true; - defaults.is_left_anchor = true; - defaults.is_regex = true; - defaults.is_plain = false; - assert_eq!(defaults, NetworkFilterBreakdown::from(&filter)) - } - } - - #[test] - // ||regexp| - fn parses_hostname_right_anchor_regex_pattern() { - { - let filter = NetworkFilter::parse("||foo.com*bar^|", true, Default::default()).unwrap(); - let mut defaults = default_network_filter_breakdown(); - defaults.hostname = Some(String::from("foo.com")); - defaults.filter = Some(String::from("bar^")); - defaults.is_hostname_anchor = true; - defaults.is_right_anchor = true; - defaults.is_regex = true; - defaults.is_plain = false; - assert_eq!(defaults, NetworkFilterBreakdown::from(&filter)) - } - { - let filter = NetworkFilter::parse("||foo.com^bar*/baz^|", true, Default::default()).unwrap(); - let mut defaults = default_network_filter_breakdown(); - defaults.hostname = Some(String::from("foo.com")); - defaults.filter = Some(String::from("^bar*/baz^")); - defaults.is_hostname_anchor = true; - defaults.is_left_anchor = true; - defaults.is_right_anchor = true; - defaults.is_regex = true; - defaults.is_plain = false; - assert_eq!(defaults, NetworkFilterBreakdown::from(&filter)) - } - } - - #[test] - // |regexp - fn parses_hostname_left_anchor_regex_pattern() { - { - let filter = NetworkFilter::parse("|foo.com*bar^", true, Default::default()).unwrap(); - let mut defaults = default_network_filter_breakdown(); - defaults.hostname = None; - defaults.filter = Some(String::from("foo.com*bar^")); - defaults.is_left_anchor = true; - defaults.is_regex = true; - defaults.is_plain = false; - assert_eq!(defaults, NetworkFilterBreakdown::from(&filter)) - } - { - let filter = NetworkFilter::parse("|foo.com^bar*/baz^", true, Default::default()).unwrap(); - let mut defaults = default_network_filter_breakdown(); - defaults.hostname = None; - defaults.filter = Some(String::from("foo.com^bar*/baz^")); - defaults.is_left_anchor = true; - defaults.is_regex = true; - defaults.is_plain = false; - assert_eq!(defaults, NetworkFilterBreakdown::from(&filter)) - } - } - - #[test] - // |regexp| - fn parses_hostname_left_right_anchor_regex_pattern() { - { - let filter = NetworkFilter::parse("|foo.com*bar^|", true, Default::default()).unwrap(); - let mut defaults = default_network_filter_breakdown(); - defaults.hostname = None; - defaults.filter = Some(String::from("foo.com*bar^")); - defaults.is_left_anchor = true; - defaults.is_right_anchor = true; - defaults.is_regex = true; - defaults.is_plain = false; - assert_eq!(defaults, NetworkFilterBreakdown::from(&filter)) - } - { - let filter = NetworkFilter::parse("|foo.com^bar*/baz^|", true, Default::default()).unwrap(); - let mut defaults = default_network_filter_breakdown(); - defaults.hostname = None; - defaults.filter = Some(String::from("foo.com^bar*/baz^")); - defaults.is_left_anchor = true; - defaults.is_right_anchor = true; - defaults.is_regex = true; - defaults.is_plain = false; - assert_eq!(defaults, NetworkFilterBreakdown::from(&filter)) - } - } - - #[test] - // @@pattern - fn parses_exception_pattern() { - { - let filter = NetworkFilter::parse("@@ads", true, Default::default()).unwrap(); - let mut defaults = default_network_filter_breakdown(); - defaults.is_exception = true; - defaults.filter = Some(String::from("ads")); - defaults.is_plain = true; - assert_eq!(defaults, NetworkFilterBreakdown::from(&filter)); - } - { - let filter = NetworkFilter::parse("@@||foo.com/ads", true, Default::default()).unwrap(); - let mut defaults = default_network_filter_breakdown(); - defaults.is_exception = true; - defaults.filter = Some(String::from("/ads")); - defaults.hostname = Some(String::from("foo.com")); - defaults.is_hostname_anchor = true; - defaults.is_left_anchor = true; - defaults.is_plain = true; - assert_eq!(defaults, NetworkFilterBreakdown::from(&filter)); - } - { - let filter = NetworkFilter::parse("@@|foo.com/ads", true, Default::default()).unwrap(); - let mut defaults = default_network_filter_breakdown(); - defaults.is_exception = true; - defaults.filter = Some(String::from("foo.com/ads")); - defaults.is_left_anchor = true; - defaults.is_plain = true; - assert_eq!(defaults, NetworkFilterBreakdown::from(&filter)); - } - { - let filter = NetworkFilter::parse("@@|foo.com/ads|", true, Default::default()).unwrap(); - let mut defaults = default_network_filter_breakdown(); - defaults.is_exception = true; - defaults.filter = Some(String::from("foo.com/ads")); - defaults.is_left_anchor = true; - defaults.is_plain = true; - defaults.is_right_anchor = true; - assert_eq!(defaults, NetworkFilterBreakdown::from(&filter)); - } - { - let filter = NetworkFilter::parse("@@foo.com/ads|", true, Default::default()).unwrap(); - let mut defaults = default_network_filter_breakdown(); - defaults.is_exception = true; - defaults.filter = Some(String::from("foo.com/ads")); - defaults.is_plain = true; - defaults.is_right_anchor = true; - assert_eq!(defaults, NetworkFilterBreakdown::from(&filter)); - } - { - let filter = NetworkFilter::parse("@@||foo.com/ads|", true, Default::default()).unwrap(); - let mut defaults = default_network_filter_breakdown(); - defaults.is_exception = true; - defaults.filter = Some(String::from("/ads")); - defaults.hostname = Some(String::from("foo.com")); - defaults.is_hostname_anchor = true; - defaults.is_left_anchor = true; - defaults.is_plain = true; - defaults.is_right_anchor = true; - assert_eq!(defaults, NetworkFilterBreakdown::from(&filter)); - } - } - - // Options - - #[test] - fn accepts_any_content_type() { - { - let filter = NetworkFilter::parse("||foo.com", true, Default::default()).unwrap(); - let mut defaults = default_network_filter_breakdown(); - defaults.from_network_types = true; - defaults.hostname = Some(String::from("foo.com")); - defaults.is_hostname_anchor = true; - defaults.is_plain = true; - - assert_eq!(defaults, NetworkFilterBreakdown::from(&filter)); - } - { - let filter = NetworkFilter::parse("||foo.com$first-party", true, Default::default()).unwrap(); - let mut defaults = default_network_filter_breakdown(); - defaults.from_network_types = true; - defaults.hostname = Some(String::from("foo.com")); - defaults.is_hostname_anchor = true; - defaults.is_plain = true; - defaults.first_party = true; - defaults.third_party = false; - - assert_eq!(defaults, NetworkFilterBreakdown::from(&filter)); - } - { - let filter = NetworkFilter::parse("||foo.com$third-party", true, Default::default()).unwrap(); - let mut defaults = default_network_filter_breakdown(); - defaults.from_network_types = true; - defaults.hostname = Some(String::from("foo.com")); - defaults.is_hostname_anchor = true; - defaults.is_plain = true; - defaults.first_party = false; - defaults.third_party = true; - - assert_eq!(defaults, NetworkFilterBreakdown::from(&filter)); - } - { - let filter = NetworkFilter::parse("||foo.com$domain=test.com", true, Default::default()).unwrap(); - let mut defaults = default_network_filter_breakdown(); - defaults.from_network_types = true; - defaults.hostname = Some(String::from("foo.com")); - defaults.is_hostname_anchor = true; - defaults.is_plain = true; - defaults.opt_domains = Some(vec![utils::fast_hash("test.com")]); - - assert_eq!(defaults, NetworkFilterBreakdown::from(&filter)); - } - { - let filter = - NetworkFilter::parse("||foo.com$domain=test.com", true, Default::default()).unwrap(); - let mut defaults = default_network_filter_breakdown(); - defaults.from_network_types = true; - defaults.hostname = Some(String::from("foo.com")); - defaults.is_hostname_anchor = true; - defaults.is_plain = true; - defaults.opt_domains = Some(vec![utils::fast_hash("test.com")]); - - assert_eq!(defaults, NetworkFilterBreakdown::from(&filter)); - } - } - - #[test] - fn parses_important() { - { - let filter = NetworkFilter::parse("||foo.com$important", true, Default::default()).unwrap(); - assert_eq!(filter.is_important(), true); - } - { - // parses ~important - let filter = NetworkFilter::parse("||foo.com$~important", true, Default::default()); - assert_eq!(filter.err(), Some(NetworkFilterError::NegatedImportant)); - } - { - // defaults to false - let filter = NetworkFilter::parse("||foo.com", true, Default::default()).unwrap(); - assert_eq!(filter.is_important(), false); - } - } - - #[test] - fn parses_csp() { - { - let filter = NetworkFilter::parse("||foo.com", true, Default::default()).unwrap(); - assert_eq!(filter.modifier_option, None); - } - { - // parses simple CSP - let filter = NetworkFilter::parse(r#"||foo.com$csp=self bar """#, true, Default::default()).unwrap(); - assert_eq!(filter.is_csp(), true); - assert_eq!(filter.modifier_option, Some(String::from(r#"self bar """#))); - } - { - // parses empty CSP - let filter = NetworkFilter::parse("||foo.com$csp", true, Default::default()).unwrap(); - assert_eq!(filter.is_csp(), true); - assert_eq!(filter.modifier_option, None); - } - { - // CSP mixed with content type is an error - let filter = - NetworkFilter::parse(r#"||foo.com$domain=foo|bar,csp=self bar "",image"#, true, Default::default()); - assert_eq!(filter.err(), Some(NetworkFilterError::CspWithContentType)); - } - } - - #[test] - fn parses_domain() { - // parses domain - { - let filter = NetworkFilter::parse("||foo.com$domain=bar.com", true, Default::default()).unwrap(); - assert_eq!(filter.opt_domains, Some(vec![utils::fast_hash("bar.com")])); - assert_eq!(filter.opt_not_domains, None); - } - { - let filter = NetworkFilter::parse("||foo.com$domain=bar.com|baz.com", true, Default::default()).unwrap(); - let mut domains = vec![utils::fast_hash("bar.com"), utils::fast_hash("baz.com")]; - domains.sort_unstable(); - assert_eq!(filter.opt_domains, Some(domains)); - assert_eq!(filter.opt_not_domains, None); - } - - // parses ~domain - { - let filter = NetworkFilter::parse("||foo.com$domain=~bar.com", true, Default::default()).unwrap(); - assert_eq!(filter.opt_domains, None); - assert_eq!( - filter.opt_not_domains, - Some(vec![utils::fast_hash("bar.com")]) - ); - } - { - let filter = NetworkFilter::parse("||foo.com$domain=~bar.com|~baz.com", true, Default::default()).unwrap(); - assert_eq!(filter.opt_domains, None); - let mut domains = vec![utils::fast_hash("bar.com"), utils::fast_hash("baz.com")]; - domains.sort_unstable(); - assert_eq!(filter.opt_not_domains, Some(domains)); - } - // parses domain and ~domain - { - let filter = NetworkFilter::parse("||foo.com$domain=~bar.com|baz.com", true, Default::default()).unwrap(); - assert_eq!(filter.opt_domains, Some(vec![utils::fast_hash("baz.com")])); - assert_eq!( - filter.opt_not_domains, - Some(vec![utils::fast_hash("bar.com")]) - ); - } - { - let filter = NetworkFilter::parse("||foo.com$domain=bar.com|~baz.com", true, Default::default()).unwrap(); - assert_eq!(filter.opt_domains, Some(vec![utils::fast_hash("bar.com")])); - assert_eq!( - filter.opt_not_domains, - Some(vec![utils::fast_hash("baz.com")]) - ); - } - { - let filter = NetworkFilter::parse("||foo.com$domain=foo|~bar|baz", true, Default::default()).unwrap(); - let mut domains = vec![utils::fast_hash("foo"), utils::fast_hash("baz")]; - domains.sort(); - assert_eq!(filter.opt_domains, Some(domains)); - assert_eq!(filter.opt_not_domains, Some(vec![utils::fast_hash("bar")])); - } - // defaults to no constraint - { - let filter = NetworkFilter::parse("||foo.com", true, Default::default()).unwrap(); - assert_eq!(filter.opt_domains, None); - assert_eq!(filter.opt_not_domains, None); - } - // `from` is an alias for `domain` - { - let filter = NetworkFilter::parse("||foo.com$from=bar.com", true, Default::default()).unwrap(); - assert_eq!(filter.opt_domains, Some(vec![utils::fast_hash("bar.com")])); - assert_eq!(filter.opt_not_domains, None); - } - { - let filter = NetworkFilter::parse(r"||video.twimg.com/ext_tw_video/*/*.m3u8$domain=/^i[a-z]*\.strmrdr[a-z]+\..*/", true, Default::default()); - assert_eq!(filter.err(), Some(NetworkFilterError::NoSupportedDomains)); - } - } - - #[test] - fn parses_redirects() { - // parses redirect - { - let filter = NetworkFilter::parse("||foo.com$redirect=bar.js", true, Default::default()).unwrap(); - assert_eq!(filter.modifier_option, Some(String::from("bar.js"))); - } - { - let filter = NetworkFilter::parse("$redirect=bar.js", true, Default::default()).unwrap(); - assert_eq!(filter.modifier_option, Some(String::from("bar.js"))); - } - // parses ~redirect - { - // ~redirect is not a valid option - let filter = NetworkFilter::parse("||foo.com$~redirect", true, Default::default()); - assert_eq!(filter.err(), Some(NetworkFilterError::NegatedRedirection)); - } - // parses redirect without a value - { - // Not valid - let filter = NetworkFilter::parse("||foo.com$redirect", true, Default::default()); - assert_eq!(filter.err(), Some(NetworkFilterError::EmptyRedirection)); - } - { - let filter = NetworkFilter::parse("||foo.com$redirect=", true, Default::default()); - assert_eq!(filter.err(), Some(NetworkFilterError::EmptyRedirection)) - } - // defaults to false - { - let filter = NetworkFilter::parse("||foo.com", true, Default::default()).unwrap(); - assert_eq!(filter.modifier_option, None); - } - } - - #[test] - fn parses_removeparam() { - { - let filter = NetworkFilter::parse("||foo.com^$removeparam", true, Default::default()); - assert!(filter.is_err()); - } - { - let filter = NetworkFilter::parse("$~removeparam=test", true, Default::default()); - assert!(filter.is_err()); - } - { - let filter = NetworkFilter::parse("@@||foo.com^$removeparam=test", true, Default::default()); - assert!(filter.is_err()); - } - { - let filter = NetworkFilter::parse("||foo.com^$removeparam=", true, Default::default()); - assert!(filter.is_err()); - } - { - let filter = NetworkFilter::parse("||foo.com^$removeparam=test,redirect=test", true, Default::default()); - assert!(filter.is_err()); - } - { - let filter = NetworkFilter::parse("||foo.com^$removeparam=test,removeparam=test2", true, Default::default()); - assert!(filter.is_err()); - } - { - let filter = NetworkFilter::parse("||foo.com^$removeparam=𝐔𝐍𝐈𝐂𝐎𝐃𝐄🧋", true, Default::default()); - assert!(filter.is_err()); - } - { - let filter = NetworkFilter::parse("||foo.com^$removeparam=/abc.*/", true, Default::default()); - assert_eq!(filter, Err(NetworkFilterError::RemoveparamRegexUnsupported)); - } - { - let filter = NetworkFilter::parse("||foo.com^$removeparam=test", true, Default::default()).unwrap(); - assert!(filter.is_removeparam()); - assert_eq!(filter.modifier_option, Some("test".into())); - } - } - - #[test] - fn parses_match_case() { - // match-case on non-regex rules is invalid - { - assert!(NetworkFilter::parse("||foo.com$match-case", true, Default::default()).is_err()); - } - { - assert!(NetworkFilter::parse("||foo.com$image,match-case", true, Default::default()).is_err()); - } - { - assert!(NetworkFilter::parse("||foo.com$media,match-case,image", true, Default::default()).is_err()); - } - // match-case on regex rules is ok - { - let filter = NetworkFilter::parse(r#"/foo[0-9]*\.com/$media,match-case,image"#, true, Default::default()).unwrap(); - assert_eq!(filter.match_case(), true); - } - { - let filter = NetworkFilter::parse(r#"/^https?:\/\/[a-z]{8,15}\.top\/[-a-z]{4,}\.css\?aHR0c[\/0-9a-zA-Z]{33,}=?=?\$/$css,3p,match-case"#, true, Default::default()).unwrap(); - assert_eq!(filter.match_case(), true); - } - - // parses ~match-case - { - // ~match-case is not supported - let filter = NetworkFilter::parse("||foo.com$~match-case", true, Default::default()); - assert_eq!(filter.err(), Some(NetworkFilterError::NegatedOptionMatchCase)); - } - - // defaults to false - { - let filter = NetworkFilter::parse("||foo.com", true, Default::default()).unwrap(); - assert_eq!(filter.match_case(), false) - } - } - - #[test] - fn parses_first_party() { - // parses first-party - assert_eq!( - NetworkFilter::parse("||foo.com$first-party", true, Default::default()) - .unwrap() - .first_party(), - true - ); - assert_eq!( - NetworkFilter::parse("@@||foo.com$first-party", true, Default::default()) - .unwrap() - .first_party(), - true - ); - assert_eq!( - NetworkFilter::parse("@@||foo.com|$first-party", true, Default::default()) - .unwrap() - .first_party(), - true - ); - // parses ~first-party - assert_eq!( - NetworkFilter::parse("||foo.com$~first-party", true, Default::default()) - .unwrap() - .first_party(), - false - ); - assert_eq!( - NetworkFilter::parse("||foo.com$first-party,~first-party", true, Default::default()) - .unwrap() - .first_party(), - false - ); - // defaults to true - assert_eq!( - NetworkFilter::parse("||foo.com", true, Default::default()) - .unwrap() - .first_party(), - true - ); - } - - #[test] - fn parses_third_party() { - // parses third-party - assert_eq!( - NetworkFilter::parse("||foo.com$third-party", true, Default::default()) - .unwrap() - .third_party(), - true - ); - assert_eq!( - NetworkFilter::parse("@@||foo.com$third-party", true, Default::default()) - .unwrap() - .third_party(), - true - ); - assert_eq!( - NetworkFilter::parse("@@||foo.com|$third-party", true, Default::default()) - .unwrap() - .third_party(), - true - ); - assert_eq!( - NetworkFilter::parse("||foo.com$~first-party", true, Default::default()) - .unwrap() - .third_party(), - true - ); - // parses ~third-party - assert_eq!( - NetworkFilter::parse("||foo.com$~third-party", true, Default::default()) - .unwrap() - .third_party(), - false - ); - assert_eq!( - NetworkFilter::parse("||foo.com$first-party,~third-party", true, Default::default()) - .unwrap() - .third_party(), - false - ); - // defaults to true - assert_eq!( - NetworkFilter::parse("||foo.com", true, Default::default()) - .unwrap() - .third_party(), - true - ); - } - - #[test] - fn parses_generic_hide() { - { - let filter = NetworkFilter::parse("||foo.com$generichide", true, Default::default()); - assert!(filter.is_err()); - } - { - let filter = NetworkFilter::parse("@@||foo.com$generichide", true, Default::default()).unwrap(); - assert_eq!(filter.is_exception(), true); - assert_eq!(filter.is_generic_hide(), true); - } - { - let filter = NetworkFilter::parse("@@||foo.com|$generichide", true, Default::default()).unwrap(); - assert_eq!(filter.is_exception(), true); - assert_eq!(filter.is_generic_hide(), true); - } - { - let filter = NetworkFilter::parse("@@$generichide,domain=example.com", true, Default::default()).unwrap(); - assert_eq!(filter.is_generic_hide(), true); - let breakdown = NetworkFilterBreakdown::from(&filter); - assert_eq!(breakdown.opt_domains, Some(vec![utils::fast_hash("example.com")])); - } - { - let filter = NetworkFilter::parse("||foo.com", true, Default::default()).unwrap(); - assert_eq!(filter.is_generic_hide(), false); - } - } - - #[test] - fn parses_hosts_style() { - { - let filter = NetworkFilter::parse_hosts_style("example.com", true).unwrap(); - assert!(filter.raw_line.is_some()); - assert_eq!(*filter.raw_line.clone().unwrap(), "||example.com^"); - let mut defaults = default_network_filter_breakdown(); - defaults.hostname = Some("example.com".to_string()); - defaults.is_plain = true; - defaults.is_hostname_anchor = true; - defaults.is_right_anchor = true; - defaults.from_document = true; - assert_eq!(defaults, NetworkFilterBreakdown::from(&filter)) - } - { - let filter = NetworkFilter::parse_hosts_style("www.example.com", true).unwrap(); - assert!(filter.raw_line.is_some()); - assert_eq!(*filter.raw_line.clone().unwrap(), "||example.com^"); - let mut defaults = default_network_filter_breakdown(); - defaults.hostname = Some("example.com".to_string()); - defaults.is_plain = true; - defaults.is_hostname_anchor = true; - defaults.is_right_anchor = true; - defaults.from_document = true; - assert_eq!(defaults, NetworkFilterBreakdown::from(&filter)) - } - { - let filter = NetworkFilter::parse_hosts_style("malware.example.com", true).unwrap(); - assert!(filter.raw_line.is_some()); - assert_eq!(*filter.raw_line.clone().unwrap(), "||malware.example.com^"); - let mut defaults = default_network_filter_breakdown(); - defaults.hostname = Some("malware.example.com".to_string()); - defaults.is_plain = true; - defaults.is_hostname_anchor = true; - defaults.is_right_anchor = true; - defaults.from_document = true; - assert_eq!(defaults, NetworkFilterBreakdown::from(&filter)) - } - } - - #[test] - fn handles_unsupported_options() { - let options = vec![ - "genericblock", - "inline-script", - "popunder", - "popup", - "woot", - ]; - - for option in options { - let filter = NetworkFilter::parse(&format!("||foo.com${}", option), true, Default::default()); - assert!(filter.err().is_some()); - } - } - - #[test] - fn handles_content_type_options() { - let options = vec![ - "font", - "image", - "media", - "object", - "object-subrequest", - "other", - "ping", - "script", - "stylesheet", - "subdocument", - "websocket", - "xmlhttprequest", - "xhr", - ]; - - fn set_all_options(breakdown: &mut NetworkFilterBreakdown, value: bool) { - breakdown.from_font = value; - breakdown.from_image = value; - breakdown.from_media = value; - breakdown.from_object = value; - breakdown.from_other = value; - breakdown.from_ping = value; - breakdown.from_script = value; - breakdown.from_stylesheet = value; - breakdown.from_subdocument = value; - breakdown.from_websocket = value; - breakdown.from_xml_http_request = value; - } - - fn set_option(option: &str, breakdown: &mut NetworkFilterBreakdown, value: bool) { - match option { - "font" => breakdown.from_font = value, - "image" => breakdown.from_image = value, - "media" => breakdown.from_media = value, - "object" => breakdown.from_object = value, - "object-subrequest" => breakdown.from_object = value, - "other" => breakdown.from_other = value, - "ping" => breakdown.from_ping = value, - "script" => breakdown.from_script = value, - "stylesheet" => breakdown.from_stylesheet = value, - "subdocument" => breakdown.from_subdocument = value, - "websocket" => breakdown.from_websocket = value, - "xmlhttprequest" => breakdown.from_xml_http_request = value, - "xhr" => breakdown.from_xml_http_request = value, - _ => unreachable!(), - } - } - - for option in options { - // positive - { - let filter = NetworkFilter::parse(&format!("||foo.com${}", option), true, Default::default()).unwrap(); - let mut defaults = default_network_filter_breakdown(); - defaults.hostname = Some(String::from("foo.com")); - defaults.is_hostname_anchor = true; - defaults.is_plain = true; - defaults.from_network_types = false; - set_all_options(&mut defaults, false); - set_option(&option, &mut defaults, true); - assert_eq!(defaults, NetworkFilterBreakdown::from(&filter)); - } - - { - let filter = - NetworkFilter::parse(&format!("||foo.com$object,{}", option), true, Default::default()).unwrap(); - let mut defaults = default_network_filter_breakdown(); - defaults.hostname = Some(String::from("foo.com")); - defaults.is_hostname_anchor = true; - defaults.is_plain = true; - defaults.from_network_types = false; - set_all_options(&mut defaults, false); - set_option(&option, &mut defaults, true); - set_option("object", &mut defaults, true); - assert_eq!(defaults, NetworkFilterBreakdown::from(&filter)); - } - - { - let filter = - NetworkFilter::parse(&format!("||foo.com$domain=bar.com,{}", option), true, Default::default()) - .unwrap(); - let mut defaults = default_network_filter_breakdown(); - defaults.hostname = Some(String::from("foo.com")); - defaults.is_hostname_anchor = true; - defaults.is_plain = true; - defaults.from_network_types = false; - defaults.opt_domains = Some(vec![utils::fast_hash("bar.com")]); - set_all_options(&mut defaults, false); - set_option(&option, &mut defaults, true); - assert_eq!(defaults, NetworkFilterBreakdown::from(&filter)); - } - - // negative - { - let filter = NetworkFilter::parse(&format!("||foo.com$~{}", option), true, Default::default()).unwrap(); - let mut defaults = default_network_filter_breakdown(); - defaults.hostname = Some(String::from("foo.com")); - defaults.is_hostname_anchor = true; - defaults.is_plain = true; - defaults.from_network_types = false; - set_all_options(&mut defaults, true); - set_option(&option, &mut defaults, false); - assert_eq!(defaults, NetworkFilterBreakdown::from(&filter)); - } - - { - let filter = - NetworkFilter::parse(&format!("||foo.com${},~{}", option, option), true, Default::default()) - .unwrap(); - let mut defaults = default_network_filter_breakdown(); - defaults.hostname = Some(String::from("foo.com")); - defaults.is_hostname_anchor = true; - defaults.is_plain = true; - defaults.from_network_types = false; - set_all_options(&mut defaults, true); - set_option(&option, &mut defaults, false); - assert_eq!(defaults, NetworkFilterBreakdown::from(&filter)); - } - // default - positive - { - let filter = NetworkFilter::parse(&format!("||foo.com"), true, Default::default()).unwrap(); - let mut defaults = default_network_filter_breakdown(); - defaults.hostname = Some(String::from("foo.com")); - defaults.is_hostname_anchor = true; - defaults.is_plain = true; - defaults.from_network_types = true; - set_all_options(&mut defaults, true); - assert_eq!(defaults, NetworkFilterBreakdown::from(&filter)); - } - } - } - - #[test] - fn binary_serialization_works() { - use rmp_serde::{Deserializer, Serializer}; - { - let filter = NetworkFilter::parse("||foo.com/bar/baz$important", true, Default::default()).unwrap(); - - let mut encoded = Vec::new(); - filter.serialize(&mut Serializer::new(&mut encoded)).unwrap(); - let mut de = Deserializer::new(&encoded[..]); - let decoded: NetworkFilter = Deserialize::deserialize(&mut de).unwrap(); - - let mut defaults = default_network_filter_breakdown(); - defaults.hostname = Some(String::from("foo.com")); - defaults.filter = Some(String::from("/bar/baz")); - defaults.is_plain = true; - defaults.is_hostname_anchor = true; - defaults.is_important = true; - defaults.is_left_anchor = true; - assert_eq!(defaults, NetworkFilterBreakdown::from(&decoded)) - } - { - let filter = NetworkFilter::parse("||foo.com*bar^", true, Default::default()).unwrap(); - let mut defaults = default_network_filter_breakdown(); - defaults.hostname = Some(String::from("foo.com")); - defaults.filter = Some(String::from("bar^")); - defaults.is_hostname_anchor = true; - defaults.is_regex = true; - defaults.is_plain = false; - - let mut encoded = Vec::new(); - filter.serialize(&mut Serializer::new(&mut encoded)).unwrap(); - let mut de = Deserializer::new(&encoded[..]); - let decoded: NetworkFilter = Deserialize::deserialize(&mut de).unwrap(); - - assert_eq!(defaults, NetworkFilterBreakdown::from(&decoded)); - assert_eq!(RegexManager::default().matches(&decoded, "bar/"), true); - } - } - - #[test] - fn parse_empty_host_anchor_exception() { - let filter_parsed = NetworkFilter::parse("@@||$domain=auth.wi-fi.ru", true, Default::default()); - assert!(filter_parsed.is_ok()); - - let filter = filter_parsed.unwrap(); - - let mut defaults = default_network_filter_breakdown(); - - defaults.hostname = Some(String::from("")); - defaults.is_hostname_anchor = true; - defaults.is_exception = true; - defaults.is_plain = true; - defaults.from_network_types = true; - defaults.opt_domains = Some(vec![utils::fast_hash("auth.wi-fi.ru")]); - assert_eq!(defaults, NetworkFilterBreakdown::from(&filter)); - } - -} - -#[cfg(test)] -mod match_tests { - use super::*; - - #[test] - fn is_anchored_by_hostname_works() { - // matches empty hostname - assert_eq!(is_anchored_by_hostname("", "foo.com", false), true); - - // does not match when filter hostname is longer than hostname - assert_eq!(is_anchored_by_hostname("bar.foo.com", "foo.com", false), false); - assert_eq!(is_anchored_by_hostname("b", "", false), false); - assert_eq!(is_anchored_by_hostname("foo.com", "foo.co", false), false); - - // does not match if there is not match - assert_eq!(is_anchored_by_hostname("bar", "foo.com", false), false); - - // ## prefix match - // matches exact match - assert_eq!(is_anchored_by_hostname("", "", false), true); - assert_eq!(is_anchored_by_hostname("f", "f", false), true); - assert_eq!(is_anchored_by_hostname("foo", "foo", false), true); - assert_eq!(is_anchored_by_hostname("foo.com", "foo.com", false), true); - assert_eq!(is_anchored_by_hostname(".com", ".com", false), true); - assert_eq!(is_anchored_by_hostname("com.", "com.", false), true); - - // matches partial - // Single label - assert_eq!(is_anchored_by_hostname("foo", "foo.com", false), true); - assert_eq!(is_anchored_by_hostname("foo.", "foo.com", false), true); - assert_eq!(is_anchored_by_hostname(".foo", ".foo.com", false), true); - assert_eq!(is_anchored_by_hostname(".foo.", ".foo.com", false), true); - - // Multiple labels - assert_eq!(is_anchored_by_hostname("foo.com", "foo.com.", false), true); - assert_eq!(is_anchored_by_hostname("foo.com.", "foo.com.", false), true); - assert_eq!(is_anchored_by_hostname(".foo.com.", ".foo.com.", false), true); - assert_eq!(is_anchored_by_hostname(".foo.com", ".foo.com", false), true); - - assert_eq!(is_anchored_by_hostname("foo.bar", "foo.bar.com", false), true); - assert_eq!(is_anchored_by_hostname("foo.bar.", "foo.bar.com", false), true); - - // does not match partial prefix - // Single label - assert_eq!(is_anchored_by_hostname("foo", "foobar.com", false), false); - assert_eq!(is_anchored_by_hostname("fo", "foo.com", false), false); - assert_eq!(is_anchored_by_hostname(".foo", "foobar.com", false), false); - - // Multiple labels - assert_eq!(is_anchored_by_hostname("foo.bar", "foo.barbaz.com", false), false); - assert_eq!( - is_anchored_by_hostname(".foo.bar", ".foo.barbaz.com", false), - false - ); - - // ## suffix match - // matches partial - // Single label - assert_eq!(is_anchored_by_hostname("com", "foo.com", false), true); - assert_eq!(is_anchored_by_hostname(".com", "foo.com", false), true); - assert_eq!(is_anchored_by_hostname(".com.", "foo.com.", false), true); - assert_eq!(is_anchored_by_hostname("com.", "foo.com.", false), true); - - // Multiple labels - assert_eq!(is_anchored_by_hostname("foo.com.", ".foo.com.", false), true); - assert_eq!(is_anchored_by_hostname("foo.com", ".foo.com", false), true); - - // does not match partial - // Single label - assert_eq!(is_anchored_by_hostname("om", "foo.com", false), false); - assert_eq!(is_anchored_by_hostname("com", "foocom", false), false); - - // Multiple labels - assert_eq!(is_anchored_by_hostname("foo.bar.com", "baz.bar.com", false), false); - assert_eq!(is_anchored_by_hostname("fo.bar.com", "foo.bar.com", false), false); - assert_eq!(is_anchored_by_hostname(".fo.bar.com", "foo.bar.com", false), false); - assert_eq!(is_anchored_by_hostname("bar.com", "foobar.com", false), false); - assert_eq!(is_anchored_by_hostname(".bar.com", "foobar.com", false), false); - - // ## infix match - // matches partial - assert_eq!(is_anchored_by_hostname("bar", "foo.bar.com", false), true); - assert_eq!(is_anchored_by_hostname("bar.", "foo.bar.com", false), true); - assert_eq!(is_anchored_by_hostname(".bar.", "foo.bar.com", false), true); - } - - fn filter_match_url(filter: &str, url: &str, matching: bool) { - let network_filter = NetworkFilter::parse(filter, true, Default::default()).unwrap(); - let request = request::Request::new(url, "https://example.com", "other").unwrap(); - - assert!( - network_filter.matches_test(&request) == matching, - "Expected match={} for {} {:?} on {}", - matching, - filter, - network_filter, - url - ); - } - - fn hosts_filter_match_url(filter: &str, url: &str, matching: bool) { - let network_filter = NetworkFilter::parse_hosts_style(filter, true).unwrap(); - let request = request::Request::new(url, "https://example.com", "other").unwrap(); - - assert!( - network_filter.matches_test(&request) == matching, - "Expected match={} for {} {:?} on {}", - matching, - filter, - network_filter, - url - ); - } - - #[test] - // pattern - fn check_pattern_plain_filter_filter_works() { - filter_match_url("foo", "https://bar.com/foo", true); - filter_match_url("foo", "https://bar.com/baz/foo", true); - filter_match_url("foo", "https://bar.com/q=foo/baz", true); - filter_match_url("foo", "https://foo.com", true); - filter_match_url("-foo-", "https://bar.com/baz/42-foo-q", true); - filter_match_url("&fo.o=+_-", "https://bar.com?baz=42&fo.o=+_-", true); - filter_match_url("foo/bar/baz", "https://bar.com/foo/bar/baz", true); - filter_match_url("com/bar/baz", "https://bar.com/bar/baz", true); - filter_match_url("https://bar.com/bar/baz", "https://bar.com/bar/baz", true); - } - - #[test] - // ||pattern - fn check_pattern_hostname_anchor_filter_works() { - filter_match_url("||foo.com", "https://foo.com/bar", true); - filter_match_url("||foo.com/bar", "https://foo.com/bar", true); - filter_match_url("||foo", "https://foo.com/bar", true); - filter_match_url("||foo", "https://baz.foo.com/bar", true); - filter_match_url("||foo", "https://foo.baz.com/bar", true); - filter_match_url("||foo.baz", "https://foo.baz.com/bar", true); - filter_match_url("||foo.baz.", "https://foo.baz.com/bar", true); - - filter_match_url("||foo.baz.com^", "https://foo.baz.com/bar", true); - filter_match_url("||foo.baz^", "https://foo.baz.com/bar", false); - - filter_match_url("||foo", "https://baz.com", false); - filter_match_url("||foo", "https://foo-bar.baz.com/bar", false); - filter_match_url("||foo.com", "https://foo.de", false); - filter_match_url("||foo.com", "https://bar.foo.de", false); - filter_match_url("||s.foo.com", "https://substring.s.foo.com", true); - filter_match_url("||s.foo.com", "https://substrings.foo.com", false); - } - - #[test] - fn check_hosts_style_works() { - hosts_filter_match_url("foo.com", "https://foo.com/bar", true); - hosts_filter_match_url("foo.foo.com", "https://foo.com/bar", false); - hosts_filter_match_url("www.foo.com", "https://foo.com/bar", true); - hosts_filter_match_url("com.foo", "https://foo.baz.com/bar", false); - hosts_filter_match_url("foo.baz", "https://foo.baz.com/bar", false); - - hosts_filter_match_url("foo.baz.com", "https://foo.baz.com/bar", true); - hosts_filter_match_url("foo.baz", "https://foo.baz.com/bar", false); - - hosts_filter_match_url("foo.com", "https://baz.com", false); - hosts_filter_match_url("bar.baz.com", "https://foo-bar.baz.com/bar", false); - hosts_filter_match_url("foo.com", "https://foo.de", false); - hosts_filter_match_url("foo.com", "https://bar.foo.de", false); - } - - #[test] - // ||pattern| - fn check_pattern_hostname_right_anchor_filter_works() { - filter_match_url("||foo.com|", "https://foo.com", true); - filter_match_url("||foo.com/bar|", "https://foo.com/bar", true); - - filter_match_url("||foo.com/bar|", "https://foo.com/bar/baz", false); - filter_match_url("||foo.com/bar|", "https://foo.com/", false); - filter_match_url("||bar.com/bar|", "https://foo.com/", false); - } - - #[test] - // pattern| - fn check_pattern_right_anchor_filter_works() { - filter_match_url("foo.com", "https://foo.com", true); - filter_match_url("foo|", "https://bar.com/foo", true); - filter_match_url("foo|", "https://bar.com/foo/", false); - filter_match_url("foo|", "https://bar.com/foo/baz", false); - } - - #[test] - // |pattern - fn check_pattern_left_anchor_filter_works() { - filter_match_url("|http", "http://foo.com", true); - filter_match_url("|http", "https://foo.com", true); - filter_match_url("|https://", "https://foo.com", true); - - filter_match_url("https", "http://foo.com", false); - } - - #[test] - // |pattern| - fn check_pattern_left_right_anchor_filter_works() { - filter_match_url("|https://foo.com|", "https://foo.com", true); - } - - #[test] - // ||pattern + left-anchor - fn check_pattern_hostname_left_anchor_filter_works() { - filter_match_url("||foo.com^test", "https://foo.com/test", true); - filter_match_url("||foo.com/test", "https://foo.com/test", true); - filter_match_url("||foo.com^test", "https://foo.com/tes", false); - filter_match_url("||foo.com/test", "https://foo.com/tes", false); - - filter_match_url("||foo.com^", "https://foo.com/test", true); - - filter_match_url("||foo.com/test*bar", "https://foo.com/testbar", true); - filter_match_url("||foo.com^test*bar", "https://foo.com/testbar", true); - } - - #[test] - // ||hostname^*/pattern - fn check_pattern_hostname_anchor_regex_filter_works() { - filter_match_url("||foo.com^*/bar", "https://foo.com/bar", false); - filter_match_url("||com^*/bar", "https://foo.com/bar", false); - filter_match_url("||foo^*/bar", "https://foo.com/bar", false); - - // @see https://github.com/cliqz-oss/adblocker/issues/29 - filter_match_url("||foo.co^aaa/", "https://bar.foo.com/bbb/aaa/", false); - filter_match_url("||foo.com^aaa/", "https://bar.foo.com/bbb/aaa/", false); - - filter_match_url("||com*^bar", "https://foo.com/bar", true); - filter_match_url("||foo.com^bar", "https://foo.com/bar", true); - filter_match_url("||com^bar", "https://foo.com/bar", true); - filter_match_url("||foo*^bar", "https://foo.com/bar", true); - filter_match_url("||foo*/bar", "https://foo.com/bar", true); - filter_match_url("||foo*com/bar", "https://foo.com/bar", true); - filter_match_url("||foo2*com/bar", "https://foo2.com/bar", true); - filter_match_url("||foo*com*/bar", "https://foo.com/bar", true); - filter_match_url("||foo*com*^bar", "https://foo.com/bar", true); - filter_match_url("||*foo*com*^bar", "https://foo.com/bar", true); - filter_match_url("||*/bar", "https://foo.com/bar", true); - filter_match_url("||*^bar", "https://foo.com/bar", true); - filter_match_url("||*com/bar", "https://foo.com/bar", true); - filter_match_url("||*.com/bar", "https://foo.com/bar", true); - filter_match_url("||*foo.com/bar", "https://foo.com/bar", true); - filter_match_url("||*com/bar", "https://foo.com/bar", true); - filter_match_url("||*com*/bar", "https://foo.com/bar", true); - filter_match_url("||*com*^bar", "https://foo.com/bar", true); - } - - #[test] - fn check_pattern_hostname_anchor_regex_filter_works_realisitic() { - filter_match_url("||vimeo.com^*?type=", "https://vimeo.com/ablincoln/fatal_attraction?type=pageview&target=%2F193641463", true); - } - - #[test] - fn check_pattern_hostname_left_right_anchor_regex_filter_works() { - filter_match_url("||geo*.hltv.org^", "https://geo2.hltv.org/rekl13.php", true); - filter_match_url( - "||www*.swatchseries.to^", - "https://www1.swatchseries.to/sw.js", - true, - ); - filter_match_url("||imp*.tradedoubler.com^", "https://impde.tradedoubler.com/imp?type(js)g(22608602)a(1725113)epi(30148500144427100033372010772028)preurl(https://pixel.mathtag.com/event/js?mt_id=1160537&mt_adid=166882&mt_exem=&mt_excl=&v1=&v2=&v3=&s1=&s2=&s3=&mt_nsync=1&redirect=https%3A%2F%2Fad28.ad-srv.net%2Fc%2Fczqwm6dm6kagr2j%3Ftprde%3D)768489806", true); - } - - #[test] - fn check_pattern_exception_works() { - { - let filter = "@@||fastly.net/ad2/$image,script,xmlhttprequest"; - let url = "https://0914.global.ssl.fastly.net/ad2/script/x.js?cb=1549980040838"; - let network_filter = NetworkFilter::parse(filter, true, Default::default()).unwrap(); - let request = request::Request::new( - url, - "https://www.gamespot.com/metro-exodus/", - "script", - ) - .unwrap(); - assert!( - network_filter.matches_test(&request) == true, - "Expected match for {} on {}", - filter, - url - ); - } - { - let filter = "@@||swatchseries.to/public/js/edit-show.js$script,domain=swatchseries.to"; - let url = "https://www1.swatchseries.to/public/js/edit-show.js"; - let network_filter = NetworkFilter::parse(filter, true, Default::default()).unwrap(); - let request = request::Request::new( - url, - "https://www1.swatchseries.to/serie/roswell_new_mexico", - "script", - ) - .unwrap(); - assert!( - network_filter.matches_test(&request) == true, - "Expected match for {} on {}", - filter, - url - ); - } - } - - #[test] - fn check_pattern_match_case() { - filter_match_url(r#"/BannerAd[0-9]/$match-case"#, "https://example.com/BannerAd0.gif", true); - filter_match_url(r#"/BannerAd[0-9]/$match-case"#, "https://example.com/bannerad0.gif", false); - } - - #[test] - fn check_ws_vs_http_matching() { - let network_filter = NetworkFilter::parse("|ws://$domain=4shared.com", true, Default::default()).unwrap(); - - assert!(network_filter.matches_test(&request::Request::new("ws://example.com", "https://4shared.com", "websocket").unwrap())); - assert!(network_filter.matches_test(&request::Request::new("wss://example.com", "https://4shared.com", "websocket").unwrap())); - assert!(!network_filter.matches_test(&request::Request::new("http://example.com", "https://4shared.com", "script").unwrap())); - assert!(!network_filter.matches_test(&request::Request::new("https://example.com", "https://4shared.com", "script").unwrap())); - - // The `ws://` and `wss://` protocols should be used, rather than the resource type. - assert!(network_filter.matches_test(&request::Request::new("ws://example.com", "https://4shared.com", "script").unwrap())); - assert!(network_filter.matches_test(&request::Request::new("wss://example.com", "https://4shared.com", "script").unwrap())); - assert!(!network_filter.matches_test(&request::Request::new("http://example.com", "https://4shared.com", "websocket").unwrap())); - assert!(!network_filter.matches_test(&request::Request::new("https://example.com", "https://4shared.com", "websocket").unwrap())); - } - - #[test] - // options - fn check_options_works() { - // cpt test - { - let network_filter = NetworkFilter::parse("||foo$image", true, Default::default()).unwrap(); - let request = request::Request::new("https://foo.com/bar", "", "image").unwrap(); - assert_eq!(check_options(&network_filter, &request), true); - } - { - let network_filter = NetworkFilter::parse("||foo$image", true, Default::default()).unwrap(); - let request = request::Request::new("https://foo.com/bar", "", "script").unwrap(); - assert_eq!(check_options(&network_filter, &request), false); - } - { - let network_filter = NetworkFilter::parse("||foo$~image", true, Default::default()).unwrap(); - let request = request::Request::new("https://foo.com/bar", "", "script").unwrap(); - assert_eq!(check_options(&network_filter, &request), true); - } - - // ~third-party - { - let network_filter = NetworkFilter::parse("||foo$~third-party", true, Default::default()).unwrap(); - let request = - request::Request::new("https://foo.com/bar", "http://baz.foo.com", "") - .unwrap(); - assert_eq!(check_options(&network_filter, &request), true); - } - { - let network_filter = NetworkFilter::parse("||foo$~third-party", true, Default::default()).unwrap(); - let request = - request::Request::new("https://foo.com/bar", "http://baz.bar.com", "") - .unwrap(); - assert_eq!(check_options(&network_filter, &request), false); - } - - // ~first-party - { - let network_filter = NetworkFilter::parse("||foo$~first-party", true, Default::default()).unwrap(); - let request = - request::Request::new("https://foo.com/bar", "http://baz.bar.com", "") - .unwrap(); - assert_eq!(check_options(&network_filter, &request), true); - } - { - let network_filter = NetworkFilter::parse("||foo$~first-party", true, Default::default()).unwrap(); - let request = - request::Request::new("https://foo.com/bar", "http://baz.foo.com", "") - .unwrap(); - assert_eq!(check_options(&network_filter, &request), false); - } - - // opt-domain - { - let network_filter = NetworkFilter::parse("||foo$domain=foo.com", true, Default::default()).unwrap(); - let request = - request::Request::new("https://foo.com/bar", "http://foo.com", "").unwrap(); - assert_eq!(check_options(&network_filter, &request), true); - } - { - let network_filter = NetworkFilter::parse("||foo$domain=foo.com", true, Default::default()).unwrap(); - let request = - request::Request::new("https://foo.com/bar", "http://bar.com", "").unwrap(); - assert_eq!(check_options(&network_filter, &request), false); - } - - // opt-not-domain - { - let network_filter = NetworkFilter::parse("||foo$domain=~bar.com", true, Default::default()).unwrap(); - let request = - request::Request::new("https://foo.com/bar", "http://foo.com", "").unwrap(); - assert_eq!(check_options(&network_filter, &request), true); - } - { - let network_filter = NetworkFilter::parse("||foo$domain=~bar.com", true, Default::default()).unwrap(); - let request = - request::Request::new("https://foo.com/bar", "http://bar.com", "").unwrap(); - assert_eq!(check_options(&network_filter, &request), false); - } - } - - #[test] - fn check_domain_option_subsetting_works() { - { - let network_filter = NetworkFilter::parse("adv$domain=example.com|~foo.example.com", true, Default::default()).unwrap(); - assert!(network_filter.matches_test(&request::Request::new("http://example.net/adv", "http://example.com", "").unwrap()) == true); - assert!(network_filter.matches_test(&request::Request::new("http://example.net/adv", "http://foo.example.com", "").unwrap()) == false); - assert!(network_filter.matches_test(&request::Request::new("http://example.net/adv", "http://subfoo.foo.example.com", "").unwrap()) == false); - assert!(network_filter.matches_test(&request::Request::new("http://example.net/adv", "http://bar.example.com", "").unwrap()) == true); - assert!(network_filter.matches_test(&request::Request::new("http://example.net/adv", "http://anotherexample.com", "").unwrap()) == false); - } - { - let network_filter = NetworkFilter::parse("adv$domain=~example.com|~foo.example.com", true, Default::default()).unwrap(); - assert!(network_filter.matches_test(&request::Request::new("http://example.net/adv", "http://example.com", "").unwrap()) == false); - assert!(network_filter.matches_test(&request::Request::new("http://example.net/adv", "http://foo.example.com", "").unwrap()) == false); - assert!(network_filter.matches_test(&request::Request::new("http://example.net/adv", "http://subfoo.foo.example.com", "").unwrap()) == false); - assert!(network_filter.matches_test(&request::Request::new("http://example.net/adv", "http://bar.example.com", "").unwrap()) == false); - assert!(network_filter.matches_test(&request::Request::new("http://example.net/adv", "http://anotherexample.com", "").unwrap()) == true); - } - { - let network_filter = NetworkFilter::parse("adv$domain=example.com|foo.example.com", true, Default::default()).unwrap(); - assert!(network_filter.matches_test(&request::Request::new("http://example.net/adv", "http://example.com", "").unwrap()) == true); - assert!(network_filter.matches_test(&request::Request::new("http://example.net/adv", "http://foo.example.com", "").unwrap()) == true); - assert!(network_filter.matches_test(&request::Request::new("http://example.net/adv", "http://subfoo.foo.example.com", "").unwrap()) == true); - assert!(network_filter.matches_test(&request::Request::new("http://example.net/adv", "http://bar.example.com", "").unwrap()) == true); - assert!(network_filter.matches_test(&request::Request::new("http://example.net/adv", "http://anotherexample.com", "").unwrap()) == false); - } - { - let network_filter = NetworkFilter::parse("adv$domain=~example.com|foo.example.com", true, Default::default()).unwrap(); - assert!(network_filter.matches_test(&request::Request::new("http://example.net/adv", "http://example.com", "").unwrap()) == false); - assert!(network_filter.matches_test(&request::Request::new("http://example.net/adv", "http://foo.example.com", "").unwrap()) == false); - assert!(network_filter.matches_test(&request::Request::new("http://example.net/adv", "http://subfoo.foo.example.com", "").unwrap()) == false); - assert!(network_filter.matches_test(&request::Request::new("http://example.net/adv", "http://bar.example.com", "").unwrap()) == false); - assert!(network_filter.matches_test(&request::Request::new("http://example.net/adv", "http://anotherexample.com", "").unwrap()) == false); - } - { - let network_filter = NetworkFilter::parse("adv$domain=com|~foo.com", true, Default::default()).unwrap(); - assert!(network_filter.matches_test(&request::Request::new("http://example.net/adv", "http://com", "").unwrap()) == true); - assert!(network_filter.matches_test(&request::Request::new("http://example.net/adv", "http://foo.com", "").unwrap()) == false); - assert!(network_filter.matches_test(&request::Request::new("http://example.net/adv", "http://subfoo.foo.com", "").unwrap()) == false); - assert!(network_filter.matches_test(&request::Request::new("http://example.net/adv", "http://bar.com", "").unwrap()) == true); - assert!(network_filter.matches_test(&request::Request::new("http://example.net/adv", "http://co.uk", "").unwrap()) == false); - } - } - - #[test] - fn check_unicode_handled() { - filter_match_url( - "||firstrowsports.li/frame/", - "https://firstrowsports.li/frame/bar", - true, - ); - filter_match_url( - "||fırstrowsports.eu/pu/", - "https://fırstrowsports.eu/pu/foo", - true, - ); - filter_match_url( - "||fırstrowsports.eu/pu/", - "https://xn--frstrowsports-39b.eu/pu/foo", - true, - ); - - filter_match_url("||atđhe.net/pu/", "https://atđhe.net/pu/foo", true); - filter_match_url("||atđhe.net/pu/", "https://xn--athe-1ua.net/pu/foo", true); - - filter_match_url("foo", "https://example.com/Ѥ/foo", true); - filter_match_url("Ѥ", "https://example.com/Ѥ/foo", true); - } - - #[test] - fn check_regex_escaping_handled() { - // A few rules that are not correctly escaped for rust Regex - { - // regex escaping "\/" unrecognised - let filter = - r#"/^https?:\/\/.*(bitly|bit)\.(com|ly)\/.*/$domain=123movies.com|1337x.to"#; - let network_filter = NetworkFilter::parse(filter, true, Default::default()).unwrap(); - let url = "https://bit.ly/bar/"; - let source = "http://123movies.com"; - let request = request::Request::new(url, source, "").unwrap(); - assert!( - network_filter.matches_test(&request) == true, - "Expected match for {} on {}", - filter, - url - ); - } - { - // regex escaping "\:" unrecognised - let filter = r#"/\:\/\/data.*\.com\/[a-zA-Z0-9]{30,}/$third-party,xmlhttprequest"#; - let network_filter = NetworkFilter::parse(filter, true, Default::default()).unwrap(); - let url = "https://data.foo.com/9VjjrjU9Or2aqkb8PDiqTBnULPgeI48WmYEHkYer"; - let source = "http://123movies.com"; - let request = request::Request::new(url, source, "xmlhttprequest").unwrap(); - assert!( - network_filter.matches_test(&request) == true, - "Expected match for {} on {}", - filter, - url - ); - } - // - { - let filter = r#"/\.(accountant|bid|click|club|com|cricket|date|download|faith|link|loan|lol|men|online|party|racing|review|science|site|space|stream|top|trade|webcam|website|win|xyz|com)\/(([0-9]{2,9})(\.|\/)(css|\?)?)$/$script,stylesheet,third-party,xmlhttprequest"#; - let network_filter = NetworkFilter::parse(filter, true, Default::default()).unwrap(); - let url = "https://hello.club/123.css"; - let source = "http://123movies.com"; - let request = request::Request::new(url, source, "stylesheet").unwrap(); - assert!( - network_filter.matches_test(&request) == true, - "Expected match for {} on {}", - filter, - url - ); - } - } - - #[test] - #[ignore] // Not going to handle lookaround regexes - #[cfg(feature = "regex-debug-info")] - fn check_lookaround_regex_handled() { - { - let filter = r#"/^https?:\/\/([0-9a-z\-]+\.)?(9anime|animeland|animenova|animeplus|animetoon|animewow|gamestorrent|goodanime|gogoanime|igg-games|kimcartoon|memecenter|readcomiconline|toonget|toonova|watchcartoononline)\.[a-z]{2,4}\/(?!([Ee]xternal|[Ii]mages|[Ss]cripts|[Uu]ploads|ac|ajax|assets|combined|content|cov|cover|(img\/bg)|(img\/icon)|inc|jwplayer|player|playlist-cat-rss|static|thumbs|wp-content|wp-includes)\/)(.*)/$image,other,script,~third-party,xmlhttprequest,domain=~animeland.hu"#; - let network_filter = NetworkFilter::parse(filter, true, Default::default()).unwrap(); - let url = "https://data.foo.com/9VjjrjU9Or2aqkb8PDiqTBnULPgeI48WmYEHkYer"; - let source = "http://123movies.com"; - let request = request::Request::new(url, source, "script").unwrap(); - let mut regex_manager = RegexManager::default(); - assert!(regex_manager.get_compiled_regex_count() == 0); - assert!( - network_filter.matches(&request, &mut regex_manager) == true, - "Expected match for {} on {}", - filter, - url - ); - assert!(regex_manager.get_compiled_regex_count() == 1); - } - } - - #[test] - fn check_empty_host_anchor_matches() { - { - let filter = "||$domain=auth.wi-fi.ru"; - let network_filter = NetworkFilter::parse(filter, true, Default::default()).unwrap(); - let url = "https://example.com/ad.js"; - let source = "http://auth.wi-fi.ru"; - let request = request::Request::new(url, source, "script").unwrap(); - assert!( - network_filter.matches_test(&request) == true, - "Expected match for {} on {}", - filter, - url - ); - } - { - let filter = "@@||$domain=auth.wi-fi.ru"; - let network_filter = NetworkFilter::parse(filter, true, Default::default()).unwrap(); - let url = "https://example.com/ad.js"; - let source = "http://auth.wi-fi.ru"; - let request = request::Request::new(url, source, "script").unwrap(); - assert!( - network_filter.matches_test(&request) == true, - "Expected match for {} on {}", - filter, - url - ); - } - } - - #[test] - fn check_url_path_regex_matches() { - { - let filter = "@@||www.google.com/aclk?*&adurl=$document,~third-party"; - let network_filter = NetworkFilter::parse(filter, true, Default::default()).unwrap(); - let url = "https://www.google.com/aclk?sa=l&ai=DChcSEwioqMfq5ovjAhVvte0KHXBYDKoYABAJGgJkZw&sig=AOD64_0IL5OYOIkZA7qWOBt0yRmKL4hKJw&ctype=5&q=&ved=0ahUKEwjQ88Hq5ovjAhXYiVwKHWAgB5gQww8IXg&adurl="; - let source = "https://www.google.com/aclk?sa=l&ai=DChcSEwioqMfq5ovjAhVvte0KHXBYDKoYABAJGgJkZw&sig=AOD64_0IL5OYOIkZA7qWOBt0yRmKL4hKJw&ctype=5&q=&ved=0ahUKEwjQ88Hq5ovjAhXYiVwKHWAgB5gQww8IXg&adurl="; - let request = request::Request::new(url, source, "document").unwrap(); - assert!(!request.is_third_party); - assert!( - network_filter.matches_test(&request) == true, - "Expected match for {} on {}", - filter, - url - ); - } - { - let filter = "@@||www.google.*/aclk?$first-party"; - let network_filter = NetworkFilter::parse(filter, true, Default::default()).unwrap(); - let url = "https://www.google.com/aclk?sa=l&ai=DChcSEwioqMfq5ovjAhVvte0KHXBYDKoYABAJGgJkZw&sig=AOD64_0IL5OYOIkZA7qWOBt0yRmKL4hKJw&ctype=5&q=&ved=0ahUKEwjQ88Hq5ovjAhXYiVwKHWAgB5gQww8IXg&adurl="; - let source = "https://www.google.com/aclk?sa=l&ai=DChcSEwioqMfq5ovjAhVvte0KHXBYDKoYABAJGgJkZw&sig=AOD64_0IL5OYOIkZA7qWOBt0yRmKL4hKJw&ctype=5&q=&ved=0ahUKEwjQ88Hq5ovjAhXYiVwKHWAgB5gQww8IXg&adurl="; - let request = request::Request::new(url, source, "main_frame").unwrap(); - assert!(!request.is_third_party); - assert!( - network_filter.matches_test(&request) == true, - "Expected match for {} on {}", - filter, - url - ); - } - } - - #[test] - fn check_get_url_after_hostname_handles_bad_input() { - // The function requires the hostname to necessarily be there in the URL, - // but should fail gracefully if that is not the case. - // Graceful failure here is returning an empty string for the rest of the URL - assert_eq!(get_url_after_hostname("https://www.google.com/ad", "google.com"), "/ad"); - assert_eq!(get_url_after_hostname("https://www.google.com/?aclksa=l&ai=DChcSEwioqMfq5", "google.com"), "/?aclksa=l&ai=DChcSEwioqMfq5"); - assert_eq!(get_url_after_hostname("https://www.google.com/?aclksa=l&ai=DChcSEwioqMfq5", "www.google.com"), "/?aclksa=l&ai=DChcSEwioqMfq5"); - assert_eq!(get_url_after_hostname("https://www.youtube.com/?aclksa=l&ai=DChcSEwioqMfq5", "google.com"), ""); - } -} - -#[cfg(test)] -mod hash_collision_tests { - use super::*; - - use crate::test_utils; - use crate::lists::parse_filters; - use std::collections::HashMap; - - #[test] - fn check_rule_ids_no_collisions() { - let rules = test_utils::rules_from_lists([ - "data/easylist.to/easylist/easylist.txt", - "data/easylist.to/easylist/easyprivacy.txt", - ]).filter(|f| f != "||www.bred4tula.com^"); // remove known collision - let (network_filters, _) = parse_filters(rules, true, Default::default()); - - let mut filter_ids: HashMap = HashMap::new(); - - for filter in network_filters { - let id = filter.get_id(); - let rule = *filter.raw_line.unwrap_or_default(); - let existing_rule = filter_ids.get(&id); - assert!(existing_rule.is_none() || existing_rule.unwrap() == &rule, "ID {} for {} already present from {}", id, rule, existing_rule.unwrap()); - filter_ids.insert(id, rule); - } - } -} +#[path = "../../tests/unit/filters/network.rs"] +mod unit_tests; diff --git a/src/filters/network_matchers.rs b/src/filters/network_matchers.rs new file mode 100644 index 00000000..2d379ceb --- /dev/null +++ b/src/filters/network_matchers.rs @@ -0,0 +1,573 @@ +// --------------------------------------------------------------------------- +// Filter matching +// --------------------------------------------------------------------------- + +use std::collections::HashMap; + +use memchr::memmem; + +use crate::filters::network::NetworkFilterMask; +use crate::regex_manager::RegexManager; +use crate::request; +use crate::utils::{self, Hash}; + +impl NetworkFilterMask { + #[inline(always)] + pub fn match_case(&self) -> bool { + self.contains(NetworkFilterMask::MATCH_CASE) + } + + #[inline(always)] + pub fn is_hostname_anchor(&self) -> bool { + self.contains(NetworkFilterMask::IS_HOSTNAME_ANCHOR) + } + + #[inline(always)] + pub fn is_right_anchor(&self) -> bool { + self.contains(NetworkFilterMask::IS_RIGHT_ANCHOR) + } + + #[inline(always)] + pub fn is_left_anchor(&self) -> bool { + self.contains(NetworkFilterMask::IS_LEFT_ANCHOR) + } + + #[inline(always)] + pub fn is_regex(&self) -> bool { + self.contains(NetworkFilterMask::IS_REGEX) + } + + #[inline(always)] + pub fn is_complete_regex(&self) -> bool { + self.contains(NetworkFilterMask::IS_COMPLETE_REGEX) + } + + #[inline(always)] + pub fn is_badfilter(&self) -> bool { + self.contains(NetworkFilterMask::BAD_FILTER) + } + + #[inline(always)] + pub fn is_exception(&self) -> bool { + self.contains(NetworkFilterMask::IS_EXCEPTION) + } + + #[inline(always)] + pub fn third_party(&self) -> bool { + self.contains(NetworkFilterMask::THIRD_PARTY) + } + + #[inline(always)] + pub fn first_party(&self) -> bool { + self.contains(NetworkFilterMask::FIRST_PARTY) + } + + #[inline(always)] + pub fn for_http(&self) -> bool { + self.contains(NetworkFilterMask::FROM_HTTP) + } + + #[inline(always)] + pub fn for_https(&self) -> bool { + self.contains(NetworkFilterMask::FROM_HTTPS) + } + + #[inline(always)] + pub fn check_cpt_allowed(&self, cpt: &request::RequestType) -> bool { + match NetworkFilterMask::from(cpt) { + // TODO this is not ideal, but required to allow regexed exception rules without an + // explicit `$document` option to apply uBO-style. + // See also: https://github.com/uBlockOrigin/uBlock-issues/issues/1501 + NetworkFilterMask::FROM_DOCUMENT => { + self.contains(NetworkFilterMask::FROM_DOCUMENT) || self.is_exception() + } + mask => self.contains(mask), + } + } +} + +fn get_url_after_hostname<'a>(url: &'a str, hostname: &str) -> &'a str { + let start = + memmem::find(url.as_bytes(), hostname.as_bytes()).unwrap_or(url.len() - hostname.len()); + &url[start + hostname.len()..] +} + +/// Handle hostname anchored filters, given 'hostname' from ||hostname and +/// request's hostname, check if there is a match. This is tricky because +/// filters authors rely and different assumption. We can have prefix of suffix +/// matches of anchor. +fn is_anchored_by_hostname( + filter_hostname: &str, + hostname: &str, + wildcard_filter_hostname: bool, +) -> bool { + let filter_hostname_len = filter_hostname.len(); + // Corner-case, if `filterHostname` is empty, then it's a match + if filter_hostname_len == 0 { + return true; + } + let hostname_len = hostname.len(); + + if filter_hostname_len > hostname_len { + // `filterHostname` cannot be longer than actual hostname + false + } else if filter_hostname_len == hostname_len { + // If they have the same len(), they should be equal + filter_hostname == hostname + } else if let Some(match_index) = memmem::find(hostname.as_bytes(), filter_hostname.as_bytes()) + { + if match_index == 0 { + // `filter_hostname` is a prefix of `hostname` and needs to match full a label. + // + // Examples (filter_hostname, hostname): + // * (foo, foo.com) + // * (sub.foo, sub.foo.com) + wildcard_filter_hostname + || filter_hostname.ends_with('.') + || hostname[filter_hostname_len..].starts_with('.') + } else if match_index == hostname_len - filter_hostname_len { + // `filter_hostname` is a suffix of `hostname`. + // + // Examples (filter_hostname, hostname): + // * (foo.com, sub.foo.com) + // * (com, foo.com) + filter_hostname.starts_with('.') || hostname[match_index - 1..].starts_with('.') + } else { + // `filter_hostname` is infix of `hostname` and needs match full labels + (wildcard_filter_hostname + || filter_hostname.ends_with('.') + || hostname[filter_hostname_len..].starts_with('.')) + && (filter_hostname.starts_with('.') + || hostname[match_index - 1..].starts_with('.')) + } + } else { + // No match + false + } +} + +// pattern +fn check_pattern_plain_filter_filter<'a, FiltersIter>( + mask: NetworkFilterMask, + mut filters: FiltersIter, + request: &request::Request, +) -> bool +where + FiltersIter: Iterator + ExactSizeIterator, +{ + if filters.len() == 0 { + return true; + } + let request_url = request.get_url(mask.match_case()); + filters.any(|f| memmem::find(request_url.as_bytes(), f.as_bytes()).is_some()) +} + +// pattern| +fn check_pattern_right_anchor_filter<'a, FiltersIter>( + mask: NetworkFilterMask, + mut filters: FiltersIter, + request: &request::Request, +) -> bool +where + FiltersIter: Iterator + ExactSizeIterator, +{ + if filters.len() == 0 { + return true; + } + let request_url = request.get_url(mask.match_case()); + filters.any(|f| request_url.ends_with(f)) +} + +// |pattern +fn check_pattern_left_anchor_filter<'a, FiltersIter>( + mask: NetworkFilterMask, + mut filters: FiltersIter, + request: &request::Request, +) -> bool +where + FiltersIter: Iterator + ExactSizeIterator, +{ + if filters.len() == 0 { + return true; + } + let request_url = request.get_url(mask.match_case()); + filters.any(|f| request_url.starts_with(f)) +} + +// |pattern| +fn check_pattern_left_right_anchor_filter<'a, FiltersIter>( + mask: NetworkFilterMask, + mut filters: FiltersIter, + request: &request::Request, +) -> bool +where + FiltersIter: Iterator + ExactSizeIterator, +{ + if filters.len() == 0 { + return true; + } + let request_url = request.get_url(mask.match_case()); + filters.any(|f| &request_url == f) +} + +// pattern*^ +fn check_pattern_regex_filter_at<'a, FiltersIter>( + mask: NetworkFilterMask, + filters: FiltersIter, + key: u64, + request: &request::Request, + start_from: usize, + regex_manager: &mut RegexManager, +) -> bool +where + FiltersIter: Iterator + ExactSizeIterator, +{ + if filters.len() == 0 { + return true; + } + let request_url = request.get_url(mask.match_case()); + regex_manager.matches(mask, filters, key, &request_url[start_from..]) +} + +fn check_pattern_regex_filter<'a, FiltersIter>( + mask: NetworkFilterMask, + filters: FiltersIter, + key: u64, + request: &request::Request, + regex_manager: &mut RegexManager, +) -> bool +where + FiltersIter: Iterator + ExactSizeIterator, +{ + check_pattern_regex_filter_at(mask, filters, key, request, 0, regex_manager) +} + +// ||pattern*^ +fn check_pattern_hostname_anchor_regex_filter<'a, FiltersIter>( + mask: NetworkFilterMask, + filters: FiltersIter, + hostname: Option<&'a str>, + key: u64, + request: &request::Request, + regex_manager: &mut RegexManager, +) -> bool +where + FiltersIter: Iterator + ExactSizeIterator, +{ + hostname + .as_ref() + .map(|hostname| { + if is_anchored_by_hostname( + hostname, + &request.hostname, + mask.contains(NetworkFilterMask::IS_HOSTNAME_REGEX), + ) { + let request_url = request.get_url(mask.match_case()); + check_pattern_regex_filter_at( + mask, + filters, + key, + request, + memmem::find(request_url.as_bytes(), hostname.as_bytes()).unwrap_or_default() + + hostname.len(), + regex_manager, + ) + } else { + false + } + }) + .unwrap_or_else(|| unreachable!()) // no match if filter has no hostname - should be unreachable +} + +// ||pattern| +fn check_pattern_hostname_right_anchor_filter<'a, FiltersIter>( + mask: NetworkFilterMask, + filters: FiltersIter, + hostname: Option<&'a str>, + request: &request::Request, +) -> bool +where + FiltersIter: Iterator + ExactSizeIterator, +{ + hostname + .as_ref() + .map(|hostname| { + if is_anchored_by_hostname( + hostname, + &request.hostname, + mask.contains(NetworkFilterMask::IS_HOSTNAME_REGEX), + ) { + if filters.len() == 0 { + // In this specific case it means that the specified hostname should match + // at the end of the hostname of the request. This allows to prevent false + // positive like ||foo.bar which would match https://foo.bar.baz where + // ||foo.bar^ would not. + request.hostname.len() == hostname.len() // if lengths are equal, hostname equality is implied by anchoring check + || request.hostname.ends_with(hostname) + } else { + check_pattern_right_anchor_filter(mask, filters, request) + } + } else { + false + } + }) + .unwrap_or_else(|| unreachable!()) // no match if filter has no hostname - should be unreachable +} + +// |||pattern| +fn check_pattern_hostname_left_right_anchor_filter<'a, FiltersIter>( + mask: NetworkFilterMask, + mut filters: FiltersIter, + hostname: Option<&'a str>, + request: &request::Request, +) -> bool +where + FiltersIter: Iterator + ExactSizeIterator, +{ + // Since this is not a regex, the filter pattern must follow the hostname + // with nothing in between. So we extract the part of the URL following + // after hostname and will perform the matching on it. + + hostname + .as_ref() + .map(|hostname| { + if is_anchored_by_hostname( + hostname, + &request.hostname, + mask.contains(NetworkFilterMask::IS_HOSTNAME_REGEX), + ) { + if filters.len() == 0 { + return true; + } + let request_url = request.get_url(mask.match_case()); + let url_after_hostname = get_url_after_hostname(&request_url, hostname); + filters.any(|f| { + // Since it must follow immediatly after the hostname and be a suffix of + // the URL, we conclude that filter must be equal to the part of the + // url following the hostname. + url_after_hostname == f + }) + } else { + false + } + }) + .unwrap_or_else(|| unreachable!()) // no match if filter has no hostname - should be unreachable +} + +// ||pattern + left-anchor => This means that a plain pattern needs to appear +// exactly after the hostname, with nothing in between. +fn check_pattern_hostname_left_anchor_filter<'a, FiltersIter>( + mask: NetworkFilterMask, + mut filters: FiltersIter, + hostname: Option<&'a str>, + request: &request::Request, +) -> bool +where + FiltersIter: Iterator + ExactSizeIterator, +{ + hostname + .as_ref() + .map(|hostname| { + if is_anchored_by_hostname( + hostname, + &request.hostname, + mask.contains(NetworkFilterMask::IS_HOSTNAME_REGEX), + ) { + if filters.len() == 0 { + return true; + } else { + let request_url = request.get_url(mask.match_case()); + let url_after_hostname = get_url_after_hostname(&request_url, hostname); + filters.any(|f| { + // Since this is not a regex, the filter pattern must follow the hostname + // with nothing in between. So we extract the part of the URL following + // after hostname and will perform the matching on it. + url_after_hostname.starts_with(f) + }) + } + } else { + false + } + }) + .unwrap_or_else(|| unreachable!()) // no match if filter has no hostname - should be unreachable +} + +// ||pattern +fn check_pattern_hostname_anchor_filter<'a, FiltersIter>( + mask: NetworkFilterMask, + mut filters: FiltersIter, + hostname: Option<&'a str>, + request: &request::Request, +) -> bool +where + FiltersIter: Iterator + ExactSizeIterator, +{ + hostname + .as_ref() + .map(|hostname| { + if is_anchored_by_hostname( + hostname, + &request.hostname, + mask.contains(NetworkFilterMask::IS_HOSTNAME_REGEX), + ) { + if filters.len() == 0 { + return true; + } + let request_url = request.get_url(mask.match_case()); + let url_after_hostname = get_url_after_hostname(&request_url, hostname); + filters.any(|f| { + // Filter hostname does not necessarily have to be a full, proper hostname, part of it can be lumped together with the URL + (*url_after_hostname).contains(f) + }) + } else { + false + } + }) + .unwrap_or_else(|| unreachable!()) // no match if filter has no hostname - should be unreachable +} + +/// Efficiently checks if a certain network filter matches against a network +/// request. +pub fn check_pattern<'a, FiltersIter>( + mask: NetworkFilterMask, + filters: FiltersIter, + hostname: Option<&'a str>, + key: u64, + request: &request::Request, + regex_manager: &mut RegexManager, +) -> bool +where + FiltersIter: Iterator + ExactSizeIterator, +{ + if mask.is_hostname_anchor() { + if mask.is_regex() { + check_pattern_hostname_anchor_regex_filter( + mask, + filters, + hostname, + key, + request, + regex_manager, + ) + } else if mask.is_right_anchor() && mask.is_left_anchor() { + check_pattern_hostname_left_right_anchor_filter(mask, filters, hostname, request) + } else if mask.is_right_anchor() { + check_pattern_hostname_right_anchor_filter(mask, filters, hostname, request) + } else if mask.is_left_anchor() { + check_pattern_hostname_left_anchor_filter(mask, filters, hostname, request) + } else { + check_pattern_hostname_anchor_filter(mask, filters, hostname, request) + } + } else if mask.is_regex() || mask.is_complete_regex() { + check_pattern_regex_filter(mask, filters, key, request, regex_manager) + } else if mask.is_left_anchor() && mask.is_right_anchor() { + check_pattern_left_right_anchor_filter(mask, filters, request) + } else if mask.is_left_anchor() { + check_pattern_left_anchor_filter(mask, filters, request) + } else if mask.is_right_anchor() { + check_pattern_right_anchor_filter(mask, filters, request) + } else { + check_pattern_plain_filter_filter(mask, filters, request) + } +} + +#[inline] +pub fn check_options(mask: NetworkFilterMask, request: &request::Request) -> bool { + // Bad filter never matches + if mask.is_badfilter() { + return false; + } + // We first discard requests based on type, protocol and party. This is really + // cheap and should be done first. + if !mask.check_cpt_allowed(&request.request_type) + || (request.is_https && !mask.for_https()) + || (request.is_http && !mask.for_http()) + || (!mask.first_party() && !request.is_third_party) + || (!mask.third_party() && request.is_third_party) + { + return false; + } + + true +} + +#[inline] +pub fn check_included_domains(opt_domains: Option<&[Hash]>, request: &request::Request) -> bool { + // Source URL must be among these domains to match + if let Some(included_domains) = opt_domains.as_ref() { + if let Some(source_hashes) = request.source_hostname_hashes.as_ref() { + if source_hashes + .iter() + .all(|h| !utils::bin_lookup(included_domains, *h)) + { + return false; + } + } + } + true +} + +#[inline] +pub fn check_included_domains_mapped( + opt_domains: Option<&[u16]>, + request: &request::Request, + mapping: &HashMap, +) -> bool { + // Source URL must be among these domains to match + if let Some(included_domains) = opt_domains.as_ref() { + if let Some(source_hashes) = request.source_hostname_hashes.as_ref() { + if source_hashes.iter().all(|h| { + mapping + .get(h) + .map_or(true, |index| !utils::bin_lookup(included_domains, *index)) + }) { + return false; + } + } + } + true +} + +#[inline] +pub fn check_excluded_domains( + opt_not_domains: Option<&[Hash]>, + request: &request::Request, +) -> bool { + if let Some(excluded_domains) = opt_not_domains.as_ref() { + if let Some(source_hashes) = request.source_hostname_hashes.as_ref() { + if source_hashes + .iter() + .any(|h| utils::bin_lookup(excluded_domains, *h)) + { + return false; + } + } + } + + true +} + +#[inline] +pub fn check_excluded_domains_mapped( + opt_not_domains: Option<&[u16]>, + request: &request::Request, + mapping: &HashMap, +) -> bool { + if let Some(excluded_domains) = opt_not_domains.as_ref() { + if let Some(source_hashes) = request.source_hostname_hashes.as_ref() { + if source_hashes.iter().any(|h| { + mapping + .get(h) + .map_or(false, |index| utils::bin_lookup(excluded_domains, *index)) + }) { + return false; + } + } + } + + true +} + +#[cfg(test)] +#[path = "../../tests/unit/filters/network_matchers.rs"] +mod unit_tests; diff --git a/src/flat/fb_network_filter.fbs b/src/flat/fb_network_filter.fbs new file mode 100644 index 00000000..bc815829 --- /dev/null +++ b/src/flat/fb_network_filter.fbs @@ -0,0 +1,27 @@ +// flatc --rust --gen-object-api -o src/flat/ src/flat/network_filter.fbs + +namespace fb; + +table NetworkFilter { + mask: uint32; // NetworkFilterMask (network.rs) + + // These arrays contain sorted(asc) indecies in the |unique_domains_hashes| + // instead of hashes itself. It saves a lot of memory because there + // aren't many unique hashes. + opt_domains: [uint16]; + opt_not_domains: [uint16]; + + patterns: [string]; + modifier_option: string; + hostname: string; + + tag: string; + raw_line: string; +} + +table NetworkFilterList { + network_filters: [NetworkFilter] (required); + unique_domains_hashes: [uint64] (required); +} + +root_type NetworkFilterList; diff --git a/src/flat/fb_network_filter_generated.rs b/src/flat/fb_network_filter_generated.rs new file mode 100644 index 00000000..6fe91441 --- /dev/null +++ b/src/flat/fb_network_filter_generated.rs @@ -0,0 +1,739 @@ +// automatically generated by the FlatBuffers compiler, do not modify + +// @generated + +use core::cmp::Ordering; +use core::mem; + +extern crate flatbuffers; +use self::flatbuffers::{EndianScalar, Follow}; + +#[allow(unused_imports, dead_code)] +pub mod fb { + + use core::cmp::Ordering; + use core::mem; + + extern crate flatbuffers; + use self::flatbuffers::{EndianScalar, Follow}; + + pub enum NetworkFilterOffset {} + #[derive(Copy, Clone, PartialEq)] + + pub struct NetworkFilter<'a> { + pub _tab: flatbuffers::Table<'a>, + } + + impl<'a> flatbuffers::Follow<'a> for NetworkFilter<'a> { + type Inner = NetworkFilter<'a>; + #[inline] + unsafe fn follow(buf: &'a [u8], loc: usize) -> Self::Inner { + Self { + _tab: flatbuffers::Table::new(buf, loc), + } + } + } + + impl<'a> NetworkFilter<'a> { + pub const VT_MASK: flatbuffers::VOffsetT = 4; + pub const VT_OPT_DOMAINS: flatbuffers::VOffsetT = 6; + pub const VT_OPT_NOT_DOMAINS: flatbuffers::VOffsetT = 8; + pub const VT_PATTERNS: flatbuffers::VOffsetT = 10; + pub const VT_MODIFIER_OPTION: flatbuffers::VOffsetT = 12; + pub const VT_HOSTNAME: flatbuffers::VOffsetT = 14; + pub const VT_TAG: flatbuffers::VOffsetT = 16; + pub const VT_RAW_LINE: flatbuffers::VOffsetT = 18; + + #[inline] + pub unsafe fn init_from_table(table: flatbuffers::Table<'a>) -> Self { + NetworkFilter { _tab: table } + } + #[allow(unused_mut)] + pub fn create< + 'bldr: 'args, + 'args: 'mut_bldr, + 'mut_bldr, + A: flatbuffers::Allocator + 'bldr, + >( + _fbb: &'mut_bldr mut flatbuffers::FlatBufferBuilder<'bldr, A>, + args: &'args NetworkFilterArgs<'args>, + ) -> flatbuffers::WIPOffset> { + let mut builder = NetworkFilterBuilder::new(_fbb); + if let Some(x) = args.raw_line { + builder.add_raw_line(x); + } + if let Some(x) = args.tag { + builder.add_tag(x); + } + if let Some(x) = args.hostname { + builder.add_hostname(x); + } + if let Some(x) = args.modifier_option { + builder.add_modifier_option(x); + } + if let Some(x) = args.patterns { + builder.add_patterns(x); + } + if let Some(x) = args.opt_not_domains { + builder.add_opt_not_domains(x); + } + if let Some(x) = args.opt_domains { + builder.add_opt_domains(x); + } + builder.add_mask(args.mask); + builder.finish() + } + + pub fn unpack(&self) -> NetworkFilterT { + let mask = self.mask(); + let opt_domains = self.opt_domains().map(|x| x.into_iter().collect()); + let opt_not_domains = self.opt_not_domains().map(|x| x.into_iter().collect()); + let patterns = self + .patterns() + .map(|x| x.iter().map(|s| s.to_string()).collect()); + let modifier_option = self.modifier_option().map(|x| x.to_string()); + let hostname = self.hostname().map(|x| x.to_string()); + let tag = self.tag().map(|x| x.to_string()); + let raw_line = self.raw_line().map(|x| x.to_string()); + NetworkFilterT { + mask, + opt_domains, + opt_not_domains, + patterns, + modifier_option, + hostname, + tag, + raw_line, + } + } + + #[inline] + pub fn mask(&self) -> u32 { + // Safety: + // Created from valid Table for this object + // which contains a valid value in this slot + unsafe { + self._tab + .get::(NetworkFilter::VT_MASK, Some(0)) + .unwrap() + } + } + #[inline] + pub fn opt_domains(&self) -> Option> { + // Safety: + // Created from valid Table for this object + // which contains a valid value in this slot + unsafe { + self._tab + .get::>>( + NetworkFilter::VT_OPT_DOMAINS, + None, + ) + } + } + #[inline] + pub fn opt_not_domains(&self) -> Option> { + // Safety: + // Created from valid Table for this object + // which contains a valid value in this slot + unsafe { + self._tab + .get::>>( + NetworkFilter::VT_OPT_NOT_DOMAINS, + None, + ) + } + } + #[inline] + pub fn patterns( + &self, + ) -> Option>> { + // Safety: + // Created from valid Table for this object + // which contains a valid value in this slot + unsafe { + self._tab.get::>, + >>(NetworkFilter::VT_PATTERNS, None) + } + } + #[inline] + pub fn modifier_option(&self) -> Option<&'a str> { + // Safety: + // Created from valid Table for this object + // which contains a valid value in this slot + unsafe { + self._tab.get::>( + NetworkFilter::VT_MODIFIER_OPTION, + None, + ) + } + } + #[inline] + pub fn hostname(&self) -> Option<&'a str> { + // Safety: + // Created from valid Table for this object + // which contains a valid value in this slot + unsafe { + self._tab + .get::>(NetworkFilter::VT_HOSTNAME, None) + } + } + #[inline] + pub fn tag(&self) -> Option<&'a str> { + // Safety: + // Created from valid Table for this object + // which contains a valid value in this slot + unsafe { + self._tab + .get::>(NetworkFilter::VT_TAG, None) + } + } + #[inline] + pub fn raw_line(&self) -> Option<&'a str> { + // Safety: + // Created from valid Table for this object + // which contains a valid value in this slot + unsafe { + self._tab + .get::>(NetworkFilter::VT_RAW_LINE, None) + } + } + } + + impl flatbuffers::Verifiable for NetworkFilter<'_> { + #[inline] + fn run_verifier( + v: &mut flatbuffers::Verifier, + pos: usize, + ) -> Result<(), flatbuffers::InvalidFlatbuffer> { + use self::flatbuffers::Verifiable; + v.visit_table(pos)? + .visit_field::("mask", Self::VT_MASK, false)? + .visit_field::>>( + "opt_domains", + Self::VT_OPT_DOMAINS, + false, + )? + .visit_field::>>( + "opt_not_domains", + Self::VT_OPT_NOT_DOMAINS, + false, + )? + .visit_field::>, + >>("patterns", Self::VT_PATTERNS, false)? + .visit_field::>( + "modifier_option", + Self::VT_MODIFIER_OPTION, + false, + )? + .visit_field::>( + "hostname", + Self::VT_HOSTNAME, + false, + )? + .visit_field::>("tag", Self::VT_TAG, false)? + .visit_field::>( + "raw_line", + Self::VT_RAW_LINE, + false, + )? + .finish(); + Ok(()) + } + } + pub struct NetworkFilterArgs<'a> { + pub mask: u32, + pub opt_domains: Option>>, + pub opt_not_domains: Option>>, + pub patterns: Option< + flatbuffers::WIPOffset>>, + >, + pub modifier_option: Option>, + pub hostname: Option>, + pub tag: Option>, + pub raw_line: Option>, + } + impl<'a> Default for NetworkFilterArgs<'a> { + #[inline] + fn default() -> Self { + NetworkFilterArgs { + mask: 0, + opt_domains: None, + opt_not_domains: None, + patterns: None, + modifier_option: None, + hostname: None, + tag: None, + raw_line: None, + } + } + } + + pub struct NetworkFilterBuilder<'a: 'b, 'b, A: flatbuffers::Allocator + 'a> { + fbb_: &'b mut flatbuffers::FlatBufferBuilder<'a, A>, + start_: flatbuffers::WIPOffset, + } + impl<'a: 'b, 'b, A: flatbuffers::Allocator + 'a> NetworkFilterBuilder<'a, 'b, A> { + #[inline] + pub fn add_mask(&mut self, mask: u32) { + self.fbb_.push_slot::(NetworkFilter::VT_MASK, mask, 0); + } + #[inline] + pub fn add_opt_domains( + &mut self, + opt_domains: flatbuffers::WIPOffset>, + ) { + self.fbb_.push_slot_always::>( + NetworkFilter::VT_OPT_DOMAINS, + opt_domains, + ); + } + #[inline] + pub fn add_opt_not_domains( + &mut self, + opt_not_domains: flatbuffers::WIPOffset>, + ) { + self.fbb_.push_slot_always::>( + NetworkFilter::VT_OPT_NOT_DOMAINS, + opt_not_domains, + ); + } + #[inline] + pub fn add_patterns( + &mut self, + patterns: flatbuffers::WIPOffset< + flatbuffers::Vector<'b, flatbuffers::ForwardsUOffset<&'b str>>, + >, + ) { + self.fbb_.push_slot_always::>( + NetworkFilter::VT_PATTERNS, + patterns, + ); + } + #[inline] + pub fn add_modifier_option(&mut self, modifier_option: flatbuffers::WIPOffset<&'b str>) { + self.fbb_.push_slot_always::>( + NetworkFilter::VT_MODIFIER_OPTION, + modifier_option, + ); + } + #[inline] + pub fn add_hostname(&mut self, hostname: flatbuffers::WIPOffset<&'b str>) { + self.fbb_.push_slot_always::>( + NetworkFilter::VT_HOSTNAME, + hostname, + ); + } + #[inline] + pub fn add_tag(&mut self, tag: flatbuffers::WIPOffset<&'b str>) { + self.fbb_ + .push_slot_always::>(NetworkFilter::VT_TAG, tag); + } + #[inline] + pub fn add_raw_line(&mut self, raw_line: flatbuffers::WIPOffset<&'b str>) { + self.fbb_.push_slot_always::>( + NetworkFilter::VT_RAW_LINE, + raw_line, + ); + } + #[inline] + pub fn new( + _fbb: &'b mut flatbuffers::FlatBufferBuilder<'a, A>, + ) -> NetworkFilterBuilder<'a, 'b, A> { + let start = _fbb.start_table(); + NetworkFilterBuilder { + fbb_: _fbb, + start_: start, + } + } + #[inline] + pub fn finish(self) -> flatbuffers::WIPOffset> { + let o = self.fbb_.end_table(self.start_); + flatbuffers::WIPOffset::new(o.value()) + } + } + + impl core::fmt::Debug for NetworkFilter<'_> { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + let mut ds = f.debug_struct("NetworkFilter"); + ds.field("mask", &self.mask()); + ds.field("opt_domains", &self.opt_domains()); + ds.field("opt_not_domains", &self.opt_not_domains()); + ds.field("patterns", &self.patterns()); + ds.field("modifier_option", &self.modifier_option()); + ds.field("hostname", &self.hostname()); + ds.field("tag", &self.tag()); + ds.field("raw_line", &self.raw_line()); + ds.finish() + } + } + #[non_exhaustive] + #[derive(Debug, Clone, PartialEq)] + pub struct NetworkFilterT { + pub mask: u32, + pub opt_domains: Option>, + pub opt_not_domains: Option>, + pub patterns: Option>, + pub modifier_option: Option, + pub hostname: Option, + pub tag: Option, + pub raw_line: Option, + } + impl Default for NetworkFilterT { + fn default() -> Self { + Self { + mask: 0, + opt_domains: None, + opt_not_domains: None, + patterns: None, + modifier_option: None, + hostname: None, + tag: None, + raw_line: None, + } + } + } + impl NetworkFilterT { + pub fn pack<'b, A: flatbuffers::Allocator + 'b>( + &self, + _fbb: &mut flatbuffers::FlatBufferBuilder<'b, A>, + ) -> flatbuffers::WIPOffset> { + let mask = self.mask; + let opt_domains = self.opt_domains.as_ref().map(|x| _fbb.create_vector(x)); + let opt_not_domains = self.opt_not_domains.as_ref().map(|x| _fbb.create_vector(x)); + let patterns = self.patterns.as_ref().map(|x| { + let w: Vec<_> = x.iter().map(|s| _fbb.create_string(s)).collect(); + _fbb.create_vector(&w) + }); + let modifier_option = self.modifier_option.as_ref().map(|x| _fbb.create_string(x)); + let hostname = self.hostname.as_ref().map(|x| _fbb.create_string(x)); + let tag = self.tag.as_ref().map(|x| _fbb.create_string(x)); + let raw_line = self.raw_line.as_ref().map(|x| _fbb.create_string(x)); + NetworkFilter::create( + _fbb, + &NetworkFilterArgs { + mask, + opt_domains, + opt_not_domains, + patterns, + modifier_option, + hostname, + tag, + raw_line, + }, + ) + } + } + pub enum NetworkFilterListOffset {} + #[derive(Copy, Clone, PartialEq)] + + pub struct NetworkFilterList<'a> { + pub _tab: flatbuffers::Table<'a>, + } + + impl<'a> flatbuffers::Follow<'a> for NetworkFilterList<'a> { + type Inner = NetworkFilterList<'a>; + #[inline] + unsafe fn follow(buf: &'a [u8], loc: usize) -> Self::Inner { + Self { + _tab: flatbuffers::Table::new(buf, loc), + } + } + } + + impl<'a> NetworkFilterList<'a> { + pub const VT_NETWORK_FILTERS: flatbuffers::VOffsetT = 4; + pub const VT_UNIQUE_DOMAINS_HASHES: flatbuffers::VOffsetT = 6; + + #[inline] + pub unsafe fn init_from_table(table: flatbuffers::Table<'a>) -> Self { + NetworkFilterList { _tab: table } + } + #[allow(unused_mut)] + pub fn create< + 'bldr: 'args, + 'args: 'mut_bldr, + 'mut_bldr, + A: flatbuffers::Allocator + 'bldr, + >( + _fbb: &'mut_bldr mut flatbuffers::FlatBufferBuilder<'bldr, A>, + args: &'args NetworkFilterListArgs<'args>, + ) -> flatbuffers::WIPOffset> { + let mut builder = NetworkFilterListBuilder::new(_fbb); + if let Some(x) = args.unique_domains_hashes { + builder.add_unique_domains_hashes(x); + } + if let Some(x) = args.network_filters { + builder.add_network_filters(x); + } + builder.finish() + } + + pub fn unpack(&self) -> NetworkFilterListT { + let network_filters = { + let x = self.network_filters(); + x.iter().map(|t| t.unpack()).collect() + }; + let unique_domains_hashes = { + let x = self.unique_domains_hashes(); + x.into_iter().collect() + }; + NetworkFilterListT { + network_filters, + unique_domains_hashes, + } + } + + #[inline] + pub fn network_filters( + &self, + ) -> flatbuffers::Vector<'a, flatbuffers::ForwardsUOffset>> { + // Safety: + // Created from valid Table for this object + // which contains a valid value in this slot + unsafe { + self._tab + .get::>, + >>(NetworkFilterList::VT_NETWORK_FILTERS, None) + .unwrap() + } + } + #[inline] + pub fn unique_domains_hashes(&self) -> flatbuffers::Vector<'a, u64> { + // Safety: + // Created from valid Table for this object + // which contains a valid value in this slot + unsafe { + self._tab + .get::>>( + NetworkFilterList::VT_UNIQUE_DOMAINS_HASHES, + None, + ) + .unwrap() + } + } + } + + impl flatbuffers::Verifiable for NetworkFilterList<'_> { + #[inline] + fn run_verifier( + v: &mut flatbuffers::Verifier, + pos: usize, + ) -> Result<(), flatbuffers::InvalidFlatbuffer> { + use self::flatbuffers::Verifiable; + v.visit_table(pos)? + .visit_field::>, + >>("network_filters", Self::VT_NETWORK_FILTERS, true)? + .visit_field::>>( + "unique_domains_hashes", + Self::VT_UNIQUE_DOMAINS_HASHES, + true, + )? + .finish(); + Ok(()) + } + } + pub struct NetworkFilterListArgs<'a> { + pub network_filters: Option< + flatbuffers::WIPOffset< + flatbuffers::Vector<'a, flatbuffers::ForwardsUOffset>>, + >, + >, + pub unique_domains_hashes: Option>>, + } + impl<'a> Default for NetworkFilterListArgs<'a> { + #[inline] + fn default() -> Self { + NetworkFilterListArgs { + network_filters: None, // required field + unique_domains_hashes: None, // required field + } + } + } + + pub struct NetworkFilterListBuilder<'a: 'b, 'b, A: flatbuffers::Allocator + 'a> { + fbb_: &'b mut flatbuffers::FlatBufferBuilder<'a, A>, + start_: flatbuffers::WIPOffset, + } + impl<'a: 'b, 'b, A: flatbuffers::Allocator + 'a> NetworkFilterListBuilder<'a, 'b, A> { + #[inline] + pub fn add_network_filters( + &mut self, + network_filters: flatbuffers::WIPOffset< + flatbuffers::Vector<'b, flatbuffers::ForwardsUOffset>>, + >, + ) { + self.fbb_.push_slot_always::>( + NetworkFilterList::VT_NETWORK_FILTERS, + network_filters, + ); + } + #[inline] + pub fn add_unique_domains_hashes( + &mut self, + unique_domains_hashes: flatbuffers::WIPOffset>, + ) { + self.fbb_.push_slot_always::>( + NetworkFilterList::VT_UNIQUE_DOMAINS_HASHES, + unique_domains_hashes, + ); + } + #[inline] + pub fn new( + _fbb: &'b mut flatbuffers::FlatBufferBuilder<'a, A>, + ) -> NetworkFilterListBuilder<'a, 'b, A> { + let start = _fbb.start_table(); + NetworkFilterListBuilder { + fbb_: _fbb, + start_: start, + } + } + #[inline] + pub fn finish(self) -> flatbuffers::WIPOffset> { + let o = self.fbb_.end_table(self.start_); + self.fbb_ + .required(o, NetworkFilterList::VT_NETWORK_FILTERS, "network_filters"); + self.fbb_.required( + o, + NetworkFilterList::VT_UNIQUE_DOMAINS_HASHES, + "unique_domains_hashes", + ); + flatbuffers::WIPOffset::new(o.value()) + } + } + + impl core::fmt::Debug for NetworkFilterList<'_> { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + let mut ds = f.debug_struct("NetworkFilterList"); + ds.field("network_filters", &self.network_filters()); + ds.field("unique_domains_hashes", &self.unique_domains_hashes()); + ds.finish() + } + } + #[non_exhaustive] + #[derive(Debug, Clone, PartialEq)] + pub struct NetworkFilterListT { + pub network_filters: Vec, + pub unique_domains_hashes: Vec, + } + impl Default for NetworkFilterListT { + fn default() -> Self { + Self { + network_filters: Default::default(), + unique_domains_hashes: Default::default(), + } + } + } + impl NetworkFilterListT { + pub fn pack<'b, A: flatbuffers::Allocator + 'b>( + &self, + _fbb: &mut flatbuffers::FlatBufferBuilder<'b, A>, + ) -> flatbuffers::WIPOffset> { + let network_filters = Some({ + let x = &self.network_filters; + let w: Vec<_> = x.iter().map(|t| t.pack(_fbb)).collect(); + _fbb.create_vector(&w) + }); + let unique_domains_hashes = Some({ + let x = &self.unique_domains_hashes; + _fbb.create_vector(x) + }); + NetworkFilterList::create( + _fbb, + &NetworkFilterListArgs { + network_filters, + unique_domains_hashes, + }, + ) + } + } + #[inline] + /// Verifies that a buffer of bytes contains a `NetworkFilterList` + /// and returns it. + /// Note that verification is still experimental and may not + /// catch every error, or be maximally performant. For the + /// previous, unchecked, behavior use + /// `root_as_network_filter_list_unchecked`. + pub fn root_as_network_filter_list( + buf: &[u8], + ) -> Result { + flatbuffers::root::(buf) + } + #[inline] + /// Verifies that a buffer of bytes contains a size prefixed + /// `NetworkFilterList` and returns it. + /// Note that verification is still experimental and may not + /// catch every error, or be maximally performant. For the + /// previous, unchecked, behavior use + /// `size_prefixed_root_as_network_filter_list_unchecked`. + pub fn size_prefixed_root_as_network_filter_list( + buf: &[u8], + ) -> Result { + flatbuffers::size_prefixed_root::(buf) + } + #[inline] + /// Verifies, with the given options, that a buffer of bytes + /// contains a `NetworkFilterList` and returns it. + /// Note that verification is still experimental and may not + /// catch every error, or be maximally performant. For the + /// previous, unchecked, behavior use + /// `root_as_network_filter_list_unchecked`. + pub fn root_as_network_filter_list_with_opts<'b, 'o>( + opts: &'o flatbuffers::VerifierOptions, + buf: &'b [u8], + ) -> Result, flatbuffers::InvalidFlatbuffer> { + flatbuffers::root_with_opts::>(opts, buf) + } + #[inline] + /// Verifies, with the given verifier options, that a buffer of + /// bytes contains a size prefixed `NetworkFilterList` and returns + /// it. Note that verification is still experimental and may not + /// catch every error, or be maximally performant. For the + /// previous, unchecked, behavior use + /// `root_as_network_filter_list_unchecked`. + pub fn size_prefixed_root_as_network_filter_list_with_opts<'b, 'o>( + opts: &'o flatbuffers::VerifierOptions, + buf: &'b [u8], + ) -> Result, flatbuffers::InvalidFlatbuffer> { + flatbuffers::size_prefixed_root_with_opts::>(opts, buf) + } + #[inline] + /// Assumes, without verification, that a buffer of bytes contains a NetworkFilterList and returns it. + /// # Safety + /// Callers must trust the given bytes do indeed contain a valid `NetworkFilterList`. + pub unsafe fn root_as_network_filter_list_unchecked(buf: &[u8]) -> NetworkFilterList { + flatbuffers::root_unchecked::(buf) + } + #[inline] + /// Assumes, without verification, that a buffer of bytes contains a size prefixed NetworkFilterList and returns it. + /// # Safety + /// Callers must trust the given bytes do indeed contain a valid size prefixed `NetworkFilterList`. + pub unsafe fn size_prefixed_root_as_network_filter_list_unchecked( + buf: &[u8], + ) -> NetworkFilterList { + flatbuffers::size_prefixed_root_unchecked::(buf) + } + #[inline] + pub fn finish_network_filter_list_buffer<'a, 'b, A: flatbuffers::Allocator + 'a>( + fbb: &'b mut flatbuffers::FlatBufferBuilder<'a, A>, + root: flatbuffers::WIPOffset>, + ) { + fbb.finish(root, None); + } + + #[inline] + pub fn finish_size_prefixed_network_filter_list_buffer< + 'a, + 'b, + A: flatbuffers::Allocator + 'a, + >( + fbb: &'b mut flatbuffers::FlatBufferBuilder<'a, A>, + root: flatbuffers::WIPOffset>, + ) { + fbb.finish_size_prefixed(root, None); + } +} // pub mod fb diff --git a/src/lib.rs b/src/lib.rs index 956f0827..b33e030b 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -20,10 +20,13 @@ pub mod blocker; #[cfg(feature = "content-blocking")] pub mod content_blocking; pub mod cosmetic_filter_cache; +#[cfg(not(feature = "flatbuffers"))] // No serialization for flatbuffers yet. mod data_format; mod engine; +mod engine_serializer; pub mod filters; pub mod lists; +pub mod network_filter_list; mod optimizer; pub mod regex_manager; pub mod request; @@ -34,6 +37,7 @@ pub mod utils; #[doc(inline)] pub use engine::Engine; +pub use engine_serializer::Serialize; #[doc(inline)] pub use lists::FilterSet; diff --git a/src/lists.rs b/src/lists.rs index 13f2347f..ae027159 100644 --- a/src/lists.rs +++ b/src/lists.rs @@ -2,8 +2,8 @@ use std::convert::TryFrom; -use crate::filters::network::{NetworkFilter, NetworkFilterError}; use crate::filters::cosmetic::{CosmeticFilter, CosmeticFilterError}; +use crate::filters::network::{NetworkFilter, NetworkFilterError}; use crate::resources::PermissionMask; use itertools::{Either, Itertools}; @@ -154,14 +154,14 @@ impl TryFrom<&str> for ExpiresInterval { if (1..=HOURS_MAX).contains(&amount) { return Ok(Self::Hours(amount)); } - }, + } "day" | "days" => { let amount = amount.parse::().map_err(|_| ())?; if (1..=DAYS_MAX).contains(&amount) { - return Ok(Self::Days(amount)) + return Ok(Self::Days(amount)); } } - _ => () + _ => (), } Err(()) } @@ -200,14 +200,18 @@ impl FilterListMetadata { if let Some(kv) = line.strip_prefix("! ") { if let Some((key, value)) = kv.split_once(": ") { match key { - "Homepage" if self.homepage.is_none() => self.homepage = Some(value.to_string()), + "Homepage" if self.homepage.is_none() => { + self.homepage = Some(value.to_string()) + } "Title" if self.title.is_none() => self.title = Some(value.to_string()), "Expires" if self.expires.is_none() => { if let Ok(expires) = ExpiresInterval::try_from(value) { self.expires = Some(expires); } } - "Redirect" if self.redirect.is_none() => self.redirect = Some(value.to_string()), + "Redirect" if self.redirect.is_none() => { + self.redirect = Some(value.to_string()) + } _ => (), } } @@ -236,8 +240,13 @@ impl FilterSet { /// Adds a collection of filter rules to this `FilterSet`. Filters that cannot be parsed /// successfully are ignored. Returns any discovered metadata about the list of rules added. - pub fn add_filters(&mut self, filters: impl IntoIterator>, opts: ParseOptions) -> FilterListMetadata { - let (metadata, mut parsed_network_filters, mut parsed_cosmetic_filters) = parse_filters_with_metadata(filters, self.debug, opts); + pub fn add_filters( + &mut self, + filters: impl IntoIterator>, + opts: ParseOptions, + ) -> FilterListMetadata { + let (metadata, mut parsed_network_filters, mut parsed_cosmetic_filters) = + parse_filters_with_metadata(filters, self.debug, opts); self.network_filters.append(&mut parsed_network_filters); self.cosmetic_filters.append(&mut parsed_cosmetic_filters); metadata @@ -262,11 +271,13 @@ impl FilterSet { /// /// This function will fail if the `FilterSet` was not created in debug mode. #[cfg(feature = "content-blocking")] - pub fn into_content_blocking(self) -> Result<(Vec, Vec), ()> { + pub fn into_content_blocking( + self, + ) -> Result<(Vec, Vec), ()> { use crate::content_blocking; if !self.debug { - return Err(()) + return Err(()); } let mut ignore_previous_rules = vec![]; @@ -275,26 +286,37 @@ impl FilterSet { let mut filters_used = vec![]; self.network_filters.into_iter().for_each(|filter| { - let original_rule = *filter.raw_line.clone().expect("All rules should be in debug mode"); - if let Ok(equivalent) = TryInto::::try_into(filter) { + let original_rule = *filter + .raw_line + .clone() + .expect("All rules should be in debug mode"); + if let Ok(equivalent) = TryInto::::try_into(filter) + { filters_used.push(original_rule); - equivalent.into_iter().for_each(|cb_rule| { - match &cb_rule.action.typ { - content_blocking::CbType::IgnorePreviousRules => ignore_previous_rules.push(cb_rule), + equivalent + .into_iter() + .for_each(|cb_rule| match &cb_rule.action.typ { + content_blocking::CbType::IgnorePreviousRules => { + ignore_previous_rules.push(cb_rule) + } _ => other_rules.push(cb_rule), - } - }); + }); } }); let add_fp_document_exception = !filters_used.is_empty(); self.cosmetic_filters.into_iter().for_each(|filter| { - let original_rule = *filter.raw_line.clone().expect("All rules should be in debug mode"); + let original_rule = *filter + .raw_line + .clone() + .expect("All rules should be in debug mode"); if let Ok(cb_rule) = TryInto::::try_into(filter) { filters_used.push(original_rule); match &cb_rule.action.typ { - content_blocking::CbType::IgnorePreviousRules => ignore_previous_rules.push(cb_rule), + content_blocking::CbType::IgnorePreviousRules => { + ignore_previous_rules.push(cb_rule) + } _ => other_rules.push(cb_rule), } } @@ -402,17 +424,19 @@ pub fn parse_filter( } match opts.format { - FilterFormat::Standard => { - match (detect_filter_type(filter), opts.rule_types) { - (FilterType::Network, RuleTypes::All | RuleTypes::NetworkOnly) => NetworkFilter::parse(filter, debug, opts) + FilterFormat::Standard => match (detect_filter_type(filter), opts.rule_types) { + (FilterType::Network, RuleTypes::All | RuleTypes::NetworkOnly) => { + NetworkFilter::parse(filter, debug, opts) .map(|f| f.into()) - .map_err(|e| e.into()), - (FilterType::Cosmetic, RuleTypes::All | RuleTypes::CosmeticOnly) => CosmeticFilter::parse(filter, debug, opts.permissions) + .map_err(|e| e.into()) + } + (FilterType::Cosmetic, RuleTypes::All | RuleTypes::CosmeticOnly) => { + CosmeticFilter::parse(filter, debug, opts.permissions) .map(|f| f.into()) - .map_err(|e| e.into()), - _ => Err(FilterParseError::Unsupported), + .map_err(|e| e.into()) } - } + _ => Err(FilterParseError::Unsupported), + }, FilterFormat::Hosts => { // Hosts-style rules can only ever be network rules if !opts.rule_types.loads_network_rules() { @@ -437,7 +461,11 @@ pub fn parse_filter( // Take the last of at most 2 whitespace separated fields let mut filter_parts = filter.split_whitespace(); - let hostname = match (filter_parts.next(), filter_parts.next(), filter_parts.next()) { + let hostname = match ( + filter_parts.next(), + filter_parts.next(), + filter_parts.next(), + ) { (None, None, None) => return Err(FilterParseError::Unsupported), (Some(hostname), None, None) => hostname, (Some(_ip), Some(hostname), None) => hostname, @@ -461,22 +489,19 @@ pub fn parse_filter( /// Parse an entire list of filters, ignoring any errors pub fn parse_filters( - list: impl IntoIterator>, + list: impl IntoIterator>, debug: bool, opts: ParseOptions, ) -> (Vec, Vec) { - let (_metadata, network_filters, cosmetic_filters) = parse_filters_with_metadata( - list, - debug, - opts, - ); + let (_metadata, network_filters, cosmetic_filters) = + parse_filters_with_metadata(list, debug, opts); (network_filters, cosmetic_filters) } /// Parse an entire list of filters, ignoring any errors pub fn parse_filters_with_metadata( - list: impl IntoIterator>, + list: impl IntoIterator>, debug: bool, opts: ParseOptions, ) -> (FilterListMetadata, Vec, Vec) { @@ -523,7 +548,12 @@ fn detect_filter_type(filter: &str) -> FilterType { // Check the next few bytes for a second `#` // Indexing is safe here because it uses the filter's byte // representation and guards against short strings - if find_char(b'#', &filter.as_bytes()[after_sharp_index..(after_sharp_index+4).min(filter.len())]).is_some() { + if find_char( + b'#', + &filter.as_bytes()[after_sharp_index..(after_sharp_index + 4).min(filter.len())], + ) + .is_some() + { return FilterType::Cosmetic; } } @@ -538,302 +568,5 @@ fn detect_filter_type(filter: &str) -> FilterType { } #[cfg(test)] -mod tests { - use super::*; - - #[test] - fn parse_hosts_style() { - { - let input = "www.malware.com"; - let result = parse_filter(input, true, ParseOptions { format: FilterFormat::Hosts, ..Default::default() }); - assert!(result.is_ok()); - } - { - let input = "www.malware.com/virus.txt"; - let result = parse_filter(input, true, ParseOptions { format: FilterFormat::Hosts, ..Default::default() }); - assert!(result.is_err()); - } - { - let input = "127.0.0.1 www.malware.com"; - let result = parse_filter(input, true, ParseOptions { format: FilterFormat::Hosts, ..Default::default() }); - assert!(result.is_ok()); - } - { - let input = "127.0.0.1\t\twww.malware.com"; - let result = parse_filter(input, true, ParseOptions { format: FilterFormat::Hosts, ..Default::default() }); - assert!(result.is_ok()); - } - { - let input = "0.0.0.0 www.malware.com"; - let result = parse_filter(input, true, ParseOptions { format: FilterFormat::Hosts, ..Default::default() }); - assert!(result.is_ok()); - } - { - let input = "0.0.0.0 www.malware.com # replace after issue #289336 is addressed"; - let result = parse_filter(input, true, ParseOptions { format: FilterFormat::Hosts, ..Default::default() }); - assert!(result.is_ok()); - } - { - let input = "! Title: list.txt"; - let result = parse_filter(input, true, ParseOptions { format: FilterFormat::Hosts, ..Default::default() }); - assert!(result.is_err()); - } - { - let input = "127.0.0.1 localhost"; - let result = parse_filter(input, true, ParseOptions { format: FilterFormat::Hosts, ..Default::default() }); - assert!(result.is_err()); - } - { - let input = "127.0.0.1 com"; - let result = parse_filter(input, true, ParseOptions { format: FilterFormat::Hosts, ..Default::default() }); - assert!(result.is_err()); - } - { - let input = ".com"; - let result = parse_filter(input, true, ParseOptions { format: FilterFormat::Hosts, ..Default::default() }); - assert!(result.is_err()); - } - { - let input = "*.com"; - let result = parse_filter(input, true, ParseOptions { format: FilterFormat::Hosts, ..Default::default() }); - assert!(result.is_err()); - } - { - let input = "www."; - let result = parse_filter(input, true, ParseOptions { format: FilterFormat::Hosts, ..Default::default() }); - assert!(result.is_err()); - } - } - - #[test] - fn adguard_cosmetic_detection() { - { - let input = r#"example.org$$script[data-src="banner"]"#; - let result = parse_filter(input, true, Default::default()); - assert!(result.is_err()); - } - { - let input = "example.org##+js(set-local-storage-item, Test, $$remove$$)"; - let result = parse_filter(input, true, Default::default()); - assert!(result.is_ok()); - } - { - let input = "[$app=org.example.app]example.com##.textad"; - let result = parse_filter(input, true, Default::default()); - assert!(result.is_err()); - } - { - let input = r#"[$domain=/^i\[a-z\]*\.strmrdr\[a-z\]+\..*/]##+js(set-constant, adscfg.enabled, false)"#; - let result = parse_filter(input, true, Default::default()); - assert!(result.is_err()); - } - } - - #[test] - fn parse_filter_failed_fuzz_1() { - let input = "Ѥ"; - let result = parse_filter(input, true, Default::default()); - assert!(result.is_ok()); - } - - #[test] - fn parse_filter_failed_fuzz_2() { - assert!(parse_filter(r#"###\\\00DB \008D"#, true, Default::default()).is_ok()); - assert!(parse_filter(r#"###\Û"#, true, Default::default()).is_ok()); - } - - #[test] - fn parse_filter_failed_fuzz_3() { - let input = "||$3p=/"; - let result = parse_filter(input, true, Default::default()); - assert!(result.is_ok()); - } - - #[test] - fn parse_filter_failed_fuzz_4() { - // \\##+js(,\xdd\x8d - let parsed = parse_filter( - &String::from_utf8(vec![92, 35, 35, 43, 106, 115, 40, 44, 221, 141]).unwrap(), - true, - Default::default(), - ); - #[cfg(feature = "css-validation")] - assert!(parsed.is_err()); - #[cfg(not(feature = "css-validation"))] - assert!(parsed.is_ok()); - } - - #[test] - #[cfg(feature = "css-validation")] - fn parse_filter_opening_comment() { - assert!(parse_filter( - "##input,input/*", - true, - Default::default(), - ).is_err()); - } - - #[test] - fn test_parse_expires_interval() { - assert_eq!(ExpiresInterval::try_from("0 hour"), Err(())); - assert_eq!(ExpiresInterval::try_from("0 hours"), Err(())); - assert_eq!(ExpiresInterval::try_from("1 hour"), Ok(ExpiresInterval::Hours(1))); - assert_eq!(ExpiresInterval::try_from("1 hours"), Ok(ExpiresInterval::Hours(1))); - assert_eq!(ExpiresInterval::try_from("2 hours"), Ok(ExpiresInterval::Hours(2))); - assert_eq!(ExpiresInterval::try_from("2 hour"), Ok(ExpiresInterval::Hours(2))); - assert_eq!(ExpiresInterval::try_from("3.5 hours"), Err(())); - assert_eq!(ExpiresInterval::try_from("336 hours"), Ok(ExpiresInterval::Hours(336))); - assert_eq!(ExpiresInterval::try_from("337 hours"), Err(())); - - assert_eq!(ExpiresInterval::try_from("0 day"), Err(())); - assert_eq!(ExpiresInterval::try_from("0 days"), Err(())); - assert_eq!(ExpiresInterval::try_from("1 day"), Ok(ExpiresInterval::Days(1))); - assert_eq!(ExpiresInterval::try_from("1 days"), Ok(ExpiresInterval::Days(1))); - assert_eq!(ExpiresInterval::try_from("2 days"), Ok(ExpiresInterval::Days(2))); - assert_eq!(ExpiresInterval::try_from("2 day"), Ok(ExpiresInterval::Days(2))); - assert_eq!(ExpiresInterval::try_from("3.5 days"), Err(())); - assert_eq!(ExpiresInterval::try_from("14 days"), Ok(ExpiresInterval::Days(14))); - assert_eq!(ExpiresInterval::try_from("15 days"), Err(())); - - assert_eq!(ExpiresInterval::try_from("-5 hours"), Err(())); - assert_eq!(ExpiresInterval::try_from("+5 hours"), Err(())); - - assert_eq!(ExpiresInterval::try_from("2 days (update frequency)"), Ok(ExpiresInterval::Days(2))); - assert_eq!(ExpiresInterval::try_from("2 hours (update frequency)"), Ok(ExpiresInterval::Hours(2))); - } - - #[test] - fn test_parsing_list_metadata() { - let list = [ - "[Adblock Plus 2.0]", - "! Title: 0131 Block List", - "! Homepage: https://austinhuang.me/0131-block-list", - "! Licence: https://creativecommons.org/licenses/by-sa/4.0/", - "! Expires: 7 days", - "! Version: 20220411", - "", - "! => https://austinhuang.me/0131-block-list/list.txt", - ]; - - let mut filter_set = FilterSet::new(false); - let metadata = filter_set.add_filters(list, ParseOptions::default()); - - assert_eq!(metadata.title, Some("0131 Block List".to_string())); - assert_eq!(metadata.homepage, Some("https://austinhuang.me/0131-block-list".to_string())); - assert_eq!(metadata.expires, Some(ExpiresInterval::Days(7))); - assert_eq!(metadata.redirect, None); - } - - #[test] - /// Some lists are formatted in unusual ways. This example has a version string with - /// non-numeric characters and an `Expires` field with extra information trailing afterwards. - /// Valid fields should still be recognized and parsed accordingly. - fn test_parsing_list_best_effort() { - let list = [ - "[Adblock Plus 2]", - "!-----------------------------------", - "! ABOUT", - "!-----------------------------------", - "! Version: 1.2.0.0", - "! Title: ABPVN Advanced", - "! Last modified: 09/03/2021", - "! Expires: 7 days (update frequency)", - "! Homepage: https://www.haopro.net/", - ]; - - let mut filter_set = FilterSet::new(false); - let metadata = filter_set.add_filters(list, ParseOptions::default()); - - assert_eq!(metadata.title, Some("ABPVN Advanced".to_string())); - assert_eq!(metadata.homepage, Some("https://www.haopro.net/".to_string())); - assert_eq!(metadata.expires, Some(ExpiresInterval::Days(7))); - assert_eq!(metadata.redirect, None); - } - - #[test] - fn test_read_metadata() { - { - let list = -r##"! Title: uBlock₀ filters – Annoyances -! Description: Filters optimized for uBlock Origin, to be used with Fanboy's -! and/or Adguard's "Annoyances" list(s) -! Expires: 4 days -! Last modified: %timestamp% -! License: https://github.com/uBlockOrigin/uAssets/blob/master/LICENSE -! Homepage: https://github.com/uBlockOrigin/uAssets -! Forums: https://github.com/uBlockOrigin/uAssets/issues"##; - let metadata = read_list_metadata(&list); - - assert_eq!(metadata.title, Some("uBlock₀ filters – Annoyances".to_string())); - assert_eq!(metadata.homepage, Some("https://github.com/uBlockOrigin/uAssets".to_string())); - assert_eq!(metadata.expires, Some(ExpiresInterval::Days(4))); - assert_eq!(metadata.redirect, None); - } - { - let list = -r##"[uBlock Origin] -! Title: PersianBlocker -! Description: سرانجام، یک لیست بهینه و گسترده برای مسدودسازی تبلیغ ها و ردیاب ها در سایت های پارسی زبان! -! Expires: 2 days -! Last modified: 2022-12-11 -! Homepage: https://github.com/MasterKia/PersianBlocker -! License: AGPLv3 (https://github.com/MasterKia/PersianBlocker/blob/main/LICENSE) - -! مشکل/پیشنهاد: https://github.com/MasterKia/PersianBlocker/issues -! مشارکت: https://github.com/MasterKia/PersianBlocker/pulls - -! لیستی برای برگرداندن آزادی کاربران، چون هر کاربر این آزادی را دارد که چه چیزی وارد مرورگرش می‌شود و چه چیزی وارد نمی‌شود -!-------------------------v Experimental Generic Filters v-----------------------! -! applicationha.com, androidgozar.com, downloadkral.com, gold-team.org, iranecar.com, icoff.ee, koolakmag.ir, -!! mybia4music.com, my-film.pw, pedal.ir, vgdl.ir, sakhamusic.ir -/wp-admin/admin-ajax.php?postviews_id=$xhr -"##; - let metadata = read_list_metadata(&list); - - assert_eq!(metadata.title, Some("PersianBlocker".to_string())); - assert_eq!(metadata.homepage, Some("https://github.com/MasterKia/PersianBlocker".to_string())); - assert_eq!(metadata.expires, Some(ExpiresInterval::Days(2))); - assert_eq!(metadata.redirect, None); - } - } - - #[test] - fn parse_cosmetic_variants() { - { - let input = "example.com##.selector"; - let result = parse_filter(input, true, Default::default()); - assert!(matches!(result, Ok(ParsedFilter::Cosmetic(..)))); - } - { - let input = "9gag.com#?#article:-abp-has(.promoted)"; - let result = parse_filter(input, true, Default::default()); - assert!(matches!(result, Ok(ParsedFilter::Cosmetic(..)))); - } - #[cfg(feature = "css-validation")] - { - let input = "sportowefakty.wp.pl#@?#body > [class]:not([id]):matches-css(position: fixed):matches-css(top: 0px)"; - let result = parse_filter(input, true, Default::default()); - assert!(matches!(result, Ok(ParsedFilter::Cosmetic(..)))); - } - { - let input = r#"odkrywamyzakryte.com#%#//scriptlet("abort-on-property-read", "sc_adv_out")"#; - let result = parse_filter(input, true, Default::default()); - assert!(matches!(result, Err(FilterParseError::Cosmetic(CosmeticFilterError::UnsupportedSyntax)))); - } - { - let input = "bikeradar.com,spiegel.de#@%#!function(){function b(){}function a(a){return{get:function(){return a},set:b}}function c(a)"; - let result = parse_filter(input, true, Default::default()); - assert!(matches!(result, Err(FilterParseError::Cosmetic(CosmeticFilterError::UnsupportedSyntax)))); - } - { - let input = "nczas.com#$#.adsbygoogle { position: absolute!important; left: -3000px!important; }"; - let result = parse_filter(input, true, Default::default()); - assert!(matches!(result, Err(FilterParseError::Cosmetic(CosmeticFilterError::UnsupportedSyntax)))); - } - { - let input = "kurnik.pl#@$#.adsbygoogle { height: 1px !important; width: 1px !important; }"; - let result = parse_filter(input, true, Default::default()); - assert!(matches!(result, Err(FilterParseError::Cosmetic(CosmeticFilterError::UnsupportedSyntax)))); - } - } -} +#[path = "../tests/unit/lists.rs"] +mod unit_tests; diff --git a/src/network_filter_list.rs b/src/network_filter_list.rs new file mode 100644 index 00000000..f060bce2 --- /dev/null +++ b/src/network_filter_list.rs @@ -0,0 +1,507 @@ +use std::fmt; +use std::{collections::HashMap, collections::HashSet, sync::Arc}; + +use serde::{Deserialize, Serialize}; + +use crate::filters::network::NetworkMatchable; +use crate::filters::network::{NetworkFilter, NetworkFilterMask, NetworkFilterMaskHelper}; +use crate::optimizer; +use crate::regex_manager::RegexManager; +use crate::request::Request; +use crate::utils::{fast_hash, Hash}; + +pub struct CheckResult { + pub filter_mask: NetworkFilterMask, + pub modifier_option: Option, + pub raw_line: Option, +} + +impl fmt::Display for CheckResult { + fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> { + if let Some(ref raw_line) = self.raw_line { + write!(f, "{}", raw_line) + } else { + write!(f, "NetworkFilter") + } + } +} + +impl NetworkFilterMaskHelper for CheckResult { + #[inline] + fn has_flag(&self, v: NetworkFilterMask) -> bool { + self.filter_mask.contains(v) + } +} +pub trait NetworkFilterListTrait { + fn new(filters: Vec, optimize: bool) -> Self + where + Self: Sized; + fn optimize(&mut self); + fn add_filter(&mut self, filter: NetworkFilter); + fn filter_exists(&self, filter: &NetworkFilter) -> bool; + + fn check( + &self, + request: &Request, + active_tags: &HashSet, + regex_manager: &mut RegexManager, + ) -> Option; + fn check_all( + &self, + request: &Request, + active_tags: &HashSet, + regex_manager: &mut RegexManager, + ) -> Vec; +} + +#[derive(Serialize, Deserialize, Default)] +pub struct NetworkFilterList { + #[cfg(not(feature = "flatbuffers"))] + #[serde(serialize_with = "crate::data_format::utils::stabilize_hashmap_serialization")] + pub(crate) filter_map: HashMap>>, + + #[cfg(feature = "flatbuffers")] + pub(crate) filter_map: HashMap>>, +} + +impl NetworkFilterListTrait for NetworkFilterList { + fn new(filters: Vec, optimize: bool) -> NetworkFilterList { + // Compute tokens for all filters + let filter_tokens: Vec<_> = filters + .into_iter() + .map(|filter| { + let tokens = filter.get_tokens(); + (Arc::new(filter), tokens) + }) + .collect(); + // compute the tokens' frequency histogram + let (total_number_of_tokens, tokens_histogram) = token_histogram(&filter_tokens); + + // Build a HashMap of tokens to Network Filters (held through Arc, Atomic Reference Counter) + let mut filter_map = HashMap::with_capacity(filter_tokens.len()); + { + for (filter_pointer, multi_tokens) in filter_tokens { + for tokens in multi_tokens { + let mut best_token: Hash = 0; + let mut min_count = total_number_of_tokens + 1; + for token in tokens { + match tokens_histogram.get(&token) { + None => { + min_count = 0; + best_token = token + } + Some(&count) if count < min_count => { + min_count = count; + best_token = token + } + _ => {} + } + } + insert_dup(&mut filter_map, best_token, Arc::clone(&filter_pointer)); + } + } + } + + let mut self_ = NetworkFilterList { filter_map }; + + if optimize { + self_.optimize(); + } else { + self_.filter_map.shrink_to_fit(); + } + + self_ + } + + fn optimize(&mut self) { + let mut optimized_map = HashMap::with_capacity(self.filter_map.len()); + for (key, filters) in self.filter_map.drain() { + let mut unoptimized: Vec = Vec::with_capacity(filters.len()); + let mut unoptimizable: Vec> = Vec::with_capacity(filters.len()); + for f in filters { + match Arc::try_unwrap(f) { + Ok(f) => unoptimized.push(f), + Err(af) => unoptimizable.push(af), + } + } + + let mut optimized: Vec<_> = if unoptimized.len() > 1 { + optimizer::optimize(unoptimized) + .into_iter() + .map(Arc::new) + .collect() + } else { + // nothing to optimize + unoptimized.into_iter().map(Arc::new).collect() + }; + + optimized.append(&mut unoptimizable); + optimized.shrink_to_fit(); + optimized_map.insert(key, optimized); + } + + // won't mutate anymore, shrink to fit items + optimized_map.shrink_to_fit(); + + self.filter_map = optimized_map; + } + + fn add_filter(&mut self, filter: NetworkFilter) { + let filter_tokens = filter.get_tokens(); + let total_rules = vec_hashmap_len(&self.filter_map); + let filter_pointer = Arc::new(filter); + + for tokens in filter_tokens { + let mut best_token: Hash = 0; + let mut min_count = total_rules + 1; + for token in tokens { + match self.filter_map.get(&token) { + None => { + min_count = 0; + best_token = token + } + Some(filters) if filters.len() < min_count => { + min_count = filters.len(); + best_token = token + } + _ => {} + } + } + + insert_dup( + &mut self.filter_map, + best_token, + Arc::clone(&filter_pointer), + ); + } + } + + /// This may not work if the list has been optimized. + fn filter_exists(&self, filter: &NetworkFilter) -> bool { + let tokens: Vec<_> = filter.get_tokens().into_iter().flatten().collect(); + tokens.into_iter().chain(std::iter::once(0)).any(|token| { + self.filter_map.get(&token).map_or(false, |filters| { + filters + .iter() + .any(|saved_filter| saved_filter.id == filter.id) + }) + }) + } + + /// Returns the first found filter, if any, that matches the given request. The backing storage + /// has a non-deterministic order, so this should be used for any category of filters where a + /// match from each would be functionally equivalent. For example, if two different exception + /// filters match a certain request, it doesn't matter _which_ one is matched - the request + /// will be excepted either way. + fn check( + &self, + request: &Request, + active_tags: &HashSet, + regex_manager: &mut RegexManager, + ) -> Option { + if self.filter_map.is_empty() { + return None; + } + + for token in request.checkable_tokens_iter() { + if let Some(filter_bucket) = self.filter_map.get(token) { + for filter in filter_bucket { + // if matched, also needs to be tagged with an active tag (or not tagged at all) + if filter.matches(request, regex_manager) + && filter + .tag + .as_ref() + .map(|t| active_tags.contains(t)) + .unwrap_or(true) + { + return Some(CheckResult { + filter_mask: filter.mask, + modifier_option: filter.modifier_option.clone(), + raw_line: filter.raw_line.clone().map(|line| *line), + }); + } + } + } + } + + None + } + + /// Returns _all_ filters that match the given request. This should be used for any category of + /// filters where a match from each may carry unique information. For example, if two different + /// `$csp` filters match a certain request, they may each carry a distinct CSP directive, and + /// each directive should be combined for the final result. + fn check_all( + &self, + request: &Request, + active_tags: &HashSet, + regex_manager: &mut RegexManager, + ) -> Vec { + let mut filters: Vec = vec![]; + + if self.filter_map.is_empty() { + return filters; + } + + for token in request.checkable_tokens_iter() { + if let Some(filter_bucket) = self.filter_map.get(token) { + for filter in filter_bucket { + // if matched, also needs to be tagged with an active tag (or not tagged at all) + if filter.matches(request, regex_manager) + && filter + .tag + .as_ref() + .map(|t| active_tags.contains(t)) + .unwrap_or(true) + { + filters.push(CheckResult { + filter_mask: filter.mask, + modifier_option: filter.modifier_option.clone(), + raw_line: filter.raw_line.clone().map(|line| *line), + }); + } + } + } + } + filters + } +} + +use crate::filters::fb_network::flat::fb; +use crate::filters::fb_network::{FlatNetworkFilter, FlatNetworkFiltersListBuilder}; + +pub struct FlatNetworkFilterList { + flatbuffer_memory: Vec, + pub(crate) filter_map: HashMap>, + pub(crate) domain_hashes_mapping: HashMap, +} + +impl NetworkFilterListTrait for FlatNetworkFilterList { + fn new(filters: Vec, optimize: bool) -> Self { + // Compute tokens for all filters + let filter_tokens: Vec<_> = filters + .into_iter() + .map(|filter| { + let tokens = filter.get_tokens(); + (filter, tokens) + }) + .collect(); + // compute the tokens' frequency histogram + let (total_number_of_tokens, tokens_histogram) = token_histogram(&filter_tokens); + + let mut flat_builder = FlatNetworkFiltersListBuilder::new(); + let mut filter_map = HashMap::>::new(); + let mut optimizable = HashMap::>::new(); + { + for (network_filter, multi_tokens) in filter_tokens { + let index = if !optimize + || !optimizer::is_filter_optimizable_by_patterns(&network_filter) + { + Some(flat_builder.add(&network_filter)) + } else { + None + }; + + for tokens in multi_tokens { + let mut best_token: Hash = 0; + let mut min_count = total_number_of_tokens + 1; + for token in tokens { + match tokens_histogram.get(&token) { + None => { + min_count = 0; + best_token = token + } + Some(&count) if count < min_count => { + min_count = count; + best_token = token + } + _ => {} + } + } + if let Some(index) = index { + insert_dup(&mut filter_map, best_token, index); + } else { + insert_dup(&mut optimizable, best_token, network_filter.clone()); + } + } // tokens + } + } + + if optimize { + for (token, v) in optimizable { + let optimized = optimizer::optimize_by_groupping_patterns(v); + + for filter in optimized { + let index = flat_builder.add(&filter); + insert_dup(&mut filter_map, token, index); + } + } + } else { + debug_assert!( + optimizable.is_empty(), + "Should be empty if optimization is off" + ); + } + + let flatbuffer_memory = flat_builder.finish(); + let root = fb::root_as_network_filter_list(&flatbuffer_memory) + .expect("Ok because it is created in the previous line"); + + let mut domain_hashes_mapping: HashMap = HashMap::new(); + for (index, hash) in root.unique_domains_hashes().iter().enumerate() { + domain_hashes_mapping.insert(hash, u16::try_from(index).expect("< u16 max")); + } + + filter_map.shrink_to_fit(); + domain_hashes_mapping.shrink_to_fit(); + + Self { + flatbuffer_memory, + filter_map, + domain_hashes_mapping, + } + } + + fn optimize(&mut self) {} + + fn add_filter(&mut self, _filter: NetworkFilter) {} + + fn filter_exists(&self, _filter: &NetworkFilter) -> bool { + false + } + + /// Returns the first found filter, if any, that matches the given request. The backing storage + /// has a non-deterministic order, so this should be used for any category of filters where a + /// match from each would be functionally equivalent. For example, if two different exception + /// filters match a certain request, it doesn't matter _which_ one is matched - the request + /// will be excepted either way. + fn check( + &self, + request: &Request, + active_tags: &HashSet, + regex_manager: &mut RegexManager, + ) -> Option { + if self.filter_map.is_empty() { + return None; + } + + let filters_list = + unsafe { fb::root_as_network_filter_list_unchecked(&self.flatbuffer_memory) }; + let network_filters = filters_list.network_filters(); + + for token in request.checkable_tokens_iter() { + if let Some(filter_bucket) = self.filter_map.get(token) { + for filter_index in filter_bucket { + let fb_filter = network_filters.get(*filter_index as usize); + let filter = FlatNetworkFilter::new(&fb_filter, *filter_index, self); + + // if matched, also needs to be tagged with an active tag (or not tagged at all) + if filter.matches(request, regex_manager) + && filter.tag().map_or(true, |t| active_tags.contains(t)) + { + return Some(CheckResult { + filter_mask: filter.mask, + modifier_option: filter.modifier_option(), + raw_line: filter.raw_line(), + }); + } + } + } + } + + None + } + + /// Returns _all_ filters that match the given request. This should be used for any category of + /// filters where a match from each may carry unique information. For example, if two different + /// `$csp` filters match a certain request, they may each carry a distinct CSP directive, and + /// each directive should be combined for the final result. + fn check_all( + &self, + request: &Request, + active_tags: &HashSet, + regex_manager: &mut RegexManager, + ) -> Vec { + let mut filters: Vec = vec![]; + + if self.filter_map.is_empty() { + return filters; + } + + let filters_list = + unsafe { fb::root_as_network_filter_list_unchecked(&self.flatbuffer_memory) }; + let network_filters = filters_list.network_filters(); + + for token in request.checkable_tokens_iter() { + if let Some(filter_bucket) = self.filter_map.get(token) { + for filter_index in filter_bucket { + let fb_filter = network_filters.get(*filter_index as usize); + let filter = FlatNetworkFilter::new(&fb_filter, *filter_index, self); + + // if matched, also needs to be tagged with an active tag (or not tagged at all) + if filter.matches(request, regex_manager) + && filter.tag().map_or(true, |t| active_tags.contains(t)) + { + filters.push(CheckResult { + filter_mask: filter.mask, + modifier_option: filter.modifier_option(), + raw_line: filter.raw_line(), + }); + } + } + } + } + filters + } +} + +/// Inserts a value into the `Vec` under the specified key in the `HashMap`. The entry will be +/// created if it does not exist. If it already exists, it will be inserted in the `Vec` in a +/// sorted order. +fn insert_dup(map: &mut HashMap, H>, k: K, v: V) +where + K: std::cmp::Ord + std::hash::Hash, + V: PartialOrd, +{ + let entry = map.entry(k).or_insert_with(Vec::new); + + match entry.binary_search_by(|f| f.partial_cmp(&v).unwrap_or(std::cmp::Ordering::Equal)) { + Ok(_pos) => (), // Can occur if the exact same rule is inserted twice. No reason to add anything. + Err(slot) => entry.insert(slot, v), + } +} + +pub(crate) fn vec_hashmap_len( + map: &HashMap, H>, +) -> usize { + let mut size = 0usize; + for (_, val) in map.iter() { + size += val.len(); + } + size +} + +pub(crate) fn token_histogram( + filter_tokens: &[(T, Vec>)], +) -> (u32, HashMap) { + let mut tokens_histogram: HashMap = HashMap::new(); + let mut number_of_tokens = 0; + for (_, tokens) in filter_tokens.iter() { + for tg in tokens { + for t in tg { + *tokens_histogram.entry(*t).or_insert(0) += 1; + number_of_tokens += 1; + } + } + } + + for bad_token in ["http", "https", "www", "com"].iter() { + tokens_histogram.insert(fast_hash(bad_token), number_of_tokens); + } + + (number_of_tokens, tokens_histogram) +} + +#[cfg(test)] +#[path = "../tests/unit/network_filter_list.rs"] +mod unit_tests; diff --git a/src/optimizer.rs b/src/optimizer.rs index 8efdf6b0..f000412e 100644 --- a/src/optimizer.rs +++ b/src/optimizer.rs @@ -1,4 +1,6 @@ -use crate::filters::network::{FilterPart, NetworkFilter, NetworkFilterMask}; +use crate::filters::network::{ + FilterPart, NetworkFilter, NetworkFilterMask, NetworkFilterMaskHelper, +}; use itertools::*; use std::collections::HashMap; @@ -8,6 +10,29 @@ trait Optimization { fn select(&self, filter: &NetworkFilter) -> bool; } +pub fn is_filter_optimizable_by_patterns(filter: &NetworkFilter) -> bool { + filter.opt_domains.is_none() + && filter.opt_not_domains.is_none() + && !filter.is_hostname_anchor() + && !filter.is_redirect() + && !filter.is_csp() +} + +pub fn optimize_by_groupping_patterns(filters: Vec) -> Vec { + let mut optimized: Vec = Vec::new(); + + let simple_pattern_group = SimplePatternGroup {}; + let (mut fused, mut unfused) = apply_optimisation(&simple_pattern_group, filters); + optimized.append(&mut fused); + + // Append whatever is still left unfused + optimized.append(&mut unfused); + + // Re-sort the list, now that the order has been perturbed + optimized.sort_by_key(|f| f.id); + optimized +} + /// Fuse `NetworkFilter`s together by applying optimizations sequentially. pub fn optimize(filters: Vec) -> Vec { let mut optimized: Vec = Vec::new(); @@ -127,11 +152,7 @@ impl Optimization for SimplePatternGroup { format!("{:b}:{:?}", filter.mask, filter.is_complete_regex()) } fn select(&self, filter: &NetworkFilter) -> bool { - filter.opt_domains.is_none() - && filter.opt_not_domains.is_none() - && !filter.is_hostname_anchor() - && !filter.is_redirect() - && !filter.is_csp() + is_filter_optimizable_by_patterns(filter) } } @@ -202,328 +223,5 @@ impl Optimization for UnionDomainGroup { */ #[cfg(test)] -mod optimization_tests_pattern_group { - use super::*; - use crate::filters::network::CompiledRegex; - use crate::filters::network::NetworkMatchable; - use crate::lists; - use crate::regex_manager::RegexManager; - use crate::request::Request; - use regex::bytes::RegexSetBuilder as BytesRegexSetBuilder; - - fn check_regex_match(regex: &CompiledRegex, pattern: &str, matches: bool) { - let is_match = regex.is_match(pattern); - assert!( - is_match == matches, - "Expected {} match {} = {}", - regex.to_string(), - pattern, - matches - ); - } - - fn check_match( - regex_manager: &mut RegexManager, - filter: &NetworkFilter, - url_path: &str, - matches: bool, - ) { - let is_match = filter.matches(&Request::new( - ("https://example.com/".to_string() + url_path).as_str(), - "https://google.com", - "" - ).unwrap(), regex_manager); - assert!( - is_match == matches, - "Expected {} match {} = {}", - filter.to_string(), - url_path, - matches - ); - } - - #[test] - fn regex_set_works() { - let regex_set = BytesRegexSetBuilder::new(&[ - r"/static/ad\.", - "/static/ad-", - "/static/ad/.*", - "/static/ads/.*", - "/static/adv/.*", - ]) - .unicode(false) - .build(); - - let fused_regex = CompiledRegex::CompiledSet(regex_set.unwrap()); - assert!(matches!(fused_regex, CompiledRegex::CompiledSet(_))); - check_regex_match(&fused_regex, "/static/ad.", true); - check_regex_match(&fused_regex, "/static/ad-", true); - check_regex_match(&fused_regex, "/static/ads-", false); - check_regex_match(&fused_regex, "/static/ad/", true); - check_regex_match(&fused_regex, "/static/ad", false); - check_regex_match(&fused_regex, "/static/ad/foobar", true); - check_regex_match(&fused_regex, "/static/ad/foobar/asd?q=1", true); - check_regex_match(&fused_regex, "/static/ads/", true); - check_regex_match(&fused_regex, "/static/ads", false); - check_regex_match(&fused_regex, "/static/ads/foobar", true); - check_regex_match(&fused_regex, "/static/ads/foobar/asd?q=1", true); - check_regex_match(&fused_regex, "/static/adv/", true); - check_regex_match(&fused_regex, "/static/adv", false); - check_regex_match(&fused_regex, "/static/adv/foobar", true); - check_regex_match(&fused_regex, "/static/adv/foobar/asd?q=1", true); - } - - #[test] - fn combines_simple_regex_patterns() { - let rules = [ - "/static/ad-", - "/static/ad.", - "/static/ad/*", - "/static/ads/*", - "/static/adv/*", - ]; - - let (filters, _) = lists::parse_filters(&rules, true, Default::default()); - - let optimization = SimplePatternGroup {}; - - filters - .iter() - .for_each(|f| assert!(optimization.select(f), "Expected rule to be selected")); - - let fused = optimization.fusion(&filters); - - assert!(fused.is_regex() == false, "Expected rule to not be a regex"); - assert_eq!( - fused.to_string(), - "/static/ad- <+> /static/ad. <+> /static/ad/* <+> /static/ads/* <+> /static/adv/*" - ); - let mut regex_manager = RegexManager::default(); - check_match(&mut regex_manager, &fused, "/static/ad-", true); - check_match(&mut regex_manager, &fused, "/static/ad.", true); - check_match(&mut regex_manager, &fused, "/static/ad%", false); - check_match(&mut regex_manager, &fused, "/static/ads-", false); - check_match(&mut regex_manager, &fused, "/static/ad/", true); - check_match(&mut regex_manager, &fused, "/static/ad", false); - check_match(&mut regex_manager, &fused, "/static/ad/foobar", true); - check_match( - &mut regex_manager, - &fused, - "/static/ad/foobar/asd?q=1", - true, - ); - check_match(&mut regex_manager, &fused, "/static/ads/", true); - check_match(&mut regex_manager, &fused, "/static/ads", false); - check_match(&mut regex_manager, &fused, "/static/ads/foobar", true); - check_match( - &mut regex_manager, - &fused, - "/static/ads/foobar/asd?q=1", - true, - ); - check_match(&mut regex_manager, &fused, "/static/adv/", true); - check_match(&mut regex_manager, &fused, "/static/adv", false); - check_match(&mut regex_manager, &fused, "/static/adv/foobar", true); - check_match( - &mut regex_manager, - &fused, - "/static/adv/foobar/asd?q=1", - true, - ); - } - - #[test] - fn separates_pattern_by_grouping() { - let rules = [ - "/analytics-v1.", - "/v1/pixel?", - "/api/v1/stat?", - "/analytics/v1/*$domain=~my.leadpages.net", - "/v1/ads/*", - ]; - - let (filters, _) = lists::parse_filters(&rules, true, Default::default()); - - let optimization = SimplePatternGroup {}; - - let (fused, skipped) = apply_optimisation(&optimization, filters); - - assert_eq!(fused.len(), 1); - let filter = fused.get(0).unwrap(); - assert_eq!( - filter.to_string(), - "/analytics-v1. <+> /v1/pixel? <+> /api/v1/stat? <+> /v1/ads/*" - ); - - assert!(filter.matches_test( - &Request::new( - "https://example.com/v1/pixel?", - "https://my.leadpages.net", - "" - ) - .unwrap() - )); - - assert_eq!(skipped.len(), 1); - let filter = skipped.get(0).unwrap(); - assert_eq!( - filter.to_string(), - "/analytics/v1/*$domain=~my.leadpages.net" - ); - - assert!(filter.matches_test( - &Request::new( - "https://example.com/analytics/v1/foobar", - "https://foo.leadpages.net", - "" - ) - .unwrap() - )) - } -} - -/* -#[cfg(test)] -mod optimization_tests_union_domain { - use super::*; - use crate::filters::network::NetworkMatchable; - use crate::lists; - use crate::request::Request; - use crate::utils; - - #[test] - fn merges_domains() { - let rules = [ - "/analytics-v1$domain=google.com", - "/analytics-v1$domain=example.com", - ]; - - let (filters, _) = lists::parse_filters(&rules, true, Default::default()); - let optimization = UnionDomainGroup {}; - let (fused, _) = apply_optimisation(&optimization, filters); - - assert_eq!(fused.len(), 1); - let filter = fused.get(0).unwrap(); - assert_eq!( - filter.to_string(), - "/analytics-v1$domain=google.com <+> /analytics-v1$domain=example.com" - ); - - let expected_domains = vec![ - utils::fast_hash("example.com"), - utils::fast_hash("google.com"), - ]; - assert!(filter.opt_domains.is_some()); - let filter_domains = filter.opt_domains.as_ref().unwrap(); - for dom in expected_domains { - assert!(filter_domains.contains(&dom)); - } - - assert!( - filter.matches_test( - &Request::new( - "https://example.com/analytics-v1/foobar", - "https://google.com", - "" - ) - .unwrap() - ) == true - ); - assert!( - filter.matches_test( - &Request::new( - "https://example.com/analytics-v1/foobar", - "https://foo.leadpages.net", - "" - ) - .unwrap() - ) == false - ); - } - - #[test] - fn skips_rules_with_no_domain() { - let rules = [ - "/analytics-v1$domain=google.com", - "/analytics-v1$domain=example.com", - "/analytics-v1", - ]; - - let (filters, _) = lists::parse_filters(&rules, true, Default::default()); - let optimization = UnionDomainGroup {}; - let (_, skipped) = apply_optimisation(&optimization, filters); - - assert_eq!(skipped.len(), 1); - let filter = skipped.get(0).unwrap(); - assert_eq!(filter.to_string(), "/analytics-v1"); - } - - #[test] - fn optimises_domains() { - let rules = [ - "/analytics-v1$domain=google.com", - "/analytics-v1$domain=example.com", - "/analytics-v1$domain=exampleone.com|exampletwo.com", - "/analytics-v1", - ]; - - let (filters, _) = lists::parse_filters(&rules, true, Default::default()); - - let optimization = UnionDomainGroup {}; - - let (fused, skipped) = apply_optimisation(&optimization, filters); - - assert_eq!(fused.len(), 1); - let filter = fused.get(0).unwrap(); - assert_eq!( - filter.to_string(), - "/analytics-v1$domain=google.com <+> /analytics-v1$domain=example.com <+> /analytics-v1$domain=exampleone.com|exampletwo.com" - ); - - assert_eq!(skipped.len(), 1); - let skipped_filter = skipped.get(0).unwrap(); - assert_eq!(skipped_filter.to_string(), "/analytics-v1"); - - assert!( - filter.matches_test( - &Request::new( - "https://example.com/analytics-v1/foobar", - "https://google.com", - "" - ) - .unwrap() - ) == true - ); - assert!( - filter.matches_test( - &Request::new( - "https://example.com/analytics-v1/foobar", - "https://example.com", - "" - ) - .unwrap() - ) == true - ); - assert!( - filter.matches_test( - &Request::new( - "https://example.com/analytics-v1/foobar", - "https://exampletwo.com", - "" - ) - .unwrap() - ) == true - ); - assert!( - filter.matches_test( - &Request::new( - "https://example.com/analytics-v1/foobar", - "https://foo.leadpages.net", - "" - ) - .unwrap() - ) == false - ); - } -} -*/ +#[path = "../tests/unit/optimizer.rs"] +mod unit_tests; diff --git a/src/regex_manager.rs b/src/regex_manager.rs index 258f13ce..81610509 100644 --- a/src/regex_manager.rs +++ b/src/regex_manager.rs @@ -2,9 +2,15 @@ //! the [`crate::Engine`], infrequently used regexes can be discarded. The [`RegexManager`] is //! responsible for managing the storage of regexes used by filters. -use crate::filters::network::{compile_regex, CompiledRegex, NetworkFilter}; +use crate::filters::network::NetworkFilterMask; + +use regex::{ + bytes::Regex as BytesRegex, bytes::RegexBuilder as BytesRegexBuilder, + bytes::RegexSet as BytesRegexSet, bytes::RegexSetBuilder as BytesRegexSetBuilder, Regex, +}; use std::collections::HashMap; +use std::fmt; use std::time::Duration; #[cfg(test)] @@ -58,6 +64,40 @@ pub struct RegexDebugEntry { pub usage_count: usize, } +#[derive(Debug, Clone)] +pub enum CompiledRegex { + Compiled(BytesRegex), + CompiledSet(BytesRegexSet), + MatchAll, + RegexParsingError(regex::Error), +} + +impl CompiledRegex { + pub fn is_match(&self, pattern: &str) -> bool { + match &self { + CompiledRegex::MatchAll => true, // simple case for matching everything, e.g. for empty filter + CompiledRegex::RegexParsingError(_e) => false, // no match if regex didn't even compile + CompiledRegex::Compiled(r) => r.is_match(pattern.as_bytes()), + CompiledRegex::CompiledSet(r) => { + // let matches: Vec<_> = r.matches(pattern).into_iter().collect(); + // println!("Matching {} against RegexSet: {:?}", pattern, matches); + r.is_match(pattern.as_bytes()) + } + } + } +} + +impl fmt::Display for CompiledRegex { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match &self { + CompiledRegex::MatchAll => write!(f, ".*"), // simple case for matching everything, e.g. for empty filter + CompiledRegex::RegexParsingError(_e) => write!(f, "ERROR"), // no match if regex didn't even compile + CompiledRegex::Compiled(r) => write!(f, "{}", r.as_str()), + CompiledRegex::CompiledSet(r) => write!(f, "{}", r.patterns().join(" | ")), + } + } +} + struct RegexEntry { regex: Option, last_used: Instant, @@ -88,7 +128,7 @@ type RandomState = std::hash::BuildHasherDefault; /// /// The [`RegexManager`] is not thread safe, so any access to it must be synchronized externally. pub struct RegexManager { - map: HashMap<*const NetworkFilter, RegexEntry, RandomState>, + map: HashMap, compiled_regex_count: usize, now: Instant, #[cfg_attr(target_arch = "wasm32", allow(unused))] @@ -108,23 +148,110 @@ impl Default for RegexManager { } } -fn make_regexp(filter: &NetworkFilter) -> CompiledRegex { +fn make_regexp<'a, FiltersIter>(mask: NetworkFilterMask, filters: FiltersIter) -> CompiledRegex +where + FiltersIter: Iterator + ExactSizeIterator, +{ compile_regex( - &filter.filter, - filter.is_right_anchor(), - filter.is_left_anchor(), - filter.is_complete_regex(), + filters, + mask.is_right_anchor(), + mask.is_left_anchor(), + mask.is_complete_regex(), ) } +/// Compiles a filter pattern to a regex. This is only performed *lazily* for +/// filters containing at least a * or ^ symbol. Because Regexes are expansive, +/// we try to convert some patterns to plain filters. +#[allow(clippy::trivial_regex)] +pub(crate) fn compile_regex<'a, I>( + filters: I, + is_right_anchor: bool, + is_left_anchor: bool, + is_complete_regex: bool, +) -> CompiledRegex +where + I: Iterator + ExactSizeIterator, +{ + use once_cell::sync::Lazy; + // Escape special regex characters: |.$+?{}()[]\ + static SPECIAL_RE: Lazy = + Lazy::new(|| Regex::new(r"([\|\.\$\+\?\{\}\(\)\[\]])").unwrap()); + // * can match anything + static WILDCARD_RE: Lazy = Lazy::new(|| Regex::new(r"\*").unwrap()); + // ^ can match any separator or the end of the pattern + static ANCHOR_RE: Lazy = Lazy::new(|| Regex::new(r"\^(.)").unwrap()); + // ^ can match any separator or the end of the pattern + static ANCHOR_RE_EOL: Lazy = Lazy::new(|| Regex::new(r"\^$").unwrap()); + + let mut escaped_patterns = Vec::with_capacity(filters.len()); + for filter_str in filters { + // If any filter is empty, the entire set matches anything + if filter_str.is_empty() { + return CompiledRegex::MatchAll; + } + if is_complete_regex { + // unescape unrecognised escaping sequences, otherwise a normal regex + let unescaped = filter_str[1..filter_str.len() - 1] + .replace("\\/", "/") + .replace("\\:", ":"); + + escaped_patterns.push(unescaped); + } else { + let repl = SPECIAL_RE.replace_all(&filter_str, "\\$1"); + let repl = WILDCARD_RE.replace_all(&repl, ".*"); + // in adblock rules, '^' is a separator. + // The separator character is anything but a letter, a digit, or one of the following: _ - . % + let repl = ANCHOR_RE.replace_all(&repl, "(?:[^\\w\\d\\._%-])$1"); + let repl = ANCHOR_RE_EOL.replace_all(&repl, "(?:[^\\w\\d\\._%-]|$)"); + + // Should match start or end of url + let left_anchor = if is_left_anchor { "^" } else { "" }; + let right_anchor = if is_right_anchor { "$" } else { "" }; + let filter = format!("{}{}{}", left_anchor, repl, right_anchor); + + escaped_patterns.push(filter); + } + } + + if escaped_patterns.is_empty() { + CompiledRegex::MatchAll + } else if escaped_patterns.len() == 1 { + let pattern = &escaped_patterns[0]; + match BytesRegexBuilder::new(pattern).unicode(false).build() { + Ok(compiled) => CompiledRegex::Compiled(compiled), + Err(e) => { + // println!("Regex parsing failed ({:?})", e); + CompiledRegex::RegexParsingError(e) + } + } + } else { + match BytesRegexSetBuilder::new(escaped_patterns) + .unicode(false) + .build() + { + Ok(compiled) => CompiledRegex::CompiledSet(compiled), + Err(e) => CompiledRegex::RegexParsingError(e), + } + } +} + impl RegexManager { /// Check whether or not a regex network filter matches a certain URL pattern, using the /// [`RegexManager`]'s managed regex storage. - pub fn matches(&mut self, filter: &NetworkFilter, pattern: &str) -> bool { - if !filter.is_regex() && !filter.is_complete_regex() { + pub fn matches<'a, FiltersIter>( + &mut self, + mask: NetworkFilterMask, + filters: FiltersIter, + key: u64, + pattern: &str, + ) -> bool + where + FiltersIter: Iterator + ExactSizeIterator, + { + if !mask.is_regex() && !mask.is_complete_regex() { return true; } - let key = filter as *const NetworkFilter; use std::collections::hash_map::Entry; match self.map.entry(key) { Entry::Occupied(mut e) => { @@ -133,7 +260,7 @@ impl RegexManager { v.last_used = self.now; if v.regex.is_none() { // A discarded entry, recreate it: - v.regex = Some(make_regexp(filter)); + v.regex = Some(make_regexp(mask, filters)); self.compiled_regex_count += 1; } return v.regex.as_ref().unwrap().is_match(pattern); @@ -141,7 +268,7 @@ impl RegexManager { Entry::Vacant(e) => { self.compiled_regex_count += 1; let new_entry = RegexEntry { - regex: Some(make_regexp(filter)), + regex: Some(make_regexp(mask, filters)), last_used: self.now, usage_count: 1, }; @@ -225,73 +352,6 @@ impl RegexManager { } } -#[cfg(all(test, feature = "regex-debug-info"))] -mod tests { - use super::*; - - use crate::filters::network::NetworkMatchable; - use crate::request; - - use mock_instant::global::MockClock; - - fn make_filter(line: &str) -> NetworkFilter { - NetworkFilter::parse(line, true, Default::default()).unwrap() - } - - fn make_request(url: &str) -> request::Request { - request::Request::new(url, "https://example.com", "other").unwrap() - } - - fn get_active_regex_count(regex_manager: &RegexManager) -> usize { - regex_manager - .get_debug_regex_data() - .iter() - .filter(|x| x.regex.is_some()) - .count() - } - - #[test] - fn simple_match() { - let mut regex_manager = RegexManager::default(); - regex_manager.update_time(); - - let filter = make_filter("||geo*.hltv.org^"); - assert!(filter.matches(&make_request("https://geo2.hltv.org/"), &mut regex_manager)); - assert_eq!(get_active_regex_count(®ex_manager), 1); - assert_eq!(regex_manager.get_debug_regex_data().len(), 1); - } - - #[test] - fn discard_and_recreate() { - let mut regex_manager = RegexManager::default(); - regex_manager.update_time(); - - let filter = make_filter("||geo*.hltv.org^"); - assert!(filter.matches(&make_request("https://geo2.hltv.org/"), &mut regex_manager)); - assert_eq!(regex_manager.get_compiled_regex_count(), 1); - assert_eq!(get_active_regex_count(®ex_manager), 1); - - MockClock::advance(DEFAULT_DISCARD_UNUSED_TIME - Duration::from_secs(1)); - regex_manager.update_time(); - // The entry shouldn't be discarded because was used during - // last REGEX_MANAGER_DISCARD_TIME. - assert_eq!(get_active_regex_count(®ex_manager), 1); - - // The entry is entry is outdated, but should be discarded only - // in the next cleanup() call. The call was 2 sec ago and is throttled - // now. - MockClock::advance(DEFAULT_CLEAN_UP_INTERVAL - Duration::from_secs(1)); - regex_manager.update_time(); - assert_eq!(get_active_regex_count(®ex_manager), 1); - - MockClock::advance(Duration::from_secs(2)); - regex_manager.update_time(); - // The entry is now outdated & cleanup() should be called => discard. - assert_eq!(get_active_regex_count(®ex_manager), 0); - - // The entry is recreated, get_compiled_regex_count() increased +1. - assert!(filter.matches(&make_request("https://geo2.hltv.org/"), &mut regex_manager)); - assert_eq!(regex_manager.get_compiled_regex_count(), 2); - assert_eq!(get_active_regex_count(®ex_manager), 1); - } -} +#[cfg(test)] +#[path = "../tests/unit/regex_manager.rs"] +mod unit_tests; diff --git a/src/request.rs b/src/request.rs index 89d29842..4dec62d4 100644 --- a/src/request.rs +++ b/src/request.rs @@ -5,7 +5,7 @@ use std::borrow::Cow; use thiserror::Error; use crate::url_parser; -use crate::utils; +use crate::utils::{self, Tokens}; /// The type of resource requested from the URL endpoint. #[derive(Clone, PartialEq, Debug)] @@ -87,8 +87,10 @@ pub struct Request { pub is_supported: bool, pub is_third_party: bool, pub url: String, + pub url_lower_cased: String, pub hostname: String, - pub source_hostname_hashes: Option>, + pub request_tokens: Tokens, + pub source_hostname_hashes: Option, pub(crate) original_url: String, } @@ -98,15 +100,25 @@ impl Request { if case_sensitive { Cow::Borrowed(&self.url) } else { - Cow::Owned(self.url.to_ascii_lowercase()) + Cow::Borrowed(&self.url_lower_cased) } } - pub fn get_tokens(&self, token_buffer: &mut Vec) { - token_buffer.clear(); - utils::tokenize_pooled(&self.url.to_ascii_lowercase(), token_buffer); - // Add zero token as a fallback to wildcard rule bucket - token_buffer.push(0); + pub fn get_tokens(&self) -> &Tokens { + &self.request_tokens + } + + pub fn checkable_tokens_iter( + &self, + ) -> core::iter::Chain< + core::iter::Flatten>, + std::slice::Iter<'_, u64>, + > { + self.source_hostname_hashes + .as_ref() + .into_iter() + .flatten() + .chain(self.get_tokens().into_iter()) } #[allow(clippy::too_many_arguments)] @@ -143,12 +155,22 @@ impl Request { } } + let url_lower_cased = url.to_ascii_lowercase(); + let mut request_tokens = utils::tokenize(&url_lower_cased); + // Add zero token as a fallback to wildcard rule bucket + request_tokens.push(0).expect("Ok"); + let source_hostname_hashes = if !source_hostname.is_empty() { - let mut hashes = Vec::with_capacity(4); - hashes.push(utils::fast_hash(source_hostname)); + let mut hashes = Tokens::new(); + hashes.push(utils::fast_hash(source_hostname)).unwrap(); for (i, c) in source_hostname.char_indices() { if c == '.' && i + 1 < source_hostname.len() { - hashes.push(utils::fast_hash(&source_hostname[i + 1..])); + if hashes + .push(utils::fast_hash(&source_hostname[i + 1..])) + .is_err() + { + break; + } } } Some(hashes) @@ -159,7 +181,9 @@ impl Request { Request { request_type, url: url.to_owned(), + url_lower_cased: url_lower_cased.to_owned(), hostname: hostname.to_owned(), + request_tokens: request_tokens, source_hostname_hashes, is_third_party: third_party, is_http, @@ -170,11 +194,7 @@ impl Request { } /// Construct a new [`Request`]. - pub fn new( - url: &str, - source_url: &str, - request_type: &str, - ) -> Result { + pub fn new(url: &str, source_url: &str, request_type: &str) -> Result { if let Some(parsed_url) = url_parser::parse_url(url) { if let Some(parsed_source) = url_parser::parse_url(source_url) { let source_domain = parsed_source.domain(); @@ -232,201 +252,5 @@ impl Request { } #[cfg(test)] -mod tests { - use super::*; - - fn build_request( - raw_type: &str, - url: &str, - schema: &str, - hostname: &str, - domain: &str, - source_hostname: &str, - source_domain: &str, - ) -> Request { - let third_party = source_domain != domain; - - Request::from_detailed_parameters( - raw_type, - url, - schema, - hostname, - source_hostname, - third_party, - url.to_string(), - ) - } - - #[test] - fn new_works() { - let simple_example = build_request( - "document", - "https://example.com/ad", - "https", - "example.com", - "example.com", - "example.com", - "example.com", - ); - assert_eq!(simple_example.is_https, true); - assert_eq!(simple_example.is_supported, true); - assert_eq!(simple_example.is_third_party, false); - assert_eq!(simple_example.request_type, RequestType::Document); - assert_eq!( - simple_example.source_hostname_hashes, - Some(vec![ - utils::fast_hash("example.com"), - utils::fast_hash("com") - ]), - ); - - let unsupported_example = build_request( - "document", - "file://example.com/ad", - "file", - "example.com", - "example.com", - "example.com", - "example.com", - ); - assert_eq!(unsupported_example.is_https, false); - assert_eq!(unsupported_example.is_http, false); - assert_eq!(unsupported_example.is_supported, false); - - let first_party = build_request( - "document", - "https://subdomain.example.com/ad", - "https", - "subdomain.example.com", - "example.com", - "example.com", - "example.com", - ); - assert_eq!(first_party.is_https, true); - assert_eq!(first_party.is_supported, true); - assert_eq!(first_party.is_third_party, false); - - let third_party = build_request( - "document", - "https://subdomain.anotherexample.com/ad", - "https", - "subdomain.anotherexample.com", - "anotherexample.com", - "example.com", - "example.com", - ); - assert_eq!(third_party.is_https, true); - assert_eq!(third_party.is_supported, true); - assert_eq!(third_party.is_third_party, true); - - let websocket = build_request( - "document", - "wss://subdomain.anotherexample.com/ad", - "wss", - "subdomain.anotherexample.com", - "anotherexample.com", - "example.com", - "example.com", - ); - assert_eq!(websocket.is_https, false); - assert_eq!(websocket.is_https, false); - assert_eq!(websocket.is_supported, true); - assert_eq!(websocket.is_third_party, true); - assert_eq!(websocket.request_type, RequestType::Websocket); - - let assumed_https = build_request( - "document", - "//subdomain.anotherexample.com/ad", - "", - "subdomain.anotherexample.com", - "anotherexample.com", - "example.com", - "example.com", - ); - assert_eq!(assumed_https.is_https, true); - assert_eq!(assumed_https.is_http, false); - assert_eq!(assumed_https.is_supported, true); - } - - fn tokenize(tokens: &[&str], extra_tokens: &[utils::Hash]) -> Vec { - let mut tokens: Vec<_> = tokens.into_iter().map(|t| utils::fast_hash(&t)).collect(); - tokens.extend(extra_tokens); - tokens - } - - #[test] - fn tokens_works() { - let simple_example = build_request( - "document", - "https://subdomain.example.com/ad", - "https", - "subdomain.example.com", - "example.com", - "subdomain.example.com", - "example.com", - ); - assert_eq!( - simple_example - .source_hostname_hashes - .as_ref() - .unwrap() - .as_slice(), - tokenize(&["subdomain.example.com", "example.com", "com",], &[]).as_slice() - ); - let mut tokens = Vec::new(); - simple_example.get_tokens(&mut tokens); - assert_eq!( - tokens.as_slice(), - tokenize(&["https", "subdomain", "example", "com", "ad"], &[0]).as_slice() - ) - } - - #[test] - fn parses_urls() { - let parsed = Request::new( - "https://subdomain.example.com/ad", - "https://example.com/", - "document", - ) - .unwrap(); - assert_eq!(parsed.is_https, true); - assert_eq!(parsed.is_supported, true); - assert_eq!(parsed.is_third_party, false); - assert_eq!(parsed.request_type, RequestType::Document); - - // assert_eq!(parsed.domain, "example.com"); - assert_eq!(parsed.hostname, "subdomain.example.com"); - - // assert_eq!(parsed.source_domain, "example.com"); - assert_eq!( - parsed.source_hostname_hashes, - Some(vec![ - utils::fast_hash("example.com"), - utils::fast_hash("com") - ]), - ); - // assert_eq!(parsed.source_hostname, "example.com"); - - let bad_url = Request::new( - "subdomain.example.com/ad", - "https://example.com/", - "document", - ); - assert_eq!(bad_url.err(), Some(RequestError::HostnameParseError)); - } - - #[test] - fn fuzzing_errors() { - { - let parsed = Request::new("https://߶", "https://example.com", "other"); - assert!(parsed.is_ok()); - } - { - let parsed = Request::new(&format!( - "https://{}", - std::str::from_utf8(&[9, 9, 64]).unwrap() - ), "https://example.com", "other"); - assert!(parsed.is_err()); - } - } -} +#[path = "../tests/unit/request.rs"] +mod unit_tests; diff --git a/src/resources/mod.rs b/src/resources/mod.rs index 0c99337d..2ea21062 100644 --- a/src/resources/mod.rs +++ b/src/resources/mod.rs @@ -4,8 +4,8 @@ //! can be injected into pages to inhibit malicious behavior. //! //! If the `resource-assembler` feature is enabled, the -#![cfg_attr(not(feature = "resource-assembler"), doc="`resource_assembler`")] -#![cfg_attr(feature = "resource-assembler", doc="[`resource_assembler`]")] +#![cfg_attr(not(feature = "resource-assembler"), doc = "`resource_assembler`")] +#![cfg_attr(feature = "resource-assembler", doc = "[`resource_assembler`]")] //! module will assist with the construction of [`Resource`]s directly from the uBlock Origin //! project. @@ -13,9 +13,9 @@ pub mod resource_assembler; mod resource_storage; +pub(crate) use resource_storage::parse_scriptlet_args; #[doc(inline)] pub use resource_storage::{AddResourceError, ResourceStorage, ScriptletResourceError}; -pub(crate) use resource_storage::parse_scriptlet_args; use memchr::memrchr as find_char_reverse; use serde::{Deserialize, Serialize}; @@ -182,12 +182,18 @@ pub enum ResourceType { impl ResourceType { /// Can resources of this type be used as network redirects? pub fn supports_redirect(&self) -> bool { - !matches!(self, ResourceType::Template | ResourceType::Mime(MimeType::FnJavascript)) + !matches!( + self, + ResourceType::Template | ResourceType::Mime(MimeType::FnJavascript) + ) } /// Can resources of this type be used for scriptlet injections? pub fn supports_scriptlet_injection(&self) -> bool { - matches!(self, ResourceType::Template | ResourceType::Mime(MimeType::ApplicationJavascript)) + matches!( + self, + ResourceType::Template | ResourceType::Mime(MimeType::ApplicationJavascript) + ) } } diff --git a/src/resources/resource_assembler.rs b/src/resources/resource_assembler.rs index e252e6fa..c4ecbf30 100644 --- a/src/resources/resource_assembler.rs +++ b/src/resources/resource_assembler.rs @@ -91,9 +91,7 @@ fn read_redirectable_resource_mapping(mapfile_data: &str) -> Vec Vec { } #[cfg(test)] -mod tests { - use super::*; - - #[test] - fn test_war_resource_assembly() { - let web_accessible_resource_dir = - Path::new("data/test/fake-uBO-files/web_accessible_resources"); - let redirect_resources_path = Path::new("data/test/fake-uBO-files/redirect-resources.js"); - let resources = - assemble_web_accessible_resources(web_accessible_resource_dir, redirect_resources_path); - - let expected_resource_names = vec![ - "1x1.gif", - "2x2.png", - "3x2.png", - "32x32.png", - "addthis_widget.js", - "amazon_ads.js", - "amazon_apstag.js", - "ampproject_v0.js", - "chartbeat.js", - //"click-to-load.html" is ignored because it has a params field. - "doubleclick_instream_ad_status.js", - "empty", - "fingerprint2.js", - "fingerprint3.js", - "google-analytics_analytics.js", - "google-analytics_cx_api.js", - "google-analytics_ga.js", - "google-analytics_inpage_linkid.js", - "google-ima.js", - "googlesyndication_adsbygoogle.js", - "googletagservices_gpt.js", - "hd-main.js", - "ligatus_angular-tag.js", - "mxpnl_mixpanel.js", - "monkeybroker.js", - "noeval.js", - "noeval-silent.js", - "nobab.js", - "nobab2.js", - "nofab.js", - "noop-0.1s.mp3", - "noop-0.5s.mp3", - "noop-1s.mp4", - "noop.html", - "noop.js", - "noop.txt", - "noop-vmap1.0.xml", - "outbrain-widget.js", - "popads.js", - "popads-dummy.js", - "prebid-ads.js", - "scorecardresearch_beacon.js", - "window.open-defuser.js", - ]; - - for name in expected_resource_names { - dbg!(&name); - assert!( - resources - .iter() - .find(|resource| { - if let ResourceType::Mime(_) = resource.kind { - resource.name == name - } else { - false - } - }) - .is_some(), - "{:?}", - name - ); - } - - let serialized = serde_json::to_string(&resources).expect("serialize resources"); - - let reserialized: Vec = - serde_json::from_str(&serialized).expect("deserialize resources"); - - assert_eq!(reserialized[0].name, "1x1.gif"); - assert_eq!(reserialized[0].aliases, vec!["1x1-transparent.gif"]); - assert_eq!(reserialized[0].kind, ResourceType::Mime(MimeType::ImageGif)); - - assert_eq!(reserialized[34].name, "noop.js"); - assert_eq!( - reserialized[34].aliases, - vec!["noopjs", "abp-resource:blank-js"] - ); - assert_eq!( - reserialized[34].kind, - ResourceType::Mime(MimeType::ApplicationJavascript) - ); - let noopjs_contents = std::fs::read_to_string(Path::new( - "data/test/fake-uBO-files/web_accessible_resources/noop.js", - )) - .unwrap() - .replace('\r', ""); - assert_eq!( - std::str::from_utf8( - &base64::decode(&reserialized[34].content).expect("decode base64 content") - ) - .expect("convert to utf8 string"), - noopjs_contents, - ); - } - - #[test] - fn test_scriptlet_resource_assembly2() { - let scriptlets_path = Path::new("data/test/fake-uBO-files/scriptlets2.js"); - #[allow(deprecated)] - let resources = assemble_scriptlet_resources(scriptlets_path); - - let expected_resource_names = vec![ - "abort-current-inline-script.js", - "abort-on-property-read.js", - "abort-on-property-write.js", - "abort-on-stack-trace.js", - "addEventListener-defuser.js", - "addEventListener-logger.js", - "json-prune.js", - "nano-setInterval-booster.js", - "nano-setTimeout-booster.js", - "noeval-if.js", - "no-fetch-if.js", - "no-floc.js", - "remove-attr.js", - "remove-class.js", - "no-requestAnimationFrame-if.js", - "set-constant.js", - "no-setInterval-if.js", - "no-setTimeout-if.js", - "webrtc-if.js", - "window.name-defuser", - "overlay-buster.js", - "alert-buster.js", - "gpt-defuser.js", - "nowebrtc.js", - "golem.de.js", - "upmanager-defuser.js", - "smartadserver.com.js", - "adfly-defuser.js", - "disable-newtab-links.js", - "damoh-defuser.js", - "twitch-videoad.js", - "fingerprint2.js", - "cookie-remover.js", - ]; - - for name in expected_resource_names { - assert!( - resources - .iter() - .find(|resource| { - match resource.kind { - ResourceType::Template - | ResourceType::Mime(MimeType::ApplicationJavascript) => { - resource.name == name - } - _ => false, - } - }) - .is_some(), - "failed to find {}", - name - ); - } - - let serialized = serde_json::to_string(&resources).expect("serialize resources"); - - let reserialized: Vec = - serde_json::from_str(&serialized).expect("deserialize resources"); - - assert_eq!(reserialized[0].name, "abort-current-inline-script.js"); - assert_eq!(reserialized[0].aliases, vec!["acis.js"]); - assert_eq!(reserialized[0].kind, ResourceType::Template); - - assert_eq!(reserialized[17].name, "no-setTimeout-if.js"); - assert_eq!( - reserialized[17].aliases, - vec!["nostif.js", "setTimeout-defuser.js"] - ); - assert_eq!(reserialized[17].kind, ResourceType::Template); - - assert_eq!(reserialized[20].name, "overlay-buster.js"); - assert_eq!(reserialized[20].aliases, Vec::::new()); - assert_eq!( - reserialized[20].kind, - ResourceType::Mime(MimeType::ApplicationJavascript) - ); - assert_eq!( - std::str::from_utf8( - &base64::decode(&reserialized[20].content).expect("decode base64 content") - ).expect("convert to utf8 string"), - "(function() {\nif ( window !== window.top ) {\nreturn;\n}\nvar tstart;\nvar ttl = 30000;\nvar delay = 0;\nvar delayStep = 50;\nvar buster = function() {\nvar docEl = document.documentElement,\nbodyEl = document.body,\nvw = Math.min(docEl.clientWidth, window.innerWidth),\nvh = Math.min(docEl.clientHeight, window.innerHeight),\ntol = Math.min(vw, vh) * 0.05,\nel = document.elementFromPoint(vw/2, vh/2),\nstyle, rect;\nfor (;;) {\nif ( el === null || el.parentNode === null || el === bodyEl ) {\nbreak;\n}\nstyle = window.getComputedStyle(el);\nif ( parseInt(style.zIndex, 10) >= 1000 || style.position === 'fixed' ) {\nrect = el.getBoundingClientRect();\nif ( rect.left <= tol && rect.top <= tol && (vw - rect.right) <= tol && (vh - rect.bottom) < tol ) {\nel.parentNode.removeChild(el);\ntstart = Date.now();\nel = document.elementFromPoint(vw/2, vh/2);\nbodyEl.style.setProperty('overflow', 'auto', 'important');\ndocEl.style.setProperty('overflow', 'auto', 'important');\ncontinue;\n}\n}\nel = el.parentNode;\n}\nif ( (Date.now() - tstart) < ttl ) {\ndelay = Math.min(delay + delayStep, 1000);\nsetTimeout(buster, delay);\n}\n};\nvar domReady = function(ev) {\nif ( ev ) {\ndocument.removeEventListener(ev.type, domReady);\n}\ntstart = Date.now();\nsetTimeout(buster, delay);\n};\nif ( document.readyState === 'loading' ) {\ndocument.addEventListener('DOMContentLoaded', domReady);\n} else {\ndomReady();\n}\n})();\n", - ); - - assert_eq!(reserialized[6].name, "json-prune.js"); - assert_eq!(reserialized[6].aliases, Vec::::new()); - assert_eq!(reserialized[6].kind, ResourceType::Template); - assert_eq!( - std::str::from_utf8( - &base64::decode(&reserialized[6].content).expect("decode base64 content") - ).expect("convert to utf8 string"), - "(function() {\nconst rawPrunePaths = '{{1}}';\nconst rawNeedlePaths = '{{2}}';\nconst prunePaths = rawPrunePaths !== '{{1}}' && rawPrunePaths !== ''\n? rawPrunePaths.split(/ +/)\n: [];\nlet needlePaths;\nlet log, reLogNeedle;\nif ( prunePaths.length !== 0 ) {\nneedlePaths = prunePaths.length !== 0 &&\nrawNeedlePaths !== '{{2}}' && rawNeedlePaths !== ''\n? rawNeedlePaths.split(/ +/)\n: [];\n} else {\nlog = console.log.bind(console);\nlet needle;\nif ( rawNeedlePaths === '' || rawNeedlePaths === '{{2}}' ) {\nneedle = '.?';\n} else if ( rawNeedlePaths.charAt(0) === '/' && rawNeedlePaths.slice(-1) === '/' ) {\nneedle = rawNeedlePaths.slice(1, -1);\n} else {\nneedle = rawNeedlePaths.replace(/[.*+?^${}()|[\\]\\\\]/g, '\\\\$&');\n}\nreLogNeedle = new RegExp(needle);\n}\nconst findOwner = function(root, path, prune = false) {\nlet owner = root;\nlet chain = path;\nfor (;;) {\nif ( typeof owner !== 'object' || owner === null ) {\nreturn false;\n}\nconst pos = chain.indexOf('.');\nif ( pos === -1 ) {\nif ( prune === false ) {\nreturn owner.hasOwnProperty(chain);\n}\nif ( chain === '*' ) {\nfor ( const key in owner ) {\nif ( owner.hasOwnProperty(key) === false ) { continue; }\ndelete owner[key];\n}\n} else if ( owner.hasOwnProperty(chain) ) {\ndelete owner[chain];\n}\nreturn true;\n}\nconst prop = chain.slice(0, pos);\nif (\nprop === '[]' && Array.isArray(owner) ||\nprop === '*' && owner instanceof Object\n) {\nconst next = chain.slice(pos + 1);\nlet found = false;\nfor ( const key of Object.keys(owner) ) {\nfound = findOwner(owner[key], next, prune) || found;\n}\nreturn found;\n}\nif ( owner.hasOwnProperty(prop) === false ) { return false; }\nowner = owner[prop];\nchain = chain.slice(pos + 1);\n}\n};\nconst mustProcess = function(root) {\nfor ( const needlePath of needlePaths ) {\nif ( findOwner(root, needlePath) === false ) {\nreturn false;\n}\n}\nreturn true;\n};\nconst pruner = function(o) {\nif ( log !== undefined ) {\nconst json = JSON.stringify(o, null, 2);\nif ( reLogNeedle.test(json) ) {\nlog('uBO:', location.hostname, json);\n}\nreturn o;\n}\nif ( mustProcess(o) === false ) { return o; }\nfor ( const path of prunePaths ) {\nfindOwner(o, path, true);\n}\nreturn o;\n};\nJSON.parse = new Proxy(JSON.parse, {\napply: function() {\nreturn pruner(Reflect.apply(...arguments));\n},\n});\nResponse.prototype.json = new Proxy(Response.prototype.json, {\napply: function() {\nreturn Reflect.apply(...arguments).then(o => pruner(o));\n},\n});\n})();\n", - ); - } - - #[test] - fn test_scriptlet_resource_assembly() { - let scriptlets_path = Path::new("data/test/fake-uBO-files/scriptlets.js"); - #[allow(deprecated)] - let resources = assemble_scriptlet_resources(scriptlets_path); - - let expected_resource_names = vec![ - "abort-current-inline-script.js", - "abort-on-property-read.js", - "abort-on-property-write.js", - "addEventListener-defuser.js", - "addEventListener-logger.js", - "json-prune.js", - "nano-setInterval-booster.js", - "nano-setTimeout-booster.js", - "noeval-if.js", - "remove-attr.js", - "requestAnimationFrame-if.js", - "set-constant.js", - "setInterval-defuser.js", - "no-setInterval-if.js", - "setTimeout-defuser.js", - "no-setTimeout-if.js", - "webrtc-if.js", - "window.name-defuser", - "overlay-buster.js", - "alert-buster.js", - "gpt-defuser.js", - "nowebrtc.js", - "golem.de.js", - "upmanager-defuser.js", - "smartadserver.com.js", - "adfly-defuser.js", - "disable-newtab-links.js", - "damoh-defuser.js", - "twitch-videoad.js", - "fingerprint2.js", - "cookie-remover.js", - ]; - - for name in expected_resource_names { - assert!( - resources - .iter() - .find(|resource| { - match resource.kind { - ResourceType::Template - | ResourceType::Mime(MimeType::ApplicationJavascript) => { - resource.name == name - } - _ => false, - } - }) - .is_some(), - "failed to find {}", - name - ); - } - - let serialized = serde_json::to_string(&resources).expect("serialize resources"); - - let reserialized: Vec = - serde_json::from_str(&serialized).expect("deserialize resources"); - - assert_eq!(reserialized[0].name, "abort-current-inline-script.js"); - assert_eq!(reserialized[0].aliases, vec!["acis.js"]); - assert_eq!(reserialized[0].kind, ResourceType::Template); - - assert_eq!(reserialized[18].name, "overlay-buster.js"); - assert_eq!(reserialized[18].aliases, Vec::::new()); - assert_eq!( - reserialized[18].kind, - ResourceType::Mime(MimeType::ApplicationJavascript) - ); - assert_eq!( - std::str::from_utf8( - &base64::decode(&reserialized[18].content).expect("decode base64 content") - ).expect("convert to utf8 string"), - "(function() {\nif ( window !== window.top ) {\nreturn;\n}\nvar tstart;\nvar ttl = 30000;\nvar delay = 0;\nvar delayStep = 50;\nvar buster = function() {\nvar docEl = document.documentElement,\nbodyEl = document.body,\nvw = Math.min(docEl.clientWidth, window.innerWidth),\nvh = Math.min(docEl.clientHeight, window.innerHeight),\ntol = Math.min(vw, vh) * 0.05,\nel = document.elementFromPoint(vw/2, vh/2),\nstyle, rect;\nfor (;;) {\nif ( el === null || el.parentNode === null || el === bodyEl ) {\nbreak;\n}\nstyle = window.getComputedStyle(el);\nif ( parseInt(style.zIndex, 10) >= 1000 || style.position === 'fixed' ) {\nrect = el.getBoundingClientRect();\nif ( rect.left <= tol && rect.top <= tol && (vw - rect.right) <= tol && (vh - rect.bottom) < tol ) {\nel.parentNode.removeChild(el);\ntstart = Date.now();\nel = document.elementFromPoint(vw/2, vh/2);\nbodyEl.style.setProperty('overflow', 'auto', 'important');\ndocEl.style.setProperty('overflow', 'auto', 'important');\ncontinue;\n}\n}\nel = el.parentNode;\n}\nif ( (Date.now() - tstart) < ttl ) {\ndelay = Math.min(delay + delayStep, 1000);\nsetTimeout(buster, delay);\n}\n};\nvar domReady = function(ev) {\nif ( ev ) {\ndocument.removeEventListener(ev.type, domReady);\n}\ntstart = Date.now();\nsetTimeout(buster, delay);\n};\nif ( document.readyState === 'loading' ) {\ndocument.addEventListener('DOMContentLoaded', domReady);\n} else {\ndomReady();\n}\n})();\n", - ); - } -} +#[path = "../../tests/unit/resources/resource_assembler.rs"] +mod unit_tests; diff --git a/src/resources/resource_storage.rs b/src/resources/resource_storage.rs index 58aa8529..ac391f0e 100644 --- a/src/resources/resource_storage.rs +++ b/src/resources/resource_storage.rs @@ -36,33 +36,33 @@ fn stringify_arg(arg: &str) -> String { // Look up table for characters that need escaping in a product string static ESCAPED: [u8; 256] = [ - // 0 1 2 3 4 5 6 7 8 9 A B C D E F - UU, UU, UU, UU, UU, UU, UU, UU, BB, TT, NN, UU, FF, RR, UU, UU, // 0 - UU, UU, UU, UU, UU, UU, UU, UU, UU, UU, UU, UU, UU, UU, UU, UU, // 1 - __, __, QU, __, __, __, __, __, __, __, __, __, __, __, __, __, // 2 - __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // 3 - __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // 4 - __, __, __, __, __, __, __, __, __, __, __, __, BS, __, __, __, // 5 - __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // 6 - __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // 7 - __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // 8 - __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // 9 - __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // A - __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // B - __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // C - __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // D - __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // E - __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // F + // 0 1 2 3 4 5 6 7 8 9 A B C D E F + UU, UU, UU, UU, UU, UU, UU, UU, BB, TT, NN, UU, FF, RR, UU, UU, // 0 + UU, UU, UU, UU, UU, UU, UU, UU, UU, UU, UU, UU, UU, UU, UU, UU, // 1 + __, __, QU, __, __, __, __, __, __, __, __, __, __, __, __, __, // 2 + __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // 3 + __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // 4 + __, __, __, __, __, __, __, __, __, __, __, __, BS, __, __, __, // 5 + __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // 6 + __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // 7 + __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // 8 + __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // 9 + __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // A + __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // B + __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // C + __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // D + __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // E + __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // F ]; #[inline(never)] fn write_string_complex(output: &mut Vec, string: &str, mut start: usize) { - output.extend_from_slice(&string.as_bytes()[ .. start]); + output.extend_from_slice(&string.as_bytes()[..start]); for (index, ch) in string.bytes().enumerate().skip(start) { let escape = ESCAPED[ch as usize]; if escape > 0 { - output.extend_from_slice(&string.as_bytes()[start .. index]); + output.extend_from_slice(&string.as_bytes()[start..index]); output.extend_from_slice(&[b'\\', escape]); start = index + 1; } @@ -70,7 +70,7 @@ fn stringify_arg(arg: &str) -> String { output.extend_from_slice(format!("{:04x}", ch).as_bytes()); } } - output.extend_from_slice(&string.as_bytes()[start ..]); + output.extend_from_slice(&string.as_bytes()[start..]); } let mut output = Vec::with_capacity(arg.as_bytes().len() + 2); @@ -101,9 +101,8 @@ fn stringify_arg(arg: &str) -> String { /// Gets the function name from a JS function definition fn extract_function_name(fn_def: &str) -> Option<&str> { // This is not bulletproof, but should be robust against most issues. - const FUNCTION_NAME_RE: Lazy = Lazy::new(|| { - Regex::new(r#"^function\s+([^\(\)\{\}\s]+)\s*\("#).unwrap() - }); + const FUNCTION_NAME_RE: Lazy = + Lazy::new(|| Regex::new(r#"^function\s+([^\(\)\{\}\s]+)\s*\("#).unwrap()); FUNCTION_NAME_RE.captures(&fn_def).map(|captures| { // capture 1 is always present in the above regex if any match was made @@ -114,16 +113,14 @@ fn extract_function_name(fn_def: &str) -> Option<&str> { impl ResourceStorage { /// Convenience constructor that allows building storage for many resources at once. Errors are /// silently consumed. - pub fn from_resources(resources: impl IntoIterator) -> Self { + pub fn from_resources(resources: impl IntoIterator) -> Self { let mut self_ = Self::default(); resources.into_iter().for_each(|resource| { - self_ - .add_resource(resource) - .unwrap_or_else(|_e| { - #[cfg(test)] - eprintln!("Failed to add resource: {:?}", _e) - }) + self_.add_resource(resource).unwrap_or_else(|_e| { + #[cfg(test)] + eprintln!("Failed to add resource: {:?}", _e) + }) }); self_ @@ -159,7 +156,10 @@ impl ResourceStorage { /// Given the contents of the `+js(...)` parts of multiple filters, return a script string /// appropriate for injection in a page. - pub fn get_scriptlet_resources<'a>(&self, script_injections: impl IntoIterator) -> String { + pub fn get_scriptlet_resources<'a>( + &self, + script_injections: impl IntoIterator, + ) -> String { let mut deps = vec![]; let mut invokations = String::new(); @@ -192,7 +192,12 @@ impl ResourceStorage { /// /// Note that no ordering is guaranteed; function definitions in JS can appear after they are /// used. - fn recursive_dependencies<'a: 'b, 'b>(&'a self, new_dep: &str, prev_deps: &mut Vec<&'b Resource>, filter_permission: PermissionMask) -> Result<(), ScriptletResourceError> { + fn recursive_dependencies<'a: 'b, 'b>( + &'a self, + new_dep: &str, + prev_deps: &mut Vec<&'b Resource>, + filter_permission: PermissionMask, + ) -> Result<(), ScriptletResourceError> { if prev_deps.iter().find(|dep| dep.name == new_dep).is_some() { return Ok(()); } @@ -210,7 +215,12 @@ impl ResourceStorage { /// Given the contents of a single `+js(...)` filter part, return a scriptlet string /// appropriate for injection in a page. - fn get_scriptlet_resource<'a: 'b, 'b>(&'a self, scriptlet_args: &str, filter_permission: PermissionMask, required_deps: &mut Vec<&'b Resource>) -> Result { + fn get_scriptlet_resource<'a: 'b, 'b>( + &'a self, + scriptlet_args: &str, + filter_permission: PermissionMask, + required_deps: &mut Vec<&'b Resource>, + ) -> Result { // `unwrap` is safe because these are guaranteed valid at filter parsing. let scriptlet_args = parse_scriptlet_args(scriptlet_args).unwrap(); @@ -233,7 +243,7 @@ impl ResourceStorage { for dep in resource.dependencies.iter() { self.recursive_dependencies(dep, required_deps, filter_permission)?; - }; + } let template = String::from_utf8(base64::decode(&resource.content)?)?; @@ -241,15 +251,26 @@ impl ResourceStorage { // newer function-style resource: pass args using function call syntax // add the scriptlet itself as a dependency and invoke via function name - if required_deps.iter().find(|dep| dep.name == resource.name).is_none() { + if required_deps + .iter() + .find(|dep| dep.name == resource.name) + .is_none() + { required_deps.push(resource); } use itertools::Itertools as _; - Ok(format!("{}({})", function_name, args.iter().map(|arg| stringify_arg::(arg)).join(", "))) + Ok(format!( + "{}({})", + function_name, + args.iter().map(|arg| stringify_arg::(arg)).join(", ") + )) } else { // older template-style resource: replace first instances with args - Ok(patch_template_scriptlet(template, args.iter().map(|arg| stringify_arg::(arg)))) + Ok(patch_template_scriptlet( + template, + args.iter().map(|arg| stringify_arg::(arg)), + )) } } @@ -285,7 +306,11 @@ impl ResourceStorage { resource } - fn get_permissioned_resource(&self, scriptlet_name: &str, filter_permission: PermissionMask) -> Result<&Resource, ScriptletResourceError> { + fn get_permissioned_resource( + &self, + scriptlet_name: &str, + filter_permission: PermissionMask, + ) -> Result<&Resource, ScriptletResourceError> { let resource = self .get_internal_resource(&scriptlet_name) .ok_or(ScriptletResourceError::NoMatchingScriptlet)?; @@ -369,14 +394,20 @@ fn template_argument_regex(i: usize) -> Regex { } /// Omit the 0th element of `args` (the scriptlet name) when calling this method. -fn patch_template_scriptlet(mut template: String, args: impl IntoIterator>) -> String { +fn patch_template_scriptlet( + mut template: String, + args: impl IntoIterator>, +) -> String { // `regex` treats `$` as a special character. Instead, `$$` is interpreted as a literal `$` // character. - args.into_iter().take(TEMPLATE_ARGUMENT_RE.len()).enumerate().for_each(|(i, arg)| { - template = TEMPLATE_ARGUMENT_RE[i] - .replace(&template, arg.as_ref().replace('$', "$$")) - .to_string(); - }); + args.into_iter() + .take(TEMPLATE_ARGUMENT_RE.len()) + .enumerate() + .for_each(|(i, arg)| { + template = TEMPLATE_ARGUMENT_RE[i] + .replace(&template, arg.as_ref().replace('$', "$$")) + .to_string(); + }); template } @@ -421,7 +452,7 @@ fn index_next_unescaped_separator(s: &str, separator: char) -> (Option, b } } else { // no match - return (None, needs_transform) + return (None, needs_transform); } } // don't index beyond the end of the string @@ -490,7 +521,7 @@ pub(crate) fn parse_scriptlet_args(mut args: &str) -> Option> { (i, needs_transform) = index_next_unescaped_separator(args, qc); if let Some(i) = i { arg = &args[..i]; - args = &args[i+1..]; + args = &args[i + 1..]; // consume whitespace following the quote if let Some(i) = args.find(|c: char| !c.is_whitespace()) { args = &args[i..]; @@ -534,613 +565,5 @@ pub(crate) fn parse_scriptlet_args(mut args: &str) -> Option> { } #[cfg(test)] -mod extract_function_name_tests { - use super::extract_function_name; - - #[test] - fn test_extract_function_name() { - assert_eq!(extract_function_name("function test() {}"), Some("test")); - assert_eq!(extract_function_name("function $() {}"), Some("$")); - assert_eq!(extract_function_name("function _() {}"), Some("_")); - assert_eq!(extract_function_name("function ಠ_ಠ() {}"), Some("ಠ_ಠ")); - assert_eq!(extract_function_name("function\ntest\n(\n)\n{\n}"), Some("test")); - assert_eq!(extract_function_name("function\ttest\t(\t)\t{\t}"), Some("test")); - assert_eq!(extract_function_name("function test() { (function inner() {})() }"), Some("test")); - assert_eq!(extract_function_name("let e = function test() { (function inner() {})() }"), None); - assert_eq!(extract_function_name("function () { (function inner() {})() }"), None); - } -} - -#[cfg(test)] -mod arg_parsing_util_tests { - use super::*; - - #[test] - fn test_index_next_unescaped_separator() { - assert_eq!(index_next_unescaped_separator(r#"``"#, '`'), (Some(0), false)); - assert_eq!(index_next_unescaped_separator(r#"\``"#, '`'), (Some(2), true)); - assert_eq!(index_next_unescaped_separator(r#"\\``"#, '`'), (Some(2), false)); - assert_eq!(index_next_unescaped_separator(r#"\\\``"#, '`'), (Some(4), true)); - assert_eq!(index_next_unescaped_separator(r#"\\\\``"#, '`'), (Some(4), false)); - assert_eq!(index_next_unescaped_separator(r#"\`\\\``"#, '`'), (Some(6), true)); - assert_eq!(index_next_unescaped_separator(r#"\\\`\``"#, '`'), (Some(6), true)); - assert_eq!(index_next_unescaped_separator(r#"\\\`\\``"#, '`'), (Some(6), true)); - - assert_eq!(index_next_unescaped_separator(r#"\,test\,"#, ','), (None, true)) - } - - #[test] - fn test_normalize_arg() { - assert_eq!(normalize_arg(r#"\`"#, '`'), r#"`"#); - assert_eq!(normalize_arg(r#"\\\`"#, '`'), r#"\\`"#); - assert_eq!(normalize_arg(r#"\`\\\`"#, '`'), r#"`\\`"#); - assert_eq!(normalize_arg(r#"\\\`\`"#, '`'), r#"\\``"#); - assert_eq!(normalize_arg(r#"\\\`\\`"#, '`'), r#"\\`\\`"#); - } -} - -#[cfg(test)] -mod redirect_storage_tests { - use super::*; - use crate::resources::MimeType; - - #[test] - fn get_resource_by_name() { - let mut storage = ResourceStorage::default(); - storage - .add_resource( - Resource::simple("name.js", MimeType::ApplicationJavascript, "resource data"), - ) - .unwrap(); - - assert_eq!( - storage.get_redirect_resource("name.js"), - Some(format!("data:application/javascript;base64,{}", base64::encode("resource data"))), - ); - } - - #[test] - fn get_resource_by_alias() { - let mut storage = ResourceStorage::default(); - let mut r = Resource::simple("name.js", MimeType::ApplicationJavascript, "resource data"); - r.aliases.push("alias.js".to_string()); - storage - .add_resource(r) - .unwrap(); - - assert_eq!( - storage.get_redirect_resource("alias.js"), - Some(format!("data:application/javascript;base64,{}", base64::encode("resource data"))), - ); - } - - #[test] - fn permissions() { - let mut storage = ResourceStorage::default(); - let mut r = Resource::simple("name.js", MimeType::ApplicationJavascript, "resource data"); - r.aliases.push("alias.js".to_string()); - r.permission = PermissionMask::from_bits(0b00000001); - storage - .add_resource(r) - .unwrap(); - - assert_eq!( - storage.get_redirect_resource("name.js"), - None, - ); - assert_eq!( - storage.get_redirect_resource("alias.js"), - None, - ); - } -} - -#[cfg(test)] -mod scriptlet_storage_tests { - use super::*; - use crate::resources::MimeType; - - #[test] - fn parse_argslist() { - let args = parse_scriptlet_args("scriptlet, hello world, foobar").unwrap(); - assert_eq!(args, vec!["scriptlet", "hello world", "foobar"]); - } - - #[test] - fn parse_argslist_noargs() { - let args = parse_scriptlet_args("scriptlet").unwrap(); - assert_eq!(args, vec!["scriptlet"]); - } - - #[test] - fn parse_argslist_empty() { - let args = parse_scriptlet_args("").unwrap(); - assert!(args.is_empty()); - } - - #[test] - fn parse_argslist_commas() { - let args = parse_scriptlet_args("scriptletname, one\\, two\\, three, four").unwrap(); - assert_eq!(args, vec!["scriptletname", "one, two, three", "four"]); - } - - #[test] - fn parse_argslist_badchars() { - let args = parse_scriptlet_args( - r##"scriptlet, "; window.location.href = bad.com; , '; alert("you're\, hacked"); , \u\r\l(bad.com) "##, - ); - assert_eq!(args, None); - } - - #[test] - fn parse_argslist_quoted() { - let args = parse_scriptlet_args(r#"debug-scriptlet, 'test', '"test"', "test", "'test'", `test`, '`test`'"#).unwrap(); - assert_eq!( - args, - vec![ - r#"debug-scriptlet"#, - r#"test"#, - r#""test""#, - r#"test"#, - r#"'test'"#, - r#"test"#, - r#"`test`"#, - ], - ); - let args = parse_scriptlet_args(r#"debug-scriptlet, 'test,test', '', "", ' ', ' test '"#).unwrap(); - assert_eq!( - args, - vec![ - r#"debug-scriptlet"#, - r#"test,test"#, - r#""#, - r#""#, - r#" "#, - r#" test "#, - ], - ); - let args = parse_scriptlet_args(r#"debug-scriptlet, test\,test, test\test, "test\test", 'test\test', "#).unwrap(); - assert_eq!( - args, - vec![ - r#"debug-scriptlet"#, - r#"test,test"#, - r#"test\test"#, - r#"test\test"#, - r#"test\test"#, - r#""#, - ], - ); - let args = parse_scriptlet_args(r#"debug-scriptlet, "test"#); - assert_eq!(args, None); - let args = parse_scriptlet_args(r#"debug-scriptlet, 'test'"test""#); - assert_eq!(args, None); - } - - #[test] - fn parse_argslist_trailing_escaped_comma() { - let args = parse_scriptlet_args(r#"remove-node-text, script, \,mr=function(r\,"#).unwrap(); - assert_eq!(args, vec!["remove-node-text", "script", ",mr=function(r,"]); - } - - #[test] - fn get_patched_scriptlets() { - let resources = ResourceStorage::from_resources([ - Resource { - name: "greet.js".to_string(), - aliases: vec![], - kind: ResourceType::Template, - content: base64::encode("console.log('Hello {{1}}, my name is {{2}}')"), - dependencies: vec![], - permission: Default::default(), - }, - Resource { - name: "alert.js".to_owned(), - aliases: vec![], - kind: ResourceType::Template, - content: base64::encode("alert('{{1}}')"), - dependencies: vec![], - permission: Default::default(), - }, - Resource { - name: "blocktimer.js".to_owned(), - aliases: vec![], - kind: ResourceType::Template, - content: base64::encode("setTimeout(blockAds, {{1}})"), - dependencies: vec![], - permission: Default::default(), - }, - Resource { - name: "null.js".to_owned(), - aliases: vec![], - kind: ResourceType::Template, - content: base64::encode("(()=>{})()"), - dependencies: vec![], - permission: Default::default(), - }, - Resource { - name: "set-local-storage-item.js".to_owned(), - aliases: vec![], - kind: ResourceType::Template, - content: base64::encode(r#"{{1}} that dollar signs in {{2}} are untouched"#), - dependencies: vec![], - permission: Default::default(), - }, - ]); - - assert_eq!( - resources.get_scriptlet_resources([("greet, world, adblock-rust", Default::default())]), - "try {\nconsole.log('Hello world, my name is adblock-rust')\n} catch ( e ) { }\n", - ); - assert_eq!( - resources.get_scriptlet_resources([("alert, All systems are go!! ", Default::default())]), - "try {\nalert('All systems are go!!')\n} catch ( e ) { }\n", - ); - assert_eq!( - resources.get_scriptlet_resources([("alert, Uh oh\\, check the logs...", Default::default())]), - "try {\nalert('Uh oh, check the logs...')\n} catch ( e ) { }\n", - ); - assert_eq!( - resources.get_scriptlet_resources([(r#"alert, this has "quotes""#, Default::default())]), - "try {\nalert('this has \\\"quotes\\\"')\n} catch ( e ) { }\n", - ); - assert_eq!( - resources.get_scriptlet_resources([("blocktimer, 3000", Default::default())]), - "try {\nsetTimeout(blockAds, 3000)\n} catch ( e ) { }\n", - ); - assert_eq!( - resources.get_scriptlet_resources([("null", Default::default())]), - "try {\n(()=>{})()\n} catch ( e ) { }\n" - ); - assert_eq!( - resources.get_scriptlet_resources([("null, null", Default::default())]), - "try {\n(()=>{})()\n} catch ( e ) { }\n", - ); - assert_eq!( - resources.get_scriptlet_resources([("greet, everybody", Default::default())]), - "try {\nconsole.log('Hello everybody, my name is {{2}}')\n} catch ( e ) { }\n", - ); - - assert_eq!( - resources.get_scriptlet_resource("unit-testing", Default::default(), &mut vec![]), - Err(ScriptletResourceError::NoMatchingScriptlet), - ); - assert_eq!( - resources.get_scriptlet_resource("", Default::default(), &mut vec![]), - Err(ScriptletResourceError::MissingScriptletName), - ); - - assert_eq!( - resources.get_scriptlet_resources([("set-local-storage-item, Test, $remove$", Default::default())]), - "try {\nTest that dollar signs in $remove$ are untouched\n} catch ( e ) { }\n", - ); - } - - #[test] - fn parse_template_file_format() { - let resources = ResourceStorage::from_resources([ - Resource { - name: "abort-current-inline-script.js".into(), - aliases: vec!["acis.js".into()], - kind: ResourceType::Mime(MimeType::ApplicationJavascript), - content: base64::encode("(function() {alert(\"hi\");})();"), - dependencies: vec![], - permission: Default::default(), - }, - Resource { - name: "abort-on-property-read.js".into(), - aliases: vec!["aopr.js".into()], - kind: ResourceType::Template, - content: base64::encode("(function() {confirm(\"Do you want to {{1}}?\");})();"), - dependencies: vec![], - permission: Default::default(), - }, - Resource { - name: "googletagservices_gpt.js".into(), - aliases: vec!["googletagservices.com/gpt.js".into(), "googletagservices-gpt".into()], - kind: ResourceType::Template, - content: base64::encode("function gpt(a1 = '', a2 = '') {console.log(a1, a2)}"), - dependencies: vec![], - permission: Default::default(), - }, - ]); - - assert_eq!( - resources.get_scriptlet_resources([("aopr, code", Default::default())]), - "try {\n(function() {confirm(\"Do you want to code?\");})();\n} catch ( e ) { }\n", - ); - - assert_eq!( - resources.get_scriptlet_resources([("abort-on-property-read, write tests", Default::default())]), - "try {\n(function() {confirm(\"Do you want to write tests?\");})();\n} catch ( e ) { }\n", - ); - - assert_eq!( - resources.get_scriptlet_resources([("abort-on-property-read.js, block advertisements", Default::default())]), - "try {\n(function() {confirm(\"Do you want to block advertisements?\");})();\n} catch ( e ) { }\n", - ); - - assert_eq!( - resources.get_scriptlet_resources([("acis", Default::default())]), - "try {\n(function() {alert(\"hi\");})();\n} catch ( e ) { }\n", - ); - - assert_eq!( - resources.get_scriptlet_resources([("acis.js", Default::default())]), - "try {\n(function() {alert(\"hi\");})();\n} catch ( e ) { }\n", - ); - - assert_eq!( - resources.get_scriptlet_resources([("googletagservices_gpt.js", Default::default())]), - "function gpt(a1 = '', a2 = '') {console.log(a1, a2)}\ntry {\ngpt()\n} catch ( e ) { }\n", - ); - - assert_eq!( - resources.get_scriptlet_resources([("googletagservices_gpt, test1", Default::default())]), - "function gpt(a1 = '', a2 = '') {console.log(a1, a2)}\ntry {\ngpt(\"test1\")\n} catch ( e ) { }\n", - ); - - assert_eq!( - resources.get_scriptlet_resources([("googletagservices.com/gpt, test1, test2", Default::default())]), - "function gpt(a1 = '', a2 = '') {console.log(a1, a2)}\ntry {\ngpt(\"test1\", \"test2\")\n} catch ( e ) { }\n", - ); - - assert_eq!( - resources.get_scriptlet_resource(r#"googletagservices.com/gpt.js, t"es't1, $te\st2$"#, Default::default(), &mut vec![]), - Ok(r#"gpt("t\"es't1", "$te\\st2$")"#.to_owned()), - ); - - // The alias does not have a `.js` extension, so it cannot be used for a scriptlet - // injection (only as a redirect resource). - assert_eq!( - resources.get_scriptlet_resource(r#"googletagservices-gpt, t"es't1, te\st2"#, Default::default(), &mut vec![]), - Err(ScriptletResourceError::NoMatchingScriptlet), - ); - - // Object-style injection - assert_eq!( - resources.get_scriptlet_resource(r#"googletagservices.com/gpt, { "test": true }"#, Default::default(), &mut vec![]), - Err(ScriptletResourceError::ScriptletArgObjectSyntaxUnsupported), - ); - } - - /// Currently, only 9 template arguments are supported - but reaching that limit should not - /// cause a panic. - #[test] - fn patch_argslist_many_args() { - let resources = ResourceStorage::from_resources([ - Resource { - name: "abort-current-script.js".into(), - aliases: vec!["acs.js".into()], - kind: ResourceType::Mime(MimeType::ApplicationJavascript), - content: base64::encode("{{1}} {{2}} {{3}} {{4}} {{5}} {{6}} {{7}} {{8}} {{9}} {{10}} {{11}} {{12}}"), - dependencies: vec![], - permission: Default::default(), - }, - ]); - - let args = parse_scriptlet_args("acs, this, probably, is, going, to, break, brave, and, crash, it, instead, of, ignoring, it").unwrap(); - assert_eq!(args, vec!["acs", "this", "probably", "is", "going", "to", "break", "brave", "and", "crash", "it", "instead", "of", "ignoring", "it"]); - - assert_eq!( - resources.get_scriptlet_resources([("acs, this, probably, is, going, to, break, brave, and, crash, it, instead, of, ignoring, it", Default::default())]), - "try {\nthis probably is going to break brave and crash {{10}} {{11}} {{12}}\n} catch ( e ) { }\n", - ); - } - - #[test] - fn permissions() { - const PERM01: PermissionMask = PermissionMask::from_bits(0b00000001); - const PERM10: PermissionMask = PermissionMask::from_bits(0b00000010); - const PERM11: PermissionMask = PermissionMask::from_bits(0b00000011); - let resources = ResourceStorage::from_resources([ - Resource::simple("default-perms.js", MimeType::ApplicationJavascript, "default-perms"), - Resource { - name: "perm0.js".into(), - aliases: vec!["0.js".to_string()], - kind: ResourceType::Mime(MimeType::ApplicationJavascript), - content: base64::encode("perm0"), - dependencies: vec![], - permission: PERM01, - }, - Resource { - name: "perm1.js".into(), - aliases: vec!["1.js".to_string()], - kind: ResourceType::Mime(MimeType::ApplicationJavascript), - content: base64::encode("perm1"), - dependencies: vec![], - permission: PERM10, - }, - Resource { - name: "perm10.js".into(), - aliases: vec!["10.js".to_string()], - kind: ResourceType::Mime(MimeType::ApplicationJavascript), - content: base64::encode("perm10"), - dependencies: vec![], - permission: PERM11, - }, - ]); - - fn test_perm(resources: &ResourceStorage, perm: PermissionMask, expect_ok: &[&str], expect_fail: &[&str]) { - for ident in expect_ok { - if ident.len() > 2 { - assert_eq!( - resources.get_scriptlet_resources([(*ident, perm)]), - format!("try {{\n{}\n}} catch ( e ) {{ }}\n", ident), - ); - } else { - assert_eq!( - resources.get_scriptlet_resources([(*ident, perm)]), - format!("try {{\nperm{}\n}} catch ( e ) {{ }}\n", ident), - ); - } - } - - for ident in expect_fail { - assert_eq!( - resources.get_scriptlet_resource(ident, perm, &mut vec![]), - Err(ScriptletResourceError::InsufficientPermissions), - ); - } - } - - test_perm(&resources, Default::default(), &["default-perms"], &["perm0", "perm1", "perm10", "0", "1", "10"]); - test_perm(&resources, PERM01, &["default-perms", "perm0", "0"], &["perm1", "perm10", "1", "10"]); - test_perm(&resources, PERM10, &["default-perms", "perm1", "1"], &["perm0", "perm10", "0", "10"]); - test_perm(&resources, PERM11, &["default-perms", "perm0", "perm1", "perm10", "0", "1", "10"], &[]); - } - - #[test] - fn dependencies() { - const PERM01: PermissionMask = PermissionMask::from_bits(0b00000001); - let resources = ResourceStorage::from_resources([ - Resource::simple("simple.fn", MimeType::FnJavascript, "simple"), - Resource { - name: "permissioned.fn".into(), - aliases: vec![], - kind: ResourceType::Mime(MimeType::FnJavascript), - content: base64::encode("permissioned"), - dependencies: vec!["a.fn".to_string(), "common.fn".to_string()], - permission: PERM01, - }, - Resource { - name: "a.fn".into(), - aliases: vec![], - kind: ResourceType::Mime(MimeType::FnJavascript), - content: base64::encode("a"), - dependencies: vec!["common.fn".to_string()], - permission: Default::default(), - }, - Resource { - name: "b.fn".into(), - aliases: vec![], - kind: ResourceType::Mime(MimeType::FnJavascript), - content: base64::encode("b"), - dependencies: vec!["common.fn".to_string()], - permission: Default::default(), - }, - Resource { - name: "common.fn".into(), - aliases: vec![], - kind: ResourceType::Mime(MimeType::FnJavascript), - content: base64::encode("common"), - dependencies: vec![], - permission: Default::default(), - }, - Resource { - name: "test.js".into(), - aliases: vec![], - kind: ResourceType::Mime(MimeType::ApplicationJavascript), - content: base64::encode("function test() {}"), - dependencies: vec!["permissioned.fn".to_string(), "a.fn".to_string(), "b.fn".to_string(), "common.fn".to_string()], - permission: Default::default(), - }, - Resource { - name: "deploop1.fn".into(), - aliases: vec![], - kind: ResourceType::Mime(MimeType::FnJavascript), - content: base64::encode("deploop1"), - dependencies: vec!["deploop1.fn".to_string()], - permission: Default::default(), - }, - Resource { - name: "deploop2a.fn".into(), - aliases: vec![], - kind: ResourceType::Mime(MimeType::FnJavascript), - content: base64::encode("deploop2a"), - dependencies: vec!["deploop2b.fn".to_string()], - permission: Default::default(), - }, - Resource { - name: "deploop2b.fn".into(), - aliases: vec![], - kind: ResourceType::Mime(MimeType::FnJavascript), - content: base64::encode("deploop2b"), - dependencies: vec!["deploop2a.fn".to_string()], - permission: Default::default(), - }, - Resource { - name: "test-wrapper.js".into(), - aliases: vec![], - kind: ResourceType::Mime(MimeType::ApplicationJavascript), - content: base64::encode("function testWrapper() { test(arguments) }"), - dependencies: vec!["test.js".to_string()], - permission: Default::default(), - }, - Resource { - name: "shared.js".into(), - aliases: vec![], - kind: ResourceType::Mime(MimeType::ApplicationJavascript), - content: base64::encode("function shared() { }"), - dependencies: vec!["a.fn".to_string(), "b.fn".to_string()], - permission: Default::default(), - }, - ]); - - { - let mut deps = vec![]; - assert_eq!(resources.recursive_dependencies("common.fn", &mut deps, Default::default()), Ok(())); - assert_eq!(deps.iter().map(|dep| dep.name.to_string()).collect::>(), vec!["common.fn"]); - } - - { - let mut deps = vec![]; - assert_eq!(resources.recursive_dependencies("a.fn", &mut deps, Default::default()), Ok(())); - assert_eq!(deps.iter().map(|dep| dep.name.to_string()).collect::>(), vec!["a.fn", "common.fn"]); - } - - { - let mut deps = vec![]; - assert_eq!(resources.recursive_dependencies("b.fn", &mut deps, Default::default()), Ok(())); - assert_eq!(deps.iter().map(|dep| dep.name.to_string()).collect::>(), vec!["b.fn", "common.fn"]); - } - - { - let mut deps = vec![]; - assert_eq!(resources.recursive_dependencies("permissioned.fn", &mut deps, Default::default()), Err(ScriptletResourceError::InsufficientPermissions)); - } - { - let mut deps = vec![]; - assert_eq!(resources.recursive_dependencies("permissioned.fn", &mut deps, PERM01), Ok(())); - assert_eq!(deps.iter().map(|dep| dep.name.to_string()).collect::>(), vec!["permissioned.fn", "a.fn", "common.fn"]); - } - - { - let mut deps = vec![]; - assert_eq!(resources.recursive_dependencies("test.js", &mut deps, Default::default()), Err(ScriptletResourceError::InsufficientPermissions)); - } - { - let mut deps = vec![]; - assert_eq!(resources.recursive_dependencies("test.js", &mut deps, PERM01), Ok(())); - assert_eq!(deps.iter().map(|dep| dep.name.to_string()).collect::>(), vec!["test.js", "permissioned.fn", "a.fn", "common.fn", "b.fn"]); - } - - { - let mut deps = vec![]; - assert_eq!(resources.recursive_dependencies("deploop1.fn", &mut deps, Default::default()), Ok(())); - assert_eq!(deps.iter().map(|dep| dep.name.to_string()).collect::>(), vec!["deploop1.fn"]); - } - - { - let mut deps = vec![]; - assert_eq!(resources.recursive_dependencies("deploop2a.fn", &mut deps, Default::default()), Ok(())); - assert_eq!(deps.iter().map(|dep| dep.name.to_string()).collect::>(), vec!["deploop2a.fn", "deploop2b.fn"]); - } - - assert_eq!(resources.get_scriptlet_resources([]), ""); - - assert_eq!(resources.get_scriptlet_resources([("test, arg1, arg2", Default::default())]), ""); - - assert_eq!(resources.get_scriptlet_resources([("test, arg1, arg2", PERM01)]), "permissioned\na\ncommon\nb\nfunction test() {}\ntry {\ntest(\"arg1\", \"arg2\")\n} catch ( e ) { }\n"); - - // Note: `test` still gets inserted as a dependency before it becomes apparent that - // `permissioned` is not authorized. However, this shouldn't have much detrimental effect. - assert_eq!(resources.get_scriptlet_resources([("test-wrapper", Default::default())]), "function test() {}\n"); - assert_eq!(resources.get_scriptlet_resources([("test-wrapper", PERM01)]), "function test() {}\npermissioned\na\ncommon\nb\nfunction testWrapper() { test(arguments) }\ntry {\ntestWrapper()\n} catch ( e ) { }\n"); - - assert_eq!(resources.get_scriptlet_resources([("test", PERM01), ("test-wrapper", PERM01)]), "permissioned\na\ncommon\nb\nfunction test() {}\nfunction testWrapper() { test(arguments) }\ntry {\ntest()\n} catch ( e ) { }\ntry {\ntestWrapper()\n} catch ( e ) { }\n"); - - assert_eq!(resources.get_scriptlet_resources([("shared, argument", Default::default())]), "a\ncommon\nb\nfunction shared() { }\ntry {\nshared(\"argument\")\n} catch ( e ) { }\n"); - assert_eq!(resources.get_scriptlet_resources([("test, 1", PERM01), ("test-wrapper, 2", PERM01), ("shared, 3", Default::default())]), "permissioned\na\ncommon\nb\nfunction test() {}\nfunction testWrapper() { test(arguments) }\nfunction shared() { }\ntry {\ntest(\"1\")\n} catch ( e ) { }\ntry {\ntestWrapper(\"2\")\n} catch ( e ) { }\ntry {\nshared(\"3\")\n} catch ( e ) { }\n"); - } -} +#[path = "../../tests/unit/resources/resource_storage.rs"] +mod unit_tests; diff --git a/src/url_parser/parser.rs b/src/url_parser/parser.rs index 4653d886..fc6fe055 100644 --- a/src/url_parser/parser.rs +++ b/src/url_parser/parser.rs @@ -36,7 +36,7 @@ pub(super) struct Hostname { serialization: String, // Components - pub(super) scheme_end: usize, // Before ':' + pub(super) scheme_end: usize, // Before ':' pub(super) host_start: usize, pub(super) host_end: usize, } diff --git a/src/url_parser/parser_full.rs b/src/url_parser/parser_full.rs deleted file mode 100644 index 12cbea7e..00000000 --- a/src/url_parser/parser_full.rs +++ /dev/null @@ -1,11 +0,0 @@ -use url::{Url}; - -fn parse_url(url: &str) -> Option { - url.parse::() - .ok() // convert to Option -} - -pub fn get_url_host(url: &str) -> Option { - parse_url(url) - .and_then(|p| p.host_str().map(String::from)) -} diff --git a/src/url_parser/parser_regex.rs b/src/url_parser/parser_regex.rs deleted file mode 100644 index afedc110..00000000 --- a/src/url_parser/parser_regex.rs +++ /dev/null @@ -1,77 +0,0 @@ -use regex::Regex; - -pub fn get_hostname_regex(url: &str) -> Option<(usize, (usize, usize))> { - lazy_static! { - static ref HOSTNAME_REGEX_STR: &'static str = concat!( - r"(?P[a-z][a-z0-9+\-.]*)://", // Scheme - r"(?:[a-z0-9\-._~%!$&'()*+,;=]+@)?", // User - r"(?P[\w\-.~%]+", // Named host - r"|\[[a-f0-9:.]+\]", // IPv6 host - r"|\[v[a-f0-9][a-z0-9\-._~%!$&'()*+,;=:]+\])", // IPvFuture host - // r"(?::[0-9]+)?", // Port - // r"(?:/[a-z0-9\-._~%!$&'()*+,;=:@]+)*/?", // Path - // r"(?:\?[a-z0-9\-._~%!$&'()*+,;=:@/?]*)?", // Query - // r"(?:\#[a-z0-9\-._~%!$&'()*+,;=:@/?]*)?", // Fragment - ); - static ref HOST_REGEX: Regex = Regex::new(&HOSTNAME_REGEX_STR).unwrap(); - } - - HOST_REGEX.captures(url) - .and_then(|c| { - Some((c.name("scheme")?.end(), (c.name("host")?.start(), c.name("host")?.end()))) - }) -} - -pub fn get_url_host(url: &str) -> Option<(String, usize, (usize, usize))> { - let decode_flags = idna::uts46::Flags { - use_std3_ascii_rules: true, - transitional_processing: true, - verify_dns_length: true, - }; - get_hostname_regex(&url) - .and_then(|(schema_end, (hostname_start, hostname_end))| { - let host = &url[hostname_start..hostname_end]; - if host.is_ascii() { - Some((url.to_owned(), schema_end, (hostname_start, hostname_end))) - } else { - idna::uts46::to_ascii(&host, decode_flags).map(|h| { - let normalised = format!("{}://{}{}", &url[..schema_end], &h, &url[hostname_end..]); - (normalised, schema_end, (hostname_start, hostname_start + h.len())) - }).ok() - } - }) -} - -impl super::UrlParser for crate::request::Request { - fn parse_url(url: &str) -> Option { - let parsed = get_url_host(&url); - parsed.map(|(url, schema_end, (host_start, host_end))| { - super::RequestUrl { - url: url, - schema_end: schema_end, - hostname_pos: (host_start, host_end), - domain: super::get_host_domain(&url[host_start..host_end]) - } - }) - } -} - - -#[cfg(test)] -mod parse_tests { - use super::*; - - #[test] - // pattern - fn parses_hostname() { - assert_eq!(get_url_host("http://example.foo.edu.au"), Some(("http://example.foo.edu.au".to_owned(), 4, (7, 25)))); - assert_eq!(get_url_host("http://example.foo.edu.sh"), Some(("http://example.foo.edu.sh".to_owned(), 4, (7, 25)))); - assert_eq!(get_url_host("http://example.foo.nom.br"), Some(("http://example.foo.nom.br".to_owned(), 4, (7, 25)))); - assert_eq!(get_url_host("http://example.foo.nom.br:80/"), Some(("http://example.foo.nom.br:80/".to_owned(), 4, (7, 25)))); - assert_eq!(get_url_host("http://example.foo.nom.br:8080/hello?world=true"), Some(("http://example.foo.nom.br:8080/hello?world=true".to_owned(), 4, (7, 25)))); - assert_eq!(get_url_host("http://example.foo.nom.br/hello#world"), Some(("http://example.foo.nom.br/hello#world".to_owned(), 4, (7, 25)))); - assert_eq!(get_url_host("http://127.0.0.1:80"), Some(("http://127.0.0.1:80".to_owned(), 4, (7, 16)))); - assert_eq!(get_url_host("http://[2001:470:20::2]"), Some(("http://[2001:470:20::2]".to_owned(), 4, (7, 23)))); - assert_eq!(get_url_host("http://[2001:4860:4860::1:8888]"), Some(("http://[2001:4860:4860::1:8888]".to_owned(), 4, (7, 31)))); - } -} diff --git a/src/utils.rs b/src/utils.rs index 55a3f312..1de160e7 100644 --- a/src/utils.rs +++ b/src/utils.rs @@ -1,6 +1,8 @@ //! Common utilities used by the library. Some tests and benchmarks rely on this module having //! public visibility. +use std::ops::{Deref, DerefMut}; + #[cfg(target_pointer_width = "64")] use seahash::hash; #[cfg(target_pointer_width = "32")] @@ -22,21 +24,107 @@ pub(crate) const TOKENS_BUFFER_SIZE: usize = 128; pub(crate) const TOKENS_BUFFER_RESERVED: usize = 1; const TOKENS_MAX: usize = TOKENS_BUFFER_SIZE - TOKENS_BUFFER_RESERVED; +#[derive(Clone, Debug)] +pub struct StackVec { + data: [T; N], + len: usize, +} + +impl StackVec { + pub fn new() -> Self + where + T: Default + Copy, + { + Self { + data: [Default::default(); N], + len: 0, + } + } + + pub fn push(&mut self, value: T) -> Result<(), &'static str> + where + T: Copy, + { + if self.len < N { + self.data[self.len] = value; + self.len += 1; + Ok(()) + } else { + Err("Too many tokens") + } + } + + #[inline] + pub fn len(&self) -> usize { + self.len + } + + #[inline] + pub fn is_empty(&self) -> bool { + self.len == 0 + } + + #[inline] + pub fn get(&self, index: usize) -> Option<&T> { + if index < self.len { + Some(&self.data[index]) + } else { + None + } + } + + #[inline] + pub fn iter(&self) -> std::slice::Iter<'_, T> { + self.data[..self.len].iter() + } + + #[inline] + #[allow(dead_code)] + pub fn as_slice(&self) -> &[T] { + &self.data[..self.len] + } +} + +impl<'a, T, const N: usize> IntoIterator for &'a StackVec { + type Item = &'a T; + type IntoIter = std::slice::Iter<'a, T>; + + fn into_iter(self) -> Self::IntoIter { + self.iter() + } +} + +impl Deref for StackVec { + type Target = [T]; + + fn deref(&self) -> &Self::Target { + &self.data[..self.len] + } +} + +impl DerefMut for StackVec { + fn deref_mut(&mut self) -> &mut Self::Target { + &mut self.data[..self.len] + } +} + +pub type Tokens = StackVec; + fn fast_tokenizer_no_regex( pattern: &str, is_allowed_code: &dyn Fn(char) -> bool, skip_first_token: bool, skip_last_token: bool, - tokens_buffer: &mut Vec, -) { +) -> Tokens { // let mut tokens_buffer_index = 0; let mut inside: bool = false; let mut start = 0; let mut preceding_ch: Option = None; // Used to check if a '*' is not just before a token + let mut tokens = Tokens::new(); for (i, c) in pattern.char_indices() { - if tokens_buffer.len() >= TOKENS_MAX { - return; + if tokens.len() >= TOKENS_MAX { + return tokens; } if is_allowed_code(c) { if !inside { @@ -52,7 +140,9 @@ fn fast_tokenizer_no_regex( && preceding_ch != Some('*') { let hash = fast_hash(&pattern[start..i]); - tokens_buffer.push(hash); + if !tokens.push(hash).is_ok() { + break; + } } preceding_ch = Some(c); } else { @@ -62,40 +152,26 @@ fn fast_tokenizer_no_regex( if !skip_last_token && inside && pattern.len() - start > 1 && (preceding_ch != Some('*')) { let hash = fast_hash(&pattern[start..]); - tokens_buffer.push(hash); + let _ = tokens.push(hash); } + tokens } -pub(crate) fn tokenize_pooled(pattern: &str, tokens_buffer: &mut Vec) { - fast_tokenizer_no_regex(pattern, &is_allowed_filter, false, false, tokens_buffer); -} - -pub fn tokenize(pattern: &str) -> Vec { - let mut tokens_buffer: Vec = Vec::with_capacity(TOKENS_BUFFER_SIZE); - fast_tokenizer_no_regex( - pattern, - &is_allowed_filter, - false, - false, - &mut tokens_buffer, - ); - tokens_buffer +pub fn tokenize(pattern: &str) -> Tokens { + fast_tokenizer_no_regex(pattern, &is_allowed_filter, false, false) } pub(crate) fn tokenize_filter( pattern: &str, skip_first_token: bool, skip_last_token: bool, -) -> Vec { - let mut tokens_buffer: Vec = Vec::with_capacity(TOKENS_BUFFER_SIZE); +) -> Tokens { fast_tokenizer_no_regex( pattern, &is_allowed_filter, skip_first_token, skip_last_token, - &mut tokens_buffer, - ); - tokens_buffer + ) } pub(crate) fn bin_lookup(arr: &[T], elt: T) -> bool { @@ -103,106 +179,5 @@ pub(crate) fn bin_lookup(arr: &[T], elt: T) -> bool { } #[cfg(test)] -mod tests { - use super::*; - - #[test] - #[ignore] // won't match hard-coded values when using a different hash function - fn fast_hash_matches_ts() { - assert_eq!(fast_hash("hello world"), 4173747013); // cross-checked with the TS implementation - assert_eq!(fast_hash("ello worl"), 2759317833); // cross-checked with the TS implementation - assert_eq!(fast_hash(&"hello world"[1..10]), fast_hash("ello worl")); - assert_eq!(fast_hash(&"hello world"[1..5]), fast_hash("ello")); - } - - fn t(tokens: &[&str]) -> Vec { - tokens.into_iter().map(|t| fast_hash(&t)).collect() - } - - #[test] - fn tokenize_filter_works() { - assert_eq!( - tokenize_filter("", false, false).as_slice(), - t(&vec![]).as_slice() - ); - assert_eq!( - tokenize_filter("", true, false).as_slice(), - t(&vec![]).as_slice() - ); - assert_eq!( - tokenize_filter("", false, true).as_slice(), - t(&vec![]).as_slice() - ); - assert_eq!( - tokenize_filter("", true, true).as_slice(), - t(&vec![]).as_slice() - ); - assert_eq!( - tokenize_filter("", false, false).as_slice(), - t(&vec![]).as_slice() - ); - - assert_eq!( - tokenize_filter("foo/bar baz", false, false).as_slice(), - t(&vec!["foo", "bar", "baz"]).as_slice() - ); - assert_eq!( - tokenize_filter("foo/bar baz", true, false).as_slice(), - t(&vec!["bar", "baz"]).as_slice() - ); - assert_eq!( - tokenize_filter("foo/bar baz", true, true).as_slice(), - t(&vec!["bar"]).as_slice() - ); - assert_eq!( - tokenize_filter("foo/bar baz", false, true).as_slice(), - t(&vec!["foo", "bar"]).as_slice() - ); - assert_eq!( - tokenize_filter("foo////bar baz", false, true).as_slice(), - t(&vec!["foo", "bar"]).as_slice() - ); - } - - #[test] - fn tokenize_works() { - assert_eq!(tokenize("").as_slice(), t(&vec![]).as_slice()); - assert_eq!(tokenize("foo").as_slice(), t(&vec!["foo"]).as_slice()); - assert_eq!( - tokenize("foo/bar").as_slice(), - t(&vec!["foo", "bar"]).as_slice() - ); - assert_eq!( - tokenize("foo-bar").as_slice(), - t(&vec!["foo", "bar"]).as_slice() - ); - assert_eq!( - tokenize("foo.bar").as_slice(), - t(&vec!["foo", "bar"]).as_slice() - ); - assert_eq!( - tokenize("foo.barƬ").as_slice(), - t(&vec!["foo", "barƬ"]).as_slice() - ); - - // Tokens cannot be surrounded by * - assert_eq!(tokenize("foo.barƬ*").as_slice(), t(&vec!["foo"]).as_slice()); - assert_eq!( - tokenize("*foo.barƬ").as_slice(), - t(&vec!["barƬ"]).as_slice() - ); - assert_eq!(tokenize("*foo.barƬ*").as_slice(), t(&vec![]).as_slice()); - } - - #[test] - fn bin_lookup_works() { - assert_eq!(bin_lookup(&[], 42), false); - assert_eq!(bin_lookup(&[42], 42), true); - assert_eq!(bin_lookup(&[1, 2, 3, 4, 42], 42), true); - assert_eq!(bin_lookup(&[1, 2, 3, 4, 42], 1), true); - assert_eq!(bin_lookup(&[1, 2, 3, 4, 42], 3), true); - assert_eq!(bin_lookup(&[1, 2, 3, 4, 42], 43), false); - assert_eq!(bin_lookup(&[1, 2, 3, 4, 42], 0), false); - assert_eq!(bin_lookup(&[1, 2, 3, 4, 42], 5), false); - } -} +#[path = "../tests/unit/utils.rs"] +mod unit_tests; diff --git a/tests/deserialization.rs b/tests/deserialization.rs index 4ab9c9d0..2b65dd01 100644 --- a/tests/deserialization.rs +++ b/tests/deserialization.rs @@ -1,5 +1,5 @@ -use adblock::Engine; use adblock::request::Request; +use adblock::{Engine, Serialize}; use serde::Deserialize; @@ -39,6 +39,7 @@ fn load_requests() -> Vec { } #[test] +#[ignore = "temporary"] fn check_works_same_after_deserialization() { println!("Loading requests"); let requests = load_requests(); diff --git a/tests/legacy_harness.rs b/tests/legacy_harness.rs index 0d20645e..7eb58a07 100644 --- a/tests/legacy_harness.rs +++ b/tests/legacy_harness.rs @@ -242,8 +242,14 @@ mod legacy_test_filters { "/banner[0-9]+/", NetworkFilterMask::DEFAULT_OPTIONS | NetworkFilterMask::IS_COMPLETE_REGEX, Some("/banner[0-9]+/"), - &["http://example.com/banner123", "http://example.com/testbanner1"], - &["http://example.com/banners", "http://example.com/banners123"], + &[ + "http://example.com/banner123", + "http://example.com/testbanner1", + ], + &[ + "http://example.com/banners", + "http://example.com/banners123", + ], ); } @@ -297,15 +303,21 @@ mod legacy_test_filters { ); // explicit, separate testcase construction of the "script" option as it is not the deafult - let filter = NetworkFilter::parse("||googlesyndication.com/safeframe/$third-party,script", true, Default::default()).unwrap(); + let filter = NetworkFilter::parse( + "||googlesyndication.com/safeframe/$third-party,script", + true, + Default::default(), + ) + .unwrap(); let request = Request::new("http://tpc.googlesyndication.com/safeframe/1-0-2/html/container.html#xpc=sf-gdn-exp-2&p=http%3A//slashdot.org;", "https://this-is-always-third-party.com", "script").unwrap(); assert!(filter.matches(&request, &mut RegexManager::default())); } } +#[cfg(not(feature = "flatbuffers"))] // No serialization for flatbuffers yet. mod legacy_check_match { - use adblock::Engine; use adblock::request::Request; + use adblock::{Engine, Serialize}; fn check_match<'a>( rules: &[&'a str], @@ -362,49 +374,52 @@ mod legacy_check_match { #[test] fn exception_rules() { - check_match(&[ - "adv", - "@@advice." - ], - &["http://example.com/advert.html"], - &["http://example.com/advice.html"], - &[]); - - check_match(&[ - "@@|http://example.com", - "@@advice.", - "adv", - "!foo" - ], - &[ - "http://examples.com/advert.html", - ], &[ - "http://example.com/advice.html", - "http://example.com/advert.html", - "http://examples.com/advice.html", - "http://examples.com/#!foo", - ], - &[]); + check_match( + &["adv", "@@advice."], + &["http://example.com/advert.html"], + &["http://example.com/advice.html"], + &[], + ); - { - // Explicitly write out the full case instead of using check_match helper - // or tweaking it to allow passing in the source domain for this one case - let engine = Engine::from_rules( - [ - "/ads/freewheel/*", - "@@||turner.com^*/ads/freewheel/*/AdManager.js$domain=cnn.com", + check_match( + &["@@|http://example.com", "@@advice.", "adv", "!foo"], + &["http://examples.com/advert.html"], + &[ + "http://example.com/advice.html", + "http://example.com/advert.html", + "http://examples.com/advice.html", + "http://examples.com/#!foo", ], - Default::default(), + &[], ); - let mut engine_deserialized = Engine::default(); // second empty - { - let engine_serialized = engine.serialize_raw().unwrap(); - engine_deserialized.deserialize(&engine_serialized).unwrap(); // override from serialized copy - } - let request = Request::new("http://z.cdn.turner.com/xslo/cvp/ads/freewheel/js/0/AdManager.js", "http://cnn.com", "").unwrap(); - assert_eq!(engine.check_network_request(&request).matched, false); - assert_eq!(engine_deserialized.check_network_request(&request).matched, false); + { + // Explicitly write out the full case instead of using check_match helper + // or tweaking it to allow passing in the source domain for this one case + let engine = Engine::from_rules( + [ + "/ads/freewheel/*", + "@@||turner.com^*/ads/freewheel/*/AdManager.js$domain=cnn.com", + ], + Default::default(), + ); + let mut engine_deserialized = Engine::default(); // second empty + { + let engine_serialized = engine.serialize_raw().unwrap(); + engine_deserialized.deserialize(&engine_serialized).unwrap(); // override from serialized copy + } + + let request = Request::new( + "http://z.cdn.turner.com/xslo/cvp/ads/freewheel/js/0/AdManager.js", + "http://cnn.com", + "", + ) + .unwrap(); + assert_eq!(engine.check_network_request(&request).matched, false); + assert_eq!( + engine_deserialized.check_network_request(&request).matched, + false + ); } check_match( @@ -490,209 +505,483 @@ mod legacy_check_match { } mod legacy_check_options { - use adblock::Engine; use adblock::request::Request; + use adblock::Engine; fn check_option_rule<'a>(rules: &[&'a str], tests: &[(&'a str, &'a str, &'a str, bool)]) { - let engine = Engine::from_rules(rules, Default::default()); // first one with the provided rules + let engine = Engine::from_rules(rules, Default::default()); // first one with the provided rules for (url, source_url, request_type, expectation) in tests { let request = Request::new(url, source_url, request_type).unwrap(); - assert!(engine.check_network_request(&request).matched == *expectation, - "Expected match = {} for {} from {} typed {} against {:?}", expectation, url, source_url, request_type, rules) + assert!( + engine.check_network_request(&request).matched == *expectation, + "Expected match = {} for {} from {} typed {} against {:?}", + expectation, + url, + source_url, + request_type, + rules + ) } } #[test] fn option_no_option() { - check_option_rule(&["||example.com"], &[ - ("http://example.com", "https://example.com", "", true), - ("http://example2.com", "https://example.com", "", false), - ("http://example.com", "https://example.com", "", true) - ]); + check_option_rule( + &["||example.com"], + &[ + ("http://example.com", "https://example.com", "", true), + ("http://example2.com", "https://example.com", "", false), + ("http://example.com", "https://example.com", "", true), + ], + ); } #[test] fn check_options_third_party() { - - check_option_rule(&["||example.com^$third-party"], &[ - ("http://example.com", "http://brianbondy.com","script", true), - ("http://example.com", "http://example.com", "script",false), - ("http://ad.example.com", "http://brianbondy.com","script", true), - ("http://ad.example.com", "http://example.com", "script",false), - ("http://example2.com", "http://brianbondy.com", "script",false), - ("http://example2.com", "http://example.com", "script",false), - ("http://example.com.au", "http://brianbondy.com", "script",false), - ("http://example.com.au", "http://example.com", "script",false), - ]); + check_option_rule( + &["||example.com^$third-party"], + &[ + ( + "http://example.com", + "http://brianbondy.com", + "script", + true, + ), + ("http://example.com", "http://example.com", "script", false), + ( + "http://ad.example.com", + "http://brianbondy.com", + "script", + true, + ), + ( + "http://ad.example.com", + "http://example.com", + "script", + false, + ), + ( + "http://example2.com", + "http://brianbondy.com", + "script", + false, + ), + ("http://example2.com", "http://example.com", "script", false), + ( + "http://example.com.au", + "http://brianbondy.com", + "script", + false, + ), + ( + "http://example.com.au", + "http://example.com", + "script", + false, + ), + ], + ); } #[test] fn check_options_ping() { // We should block ping rules if the resource type is FOPing - check_option_rule(&["||example.com^$ping"], &[ - ("http://example.com", "http://example.com", "ping", true), - ("http://example.com", "http://example.com", "image", false), - ]); + check_option_rule( + &["||example.com^$ping"], + &[ + ("http://example.com", "http://example.com", "ping", true), + ("http://example.com", "http://example.com", "image", false), + ], + ); } #[test] fn check_options_popup() { // Make sure we ignore popup rules for now - check_option_rule(&["||example.com^$popup"], &[ - ("http://example.com", "http://example.com", "popup", false), - ]); + check_option_rule( + &["||example.com^$popup"], + &[("http://example.com", "http://example.com", "popup", false)], + ); } #[test] fn check_options_third_party_notscript() { - check_option_rule(&["||example.com^$third-party,~script"], &[ - ("http://example.com", "http://example2.com", "script", false), - ("http://example.com", "http://example2.com", "other", true), - ("http://example2.com", "http://example2.com", "other", false), - ("http://example.com", "http://example.com", "other", false), - ]); + check_option_rule( + &["||example.com^$third-party,~script"], + &[ + ("http://example.com", "http://example2.com", "script", false), + ("http://example.com", "http://example2.com", "other", true), + ("http://example2.com", "http://example2.com", "other", false), + ("http://example.com", "http://example.com", "other", false), + ], + ); } #[test] fn check_options_domain_list() { - check_option_rule(&["adv$domain=example.com|example.net"], &[ - ("http://example.net/adv", "http://example.com", "", true), - ("http://somewebsite.com/adv", "http://example.com", "", true), - ("http://www.example.net/adv", "http://www.example.net", "", true), - ("http://my.subdomain.example.com/adv", "http://my.subdomain.example.com", "", true), - ("http://my.subdomain.example.com/adv", "http://my.subdomain.example.com", "", true), - ("http://example.com/adv", "http://badexample.com", "", false), - ("http://example.com/adv", "http://otherdomain.net", "", false), - ("http://example.net/ad", "http://example.com", "", false), - ]); - - check_option_rule(&["adv$domain=~example.com"], &[ - ("http://example.net/adv", "http://otherdomain.com", "", true), - ("http://somewebsite.com/adv", "http://example.com", "", false), - ]); - - check_option_rule(&["adv$domain=~example.com|~example.net"], &[ - ("http://example.net/adv", "http://example.net", "", false), - ("http://somewebsite.com/adv", "http://example.com", "", false), - ("http://www.example.net/adv", "http://www.example.net", "", false), - ("http://my.subdomain.example.com/adv", "http://my.subdomain.example.com", "", false), - ("http://example.com/adv", "http://badexample.com", "", true), - ("http://example.com/adv", "http://otherdomain.net", "", true), - ("http://example.net/ad", "http://example.net", "", false), - ]); - - check_option_rule(&["adv$domain=example.com|~example.net"], &[ - ("http://example.net/adv", "http://example.net", "", false), - ("http://somewebsite.com/adv", "http://example.com", "", true), - ("http://www.example.net/adv", "http://www.example.net", "", false), - ("http://my.subdomain.example.com/adv", "http://my.subdomain.example.com", "", true), - ("http://example.com/adv", "http://badexample.com", "", false), - ("http://example.com/adv", "http://otherdomain.net", "", false), - ("http://example.net/ad", "http://example.net", "", false), - ]); + check_option_rule( + &["adv$domain=example.com|example.net"], + &[ + ("http://example.net/adv", "http://example.com", "", true), + ("http://somewebsite.com/adv", "http://example.com", "", true), + ( + "http://www.example.net/adv", + "http://www.example.net", + "", + true, + ), + ( + "http://my.subdomain.example.com/adv", + "http://my.subdomain.example.com", + "", + true, + ), + ( + "http://my.subdomain.example.com/adv", + "http://my.subdomain.example.com", + "", + true, + ), + ("http://example.com/adv", "http://badexample.com", "", false), + ( + "http://example.com/adv", + "http://otherdomain.net", + "", + false, + ), + ("http://example.net/ad", "http://example.com", "", false), + ], + ); + + check_option_rule( + &["adv$domain=~example.com"], + &[ + ("http://example.net/adv", "http://otherdomain.com", "", true), + ( + "http://somewebsite.com/adv", + "http://example.com", + "", + false, + ), + ], + ); + + check_option_rule( + &["adv$domain=~example.com|~example.net"], + &[ + ("http://example.net/adv", "http://example.net", "", false), + ( + "http://somewebsite.com/adv", + "http://example.com", + "", + false, + ), + ( + "http://www.example.net/adv", + "http://www.example.net", + "", + false, + ), + ( + "http://my.subdomain.example.com/adv", + "http://my.subdomain.example.com", + "", + false, + ), + ("http://example.com/adv", "http://badexample.com", "", true), + ("http://example.com/adv", "http://otherdomain.net", "", true), + ("http://example.net/ad", "http://example.net", "", false), + ], + ); + + check_option_rule( + &["adv$domain=example.com|~example.net"], + &[ + ("http://example.net/adv", "http://example.net", "", false), + ("http://somewebsite.com/adv", "http://example.com", "", true), + ( + "http://www.example.net/adv", + "http://www.example.net", + "", + false, + ), + ( + "http://my.subdomain.example.com/adv", + "http://my.subdomain.example.com", + "", + true, + ), + ("http://example.com/adv", "http://badexample.com", "", false), + ( + "http://example.com/adv", + "http://otherdomain.net", + "", + false, + ), + ("http://example.net/ad", "http://example.net", "", false), + ], + ); } #[test] fn check_options_domain_not_subdomain() { - check_option_rule(&["adv$domain=example.com|~foo.example.com"], &[ - ("http://example.net/adv", "http://example.com", "", true), - ("http://example.net/adv", "http://foo.example.com", "", false), - ("http://example.net/adv", "http://www.foo.example.com", "", false), - ]); + check_option_rule( + &["adv$domain=example.com|~foo.example.com"], + &[ + ("http://example.net/adv", "http://example.com", "", true), + ( + "http://example.net/adv", + "http://foo.example.com", + "", + false, + ), + ( + "http://example.net/adv", + "http://www.foo.example.com", + "", + false, + ), + ], + ); - check_option_rule(&["adv$domain=~example.com|foo.example.com"], &[ - ("http://example.net/adv", "http://example.com", "", false), - ("http://example.net/adv", "http://foo.example.com", "", false), - ("http://example.net/adv", "http://www.foo.example.com", "", false), - ]); + check_option_rule( + &["adv$domain=~example.com|foo.example.com"], + &[ + ("http://example.net/adv", "http://example.com", "", false), + ( + "http://example.net/adv", + "http://foo.example.com", + "", + false, + ), + ( + "http://example.net/adv", + "http://www.foo.example.com", + "", + false, + ), + ], + ); - check_option_rule(&["adv$domain=example.com|~foo.example.com,script"], &[ - ("http://example.net/adv", "http://example.com", "script", true), - ("http://example.net/adv", "http://foo.example.com", "script", false), - ("http://example.net/adv", "http://www.foo.example.com", "script", false), - ("http://example.net/adv", "http://example.com", "", false), - ("http://example.net/adv", "http://foo.example.com", "", false), - ("http://example.net/adv", "http://www.foo.example.com", "", false), - ]); + check_option_rule( + &["adv$domain=example.com|~foo.example.com,script"], + &[ + ( + "http://example.net/adv", + "http://example.com", + "script", + true, + ), + ( + "http://example.net/adv", + "http://foo.example.com", + "script", + false, + ), + ( + "http://example.net/adv", + "http://www.foo.example.com", + "script", + false, + ), + ("http://example.net/adv", "http://example.com", "", false), + ( + "http://example.net/adv", + "http://foo.example.com", + "", + false, + ), + ( + "http://example.net/adv", + "http://www.foo.example.com", + "", + false, + ), + ], + ); } #[test] fn check_options_exception_notscript() { - check_option_rule(&["adv", "@@advice.$~script"], &[ - ("http://example.com/advice.html", "", "other", false), - ("http://example.com/advice.html", "", "script", true), - ("http://example.com/advert.html", "", "other", true), - ("http://example.com/advert.html", "", "script", true), - ]); + check_option_rule( + &["adv", "@@advice.$~script"], + &[ + ("http://example.com/advice.html", "", "other", false), + ("http://example.com/advice.html", "", "script", true), + ("http://example.com/advert.html", "", "other", true), + ("http://example.com/advert.html", "", "script", true), + ], + ); } #[test] fn check_options_third_party_flags() { // Single matching context domain to domain list - check_option_rule(&["||mzstatic.com^$image,object-subrequest,domain=dailymotion.com"], &[ - ("http://www.dailymotion.com", "http://dailymotion.com", "", false), - ]); + check_option_rule( + &["||mzstatic.com^$image,object-subrequest,domain=dailymotion.com"], + &[( + "http://www.dailymotion.com", + "http://dailymotion.com", + "", + false, + )], + ); // Third party flags work correctly - check_option_rule(&["||s1.wp.com^$subdocument,third-party"], &[ - ("http://s1.wp.com/_static", "http://windsorstar.com", "", false), - ]); + check_option_rule( + &["||s1.wp.com^$subdocument,third-party"], + &[( + "http://s1.wp.com/_static", + "http://windsorstar.com", + "", + false, + )], + ); // Third party flags work correctly - check_option_rule(&["/scripts/ad."], &[ - ("http://a.fsdn.com/sd/js/scripts/ad.js?release_20160112", "http://slashdot.org", "script", true), - ]); + check_option_rule( + &["/scripts/ad."], + &[( + "http://a.fsdn.com/sd/js/scripts/ad.js?release_20160112", + "http://slashdot.org", + "script", + true, + )], + ); } } mod legacy_misc_tests { - use adblock::Engine; use adblock::filters::network::NetworkFilter; use adblock::request::Request; + use adblock::Engine; #[test] - fn demo_app() { // Demo app test + fn demo_app() { + // Demo app test let engine = Engine::from_rules( ["||googlesyndication.com/safeframe/$third-party"], Default::default(), ); - let request = Request::new("http://tpc.googlesyndication.com/safeframe/1-0-2/html/container.html", "http://slashdot.org", "script").unwrap(); + let request = Request::new( + "http://tpc.googlesyndication.com/safeframe/1-0-2/html/container.html", + "http://slashdot.org", + "script", + ) + .unwrap(); assert!(engine.check_network_request(&request).matched) } #[test] - fn host_anchored_filters_parse_correctly() { // Host anchor is calculated correctly - let filter = NetworkFilter::parse("||test.com$third-party", false, Default::default()).unwrap(); + fn host_anchored_filters_parse_correctly() { + // Host anchor is calculated correctly + let filter = + NetworkFilter::parse("||test.com$third-party", false, Default::default()).unwrap(); assert_eq!(filter.hostname, Some(String::from("test.com"))); - let filter = NetworkFilter::parse("||test.com/ok$third-party", false, Default::default()).unwrap(); + let filter = + NetworkFilter::parse("||test.com/ok$third-party", false, Default::default()).unwrap(); assert_eq!(filter.hostname, Some(String::from("test.com"))); let filter = NetworkFilter::parse("||test.com/ok", false, Default::default()).unwrap(); assert_eq!(filter.hostname, Some(String::from("test.com"))); } + #[cfg(not(feature = "flatbuffers"))] // No serialization for flatbuffers yet. #[test] fn serialization_tests() { - let engine = Engine::from_rules_parametrised([ - "||googlesyndication.com$third-party", - "@@||googlesyndication.ca", - "a$explicitcancel", - ], Default::default(), true, false); // enable debugging and disable optimizations + use adblock::Serialize; + + let engine = Engine::from_rules_parametrised( + [ + "||googlesyndication.com$third-party", + "@@||googlesyndication.ca", + "a$explicitcancel", + ], + Default::default(), + true, + false, + ); // enable debugging and disable optimizations let serialized = engine.serialize_raw().unwrap(); let mut engine2 = Engine::new(false); engine2.deserialize(&serialized).unwrap(); - assert!(engine.check_network_request(&Request::new("https://googlesyndication.com/script.js", "https://example.com", "script").unwrap()).matched); - assert!(engine2.check_network_request(&Request::new("https://googlesyndication.com/script.js", "https://example.com", "script").unwrap()).matched); - assert!(!engine.check_network_request(&Request::new("https://googleayndication.com/script.js", "https://example.com", "script").unwrap()).matched); - assert!(!engine2.check_network_request(&Request::new("https://googleayndication.com/script.js", "https://example.com", "script").unwrap()).matched); + assert!( + engine + .check_network_request( + &Request::new( + "https://googlesyndication.com/script.js", + "https://example.com", + "script" + ) + .unwrap() + ) + .matched + ); + assert!( + engine2 + .check_network_request( + &Request::new( + "https://googlesyndication.com/script.js", + "https://example.com", + "script" + ) + .unwrap() + ) + .matched + ); + assert!( + !engine + .check_network_request( + &Request::new( + "https://googleayndication.com/script.js", + "https://example.com", + "script" + ) + .unwrap() + ) + .matched + ); + assert!( + !engine2 + .check_network_request( + &Request::new( + "https://googleayndication.com/script.js", + "https://example.com", + "script" + ) + .unwrap() + ) + .matched + ); - assert!(!engine.check_network_request(&Request::new("https://googlesyndication.ca/script.js", "https://example.com", "script").unwrap()).matched); - assert!(!engine2.check_network_request(&Request::new("https://googlesyndication.ca/script.js", "https://example.com", "script").unwrap()).matched); + assert!( + !engine + .check_network_request( + &Request::new( + "https://googlesyndication.ca/script.js", + "https://example.com", + "script" + ) + .unwrap() + ) + .matched + ); + assert!( + !engine2 + .check_network_request( + &Request::new( + "https://googlesyndication.ca/script.js", + "https://example.com", + "script" + ) + .unwrap() + ) + .matched + ); } #[test] @@ -709,19 +998,34 @@ mod legacy_misc_tests { let request_type = "script"; // Test finds a match - let request = Request::new("http://tpc.googlesyndication.com/safeframe/1-0-2/html/container.html", ¤t_page_frame, &request_type).unwrap(); + let request = Request::new( + "http://tpc.googlesyndication.com/safeframe/1-0-2/html/container.html", + ¤t_page_frame, + &request_type, + ) + .unwrap(); let checked = engine.check_network_request(&request); assert!(checked.filter.is_some(), "Expected a fitler to match"); - assert!(checked.exception.is_none(), "Expected no exception to match"); + assert!( + checked.exception.is_none(), + "Expected no exception to match" + ); let matched_filter = checked.filter.unwrap(); - assert_eq!(matched_filter, "||googlesyndication.com/safeframe/$third-party"); + assert_eq!( + matched_filter, + "||googlesyndication.com/safeframe/$third-party" + ); // Test when no filter is found, returns None - let request = Request::new("http://ssafsdf.com", ¤t_page_frame, &request_type).unwrap(); + let request = + Request::new("http://ssafsdf.com", ¤t_page_frame, &request_type).unwrap(); let checked = engine.check_network_request(&request); assert!(checked.matched == false, "Expected url to pass"); assert!(checked.filter.is_none(), "Expected no fitler to match"); - assert!(checked.exception.is_none(), "Expected no exception to match"); + assert!( + checked.exception.is_none(), + "Expected no exception to match" + ); assert!(checked.redirect.is_none(), "Expected no redirect to match"); } @@ -740,13 +1044,24 @@ mod legacy_misc_tests { let request_type = "script"; // Parse that it finds exception filters correctly - let request = Request::new("http://tpc.googlesyndication.com/safeframe/1-0-2/html/container.html", ¤t_page_frame, &request_type).unwrap(); + let request = Request::new( + "http://tpc.googlesyndication.com/safeframe/1-0-2/html/container.html", + ¤t_page_frame, + &request_type, + ) + .unwrap(); let checked = engine.check_network_request(&request); assert!(checked.matched == false, "Expected url to pass"); assert!(checked.filter.is_some(), "Expected a fitler to match"); - assert!(checked.exception.is_some(), "Expected no exception to match"); + assert!( + checked.exception.is_some(), + "Expected no exception to match" + ); let matched_filter = checked.filter.unwrap(); - assert_eq!(matched_filter, "||googlesyndication.com/safeframe/$third-party"); + assert_eq!( + matched_filter, + "||googlesyndication.com/safeframe/$third-party" + ); let matched_exception = checked.exception.unwrap(); assert_eq!(matched_exception, "@@safeframe"); } @@ -755,20 +1070,21 @@ mod legacy_misc_tests { fn matches_with_filter_info_preserves_important() { // exceptions have not effect if important filter matches let engine = Engine::from_rules_debug( - [ - "||brianbondy.com^$important", - "@@||brianbondy.com^", - ], + ["||brianbondy.com^$important", "@@||brianbondy.com^"], Default::default(), ); - let request = Request::new("https://brianbondy.com/t", "https://test.com", "script").unwrap(); + let request = + Request::new("https://brianbondy.com/t", "https://test.com", "script").unwrap(); let checked = engine.check_network_request(&request); assert_eq!(checked.matched, true); assert!(checked.filter.is_some(), "Expected filter to match"); let matched_filter = checked.filter.unwrap(); assert_eq!(matched_filter, "||brianbondy.com^$important"); - assert!(checked.exception.is_none(), "Expected no exception to match"); + assert!( + checked.exception.is_none(), + "Expected no exception to match" + ); } } diff --git a/tests/live.rs b/tests/live.rs index 578738ef..5e9368e4 100644 --- a/tests/live.rs +++ b/tests/live.rs @@ -1,5 +1,5 @@ -use adblock::Engine; use adblock::request::Request; +use adblock::Engine; use serde::Deserialize; use tokio::runtime::Runtime; @@ -148,7 +148,9 @@ fn get_blocker_engine() -> Engine { engine } +#[cfg(not(feature = "flatbuffers"))] // No serialization for flatbuffers yet. fn get_blocker_engine_deserialized() -> Engine { + use adblock::Serialize; use futures::FutureExt; let async_runtime = Runtime::new().expect("Could not start Tokio runtime"); @@ -184,11 +186,14 @@ fn get_blocker_engine_deserialized() -> Engine { fn check_live_specific_urls() { let mut engine = get_blocker_engine(); { - let checked = engine.check_network_request(&Request::new( - "https://static.scroll.com/js/scroll.js", - "https://www.theverge.com/", - "script", - ).unwrap()); + let checked = engine.check_network_request( + &Request::new( + "https://static.scroll.com/js/scroll.js", + "https://www.theverge.com/", + "script", + ) + .unwrap(), + ); assert_eq!( checked.matched, false, "Expected match, got filter {:?}, exception {:?}", @@ -197,11 +202,14 @@ fn check_live_specific_urls() { } { engine.disable_tags(&["twitter-embeds"]); - let checked = engine.check_network_request(&Request::new( - "https://platform.twitter.com/widgets.js", - "https://fmarier.github.io/brave-testing/social-widgets.html", - "script", - ).unwrap()); + let checked = engine.check_network_request( + &Request::new( + "https://platform.twitter.com/widgets.js", + "https://fmarier.github.io/brave-testing/social-widgets.html", + "script", + ) + .unwrap(), + ); assert_eq!( checked.matched, true, "Expected no match, got filter {:?}, exception {:?}", @@ -225,6 +233,7 @@ fn check_live_specific_urls() { } } +#[cfg(not(feature = "flatbuffers"))] // No serialization for flatbuffers yet. #[test] #[ignore = "opt-in: requires BRAVE_SERVICE_KEY environment variable"] fn check_live_brave_deserialized_specific_urls() { @@ -232,11 +241,14 @@ fn check_live_brave_deserialized_specific_urls() { let mut engine = get_blocker_engine_deserialized(); { engine.disable_tags(&["twitter-embeds"]); - let checked = engine.check_network_request(&Request::new( - "https://platform.twitter.com/widgets.js", - "https://fmarier.github.io/brave-testing/social-widgets.html", - "script", - ).unwrap()); + let checked = engine.check_network_request( + &Request::new( + "https://platform.twitter.com/widgets.js", + "https://fmarier.github.io/brave-testing/social-widgets.html", + "script", + ) + .unwrap(), + ); assert_eq!( checked.matched, true, "Expected match, got filter {:?}, exception {:?}", @@ -245,11 +257,14 @@ fn check_live_brave_deserialized_specific_urls() { } { engine.enable_tags(&["twitter-embeds"]); - let checked = engine.check_network_request(&Request::new( - "https://platform.twitter.com/widgets.js", - "https://fmarier.github.io/brave-testing/social-widgets.html", - "script", - ).unwrap()); + let checked = engine.check_network_request( + &Request::new( + "https://platform.twitter.com/widgets.js", + "https://fmarier.github.io/brave-testing/social-widgets.html", + "script", + ) + .unwrap(), + ); assert_eq!( checked.matched, false, "Expected no match, got filter {:?}, exception {:?}", @@ -264,7 +279,8 @@ fn check_live_from_filterlists() { let requests = load_requests(); for req in requests { - let checked = engine.check_network_request(&Request::new(&req.url, &req.sourceUrl, &req.r#type).unwrap()); + let checked = engine + .check_network_request(&Request::new(&req.url, &req.sourceUrl, &req.r#type).unwrap()); assert_eq!( checked.matched, req.blocked, "Expected match {} for {} at {}, got filter {:?}, exception {:?}", @@ -273,6 +289,7 @@ fn check_live_from_filterlists() { } } +#[cfg(not(feature = "flatbuffers"))] // No serialization for flatbuffers yet. #[test] #[ignore = "opt-in: requires BRAVE_SERVICE_KEY environment variable"] fn check_live_brave_deserialized_file() { @@ -282,7 +299,8 @@ fn check_live_brave_deserialized_file() { for req in requests { println!("Checking {:?}", req); - let checked = engine.check_network_request(&Request::new(&req.url, &req.sourceUrl, &req.r#type).unwrap()); + let checked = engine + .check_network_request(&Request::new(&req.url, &req.sourceUrl, &req.r#type).unwrap()); assert_eq!( checked.matched, req.blocked, "Expected match {} for {} {} {}", @@ -304,11 +322,14 @@ fn check_live_redirects() { engine.use_resources(resources); { - let checked = engine.check_network_request(&Request::new( - "https://c.amazon-adsystem.com/aax2/amzn_ads.js", - "https://aussieexotics.com/", - "script", - ).unwrap()); + let checked = engine.check_network_request( + &Request::new( + "https://c.amazon-adsystem.com/aax2/amzn_ads.js", + "https://aussieexotics.com/", + "script", + ) + .unwrap(), + ); assert_eq!( checked.matched, true, "Expected match, got filter {:?}, exception {:?}", @@ -319,11 +340,14 @@ fn check_live_redirects() { assert_eq!(checked.redirect.unwrap(), "data:application/javascript;base64,LyoqKioqKioqKioqKioqKioqKioqKioqKioqKioqKioqKioqKioqKioqKioqKioqKioqKioqKioqKioqKioqKioqKioqKioqKioqKioqKioKCiAgICB1QmxvY2sgT3JpZ2luIC0gYSBicm93c2VyIGV4dGVuc2lvbiB0byBibG9jayByZXF1ZXN0cy4KICAgIENvcHlyaWdodCAoQykgMjAxOS1wcmVzZW50IFJheW1vbmQgSGlsbAoKICAgIFRoaXMgcHJvZ3JhbSBpcyBmcmVlIHNvZnR3YXJlOiB5b3UgY2FuIHJlZGlzdHJpYnV0ZSBpdCBhbmQvb3IgbW9kaWZ5CiAgICBpdCB1bmRlciB0aGUgdGVybXMgb2YgdGhlIEdOVSBHZW5lcmFsIFB1YmxpYyBMaWNlbnNlIGFzIHB1Ymxpc2hlZCBieQogICAgdGhlIEZyZWUgU29mdHdhcmUgRm91bmRhdGlvbiwgZWl0aGVyIHZlcnNpb24gMyBvZiB0aGUgTGljZW5zZSwgb3IKICAgIChhdCB5b3VyIG9wdGlvbikgYW55IGxhdGVyIHZlcnNpb24uCgogICAgVGhpcyBwcm9ncmFtIGlzIGRpc3RyaWJ1dGVkIGluIHRoZSBob3BlIHRoYXQgaXQgd2lsbCBiZSB1c2VmdWwsCiAgICBidXQgV0lUSE9VVCBBTlkgV0FSUkFOVFk7IHdpdGhvdXQgZXZlbiB0aGUgaW1wbGllZCB3YXJyYW50eSBvZgogICAgTUVSQ0hBTlRBQklMSVRZIG9yIEZJVE5FU1MgRk9SIEEgUEFSVElDVUxBUiBQVVJQT1NFLiAgU2VlIHRoZQogICAgR05VIEdlbmVyYWwgUHVibGljIExpY2Vuc2UgZm9yIG1vcmUgZGV0YWlscy4KCiAgICBZb3Ugc2hvdWxkIGhhdmUgcmVjZWl2ZWQgYSBjb3B5IG9mIHRoZSBHTlUgR2VuZXJhbCBQdWJsaWMgTGljZW5zZQogICAgYWxvbmcgd2l0aCB0aGlzIHByb2dyYW0uICBJZiBub3QsIHNlZSB7aHR0cDovL3d3dy5nbnUub3JnL2xpY2Vuc2VzL30uCgogICAgSG9tZTogaHR0cHM6Ly9naXRodWIuY29tL2dvcmhpbGwvdUJsb2NrCiovCgooZnVuY3Rpb24oKSB7CiAgICAndXNlIHN0cmljdCc7CiAgICBpZiAoIGFtem5hZHMgKSB7CiAgICAgICAgcmV0dXJuOwogICAgfQogICAgdmFyIHcgPSB3aW5kb3c7CiAgICB2YXIgbm9vcGZuID0gZnVuY3Rpb24oKSB7CiAgICAgICAgOwogICAgfS5iaW5kKCk7CiAgICB2YXIgYW16bmFkcyA9IHsKICAgICAgICBhcHBlbmRTY3JpcHRUYWc6IG5vb3BmbiwKICAgICAgICBhcHBlbmRUYXJnZXRpbmdUb0FkU2VydmVyVXJsOiBub29wZm4sCiAgICAgICAgYXBwZW5kVGFyZ2V0aW5nVG9RdWVyeVN0cmluZzogbm9vcGZuLAogICAgICAgIGNsZWFyVGFyZ2V0aW5nRnJvbUdQVEFzeW5jOiBub29wZm4sCiAgICAgICAgZG9BbGxUYXNrczogbm9vcGZuLAogICAgICAgIGRvR2V0QWRzQXN5bmM6IG5vb3BmbiwKICAgICAgICBkb1Rhc2s6IG5vb3BmbiwKICAgICAgICBkZXRlY3RJZnJhbWVBbmRHZXRVUkw6IG5vb3BmbiwKICAgICAgICBnZXRBZHM6IG5vb3BmbiwKICAgICAgICBnZXRBZHNBc3luYzogbm9vcGZuLAogICAgICAgIGdldEFkRm9yU2xvdDogbm9vcGZuLAogICAgICAgIGdldEFkc0NhbGxiYWNrOiBub29wZm4sCiAgICAgICAgZ2V0RGlzcGxheUFkczogbm9vcGZuLAogICAgICAgIGdldERpc3BsYXlBZHNBc3luYzogbm9vcGZuLAogICAgICAgIGdldERpc3BsYXlBZHNDYWxsYmFjazogbm9vcGZuLAogICAgICAgIGdldEtleXM6IG5vb3BmbiwKICAgICAgICBnZXRSZWZlcnJlclVSTDogbm9vcGZuLAogICAgICAgIGdldFNjcmlwdFNvdXJjZTogbm9vcGZuLAogICAgICAgIGdldFRhcmdldGluZzogbm9vcGZuLAogICAgICAgIGdldFRva2Vuczogbm9vcGZuLAogICAgICAgIGdldFZhbGlkTWlsbGlzZWNvbmRzOiBub29wZm4sCiAgICAgICAgZ2V0VmlkZW9BZHM6IG5vb3BmbiwKICAgICAgICBnZXRWaWRlb0Fkc0FzeW5jOiBub29wZm4sCiAgICAgICAgZ2V0VmlkZW9BZHNDYWxsYmFjazogbm9vcGZuLAogICAgICAgIGhhbmRsZUNhbGxCYWNrOiBub29wZm4sCiAgICAgICAgaGFzQWRzOiBub29wZm4sCiAgICAgICAgcmVuZGVyQWQ6IG5vb3BmbiwKICAgICAgICBzYXZlQWRzOiBub29wZm4sCiAgICAgICAgc2V0VGFyZ2V0aW5nOiBub29wZm4sCiAgICAgICAgc2V0VGFyZ2V0aW5nRm9yR1BUQXN5bmM6IG5vb3BmbiwKICAgICAgICBzZXRUYXJnZXRpbmdGb3JHUFRTeW5jOiBub29wZm4sCiAgICAgICAgdHJ5R2V0QWRzQXN5bmM6IG5vb3BmbiwKICAgICAgICB1cGRhdGVBZHM6IG5vb3BmbgogICAgfTsKICAgIHcuYW16bmFkcyA9IGFtem5hZHM7CiAgICB3LmFtem5fYWRzID0gdy5hbXpuX2FkcyB8fCBub29wZm47CiAgICB3LmFheF93cml0ZSA9IHcuYWF4X3dyaXRlIHx8IG5vb3BmbjsKICAgIHcuYWF4X3JlbmRlcl9hZCA9IHcuYWF4X3JlbmRlcl9hZCB8fCBub29wZm47Cn0pKCk7Cg=="); } { - let checked = engine.check_network_request(&Request::new( - "https://www.googletagservices.com/tag/js/gpt.js", - "https://tvguide.com/", - "script", - ).unwrap()); + let checked = engine.check_network_request( + &Request::new( + "https://www.googletagservices.com/tag/js/gpt.js", + "https://tvguide.com/", + "script", + ) + .unwrap(), + ); assert_eq!( checked.matched, true, "Expected match, got filter {:?}, exception {:?}", @@ -334,10 +358,12 @@ fn check_live_redirects() { } } +#[cfg(not(feature = "flatbuffers"))] // No serialization for flatbuffers yet. #[test] /// Ensure that two different engines loaded from the same textual filter set serialize to /// identical buffers. fn stable_serialization() { + use adblock::Serialize; let engine1 = Engine::from_filter_set(ALL_FILTERS.lock().unwrap().clone(), true); let ser1 = engine1.serialize_raw().unwrap(); @@ -347,10 +373,12 @@ fn stable_serialization() { assert_eq!(ser1, ser2); } +#[cfg(not(feature = "flatbuffers"))] // No serialization for flatbuffers yet. #[test] /// Ensure that one engine's serialization result can be exactly reproduced by another engine after /// deserializing from it. fn stable_serialization_through_load() { + use adblock::Serialize; let engine1 = Engine::from_filter_set(ALL_FILTERS.lock().unwrap().clone(), true); let ser1 = engine1.serialize_raw().unwrap(); diff --git a/tests/matching.rs b/tests/matching.rs index 9cbf28b7..fb061e71 100644 --- a/tests/matching.rs +++ b/tests/matching.rs @@ -1,9 +1,10 @@ -use adblock::Engine; use adblock::filters::network::NetworkFilter; +use adblock::filters::network::NetworkFilterMaskHelper; use adblock::filters::network::NetworkMatchable; use adblock::regex_manager::RegexManager; use adblock::request::Request; use adblock::resources::{MimeType, Resource, ResourceType}; +use adblock::Engine; use serde::{Deserialize, Serialize}; diff --git a/tests/simple_use.rs b/tests/simple_use.rs index 260c41d0..4b4f9259 100644 --- a/tests/simple_use.rs +++ b/tests/simple_use.rs @@ -1,5 +1,5 @@ -use adblock::Engine; use adblock::request::Request; +use adblock::Engine; #[test] fn check_simple_use() { @@ -16,7 +16,8 @@ fn check_simple_use() { "http://example.com/-advertisement-icon.", "http://example.com/helloworld", "image", - ).unwrap(); + ) + .unwrap(); let blocker_result = engine.check_network_request(&request); assert!(blocker_result.matched); } diff --git a/tests/test_utils.rs b/tests/test_utils.rs index 408aa5dc..a4e3a3db 100644 --- a/tests/test_utils.rs +++ b/tests/test_utils.rs @@ -2,10 +2,12 @@ //! needed outside of this directory. #[cfg(not(target_arch = "wasm32"))] -pub fn rules_from_lists(lists: impl IntoIterator>) -> impl Iterator { - fn read_file_lines(filename: &str) -> impl Iterator { - use std::io::{BufRead, BufReader}; +pub fn rules_from_lists( + lists: impl IntoIterator>, +) -> impl Iterator { + fn read_file_lines(filename: &str) -> impl Iterator { use std::fs::File; + use std::io::{BufRead, BufReader}; let reader = BufReader::new(File::open(filename).unwrap()); reader.lines().map(|r| r.unwrap()) diff --git a/tests/ublock-coverage.rs b/tests/ublock-coverage.rs index a0c3b0a3..cc641c2a 100644 --- a/tests/ublock-coverage.rs +++ b/tests/ublock-coverage.rs @@ -1,5 +1,5 @@ -use adblock::Engine; use adblock::request::Request; +use adblock::Engine; use serde::Deserialize; @@ -49,7 +49,7 @@ fn get_blocker_engine() -> Engine { Engine::from_rules_parametrised(rules, Default::default(), true, false) } -fn get_blocker_engine_default(extra_rules: impl IntoIterator>) -> Engine { +fn get_blocker_engine_default(extra_rules: impl IntoIterator>) -> Engine { let rules = rules_from_lists([ "data/easylist.to/easylist/easylist.txt", "data/easylist.to/easylist/easyprivacy.txt", @@ -69,14 +69,11 @@ fn get_blocker_engine_default(extra_rules: impl IntoIterator>, + requests: &[(Request, bool)], + ) { + let (network_filters, _) = parse_filters(filters, true, Default::default()); + + let blocker_options: BlockerOptions = BlockerOptions { + enable_optimizations: false, // optimizations will reduce number of rules + }; + + let blocker = Blocker::new(network_filters, &blocker_options); + + requests.iter().for_each(|(req, expected_result)| { + let matched_rule = blocker.check(&req, &Default::default()); + if *expected_result { + assert!(matched_rule.matched, "Expected match for {}", req.url); + } else { + assert!( + !matched_rule.matched, + "Expected no match for {}, matched with {:?}", + req.url, matched_rule.filter + ); + } + }); + } + + #[test] + fn redirect_blocking_exception() { + let filters = [ + "||imdb-video.media-imdb.com$media,redirect=noop-0.1s.mp3", + "@@||imdb-video.media-imdb.com^$domain=imdb.com", + ]; + + let request = Request::new( + "https://imdb-video.media-imdb.com/kBOeI88k1o23eNAi", + "https://www.imdb.com/video/13", + "media", + ) + .unwrap(); + + let (network_filters, _) = parse_filters(&filters, true, Default::default()); + + let blocker_options: BlockerOptions = BlockerOptions { + enable_optimizations: false, + }; + + let blocker = Blocker::new(network_filters, &blocker_options); + let mut resources = ResourceStorage::default(); + + resources + .add_resource(Resource::simple( + "noop-0.1s.mp3", + crate::resources::MimeType::AudioMp3, + "mp3", + )) + .unwrap(); + + let matched_rule = blocker.check(&request, &resources); + assert_eq!(matched_rule.matched, false); + assert_eq!(matched_rule.important, false); + assert_eq!( + matched_rule.redirect, + Some("data:audio/mp3;base64,bXAz".to_string()) + ); + assert_eq!( + matched_rule.exception, + Some("@@||imdb-video.media-imdb.com^$domain=imdb.com".to_string()) + ); + } + + #[test] + fn redirect_exception() { + let filters = [ + "||imdb-video.media-imdb.com$media,redirect=noop-0.1s.mp3", + "@@||imdb-video.media-imdb.com^$domain=imdb.com,redirect=noop-0.1s.mp3", + ]; + + let request = Request::new( + "https://imdb-video.media-imdb.com/kBOeI88k1o23eNAi", + "https://www.imdb.com/video/13", + "media", + ) + .unwrap(); + + let (network_filters, _) = parse_filters(&filters, true, Default::default()); + + let blocker_options: BlockerOptions = BlockerOptions { + enable_optimizations: false, + }; + + let blocker = Blocker::new(network_filters, &blocker_options); + let mut resources = ResourceStorage::default(); + + resources + .add_resource(Resource::simple( + "noop-0.1s.mp3", + crate::resources::MimeType::AudioMp3, + "mp3", + )) + .unwrap(); + + let matched_rule = blocker.check(&request, &resources); + assert_eq!(matched_rule.matched, false); + assert_eq!(matched_rule.important, false); + assert_eq!(matched_rule.redirect, None); + assert_eq!( + matched_rule.exception, + Some( + "@@||imdb-video.media-imdb.com^$domain=imdb.com,redirect=noop-0.1s.mp3".to_string() + ) + ); + } + + #[test] + fn redirect_rule_redirection() { + let filters = [ + "||doubleclick.net^", + "||www3.doubleclick.net^$xmlhttprequest,redirect-rule=noop.txt,domain=lineups.fun", + ]; + + let request = + Request::new("https://www3.doubleclick.net", "https://lineups.fun", "xhr").unwrap(); + + let (network_filters, _) = parse_filters(&filters, true, Default::default()); + + let blocker_options: BlockerOptions = BlockerOptions { + enable_optimizations: false, + }; + + let blocker = Blocker::new(network_filters, &blocker_options); + let mut resources = ResourceStorage::default(); + + resources + .add_resource(Resource::simple( + "noop.txt", + crate::resources::MimeType::TextPlain, + "noop", + )) + .unwrap(); + + let matched_rule = blocker.check(&request, &resources); + assert_eq!(matched_rule.matched, true); + assert_eq!(matched_rule.important, false); + assert_eq!( + matched_rule.redirect, + Some("data:text/plain;base64,bm9vcA==".to_string()) + ); + assert_eq!(matched_rule.exception, None); + } + + #[test] + fn badfilter_does_not_match() { + let filters = ["||foo.com$badfilter"]; + let url_results = [( + Request::new("https://foo.com", "https://bar.com", "image").unwrap(), + false, + )]; + + let request_expectations: Vec<_> = url_results + .into_iter() + .map(|(request, expected_result)| (request, expected_result)) + .collect(); + + test_requests_filters(&filters, &request_expectations); + } + + #[test] + fn badfilter_cancels_with_same_id() { + let filters = [ + "||foo.com$domain=bar.com|foo.com,badfilter", + "||foo.com$domain=foo.com|bar.com", + ]; + let url_results = [( + Request::new("https://foo.com", "https://bar.com", "image").unwrap(), + false, + )]; + + let request_expectations: Vec<_> = url_results + .into_iter() + .map(|(request, expected_result)| (request, expected_result)) + .collect(); + + test_requests_filters(&filters, &request_expectations); + } + + #[test] + fn badfilter_does_not_cancel_similar_filter() { + let filters = [ + "||foo.com$domain=bar.com|foo.com,badfilter", + "||foo.com$domain=foo.com|bar.com,image", + ]; + let url_results = [( + Request::new("https://foo.com", "https://bar.com", "image").unwrap(), + true, + )]; + + let request_expectations: Vec<_> = url_results + .into_iter() + .map(|(request, expected_result)| (request, expected_result)) + .collect(); + + test_requests_filters(&filters, &request_expectations); + } + + #[test] + fn hostname_regex_filter_works() { + let filters = [ + "||alimc*.top^$domain=letv.com", + "||aa*.top^$domain=letv.com", + ]; + let url_results = [ + ( + Request::new( + "https://r.alimc1.top/test.js", + "https://minisite.letv.com/", + "script", + ) + .unwrap(), + true, + ), + ( + Request::new( + "https://www.baidu.com/test.js", + "https://minisite.letv.com/", + "script", + ) + .unwrap(), + false, + ), + ( + Request::new( + "https://r.aabb.top/test.js", + "https://example.com/", + "script", + ) + .unwrap(), + false, + ), + ( + Request::new( + "https://r.aabb.top/test.js", + "https://minisite.letv.com/", + "script", + ) + .unwrap(), + true, + ), + ]; + + let (network_filters, _) = parse_filters(&filters, true, Default::default()); + + let blocker_options = BlockerOptions { + enable_optimizations: false, // optimizations will reduce number of rules + }; + + let blocker = Blocker::new(network_filters, &blocker_options); + let resources = ResourceStorage::default(); + + url_results.into_iter().for_each(|(req, expected_result)| { + let matched_rule = blocker.check(&req, &resources); + if expected_result { + assert!(matched_rule.matched, "Expected match for {}", req.url); + } else { + assert!( + !matched_rule.matched, + "Expected no match for {}, matched with {:?}", + req.url, matched_rule.filter + ); + } + }); + } + + #[test] + fn get_csp_directives() { + let filters = [ + "$csp=script-src 'self' * 'unsafe-inline',domain=thepiratebay.vip|pirateproxy.live|thehiddenbay.com|downloadpirate.com|thepiratebay10.org|kickass.vip|pirateproxy.app|ukpass.co|prox.icu|pirateproxy.life", + "$csp=worker-src 'none',domain=pirateproxy.live|thehiddenbay.com|tpb.party|thepiratebay.org|thepiratebay.vip|thepiratebay10.org|flashx.cc|vidoza.co|vidoza.net", + "||1337x.to^$csp=script-src 'self' 'unsafe-inline'", + "@@^no-csp^$csp=script-src 'self' 'unsafe-inline'", + "^duplicated-directive^$csp=worker-src 'none'", + "@@^disable-all^$csp", + "^first-party-only^$csp=script-src 'none',1p", + ]; + + let (network_filters, _) = parse_filters(&filters, true, Default::default()); + + let blocker_options = BlockerOptions { + enable_optimizations: false, + }; + + let blocker = Blocker::new(network_filters, &blocker_options); + + { + // No directives should be returned for requests that are not `document` or `subdocument` content types. + assert_eq!( + blocker.get_csp_directives( + &Request::new( + "https://pirateproxy.live/static/custom_ads.js", + "https://pirateproxy.live", + "script" + ) + .unwrap() + ), + None + ); + assert_eq!( + blocker.get_csp_directives( + &Request::new( + "https://pirateproxy.live/static/custom_ads.js", + "https://pirateproxy.live", + "image" + ) + .unwrap() + ), + None + ); + assert_eq!( + blocker.get_csp_directives( + &Request::new( + "https://pirateproxy.live/static/custom_ads.js", + "https://pirateproxy.live", + "object" + ) + .unwrap() + ), + None + ); + } + { + // A single directive should be returned if only one match is present in the engine, for both document and subdocument types + assert_eq!( + blocker.get_csp_directives( + &Request::new("https://example.com", "https://vidoza.co", "document").unwrap() + ), + Some(String::from("worker-src 'none'")) + ); + assert_eq!( + blocker.get_csp_directives( + &Request::new("https://example.com", "https://vidoza.net", "subdocument") + .unwrap() + ), + Some(String::from("worker-src 'none'")) + ); + } + { + // Multiple merged directives should be returned if more than one match is present in the engine + let possible_results = [ + Some(String::from( + "script-src 'self' * 'unsafe-inline',worker-src 'none'", + )), + Some(String::from( + "worker-src 'none',script-src 'self' * 'unsafe-inline'", + )), + ]; + assert!(possible_results.contains( + &blocker.get_csp_directives( + &Request::new( + "https://example.com", + "https://pirateproxy.live", + "document" + ) + .unwrap() + ) + )); + assert!(possible_results.contains( + &blocker.get_csp_directives( + &Request::new( + "https://example.com", + "https://pirateproxy.live", + "subdocument" + ) + .unwrap() + ) + )); + } + { + // A directive with an exception should not be returned + assert_eq!( + blocker.get_csp_directives( + &Request::new("https://1337x.to", "https://1337x.to", "document").unwrap() + ), + Some(String::from("script-src 'self' 'unsafe-inline'")) + ); + assert_eq!( + blocker.get_csp_directives( + &Request::new("https://1337x.to/no-csp", "https://1337x.to", "subdocument") + .unwrap() + ), + None + ); + } + { + // Multiple identical directives should only appear in the output once + assert_eq!( + blocker.get_csp_directives( + &Request::new( + "https://example.com/duplicated-directive", + "https://flashx.cc", + "document" + ) + .unwrap() + ), + Some(String::from("worker-src 'none'")) + ); + assert_eq!( + blocker.get_csp_directives( + &Request::new( + "https://example.com/duplicated-directive", + "https://flashx.cc", + "subdocument" + ) + .unwrap() + ), + Some(String::from("worker-src 'none'")) + ); + } + { + // A CSP exception with no corresponding directive should disable all CSP injections for the page + assert_eq!( + blocker.get_csp_directives( + &Request::new( + "https://1337x.to/duplicated-directive/disable-all", + "https://thepiratebay10.org", + "document" + ) + .unwrap() + ), + None + ); + assert_eq!( + blocker.get_csp_directives( + &Request::new( + "https://1337x.to/duplicated-directive/disable-all", + "https://thepiratebay10.org", + "document" + ) + .unwrap() + ), + None + ); + } + { + // A CSP exception with a partyness modifier should only match where the modifier applies + assert_eq!( + blocker.get_csp_directives( + &Request::new( + "htps://github.com/first-party-only", + "https://example.com", + "subdocument" + ) + .unwrap() + ), + None + ); + assert_eq!( + blocker.get_csp_directives( + &Request::new( + "https://example.com/first-party-only", + "https://example.com", + "document" + ) + .unwrap() + ), + Some(String::from("script-src 'none'")) + ); + } + } + + #[test] + fn test_removeparam() { + let filters = [ + "||example.com^$removeparam=test", + "*$removeparam=fbclid", + "/script.js$redirect-rule=noopjs", + "^block^$important", + "$removeparam=testCase,~xhr", + ]; + + let (network_filters, _) = parse_filters(&filters, true, Default::default()); + + let blocker_options = BlockerOptions { + enable_optimizations: true, + }; + + let blocker = Blocker::new(network_filters, &blocker_options); + let mut resources = ResourceStorage::default(); + + resources + .add_resource(Resource::simple( + "noopjs", + crate::resources::MimeType::ApplicationJavascript, + "(() => {})()", + )) + .unwrap(); + + let result = blocker.check( + &Request::new( + "https://example.com?q=1&test=2#blue", + "https://antonok.com", + "xhr", + ) + .unwrap(), + &resources, + ); + assert_eq!( + result.rewritten_url, + Some("https://example.com?q=1#blue".into()) + ); + assert!(!result.matched); + + let result = blocker.check( + &Request::new( + "https://example.com?test=2&q=1#blue", + "https://antonok.com", + "xhr", + ) + .unwrap(), + &resources, + ); + assert_eq!( + result.rewritten_url, + Some("https://example.com?q=1#blue".into()) + ); + assert!(!result.matched); + + let result = blocker.check( + &Request::new( + "https://example.com?test=2#blue", + "https://antonok.com", + "xhr", + ) + .unwrap(), + &resources, + ); + assert_eq!( + result.rewritten_url, + Some("https://example.com#blue".into()) + ); + assert!(!result.matched); + + let result = blocker.check( + &Request::new("https://example.com?q=1#blue", "https://antonok.com", "xhr").unwrap(), + &resources, + ); + assert_eq!(result.rewritten_url, None); + assert!(!result.matched); + + let result = blocker.check( + &Request::new( + "https://example.com?q=1&test=2", + "https://antonok.com", + "xhr", + ) + .unwrap(), + &resources, + ); + assert_eq!(result.rewritten_url, Some("https://example.com?q=1".into())); + assert!(!result.matched); + + let result = blocker.check( + &Request::new( + "https://example.com?test=2&q=1", + "https://antonok.com", + "xhr", + ) + .unwrap(), + &resources, + ); + assert_eq!(result.rewritten_url, Some("https://example.com?q=1".into())); + assert!(!result.matched); + + let result = blocker.check( + &Request::new("https://example.com?test=2", "https://antonok.com", "xhr").unwrap(), + &resources, + ); + assert_eq!(result.rewritten_url, Some("https://example.com".into())); + assert!(!result.matched); + + let result = blocker.check( + &Request::new("https://example.com?test=2", "https://antonok.com", "image").unwrap(), + &resources, + ); + assert_eq!(result.rewritten_url, None); + assert!(!result.matched); + + let result = blocker.check( + &Request::new("https://example.com?q=1", "https://antonok.com", "xhr").unwrap(), + &resources, + ); + assert_eq!(result.rewritten_url, None); + assert!(!result.matched); + + let result = blocker.check( + &Request::new("https://example.com?q=fbclid", "https://antonok.com", "xhr").unwrap(), + &resources, + ); + assert_eq!(result.rewritten_url, None); + assert!(!result.matched); + + let result = blocker.check( + &Request::new( + "https://example.com?fbclid=10938&q=1&test=2", + "https://antonok.com", + "xhr", + ) + .unwrap(), + &resources, + ); + assert_eq!(result.rewritten_url, Some("https://example.com?q=1".into())); + assert!(!result.matched); + + let result = blocker.check( + &Request::new( + "https://test.com?fbclid=10938&q=1&test=2", + "https://antonok.com", + "xhr", + ) + .unwrap(), + &resources, + ); + assert_eq!( + result.rewritten_url, + Some("https://test.com?q=1&test=2".into()) + ); + assert!(!result.matched); + + let result = blocker.check( + &Request::new( + "https://example.com?q1=1&q2=2&q3=3&test=2&q4=4&q5=5&fbclid=39", + "https://antonok.com", + "xhr", + ) + .unwrap(), + &resources, + ); + assert_eq!( + result.rewritten_url, + Some("https://example.com?q1=1&q2=2&q3=3&q4=4&q5=5".into()) + ); + assert!(!result.matched); + + let result = blocker.check( + &Request::new( + "https://example.com?q1=1&q1=2&test=2&test=3", + "https://antonok.com", + "xhr", + ) + .unwrap(), + &resources, + ); + assert_eq!( + result.rewritten_url, + Some("https://example.com?q1=1&q1=2".into()) + ); + assert!(!result.matched); + + let result = blocker.check( + &Request::new( + "https://example.com/script.js?test=2#blue", + "https://antonok.com", + "xhr", + ) + .unwrap(), + &resources, + ); + assert_eq!( + result.rewritten_url, + Some("https://example.com/script.js#blue".into()) + ); + assert_eq!( + result.redirect, + Some("data:application/javascript;base64,KCgpID0+IHt9KSgp".into()) + ); + assert!(!result.matched); + + let result = blocker.check( + &Request::new( + "https://example.com/block/script.js?test=2", + "https://antonok.com", + "xhr", + ) + .unwrap(), + &resources, + ); + assert_eq!(result.rewritten_url, None); + assert_eq!( + result.redirect, + Some("data:application/javascript;base64,KCgpID0+IHt9KSgp".into()) + ); + assert!(result.matched); + + let result = blocker.check( + &Request::new( + "https://example.com/Path/?Test=ABC&testcase=AbC&testCase=aBc", + "https://antonok.com", + "xhr", + ) + .unwrap(), + &resources, + ); + assert_eq!(result.rewritten_url, None); + assert!(!result.matched); + + let result = blocker.check( + &Request::new( + "https://example.com/Path/?Test=ABC&testcase=AbC&testCase=aBc", + "https://antonok.com", + "image", + ) + .unwrap(), + &resources, + ); + assert_eq!(result.rewritten_url, None); + assert!(!result.matched); + + let result = blocker.check( + &Request::new( + "https://example.com/Path/?Test=ABC&testcase=AbC&testCase=aBc", + "https://antonok.com", + "subdocument", + ) + .unwrap(), + &resources, + ); + assert_eq!( + result.rewritten_url, + Some("https://example.com/Path/?Test=ABC&testcase=AbC".into()) + ); + assert!(!result.matched); + + let result = blocker.check( + &Request::new( + "https://example.com/Path/?Test=ABC&testcase=AbC&testCase=aBc", + "https://antonok.com", + "document", + ) + .unwrap(), + &resources, + ); + assert_eq!( + result.rewritten_url, + Some("https://example.com/Path/?Test=ABC&testcase=AbC".into()) + ); + assert!(!result.matched); + + let result = blocker.check( + &Request::new( + "https://example.com?Test=ABC?123&test=3#&test=4#b", + "https://antonok.com", + "document", + ) + .unwrap(), + &resources, + ); + assert_eq!( + result.rewritten_url, + Some("https://example.com?Test=ABC?123#&test=4#b".into()) + ); + assert!(!result.matched); + + let result = blocker.check( + &Request::new( + "https://example.com?Test=ABC&testCase=5", + "https://antonok.com", + "document", + ) + .unwrap(), + &resources, + ); + assert_eq!( + result.rewritten_url, + Some("https://example.com?Test=ABC".into()) + ); + assert!(!result.matched); + + let result = blocker.check( + &Request::new( + "https://example.com?Test=ABC&testCase=5", + "https://antonok.com", + "image", + ) + .unwrap(), + &resources, + ); + assert_eq!(result.rewritten_url, None); + assert!(!result.matched); + } + + /// Tests ported from the previous query parameter stripping logic in brave-core + #[test] + fn removeparam_brave_core_tests() { + let testcases = [ + // (original url, expected url after filtering) + ("https://example.com/?fbclid=1234", "https://example.com/"), + ("https://example.com/?fbclid=1234&", "https://example.com/"), + ("https://example.com/?&fbclid=1234", "https://example.com/"), + ("https://example.com/?gclid=1234", "https://example.com/"), + ( + "https://example.com/?fbclid=0&gclid=1&msclkid=a&mc_eid=a1", + "https://example.com/", + ), + ( + "https://example.com/?fbclid=&foo=1&bar=2&gclid=abc", + "https://example.com/?fbclid=&foo=1&bar=2", + ), + ( + "https://example.com/?fbclid=&foo=1&gclid=1234&bar=2", + "https://example.com/?fbclid=&foo=1&bar=2", + ), + ( + "http://u:p@example.com/path/file.html?foo=1&fbclid=abcd#fragment", + "http://u:p@example.com/path/file.html?foo=1#fragment", + ), + ("https://example.com/?__s=1234-abcd", "https://example.com/"), + // Obscure edge cases that break most parsers: + ( + "https://example.com/?fbclid&foo&&gclid=2&bar=&%20", + "https://example.com/?fbclid&foo&&bar=&%20", + ), + ( + "https://example.com/?fbclid=1&1==2&=msclkid&foo=bar&&a=b=c&", + "https://example.com/?1==2&=msclkid&foo=bar&&a=b=c&", + ), + ( + "https://example.com/?fbclid=1&=2&?foo=yes&bar=2+", + "https://example.com/?=2&?foo=yes&bar=2+", + ), + ( + "https://example.com/?fbclid=1&a+b+c=some%20thing&1%202=3+4", + "https://example.com/?a+b+c=some%20thing&1%202=3+4", + ), + // Conditional query parameter stripping + /*("https://example.com/?mkt_tok=123&foo=bar", + "https://example.com/?foo=bar"),*/ + ]; + + let filters = [ + "fbclid", + "gclid", + "msclkid", + "mc_eid", + "dclid", + "oly_anon_id", + "oly_enc_id", + "_openstat", + "vero_conv", + "vero_id", + "wickedid", + "yclid", + "__s", + "rb_clickid", + "s_cid", + "ml_subscriber", + "ml_subscriber_hash", + "twclid", + "gbraid", + "wbraid", + "_hsenc", + "__hssc", + "__hstc", + "__hsfp", + "hsCtaTracking", + "oft_id", + "oft_k", + "oft_lk", + "oft_d", + "oft_c", + "oft_ck", + "oft_ids", + "oft_sk", + "ss_email_id", + "bsft_uid", + "bsft_clkid", + "vgo_ee", + "igshid", + ] + .iter() + .map(|s| format!("*$removeparam={}", s)) + .collect::>(); + + let (network_filters, _) = parse_filters(&filters, true, Default::default()); + + let blocker_options = BlockerOptions { + enable_optimizations: true, + }; + + let blocker = Blocker::new(network_filters, &blocker_options); + let resources = ResourceStorage::default(); + + for (original, expected) in testcases.into_iter() { + let result = blocker.check( + &Request::new(original, "https://example.net", "xhr").unwrap(), + &resources, + ); + let expected = if original == expected { + None + } else { + Some(expected.to_string()) + }; + assert_eq!( + expected, result.rewritten_url, + "Filtering parameters on {} failed", + original + ); + } + } + + #[test] + fn test_removeparam_same_tokens() { + let filters = ["$removeparam=example1_", "$removeparam=example1-"]; + + let (network_filters, _) = parse_filters(&filters, true, Default::default()); + + let blocker_options = BlockerOptions { + enable_optimizations: true, + }; + + let blocker = Blocker::new(network_filters, &blocker_options); + + let result = blocker.check( + &Request::new( + "https://example.com?example1_=1&example1-=2", + "https://example.com", + "xhr", + ) + .unwrap(), + &Default::default(), + ); + assert_eq!(result.rewritten_url, Some("https://example.com".into())); + assert!(!result.matched); + } + + #[test] + fn test_redirect_priority() { + let filters = [ + ".txt^$redirect-rule=a", + "||example.com^$redirect-rule=b:10", + "/text$redirect-rule=c:20", + "@@^excepta^$redirect-rule=a", + "@@^exceptb10^$redirect-rule=b:10", + "@@^exceptc20^$redirect-rule=c:20", + ]; + + let (network_filters, _) = parse_filters(&filters, true, Default::default()); + + let blocker_options = BlockerOptions { + enable_optimizations: true, + }; + + let blocker = Blocker::new(network_filters, &blocker_options); + let mut resources = ResourceStorage::default(); + fn add_simple_resource( + resources: &mut ResourceStorage, + identifier: &str, + ) -> Option { + resources + .add_resource(Resource::simple( + identifier, + crate::resources::MimeType::TextPlain, + identifier, + )) + .unwrap(); + Some(format!( + "data:text/plain;base64,{}", + base64::encode(identifier) + )) + } + let a_redirect = add_simple_resource(&mut resources, "a"); + let b_redirect = add_simple_resource(&mut resources, "b"); + let c_redirect = add_simple_resource(&mut resources, "c"); + + let result = blocker.check( + &Request::new( + "https://example.net/test", + "https://example.com", + "xmlhttprequest", + ) + .unwrap(), + &resources, + ); + assert_eq!(result.redirect, None); + assert!(!result.matched); + + let result = blocker.check( + &Request::new( + "https://example.net/test.txt", + "https://example.com", + "xmlhttprequest", + ) + .unwrap(), + &resources, + ); + assert_eq!(result.redirect, a_redirect); + assert!(!result.matched); + + let result = blocker.check( + &Request::new( + "https://example.com/test.txt", + "https://example.com", + "xmlhttprequest", + ) + .unwrap(), + &resources, + ); + assert_eq!(result.redirect, b_redirect); + assert!(!result.matched); + + let result = blocker.check( + &Request::new( + "https://example.com/text.txt", + "https://example.com", + "xmlhttprequest", + ) + .unwrap(), + &resources, + ); + assert_eq!(result.redirect, c_redirect); + assert!(!result.matched); + + let result = blocker.check( + &Request::new( + "https://example.com/exceptc20/text.txt", + "https://example.com", + "xmlhttprequest", + ) + .unwrap(), + &resources, + ); + assert_eq!(result.redirect, b_redirect); + assert!(!result.matched); + + let result = blocker.check( + &Request::new( + "https://example.com/exceptb10/text.txt", + "https://example.com", + "xmlhttprequest", + ) + .unwrap(), + &resources, + ); + assert_eq!(result.redirect, c_redirect); + assert!(!result.matched); + + let result = blocker.check( + &Request::new( + "https://example.com/exceptc20/exceptb10/text.txt", + "https://example.com", + "xmlhttprequest", + ) + .unwrap(), + &resources, + ); + assert_eq!(result.redirect, a_redirect); + assert!(!result.matched); + + let result = blocker.check( + &Request::new( + "https://example.com/exceptc20/exceptb10/excepta/text.txt", + "https://example.com", + "xmlhttprequest", + ) + .unwrap(), + &resources, + ); + assert_eq!(result.redirect, None); + assert!(!result.matched); + + let result = blocker.check( + &Request::new( + "https://example.com/exceptc20/exceptb10/text", + "https://example.com", + "xmlhttprequest", + ) + .unwrap(), + &resources, + ); + assert_eq!(result.redirect, None); + assert!(!result.matched); + } + + #[test] + fn tags_enable_works() { + let filters = [ + "adv$tag=stuff", + "somelongpath/test$tag=stuff", + "||brianbondy.com/$tag=brian", + "||brave.com$tag=brian", + ]; + let url_results = [ + ("http://example.com/advert.html", true), + ("http://example.com/somelongpath/test/2.html", true), + ("https://brianbondy.com/about", false), + ("https://brave.com/about", false), + ]; + + let request_expectations: Vec<_> = url_results + .into_iter() + .map(|(url, expected_result)| { + let request = Request::new(url, "https://example.com", "other").unwrap(); + (request, expected_result) + }) + .collect(); + + let (network_filters, _) = parse_filters(&filters, true, Default::default()); + + let blocker_options: BlockerOptions = BlockerOptions { + enable_optimizations: false, // optimizations will reduce number of rules + }; + + let mut blocker = Blocker::new(network_filters, &blocker_options); + let resources = Default::default(); + blocker.enable_tags(&["stuff"]); + assert_eq!( + blocker.tags_enabled, + HashSet::from_iter([String::from("stuff")].into_iter()) + ); + assert_eq!(vec_hashmap_len(&blocker.filters_tagged.filter_map), 2); + + request_expectations + .into_iter() + .for_each(|(req, expected_result)| { + let matched_rule = blocker.check(&req, &resources); + if expected_result { + assert!(matched_rule.matched, "Expected match for {}", req.url); + } else { + assert!( + !matched_rule.matched, + "Expected no match for {}, matched with {:?}", + req.url, matched_rule.filter + ); + } + }); + } + + #[test] + fn tags_enable_adds_tags() { + let filters = [ + "adv$tag=stuff", + "somelongpath/test$tag=stuff", + "||brianbondy.com/$tag=brian", + "||brave.com$tag=brian", + ]; + let url_results = [ + ("http://example.com/advert.html", true), + ("http://example.com/somelongpath/test/2.html", true), + ("https://brianbondy.com/about", true), + ("https://brave.com/about", true), + ]; + + let request_expectations: Vec<_> = url_results + .into_iter() + .map(|(url, expected_result)| { + let request = Request::new(url, "https://example.com", "other").unwrap(); + (request, expected_result) + }) + .collect(); + + let (network_filters, _) = parse_filters(&filters, true, Default::default()); + + let blocker_options: BlockerOptions = BlockerOptions { + enable_optimizations: false, // optimizations will reduce number of rules + }; + + let mut blocker = Blocker::new(network_filters, &blocker_options); + let resources = Default::default(); + blocker.enable_tags(&["stuff"]); + blocker.enable_tags(&["brian"]); + assert_eq!( + blocker.tags_enabled, + HashSet::from_iter([String::from("brian"), String::from("stuff")].into_iter()) + ); + assert_eq!(vec_hashmap_len(&blocker.filters_tagged.filter_map), 4); + + request_expectations + .into_iter() + .for_each(|(req, expected_result)| { + let matched_rule = blocker.check(&req, &resources); + if expected_result { + assert!(matched_rule.matched, "Expected match for {}", req.url); + } else { + assert!( + !matched_rule.matched, + "Expected no match for {}, matched with {:?}", + req.url, matched_rule.filter + ); + } + }); + } + + #[test] + fn tags_disable_works() { + let filters = [ + "adv$tag=stuff", + "somelongpath/test$tag=stuff", + "||brianbondy.com/$tag=brian", + "||brave.com$tag=brian", + ]; + let url_results = [ + ("http://example.com/advert.html", false), + ("http://example.com/somelongpath/test/2.html", false), + ("https://brianbondy.com/about", true), + ("https://brave.com/about", true), + ]; + + let request_expectations: Vec<_> = url_results + .into_iter() + .map(|(url, expected_result)| { + let request = Request::new(url, "https://example.com", "other").unwrap(); + (request, expected_result) + }) + .collect(); + + let (network_filters, _) = parse_filters(&filters, true, Default::default()); + + let blocker_options: BlockerOptions = BlockerOptions { + enable_optimizations: false, // optimizations will reduce number of rules + }; + + let mut blocker = Blocker::new(network_filters, &blocker_options); + let resources = Default::default(); + blocker.enable_tags(&["brian", "stuff"]); + assert_eq!( + blocker.tags_enabled, + HashSet::from_iter([String::from("brian"), String::from("stuff")].into_iter()) + ); + assert_eq!(vec_hashmap_len(&blocker.filters_tagged.filter_map), 4); + blocker.disable_tags(&["stuff"]); + assert_eq!( + blocker.tags_enabled, + HashSet::from_iter([String::from("brian")].into_iter()) + ); + assert_eq!(vec_hashmap_len(&blocker.filters_tagged.filter_map), 2); + + request_expectations + .into_iter() + .for_each(|(req, expected_result)| { + let matched_rule = blocker.check(&req, &resources); + if expected_result { + assert!(matched_rule.matched, "Expected match for {}", req.url); + } else { + assert!( + !matched_rule.matched, + "Expected no match for {}, matched with {:?}", + req.url, matched_rule.filter + ); + } + }); + } + + #[test] + fn filter_add_badfilter_error() { + let blocker_options: BlockerOptions = BlockerOptions { + enable_optimizations: false, + }; + + let mut blocker = Blocker::new(Vec::new(), &blocker_options); + + let filter = NetworkFilter::parse("adv$badfilter", true, Default::default()).unwrap(); + let added = blocker.add_filter(filter); + assert!(added.is_err()); + assert_eq!(added.err().unwrap(), BlockerError::BadFilterAddUnsupported); + } + + #[test] + #[ignore] + fn filter_add_twice_handling_error() { + { + // Not allow filter to be added twice hwn the engine is not optimised + let blocker_options: BlockerOptions = BlockerOptions { + enable_optimizations: false, + }; + + let mut blocker = Blocker::new(Vec::new(), &blocker_options); + + let filter = NetworkFilter::parse("adv", true, Default::default()).unwrap(); + blocker.add_filter(filter.clone()).unwrap(); + assert!( + blocker.filter_exists(&filter), + "Expected filter to be inserted" + ); + let added = blocker.add_filter(filter); + assert!(added.is_err(), "Expected repeated insertion to fail"); + assert_eq!( + added.err().unwrap(), + BlockerError::FilterExists, + "Expected specific error on repeated insertion fail" + ); + } + { + // Allow filter to be added twice when the engine is optimised + let blocker_options: BlockerOptions = BlockerOptions { + enable_optimizations: true, + }; + + let mut blocker = Blocker::new(Vec::new(), &blocker_options); + + let filter = NetworkFilter::parse("adv", true, Default::default()).unwrap(); + blocker.add_filter(filter.clone()).unwrap(); + let added = blocker.add_filter(filter); + assert!(added.is_ok()); + } + } + + #[test] + fn filter_add_tagged() { + // Allow filter to be added twice when the engine is optimised + let blocker_options: BlockerOptions = BlockerOptions { + enable_optimizations: true, + }; + + let mut blocker = Blocker::new(Vec::new(), &blocker_options); + let resources = Default::default(); + blocker.enable_tags(&["brian"]); + + blocker + .add_filter(NetworkFilter::parse("adv$tag=stuff", true, Default::default()).unwrap()) + .unwrap(); + blocker + .add_filter( + NetworkFilter::parse("somelongpath/test$tag=stuff", true, Default::default()) + .unwrap(), + ) + .unwrap(); + blocker + .add_filter( + NetworkFilter::parse("||brianbondy.com/$tag=brian", true, Default::default()) + .unwrap(), + ) + .unwrap(); + blocker + .add_filter( + NetworkFilter::parse("||brave.com$tag=brian", true, Default::default()).unwrap(), + ) + .unwrap(); + + let url_results = [ + ("http://example.com/advert.html", false), + ("http://example.com/somelongpath/test/2.html", false), + ("https://brianbondy.com/about", true), + ("https://brave.com/about", true), + ]; + + let request_expectations: Vec<_> = url_results + .into_iter() + .map(|(url, expected_result)| { + let request = Request::new(url, "https://example.com", "other").unwrap(); + (request, expected_result) + }) + .collect(); + + request_expectations + .into_iter() + .for_each(|(req, expected_result)| { + let matched_rule = blocker.check(&req, &resources); + if expected_result { + assert!(matched_rule.matched, "Expected match for {}", req.url); + } else { + assert!( + !matched_rule.matched, + "Expected no match for {}, matched with {:?}", + req.url, matched_rule.filter + ); + } + }); + } + + #[test] + fn exception_force_check() { + let blocker_options: BlockerOptions = BlockerOptions { + enable_optimizations: true, + }; + + let mut filter_set = crate::lists::FilterSet::new(true); + filter_set + .add_filter("@@*ad_banner.png", Default::default()) + .unwrap(); + + let blocker = Blocker::new(filter_set.network_filters, &blocker_options); + let resources = Default::default(); + + let request = Request::new( + "http://example.com/ad_banner.png", + "https://example.com", + "other", + ) + .unwrap(); + + let matched_rule = blocker.check_parameterised(&request, &resources, false, true); + assert!(!matched_rule.matched); + assert!(matched_rule.exception.is_some()); + } + + #[test] + fn generichide() { + let blocker_options: BlockerOptions = BlockerOptions { + enable_optimizations: true, + }; + + let mut filter_set = crate::lists::FilterSet::new(true); + filter_set + .add_filter("@@||example.com$generichide", Default::default()) + .unwrap(); + + let blocker = Blocker::new(filter_set.network_filters, &blocker_options); + + assert!(blocker.check_generic_hide( + &Request::new("https://example.com", "https://example.com", "other").unwrap() + )); + } +} + +#[cfg(test)] +mod placeholder_string_tests { + /// If this changes, be sure to update the documentation for [`BlockerResult`] as well. + #[test] + fn test_constant_placeholder_string() { + let mut filter_set = crate::lists::FilterSet::new(false); + filter_set + .add_filter("||example.com^", Default::default()) + .unwrap(); + let engine = crate::Engine::from_filter_set(filter_set, true); + let block = engine.check_network_request( + &crate::request::Request::new("https://example.com", "https://example.com", "document") + .unwrap(), + ); + assert_eq!(block.filter, Some("NetworkFilter".to_string())); + } +} + +#[cfg(test)] +mod legacy_rule_parsing_tests { + use super::super::*; + use crate::blocker::{Blocker, BlockerOptions}; + use crate::lists::{parse_filters, FilterFormat, ParseOptions}; + use crate::network_filter_list::vec_hashmap_len; + use crate::test_utils::rules_from_lists; + + struct ListCounts { + pub filters: usize, + pub cosmetic_filters: usize, + pub exceptions: usize, + pub duplicates: usize, + } + + impl std::ops::Add for ListCounts { + type Output = ListCounts; + + fn add(self, other: ListCounts) -> Self::Output { + ListCounts { + filters: self.filters + other.filters, + cosmetic_filters: self.cosmetic_filters + other.cosmetic_filters, + exceptions: self.exceptions + other.exceptions, + duplicates: 0, // Don't bother trying to calculate - lists could have cross-duplicated entries + } + } + } + + // number of expected EasyList cosmetic rules from old engine is 31144, but is incorrect as it skips a few particularly long rules that are nevertheless valid + // easyList = { 24478, 31144, 0, 5589 }; + // not handling (and not including) filters with the following options: + // - $popup + // - $elemhide + // difference from original counts caused by not handling document/subdocument options and possibly miscounting on the blocker side. + // Printing all non-cosmetic, non-html, non-comment/-empty rules and ones with no unsupported options yields 29142 items + // This engine also handles 3 rules that old one does not + const EASY_LIST: ListCounts = ListCounts { + filters: 35597, // 36259 - 662 exceptions + cosmetic_filters: if cfg!(feature = "css-validation") { + 23072 + } else { + 23080 + }, + exceptions: 662, + duplicates: 0, + }; + // easyPrivacy = { 11817, 0, 0, 1020 }; + // differences in counts explained by hashset size underreporting as detailed in the next two cases + const EASY_PRIVACY: ListCounts = ListCounts { + filters: 52278, // 52998 - 720 exceptions + cosmetic_filters: 21, + exceptions: 720, + duplicates: 2, + }; + // ublockUnbreak = { 4, 8, 0, 94 }; + // differences in counts explained by client.hostAnchoredExceptionHashSet->GetSize() underreporting when compared to client.numHostAnchoredExceptionFilters + const UBLOCK_UNBREAK: ListCounts = ListCounts { + filters: 4, + cosmetic_filters: 8, + exceptions: 98, + duplicates: 0, + }; + // braveUnbreak = { 31, 0, 0, 4 }; + // differences in counts explained by client.hostAnchoredHashSet->GetSize() underreporting when compared to client.numHostAnchoredFilters + const BRAVE_UNBREAK: ListCounts = ListCounts { + filters: 32, + cosmetic_filters: 0, + exceptions: 4, + duplicates: 0, + }; + // disconnectSimpleMalware = { 2450, 0, 0, 0 }; + const DISCONNECT_SIMPLE_MALWARE: ListCounts = ListCounts { + filters: 2450, + cosmetic_filters: 0, + exceptions: 0, + duplicates: 0, + }; + // spam404MainBlacklist = { 5629, 166, 0, 0 }; + const SPAM_404_MAIN_BLACKLIST: ListCounts = ListCounts { + filters: 5629, + cosmetic_filters: 166, + exceptions: 0, + duplicates: 0, + }; + const MALWARE_DOMAIN_LIST: ListCounts = ListCounts { + filters: 1104, + cosmetic_filters: 0, + exceptions: 0, + duplicates: 3, + }; + const MALWARE_DOMAINS: ListCounts = ListCounts { + filters: 26853, + cosmetic_filters: 0, + exceptions: 0, + duplicates: 48, + }; + + fn check_list_counts( + rule_lists: impl IntoIterator>, + format: FilterFormat, + expectation: ListCounts, + ) { + let rules = rules_from_lists(rule_lists); + + let (network_filters, cosmetic_filters) = parse_filters( + rules, + true, + ParseOptions { + format, + ..Default::default() + }, + ); + + assert_eq!( + ( + network_filters.len(), + network_filters.iter().filter(|f| f.is_exception()).count(), + cosmetic_filters.len() + ), + ( + expectation.filters + expectation.exceptions, + expectation.exceptions, + expectation.cosmetic_filters + ), + "Number of collected filters does not match expectation" + ); + + let blocker_options = BlockerOptions { + enable_optimizations: false, // optimizations will reduce number of rules + }; + + let blocker = Blocker::new(network_filters, &blocker_options); + + // Some filters in the filter_map are pointed at by multiple tokens, increasing the total number of items + assert!( + vec_hashmap_len(&blocker.exceptions.filter_map) + + vec_hashmap_len(&blocker.generic_hide.filter_map) + >= expectation.exceptions, + "Number of collected exceptions does not match expectation" + ); + + assert!( + vec_hashmap_len(&blocker.filters.filter_map) + + vec_hashmap_len(&blocker.importants.filter_map) + + vec_hashmap_len(&blocker.redirects.filter_map) + + vec_hashmap_len(&blocker.redirects.filter_map) + + vec_hashmap_len(&blocker.csp.filter_map) + >= expectation.filters - expectation.duplicates, + "Number of collected network filters does not match expectation" + ); + } + + #[test] + fn parse_easylist() { + check_list_counts( + ["./data/easylist.to/easylist/easylist.txt"], + FilterFormat::Standard, + EASY_LIST, + ); + } + + #[test] + fn parse_easyprivacy() { + check_list_counts( + ["./data/easylist.to/easylist/easyprivacy.txt"], + FilterFormat::Standard, + EASY_PRIVACY, + ); + } + + #[test] + fn parse_ublock_unbreak() { + check_list_counts( + ["./data/test/ublock-unbreak.txt"], + FilterFormat::Standard, + UBLOCK_UNBREAK, + ); + } + + #[test] + fn parse_brave_unbreak() { + check_list_counts( + ["./data/test/brave-unbreak.txt"], + FilterFormat::Standard, + BRAVE_UNBREAK, + ); + } + + #[test] + fn parse_brave_disconnect_simple_malware() { + check_list_counts( + ["./data/test/disconnect-simple-malware.txt"], + FilterFormat::Standard, + DISCONNECT_SIMPLE_MALWARE, + ); + } + + #[test] + fn parse_spam404_main_blacklist() { + check_list_counts( + ["./data/test/spam404-main-blacklist.txt"], + FilterFormat::Standard, + SPAM_404_MAIN_BLACKLIST, + ); + } + + #[test] + fn parse_malware_domain_list() { + check_list_counts( + ["./data/test/malwaredomainlist.txt"], + FilterFormat::Hosts, + MALWARE_DOMAIN_LIST, + ); + } + + #[test] + fn parse_malware_domain_list_just_hosts() { + check_list_counts( + ["./data/test/malwaredomainlist_justhosts.txt"], + FilterFormat::Hosts, + MALWARE_DOMAIN_LIST, + ); + } + + #[test] + fn parse_malware_domains() { + check_list_counts( + ["./data/test/malwaredomains.txt"], + FilterFormat::Hosts, + MALWARE_DOMAINS, + ); + } + + #[test] + fn parse_multilist() { + let expectation = EASY_LIST + EASY_PRIVACY + UBLOCK_UNBREAK + BRAVE_UNBREAK; + check_list_counts( + [ + "./data/easylist.to/easylist/easylist.txt", + "./data/easylist.to/easylist/easyprivacy.txt", + "./data/test/ublock-unbreak.txt", + "./data/test/brave-unbreak.txt", + ], + FilterFormat::Standard, + expectation, + ) + } + + #[test] + fn parse_malware_multilist() { + let expectation = SPAM_404_MAIN_BLACKLIST + DISCONNECT_SIMPLE_MALWARE; + check_list_counts( + [ + "./data/test/spam404-main-blacklist.txt", + "./data/test/disconnect-simple-malware.txt", + ], + FilterFormat::Standard, + expectation, + ) + } + + #[test] + fn parse_hosts_formats() { + let mut expectation = MALWARE_DOMAIN_LIST + MALWARE_DOMAINS; + expectation.duplicates = 69; + check_list_counts( + [ + "./data/test/malwaredomainlist.txt", + "./data/test/malwaredomains.txt", + ], + FilterFormat::Hosts, + expectation, + ) + } +} diff --git a/tests/unit/content_blocking.rs b/tests/unit/content_blocking.rs new file mode 100644 index 00000000..07b9cd83 --- /dev/null +++ b/tests/unit/content_blocking.rs @@ -0,0 +1,794 @@ +#[cfg(test)] +mod ab2cb_tests { + use super::super::*; + + fn test_from_abp(abp_rule: &str, cb: &str) { + let filter = crate::lists::parse_filter(abp_rule, true, Default::default()) + .expect("Rule under test could not be parsed"); + assert_eq!( + CbRuleEquivalent::try_from(filter) + .unwrap() + .into_iter() + .collect::>(), + serde_json::from_str::>(cb) + .expect("content blocking rule under test could not be deserialized") + ); + } + + #[test] + fn ad_tests() { + test_from_abp( + "&ad_box_", + r####"[{ + "action": { + "type": "block" + }, + "trigger": { + "url-filter": "&ad_box_" + } + }]"####, + ); + test_from_abp( + "&ad_channel=", + r####"[{ + "action": { + "type": "block" + }, + "trigger": { + "url-filter": "&ad_channel=" + } + }]"####, + ); + test_from_abp( + "+advertorial.", + r####"[{ + "action": { + "type": "block" + }, + "trigger": { + "url-filter": "\\+advertorial\\." + } + }]"####, + ); + test_from_abp( + "&prvtof=*&poru=", + r####"[{ + "action": { + "type": "block" + }, + "trigger": { + "url-filter": "&prvtof=.*&poru=" + } + }]"####, + ); + test_from_abp( + "-ad-180x150px.", + r####"[{ + "action": { + "type": "block" + }, + "trigger": { + "url-filter": "-ad-180x150px\\." + } + }]"####, + ); + test_from_abp( + "://findnsave.*.*/api/groupon.json?", + r####"[{ + "action": { + "type": "block" + }, + "trigger": { + "url-filter": "://findnsave\\..*\\..*/api/groupon\\.json\\?" + } + }]"####, + ); + test_from_abp( + "|https://$script,third-party,domain=tamilrockers.ws", + r####"[{ + "action": { + "type": "block" + }, + "trigger": { + "if-domain": ["*tamilrockers.ws"], + "load-type": ["third-party"], + "resource-type": ["script"], + "url-filter": "^https://" + } + }]"####, + ); + test_from_abp("||com/banners/$image,object,subdocument,domain=~pingdom.com|~thetvdb.com|~tooltrucks.com", r####"[{ + "action": { + "type": "block" + }, + "trigger": { + "url-filter": "^[^:]+:(//)?([^/]+\\.)?com/banners/", + "unless-domain": [ + "*pingdom.com", + "*thetvdb.com", + "*tooltrucks.com" + ], + "resource-type": [ + "image" + ] + } + }, { + "trigger": { + "url-filter": "^[^:]+:(//)?([^/]+\\.)?com/banners/", + "unless-domain": [ + "*pingdom.com", + "*thetvdb.com", + "*tooltrucks.com" + ], + "resource-type": [ + "document" + ], + "load-type": [ + "third-party" + ] + }, + "action": { + "type": "block" + } + }]"####); + test_from_abp( + "$image,third-party,xmlhttprequest,domain=rd.com", + r####"[{ + "action": { + "type": "block" + }, + "trigger": { + "url-filter": "^https?://", + "if-domain": [ + "*rd.com" + ], + "resource-type": [ + "image", + "raw" + ], + "load-type": [ + "third-party" + ] + } + }]"####, + ); + test_from_abp( + "|https://r.i.ua^", + r####"[{ + "action": { + "type": "block" + }, + "trigger": { + "url-filter": "^https://r\\.i\\.ua" + } + }]"####, + ); + test_from_abp( + "|ws://$domain=4shared.com", + r####"[{ + "action": { + "type": "block" + }, + "trigger": { + "url-filter": "^wss?://", + "if-domain": [ + "*4shared.com" + ] + } + }]"####, + ); + } + + #[test] + fn element_hiding_tests() { + test_from_abp( + "###A9AdsMiddleBoxTop", + r####"[{ + "action": { + "type": "css-display-none", + "selector": "#A9AdsMiddleBoxTop" + }, + "trigger": { + "url-filter": ".*" + } + }]"####, + ); + test_from_abp( + "thedailygreen.com#@##AD_banner", + r####"[{ + "action": { + "type": "css-display-none", + "selector": "#AD_banner" + }, + "trigger": { + "url-filter": ".*", + "unless-domain": [ + "thedailygreen.com" + ] + } + }]"####, + ); + test_from_abp( + "sprouts.com,tbns.com.au#@##AdImage", + r####"[{ + "action": { + "type": "css-display-none", + "selector": "#AdImage" + }, + "trigger": { + "url-filter": ".*", + "unless-domain": [ + "sprouts.com", + "tbns.com.au" + ] + } + }]"####, + ); + test_from_abp( + r#"santander.co.uk#@#a[href^="http://ad-emea.doubleclick.net/"]"#, + r####"[{ + "action": { + "type": "css-display-none", + "selector": "a[href^=\"http://ad-emea.doubleclick.net/\"]" + }, + "trigger": { + "url-filter": ".*", + "unless-domain": [ + "santander.co.uk" + ] + } + }]"####, + ); + test_from_abp( + "search.safefinder.com,search.snapdo.com###ABottomD", + r####"[{ + "action": { + "type": "css-display-none", + "selector": "#ABottomD" + }, + "trigger": { + "url-filter": ".*", + "if-domain": [ + "search.safefinder.com", + "search.snapdo.com" + ] + } + }]"####, + ); + test_from_abp( + r#"tweakguides.com###adbar > br + p[style="text-align: center"] + p[style="text-align: center"]"#, + r####"[{ + "action": { + "type": "css-display-none", + "selector": "#adbar > br + p[style=\"text-align: center\"] + p[style=\"text-align: center\"]" + }, + "trigger": { + "url-filter": ".*", + "if-domain": [ + "tweakguides.com" + ] + } + }]"####, + ); + } + + /* TODO - `$popup` is currently unsupported by NetworkFilter + #[test] + fn popup_tests() { + test_from_abp("||admngronline.com^$popup,third-party", r####"[{ + "action": { + "type": "block" + }, + "trigger": { + "url-filter": "^https?://admngronline\\.com(?:[\\x00-\\x24\\x26-\\x2C\\x2F\\x3A-\\x40\\x5B-\\x5E\\x60\\x7B-\\x7F]|$)", + "load-type": [ + "third-party" + ], + "resource-type": [ + "popup" + ] + } + }]"####); + test_from_abp("||bet365.com^*affiliate=$popup", r####"[{ + "action": { + "type": "block" + }, + "trigger": { + "url-filter": "^https?://bet365\\.com(?:[\\x00-\\x24\\x26-\\x2C\\x2F\\x3A-\\x40\\x5B-\\x5E\\x60\\x7B-\\x7F]|$).*affiliate=", + "resource-type": [ + "popup" + ] + } + }]"####); + } + */ + + #[test] + fn third_party() { + test_from_abp( + "||007-gateway.com^$third-party", + r####"[{ + "action": { + "type": "block" + }, + "trigger": { + "url-filter": "^[^:]+:(//)?([^/]+\\.)?007-gateway\\.com", + "load-type": [ + "third-party" + ] + } + }]"####, + ); + test_from_abp( + "||allestörungen.at^$third-party", + r####"[{ + "action": { + "type": "block" + }, + "trigger": { + "url-filter": "^[^:]+:(//)?([^/]+\\.)?xn--allestrungen-9ib\\.at", + "load-type": [ + "third-party" + ] + } + }]"####, + ); + test_from_abp( + "||anet*.tradedoubler.com^$third-party", + r####"[{ + "action": { + "type": "block" + }, + "trigger": { + "url-filter": "^[^:]+:(//)?([^/]+\\.)?anet.*\\.tradedoubler\\.com", + "load-type": [ + "third-party" + ] + } + }]"####, + ); + test_from_abp("||doubleclick.net^$third-party,domain=3news.co.nz|92q.com|abc-7.com|addictinggames.com|allbusiness.com|allthingsd.com|bizjournals.com|bloomberg.com|bnn.ca|boom92houston.com|boom945.com|boomphilly.com|break.com|cbc.ca|cbs19.tv|cbs3springfield.com|cbsatlanta.com|cbslocal.com|complex.com|dailymail.co.uk|darkhorizons.com|doubleviking.com|euronews.com|extratv.com|fandango.com|fox19.com|fox5vegas.com|gorillanation.com|hawaiinewsnow.com|hellobeautiful.com|hiphopnc.com|hot1041stl.com|hothiphopdetroit.com|hotspotatl.com|hulu.com|imdb.com|indiatimes.com|indyhiphop.com|ipowerrichmond.com|joblo.com|kcra.com|kctv5.com|ketv.com|koat.com|koco.com|kolotv.com|kpho.com|kptv.com|ksat.com|ksbw.com|ksfy.com|ksl.com|kypost.com|kysdc.com|live5news.com|livestation.com|livestream.com|metro.us|metronews.ca|miamiherald.com|my9nj.com|myboom1029.com|mycolumbusmagic.com|mycolumbuspower.com|myfoxdetroit.com|myfoxorlando.com|myfoxphilly.com|myfoxphoenix.com|myfoxtampabay.com|nbcrightnow.com|neatorama.com|necn.com|neopets.com|news.com.au|news4jax.com|newsone.com|nintendoeverything.com|oldschoolcincy.com|own3d.tv|pagesuite-professional.co.uk|pandora.com|player.theplatform.com|ps3news.com|radio.com|radionowindy.com|rottentomatoes.com|sbsun.com|shacknews.com|sk-gaming.com|ted.com|thebeatdfw.com|theboxhouston.com|theglobeandmail.com|timesnow.tv|tv2.no|twitch.tv|universalsports.com|ustream.tv|wapt.com|washingtonpost.com|wate.com|wbaltv.com|wcvb.com|wdrb.com|wdsu.com|wflx.com|wfmz.com|wfsb.com|wgal.com|whdh.com|wired.com|wisn.com|wiznation.com|wlky.com|wlns.com|wlwt.com|wmur.com|wnem.com|wowt.com|wral.com|wsj.com|wsmv.com|wsvn.com|wtae.com|wthr.com|wxii12.com|wyff4.com|yahoo.com|youtube.com|zhiphopcleveland.com", r####"[{ + "action": { + "type": "block" + }, + "trigger": { + "url-filter": "^[^:]+:(//)?([^/]+\\.)?doubleclick\\.net", + "load-type": [ + "third-party" + ], + "if-domain": [ + "*3news.co.nz", + "*92q.com", + "*abc-7.com", + "*addictinggames.com", + "*allbusiness.com", + "*allthingsd.com", + "*bizjournals.com", + "*bloomberg.com", + "*bnn.ca", + "*boom92houston.com", + "*boom945.com", + "*boomphilly.com", + "*break.com", + "*cbc.ca", + "*cbs19.tv", + "*cbs3springfield.com", + "*cbsatlanta.com", + "*cbslocal.com", + "*complex.com", + "*dailymail.co.uk", + "*darkhorizons.com", + "*doubleviking.com", + "*euronews.com", + "*extratv.com", + "*fandango.com", + "*fox19.com", + "*fox5vegas.com", + "*gorillanation.com", + "*hawaiinewsnow.com", + "*hellobeautiful.com", + "*hiphopnc.com", + "*hot1041stl.com", + "*hothiphopdetroit.com", + "*hotspotatl.com", + "*hulu.com", + "*imdb.com", + "*indiatimes.com", + "*indyhiphop.com", + "*ipowerrichmond.com", + "*joblo.com", + "*kcra.com", + "*kctv5.com", + "*ketv.com", + "*koat.com", + "*koco.com", + "*kolotv.com", + "*kpho.com", + "*kptv.com", + "*ksat.com", + "*ksbw.com", + "*ksfy.com", + "*ksl.com", + "*kypost.com", + "*kysdc.com", + "*live5news.com", + "*livestation.com", + "*livestream.com", + "*metro.us", + "*metronews.ca", + "*miamiherald.com", + "*my9nj.com", + "*myboom1029.com", + "*mycolumbusmagic.com", + "*mycolumbuspower.com", + "*myfoxdetroit.com", + "*myfoxorlando.com", + "*myfoxphilly.com", + "*myfoxphoenix.com", + "*myfoxtampabay.com", + "*nbcrightnow.com", + "*neatorama.com", + "*necn.com", + "*neopets.com", + "*news.com.au", + "*news4jax.com", + "*newsone.com", + "*nintendoeverything.com", + "*oldschoolcincy.com", + "*own3d.tv", + "*pagesuite-professional.co.uk", + "*pandora.com", + "*player.theplatform.com", + "*ps3news.com", + "*radio.com", + "*radionowindy.com", + "*rottentomatoes.com", + "*sbsun.com", + "*shacknews.com", + "*sk-gaming.com", + "*ted.com", + "*thebeatdfw.com", + "*theboxhouston.com", + "*theglobeandmail.com", + "*timesnow.tv", + "*tv2.no", + "*twitch.tv", + "*universalsports.com", + "*ustream.tv", + "*wapt.com", + "*washingtonpost.com", + "*wate.com", + "*wbaltv.com", + "*wcvb.com", + "*wdrb.com", + "*wdsu.com", + "*wflx.com", + "*wfmz.com", + "*wfsb.com", + "*wgal.com", + "*whdh.com", + "*wired.com", + "*wisn.com", + "*wiznation.com", + "*wlky.com", + "*wlns.com", + "*wlwt.com", + "*wmur.com", + "*wnem.com", + "*wowt.com", + "*wral.com", + "*wsj.com", + "*wsmv.com", + "*wsvn.com", + "*wtae.com", + "*wthr.com", + "*wxii12.com", + "*wyff4.com", + "*yahoo.com", + "*youtube.com", + "*zhiphopcleveland.com" + ] + } + }]"####); + test_from_abp("||dt00.net^$third-party,domain=~marketgid.com|~marketgid.ru|~marketgid.ua|~mgid.com|~thechive.com", r####"[{ + "action": { + "type": "block" + }, + "trigger": { + "url-filter": "^[^:]+:(//)?([^/]+\\.)?dt00\\.net", + "load-type": [ + "third-party" + ], + "unless-domain": [ + "*marketgid.com", + "*marketgid.ru", + "*marketgid.ua", + "*mgid.com", + "*thechive.com" + ] + } + }]"####); + test_from_abp("||amazonaws.com/newscloud-production/*/backgrounds/$domain=crescent-news.com|daily-jeff.com|recordpub.com|state-journal.com|the-daily-record.com|the-review.com|times-gazette.com", r####"[{ + "action": { + "type": "block" + }, + "trigger": { + "url-filter": "^[^:]+:(//)?([^/]+\\.)?amazonaws\\.com/newscloud-production/.*/backgrounds/", + "if-domain": [ + "*crescent-news.com", + "*daily-jeff.com", + "*recordpub.com", + "*state-journal.com", + "*the-daily-record.com", + "*the-review.com", + "*times-gazette.com" + ] + } + }]"####); + test_from_abp( + "||d1noellhv8fksc.cloudfront.net^", + r####"[{ + "action": { + "type": "block" + }, + "trigger": { + "url-filter": "^[^:]+:(//)?([^/]+\\.)?d1noellhv8fksc\\.cloudfront\\.net" + } + }]"####, + ); + } + + #[test] + fn whitelist() { + test_from_abp( + "@@||google.com/recaptcha/$domain=mediafire.com", + r####"[{ + "action": { + "type": "ignore-previous-rules" + }, + "trigger": { + "url-filter": "^[^:]+:(//)?([^/]+\\.)?google\\.com/recaptcha/", + "if-domain": [ + "*mediafire.com" + ] + } + }]"####, + ); + test_from_abp( + "@@||ad4.liverail.com/?compressed|$domain=majorleaguegaming.com|pbs.org|wikihow.com", + r####"[{ + "action": { + "type": "ignore-previous-rules" + }, + "trigger": { + "url-filter": "^[^:]+:(//)?([^/]+\\.)?ad4\\.liverail\\.com/\\?compressed$", + "if-domain": [ + "*majorleaguegaming.com", + "*pbs.org", + "*wikihow.com" + ] + } + }]"####, + ); + test_from_abp( + "@@||googletagservices.com/tag/js/gpt.js$domain=allestoringen.nl|allestörungen.at", + r####"[{ + "action": { + "type": "ignore-previous-rules" + }, + "trigger": { + "url-filter": "^[^:]+:(//)?([^/]+\\.)?googletagservices\\.com/tag/js/gpt\\.js", + "if-domain": [ + "*allestoringen.nl", + "*xn--allestrungen-9ib.at" + ] + } + }]"####, + ); + test_from_abp( + "@@||advertising.autotrader.co.uk^$~third-party", + r####"[{ + "action": { + "type": "ignore-previous-rules" + }, + "trigger": { + "load-type": [ + "first-party" + ], + "url-filter": "^[^:]+:(//)?([^/]+\\.)?advertising\\.autotrader\\.co\\.uk" + } + }]"####, + ); + test_from_abp( + "@@||advertising.racingpost.com^$image,script,stylesheet,~third-party,xmlhttprequest", + r####"[{ + "action": { + "type": "ignore-previous-rules" + }, + "trigger": { + "load-type": [ + "first-party" + ], + "url-filter": "^[^:]+:(//)?([^/]+\\.)?advertising\\.racingpost\\.com", + "resource-type": [ + "image", + "style-sheet", + "script", + "raw" + ] + } + }]"####, + ); + } + + #[test] + fn test_ignore_previous_fp_documents() { + assert_eq!( + vec![ignore_previous_fp_documents()], + serde_json::from_str::>( + r####"[{ + "trigger":{ + "url-filter":".*", + "resource-type":["document"], + "load-type":["first-party"] + }, + "action":{"type":"ignore-previous-rules"} + }]"#### + ) + .expect("content blocking rule under test could not be deserialized") + ); + } + + #[test] + fn escape_literal_backslashes() { + test_from_abp( + r#"||gamer.no/?module=Tumedia\DFProxy\Modules^"#, + r####"[{ + "action": { + "type": "block" + }, + "trigger": { + "url-filter": "^[^:]+:(//)?([^/]+\\.)?gamer\\.no/\\?module=tumedia\\\\dfproxy\\\\modules" + } + }]"####, + ); + } +} + +#[cfg(test)] +mod filterset_tests { + use crate::lists::{FilterSet, ParseOptions, RuleTypes}; + + const FILTER_LIST: &[&str] = &[ + "||example.com^$script", + "||test.net^$image,third-party", + "/trackme.js^$script", + "example.com##.ad-banner", + "##.ad-640x480", + "##p.sponsored", + ]; + + #[test] + fn convert_all_rules() -> Result<(), ()> { + let mut set = FilterSet::new(true); + set.add_filters(FILTER_LIST, Default::default()); + + let (cb_rules, used_rules) = set.into_content_blocking()?; + assert_eq!(used_rules, FILTER_LIST); + + // All 6 rules plus `ignore_previous_fp_documents()` + assert_eq!(cb_rules.len(), 7); + + Ok(()) + } + + #[test] + fn convert_network_only() -> Result<(), ()> { + let parse_opts = ParseOptions { + rule_types: RuleTypes::NetworkOnly, + ..Default::default() + }; + + let mut set = FilterSet::new(true); + set.add_filters(FILTER_LIST, parse_opts); + + let (cb_rules, used_rules) = set.into_content_blocking()?; + assert_eq!(used_rules, &FILTER_LIST[0..3]); + + // 3 network rules plus `ignore_previous_fp_documents()` + assert_eq!(cb_rules.len(), 4); + + Ok(()) + } + + #[test] + fn convert_cosmetic_only() -> Result<(), ()> { + let parse_opts = ParseOptions { + rule_types: RuleTypes::CosmeticOnly, + ..Default::default() + }; + + let mut set = FilterSet::new(true); + set.add_filters(FILTER_LIST, parse_opts); + + let (cb_rules, used_rules) = set.into_content_blocking()?; + assert_eq!(used_rules, &FILTER_LIST[3..6]); + + // 3 cosmetic rules only + assert_eq!(cb_rules.len(), 3); + + Ok(()) + } + + #[test] + fn ignore_unsupported_rules() -> Result<(), ()> { + let mut set = FilterSet::new(true); + set.add_filters(FILTER_LIST, Default::default()); + set.add_filters( + [ + // unicode characters + "||rgmechanics.info/uploads/660х90_", + "||insaattrendy.com/Upload/bükerbanner*.jpg", + // from domain + "/siropu/am/core.min.js$script,important,from=~audi-sport.net|~hifiwigwam.com", + // leading zero-width space + r#"​##a[href^="https://www.g2fame.com/"] > img"#, + ], + Default::default(), + ); + + let (cb_rules, used_rules) = set.into_content_blocking()?; + assert_eq!(used_rules, FILTER_LIST); + + // All 6 rules plus `ignore_previous_fp_documents()` + assert_eq!(cb_rules.len(), 7); + + Ok(()) + } + + #[test] + fn punycode_if_domains() -> Result<(), ()> { + let list = [ + "smskaraborg.se,örnsköldsviksgymnasium.se,mojligheternashusab.se##.env-modal-dialog__backdrop", + ]; + let mut set = FilterSet::new(true); + set.add_filters(&list, Default::default()); + + let (cb_rules, used_rules) = set.into_content_blocking()?; + assert_eq!(used_rules, list); + + assert_eq!(cb_rules.len(), 1); + assert!(cb_rules[0].trigger.if_domain.is_some()); + assert_eq!( + cb_rules[0].trigger.if_domain.as_ref().unwrap(), + &[ + "smskaraborg.se", + "xn--rnskldsviksgymnasium-29be.se", + "mojligheternashusab.se" + ] + ); + + Ok(()) + } + + #[test] + fn convert_cosmetic_filter_locations() -> Result<(), ()> { + let list = [ + r"/^dizipal\d+\.com$/##.web", + r"/^example\d+\.com$/,test.net,b.*##.ad", + ]; + let mut set = FilterSet::new(true); + set.add_filters(&list, Default::default()); + + let (cb_rules, used_rules) = set.into_content_blocking()?; + assert_eq!(used_rules.len(), 1); + assert_eq!(cb_rules.len(), 1); + assert!(cb_rules[0].trigger.if_domain.is_some()); + assert_eq!( + cb_rules[0].trigger.if_domain.as_ref().unwrap(), + &["test.net"] + ); + + Ok(()) + } +} diff --git a/tests/unit/cosmetic_filter_cache.rs b/tests/unit/cosmetic_filter_cache.rs new file mode 100644 index 00000000..ff7c39c1 --- /dev/null +++ b/tests/unit/cosmetic_filter_cache.rs @@ -0,0 +1,691 @@ +#[cfg(test)] +mod key_from_selector_tests { + use super::super::key_from_selector; + + #[test] + fn no_escapes() { + assert_eq!(key_from_selector(r#"#selector"#).unwrap(), "#selector"); + assert_eq!( + key_from_selector(r#"#ad-box[href="https://popads.net"]"#).unwrap(), + "#ad-box" + ); + assert_eq!(key_from_selector(r#".p"#).unwrap(), ".p"); + assert_eq!(key_from_selector(r#".ad #ad.adblockblock"#).unwrap(), ".ad"); + assert_eq!( + key_from_selector(r#"#container.contained"#).unwrap(), + "#container" + ); + } + + #[test] + fn escaped_characters() { + assert_eq!( + key_from_selector(r"#Meebo\:AdElement\.Root").unwrap(), + "#Meebo:AdElement.Root" + ); + assert_eq!( + key_from_selector(r"#\ Banner\ Ad\ -\ 590\ x\ 90").unwrap(), + "# Banner Ad - 590 x 90" + ); + assert_eq!(key_from_selector(r"#\ rek").unwrap(), "# rek"); + assert_eq!( + key_from_selector(r#"#\:rr .nH[role="main"] .mq:first-child"#).unwrap(), + "#:rr" + ); + assert_eq!( + key_from_selector(r#"#adspot-300x600\,300x250-pos-1"#).unwrap(), + "#adspot-300x600,300x250-pos-1" + ); + assert_eq!( + key_from_selector(r#"#adv_\'146\'"#).unwrap(), + "#adv_\'146\'" + ); + assert_eq!( + key_from_selector(r#"#oas-mpu-left\<\/div\>"#).unwrap(), + "#oas-mpu-left" + ); + assert_eq!( + key_from_selector(r#".Trsp\(op\).Trsdu\(3s\)"#).unwrap(), + ".Trsp(op)" + ); + } + + #[test] + fn escape_codes() { + assert_eq!( + key_from_selector(r#"#\5f _mom_ad_12"#).unwrap(), + "#__mom_ad_12" + ); + assert_eq!( + key_from_selector(r#"#\5f _nq__hh[style="display:block!important"]"#).unwrap(), + "#__nq__hh" + ); + assert_eq!( + key_from_selector(r#"#\31 000-014-ros"#).unwrap(), + "#1000-014-ros" + ); + assert_eq!(key_from_selector(r#"#\33 00X250ad"#).unwrap(), "#300X250ad"); + assert_eq!(key_from_selector(r#"#\5f _fixme"#).unwrap(), "#__fixme"); + assert_eq!(key_from_selector(r#"#\37 28ad"#).unwrap(), "#728ad"); + } + + #[test] + fn bad_escapes() { + assert!(key_from_selector(r#"#\5ffffffffff overflows"#).is_none()); + assert!(key_from_selector(r#"#\5fffffff is_too_large"#).is_none()); + } +} + +#[cfg(test)] +mod cosmetic_cache_tests { + use super::super::*; + use crate::resources::Resource; + + fn cache_from_rules(rules: Vec<&str>) -> CosmeticFilterCache { + let parsed_rules = rules + .iter() + .map(|r| CosmeticFilter::parse(r, false, Default::default()).unwrap()) + .collect::>(); + + CosmeticFilterCache::from_rules(parsed_rules) + } + + #[test] + fn exceptions() { + let cfcache = cache_from_rules(vec!["~example.com##.item", "sub.example.com#@#.item2"]); + let resources = ResourceStorage::default(); + + let out = cfcache.hostname_cosmetic_resources(&resources, "test.com", false); + let mut expected = UrlSpecificResources::empty(); + assert_eq!(out, expected); + + let out = cfcache.hostname_cosmetic_resources(&resources, "example.com", false); + expected.exceptions.insert(".item".into()); + assert_eq!(out, expected); + + let out = cfcache.hostname_cosmetic_resources(&resources, "sub.example.com", false); + expected.exceptions.insert(".item2".into()); + assert_eq!(out, expected); + } + + #[test] + fn exceptions2() { + let cfcache = cache_from_rules(vec!["example.com,~sub.example.com##.item"]); + let resources = ResourceStorage::default(); + + let out = cfcache.hostname_cosmetic_resources(&resources, "test.com", false); + let mut expected = UrlSpecificResources::empty(); + assert_eq!(out, expected); + + let out = cfcache.hostname_cosmetic_resources(&resources, "example.com", false); + expected.hide_selectors.insert(".item".to_owned()); + assert_eq!(out, expected); + + let out = cfcache.hostname_cosmetic_resources(&resources, "sub.example.com", false); + let mut expected = UrlSpecificResources::empty(); + expected.exceptions.insert(".item".into()); + assert_eq!(out, expected); + } + + #[test] + fn style_exceptions() { + let cfcache = cache_from_rules(vec![ + "example.com,~sub.example.com##.element:style(background: #fff)", + "sub.test.example.com#@#.element:style(background: #fff)", + "a1.sub.example.com##.element", + "a2.sub.example.com##.element:style(background: #000)", + "a3.example.com##.element:style(background: #000)", + ]); + let resources = ResourceStorage::default(); + + let out = cfcache.hostname_cosmetic_resources(&resources, "sub.example.com", false); + let mut expected = UrlSpecificResources::empty(); + assert_eq!(out, expected); + + let out = cfcache.hostname_cosmetic_resources(&resources, "sub.test.example.com", false); + assert_eq!(out, expected); + + let out = cfcache.hostname_cosmetic_resources(&resources, "a1.sub.example.com", false); + expected.hide_selectors.insert(".element".to_owned()); + assert_eq!(out, expected); + + let out = cfcache.hostname_cosmetic_resources(&resources, "test.example.com", false); + expected.hide_selectors.clear(); + expected.procedural_actions.insert( + serde_json::to_string(&ProceduralOrActionFilter::from_css( + ".element".to_string(), + "background: #fff".to_string(), + )) + .unwrap(), + ); + assert_eq!(out, expected); + + let out = cfcache.hostname_cosmetic_resources(&resources, "a2.sub.example.com", false); + expected.procedural_actions.clear(); + expected.procedural_actions.insert( + serde_json::to_string(&ProceduralOrActionFilter::from_css( + ".element".to_string(), + "background: #000".to_string(), + )) + .unwrap(), + ); + assert_eq!(out, expected); + + let out = cfcache.hostname_cosmetic_resources(&resources, "a3.example.com", false); + expected.procedural_actions.clear(); + expected.procedural_actions.insert( + serde_json::to_string(&ProceduralOrActionFilter::from_css( + ".element".to_string(), + "background: #000".to_string(), + )) + .unwrap(), + ); + expected.procedural_actions.insert( + serde_json::to_string(&ProceduralOrActionFilter::from_css( + ".element".to_string(), + "background: #fff".to_string(), + )) + .unwrap(), + ); + assert_eq!(out, expected); + } + + #[test] + fn script_exceptions() { + use crate::resources::{MimeType, ResourceType}; + + let cfcache = cache_from_rules(vec![ + "example.com,~sub.example.com##+js(set-constant.js, atob, trueFunc)", + "sub.test.example.com#@#+js(set-constant.js, atob, trueFunc)", + "cosmetic.net##+js(nowebrtc.js)", + "g.cosmetic.net##+js(window.open-defuser.js)", + "c.g.cosmetic.net#@#+js(nowebrtc.js)", + "d.g.cosmetic.net#@#+js()", + ]); + let resources = ResourceStorage::from_resources([ + Resource { + name: "set-constant.js".into(), + aliases: vec![], + kind: ResourceType::Template, + content: base64::encode("set-constant.js, {{1}}, {{2}}"), + dependencies: vec![], + permission: Default::default(), + }, + Resource::simple( + "nowebrtc.js", + MimeType::ApplicationJavascript, + "nowebrtc.js", + ), + Resource::simple( + "window.open-defuser.js", + MimeType::ApplicationJavascript, + "window.open-defuser.js", + ), + ]); + + let out = cfcache.hostname_cosmetic_resources(&resources, "sub.example.com", false); + let mut expected = UrlSpecificResources::empty(); + assert_eq!(out, expected); + + let out = cfcache.hostname_cosmetic_resources(&resources, "sub.test.example.com", false); + assert_eq!(out, expected); + + let out = cfcache.hostname_cosmetic_resources(&resources, "test.example.com", false); + expected.injected_script = + "try {\nset-constant.js, atob, trueFunc\n} catch ( e ) { }\n".to_owned(); + assert_eq!(out, expected); + + let out = cfcache.hostname_cosmetic_resources(&resources, "cosmetic.net", false); + expected.injected_script = "try {\nnowebrtc.js\n} catch ( e ) { }\n".to_owned(); + assert_eq!(out, expected); + + let out = cfcache.hostname_cosmetic_resources(&resources, "g.cosmetic.net", false); + expected.injected_script = "try {\nnowebrtc.js\n} catch ( e ) { }\ntry {\nwindow.open-defuser.js\n} catch ( e ) { }\n".to_owned(); + // order is non-deterministic + if out != expected { + expected.injected_script = "try {\nwindow.open-defuser.js\n} catch ( e ) { }\ntry {\nnowebrtc.js\n} catch ( e ) { }\n".to_owned(); + assert_eq!(out, expected); + } + + let out = cfcache.hostname_cosmetic_resources(&resources, "c.g.cosmetic.net", false); + expected.injected_script = "try {\nwindow.open-defuser.js\n} catch ( e ) { }\n".to_owned(); + assert_eq!(out, expected); + + let out = cfcache.hostname_cosmetic_resources(&resources, "d.g.cosmetic.net", false); + expected.injected_script = "".to_owned(); + assert_eq!(out, expected); + } + + #[test] + fn remove_exceptions() { + let cfcache = cache_from_rules(vec![ + "example.com,~sub.example.com##.element:remove()", + "sub.test.example.com#@#.element:remove()", + "a1.sub.example.com##.element", + "a2.sub.example.com##.element:remove()", + "a3.example.com##.element:remove()", + ]); + let resources = ResourceStorage::default(); + + let out = cfcache.hostname_cosmetic_resources(&resources, "sub.example.com", false); + let mut expected = UrlSpecificResources::empty(); + assert_eq!(out, expected); + + let out = cfcache.hostname_cosmetic_resources(&resources, "sub.test.example.com", false); + assert_eq!(out, expected); + + let out = cfcache.hostname_cosmetic_resources(&resources, "a1.sub.example.com", false); + expected.hide_selectors.insert(".element".to_owned()); + assert_eq!(out, expected); + + let out = cfcache.hostname_cosmetic_resources(&resources, "test.example.com", false); + expected.hide_selectors.clear(); + expected.procedural_actions.insert( + serde_json::to_string(&ProceduralOrActionFilter { + selector: vec![CosmeticFilterOperator::CssSelector(".element".to_string())], + action: Some(CosmeticFilterAction::Remove), + }) + .unwrap(), + ); + assert_eq!(out, expected); + + let out = cfcache.hostname_cosmetic_resources(&resources, "a2.sub.example.com", false); + expected.procedural_actions.clear(); + assert_eq!(out, expected); + + let out = cfcache.hostname_cosmetic_resources(&resources, "a3.example.com", false); + expected.procedural_actions.clear(); + expected.procedural_actions.insert( + serde_json::to_string(&ProceduralOrActionFilter { + selector: vec![CosmeticFilterOperator::CssSelector(".element".to_string())], + action: Some(CosmeticFilterAction::Remove), + }) + .unwrap(), + ); + assert_eq!(out, expected); + } + + #[test] + fn remove_attr_exceptions() { + let cfcache = cache_from_rules(vec![ + "example.com,~sub.example.com##.element:remove-attr(style)", + "sub.test.example.com#@#.element:remove-attr(style)", + "a1.sub.example.com##.element", + "a2.sub.example.com##.element:remove-attr(src)", + "a3.example.com##.element:remove-attr(src)", + ]); + let resources = ResourceStorage::default(); + + let out = cfcache.hostname_cosmetic_resources(&resources, "sub.example.com", false); + let mut expected = UrlSpecificResources::empty(); + assert_eq!(out, expected); + + let out = cfcache.hostname_cosmetic_resources(&resources, "sub.test.example.com", false); + assert_eq!(out, expected); + + let out = cfcache.hostname_cosmetic_resources(&resources, "a1.sub.example.com", false); + expected.hide_selectors.insert(".element".to_owned()); + assert_eq!(out, expected); + + let out = cfcache.hostname_cosmetic_resources(&resources, "test.example.com", false); + expected.hide_selectors.clear(); + expected.procedural_actions.insert( + serde_json::to_string(&ProceduralOrActionFilter { + selector: vec![CosmeticFilterOperator::CssSelector(".element".to_string())], + action: Some(CosmeticFilterAction::RemoveAttr("style".to_string())), + }) + .unwrap(), + ); + assert_eq!(out, expected); + + let out = cfcache.hostname_cosmetic_resources(&resources, "a2.sub.example.com", false); + expected.procedural_actions.clear(); + expected.procedural_actions.insert( + serde_json::to_string(&ProceduralOrActionFilter { + selector: vec![CosmeticFilterOperator::CssSelector(".element".to_string())], + action: Some(CosmeticFilterAction::RemoveAttr("src".to_string())), + }) + .unwrap(), + ); + assert_eq!(out, expected); + + let out = cfcache.hostname_cosmetic_resources(&resources, "a3.example.com", false); + expected.procedural_actions.clear(); + expected.procedural_actions.insert( + serde_json::to_string(&ProceduralOrActionFilter { + selector: vec![CosmeticFilterOperator::CssSelector(".element".to_string())], + action: Some(CosmeticFilterAction::RemoveAttr("src".to_string())), + }) + .unwrap(), + ); + expected.procedural_actions.insert( + serde_json::to_string(&ProceduralOrActionFilter { + selector: vec![CosmeticFilterOperator::CssSelector(".element".to_string())], + action: Some(CosmeticFilterAction::RemoveAttr("style".to_string())), + }) + .unwrap(), + ); + assert_eq!(out, expected); + } + + #[test] + fn remove_class_exceptions() { + let cfcache = cache_from_rules(vec![ + "example.com,~sub.example.com##.element:remove-class(overlay)", + "sub.test.example.com#@#.element:remove-class(overlay)", + "a1.sub.example.com##.element", + "a2.sub.example.com##.element:remove-class(banner)", + "a3.example.com##.element:remove-class(banner)", + ]); + let resources = ResourceStorage::default(); + + let out = cfcache.hostname_cosmetic_resources(&resources, "sub.example.com", false); + let mut expected = UrlSpecificResources::empty(); + assert_eq!(out, expected); + + let out = cfcache.hostname_cosmetic_resources(&resources, "sub.test.example.com", false); + assert_eq!(out, expected); + + let out = cfcache.hostname_cosmetic_resources(&resources, "a1.sub.example.com", false); + expected.hide_selectors.insert(".element".to_owned()); + assert_eq!(out, expected); + + let out = cfcache.hostname_cosmetic_resources(&resources, "test.example.com", false); + expected.hide_selectors.clear(); + expected.procedural_actions.insert( + serde_json::to_string(&ProceduralOrActionFilter { + selector: vec![CosmeticFilterOperator::CssSelector(".element".to_string())], + action: Some(CosmeticFilterAction::RemoveClass("overlay".to_string())), + }) + .unwrap(), + ); + assert_eq!(out, expected); + + let out = cfcache.hostname_cosmetic_resources(&resources, "a2.sub.example.com", false); + expected.procedural_actions.clear(); + expected.procedural_actions.insert( + serde_json::to_string(&ProceduralOrActionFilter { + selector: vec![CosmeticFilterOperator::CssSelector(".element".to_string())], + action: Some(CosmeticFilterAction::RemoveClass("banner".to_string())), + }) + .unwrap(), + ); + assert_eq!(out, expected); + + let out = cfcache.hostname_cosmetic_resources(&resources, "a3.example.com", false); + expected.procedural_actions.clear(); + expected.procedural_actions.insert( + serde_json::to_string(&ProceduralOrActionFilter { + selector: vec![CosmeticFilterOperator::CssSelector(".element".to_string())], + action: Some(CosmeticFilterAction::RemoveClass("banner".to_string())), + }) + .unwrap(), + ); + expected.procedural_actions.insert( + serde_json::to_string(&ProceduralOrActionFilter { + selector: vec![CosmeticFilterOperator::CssSelector(".element".to_string())], + action: Some(CosmeticFilterAction::RemoveClass("overlay".to_string())), + }) + .unwrap(), + ); + assert_eq!(out, expected); + } + + #[test] + #[cfg(feature = "css-validation")] + fn procedural_actions() { + let cfcache = cache_from_rules(vec![ + "example.com##div:has(video):remove()", + "example.com##div:has-text(Ad):remove()", + "example.com##div:has-text(Sponsored) > p", + "example.com##div:has-text(Cookie) > p:remove-class(overlay)", + ]); + let resources = ResourceStorage::default(); + + let out = cfcache.hostname_cosmetic_resources(&resources, "example.com", false); + let mut expected = UrlSpecificResources::empty(); + expected.procedural_actions.insert( + serde_json::to_string(&ProceduralOrActionFilter { + selector: vec![CosmeticFilterOperator::CssSelector( + "div:has(video)".to_string(), + )], + action: Some(CosmeticFilterAction::Remove), + }) + .unwrap(), + ); + expected.procedural_actions.insert( + serde_json::to_string(&ProceduralOrActionFilter { + selector: vec![ + CosmeticFilterOperator::CssSelector("div".to_string()), + CosmeticFilterOperator::HasText("Ad".to_string()), + ], + action: Some(CosmeticFilterAction::Remove), + }) + .unwrap(), + ); + expected.procedural_actions.insert( + serde_json::to_string(&ProceduralOrActionFilter { + selector: vec![ + CosmeticFilterOperator::CssSelector("div".to_string()), + CosmeticFilterOperator::HasText("Cookie".to_string()), + CosmeticFilterOperator::CssSelector(" > p".to_string()), + ], + action: Some(CosmeticFilterAction::RemoveClass("overlay".to_string())), + }) + .unwrap(), + ); + expected.procedural_actions.insert( + serde_json::to_string(&ProceduralOrActionFilter { + selector: vec![ + CosmeticFilterOperator::CssSelector("div".to_string()), + CosmeticFilterOperator::HasText("Sponsored".to_string()), + CosmeticFilterOperator::CssSelector(" > p".to_string()), + ], + action: None, + }) + .unwrap(), + ); + assert_eq!(out, expected); + } + + /// Avoid impossible type inference for type parameter `impl AsRef` + const EMPTY: &[&str] = &[]; + + #[test] + fn matching_hidden_class_id_selectors() { + let rules = [ + "##.a-class", + "###simple-id", + "##.a-class .with .children", + "##.children .including #simple-id", + "##a.a-class", + ]; + let cfcache = CosmeticFilterCache::from_rules( + rules + .iter() + .map(|r| CosmeticFilter::parse(r, false, Default::default()).unwrap()) + .collect::>(), + ); + + let out = cfcache.hidden_class_id_selectors(["with"], EMPTY, &HashSet::default()); + assert_eq!(out, Vec::::new()); + + let out = cfcache.hidden_class_id_selectors(EMPTY, ["with"], &HashSet::default()); + assert_eq!(out, Vec::::new()); + + let out = cfcache.hidden_class_id_selectors(EMPTY, ["a-class"], &HashSet::default()); + assert_eq!(out, Vec::::new()); + + let out = cfcache.hidden_class_id_selectors(["simple-id"], EMPTY, &HashSet::default()); + assert_eq!(out, Vec::::new()); + + let out = cfcache.hidden_class_id_selectors(["a-class"], EMPTY, &HashSet::default()); + assert_eq!(out, [".a-class", ".a-class .with .children"]); + + let out = + cfcache.hidden_class_id_selectors(["children", "a-class"], EMPTY, &HashSet::default()); + assert_eq!( + out, + [ + ".children .including #simple-id", + ".a-class", + ".a-class .with .children", + ] + ); + + let out = cfcache.hidden_class_id_selectors(EMPTY, ["simple-id"], &HashSet::default()); + assert_eq!(out, ["#simple-id"]); + + let out = cfcache.hidden_class_id_selectors( + ["children", "a-class"], + ["simple-id"], + &HashSet::default(), + ); + assert_eq!( + out, + [ + ".children .including #simple-id", + ".a-class", + ".a-class .with .children", + "#simple-id", + ] + ); + } + + #[test] + fn class_id_exceptions() { + let rules = vec![ + "##.a-class", + "###simple-id", + "##.a-class .with .children", + "##.children .including #simple-id", + "##a.a-class", + "example.*#@#.a-class", + "~test.com###test-element", + ]; + let cfcache = CosmeticFilterCache::from_rules( + rules + .iter() + .map(|r| CosmeticFilter::parse(r, false, Default::default()).unwrap()) + .collect::>(), + ); + let resources = ResourceStorage::default(); + let exceptions = cfcache + .hostname_cosmetic_resources(&resources, "example.co.uk", false) + .exceptions; + + let out = cfcache.hidden_class_id_selectors(["a-class"], EMPTY, &exceptions); + assert_eq!(out, [".a-class .with .children"]); + + let out = + cfcache.hidden_class_id_selectors(["children", "a-class"], ["simple-id"], &exceptions); + assert_eq!( + out, + [ + ".children .including #simple-id", + ".a-class .with .children", + "#simple-id", + ] + ); + + let out = cfcache.hidden_class_id_selectors(EMPTY, ["test-element"], &exceptions); + assert_eq!(out, ["#test-element"]); + + let exceptions = cfcache + .hostname_cosmetic_resources(&resources, "a1.test.com", false) + .exceptions; + + let out = cfcache.hidden_class_id_selectors(["a-class"], EMPTY, &exceptions); + assert_eq!(out, [".a-class", ".a-class .with .children"]); + + let out = + cfcache.hidden_class_id_selectors(["children", "a-class"], ["simple-id"], &exceptions); + assert_eq!( + out, + [ + ".children .including #simple-id", + ".a-class", + ".a-class .with .children", + "#simple-id", + ] + ); + + let out = cfcache.hidden_class_id_selectors(EMPTY, ["test-element"], &exceptions); + assert_eq!(out, Vec::::new()); + } + + #[test] + fn misc_generic_exceptions() { + let rules = vec![ + "##a[href=\"bad.com\"]", + "##div > p", + "##a[href=\"notbad.com\"]", + "example.com#@#div > p", + "~example.com##a[href=\"notbad.com\"]", + ]; + let cfcache = CosmeticFilterCache::from_rules( + rules + .iter() + .map(|r| CosmeticFilter::parse(r, false, Default::default()).unwrap()) + .collect::>(), + ); + let resources = ResourceStorage::default(); + + let hide_selectors = cfcache + .hostname_cosmetic_resources(&resources, "test.com", false) + .hide_selectors; + let mut expected_hides = HashSet::new(); + expected_hides.insert("a[href=\"bad.com\"]".to_owned()); + expected_hides.insert("div > p".to_owned()); + expected_hides.insert("a[href=\"notbad.com\"]".to_owned()); + assert_eq!(hide_selectors, expected_hides); + + let hide_selectors = cfcache + .hostname_cosmetic_resources(&resources, "example.com", false) + .hide_selectors; + let mut expected_hides = HashSet::new(); + expected_hides.insert("a[href=\"bad.com\"]".to_owned()); + assert_eq!(hide_selectors, expected_hides); + } + + #[test] + fn apply_to_tld() { + use crate::resources::ResourceType; + + // toolforge.org and github.io are examples of TLDs with multiple segments. These rules + // should still be parsed correctly and applied on corresponding subdomains. + let rules = vec![ + "toolforge.org##+js(abort-on-property-read, noAdBlockers)", + "github.io##div.adToBlock", + ]; + let cfcache = CosmeticFilterCache::from_rules( + rules + .iter() + .map(|r| CosmeticFilter::parse(r, false, Default::default()).unwrap()) + .collect::>(), + ); + let resources = ResourceStorage::from_resources([Resource { + name: "abort-on-property-read.js".into(), + aliases: vec!["aopr".to_string()], + kind: ResourceType::Template, + content: base64::encode("abort-on-property-read.js, {{1}}"), + dependencies: vec![], + permission: Default::default(), + }]); + + let injected_script = cfcache + .hostname_cosmetic_resources(&resources, "antonok.toolforge.org", false) + .injected_script; + assert_eq!( + injected_script, + "try {\nabort-on-property-read.js, noAdBlockers\n} catch ( e ) { }\n" + ); + + let hide_selectors = cfcache + .hostname_cosmetic_resources(&resources, "antonok.github.io", false) + .hide_selectors; + let mut expected_hides = HashSet::new(); + expected_hides.insert("div.adToBlock".to_owned()); + assert_eq!(hide_selectors, expected_hides); + } +} diff --git a/tests/unit/engine.rs b/tests/unit/engine.rs new file mode 100644 index 00000000..cbc5abf3 --- /dev/null +++ b/tests/unit/engine.rs @@ -0,0 +1,884 @@ +#[cfg(test)] +mod tests { + use super::super::*; + use crate::lists::FilterFormat; + use crate::resources::MimeType; + + #[test] + fn tags_enable_adds_tags() { + let filters = [ + "adv$tag=stuff", + "somelongpath/test$tag=stuff", + "||brianbondy.com/$tag=brian", + "||brave.com$tag=brian", + ]; + let url_results = [ + ("http://example.com/advert.html", true), + ("http://example.com/somelongpath/test/2.html", true), + ("https://brianbondy.com/about", true), + ("https://brave.com/about", true), + ]; + + let mut engine = Engine::from_rules(&filters, Default::default()); + engine.enable_tags(&["stuff"]); + engine.enable_tags(&["brian"]); + + url_results.into_iter().for_each(|(url, expected_result)| { + let request = Request::new(&url, "", "").unwrap(); + let matched_rule = engine.check_network_request(&request); + if expected_result { + assert!(matched_rule.matched, "Expected match for {}", url); + } else { + assert!( + !matched_rule.matched, + "Expected no match for {}, matched with {:?}", + url, matched_rule.filter + ); + } + }); + } + + #[test] + fn tags_disable_works() { + let filters = [ + "adv$tag=stuff", + "somelongpath/test$tag=stuff", + "||brianbondy.com/$tag=brian", + "||brave.com$tag=brian", + ]; + let url_results = [ + ("http://example.com/advert.html", false), + ("http://example.com/somelongpath/test/2.html", false), + ("https://brianbondy.com/about", true), + ("https://brave.com/about", true), + ]; + + let mut engine = Engine::from_rules(&filters, Default::default()); + engine.enable_tags(&["brian", "stuff"]); + engine.disable_tags(&["stuff"]); + + url_results.into_iter().for_each(|(url, expected_result)| { + let request = Request::new(&url, "", "").unwrap(); + let matched_rule = engine.check_network_request(&request); + if expected_result { + assert!(matched_rule.matched, "Expected match for {}", url); + } else { + assert!( + !matched_rule.matched, + "Expected no match for {}, matched with {:?}", + url, matched_rule.filter + ); + } + }); + } + + #[test] + fn exception_tags_inactive_by_default() { + let filters = [ + "adv", + "||brianbondy.com/$tag=brian", + "@@||brianbondy.com/$tag=brian", + ]; + let url_results = [ + ("http://example.com/advert.html", true), + ("https://brianbondy.com/about", false), + ("https://brianbondy.com/advert", true), + ]; + + let engine = Engine::from_rules(&filters, Default::default()); + + url_results.into_iter().for_each(|(url, expected_result)| { + let request = Request::new(&url, "", "").unwrap(); + let matched_rule = engine.check_network_request(&request); + if expected_result { + assert!(matched_rule.matched, "Expected match for {}", url); + } else { + assert!( + !matched_rule.matched, + "Expected no match for {}, matched with {:?}", + url, matched_rule.filter + ); + } + }); + } + + #[test] + fn exception_tags_works() { + let filters = [ + "adv", + "||brianbondy.com/$tag=brian", + "@@||brianbondy.com/$tag=brian", + ]; + let url_results = [ + ("http://example.com/advert.html", true), + ("https://brianbondy.com/about", false), + ("https://brianbondy.com/advert", false), + ]; + + let mut engine = Engine::from_rules(&filters, Default::default()); + engine.enable_tags(&["brian", "stuff"]); + + url_results.into_iter().for_each(|(url, expected_result)| { + let request = Request::new(&url, "", "").unwrap(); + let matched_rule = engine.check_network_request(&request); + if expected_result { + assert!(matched_rule.matched, "Expected match for {}", url); + } else { + assert!( + !matched_rule.matched, + "Expected no match for {}, matched with {:?}", + url, matched_rule.filter + ); + } + }); + } + + #[test] + fn document() { + let filters = ["||example.com$document", "@@||sub.example.com$document"]; + + let engine = Engine::from_rules_debug(&filters, Default::default()); + + assert!( + engine + .check_network_request( + &Request::new("https://example.com", "https://example.com", "document") + .unwrap() + ) + .matched + ); + assert!( + !engine + .check_network_request( + &Request::new("https://example.com", "https://example.com", "script").unwrap() + ) + .matched + ); + assert!(engine + .check_network_request( + &Request::new( + "https://sub.example.com", + "https://sub.example.com", + "document" + ) + .unwrap() + ) + .exception + .is_some()); + } + + #[test] + fn implicit_all() { + { + let engine = Engine::from_rules_debug(["||example.com^"], Default::default()); + assert!( + engine + .check_network_request( + &Request::new("https://example.com", "https://example.com", "document") + .unwrap() + ) + .matched + ); + } + { + let engine = + Engine::from_rules_debug(["||example.com^$first-party"], Default::default()); + assert!( + engine + .check_network_request( + &Request::new("https://example.com", "https://example.com", "document") + .unwrap() + ) + .matched + ); + } + { + let engine = Engine::from_rules_debug(["||example.com^$script"], Default::default()); + assert!( + !engine + .check_network_request( + &Request::new("https://example.com", "https://example.com", "document") + .unwrap() + ) + .matched + ); + } + { + let engine = Engine::from_rules_debug(["||example.com^$~script"], Default::default()); + assert!( + !engine + .check_network_request( + &Request::new("https://example.com", "https://example.com", "document") + .unwrap() + ) + .matched + ); + } + { + let engine = Engine::from_rules_debug( + ["||example.com^$document", "@@||example.com^$generichide"], + Default::default(), + ); + assert!( + engine + .check_network_request( + &Request::new("https://example.com", "https://example.com", "document") + .unwrap() + ) + .matched + ); + } + { + let engine = Engine::from_rules_debug( + ["example.com"], + ParseOptions { + format: FilterFormat::Hosts, + ..Default::default() + }, + ); + assert!( + engine + .check_network_request( + &Request::new("https://example.com", "https://example.com", "document") + .unwrap() + ) + .matched + ); + } + { + let engine = Engine::from_rules_debug(["||example.com/path"], Default::default()); + assert!( + !engine + .check_network_request( + &Request::new( + "https://example.com/path", + "https://example.com/path", + "document" + ) + .unwrap() + ) + .matched + ); + } + { + let engine = Engine::from_rules_debug(["||example.com/path^"], Default::default()); + assert!( + !engine + .check_network_request( + &Request::new( + "https://example.com/path", + "https://example.com/path", + "document" + ) + .unwrap() + ) + .matched + ); + } + } + + #[test] + fn generichide() { + let filters = [ + "##.donotblock", + "##a[href=\"generic.com\"]", + "@@||example.com$generichide", + "example.com##.block", + "@@||example2.com/test.html$generichide", + "example2.com##.block", + ]; + let url_results = [ + ("https://example.com", vec![".block"], true), + ("https://example.com/test.html", vec![".block"], true), + ( + "https://example2.com", + vec![".block", "a[href=\"generic.com\"]"], + false, + ), + ("https://example2.com/test.html", vec![".block"], true), + ]; + + let engine = Engine::from_rules(&filters, Default::default()); + + url_results + .into_iter() + .for_each(|(url, expected_result, expected_generichide)| { + let result = engine.url_cosmetic_resources(url); + assert_eq!( + result.hide_selectors, + expected_result + .iter() + .map(|s| s.to_string()) + .collect::>() + ); + assert_eq!(result.generichide, expected_generichide); + }); + } + + #[test] + fn important_redirect() { + let mut filter_set = FilterSet::new(true); + filter_set.add_filters([ + "||addthis.com^$important,3p,domain=~missingkids.com|~missingkids.org|~sainsburys.jobs|~sitecore.com|~amd.com", + "||addthis.com/*/addthis_widget.js$script,redirect=addthis.com/addthis_widget.js", + ], Default::default()); + let mut engine = Engine::from_filter_set(filter_set, false); + + engine + .add_resource(Resource::simple( + "addthis.com/addthis_widget.js", + MimeType::ApplicationJavascript, + "window.addthis = undefined", + )) + .unwrap(); + + let request = Request::new("https://s7.addthis.com/js/250/addthis_widget.js?pub=resto", "https://www.rhmodern.com/catalog/product/product.jsp?productId=prod14970086&categoryId=cat7150028", "script").unwrap(); + let result = engine.check_network_request(&request); + + assert!(result.redirect.is_some()); + } + + #[test] + fn check_match_case_regex_filtering() { + { + // match case without regex is discarded + let engine = Engine::from_rules_debug(["ad.png$match-case"], Default::default()); + let request = + Request::new("https://example.com/ad.png", "https://example.com", "image").unwrap(); + assert!(!engine.check_network_request(&request).matched); + } + { + // /^https:\/\/[0-9a-z]{3,}\.[-a-z]{10,}\.(?:li[fv]e|top|xyz)\/[a-z]{8}\/\?utm_campaign=\w{40,}/$doc,match-case,domain=life|live|top|xyz + let engine = Engine::from_rules_debug( + [ + r#"/^https:\/\/[0-9a-z]{3,}\.[-a-z]{10,}\.(?:li[fv]e|top|xyz)\/[a-z]{8}\/\?utm_campaign=\w{40,}/$doc,match-case,domain=life|live|top|xyz"#, + ], + Default::default(), + ); + let request = Request::new("https://www.exampleaaa.xyz/testtest/?utm_campaign=aaaaaaaaaabbbbbbbbbbccccccccccdddddddddd", "https://www.exampleaaa.xyz/testtest/?utm_campaign=aaaaaaaaaabbbbbbbbbbccccccccccdddddddddd", "document").unwrap(); + assert!(engine.check_network_request(&request).matched); + } + // fails - because of non-supported look around operator in rust regex https://github.com/rust-lang/regex/issues/127#issuecomment-154713666 + /*{ + // /^https?:\/\/((?!www)[a-z]{3,}|\d{2})?\.?[-0-9a-z]{6,}\.[a-z]{2,6}\/(?:[a-z]{6,8}\/)?\/?\?u=[0-9a-z]{7}&o=[0-9a-z]{7}/$doc,frame,match-case,domain=buzz|com|de|fun|guru|info|life|live|mobi|online|pw|site|space|top|us|xyz + let engine = Engine::from_rules_debug([r#"/^https?:\/\/((?!www)[a-z]{3,}|\d{2})?\.?[-0-9a-z]{6,}\.[a-z]{2,6}\/(?:[a-z]{6,8}\/)?\/?\?u=[0-9a-z]{7}&o=[0-9a-z]{7}/$doc,frame,match-case,domain=buzz|com|de|fun|guru|info|life|live|mobi|online|pw|site|space|top|us|xyz"#], Default::default()); + let request = Request::new("https://example.com/aaaaaa/?u=aaaaaaa&o=bbbbbbb", + "https://example.com/aaaaaa/?u=aaaaaaa&o=bbbbbbb", + "document").unwrap(); + assert!(engine.check_network_request(&request).matched); + }*/ + // fails - because of non-supported look around operator in rust regex https://github.com/rust-lang/regex/issues/127#issuecomment-154713666 + /*{ + // /^https:\/\/(?:www\d\.)?[-a-z]{6,}\.(?:com|info|net|org)\/(?=[-_a-zA-Z]{0,42}\d)(?=[-_0-9a-z]{0,42}[A-Z])[-_0-9a-zA-Z]{43}\/\?cid=[-_0-9a-zA-Z]{16,36}(?:&qs\d=\S+)?&sid=[_0-9a-f]{1,32}$/$doc,match-case,domain=com|info|net|org + let engine = Engine::from_rules_debug([r#"/^https:\/\/(?:www\d\.)?[-a-z]{6,}\.(?:com|info|net|org)\/(?=[-_a-zA-Z]{0,42}\d)(?=[-_0-9a-z]{0,42}[A-Z])[-_0-9a-zA-Z]{43}\/\?cid=[-_0-9a-zA-Z]{16,36}(?:&qs\d=\S+)?&sid=[_0-9a-f]{1,32}$/$doc,match-case,domain=com|info|net|org"#], Default::default()); + let request = Request::new("https://www3.example.com/aaaaaaaaaabbbbbbbbbbccccccccccddddddddddAA5/?cid=aaaaaaaaaabbbbbb&qs5=\n&sid=a", + "https://www3.example.com/aaaaaaaaaabbbbbbbbbbccccccccccddddddddddAA5/?cid=aaaaaaaaaabbbbbb&qs5=\n&sid=a", + "document").unwrap(); + assert!(engine.check_network_request(&request).matched); + }*/ + // fails - because of non-supported look around operator in rust regex https://github.com/rust-lang/regex/issues/127#issuecomment-154713666 + /*{ + // /^https:\/\/(?:www\d\.)?[-a-z]{6,}\.(?:com|info|net|org)\/(?=[-_a-zA-Z]{0,42}\d)(?=[-_0-9a-z]{0,42}[A-Z])[-_0-9a-zA-Z]{43}\/\?sid=[_0-9a-f]{1,32}(?:&qs\d=\S+)?&cid=[-_0-9a-zA-Z]{16,36}$/$doc,match-case,domain=com|info|net|org + let engine = Engine::from_rules_debug([r#"/^https:\/\/(?:www\d\.)?[-a-z]{6,}\.(?:com|info|net|org)\/(?=[-_a-zA-Z]{0,42}\d)(?=[-_0-9a-z]{0,42}[A-Z])[-_0-9a-zA-Z]{43}\/\?cid=[-_0-9a-zA-Z]{16,36}(?:&qs\d=\S+)?&sid=[_0-9a-f]{1,32}$/$doc,match-case,domain=com|info|net|org"#], Default::default()); + let request = Request::new("https://www3.example.com/aaaaaaaaaabbbbbbbbbbccccccccccddddddddddAA5/?sid=1&qs1=\n&cid=aaaaaaaaaabbbbbb", + "https://www3.example.com/aaaaaaaaaabbbbbbbbbbccccccccccddddddddddAA5/?sid=1&qs1=\n&cid=aaaaaaaaaabbbbbb", + "document").unwrap(); + assert!(engine.check_network_request(&request).matched); + }*/ + { + // /^http:\/\/[a-z]{5}\.[a-z]{5}\.com\/[a-z]{10}\.apk$/$doc,match-case,domain=com + let engine = Engine::from_rules_debug( + [ + r#"/^http:\/\/[a-z]{5}\.[a-z]{5}\.com\/[a-z]{10}\.apk$/$doc,match-case,domain=com"#, + ], + Default::default(), + ); + let request = Request::new( + "http://abcde.abcde.com/aaaaabbbbb.apk", + "http://abcde.abcde.com/aaaaabbbbb.apk", + "document", + ) + .unwrap(); + assert!(engine.check_network_request(&request).matched); + } + // fails - because of non-supported look around operator in rust regex https://github.com/rust-lang/regex/issues/127#issuecomment-154713666 + /*{ + // /\/[A-Z]\/[-0-9a-z]{5,}\.com\/(?:[0-9a-f]{2}\/){3}[0-9a-f]{32}\.js$/$script,1p,match-case + let engine = Engine::from_rules_debug([r#"/\/[A-Z]\/[-0-9a-z]{5,}\.com\/(?:[0-9a-f]{2}\/){3}[0-9a-f]{32}\.js$/$script,1p,match-case"#], Default::default()); + let request = Request::new("/A/aaaaa.com/aa/bb/cc/aaaaaaaabbbbbbbbccccccccdddddddd.js", + "/A/aaaaa.com/aa/bb/cc/aaaaaaaabbbbbbbbccccccccdddddddd.js", + "script").unwrap(); + assert!(engine.check_network_request(&request).matched); + }*/ + // fails - because of non-supported look around operator in rust regex https://github.com/rust-lang/regex/issues/127#issuecomment-154713666 + /*{ + // /^https?:\/\/(?:[a-z]{2}\.)?[0-9a-z]{7,16}\.com\/[a-z](?=[a-z]{0,25}[0-9A-Z])[0-9a-zA-Z]{3,26}\/(?:[1-5]\d{4}|[3-9]\d{3})\??(?:_=\d+|v=\d)?$/$frame,script,xhr,popup,3p,match-case + let engine = Engine::from_rules_debug([r#"/^https?:\/\/(?:[a-z]{2}\.)?[0-9a-z]{7,16}\.com\/[a-z](?=[a-z]{0,25}[0-9A-Z])[0-9a-zA-Z]{3,26}\/(?:[1-5]\d{4}|[3-9]\d{3})\??(?:_=\d+|v=\d)?$/$frame,script,xhr,popup,3p,match-case"#], Default::default()); + let request = Request::new("https://aa.example.com/aAaaa/12222", + "https://aa.example.net/aAaaa/12222", + "frame").unwrap(); + assert!(engine.check_network_request(&request).matched); + }*/ + // fails - because of non-supported look around operator in rust regex https://github.com/rust-lang/regex/issues/127#issuecomment-154713666 + /*{ + // /^https?:\/\/(?:[a-z]{2}\.)?[0-9a-z]{7,16}\.website\/[a-z](?=[a-z]{0,25}[0-9A-Z])[0-9a-zA-Z]{3,26}\/(?:[1-5]\d{4}|[3-9]\d{3})\??(?:_=\d+|v=\d)?$/$frame,script,xhr,popup,3p,match-case + let engine = Engine::from_rules_debug([r#"/^https?:\/\/(?:[a-z]{2}\.)?[0-9a-z]{7,16}\.website\/[a-z](?=[a-z]{0,25}[0-9A-Z])[0-9a-zA-Z]{3,26}\/(?:[1-5]\d{4}|[3-9]\d{3})\??(?:_=\d+|v=\d)?$/$frame,script,xhr,popup,3p,match-case"#], Default::default()); + let request = Request::new("https://aa.example.website/aAaaa/12222", + "https://aa.example.website/aAaaa/12222", + "frame").unwrap(); + assert!(engine.check_network_request(&request).matched); + }*/ + // fails - because of non-supported look around operator in rust regex https://github.com/rust-lang/regex/issues/127#issuecomment-154713666 + /*{ + // /^https?:\/\/[a-z]{8,15}\.top(\/(?:\d{1,5}|0NaN|articles?|browse|index|movie|news|pages?|static|view|web|wiki)){1,4}(?:\.html|\/)$/$frame,3p,match-case + let engine = Engine::from_rules_debug([r#"/^https?:\/\/[a-z]{8,15}\.top(\/(?:\d{1,5}|0NaN|articles?|browse|index|movie|news|pages?|static|view|web|wiki)){1,4}(?:\.html|\/)$/$frame,3p,match-case"#], Default::default()); + let request = Request::new("https://examples.top/articles.html", + "https://examples.top/articles.html", + "frame").unwrap(); + assert!(engine.check_network_request(&request).matched); + }*/ + { + // /^https?:\/\/[a-z]{8,15}\.top\/[a-z]{4,}\.json$/$xhr,3p,match-case + let engine = Engine::from_rules_debug( + [r#"/^https?:\/\/[a-z]{8,15}\.top\/[a-z]{4,}\.json$/$xhr,3p,match-case"#], + Default::default(), + ); + let request = Request::new( + "https://examples.top/abcd.json", + "https://examples.com/abcd.json", + "xhr", + ) + .unwrap(); + assert!(engine.check_network_request(&request).matched); + } + // fails - inferring unescaped `$` inside regex pattern + /*{ + // /^https?:\/\/[a-z]{8,15}\.top\/[-a-z]{4,}\.css\?aHR0c[\/0-9a-zA-Z]{33,}=?=?$/$css,3p,match-case + let engine = Engine::from_rules_debug([r#"/^https?:\/\/[a-z]{8,15}\.top\/[-a-z]{4,}\.css\?aHR0c[\/0-9a-zA-Z]{33,}=?=?$/$css,3p,match-case"#], Default::default()); + let request = Request::new("https://examples.top/abcd.css?aHR0c/aaaaaaaaaaAAAAAAAAAA000000000012==", + "https://examples.com/abcd.css?aHR0c/aaaaaaaaaaAAAAAAAAAA000000000012==", + "stylesheet").unwrap(); + assert!(engine.check_network_request(&request).matched); + }*/ + // fails - inferring unescaped `$` inside regex pattern + /*{ + // /^https?:\/\/[a-z]{8,15}\.top\/[a-z]{4,}\.png\?aHR0c[\/0-9a-zA-Z]{33,}=?=?$/$image,3p,match-case + let engine = Engine::from_rules_debug([r#"/^https?:\/\/[a-z]{8,15}\.top\/[a-z]{4,}\.png\?aHR0c[\/0-9a-zA-Z]{33,}=?=?$/$image,3p,match-case"#], Default::default()); + let request = Request::new("https://examples.top/abcd.png?aHR0c/aaaaaaaaaaAAAAAAAAAA000000000012==", + "https://examples.com/abcd.png?aHR0c/aaaaaaaaaaAAAAAAAAAA000000000012==", + "image").unwrap(); + assert!(engine.check_network_request(&request).matched); + }*/ + // fails - because of non-supported look around operator in rust regex https://github.com/rust-lang/regex/issues/127#issuecomment-154713666 + /*{ + // /^https?:\/\/[a-z]{8,15}\.xyz(\/(?:\d{1,5}|0NaN|articles?|browse|index|movie|news|pages?|static|view|web|wiki)){1,4}(?:\.html|\/)$/$frame,3p,match-case + let engine = Engine::from_rules_debug([r#"/^https?:\/\/[a-z]{8,15}\.xyz(\/(?:\d{1,5}|0NaN|articles?|browse|index|movie|news|pages?|static|view|web|wiki)){1,4}(?:\.html|\/)$/$frame,3p,match-case"#], Default::default()); + let request = Request::new("https://examples.xyz/articles.html", + "https://examples.xyz/articles.html", + "frame").unwrap(); + assert!(engine.check_network_request(&request).matched); + }*/ + { + // /^https?:\/\/cdn\.[a-z]{4,6}\.xyz\/app\.js$/$script,3p,match-case + let engine = Engine::from_rules_debug( + [r#"/^https?:\/\/cdn\.[a-z]{4,6}\.xyz\/app\.js$/$script,3p,match-case"#], + Default::default(), + ); + let request = Request::new( + "https://cdn.abcde.xyz/app.js", + "https://cdn.abcde.com/app.js", + "script", + ) + .unwrap(); + assert!(engine.check_network_request(&request).matched); + } + // fails - because of non-supported look around operator in rust regex https://github.com/rust-lang/regex/issues/127#issuecomment-154713666 + /*{ + // /^https:\/\/a\.[-0-9a-z]{4,16}\.(?:club|com?|cyou|info|net|ru|site|top?|xxx|xyz)\/(?=[a-z]{0,6}[0-9A-Z])[0-9a-zA-Z]{7}\.js$/$script,match-case + let engine = Engine::from_rules_debug([r#"/^https:\/\/a\.[-0-9a-z]{4,16}\.(?:club|com?|cyou|info|net|ru|site|top?|xxx|xyz)\/(?=[a-z]{0,6}[0-9A-Z])[0-9a-zA-Z]{7}\.js$/$script,match-case"#], Default::default()); + let request = Request::new("https://a.abcd.club/aaaaaaA.js", + "https://a.abcd.club/aaaaaaA.js", + "script").unwrap(); + assert!(engine.check_network_request(&request).matched); + }*/ + { + // /^https:\/\/cdn\.jsdelivr\.net\/npm\/[-a-z_]{4,22}@latest\/dist\/script\.min\.js$/$script,3p,match-case + let engine = Engine::from_rules_debug( + [ + r#"/^https:\/\/cdn\.jsdelivr\.net\/npm\/[-a-z_]{4,22}@latest\/dist\/script\.min\.js$/$script,3p,match-case"#, + ], + Default::default(), + ); + let request = Request::new( + "https://cdn.jsdelivr.net/npm/abcd@latest/dist/script.min.js", + "https://cdn.jsdelivr.com/npm/abcd@latest/dist/script.min.js", + "script", + ) + .unwrap(); + assert!(engine.check_network_request(&request).matched); + } + // fails - inferring unescaped `$` inside regex pattern + /*{ + // /^https?:\/\/[-.0-9a-z]+\/script\.js$/$script,1p,strict3p,match-case + let engine = Engine::from_rules_debug([r#"/^https?:\/\/[-.0-9a-z]+\/script\.js$/$script,1p,strict3p,match-case"#], Default::default()); + let request = Request::new("https://www.example.com/script.js", + "https://www.abc.com/script.js", + "script").unwrap(); + assert!(engine.check_network_request(&request).matched); + }*/ + // fails - unicode not supported in network filter + /*{ + let engine = Engine::from_rules_debug([r#"/tesT߶/$domain=example.com"#], Default::default()); + let request = Request::new("https://example.com/tesT߶", + "https://example.com", + "script").unwrap(); + assert!(engine.check_network_request(&request).matched); + }*/ + // fails - unicode not supported in network filter + /*{ + let engine = Engine::from_rules_debug([r#"/tesT߶/$domain=example.com"#], Default::default()); + let request = Request::new("https://example-tesT߶.com/tesT", + "https://example.com", + "script").unwrap(); + assert!(engine.check_network_request(&request).matched); + }*/ + } + + #[test] + fn scriptlet_permissions() { + use crate::resources::{PermissionMask, ResourceType}; + const UBO_PERM: PermissionMask = PermissionMask::from_bits(0b00000001); + const BRAVE_PERM: PermissionMask = PermissionMask::from_bits(0b00000011); + + let resources = [ + Resource::simple( + "refresh-defuser.js", + MimeType::ApplicationJavascript, + "refresh-defuser", + ), + Resource { + name: "trusted-set-cookie.js".to_string(), + aliases: vec![], + kind: ResourceType::Mime(MimeType::ApplicationJavascript), + content: base64::encode("trusted-set-cookie"), + dependencies: vec![], + permission: UBO_PERM, + }, + Resource { + name: "brave-fix.js".to_string(), + aliases: vec![], + kind: ResourceType::Mime(MimeType::ApplicationJavascript), + content: base64::encode("brave-fix"), + dependencies: vec![], + permission: BRAVE_PERM, + }, + ]; + + let mut filter_set = FilterSet::new(false); + filter_set.add_filters( + [ + "sub1.example.com##+js(refresh-defuser)", + "sub2.example.com##+js(trusted-set-cookie)", + "sub3.example.com##+js(brave-fix)", + ], + Default::default(), + ); + filter_set.add_filters( + [ + "sub4.example.com##+js(refresh-defuser)", + "sub5.example.com##+js(trusted-set-cookie)", + "sub6.example.com##+js(brave-fix)", + ], + ParseOptions { + permissions: UBO_PERM, + ..Default::default() + }, + ); + filter_set.add_filters( + [ + "sub7.example.com##+js(refresh-defuser)", + "sub8.example.com##+js(trusted-set-cookie)", + "sub9.example.com##+js(brave-fix)", + ], + ParseOptions { + permissions: BRAVE_PERM, + ..Default::default() + }, + ); + + let mut engine = Engine::from_filter_set(filter_set, true); + engine.use_resources(resources); + + fn wrap_try(scriptlet_content: &str) -> String { + format!("try {{\n{}\n}} catch ( e ) {{ }}\n", scriptlet_content) + } + + assert_eq!( + engine + .url_cosmetic_resources("https://sub1.example.com") + .injected_script, + wrap_try("refresh-defuser") + ); + assert_eq!( + engine + .url_cosmetic_resources("https://sub2.example.com") + .injected_script, + "" + ); + assert_eq!( + engine + .url_cosmetic_resources("https://sub3.example.com") + .injected_script, + "" + ); + + assert_eq!( + engine + .url_cosmetic_resources("https://sub4.example.com") + .injected_script, + wrap_try("refresh-defuser") + ); + assert_eq!( + engine + .url_cosmetic_resources("https://sub5.example.com") + .injected_script, + wrap_try("trusted-set-cookie") + ); + assert_eq!( + engine + .url_cosmetic_resources("https://sub6.example.com") + .injected_script, + "" + ); + + assert_eq!( + engine + .url_cosmetic_resources("https://sub7.example.com") + .injected_script, + wrap_try("refresh-defuser") + ); + assert_eq!( + engine + .url_cosmetic_resources("https://sub8.example.com") + .injected_script, + wrap_try("trusted-set-cookie") + ); + assert_eq!( + engine + .url_cosmetic_resources("https://sub9.example.com") + .injected_script, + wrap_try("brave-fix") + ); + } + + #[test] + fn quoted_scriptlet_args() { + use crate::resources::{MimeType, ResourceType}; + + let resources = [ + Resource { + name: "trusted-set-local-storage-item.js".into(), + aliases: vec![], + kind: ResourceType::Mime(MimeType::ApplicationJavascript), + content: base64::encode("function trustedSetLocalStorageItem(key = '', value = '') { setLocalStorageItemFn('local', true, key, value); }"), + dependencies: vec![], + permission: Default::default(), + }, + ]; + + let mut filter_set = FilterSet::new(false); + filter_set.add_filters([ + r#"dailymail.co.uk##+js(trusted-set-local-storage-item, mol.ads.cmp.tcf.cache, '{"getTCData":{"cmpId":27,"cmpVersion":3,"gdprApplies":true,"tcfPolicyVersion":2,"tcString":"CPyz5QAPyz5QAAbADCENC6CgAAAAAAAAAAwIAAASjAJINW4gCLMscGaQEIoEAIgjCQggUAAFAILRAQAODgp2VgE6MIkAAAUARABAhwAQAQCAAASABCAAJAAwQAAAiAQAAAAQCAAAMCAILACgAAAABANAhRCgAECQAyIAIpTAgKgSCAFsKAAADJCQCAKgMAKARGgEACIIARGAAACwMAgBICFggABMQbBAAMACAESoBoCTEwBACDQFgBkADLAGzAPsA_ACAAEFAIwASYAp8BaAFpAOqAfIBDoCJgEiAKRAXIAyMBk4DlAI_gSKEQEwBkADLAGzAPsA_ACAAEYAJMAU8A6oB8gEOgJEAUiAuQBkYDJwHKAR_AkU.f_gAAagAAAAA","eventStatus":"useractioncomplete","cmpStatus":"loaded","isServiceSpecific":true,"useNonStandardStacks":false,"publisherCC":"GB","purposeOneTreatment":false,"addtlConsent":"1~","acmVersion":2,"molGvlVersion":"186.gb.web","nrvString":"1~","nrvVersion":1,"repromptVersion":5},"getStoredRepromptVersion":5,"hasUserConsentedToAll":false,"hasUserDissentedToAll":true,"getConsentDegree":"no","getValidTCData":{"cmpId":27,"cmpVersion":3,"gdprApplies":true,"tcfPolicyVersion":2,"tcString":"CPyz5QAPyz5QAAbADCENC6CgAAAAAAAAAAwIAAASjAJINW4gCLMscGaQEIoEAIgjCQggUAAFAILRAQAODgp2VgE6MIkAAAUARABAhwAQAQCAAASABCAAJAAwQAAAiAQAAAAQCAAAMCAILACgAAAABANAhRCgAECQAyIAIpTAgKgSCAFsKAAADJCQCAKgMAKARGgEACIIARGAAACwMAgBICFggABMQbBAAMACAESoBoCTEwBACDQFgBkADLAGzAPsA_ACAAEFAIwASYAp8BaAFpAOqAfIBDoCJgEiAKRAXIAyMBk4DlAI_gSKEQEwBkADLAGzAPsA_ACAAEYAJMAU8A6oB8gEOgJEAUiAuQBkYDJwHKAR_AkU.f_gAAagAAAAA","listenerId":1,"eventStatus":"useractioncomplete","cmpStatus":"loaded","isServiceSpecific":true,"useNonStandardStacks":false,"publisherCC":"GB","purposeOneTreatment":false,"addtlConsent":"1~","acmVersion":2,"molGvlVersion":"186.gb.web","nrvString":"1~","nrvVersion":1,"repromptVersion":5}}')"#, + // invalid - unclosed quoted arg + r#"example.com##+js(trusted-set-local-storage-item, "test)"#, + // invalid - closing quote does not surround the argument + r#"example.com##+js(trusted-set-local-storage-item, "test"test, 3)"#, + ], Default::default()); + + let mut engine = Engine::from_filter_set(filter_set, true); + engine.use_resources(resources); + + assert_eq!(engine.url_cosmetic_resources("https://dailymail.co.uk").injected_script, r#"function trustedSetLocalStorageItem(key = '', value = '') { setLocalStorageItemFn('local', true, key, value); } +try { +trustedSetLocalStorageItem("mol.ads.cmp.tcf.cache", "{\"getTCData\":{\"cmpId\":27,\"cmpVersion\":3,\"gdprApplies\":true,\"tcfPolicyVersion\":2,\"tcString\":\"CPyz5QAPyz5QAAbADCENC6CgAAAAAAAAAAwIAAASjAJINW4gCLMscGaQEIoEAIgjCQggUAAFAILRAQAODgp2VgE6MIkAAAUARABAhwAQAQCAAASABCAAJAAwQAAAiAQAAAAQCAAAMCAILACgAAAABANAhRCgAECQAyIAIpTAgKgSCAFsKAAADJCQCAKgMAKARGgEACIIARGAAACwMAgBICFggABMQbBAAMACAESoBoCTEwBACDQFgBkADLAGzAPsA_ACAAEFAIwASYAp8BaAFpAOqAfIBDoCJgEiAKRAXIAyMBk4DlAI_gSKEQEwBkADLAGzAPsA_ACAAEYAJMAU8A6oB8gEOgJEAUiAuQBkYDJwHKAR_AkU.f_gAAagAAAAA\",\"eventStatus\":\"useractioncomplete\",\"cmpStatus\":\"loaded\",\"isServiceSpecific\":true,\"useNonStandardStacks\":false,\"publisherCC\":\"GB\",\"purposeOneTreatment\":false,\"addtlConsent\":\"1~\",\"acmVersion\":2,\"molGvlVersion\":\"186.gb.web\",\"nrvString\":\"1~\",\"nrvVersion\":1,\"repromptVersion\":5},\"getStoredRepromptVersion\":5,\"hasUserConsentedToAll\":false,\"hasUserDissentedToAll\":true,\"getConsentDegree\":\"no\",\"getValidTCData\":{\"cmpId\":27,\"cmpVersion\":3,\"gdprApplies\":true,\"tcfPolicyVersion\":2,\"tcString\":\"CPyz5QAPyz5QAAbADCENC6CgAAAAAAAAAAwIAAASjAJINW4gCLMscGaQEIoEAIgjCQggUAAFAILRAQAODgp2VgE6MIkAAAUARABAhwAQAQCAAASABCAAJAAwQAAAiAQAAAAQCAAAMCAILACgAAAABANAhRCgAECQAyIAIpTAgKgSCAFsKAAADJCQCAKgMAKARGgEACIIARGAAACwMAgBICFggABMQbBAAMACAESoBoCTEwBACDQFgBkADLAGzAPsA_ACAAEFAIwASYAp8BaAFpAOqAfIBDoCJgEiAKRAXIAyMBk4DlAI_gSKEQEwBkADLAGzAPsA_ACAAEYAJMAU8A6oB8gEOgJEAUiAuQBkYDJwHKAR_AkU.f_gAAagAAAAA\",\"listenerId\":1,\"eventStatus\":\"useractioncomplete\",\"cmpStatus\":\"loaded\",\"isServiceSpecific\":true,\"useNonStandardStacks\":false,\"publisherCC\":\"GB\",\"purposeOneTreatment\":false,\"addtlConsent\":\"1~\",\"acmVersion\":2,\"molGvlVersion\":\"186.gb.web\",\"nrvString\":\"1~\",\"nrvVersion\":1,\"repromptVersion\":5}}") +} catch ( e ) { } +"#.to_owned()); + + assert_eq!( + engine + .url_cosmetic_resources("https://example.com") + .injected_script, + "" + ); + } +} + +#[cfg(not(feature = "flatbuffers"))] // No serialization for flatbuffers yet. +mod serialization_tests { + use super::super::*; + use crate::resources::MimeType; + + #[test] + fn serialization_retains_tags() { + let filters = [ + "adv$tag=stuff", + "somelongpath/test$tag=stuff", + "||brianbondy.com/$tag=brian", + "||brave.com$tag=brian", + ]; + let url_results = [ + ("http://example.com/advert.html", true), + ("http://example.com/somelongpath/test/2.html", true), + ("https://brianbondy.com/about", false), + ("https://brave.com/about", false), + ]; + + let mut engine = Engine::from_rules(&filters, Default::default()); + engine.enable_tags(&["stuff"]); + engine.enable_tags(&["brian"]); + let serialized = engine.serialize_raw().unwrap(); + let mut deserialized_engine = Engine::default(); + deserialized_engine.enable_tags(&["stuff"]); + deserialized_engine.deserialize(&serialized).unwrap(); + + url_results.into_iter().for_each(|(url, expected_result)| { + let request = Request::new(&url, "", "").unwrap(); + let matched_rule = deserialized_engine.check_network_request(&request); + if expected_result { + assert!(matched_rule.matched, "Expected match for {}", url); + } else { + assert!( + !matched_rule.matched, + "Expected no match for {}, matched with {:?}", + url, matched_rule.filter + ); + } + }); + } + + #[test] + fn deserialization_backwards_compatible_plain() { + // deserialization_generate_simple(); + // assert!(false); + // converted from the legacy compressed format + let serialized = [ + 209, 217, 58, 175, 0, 220, 0, 17, 145, 128, 145, 128, 145, 128, 145, 128, 145, 128, + 145, 129, 207, 202, 167, 36, 217, 43, 56, 97, 176, 145, 157, 145, 206, 0, 3, 31, 255, + 129, 1, 169, 97, 100, 45, 98, 97, 110, 110, 101, 114, 192, 192, 192, 192, 192, 192, + 192, 192, 207, 186, 136, 69, 13, 115, 187, 170, 226, 192, 192, 145, 128, 144, 195, 145, + 128, 144, 144, 128, 128, 145, 128, 144, 145, 128, + ]; + let mut deserialized_engine = Engine::default(); + deserialized_engine.deserialize(&serialized).unwrap(); + + let url = "http://example.com/ad-banner.gif"; + let request = Request::new(&url, "", "").unwrap(); + let matched_rule = deserialized_engine.check_network_request(&request); + assert!(matched_rule.matched, "Expected match for {}", url); + } + + #[test] + fn deserialization_backwards_compatible_tags() { + // deserialization_generate_tags(); + // assert!(false); + // converted from the legacy compressed format + let serialized = [ + 209, 217, 58, 175, 0, 220, 0, 17, 145, 128, 145, 128, 145, 128, 145, 128, 145, 128, + 145, 128, 145, 128, 145, 157, 145, 206, 0, 3, 31, 255, 129, 1, 169, 97, 100, 45, 98, + 97, 110, 110, 101, 114, 192, 192, 192, 192, 192, 192, 163, 97, 98, 99, 192, 207, 126, + 212, 53, 83, 113, 159, 143, 134, 192, 192, 195, 145, 128, 144, 144, 128, 128, 145, 128, + 144, 145, 128, + ]; + let mut deserialized_engine = Engine::default(); + + deserialized_engine.enable_tags(&[]); + deserialized_engine.deserialize(&serialized).unwrap(); + let url = "http://example.com/ad-banner.gif"; + let request = Request::new(&url, "", "").unwrap(); + let matched_rule = deserialized_engine.check_network_request(&request); + assert!(!matched_rule.matched, "Expected NO match for {}", url); + + deserialized_engine.enable_tags(&["abc"]); + deserialized_engine.deserialize(&serialized).unwrap(); + + let url = "http://example.com/ad-banner.gif"; + let request = Request::new(&url, "", "").unwrap(); + let matched_rule = deserialized_engine.check_network_request(&request); + assert!(matched_rule.matched, "Expected match for {}", url); + } + + #[test] + fn deserialization_generate_simple() { + let mut engine = Engine::from_rules(&["ad-banner"], Default::default()); + let serialized = engine.serialize_raw().unwrap(); + println!("Engine serialized: {:?}", serialized); + engine.deserialize(&serialized).unwrap(); + } + + #[test] + fn deserialization_generate_tags() { + let mut engine = Engine::from_rules(&["ad-banner$tag=abc"], Default::default()); + engine.use_tags(&["abc"]); + let serialized = engine.serialize_raw().unwrap(); + println!("Engine serialized: {:?}", serialized); + engine.deserialize(&serialized).unwrap(); + } + + #[test] + fn deserialization_generate_resources() { + let mut engine = Engine::from_rules(&["ad-banner$redirect=nooptext"], Default::default()); + + engine.use_resources([ + Resource::simple("nooptext", MimeType::TextPlain, ""), + Resource::simple("noopcss", MimeType::TextCss, ""), + ]); + + let serialized = engine.serialize_raw().unwrap(); + println!("Engine serialized: {:?}", serialized); + engine.deserialize(&serialized).unwrap(); + } + + #[test] + fn redirect_resource_insertion_works() { + let mut engine = Engine::from_rules( + &["ad-banner$redirect=nooptext", "script.js$redirect=noop.js"], + Default::default(), + ); + + let script = r#" +(function() { + ; +})(); + + "#; + let mut resources = [ + Resource::simple("nooptext", MimeType::TextPlain, ""), + Resource::simple("noopjs", MimeType::ApplicationJavascript, script), + ]; + resources[1].aliases.push("noop.js".to_string()); + engine.use_resources(resources); + + let url = "http://example.com/ad-banner.gif"; + let request = Request::new(url, "", "").unwrap(); + let matched_rule = engine.check_network_request(&request); + assert!(matched_rule.matched, "Expected match for {}", url); + assert_eq!( + matched_rule.redirect, + Some("data:text/plain;base64,".to_owned()), + "Expected redirect to contain resource" + ); + + let url = "http://example.com/script.js"; + let request = Request::new(url, "", "").unwrap(); + let matched_rule = engine.check_network_request(&request); + assert!(matched_rule.matched, "Expected match for {}", url); + assert_eq!( + matched_rule.redirect, + Some(format!( + "data:application/javascript;base64,{}", + base64::encode(format!("{}", script)) + )), + "Expected redirect to contain resource" + ); + } +} diff --git a/tests/unit/filters/cosmetic.rs b/tests/unit/filters/cosmetic.rs new file mode 100644 index 00000000..58e06cd6 --- /dev/null +++ b/tests/unit/filters/cosmetic.rs @@ -0,0 +1,1202 @@ +#[cfg(test)] +mod parse_tests { + use super::super::*; + + /// An easily modified summary of a `CosmeticFilter` rule to be used in tests. + #[derive(Debug, PartialEq)] + struct CosmeticFilterBreakdown { + entities: Option>, + hostnames: Option>, + not_entities: Option>, + not_hostnames: Option>, + selector: SelectorType, + action: Option, + + unhide: bool, + script_inject: bool, + } + + impl From<&CosmeticFilter> for CosmeticFilterBreakdown { + fn from(filter: &CosmeticFilter) -> CosmeticFilterBreakdown { + CosmeticFilterBreakdown { + entities: filter.entities.as_ref().cloned(), + hostnames: filter.hostnames.as_ref().cloned(), + not_entities: filter.not_entities.as_ref().cloned(), + not_hostnames: filter.not_hostnames.as_ref().cloned(), + selector: SelectorType::from(filter), + action: filter.action.as_ref().cloned(), + + unhide: filter.mask.contains(CosmeticFilterMask::UNHIDE), + script_inject: filter.mask.contains(CosmeticFilterMask::SCRIPT_INJECT), + } + } + } + + impl From for CosmeticFilterBreakdown { + fn from(filter: CosmeticFilter) -> CosmeticFilterBreakdown { + (&filter).into() + } + } + + impl Default for CosmeticFilterBreakdown { + fn default() -> Self { + CosmeticFilterBreakdown { + entities: None, + hostnames: None, + not_entities: None, + not_hostnames: None, + selector: SelectorType::PlainCss(String::from("")), + action: None, + + unhide: false, + script_inject: false, + } + } + } + + #[derive(Debug, PartialEq)] + enum SelectorType { + PlainCss(String), + Procedural(Vec), + } + + impl From<&CosmeticFilter> for SelectorType { + fn from(v: &CosmeticFilter) -> Self { + if let Some(selector) = v.plain_css_selector() { + Self::PlainCss(selector.to_string()) + } else { + Self::Procedural(v.selector.clone()) + } + } + } + + fn parse_cf(rule: &str) -> Result { + CosmeticFilter::parse(rule, false, Default::default()) + } + + /// Asserts that `rule` parses into a `CosmeticFilter` equivalent to the summary provided by + /// `expected`. + fn check_parse_result(rule: &str, expected: CosmeticFilterBreakdown) { + let filter: CosmeticFilterBreakdown = parse_cf(rule).unwrap().into(); + assert_eq!(expected, filter); + } + + #[test] + fn simple_selectors() { + check_parse_result( + "##div.popup", + CosmeticFilterBreakdown { + selector: SelectorType::PlainCss("div.popup".to_string()), + ..Default::default() + }, + ); + check_parse_result( + "###selector", + CosmeticFilterBreakdown { + selector: SelectorType::PlainCss("#selector".to_string()), + ..Default::default() + }, + ); + check_parse_result( + "##.selector", + CosmeticFilterBreakdown { + selector: SelectorType::PlainCss(".selector".to_string()), + ..Default::default() + }, + ); + check_parse_result( + "##a[href=\"foo.com\"]", + CosmeticFilterBreakdown { + selector: SelectorType::PlainCss("a[href=\"foo.com\"]".to_string()), + ..Default::default() + }, + ); + check_parse_result( + "##[href=\"foo.com\"]", + CosmeticFilterBreakdown { + selector: SelectorType::PlainCss("[href=\"foo.com\"]".to_string()), + ..Default::default() + }, + ); + } + + /// Produces a sorted vec of the hashes of all the given domains. + /// + /// For convenience, the return value is wrapped in a `Some()` to be consumed by a + /// `CosmeticFilterBreakdown`. + fn sort_hash_domains(domains: Vec<&str>) -> Option> { + let mut hashes: Vec<_> = domains.iter().map(|d| crate::utils::fast_hash(d)).collect(); + hashes.sort(); + Some(hashes) + } + + #[test] + fn hostnames() { + check_parse_result( + r#"u00p.com##div[class^="adv-box"]"#, + CosmeticFilterBreakdown { + selector: SelectorType::PlainCss(r#"div[class^="adv-box"]"#.to_string()), + hostnames: sort_hash_domains(vec!["u00p.com"]), + ..Default::default() + }, + ); + check_parse_result( + r#"distractify.com##div[class*="AdInArticle"]"#, + CosmeticFilterBreakdown { + selector: SelectorType::PlainCss(r#"div[class*="AdInArticle"]"#.to_string()), + hostnames: sort_hash_domains(vec!["distractify.com"]), + ..Default::default() + }, + ); + check_parse_result( + r#"soundtrackcollector.com,the-numbers.com##a[href^="http://affiliates.allposters.com/"]"#, + CosmeticFilterBreakdown { + selector: SelectorType::PlainCss( + r#"a[href^="http://affiliates.allposters.com/"]"#.to_string(), + ), + hostnames: sort_hash_domains(vec!["soundtrackcollector.com", "the-numbers.com"]), + ..Default::default() + }, + ); + check_parse_result( + r#"thelocal.at,thelocal.ch,thelocal.de,thelocal.dk,thelocal.es,thelocal.fr,thelocal.it,thelocal.no,thelocal.se##div[class*="-widget"]"#, + CosmeticFilterBreakdown { + selector: SelectorType::PlainCss(r#"div[class*="-widget"]"#.to_string()), + hostnames: sort_hash_domains(vec![ + "thelocal.at", + "thelocal.ch", + "thelocal.de", + "thelocal.dk", + "thelocal.es", + "thelocal.fr", + "thelocal.it", + "thelocal.no", + "thelocal.se", + ]), + ..Default::default() + }, + ); + check_parse_result( + r#"base64decode.org,base64encode.org,beautifyjson.org,minifyjson.org,numgen.org,pdfmrg.com,pdfspl.com,prettifycss.com,pwdgen.org,strlength.com,strreverse.com,uglifyjs.net,urldecoder.org##div[class^="banner_"]"#, + CosmeticFilterBreakdown { + selector: SelectorType::PlainCss(r#"div[class^="banner_"]"#.to_string()), + hostnames: sort_hash_domains(vec![ + "base64decode.org", + "base64encode.org", + "beautifyjson.org", + "minifyjson.org", + "numgen.org", + "pdfmrg.com", + "pdfspl.com", + "prettifycss.com", + "pwdgen.org", + "strlength.com", + "strreverse.com", + "uglifyjs.net", + "urldecoder.org", + ]), + ..Default::default() + }, + ); + check_parse_result( + r#"adforum.com,alliednews.com,americustimesrecorder.com,andovertownsman.com,athensreview.com,batesvilleheraldtribune.com,bdtonline.com,channel24.pk,chickashanews.com,claremoreprogress.com,cleburnetimesreview.com,clintonherald.com,commercejournal.com,commercial-news.com,coopercrier.com,cordeledispatch.com,corsicanadailysun.com,crossville-chronicle.com,cullmantimes.com,dailyiowegian.com,dailyitem.com,daltondailycitizen.com,derrynews.com,duncanbanner.com,eagletribune.com,edmondsun.com,effinghamdailynews.com,enewscourier.com,enidnews.com,farmtalknewspaper.com,fayettetribune.com,flasharcade.com,flashgames247.com,flyergroup.com,foxsportsasia.com,gainesvilleregister.com,gloucestertimes.com,goshennews.com,greensburgdailynews.com,heraldbanner.com,heraldbulletin.com,hgazette.com,homemagonline.com,itemonline.com,jacksonvilleprogress.com,jerusalemonline.com,joplinglobe.com,journal-times.com,journalexpress.net,kexp.org,kokomotribune.com,lockportjournal.com,mankatofreepress.com,mcalesternews.com,mccrearyrecord.com,mcleansborotimesleader.com,meadvilletribune.com,meridianstar.com,mineralwellsindex.com,montgomery-herald.com,mooreamerican.com,moultrieobserver.com,muskogeephoenix.com,ncnewsonline.com,newburyportnews.com,newsaegis.com,newsandtribune.com,niagara-gazette.com,njeffersonnews.com,normantranscript.com,opposingviews.com,orangeleader.com,oskaloosa.com,ottumwacourier.com,outlookmoney.com,palestineherald.com,panews.com,paulsvalleydailydemocrat.com,pellachronicle.com,pharostribune.com,pressrepublican.com,pryordailytimes.com,randolphguide.com,record-eagle.com,register-herald.com,register-news.com,reporter.net,rockwallheraldbanner.com,roysecityheraldbanner.com,rushvillerepublican.com,salemnews.com,sentinel-echo.com,sharonherald.com,shelbyvilledailyunion.com,siteslike.com,standardmedia.co.ke,starbeacon.com,stwnewspress.com,suwanneedemocrat.com,tahlequahdailypress.com,theadanews.com,theawesomer.com,thedailystar.com,thelandonline.com,themoreheadnews.com,thesnaponline.com,tiftongazette.com,times-news.com,timesenterprise.com,timessentinel.com,timeswv.com,tonawanda-news.com,tribdem.com,tribstar.com,unionrecorder.com,valdostadailytimes.com,washtimesherald.com,waurikademocrat.com,wcoutlook.com,weatherforddemocrat.com,woodwardnews.net,wrestlinginc.com##div[style="width:300px; height:250px;"]"#, + CosmeticFilterBreakdown { + selector: SelectorType::PlainCss( + r#"div[style="width:300px; height:250px;"]"#.to_string(), + ), + hostnames: sort_hash_domains(vec![ + "adforum.com", + "alliednews.com", + "americustimesrecorder.com", + "andovertownsman.com", + "athensreview.com", + "batesvilleheraldtribune.com", + "bdtonline.com", + "channel24.pk", + "chickashanews.com", + "claremoreprogress.com", + "cleburnetimesreview.com", + "clintonherald.com", + "commercejournal.com", + "commercial-news.com", + "coopercrier.com", + "cordeledispatch.com", + "corsicanadailysun.com", + "crossville-chronicle.com", + "cullmantimes.com", + "dailyiowegian.com", + "dailyitem.com", + "daltondailycitizen.com", + "derrynews.com", + "duncanbanner.com", + "eagletribune.com", + "edmondsun.com", + "effinghamdailynews.com", + "enewscourier.com", + "enidnews.com", + "farmtalknewspaper.com", + "fayettetribune.com", + "flasharcade.com", + "flashgames247.com", + "flyergroup.com", + "foxsportsasia.com", + "gainesvilleregister.com", + "gloucestertimes.com", + "goshennews.com", + "greensburgdailynews.com", + "heraldbanner.com", + "heraldbulletin.com", + "hgazette.com", + "homemagonline.com", + "itemonline.com", + "jacksonvilleprogress.com", + "jerusalemonline.com", + "joplinglobe.com", + "journal-times.com", + "journalexpress.net", + "kexp.org", + "kokomotribune.com", + "lockportjournal.com", + "mankatofreepress.com", + "mcalesternews.com", + "mccrearyrecord.com", + "mcleansborotimesleader.com", + "meadvilletribune.com", + "meridianstar.com", + "mineralwellsindex.com", + "montgomery-herald.com", + "mooreamerican.com", + "moultrieobserver.com", + "muskogeephoenix.com", + "ncnewsonline.com", + "newburyportnews.com", + "newsaegis.com", + "newsandtribune.com", + "niagara-gazette.com", + "njeffersonnews.com", + "normantranscript.com", + "opposingviews.com", + "orangeleader.com", + "oskaloosa.com", + "ottumwacourier.com", + "outlookmoney.com", + "palestineherald.com", + "panews.com", + "paulsvalleydailydemocrat.com", + "pellachronicle.com", + "pharostribune.com", + "pressrepublican.com", + "pryordailytimes.com", + "randolphguide.com", + "record-eagle.com", + "register-herald.com", + "register-news.com", + "reporter.net", + "rockwallheraldbanner.com", + "roysecityheraldbanner.com", + "rushvillerepublican.com", + "salemnews.com", + "sentinel-echo.com", + "sharonherald.com", + "shelbyvilledailyunion.com", + "siteslike.com", + "standardmedia.co.ke", + "starbeacon.com", + "stwnewspress.com", + "suwanneedemocrat.com", + "tahlequahdailypress.com", + "theadanews.com", + "theawesomer.com", + "thedailystar.com", + "thelandonline.com", + "themoreheadnews.com", + "thesnaponline.com", + "tiftongazette.com", + "times-news.com", + "timesenterprise.com", + "timessentinel.com", + "timeswv.com", + "tonawanda-news.com", + "tribdem.com", + "tribstar.com", + "unionrecorder.com", + "valdostadailytimes.com", + "washtimesherald.com", + "waurikademocrat.com", + "wcoutlook.com", + "weatherforddemocrat.com", + "woodwardnews.net", + "wrestlinginc.com", + ]), + ..Default::default() + }, + ); + } + + #[test] + fn href() { + check_parse_result( + r#"##a[href$="/vghd.shtml"]"#, + CosmeticFilterBreakdown { + selector: SelectorType::PlainCss(r#"a[href$="/vghd.shtml"]"#.to_string()), + ..Default::default() + }, + ); + check_parse_result( + r#"##a[href*=".adk2x.com/"]"#, + CosmeticFilterBreakdown { + selector: SelectorType::PlainCss(r#"a[href*=".adk2x.com/"]"#.to_string()), + ..Default::default() + }, + ); + check_parse_result( + r#"##a[href^="//40ceexln7929.com/"]"#, + CosmeticFilterBreakdown { + selector: SelectorType::PlainCss(r#"a[href^="//40ceexln7929.com/"]"#.to_string()), + ..Default::default() + }, + ); + check_parse_result( + r#"##a[href*=".trust.zone"]"#, + CosmeticFilterBreakdown { + selector: SelectorType::PlainCss(r#"a[href*=".trust.zone"]"#.to_string()), + ..Default::default() + }, + ); + check_parse_result( + r#"tf2maps.net##a[href="http://forums.tf2maps.net/payments.php"]"#, + CosmeticFilterBreakdown { + selector: SelectorType::PlainCss( + r#"a[href="http://forums.tf2maps.net/payments.php"]"#.to_string(), + ), + hostnames: sort_hash_domains(vec!["tf2maps.net"]), + ..Default::default() + }, + ); + check_parse_result( + r#"rarbg.to,rarbg.unblockall.org,rarbgaccess.org,rarbgmirror.com,rarbgmirror.org,rarbgmirror.xyz,rarbgproxy.com,rarbgproxy.org,rarbgunblock.com##a[href][target="_blank"] > button"#, + CosmeticFilterBreakdown { + selector: SelectorType::PlainCss( + r#"a[href][target="_blank"] > button"#.to_string(), + ), + hostnames: sort_hash_domains(vec![ + "rarbg.to", + "rarbg.unblockall.org", + "rarbgaccess.org", + "rarbgmirror.com", + "rarbgmirror.org", + "rarbgmirror.xyz", + "rarbgproxy.com", + "rarbgproxy.org", + "rarbgunblock.com", + ]), + ..Default::default() + }, + ); + } + + #[test] + fn injected_scripts() { + check_parse_result( + r#"hentaifr.net,jeu.info,tuxboard.com,xstory-fr.com##+js(goyavelab-defuser.js)"#, + CosmeticFilterBreakdown { + selector: SelectorType::PlainCss(r#"goyavelab-defuser.js"#.to_string()), + hostnames: sort_hash_domains(vec![ + "hentaifr.net", + "jeu.info", + "tuxboard.com", + "xstory-fr.com", + ]), + script_inject: true, + ..Default::default() + }, + ); + check_parse_result( + r#"haus-garten-test.de,sozialversicherung-kompetent.de##+js(set-constant.js, Object.keys, trueFunc)"#, + CosmeticFilterBreakdown { + selector: SelectorType::PlainCss( + r#"set-constant.js, Object.keys, trueFunc"#.to_string(), + ), + hostnames: sort_hash_domains(vec![ + "haus-garten-test.de", + "sozialversicherung-kompetent.de", + ]), + script_inject: true, + ..Default::default() + }, + ); + check_parse_result( + r#"airliners.de,auszeit.bio,autorevue.at,clever-tanken.de,fanfiktion.de,finya.de,frag-mutti.de,frustfrei-lernen.de,fussballdaten.de,gameswelt.*,liga3-online.de,lz.de,mt.de,psychic.de,rimondo.com,spielen.de,weltfussball.at,weristdeinfreund.de##+js(abort-current-inline-script.js, Number.isNaN)"#, + CosmeticFilterBreakdown { + selector: SelectorType::PlainCss( + r#"abort-current-inline-script.js, Number.isNaN"#.to_string(), + ), + hostnames: sort_hash_domains(vec![ + "airliners.de", + "auszeit.bio", + "autorevue.at", + "clever-tanken.de", + "fanfiktion.de", + "finya.de", + "frag-mutti.de", + "frustfrei-lernen.de", + "fussballdaten.de", + "liga3-online.de", + "lz.de", + "mt.de", + "psychic.de", + "rimondo.com", + "spielen.de", + "weltfussball.at", + "weristdeinfreund.de", + ]), + entities: sort_hash_domains(vec!["gameswelt"]), + script_inject: true, + ..Default::default() + }, + ); + check_parse_result( + r#"prad.de##+js(abort-on-property-read.js, document.cookie)"#, + CosmeticFilterBreakdown { + selector: SelectorType::PlainCss( + r#"abort-on-property-read.js, document.cookie"#.to_string(), + ), + hostnames: sort_hash_domains(vec!["prad.de"]), + script_inject: true, + ..Default::default() + }, + ); + check_parse_result( + r#"computerbild.de##+js(abort-on-property-read.js, Date.prototype.toUTCString)"#, + CosmeticFilterBreakdown { + selector: SelectorType::PlainCss( + r#"abort-on-property-read.js, Date.prototype.toUTCString"#.to_string(), + ), + hostnames: sort_hash_domains(vec!["computerbild.de"]), + script_inject: true, + ..Default::default() + }, + ); + check_parse_result( + r#"computerbild.de##+js(setTimeout-defuser.js, ())return)"#, + CosmeticFilterBreakdown { + selector: SelectorType::PlainCss(r#"setTimeout-defuser.js, ())return"#.to_string()), + hostnames: sort_hash_domains(vec!["computerbild.de"]), + script_inject: true, + ..Default::default() + }, + ); + } + + #[test] + fn entities() { + check_parse_result( + r#"monova.*##+js(nowebrtc.js)"#, + CosmeticFilterBreakdown { + selector: SelectorType::PlainCss(r#"nowebrtc.js"#.to_string()), + entities: sort_hash_domains(vec!["monova"]), + script_inject: true, + ..Default::default() + }, + ); + check_parse_result( + r#"monova.*##tr.success.desktop"#, + CosmeticFilterBreakdown { + selector: SelectorType::PlainCss(r#"tr.success.desktop"#.to_string()), + entities: sort_hash_domains(vec!["monova"]), + ..Default::default() + }, + ); + check_parse_result( + r#"monova.*#@#script + [class] > [class]:first-child"#, + CosmeticFilterBreakdown { + selector: SelectorType::PlainCss( + r#"script + [class] > [class]:first-child"#.to_string(), + ), + entities: sort_hash_domains(vec!["monova"]), + unhide: true, + ..Default::default() + }, + ); + check_parse_result( + r#"adshort.im,adsrt.*#@#[id*="ScriptRoot"]"#, + CosmeticFilterBreakdown { + selector: SelectorType::PlainCss(r#"[id*="ScriptRoot"]"#.to_string()), + hostnames: sort_hash_domains(vec!["adshort.im"]), + entities: sort_hash_domains(vec!["adsrt"]), + unhide: true, + ..Default::default() + }, + ); + check_parse_result( + r#"downloadsource.*##.date:not(dt):style(display: block !important;)"#, + CosmeticFilterBreakdown { + selector: SelectorType::PlainCss(r#".date:not(dt)"#.to_string()), + entities: sort_hash_domains(vec!["downloadsource"]), + action: Some(CosmeticFilterAction::Style( + "display: block !important;".into(), + )), + ..Default::default() + }, + ); + } + + #[test] + fn styles() { + check_parse_result( + r#"chip.de##.video-wrapper > video[style]:style(display:block!important;padding-top:0!important;)"#, + CosmeticFilterBreakdown { + selector: SelectorType::PlainCss(r#".video-wrapper > video[style]"#.to_string()), + hostnames: sort_hash_domains(vec!["chip.de"]), + action: Some(CosmeticFilterAction::Style( + "display:block!important;padding-top:0!important;".into(), + )), + ..Default::default() + }, + ); + check_parse_result( + r#"allmusic.com##.advertising.medium-rectangle:style(min-height: 1px !important;)"#, + CosmeticFilterBreakdown { + selector: SelectorType::PlainCss(r#".advertising.medium-rectangle"#.to_string()), + hostnames: sort_hash_domains(vec!["allmusic.com"]), + action: Some(CosmeticFilterAction::Style( + "min-height: 1px !important;".into(), + )), + ..Default::default() + }, + ); + #[cfg(feature = "css-validation")] + check_parse_result( + r#"quora.com##.signup_wall_prevent_scroll .SiteHeader,.signup_wall_prevent_scroll .LoggedOutFooter,.signup_wall_prevent_scroll .ContentWrapper:style(filter: none !important;)"#, + CosmeticFilterBreakdown { + selector: SelectorType::PlainCss(r#".signup_wall_prevent_scroll .SiteHeader, .signup_wall_prevent_scroll .LoggedOutFooter, .signup_wall_prevent_scroll .ContentWrapper"#.to_string()), + hostnames: sort_hash_domains(vec!["quora.com"]), + action: Some(CosmeticFilterAction::Style("filter: none !important;".into())), + ..Default::default() + } + ); + check_parse_result( + r#"imdb.com##body#styleguide-v2:style(background-color: #e3e2dd !important; background-image: none !important;)"#, + CosmeticFilterBreakdown { + selector: SelectorType::PlainCss(r#"body#styleguide-v2"#.to_string()), + hostnames: sort_hash_domains(vec!["imdb.com"]), + action: Some(CosmeticFilterAction::Style( + "background-color: #e3e2dd !important; background-image: none !important;" + .into(), + )), + ..Default::default() + }, + ); + check_parse_result( + r#"streamcloud.eu###login > div[style^="width"]:style(display: block !important)"#, + CosmeticFilterBreakdown { + selector: SelectorType::PlainCss(r#"#login > div[style^="width"]"#.to_string()), + hostnames: sort_hash_domains(vec!["streamcloud.eu"]), + action: Some(CosmeticFilterAction::Style( + "display: block !important".into(), + )), + ..Default::default() + }, + ); + check_parse_result( + r#"moonbit.co.in,moondoge.co.in,moonliteco.in##[src^="//coinad.com/ads/"]:style(visibility: collapse !important)"#, + CosmeticFilterBreakdown { + selector: SelectorType::PlainCss(r#"[src^="//coinad.com/ads/"]"#.to_string()), + hostnames: sort_hash_domains(vec![ + "moonbit.co.in", + "moondoge.co.in", + "moonliteco.in", + ]), + action: Some(CosmeticFilterAction::Style( + "visibility: collapse !important".into(), + )), + ..Default::default() + }, + ); + } + + #[test] + fn unicode() { + check_parse_result( + "###неделя", + CosmeticFilterBreakdown { + selector: SelectorType::PlainCss("#неделя".to_string()), + ..Default::default() + }, + ); + check_parse_result( + "неlloworlд.com#@##week", + CosmeticFilterBreakdown { + selector: SelectorType::PlainCss("#week".to_string()), + hostnames: sort_hash_domains(vec!["xn--lloworl-5ggb3f.com"]), + unhide: true, + ..Default::default() + }, + ); + } + + /// As of writing, these procedural filters with multiple comma-separated selectors aren't + /// fully supported by uBO. Here, they are treated as parsing errors. + #[test] + #[cfg(feature = "css-validation")] + fn multi_selector_procedural_filters() { + assert!(parse_cf("example.com##h1:has-text(Example Domain),p:has-text(More)").is_err()); + assert!(parse_cf("example.com##h1,p:has-text(ill)").is_err()); + assert!(parse_cf("example.com##h1:has-text(om),p").is_err()); + } + + #[test] + #[cfg(feature = "css-validation")] + fn procedural_operators() { + /// Check against simple `example.com` domains. Domain parsing is well-handled by other + /// tests, but procedural filters cannot be generic. + fn check_procedural(raw: &str, expected_selectors: Vec) { + check_parse_result( + &format!("example.com##{}", raw), + CosmeticFilterBreakdown { + selector: SelectorType::Procedural(expected_selectors), + hostnames: sort_hash_domains(vec!["example.com"]), + ..Default::default() + }, + ); + } + check_procedural( + ".items:has-text(Sponsored)", + vec![ + CosmeticFilterOperator::CssSelector(".items".to_string()), + CosmeticFilterOperator::HasText("Sponsored".to_string()), + ], + ); + check_procedural( + "div.items:has(p):has-text(Sponsored)", + vec![ + CosmeticFilterOperator::CssSelector("div.items:has(p)".to_string()), + CosmeticFilterOperator::HasText("Sponsored".to_string()), + ], + ); + check_procedural( + "div.items:has-text(Sponsored):has(p)", + vec![ + CosmeticFilterOperator::CssSelector("div.items".to_string()), + CosmeticFilterOperator::HasText("Sponsored".to_string()), + CosmeticFilterOperator::CssSelector(":has(p)".to_string()), + ], + ); + check_procedural( + ".items:has-text(Sponsored) .container", + vec![ + CosmeticFilterOperator::CssSelector(".items".to_string()), + CosmeticFilterOperator::HasText("Sponsored".to_string()), + CosmeticFilterOperator::CssSelector(" .container".to_string()), + ], + ); + check_procedural( + ".items:has-text(Sponsored) > .container", + vec![ + CosmeticFilterOperator::CssSelector(".items".to_string()), + CosmeticFilterOperator::HasText("Sponsored".to_string()), + CosmeticFilterOperator::CssSelector(" > .container".to_string()), + ], + ); + check_procedural( + ".items:has-text(Sponsored) + .container:has-text(Ad) ~ div", + vec![ + CosmeticFilterOperator::CssSelector(".items".to_string()), + CosmeticFilterOperator::HasText("Sponsored".to_string()), + CosmeticFilterOperator::CssSelector(" + .container".to_string()), + CosmeticFilterOperator::HasText("Ad".to_string()), + CosmeticFilterOperator::CssSelector(" ~ div".to_string()), + ], + ); + } + + #[test] + #[cfg(feature = "css-validation")] + fn unsupported() { + assert!(parse_cf("yandex.*##.serp-item:if(:scope > div.organic div.organic__subtitle:matches-css-after(content: /[Рр]еклама/))").is_err()); + assert!(parse_cf( + r#"facebook.com,facebookcorewwwi.onion##.ego_column:if(a[href^="/campaign/landing"])"# + ) + .is_err()); + assert!(parse_cf(r#"readcomiconline.to##^script:has-text(this[atob)"#).is_err()); + assert!(parse_cf("##").is_err()); + assert!(parse_cf("").is_err()); + + // `:has` was previously limited to procedural filtering, but is now a native CSS feature. + assert!( + parse_cf(r#"thedailywtf.com##.article-body > div:has(a[href*="utm_medium"])"#).is_ok() + ); + + // `:has-text` and `:xpath` are now supported procedural filters + assert!(parse_cf("twitter.com##article:has-text(/Promoted|Gesponsert|Реклама|Promocionado/):xpath(../..)").is_ok()); + + // generic procedural filters are not supported + assert!(parse_cf("##.t-rec > .t886:has-text(cookies)").is_err()); + } + + #[test] + fn hidden_generic() { + let rule = parse_cf("##.selector").unwrap(); + assert!(rule.hidden_generic_rule().is_none()); + + let rule = parse_cf("test.com##.selector").unwrap(); + assert!(rule.hidden_generic_rule().is_none()); + + let rule = parse_cf("test.*##.selector").unwrap(); + assert!(rule.hidden_generic_rule().is_none()); + + let rule = parse_cf("test.com,~a.test.com##.selector").unwrap(); + assert!(rule.hidden_generic_rule().is_none()); + + let rule = parse_cf("test.*,~a.test.com##.selector").unwrap(); + assert!(rule.hidden_generic_rule().is_none()); + + let rule = parse_cf("test.*,~a.test.*##.selector").unwrap(); + assert!(rule.hidden_generic_rule().is_none()); + + let rule = parse_cf("test.com#@#.selector").unwrap(); + assert!(rule.hidden_generic_rule().is_none()); + + let rule = parse_cf("~test.com##.selector").unwrap(); + assert_eq!( + CosmeticFilterBreakdown::from(rule.hidden_generic_rule().unwrap()), + parse_cf("##.selector").unwrap().into(), + ); + + let rule = parse_cf("~test.*##.selector").unwrap(); + assert_eq!( + CosmeticFilterBreakdown::from(rule.hidden_generic_rule().unwrap()), + parse_cf("##.selector").unwrap().into(), + ); + + let rule = parse_cf("~test.*,~a.test.*##.selector").unwrap(); + assert_eq!( + CosmeticFilterBreakdown::from(rule.hidden_generic_rule().unwrap()), + parse_cf("##.selector").unwrap().into(), + ); + + let rule = parse_cf("test.com##.selector:style(border-radius: 13px)").unwrap(); + assert!(rule.hidden_generic_rule().is_none()); + + let rule = parse_cf("test.*##.selector:style(border-radius: 13px)").unwrap(); + assert!(rule.hidden_generic_rule().is_none()); + + let rule = parse_cf("~test.com##.selector:style(border-radius: 13px)").unwrap(); + assert!(rule.hidden_generic_rule().is_none()); + + let rule = parse_cf("~test.*##.selector:style(border-radius: 13px)").unwrap(); + assert!(rule.hidden_generic_rule().is_none()); + + let rule = parse_cf("test.com#@#.selector:style(border-radius: 13px)").unwrap(); + assert!(rule.hidden_generic_rule().is_none()); + + let rule = parse_cf("test.com##+js(nowebrtc.js)").unwrap(); + assert!(rule.hidden_generic_rule().is_none()); + + let rule = parse_cf("test.*##+js(nowebrtc.js)").unwrap(); + assert!(rule.hidden_generic_rule().is_none()); + + let rule = parse_cf("~test.com##+js(nowebrtc.js)").unwrap(); + assert!(rule.hidden_generic_rule().is_none()); + + let rule = parse_cf("~test.*##+js(nowebrtc.js)").unwrap(); + assert!(rule.hidden_generic_rule().is_none()); + + let rule = parse_cf("test.com#@#+js(nowebrtc.js)").unwrap(); + assert!(rule.hidden_generic_rule().is_none()); + } +} + +#[cfg(test)] +mod util_tests { + use super::super::*; + use crate::utils::fast_hash; + + #[test] + fn label_hashing() { + assert_eq!( + get_hashes_from_labels("foo.bar.baz", 11, 11), + vec![ + fast_hash("baz"), + fast_hash("bar.baz"), + fast_hash("foo.bar.baz") + ] + ); + assert_eq!( + get_hashes_from_labels("foo.bar.baz.com", 15, 8), + vec![ + fast_hash("baz.com"), + fast_hash("bar.baz.com"), + fast_hash("foo.bar.baz.com") + ] + ); + assert_eq!( + get_hashes_from_labels("foo.bar.baz.com", 11, 11), + vec![ + fast_hash("baz"), + fast_hash("bar.baz"), + fast_hash("foo.bar.baz") + ] + ); + assert_eq!( + get_hashes_from_labels("foo.bar.baz.com", 11, 8), + vec![ + fast_hash("baz"), + fast_hash("bar.baz"), + fast_hash("foo.bar.baz") + ] + ); + } + + #[test] + fn without_public_suffix() { + assert_eq!(get_hostname_without_public_suffix("", ""), None); + assert_eq!(get_hostname_without_public_suffix("com", ""), None); + assert_eq!(get_hostname_without_public_suffix("com", "com"), None); + assert_eq!( + get_hostname_without_public_suffix("foo.com", "foo.com"), + Some(("foo", "com")) + ); + assert_eq!( + get_hostname_without_public_suffix("foo.bar.com", "bar.com"), + Some(("foo.bar", "com")) + ); + assert_eq!( + get_hostname_without_public_suffix("test.github.io", "test.github.io"), + Some(("test", "github.io")) + ); + } +} + +#[cfg(test)] +mod matching_tests { + use super::super::*; + use crate::utils::bin_lookup; + + trait MatchByStr { + fn matches(&self, request_entities: &[Hash], request_hostnames: &[Hash]) -> bool; + fn matches_str(&self, hostname: &str, domain: &str) -> bool; + } + + impl MatchByStr for CosmeticFilter { + /// `hostname` and `domain` should be specified as, e.g. "subdomain.domain.com" and + /// "domain.com", respectively. This function will panic if the specified `domain` is + /// longer than the specified `hostname`. + fn matches_str(&self, hostname: &str, domain: &str) -> bool { + debug_assert!(hostname.len() >= domain.len()); + + let request_entities = get_entity_hashes_from_labels(hostname, domain); + + let request_hostnames = get_hostname_hashes_from_labels(hostname, domain); + + self.matches(&request_entities[..], &request_hostnames[..]) + } + + /// Check whether this rule applies to content from the hostname and domain corresponding to + /// the provided hash lists. + /// + /// See the `matches_str` test function for an example of how to convert hostnames and + /// domains into the appropriate hash lists. + fn matches(&self, request_entities: &[Hash], request_hostnames: &[Hash]) -> bool { + let has_hostname_constraint = self.has_hostname_constraint(); + if !has_hostname_constraint { + return true; + } + if request_entities.is_empty() + && request_hostnames.is_empty() + && has_hostname_constraint + { + return false; + } + + if let Some(ref filter_not_hostnames) = self.not_hostnames { + if request_hostnames + .iter() + .any(|hash| bin_lookup(filter_not_hostnames, *hash)) + { + return false; + } + } + + if let Some(ref filter_not_entities) = self.not_entities { + if request_entities + .iter() + .any(|hash| bin_lookup(filter_not_entities, *hash)) + { + return false; + } + } + + if self.hostnames.is_some() || self.entities.is_some() { + if let Some(ref filter_hostnames) = self.hostnames { + if request_hostnames + .iter() + .any(|hash| bin_lookup(filter_hostnames, *hash)) + { + return true; + } + } + + if let Some(ref filter_entities) = self.entities { + if request_entities + .iter() + .any(|hash| bin_lookup(filter_entities, *hash)) + { + return true; + } + } + + return false; + } + + true + } + } + + fn parse_cf(rule: &str) -> Result { + CosmeticFilter::parse(rule, false, Default::default()) + } + + #[test] + fn generic_filter() { + let rule = parse_cf("##.selector").unwrap(); + assert!(rule.matches_str("foo.com", "foo.com")); + } + + #[test] + fn single_domain() { + let rule = parse_cf("foo.com##.selector").unwrap(); + assert!(rule.matches_str("foo.com", "foo.com")); + assert!(!rule.matches_str("bar.com", "bar.com")); + } + + #[test] + fn multiple_domains() { + let rule = parse_cf("foo.com,test.com##.selector").unwrap(); + assert!(rule.matches_str("foo.com", "foo.com")); + assert!(rule.matches_str("test.com", "test.com")); + assert!(!rule.matches_str("bar.com", "bar.com")); + } + + #[test] + fn subdomain() { + let rule = parse_cf("foo.com,test.com##.selector").unwrap(); + assert!(rule.matches_str("sub.foo.com", "foo.com")); + assert!(rule.matches_str("sub.test.com", "test.com")); + + let rule = parse_cf("foo.com,sub.test.com##.selector").unwrap(); + assert!(rule.matches_str("sub.test.com", "test.com")); + assert!(!rule.matches_str("test.com", "test.com")); + assert!(!rule.matches_str("com", "com")); + } + + #[test] + fn entity() { + let rule = parse_cf("foo.com,sub.test.*##.selector").unwrap(); + assert!(rule.matches_str("foo.com", "foo.com")); + assert!(rule.matches_str("bar.foo.com", "foo.com")); + assert!(rule.matches_str("sub.test.com", "test.com")); + assert!(rule.matches_str("sub.test.fr", "test.fr")); + assert!(!rule.matches_str("sub.test.evil.biz", "evil.biz")); + + let rule = parse_cf("foo.*##.selector").unwrap(); + assert!(rule.matches_str("foo.co.uk", "foo.co.uk")); + assert!(rule.matches_str("bar.foo.co.uk", "foo.co.uk")); + assert!(rule.matches_str("baz.bar.foo.co.uk", "foo.co.uk")); + assert!(!rule.matches_str("foo.evil.biz", "evil.biz")); + } + + #[test] + fn nonmatching() { + let rule = parse_cf("foo.*##.selector").unwrap(); + assert!(!rule.matches_str("foo.bar.com", "bar.com")); + assert!(!rule.matches_str("bar-foo.com", "bar-foo.com")); + } + + #[test] + fn entity_negations() { + let rule = parse_cf("~foo.*##.selector").unwrap(); + assert!(!rule.matches_str("foo.com", "foo.com")); + assert!(rule.matches_str("foo.evil.biz", "evil.biz")); + + let rule = parse_cf("~foo.*,~bar.*##.selector").unwrap(); + assert!(rule.matches_str("baz.com", "baz.com")); + assert!(!rule.matches_str("foo.com", "foo.com")); + assert!(!rule.matches_str("sub.foo.com", "foo.com")); + assert!(!rule.matches_str("bar.com", "bar.com")); + assert!(!rule.matches_str("sub.bar.com", "bar.com")); + } + + #[test] + fn hostname_negations() { + let rule = parse_cf("~foo.com##.selector").unwrap(); + assert!(!rule.matches_str("foo.com", "foo.com")); + assert!(!rule.matches_str("bar.foo.com", "foo.com")); + assert!(rule.matches_str("foo.com.bar", "com.bar")); + assert!(rule.matches_str("foo.co.uk", "foo.co.uk")); + + let rule = parse_cf("~foo.com,~foo.de,~bar.com##.selector").unwrap(); + assert!(!rule.matches_str("foo.com", "foo.com")); + assert!(!rule.matches_str("sub.foo.com", "foo.com")); + assert!(!rule.matches_str("foo.de", "foo.de")); + assert!(!rule.matches_str("sub.foo.de", "foo.de")); + assert!(!rule.matches_str("bar.com", "bar.com")); + assert!(!rule.matches_str("sub.bar.com", "bar.com")); + assert!(rule.matches_str("bar.de", "bar.de")); + assert!(rule.matches_str("sub.bar.de", "bar.de")); + } + + #[test] + fn entity_with_suffix_exception() { + let rule = parse_cf("foo.*,~foo.com##.selector").unwrap(); + assert!(!rule.matches_str("foo.com", "foo.com")); + assert!(!rule.matches_str("sub.foo.com", "foo.com")); + assert!(rule.matches_str("foo.de", "foo.de")); + assert!(rule.matches_str("sub.foo.de", "foo.de")); + } + + #[test] + fn entity_with_subdomain_exception() { + let rule = parse_cf("foo.*,~sub.foo.*##.selector").unwrap(); + assert!(rule.matches_str("foo.com", "foo.com")); + assert!(rule.matches_str("foo.de", "foo.de")); + assert!(!rule.matches_str("sub.foo.com", "foo.com")); + assert!(!rule.matches_str("bar.com", "bar.com")); + assert!(rule.matches_str("sub2.foo.com", "foo.com")); + } + + #[test] + fn no_domain_provided() { + let rule = parse_cf("foo.*##.selector").unwrap(); + assert!(!rule.matches_str("foo.com", "")); + } + + #[test] + fn no_hostname_provided() { + let rule = parse_cf("domain.com##.selector").unwrap(); + assert!(!rule.matches_str("", "")); + let rule = parse_cf("domain.*##.selector").unwrap(); + assert!(!rule.matches_str("", "")); + let rule = parse_cf("~domain.*##.selector").unwrap(); + assert!(!rule.matches_str("", "")); + let rule = parse_cf("~domain.com##.selector").unwrap(); + assert!(!rule.matches_str("", "")); + } + + #[test] + fn respects_etld() { + let rule = parse_cf("github.io##.selector").unwrap(); + assert!(rule.matches_str("test.github.io", "github.io")); + } + + #[test] + fn multiple_selectors() { + assert!( + parse_cf("youtube.com##.masthead-ad-control,.ad-div,.pyv-afc-ads-container").is_ok() + ); + assert!(parse_cf("m.economictimes.com###appBanner,#stickyBanner").is_ok()); + assert!(parse_cf("googledrivelinks.com###wpsafe-generate, #wpsafe-link:style(display: block !important;)").is_ok()); + } + + #[test] + fn actions() { + assert!(parse_cf("example.com###adBanner:style(background: transparent)").is_ok()); + assert!(parse_cf("example.com###adBanner:remove()").is_ok()); + assert!(parse_cf("example.com###adBanner:remove-attr(style)").is_ok()); + assert!(parse_cf("example.com###adBanner:remove-class(src)").is_ok()); + } + + #[test] + fn zero_width_space() { + assert!(parse_cf(r#"​##a[href^="https://www.g2fame.com/"] > img"#).is_err()); + } + + #[test] + fn adg_regex() { + assert!(parse_cf(r"/^dizipal\d+\.com$/##.web").is_err()); + // Filter is still salvageable if at least one location is supported + assert!(parse_cf(r"/^dizipal\d+\.com,test.net$/##.web").is_ok()); + } + + #[test] + #[cfg(feature = "css-validation")] + fn abp_has_conversion() { + let rule = + parse_cf("imgur.com#?#div.Gallery-Sidebar-PostContainer:-abp-has(div.promoted-hover)") + .unwrap(); + assert_eq!( + rule.plain_css_selector(), + Some("div.Gallery-Sidebar-PostContainer:has(div.promoted-hover)") + ); + let rule = + parse_cf(r##"webtools.fineaty.com#?#div[class*=" hidden-"]:-abp-has(.adsbygoogle)"##) + .unwrap(); + assert_eq!( + rule.plain_css_selector(), + Some(r#"div[class*=" hidden-"]:has(.adsbygoogle)"#) + ); + let rule = parse_cf(r##"facebook.com,facebookcorewwwi.onion#?#._6y8t:-abp-has(a[href="/ads/about/?entry_product=ad_preferences"])"##).unwrap(); + assert_eq!( + rule.plain_css_selector(), + Some(r#"._6y8t:has(a[href="/ads/about/?entry_product=ad_preferences"])"#) + ); + let rule = + parse_cf(r##"mtgarena.pro#?##root > div > div:-abp-has(> .vm-placement)"##).unwrap(); + assert_eq!( + rule.plain_css_selector(), + Some(r#"#root > div > div:has(> .vm-placement)"#) + ); + // Error without `#?#`: + assert!( + parse_cf(r##"mtgarena.pro###root > div > div:-abp-has(> .vm-placement)"##).is_err() + ); + } +} + +#[cfg(test)] +#[cfg(feature = "css-validation")] +mod css_validation_tests { + use super::super::*; + + #[test] + fn bad_selector_inputs() { + assert!(validate_css_selector(r#"rm -rf ./*"#, false).is_err()); + assert!(validate_css_selector( + r#"javascript:alert("All pseudo-classes are valid")"#, + false + ) + .is_ok()); + assert!(validate_css_selector( + r#"javascript:alert("But opening comments are still forbidden" /*)"#, + false + ) + .is_err()); + assert!(validate_css_selector(r#"This is not a CSS selector."#, false).is_err()); + assert!(validate_css_selector(r#"./malware.sh"#, false).is_err()); + assert!(validate_css_selector(r#"https://safesite.ru"#, false).is_err()); + assert!(validate_css_selector( + r#"(function(){var e=60;return String.fromCharCode(e.charCodeAt(0))})();"#, + false + ) + .is_err()); + assert!(validate_css_selector(r#"#!/usr/bin/sh"#, false).is_err()); + assert!(validate_css_selector(r#"input,input/*"#, false).is_err()); + // Accept a closing comment within a string. It should still be impossible to create an + // opening comment to match it. + assert!(validate_css_selector( + r#"input[x="*/{}*{background:url(https://hackvertor.co.uk/images/logo.gif)}"]"#, + false + ) + .is_ok()); + } + + #[test] + fn escaped_quote_in_tag_name() { + assert_eq!( + validate_css_selector(r#"head\""#, false), + Ok(vec![CosmeticFilterOperator::CssSelector( + r#"head\""#.to_string() + )]) + ); + } +} diff --git a/tests/unit/filters/network.rs b/tests/unit/filters/network.rs new file mode 100644 index 00000000..56aca699 --- /dev/null +++ b/tests/unit/filters/network.rs @@ -0,0 +1,1318 @@ +#[cfg(test)] +mod parse_tests { + use super::super::*; + + #[derive(Debug, PartialEq)] + struct NetworkFilterBreakdown { + filter: Option, + hostname: Option, + opt_domains: Option>, + opt_not_domains: Option>, + modifier_option: Option, + + // filter type + is_exception: bool, + is_hostname_anchor: bool, + is_right_anchor: bool, + is_left_anchor: bool, + is_regex: bool, + is_csp: bool, + is_plain: bool, + is_important: bool, + + // Options + first_party: bool, + from_network_types: bool, + from_font: bool, + from_image: bool, + from_media: bool, + from_object: bool, + from_other: bool, + from_ping: bool, + from_script: bool, + from_stylesheet: bool, + from_subdocument: bool, + from_websocket: bool, + from_xml_http_request: bool, + from_document: bool, + match_case: bool, + third_party: bool, + } + + impl From<&NetworkFilter> for NetworkFilterBreakdown { + fn from(filter: &NetworkFilter) -> NetworkFilterBreakdown { + NetworkFilterBreakdown { + filter: filter.filter.string_view(), + hostname: filter.hostname.as_ref().cloned(), + opt_domains: filter.opt_domains.as_ref().cloned(), + opt_not_domains: filter.opt_not_domains.as_ref().cloned(), + modifier_option: filter.modifier_option.as_ref().cloned(), + + // filter type + is_exception: filter.is_exception(), + is_hostname_anchor: filter.is_hostname_anchor(), + is_right_anchor: filter.is_right_anchor(), + is_left_anchor: filter.is_left_anchor(), + is_regex: filter.is_regex(), + is_csp: filter.is_csp(), + is_plain: filter.is_plain(), + is_important: filter.is_important(), + + // Options + first_party: filter.mask.first_party(), + from_network_types: filter.mask.contains(NetworkFilterMask::FROM_NETWORK_TYPES), + from_font: filter.mask.contains(NetworkFilterMask::FROM_FONT), + from_image: filter.mask.contains(NetworkFilterMask::FROM_IMAGE), + from_media: filter.mask.contains(NetworkFilterMask::FROM_MEDIA), + from_object: filter.mask.contains(NetworkFilterMask::FROM_OBJECT), + from_other: filter.mask.contains(NetworkFilterMask::FROM_OTHER), + from_ping: filter.mask.contains(NetworkFilterMask::FROM_PING), + from_script: filter.mask.contains(NetworkFilterMask::FROM_SCRIPT), + from_stylesheet: filter.mask.contains(NetworkFilterMask::FROM_STYLESHEET), + from_subdocument: filter.mask.contains(NetworkFilterMask::FROM_SUBDOCUMENT), + from_websocket: filter.mask.contains(NetworkFilterMask::FROM_WEBSOCKET), + from_xml_http_request: filter.mask.contains(NetworkFilterMask::FROM_XMLHTTPREQUEST), + from_document: filter.mask.contains(NetworkFilterMask::FROM_DOCUMENT), + match_case: filter.mask.match_case(), + third_party: filter.mask.third_party(), + } + } + } + + fn default_network_filter_breakdown() -> NetworkFilterBreakdown { + NetworkFilterBreakdown { + filter: None, + hostname: None, + opt_domains: None, + opt_not_domains: None, + modifier_option: None, + + // filter type + is_exception: false, + is_hostname_anchor: false, + is_right_anchor: false, + is_left_anchor: false, + is_regex: false, + is_csp: false, + is_plain: false, + is_important: false, + + // Options + first_party: true, + from_network_types: true, + from_font: true, + from_image: true, + from_media: true, + from_object: true, + from_other: true, + from_ping: true, + from_script: true, + from_stylesheet: true, + from_subdocument: true, + from_websocket: true, + from_xml_http_request: true, + from_document: false, + match_case: false, + third_party: true, + } + } + + #[test] + // pattern + fn parses_plain_pattern() { + { + let filter = NetworkFilter::parse("ads", true, Default::default()).unwrap(); + let mut defaults = default_network_filter_breakdown(); + defaults.filter = Some(String::from("ads")); + defaults.is_plain = true; + assert_eq!(defaults, NetworkFilterBreakdown::from(&filter)) + } + { + let filter = NetworkFilter::parse("/ads/foo-", true, Default::default()).unwrap(); + let mut defaults = default_network_filter_breakdown(); + defaults.filter = Some(String::from("/ads/foo-")); + defaults.is_plain = true; + assert_eq!(defaults, NetworkFilterBreakdown::from(&filter)) + } + { + let filter = + NetworkFilter::parse("/ads/foo-$important", true, Default::default()).unwrap(); + let mut defaults = default_network_filter_breakdown(); + defaults.filter = Some(String::from("/ads/foo-")); + defaults.is_plain = true; + defaults.is_important = true; + assert_eq!(defaults, NetworkFilterBreakdown::from(&filter)) + } + { + let filter = + NetworkFilter::parse("foo.com/ads$important", true, Default::default()).unwrap(); + let mut defaults = default_network_filter_breakdown(); + defaults.filter = Some(String::from("foo.com/ads")); + defaults.is_plain = true; + defaults.is_important = true; + assert_eq!(defaults, NetworkFilterBreakdown::from(&filter)) + } + } + + #[test] + // ||pattern + fn parses_hostname_anchor_pattern() { + { + let filter = NetworkFilter::parse("||foo.com", true, Default::default()).unwrap(); + let mut defaults = default_network_filter_breakdown(); + defaults.filter = None; + defaults.hostname = Some(String::from("foo.com")); + defaults.is_plain = true; + defaults.is_hostname_anchor = true; + assert_eq!(defaults, NetworkFilterBreakdown::from(&filter)) + } + { + let filter = + NetworkFilter::parse("||foo.com$important", true, Default::default()).unwrap(); + let mut defaults = default_network_filter_breakdown(); + defaults.filter = None; + defaults.hostname = Some(String::from("foo.com")); + defaults.is_plain = true; + defaults.is_hostname_anchor = true; + defaults.is_important = true; + assert_eq!(defaults, NetworkFilterBreakdown::from(&filter)) + } + { + let filter = + NetworkFilter::parse("||foo.com/bar/baz$important", true, Default::default()) + .unwrap(); + let mut defaults = default_network_filter_breakdown(); + defaults.hostname = Some(String::from("foo.com")); + defaults.filter = Some(String::from("/bar/baz")); + defaults.is_plain = true; + defaults.is_hostname_anchor = true; + defaults.is_important = true; + defaults.is_left_anchor = true; + assert_eq!(defaults, NetworkFilterBreakdown::from(&filter)) + } + } + + #[test] + // ||pattern| + fn parses_hostname_right_anchor_pattern() { + { + let filter = NetworkFilter::parse("||foo.com|", true, Default::default()).unwrap(); + let mut defaults = default_network_filter_breakdown(); + defaults.hostname = Some(String::from("foo.com")); + defaults.filter = None; + defaults.is_plain = true; + defaults.is_right_anchor = true; + defaults.is_hostname_anchor = true; + assert_eq!(defaults, NetworkFilterBreakdown::from(&filter)) + } + { + let filter = + NetworkFilter::parse("||foo.com|$important", true, Default::default()).unwrap(); + let mut defaults = default_network_filter_breakdown(); + defaults.hostname = Some(String::from("foo.com")); + defaults.filter = None; + defaults.is_plain = true; + defaults.is_important = true; + defaults.is_right_anchor = true; + defaults.is_hostname_anchor = true; + assert_eq!(defaults, NetworkFilterBreakdown::from(&filter)) + } + { + let filter = + NetworkFilter::parse("||foo.com/bar/baz|$important", true, Default::default()) + .unwrap(); + let mut defaults = default_network_filter_breakdown(); + defaults.hostname = Some(String::from("foo.com")); + defaults.filter = Some(String::from("/bar/baz")); + defaults.is_plain = true; + defaults.is_important = true; + defaults.is_left_anchor = true; + defaults.is_right_anchor = true; + defaults.is_hostname_anchor = true; + assert_eq!(defaults, NetworkFilterBreakdown::from(&filter)) + } + { + let filter = + NetworkFilter::parse("||foo.com^bar/*baz|$important", true, Default::default()) + .unwrap(); + let mut defaults = default_network_filter_breakdown(); + defaults.hostname = Some(String::from("foo.com")); + defaults.filter = Some(String::from("^bar/*baz")); + defaults.is_important = true; + defaults.is_left_anchor = true; + defaults.is_right_anchor = true; + defaults.is_hostname_anchor = true; + defaults.is_regex = true; + assert_eq!(defaults, NetworkFilterBreakdown::from(&filter)) + } + } + + #[test] + // |pattern + fn parses_left_anchor_pattern() { + { + let filter = NetworkFilter::parse("|foo.com", true, Default::default()).unwrap(); + let mut defaults = default_network_filter_breakdown(); + defaults.filter = Some(String::from("foo.com")); + defaults.is_plain = true; + defaults.is_left_anchor = true; + assert_eq!(defaults, NetworkFilterBreakdown::from(&filter)) + } + { + let filter = + NetworkFilter::parse("|foo.com/bar/baz", true, Default::default()).unwrap(); + let mut defaults = default_network_filter_breakdown(); + defaults.filter = Some(String::from("foo.com/bar/baz")); + defaults.is_plain = true; + defaults.is_left_anchor = true; + assert_eq!(defaults, NetworkFilterBreakdown::from(&filter)) + } + { + let filter = + NetworkFilter::parse("|foo.com^bar/*baz", true, Default::default()).unwrap(); + let mut defaults = default_network_filter_breakdown(); + defaults.filter = Some(String::from("foo.com^bar/*baz")); + defaults.is_regex = true; + defaults.is_left_anchor = true; + assert_eq!(defaults, NetworkFilterBreakdown::from(&filter)) + } + } + + #[test] + // |pattern| + fn parses_left_right_anchor_pattern() { + { + let filter = NetworkFilter::parse("|foo.com|", true, Default::default()).unwrap(); + + let mut defaults = default_network_filter_breakdown(); + defaults.filter = Some(String::from("foo.com")); + defaults.is_plain = true; + defaults.is_right_anchor = true; + defaults.is_left_anchor = true; + assert_eq!(defaults, NetworkFilterBreakdown::from(&filter)) + } + { + let filter = NetworkFilter::parse("|foo.com/bar|", true, Default::default()).unwrap(); + + let mut defaults = default_network_filter_breakdown(); + defaults.filter = Some(String::from("foo.com/bar")); + defaults.is_plain = true; + defaults.is_right_anchor = true; + defaults.is_left_anchor = true; + assert_eq!(defaults, NetworkFilterBreakdown::from(&filter)) + } + { + let filter = NetworkFilter::parse("|foo.com*bar^|", true, Default::default()).unwrap(); + + let mut defaults = default_network_filter_breakdown(); + defaults.filter = Some(String::from("foo.com*bar^")); + defaults.is_regex = true; + defaults.is_right_anchor = true; + defaults.is_left_anchor = true; + assert_eq!(defaults, NetworkFilterBreakdown::from(&filter)) + } + } + + #[test] + // ||regexp + fn parses_hostname_anchor_regex_pattern() { + { + let filter = NetworkFilter::parse("||foo.com*bar^", true, Default::default()).unwrap(); + let mut defaults = default_network_filter_breakdown(); + defaults.hostname = Some(String::from("foo.com")); + defaults.filter = Some(String::from("bar^")); + defaults.is_hostname_anchor = true; + defaults.is_regex = true; + defaults.is_plain = false; + assert_eq!(defaults, NetworkFilterBreakdown::from(&filter)) + } + { + let filter = + NetworkFilter::parse("||foo.com^bar*/baz^", true, Default::default()).unwrap(); + let mut defaults = default_network_filter_breakdown(); + defaults.hostname = Some(String::from("foo.com")); + defaults.filter = Some(String::from("^bar*/baz^")); + defaults.is_hostname_anchor = true; + defaults.is_left_anchor = true; + defaults.is_regex = true; + defaults.is_plain = false; + assert_eq!(defaults, NetworkFilterBreakdown::from(&filter)) + } + } + + #[test] + // ||regexp| + fn parses_hostname_right_anchor_regex_pattern() { + { + let filter = NetworkFilter::parse("||foo.com*bar^|", true, Default::default()).unwrap(); + let mut defaults = default_network_filter_breakdown(); + defaults.hostname = Some(String::from("foo.com")); + defaults.filter = Some(String::from("bar^")); + defaults.is_hostname_anchor = true; + defaults.is_right_anchor = true; + defaults.is_regex = true; + defaults.is_plain = false; + assert_eq!(defaults, NetworkFilterBreakdown::from(&filter)) + } + { + let filter = + NetworkFilter::parse("||foo.com^bar*/baz^|", true, Default::default()).unwrap(); + let mut defaults = default_network_filter_breakdown(); + defaults.hostname = Some(String::from("foo.com")); + defaults.filter = Some(String::from("^bar*/baz^")); + defaults.is_hostname_anchor = true; + defaults.is_left_anchor = true; + defaults.is_right_anchor = true; + defaults.is_regex = true; + defaults.is_plain = false; + assert_eq!(defaults, NetworkFilterBreakdown::from(&filter)) + } + } + + #[test] + // |regexp + fn parses_hostname_left_anchor_regex_pattern() { + { + let filter = NetworkFilter::parse("|foo.com*bar^", true, Default::default()).unwrap(); + let mut defaults = default_network_filter_breakdown(); + defaults.hostname = None; + defaults.filter = Some(String::from("foo.com*bar^")); + defaults.is_left_anchor = true; + defaults.is_regex = true; + defaults.is_plain = false; + assert_eq!(defaults, NetworkFilterBreakdown::from(&filter)) + } + { + let filter = + NetworkFilter::parse("|foo.com^bar*/baz^", true, Default::default()).unwrap(); + let mut defaults = default_network_filter_breakdown(); + defaults.hostname = None; + defaults.filter = Some(String::from("foo.com^bar*/baz^")); + defaults.is_left_anchor = true; + defaults.is_regex = true; + defaults.is_plain = false; + assert_eq!(defaults, NetworkFilterBreakdown::from(&filter)) + } + } + + #[test] + // |regexp| + fn parses_hostname_left_right_anchor_regex_pattern() { + { + let filter = NetworkFilter::parse("|foo.com*bar^|", true, Default::default()).unwrap(); + let mut defaults = default_network_filter_breakdown(); + defaults.hostname = None; + defaults.filter = Some(String::from("foo.com*bar^")); + defaults.is_left_anchor = true; + defaults.is_right_anchor = true; + defaults.is_regex = true; + defaults.is_plain = false; + assert_eq!(defaults, NetworkFilterBreakdown::from(&filter)) + } + { + let filter = + NetworkFilter::parse("|foo.com^bar*/baz^|", true, Default::default()).unwrap(); + let mut defaults = default_network_filter_breakdown(); + defaults.hostname = None; + defaults.filter = Some(String::from("foo.com^bar*/baz^")); + defaults.is_left_anchor = true; + defaults.is_right_anchor = true; + defaults.is_regex = true; + defaults.is_plain = false; + assert_eq!(defaults, NetworkFilterBreakdown::from(&filter)) + } + } + + #[test] + // @@pattern + fn parses_exception_pattern() { + { + let filter = NetworkFilter::parse("@@ads", true, Default::default()).unwrap(); + let mut defaults = default_network_filter_breakdown(); + defaults.is_exception = true; + defaults.filter = Some(String::from("ads")); + defaults.is_plain = true; + assert_eq!(defaults, NetworkFilterBreakdown::from(&filter)); + } + { + let filter = NetworkFilter::parse("@@||foo.com/ads", true, Default::default()).unwrap(); + let mut defaults = default_network_filter_breakdown(); + defaults.is_exception = true; + defaults.filter = Some(String::from("/ads")); + defaults.hostname = Some(String::from("foo.com")); + defaults.is_hostname_anchor = true; + defaults.is_left_anchor = true; + defaults.is_plain = true; + assert_eq!(defaults, NetworkFilterBreakdown::from(&filter)); + } + { + let filter = NetworkFilter::parse("@@|foo.com/ads", true, Default::default()).unwrap(); + let mut defaults = default_network_filter_breakdown(); + defaults.is_exception = true; + defaults.filter = Some(String::from("foo.com/ads")); + defaults.is_left_anchor = true; + defaults.is_plain = true; + assert_eq!(defaults, NetworkFilterBreakdown::from(&filter)); + } + { + let filter = NetworkFilter::parse("@@|foo.com/ads|", true, Default::default()).unwrap(); + let mut defaults = default_network_filter_breakdown(); + defaults.is_exception = true; + defaults.filter = Some(String::from("foo.com/ads")); + defaults.is_left_anchor = true; + defaults.is_plain = true; + defaults.is_right_anchor = true; + assert_eq!(defaults, NetworkFilterBreakdown::from(&filter)); + } + { + let filter = NetworkFilter::parse("@@foo.com/ads|", true, Default::default()).unwrap(); + let mut defaults = default_network_filter_breakdown(); + defaults.is_exception = true; + defaults.filter = Some(String::from("foo.com/ads")); + defaults.is_plain = true; + defaults.is_right_anchor = true; + assert_eq!(defaults, NetworkFilterBreakdown::from(&filter)); + } + { + let filter = + NetworkFilter::parse("@@||foo.com/ads|", true, Default::default()).unwrap(); + let mut defaults = default_network_filter_breakdown(); + defaults.is_exception = true; + defaults.filter = Some(String::from("/ads")); + defaults.hostname = Some(String::from("foo.com")); + defaults.is_hostname_anchor = true; + defaults.is_left_anchor = true; + defaults.is_plain = true; + defaults.is_right_anchor = true; + assert_eq!(defaults, NetworkFilterBreakdown::from(&filter)); + } + } + + // Options + + #[test] + fn accepts_any_content_type() { + { + let filter = NetworkFilter::parse("||foo.com", true, Default::default()).unwrap(); + let mut defaults = default_network_filter_breakdown(); + defaults.from_network_types = true; + defaults.hostname = Some(String::from("foo.com")); + defaults.is_hostname_anchor = true; + defaults.is_plain = true; + + assert_eq!(defaults, NetworkFilterBreakdown::from(&filter)); + } + { + let filter = + NetworkFilter::parse("||foo.com$first-party", true, Default::default()).unwrap(); + let mut defaults = default_network_filter_breakdown(); + defaults.from_network_types = true; + defaults.hostname = Some(String::from("foo.com")); + defaults.is_hostname_anchor = true; + defaults.is_plain = true; + defaults.first_party = true; + defaults.third_party = false; + + assert_eq!(defaults, NetworkFilterBreakdown::from(&filter)); + } + { + let filter = + NetworkFilter::parse("||foo.com$third-party", true, Default::default()).unwrap(); + let mut defaults = default_network_filter_breakdown(); + defaults.from_network_types = true; + defaults.hostname = Some(String::from("foo.com")); + defaults.is_hostname_anchor = true; + defaults.is_plain = true; + defaults.first_party = false; + defaults.third_party = true; + + assert_eq!(defaults, NetworkFilterBreakdown::from(&filter)); + } + { + let filter = + NetworkFilter::parse("||foo.com$domain=test.com", true, Default::default()) + .unwrap(); + let mut defaults = default_network_filter_breakdown(); + defaults.from_network_types = true; + defaults.hostname = Some(String::from("foo.com")); + defaults.is_hostname_anchor = true; + defaults.is_plain = true; + defaults.opt_domains = Some(vec![utils::fast_hash("test.com")]); + + assert_eq!(defaults, NetworkFilterBreakdown::from(&filter)); + } + { + let filter = + NetworkFilter::parse("||foo.com$domain=test.com", true, Default::default()) + .unwrap(); + let mut defaults = default_network_filter_breakdown(); + defaults.from_network_types = true; + defaults.hostname = Some(String::from("foo.com")); + defaults.is_hostname_anchor = true; + defaults.is_plain = true; + defaults.opt_domains = Some(vec![utils::fast_hash("test.com")]); + + assert_eq!(defaults, NetworkFilterBreakdown::from(&filter)); + } + } + + #[test] + fn parses_important() { + { + let filter = + NetworkFilter::parse("||foo.com$important", true, Default::default()).unwrap(); + assert_eq!(filter.is_important(), true); + } + { + // parses ~important + let filter = NetworkFilter::parse("||foo.com$~important", true, Default::default()); + assert_eq!(filter.err(), Some(NetworkFilterError::NegatedImportant)); + } + { + // defaults to false + let filter = NetworkFilter::parse("||foo.com", true, Default::default()).unwrap(); + assert_eq!(filter.is_important(), false); + } + } + + #[test] + fn parses_csp() { + { + let filter = NetworkFilter::parse("||foo.com", true, Default::default()).unwrap(); + assert_eq!(filter.modifier_option, None); + } + { + // parses simple CSP + let filter = + NetworkFilter::parse(r#"||foo.com$csp=self bar """#, true, Default::default()) + .unwrap(); + assert_eq!(filter.is_csp(), true); + assert_eq!(filter.modifier_option, Some(String::from(r#"self bar """#))); + } + { + // parses empty CSP + let filter = NetworkFilter::parse("||foo.com$csp", true, Default::default()).unwrap(); + assert_eq!(filter.is_csp(), true); + assert_eq!(filter.modifier_option, None); + } + { + // CSP mixed with content type is an error + let filter = NetworkFilter::parse( + r#"||foo.com$domain=foo|bar,csp=self bar "",image"#, + true, + Default::default(), + ); + assert_eq!(filter.err(), Some(NetworkFilterError::CspWithContentType)); + } + } + + #[test] + fn parses_domain() { + // parses domain + { + let filter = + NetworkFilter::parse("||foo.com$domain=bar.com", true, Default::default()).unwrap(); + assert_eq!(filter.opt_domains, Some(vec![utils::fast_hash("bar.com")])); + assert_eq!(filter.opt_not_domains, None); + } + { + let filter = + NetworkFilter::parse("||foo.com$domain=bar.com|baz.com", true, Default::default()) + .unwrap(); + let mut domains = vec![utils::fast_hash("bar.com"), utils::fast_hash("baz.com")]; + domains.sort_unstable(); + assert_eq!(filter.opt_domains, Some(domains)); + assert_eq!(filter.opt_not_domains, None); + } + + // parses ~domain + { + let filter = + NetworkFilter::parse("||foo.com$domain=~bar.com", true, Default::default()) + .unwrap(); + assert_eq!(filter.opt_domains, None); + assert_eq!( + filter.opt_not_domains, + Some(vec![utils::fast_hash("bar.com")]) + ); + } + { + let filter = NetworkFilter::parse( + "||foo.com$domain=~bar.com|~baz.com", + true, + Default::default(), + ) + .unwrap(); + assert_eq!(filter.opt_domains, None); + let mut domains = vec![utils::fast_hash("bar.com"), utils::fast_hash("baz.com")]; + domains.sort_unstable(); + assert_eq!(filter.opt_not_domains, Some(domains)); + } + // parses domain and ~domain + { + let filter = NetworkFilter::parse( + "||foo.com$domain=~bar.com|baz.com", + true, + Default::default(), + ) + .unwrap(); + assert_eq!(filter.opt_domains, Some(vec![utils::fast_hash("baz.com")])); + assert_eq!( + filter.opt_not_domains, + Some(vec![utils::fast_hash("bar.com")]) + ); + } + { + let filter = NetworkFilter::parse( + "||foo.com$domain=bar.com|~baz.com", + true, + Default::default(), + ) + .unwrap(); + assert_eq!(filter.opt_domains, Some(vec![utils::fast_hash("bar.com")])); + assert_eq!( + filter.opt_not_domains, + Some(vec![utils::fast_hash("baz.com")]) + ); + } + { + let filter = + NetworkFilter::parse("||foo.com$domain=foo|~bar|baz", true, Default::default()) + .unwrap(); + let mut domains = vec![utils::fast_hash("foo"), utils::fast_hash("baz")]; + domains.sort(); + assert_eq!(filter.opt_domains, Some(domains)); + assert_eq!(filter.opt_not_domains, Some(vec![utils::fast_hash("bar")])); + } + // defaults to no constraint + { + let filter = NetworkFilter::parse("||foo.com", true, Default::default()).unwrap(); + assert_eq!(filter.opt_domains, None); + assert_eq!(filter.opt_not_domains, None); + } + // `from` is an alias for `domain` + { + let filter = + NetworkFilter::parse("||foo.com$from=bar.com", true, Default::default()).unwrap(); + assert_eq!(filter.opt_domains, Some(vec![utils::fast_hash("bar.com")])); + assert_eq!(filter.opt_not_domains, None); + } + { + let filter = NetworkFilter::parse( + r"||video.twimg.com/ext_tw_video/*/*.m3u8$domain=/^i[a-z]*\.strmrdr[a-z]+\..*/", + true, + Default::default(), + ); + assert_eq!(filter.err(), Some(NetworkFilterError::NoSupportedDomains)); + } + } + + #[test] + fn parses_redirects() { + // parses redirect + { + let filter = + NetworkFilter::parse("||foo.com$redirect=bar.js", true, Default::default()) + .unwrap(); + assert_eq!(filter.modifier_option, Some(String::from("bar.js"))); + } + { + let filter = + NetworkFilter::parse("$redirect=bar.js", true, Default::default()).unwrap(); + assert_eq!(filter.modifier_option, Some(String::from("bar.js"))); + } + // parses ~redirect + { + // ~redirect is not a valid option + let filter = NetworkFilter::parse("||foo.com$~redirect", true, Default::default()); + assert_eq!(filter.err(), Some(NetworkFilterError::NegatedRedirection)); + } + // parses redirect without a value + { + // Not valid + let filter = NetworkFilter::parse("||foo.com$redirect", true, Default::default()); + assert_eq!(filter.err(), Some(NetworkFilterError::EmptyRedirection)); + } + { + let filter = NetworkFilter::parse("||foo.com$redirect=", true, Default::default()); + assert_eq!(filter.err(), Some(NetworkFilterError::EmptyRedirection)) + } + // defaults to false + { + let filter = NetworkFilter::parse("||foo.com", true, Default::default()).unwrap(); + assert_eq!(filter.modifier_option, None); + } + } + + #[test] + fn parses_removeparam() { + { + let filter = NetworkFilter::parse("||foo.com^$removeparam", true, Default::default()); + assert!(filter.is_err()); + } + { + let filter = NetworkFilter::parse("$~removeparam=test", true, Default::default()); + assert!(filter.is_err()); + } + { + let filter = + NetworkFilter::parse("@@||foo.com^$removeparam=test", true, Default::default()); + assert!(filter.is_err()); + } + { + let filter = NetworkFilter::parse("||foo.com^$removeparam=", true, Default::default()); + assert!(filter.is_err()); + } + { + let filter = NetworkFilter::parse( + "||foo.com^$removeparam=test,redirect=test", + true, + Default::default(), + ); + assert!(filter.is_err()); + } + { + let filter = NetworkFilter::parse( + "||foo.com^$removeparam=test,removeparam=test2", + true, + Default::default(), + ); + assert!(filter.is_err()); + } + { + let filter = + NetworkFilter::parse("||foo.com^$removeparam=𝐔𝐍𝐈𝐂𝐎𝐃𝐄🧋", true, Default::default()); + assert!(filter.is_err()); + } + { + let filter = + NetworkFilter::parse("||foo.com^$removeparam=/abc.*/", true, Default::default()); + assert_eq!(filter, Err(NetworkFilterError::RemoveparamRegexUnsupported)); + } + { + let filter = + NetworkFilter::parse("||foo.com^$removeparam=test", true, Default::default()) + .unwrap(); + assert!(filter.is_removeparam()); + assert_eq!(filter.modifier_option, Some("test".into())); + } + } + + #[test] + fn parses_match_case() { + // match-case on non-regex rules is invalid + { + assert!( + NetworkFilter::parse("||foo.com$match-case", true, Default::default()).is_err() + ); + } + { + assert!( + NetworkFilter::parse("||foo.com$image,match-case", true, Default::default()) + .is_err() + ); + } + { + assert!(NetworkFilter::parse( + "||foo.com$media,match-case,image", + true, + Default::default() + ) + .is_err()); + } + // match-case on regex rules is ok + { + let filter = NetworkFilter::parse( + r#"/foo[0-9]*\.com/$media,match-case,image"#, + true, + Default::default(), + ) + .unwrap(); + assert_eq!(filter.mask.match_case(), true); + } + { + let filter = NetworkFilter::parse(r#"/^https?:\/\/[a-z]{8,15}\.top\/[-a-z]{4,}\.css\?aHR0c[\/0-9a-zA-Z]{33,}=?=?\$/$css,3p,match-case"#, true, Default::default()).unwrap(); + assert_eq!(filter.mask.match_case(), true); + } + + // parses ~match-case + { + // ~match-case is not supported + let filter = NetworkFilter::parse("||foo.com$~match-case", true, Default::default()); + assert_eq!( + filter.err(), + Some(NetworkFilterError::NegatedOptionMatchCase) + ); + } + + // defaults to false + { + let filter = NetworkFilter::parse("||foo.com", true, Default::default()).unwrap(); + assert_eq!(filter.mask.match_case(), false) + } + } + + #[test] + fn parses_first_party() { + // parses first-party + assert_eq!( + NetworkFilter::parse("||foo.com$first-party", true, Default::default()) + .unwrap() + .mask + .first_party(), + true + ); + assert_eq!( + NetworkFilter::parse("@@||foo.com$first-party", true, Default::default()) + .unwrap() + .mask + .first_party(), + true + ); + assert_eq!( + NetworkFilter::parse("@@||foo.com|$first-party", true, Default::default()) + .unwrap() + .mask + .first_party(), + true + ); + // parses ~first-party + assert_eq!( + NetworkFilter::parse("||foo.com$~first-party", true, Default::default()) + .unwrap() + .mask + .first_party(), + false + ); + assert_eq!( + NetworkFilter::parse( + "||foo.com$first-party,~first-party", + true, + Default::default() + ) + .unwrap() + .mask + .first_party(), + false + ); + // defaults to true + assert_eq!( + NetworkFilter::parse("||foo.com", true, Default::default()) + .unwrap() + .mask + .first_party(), + true + ); + } + + #[test] + fn parses_third_party() { + // parses third-party + assert_eq!( + NetworkFilter::parse("||foo.com$third-party", true, Default::default()) + .unwrap() + .mask + .third_party(), + true + ); + assert_eq!( + NetworkFilter::parse("@@||foo.com$third-party", true, Default::default()) + .unwrap() + .mask + .third_party(), + true + ); + assert_eq!( + NetworkFilter::parse("@@||foo.com|$third-party", true, Default::default()) + .unwrap() + .mask + .third_party(), + true + ); + assert_eq!( + NetworkFilter::parse("||foo.com$~first-party", true, Default::default()) + .unwrap() + .mask + .third_party(), + true + ); + // parses ~third-party + assert_eq!( + NetworkFilter::parse("||foo.com$~third-party", true, Default::default()) + .unwrap() + .mask + .third_party(), + false + ); + assert_eq!( + NetworkFilter::parse( + "||foo.com$first-party,~third-party", + true, + Default::default() + ) + .unwrap() + .mask + .third_party(), + false + ); + // defaults to true + assert_eq!( + NetworkFilter::parse("||foo.com", true, Default::default()) + .unwrap() + .mask + .third_party(), + true + ); + } + + #[test] + fn parses_generic_hide() { + { + let filter = NetworkFilter::parse("||foo.com$generichide", true, Default::default()); + assert!(filter.is_err()); + } + { + let filter = + NetworkFilter::parse("@@||foo.com$generichide", true, Default::default()).unwrap(); + assert_eq!(filter.is_exception(), true); + assert_eq!(filter.is_generic_hide(), true); + } + { + let filter = + NetworkFilter::parse("@@||foo.com|$generichide", true, Default::default()).unwrap(); + assert_eq!(filter.is_exception(), true); + assert_eq!(filter.is_generic_hide(), true); + } + { + let filter = NetworkFilter::parse( + "@@$generichide,domain=example.com", + true, + Default::default(), + ) + .unwrap(); + assert_eq!(filter.is_generic_hide(), true); + let breakdown = NetworkFilterBreakdown::from(&filter); + assert_eq!( + breakdown.opt_domains, + Some(vec![utils::fast_hash("example.com")]) + ); + } + { + let filter = NetworkFilter::parse("||foo.com", true, Default::default()).unwrap(); + assert_eq!(filter.is_generic_hide(), false); + } + } + + #[test] + fn parses_hosts_style() { + { + let filter = NetworkFilter::parse_hosts_style("example.com", true).unwrap(); + assert!(filter.raw_line.is_some()); + assert_eq!(*filter.raw_line.clone().unwrap(), "||example.com^"); + let mut defaults = default_network_filter_breakdown(); + defaults.hostname = Some("example.com".to_string()); + defaults.is_plain = true; + defaults.is_hostname_anchor = true; + defaults.is_right_anchor = true; + defaults.from_document = true; + assert_eq!(defaults, NetworkFilterBreakdown::from(&filter)) + } + { + let filter = NetworkFilter::parse_hosts_style("www.example.com", true).unwrap(); + assert!(filter.raw_line.is_some()); + assert_eq!(*filter.raw_line.clone().unwrap(), "||example.com^"); + let mut defaults = default_network_filter_breakdown(); + defaults.hostname = Some("example.com".to_string()); + defaults.is_plain = true; + defaults.is_hostname_anchor = true; + defaults.is_right_anchor = true; + defaults.from_document = true; + assert_eq!(defaults, NetworkFilterBreakdown::from(&filter)) + } + { + let filter = NetworkFilter::parse_hosts_style("malware.example.com", true).unwrap(); + assert!(filter.raw_line.is_some()); + assert_eq!(*filter.raw_line.clone().unwrap(), "||malware.example.com^"); + let mut defaults = default_network_filter_breakdown(); + defaults.hostname = Some("malware.example.com".to_string()); + defaults.is_plain = true; + defaults.is_hostname_anchor = true; + defaults.is_right_anchor = true; + defaults.from_document = true; + assert_eq!(defaults, NetworkFilterBreakdown::from(&filter)) + } + } + + #[test] + fn handles_unsupported_options() { + let options = vec!["genericblock", "inline-script", "popunder", "popup", "woot"]; + + for option in options { + let filter = + NetworkFilter::parse(&format!("||foo.com${}", option), true, Default::default()); + assert!(filter.err().is_some()); + } + } + + #[test] + fn handles_content_type_options() { + let options = vec![ + "font", + "image", + "media", + "object", + "object-subrequest", + "other", + "ping", + "script", + "stylesheet", + "subdocument", + "websocket", + "xmlhttprequest", + "xhr", + ]; + + fn set_all_options(breakdown: &mut NetworkFilterBreakdown, value: bool) { + breakdown.from_font = value; + breakdown.from_image = value; + breakdown.from_media = value; + breakdown.from_object = value; + breakdown.from_other = value; + breakdown.from_ping = value; + breakdown.from_script = value; + breakdown.from_stylesheet = value; + breakdown.from_subdocument = value; + breakdown.from_websocket = value; + breakdown.from_xml_http_request = value; + } + + fn set_option(option: &str, breakdown: &mut NetworkFilterBreakdown, value: bool) { + match option { + "font" => breakdown.from_font = value, + "image" => breakdown.from_image = value, + "media" => breakdown.from_media = value, + "object" => breakdown.from_object = value, + "object-subrequest" => breakdown.from_object = value, + "other" => breakdown.from_other = value, + "ping" => breakdown.from_ping = value, + "script" => breakdown.from_script = value, + "stylesheet" => breakdown.from_stylesheet = value, + "subdocument" => breakdown.from_subdocument = value, + "websocket" => breakdown.from_websocket = value, + "xmlhttprequest" => breakdown.from_xml_http_request = value, + "xhr" => breakdown.from_xml_http_request = value, + _ => unreachable!(), + } + } + + for option in options { + // positive + { + let filter = NetworkFilter::parse( + &format!("||foo.com${}", option), + true, + Default::default(), + ) + .unwrap(); + let mut defaults = default_network_filter_breakdown(); + defaults.hostname = Some(String::from("foo.com")); + defaults.is_hostname_anchor = true; + defaults.is_plain = true; + defaults.from_network_types = false; + set_all_options(&mut defaults, false); + set_option(&option, &mut defaults, true); + assert_eq!(defaults, NetworkFilterBreakdown::from(&filter)); + } + + { + let filter = NetworkFilter::parse( + &format!("||foo.com$object,{}", option), + true, + Default::default(), + ) + .unwrap(); + let mut defaults = default_network_filter_breakdown(); + defaults.hostname = Some(String::from("foo.com")); + defaults.is_hostname_anchor = true; + defaults.is_plain = true; + defaults.from_network_types = false; + set_all_options(&mut defaults, false); + set_option(&option, &mut defaults, true); + set_option("object", &mut defaults, true); + assert_eq!(defaults, NetworkFilterBreakdown::from(&filter)); + } + + { + let filter = NetworkFilter::parse( + &format!("||foo.com$domain=bar.com,{}", option), + true, + Default::default(), + ) + .unwrap(); + let mut defaults = default_network_filter_breakdown(); + defaults.hostname = Some(String::from("foo.com")); + defaults.is_hostname_anchor = true; + defaults.is_plain = true; + defaults.from_network_types = false; + defaults.opt_domains = Some(vec![utils::fast_hash("bar.com")]); + set_all_options(&mut defaults, false); + set_option(&option, &mut defaults, true); + assert_eq!(defaults, NetworkFilterBreakdown::from(&filter)); + } + + // negative + { + let filter = NetworkFilter::parse( + &format!("||foo.com$~{}", option), + true, + Default::default(), + ) + .unwrap(); + let mut defaults = default_network_filter_breakdown(); + defaults.hostname = Some(String::from("foo.com")); + defaults.is_hostname_anchor = true; + defaults.is_plain = true; + defaults.from_network_types = false; + set_all_options(&mut defaults, true); + set_option(&option, &mut defaults, false); + assert_eq!(defaults, NetworkFilterBreakdown::from(&filter)); + } + + { + let filter = NetworkFilter::parse( + &format!("||foo.com${},~{}", option, option), + true, + Default::default(), + ) + .unwrap(); + let mut defaults = default_network_filter_breakdown(); + defaults.hostname = Some(String::from("foo.com")); + defaults.is_hostname_anchor = true; + defaults.is_plain = true; + defaults.from_network_types = false; + set_all_options(&mut defaults, true); + set_option(&option, &mut defaults, false); + assert_eq!(defaults, NetworkFilterBreakdown::from(&filter)); + } + // default - positive + { + let filter = + NetworkFilter::parse(&format!("||foo.com"), true, Default::default()).unwrap(); + let mut defaults = default_network_filter_breakdown(); + defaults.hostname = Some(String::from("foo.com")); + defaults.is_hostname_anchor = true; + defaults.is_plain = true; + defaults.from_network_types = true; + set_all_options(&mut defaults, true); + assert_eq!(defaults, NetworkFilterBreakdown::from(&filter)); + } + } + } + + #[test] + fn binary_serialization_works() { + use rmp_serde::{Deserializer, Serializer}; + { + let filter = + NetworkFilter::parse("||foo.com/bar/baz$important", true, Default::default()) + .unwrap(); + + let mut encoded = Vec::new(); + filter + .serialize(&mut Serializer::new(&mut encoded)) + .unwrap(); + let mut de = Deserializer::new(&encoded[..]); + let decoded: NetworkFilter = Deserialize::deserialize(&mut de).unwrap(); + + let mut defaults = default_network_filter_breakdown(); + defaults.hostname = Some(String::from("foo.com")); + defaults.filter = Some(String::from("/bar/baz")); + defaults.is_plain = true; + defaults.is_hostname_anchor = true; + defaults.is_important = true; + defaults.is_left_anchor = true; + assert_eq!(defaults, NetworkFilterBreakdown::from(&decoded)) + } + { + let filter = NetworkFilter::parse("||foo.com*bar^", true, Default::default()).unwrap(); + let mut defaults = default_network_filter_breakdown(); + defaults.hostname = Some(String::from("foo.com")); + defaults.filter = Some(String::from("bar^")); + defaults.is_hostname_anchor = true; + defaults.is_regex = true; + defaults.is_plain = false; + + let mut encoded = Vec::new(); + filter + .serialize(&mut Serializer::new(&mut encoded)) + .unwrap(); + let mut de = Deserializer::new(&encoded[..]); + let decoded: NetworkFilter = Deserialize::deserialize(&mut de).unwrap(); + + assert_eq!(defaults, NetworkFilterBreakdown::from(&decoded)); + assert_eq!( + RegexManager::default().matches( + decoded.mask, + decoded.filter.iter(), + decoded.key(), + "bar/" + ), + true + ); + } + } + + #[test] + fn parse_empty_host_anchor_exception() { + let filter_parsed = + NetworkFilter::parse("@@||$domain=auth.wi-fi.ru", true, Default::default()); + assert!(filter_parsed.is_ok()); + + let filter = filter_parsed.unwrap(); + + let mut defaults = default_network_filter_breakdown(); + + defaults.hostname = Some(String::from("")); + defaults.is_hostname_anchor = true; + defaults.is_exception = true; + defaults.is_plain = true; + defaults.from_network_types = true; + defaults.opt_domains = Some(vec![utils::fast_hash("auth.wi-fi.ru")]); + assert_eq!(defaults, NetworkFilterBreakdown::from(&filter)); + } +} + +#[cfg(test)] +mod hash_collision_tests { + use super::super::*; + + use crate::lists::parse_filters; + use crate::test_utils; + use std::collections::HashMap; + + #[test] + fn check_rule_ids_no_collisions() { + let rules = test_utils::rules_from_lists([ + "data/easylist.to/easylist/easylist.txt", + "data/easylist.to/easylist/easyprivacy.txt", + ]) + .filter(|f| f != "||www.bred4tula.com^"); // remove known collision; + let (network_filters, _) = parse_filters(rules, true, Default::default()); + + let mut filter_ids: HashMap = HashMap::new(); + + for filter in network_filters { + let id = filter.get_id(); + let rule = *filter.raw_line.unwrap_or_default(); + let existing_rule = filter_ids.get(&id); + assert!( + existing_rule.is_none() || existing_rule.unwrap() == &rule, + "ID {} for {} already present from {}", + id, + rule, + existing_rule.unwrap() + ); + filter_ids.insert(id, rule); + } + } +} diff --git a/tests/unit/filters/network_matchers.rs b/tests/unit/filters/network_matchers.rs new file mode 100644 index 00000000..e21673cf --- /dev/null +++ b/tests/unit/filters/network_matchers.rs @@ -0,0 +1,888 @@ +#[cfg(test)] +mod match_tests { + use super::super::*; + use crate::filters::network::*; + + #[test] + fn is_anchored_by_hostname_works() { + // matches empty hostname + assert_eq!(is_anchored_by_hostname("", "foo.com", false), true); + + // does not match when filter hostname is longer than hostname + assert_eq!( + is_anchored_by_hostname("bar.foo.com", "foo.com", false), + false + ); + assert_eq!(is_anchored_by_hostname("b", "", false), false); + assert_eq!(is_anchored_by_hostname("foo.com", "foo.co", false), false); + + // does not match if there is not match + assert_eq!(is_anchored_by_hostname("bar", "foo.com", false), false); + + // ## prefix match + // matches exact match + assert_eq!(is_anchored_by_hostname("", "", false), true); + assert_eq!(is_anchored_by_hostname("f", "f", false), true); + assert_eq!(is_anchored_by_hostname("foo", "foo", false), true); + assert_eq!(is_anchored_by_hostname("foo.com", "foo.com", false), true); + assert_eq!(is_anchored_by_hostname(".com", ".com", false), true); + assert_eq!(is_anchored_by_hostname("com.", "com.", false), true); + + // matches partial + // Single label + assert_eq!(is_anchored_by_hostname("foo", "foo.com", false), true); + assert_eq!(is_anchored_by_hostname("foo.", "foo.com", false), true); + assert_eq!(is_anchored_by_hostname(".foo", ".foo.com", false), true); + assert_eq!(is_anchored_by_hostname(".foo.", ".foo.com", false), true); + + // Multiple labels + assert_eq!(is_anchored_by_hostname("foo.com", "foo.com.", false), true); + assert_eq!(is_anchored_by_hostname("foo.com.", "foo.com.", false), true); + assert_eq!( + is_anchored_by_hostname(".foo.com.", ".foo.com.", false), + true + ); + assert_eq!(is_anchored_by_hostname(".foo.com", ".foo.com", false), true); + + assert_eq!( + is_anchored_by_hostname("foo.bar", "foo.bar.com", false), + true + ); + assert_eq!( + is_anchored_by_hostname("foo.bar.", "foo.bar.com", false), + true + ); + + // does not match partial prefix + // Single label + assert_eq!(is_anchored_by_hostname("foo", "foobar.com", false), false); + assert_eq!(is_anchored_by_hostname("fo", "foo.com", false), false); + assert_eq!(is_anchored_by_hostname(".foo", "foobar.com", false), false); + + // Multiple labels + assert_eq!( + is_anchored_by_hostname("foo.bar", "foo.barbaz.com", false), + false + ); + assert_eq!( + is_anchored_by_hostname(".foo.bar", ".foo.barbaz.com", false), + false + ); + + // ## suffix match + // matches partial + // Single label + assert_eq!(is_anchored_by_hostname("com", "foo.com", false), true); + assert_eq!(is_anchored_by_hostname(".com", "foo.com", false), true); + assert_eq!(is_anchored_by_hostname(".com.", "foo.com.", false), true); + assert_eq!(is_anchored_by_hostname("com.", "foo.com.", false), true); + + // Multiple labels + assert_eq!( + is_anchored_by_hostname("foo.com.", ".foo.com.", false), + true + ); + assert_eq!(is_anchored_by_hostname("foo.com", ".foo.com", false), true); + + // does not match partial + // Single label + assert_eq!(is_anchored_by_hostname("om", "foo.com", false), false); + assert_eq!(is_anchored_by_hostname("com", "foocom", false), false); + + // Multiple labels + assert_eq!( + is_anchored_by_hostname("foo.bar.com", "baz.bar.com", false), + false + ); + assert_eq!( + is_anchored_by_hostname("fo.bar.com", "foo.bar.com", false), + false + ); + assert_eq!( + is_anchored_by_hostname(".fo.bar.com", "foo.bar.com", false), + false + ); + assert_eq!( + is_anchored_by_hostname("bar.com", "foobar.com", false), + false + ); + assert_eq!( + is_anchored_by_hostname(".bar.com", "foobar.com", false), + false + ); + + // ## infix match + // matches partial + assert_eq!(is_anchored_by_hostname("bar", "foo.bar.com", false), true); + assert_eq!(is_anchored_by_hostname("bar.", "foo.bar.com", false), true); + assert_eq!(is_anchored_by_hostname(".bar.", "foo.bar.com", false), true); + } + + fn filter_match_url(filter: &str, url: &str, matching: bool) { + let network_filter = NetworkFilter::parse(filter, true, Default::default()).unwrap(); + let request = request::Request::new(url, "https://example.com", "other").unwrap(); + + assert!( + network_filter.matches_test(&request) == matching, + "Expected match={} for {} {:?} on {}", + matching, + filter, + network_filter, + url + ); + } + + fn hosts_filter_match_url(filter: &str, url: &str, matching: bool) { + let network_filter = NetworkFilter::parse_hosts_style(filter, true).unwrap(); + let request = request::Request::new(url, "https://example.com", "other").unwrap(); + + assert!( + network_filter.matches_test(&request) == matching, + "Expected match={} for {} {:?} on {}", + matching, + filter, + network_filter, + url + ); + } + + #[test] + // pattern + fn check_pattern_plain_filter_filter_works() { + filter_match_url("foo", "https://bar.com/foo", true); + filter_match_url("foo", "https://bar.com/baz/foo", true); + filter_match_url("foo", "https://bar.com/q=foo/baz", true); + filter_match_url("foo", "https://foo.com", true); + filter_match_url("-foo-", "https://bar.com/baz/42-foo-q", true); + filter_match_url("&fo.o=+_-", "https://bar.com?baz=42&fo.o=+_-", true); + filter_match_url("foo/bar/baz", "https://bar.com/foo/bar/baz", true); + filter_match_url("com/bar/baz", "https://bar.com/bar/baz", true); + filter_match_url("https://bar.com/bar/baz", "https://bar.com/bar/baz", true); + } + + #[test] + // ||pattern + fn check_pattern_hostname_anchor_filter_works() { + filter_match_url("||foo.com", "https://foo.com/bar", true); + filter_match_url("||foo.com/bar", "https://foo.com/bar", true); + filter_match_url("||foo", "https://foo.com/bar", true); + filter_match_url("||foo", "https://baz.foo.com/bar", true); + filter_match_url("||foo", "https://foo.baz.com/bar", true); + filter_match_url("||foo.baz", "https://foo.baz.com/bar", true); + filter_match_url("||foo.baz.", "https://foo.baz.com/bar", true); + + filter_match_url("||foo.baz.com^", "https://foo.baz.com/bar", true); + filter_match_url("||foo.baz^", "https://foo.baz.com/bar", false); + + filter_match_url("||foo", "https://baz.com", false); + filter_match_url("||foo", "https://foo-bar.baz.com/bar", false); + filter_match_url("||foo.com", "https://foo.de", false); + filter_match_url("||foo.com", "https://bar.foo.de", false); + filter_match_url("||s.foo.com", "https://substring.s.foo.com", true); + filter_match_url("||s.foo.com", "https://substrings.foo.com", false); + } + + #[test] + fn check_hosts_style_works() { + hosts_filter_match_url("foo.com", "https://foo.com/bar", true); + hosts_filter_match_url("foo.foo.com", "https://foo.com/bar", false); + hosts_filter_match_url("www.foo.com", "https://foo.com/bar", true); + hosts_filter_match_url("com.foo", "https://foo.baz.com/bar", false); + hosts_filter_match_url("foo.baz", "https://foo.baz.com/bar", false); + + hosts_filter_match_url("foo.baz.com", "https://foo.baz.com/bar", true); + hosts_filter_match_url("foo.baz", "https://foo.baz.com/bar", false); + + hosts_filter_match_url("foo.com", "https://baz.com", false); + hosts_filter_match_url("bar.baz.com", "https://foo-bar.baz.com/bar", false); + hosts_filter_match_url("foo.com", "https://foo.de", false); + hosts_filter_match_url("foo.com", "https://bar.foo.de", false); + } + + #[test] + // ||pattern| + fn check_pattern_hostname_right_anchor_filter_works() { + filter_match_url("||foo.com|", "https://foo.com", true); + filter_match_url("||foo.com/bar|", "https://foo.com/bar", true); + + filter_match_url("||foo.com/bar|", "https://foo.com/bar/baz", false); + filter_match_url("||foo.com/bar|", "https://foo.com/", false); + filter_match_url("||bar.com/bar|", "https://foo.com/", false); + } + + #[test] + // pattern| + fn check_pattern_right_anchor_filter_works() { + filter_match_url("foo.com", "https://foo.com", true); + filter_match_url("foo|", "https://bar.com/foo", true); + filter_match_url("foo|", "https://bar.com/foo/", false); + filter_match_url("foo|", "https://bar.com/foo/baz", false); + } + + #[test] + // |pattern + fn check_pattern_left_anchor_filter_works() { + filter_match_url("|http", "http://foo.com", true); + filter_match_url("|http", "https://foo.com", true); + filter_match_url("|https://", "https://foo.com", true); + + filter_match_url("https", "http://foo.com", false); + } + + #[test] + // |pattern| + fn check_pattern_left_right_anchor_filter_works() { + filter_match_url("|https://foo.com|", "https://foo.com", true); + } + + #[test] + // ||pattern + left-anchor + fn check_pattern_hostname_left_anchor_filter_works() { + filter_match_url("||foo.com^test", "https://foo.com/test", true); + filter_match_url("||foo.com/test", "https://foo.com/test", true); + filter_match_url("||foo.com^test", "https://foo.com/tes", false); + filter_match_url("||foo.com/test", "https://foo.com/tes", false); + + filter_match_url("||foo.com^", "https://foo.com/test", true); + + filter_match_url("||foo.com/test*bar", "https://foo.com/testbar", true); + filter_match_url("||foo.com^test*bar", "https://foo.com/testbar", true); + } + + #[test] + // ||hostname^*/pattern + fn check_pattern_hostname_anchor_regex_filter_works() { + filter_match_url("||foo.com^*/bar", "https://foo.com/bar", false); + filter_match_url("||com^*/bar", "https://foo.com/bar", false); + filter_match_url("||foo^*/bar", "https://foo.com/bar", false); + + // @see https://github.com/cliqz-oss/adblocker/issues/29 + filter_match_url("||foo.co^aaa/", "https://bar.foo.com/bbb/aaa/", false); + filter_match_url("||foo.com^aaa/", "https://bar.foo.com/bbb/aaa/", false); + + filter_match_url("||com*^bar", "https://foo.com/bar", true); + filter_match_url("||foo.com^bar", "https://foo.com/bar", true); + filter_match_url("||com^bar", "https://foo.com/bar", true); + filter_match_url("||foo*^bar", "https://foo.com/bar", true); + filter_match_url("||foo*/bar", "https://foo.com/bar", true); + filter_match_url("||foo*com/bar", "https://foo.com/bar", true); + filter_match_url("||foo2*com/bar", "https://foo2.com/bar", true); + filter_match_url("||foo*com*/bar", "https://foo.com/bar", true); + filter_match_url("||foo*com*^bar", "https://foo.com/bar", true); + filter_match_url("||*foo*com*^bar", "https://foo.com/bar", true); + filter_match_url("||*/bar", "https://foo.com/bar", true); + filter_match_url("||*^bar", "https://foo.com/bar", true); + filter_match_url("||*com/bar", "https://foo.com/bar", true); + filter_match_url("||*.com/bar", "https://foo.com/bar", true); + filter_match_url("||*foo.com/bar", "https://foo.com/bar", true); + filter_match_url("||*com/bar", "https://foo.com/bar", true); + filter_match_url("||*com*/bar", "https://foo.com/bar", true); + filter_match_url("||*com*^bar", "https://foo.com/bar", true); + } + + #[test] + fn check_pattern_hostname_anchor_regex_filter_works_realisitic() { + filter_match_url( + "||vimeo.com^*?type=", + "https://vimeo.com/ablincoln/fatal_attraction?type=pageview&target=%2F193641463", + true, + ); + } + + #[test] + fn check_pattern_hostname_left_right_anchor_regex_filter_works() { + filter_match_url("||geo*.hltv.org^", "https://geo2.hltv.org/rekl13.php", true); + filter_match_url( + "||www*.swatchseries.to^", + "https://www1.swatchseries.to/sw.js", + true, + ); + filter_match_url("||imp*.tradedoubler.com^", "https://impde.tradedoubler.com/imp?type(js)g(22608602)a(1725113)epi(30148500144427100033372010772028)preurl(https://pixel.mathtag.com/event/js?mt_id=1160537&mt_adid=166882&mt_exem=&mt_excl=&v1=&v2=&v3=&s1=&s2=&s3=&mt_nsync=1&redirect=https%3A%2F%2Fad28.ad-srv.net%2Fc%2Fczqwm6dm6kagr2j%3Ftprde%3D)768489806", true); + } + + #[test] + fn check_pattern_exception_works() { + { + let filter = "@@||fastly.net/ad2/$image,script,xmlhttprequest"; + let url = "https://0914.global.ssl.fastly.net/ad2/script/x.js?cb=1549980040838"; + let network_filter = NetworkFilter::parse(filter, true, Default::default()).unwrap(); + let request = + request::Request::new(url, "https://www.gamespot.com/metro-exodus/", "script") + .unwrap(); + assert!( + network_filter.matches_test(&request) == true, + "Expected match for {} on {}", + filter, + url + ); + } + { + let filter = "@@||swatchseries.to/public/js/edit-show.js$script,domain=swatchseries.to"; + let url = "https://www1.swatchseries.to/public/js/edit-show.js"; + let network_filter = NetworkFilter::parse(filter, true, Default::default()).unwrap(); + let request = request::Request::new( + url, + "https://www1.swatchseries.to/serie/roswell_new_mexico", + "script", + ) + .unwrap(); + assert!( + network_filter.matches_test(&request) == true, + "Expected match for {} on {}", + filter, + url + ); + } + } + + #[test] + fn check_pattern_match_case() { + filter_match_url( + r#"/BannerAd[0-9]/$match-case"#, + "https://example.com/BannerAd0.gif", + true, + ); + filter_match_url( + r#"/BannerAd[0-9]/$match-case"#, + "https://example.com/bannerad0.gif", + false, + ); + } + + #[test] + fn check_ws_vs_http_matching() { + let network_filter = + NetworkFilter::parse("|ws://$domain=4shared.com", true, Default::default()).unwrap(); + + assert!(network_filter.matches_test( + &request::Request::new("ws://example.com", "https://4shared.com", "websocket").unwrap() + )); + assert!(network_filter.matches_test( + &request::Request::new("wss://example.com", "https://4shared.com", "websocket") + .unwrap() + )); + assert!(!network_filter.matches_test( + &request::Request::new("http://example.com", "https://4shared.com", "script").unwrap() + )); + assert!(!network_filter.matches_test( + &request::Request::new("https://example.com", "https://4shared.com", "script").unwrap() + )); + + // The `ws://` and `wss://` protocols should be used, rather than the resource type. + assert!(network_filter.matches_test( + &request::Request::new("ws://example.com", "https://4shared.com", "script").unwrap() + )); + assert!(network_filter.matches_test( + &request::Request::new("wss://example.com", "https://4shared.com", "script").unwrap() + )); + assert!(!network_filter.matches_test( + &request::Request::new("http://example.com", "https://4shared.com", "websocket") + .unwrap() + )); + assert!(!network_filter.matches_test( + &request::Request::new("https://example.com", "https://4shared.com", "websocket") + .unwrap() + )); + } + + fn check_options(filter: &NetworkFilter, request: &request::Request) -> bool { + super::super::check_options(filter.mask, request) + && super::super::check_included_domains(filter.opt_domains.as_deref(), request) + && super::super::check_excluded_domains(filter.opt_not_domains.as_deref(), request) + } + + #[test] + // options + fn check_options_works() { + // cpt test + { + let network_filter = + NetworkFilter::parse("||foo$image", true, Default::default()).unwrap(); + let request = request::Request::new("https://foo.com/bar", "", "image").unwrap(); + assert_eq!(check_options(&network_filter, &request), true); + } + { + let network_filter = + NetworkFilter::parse("||foo$image", true, Default::default()).unwrap(); + let request = request::Request::new("https://foo.com/bar", "", "script").unwrap(); + assert_eq!(check_options(&network_filter, &request), false); + } + { + let network_filter = + NetworkFilter::parse("||foo$~image", true, Default::default()).unwrap(); + let request = request::Request::new("https://foo.com/bar", "", "script").unwrap(); + assert_eq!(check_options(&network_filter, &request), true); + } + + // ~third-party + { + let network_filter = + NetworkFilter::parse("||foo$~third-party", true, Default::default()).unwrap(); + let request = + request::Request::new("https://foo.com/bar", "http://baz.foo.com", "").unwrap(); + assert_eq!(check_options(&network_filter, &request), true); + } + { + let network_filter = + NetworkFilter::parse("||foo$~third-party", true, Default::default()).unwrap(); + let request = + request::Request::new("https://foo.com/bar", "http://baz.bar.com", "").unwrap(); + assert_eq!(check_options(&network_filter, &request), false); + } + + // ~first-party + { + let network_filter = + NetworkFilter::parse("||foo$~first-party", true, Default::default()).unwrap(); + let request = + request::Request::new("https://foo.com/bar", "http://baz.bar.com", "").unwrap(); + assert_eq!(check_options(&network_filter, &request), true); + } + { + let network_filter = + NetworkFilter::parse("||foo$~first-party", true, Default::default()).unwrap(); + let request = + request::Request::new("https://foo.com/bar", "http://baz.foo.com", "").unwrap(); + assert_eq!(check_options(&network_filter, &request), false); + } + + // opt-domain + { + let network_filter = + NetworkFilter::parse("||foo$domain=foo.com", true, Default::default()).unwrap(); + let request = + request::Request::new("https://foo.com/bar", "http://foo.com", "").unwrap(); + assert_eq!(check_options(&network_filter, &request), true); + } + { + let network_filter = + NetworkFilter::parse("||foo$domain=foo.com", true, Default::default()).unwrap(); + let request = + request::Request::new("https://foo.com/bar", "http://bar.com", "").unwrap(); + assert_eq!(check_options(&network_filter, &request), false); + } + + // opt-not-domain + { + let network_filter = + NetworkFilter::parse("||foo$domain=~bar.com", true, Default::default()).unwrap(); + let request = + request::Request::new("https://foo.com/bar", "http://foo.com", "").unwrap(); + assert_eq!(check_options(&network_filter, &request), true); + } + { + let network_filter = + NetworkFilter::parse("||foo$domain=~bar.com", true, Default::default()).unwrap(); + let request = + request::Request::new("https://foo.com/bar", "http://bar.com", "").unwrap(); + assert_eq!(check_options(&network_filter, &request), false); + } + } + + #[test] + fn check_domain_option_subsetting_works() { + { + let network_filter = NetworkFilter::parse( + "adv$domain=example.com|~foo.example.com", + true, + Default::default(), + ) + .unwrap(); + assert!( + network_filter.matches_test( + &request::Request::new("http://example.net/adv", "http://example.com", "") + .unwrap() + ) == true + ); + assert!( + network_filter.matches_test( + &request::Request::new("http://example.net/adv", "http://foo.example.com", "") + .unwrap() + ) == false + ); + assert!( + network_filter.matches_test( + &request::Request::new( + "http://example.net/adv", + "http://subfoo.foo.example.com", + "" + ) + .unwrap() + ) == false + ); + assert!( + network_filter.matches_test( + &request::Request::new("http://example.net/adv", "http://bar.example.com", "") + .unwrap() + ) == true + ); + assert!( + network_filter.matches_test( + &request::Request::new( + "http://example.net/adv", + "http://anotherexample.com", + "" + ) + .unwrap() + ) == false + ); + } + { + let network_filter = NetworkFilter::parse( + "adv$domain=~example.com|~foo.example.com", + true, + Default::default(), + ) + .unwrap(); + assert!( + network_filter.matches_test( + &request::Request::new("http://example.net/adv", "http://example.com", "") + .unwrap() + ) == false + ); + assert!( + network_filter.matches_test( + &request::Request::new("http://example.net/adv", "http://foo.example.com", "") + .unwrap() + ) == false + ); + assert!( + network_filter.matches_test( + &request::Request::new( + "http://example.net/adv", + "http://subfoo.foo.example.com", + "" + ) + .unwrap() + ) == false + ); + assert!( + network_filter.matches_test( + &request::Request::new("http://example.net/adv", "http://bar.example.com", "") + .unwrap() + ) == false + ); + assert!( + network_filter.matches_test( + &request::Request::new( + "http://example.net/adv", + "http://anotherexample.com", + "" + ) + .unwrap() + ) == true + ); + } + { + let network_filter = NetworkFilter::parse( + "adv$domain=example.com|foo.example.com", + true, + Default::default(), + ) + .unwrap(); + assert!( + network_filter.matches_test( + &request::Request::new("http://example.net/adv", "http://example.com", "") + .unwrap() + ) == true + ); + assert!( + network_filter.matches_test( + &request::Request::new("http://example.net/adv", "http://foo.example.com", "") + .unwrap() + ) == true + ); + assert!( + network_filter.matches_test( + &request::Request::new( + "http://example.net/adv", + "http://subfoo.foo.example.com", + "" + ) + .unwrap() + ) == true + ); + assert!( + network_filter.matches_test( + &request::Request::new("http://example.net/adv", "http://bar.example.com", "") + .unwrap() + ) == true + ); + assert!( + network_filter.matches_test( + &request::Request::new( + "http://example.net/adv", + "http://anotherexample.com", + "" + ) + .unwrap() + ) == false + ); + } + { + let network_filter = NetworkFilter::parse( + "adv$domain=~example.com|foo.example.com", + true, + Default::default(), + ) + .unwrap(); + assert!( + network_filter.matches_test( + &request::Request::new("http://example.net/adv", "http://example.com", "") + .unwrap() + ) == false + ); + assert!( + network_filter.matches_test( + &request::Request::new("http://example.net/adv", "http://foo.example.com", "") + .unwrap() + ) == false + ); + assert!( + network_filter.matches_test( + &request::Request::new( + "http://example.net/adv", + "http://subfoo.foo.example.com", + "" + ) + .unwrap() + ) == false + ); + assert!( + network_filter.matches_test( + &request::Request::new("http://example.net/adv", "http://bar.example.com", "") + .unwrap() + ) == false + ); + assert!( + network_filter.matches_test( + &request::Request::new( + "http://example.net/adv", + "http://anotherexample.com", + "" + ) + .unwrap() + ) == false + ); + } + { + let network_filter = + NetworkFilter::parse("adv$domain=com|~foo.com", true, Default::default()).unwrap(); + assert!( + network_filter.matches_test( + &request::Request::new("http://example.net/adv", "http://com", "").unwrap() + ) == true + ); + assert!( + network_filter.matches_test( + &request::Request::new("http://example.net/adv", "http://foo.com", "").unwrap() + ) == false + ); + assert!( + network_filter.matches_test( + &request::Request::new("http://example.net/adv", "http://subfoo.foo.com", "") + .unwrap() + ) == false + ); + assert!( + network_filter.matches_test( + &request::Request::new("http://example.net/adv", "http://bar.com", "").unwrap() + ) == true + ); + assert!( + network_filter.matches_test( + &request::Request::new("http://example.net/adv", "http://co.uk", "").unwrap() + ) == false + ); + } + } + + #[test] + fn check_unicode_handled() { + filter_match_url( + "||firstrowsports.li/frame/", + "https://firstrowsports.li/frame/bar", + true, + ); + filter_match_url( + "||fırstrowsports.eu/pu/", + "https://fırstrowsports.eu/pu/foo", + true, + ); + filter_match_url( + "||fırstrowsports.eu/pu/", + "https://xn--frstrowsports-39b.eu/pu/foo", + true, + ); + + filter_match_url("||atđhe.net/pu/", "https://atđhe.net/pu/foo", true); + filter_match_url("||atđhe.net/pu/", "https://xn--athe-1ua.net/pu/foo", true); + + filter_match_url("foo", "https://example.com/Ѥ/foo", true); + filter_match_url("Ѥ", "https://example.com/Ѥ/foo", true); + } + + #[test] + fn check_regex_escaping_handled() { + // A few rules that are not correctly escaped for rust Regex + { + // regex escaping "\/" unrecognised + let filter = + r#"/^https?:\/\/.*(bitly|bit)\.(com|ly)\/.*/$domain=123movies.com|1337x.to"#; + let network_filter = NetworkFilter::parse(filter, true, Default::default()).unwrap(); + let url = "https://bit.ly/bar/"; + let source = "http://123movies.com"; + let request = request::Request::new(url, source, "").unwrap(); + assert!( + network_filter.matches_test(&request) == true, + "Expected match for {} on {}", + filter, + url + ); + } + { + // regex escaping "\:" unrecognised + let filter = r#"/\:\/\/data.*\.com\/[a-zA-Z0-9]{30,}/$third-party,xmlhttprequest"#; + let network_filter = NetworkFilter::parse(filter, true, Default::default()).unwrap(); + let url = "https://data.foo.com/9VjjrjU9Or2aqkb8PDiqTBnULPgeI48WmYEHkYer"; + let source = "http://123movies.com"; + let request = request::Request::new(url, source, "xmlhttprequest").unwrap(); + assert!( + network_filter.matches_test(&request) == true, + "Expected match for {} on {}", + filter, + url + ); + } + // + { + let filter = r#"/\.(accountant|bid|click|club|com|cricket|date|download|faith|link|loan|lol|men|online|party|racing|review|science|site|space|stream|top|trade|webcam|website|win|xyz|com)\/(([0-9]{2,9})(\.|\/)(css|\?)?)$/$script,stylesheet,third-party,xmlhttprequest"#; + let network_filter = NetworkFilter::parse(filter, true, Default::default()).unwrap(); + let url = "https://hello.club/123.css"; + let source = "http://123movies.com"; + let request = request::Request::new(url, source, "stylesheet").unwrap(); + assert!( + network_filter.matches_test(&request) == true, + "Expected match for {} on {}", + filter, + url + ); + } + } + + #[test] + #[ignore] // Not going to handle lookaround regexes + #[cfg(feature = "regex-debug-info")] + fn check_lookaround_regex_handled() { + { + let filter = r#"/^https?:\/\/([0-9a-z\-]+\.)?(9anime|animeland|animenova|animeplus|animetoon|animewow|gamestorrent|goodanime|gogoanime|igg-games|kimcartoon|memecenter|readcomiconline|toonget|toonova|watchcartoononline)\.[a-z]{2,4}\/(?!([Ee]xternal|[Ii]mages|[Ss]cripts|[Uu]ploads|ac|ajax|assets|combined|content|cov|cover|(img\/bg)|(img\/icon)|inc|jwplayer|player|playlist-cat-rss|static|thumbs|wp-content|wp-includes)\/)(.*)/$image,other,script,~third-party,xmlhttprequest,domain=~animeland.hu"#; + let network_filter = NetworkFilter::parse(filter, true, Default::default()).unwrap(); + let url = "https://data.foo.com/9VjjrjU9Or2aqkb8PDiqTBnULPgeI48WmYEHkYer"; + let source = "http://123movies.com"; + let request = request::Request::new(url, source, "script").unwrap(); + let mut regex_manager = RegexManager::default(); + assert!(regex_manager.get_compiled_regex_count() == 0); + assert!( + network_filter.matches(&request, &mut regex_manager) == true, + "Expected match for {} on {}", + filter, + url + ); + assert!(regex_manager.get_compiled_regex_count() == 1); + } + } + + #[test] + fn check_empty_host_anchor_matches() { + { + let filter = "||$domain=auth.wi-fi.ru"; + let network_filter = NetworkFilter::parse(filter, true, Default::default()).unwrap(); + let url = "https://example.com/ad.js"; + let source = "http://auth.wi-fi.ru"; + let request = request::Request::new(url, source, "script").unwrap(); + assert!( + network_filter.matches_test(&request) == true, + "Expected match for {} on {}", + filter, + url + ); + } + { + let filter = "@@||$domain=auth.wi-fi.ru"; + let network_filter = NetworkFilter::parse(filter, true, Default::default()).unwrap(); + let url = "https://example.com/ad.js"; + let source = "http://auth.wi-fi.ru"; + let request = request::Request::new(url, source, "script").unwrap(); + assert!( + network_filter.matches_test(&request) == true, + "Expected match for {} on {}", + filter, + url + ); + } + } + + #[test] + fn check_url_path_regex_matches() { + { + let filter = "@@||www.google.com/aclk?*&adurl=$document,~third-party"; + let network_filter = NetworkFilter::parse(filter, true, Default::default()).unwrap(); + let url = "https://www.google.com/aclk?sa=l&ai=DChcSEwioqMfq5ovjAhVvte0KHXBYDKoYABAJGgJkZw&sig=AOD64_0IL5OYOIkZA7qWOBt0yRmKL4hKJw&ctype=5&q=&ved=0ahUKEwjQ88Hq5ovjAhXYiVwKHWAgB5gQww8IXg&adurl="; + let source = "https://www.google.com/aclk?sa=l&ai=DChcSEwioqMfq5ovjAhVvte0KHXBYDKoYABAJGgJkZw&sig=AOD64_0IL5OYOIkZA7qWOBt0yRmKL4hKJw&ctype=5&q=&ved=0ahUKEwjQ88Hq5ovjAhXYiVwKHWAgB5gQww8IXg&adurl="; + let request = request::Request::new(url, source, "document").unwrap(); + assert!(!request.is_third_party); + assert!( + network_filter.matches_test(&request) == true, + "Expected match for {} on {}", + filter, + url + ); + } + { + let filter = "@@||www.google.*/aclk?$first-party"; + let network_filter = NetworkFilter::parse(filter, true, Default::default()).unwrap(); + let url = "https://www.google.com/aclk?sa=l&ai=DChcSEwioqMfq5ovjAhVvte0KHXBYDKoYABAJGgJkZw&sig=AOD64_0IL5OYOIkZA7qWOBt0yRmKL4hKJw&ctype=5&q=&ved=0ahUKEwjQ88Hq5ovjAhXYiVwKHWAgB5gQww8IXg&adurl="; + let source = "https://www.google.com/aclk?sa=l&ai=DChcSEwioqMfq5ovjAhVvte0KHXBYDKoYABAJGgJkZw&sig=AOD64_0IL5OYOIkZA7qWOBt0yRmKL4hKJw&ctype=5&q=&ved=0ahUKEwjQ88Hq5ovjAhXYiVwKHWAgB5gQww8IXg&adurl="; + let request = request::Request::new(url, source, "main_frame").unwrap(); + assert!(!request.is_third_party); + assert!( + network_filter.matches_test(&request) == true, + "Expected match for {} on {}", + filter, + url + ); + } + } + + #[test] + fn check_get_url_after_hostname_handles_bad_input() { + // The function requires the hostname to necessarily be there in the URL, + // but should fail gracefully if that is not the case. + // Graceful failure here is returning an empty string for the rest of the URL + assert_eq!( + get_url_after_hostname("https://www.google.com/ad", "google.com"), + "/ad" + ); + assert_eq!( + get_url_after_hostname( + "https://www.google.com/?aclksa=l&ai=DChcSEwioqMfq5", + "google.com" + ), + "/?aclksa=l&ai=DChcSEwioqMfq5" + ); + assert_eq!( + get_url_after_hostname( + "https://www.google.com/?aclksa=l&ai=DChcSEwioqMfq5", + "www.google.com" + ), + "/?aclksa=l&ai=DChcSEwioqMfq5" + ); + assert_eq!( + get_url_after_hostname( + "https://www.youtube.com/?aclksa=l&ai=DChcSEwioqMfq5", + "google.com" + ), + "" + ); + } +} diff --git a/tests/unit/lists.rs b/tests/unit/lists.rs new file mode 100644 index 00000000..3dd0b3c9 --- /dev/null +++ b/tests/unit/lists.rs @@ -0,0 +1,451 @@ +#[cfg(test)] +mod tests { + use super::super::*; + + #[test] + fn parse_hosts_style() { + { + let input = "www.malware.com"; + let result = parse_filter( + input, + true, + ParseOptions { + format: FilterFormat::Hosts, + ..Default::default() + }, + ); + assert!(result.is_ok()); + } + { + let input = "www.malware.com/virus.txt"; + let result = parse_filter( + input, + true, + ParseOptions { + format: FilterFormat::Hosts, + ..Default::default() + }, + ); + assert!(result.is_err()); + } + { + let input = "127.0.0.1 www.malware.com"; + let result = parse_filter( + input, + true, + ParseOptions { + format: FilterFormat::Hosts, + ..Default::default() + }, + ); + assert!(result.is_ok()); + } + { + let input = "127.0.0.1\t\twww.malware.com"; + let result = parse_filter( + input, + true, + ParseOptions { + format: FilterFormat::Hosts, + ..Default::default() + }, + ); + assert!(result.is_ok()); + } + { + let input = "0.0.0.0 www.malware.com"; + let result = parse_filter( + input, + true, + ParseOptions { + format: FilterFormat::Hosts, + ..Default::default() + }, + ); + assert!(result.is_ok()); + } + { + let input = "0.0.0.0 www.malware.com # replace after issue #289336 is addressed"; + let result = parse_filter( + input, + true, + ParseOptions { + format: FilterFormat::Hosts, + ..Default::default() + }, + ); + assert!(result.is_ok()); + } + { + let input = "! Title: list.txt"; + let result = parse_filter( + input, + true, + ParseOptions { + format: FilterFormat::Hosts, + ..Default::default() + }, + ); + assert!(result.is_err()); + } + { + let input = "127.0.0.1 localhost"; + let result = parse_filter( + input, + true, + ParseOptions { + format: FilterFormat::Hosts, + ..Default::default() + }, + ); + assert!(result.is_err()); + } + { + let input = "127.0.0.1 com"; + let result = parse_filter( + input, + true, + ParseOptions { + format: FilterFormat::Hosts, + ..Default::default() + }, + ); + assert!(result.is_err()); + } + { + let input = ".com"; + let result = parse_filter( + input, + true, + ParseOptions { + format: FilterFormat::Hosts, + ..Default::default() + }, + ); + assert!(result.is_err()); + } + { + let input = "*.com"; + let result = parse_filter( + input, + true, + ParseOptions { + format: FilterFormat::Hosts, + ..Default::default() + }, + ); + assert!(result.is_err()); + } + { + let input = "www."; + let result = parse_filter( + input, + true, + ParseOptions { + format: FilterFormat::Hosts, + ..Default::default() + }, + ); + assert!(result.is_err()); + } + } + + #[test] + fn adguard_cosmetic_detection() { + { + let input = r#"example.org$$script[data-src="banner"]"#; + let result = parse_filter(input, true, Default::default()); + assert!(result.is_err()); + } + { + let input = "example.org##+js(set-local-storage-item, Test, $$remove$$)"; + let result = parse_filter(input, true, Default::default()); + assert!(result.is_ok()); + } + { + let input = "[$app=org.example.app]example.com##.textad"; + let result = parse_filter(input, true, Default::default()); + assert!(result.is_err()); + } + { + let input = r#"[$domain=/^i\[a-z\]*\.strmrdr\[a-z\]+\..*/]##+js(set-constant, adscfg.enabled, false)"#; + let result = parse_filter(input, true, Default::default()); + assert!(result.is_err()); + } + } + + #[test] + fn parse_filter_failed_fuzz_1() { + let input = "Ѥ"; + let result = parse_filter(input, true, Default::default()); + assert!(result.is_ok()); + } + + #[test] + fn parse_filter_failed_fuzz_2() { + assert!(parse_filter(r#"###\\\00DB \008D"#, true, Default::default()).is_ok()); + assert!(parse_filter(r#"###\Û"#, true, Default::default()).is_ok()); + } + + #[test] + fn parse_filter_failed_fuzz_3() { + let input = "||$3p=/"; + let result = parse_filter(input, true, Default::default()); + assert!(result.is_ok()); + } + + #[test] + fn parse_filter_failed_fuzz_4() { + // \\##+js(,\xdd\x8d + let parsed = parse_filter( + &String::from_utf8(vec![92, 35, 35, 43, 106, 115, 40, 44, 221, 141]).unwrap(), + true, + Default::default(), + ); + #[cfg(feature = "css-validation")] + assert!(parsed.is_err()); + #[cfg(not(feature = "css-validation"))] + assert!(parsed.is_ok()); + } + + #[test] + #[cfg(feature = "css-validation")] + fn parse_filter_opening_comment() { + assert!(parse_filter("##input,input/*", true, Default::default(),).is_err()); + } + + #[test] + fn test_parse_expires_interval() { + assert_eq!(ExpiresInterval::try_from("0 hour"), Err(())); + assert_eq!(ExpiresInterval::try_from("0 hours"), Err(())); + assert_eq!( + ExpiresInterval::try_from("1 hour"), + Ok(ExpiresInterval::Hours(1)) + ); + assert_eq!( + ExpiresInterval::try_from("1 hours"), + Ok(ExpiresInterval::Hours(1)) + ); + assert_eq!( + ExpiresInterval::try_from("2 hours"), + Ok(ExpiresInterval::Hours(2)) + ); + assert_eq!( + ExpiresInterval::try_from("2 hour"), + Ok(ExpiresInterval::Hours(2)) + ); + assert_eq!(ExpiresInterval::try_from("3.5 hours"), Err(())); + assert_eq!( + ExpiresInterval::try_from("336 hours"), + Ok(ExpiresInterval::Hours(336)) + ); + assert_eq!(ExpiresInterval::try_from("337 hours"), Err(())); + + assert_eq!(ExpiresInterval::try_from("0 day"), Err(())); + assert_eq!(ExpiresInterval::try_from("0 days"), Err(())); + assert_eq!( + ExpiresInterval::try_from("1 day"), + Ok(ExpiresInterval::Days(1)) + ); + assert_eq!( + ExpiresInterval::try_from("1 days"), + Ok(ExpiresInterval::Days(1)) + ); + assert_eq!( + ExpiresInterval::try_from("2 days"), + Ok(ExpiresInterval::Days(2)) + ); + assert_eq!( + ExpiresInterval::try_from("2 day"), + Ok(ExpiresInterval::Days(2)) + ); + assert_eq!(ExpiresInterval::try_from("3.5 days"), Err(())); + assert_eq!( + ExpiresInterval::try_from("14 days"), + Ok(ExpiresInterval::Days(14)) + ); + assert_eq!(ExpiresInterval::try_from("15 days"), Err(())); + + assert_eq!(ExpiresInterval::try_from("-5 hours"), Err(())); + assert_eq!(ExpiresInterval::try_from("+5 hours"), Err(())); + + assert_eq!( + ExpiresInterval::try_from("2 days (update frequency)"), + Ok(ExpiresInterval::Days(2)) + ); + assert_eq!( + ExpiresInterval::try_from("2 hours (update frequency)"), + Ok(ExpiresInterval::Hours(2)) + ); + } + + #[test] + fn test_parsing_list_metadata() { + let list = [ + "[Adblock Plus 2.0]", + "! Title: 0131 Block List", + "! Homepage: https://austinhuang.me/0131-block-list", + "! Licence: https://creativecommons.org/licenses/by-sa/4.0/", + "! Expires: 7 days", + "! Version: 20220411", + "", + "! => https://austinhuang.me/0131-block-list/list.txt", + ]; + + let mut filter_set = FilterSet::new(false); + let metadata = filter_set.add_filters(list, ParseOptions::default()); + + assert_eq!(metadata.title, Some("0131 Block List".to_string())); + assert_eq!( + metadata.homepage, + Some("https://austinhuang.me/0131-block-list".to_string()) + ); + assert_eq!(metadata.expires, Some(ExpiresInterval::Days(7))); + assert_eq!(metadata.redirect, None); + } + + #[test] + /// Some lists are formatted in unusual ways. This example has a version string with + /// non-numeric characters and an `Expires` field with extra information trailing afterwards. + /// Valid fields should still be recognized and parsed accordingly. + fn test_parsing_list_best_effort() { + let list = [ + "[Adblock Plus 2]", + "!-----------------------------------", + "! ABOUT", + "!-----------------------------------", + "! Version: 1.2.0.0", + "! Title: ABPVN Advanced", + "! Last modified: 09/03/2021", + "! Expires: 7 days (update frequency)", + "! Homepage: https://www.haopro.net/", + ]; + + let mut filter_set = FilterSet::new(false); + let metadata = filter_set.add_filters(list, ParseOptions::default()); + + assert_eq!(metadata.title, Some("ABPVN Advanced".to_string())); + assert_eq!( + metadata.homepage, + Some("https://www.haopro.net/".to_string()) + ); + assert_eq!(metadata.expires, Some(ExpiresInterval::Days(7))); + assert_eq!(metadata.redirect, None); + } + + #[test] + fn test_read_metadata() { + { + let list = r##"! Title: uBlock₀ filters – Annoyances +! Description: Filters optimized for uBlock Origin, to be used with Fanboy's +! and/or Adguard's "Annoyances" list(s) +! Expires: 4 days +! Last modified: %timestamp% +! License: https://github.com/uBlockOrigin/uAssets/blob/master/LICENSE +! Homepage: https://github.com/uBlockOrigin/uAssets +! Forums: https://github.com/uBlockOrigin/uAssets/issues"##; + let metadata = read_list_metadata(&list); + + assert_eq!( + metadata.title, + Some("uBlock₀ filters – Annoyances".to_string()) + ); + assert_eq!( + metadata.homepage, + Some("https://github.com/uBlockOrigin/uAssets".to_string()) + ); + assert_eq!(metadata.expires, Some(ExpiresInterval::Days(4))); + assert_eq!(metadata.redirect, None); + } + { + let list = r##"[uBlock Origin] +! Title: PersianBlocker +! Description: سرانجام، یک لیست بهینه و گسترده برای مسدودسازی تبلیغ ها و ردیاب ها در سایت های پارسی زبان! +! Expires: 2 days +! Last modified: 2022-12-11 +! Homepage: https://github.com/MasterKia/PersianBlocker +! License: AGPLv3 (https://github.com/MasterKia/PersianBlocker/blob/main/LICENSE) + +! مشکل/پیشنهاد: https://github.com/MasterKia/PersianBlocker/issues +! مشارکت: https://github.com/MasterKia/PersianBlocker/pulls + +! لیستی برای برگرداندن آزادی کاربران، چون هر کاربر این آزادی را دارد که چه چیزی وارد مرورگرش می‌شود و چه چیزی وارد نمی‌شود +!-------------------------v Experimental Generic Filters v-----------------------! +! applicationha.com, androidgozar.com, downloadkral.com, gold-team.org, iranecar.com, icoff.ee, koolakmag.ir, +!! mybia4music.com, my-film.pw, pedal.ir, vgdl.ir, sakhamusic.ir +/wp-admin/admin-ajax.php?postviews_id=$xhr +"##; + let metadata = read_list_metadata(&list); + + assert_eq!(metadata.title, Some("PersianBlocker".to_string())); + assert_eq!( + metadata.homepage, + Some("https://github.com/MasterKia/PersianBlocker".to_string()) + ); + assert_eq!(metadata.expires, Some(ExpiresInterval::Days(2))); + assert_eq!(metadata.redirect, None); + } + } + + #[test] + fn parse_cosmetic_variants() { + { + let input = "example.com##.selector"; + let result = parse_filter(input, true, Default::default()); + assert!(matches!(result, Ok(ParsedFilter::Cosmetic(..)))); + } + { + let input = "9gag.com#?#article:-abp-has(.promoted)"; + let result = parse_filter(input, true, Default::default()); + assert!(matches!(result, Ok(ParsedFilter::Cosmetic(..)))); + } + #[cfg(feature = "css-validation")] + { + let input = "sportowefakty.wp.pl#@?#body > [class]:not([id]):matches-css(position: fixed):matches-css(top: 0px)"; + let result = parse_filter(input, true, Default::default()); + assert!(matches!(result, Ok(ParsedFilter::Cosmetic(..)))); + } + { + let input = + r#"odkrywamyzakryte.com#%#//scriptlet("abort-on-property-read", "sc_adv_out")"#; + let result = parse_filter(input, true, Default::default()); + assert!(matches!( + result, + Err(FilterParseError::Cosmetic( + CosmeticFilterError::UnsupportedSyntax + )) + )); + } + { + let input = "bikeradar.com,spiegel.de#@%#!function(){function b(){}function a(a){return{get:function(){return a},set:b}}function c(a)"; + let result = parse_filter(input, true, Default::default()); + assert!(matches!( + result, + Err(FilterParseError::Cosmetic( + CosmeticFilterError::UnsupportedSyntax + )) + )); + } + { + let input = "nczas.com#$#.adsbygoogle { position: absolute!important; left: -3000px!important; }"; + let result = parse_filter(input, true, Default::default()); + assert!(matches!( + result, + Err(FilterParseError::Cosmetic( + CosmeticFilterError::UnsupportedSyntax + )) + )); + } + { + let input = + "kurnik.pl#@$#.adsbygoogle { height: 1px !important; width: 1px !important; }"; + let result = parse_filter(input, true, Default::default()); + assert!(matches!( + result, + Err(FilterParseError::Cosmetic( + CosmeticFilterError::UnsupportedSyntax + )) + )); + } + } +} diff --git a/tests/unit/network_filter_list.rs b/tests/unit/network_filter_list.rs new file mode 100644 index 00000000..ae8a8953 --- /dev/null +++ b/tests/unit/network_filter_list.rs @@ -0,0 +1,338 @@ +#[cfg(test)] +mod tests { + use super::super::*; + + #[test] + fn insert_dup_works() { + let mut dup_map: HashMap> = HashMap::new(); + + // inserts into empty + insert_dup(&mut dup_map, 1, String::from("foo")); + assert_eq!(dup_map.get(&1), Some(&vec![String::from("foo")])); + + // adds item + insert_dup(&mut dup_map, 1, String::from("bar")); + assert_eq!( + dup_map.get(&1), + Some(&vec![String::from("bar"), String::from("foo")]) + ); + + // inserts into another key item + insert_dup(&mut dup_map, 123, String::from("baz")); + assert_eq!(dup_map.get(&123), Some(&vec![String::from("baz")])); + assert_eq!( + dup_map.get(&1), + Some(&vec![String::from("bar"), String::from("foo")]) + ); + } + + #[test] + fn token_histogram_works() { + // handle the case of just 1 token + { + let tokens = vec![(0, vec![vec![111]])]; + let (total_tokens, histogram) = token_histogram(&tokens); + assert_eq!(total_tokens, 1); + assert_eq!(histogram.get(&111), Some(&1)); + // include bad tokens + assert_eq!(histogram.get(&fast_hash("http")), Some(&1)); + assert_eq!(histogram.get(&fast_hash("www")), Some(&1)); + } + + // handle the case of repeating tokens + { + let tokens = vec![(0, vec![vec![111]]), (1, vec![vec![111]])]; + let (total_tokens, histogram) = token_histogram(&tokens); + assert_eq!(total_tokens, 2); + assert_eq!(histogram.get(&111), Some(&2)); + // include bad tokens + assert_eq!(histogram.get(&fast_hash("http")), Some(&2)); + assert_eq!(histogram.get(&fast_hash("www")), Some(&2)); + } + + // handle the different token set sizes + { + let tokens = vec![ + (0, vec![vec![111, 123, 132]]), + (1, vec![vec![111], vec![123], vec![132]]), + (2, vec![vec![111, 123], vec![132]]), + (3, vec![vec![111, 111], vec![111]]), + ]; + let (total_tokens, histogram) = token_histogram(&tokens); + assert_eq!(total_tokens, 12); + assert_eq!(histogram.get(&111), Some(&6)); + assert_eq!(histogram.get(&123), Some(&3)); + assert_eq!(histogram.get(&132), Some(&3)); + // include bad tokens + assert_eq!(histogram.get(&fast_hash("http")), Some(&12)); + assert_eq!(histogram.get(&fast_hash("www")), Some(&12)); + } + } + + #[test] + fn network_filter_list_new_works() { + { + let filters = ["||foo.com"]; + let network_filters: Vec<_> = filters + .into_iter() + .map(|f| NetworkFilter::parse(&f, true, Default::default())) + .filter_map(Result::ok) + .collect(); + let filter_list = NetworkFilterList::new(network_filters, false); + let maybe_matching_filter = filter_list.filter_map.get(&fast_hash("foo")); + assert!(maybe_matching_filter.is_some(), "Expected filter not found"); + } + // choses least frequent token + { + let filters = ["||foo.com", "||bar.com/foo"]; + let network_filters: Vec<_> = filters + .into_iter() + .map(|f| NetworkFilter::parse(&f, true, Default::default())) + .filter_map(Result::ok) + .collect(); + let filter_list = NetworkFilterList::new(network_filters, false); + assert_eq!( + filter_list.filter_map.get(&fast_hash("bar")).unwrap().len(), + 1 + ); + assert_eq!( + filter_list.filter_map.get(&fast_hash("foo")).unwrap().len(), + 1 + ); + } + // choses blacklisted token when no other choice + { + let filters = ["||foo.com", "||foo.com/bar", "||www"]; + let network_filters: Vec<_> = filters + .into_iter() + .map(|f| NetworkFilter::parse(&f, true, Default::default())) + .filter_map(Result::ok) + .collect(); + let filter_list = NetworkFilterList::new(network_filters, false); + assert!( + filter_list.filter_map.get(&fast_hash("www")).is_some(), + "Filter matching {} not found", + "www" + ); + assert_eq!( + filter_list.filter_map.get(&fast_hash("www")).unwrap().len(), + 1 + ); + } + // uses domain as token when only one domain + { + let filters = ["||foo.com", "||foo.com$domain=bar.com"]; + let network_filters: Vec<_> = filters + .into_iter() + .map(|f| NetworkFilter::parse(&f, true, Default::default())) + .filter_map(Result::ok) + .collect(); + let filter_list = NetworkFilterList::new(network_filters, false); + assert!( + filter_list.filter_map.get(&fast_hash("bar.com")).is_some(), + "Filter matching {} not found", + "bar.com" + ); + assert_eq!( + filter_list + .filter_map + .get(&fast_hash("bar.com")) + .unwrap() + .len(), + 1 + ); + } + // dispatches filter to multiple buckets per domain options if no token in main part + { + let filters = ["foo*$domain=bar.com|baz.com"]; + let network_filters: Vec<_> = filters + .into_iter() + .map(|f| NetworkFilter::parse(&f, true, Default::default())) + .filter_map(Result::ok) + .collect(); + let filter_list = NetworkFilterList::new(network_filters, false); + assert_eq!(filter_list.filter_map.len(), 2); + assert!( + filter_list.filter_map.get(&fast_hash("bar.com")).is_some(), + "Filter matching {} not found", + "bar.com" + ); + assert_eq!( + filter_list + .filter_map + .get(&fast_hash("bar.com")) + .unwrap() + .len(), + 1 + ); + assert!( + filter_list.filter_map.get(&fast_hash("baz.com")).is_some(), + "Filter matching {} not found", + "baz.com" + ); + assert_eq!( + filter_list + .filter_map + .get(&fast_hash("baz.com")) + .unwrap() + .len(), + 1 + ); + } + } + + fn test_requests_filters( + filters: impl IntoIterator>, + requests: &[(Request, bool)], + ) { + let network_filters: Vec<_> = filters + .into_iter() + .map(|f| NetworkFilter::parse(&f.as_ref(), true, Default::default())) + .filter_map(Result::ok) + .collect(); + let filter_list = NetworkFilterList::new(network_filters, false); + let mut regex_manager = RegexManager::default(); + + requests.into_iter().for_each(|(req, expected_result)| { + let matched_rule = filter_list.check(&req, &HashSet::new(), &mut regex_manager); + if *expected_result { + assert!(matched_rule.is_some(), "Expected match for {}", req.url); + } else { + assert!( + matched_rule.is_none(), + "Expected no match for {}, matched with {}", + req.url, + matched_rule.unwrap().to_string() + ); + } + }); + } + + #[test] + fn network_filter_list_check_works_plain_filter() { + // includes cases with fall back to 0 bucket (no tokens from a rule) + let filters = [ + "foo", + "-foo-", + "&fo.o=+_-", + "foo/bar/baz", + "com/bar/baz", + "https://bar.com/bar/baz", + ]; + + let url_results = [ + ("https://bar.com/foo", true), + ("https://bar.com/baz/foo", true), + ("https://bar.com/q=foo/baz", true), + ("https://foo.com", true), + ("https://bar.com/baz/42-foo-q", true), + ("https://bar.com?baz=42&fo.o=+_-", true), + ("https://bar.com/foo/bar/baz", true), + ("https://bar.com/bar/baz", true), + ]; + + let request_expectations: Vec<_> = url_results + .into_iter() + .map(|(url, expected_result)| { + let request = Request::new(url, "https://example.com", "other").unwrap(); + (request, expected_result) + }) + .collect(); + + test_requests_filters(&filters, &request_expectations); + } + + #[test] + fn network_filter_list_check_works_hostname_anchor() { + let filters = [ + "||foo.com", + "||bar.com/bar", + "||coo.baz.", + "||foo.bar.com^", + "||foo.baz^", + ]; + + let url_results = [ + ("https://foo.com/bar", true), + ("https://bar.com/bar", true), + ("https://baz.com/bar", false), + ("https://baz.foo.com/bar", true), + ("https://coo.baz.com/bar", true), + ("https://foo.bar.com/bar", true), + ("https://foo.baz.com/bar", false), + ("https://baz.com", false), + ("https://foo-bar.baz.com/bar", false), + ("https://foo.de", false), + ("https://bar.foo.de", false), + ]; + + let request_expectations: Vec<_> = url_results + .into_iter() + .map(|(url, expected_result)| { + let request = Request::new(url, "https://example.com", "other").unwrap(); + (request, expected_result) + }) + .collect(); + + test_requests_filters(&filters, &request_expectations); + } + + #[test] + fn network_filter_list_check_works_unicode() { + let filters = [ + "||firstrowsports.li/frame/", + "||fırstrowsports.eu/pu/", + "||atđhe.net/pu/", + ]; + + let url_results = [ + ("https://firstrowsports.li/frame/bar", true), + ("https://secondrowsports.li/frame/bar", false), + ("https://fırstrowsports.eu/pu/foo", true), + ("https://xn--frstrowsports-39b.eu/pu/foo", true), + ("https://atđhe.net/pu/foo", true), + ("https://xn--athe-1ua.net/pu/foo", true), + ]; + + let request_expectations: Vec<_> = url_results + .into_iter() + .map(|(url, expected_result)| { + let request = Request::new(url, "https://example.com", "other").unwrap(); + (request, expected_result) + }) + .collect(); + + test_requests_filters(&filters, &request_expectations); + } + + #[test] + fn network_filter_list_check_works_regex_escaping() { + let filters = [ + r#"/^https?:\/\/.*(bitly|bit)\.(com|ly)\/.*/$domain=123movies.com|1337x.to"#, + r#"/\:\/\/data.*\.com\/[a-zA-Z0-9]{30,}/$third-party,xmlhttprequest"#, + ]; + + let url_results = [ + ( + Request::new("https://bit.ly/bar/", "http://123movies.com", "").unwrap(), + true, + ), + ( + Request::new( + "https://data.foo.com/9VjjrjU9Or2aqkb8PDiqTBnULPgeI48WmYEHkYer", + "http://123movies.com", + "xmlhttprequest", + ) + .unwrap(), + true, + ), + ]; + + let request_expectations: Vec<_> = url_results + .into_iter() + .map(|(request, expected_result)| (request, expected_result)) + .collect(); + + test_requests_filters(&filters, &request_expectations); + } +} diff --git a/tests/unit/optimizer.rs b/tests/unit/optimizer.rs new file mode 100644 index 00000000..a5b07025 --- /dev/null +++ b/tests/unit/optimizer.rs @@ -0,0 +1,660 @@ +#[cfg(test)] +mod optimization_tests_pattern_group { + #[cfg(test)] + mod optimization_tests_pattern_group { + use super::*; + use crate::filters::network::NetworkMatchable; + use crate::lists; + use crate::regex_manager::CompiledRegex; + use crate::regex_manager::RegexManager; + use crate::request::Request; + use regex::bytes::RegexSetBuilder as BytesRegexSetBuilder; + + fn check_regex_match(regex: &CompiledRegex, pattern: &str, matches: bool) { + let is_match = regex.is_match(pattern); + assert!( + is_match == matches, + "Expected {} match {} = {}", + regex.to_string(), + pattern, + matches + ); + } + + fn check_match( + regex_manager: &mut RegexManager, + filter: &NetworkFilter, + url_path: &str, + matches: bool, + ) { + let is_match = filter.matches( + &Request::new( + ("https://example.com/".to_string() + url_path).as_str(), + "https://google.com", + "", + ) + .unwrap(), + regex_manager, + ); + assert!( + is_match == matches, + "Expected {} match {} = {}", + filter.to_string(), + url_path, + matches + ); + } + + #[test] + fn regex_set_works() { + let regex_set = BytesRegexSetBuilder::new(&[ + r"/static/ad\.", + "/static/ad-", + "/static/ad/.*", + "/static/ads/.*", + "/static/adv/.*", + ]) + .unicode(false) + .build(); + + let fused_regex = CompiledRegex::CompiledSet(regex_set.unwrap()); + assert!(matches!(fused_regex, CompiledRegex::CompiledSet(_))); + check_regex_match(&fused_regex, "/static/ad.", true); + check_regex_match(&fused_regex, "/static/ad-", true); + check_regex_match(&fused_regex, "/static/ads-", false); + check_regex_match(&fused_regex, "/static/ad/", true); + check_regex_match(&fused_regex, "/static/ad", false); + check_regex_match(&fused_regex, "/static/ad/foobar", true); + check_regex_match(&fused_regex, "/static/ad/foobar/asd?q=1", true); + check_regex_match(&fused_regex, "/static/ads/", true); + check_regex_match(&fused_regex, "/static/ads", false); + check_regex_match(&fused_regex, "/static/ads/foobar", true); + check_regex_match(&fused_regex, "/static/ads/foobar/asd?q=1", true); + check_regex_match(&fused_regex, "/static/adv/", true); + check_regex_match(&fused_regex, "/static/adv", false); + check_regex_match(&fused_regex, "/static/adv/foobar", true); + check_regex_match(&fused_regex, "/static/adv/foobar/asd?q=1", true); + } + + #[test] + fn combines_simple_regex_patterns() { + let rules = [ + "/static/ad-", + "/static/ad.", + "/static/ad/*", + "/static/ads/*", + "/static/adv/*", + ]; + + let (filters, _) = lists::parse_filters(&rules, true, Default::default()); + + let optimization = SimplePatternGroup {}; + + filters + .iter() + .for_each(|f| assert!(optimization.select(f), "Expected rule to be selected")); + + let fused = optimization.fusion(&filters); + + assert!(fused.is_regex() == false, "Expected rule to not be a regex"); + assert_eq!( + fused.to_string(), + "/static/ad- <+> /static/ad. <+> /static/ad/* <+> /static/ads/* <+> /static/adv/*" + ); + let mut regex_manager = RegexManager::default(); + check_match(&mut regex_manager, &fused, "/static/ad-", true); + check_match(&mut regex_manager, &fused, "/static/ad.", true); + check_match(&mut regex_manager, &fused, "/static/ad%", false); + check_match(&mut regex_manager, &fused, "/static/ads-", false); + check_match(&mut regex_manager, &fused, "/static/ad/", true); + check_match(&mut regex_manager, &fused, "/static/ad", false); + check_match(&mut regex_manager, &fused, "/static/ad/foobar", true); + check_match( + &mut regex_manager, + &fused, + "/static/ad/foobar/asd?q=1", + true, + ); + check_match(&mut regex_manager, &fused, "/static/ads/", true); + check_match(&mut regex_manager, &fused, "/static/ads", false); + check_match(&mut regex_manager, &fused, "/static/ads/foobar", true); + check_match( + &mut regex_manager, + &fused, + "/static/ads/foobar/asd?q=1", + true, + ); + check_match(&mut regex_manager, &fused, "/static/adv/", true); + check_match(&mut regex_manager, &fused, "/static/adv", false); + check_match(&mut regex_manager, &fused, "/static/adv/foobar", true); + check_match( + &mut regex_manager, + &fused, + "/static/adv/foobar/asd?q=1", + true, + ); + } + + #[test] + fn separates_pattern_by_grouping() { + let rules = [ + "/analytics-v1.", + "/v1/pixel?", + "/api/v1/stat?", + "/analytics/v1/*$domain=~my.leadpages.net", + "/v1/ads/*", + ]; + + let (filters, _) = lists::parse_filters(&rules, true, Default::default()); + + let optimization = SimplePatternGroup {}; + + let (fused, skipped) = apply_optimisation(&optimization, filters); + + assert_eq!(fused.len(), 1); + let filter = fused.get(0).unwrap(); + assert_eq!( + filter.to_string(), + "/analytics-v1. <+> /v1/pixel? <+> /api/v1/stat? <+> /v1/ads/*" + ); + + assert!(filter.matches_test( + &Request::new( + "https://example.com/v1/pixel?", + "https://my.leadpages.net", + "" + ) + .unwrap() + )); + + assert_eq!(skipped.len(), 1); + let filter = skipped.get(0).unwrap(); + assert_eq!( + filter.to_string(), + "/analytics/v1/*$domain=~my.leadpages.net" + ); + + assert!(filter.matches_test( + &Request::new( + "https://example.com/analytics/v1/foobar", + "https://foo.leadpages.net", + "" + ) + .unwrap() + )) + } + } + + /* + #[cfg(test)] + mod optimization_tests_union_domain { + use super::*; + use crate::filters::network::NetworkMatchable; + use crate::lists; + use crate::request::Request; + use crate::utils; + + #[test] + fn merges_domains() { + let rules = [ + "/analytics-v1$domain=google.com", + "/analytics-v1$domain=example.com", + ]; + + let (filters, _) = lists::parse_filters(&rules, true, Default::default()); + let optimization = UnionDomainGroup {}; + let (fused, _) = apply_optimisation(&optimization, filters); + + assert_eq!(fused.len(), 1); + let filter = fused.get(0).unwrap(); + assert_eq!( + filter.to_string(), + "/analytics-v1$domain=google.com <+> /analytics-v1$domain=example.com" + ); + + let expected_domains = vec![ + utils::fast_hash("example.com"), + utils::fast_hash("google.com"), + ]; + assert!(filter.opt_domains.is_some()); + let filter_domains = filter.opt_domains.as_ref().unwrap(); + for dom in expected_domains { + assert!(filter_domains.contains(&dom)); + } + + assert!( + filter.matches_test( + &Request::new( + "https://example.com/analytics-v1/foobar", + "https://google.com", + "" + ) + .unwrap() + ) == true + ); + assert!( + filter.matches_test( + &Request::new( + "https://example.com/analytics-v1/foobar", + "https://foo.leadpages.net", + "" + ) + .unwrap() + ) == false + ); + } + + #[test] + fn skips_rules_with_no_domain() { + let rules = [ + "/analytics-v1$domain=google.com", + "/analytics-v1$domain=example.com", + "/analytics-v1", + ]; + + let (filters, _) = lists::parse_filters(&rules, true, Default::default()); + let optimization = UnionDomainGroup {}; + let (_, skipped) = apply_optimisation(&optimization, filters); + + assert_eq!(skipped.len(), 1); + let filter = skipped.get(0).unwrap(); + assert_eq!(filter.to_string(), "/analytics-v1"); + } + + #[test] + fn optimises_domains() { + let rules = [ + "/analytics-v1$domain=google.com", + "/analytics-v1$domain=example.com", + "/analytics-v1$domain=exampleone.com|exampletwo.com", + "/analytics-v1", + ]; + + let (filters, _) = lists::parse_filters(&rules, true, Default::default()); + + let optimization = UnionDomainGroup {}; + + let (fused, skipped) = apply_optimisation(&optimization, filters); + + assert_eq!(fused.len(), 1); + let filter = fused.get(0).unwrap(); + assert_eq!( + filter.to_string(), + "/analytics-v1$domain=google.com <+> /analytics-v1$domain=example.com <+> /analytics-v1$domain=exampleone.com|exampletwo.com" + ); + + assert_eq!(skipped.len(), 1); + let skipped_filter = skipped.get(0).unwrap(); + assert_eq!(skipped_filter.to_string(), "/analytics-v1"); + + assert!( + filter.matches_test( + &Request::new( + "https://example.com/analytics-v1/foobar", + "https://google.com", + "" + ) + .unwrap() + ) == true + ); + assert!( + filter.matches_test( + &Request::new( + "https://example.com/analytics-v1/foobar", + "https://example.com", + "" + ) + .unwrap() + ) == true + ); + assert!( + filter.matches_test( + &Request::new( + "https://example.com/analytics-v1/foobar", + "https://exampletwo.com", + "" + ) + .unwrap() + ) == true + ); + assert!( + filter.matches_test( + &Request::new( + "https://example.com/analytics-v1/foobar", + "https://foo.leadpages.net", + "" + ) + .unwrap() + ) == false + ); + } + } + */ + use super::super::*; + use crate::filters::network::NetworkMatchable; + use crate::lists; + use crate::regex_manager::CompiledRegex; + use crate::regex_manager::RegexManager; + use crate::request::Request; + use regex::bytes::RegexSetBuilder as BytesRegexSetBuilder; + + fn check_regex_match(regex: &CompiledRegex, pattern: &str, matches: bool) { + let is_match = regex.is_match(pattern); + assert!( + is_match == matches, + "Expected {} match {} = {}", + regex.to_string(), + pattern, + matches + ); + } + + fn check_match( + regex_manager: &mut RegexManager, + filter: &NetworkFilter, + url_path: &str, + matches: bool, + ) { + let is_match = filter.matches( + &Request::new( + ("https://example.com/".to_string() + url_path).as_str(), + "https://google.com", + "", + ) + .unwrap(), + regex_manager, + ); + assert!( + is_match == matches, + "Expected {} match {} = {}", + filter.to_string(), + url_path, + matches + ); + } + + #[test] + fn regex_set_works() { + let regex_set = BytesRegexSetBuilder::new(&[ + r"/static/ad\.", + "/static/ad-", + "/static/ad/.*", + "/static/ads/.*", + "/static/adv/.*", + ]) + .unicode(false) + .build(); + + let fused_regex = CompiledRegex::CompiledSet(regex_set.unwrap()); + assert!(matches!(fused_regex, CompiledRegex::CompiledSet(_))); + check_regex_match(&fused_regex, "/static/ad.", true); + check_regex_match(&fused_regex, "/static/ad-", true); + check_regex_match(&fused_regex, "/static/ads-", false); + check_regex_match(&fused_regex, "/static/ad/", true); + check_regex_match(&fused_regex, "/static/ad", false); + check_regex_match(&fused_regex, "/static/ad/foobar", true); + check_regex_match(&fused_regex, "/static/ad/foobar/asd?q=1", true); + check_regex_match(&fused_regex, "/static/ads/", true); + check_regex_match(&fused_regex, "/static/ads", false); + check_regex_match(&fused_regex, "/static/ads/foobar", true); + check_regex_match(&fused_regex, "/static/ads/foobar/asd?q=1", true); + check_regex_match(&fused_regex, "/static/adv/", true); + check_regex_match(&fused_regex, "/static/adv", false); + check_regex_match(&fused_regex, "/static/adv/foobar", true); + check_regex_match(&fused_regex, "/static/adv/foobar/asd?q=1", true); + } + + #[test] + fn combines_simple_regex_patterns() { + let rules = [ + "/static/ad-", + "/static/ad.", + "/static/ad/*", + "/static/ads/*", + "/static/adv/*", + ]; + + let (filters, _) = lists::parse_filters(&rules, true, Default::default()); + + let optimization = SimplePatternGroup {}; + + filters + .iter() + .for_each(|f| assert!(optimization.select(f), "Expected rule to be selected")); + + let fused = optimization.fusion(&filters); + + assert!(fused.is_regex() == false, "Expected rule to not be a regex"); + assert_eq!( + fused.to_string(), + "/static/ad- <+> /static/ad. <+> /static/ad/* <+> /static/ads/* <+> /static/adv/*" + ); + let mut regex_manager = RegexManager::default(); + check_match(&mut regex_manager, &fused, "/static/ad-", true); + check_match(&mut regex_manager, &fused, "/static/ad.", true); + check_match(&mut regex_manager, &fused, "/static/ad%", false); + check_match(&mut regex_manager, &fused, "/static/ads-", false); + check_match(&mut regex_manager, &fused, "/static/ad/", true); + check_match(&mut regex_manager, &fused, "/static/ad", false); + check_match(&mut regex_manager, &fused, "/static/ad/foobar", true); + check_match( + &mut regex_manager, + &fused, + "/static/ad/foobar/asd?q=1", + true, + ); + check_match(&mut regex_manager, &fused, "/static/ads/", true); + check_match(&mut regex_manager, &fused, "/static/ads", false); + check_match(&mut regex_manager, &fused, "/static/ads/foobar", true); + check_match( + &mut regex_manager, + &fused, + "/static/ads/foobar/asd?q=1", + true, + ); + check_match(&mut regex_manager, &fused, "/static/adv/", true); + check_match(&mut regex_manager, &fused, "/static/adv", false); + check_match(&mut regex_manager, &fused, "/static/adv/foobar", true); + check_match( + &mut regex_manager, + &fused, + "/static/adv/foobar/asd?q=1", + true, + ); + } + + #[test] + fn separates_pattern_by_grouping() { + let rules = [ + "/analytics-v1.", + "/v1/pixel?", + "/api/v1/stat?", + "/analytics/v1/*$domain=~my.leadpages.net", + "/v1/ads/*", + ]; + + let (filters, _) = lists::parse_filters(&rules, true, Default::default()); + + let optimization = SimplePatternGroup {}; + + let (fused, skipped) = apply_optimisation(&optimization, filters); + + assert_eq!(fused.len(), 1); + let filter = fused.get(0).unwrap(); + assert_eq!( + filter.to_string(), + "/analytics-v1. <+> /v1/pixel? <+> /api/v1/stat? <+> /v1/ads/*" + ); + + assert!(filter.matches_test( + &Request::new( + "https://example.com/v1/pixel?", + "https://my.leadpages.net", + "" + ) + .unwrap() + )); + + assert_eq!(skipped.len(), 1); + let filter = skipped.get(0).unwrap(); + assert_eq!( + filter.to_string(), + "/analytics/v1/*$domain=~my.leadpages.net" + ); + + assert!(filter.matches_test( + &Request::new( + "https://example.com/analytics/v1/foobar", + "https://foo.leadpages.net", + "" + ) + .unwrap() + )) + } +} + +/* +#[cfg(test)] +mod optimization_tests_union_domain { + use super::*; + use crate::filters::network::NetworkMatchable; + use crate::lists; + use crate::request::Request; + use crate::utils; + + #[test] + fn merges_domains() { + let rules = [ + "/analytics-v1$domain=google.com", + "/analytics-v1$domain=example.com", + ]; + + let (filters, _) = lists::parse_filters(&rules, true, Default::default()); + let optimization = UnionDomainGroup {}; + let (fused, _) = apply_optimisation(&optimization, filters); + + assert_eq!(fused.len(), 1); + let filter = fused.get(0).unwrap(); + assert_eq!( + filter.to_string(), + "/analytics-v1$domain=google.com <+> /analytics-v1$domain=example.com" + ); + + let expected_domains = vec![ + utils::fast_hash("example.com"), + utils::fast_hash("google.com"), + ]; + assert!(filter.opt_domains.is_some()); + let filter_domains = filter.opt_domains.as_ref().unwrap(); + for dom in expected_domains { + assert!(filter_domains.contains(&dom)); + } + + assert!( + filter.matches_test( + &Request::new( + "https://example.com/analytics-v1/foobar", + "https://google.com", + "" + ) + .unwrap() + ) == true + ); + assert!( + filter.matches_test( + &Request::new( + "https://example.com/analytics-v1/foobar", + "https://foo.leadpages.net", + "" + ) + .unwrap() + ) == false + ); + } + + #[test] + fn skips_rules_with_no_domain() { + let rules = [ + "/analytics-v1$domain=google.com", + "/analytics-v1$domain=example.com", + "/analytics-v1", + ]; + + let (filters, _) = lists::parse_filters(&rules, true, Default::default()); + let optimization = UnionDomainGroup {}; + let (_, skipped) = apply_optimisation(&optimization, filters); + + assert_eq!(skipped.len(), 1); + let filter = skipped.get(0).unwrap(); + assert_eq!(filter.to_string(), "/analytics-v1"); + } + + #[test] + fn optimises_domains() { + let rules = [ + "/analytics-v1$domain=google.com", + "/analytics-v1$domain=example.com", + "/analytics-v1$domain=exampleone.com|exampletwo.com", + "/analytics-v1", + ]; + + let (filters, _) = lists::parse_filters(&rules, true, Default::default()); + + let optimization = UnionDomainGroup {}; + + let (fused, skipped) = apply_optimisation(&optimization, filters); + + assert_eq!(fused.len(), 1); + let filter = fused.get(0).unwrap(); + assert_eq!( + filter.to_string(), + "/analytics-v1$domain=google.com <+> /analytics-v1$domain=example.com <+> /analytics-v1$domain=exampleone.com|exampletwo.com" + ); + + assert_eq!(skipped.len(), 1); + let skipped_filter = skipped.get(0).unwrap(); + assert_eq!(skipped_filter.to_string(), "/analytics-v1"); + + assert!( + filter.matches_test( + &Request::new( + "https://example.com/analytics-v1/foobar", + "https://google.com", + "" + ) + .unwrap() + ) == true + ); + assert!( + filter.matches_test( + &Request::new( + "https://example.com/analytics-v1/foobar", + "https://example.com", + "" + ) + .unwrap() + ) == true + ); + assert!( + filter.matches_test( + &Request::new( + "https://example.com/analytics-v1/foobar", + "https://exampletwo.com", + "" + ) + .unwrap() + ) == true + ); + assert!( + filter.matches_test( + &Request::new( + "https://example.com/analytics-v1/foobar", + "https://foo.leadpages.net", + "" + ) + .unwrap() + ) == false + ); + } +} +*/ diff --git a/tests/unit/regex_manager.rs b/tests/unit/regex_manager.rs new file mode 100644 index 00000000..044079fc --- /dev/null +++ b/tests/unit/regex_manager.rs @@ -0,0 +1,71 @@ +#[cfg(all(test, feature = "regex-debug-info"))] +mod tests { + use super::super::*; + + use crate::filters::network::NetworkFilter; + use crate::filters::network::NetworkMatchable; + use crate::request; + + use mock_instant::global::MockClock; + + fn make_filter(line: &str) -> NetworkFilter { + NetworkFilter::parse(line, true, Default::default()).unwrap() + } + + fn make_request(url: &str) -> request::Request { + request::Request::new(url, "https://example.com", "other").unwrap() + } + + fn get_active_regex_count(regex_manager: &RegexManager) -> usize { + regex_manager + .get_debug_regex_data() + .iter() + .filter(|x| x.regex.is_some()) + .count() + } + + #[test] + fn simple_match() { + let mut regex_manager = RegexManager::default(); + regex_manager.update_time(); + + let filter = make_filter("||geo*.hltv.org^"); + assert!(filter.matches(&make_request("https://geo2.hltv.org/"), &mut regex_manager)); + assert_eq!(get_active_regex_count(®ex_manager), 1); + assert_eq!(regex_manager.get_debug_regex_data().len(), 1); + } + + #[test] + fn discard_and_recreate() { + let mut regex_manager = RegexManager::default(); + regex_manager.update_time(); + + let filter = make_filter("||geo*.hltv.org^"); + assert!(filter.matches(&make_request("https://geo2.hltv.org/"), &mut regex_manager)); + assert_eq!(regex_manager.get_compiled_regex_count(), 1); + assert_eq!(get_active_regex_count(®ex_manager), 1); + + MockClock::advance(DEFAULT_DISCARD_UNUSED_TIME - Duration::from_secs(1)); + regex_manager.update_time(); + // The entry shouldn't be discarded because was used during + // last REGEX_MANAGER_DISCARD_TIME. + assert_eq!(get_active_regex_count(®ex_manager), 1); + + // The entry is entry is outdated, but should be discarded only + // in the next cleanup() call. The call was 2 sec ago and is throttled + // now. + MockClock::advance(DEFAULT_CLEAN_UP_INTERVAL - Duration::from_secs(1)); + regex_manager.update_time(); + assert_eq!(get_active_regex_count(®ex_manager), 1); + + MockClock::advance(Duration::from_secs(2)); + regex_manager.update_time(); + // The entry is now outdated & cleanup() should be called => discard. + assert_eq!(get_active_regex_count(®ex_manager), 0); + + // The entry is recreated, get_compiled_regex_count() increased +1. + assert!(filter.matches(&make_request("https://geo2.hltv.org/"), &mut regex_manager)); + assert_eq!(regex_manager.get_compiled_regex_count(), 2); + assert_eq!(get_active_regex_count(®ex_manager), 1); + } +} diff --git a/tests/unit/request.rs b/tests/unit/request.rs new file mode 100644 index 00000000..e5cd61e8 --- /dev/null +++ b/tests/unit/request.rs @@ -0,0 +1,193 @@ +#[cfg(test)] +mod tests { + use super::super::*; + + fn build_request( + raw_type: &str, + url: &str, + schema: &str, + hostname: &str, + domain: &str, + source_hostname: &str, + source_domain: &str, + ) -> Request { + let third_party = source_domain != domain; + + Request::from_detailed_parameters( + raw_type, + url, + schema, + hostname, + source_hostname, + third_party, + url.to_string(), + ) + } + + #[test] + fn new_works() { + let simple_example = build_request( + "document", + "https://example.com/ad", + "https", + "example.com", + "example.com", + "example.com", + "example.com", + ); + assert_eq!(simple_example.is_https, true); + assert_eq!(simple_example.is_supported, true); + assert_eq!(simple_example.is_third_party, false); + assert_eq!(simple_example.request_type, RequestType::Document); + assert_eq!( + simple_example.source_hostname_hashes.unwrap().as_slice(), + vec![utils::fast_hash("example.com"), utils::fast_hash("com")], + ); + + let unsupported_example = build_request( + "document", + "file://example.com/ad", + "file", + "example.com", + "example.com", + "example.com", + "example.com", + ); + assert_eq!(unsupported_example.is_https, false); + assert_eq!(unsupported_example.is_http, false); + assert_eq!(unsupported_example.is_supported, false); + + let first_party = build_request( + "document", + "https://subdomain.example.com/ad", + "https", + "subdomain.example.com", + "example.com", + "example.com", + "example.com", + ); + assert_eq!(first_party.is_https, true); + assert_eq!(first_party.is_supported, true); + assert_eq!(first_party.is_third_party, false); + + let third_party = build_request( + "document", + "https://subdomain.anotherexample.com/ad", + "https", + "subdomain.anotherexample.com", + "anotherexample.com", + "example.com", + "example.com", + ); + assert_eq!(third_party.is_https, true); + assert_eq!(third_party.is_supported, true); + assert_eq!(third_party.is_third_party, true); + + let websocket = build_request( + "document", + "wss://subdomain.anotherexample.com/ad", + "wss", + "subdomain.anotherexample.com", + "anotherexample.com", + "example.com", + "example.com", + ); + assert_eq!(websocket.is_https, false); + assert_eq!(websocket.is_https, false); + assert_eq!(websocket.is_supported, true); + assert_eq!(websocket.is_third_party, true); + assert_eq!(websocket.request_type, RequestType::Websocket); + + let assumed_https = build_request( + "document", + "//subdomain.anotherexample.com/ad", + "", + "subdomain.anotherexample.com", + "anotherexample.com", + "example.com", + "example.com", + ); + assert_eq!(assumed_https.is_https, true); + assert_eq!(assumed_https.is_http, false); + assert_eq!(assumed_https.is_supported, true); + } + + fn tokenize(tokens: &[&str], extra_tokens: &[utils::Hash]) -> Vec { + let mut tokens: Vec<_> = tokens.into_iter().map(|t| utils::fast_hash(&t)).collect(); + tokens.extend(extra_tokens); + tokens + } + + #[test] + fn tokens_works() { + let simple_example = build_request( + "document", + "https://subdomain.example.com/ad", + "https", + "subdomain.example.com", + "example.com", + "subdomain.example.com", + "example.com", + ); + assert_eq!( + simple_example + .source_hostname_hashes + .as_ref() + .unwrap() + .as_slice(), + tokenize(&["subdomain.example.com", "example.com", "com",], &[]).as_slice() + ); + let tokens = simple_example.get_tokens(); + assert_eq!( + tokens.as_slice(), + tokenize(&["https", "subdomain", "example", "com", "ad"], &[0]).as_slice() + ) + } + + #[test] + fn parses_urls() { + let parsed = Request::new( + "https://subdomain.example.com/ad", + "https://example.com/", + "document", + ) + .unwrap(); + assert_eq!(parsed.is_https, true); + assert_eq!(parsed.is_supported, true); + assert_eq!(parsed.is_third_party, false); + assert_eq!(parsed.request_type, RequestType::Document); + + // assert_eq!(parsed.domain, "example.com"); + assert_eq!(parsed.hostname, "subdomain.example.com"); + + // assert_eq!(parsed.source_domain, "example.com"); + assert_eq!( + parsed.source_hostname_hashes.unwrap().as_slice(), + vec![utils::fast_hash("example.com"), utils::fast_hash("com")], + ); + // assert_eq!(parsed.source_hostname, "example.com"); + + let bad_url = Request::new( + "subdomain.example.com/ad", + "https://example.com/", + "document", + ); + assert_eq!(bad_url.err(), Some(RequestError::HostnameParseError)); + } + + #[test] + fn fuzzing_errors() { + { + let parsed = Request::new("https://߶", "https://example.com", "other"); + assert!(parsed.is_ok()); + } + { + let parsed = Request::new( + &format!("https://{}", std::str::from_utf8(&[9, 9, 64]).unwrap()), + "https://example.com", + "other", + ); + assert!(parsed.is_err()); + } + } +} diff --git a/tests/unit/resources/resource_assembler.rs b/tests/unit/resources/resource_assembler.rs new file mode 100644 index 00000000..d337c197 --- /dev/null +++ b/tests/unit/resources/resource_assembler.rs @@ -0,0 +1,291 @@ +#[cfg(test)] +mod tests { + use super::super::*; + + #[test] + fn test_war_resource_assembly() { + let web_accessible_resource_dir = + Path::new("data/test/fake-uBO-files/web_accessible_resources"); + let redirect_resources_path = Path::new("data/test/fake-uBO-files/redirect-resources.js"); + let resources = + assemble_web_accessible_resources(web_accessible_resource_dir, redirect_resources_path); + + let expected_resource_names = vec![ + "1x1.gif", + "2x2.png", + "3x2.png", + "32x32.png", + "addthis_widget.js", + "amazon_ads.js", + "amazon_apstag.js", + "ampproject_v0.js", + "chartbeat.js", + //"click-to-load.html" is ignored because it has a params field. + "doubleclick_instream_ad_status.js", + "empty", + "fingerprint2.js", + "fingerprint3.js", + "google-analytics_analytics.js", + "google-analytics_cx_api.js", + "google-analytics_ga.js", + "google-analytics_inpage_linkid.js", + "google-ima.js", + "googlesyndication_adsbygoogle.js", + "googletagservices_gpt.js", + "hd-main.js", + "ligatus_angular-tag.js", + "mxpnl_mixpanel.js", + "monkeybroker.js", + "noeval.js", + "noeval-silent.js", + "nobab.js", + "nobab2.js", + "nofab.js", + "noop-0.1s.mp3", + "noop-0.5s.mp3", + "noop-1s.mp4", + "noop.html", + "noop.js", + "noop.txt", + "noop-vmap1.0.xml", + "outbrain-widget.js", + "popads.js", + "popads-dummy.js", + "prebid-ads.js", + "scorecardresearch_beacon.js", + "window.open-defuser.js", + ]; + + for name in expected_resource_names { + dbg!(&name); + assert!( + resources + .iter() + .find(|resource| { + if let ResourceType::Mime(_) = resource.kind { + resource.name == name + } else { + false + } + }) + .is_some(), + "{:?}", + name + ); + } + + let serialized = serde_json::to_string(&resources).expect("serialize resources"); + + let reserialized: Vec = + serde_json::from_str(&serialized).expect("deserialize resources"); + + assert_eq!(reserialized[0].name, "1x1.gif"); + assert_eq!(reserialized[0].aliases, vec!["1x1-transparent.gif"]); + assert_eq!(reserialized[0].kind, ResourceType::Mime(MimeType::ImageGif)); + + assert_eq!(reserialized[34].name, "noop.js"); + assert_eq!( + reserialized[34].aliases, + vec!["noopjs", "abp-resource:blank-js"] + ); + assert_eq!( + reserialized[34].kind, + ResourceType::Mime(MimeType::ApplicationJavascript) + ); + let noopjs_contents = std::fs::read_to_string(Path::new( + "data/test/fake-uBO-files/web_accessible_resources/noop.js", + )) + .unwrap() + .replace('\r', ""); + assert_eq!( + std::str::from_utf8( + &base64::decode(&reserialized[34].content).expect("decode base64 content") + ) + .expect("convert to utf8 string"), + noopjs_contents, + ); + } + + #[test] + fn test_scriptlet_resource_assembly2() { + let scriptlets_path = Path::new("data/test/fake-uBO-files/scriptlets2.js"); + #[allow(deprecated)] + let resources = assemble_scriptlet_resources(scriptlets_path); + + let expected_resource_names = vec![ + "abort-current-inline-script.js", + "abort-on-property-read.js", + "abort-on-property-write.js", + "abort-on-stack-trace.js", + "addEventListener-defuser.js", + "addEventListener-logger.js", + "json-prune.js", + "nano-setInterval-booster.js", + "nano-setTimeout-booster.js", + "noeval-if.js", + "no-fetch-if.js", + "no-floc.js", + "remove-attr.js", + "remove-class.js", + "no-requestAnimationFrame-if.js", + "set-constant.js", + "no-setInterval-if.js", + "no-setTimeout-if.js", + "webrtc-if.js", + "window.name-defuser", + "overlay-buster.js", + "alert-buster.js", + "gpt-defuser.js", + "nowebrtc.js", + "golem.de.js", + "upmanager-defuser.js", + "smartadserver.com.js", + "adfly-defuser.js", + "disable-newtab-links.js", + "damoh-defuser.js", + "twitch-videoad.js", + "fingerprint2.js", + "cookie-remover.js", + ]; + + for name in expected_resource_names { + assert!( + resources + .iter() + .find(|resource| { + match resource.kind { + ResourceType::Template + | ResourceType::Mime(MimeType::ApplicationJavascript) => { + resource.name == name + } + _ => false, + } + }) + .is_some(), + "failed to find {}", + name + ); + } + + let serialized = serde_json::to_string(&resources).expect("serialize resources"); + + let reserialized: Vec = + serde_json::from_str(&serialized).expect("deserialize resources"); + + assert_eq!(reserialized[0].name, "abort-current-inline-script.js"); + assert_eq!(reserialized[0].aliases, vec!["acis.js"]); + assert_eq!(reserialized[0].kind, ResourceType::Template); + + assert_eq!(reserialized[17].name, "no-setTimeout-if.js"); + assert_eq!( + reserialized[17].aliases, + vec!["nostif.js", "setTimeout-defuser.js"] + ); + assert_eq!(reserialized[17].kind, ResourceType::Template); + + assert_eq!(reserialized[20].name, "overlay-buster.js"); + assert_eq!(reserialized[20].aliases, Vec::::new()); + assert_eq!( + reserialized[20].kind, + ResourceType::Mime(MimeType::ApplicationJavascript) + ); + assert_eq!( + std::str::from_utf8( + &base64::decode(&reserialized[20].content).expect("decode base64 content") + ).expect("convert to utf8 string"), + "(function() {\nif ( window !== window.top ) {\nreturn;\n}\nvar tstart;\nvar ttl = 30000;\nvar delay = 0;\nvar delayStep = 50;\nvar buster = function() {\nvar docEl = document.documentElement,\nbodyEl = document.body,\nvw = Math.min(docEl.clientWidth, window.innerWidth),\nvh = Math.min(docEl.clientHeight, window.innerHeight),\ntol = Math.min(vw, vh) * 0.05,\nel = document.elementFromPoint(vw/2, vh/2),\nstyle, rect;\nfor (;;) {\nif ( el === null || el.parentNode === null || el === bodyEl ) {\nbreak;\n}\nstyle = window.getComputedStyle(el);\nif ( parseInt(style.zIndex, 10) >= 1000 || style.position === 'fixed' ) {\nrect = el.getBoundingClientRect();\nif ( rect.left <= tol && rect.top <= tol && (vw - rect.right) <= tol && (vh - rect.bottom) < tol ) {\nel.parentNode.removeChild(el);\ntstart = Date.now();\nel = document.elementFromPoint(vw/2, vh/2);\nbodyEl.style.setProperty('overflow', 'auto', 'important');\ndocEl.style.setProperty('overflow', 'auto', 'important');\ncontinue;\n}\n}\nel = el.parentNode;\n}\nif ( (Date.now() - tstart) < ttl ) {\ndelay = Math.min(delay + delayStep, 1000);\nsetTimeout(buster, delay);\n}\n};\nvar domReady = function(ev) {\nif ( ev ) {\ndocument.removeEventListener(ev.type, domReady);\n}\ntstart = Date.now();\nsetTimeout(buster, delay);\n};\nif ( document.readyState === 'loading' ) {\ndocument.addEventListener('DOMContentLoaded', domReady);\n} else {\ndomReady();\n}\n})();\n", + ); + + assert_eq!(reserialized[6].name, "json-prune.js"); + assert_eq!(reserialized[6].aliases, Vec::::new()); + assert_eq!(reserialized[6].kind, ResourceType::Template); + assert_eq!( + std::str::from_utf8( + &base64::decode(&reserialized[6].content).expect("decode base64 content") + ).expect("convert to utf8 string"), + "(function() {\nconst rawPrunePaths = '{{1}}';\nconst rawNeedlePaths = '{{2}}';\nconst prunePaths = rawPrunePaths !== '{{1}}' && rawPrunePaths !== ''\n? rawPrunePaths.split(/ +/)\n: [];\nlet needlePaths;\nlet log, reLogNeedle;\nif ( prunePaths.length !== 0 ) {\nneedlePaths = prunePaths.length !== 0 &&\nrawNeedlePaths !== '{{2}}' && rawNeedlePaths !== ''\n? rawNeedlePaths.split(/ +/)\n: [];\n} else {\nlog = console.log.bind(console);\nlet needle;\nif ( rawNeedlePaths === '' || rawNeedlePaths === '{{2}}' ) {\nneedle = '.?';\n} else if ( rawNeedlePaths.charAt(0) === '/' && rawNeedlePaths.slice(-1) === '/' ) {\nneedle = rawNeedlePaths.slice(1, -1);\n} else {\nneedle = rawNeedlePaths.replace(/[.*+?^${}()|[\\]\\\\]/g, '\\\\$&');\n}\nreLogNeedle = new RegExp(needle);\n}\nconst findOwner = function(root, path, prune = false) {\nlet owner = root;\nlet chain = path;\nfor (;;) {\nif ( typeof owner !== 'object' || owner === null ) {\nreturn false;\n}\nconst pos = chain.indexOf('.');\nif ( pos === -1 ) {\nif ( prune === false ) {\nreturn owner.hasOwnProperty(chain);\n}\nif ( chain === '*' ) {\nfor ( const key in owner ) {\nif ( owner.hasOwnProperty(key) === false ) { continue; }\ndelete owner[key];\n}\n} else if ( owner.hasOwnProperty(chain) ) {\ndelete owner[chain];\n}\nreturn true;\n}\nconst prop = chain.slice(0, pos);\nif (\nprop === '[]' && Array.isArray(owner) ||\nprop === '*' && owner instanceof Object\n) {\nconst next = chain.slice(pos + 1);\nlet found = false;\nfor ( const key of Object.keys(owner) ) {\nfound = findOwner(owner[key], next, prune) || found;\n}\nreturn found;\n}\nif ( owner.hasOwnProperty(prop) === false ) { return false; }\nowner = owner[prop];\nchain = chain.slice(pos + 1);\n}\n};\nconst mustProcess = function(root) {\nfor ( const needlePath of needlePaths ) {\nif ( findOwner(root, needlePath) === false ) {\nreturn false;\n}\n}\nreturn true;\n};\nconst pruner = function(o) {\nif ( log !== undefined ) {\nconst json = JSON.stringify(o, null, 2);\nif ( reLogNeedle.test(json) ) {\nlog('uBO:', location.hostname, json);\n}\nreturn o;\n}\nif ( mustProcess(o) === false ) { return o; }\nfor ( const path of prunePaths ) {\nfindOwner(o, path, true);\n}\nreturn o;\n};\nJSON.parse = new Proxy(JSON.parse, {\napply: function() {\nreturn pruner(Reflect.apply(...arguments));\n},\n});\nResponse.prototype.json = new Proxy(Response.prototype.json, {\napply: function() {\nreturn Reflect.apply(...arguments).then(o => pruner(o));\n},\n});\n})();\n", + ); + } + + #[test] + fn test_scriptlet_resource_assembly() { + let scriptlets_path = Path::new("data/test/fake-uBO-files/scriptlets.js"); + #[allow(deprecated)] + let resources = assemble_scriptlet_resources(scriptlets_path); + + let expected_resource_names = vec![ + "abort-current-inline-script.js", + "abort-on-property-read.js", + "abort-on-property-write.js", + "addEventListener-defuser.js", + "addEventListener-logger.js", + "json-prune.js", + "nano-setInterval-booster.js", + "nano-setTimeout-booster.js", + "noeval-if.js", + "remove-attr.js", + "requestAnimationFrame-if.js", + "set-constant.js", + "setInterval-defuser.js", + "no-setInterval-if.js", + "setTimeout-defuser.js", + "no-setTimeout-if.js", + "webrtc-if.js", + "window.name-defuser", + "overlay-buster.js", + "alert-buster.js", + "gpt-defuser.js", + "nowebrtc.js", + "golem.de.js", + "upmanager-defuser.js", + "smartadserver.com.js", + "adfly-defuser.js", + "disable-newtab-links.js", + "damoh-defuser.js", + "twitch-videoad.js", + "fingerprint2.js", + "cookie-remover.js", + ]; + + for name in expected_resource_names { + assert!( + resources + .iter() + .find(|resource| { + match resource.kind { + ResourceType::Template + | ResourceType::Mime(MimeType::ApplicationJavascript) => { + resource.name == name + } + _ => false, + } + }) + .is_some(), + "failed to find {}", + name + ); + } + + let serialized = serde_json::to_string(&resources).expect("serialize resources"); + + let reserialized: Vec = + serde_json::from_str(&serialized).expect("deserialize resources"); + + assert_eq!(reserialized[0].name, "abort-current-inline-script.js"); + assert_eq!(reserialized[0].aliases, vec!["acis.js"]); + assert_eq!(reserialized[0].kind, ResourceType::Template); + + assert_eq!(reserialized[18].name, "overlay-buster.js"); + assert_eq!(reserialized[18].aliases, Vec::::new()); + assert_eq!( + reserialized[18].kind, + ResourceType::Mime(MimeType::ApplicationJavascript) + ); + assert_eq!( + std::str::from_utf8( + &base64::decode(&reserialized[18].content).expect("decode base64 content") + ).expect("convert to utf8 string"), + "(function() {\nif ( window !== window.top ) {\nreturn;\n}\nvar tstart;\nvar ttl = 30000;\nvar delay = 0;\nvar delayStep = 50;\nvar buster = function() {\nvar docEl = document.documentElement,\nbodyEl = document.body,\nvw = Math.min(docEl.clientWidth, window.innerWidth),\nvh = Math.min(docEl.clientHeight, window.innerHeight),\ntol = Math.min(vw, vh) * 0.05,\nel = document.elementFromPoint(vw/2, vh/2),\nstyle, rect;\nfor (;;) {\nif ( el === null || el.parentNode === null || el === bodyEl ) {\nbreak;\n}\nstyle = window.getComputedStyle(el);\nif ( parseInt(style.zIndex, 10) >= 1000 || style.position === 'fixed' ) {\nrect = el.getBoundingClientRect();\nif ( rect.left <= tol && rect.top <= tol && (vw - rect.right) <= tol && (vh - rect.bottom) < tol ) {\nel.parentNode.removeChild(el);\ntstart = Date.now();\nel = document.elementFromPoint(vw/2, vh/2);\nbodyEl.style.setProperty('overflow', 'auto', 'important');\ndocEl.style.setProperty('overflow', 'auto', 'important');\ncontinue;\n}\n}\nel = el.parentNode;\n}\nif ( (Date.now() - tstart) < ttl ) {\ndelay = Math.min(delay + delayStep, 1000);\nsetTimeout(buster, delay);\n}\n};\nvar domReady = function(ev) {\nif ( ev ) {\ndocument.removeEventListener(ev.type, domReady);\n}\ntstart = Date.now();\nsetTimeout(buster, delay);\n};\nif ( document.readyState === 'loading' ) {\ndocument.addEventListener('DOMContentLoaded', domReady);\n} else {\ndomReady();\n}\n})();\n", + ); + } +} diff --git a/tests/unit/resources/resource_storage.rs b/tests/unit/resources/resource_storage.rs new file mode 100644 index 00000000..48932de2 --- /dev/null +++ b/tests/unit/resources/resource_storage.rs @@ -0,0 +1,792 @@ +#[cfg(test)] +mod extract_function_name_tests { + use super::super::extract_function_name; + + #[test] + fn test_extract_function_name() { + assert_eq!(extract_function_name("function test() {}"), Some("test")); + assert_eq!(extract_function_name("function $() {}"), Some("$")); + assert_eq!(extract_function_name("function _() {}"), Some("_")); + assert_eq!(extract_function_name("function ಠ_ಠ() {}"), Some("ಠ_ಠ")); + assert_eq!( + extract_function_name("function\ntest\n(\n)\n{\n}"), + Some("test") + ); + assert_eq!( + extract_function_name("function\ttest\t(\t)\t{\t}"), + Some("test") + ); + assert_eq!( + extract_function_name("function test() { (function inner() {})() }"), + Some("test") + ); + assert_eq!( + extract_function_name("let e = function test() { (function inner() {})() }"), + None + ); + assert_eq!( + extract_function_name("function () { (function inner() {})() }"), + None + ); + } +} + +#[cfg(test)] +mod arg_parsing_util_tests { + use super::super::*; + + #[test] + fn test_index_next_unescaped_separator() { + assert_eq!( + index_next_unescaped_separator(r#"``"#, '`'), + (Some(0), false) + ); + assert_eq!( + index_next_unescaped_separator(r#"\``"#, '`'), + (Some(2), true) + ); + assert_eq!( + index_next_unescaped_separator(r#"\\``"#, '`'), + (Some(2), false) + ); + assert_eq!( + index_next_unescaped_separator(r#"\\\``"#, '`'), + (Some(4), true) + ); + assert_eq!( + index_next_unescaped_separator(r#"\\\\``"#, '`'), + (Some(4), false) + ); + assert_eq!( + index_next_unescaped_separator(r#"\`\\\``"#, '`'), + (Some(6), true) + ); + assert_eq!( + index_next_unescaped_separator(r#"\\\`\``"#, '`'), + (Some(6), true) + ); + assert_eq!( + index_next_unescaped_separator(r#"\\\`\\``"#, '`'), + (Some(6), true) + ); + + assert_eq!( + index_next_unescaped_separator(r#"\,test\,"#, ','), + (None, true) + ) + } + + #[test] + fn test_normalize_arg() { + assert_eq!(normalize_arg(r#"\`"#, '`'), r#"`"#); + assert_eq!(normalize_arg(r#"\\\`"#, '`'), r#"\\`"#); + assert_eq!(normalize_arg(r#"\`\\\`"#, '`'), r#"`\\`"#); + assert_eq!(normalize_arg(r#"\\\`\`"#, '`'), r#"\\``"#); + assert_eq!(normalize_arg(r#"\\\`\\`"#, '`'), r#"\\`\\`"#); + } +} + +#[cfg(test)] +mod redirect_storage_tests { + use super::super::*; + use crate::resources::MimeType; + + #[test] + fn get_resource_by_name() { + let mut storage = ResourceStorage::default(); + storage + .add_resource(Resource::simple( + "name.js", + MimeType::ApplicationJavascript, + "resource data", + )) + .unwrap(); + + assert_eq!( + storage.get_redirect_resource("name.js"), + Some(format!( + "data:application/javascript;base64,{}", + base64::encode("resource data") + )), + ); + } + + #[test] + fn get_resource_by_alias() { + let mut storage = ResourceStorage::default(); + let mut r = Resource::simple("name.js", MimeType::ApplicationJavascript, "resource data"); + r.aliases.push("alias.js".to_string()); + storage.add_resource(r).unwrap(); + + assert_eq!( + storage.get_redirect_resource("alias.js"), + Some(format!( + "data:application/javascript;base64,{}", + base64::encode("resource data") + )), + ); + } + + #[test] + fn permissions() { + let mut storage = ResourceStorage::default(); + let mut r = Resource::simple("name.js", MimeType::ApplicationJavascript, "resource data"); + r.aliases.push("alias.js".to_string()); + r.permission = PermissionMask::from_bits(0b00000001); + storage.add_resource(r).unwrap(); + + assert_eq!(storage.get_redirect_resource("name.js"), None,); + assert_eq!(storage.get_redirect_resource("alias.js"), None,); + } +} + +#[cfg(test)] +mod scriptlet_storage_tests { + use super::super::*; + use crate::resources::MimeType; + + #[test] + fn parse_argslist() { + let args = parse_scriptlet_args("scriptlet, hello world, foobar").unwrap(); + assert_eq!(args, vec!["scriptlet", "hello world", "foobar"]); + } + + #[test] + fn parse_argslist_noargs() { + let args = parse_scriptlet_args("scriptlet").unwrap(); + assert_eq!(args, vec!["scriptlet"]); + } + + #[test] + fn parse_argslist_empty() { + let args = parse_scriptlet_args("").unwrap(); + assert!(args.is_empty()); + } + + #[test] + fn parse_argslist_commas() { + let args = parse_scriptlet_args("scriptletname, one\\, two\\, three, four").unwrap(); + assert_eq!(args, vec!["scriptletname", "one, two, three", "four"]); + } + + #[test] + fn parse_argslist_badchars() { + let args = parse_scriptlet_args( + r##"scriptlet, "; window.location.href = bad.com; , '; alert("you're\, hacked"); , \u\r\l(bad.com) "##, + ); + assert_eq!(args, None); + } + + #[test] + fn parse_argslist_quoted() { + let args = parse_scriptlet_args( + r#"debug-scriptlet, 'test', '"test"', "test", "'test'", `test`, '`test`'"#, + ) + .unwrap(); + assert_eq!( + args, + vec![ + r#"debug-scriptlet"#, + r#"test"#, + r#""test""#, + r#"test"#, + r#"'test'"#, + r#"test"#, + r#"`test`"#, + ], + ); + let args = + parse_scriptlet_args(r#"debug-scriptlet, 'test,test', '', "", ' ', ' test '"#).unwrap(); + assert_eq!( + args, + vec![ + r#"debug-scriptlet"#, + r#"test,test"#, + r#""#, + r#""#, + r#" "#, + r#" test "#, + ], + ); + let args = parse_scriptlet_args( + r#"debug-scriptlet, test\,test, test\test, "test\test", 'test\test', "#, + ) + .unwrap(); + assert_eq!( + args, + vec![ + r#"debug-scriptlet"#, + r#"test,test"#, + r#"test\test"#, + r#"test\test"#, + r#"test\test"#, + r#""#, + ], + ); + let args = parse_scriptlet_args(r#"debug-scriptlet, "test"#); + assert_eq!(args, None); + let args = parse_scriptlet_args(r#"debug-scriptlet, 'test'"test""#); + assert_eq!(args, None); + } + + #[test] + fn parse_argslist_trailing_escaped_comma() { + let args = parse_scriptlet_args(r#"remove-node-text, script, \,mr=function(r\,"#).unwrap(); + assert_eq!(args, vec!["remove-node-text", "script", ",mr=function(r,"]); + } + + #[test] + fn get_patched_scriptlets() { + let resources = ResourceStorage::from_resources([ + Resource { + name: "greet.js".to_string(), + aliases: vec![], + kind: ResourceType::Template, + content: base64::encode("console.log('Hello {{1}}, my name is {{2}}')"), + dependencies: vec![], + permission: Default::default(), + }, + Resource { + name: "alert.js".to_owned(), + aliases: vec![], + kind: ResourceType::Template, + content: base64::encode("alert('{{1}}')"), + dependencies: vec![], + permission: Default::default(), + }, + Resource { + name: "blocktimer.js".to_owned(), + aliases: vec![], + kind: ResourceType::Template, + content: base64::encode("setTimeout(blockAds, {{1}})"), + dependencies: vec![], + permission: Default::default(), + }, + Resource { + name: "null.js".to_owned(), + aliases: vec![], + kind: ResourceType::Template, + content: base64::encode("(()=>{})()"), + dependencies: vec![], + permission: Default::default(), + }, + Resource { + name: "set-local-storage-item.js".to_owned(), + aliases: vec![], + kind: ResourceType::Template, + content: base64::encode(r#"{{1}} that dollar signs in {{2}} are untouched"#), + dependencies: vec![], + permission: Default::default(), + }, + ]); + + assert_eq!( + resources.get_scriptlet_resources([("greet, world, adblock-rust", Default::default())]), + "try {\nconsole.log('Hello world, my name is adblock-rust')\n} catch ( e ) { }\n", + ); + assert_eq!( + resources + .get_scriptlet_resources([("alert, All systems are go!! ", Default::default())]), + "try {\nalert('All systems are go!!')\n} catch ( e ) { }\n", + ); + assert_eq!( + resources.get_scriptlet_resources([( + "alert, Uh oh\\, check the logs...", + Default::default() + )]), + "try {\nalert('Uh oh, check the logs...')\n} catch ( e ) { }\n", + ); + assert_eq!( + resources + .get_scriptlet_resources([(r#"alert, this has "quotes""#, Default::default())]), + "try {\nalert('this has \\\"quotes\\\"')\n} catch ( e ) { }\n", + ); + assert_eq!( + resources.get_scriptlet_resources([("blocktimer, 3000", Default::default())]), + "try {\nsetTimeout(blockAds, 3000)\n} catch ( e ) { }\n", + ); + assert_eq!( + resources.get_scriptlet_resources([("null", Default::default())]), + "try {\n(()=>{})()\n} catch ( e ) { }\n" + ); + assert_eq!( + resources.get_scriptlet_resources([("null, null", Default::default())]), + "try {\n(()=>{})()\n} catch ( e ) { }\n", + ); + assert_eq!( + resources.get_scriptlet_resources([("greet, everybody", Default::default())]), + "try {\nconsole.log('Hello everybody, my name is {{2}}')\n} catch ( e ) { }\n", + ); + + assert_eq!( + resources.get_scriptlet_resource("unit-testing", Default::default(), &mut vec![]), + Err(ScriptletResourceError::NoMatchingScriptlet), + ); + assert_eq!( + resources.get_scriptlet_resource("", Default::default(), &mut vec![]), + Err(ScriptletResourceError::MissingScriptletName), + ); + + assert_eq!( + resources.get_scriptlet_resources([( + "set-local-storage-item, Test, $remove$", + Default::default() + )]), + "try {\nTest that dollar signs in $remove$ are untouched\n} catch ( e ) { }\n", + ); + } + + #[test] + fn parse_template_file_format() { + let resources = ResourceStorage::from_resources([ + Resource { + name: "abort-current-inline-script.js".into(), + aliases: vec!["acis.js".into()], + kind: ResourceType::Mime(MimeType::ApplicationJavascript), + content: base64::encode("(function() {alert(\"hi\");})();"), + dependencies: vec![], + permission: Default::default(), + }, + Resource { + name: "abort-on-property-read.js".into(), + aliases: vec!["aopr.js".into()], + kind: ResourceType::Template, + content: base64::encode("(function() {confirm(\"Do you want to {{1}}?\");})();"), + dependencies: vec![], + permission: Default::default(), + }, + Resource { + name: "googletagservices_gpt.js".into(), + aliases: vec![ + "googletagservices.com/gpt.js".into(), + "googletagservices-gpt".into(), + ], + kind: ResourceType::Template, + content: base64::encode("function gpt(a1 = '', a2 = '') {console.log(a1, a2)}"), + dependencies: vec![], + permission: Default::default(), + }, + ]); + + assert_eq!( + resources.get_scriptlet_resources([("aopr, code", Default::default())]), + "try {\n(function() {confirm(\"Do you want to code?\");})();\n} catch ( e ) { }\n", + ); + + assert_eq!( + resources.get_scriptlet_resources([("abort-on-property-read, write tests", Default::default())]), + "try {\n(function() {confirm(\"Do you want to write tests?\");})();\n} catch ( e ) { }\n", + ); + + assert_eq!( + resources.get_scriptlet_resources([("abort-on-property-read.js, block advertisements", Default::default())]), + "try {\n(function() {confirm(\"Do you want to block advertisements?\");})();\n} catch ( e ) { }\n", + ); + + assert_eq!( + resources.get_scriptlet_resources([("acis", Default::default())]), + "try {\n(function() {alert(\"hi\");})();\n} catch ( e ) { }\n", + ); + + assert_eq!( + resources.get_scriptlet_resources([("acis.js", Default::default())]), + "try {\n(function() {alert(\"hi\");})();\n} catch ( e ) { }\n", + ); + + assert_eq!( + resources.get_scriptlet_resources([("googletagservices_gpt.js", Default::default())]), + "function gpt(a1 = '', a2 = '') {console.log(a1, a2)}\ntry {\ngpt()\n} catch ( e ) { }\n", + ); + + assert_eq!( + resources.get_scriptlet_resources([("googletagservices_gpt, test1", Default::default())]), + "function gpt(a1 = '', a2 = '') {console.log(a1, a2)}\ntry {\ngpt(\"test1\")\n} catch ( e ) { }\n", + ); + + assert_eq!( + resources.get_scriptlet_resources([("googletagservices.com/gpt, test1, test2", Default::default())]), + "function gpt(a1 = '', a2 = '') {console.log(a1, a2)}\ntry {\ngpt(\"test1\", \"test2\")\n} catch ( e ) { }\n", + ); + + assert_eq!( + resources.get_scriptlet_resource( + r#"googletagservices.com/gpt.js, t"es't1, $te\st2$"#, + Default::default(), + &mut vec![] + ), + Ok(r#"gpt("t\"es't1", "$te\\st2$")"#.to_owned()), + ); + + // The alias does not have a `.js` extension, so it cannot be used for a scriptlet + // injection (only as a redirect resource). + assert_eq!( + resources.get_scriptlet_resource( + r#"googletagservices-gpt, t"es't1, te\st2"#, + Default::default(), + &mut vec![] + ), + Err(ScriptletResourceError::NoMatchingScriptlet), + ); + + // Object-style injection + assert_eq!( + resources.get_scriptlet_resource( + r#"googletagservices.com/gpt, { "test": true }"#, + Default::default(), + &mut vec![] + ), + Err(ScriptletResourceError::ScriptletArgObjectSyntaxUnsupported), + ); + } + + /// Currently, only 9 template arguments are supported - but reaching that limit should not + /// cause a panic. + #[test] + fn patch_argslist_many_args() { + let resources = ResourceStorage::from_resources([Resource { + name: "abort-current-script.js".into(), + aliases: vec!["acs.js".into()], + kind: ResourceType::Mime(MimeType::ApplicationJavascript), + content: base64::encode( + "{{1}} {{2}} {{3}} {{4}} {{5}} {{6}} {{7}} {{8}} {{9}} {{10}} {{11}} {{12}}", + ), + dependencies: vec![], + permission: Default::default(), + }]); + + let args = parse_scriptlet_args("acs, this, probably, is, going, to, break, brave, and, crash, it, instead, of, ignoring, it").unwrap(); + assert_eq!( + args, + vec![ + "acs", "this", "probably", "is", "going", "to", "break", "brave", "and", "crash", + "it", "instead", "of", "ignoring", "it" + ] + ); + + assert_eq!( + resources.get_scriptlet_resources([("acs, this, probably, is, going, to, break, brave, and, crash, it, instead, of, ignoring, it", Default::default())]), + "try {\nthis probably is going to break brave and crash {{10}} {{11}} {{12}}\n} catch ( e ) { }\n", + ); + } + + #[test] + fn permissions() { + const PERM01: PermissionMask = PermissionMask::from_bits(0b00000001); + const PERM10: PermissionMask = PermissionMask::from_bits(0b00000010); + const PERM11: PermissionMask = PermissionMask::from_bits(0b00000011); + let resources = ResourceStorage::from_resources([ + Resource::simple( + "default-perms.js", + MimeType::ApplicationJavascript, + "default-perms", + ), + Resource { + name: "perm0.js".into(), + aliases: vec!["0.js".to_string()], + kind: ResourceType::Mime(MimeType::ApplicationJavascript), + content: base64::encode("perm0"), + dependencies: vec![], + permission: PERM01, + }, + Resource { + name: "perm1.js".into(), + aliases: vec!["1.js".to_string()], + kind: ResourceType::Mime(MimeType::ApplicationJavascript), + content: base64::encode("perm1"), + dependencies: vec![], + permission: PERM10, + }, + Resource { + name: "perm10.js".into(), + aliases: vec!["10.js".to_string()], + kind: ResourceType::Mime(MimeType::ApplicationJavascript), + content: base64::encode("perm10"), + dependencies: vec![], + permission: PERM11, + }, + ]); + + fn test_perm( + resources: &ResourceStorage, + perm: PermissionMask, + expect_ok: &[&str], + expect_fail: &[&str], + ) { + for ident in expect_ok { + if ident.len() > 2 { + assert_eq!( + resources.get_scriptlet_resources([(*ident, perm)]), + format!("try {{\n{}\n}} catch ( e ) {{ }}\n", ident), + ); + } else { + assert_eq!( + resources.get_scriptlet_resources([(*ident, perm)]), + format!("try {{\nperm{}\n}} catch ( e ) {{ }}\n", ident), + ); + } + } + + for ident in expect_fail { + assert_eq!( + resources.get_scriptlet_resource(ident, perm, &mut vec![]), + Err(ScriptletResourceError::InsufficientPermissions), + ); + } + } + + test_perm( + &resources, + Default::default(), + &["default-perms"], + &["perm0", "perm1", "perm10", "0", "1", "10"], + ); + test_perm( + &resources, + PERM01, + &["default-perms", "perm0", "0"], + &["perm1", "perm10", "1", "10"], + ); + test_perm( + &resources, + PERM10, + &["default-perms", "perm1", "1"], + &["perm0", "perm10", "0", "10"], + ); + test_perm( + &resources, + PERM11, + &["default-perms", "perm0", "perm1", "perm10", "0", "1", "10"], + &[], + ); + } + + #[test] + fn dependencies() { + const PERM01: PermissionMask = PermissionMask::from_bits(0b00000001); + let resources = ResourceStorage::from_resources([ + Resource::simple("simple.fn", MimeType::FnJavascript, "simple"), + Resource { + name: "permissioned.fn".into(), + aliases: vec![], + kind: ResourceType::Mime(MimeType::FnJavascript), + content: base64::encode("permissioned"), + dependencies: vec!["a.fn".to_string(), "common.fn".to_string()], + permission: PERM01, + }, + Resource { + name: "a.fn".into(), + aliases: vec![], + kind: ResourceType::Mime(MimeType::FnJavascript), + content: base64::encode("a"), + dependencies: vec!["common.fn".to_string()], + permission: Default::default(), + }, + Resource { + name: "b.fn".into(), + aliases: vec![], + kind: ResourceType::Mime(MimeType::FnJavascript), + content: base64::encode("b"), + dependencies: vec!["common.fn".to_string()], + permission: Default::default(), + }, + Resource { + name: "common.fn".into(), + aliases: vec![], + kind: ResourceType::Mime(MimeType::FnJavascript), + content: base64::encode("common"), + dependencies: vec![], + permission: Default::default(), + }, + Resource { + name: "test.js".into(), + aliases: vec![], + kind: ResourceType::Mime(MimeType::ApplicationJavascript), + content: base64::encode("function test() {}"), + dependencies: vec![ + "permissioned.fn".to_string(), + "a.fn".to_string(), + "b.fn".to_string(), + "common.fn".to_string(), + ], + permission: Default::default(), + }, + Resource { + name: "deploop1.fn".into(), + aliases: vec![], + kind: ResourceType::Mime(MimeType::FnJavascript), + content: base64::encode("deploop1"), + dependencies: vec!["deploop1.fn".to_string()], + permission: Default::default(), + }, + Resource { + name: "deploop2a.fn".into(), + aliases: vec![], + kind: ResourceType::Mime(MimeType::FnJavascript), + content: base64::encode("deploop2a"), + dependencies: vec!["deploop2b.fn".to_string()], + permission: Default::default(), + }, + Resource { + name: "deploop2b.fn".into(), + aliases: vec![], + kind: ResourceType::Mime(MimeType::FnJavascript), + content: base64::encode("deploop2b"), + dependencies: vec!["deploop2a.fn".to_string()], + permission: Default::default(), + }, + Resource { + name: "test-wrapper.js".into(), + aliases: vec![], + kind: ResourceType::Mime(MimeType::ApplicationJavascript), + content: base64::encode("function testWrapper() { test(arguments) }"), + dependencies: vec!["test.js".to_string()], + permission: Default::default(), + }, + Resource { + name: "shared.js".into(), + aliases: vec![], + kind: ResourceType::Mime(MimeType::ApplicationJavascript), + content: base64::encode("function shared() { }"), + dependencies: vec!["a.fn".to_string(), "b.fn".to_string()], + permission: Default::default(), + }, + ]); + + { + let mut deps = vec![]; + assert_eq!( + resources.recursive_dependencies("common.fn", &mut deps, Default::default()), + Ok(()) + ); + assert_eq!( + deps.iter() + .map(|dep| dep.name.to_string()) + .collect::>(), + vec!["common.fn"] + ); + } + + { + let mut deps = vec![]; + assert_eq!( + resources.recursive_dependencies("a.fn", &mut deps, Default::default()), + Ok(()) + ); + assert_eq!( + deps.iter() + .map(|dep| dep.name.to_string()) + .collect::>(), + vec!["a.fn", "common.fn"] + ); + } + + { + let mut deps = vec![]; + assert_eq!( + resources.recursive_dependencies("b.fn", &mut deps, Default::default()), + Ok(()) + ); + assert_eq!( + deps.iter() + .map(|dep| dep.name.to_string()) + .collect::>(), + vec!["b.fn", "common.fn"] + ); + } + + { + let mut deps = vec![]; + assert_eq!( + resources.recursive_dependencies("permissioned.fn", &mut deps, Default::default()), + Err(ScriptletResourceError::InsufficientPermissions) + ); + } + { + let mut deps = vec![]; + assert_eq!( + resources.recursive_dependencies("permissioned.fn", &mut deps, PERM01), + Ok(()) + ); + assert_eq!( + deps.iter() + .map(|dep| dep.name.to_string()) + .collect::>(), + vec!["permissioned.fn", "a.fn", "common.fn"] + ); + } + + { + let mut deps = vec![]; + assert_eq!( + resources.recursive_dependencies("test.js", &mut deps, Default::default()), + Err(ScriptletResourceError::InsufficientPermissions) + ); + } + { + let mut deps = vec![]; + assert_eq!( + resources.recursive_dependencies("test.js", &mut deps, PERM01), + Ok(()) + ); + assert_eq!( + deps.iter() + .map(|dep| dep.name.to_string()) + .collect::>(), + vec!["test.js", "permissioned.fn", "a.fn", "common.fn", "b.fn"] + ); + } + + { + let mut deps = vec![]; + assert_eq!( + resources.recursive_dependencies("deploop1.fn", &mut deps, Default::default()), + Ok(()) + ); + assert_eq!( + deps.iter() + .map(|dep| dep.name.to_string()) + .collect::>(), + vec!["deploop1.fn"] + ); + } + + { + let mut deps = vec![]; + assert_eq!( + resources.recursive_dependencies("deploop2a.fn", &mut deps, Default::default()), + Ok(()) + ); + assert_eq!( + deps.iter() + .map(|dep| dep.name.to_string()) + .collect::>(), + vec!["deploop2a.fn", "deploop2b.fn"] + ); + } + + assert_eq!(resources.get_scriptlet_resources([]), ""); + + assert_eq!( + resources.get_scriptlet_resources([("test, arg1, arg2", Default::default())]), + "" + ); + + assert_eq!(resources.get_scriptlet_resources([("test, arg1, arg2", PERM01)]), "permissioned\na\ncommon\nb\nfunction test() {}\ntry {\ntest(\"arg1\", \"arg2\")\n} catch ( e ) { }\n"); + + // Note: `test` still gets inserted as a dependency before it becomes apparent that + // `permissioned` is not authorized. However, this shouldn't have much detrimental effect. + assert_eq!( + resources.get_scriptlet_resources([("test-wrapper", Default::default())]), + "function test() {}\n" + ); + assert_eq!(resources.get_scriptlet_resources([("test-wrapper", PERM01)]), "function test() {}\npermissioned\na\ncommon\nb\nfunction testWrapper() { test(arguments) }\ntry {\ntestWrapper()\n} catch ( e ) { }\n"); + + assert_eq!(resources.get_scriptlet_resources([("test", PERM01), ("test-wrapper", PERM01)]), "permissioned\na\ncommon\nb\nfunction test() {}\nfunction testWrapper() { test(arguments) }\ntry {\ntest()\n} catch ( e ) { }\ntry {\ntestWrapper()\n} catch ( e ) { }\n"); + + assert_eq!( + resources.get_scriptlet_resources([("shared, argument", Default::default())]), + "a\ncommon\nb\nfunction shared() { }\ntry {\nshared(\"argument\")\n} catch ( e ) { }\n" + ); + assert_eq!(resources.get_scriptlet_resources([("test, 1", PERM01), ("test-wrapper, 2", PERM01), ("shared, 3", Default::default())]), "permissioned\na\ncommon\nb\nfunction test() {}\nfunction testWrapper() { test(arguments) }\nfunction shared() { }\ntry {\ntest(\"1\")\n} catch ( e ) { }\ntry {\ntestWrapper(\"2\")\n} catch ( e ) { }\ntry {\nshared(\"3\")\n} catch ( e ) { }\n"); + } +} diff --git a/tests/unit/utils.rs b/tests/unit/utils.rs new file mode 100644 index 00000000..c8c938b5 --- /dev/null +++ b/tests/unit/utils.rs @@ -0,0 +1,104 @@ +#[cfg(test)] +mod tests { + use super::super::*; + + #[test] + #[ignore] // won't match hard-coded values when using a different hash function + fn fast_hash_matches_ts() { + assert_eq!(fast_hash("hello world"), 4173747013); // cross-checked with the TS implementation + assert_eq!(fast_hash("ello worl"), 2759317833); // cross-checked with the TS implementation + assert_eq!(fast_hash(&"hello world"[1..10]), fast_hash("ello worl")); + assert_eq!(fast_hash(&"hello world"[1..5]), fast_hash("ello")); + } + + fn t(tokens: &[&str]) -> Vec { + tokens.into_iter().map(|t| fast_hash(&t)).collect() + } + + #[test] + fn tokenize_filter_works() { + assert_eq!( + tokenize_filter("", false, false).as_slice(), + t(&vec![]).as_slice() + ); + assert_eq!( + tokenize_filter("", true, false).as_slice(), + t(&vec![]).as_slice() + ); + assert_eq!( + tokenize_filter("", false, true).as_slice(), + t(&vec![]).as_slice() + ); + assert_eq!( + tokenize_filter("", true, true).as_slice(), + t(&vec![]).as_slice() + ); + assert_eq!( + tokenize_filter("", false, false).as_slice(), + t(&vec![]).as_slice() + ); + + assert_eq!( + tokenize_filter("foo/bar baz", false, false).as_slice(), + t(&vec!["foo", "bar", "baz"]).as_slice() + ); + assert_eq!( + tokenize_filter("foo/bar baz", true, false).as_slice(), + t(&vec!["bar", "baz"]).as_slice() + ); + assert_eq!( + tokenize_filter("foo/bar baz", true, true).as_slice(), + t(&vec!["bar"]).as_slice() + ); + assert_eq!( + tokenize_filter("foo/bar baz", false, true).as_slice(), + t(&vec!["foo", "bar"]).as_slice() + ); + assert_eq!( + tokenize_filter("foo////bar baz", false, true).as_slice(), + t(&vec!["foo", "bar"]).as_slice() + ); + } + + #[test] + fn tokenize_works() { + assert_eq!(tokenize("").as_slice(), t(&vec![]).as_slice()); + assert_eq!(tokenize("foo").as_slice(), t(&vec!["foo"]).as_slice()); + assert_eq!( + tokenize("foo/bar").as_slice(), + t(&vec!["foo", "bar"]).as_slice() + ); + assert_eq!( + tokenize("foo-bar").as_slice(), + t(&vec!["foo", "bar"]).as_slice() + ); + assert_eq!( + tokenize("foo.bar").as_slice(), + t(&vec!["foo", "bar"]).as_slice() + ); + assert_eq!( + tokenize("foo.barƬ").as_slice(), + t(&vec!["foo", "barƬ"]).as_slice() + ); + + // Tokens cannot be surrounded by * + assert_eq!(tokenize("foo.barƬ*").as_slice(), t(&vec!["foo"]).as_slice()); + assert_eq!( + tokenize("*foo.barƬ").as_slice(), + t(&vec!["barƬ"]).as_slice() + ); + assert_eq!(tokenize("*foo.barƬ*").as_slice(), t(&vec![]).as_slice()); + } + + #[test] + fn bin_lookup_works() { + assert_eq!(bin_lookup(&[], 42), false); + assert_eq!(bin_lookup(&[42], 42), true); + assert_eq!(bin_lookup(&[1, 2, 3, 4, 42], 42), true); + assert_eq!(bin_lookup(&[1, 2, 3, 4, 42], 1), true); + assert_eq!(bin_lookup(&[1, 2, 3, 4, 42], 3), true); + assert_eq!(bin_lookup(&[1, 2, 3, 4, 42], 43), false); + assert_eq!(bin_lookup(&[1, 2, 3, 4, 42], 0), false); + assert_eq!(bin_lookup(&[1, 2, 3, 4, 42], 5), false); + } +}