diff --git a/Cargo.toml b/Cargo.toml index 9edcf0ec..c16d5868 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -40,7 +40,7 @@ cssparser = { version = "0.29", optional = true } selectors = { version = "0.24", optional = true } serde_json = "1.0" thiserror = "1.0" -flatbuffers = { version = "25.2.10", optional = true } +flatbuffers = { version = "25.2.10" } [dev-dependencies] criterion = "=0.5.1" @@ -80,6 +80,10 @@ harness = false name = "bench_memory" harness = false +[[bench]] +name = "bench_serialization" +harness = false + # Currently disabled, as cosmetic filter internals # are no longer part of the crate's public API #[[bench]] @@ -97,4 +101,3 @@ css-validation = ["cssparser", "selectors"] content-blocking = [] embedded-domain-resolver = ["addr"] # Requires setting an external domain resolver if disabled. resource-assembler = [] -flatbuffers-storage = [ "flatbuffers" ] diff --git a/benches/bench_matching.rs b/benches/bench_matching.rs index 553e2909..5803ebe1 100644 --- a/benches/bench_matching.rs +++ b/benches/bench_matching.rs @@ -205,81 +205,6 @@ fn rule_match_parsed_elep_slimlist(c: &mut Criterion) { group.finish(); } -fn serialization(c: &mut Criterion) { - let mut group = c.benchmark_group("blocker-serialization"); - - group.sample_size(20); - - group.bench_function("el+ep", move |b| { - let full_rules = rules_from_lists(&[ - "data/easylist.to/easylist/easylist.txt", - "data/easylist.to/easylist/easyprivacy.txt", - ]); - - let engine = Engine::from_rules(full_rules, Default::default()); - b.iter(|| assert!(engine.serialize_raw().unwrap().len() > 0)) - }); - group.bench_function("el", move |b| { - let full_rules = rules_from_lists(&["data/easylist.to/easylist/easylist.txt"]); - - let engine = Engine::from_rules(full_rules, Default::default()); - b.iter(|| assert!(engine.serialize_raw().unwrap().len() > 0)) - }); - group.bench_function("slimlist", move |b| { - let full_rules = rules_from_lists(&["data/slim-list.txt"]); - - let engine = 
Engine::from_rules(full_rules, Default::default()); - b.iter(|| assert!(engine.serialize_raw().unwrap().len() > 0)) - }); - - group.finish(); -} - -fn deserialization(c: &mut Criterion) { - let mut group = c.benchmark_group("blocker-deserialization"); - - group.sample_size(20); - - group.bench_function("el+ep", move |b| { - let full_rules = rules_from_lists(&[ - "data/easylist.to/easylist/easylist.txt", - "data/easylist.to/easylist/easyprivacy.txt", - ]); - - let engine = Engine::from_rules(full_rules, Default::default()); - let serialized = engine.serialize_raw().unwrap(); - - b.iter(|| { - let mut deserialized = Engine::default(); - assert!(deserialized.deserialize(&serialized).is_ok()); - }) - }); - group.bench_function("el", move |b| { - let full_rules = rules_from_lists(&["data/easylist.to/easylist/easylist.txt"]); - - let engine = Engine::from_rules(full_rules, Default::default()); - let serialized = engine.serialize_raw().unwrap(); - - b.iter(|| { - let mut deserialized = Engine::default(); - assert!(deserialized.deserialize(&serialized).is_ok()); - }) - }); - group.bench_function("slimlist", move |b| { - let full_rules = rules_from_lists(&["data/slim-list.txt"]); - - let engine = Engine::from_rules(full_rules, Default::default()); - let serialized = engine.serialize_raw().unwrap(); - - b.iter(|| { - let mut deserialized = Engine::default(); - assert!(deserialized.deserialize(&serialized).is_ok()); - }) - }); - - group.finish(); -} - fn rule_match_browserlike_comparable(c: &mut Criterion) { let mut group = c.benchmark_group("rule-match-browserlike"); @@ -395,7 +320,5 @@ criterion_group!( rule_match_parsed_elep_slimlist, rule_match_browserlike_comparable, rule_match_first_request, - serialization, - deserialization ); criterion_main!(benches); diff --git a/benches/bench_serialization.rs b/benches/bench_serialization.rs new file mode 100644 index 00000000..73c27de8 --- /dev/null +++ b/benches/bench_serialization.rs @@ -0,0 +1,85 @@ +use criterion::*; + +use 
adblock::Engine; + +#[path = "../tests/test_utils.rs"] +mod test_utils; +use test_utils::rules_from_lists; + +fn serialization(c: &mut Criterion) { + let mut group = c.benchmark_group("blocker-serialization"); + + group.sample_size(20); + + group.bench_function("el+ep", move |b| { + let full_rules = rules_from_lists(&[ + "data/easylist.to/easylist/easylist.txt", + "data/easylist.to/easylist/easyprivacy.txt", + ]); + + let engine = Engine::from_rules(full_rules, Default::default()); + b.iter(|| assert!(engine.serialize().unwrap().len() > 0)) + }); + group.bench_function("el", move |b| { + let full_rules = rules_from_lists(&["data/easylist.to/easylist/easylist.txt"]); + + let engine = Engine::from_rules(full_rules, Default::default()); + b.iter(|| assert!(engine.serialize().unwrap().len() > 0)) + }); + group.bench_function("slimlist", move |b| { + let full_rules = rules_from_lists(&["data/slim-list.txt"]); + + let engine = Engine::from_rules(full_rules, Default::default()); + b.iter(|| assert!(engine.serialize().unwrap().len() > 0)) + }); + + group.finish(); +} + +fn deserialization(c: &mut Criterion) { + let mut group = c.benchmark_group("blocker-deserialization"); + + group.sample_size(20); + + group.bench_function("el+ep", move |b| { + let full_rules = rules_from_lists(&[ + "data/easylist.to/easylist/easylist.txt", + "data/easylist.to/easylist/easyprivacy.txt", + ]); + + let engine = Engine::from_rules(full_rules, Default::default()); + let serialized = engine.serialize().unwrap(); + + b.iter(|| { + let mut deserialized = Engine::default(); + assert!(deserialized.deserialize(&serialized).is_ok()); + }) + }); + group.bench_function("el", move |b| { + let full_rules = rules_from_lists(&["data/easylist.to/easylist/easylist.txt"]); + + let engine = Engine::from_rules(full_rules, Default::default()); + let serialized = engine.serialize().unwrap(); + + b.iter(|| { + let mut deserialized = Engine::default(); + assert!(deserialized.deserialize(&serialized).is_ok()); + }) 
+ }); + group.bench_function("slimlist", move |b| { + let full_rules = rules_from_lists(&["data/slim-list.txt"]); + + let engine = Engine::from_rules(full_rules, Default::default()); + let serialized = engine.serialize().unwrap(); + + b.iter(|| { + let mut deserialized = Engine::default(); + assert!(deserialized.deserialize(&serialized).is_ok()); + }) + }); + + group.finish(); +} + +criterion_group!(benches, serialization, deserialization); +criterion_main!(benches); diff --git a/examples/generate-dat.rs b/examples/generate-dat.rs index f5ad456d..86845c5f 100644 --- a/examples/generate-dat.rs +++ b/examples/generate-dat.rs @@ -21,7 +21,7 @@ fn main() { ) .unwrap(); assert!(engine.check_network_request(&request).exception.is_some()); - let serialized = engine.serialize_raw().expect("Could not serialize!"); + let serialized = engine.serialize().expect("Could not serialize!"); // Write to file let mut file = File::create("engine.dat").expect("Could not create serialization file"); diff --git a/js/src/lib.rs b/js/src/lib.rs index 77639e4c..7396162a 100644 --- a/js/src/lib.rs +++ b/js/src/lib.rs @@ -1,13 +1,15 @@ +use adblock::lists::{ + FilterFormat, FilterListMetadata, FilterSet as FilterSetInternal, ParseOptions, RuleTypes, +}; +use adblock::resources::resource_assembler::assemble_web_accessible_resources; +use adblock::resources::Resource; +use adblock::Engine as EngineInternal; use neon::prelude::*; use neon::types::buffer::TypedArray as _; use serde::{Deserialize, Serialize}; use std::cell::RefCell; -use std::sync::Mutex; use std::path::Path; -use adblock::Engine as EngineInternal; -use adblock::lists::{RuleTypes, FilterFormat, FilterListMetadata, FilterSet as FilterSetInternal, ParseOptions}; -use adblock::resources::Resource; -use adblock::resources::resource_assembler::assemble_web_accessible_resources; +use std::sync::Mutex; /// Use the JS context's JSON.stringify and JSON.parse as an FFI, at least until /// https://github.com/neon-bindings/neon/pull/953 is 
available @@ -17,14 +19,18 @@ mod json_ffi { /// Call `JSON.stringify` to convert the input to a `JsString`, then call serde_json to parse /// it to an instance of a native Rust type - pub fn from_js<'a, C: Context<'a>, T: DeserializeOwned>(cx: &mut C, input: Handle) -> NeonResult { + pub fn from_js<'a, C: Context<'a>, T: DeserializeOwned>( + cx: &mut C, + input: Handle, + ) -> NeonResult { let json: Handle = cx.global().get(cx, "JSON")?; let json_stringify: Handle = json.get(cx, "stringify")?; let undefined = JsUndefined::new(cx); let js_string = json_stringify .call(cx, undefined, [input])? - .downcast::(cx).or_throw(cx)?; + .downcast::(cx) + .or_throw(cx)?; match serde_json::from_str(&js_string.value(cx)) { Ok(v) => Ok(v), @@ -34,16 +40,16 @@ mod json_ffi { /// Use `serde_json` to stringify the input, then call `JSON.parse` to convert it to a /// `JsValue` - pub fn to_js<'a, C: Context<'a>, T: serde::Serialize>(cx: &mut C, input: &T) -> JsResult<'a, JsValue> { + pub fn to_js<'a, C: Context<'a>, T: serde::Serialize>( + cx: &mut C, + input: &T, + ) -> JsResult<'a, JsValue> { let input_handle = JsString::new(cx, serde_json::to_string(&input).unwrap()); let json: Handle = cx.global().get(cx, "JSON")?; let json_parse: Handle = json.get(cx, "parse")?; - json_parse - .call_with(cx) - .arg(input_handle) - .apply(cx) + json_parse.call_with(cx).arg(input_handle).apply(cx) } } @@ -61,10 +67,16 @@ impl FilterSet { fn add_filters(&self, rules: &[String], opts: ParseOptions) -> FilterListMetadata { self.0.borrow_mut().add_filters(rules, opts) } - fn add_filter(&self, filter: &str, opts: ParseOptions) -> Result<(), adblock::lists::FilterParseError> { + fn add_filter( + &self, + filter: &str, + opts: ParseOptions, + ) -> Result<(), adblock::lists::FilterParseError> { self.0.borrow_mut().add_filter(filter, opts) } - fn into_content_blocking(&self) -> Result<(Vec, Vec), ()> { + fn into_content_blocking( + &self, + ) -> Result<(Vec, Vec), ()> { 
self.0.borrow().clone().into_content_blocking() } } @@ -74,7 +86,10 @@ impl Finalize for FilterSet {} fn create_filter_set(mut cx: FunctionContext) -> JsResult> { match cx.argument_opt(0) { Some(arg) => { - let debug: bool = arg.downcast::(&mut cx).or_throw(&mut cx)?.value(&mut cx); + let debug: bool = arg + .downcast::(&mut cx) + .or_throw(&mut cx)? + .value(&mut cx); Ok(cx.boxed(FilterSet::new(debug))) } None => Ok(cx.boxed(FilterSet::default())), @@ -158,9 +173,7 @@ fn engine_constructor(mut cx: FunctionContext) -> JsResult> { }; EngineInternal::from_filter_set(rules, optimize) } - None => { - EngineInternal::from_filter_set(rules, true) - }, + None => EngineInternal::from_filter_set(rules, true), }; Ok(cx.boxed(Engine(Mutex::new(engine_internal)))) } @@ -175,7 +188,9 @@ fn engine_check(mut cx: FunctionContext) -> JsResult { let debug = match cx.argument_opt(4) { Some(arg) => { // Throw if the argument exists and it cannot be downcasted to a boolean - arg.downcast::(&mut cx).or_throw(&mut cx)?.value(&mut cx) + arg.downcast::(&mut cx) + .or_throw(&mut cx)? + .value(&mut cx) } None => false, }; @@ -230,10 +245,10 @@ fn engine_url_cosmetic_resources(mut cx: FunctionContext) -> JsResult { json_ffi::to_js(&mut cx, &result) } -fn engine_serialize_raw(mut cx: FunctionContext) -> JsResult { +fn engine_serialize(mut cx: FunctionContext) -> JsResult { let this = cx.argument::>(0)?; let serialized = if let Ok(engine) = this.0.lock() { - engine.serialize_raw().unwrap() + engine.serialize().unwrap() } else { cx.throw_error("Failed to acquire lock on engine")? }; @@ -336,14 +351,25 @@ fn ublock_resources(mut cx: FunctionContext) -> JsResult { let redirect_resources_path: String = cx.argument::(1)?.value(&mut cx); // `scriptlets_path` is optional, since adblock-rust parsing that file is now deprecated. 
let scriptlets_path = match cx.argument_opt(2) { - Some(arg) => Some(arg.downcast::(&mut cx).or_throw(&mut cx)?.value(&mut cx)), + Some(arg) => Some( + arg.downcast::(&mut cx) + .or_throw(&mut cx)? + .value(&mut cx), + ), None => None, }; - let mut resources = assemble_web_accessible_resources(&Path::new(&web_accessible_resource_dir), &Path::new(&redirect_resources_path)); + let mut resources = assemble_web_accessible_resources( + &Path::new(&web_accessible_resource_dir), + &Path::new(&redirect_resources_path), + ); if let Some(scriptlets_path) = scriptlets_path { #[allow(deprecated)] - resources.append(&mut adblock::resources::resource_assembler::assemble_scriptlet_resources(&Path::new(&scriptlets_path))); + resources.append( + &mut adblock::resources::resource_assembler::assemble_scriptlet_resources(&Path::new( + &scriptlets_path, + )), + ); } json_ffi::to_js(&mut cx, &resources) @@ -380,13 +406,19 @@ register_module!(mut m, { m.export_function("FilterSet_constructor", create_filter_set)?; m.export_function("FilterSet_addFilters", filter_set_add_filters)?; m.export_function("FilterSet_addFilter", filter_set_add_filter)?; - m.export_function("FilterSet_intoContentBlocking", filter_set_into_content_blocking)?; + m.export_function( + "FilterSet_intoContentBlocking", + filter_set_into_content_blocking, + )?; m.export_function("Engine_constructor", engine_constructor)?; m.export_function("Engine_check", engine_check)?; m.export_function("Engine_urlCosmeticResources", engine_url_cosmetic_resources)?; - m.export_function("Engine_hiddenClassIdSelectors", engine_hidden_class_id_selectors)?; - m.export_function("Engine_serializeRaw", engine_serialize_raw)?; + m.export_function( + "Engine_hiddenClassIdSelectors", + engine_hidden_class_id_selectors, + )?; + m.export_function("Engine_serialize", engine_serialize)?; m.export_function("Engine_deserialize", engine_deserialize)?; m.export_function("Engine_enableTag", engine_enable_tag)?; m.export_function("Engine_useResources", 
engine_use_resources)?; diff --git a/src/blocker.rs b/src/blocker.rs index d07ab85d..df85ffa4 100644 --- a/src/blocker.rs +++ b/src/blocker.rs @@ -5,7 +5,6 @@ use once_cell::sync::Lazy; use serde::Serialize; use std::collections::HashSet; use std::ops::DerefMut; -use thiserror::Error; use crate::filters::network::{NetworkFilter, NetworkFilterMaskHelper}; use crate::network_filter_list::NetworkFilterList; @@ -74,15 +73,6 @@ impl Default for BlockerResult { } } -/// Possible errors when adding a filter to a [`Blocker`]. -#[derive(Debug, Error, PartialEq)] -pub enum BlockerError { - #[error("$badfilter cannot be added (unsupported)")] - BadFilterAddUnsupported, - #[error("filter already exists")] - FilterExists, -} - // only check for tags in tagged and exception rule buckets, // pass empty set for the rest static NO_TAGS: Lazy> = Lazy::new(HashSet::new); @@ -377,13 +367,13 @@ impl Blocker { return None; } - let mut disabled_directives: HashSet<&str> = HashSet::new(); - let mut enabled_directives: HashSet<&str> = HashSet::new(); + let mut disabled_directives: HashSet = HashSet::new(); + let mut enabled_directives: HashSet = HashSet::new(); for filter in filters { if filter.is_exception() { if filter.is_csp() { - if let Some(csp_directive) = &filter.modifier_option { + if let Some(csp_directive) = filter.modifier_option { disabled_directives.insert(csp_directive); } else { // Exception filters with empty `csp` options will disable all CSP @@ -392,7 +382,7 @@ impl Blocker { } } } else if filter.is_csp() { - if let Some(csp_directive) = &filter.modifier_option { + if let Some(csp_directive) = filter.modifier_option { enabled_directives.insert(csp_directive); } } @@ -401,7 +391,7 @@ impl Blocker { let mut remaining_directives = enabled_directives.difference(&disabled_directives); let mut merged = if let Some(directive) = remaining_directives.next() { - String::from(*directive) + directive.to_string() } else { return None; }; @@ -414,7 +404,7 @@ impl Blocker { Some(merged) 
} - pub fn new(network_filters: Vec, options: &BlockerOptions) -> Blocker { + pub fn new(network_filters: Vec, options: &BlockerOptions) -> Self { // Capacity of filter subsets estimated based on counts in EasyList and EasyPrivacy - if necessary // the Vectors will grow beyond the pre-set capacity, but it is more efficient to allocate all at once // $csp= @@ -486,7 +476,7 @@ impl Blocker { tagged_filters_all.shrink_to_fit(); - Blocker { + Self { csp: NetworkFilterList::new(csp, options.enable_optimizations), exceptions: NetworkFilterList::new(exceptions, options.enable_optimizations), importants: NetworkFilterList::new(importants, options.enable_optimizations), @@ -506,85 +496,6 @@ impl Blocker { } } - /// If optimizations are enabled, the `Blocker` will be configured to automatically optimize - /// its filters after batch updates. However, even if they are disabled, it is possible to - /// manually call `optimize()`. It may be useful to have finer-grained control over - /// optimization scheduling when frequently updating filters. - pub fn optimize(&mut self) { - self.csp.optimize(); - self.exceptions.optimize(); - self.importants.optimize(); - self.redirects.optimize(); - // note - don't optimize removeparam - self.filters_tagged.optimize(); - self.filters.optimize(); - self.generic_hide.optimize(); - } - - /// Has this exact filter already been added? Note that this is a best-effort method and may - /// miss some filters, especially if optimizations are enabled. 
- pub fn filter_exists(&self, filter: &NetworkFilter) -> bool { - if filter.is_csp() { - self.csp.filter_exists(filter) - } else if filter.is_generic_hide() { - self.generic_hide.filter_exists(filter) - } else if filter.is_exception() { - self.exceptions.filter_exists(filter) - } else if filter.is_important() { - self.importants.filter_exists(filter) - } else if filter.is_redirect() { - self.redirects.filter_exists(filter) - } else if filter.is_removeparam() { - self.removeparam.filter_exists(filter) - } else if filter.tag.is_some() { - self.tagged_filters_all.iter().any(|f| f.id == filter.id) - } else { - self.filters.filter_exists(filter) - } - } - - /// Add a single filter to this [`Blocker`]. - /// - /// Filter optimization is skipped when using this method. - pub fn add_filter(&mut self, filter: NetworkFilter) -> Result<(), BlockerError> { - // Redirects are independent of blocking behavior. - if filter.is_redirect() { - self.redirects.add_filter(filter.clone()); - } - - if filter.is_badfilter() { - Err(BlockerError::BadFilterAddUnsupported) - } else if self.filter_exists(&filter) { - Err(BlockerError::FilterExists) - } else if filter.is_csp() { - self.csp.add_filter(filter); - Ok(()) - } else if filter.is_generic_hide() { - self.generic_hide.add_filter(filter); - Ok(()) - } else if filter.is_exception() { - self.exceptions.add_filter(filter); - Ok(()) - } else if filter.is_important() { - self.importants.add_filter(filter); - Ok(()) - } else if filter.is_removeparam() { - self.removeparam.add_filter(filter); - Ok(()) - } else if filter.tag.is_some() && !filter.is_redirect() { - // `tag` + `redirect` is unsupported - self.tagged_filters_all.push(filter); - let tags_enabled = self.tags_enabled().into_iter().collect::>(); - self.tags_with_set(tags_enabled); - Ok(()) - } else if (filter.is_redirect() && filter.also_block_redirect()) || !filter.is_redirect() { - self.filters.add_filter(filter); - Ok(()) - } else { - Ok(()) - } - } - pub fn use_tags(&mut self, 
tags: &[&str]) { let tag_set: HashSet = tags.iter().map(|&t| String::from(t)).collect(); self.tags_with_set(tag_set); diff --git a/src/data_format/mod.rs b/src/data_format/mod.rs index e9f26ace..dda51e31 100644 --- a/src/data_format/mod.rs +++ b/src/data_format/mod.rs @@ -9,9 +9,10 @@ mod v0; pub(crate) mod utils; -use crate::blocker::Blocker; use crate::cosmetic_filter_cache::CosmeticFilterCache; +use crate::blocker::Blocker; + /// Newer formats start with this magic byte sequence. /// Calculated as the leading 4 bytes of `echo -n 'brave/adblock-rust' | sha512sum`. const ADBLOCK_RUST_DAT_MAGIC: [u8; 4] = [0xd1, 0xd9, 0x3a, 0xaf]; @@ -66,6 +67,13 @@ pub enum DeserializationError { /// crate. If you still need it for some reason, you can convert it using 0.7.x by /// deserializing and then reserializing it into the newer V0 format. LegacyFormatNoLongerSupported, + InvalidFlatBuffer(flatbuffers::InvalidFlatbuffer), +} + +impl From for DeserializationError { + fn from(x: std::convert::Infallible) -> Self { + match x {} + } } impl From for DeserializationError { @@ -74,10 +82,16 @@ impl From for DeserializationError { } } +impl From for DeserializationError { + fn from(e: flatbuffers::InvalidFlatbuffer) -> Self { + Self::InvalidFlatBuffer(e) + } +} + impl DeserializeFormat { - pub(crate) fn build(self) -> (Blocker, CosmeticFilterCache) { + pub(crate) fn build(self) -> Result<(Blocker, CosmeticFilterCache), DeserializationError> { match self { - Self::V0(v) => v.into(), + Self::V0(v) => v.try_into(), } } diff --git a/src/data_format/v0.rs b/src/data_format/v0.rs index 47c272b9..1c05ccea 100644 --- a/src/data_format/v0.rs +++ b/src/data_format/v0.rs @@ -9,8 +9,8 @@ use std::collections::{HashMap, HashSet}; use rmp_serde as rmps; use serde::{Deserialize, Serialize}; -use crate::blocker::Blocker; use crate::cosmetic_filter_cache::{CosmeticFilterCache, HostnameRuleDb, ProceduralOrActionFilter}; +use crate::filters::fb_network::flat::fb; use 
crate::filters::network::{NetworkFilter, NetworkFilterMaskHelper}; use crate::network_filter_list::NetworkFilterList; use crate::utils::Hash; @@ -18,6 +18,8 @@ use crate::utils::Hash; use super::utils::{stabilize_hashmap_serialization, stabilize_hashset_serialization}; use super::{DeserializationError, SerializationError}; +use crate::blocker::Blocker; + /// Each variant describes a single rule that is specific to a particular hostname. #[derive(Clone, Debug, Deserialize, Serialize)] enum LegacySpecificFilterType { @@ -247,17 +249,20 @@ where S: serde::Serializer, { #[derive(Serialize, Default)] - struct NetworkFilterListV0SerializeFmt<'a> { + struct NetworkFilterListV0SerializeFmt { + flatbuffer_memory: Vec, + + #[serde(serialize_with = "stabilize_hashmap_serialization")] + filter_map: HashMap>, + #[serde(serialize_with = "stabilize_hashmap_serialization")] - filter_map: HashMap>>, + unique_domains_hashes_map: HashMap, } let v0_list = NetworkFilterListV0SerializeFmt { - filter_map: list - .filter_map - .iter() - .map(|(k, v)| (*k, v.iter().map(|f| f.into()).collect())) - .collect(), + flatbuffer_memory: list.flatbuffer_memory.clone(), + filter_map: list.filter_map.clone(), + unique_domains_hashes_map: list.unique_domains_hashes_map.clone(), }; v0_list.serialize(s) @@ -369,26 +374,22 @@ impl From for NetworkFilter { #[derive(Debug, Deserialize, Default)] pub(crate) struct NetworkFilterListV0DeserializeFmt { - pub filter_map: HashMap>, + pub flatbuffer_memory: Vec, + pub filter_map: HashMap>, + pub unique_domains_hashes_map: HashMap, } -impl From for NetworkFilterList { - fn from(v: NetworkFilterListV0DeserializeFmt) -> Self { - Self { - filter_map: v - .filter_map - .into_iter() - .map(|(k, v)| { - ( - k, - v.into_iter() - .map(|f| std::sync::Arc::new(f.into())) - .collect(), - ) - }) - .collect(), - } +impl TryFrom for NetworkFilterList { + fn try_from(v: NetworkFilterListV0DeserializeFmt) -> Result { + let _ = 
fb::root_as_network_filter_list(&v.flatbuffer_memory)?; + Ok(Self { + flatbuffer_memory: v.flatbuffer_memory, + filter_map: v.filter_map, + unique_domains_hashes_map: v.unique_domains_hashes_map, + }) } + + type Error = DeserializationError; } /// Structural representation of adblock engine data that can be built up from deserialization and @@ -471,8 +472,8 @@ impl<'a> From<(&'a Blocker, &'a CosmeticFilterCache)> for SerializeFormat<'a> { } } -impl From for (Blocker, CosmeticFilterCache) { - fn from(v: DeserializeFormat) -> Self { +impl TryFrom for (Blocker, CosmeticFilterCache) { + fn try_from(v: DeserializeFormat) -> Result { use crate::cosmetic_filter_cache::HostnameFilterBin; let mut specific_rules: HostnameRuleDb = v.specific_rules.into(); @@ -480,16 +481,16 @@ impl From for (Blocker, CosmeticFilterCache) { specific_rules.procedural_action_exception = HostnameFilterBin(v.procedural_action_exception); - ( + Ok(( Blocker { - csp: v.csp.into(), - exceptions: v.exceptions.into(), - importants: v.importants.into(), - redirects: v.redirects.into(), + csp: v.csp.try_into()?, + exceptions: v.exceptions.try_into()?, + importants: v.importants.try_into()?, + redirects: v.redirects.try_into()?, removeparam: NetworkFilterList::default(), - filters_tagged: v.filters_tagged.into(), - filters: v.filters.into(), - generic_hide: v.generic_hide.into(), + filters_tagged: v.filters_tagged.try_into()?, + filters: v.filters.try_into()?, + generic_hide: v.generic_hide.try_into()?, tags_enabled: Default::default(), tagged_filters_all: v.tagged_filters_all.into_iter().map(|f| f.into()).collect(), @@ -507,6 +508,8 @@ impl From for (Blocker, CosmeticFilterCache) { misc_generic_selectors: v.misc_generic_selectors, }, - ) + )) } + + type Error = DeserializationError; } diff --git a/src/engine.rs b/src/engine.rs index 76e9bbab..af0ef0ab 100644 --- a/src/engine.rs +++ b/src/engine.rs @@ -43,8 +43,8 @@ use std::collections::HashSet; /// [`Engine::hidden_class_id_selectors`] on an ongoing 
basis to determine additional elements that /// should be hidden dynamically. pub struct Engine { - blocker: Blocker, - cosmetic_cache: CosmeticFilterCache, + pub(crate) blocker: Blocker, + pub(crate) cosmetic_cache: CosmeticFilterCache, resources: ResourceStorage, } @@ -121,32 +121,6 @@ impl Engine { } } - /// Serializes the `Engine` into a binary format so that it can be quickly reloaded later. - pub fn serialize_raw(&self) -> Result<Vec<u8>, crate::data_format::SerializationError> { - use crate::data_format::SerializeFormat; - - let serialize_format = SerializeFormat::build(&self.blocker, &self.cosmetic_cache); - - serialize_format.serialize() - } - - /// Deserialize the `Engine` from the binary format generated by `Engine::serialize_raw`. The - /// method will automatically select the correct deserialization implementation. - pub fn deserialize( - &mut self, - serialized: &[u8], - ) -> Result<(), crate::data_format::DeserializationError> { - use crate::data_format::DeserializeFormat; - let current_tags = self.blocker.tags_enabled(); - let deserialize_format = DeserializeFormat::deserialize(serialized)?; - let (blocker, cosmetic_cache) = deserialize_format.build(); - self.blocker = blocker; - self.blocker - .use_tags(&current_tags.iter().map(|s| &**s).collect::<Vec<_>>()); - self.cosmetic_cache = cosmetic_cache; - Ok(()) - } - /// Check if a request for a network resource from `url`, of type `request_type`, initiated by /// `source_url`, should be blocked. pub fn check_network_request(&self, request: &Request) -> BlockerResult { @@ -270,6 +244,33 @@ impl Engine { pub fn get_regex_debug_info(&self) -> crate::regex_manager::RegexDebugInfo { self.blocker.get_regex_debug_info() } + + /// Serializes the `Engine` into a binary format so that it can be quickly reloaded later. 
+ pub fn serialize(&self) -> Result<Vec<u8>, crate::data_format::SerializationError> { + use crate::data_format::SerializeFormat; + + let serialize_format = SerializeFormat::build(&self.blocker, &self.cosmetic_cache); + + let result = serialize_format.serialize()?; + Ok(result) + } + + /// Deserialize the `Engine` from the binary format generated by `Engine::serialize`. The + /// method will automatically select the correct deserialization implementation. + pub fn deserialize( + &mut self, + serialized: &[u8], + ) -> Result<(), crate::data_format::DeserializationError> { + use crate::data_format::DeserializeFormat; + let current_tags = self.blocker.tags_enabled(); + let deserialize_format = DeserializeFormat::deserialize(serialized)?; + let (blocker, cosmetic_cache) = deserialize_format.build()?; + self.blocker = blocker; + self.blocker + .use_tags(&current_tags.iter().map(|s| &**s).collect::<Vec<_>>()); + self.cosmetic_cache = cosmetic_cache; + Ok(()) + } } /// Static assertions for `Engine: Send + Sync` traits. 
diff --git a/src/filters/fb_network.rs b/src/filters/fb_network.rs new file mode 100644 index 00000000..5dfde12c --- /dev/null +++ b/src/filters/fb_network.rs @@ -0,0 +1,314 @@ +use std::collections::HashMap; +use std::vec; + +use flatbuffers::WIPOffset; + +use crate::filters::network::{ + NetworkFilter, NetworkFilterMask, NetworkFilterMaskHelper, NetworkMatchable, +}; + +use crate::network_filter_list::NetworkFilterList; +use crate::regex_manager::RegexManager; +use crate::request::Request; +use crate::utils::Hash; + +#[allow(dead_code, unused_imports, unsafe_code)] +#[path = "../flatbuffers/fb_network_filter_generated.rs"] +pub mod flat; +use flat::fb; + +pub(crate) struct FlatNetworkFiltersListBuilder<'a> { + builder: flatbuffers::FlatBufferBuilder<'a>, + filters: Vec>>, + + unique_domains_hashes: Vec, + unique_domains_hashes_map: HashMap, +} + +impl<'a> FlatNetworkFiltersListBuilder<'a> { + pub fn new() -> Self { + Self { + builder: flatbuffers::FlatBufferBuilder::new(), + filters: vec![], + unique_domains_hashes: vec![], + unique_domains_hashes_map: HashMap::new(), + } + } + + fn get_or_insert_unique_domain_hash(&mut self, h: &Hash) -> u16 { + if let Some(&index) = self.unique_domains_hashes_map.get(h) { + return index; + } + let index = self.unique_domains_hashes.len() as u16; + self.unique_domains_hashes.push(*h); + self.unique_domains_hashes_map.insert(*h, index); + return index; + } + + pub fn add(&mut self, network_filter: &NetworkFilter) -> u32 { + let opt_domains = network_filter.opt_domains.as_ref().map(|v| { + let mut o: Vec = v + .iter() + .map(|x| self.get_or_insert_unique_domain_hash(x)) + .collect(); + o.sort_unstable(); + o.dedup(); + self.builder.create_vector(&o) + }); + + let opt_not_domains = network_filter.opt_not_domains.as_ref().map(|v| { + let mut o: Vec = v + .iter() + .map(|x| self.get_or_insert_unique_domain_hash(x)) + .collect(); + o.sort_unstable(); + o.dedup(); + self.builder.create_vector(&o) + }); + + let modifier_option = 
network_filter + .modifier_option + .as_ref() + .map(|s| self.builder.create_string(&s)); + + let hostname = network_filter + .hostname + .as_ref() + .map(|s| self.builder.create_string(&s)); + + let tag = network_filter + .tag + .as_ref() + .map(|s| self.builder.create_string(&s)); + + let patterns = if network_filter.filter.iter().len() > 0 { + let offsets: Vec> = network_filter + .filter + .iter() + .map(|s| self.builder.create_string(s)) + .collect(); + Some(self.builder.create_vector(&offsets)) + } else { + None + }; + + let raw_line = network_filter + .raw_line + .as_ref() + .map(|v| self.builder.create_string(v.as_str())); + + let filter = fb::NetworkFilter::create( + &mut self.builder, + &fb::NetworkFilterArgs { + mask: network_filter.mask.bits(), + patterns, + modifier_option, + opt_domains, + opt_not_domains, + hostname, + tag, + raw_line, + }, + ); + + self.filters.push(filter); + u32::try_from(self.filters.len() - 1).expect("< u32::MAX") + } + + pub fn finish(&mut self) -> Vec { + let filters = self.builder.create_vector(&self.filters); + + let unique_domains_hashes = self.builder.create_vector(&self.unique_domains_hashes); + + let storage = fb::NetworkFilterList::create( + &mut self.builder, + &&fb::NetworkFilterListArgs { + network_filters: Some(filters), + unique_domains_hashes: Some(unique_domains_hashes), + }, + ); + self.builder.finish(storage, None); + + let binary = Vec::from(self.builder.finished_data()); + binary + } +} +pub(crate) struct FlatPatterns<'a> { + patterns: Option>>, +} + +impl<'a> FlatPatterns<'a> { + #[inline(always)] + pub fn new( + patterns: Option>>, + ) -> Self { + Self { patterns } + } + + #[inline(always)] + pub fn iter(&self) -> FlatPatternsIterator { + FlatPatternsIterator { + patterns: self, + len: self.patterns.map_or(0, |d| d.len()), + index: 0, + } + } +} + +pub(crate) struct FlatPatternsIterator<'a> { + patterns: &'a FlatPatterns<'a>, + len: usize, + index: usize, +} + +impl<'a> Iterator for FlatPatternsIterator<'a> 
{ + type Item = &'a str; + + #[inline(always)] + fn next(&mut self) -> Option { + self.patterns.patterns.map_or(None, |fi| { + if self.index < self.len { + self.index += 1; + Some(fi.get(self.index - 1)) + } else { + None + } + }) + } +} + +impl<'a> ExactSizeIterator for FlatPatternsIterator<'a> { + #[inline(always)] + fn len(&self) -> usize { + self.len + } +} + +pub(crate) struct FlatNetworkFilter<'a> { + key: u64, + owner: &'a NetworkFilterList, + fb_filter: &'a fb::NetworkFilter<'a>, + + pub(crate) mask: NetworkFilterMask, +} + +impl<'a> FlatNetworkFilter<'a> { + #[inline(always)] + pub fn new( + filter: &'a fb::NetworkFilter<'a>, + index: u32, + owner: &'a NetworkFilterList, + ) -> Self { + let list_address: *const NetworkFilterList = owner as *const NetworkFilterList; + + Self { + fb_filter: filter, + key: index as u64 | (((list_address) as u64) << 32), + mask: NetworkFilterMask::from_bits_retain(filter.mask()), + owner: owner, + } + } + + #[inline(always)] + pub fn tag(&self) -> Option<&'a str> { + self.fb_filter.tag() + } + + #[inline(always)] + pub fn modifier_option(&self) -> Option { + self.fb_filter.modifier_option().map(|o| o.to_string()) + } + + #[inline(always)] + pub fn include_domains(&self) -> Option<&[u16]> { + self.fb_filter.opt_domains().map(|data| { + let bytes = data.bytes(); + assert!(bytes.len() % std::mem::size_of::() == 0); + unsafe { + std::slice::from_raw_parts( + bytes.as_ptr() as *const u16, + bytes.len() / std::mem::size_of::(), + ) + } + }) + } + + #[inline(always)] + pub fn exclude_domains(&self) -> Option<&[u16]> { + self.fb_filter.opt_not_domains().map(|data| { + let bytes = data.bytes(); + assert!(bytes.len() % std::mem::size_of::() == 0); + unsafe { + std::slice::from_raw_parts( + bytes.as_ptr() as *const u16, + bytes.len() / std::mem::size_of::(), + ) + } + }) + } + + #[inline(always)] + pub fn hostname(&self) -> Option<&'a str> { + if self.mask.is_hostname_anchor() { + self.fb_filter.hostname() + } else { + None + } + } + + 
#[inline(always)] + pub fn patterns(&self) -> FlatPatterns { + FlatPatterns::new(self.fb_filter.patterns()) + } + + #[inline(always)] + pub fn raw_line(&self) -> Option { + self.fb_filter.raw_line().map(|v| v.to_string()) + } +} + +impl<'a> NetworkFilterMaskHelper for FlatNetworkFilter<'a> { + #[inline] + fn has_flag(&self, v: NetworkFilterMask) -> bool { + self.mask.contains(v) + } +} + +impl<'a> NetworkMatchable for FlatNetworkFilter<'a> { + fn matches(&self, request: &Request, regex_manager: &mut RegexManager) -> bool { + use crate::filters::network_matchers::{ + check_excluded_domains_mapped, check_included_domains_mapped, check_options, + check_pattern, + }; + if !check_options(self.mask, request) { + return false; + } + if !check_included_domains_mapped( + self.include_domains(), + request, + &self.owner.unique_domains_hashes_map, + ) { + return false; + } + if !check_excluded_domains_mapped( + self.exclude_domains(), + request, + &self.owner.unique_domains_hashes_map, + ) { + return false; + } + check_pattern( + self.mask, + self.patterns().iter(), + self.hostname(), + self.key, + request, + regex_manager, + ) + } + + #[cfg(test)] + fn matches_test(&self, request: &Request) -> bool { + self.matches(request, &mut RegexManager::default()) + } +} diff --git a/src/filters/mod.rs b/src/filters/mod.rs index 1edf83e3..d5ffb7eb 100644 --- a/src/filters/mod.rs +++ b/src/filters/mod.rs @@ -4,4 +4,5 @@ mod abstract_network; mod network_matchers; pub mod cosmetic; +pub mod fb_network; pub mod network; diff --git a/src/filters/network.rs b/src/filters/network.rs index 7d829ecb..c96cd1f5 100644 --- a/src/filters/network.rs +++ b/src/filters/network.rs @@ -986,22 +986,20 @@ pub trait NetworkMatchable { impl NetworkMatchable for NetworkFilter { fn matches(&self, request: &request::Request, regex_manager: &mut RegexManager) -> bool { - use crate::filters::network_matchers::{check_options, check_pattern}; - check_options( - self.mask, - self.opt_domains.as_deref(), - 
self.opt_domains_union, - self.opt_not_domains.as_deref(), - self.opt_not_domains_union, - request, - ) && check_pattern( - self.mask, - self.filter.iter(), - self.hostname.as_deref(), - (self as *const NetworkFilter) as u64, - request, - regex_manager, - ) + use crate::filters::network_matchers::{ + check_excluded_domains, check_included_domains, check_options, check_pattern, + }; + check_options(self.mask, request) + && check_included_domains(self.opt_domains.as_deref(), request) + && check_excluded_domains(self.opt_not_domains.as_deref(), request) + && check_pattern( + self.mask, + self.filter.iter(), + self.hostname.as_deref(), + (self as *const NetworkFilter) as u64, + request, + regex_manager, + ) } #[cfg(test)] diff --git a/src/filters/network_matchers.rs b/src/filters/network_matchers.rs index 4b5ec7c1..b30b0019 100644 --- a/src/filters/network_matchers.rs +++ b/src/filters/network_matchers.rs @@ -11,6 +11,7 @@ use crate::filters::network::{NetworkFilterMask, NetworkFilterMaskHelper}; use crate::regex_manager::RegexManager; use crate::request; use crate::utils::{self, Hash}; +use std::collections::HashMap; fn get_url_after_hostname<'a>(url: &'a str, hostname: &str) -> &'a str { let start = @@ -394,14 +395,8 @@ where } } -pub fn check_options<'a>( - mask: NetworkFilterMask, - opt_domains: Option<&'a [Hash]>, - opt_domains_union: Option, - opt_not_domains: Option<&'a [Hash]>, - opt_not_domains_union: Option, - request: &request::Request, -) -> bool { +#[inline] +pub fn check_options<'a>(mask: NetworkFilterMask, request: &request::Request) -> bool { // Bad filter never matches if mask.is_badfilter() { return false; @@ -416,20 +411,14 @@ pub fn check_options<'a>( { return false; } + true +} +#[inline] +pub fn check_included_domains(opt_domains: Option<&[Hash]>, request: &request::Request) -> bool { // Source URL must be among these domains to match if let Some(included_domains) = opt_domains.as_ref() { if let Some(source_hashes) = 
request.source_hostname_hashes.as_ref() { - // If the union of included domains is recorded - if let Some(included_domains_union) = opt_domains_union { - // If there isn't any source hash that matches the union, there's no match at all - if source_hashes - .iter() - .all(|h| h & included_domains_union != *h) - { - return false; - } - } if source_hashes .iter() .all(|h| !utils::bin_lookup(included_domains, *h)) @@ -438,18 +427,38 @@ pub fn check_options<'a>( } } } + true +} +#[inline] +pub fn check_included_domains_mapped( + opt_domains: Option<&[u16]>, + request: &request::Request, + mapping: &HashMap, +) -> bool { + // Source URL must be among these domains to match + if let Some(included_domains) = opt_domains.as_ref() { + if let Some(source_hashes) = request.source_hostname_hashes.as_ref() { + if source_hashes.iter().all(|h| { + mapping + .get(h) + .map_or(true, |index| !utils::bin_lookup(included_domains, *index)) + }) { + return false; + } + } + } + true +} + +#[inline] +pub fn check_excluded_domains( + opt_not_domains: Option<&[Hash]>, + request: &request::Request, +) -> bool { if let Some(excluded_domains) = opt_not_domains.as_ref() { if let Some(source_hashes) = request.source_hostname_hashes.as_ref() { - // If the union of excluded domains is recorded - if let Some(excluded_domains_union) = opt_not_domains_union { - // If there's any source hash that matches the union, check the actual values - if source_hashes.iter().any(|h| { - (h & excluded_domains_union == *h) && utils::bin_lookup(excluded_domains, *h) - }) { - return false; - } - } else if source_hashes + if source_hashes .iter() .any(|h| utils::bin_lookup(excluded_domains, *h)) { @@ -461,6 +470,27 @@ pub fn check_options<'a>( true } +#[inline] +pub fn check_excluded_domains_mapped( + opt_not_domains: Option<&[u16]>, + request: &request::Request, + mapping: &HashMap, +) -> bool { + if let Some(excluded_domains) = opt_not_domains.as_ref() { + if let Some(source_hashes) = 
request.source_hostname_hashes.as_ref() { + if source_hashes.iter().any(|h| { + mapping + .get(h) + .map_or(false, |index| utils::bin_lookup(excluded_domains, *index)) + }) { + return false; + } + } + } + + true +} + #[cfg(test)] #[path = "../../tests/unit/filters/network_matchers.rs"] mod unit_tests; diff --git a/src/flatbuffers/fb_network_filter.fbs b/src/flatbuffers/fb_network_filter.fbs index 540bbdc3..1da0373f 100644 --- a/src/flatbuffers/fb_network_filter.fbs +++ b/src/flatbuffers/fb_network_filter.fbs @@ -16,6 +16,8 @@ table NetworkFilter { hostname: string; tag: string; + + raw_line: string; } table NetworkFilterList { diff --git a/src/flatbuffers/fb_network_filter_generated.rs b/src/flatbuffers/fb_network_filter_generated.rs index 81e7128e..6fe91441 100644 --- a/src/flatbuffers/fb_network_filter_generated.rs +++ b/src/flatbuffers/fb_network_filter_generated.rs @@ -42,6 +42,7 @@ pub mod fb { pub const VT_MODIFIER_OPTION: flatbuffers::VOffsetT = 12; pub const VT_HOSTNAME: flatbuffers::VOffsetT = 14; pub const VT_TAG: flatbuffers::VOffsetT = 16; + pub const VT_RAW_LINE: flatbuffers::VOffsetT = 18; #[inline] pub unsafe fn init_from_table(table: flatbuffers::Table<'a>) -> Self { @@ -58,6 +59,9 @@ pub mod fb { args: &'args NetworkFilterArgs<'args>, ) -> flatbuffers::WIPOffset> { let mut builder = NetworkFilterBuilder::new(_fbb); + if let Some(x) = args.raw_line { + builder.add_raw_line(x); + } if let Some(x) = args.tag { builder.add_tag(x); } @@ -90,6 +94,7 @@ pub mod fb { let modifier_option = self.modifier_option().map(|x| x.to_string()); let hostname = self.hostname().map(|x| x.to_string()); let tag = self.tag().map(|x| x.to_string()); + let raw_line = self.raw_line().map(|x| x.to_string()); NetworkFilterT { mask, opt_domains, @@ -98,6 +103,7 @@ pub mod fb { modifier_option, hostname, tag, + raw_line, } } @@ -183,6 +189,16 @@ pub mod fb { .get::>(NetworkFilter::VT_TAG, None) } } + #[inline] + pub fn raw_line(&self) -> Option<&'a str> { + // Safety: + // 
Created from valid Table for this object + // which contains a valid value in this slot + unsafe { + self._tab + .get::>(NetworkFilter::VT_RAW_LINE, None) + } + } } impl flatbuffers::Verifiable for NetworkFilter<'_> { @@ -218,6 +234,11 @@ pub mod fb { false, )? .visit_field::>("tag", Self::VT_TAG, false)? + .visit_field::>( + "raw_line", + Self::VT_RAW_LINE, + false, + )? .finish(); Ok(()) } @@ -232,6 +253,7 @@ pub mod fb { pub modifier_option: Option>, pub hostname: Option>, pub tag: Option>, + pub raw_line: Option>, } impl<'a> Default for NetworkFilterArgs<'a> { #[inline] @@ -244,6 +266,7 @@ pub mod fb { modifier_option: None, hostname: None, tag: None, + raw_line: None, } } } @@ -309,6 +332,13 @@ pub mod fb { .push_slot_always::>(NetworkFilter::VT_TAG, tag); } #[inline] + pub fn add_raw_line(&mut self, raw_line: flatbuffers::WIPOffset<&'b str>) { + self.fbb_.push_slot_always::>( + NetworkFilter::VT_RAW_LINE, + raw_line, + ); + } + #[inline] pub fn new( _fbb: &'b mut flatbuffers::FlatBufferBuilder<'a, A>, ) -> NetworkFilterBuilder<'a, 'b, A> { @@ -335,6 +365,7 @@ pub mod fb { ds.field("modifier_option", &self.modifier_option()); ds.field("hostname", &self.hostname()); ds.field("tag", &self.tag()); + ds.field("raw_line", &self.raw_line()); ds.finish() } } @@ -348,6 +379,7 @@ pub mod fb { pub modifier_option: Option, pub hostname: Option, pub tag: Option, + pub raw_line: Option, } impl Default for NetworkFilterT { fn default() -> Self { @@ -359,6 +391,7 @@ pub mod fb { modifier_option: None, hostname: None, tag: None, + raw_line: None, } } } @@ -377,6 +410,7 @@ pub mod fb { let modifier_option = self.modifier_option.as_ref().map(|x| _fbb.create_string(x)); let hostname = self.hostname.as_ref().map(|x| _fbb.create_string(x)); let tag = self.tag.as_ref().map(|x| _fbb.create_string(x)); + let raw_line = self.raw_line.as_ref().map(|x| _fbb.create_string(x)); NetworkFilter::create( _fbb, &NetworkFilterArgs { @@ -387,6 +421,7 @@ pub mod fb { modifier_option, hostname, 
tag, + raw_line, }, ) } diff --git a/src/lib.rs b/src/lib.rs index 6d0b9dc8..57249acd 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -30,6 +30,7 @@ pub mod regex_manager; pub mod request; pub mod resources; pub mod url_parser; + #[doc(hidden)] pub mod utils; diff --git a/src/network_filter_list.rs b/src/network_filter_list.rs index c2b8801c..79073fe9 100644 --- a/src/network_filter_list.rs +++ b/src/network_filter_list.rs @@ -1,37 +1,94 @@ -use std::{collections::HashMap, collections::HashSet, sync::Arc}; +use std::{collections::HashMap, collections::HashSet, fmt}; use serde::{Deserialize, Serialize}; -use crate::filters::network::NetworkFilter; -use crate::filters::network::NetworkMatchable; +use crate::filters::fb_network::flat::fb; +use crate::filters::fb_network::{FlatNetworkFilter, FlatNetworkFiltersListBuilder}; +use crate::filters::network::{ + NetworkFilter, NetworkFilterMask, NetworkFilterMaskHelper, NetworkMatchable, +}; use crate::optimizer; use crate::regex_manager::RegexManager; use crate::request::Request; use crate::utils::{fast_hash, Hash}; -#[derive(Serialize, Deserialize, Default)] +pub struct CheckResult { + pub filter_mask: NetworkFilterMask, + pub modifier_option: Option, + pub raw_line: Option, +} + +impl From<&NetworkFilter> for CheckResult { + fn from(filter: &NetworkFilter) -> Self { + Self { + filter_mask: filter.mask, + modifier_option: filter.modifier_option.clone(), + raw_line: filter.raw_line.clone().map(|v| *v), + } + } +} + +impl fmt::Display for CheckResult { + fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> { + if let Some(ref raw_line) = self.raw_line { + write!(f, "{}", raw_line) + } else { + write!(f, "{}", self.filter_mask) + } + } +} + +impl NetworkFilterMaskHelper for CheckResult { + #[inline] + fn has_flag(&self, v: NetworkFilterMask) -> bool { + self.filter_mask.contains(v) + } +} + +#[derive(Serialize, Deserialize)] pub(crate) struct NetworkFilterList { - #[serde(serialize_with = 
"crate::data_format::utils::stabilize_hashmap_serialization")] - pub(crate) filter_map: HashMap>>, + pub(crate) flatbuffer_memory: Vec, + pub(crate) filter_map: HashMap>, + pub(crate) unique_domains_hashes_map: HashMap, +} + +impl Default for NetworkFilterList { + fn default() -> Self { + Self { + flatbuffer_memory: Default::default(), + filter_map: Default::default(), + unique_domains_hashes_map: Default::default(), + } + } } impl NetworkFilterList { - pub fn new(filters: Vec, optimize: bool) -> NetworkFilterList { + pub fn new(filters: Vec, optimize: bool) -> Self { // Compute tokens for all filters let filter_tokens: Vec<_> = filters .into_iter() .map(|filter| { let tokens = filter.get_tokens(); - (Arc::new(filter), tokens) + (filter, tokens) }) .collect(); // compute the tokens' frequency histogram let (total_number_of_tokens, tokens_histogram) = token_histogram(&filter_tokens); - // Build a HashMap of tokens to Network Filters (held through Arc, Atomic Reference Counter) - let mut filter_map = HashMap::with_capacity(filter_tokens.len()); + let mut flat_builder = FlatNetworkFiltersListBuilder::new(); + let mut filter_map = HashMap::>::new(); + + let mut optimizable = HashMap::>::new(); { - for (filter_pointer, multi_tokens) in filter_tokens { + for (network_filter, multi_tokens) in filter_tokens { + let index = if !optimize + || !optimizer::is_filter_optimizable_by_patterns(&network_filter) + { + Some(flat_builder.add(&network_filter)) + } else { + None + }; + for tokens in multi_tokens { let mut best_token: Hash = 0; let mut min_count = total_number_of_tokens + 1; @@ -48,106 +105,49 @@ impl NetworkFilterList { _ => {} } } - insert_dup(&mut filter_map, best_token, Arc::clone(&filter_pointer)); - } + if let Some(index) = index { + insert_dup(&mut filter_map, best_token, index); + } else { + insert_dup(&mut optimizable, best_token, network_filter.clone()); + } + } // tokens } } - let mut self_ = NetworkFilterList { filter_map }; - if optimize { - 
self_.optimize(); - } else { - self_.filter_map.shrink_to_fit(); - } + for (token, v) in optimizable { + let optimized = optimizer::optimize(v); - self_ - } - - pub fn optimize(&mut self) { - let mut optimized_map = HashMap::with_capacity(self.filter_map.len()); - for (key, filters) in self.filter_map.drain() { - let mut unoptimized: Vec = Vec::with_capacity(filters.len()); - let mut unoptimizable: Vec> = Vec::with_capacity(filters.len()); - for f in filters { - match Arc::try_unwrap(f) { - Ok(f) => unoptimized.push(f), - Err(af) => unoptimizable.push(af), + for filter in optimized { + let index = flat_builder.add(&filter); + insert_dup(&mut filter_map, token, index); } } - - let mut optimized: Vec<_> = if unoptimized.len() > 1 { - optimizer::optimize(unoptimized) - .into_iter() - .map(Arc::new) - .collect() - } else { - // nothing to optimize - unoptimized.into_iter().map(Arc::new).collect() - }; - - optimized.append(&mut unoptimizable); - optimized.shrink_to_fit(); - optimized_map.insert(key, optimized); - } - - // won't mutate anymore, shrink to fit items - optimized_map.shrink_to_fit(); - - self.filter_map = optimized_map; - } - - pub fn add_filter(&mut self, filter: NetworkFilter) { - let filter_tokens = filter.get_tokens(); - let total_rules = vec_hashmap_len(&self.filter_map); - let filter_pointer = Arc::new(filter); - - for tokens in filter_tokens { - let mut best_token: Hash = 0; - let mut min_count = total_rules + 1; - for token in tokens { - match self.filter_map.get(&token) { - None => { - min_count = 0; - best_token = token - } - Some(filters) if filters.len() < min_count => { - min_count = filters.len(); - best_token = token - } - _ => {} - } - } - - insert_dup( - &mut self.filter_map, - best_token, - Arc::clone(&filter_pointer), + } else { + debug_assert!( + optimizable.is_empty(), + "Should be empty if optimization is off" ); } - } - /// This may not work if the list has been optimized. 
- pub fn filter_exists(&self, filter: &NetworkFilter) -> bool { - let mut tokens: Vec<_> = filter.get_tokens().into_iter().flatten().collect(); + let flatbuffer_memory = flat_builder.finish(); + let root = fb::root_as_network_filter_list(&flatbuffer_memory) + .expect("Ok because it is created in the previous line"); - if tokens.is_empty() { - tokens.push(0) + let mut unique_domains_hashes_map: HashMap = HashMap::new(); + for (index, hash) in root.unique_domains_hashes().iter().enumerate() { + unique_domains_hashes_map.insert(hash, u16::try_from(index).expect("< u16 max")); } - for token in tokens { - if let Some(filters) = self.filter_map.get(&token) { - for saved_filter in filters { - if saved_filter.id == filter.id { - return true; - } - } - } - } + filter_map.shrink_to_fit(); + unique_domains_hashes_map.shrink_to_fit(); - false + Self { + flatbuffer_memory, + filter_map, + unique_domains_hashes_map, + } } - /// Returns the first found filter, if any, that matches the given request. The backing storage /// has a non-deterministic order, so this should be used for any category of filters where a /// match from each would be functionally equivalent. 
For example, if two different exception @@ -158,23 +158,30 @@ impl NetworkFilterList { request: &Request, active_tags: &HashSet, regex_manager: &mut RegexManager, - ) -> Option<&NetworkFilter> { + ) -> Option { if self.filter_map.is_empty() { return None; } + let filters_list = + unsafe { fb::root_as_network_filter_list_unchecked(&self.flatbuffer_memory) }; + let network_filters = filters_list.network_filters(); + for token in request.get_tokens_for_match() { if let Some(filter_bucket) = self.filter_map.get(token) { - for filter in filter_bucket { + for filter_index in filter_bucket { + let fb_filter = network_filters.get(*filter_index as usize); + let filter = FlatNetworkFilter::new(&fb_filter, *filter_index, self); + // if matched, also needs to be tagged with an active tag (or not tagged at all) if filter.matches(request, regex_manager) - && filter - .tag - .as_ref() - .map(|t| active_tags.contains(t)) - .unwrap_or(true) + && filter.tag().map_or(true, |t| active_tags.contains(t)) { - return Some(filter); + return Some(CheckResult { + filter_mask: filter.mask, + modifier_option: filter.modifier_option(), + raw_line: filter.raw_line(), + }); } } } @@ -192,25 +199,32 @@ impl NetworkFilterList { request: &Request, active_tags: &HashSet, regex_manager: &mut RegexManager, - ) -> Vec<&NetworkFilter> { - let mut filters: Vec<&NetworkFilter> = vec![]; + ) -> Vec { + let mut filters: Vec = vec![]; if self.filter_map.is_empty() { return filters; } + let filters_list = + unsafe { fb::root_as_network_filter_list_unchecked(&self.flatbuffer_memory) }; + let network_filters = filters_list.network_filters(); + for token in request.get_tokens_for_match() { if let Some(filter_bucket) = self.filter_map.get(token) { - for filter in filter_bucket { + for filter_index in filter_bucket { + let fb_filter = network_filters.get(*filter_index as usize); + let filter = FlatNetworkFilter::new(&fb_filter, *filter_index, self); + // if matched, also needs to be tagged with an active tag (or 
not tagged at all) if filter.matches(request, regex_manager) - && filter - .tag - .as_ref() - .map(|t| active_tags.contains(t)) - .unwrap_or(true) + && filter.tag().map_or(true, |t| active_tags.contains(t)) { - filters.push(filter); + filters.push(CheckResult { + filter_mask: filter.mask, + modifier_option: filter.modifier_option(), + raw_line: filter.raw_line(), + }); } } } @@ -222,8 +236,11 @@ impl NetworkFilterList { /// Inserts a value into the `Vec` under the specified key in the `HashMap`. The entry will be /// created if it does not exist. If it already exists, it will be inserted in the `Vec` in a /// sorted order. -fn insert_dup(map: &mut HashMap, H>, k: K, v: V) -where +pub(crate) fn insert_dup( + map: &mut HashMap, H>, + k: K, + v: V, +) where K: std::cmp::Ord + std::hash::Hash, V: PartialOrd, { @@ -235,6 +252,7 @@ where } } +#[cfg(test)] pub(crate) fn vec_hashmap_len( map: &HashMap, H>, ) -> usize { diff --git a/src/optimizer.rs b/src/optimizer.rs index 2c9e9217..bb109235 100644 --- a/src/optimizer.rs +++ b/src/optimizer.rs @@ -10,6 +10,14 @@ trait Optimization { fn select(&self, filter: &NetworkFilter) -> bool; } +pub fn is_filter_optimizable_by_patterns(filter: &NetworkFilter) -> bool { + filter.opt_domains.is_none() + && filter.opt_not_domains.is_none() + && !filter.is_hostname_anchor() + && !filter.is_redirect() + && !filter.is_csp() +} + /// Fuse `NetworkFilter`s together by applying optimizations sequentially. 
pub fn optimize(filters: Vec) -> Vec { let mut optimized: Vec = Vec::new(); @@ -129,11 +137,7 @@ impl Optimization for SimplePatternGroup { format!("{:b}:{:?}", filter.mask, filter.is_complete_regex()) } fn select(&self, filter: &NetworkFilter) -> bool { - filter.opt_domains.is_none() - && filter.opt_not_domains.is_none() - && !filter.is_hostname_anchor() - && !filter.is_redirect() - && !filter.is_csp() + is_filter_optimizable_by_patterns(filter) } } diff --git a/tests/legacy_harness.rs b/tests/legacy_harness.rs index 2bbb7340..bd695b1b 100644 --- a/tests/legacy_harness.rs +++ b/tests/legacy_harness.rs @@ -330,7 +330,7 @@ mod legacy_check_match { let mut engine_deserialized = Engine::default(); // second empty engine_deserialized.use_tags(tags); { - let engine_serialized = engine.serialize_raw().unwrap(); + let engine_serialized = engine.serialize().unwrap(); engine_deserialized.deserialize(&engine_serialized).unwrap(); // override from serialized copy } @@ -404,7 +404,7 @@ mod legacy_check_match { ); let mut engine_deserialized = Engine::default(); // second empty { - let engine_serialized = engine.serialize_raw().unwrap(); + let engine_serialized = engine.serialize().unwrap(); engine_deserialized.deserialize(&engine_serialized).unwrap(); // override from serialized copy } @@ -901,7 +901,7 @@ mod legacy_misc_tests { false, ); // enable debugging and disable optimizations - let serialized = engine.serialize_raw().unwrap(); + let serialized = engine.serialize().unwrap(); let mut engine2 = Engine::new(false); engine2.deserialize(&serialized).unwrap(); diff --git a/tests/live.rs b/tests/live.rs index f1d9d506..f2913eeb 100644 --- a/tests/live.rs +++ b/tests/live.rs @@ -264,29 +264,16 @@ fn check_live_redirects() { } } -#[test] -/// Ensure that two different engines loaded from the same textual filter set serialize to -/// identical buffers. 
-fn stable_serialization() { - let engine1 = Engine::from_filter_set(ALL_FILTERS.lock().unwrap().clone(), true); - let ser1 = engine1.serialize_raw().unwrap(); - - let engine2 = Engine::from_filter_set(ALL_FILTERS.lock().unwrap().clone(), true); - let ser2 = engine2.serialize_raw().unwrap(); - - assert_eq!(ser1, ser2); -} - #[test] /// Ensure that one engine's serialization result can be exactly reproduced by another engine after /// deserializing from it. fn stable_serialization_through_load() { let engine1 = Engine::from_filter_set(ALL_FILTERS.lock().unwrap().clone(), true); - let ser1 = engine1.serialize_raw().unwrap(); + let ser1 = engine1.serialize().unwrap(); let mut engine2 = Engine::new(true); engine2.deserialize(&ser1).unwrap(); - let ser2 = engine2.serialize_raw().unwrap(); + let ser2 = engine2.serialize().unwrap(); assert_eq!(ser1, ser2); } diff --git a/tests/ublock-coverage.rs b/tests/ublock-coverage.rs index ea4f8089..1b59a313 100644 --- a/tests/ublock-coverage.rs +++ b/tests/ublock-coverage.rs @@ -172,7 +172,7 @@ fn check_specifics_default() { #[test] fn check_basic_works_after_deserialization() { let engine = get_blocker_engine(); - let serialized = engine.serialize_raw().unwrap(); + let serialized = engine.serialize().unwrap(); let mut deserialized_engine = Engine::default(); deserialized_engine.deserialize(&serialized).unwrap(); diff --git a/tests/unit/blocker.rs b/tests/unit/blocker.rs index cba703e9..2c2ea6ee 100644 --- a/tests/unit/blocker.rs +++ b/tests/unit/blocker.rs @@ -1289,135 +1289,20 @@ mod blocker_tests { } #[test] - fn filter_add_badfilter_error() { - let blocker_options: BlockerOptions = BlockerOptions { - enable_optimizations: false, - }; - - let mut blocker = Blocker::new(Vec::new(), &blocker_options); - - let filter = NetworkFilter::parse("adv$badfilter", true, Default::default()).unwrap(); - let added = blocker.add_filter(filter); - assert!(added.is_err()); - assert_eq!(added.err().unwrap(), BlockerError::BadFilterAddUnsupported); 
- } - - #[test] - #[ignore] - fn filter_add_twice_handling_error() { - { - // Not allow filter to be added twice hwn the engine is not optimised - let blocker_options: BlockerOptions = BlockerOptions { - enable_optimizations: false, - }; - - let mut blocker = Blocker::new(Vec::new(), &blocker_options); - - let filter = NetworkFilter::parse("adv", true, Default::default()).unwrap(); - blocker.add_filter(filter.clone()).unwrap(); - assert!( - blocker.filter_exists(&filter), - "Expected filter to be inserted" - ); - let added = blocker.add_filter(filter); - assert!(added.is_err(), "Expected repeated insertion to fail"); - assert_eq!( - added.err().unwrap(), - BlockerError::FilterExists, - "Expected specific error on repeated insertion fail" - ); - } - { - // Allow filter to be added twice when the engine is optimised - let blocker_options: BlockerOptions = BlockerOptions { - enable_optimizations: true, - }; - - let mut blocker = Blocker::new(Vec::new(), &blocker_options); - - let filter = NetworkFilter::parse("adv", true, Default::default()).unwrap(); - blocker.add_filter(filter.clone()).unwrap(); - let added = blocker.add_filter(filter); - assert!(added.is_ok()); - } - } - - #[test] - fn filter_add_tagged() { - // Allow filter to be added twice when the engine is optimised + fn exception_force_check() { let blocker_options: BlockerOptions = BlockerOptions { enable_optimizations: true, }; - let mut blocker = Blocker::new(Vec::new(), &blocker_options); - let resources = Default::default(); - blocker.enable_tags(&["brian"]); - - blocker - .add_filter(NetworkFilter::parse("adv$tag=stuff", true, Default::default()).unwrap()) - .unwrap(); - blocker - .add_filter( - NetworkFilter::parse("somelongpath/test$tag=stuff", true, Default::default()) - .unwrap(), - ) - .unwrap(); - blocker - .add_filter( - NetworkFilter::parse("||brianbondy.com/$tag=brian", true, Default::default()) - .unwrap(), - ) - .unwrap(); - blocker - .add_filter( - 
NetworkFilter::parse("||brave.com$tag=brian", true, Default::default()).unwrap(), - ) + let mut filter_set = crate::lists::FilterSet::new(true); + filter_set + .add_filter("@@*ad_banner.png", Default::default()) .unwrap(); - let url_results = [ - ("http://example.com/advert.html", false), - ("http://example.com/somelongpath/test/2.html", false), - ("https://brianbondy.com/about", true), - ("https://brave.com/about", true), - ]; - - let request_expectations: Vec<_> = url_results - .into_iter() - .map(|(url, expected_result)| { - let request = Request::new(url, "https://example.com", "other").unwrap(); - (request, expected_result) - }) - .collect(); + let blocker = Blocker::new(filter_set.network_filters, &blocker_options); - request_expectations - .into_iter() - .for_each(|(req, expected_result)| { - let matched_rule = blocker.check(&req, &resources); - if expected_result { - assert!(matched_rule.matched, "Expected match for {}", req.url); - } else { - assert!( - !matched_rule.matched, - "Expected no match for {}, matched with {:?}", - req.url, matched_rule.filter - ); - } - }); - } - - #[test] - fn exception_force_check() { - let blocker_options: BlockerOptions = BlockerOptions { - enable_optimizations: true, - }; - - let mut blocker = Blocker::new(Vec::new(), &blocker_options); let resources = Default::default(); - blocker - .add_filter(NetworkFilter::parse("@@*ad_banner.png", true, Default::default()).unwrap()) - .unwrap(); - let request = Request::new( "http://example.com/ad_banner.png", "https://example.com", @@ -1436,15 +1321,13 @@ mod blocker_tests { enable_optimizations: true, }; - let mut blocker = Blocker::new(Vec::new(), &blocker_options); - - blocker - .add_filter( - NetworkFilter::parse("@@||example.com$generichide", true, Default::default()) - .unwrap(), - ) + let mut filter_set = crate::lists::FilterSet::new(true); + filter_set + .add_filter("@@||example.com$generichide", Default::default()) .unwrap(); + let blocker = 
Blocker::new(filter_set.network_filters, &blocker_options); + assert!(blocker.check_generic_hide( &Request::new("https://example.com", "https://example.com", "other").unwrap() )); @@ -1465,7 +1348,10 @@ mod placeholder_string_tests { &crate::request::Request::new("https://example.com", "https://example.com", "document") .unwrap(), ); - assert_eq!(block.filter, Some("NetworkFilter".to_string())); + assert_eq!( + block.filter, + Some("100000001100110001111111111111".to_string()) + ); } } diff --git a/tests/unit/engine.rs b/tests/unit/engine.rs index 5043f8e7..528b50c9 100644 --- a/tests/unit/engine.rs +++ b/tests/unit/engine.rs @@ -152,7 +152,7 @@ mod tests { let mut engine = Engine::from_rules(&filters, Default::default()); engine.enable_tags(&["stuff"]); engine.enable_tags(&["brian"]); - let serialized = engine.serialize_raw().unwrap(); + let serialized = engine.serialize().unwrap(); let mut deserialized_engine = Engine::default(); deserialized_engine.enable_tags(&["stuff"]); deserialized_engine.deserialize(&serialized).unwrap(); @@ -178,11 +178,22 @@ mod tests { // assert!(false); // converted from the legacy compressed format let serialized = [ - 209, 217, 58, 175, 0, 220, 0, 19, 145, 128, 145, 128, 145, 128, 145, 128, 145, 128, - 145, 129, 207, 202, 167, 36, 217, 43, 56, 97, 176, 145, 157, 206, 0, 3, 31, 255, 129, - 1, 169, 97, 100, 45, 98, 97, 110, 110, 101, 114, 192, 192, 192, 192, 192, 192, 192, - 192, 207, 186, 136, 69, 13, 115, 187, 170, 226, 192, 192, 145, 128, 144, 195, 145, 128, - 144, 144, 128, 128, 145, 128, 144, 145, 128, 128, 128, + 209, 217, 58, 175, 0, 220, 0, 19, 147, 220, 0, 40, 16, 0, 0, 0, 0, 0, 0, 0, 8, 0, 12, + 0, 4, 0, 8, 0, 8, 0, 0, 0, 16, 0, 0, 0, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 128, 128, 147, 220, 0, 40, 16, 0, 0, 0, 0, 0, 0, 0, 8, 0, 12, 0, 4, 0, 8, 0, 8, 0, 0, + 0, 16, 0, 0, 0, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 128, 128, 147, 220, 0, + 40, 16, 0, 0, 0, 0, 0, 0, 0, 8, 0, 12, 0, 4, 0, 8, 0, 8, 0, 0, 0, 16, 
0, 0, 0, 4, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 128, 128, 147, 220, 0, 40, 16, 0, 0, 0, 0, 0, 0, + 0, 8, 0, 12, 0, 4, 0, 8, 0, 8, 0, 0, 0, 16, 0, 0, 0, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 128, 128, 147, 220, 0, 40, 16, 0, 0, 0, 0, 0, 0, 0, 8, 0, 12, 0, 4, 0, + 8, 0, 8, 0, 0, 0, 16, 0, 0, 0, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 128, + 128, 147, 220, 0, 88, 16, 0, 0, 0, 0, 0, 0, 0, 8, 0, 12, 0, 4, 0, 8, 0, 8, 0, 0, 0, 12, + 0, 0, 0, 4, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 16, 0, 0, 0, 12, 0, 12, 0, 4, 0, 0, 0, 0, + 0, 8, 0, 12, 0, 0, 0, 204, 255, 31, 3, 0, 4, 0, 0, 0, 1, 0, 0, 0, 4, 0, 0, 0, 9, 0, 0, + 0, 97, 100, 45, 98, 97, 110, 110, 101, 114, 0, 0, 0, 129, 207, 202, 167, 36, 217, 43, + 56, 97, 176, 145, 0, 128, 147, 220, 0, 40, 16, 0, 0, 0, 0, 0, 0, 0, 8, 0, 12, 0, 4, 0, + 8, 0, 8, 0, 0, 0, 16, 0, 0, 0, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 128, + 128, 144, 195, 145, 128, 144, 144, 128, 128, 145, 128, 144, 145, 128, 128, 128, ]; let mut deserialized_engine = Engine::default(); deserialized_engine.deserialize(&serialized).unwrap(); @@ -199,13 +210,25 @@ mod tests { // assert!(false); // converted from the legacy compressed format let serialized = [ - 209, 217, 58, 175, 0, 220, 0, 19, 145, 128, 145, 128, 145, 128, 145, 128, 145, 129, - 207, 202, 167, 36, 217, 43, 56, 97, 176, 145, 157, 206, 0, 3, 31, 255, 129, 1, 169, 97, - 100, 45, 98, 97, 110, 110, 101, 114, 192, 192, 192, 192, 192, 192, 163, 97, 98, 99, - 192, 207, 126, 212, 53, 83, 113, 159, 143, 134, 192, 192, 145, 128, 145, 128, 145, 157, - 206, 0, 3, 31, 255, 129, 1, 169, 97, 100, 45, 98, 97, 110, 110, 101, 114, 192, 192, - 192, 192, 192, 192, 163, 97, 98, 99, 192, 207, 126, 212, 53, 83, 113, 159, 143, 134, - 192, 192, 195, 145, 128, 144, 144, 128, 128, 145, 128, 144, 145, 128, 128, 128, + 209, 217, 58, 175, 0, 220, 0, 19, 147, 220, 0, 40, 16, 0, 0, 0, 0, 0, 0, 0, 8, 0, 12, + 0, 4, 0, 8, 0, 8, 0, 0, 0, 16, 0, 0, 0, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 
128, 128, 147, 220, 0, 40, 16, 0, 0, 0, 0, 0, 0, 0, 8, 0, 12, 0, 4, 0, 8, 0, 8, 0, 0, + 0, 16, 0, 0, 0, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 128, 128, 147, 220, 0, + 40, 16, 0, 0, 0, 0, 0, 0, 0, 8, 0, 12, 0, 4, 0, 8, 0, 8, 0, 0, 0, 16, 0, 0, 0, 4, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 128, 128, 147, 220, 0, 40, 16, 0, 0, 0, 0, 0, 0, + 0, 8, 0, 12, 0, 4, 0, 8, 0, 8, 0, 0, 0, 16, 0, 0, 0, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 128, 128, 147, 220, 0, 112, 16, 0, 0, 0, 0, 0, 0, 0, 8, 0, 12, 0, 4, 0, + 8, 0, 8, 0, 0, 0, 16, 0, 0, 0, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 24, 0, + 0, 0, 0, 0, 18, 0, 16, 0, 4, 0, 0, 0, 0, 0, 8, 0, 0, 0, 0, 0, 12, 0, 18, 0, 0, 0, 204, + 255, 31, 3, 0, 8, 0, 0, 0, 28, 0, 0, 0, 1, 0, 0, 0, 4, 0, 0, 0, 9, 0, 0, 0, 97, 100, + 45, 98, 97, 110, 110, 101, 114, 0, 0, 0, 3, 0, 0, 0, 97, 98, 99, 0, 129, 207, 202, 167, + 36, 217, 43, 56, 97, 176, 145, 0, 128, 147, 220, 0, 40, 16, 0, 0, 0, 0, 0, 0, 0, 8, 0, + 12, 0, 4, 0, 8, 0, 8, 0, 0, 0, 16, 0, 0, 0, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 128, 128, 147, 220, 0, 40, 16, 0, 0, 0, 0, 0, 0, 0, 8, 0, 12, 0, 4, 0, 8, 0, 8, + 0, 0, 0, 16, 0, 0, 0, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 128, 128, 145, + 157, 206, 0, 3, 31, 255, 129, 1, 169, 97, 100, 45, 98, 97, 110, 110, 101, 114, 192, + 192, 192, 192, 192, 192, 163, 97, 98, 99, 192, 207, 126, 212, 53, 83, 113, 159, 143, + 134, 192, 192, 195, 145, 128, 144, 144, 128, 128, 145, 128, 144, 145, 128, 128, 128, ]; let mut deserialized_engine = Engine::default(); @@ -228,7 +251,7 @@ mod tests { #[test] fn deserialization_generate_simple() { let mut engine = Engine::from_rules(&["ad-banner"], Default::default()); - let serialized = engine.serialize_raw().unwrap(); + let serialized = engine.serialize().unwrap(); println!("Engine serialized: {:?}", serialized); engine.deserialize(&serialized).unwrap(); } @@ -237,7 +260,7 @@ mod tests { fn deserialization_generate_tags() { let mut engine = 
Engine::from_rules(&["ad-banner$tag=abc"], Default::default()); engine.use_tags(&["abc"]); - let serialized = engine.serialize_raw().unwrap(); + let serialized = engine.serialize().unwrap(); println!("Engine serialized: {:?}", serialized); engine.deserialize(&serialized).unwrap(); } @@ -251,7 +274,7 @@ mod tests { Resource::simple("noopcss", MimeType::TextCss, ""), ]); - let serialized = engine.serialize_raw().unwrap(); + let serialized = engine.serialize().unwrap(); println!("Engine serialized: {:?}", serialized); engine.deserialize(&serialized).unwrap(); } diff --git a/tests/unit/filters/network_matchers.rs b/tests/unit/filters/network_matchers.rs index f3d746c1..e21673cf 100644 --- a/tests/unit/filters/network_matchers.rs +++ b/tests/unit/filters/network_matchers.rs @@ -386,14 +386,9 @@ mod match_tests { } fn check_options(filter: &NetworkFilter, request: &request::Request) -> bool { - super::super::check_options( - filter.mask, - filter.opt_domains.as_deref(), - filter.opt_domains_union, - filter.opt_not_domains.as_deref(), - filter.opt_not_domains_union, - request, - ) + super::super::check_options(filter.mask, request) + && super::super::check_included_domains(filter.opt_domains.as_deref(), request) + && super::super::check_excluded_domains(filter.opt_not_domains.as_deref(), request) } #[test]