Skip to content

Flatbuffers impl #446

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 14 commits into
base: master
Choose a base branch
from
7 changes: 5 additions & 2 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ cssparser = { version = "0.29", optional = true }
selectors = { version = "0.24", optional = true }
serde_json = "1.0"
thiserror = "1.0"
flatbuffers = { version = "25.2.10", optional = true }
flatbuffers = { version = "25.2.10" }

[dev-dependencies]
criterion = "=0.5.1"
Expand Down Expand Up @@ -80,6 +80,10 @@ harness = false
name = "bench_memory"
harness = false

[[bench]]
name = "bench_serialization"
harness = false

# Currently disabled, as cosmetic filter internals
# are no longer part of the crate's public API
#[[bench]]
Expand All @@ -97,4 +101,3 @@ css-validation = ["cssparser", "selectors"]
content-blocking = []
embedded-domain-resolver = ["addr"] # Requires setting an external domain resolver if disabled.
resource-assembler = []
flatbuffers-storage = [ "flatbuffers" ]
77 changes: 0 additions & 77 deletions benches/bench_matching.rs
Original file line number Diff line number Diff line change
Expand Up @@ -205,81 +205,6 @@ fn rule_match_parsed_elep_slimlist(c: &mut Criterion) {
group.finish();
}

/// Benchmarks raw serialization of engines built from three filter-list sets.
///
/// Every case is registered in the `blocker-serialization` group under the id
/// given in the table below.
fn serialization(c: &mut Criterion) {
    let mut group = c.benchmark_group("blocker-serialization");

    group.sample_size(20);

    // (benchmark id, filter lists used to build the engine)
    let scenarios: [(&'static str, &'static [&'static str]); 3] = [
        (
            "el+ep",
            &[
                "data/easylist.to/easylist/easylist.txt",
                "data/easylist.to/easylist/easyprivacy.txt",
            ],
        ),
        ("el", &["data/easylist.to/easylist/easylist.txt"]),
        ("slimlist", &["data/slim-list.txt"]),
    ];

    for &(id, list_paths) in scenarios.iter() {
        group.bench_function(id, move |b| {
            // Engine construction happens once per benchmark, outside the timed loop.
            let engine = Engine::from_rules(rules_from_lists(list_paths), Default::default());
            b.iter(|| {
                let bytes = engine.serialize_raw().unwrap();
                assert!(bytes.len() > 0)
            })
        });
    }

    group.finish();
}

/// Benchmarks deserialization of pre-serialized engines for three filter-list sets.
///
/// Every case is registered in the `blocker-deserialization` group under the id
/// given in the table below.
fn deserialization(c: &mut Criterion) {
    let mut group = c.benchmark_group("blocker-deserialization");

    group.sample_size(20);

    // (benchmark id, filter lists used to build the engine)
    let scenarios: [(&'static str, &'static [&'static str]); 3] = [
        (
            "el+ep",
            &[
                "data/easylist.to/easylist/easylist.txt",
                "data/easylist.to/easylist/easyprivacy.txt",
            ],
        ),
        ("el", &["data/easylist.to/easylist/easylist.txt"]),
        ("slimlist", &["data/slim-list.txt"]),
    ];

    for &(id, list_paths) in scenarios.iter() {
        group.bench_function(id, move |b| {
            // The serialized buffer is produced once, outside the timed loop.
            let source_engine =
                Engine::from_rules(rules_from_lists(list_paths), Default::default());
            let serialized = source_engine.serialize_raw().unwrap();

            b.iter(|| {
                let mut deserialized = Engine::default();
                let outcome = deserialized.deserialize(&serialized);
                assert!(outcome.is_ok());
            })
        });
    }

    group.finish();
}

fn rule_match_browserlike_comparable(c: &mut Criterion) {
let mut group = c.benchmark_group("rule-match-browserlike");

Expand Down Expand Up @@ -395,7 +320,5 @@ criterion_group!(
rule_match_parsed_elep_slimlist,
rule_match_browserlike_comparable,
rule_match_first_request,
serialization,
deserialization
);
criterion_main!(benches);
85 changes: 85 additions & 0 deletions benches/bench_serialization.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
use criterion::*;

use adblock::Engine;

#[path = "../tests/test_utils.rs"]
mod test_utils;
use test_utils::rules_from_lists;

/// Benchmarks `Engine::serialize` for engines built from several filter-list sets.
///
/// One benchmark per entry of `CASES` is registered in the
/// `blocker-serialization` group, using the entry's first field as the
/// benchmark id and its second field as the lists to load.
fn serialization(c: &mut Criterion) {
    // Benchmark id paired with the filter lists its engine is built from.
    const CASES: [(&str, &[&str]); 3] = [
        (
            "el+ep",
            &[
                "data/easylist.to/easylist/easylist.txt",
                "data/easylist.to/easylist/easyprivacy.txt",
            ],
        ),
        ("el", &["data/easylist.to/easylist/easylist.txt"]),
        ("slimlist", &["data/slim-list.txt"]),
    ];

    let mut group = c.benchmark_group("blocker-serialization");

    group.sample_size(20);

    for (name, lists) in CASES {
        group.bench_function(name, move |b| {
            // Built inside the benchmark closure but outside `b.iter`, so list
            // parsing and engine construction are not part of the measurement.
            let full_rules = rules_from_lists(lists);

            let engine = Engine::from_rules(full_rules, Default::default());
            b.iter(|| assert!(!engine.serialize().unwrap().is_empty()))
        });
    }

    group.finish();
}

/// Benchmarks `Engine::deserialize` on buffers produced by `Engine::serialize`.
///
/// One benchmark per entry of `CASES` is registered in the
/// `blocker-deserialization` group, using the entry's first field as the
/// benchmark id and its second field as the lists to load.
fn deserialization(c: &mut Criterion) {
    // Benchmark id paired with the filter lists its engine is built from.
    const CASES: [(&str, &[&str]); 3] = [
        (
            "el+ep",
            &[
                "data/easylist.to/easylist/easylist.txt",
                "data/easylist.to/easylist/easyprivacy.txt",
            ],
        ),
        ("el", &["data/easylist.to/easylist/easylist.txt"]),
        ("slimlist", &["data/slim-list.txt"]),
    ];

    let mut group = c.benchmark_group("blocker-deserialization");

    group.sample_size(20);

    for (name, lists) in CASES {
        group.bench_function(name, move |b| {
            let full_rules = rules_from_lists(lists);

            // Serialize once up front; only deserialization is timed.
            let engine = Engine::from_rules(full_rules, Default::default());
            let serialized = engine.serialize().unwrap();

            b.iter(|| {
                // A fresh engine per iteration so each run deserializes into
                // a default-constructed target.
                let mut deserialized = Engine::default();
                assert!(deserialized.deserialize(&serialized).is_ok());
            })
        });
    }

    group.finish();
}

// Group the serialization and deserialization benchmarks under `benches` and
// generate the benchmark entry point (the Cargo.toml entry for this bench sets
// `harness = false`, so Criterion supplies `main`).
criterion_group!(benches, serialization, deserialization);
criterion_main!(benches);
2 changes: 1 addition & 1 deletion examples/generate-dat.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ fn main() {
)
.unwrap();
assert!(engine.check_network_request(&request).exception.is_some());
let serialized = engine.serialize_raw().expect("Could not serialize!");
let serialized = engine.serialize().expect("Could not serialize!");

// Write to file
let mut file = File::create("engine.dat").expect("Could not create serialization file");
Expand Down
86 changes: 59 additions & 27 deletions js/src/lib.rs
Original file line number Diff line number Diff line change
@@ -1,13 +1,15 @@
use adblock::lists::{
FilterFormat, FilterListMetadata, FilterSet as FilterSetInternal, ParseOptions, RuleTypes,
};
use adblock::resources::resource_assembler::assemble_web_accessible_resources;
use adblock::resources::Resource;
use adblock::Engine as EngineInternal;
use neon::prelude::*;
use neon::types::buffer::TypedArray as _;
use serde::{Deserialize, Serialize};
use std::cell::RefCell;
use std::sync::Mutex;
use std::path::Path;
use adblock::Engine as EngineInternal;
use adblock::lists::{RuleTypes, FilterFormat, FilterListMetadata, FilterSet as FilterSetInternal, ParseOptions};
use adblock::resources::Resource;
use adblock::resources::resource_assembler::assemble_web_accessible_resources;
use std::sync::Mutex;

/// Use the JS context's JSON.stringify and JSON.parse as an FFI, at least until
/// https://github.com/neon-bindings/neon/pull/953 is available
Expand All @@ -17,14 +19,18 @@ mod json_ffi {

/// Call `JSON.stringify` to convert the input to a `JsString`, then call serde_json to parse
/// it to an instance of a native Rust type
pub fn from_js<'a, C: Context<'a>, T: DeserializeOwned>(cx: &mut C, input: Handle<JsValue>) -> NeonResult<T> {
pub fn from_js<'a, C: Context<'a>, T: DeserializeOwned>(
cx: &mut C,
input: Handle<JsValue>,
) -> NeonResult<T> {
let json: Handle<JsObject> = cx.global().get(cx, "JSON")?;
let json_stringify: Handle<JsFunction> = json.get(cx, "stringify")?;

let undefined = JsUndefined::new(cx);
let js_string = json_stringify
.call(cx, undefined, [input])?
.downcast::<JsString, _>(cx).or_throw(cx)?;
.downcast::<JsString, _>(cx)
.or_throw(cx)?;

match serde_json::from_str(&js_string.value(cx)) {
Ok(v) => Ok(v),
Expand All @@ -34,16 +40,16 @@ mod json_ffi {

/// Use `serde_json` to stringify the input, then call `JSON.parse` to convert it to a
/// `JsValue`
pub fn to_js<'a, C: Context<'a>, T: serde::Serialize>(cx: &mut C, input: &T) -> JsResult<'a, JsValue> {
pub fn to_js<'a, C: Context<'a>, T: serde::Serialize>(
cx: &mut C,
input: &T,
) -> JsResult<'a, JsValue> {
let input_handle = JsString::new(cx, serde_json::to_string(&input).unwrap());

let json: Handle<JsObject> = cx.global().get(cx, "JSON")?;
let json_parse: Handle<JsFunction> = json.get(cx, "parse")?;

json_parse
.call_with(cx)
.arg(input_handle)
.apply(cx)
json_parse.call_with(cx).arg(input_handle).apply(cx)
}
}

Expand All @@ -61,10 +67,16 @@ impl FilterSet {
/// Forwards `rules` and `opts` to the wrapped filter set's `add_filters`,
/// borrowing it mutably for the duration of the call, and returns the
/// resulting metadata.
fn add_filters(&self, rules: &[String], opts: ParseOptions) -> FilterListMetadata {
    let mut inner = self.0.borrow_mut();
    inner.add_filters(rules, opts)
}
fn add_filter(&self, filter: &str, opts: ParseOptions) -> Result<(), adblock::lists::FilterParseError> {
fn add_filter(
&self,
filter: &str,
opts: ParseOptions,
) -> Result<(), adblock::lists::FilterParseError> {
self.0.borrow_mut().add_filter(filter, opts)
}
fn into_content_blocking(&self) -> Result<(Vec<adblock::content_blocking::CbRule>, Vec<String>), ()> {
fn into_content_blocking(
&self,
) -> Result<(Vec<adblock::content_blocking::CbRule>, Vec<String>), ()> {
self.0.borrow().clone().into_content_blocking()
}
}
Expand All @@ -74,7 +86,10 @@ impl Finalize for FilterSet {}
fn create_filter_set(mut cx: FunctionContext) -> JsResult<JsBox<FilterSet>> {
match cx.argument_opt(0) {
Some(arg) => {
let debug: bool = arg.downcast::<JsBoolean, _>(&mut cx).or_throw(&mut cx)?.value(&mut cx);
let debug: bool = arg
.downcast::<JsBoolean, _>(&mut cx)
.or_throw(&mut cx)?
.value(&mut cx);
Ok(cx.boxed(FilterSet::new(debug)))
}
None => Ok(cx.boxed(FilterSet::default())),
Expand Down Expand Up @@ -158,9 +173,7 @@ fn engine_constructor(mut cx: FunctionContext) -> JsResult<JsBox<Engine>> {
};
EngineInternal::from_filter_set(rules, optimize)
}
None => {
EngineInternal::from_filter_set(rules, true)
},
None => EngineInternal::from_filter_set(rules, true),
};
Ok(cx.boxed(Engine(Mutex::new(engine_internal))))
}
Expand All @@ -175,7 +188,9 @@ fn engine_check(mut cx: FunctionContext) -> JsResult<JsValue> {
let debug = match cx.argument_opt(4) {
Some(arg) => {
// Throw if the argument exists and it cannot be downcasted to a boolean
arg.downcast::<JsBoolean, _>(&mut cx).or_throw(&mut cx)?.value(&mut cx)
arg.downcast::<JsBoolean, _>(&mut cx)
.or_throw(&mut cx)?
.value(&mut cx)
}
None => false,
};
Expand Down Expand Up @@ -230,10 +245,10 @@ fn engine_url_cosmetic_resources(mut cx: FunctionContext) -> JsResult<JsValue> {
json_ffi::to_js(&mut cx, &result)
}

fn engine_serialize_raw(mut cx: FunctionContext) -> JsResult<JsArrayBuffer> {
fn engine_serialize(mut cx: FunctionContext) -> JsResult<JsArrayBuffer> {
let this = cx.argument::<JsBox<Engine>>(0)?;
let serialized = if let Ok(engine) = this.0.lock() {
engine.serialize_raw().unwrap()
engine.serialize().unwrap()
} else {
cx.throw_error("Failed to acquire lock on engine")?
};
Expand Down Expand Up @@ -336,14 +351,25 @@ fn ublock_resources(mut cx: FunctionContext) -> JsResult<JsValue> {
let redirect_resources_path: String = cx.argument::<JsString>(1)?.value(&mut cx);
// `scriptlets_path` is optional, since adblock-rust parsing that file is now deprecated.
let scriptlets_path = match cx.argument_opt(2) {
Some(arg) => Some(arg.downcast::<JsString, _>(&mut cx).or_throw(&mut cx)?.value(&mut cx)),
Some(arg) => Some(
arg.downcast::<JsString, _>(&mut cx)
.or_throw(&mut cx)?
.value(&mut cx),
),
None => None,
};

let mut resources = assemble_web_accessible_resources(&Path::new(&web_accessible_resource_dir), &Path::new(&redirect_resources_path));
let mut resources = assemble_web_accessible_resources(
&Path::new(&web_accessible_resource_dir),
&Path::new(&redirect_resources_path),
);
if let Some(scriptlets_path) = scriptlets_path {
#[allow(deprecated)]
resources.append(&mut adblock::resources::resource_assembler::assemble_scriptlet_resources(&Path::new(&scriptlets_path)));
resources.append(
&mut adblock::resources::resource_assembler::assemble_scriptlet_resources(&Path::new(
&scriptlets_path,
)),
);
}

json_ffi::to_js(&mut cx, &resources)
Expand Down Expand Up @@ -380,13 +406,19 @@ register_module!(mut m, {
m.export_function("FilterSet_constructor", create_filter_set)?;
m.export_function("FilterSet_addFilters", filter_set_add_filters)?;
m.export_function("FilterSet_addFilter", filter_set_add_filter)?;
m.export_function("FilterSet_intoContentBlocking", filter_set_into_content_blocking)?;
m.export_function(
"FilterSet_intoContentBlocking",
filter_set_into_content_blocking,
)?;

m.export_function("Engine_constructor", engine_constructor)?;
m.export_function("Engine_check", engine_check)?;
m.export_function("Engine_urlCosmeticResources", engine_url_cosmetic_resources)?;
m.export_function("Engine_hiddenClassIdSelectors", engine_hidden_class_id_selectors)?;
m.export_function("Engine_serializeRaw", engine_serialize_raw)?;
m.export_function(
"Engine_hiddenClassIdSelectors",
engine_hidden_class_id_selectors,
)?;
m.export_function("Engine_serialize", engine_serialize)?;
m.export_function("Engine_deserialize", engine_deserialize)?;
m.export_function("Engine_enableTag", engine_enable_tag)?;
m.export_function("Engine_useResources", engine_use_resources)?;
Expand Down
Loading