Skip to content

Commit 50548c4

Browse files
authored
Merge pull request #446 from brave/flatbuffers_impl
Flatbuffers storage for internal filters representation.
2 parents 67b7b70 + 537ab54 commit 50548c4

24 files changed

+860
-596
lines changed

Cargo.toml

Lines changed: 5 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -40,7 +40,7 @@ cssparser = { version = "0.29", optional = true }
4040
selectors = { version = "0.24", optional = true }
4141
serde_json = "1.0"
4242
thiserror = "1.0"
43-
flatbuffers = { version = "25.2.10", optional = true }
43+
flatbuffers = { version = "25.2.10" }
4444

4545
[dev-dependencies]
4646
criterion = "=0.5.1"
@@ -80,6 +80,10 @@ harness = false
8080
name = "bench_memory"
8181
harness = false
8282

83+
[[bench]]
84+
name = "bench_serialization"
85+
harness = false
86+
8387
# Currently disabled, as cosmetic filter internals
8488
# are no longer part of the crate's public API
8589
#[[bench]]
@@ -97,4 +101,3 @@ css-validation = ["cssparser", "selectors"]
97101
content-blocking = []
98102
embedded-domain-resolver = ["addr"] # Requires setting an external domain resolver if disabled.
99103
resource-assembler = []
100-
flatbuffers-storage = [ "flatbuffers" ]

benches/bench_matching.rs

Lines changed: 0 additions & 77 deletions
Original file line number | Diff line number | Diff line change
@@ -205,81 +205,6 @@ fn rule_match_parsed_elep_slimlist(c: &mut Criterion) {
205205
group.finish();
206206
}
207207

208-
fn serialization(c: &mut Criterion) {
209-
let mut group = c.benchmark_group("blocker-serialization");
210-
211-
group.sample_size(20);
212-
213-
group.bench_function("el+ep", move |b| {
214-
let full_rules = rules_from_lists(&[
215-
"data/easylist.to/easylist/easylist.txt",
216-
"data/easylist.to/easylist/easyprivacy.txt",
217-
]);
218-
219-
let engine = Engine::from_rules(full_rules, Default::default());
220-
b.iter(|| assert!(engine.serialize_raw().unwrap().len() > 0))
221-
});
222-
group.bench_function("el", move |b| {
223-
let full_rules = rules_from_lists(&["data/easylist.to/easylist/easylist.txt"]);
224-
225-
let engine = Engine::from_rules(full_rules, Default::default());
226-
b.iter(|| assert!(engine.serialize_raw().unwrap().len() > 0))
227-
});
228-
group.bench_function("slimlist", move |b| {
229-
let full_rules = rules_from_lists(&["data/slim-list.txt"]);
230-
231-
let engine = Engine::from_rules(full_rules, Default::default());
232-
b.iter(|| assert!(engine.serialize_raw().unwrap().len() > 0))
233-
});
234-
235-
group.finish();
236-
}
237-
238-
fn deserialization(c: &mut Criterion) {
239-
let mut group = c.benchmark_group("blocker-deserialization");
240-
241-
group.sample_size(20);
242-
243-
group.bench_function("el+ep", move |b| {
244-
let full_rules = rules_from_lists(&[
245-
"data/easylist.to/easylist/easylist.txt",
246-
"data/easylist.to/easylist/easyprivacy.txt",
247-
]);
248-
249-
let engine = Engine::from_rules(full_rules, Default::default());
250-
let serialized = engine.serialize_raw().unwrap();
251-
252-
b.iter(|| {
253-
let mut deserialized = Engine::default();
254-
assert!(deserialized.deserialize(&serialized).is_ok());
255-
})
256-
});
257-
group.bench_function("el", move |b| {
258-
let full_rules = rules_from_lists(&["data/easylist.to/easylist/easylist.txt"]);
259-
260-
let engine = Engine::from_rules(full_rules, Default::default());
261-
let serialized = engine.serialize_raw().unwrap();
262-
263-
b.iter(|| {
264-
let mut deserialized = Engine::default();
265-
assert!(deserialized.deserialize(&serialized).is_ok());
266-
})
267-
});
268-
group.bench_function("slimlist", move |b| {
269-
let full_rules = rules_from_lists(&["data/slim-list.txt"]);
270-
271-
let engine = Engine::from_rules(full_rules, Default::default());
272-
let serialized = engine.serialize_raw().unwrap();
273-
274-
b.iter(|| {
275-
let mut deserialized = Engine::default();
276-
assert!(deserialized.deserialize(&serialized).is_ok());
277-
})
278-
});
279-
280-
group.finish();
281-
}
282-
283208
fn rule_match_browserlike_comparable(c: &mut Criterion) {
284209
let mut group = c.benchmark_group("rule-match-browserlike");
285210

@@ -395,7 +320,5 @@ criterion_group!(
395320
rule_match_parsed_elep_slimlist,
396321
rule_match_browserlike_comparable,
397322
rule_match_first_request,
398-
serialization,
399-
deserialization
400323
);
401324
criterion_main!(benches);

benches/bench_serialization.rs

Lines changed: 85 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,85 @@
1+
use criterion::*;
2+
3+
use adblock::Engine;
4+
5+
#[path = "../tests/test_utils.rs"]
6+
mod test_utils;
7+
use test_utils::rules_from_lists;
8+
9+
fn serialization(c: &mut Criterion) {
10+
let mut group = c.benchmark_group("blocker-serialization");
11+
12+
group.sample_size(20);
13+
14+
group.bench_function("el+ep", move |b| {
15+
let full_rules = rules_from_lists(&[
16+
"data/easylist.to/easylist/easylist.txt",
17+
"data/easylist.to/easylist/easyprivacy.txt",
18+
]);
19+
20+
let engine = Engine::from_rules(full_rules, Default::default());
21+
b.iter(|| assert!(engine.serialize().unwrap().len() > 0))
22+
});
23+
group.bench_function("el", move |b| {
24+
let full_rules = rules_from_lists(&["data/easylist.to/easylist/easylist.txt"]);
25+
26+
let engine = Engine::from_rules(full_rules, Default::default());
27+
b.iter(|| assert!(engine.serialize().unwrap().len() > 0))
28+
});
29+
group.bench_function("slimlist", move |b| {
30+
let full_rules = rules_from_lists(&["data/slim-list.txt"]);
31+
32+
let engine = Engine::from_rules(full_rules, Default::default());
33+
b.iter(|| assert!(engine.serialize().unwrap().len() > 0))
34+
});
35+
36+
group.finish();
37+
}
38+
39+
fn deserialization(c: &mut Criterion) {
40+
let mut group = c.benchmark_group("blocker-deserialization");
41+
42+
group.sample_size(20);
43+
44+
group.bench_function("el+ep", move |b| {
45+
let full_rules = rules_from_lists(&[
46+
"data/easylist.to/easylist/easylist.txt",
47+
"data/easylist.to/easylist/easyprivacy.txt",
48+
]);
49+
50+
let engine = Engine::from_rules(full_rules, Default::default());
51+
let serialized = engine.serialize().unwrap();
52+
53+
b.iter(|| {
54+
let mut deserialized = Engine::default();
55+
assert!(deserialized.deserialize(&serialized).is_ok());
56+
})
57+
});
58+
group.bench_function("el", move |b| {
59+
let full_rules = rules_from_lists(&["data/easylist.to/easylist/easylist.txt"]);
60+
61+
let engine = Engine::from_rules(full_rules, Default::default());
62+
let serialized = engine.serialize().unwrap();
63+
64+
b.iter(|| {
65+
let mut deserialized = Engine::default();
66+
assert!(deserialized.deserialize(&serialized).is_ok());
67+
})
68+
});
69+
group.bench_function("slimlist", move |b| {
70+
let full_rules = rules_from_lists(&["data/slim-list.txt"]);
71+
72+
let engine = Engine::from_rules(full_rules, Default::default());
73+
let serialized = engine.serialize().unwrap();
74+
75+
b.iter(|| {
76+
let mut deserialized = Engine::default();
77+
assert!(deserialized.deserialize(&serialized).is_ok());
78+
})
79+
});
80+
81+
group.finish();
82+
}
83+
84+
criterion_group!(benches, serialization, deserialization);
85+
criterion_main!(benches);

examples/generate-dat.rs

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -21,7 +21,7 @@ fn main() {
2121
)
2222
.unwrap();
2323
assert!(engine.check_network_request(&request).exception.is_some());
24-
let serialized = engine.serialize_raw().expect("Could not serialize!");
24+
let serialized = engine.serialize().expect("Could not serialize!");
2525

2626
// Write to file
2727
let mut file = File::create("engine.dat").expect("Could not create serialization file");

js/src/lib.rs

Lines changed: 59 additions & 27 deletions
Original file line number | Diff line number | Diff line change
@@ -1,13 +1,15 @@
1+
use adblock::lists::{
2+
FilterFormat, FilterListMetadata, FilterSet as FilterSetInternal, ParseOptions, RuleTypes,
3+
};
4+
use adblock::resources::resource_assembler::assemble_web_accessible_resources;
5+
use adblock::resources::Resource;
6+
use adblock::Engine as EngineInternal;
17
use neon::prelude::*;
28
use neon::types::buffer::TypedArray as _;
39
use serde::{Deserialize, Serialize};
410
use std::cell::RefCell;
5-
use std::sync::Mutex;
611
use std::path::Path;
7-
use adblock::Engine as EngineInternal;
8-
use adblock::lists::{RuleTypes, FilterFormat, FilterListMetadata, FilterSet as FilterSetInternal, ParseOptions};
9-
use adblock::resources::Resource;
10-
use adblock::resources::resource_assembler::assemble_web_accessible_resources;
12+
use std::sync::Mutex;
1113

1214
/// Use the JS context's JSON.stringify and JSON.parse as an FFI, at least until
1315
/// https://github.com/neon-bindings/neon/pull/953 is available
@@ -17,14 +19,18 @@ mod json_ffi {
1719

1820
/// Call `JSON.stringify` to convert the input to a `JsString`, then call serde_json to parse
1921
/// it to an instance of a native Rust type
20-
pub fn from_js<'a, C: Context<'a>, T: DeserializeOwned>(cx: &mut C, input: Handle<JsValue>) -> NeonResult<T> {
22+
pub fn from_js<'a, C: Context<'a>, T: DeserializeOwned>(
23+
cx: &mut C,
24+
input: Handle<JsValue>,
25+
) -> NeonResult<T> {
2126
let json: Handle<JsObject> = cx.global().get(cx, "JSON")?;
2227
let json_stringify: Handle<JsFunction> = json.get(cx, "stringify")?;
2328

2429
let undefined = JsUndefined::new(cx);
2530
let js_string = json_stringify
2631
.call(cx, undefined, [input])?
27-
.downcast::<JsString, _>(cx).or_throw(cx)?;
32+
.downcast::<JsString, _>(cx)
33+
.or_throw(cx)?;
2834

2935
match serde_json::from_str(&js_string.value(cx)) {
3036
Ok(v) => Ok(v),
@@ -34,16 +40,16 @@ mod json_ffi {
3440

3541
/// Use `serde_json` to stringify the input, then call `JSON.parse` to convert it to a
3642
/// `JsValue`
37-
pub fn to_js<'a, C: Context<'a>, T: serde::Serialize>(cx: &mut C, input: &T) -> JsResult<'a, JsValue> {
43+
pub fn to_js<'a, C: Context<'a>, T: serde::Serialize>(
44+
cx: &mut C,
45+
input: &T,
46+
) -> JsResult<'a, JsValue> {
3847
let input_handle = JsString::new(cx, serde_json::to_string(&input).unwrap());
3948

4049
let json: Handle<JsObject> = cx.global().get(cx, "JSON")?;
4150
let json_parse: Handle<JsFunction> = json.get(cx, "parse")?;
4251

43-
json_parse
44-
.call_with(cx)
45-
.arg(input_handle)
46-
.apply(cx)
52+
json_parse.call_with(cx).arg(input_handle).apply(cx)
4753
}
4854
}
4955

@@ -61,10 +67,16 @@ impl FilterSet {
6167
fn add_filters(&self, rules: &[String], opts: ParseOptions) -> FilterListMetadata {
6268
self.0.borrow_mut().add_filters(rules, opts)
6369
}
64-
fn add_filter(&self, filter: &str, opts: ParseOptions) -> Result<(), adblock::lists::FilterParseError> {
70+
fn add_filter(
71+
&self,
72+
filter: &str,
73+
opts: ParseOptions,
74+
) -> Result<(), adblock::lists::FilterParseError> {
6575
self.0.borrow_mut().add_filter(filter, opts)
6676
}
67-
fn into_content_blocking(&self) -> Result<(Vec<adblock::content_blocking::CbRule>, Vec<String>), ()> {
77+
fn into_content_blocking(
78+
&self,
79+
) -> Result<(Vec<adblock::content_blocking::CbRule>, Vec<String>), ()> {
6880
self.0.borrow().clone().into_content_blocking()
6981
}
7082
}
@@ -74,7 +86,10 @@ impl Finalize for FilterSet {}
7486
fn create_filter_set(mut cx: FunctionContext) -> JsResult<JsBox<FilterSet>> {
7587
match cx.argument_opt(0) {
7688
Some(arg) => {
77-
let debug: bool = arg.downcast::<JsBoolean, _>(&mut cx).or_throw(&mut cx)?.value(&mut cx);
89+
let debug: bool = arg
90+
.downcast::<JsBoolean, _>(&mut cx)
91+
.or_throw(&mut cx)?
92+
.value(&mut cx);
7893
Ok(cx.boxed(FilterSet::new(debug)))
7994
}
8095
None => Ok(cx.boxed(FilterSet::default())),
@@ -158,9 +173,7 @@ fn engine_constructor(mut cx: FunctionContext) -> JsResult<JsBox<Engine>> {
158173
};
159174
EngineInternal::from_filter_set(rules, optimize)
160175
}
161-
None => {
162-
EngineInternal::from_filter_set(rules, true)
163-
},
176+
None => EngineInternal::from_filter_set(rules, true),
164177
};
165178
Ok(cx.boxed(Engine(Mutex::new(engine_internal))))
166179
}
@@ -175,7 +188,9 @@ fn engine_check(mut cx: FunctionContext) -> JsResult<JsValue> {
175188
let debug = match cx.argument_opt(4) {
176189
Some(arg) => {
177190
// Throw if the argument exists and it cannot be downcasted to a boolean
178-
arg.downcast::<JsBoolean, _>(&mut cx).or_throw(&mut cx)?.value(&mut cx)
191+
arg.downcast::<JsBoolean, _>(&mut cx)
192+
.or_throw(&mut cx)?
193+
.value(&mut cx)
179194
}
180195
None => false,
181196
};
@@ -230,10 +245,10 @@ fn engine_url_cosmetic_resources(mut cx: FunctionContext) -> JsResult<JsValue> {
230245
json_ffi::to_js(&mut cx, &result)
231246
}
232247

233-
fn engine_serialize_raw(mut cx: FunctionContext) -> JsResult<JsArrayBuffer> {
248+
fn engine_serialize(mut cx: FunctionContext) -> JsResult<JsArrayBuffer> {
234249
let this = cx.argument::<JsBox<Engine>>(0)?;
235250
let serialized = if let Ok(engine) = this.0.lock() {
236-
engine.serialize_raw().unwrap()
251+
engine.serialize().unwrap()
237252
} else {
238253
cx.throw_error("Failed to acquire lock on engine")?
239254
};
@@ -336,14 +351,25 @@ fn ublock_resources(mut cx: FunctionContext) -> JsResult<JsValue> {
336351
let redirect_resources_path: String = cx.argument::<JsString>(1)?.value(&mut cx);
337352
// `scriptlets_path` is optional, since adblock-rust parsing that file is now deprecated.
338353
let scriptlets_path = match cx.argument_opt(2) {
339-
Some(arg) => Some(arg.downcast::<JsString, _>(&mut cx).or_throw(&mut cx)?.value(&mut cx)),
354+
Some(arg) => Some(
355+
arg.downcast::<JsString, _>(&mut cx)
356+
.or_throw(&mut cx)?
357+
.value(&mut cx),
358+
),
340359
None => None,
341360
};
342361

343-
let mut resources = assemble_web_accessible_resources(&Path::new(&web_accessible_resource_dir), &Path::new(&redirect_resources_path));
362+
let mut resources = assemble_web_accessible_resources(
363+
&Path::new(&web_accessible_resource_dir),
364+
&Path::new(&redirect_resources_path),
365+
);
344366
if let Some(scriptlets_path) = scriptlets_path {
345367
#[allow(deprecated)]
346-
resources.append(&mut adblock::resources::resource_assembler::assemble_scriptlet_resources(&Path::new(&scriptlets_path)));
368+
resources.append(
369+
&mut adblock::resources::resource_assembler::assemble_scriptlet_resources(&Path::new(
370+
&scriptlets_path,
371+
)),
372+
);
347373
}
348374

349375
json_ffi::to_js(&mut cx, &resources)
@@ -380,13 +406,19 @@ register_module!(mut m, {
380406
m.export_function("FilterSet_constructor", create_filter_set)?;
381407
m.export_function("FilterSet_addFilters", filter_set_add_filters)?;
382408
m.export_function("FilterSet_addFilter", filter_set_add_filter)?;
383-
m.export_function("FilterSet_intoContentBlocking", filter_set_into_content_blocking)?;
409+
m.export_function(
410+
"FilterSet_intoContentBlocking",
411+
filter_set_into_content_blocking,
412+
)?;
384413

385414
m.export_function("Engine_constructor", engine_constructor)?;
386415
m.export_function("Engine_check", engine_check)?;
387416
m.export_function("Engine_urlCosmeticResources", engine_url_cosmetic_resources)?;
388-
m.export_function("Engine_hiddenClassIdSelectors", engine_hidden_class_id_selectors)?;
389-
m.export_function("Engine_serializeRaw", engine_serialize_raw)?;
417+
m.export_function(
418+
"Engine_hiddenClassIdSelectors",
419+
engine_hidden_class_id_selectors,
420+
)?;
421+
m.export_function("Engine_serialize", engine_serialize)?;
390422
m.export_function("Engine_deserialize", engine_deserialize)?;
391423
m.export_function("Engine_enableTag", engine_enable_tag)?;
392424
m.export_function("Engine_useResources", engine_use_resources)?;

0 commit comments

Comments
 (0)