Skip to content

Commit 9755f9a

Browse files
authored
Merge pull request #395 from brave/handle-adg-cosmetic-location-regex
Avoid creating invalid filters for AdGuard cosmetic filter location regexes
2 parents baa7deb + c3dabb7 commit 9755f9a

File tree

2 files changed

+56
-7
lines changed

2 files changed

+56
-7
lines changed

src/content_blocking.rs

Lines changed: 31 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -560,7 +560,9 @@ impl TryFrom<CosmeticFilter> for CbRule {
560560
type Error = CbRuleCreationFailure;
561561

562562
fn try_from(v: CosmeticFilter) -> Result<Self, Self::Error> {
563-
use crate::filters::cosmetic::{CosmeticFilterLocationType, CosmeticFilterMask};
563+
use crate::filters::cosmetic::{
564+
CosmeticFilterLocationType as LocationType, CosmeticFilterMask,
565+
};
564566

565567
if v.action.is_some() {
566568
return Err(CbRuleCreationFailure::CosmeticActionRulesNotSupported);
@@ -573,28 +575,29 @@ impl TryFrom<CosmeticFilter> for CbRule {
573575
let mut hostnames_vec = vec![];
574576
let mut not_hostnames_vec = vec![];
575577

576-
let mut any_entities = false;
578+
let mut any_unsupported = false;
577579

578580
// Unwrap is okay here - cosmetic rules must have a '#' character
579581
let sharp_index = find_char(b'#', raw_line.as_bytes()).unwrap();
580582
CosmeticFilter::locations_before_sharp(&raw_line, sharp_index).for_each(
581583
|(location_type, location)| match location_type {
582-
CosmeticFilterLocationType::Entity => any_entities = true,
583-
CosmeticFilterLocationType::NotEntity => any_entities = true,
584-
CosmeticFilterLocationType::Hostname => {
584+
LocationType::Entity | LocationType::NotEntity | LocationType::Unsupported => {
585+
any_unsupported = true
586+
}
587+
LocationType::Hostname => {
585588
if let Ok(encoded) = idna::domain_to_ascii(location) {
586589
hostnames_vec.push(encoded);
587590
}
588591
}
589-
CosmeticFilterLocationType::NotHostname => {
592+
LocationType::NotHostname => {
590593
if let Ok(encoded) = idna::domain_to_ascii(location) {
591594
not_hostnames_vec.push(encoded);
592595
}
593596
}
594597
},
595598
);
596599

597-
if any_entities {
600+
if any_unsupported && hostnames_vec.is_empty() && not_hostnames_vec.is_empty() {
598601
return Err(CbRuleCreationFailure::CosmeticEntitiesUnsupported);
599602
}
600603

@@ -1403,4 +1406,25 @@ mod filterset_tests {
14031406

14041407
Ok(())
14051408
}
1409+
1410+
#[test]
1411+
fn convert_cosmetic_filter_locations() -> Result<(), ()> {
1412+
let list = [
1413+
r"/^dizipal\d+\.com$/##.web",
1414+
r"/^example\d+\.com$/,test.net,b.*##.ad",
1415+
];
1416+
let mut set = FilterSet::new(true);
1417+
set.add_filters(&list, Default::default());
1418+
1419+
let (cb_rules, used_rules) = set.into_content_blocking()?;
1420+
assert_eq!(used_rules.len(), 1);
1421+
assert_eq!(cb_rules.len(), 1);
1422+
assert!(cb_rules[0].trigger.if_domain.is_some());
1423+
assert_eq!(
1424+
cb_rules[0].trigger.if_domain.as_ref().unwrap(),
1425+
&["test.net"]
1426+
);
1427+
1428+
Ok(())
1429+
}
14061430
}

src/filters/cosmetic.rs

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -150,6 +150,7 @@ pub(crate) enum CosmeticFilterLocationType {
150150
NotEntity,
151151
Hostname,
152152
NotHostname,
153+
Unsupported,
153154
}
154155

155156
/// Contains hashes of all of the comma separated location items that were populated before the
@@ -186,6 +187,10 @@ impl CosmeticFilter {
186187
hostname.len()
187188
};
188189
let location = &hostname[start..end];
190+
// AdGuard regex syntax
191+
if location.starts_with('/') {
192+
return Some((CosmeticFilterLocationType::Unsupported, part));
193+
}
189194
Some(match (negation, entity) {
190195
(true, true) => (CosmeticFilterLocationType::NotEntity, location),
191196
(true, false) => (CosmeticFilterLocationType::NotHostname, location),
@@ -216,6 +221,7 @@ impl CosmeticFilter {
216221
return Err(CosmeticFilterError::LocationModifiersUnsupported);
217222
}
218223

224+
let mut any_unsupported = false;
219225
for (location_type, location) in Self::locations_before_sharp(line, sharp_index) {
220226
let mut hostname = String::new();
221227
if location.is_ascii() {
@@ -232,8 +238,20 @@ impl CosmeticFilter {
232238
CosmeticFilterLocationType::NotHostname => not_hostnames_vec.push(hash),
233239
CosmeticFilterLocationType::Entity => entities_vec.push(hash),
234240
CosmeticFilterLocationType::Hostname => hostnames_vec.push(hash),
241+
CosmeticFilterLocationType::Unsupported => {
242+
any_unsupported = true;
243+
}
235244
}
236245
}
246+
// Discard the rule altogether if there are no supported location types
247+
if any_unsupported
248+
&& hostnames_vec.is_empty()
249+
&& entities_vec.is_empty()
250+
&& not_hostnames_vec.is_empty()
251+
&& not_entities_vec.is_empty()
252+
{
253+
return Err(CosmeticFilterError::UnsupportedSyntax);
254+
}
237255

238256
/// Sorts `vec` and wraps it in `Some` if it's not empty, or returns `None` if it is.
239257
#[inline]
@@ -2167,6 +2185,13 @@ mod matching_tests {
21672185
assert!(parse_cf(r#"​##a[href^="https://www.g2fame.com/"] > img"#).is_err());
21682186
}
21692187

2188+
#[test]
2189+
fn adg_regex() {
2190+
assert!(parse_cf(r"/^dizipal\d+\.com$/##.web").is_err());
2191+
// Filter is still salvageable if at least one location is supported
2192+
assert!(parse_cf(r"/^dizipal\d+\.com,test.net$/##.web").is_ok());
2193+
}
2194+
21702195
#[test]
21712196
#[cfg(feature = "css-validation")]
21722197
fn abp_has_conversion() {

0 commit comments

Comments
 (0)