Skip to content

Commit

Permalink
Earlier enforcement of regex constraints.
Browse files Browse the repository at this point in the history
This commit moves regex enforcement into the core
query matching pipeline. This significantly
improves performance of multi-pattern queries using regexes and
makes it possible to use regexes in NOT sub queries.

Fixes weggli-rs#23.
  • Loading branch information
felixwilhelm committed Jan 10, 2022
1 parent bfdcfd0 commit 545bf4f
Show file tree
Hide file tree
Showing 9 changed files with 186 additions and 109 deletions.
36 changes: 27 additions & 9 deletions src/builder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,13 +19,19 @@ use std::collections::{HashMap, HashSet};
use crate::capture::{add_capture, Capture};
use crate::query::{NegativeQuery, QueryTree};
use crate::util::parse_number_literal;
use crate::RegexMap;
use colored::Colorize;
use tree_sitter::{Node, TreeCursor};

/// Translate a parsed and validated input source (specified by `source` and `cursor`) into a `QueryTree`.
/// When `is_cpp` is set, C++ specific features are enabled.
pub fn build_query_tree(source: &str, cursor: &mut TreeCursor, is_cpp: bool) -> QueryTree {
_build_query_tree(source, cursor, 0, is_cpp, false, false)
pub fn build_query_tree(
source: &str,
cursor: &mut TreeCursor,
is_cpp: bool,
regex_constraints: Option<RegexMap>,
) -> QueryTree {
_build_query_tree(source, cursor, 0, is_cpp, false, false, regex_constraints)
}

fn _build_query_tree(
Expand All @@ -35,13 +41,18 @@ fn _build_query_tree(
is_cpp: bool,
is_multi_pattern: bool,
strict_mode: bool,
regex_constraints: Option<RegexMap>,
) -> QueryTree {
let mut b = QueryBuilder {
query_source: source.to_string(),
captures: Vec::new(),
negations: Vec::new(),
id,
cpp: is_cpp,
regex_constraints: match regex_constraints {
Some(r) => r,
None => RegexMap::new(HashMap::new()),
},
};

// Skip the root node if it's a translation_unit.
Expand Down Expand Up @@ -147,7 +158,7 @@ fn process_captures(
Capture::Check(s) => {
sexp += &format!(r#"(#eq? @{} "{}")"#, (i + offset).to_string(), s);
}
Capture::Variable(var) => {
Capture::Variable(var, _) => {
vars.entry(var.clone())
.or_insert_with(Vec::new)
.push(i + offset);
Expand All @@ -165,7 +176,7 @@ fn process_captures(
let a = vec[0].to_string();
for capture in vec.iter().skip(1) {
let b = capture.to_string();
sexp += &format!(r#"(#eq? @{} @{})"#, a.to_string(), b.to_string());
sexp += &format!(r#"(#eq? @{} @{})"#, a, b);
}
}
}
Expand All @@ -180,6 +191,7 @@ struct QueryBuilder {
negations: Vec<NegativeQuery>, // all negative sub queries (not: )
id: usize, // a globally unique ID used for caching results see `query.rs`
cpp: bool, // flag to enable C++ support
regex_constraints: RegexMap,
}

impl QueryBuilder {
Expand Down Expand Up @@ -245,7 +257,7 @@ impl QueryBuilder {
// Anonymous nodes are string constants like "+" or "+=".
// We can simply copy them into the query.
if !c.node().is_named() {
return format!(r#""{}""#, c.node().kind().to_string());
return format!(r#""{}""#, c.node().kind());
}

let kind = c.node().kind();
Expand Down Expand Up @@ -305,6 +317,7 @@ impl QueryBuilder {
self.cpp,
true,
false, // limit strictness to current depth for now
Some(self.regex_constraints.clone()),
)));
return "(compound_statement) @".to_string()
+ &add_capture(&mut self.captures, capture);
Expand Down Expand Up @@ -353,7 +366,10 @@ impl QueryBuilder {
let unquoted = &pattern[1..pattern.len() - 1];

if unquoted.starts_with('$') {
let c = Capture::Variable(unquoted.to_string());
let c = Capture::Variable(
unquoted.to_string(),
self.regex_constraints.get(unquoted),
);
return format! {"(string_literal) @{}", &add_capture(&mut self.captures, c)};
}
}
Expand All @@ -371,7 +387,7 @@ impl QueryBuilder {
let mut result = format!("({}", c.node().kind());
if !c.goto_first_child() {
if !c.node().is_named() {
return format!(r#""{}""#, c.node().kind().to_string());
return format!(r#""{}""#, c.node().kind());
}
return result + ")";
}
Expand Down Expand Up @@ -445,6 +461,7 @@ impl QueryBuilder {
self.cpp,
false,
false, // TODO: should strict mode be supported in NOT queries?
Some(self.regex_constraints.clone()),
)),
previous_capture_index: before,
});
Expand Down Expand Up @@ -473,15 +490,15 @@ impl QueryBuilder {
};

let capture = if pattern.starts_with('$') {
Capture::Variable(pattern.to_string())
Capture::Variable(pattern.to_string(), self.regex_constraints.get(pattern))
} else {
Capture::Check(pattern.to_string())
};

result += " @";
result += &add_capture(&mut self.captures, capture);

return result;
result
}

// Handle $foo() and _(). Returns None if the call does not need special handling.
Expand Down Expand Up @@ -518,6 +535,7 @@ impl QueryBuilder {
self.cpp,
false,
strict_mode,
Some(self.regex_constraints.clone()),
)));
return Some("_ @".to_string() + &add_capture(&mut self.captures, capture));
}
Expand Down
25 changes: 13 additions & 12 deletions src/capture.rs
Original file line number Diff line number Diff line change
@@ -1,18 +1,19 @@
/*
Copyright 2021 Google LLC
Copyright 2021 Google LLC
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
https://www.apache.org/licenses/LICENSE-2.0
https://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
use regex::Regex;

/// We use captures as a way to extend tree-sitter's query mechanism.
/// Variable captures correspond to a weggli variable ($foo) and we enforce
Expand All @@ -23,7 +24,7 @@
#[derive(Debug)]
pub enum Capture {
Display,
Variable(String),
Variable(String, Option<(bool, Regex)>),
Check(String),
Number(i128),
Subquery(Box<crate::query::QueryTree>),
Expand Down
27 changes: 26 additions & 1 deletion src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,9 @@ See the License for the specific language governing permissions and
limitations under the License.
*/

use std::collections::{hash_map::Keys, HashMap};

use regex::Regex;
use tree_sitter::{Language, Parser, Query, Tree};

#[macro_use]
Expand Down Expand Up @@ -60,7 +63,7 @@ fn ts_query(sexpr: &str, cpp: bool) -> tree_sitter::Query {
unsafe { tree_sitter_cpp() }
};

match Query::new(language, &sexpr) {
match Query::new(language, sexpr) {
Ok(q) => q,
Err(e) => {
eprintln!(
Expand All @@ -74,3 +77,25 @@ fn ts_query(sexpr: &str, cpp: bool) -> tree_sitter::Query {
}
}

/// Map from variable names to a positive/negative regex constraint
/// (see the `--regex` command line option).
///
/// Every entry pairs a boolean flag with a compiled regex; the flag
/// distinguishes positive from negative constraints.
/// NOTE(review): which polarity `true` encodes is not visible in this
/// file section -- confirm against the code that populates the map.
#[derive(Clone, Debug, Default)]
pub struct RegexMap(HashMap<String, (bool, Regex)>);

impl RegexMap {
    /// Wraps an already populated variable-name -> constraint map.
    pub fn new(m: HashMap<String, (bool, Regex)>) -> RegexMap {
        RegexMap(m)
    }

    /// Returns an iterator over the names of all variables that have a
    /// regex constraint attached. The iterator borrows from `self`.
    pub fn variables(&self) -> Keys<'_, String, (bool, Regex)> {
        self.0.keys()
    }

    /// Looks up the constraint registered for `variable`.
    ///
    /// Returns an owned copy of the `(flag, regex)` pair so the caller can
    /// store it independently of this map, or `None` when no constraint
    /// exists for that name.
    pub fn get(&self, variable: &str) -> Option<(bool, Regex)> {
        self.0.get(variable).cloned()
    }
}
Loading

0 comments on commit 545bf4f

Please sign in to comment.