From 0d2b2bb5879bb29161c729e636e5c01af7cfa5ae Mon Sep 17 00:00:00 2001 From: Wade McEwen Date: Fri, 6 Oct 2023 09:29:20 -0500 Subject: [PATCH 1/5] API-9331 test case --- test/unit/evaluator_test.rb | 1 + 1 file changed, 1 insertion(+) diff --git a/test/unit/evaluator_test.rb b/test/unit/evaluator_test.rb index 3fa6691..86187d6 100644 --- a/test/unit/evaluator_test.rb +++ b/test/unit/evaluator_test.rb @@ -98,6 +98,7 @@ def test_nots assert sample("Not (Not Test Eq true)") assert sample("Not (Not(Not Test Eq true))") assert !sample("Test Eq false And Test Eq true Not Test Eq false") + assert !sample("Test Eq true Not (Test Eq false Or Test Eq false) And (Test Eq false Or Test Eq false)") end def test_examples From 2db22f3b7a174d968f249208dba648f53694aa6d Mon Sep 17 00:00:00 2001 From: Wade McEwen Date: Wed, 6 Aug 2025 14:18:48 -0500 Subject: [PATCH 2/5] API-9331 full rewrite of the evaluator to make dealing with it easier --- lib/sparkql/evaluator.rb | 235 ++++++++++++++++++------------------ test/unit/evaluator_test.rb | 18 ++- 2 files changed, 130 insertions(+), 123 deletions(-) diff --git a/lib/sparkql/evaluator.rb b/lib/sparkql/evaluator.rb index 7e864ba..cb24231 100644 --- a/lib/sparkql/evaluator.rb +++ b/lib/sparkql/evaluator.rb @@ -4,20 +4,6 @@ # fields. Plus, it has some optimizations built in to skip the processing for # any expressions that don't contribute to the net result of the filter. class Sparkql::Evaluator - # The struct here mimics some of the parser information about an expression, - # but should not be confused for an expression. Nodes reduce the expressions - # to a result based on conjunction logic, and only one exists per block group. - Node = Struct.new( - :level, - :block_group, - :conjunction, - :conjunction_level, - :match, - :good_ors, - :expressions, - :unary - ) - attr_reader :processed_count def initialize(expression_resolver) @@ -25,131 +11,146 @@ def initialize(expression_resolver) end def evaluate(expressions) - @dropped_expression = nil @processed_count = 0 - @index = Node.new(0, 0, "And", 0, true, false, 0, nil) - @groups = [@index] - expressions.each do |expression| - handle_group(expression) - adjust_expression_for_dropped_field(expression) - check_for_good_ors(expression) - next if skip?(expression) + levels = {} + block_groups = {} - evaluate_expression(expression) + build_structures(levels, block_groups, expressions) + + final_result = process_structures(levels, block_groups) + # If we didn't process anything, we consider that a success + if final_result.nil? + final_result = true end - cleanup - @index[:match] + + final_result end private - # prepare the group stack for the next expression - def handle_group(expression) - if @index[:block_group] == expression[:block_group] - # Noop - elsif @index[:block_group] < expression[:block_group] - @index = new_group(expression) - @groups.push(@index) - else - # Turn the group into an expression, resolve down to previous group(s) - smoosh_group(expression) - end - end - # Here's the real meat. We use an internal stack to represent the result of - # each block_group. This logic is re-used when merging the final result of one - # block group with the previous. - def evaluate_expression(expression) - @processed_count += 1 - evaluate_node(expression, @resolver.resolve(expression)) - end + # Take all the expressions and organize them into "chunks" appropriate for + # evaluation. Each block group should process it's expressions, and every + # block group injects itself as a placeholder expression in the block group a + # level above it. + # + # When no block groups exist above, we must stub one out for processing. + def build_structures(levels, block_groups, expressions) + expressions.each do |expression| + level = expression[:level] + block = expression[:block_group] + block_group = block_groups[block] - def evaluate_node(node, result) - if result == :drop - @dropped_expression = node - return result - end - if node[:unary] == "Not" - result = !result - end - if node[:conjunction] == 'Not' && - (node[:conjunction_level] == node[:level] || - node[:conjunction_level] == @index[:level]) - @index[:match] = !result if @index[:match] - elsif node[:conjunction] == 'And' || (@index[:expressions]).zero? - @index[:match] = result if @index[:match] - elsif node[:conjunction] == 'Or' && result - @index[:match] = result - end - @index[:expressions] += 1 - result - end + unless block_group + block_groups[block] ||= block_builder(expression, level) + block_group = block_groups[block] + levels[level] ||= [] + levels[level] << block - # Optimization logic, once we find any set of And'd expressions that pass and - # run into an Or at the same level, we can skip further processing at that - # level. - def check_for_good_ors(expression) - if expression[:conjunction] == 'Or' - good_index = @index - unless expression[:conjunction_level] == @index[:level] - good_index = nil - # Well crap, now we need to go back and find that level by hand - @groups.reverse_each do |i| - if i[:level] == expression[:conjunction_level] - good_index = i + # When dealing with Not expression conjunctions at the block level, + # it's far simpler to convert it into the equivalent "And Not" + if block_group[:conjunction] == "Not" + block_group[:unary] = "Not" + block_group[:conjunction] = "And" + end + + # Every block group _must_ be seen as an expression in another block + # group.This aids in final resolution order when processing the levels + # + # This is even true if there's only one block group. We always end up + # with a level -1 here to turn the top level expressions into a block + # group for processing. + current_level = level + while current_level >= 0 + current_level -= 1 + levels[current_level] ||= [] + last_block_group_id = levels[current_level].last + if last_block_group_id + block_groups[last_block_group_id][:expressions] << block_group + break + else + block_id = "placeholder_for_#{block}_#{current_level}" + placeholder_block = block_builder(block_group, current_level) + placeholder_block[:expressions] << block_group + + levels[current_level] << block_id + block_groups[block_id] = placeholder_block end end end - if !good_index.nil? && (good_index[:expressions]).positive? && good_index[:match] - good_index[:good_ors] = true - end + + block_group[:expressions] << expression end end - # We can skip further expression processing when And-d with a false expression - # or a "good Or" was already encountered. - def skip?(expression) - @index[:good_ors] || - !@index[:match] && expression[:conjunction] == 'And' - end + # Starting from the deepest levels, we process block groups expressions and + # reduce the block group to a result. This result is used in our placeholder + # block groups at levels above, ending in a single final result. + def process_structures(levels, block_groups) + final_result = nil - def new_group(expression) - Node.new(expression[:level], expression[:block_group], - expression[:conjunction], expression[:conjunction_level], - true, false, 0, nil) - end + # Now go through each level starting with the deepest and working back up. + levels.keys.sort.reverse.each do |level| + # Process each block group at this level and resolve the expressions in the group + levels[level].each do |block| + block_group = block_groups[block] - # When the last expression was dropped, we need to repair the filter by - # stealing the conjunction of that dropped field. - def adjust_expression_for_dropped_field(expression) - if @dropped_expression.nil? - return - elsif @dropped_expression[:block_group] == expression[:block_group] - expression[:conjunction] = @dropped_expression[:conjunction] - expression[:conjunction_level] = @dropped_expression[:conjunction_level] - end + block_result = nil + block_group[:expressions].each do |expression| + # If we encounter any or's in the same block group, we can cheat at + # resolving the rest, if we are at a true + if block_result && expression[:conjunction] == 'Or' + break + end - @dropped_expression = nil - end + expression_result = if expression.key?(:result) + # This is a reduced block group, just pass on + # the result + expression[:result] + else + @processed_count += 1 + @resolver.resolve(expression) # true, false, :drop + end + next if expression_result == :drop + + if expression[:unary] == "Not" + expression_result = !expression_result + end - # This is similar to the cleanup step, but happens when we return from a - # nesting level. Before we can proceed, we need wrap up the result of the - # nested group. - def smoosh_group(expression) - until @groups.last[:block_group] == expression[:block_group] - last = @groups.pop - @index = @groups.last - evaluate_node(last, last[:match]) + if block_result.nil? + block_result = expression_result + next + end + + case expression[:conjunction] + when 'Not' + block_result = block_result & !expression_result + when 'And' + block_result = block_result & expression_result + when 'Or' + block_result = block_result | expression_result + else + # Not a supported conjunction. We skip over this for backwards + # compatibility. + end + end + + # block_group.delete(:expressions) + block_group[:result] = block_result + final_result = block_result + end end + + final_result end - # pop off the group stack, evaluating each group with the previous as we go. - def cleanup - while @groups.size > 1 - last = @groups.pop - @index = @groups.last - evaluate_node(last, last[:match]) - end - @groups.last[:match] + def block_builder(expressionable, level) + { + conjunction: expressionable[:conjunction], + conjunction_level: expressionable[:conjunction_level], + level: level, + expressions: [], + result: nil + } end end diff --git a/test/unit/evaluator_test.rb b/test/unit/evaluator_test.rb index 86187d6..e02d6d8 100644 --- a/test/unit/evaluator_test.rb +++ b/test/unit/evaluator_test.rb @@ -84,10 +84,6 @@ def test_nesting end def test_nots - assert !sample("Not Test Eq true") - assert sample("Not Test Eq false") - assert !sample("Not (Test Eq true)") - assert sample("Not (Test Eq false)") assert sample("Test Eq true Not Test Eq false") assert !sample("Test Eq true Not Test Eq true") assert sample("Test Eq true Not (Test Eq false Or Test Eq false)") @@ -95,12 +91,22 @@ def test_nots assert !sample("Test Eq true Not (Test Eq false Or Test Eq true)") assert !sample("Test Eq true Not (Test Eq true Or Test Eq false)") assert !sample("Test Eq true Not (Not Test Eq false)") - assert sample("Not (Not Test Eq true)") - assert sample("Not (Not(Not Test Eq true))") assert !sample("Test Eq false And Test Eq true Not Test Eq false") assert !sample("Test Eq true Not (Test Eq false Or Test Eq false) And (Test Eq false Or Test Eq false)") end + def test_unary_nots + assert !sample("Not Test Eq true") + assert sample("Not Test Eq false") + assert !sample("Not (Test Eq true)") + assert sample("Not (Test Eq false)") + assert sample("Not (Not Test Eq true)") + end + + def test_unary_double_nots + assert sample("Not (Not(Not Test Eq true))") + end + def test_examples # This one is based on a real life example that had problems. # From 4c26f6d48e71234f8530dcd94d14c22535898860 Mon Sep 17 00:00:00 2001 From: Wade McEwen Date: Wed, 6 Aug 2025 14:21:17 -0500 Subject: [PATCH 3/5] rubocops --- lib/sparkql/evaluator.rb | 7 +++---- sparkql.gemspec | 1 + 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/lib/sparkql/evaluator.rb b/lib/sparkql/evaluator.rb index cb24231..08a2b0f 100644 --- a/lib/sparkql/evaluator.rb +++ b/lib/sparkql/evaluator.rb @@ -28,7 +28,6 @@ def evaluate(expressions) private - # Take all the expressions and organize them into "chunks" appropriate for # evaluation. Each block group should process it's expressions, and every # block group injects itself as a placeholder expression in the block group a @@ -124,11 +123,11 @@ def process_structures(levels, block_groups) case expression[:conjunction] when 'Not' - block_result = block_result & !expression_result + block_result &= !expression_result when 'And' - block_result = block_result & expression_result + block_result &= expression_result when 'Or' - block_result = block_result | expression_result + block_result |= expression_result else # Not a supported conjunction. We skip over this for backwards # compatibility. diff --git a/sparkql.gemspec b/sparkql.gemspec index 3dab508..d788a98 100644 --- a/sparkql.gemspec +++ b/sparkql.gemspec @@ -29,4 +29,5 @@ Gem::Specification.new do |s| s.add_development_dependency 'racc', '~> 1.4.8' s.add_development_dependency 'rake', ">=12.3.3" s.add_development_dependency 'test-unit', '~> 2.1.0' + s.add_development_dependency 'rubocop' end From bf6507780249a1b41b93fbff326026d615ddd8b7 Mon Sep 17 00:00:00 2001 From: Wade McEwen Date: Wed, 6 Aug 2025 15:16:49 -0500 Subject: [PATCH 4/5] buildfix? --- .github/workflows/ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 4ba4a28..7bf1bab 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -20,7 +20,7 @@ jobs: bundler-cache: true - run: | - gem install bundler + gem install bundler -v 2.2.31 ./script/ci_build publish: From 19ca18d42ccdf9dac1d0f58de5d382ec4cdcd835 Mon Sep 17 00:00:00 2001 From: Wade McEwen Date: Wed, 6 Aug 2025 15:21:18 -0500 Subject: [PATCH 5/5] changelog and gem bump --- CHANGELOG.md | 4 ++++ VERSION | 2 +- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index d886cb2..22a5a35 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,7 @@ +v1.3.1, 2025-08-06 +------------------- + * [BUGFIX] Evaluator fix for Not expressions + v1.3.0, 2022-02-01 ------------------- * [BUGFIX] Redesign FunctionResolver to better support other timezones diff --git a/VERSION b/VERSION index f0bb29e..3a3cd8c 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -1.3.0 +1.3.1