diff --git a/lib/hyp_diff.rb b/lib/hyp_diff.rb index 047f4f3..fbff4e0 100644 --- a/lib/hyp_diff.rb +++ b/lib/hyp_diff.rb @@ -42,14 +42,14 @@ def compare(before, after, options = {}) # @api private class NodeMap - def self.for(change_node_tuples, &block) + def self.for(change_node_tuples) new.build(change_node_tuples).map end attr_reader :map def initialize - @map = {} + @map = Hash.new {|h, k| h[k] = [] } @stashed = [] end @@ -79,8 +79,7 @@ def build(change_node_tuples) end def append_to_node(node, change) - list = (@map[node] ||= []) - list << change + @map[node] << change end end @@ -103,15 +102,63 @@ def render(changes) changes.each do |change| case change.action when "!" then - deletions << change.old_element.text - insertions << change.new_element.text + old_fulltext = change.old_element.fulltext + new_fulltext = change.new_element.fulltext + if old_fulltext.include?(new_fulltext) + if old_fulltext.start_with?(new_fulltext) + apply_insertions_and_deletions + new_text << new_fulltext + deletions << old_fulltext[new_fulltext.length..-1] + next + end + if old_fulltext.end_with?(new_fulltext) + deletions << old_fulltext[0, old_fulltext.length - new_fulltext.length] + apply_insertions_and_deletions + new_text << new_fulltext + next + end + end + if new_fulltext.include?(old_fulltext) + if new_fulltext.start_with?(old_fulltext) + apply_insertions_and_deletions + new_text << old_fulltext + insertions << new_fulltext[old_fulltext.length..-1] + next + end + if new_fulltext.end_with?(old_fulltext) + insertions << new_fulltext[0, new_fulltext.length - old_fulltext.length] + apply_insertions_and_deletions + new_text << old_fulltext + next + end + end + if insertions.empty? && deletions.empty? && change.old_element.before_whitespace && change.new_element.before_whitespace + new_text << " " + deletions << change.old_element.text + insertions << change.new_element.text + next + end + deletions << change.old_element.fulltext + insertions << change.new_element.fulltext when "=" then + if change.old_element.before_whitespace && !change.new_element.before_whitespace + deletions << " " + apply_insertions_and_deletions + new_text << change.new_element.text + next + end + if change.new_element.before_whitespace && !change.old_element.before_whitespace + insertions << " " + apply_insertions_and_deletions + new_text << change.new_element.text + next + end apply_insertions_and_deletions - new_text << escape_html(change.new_element.text) + new_text << escape_html(change.new_element.fulltext) when "+" then - insertions << change.new_element.text + insertions << change.new_element.fulltext when "-" then - deletions << change.old_element.text + deletions << change.old_element.fulltext else raise "unexpected change.action #{change.action}" end @@ -131,6 +178,13 @@ def rendered_text attr_reader :insertions, :deletions, :new_text def apply_insertions_and_deletions + while !deletions.empty? && !insertions.empty? + break unless deletions.first == insertions.first + + deletions.shift + new_text << insertions.shift + end + if deletions.length > 0 new_text << deletion_tag(deletions.join) end @@ -163,7 +217,7 @@ def parse(text) end def extract_text(node) - filter_whitespace(text_fragments(node)) + merge_whitespace(filter_whitespace(text_fragments(node))) end def text_fragments(node) @@ -187,5 +241,34 @@ def filter_whitespace(node_list) result end + def merge_whitespace(node_list) + result = [] + + last_whitespace_node = nil + node_list.each do |node| + if node.whitespace? + last_whitespace_node = node + next + end + + unless last_whitespace_node + result << node + next + end + + if last_whitespace_node.node.equal?(node.node) + node.before_whitespace = last_whitespace_node + else + result << last_whitespace_node + end + last_whitespace_node = nil + result << node + end + + result << last_whitespace_node if last_whitespace_node + + result + end + end; end diff --git a/lib/hyp_diff/text_from_node.rb b/lib/hyp_diff/text_from_node.rb index 9ad5d43..28053d6 100644 --- a/lib/hyp_diff/text_from_node.rb +++ b/lib/hyp_diff/text_from_node.rb @@ -1,13 +1,15 @@ module HypDiff class TextFromNode + attr_accessor :before_whitespace + def initialize(raw_text, node) @text = raw_text.strip == "" ? " " : raw_text @node = node end def ==(other) - text == other.text + eql?(other) end def eql?(other) @@ -22,6 +24,10 @@ def whitespace? @text == " " end + def fulltext + before_whitespace ? " #{text}" : text + end + def text @text end diff --git a/spec/hyp_diff_spec.rb b/spec/hyp_diff_spec.rb index 83f0b36..c3ccde9 100644 --- a/spec/hyp_diff_spec.rb +++ b/spec/hyp_diff_spec.rb @@ -48,7 +48,7 @@ def expect_diff(old, new, expected) end it "merges consecutive deletions into a single tag" do - expect_diff("hello beautiful world", "hello world", "hello beautiful world") + expect_diff("hello beautiful world", "hello world", "hello beautiful world") end it "merge consecutive additions and edits into single tags" do @@ -131,14 +131,19 @@ def expect_diff(old, new, expected) "hello world", "hello world" ) - expect_diff( - "hello world", - "hello world", - "hello world" - ) end - it "considers trailing and leading whitespace for insertions and deletions" do + it "treats consecutive whitespace as a single whitespace across tags (best effort for special cases)" do + expect( + HypDiff.compare( + "hello world", + "hello world", + ) + ).to eq("hello world") + .or eq("hello world") + end + + it "considers trailing and leading whitespace for insertions and deletions", :aggregate_failures do expect_diff("hello", "hello world", "hello world") expect_diff("hello world", "hello", "hello world") expect_diff("world", "hello world", "hello world") @@ -149,14 +154,14 @@ def expect_diff(old, new, expected) expect_diff("hello world", "hello ", "hello world") end - it "considers trailing and leading whitespace changes" do + it "considers trailing and leading whitespace changes", :aggregate_failures do expect_diff("hello ", "hello", "hello ") expect_diff("hello", "hello ", "hello ") expect_diff(" hello", "hello", " hello") expect_diff("hello", " hello", " hello") end - it "considers changes of text and whitespace" do + it "considers changes of text and whitespace", :aggregate_failures do expect_diff("hello world ", "hello friend", "hello world friend") expect_diff(" bye world", "hello world", " byehello world") expect_diff("hello friend", "hello world ", "hello friendworld ") @@ -168,7 +173,7 @@ def expect_diff(old, new, expected) expect_diff("hello world", "hello, world", "hello, world") end - it "diffs changes of punctuation to words" do + it "diffs changes of punctuation to words", :aggregate_failures do expect_diff( "hello, world", "hello beautiful world", @@ -181,7 +186,7 @@ def expect_diff(old, new, expected) ) end - it "diffs changes of punctuation to leading and trailing spaces" do + it "diffs changes of punctuation to leading and trailing spaces", :aggregate_failures do expect_diff("hello.", "hello ", "hello. ") expect_diff("hello ", "hello.", "hello .") expect_diff(" hello", ".hello", " .hello") @@ -192,4 +197,10 @@ def expect_diff(old, new, expected) expect_diff("hello world", "hello world.", "hello world.") end + it "converts newlines to spaces" do + content = + "

Office philosophy

\n

No set working places

\n

bla bla

" + expect_diff(content, content, + "

Office philosophy

No set working places

bla bla

") + end end diff --git a/spec/large_text_spec.rb b/spec/large_text_spec.rb new file mode 100644 index 0000000..ae4dad5 --- /dev/null +++ b/spec/large_text_spec.rb @@ -0,0 +1,24 @@ +# encoding: utf-8 +require "hyp_diff" +require "securerandom" + +describe HypDiff do + context "for large text" do + it "performs reasonably fast" do + words = [] + 300.times do + 8.times do |i| + words << SecureRandom.hex([i + 2, 6].min) + end + words << "replace" + words << "me." + end + text = words.join(" ") + modified_text = text.gsub("replace me", "better text") + start = Time.now + + HypDiff.compare(text, modified_text) + expect(Time.now - start).to be < 1 + end + end +end