diff --git a/lib/hyp_diff.rb b/lib/hyp_diff.rb
index 047f4f3..fbff4e0 100644
--- a/lib/hyp_diff.rb
+++ b/lib/hyp_diff.rb
@@ -42,14 +42,14 @@ def compare(before, after, options = {})
# @api private
class NodeMap
- def self.for(change_node_tuples, &block)
+ def self.for(change_node_tuples)
new.build(change_node_tuples).map
end
attr_reader :map
def initialize
- @map = {}
+ @map = Hash.new {|h, k| h[k] = [] }
@stashed = []
end
@@ -79,8 +79,7 @@ def build(change_node_tuples)
end
def append_to_node(node, change)
- list = (@map[node] ||= [])
- list << change
+ @map[node] << change
end
end
@@ -103,15 +102,63 @@ def render(changes)
changes.each do |change|
case change.action
when "!" then
- deletions << change.old_element.text
- insertions << change.new_element.text
+ old_fulltext = change.old_element.fulltext
+ new_fulltext = change.new_element.fulltext
+ if old_fulltext.include?(new_fulltext)
+ if old_fulltext.start_with?(new_fulltext)
+ apply_insertions_and_deletions
+ new_text << new_fulltext
+ deletions << old_fulltext[new_fulltext.length..-1]
+ next
+ end
+ if old_fulltext.end_with?(new_fulltext)
+ deletions << old_fulltext[0, old_fulltext.length - new_fulltext.length]
+ apply_insertions_and_deletions
+ new_text << new_fulltext
+ next
+ end
+ end
+ if new_fulltext.include?(old_fulltext)
+ if new_fulltext.start_with?(old_fulltext)
+ apply_insertions_and_deletions
+ new_text << old_fulltext
+ insertions << new_fulltext[old_fulltext.length..-1]
+ next
+ end
+ if new_fulltext.end_with?(old_fulltext)
+ insertions << new_fulltext[0, new_fulltext.length - old_fulltext.length]
+ apply_insertions_and_deletions
+ new_text << old_fulltext
+ next
+ end
+ end
+ if insertions.empty? && deletions.empty? && change.old_element.before_whitespace && change.new_element.before_whitespace
+ new_text << " "
+ deletions << change.old_element.text
+ insertions << change.new_element.text
+ next
+ end
+ deletions << change.old_element.fulltext
+ insertions << change.new_element.fulltext
when "=" then
+ if change.old_element.before_whitespace && !change.new_element.before_whitespace
+ deletions << " "
+ apply_insertions_and_deletions
+ new_text << change.new_element.text
+ next
+ end
+ if change.new_element.before_whitespace && !change.old_element.before_whitespace
+ insertions << " "
+ apply_insertions_and_deletions
+ new_text << change.new_element.text
+ next
+ end
apply_insertions_and_deletions
- new_text << escape_html(change.new_element.text)
+ new_text << escape_html(change.new_element.fulltext)
when "+" then
- insertions << change.new_element.text
+ insertions << change.new_element.fulltext
when "-" then
- deletions << change.old_element.text
+ deletions << change.old_element.fulltext
else
raise "unexpected change.action #{change.action}"
end
@@ -131,6 +178,13 @@ def rendered_text
attr_reader :insertions, :deletions, :new_text
def apply_insertions_and_deletions
+ while !deletions.empty? && !insertions.empty?
+ break unless deletions.first == insertions.first
+
+ deletions.shift
+ new_text << insertions.shift
+ end
+
if deletions.length > 0
new_text << deletion_tag(deletions.join)
end
@@ -163,7 +217,7 @@ def parse(text)
end
def extract_text(node)
- filter_whitespace(text_fragments(node))
+ merge_whitespace(filter_whitespace(text_fragments(node)))
end
def text_fragments(node)
@@ -187,5 +241,34 @@ def filter_whitespace(node_list)
result
end
+ def merge_whitespace(node_list)
+ result = []
+
+ last_whitespace_node = nil
+ node_list.each do |node|
+ if node.whitespace?
+ last_whitespace_node = node
+ next
+ end
+
+ unless last_whitespace_node
+ result << node
+ next
+ end
+
+ if last_whitespace_node.node.equal?(node.node)
+ node.before_whitespace = last_whitespace_node
+ else
+ result << last_whitespace_node
+ end
+ last_whitespace_node = nil
+ result << node
+ end
+
+ result << last_whitespace_node if last_whitespace_node
+
+ result
+ end
+
end; end
diff --git a/lib/hyp_diff/text_from_node.rb b/lib/hyp_diff/text_from_node.rb
index 9ad5d43..28053d6 100644
--- a/lib/hyp_diff/text_from_node.rb
+++ b/lib/hyp_diff/text_from_node.rb
@@ -1,13 +1,15 @@
module HypDiff
class TextFromNode
+ attr_accessor :before_whitespace
+
def initialize(raw_text, node)
@text = raw_text.strip == "" ? " " : raw_text
@node = node
end
def ==(other)
- text == other.text
+ eql?(other)
end
def eql?(other)
@@ -22,6 +24,10 @@ def whitespace?
@text == " "
end
+ def fulltext
+ before_whitespace ? " #{text}" : text
+ end
+
def text
@text
end
diff --git a/spec/hyp_diff_spec.rb b/spec/hyp_diff_spec.rb
index 83f0b36..c3ccde9 100644
--- a/spec/hyp_diff_spec.rb
+++ b/spec/hyp_diff_spec.rb
@@ -48,7 +48,7 @@ def expect_diff(old, new, expected)
end
it "merges consecutive deletions into a single tag" do
- expect_diff("hello beautiful world", "hello world", "hello beautiful world")
+ expect_diff("hello beautiful world", "hello world", "hello beautiful world")
end
it "merge consecutive additions and edits into single tags" do
@@ -131,14 +131,19 @@ def expect_diff(old, new, expected)
"hello world",
"hello world"
)
- expect_diff(
- "hello world",
- "hello world",
- "hello world"
- )
end
- it "considers trailing and leading whitespace for insertions and deletions" do
+ it "treats consecutive whitespace as a single whitespace across tags (best effort for special cases)" do
+ expect(
+ HypDiff.compare(
+ "hello world",
+ "hello world",
+ )
+ ).to eq("hello world")
+ .or eq("hello world")
+ end
+
+ it "considers trailing and leading whitespace for insertions and deletions", :aggregate_failures do
expect_diff("hello", "hello world", "hello world")
expect_diff("hello world", "hello", "hello world")
expect_diff("world", "hello world", "hello world")
@@ -149,14 +154,14 @@ def expect_diff(old, new, expected)
expect_diff("hello world", "hello ", "hello world")
end
- it "considers trailing and leading whitespace changes" do
+ it "considers trailing and leading whitespace changes", :aggregate_failures do
expect_diff("hello ", "hello", "hello ")
expect_diff("hello", "hello ", "hello ")
expect_diff(" hello", "hello", " hello")
expect_diff("hello", " hello", " hello")
end
- it "considers changes of text and whitespace" do
+ it "considers changes of text and whitespace", :aggregate_failures do
expect_diff("hello world ", "hello friend", "hello world friend")
expect_diff(" bye world", "hello world", " byehello world")
expect_diff("hello friend", "hello world ", "hello friendworld ")
@@ -168,7 +173,7 @@ def expect_diff(old, new, expected)
expect_diff("hello world", "hello, world", "hello, world")
end
- it "diffs changes of punctuation to words" do
+ it "diffs changes of punctuation to words", :aggregate_failures do
expect_diff(
"hello, world",
"hello beautiful world",
@@ -181,7 +186,7 @@ def expect_diff(old, new, expected)
)
end
- it "diffs changes of punctuation to leading and trailing spaces" do
+ it "diffs changes of punctuation to leading and trailing spaces", :aggregate_failures do
expect_diff("hello.", "hello ", "hello. ")
expect_diff("hello ", "hello.", "hello .")
expect_diff(" hello", ".hello", " .hello")
@@ -192,4 +197,10 @@ def expect_diff(old, new, expected)
expect_diff("hello world", "hello world.", "hello world.")
end
+ it "converts newlines to spaces" do
+ content =
+ "
bla bla
"
+ expect_diff(content, content,
+ "bla bla
")
+ end
end
diff --git a/spec/large_text_spec.rb b/spec/large_text_spec.rb
new file mode 100644
index 0000000..ae4dad5
--- /dev/null
+++ b/spec/large_text_spec.rb
@@ -0,0 +1,24 @@
+# encoding: utf-8
+require "hyp_diff"
+require "securerandom"
+
+describe HypDiff do
+ context "for large text" do
+ it "performs reasonably fast" do
+ words = []
+ 300.times do
+ 8.times do |i|
+ words << SecureRandom.hex([i + 2, 6].min)
+ end
+ words << "replace"
+ words << "me."
+ end
+ text = words.join(" ")
+ modified_text = text.gsub("replace me", "better text")
+ start = Time.now
+
+ HypDiff.compare(text, modified_text)
+ expect(Time.now - start).to be < 1
+ end
+ end
+end