diff --git a/lib/htmldiff.rb b/lib/htmldiff.rb index 9d33522..192930a 100644 --- a/lib/htmldiff.rb +++ b/lib/htmldiff.rb @@ -15,12 +15,29 @@ def end_in_new Operation = Struct.new(:action, :start_in_old, :end_in_old, :start_in_new, :end_in_new) class DiffBuilder - - def initialize(old_version, new_version, ignore_whitespace = false, ignore_tags = false) + # For BC reasons, you can call this constructor with positioned options, but named are strongly preferred. + # + # Legacy signature: + # def initialize(old_version, new_version, ignore_whitespace = false, ignore_tags = false) + # + # New signature: + # def initialize(old_version, new_version, ignore_whitespace: false, ignore_tags: false, reduce_consecutive: false) + # + def initialize(old_version, new_version, *mixed) @old_version, @new_version = old_version, new_version - @ignore_whitespace = ignore_whitespace - @ignore_tags = ignore_tags - @join_char = ignore_whitespace ? ' ' : '' + @sibling_elements = %w(p li div) + if mixed.first.is_a?(Hash) + options = mixed.first + @ignore_whitespace = !! options[:ignore_whitespace] + @ignore_tags = !! options[:ignore_tags] + @reduce_consecutive = !! options[:reduce_consecutive] + @sibling_elements = options[:sibling_elements] if options[:sibling_elements] + else + @ignore_whitespace = !! mixed[0] + @ignore_tags = !! mixed[1] + @reduce_consecutive = false + end + @join_char = @ignore_whitespace ? ' ' : '' @content = [] end @@ -28,7 +45,12 @@ def build split_inputs_to_words index_new_words operations.each { |op| perform_operation(op) } - return @content.join(@join_char) + diff_output = @content.join(@join_char) + if @reduce_consecutive + ConsecutiveDiffReducer.new.call(diff_output) + else + diff_output + end end def split_inputs_to_words @@ -211,7 +233,10 @@ def insert_tag(tagname, cssclass, words) @content << wrap_text(non_tags.join(@join_char), tagname, cssclass) unless non_tags.empty? break if words.empty? - break if @ignore_tags && tagname == "del" + mm = words.first.match(/<\/?(\w+)>/) + next_tagname = mm ? mm[1] : nil + break if @ignore_tags && tagname == "del" && !@sibling_elements.include?(next_tagname) + @content += extract_consecutive_words(words) { |word| tag?(word) } end end @@ -293,8 +318,63 @@ def convert_html_to_list_of_words(x, use_brackets = false) end # of class Diff Builder - def diff(a, b, ignore_whitespace = false, ignore_tags = false) - DiffBuilder.new(a, b, ignore_whitespace, ignore_tags).build + class ConsecutiveDiffReducer + def initialize(skip: /^\s+$/) + @skip_regexp = skip + end + + def call(input) + token_regexp = /(]*>.*?<\/del>]*>.*?<\/ins>)/i + mode = :none + @output = [] + @buffer = [] + input.split(token_regexp).each do |token| + if token =~ token_regexp + flush_buffer! unless mode == :diffmod + mode = :diffmod + @buffer << token + elsif token =~ @skip_regexp && mode == :diffmod + @buffer << token + else + flush_buffer! + mode = :none + @output << token + end + end + flush_buffer! + @output.join + end + + def flush_buffer! + @output = @output + reduce_buffer + @buffer = [] + end + + def reduce_buffer + return [] if @buffer.empty? + delete_tag = nil + insert_tag = nil + deletes = [] + inserts = [] + @buffer.each do |token| + if token =~ @skip_regexp + deletes << token + inserts << token + else + m = token.match(/(]*>)(.*?)<\/del>(]*>)(.*?)<\/ins>/i) + fail "Token didn't match expression" unless m + delete_tag ||= m[1] + deletes << m[2] + insert_tag ||= m[3] + inserts << m[4] + end + end + [delete_tag, *deletes, "", insert_tag, *inserts, ""] + end + end # of class ConsecutiveDiffReducer + + def diff(a, b, *options) + DiffBuilder.new(a, b, *options).build end end diff --git a/spec/htmldiff_spec.rb b/spec/htmldiff_spec.rb index f8a4935..27bcb98 100644 --- a/spec/htmldiff_spec.rb +++ b/spec/htmldiff_spec.rb @@ -39,24 +39,106 @@ class TestDiff expect(diff).to eq("a bc") end - it "changes in properties will render both versions of the start tag, but not end tag" do - a = 'a b' - b = 'a c' - diff = TestDiff.diff(a, b, false, true) - expect(diff).to eq("a bc") - end - - it "works when jumping between tags and non tags" do - a = 'a b ce' - b = 'a c de' - diff = TestDiff.diff(a, b, false, true) - expect(diff).to eq("a bc cde") - end - it "example from the library" do a = '

a

' b = '

ab

c' diff = TestDiff.diff(a, b) expect(diff).to eq("

aab

c") end + + describe "ignore_tags option" do + describe "changes in properties should render balanced tags" do + describe "when disabled" do + it "will render both versions of the start tag, but not end tag" do + a = 'a b' + b = 'a c' + expected = 'a bc' + diff = TestDiff.diff(a, b, ignore_tags: false) + expect(diff).to eq(expected) + end + + it "will render both versions of the start tag, but not end tag" do + a = 'a b ce' + b = 'a c de' + expected = 'a bc cde' + diff = TestDiff.diff(a, b, ignore_tags: false) + expect(diff).to eq(expected) + end + end + + describe "when enabled" do + it "will produce valid html" do + a = 'a b' + b = 'a c' + expected = 'a bc' + diff = TestDiff.diff(a, b, ignore_tags: true) + expect(diff).to eq(expected) + end + + it "will produce valid html" do + a = 'a b ce' + b = 'a c de' + expected = 'a bc cde' + diff = TestDiff.diff(a, b, ignore_tags: true) + expect(diff).to eq(expected) + end + end + end + + describe "removing tag with similar siblings" do + describe "when disabled" do + it "should show deleted paragraph" do + a = '

first

second

' + b = '

first

' + expected = '

first

second

' + diff = TestDiff.diff(a, b, ignore_tags: false) + expect(diff).to eq(expected) + end + + it "should show deleted list-element" do + a = 'my list
  1. item a
  2. item b
' + b = 'my list
  1. item a
' + expected = 'my list
  1. item a
  2. item b
' + diff = TestDiff.diff(a, b, ignore_tags: false) + expect(diff).to eq(expected) + end + end + + describe "when enabled" do + it "should show deleted paragraph" do + a = '

first

second

' + b = '

first

' + expected = '

first

second

' + diff = TestDiff.diff(a, b, ignore_tags: true) + expect(diff).to eq(expected) + end + + it "should show deleted list-element" do + a = 'my list
  1. item a
  2. item b
' + b = 'my list
  1. item a
' + expected = 'my list
  1. item a
  2. item b
' + diff = TestDiff.diff(a, b, ignore_tags: true) + expect(diff).to eq(expected) + end + end + end + end + + describe "reduce_consecutive option" do + it "should diff individual words, when not enabled" do + a = '

Han går til samtaler ved en psykiater. Like a boss.

' + b = '

Han drikker stærk spiritus. Like a boss.

' + expected = '

Han gårdrikker tilstærk samtaler ved en psykiater.spiritus. Like a boss.

' + diff = TestDiff.diff(a, b, reduce_consecutive: false) + expect(diff).to eq(expected) + end + + it "should reduce consecutive matches, when enabled" do + a = '

Han går til samtaler ved en psykiater. Like a boss.

' + b = '

Han drikker stærk spiritus. Like a boss.

' + expected = '

Han går til samtaler ved en psykiater.drikker stærk spiritus. Like a boss.

' + diff = TestDiff.diff(a, b, reduce_consecutive: true) + expect(diff).to eq(expected) + end + end end