From e9d5a0916b907a594d1b4d6d984e2e28178f2daf Mon Sep 17 00:00:00 2001 From: Oliver Nassar Date: Thu, 15 Nov 2012 12:42:06 -0500 Subject: [PATCH] Added JS resource for those who don't wish to compile down CoffeeScript --- src/htmldiff.js | 368 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 368 insertions(+) create mode 100644 src/htmldiff.js diff --git a/src/htmldiff.js b/src/htmldiff.js new file mode 100644 index 0000000..a796f2d --- /dev/null +++ b/src/htmldiff.js @@ -0,0 +1,368 @@ +var Match, calculate_operations, consecutive_where, create_index, diff, find_match, find_matching_blocks, html_to_tokens, is_end_of_tag, is_start_of_tag, is_tag, is_whitespace, isnt_tag, op_map, recursively_find_matching_blocks, render_operations, wrap; + +is_end_of_tag = function(char) { + return char === '>'; +}; + +is_start_of_tag = function(char) { + return char === '<'; +}; + +is_whitespace = function(char) { + return /^\s+$/.test(char); +}; + +is_tag = function(token) { + return /^\s*<[^>]+>\s*$/.test(token); +}; + +isnt_tag = function(token) { + return !is_tag(token); +}; + +Match = (function() { + + function Match(start_in_before, start_in_after, length) { + this.start_in_before = start_in_before; + this.start_in_after = start_in_after; + this.length = length; + this.end_in_before = (this.start_in_before + this.length) - 1; + this.end_in_after = (this.start_in_after + this.length) - 1; + } + + return Match; + +})(); + +html_to_tokens = function(html) { + var char, current_word, mode, words, _i, _len; + mode = 'char'; + current_word = ''; + words = []; + for (_i = 0, _len = html.length; _i < _len; _i++) { + char = html[_i]; + switch (mode) { + case 'tag': + if (is_end_of_tag(char)) { + current_word += '>'; + words.push(current_word); + current_word = ''; + if (is_whitespace(char)) { + mode = 'whitespace'; + } else { + mode = 'char'; + } + } else { + current_word += char; + } + break; + case 'char': + if (is_start_of_tag(char)) { + if (current_word) { + words.push(current_word); + } + current_word = '<'; + mode = 'tag'; + } else if (/\s/.test(char)) { + if (current_word) { + words.push(current_word); + } + current_word = char; + mode = 'whitespace'; + } else if (/[\w\#@]+/i.test(char)) { + current_word += char; + } else { + if (current_word) { + words.push(current_word); + } + current_word = char; + } + break; + case 'whitespace': + if (is_start_of_tag(char)) { + if (current_word) { + words.push(current_word); + } + current_word = '<'; + mode = 'tag'; + } else if (is_whitespace(char)) { + current_word += char; + } else { + if (current_word) { + words.push(current_word); + } + current_word = char; + mode = 'char'; + } + break; + default: + throw new Error("Unknown mode " + mode); + } + } + if (current_word) { + words.push(current_word); + } + return words; +}; + +find_match = function(before_tokens, after_tokens, index_of_before_locations_in_after_tokens, start_in_before, end_in_before, start_in_after, end_in_after) { + var best_match_in_after, best_match_in_before, best_match_length, index_in_after, index_in_before, locations_in_after, looking_for, match, match_length_at, new_match_length, new_match_length_at, _i, _j, _len; + best_match_in_before = start_in_before; + best_match_in_after = start_in_after; + best_match_length = 0; + match_length_at = {}; + for (index_in_before = _i = start_in_before; start_in_before <= end_in_before ? _i < end_in_before : _i > end_in_before; index_in_before = start_in_before <= end_in_before ? ++_i : --_i) { + new_match_length_at = {}; + looking_for = before_tokens[index_in_before]; + locations_in_after = index_of_before_locations_in_after_tokens[looking_for]; + for (_j = 0, _len = locations_in_after.length; _j < _len; _j++) { + index_in_after = locations_in_after[_j]; + if (index_in_after < start_in_after) { + continue; + } + if (index_in_after >= end_in_after) { + break; + } + if (match_length_at[index_in_after - 1] == null) { + match_length_at[index_in_after - 1] = 0; + } + new_match_length = match_length_at[index_in_after - 1] + 1; + new_match_length_at[index_in_after] = new_match_length; + if (new_match_length > best_match_length) { + best_match_in_before = index_in_before - new_match_length + 1; + best_match_in_after = index_in_after - new_match_length + 1; + best_match_length = new_match_length; + } + } + match_length_at = new_match_length_at; + } + if (best_match_length !== 0) { + match = new Match(best_match_in_before, best_match_in_after, best_match_length); + } + return match; +}; + +recursively_find_matching_blocks = function(before_tokens, after_tokens, index_of_before_locations_in_after_tokens, start_in_before, end_in_before, start_in_after, end_in_after, matching_blocks) { + var match; + match = find_match(before_tokens, after_tokens, index_of_before_locations_in_after_tokens, start_in_before, end_in_before, start_in_after, end_in_after); + if (match != null) { + if (start_in_before < match.start_in_before && start_in_after < match.start_in_after) { + recursively_find_matching_blocks(before_tokens, after_tokens, index_of_before_locations_in_after_tokens, start_in_before, match.start_in_before, start_in_after, match.start_in_after, matching_blocks); + } + matching_blocks.push(match); + if (match.end_in_before <= end_in_before && match.end_in_after <= end_in_after) { + recursively_find_matching_blocks(before_tokens, after_tokens, index_of_before_locations_in_after_tokens, match.end_in_before + 1, end_in_before, match.end_in_after + 1, end_in_after, matching_blocks); + } + } + return matching_blocks; +}; + +create_index = function(p) { + var idx, index, token, _i, _len, _ref; + if (p.find_these == null) { + throw new Error('params must have find_these key'); + } + if (p.in_these == null) { + throw new Error('params must have in_these key'); + } + index = {}; + _ref = p.find_these; + for (_i = 0, _len = _ref.length; _i < _len; _i++) { + token = _ref[_i]; + index[token] = []; + idx = p.in_these.indexOf(token); + while (idx !== -1) { + index[token].push(idx); + idx = p.in_these.indexOf(token, idx + 1); + } + } + return index; +}; + +find_matching_blocks = function(before_tokens, after_tokens) { + var index_of_before_locations_in_after_tokens, matching_blocks; + matching_blocks = []; + index_of_before_locations_in_after_tokens = create_index({ + find_these: before_tokens, + in_these: after_tokens + }); + return recursively_find_matching_blocks(before_tokens, after_tokens, index_of_before_locations_in_after_tokens, 0, before_tokens.length, 0, after_tokens.length, matching_blocks); +}; + +calculate_operations = function(before_tokens, after_tokens) { + var action_map, action_up_to_match_positions, index, is_single_whitespace, last_op, match, match_starts_at_current_position_in_after, match_starts_at_current_position_in_before, matches, op, operations, position_in_after, position_in_before, post_processed, _i, _j, _len, _len1; + if (before_tokens == null) { + throw new Error('before_tokens?'); + } + if (after_tokens == null) { + throw new Error('after_tokens?'); + } + position_in_before = position_in_after = 0; + operations = []; + action_map = { + 'false,false': 'replace', + 'true,false': 'insert', + 'false,true': 'delete', + 'true,true': 'none' + }; + matches = find_matching_blocks(before_tokens, after_tokens); + matches.push(new Match(before_tokens.length, after_tokens.length, 0)); + for (index = _i = 0, _len = matches.length; _i < _len; index = ++_i) { + match = matches[index]; + match_starts_at_current_position_in_before = position_in_before === match.start_in_before; + match_starts_at_current_position_in_after = position_in_after === match.start_in_after; + action_up_to_match_positions = action_map[[match_starts_at_current_position_in_before, match_starts_at_current_position_in_after].toString()]; + if (action_up_to_match_positions !== 'none') { + operations.push({ + action: action_up_to_match_positions, + start_in_before: position_in_before, + end_in_before: (action_up_to_match_positions !== 'insert' ? match.start_in_before - 1 : void 0), + start_in_after: position_in_after, + end_in_after: (action_up_to_match_positions !== 'delete' ? match.start_in_after - 1 : void 0) + }); + } + if (match.length !== 0) { + operations.push({ + action: 'equal', + start_in_before: match.start_in_before, + end_in_before: match.end_in_before, + start_in_after: match.start_in_after, + end_in_after: match.end_in_after + }); + } + position_in_before = match.end_in_before + 1; + position_in_after = match.end_in_after + 1; + } + post_processed = []; + last_op = { + action: 'none' + }; + is_single_whitespace = function(op) { + if (op.action !== 'equal') { + return false; + } + if (op.end_in_before - op.start_in_before !== 0) { + return false; + } + return /^\s$/.test(before_tokens.slice(op.start_in_before, op.end_in_before + 1 || 9e9)); + }; + for (_j = 0, _len1 = operations.length; _j < _len1; _j++) { + op = operations[_j]; + if (((is_single_whitespace(op)) && last_op.action === 'replace') || (op.action === 'replace' && last_op.action === 'replace')) { + last_op.end_in_before = op.end_in_before; + last_op.end_in_after = op.end_in_after; + } else { + post_processed.push(op); + last_op = op; + } + } + return post_processed; +}; + +consecutive_where = function(start, content, predicate) { + var answer, index, last_matching_index, token, _i, _len; + content = content.slice(start, content.length + 1 || 9e9); + last_matching_index = void 0; + for (index = _i = 0, _len = content.length; _i < _len; index = ++_i) { + token = content[index]; + answer = predicate(token); + if (answer === true) { + last_matching_index = index; + } + if (answer === false) { + break; + } + } + if (last_matching_index != null) { + return content.slice(0, last_matching_index + 1 || 9e9); + } + return []; +}; + +wrap = function(tag, content) { + var length, non_tags, position, rendering, tags; + rendering = ''; + position = 0; + length = content.length; + while (true) { + if (position >= length) { + break; + } + non_tags = consecutive_where(position, content, isnt_tag); + position += non_tags.length; + if (non_tags.length !== 0) { + rendering += "<" + tag + ">" + (non_tags.join('')) + ""; + } + if (position >= length) { + break; + } + tags = consecutive_where(position, content, is_tag); + position += tags.length; + rendering += tags.join(''); + } + return rendering; +}; + +op_map = { + equal: function(op, before_tokens, after_tokens) { + return before_tokens.slice(op.start_in_before, op.end_in_before + 1 || 9e9).join(''); + }, + insert: function(op, before_tokens, after_tokens) { + var val; + val = after_tokens.slice(op.start_in_after, op.end_in_after + 1 || 9e9); + return wrap('ins', val); + }, + "delete": function(op, before_tokens, after_tokens) { + var val; + val = before_tokens.slice(op.start_in_before, op.end_in_before + 1 || 9e9); + return wrap('del', val); + } +}; + +op_map.replace = function(op, before_tokens, after_tokens) { + return (op_map["delete"](op, before_tokens, after_tokens)) + (op_map.insert(op, before_tokens, after_tokens)); +}; + +render_operations = function(before_tokens, after_tokens, operations) { + var op, rendering, _i, _len; + rendering = ''; + for (_i = 0, _len = operations.length; _i < _len; _i++) { + op = operations[_i]; + rendering += op_map[op.action](op, before_tokens, after_tokens); + } + return rendering; +}; + +diff = function(before, after) { + var ops; + if (before === after) { + return before; + } + before = html_to_tokens(before); + after = html_to_tokens(after); + ops = calculate_operations(before, after); + return render_operations(before, after, ops); +}; + +diff.html_to_tokens = html_to_tokens; + +diff.find_matching_blocks = find_matching_blocks; + +find_matching_blocks.find_match = find_match; + +find_matching_blocks.create_index = create_index; + +diff.calculate_operations = calculate_operations; + +diff.render_operations = render_operations; + +if (typeof define === 'function') { + define([], function() { + return diff; + }); +} else if (typeof module !== "undefined" && module !== null) { + module.exports = diff; +} else { + this.htmldiff = diff; +} +