commit ccef71870c0754017a79c64ce076b5865b6400b7 Author: Bruce Hill Date: Thu Oct 26 02:18:01 2017 -0700 Initial commit. diff --git a/README.md b/README.md new file mode 100644 index 0000000..0f07af4 --- /dev/null +++ b/README.md @@ -0,0 +1,29 @@ +# LuaDiffer: A simple lua diff library + +This is a simple lua module that performs a diff on two strings and returns a table of +string chunks. + +## Example + +```lua +make_diff = require "diff" +s1 = [[ +hello +to the +world]] +s2 = [[ +hello +at the +world]] +diff = make_diff(s1, s2) +diff:print{color=false} +``` + +Produces: + +``` + hello +- to the ++ at the + world +``` diff --git a/diff.lua b/diff.lua new file mode 100644 index 0000000..5404a1b --- /dev/null +++ b/diff.lua @@ -0,0 +1,160 @@ +-- LuaDiffer: A simple Lua diff library +-- +-- This file returns a function that can be called on two strings to return a table +-- containing the details of the difference between them. Usage: +-- >> local diff = require("diff") +-- >> diff("hello\nworld", "hello\nWORLD\n!!!") +-- {{old="hello\n", old_line=1, old_line_end=1, new="hello\n", new_line=1, new_line_end=1}, +-- {old="world", old_line=2, old_line_end=2, new="WORLD\n!!!", new_line=2, new_line_end=3}} +-- The returned diff table also has a diff:print() method on it that can be used to print +-- the diff in a human-readable form. See below for the print options. 
--
-- This implementation uses the Hunt–McIlroy algorithm, and is inspired by Jason Orendorff's
-- lovely python implementation at: http://pynash.org/2013/02/26/diff-in-50-lines/
--
-- Copyright 2017 Bruce Hill
--
-- Permission is hereby granted, free of charge, to any person obtaining a copy of
-- this software and associated documentation files (the "Software"), to deal in
-- the Software without restriction, including without limitation the rights to
-- use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
-- of the Software, and to permit persons to whom the Software is furnished to do
-- so, subject to the following conditions:
--
-- The above copyright notice and this permission notice shall be included in all
-- copies or substantial portions of the Software.
--
-- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-- IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-- FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-- AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-- LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-- OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-- SOFTWARE.

-- ANSI color codes

-- Build the escape sequence ESC "[" <n> "m" for the numeric code `n`.
local function color(n)
    return string.char(27)..("[%dm"):format(n)
end

-- Escape sequences used when printing a diff, keyed by name.
local COLORS = {
    green=color(32),
    red=color(31),
    bright=color(1),
    underscore=color(4),
    reset=color(0),
}

-- With a diff, you can call diff:print{...} to print it out. The options available are:
--   color = (true/false), true by default, whether or not to print with colors.
--   context = (number), how many lines of context to print around the diff, defaults to infinite
--   line_numbers = (true/false), false by default, whether or not to print line numbers

-- Prepend `prefix` to every line of `text` and return the result.
-- The pattern "[^\n]*\n?" can also match the empty string at the very end of
-- `text`. Lua 5.1/5.2/LuaJIT substitute that empty match (which would emit a
-- stray prefix after the last line) while newer releases skip it, so empty
-- matches are left untouched here to get the same output on every version.
local function prefix_lines(text, prefix)
    return (text:gsub("[^\n]*\n?", function(line)
        if line ~= "" then return prefix..line end
    end))
end

-- Print the diff `d` (a list of chunks with .old/.new text and line ranges) to
-- standard output. `options` is an optional table; see the option list above.
-- Unchanged chunks are printed with a blank prefix, removed text with "- " and
-- added text with "+ ". A final newline is always written.
local function print_diff(d, options)
    options = options or {}
    -- With color disabled, every color lookup yields "" so the code below
    -- does not need to special-case it.
    local colors = (options.color ~= false) and COLORS or setmetatable({}, {__index=function() return "" end})
    local line_numbers = options.line_numbers or false
    local context = options.context

    -- When a finite, non-zero context is requested, build a pattern that
    -- captures the first and last `context` lines of an unchanged chunk.
    local context_pattern = nil
    if context and context ~= 0 and context ~= math.huge then
        local lines = ("[^\n]*\n"):rep(context)
        context_pattern = "^("..lines..").*\n("..lines..")$"
    end

    for i,chunk in ipairs(d) do
        if chunk.old == chunk.new then
            -- Unchanged
            local old = chunk.old
            if context_pattern then
                local before, after = old:match(context_pattern)
                if before and after then
                    -- "\b" is used as a hack to make sure the ellipsis is unindented
                    if i == 1 then old = "\b\b…\n"..after
                    elseif i == #d then old = before.."\n\b\b…"
                    else old = before.."\b\b…\n"..after end
                end
            end
            -- context = 0 means "print only the changes".
            if context ~= 0 then
                -- NOTE(review): the prefix is a single space as it appears in the
                -- source; the two-backspace hack above suggests it may originally
                -- have been two characters wide -- confirm.
                io.write(prefix_lines(old, " "))
            end
        else
            -- Changed
            if #chunk.old > 0 then
                if line_numbers then
                    print(colors.underscore..colors.bright..colors.red..
                        ("Old line %d-%d:"):format(chunk.old_line, chunk.old_line_end)..colors.reset)
                end
                io.write(colors.red..prefix_lines(chunk.old, "- ")..colors.reset)
            end
            if #chunk.new > 0 then
                if line_numbers then
                    print(colors.underscore..colors.bright..colors.green..
                        ("New line %d-%d:"):format(chunk.new_line, chunk.new_line_end)..colors.reset)
                end
                io.write(colors.green..prefix_lines(chunk.new, "+ ")..colors.reset)
            end
        end
    end
    io.write('\n')
end

-- Metatable attached to diff results so that diff:print{...} works.
local diff_mt = {__index={print=print_diff}}

-- Take two strings, and return a table representing a line-by-line diff of the two.
-- The return value is a list of chunks, in order, carrying the diff metatable (so
-- diff:print{...} works on it). Each chunk has:
--   .old, .new                   the text of the chunk on each side (equal for unchanged chunks)
--   .old_line, .old_line_end     1-based inclusive line range in `old`
--   .new_line, .new_line_end     1-based inclusive line range in `new`
-- For a pure insertion or deletion the empty side has an empty string and a
-- line range whose end is one less than its start.
local function diff(old, new)
    local insert, concat = table.insert, table.concat

    -- Split `text` into lines, each keeping its trailing "\n" (if it has one).
    -- The pattern can also match the empty string at the end of the text, and
    -- whether gmatch reports that match differs between Lua versions, so empty
    -- matches are dropped. Empty input is kept as a single empty line, which is
    -- what the pattern yields for "" on every version.
    local function split_lines(text)
        local lines = {}
        for line in text:gmatch("[^\n]*\n?") do
            if line ~= "" then insert(lines, line) end
        end
        if #lines == 0 then insert(lines, "") end
        return lines
    end

    local A, B = split_lines(old), split_lines(new)

    -- Find the longest common run of consecutive equal lines between A[a_min..a_max]
    -- and B[b_min..b_max] (inclusive), and return a table with .a (the starting
    -- position in A), .b (the starting position in B) and .length (0 if none).
    local longest_common_subsequence = function(a_min,a_max, b_min,b_max)
        local longest = {a=a_min, b=b_min, length=0}
        -- runs[b] is the length of the matching run ending at (previous a, b)
        local runs = {}
        for a = a_min, a_max do
            local new_runs = {}
            for b = b_min, b_max do
                if A[a] == B[b] then
                    local new_run_len = 1 + (runs[b-1] or 0)
                    new_runs[b] = new_run_len
                    -- Strict ">" keeps the earliest run among equally long ones
                    if new_run_len > longest.length then
                        longest.a = a - new_run_len + 1
                        longest.b = b - new_run_len + 1
                        longest.length = new_run_len
                    end
                end
            end
            runs = new_runs
        end
        return longest
    end

    -- Find *all* the common subsequences between A[a_min..a_max] and B[b_min..b_max] (inclusive)
    -- and put them into the common_subsequences table, ordered by position.
    local common_subsequences = {}
    local find_common_subsequences
    find_common_subsequences = function(a_min,a_max, b_min,b_max)
        -- Take a greedy approach and pull out the longest subsequences first
        local lcs = longest_common_subsequence(a_min,a_max, b_min,b_max)
        if lcs.length == 0 then return end
        -- Recurse on what lies before and after the run, on both sides
        find_common_subsequences(a_min, lcs.a - 1, b_min, lcs.b - 1)
        insert(common_subsequences, lcs)
        find_common_subsequences(lcs.a + lcs.length, a_max, lcs.b + lcs.length, b_max)
    end

    find_common_subsequences(1,#A, 1,#B)
    -- For convenience in iteration: a zero-length sentinel past the end of both
    -- inputs, so trailing changed lines are flushed by the loop below.
    insert(common_subsequences, {a=#A+1, b=#B+1, length=0})

    local chunks = setmetatable({}, diff_mt)
    -- a, b: first line on each side not yet emitted into a chunk
    local a, b = 1, 1
    for _,subseq in ipairs(common_subsequences) do
        -- Anything between the previous common run and this one is a change
        if subseq.a > a or subseq.b > b then
            insert(chunks, {
                old=concat(A, "", a, subseq.a-1), old_line=a, old_line_end=subseq.a-1,
                new=concat(B, "", b, subseq.b-1), new_line=b, new_line_end=subseq.b-1})
        end
        -- The common run itself is an unchanged chunk (skipped for the sentinel)
        if subseq.length > 0 then
            local lines = concat(A, "", subseq.a, subseq.a+subseq.length-1)
            insert(chunks, {
                old=lines, old_line=subseq.a, old_line_end=subseq.a+subseq.length-1,
                new=lines, new_line=subseq.b, new_line_end=subseq.b+subseq.length-1})
        end
        a = subseq.a + subseq.length
        b = subseq.b + subseq.length
    end
    return chunks
end
return diff