Updated to fix some printing issues and added support for diffing tables
of tokens and using different separators.
This commit is contained in:
parent
dc84e1e107
commit
e318b75349
28
README.md
28
README.md
@ -22,12 +22,12 @@ returns the table:
|
||||
|
||||
```lua
|
||||
{
|
||||
{old="hello\n...\n", old_line=1, old_line_end=2,
|
||||
new="hello\n...\n", new_line=1, new_line_end=2},
|
||||
{old="to the\n", old_line=3, old_line_end=3,
|
||||
new="*at* the\n", new_line=3, new_line_end=3},
|
||||
{old="world", old_line=4, old_line_end=4,
|
||||
new="world", new_line=4, new_line_end=4},
|
||||
{old="hello\n...\n", old_first=1, old_last=2,
|
||||
new="hello\n...\n", new_first=1, new_last=2},
|
||||
{old="to the\n", old_first=3, old_last=3,
|
||||
new="*at* the\n", new_first=3, new_last=3},
|
||||
{old="world", old_first=4, old_last=4,
|
||||
new="world", new_first=4, new_last=4},
|
||||
}
|
||||
```
|
||||
|
||||
@ -46,3 +46,19 @@ produces the output:
|
||||
+ *at* the
|
||||
world
|
||||
```
|
||||
|
||||
You can also diff tables:
|
||||
|
||||
```lua
|
||||
diff({1,2,3,4,5},{99,2,4,5,6})
|
||||
```
|
||||
|
||||
returns:
|
||||
|
||||
```lua
|
||||
{{old={1}, old_first=1, old_last=1, new={99}, new_first=1, new_last=1},
|
||||
{old={2}, old_first=2, old_last=2, new={2}, new_first=2, new_last=2},
|
||||
{old={3}, old_first=3, old_last=3, new={}, new_first=3, new_last=2},
|
||||
{old={4, 5}, old_first=4, old_last=5, new={4, 5}, new_first=3, new_last=4},
|
||||
{old={}, old_first=6, old_last=5, new={6}, new_first=5, new_last=5}}
|
||||
```
|
||||
|
116
diff.lua
116
diff.lua
@ -4,8 +4,8 @@
|
||||
-- containing the details of the difference between them. Usage:
|
||||
-- >> local diff = require("diff")
|
||||
-- >> diff("hello\nworld", "hello\nWORLD\n!!!")
|
||||
-- {{old="hello\n", old_line=1, old_line_end=1, new="hello\n", new_line=1, new_line_end=1},
|
||||
-- {old="world", old_line=2, old_line_end=2, new="WORLD\n!!!", new_line=2, new_line_end=3}}
|
||||
-- {{old="hello\n", old_first=1, old_last=1, new="hello\n", new_first=1, new_last=1},
|
||||
-- {old="world", old_first=2, old_last=2, new="WORLD\n!!!", new_first=2, new_last=3}}
|
||||
-- The returned diff table also has a diff:print() method on it that can be used to print
|
||||
-- the diff in a human-readable form. See below for the print options.
|
||||
--
|
||||
@ -32,52 +32,75 @@
|
||||
-- OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
-- SOFTWARE.
|
||||
|
||||
local diff_mt = {__index={}}
|
||||
|
||||
-- ANSI color codes
|
||||
local function color(n) return string.char(27)..("[%dm"):format(n) end
|
||||
local COLORS = {green=color(32), red=color(31), bright=color(1), underscore=color(4), reset=color(0)}
|
||||
|
||||
-- Return a new diff as if it were a diff of strings, rather than tables.
|
||||
diff_mt.__index.stringify = function(d, sep)
|
||||
if #d > 0 and type(d[1].old) == 'string' then return d end
|
||||
sep = sep or ", "
|
||||
local stringified = {}
|
||||
for i,chunk in ipairs(d) do
|
||||
stringified[i] = {
|
||||
old=table.concat(chunk.old,sep), old_first=chunk.old_first, old_last=chunk.old_last,
|
||||
new=table.concat(chunk.new,sep), new_first=chunk.new_first, new_last=chunk.new_last,
|
||||
}
|
||||
end
|
||||
return setmetatable(stringified, diff_mt)
|
||||
end
|
||||
|
||||
-- With a diff, you can call diff:print{...} to print it out. The options available are:
|
||||
-- color = (true/false), true by default, whether or not to print with colors.
|
||||
-- context = <number>, how many lines of context to print around the diff, defaults to infinite
|
||||
-- line_numbers = (true/false), false by default, whether or not to print line numbers
|
||||
local function print_diff(d, options)
|
||||
-- context = <number>, how many lines of context to print around the diff (default: infinite)
|
||||
-- sep = <string>, the separator used for context (default: "\n")
|
||||
-- numbers = (true/false), whether or not to print the (line) numbers where the chunks
|
||||
-- came from in the input (default: false)
|
||||
diff_mt.__index.print = function(d, options)
|
||||
options = options or {}
|
||||
if #d > 0 and type(d[1].old) == 'table' then
|
||||
d = d:stringify(options.sep)
|
||||
end
|
||||
local colors = (options.color ~= false) and COLORS or setmetatable({}, {__index=function() return "" end})
|
||||
local line_numbers = options.line_numbers or false
|
||||
local numbers = options.numbers or false
|
||||
local context_pattern = nil
|
||||
if options.context and options.context ~= 0 and options.context ~= math.huge then
|
||||
local lines = ("[^\n]*\n"):rep(options.context)
|
||||
context_pattern = ("^("..lines..").*\n("..lines..")$")
|
||||
options.context = options.context or math.huge
|
||||
if options.context ~= 0 and options.context ~= math.huge then
|
||||
local sep = options.sep or "\n"
|
||||
local lines = ("[^"..sep.."]*"..sep):rep(options.context)
|
||||
context_pattern = ("^("..lines..").*"..sep.."("..lines..")$")
|
||||
end
|
||||
for i,chunk in ipairs(d) do
|
||||
if chunk.old == chunk.new then
|
||||
-- Unchanged
|
||||
local old = chunk.old
|
||||
local same = chunk.old
|
||||
if context_pattern then
|
||||
local before, after = old:match(context_pattern)
|
||||
local before, after = same:match(context_pattern)
|
||||
if before and after then
|
||||
-- "\b" is used as a hack to make sure the ellipsis is unindented
|
||||
if i == 1 then old = "\b\b…\n"..after
|
||||
elseif i == #d then old = before.."\n\b\b…"
|
||||
else old = before.."\b\b…\n"..after end
|
||||
if i == 1 then same = "\b\b…\n"..after
|
||||
elseif i == #d then same = before.."\n\b\b…"
|
||||
else same = before.."\b\b…\n"..after end
|
||||
end
|
||||
end
|
||||
if not (options and options.context == 0) then
|
||||
io.write((old:gsub("([^\n]*)\n?", " %1\n")))
|
||||
io.write((same:gsub("([^\n]*)\n?", " %1\n")))
|
||||
end
|
||||
else
|
||||
-- Changed
|
||||
if #chunk.old > 0 then
|
||||
if line_numbers then
|
||||
if numbers then
|
||||
print(colors.underscore..colors.bright..colors.red..
|
||||
("Old line %d-%d:"):format(chunk.old_line, chunk.old_line_end)..colors.reset)
|
||||
("Old #%d-%d:"):format(chunk.old_first, chunk.old_last)..colors.reset)
|
||||
end
|
||||
io.write(colors.red..(chunk.old:gsub("([^\n]*)\n?", "- %1\n"))..colors.reset)
|
||||
end
|
||||
if #chunk.new > 0 then
|
||||
if line_numbers then
|
||||
if numbers then
|
||||
print(colors.underscore..colors.bright..colors.green..
|
||||
("New line %d-%d:"):format(chunk.new_line, chunk.new_line_end)..colors.reset)
|
||||
("New #%d-%d:"):format(chunk.new_first, chunk.new_last)..colors.reset)
|
||||
end
|
||||
io.write(colors.green..(chunk.new:gsub("([^\n]*)\n?", "+ %1\n"))..colors.reset)
|
||||
end
|
||||
@ -85,23 +108,30 @@ local function print_diff(d, options)
|
||||
end
|
||||
end
|
||||
|
||||
local diff_mt = {__index={print=print_diff}}
|
||||
|
||||
-- Take two strings, and return a table representing a line-by-line diff of the two.
|
||||
-- The return value is a list of changes that have a .change of "+", "-", or nil
|
||||
-- and a .lines with the relevant lines.
|
||||
local function diff(old, new)
|
||||
-- Take two strings or tables, and return a table representing a chunk-by-chunk diff of the two.
|
||||
-- By default, strings are broken up by lines, but the optional third parameter "sep" lets
|
||||
-- you provide a different separator to break on.
|
||||
-- The return value is a list of chunks that have .old, .new corresponding to the old and
|
||||
-- new versions. For identical chunks, .old == .new.
|
||||
local function diff(old, new, sep)
|
||||
local insert, concat = table.insert, table.concat
|
||||
|
||||
-- Split into lines
|
||||
local A, B = old, new
|
||||
if type(A) ~= 'table' then
|
||||
A = {}
|
||||
for c in old:gmatch("[^\n]*\n?") do insert(A, c) end
|
||||
end
|
||||
if type(B) ~= 'table' then
|
||||
B = {}
|
||||
for c in new:gmatch("[^\n]*\n?") do insert(B, c) end
|
||||
local A, B, slice
|
||||
if type(old) == 'string' and type(new) == 'string' then
|
||||
-- Split into a table using sep (default: newline)
|
||||
sep = sep or "\n"
|
||||
A, B = {}, {}
|
||||
for c in old:gmatch("[^"..sep.."]*"..sep.."?") do insert(A, c) end
|
||||
for c in new:gmatch("[^"..sep.."]*"..sep.."?") do insert(B, c) end
|
||||
slice = function(X,start,stop) return concat(X,"",start,stop) end
|
||||
elseif type(old) == 'table' and type(new) == 'table' then
|
||||
A, B = old, new
|
||||
slice = function(X,start,stop)
|
||||
local s = {}
|
||||
for i=start,stop do s[#s+1] = X[i] end
|
||||
return s
|
||||
end
|
||||
else
|
||||
error("Two different types passed to diff: "..type(old).." and "..type(new))
|
||||
end
|
||||
|
||||
-- Find the longest common subsequence between A[a_min..a_max] and B[b_min..b_max] (inclusive),
|
||||
@ -139,23 +169,25 @@ local function diff(old, new)
|
||||
insert(common_subsequences, lcs)
|
||||
find_common_subsequences(lcs.a + lcs.length, a_max, lcs.b + lcs.length, b_max)
|
||||
end
|
||||
|
||||
find_common_subsequences(1,#A, 1,#B)
|
||||
-- For convenience in iteration:
|
||||
|
||||
-- For convenience in iteration (this catches matching chunks at the end):
|
||||
insert(common_subsequences, {a=#A+1, b=#B+1, length=0})
|
||||
local chunks = setmetatable({}, diff_mt)
|
||||
local a, b = 1, 1
|
||||
for _,subseq in ipairs(common_subsequences) do
|
||||
if subseq.a > a or subseq.b > b then
|
||||
insert(chunks, {
|
||||
old=concat(A, "", a, subseq.a-1), old_line=a, old_line_end=subseq.a-1,
|
||||
new=concat(B, "", b, subseq.b-1), new_line=b, new_line_end=subseq.b-1})
|
||||
old=slice(A, a, subseq.a-1), old_first=a, old_last=subseq.a-1,
|
||||
new=slice(B, b, subseq.b-1), new_first=b, new_last=subseq.b-1})
|
||||
end
|
||||
if subseq.length > 0 then
|
||||
local lines = concat(A, "", subseq.a, subseq.a+subseq.length-1)
|
||||
-- Ensure that the *same* table is used for .old and .new so equality checks
|
||||
-- suffice and you don't need to do element-wise comparisons.
|
||||
local same = slice(A, subseq.a, subseq.a+subseq.length-1)
|
||||
insert(chunks, {
|
||||
old=lines, old_line=subseq.a, old_line_end=subseq.a+subseq.length-1,
|
||||
new=lines, new_line=subseq.b, new_line_end=subseq.b+subseq.length-1})
|
||||
old=same, old_first=subseq.a, old_last=subseq.a+subseq.length-1,
|
||||
new=same, new_first=subseq.b, new_last=subseq.b+subseq.length-1})
|
||||
end
|
||||
a = subseq.a + subseq.length
|
||||
b = subseq.b + subseq.length
|
||||
|
Loading…
Reference in New Issue
Block a user