Jump to content

Module:Diff

From Humanipedia
Revision as of 00:20, 17 November 2024 by hp>Jonesey95 (Update from sandbox per talk page discussion. Functional changes are (1) encode all new line characters to avoid Linter misnesting errors when block content is wrapped, and (2) vertically align the diff blocks at the top of their respective table cells for easier comparison.)
(diff) ← Older revision | Latest revision (diff) | Newer revision → (diff)

Lua error in package.lua at line 80: module 'Module:Yesno' not found. Provides functions for diffing text.

Usage

Ciaran Hope (born 4 August 1974) is an Irish composer of orchestral, choral, and film music.

Difference in words

{{TextDiff|[[Ciaran Hope]] (born 4 August 1974) is an Irish composer of orchestral, choral, and film music.|[[Ciaran Hope]] (was given birth to on 20 August last year) is an Irish composter of orchestral, oral, and film music dance moves.}}

Result:

Template:TextDiff

Difference in characters

{{StringDiff|[[Ciaran Hope]] (born 4 August 1974) is an Irish composer of orchestral, choral, and film music.|[[Ciaran Hope]] (was given birth to on 20 August last year) is an Irish composter of orchestral, oral, and film music dance moves.}}

Result:

Template:StringDiff

Example with invoke

{{#invoke:Diff|main|[[Ciaran Hope]] (born 4 August 1974) is an Irish composer of orchestral, choral, and film music.|[[Ciaran Hope]] (was given birth to on 20 August last year) is an Irish composter of orchestral, oral, and film music dance moves.}}

Result:

[[Ciaran Hope]] (born 4 August 1974) is an Irish composer of orchestral, choral, and film music.
+
[[Ciaran Hope]] (was given birth to on 20 August last year) is an Irish composter of orchestral, oral, and film music dance moves.

-----------------------------------------------------------------------------
-- Provides functions for diffing text.
--
-- (c) 2007, 2008  Yuri Takhteyev (yuri@freewisdom.org)
-- (c) 2007 Hisham Muhammad
-- Adapted to MediaWiki Lua originally by User:Ebrahim
--
-- License: MIT/X, see http://sputnik.freewisdom.org/en/License
-----------------------------------------------------------------------------

-- Presumably intended as the value for split()'s skip_separator argument.
-- NOTE(review): not referenced anywhere in the visible module; confirm
-- before removing.
local SKIP_SEPARATOR = true  -- a constant

-- Token statuses: the second element of each {token, status} pair that
-- diff() produces.
local IN   = "in"    -- token recorded as an insertion (taken from the new text)
local OUT  = "out"   -- token recorded as a deletion (taken from the old text)
local SAME = "same"  -- token common to both texts

-----------------------------------------------------------------------------
-- Split a string into tokens.  (Adapted from Gavin Kistner's split on
-- http://lua-users.org/wiki/SplitJoin.)
--
-- Text before each separator match becomes a token (possibly the empty
-- string, e.g. when the text starts with a separator); unless skip_separator
-- is set, the matched separator itself is added as a token too, so that
-- concatenating all tokens reproduces the input.  Trailing text after the
-- last separator is added only when it is non-empty.
--
-- @param text           A string to be split.
-- @param separator      [optional] the separator pattern (defaults to any
--                       whitespace - %s+).  If the pattern matches the empty
--                       string, splitting stops at that point and the rest
--                       of the text is returned as a single token.
-- @param skip_separator [optional] don't include the separator in the results.
-- @return               A list of tokens.
-----------------------------------------------------------------------------
local function split(text, separator, skip_separator)
	separator = separator or "%s+"
	local find, sub = mw.ustring.find, mw.ustring.sub
	local parts = {}
	local start = 1
	local split_start, split_end = find(text, separator, start)
	-- A zero-length match (split_end < split_start) would leave `start`
	-- where it is and make this loop spin forever, so treat it as
	-- "no further separator".
	while split_start and split_end >= split_start do
		parts[#parts + 1] = sub(text, start, split_start - 1)
		if not skip_separator then
			parts[#parts + 1] = sub(text, split_start, split_end)
		end
		start = split_end + 1
		split_start, split_end = find(text, separator, start)
	end
	local rest = sub(text, start)
	if rest ~= "" then
		parts[#parts + 1] = rest
	end
	return parts
end


-----------------------------------------------------------------------------
-- Builds the longest-common-subsequence length matrix for two token lists.
-- This is a faster implementation than one provided by stdlib.  Submitted by
-- Hisham Muhammad.  The algorithm was taken from:
-- http://en.wikibooks.org/wiki/Algorithm_implementation/Strings/Longest_common_subsequence
--
-- The matrix is sparse: rows and cells are created the first time they are
-- read, and a cell that was never written reads as 0.  Cell [i+1][j+1] is
-- filled in from the comparison of t1[i-1] with t2[j-1].
--
-- @param t1             the first list of tokens.
-- @param t2             the second list of tokens.
-- @return               the longest common subsequence matrix.
-----------------------------------------------------------------------------
local function quick_LCS(t1, t2)
	local len1, len2 = #t1, #t2

	-- Rows hand out 0 for any cell that has not been written yet (and
	-- remember it); the matrix hands out a fresh row for any missing index.
	local row_meta = {
		__index = function(row, col)
			rawset(row, col, 0)
			return 0
		end
	}
	local matrix = setmetatable({}, {
		__index = function(self, index)
			local row = setmetatable({}, row_meta)
			rawset(self, index, row)
			return row
		end
	})

	for i = 1, len1 + 1 do
		local below = matrix[i + 1]
		local current = matrix[i]
		local left_token = t1[i - 1]
		for j = 1, len2 + 1 do
			if left_token == t2[j - 1] then
				below[j + 1] = current[j] + 1
			else
				-- Keep the larger of the two neighbouring lengths.
				local left, above = below[j], current[j + 1]
				if left >= above then
					below[j + 1] = left
				else
					below[j + 1] = above
				end
			end
		end
	end
	return matrix
end



-----------------------------------------------------------------------------
-- Formats an inline diff as HTML, with <ins> and <del> tags.
--
-- Every token is passed through mw.text.nowiki before being emitted.
-- Inserted tokens are wrapped in <ins>, deleted tokens in <del>, and tokens
-- with any other status are emitted as they are.
--
-- @param tokens         a table of {token, status} pairs.
-- @return               an HTML string.
-----------------------------------------------------------------------------
local function format_as_html(tokens)
	-- Collect the pieces and join once at the end, instead of growing a
	-- string by repeated concatenation.
	local buffer = {}
	for _, token_record in ipairs(tokens) do
		local token = mw.text.nowiki(token_record[1])
		local status = token_record[2]
		if status == IN then
			buffer[#buffer + 1] = '<ins>' .. token .. '</ins>'
		elseif status == OUT then
			buffer[#buffer + 1] = '<del>' .. token .. '</del>'
		else
			buffer[#buffer + 1] = token
		end
	end
	return table.concat(buffer)
end

-----------------------------------------------------------------------------
-- Returns a diff of two strings as a list of pairs, where the first value
-- represents a token and the second the token's status ("same", "in", "out").
--
-- The common leading and trailing tokens are each reported as one combined
-- "same" entry (which is the empty string when there is nothing in common at
-- that end).  The returned list also carries a to_html field holding
-- format_as_html.
--
-- @param old             The "old" text string
-- @param new             The "new" text string
-- @param separator      [optional] the separator pattern (defaults to any
--                       whitespace).
-- @return               A list of annotated tokens.
-----------------------------------------------------------------------------
local function diff(old, new, separator)
	assert(old); assert(new)
	local new_tokens = split(new, separator)
	local old_tokens = split(old, separator)

	-- Strip the tokens both texts share at the start and at the end.  Usually
	-- only a few tokens in the middle differ, so this keeps the LCS matrix
	-- small.
	local head = ""
	while old_tokens[1] and old_tokens[1] == new_tokens[1] do
		head = head .. table.remove(old_tokens, 1)
		table.remove(new_tokens, 1)
	end
	local tail = ""
	while old_tokens[#old_tokens] and old_tokens[#old_tokens] == new_tokens[#new_tokens] do
		tail = table.remove(old_tokens) .. tail
		table.remove(new_tokens)
	end

	-- The matrix is walked from its far corner backwards, so entries are
	-- collected back-to-front and reversed at the end.
	local reversed = {}
	local function record(token, status)
		reversed[#reversed + 1] = {token, status}
	end

	-- The shared tail is the last thing in the final output, hence the first
	-- entry here.
	record(tail, SAME)

	local C = quick_LCS(old_tokens, new_tokens)
	local i, j = #old_tokens + 1, #new_tokens + 1
	while true do
		local old_i = old_tokens[i]
		local new_j = new_tokens[j]
		if i >= 1 and j >= 1 and old_i == new_j then
			-- On the very first step both entries are nil (one past the end
			-- of each list); nothing is recorded for that.
			if old_i then
				record(old_i, SAME)
			end
			i, j = i - 1, j - 1
		else
			local Cij1 = C[i][j-1]
			local Ci1j = C[i-1][j]
			if j >= 1 and (i == 0 or Cij1 >= Ci1j) then
				record(new_j, IN)
				j = j - 1
			elseif i >= 1 and (j == 0 or Cij1 < Ci1j) then
				record(old_i, OUT)
				i = i - 1
			else
				-- Both lists are exhausted.
				break
			end
		end
	end

	-- The shared head comes first in the final output, hence last here.
	record(head, SAME)

	-- Flip the collected entries into reading order.
	local result = {}
	for k = #reversed, 1, -1 do
		result[#result + 1] = reversed[k]
	end
	result.to_html = format_as_html
	return result
end

-----------------------------------------------------------------------------
-- Wiki diff style, currently just for a line.
--
-- Builds a one-row table with four cells: a "−" marker, the old text with
-- deleted tokens wrapped in <del>, a "+" marker, and the new text with
-- inserted tokens wrapped in <ins>.  Unchanged tokens appear in both text
-- cells.
--
-- @param old            The "old" text string.
-- @param new            The "new" text string.
-- @param separator      [optional] the separator pattern, handed to diff().
-- @return               The rendered markup as a string.
-----------------------------------------------------------------------------
local function wikiDiff(old, new, separator)
	local records = diff(old, new, separator)
	local builder = mw.html.create('')

	local markerCss = 'width: 2%; padding: 0.25em; font-weight: bold;' ..
		'font-size: 1.25em; text-align: end;'
	local wrapperCss = 'word-wrap: break-word; direction: ltr;'

	-- The -webkit-/-moz- start/end widths override the default border-width
	-- in browsers that support them; needed for RTL UI on commons.
	local cellCss = 'vertical-align:top; width: 48%; border-style: solid; border-radius: 0.33em; ' ..
		'padding: 0.33em 0.5em; color: inherit; font-size: 1em; font-family: monospace; white-space: pre-wrap; border-width: 1px 1px 1px 4px; ' ..
		'-webkit-border-end-width: 1px; -webkit-border-start-width: 4px; ' ..
		'-moz-border-end-width: 1px; -moz-border-start-width: 4px;'
	local changeCss = 'padding: 0.25em 0; font-weight: bold; text-decoration: initial;'

	-- Writes one side of the diff into `target`: tokens whose status equals
	-- `changedStatus` are wrapped in a highlighted <tagName> element,
	-- unchanged tokens are written as-is, everything else is left out.
	local function render(target, changedStatus, tagName, highlightCss)
		for _, entry in ipairs(records) do
			-- Force all newlines to encode to avoid linter issues.  The
			-- assignment keeps only gsub's first result (the string), not
			-- its replacement count.
			local text = mw.text.nowiki(entry[1]):gsub("\n", "&#10;")
			local status = entry[2]
			if status == changedStatus then
				target
					:tag(tagName)
						:cssText(highlightCss)
						:addClass('diffchange')
						:addClass('diffchange-inline')
						:wikitext(text)
			elseif status == SAME then
				target:wikitext(text)
			end
		end
	end

	local row = builder:tag('table'):addClass('diff'):css('width', '100%'):tag('tr')

	-- Old side.
	row:tag('td')
		:addClass('diff-marker')
		:cssText(markerCss)
		:wikitext('−')

	local oldSide = row
		:tag('td')
			:cssText('border-color: var(--background-color-content-removed,#ffe49c); ' .. cellCss)
			:addClass('diff-deletedline')
			:tag('div')
				:cssText(wrapperCss)

	render(oldSide, OUT, 'del',
		'background: var(--background-color-content-removed,#ffe49c); color: inherit; ' .. changeCss)

	-- New side.
	row:tag('td')
		:cssText(markerCss)
		:wikitext('+')

	local newSide = row
		:tag('td')
			:cssText('border-color: var(--background-color-content-added,#a3d3ff); ' .. cellCss)
			:addClass('diff-addedline')
			:tag('div')
				:cssText(wrapperCss)

	render(newSide, IN, 'ins',
		'background: var(--background-color-content-added,#a3d3ff); color: inherit; ' .. changeCss)

	return tostring(builder)
end

-----------------------------------------------------------------------------
-- Entry point for {{#invoke:Diff|main|old|new|separator}}.
--
-- The first two positional arguments are run through mw.text.unstrip and
-- mw.text.decode before being compared; a missing argument is treated as the
-- empty string.
--
-- @param frame          The invocation frame; reads frame.args[1] (old text),
--                       frame.args[2] (new text) and frame.args[3]
--                       ([optional] separator pattern).
-- @return               The markup produced by wikiDiff.
-----------------------------------------------------------------------------
local function main(frame)
	local args = frame.args

	local function plain(raw)
		return mw.text.decode(mw.text.unstrip(raw or ''))
	end

	-- An argument that is present but empty (e.g. {{{3|}}} in a calling
	-- template) is truthy in Lua; treat it like a missing one, because an
	-- empty pattern cannot act as a separator.
	local separator = args[3]
	if separator == nil or separator == '' then
		separator = '[%s%.:-]+'
	end

	return wikiDiff(plain(args[1]), plain(args[2]), separator)
end

-- Public interface of the module.
local exports = {}
exports.diff = diff          -- token-level diff as a list of {token, status} pairs
exports.wikiDiff = wikiDiff  -- diff rendered as a table in wiki diff style
exports.main = main          -- #invoke entry point
return exports