Module:TextUtil

From Tardis Wiki, the free Doctor Who reference
Revision as of 20:36, 13 August 2024 by Bongolium500 (talk | contribs)
(diff) ← Older revision | Latest revision (diff) | Newer revision → (diff)

Documentation for this module may be created at Module:TextUtil/doc

-- Originally from https://lol.fandom.com/wiki/Module:TextUtil
-- Source for gsplit, split, and trim: https://phabricator.wikimedia.org/diffusion/ELUA/browse/master/includes/engines/LuaCommon/lualib/mw.text.lua

local p = {}
local h = {}

--- Returns an iterator that yields the pieces of text between separators.
-- @param text    string to split
-- @param pattern separator; defaults to ','. Unless plain is set it is used
--                as a Lua pattern and wrapped in '%s*' on both sides, so
--                whitespace around each separator is swallowed.
-- @param plain   when truthy, pattern is matched as literal text
-- @return iterator function (plus two nils for the generic-for protocol)
function p.gsplit( text, pattern, plain )
	local sep = pattern or ','
	if not plain then
		sep = '%s*' .. sep .. '%s*'
	end

	local pos = 1					-- start of the next piece; nil once the text is exhausted
	local textLen = text:len()

	local function nextPiece()
		if not pos then
			return
		end

		local first, last = text:find( sep, pos, plain )

		-- No further separator: the remainder is the final piece.
		if not first then
			local tail = text:sub( pos )
			pos = nil
			return tail
		end

		-- The separator matched the empty string: hand out one byte at a
		-- time so the iteration still advances.
		if last < first then
			local piece = text:sub( pos, first )
			if first < textLen then
				pos = first + 1
			else
				pos = nil
			end
			return piece
		end

		-- Ordinary separator: yield what precedes it (possibly nothing).
		local piece = ''
		if first > pos then
			piece = text:sub( pos, first - 1 )
		end
		pos = last + 1
		return piece
	end

	return nextPiece, nil, nil
end

--- Splits text like p.split, except that the empty string yields an empty
-- table rather than a table holding one empty string.
function p.splitNonempty(text, pattern, plain)
	if text ~= '' then
		return p.split(text, pattern, plain)
	end
	return {}
end

--- Splits text like p.split, but returns nil when text is missing
-- (nil/false) or the empty string.
function p.splitOrNil(text, pattern, plain)
	if text and text ~= '' then
		return p.split(text, pattern, plain)
	end
	return nil
end

--- Collects every piece produced by p.gsplit into a list.
-- A missing (nil/false) text gives an empty table.
-- @param text    string to split
-- @param pattern separator, forwarded to p.gsplit
-- @param plain   forwarded to p.gsplit
-- @return table of pieces in order
function p.split(text, pattern, plain)
	local pieces = {}
	if text then
		for piece in p.gsplit(text, pattern, plain) do
			table.insert(pieces, piece)
		end
	end
	return pieces
end

--- Passes a table through untouched; anything else is handed to p.split.
-- @param str   table (returned as-is) or value to split
-- @param sep   separator, forwarded to p.split
-- @param plain forwarded to p.split
function p.splitIfString(str, sep, plain)
	if type(str) ~= 'table' then
		return p.split(str, sep, plain)
	end
	return str
end
	

--- Strips leading and trailing characters from s.
-- @param s       string to trim
-- @param charset characters to strip, written as the inside of a Lua
--                pattern set; defaults to tab, CR, LF, form feed and space
-- @return the trimmed string (single value)
function p.trim( s, charset )
	local class = '[' .. ( charset or '\t\r\n\f ' ) .. ']*'
	-- Assigning to one local discards gsub's replacement count.
	local trimmed = s:gsub( '^' .. class .. '(.-)' .. class .. '$', '%1' )
	return trimmed
end

--- Escapes every Lua pattern magic character in link so the result can be
-- used as a literal inside a pattern (e.g. with gsub or find).
-- @param link text to escape; nil/false is treated as ''
-- @return the escaped string (single value)
function p.escape(link)
	link = link or ''
	-- One pass over the whole magic set ( ^ $ ( ) % . [ ] * + - ? ): each
	-- match is re-emitted with a '%' in front. '%%' in the replacement is a
	-- literal percent sign and '%0' is the matched character, so no
	-- replacement escape other than those two is needed (sequences such as
	-- '%-' in a replacement string are rejected by Lua versions after 5.1).
	local escaped = string.gsub(link, '[%^%$%(%)%%%.%[%]%*%+%-%?]', '%%%0')
	return escaped
end

--- Applies a caller-supplied set of substitutions to link.
-- @param link string to transform
-- @param tbl  table whose values each carry a `find` pattern and a
--             `replace` value, both passed straight to gsub. Entries are
--             visited with pairs, so their order is not guaranteed.
-- @return the transformed string
function p.escapeCustom(link, tbl)
	local result = link
	for _, rule in pairs(tbl) do
		-- keep only the string; gsub's match count is dropped
		local replaced = result:gsub(rule.find, rule.replace)
		result = replaced
	end
	return result
end

--- Returns the character whose byte value is one higher than that of the
-- first byte of char (e.g. 'a' -> 'b').
function p.nextLetter(char)
	local code = char:byte()
	return string.char(code + 1)
end

--- Builds external-link wikitext: [link text].
-- @param link URL; when nil/false or '' nothing is returned
-- @param text label; defaults to 'Link'
function p.extLink(link, text)
	if not link or link == '' then
		return
	end
	local label = text or 'Link'
	return string.format('[%s %s]', link, label)
end

--- Builds internal-link wikitext: [[link|text]].
-- An empty link counts as missing. When the link is missing the text doubles
-- as the target; when the text is missing the label is 'Link'. With neither,
-- nothing is returned.
function p.intLink(link, text)
	local target = link
	if target == '' then
		target = nil
	end
	if target or text then
		return string.format('[[%s|%s]]', target or text, text or 'Link')
	end
end

--- Builds file wikitext with an explicit link target: [[File:name|link=target]].
-- Every occurrence of 'File:' is removed from file before the prefix is
-- re-added, so the argument may be given with or without it.
-- @param file   file name; nil/false or '' yields nil
-- @param target value interpolated after 'link='
function p.fileLink(file, target)
	if not file or file == '' then
		return nil
	end
	local name = file:gsub('File:', '')
	return string.format('[[File:%s|link=%s]]', name, target)
end

--- Builds a link, choosing the external form when link contains 'http'
-- anywhere and the internal form otherwise (including when link is missing).
function p.link(link, text)
	local looksExternal = link and link:find('http')
	if looksExternal then
		return p.extLink(link, text)
	end
	return p.intLink(link, text)
end

--- Like p.link, but an internal link built from a present link value is
-- wrapped in <span class="external">. A missing link falls back to a plain
-- p.intLink; a link containing 'http' becomes an external link.
function p.ambiguousLink(link, text)
	if not link then
		return p.intLink(link, text)
	end
	if not link:find('http') then
		return ('<span class="external">%s</span>'):format(p.intLink(link, text))
	end
	return p.extLink(link, text)
end

--- External-link wikitext when a link is given, otherwise the bare text.
-- @param link URL; nil/false or '' makes the function return text unchanged
-- @param text label; defaults to the link itself
function p.extLinkOrText(link, text)
	if not link or link == '' then
		return text
	end
	return string.format('[%s %s]', link, text or link)
end

--- Internal-link wikitext when a link is given, otherwise the bare text.
-- @param link page title; nil/false or '' makes the function return text unchanged
-- @param text label; defaults to the link itself
function p.intLinkOrText(link, text)
	if not link or link == '' then
		return text
	end
	return string.format('[[%s|%s]]', link, text or link)
end

--- Builds a link when one is given, otherwise returns the bare text.
-- A link containing 'http' anywhere is rendered as an external link, any
-- other link as an internal one.
-- @param link URL or page title; nil/false or '' returns text unchanged
-- @param text label; the helpers default it to the link itself
function p.linkOrText(link, text)
	-- p.extLinkOrText and p.intLinkOrText both return text for a missing
	-- link; do the same here instead of indexing nil.
	if not link or link == '' then
		return text
	end
	if link:find('http') then
		return p.extLinkOrText(link, text)
	end
	return p.intLinkOrText(link, text)
end

--- Lowercases str, then uppercases its first character using the English
-- language object. Returns nothing when str is missing.
function p.ucFirstOnly(str)
	if str then
		local english = mw.getLanguage('en')
		return english:ucfirst(str:lower())
	end
end

--- Uppercases the first character of str using the English language
-- object, leaving the rest untouched. Returns nothing when str is missing.
function p.ucfirst(str)
	if not str then
		return
	end
	local english = mw.getLanguage('en')
	return english:ucfirst(str)
end

--- Lowercases the first character of str using the English language
-- object, leaving the rest untouched. Returns nothing when str is missing.
function p.lcfirst(str)
	if str then
		return mw.getLanguage('en'):lcfirst(str)
	end
end

--- Appends a possessive ending to str: "'" when the inspected text ends in
-- 's', otherwise "'s".
-- @param str     text the ending is appended to
-- @param literal optional text to inspect for the trailing 's' instead of str
function p.possessive(str, literal)
	local inspected = literal or str
	local ending = "'s"
	if inspected:find('s$') then
		ending = "'"
	end
	return str .. ending
end

-- functions below this point were not in the original module from the lol wiki

-- the following 2 functions were adapted from http://lua-users.org/wiki/StringRecipes
-- Word callback for p.titleCase: uppercases the first letter and
-- lowercases the remainder of the word.
function h.tchelper(first, rest)
	return first:upper() .. rest:lower()
end

--- Converts every word of str to Title Case.
-- @param str string to convert
-- @return the converted string (single value)
function p.titleCase(str)
	-- Add extra characters to the pattern if you need to. _ and ' are
	--  found in the middle of identifiers and English words.
	-- We must also put %w_' into [%w_'] to make it handle normal stuff
	-- and extra stuff the same.
	-- This also turns hex numbers into, eg. 0Xa7d4
	-- Assign to a local first: returning gsub directly would also hand the
	-- match count to the caller, where it can end up as an unintended
	-- extra argument (e.g. p.trim(p.titleCase(s))).
	local titled = str:gsub("(%a)([%w_']*)", h.tchelper)
	return titled
end

--- Converts str to PascalCase: title-cases it with p.titleCase, then removes
-- whitespace, underscores, hyphens and apostrophes.
-- @param str string to convert
-- @return the converted string (single value)
function p.pascalCase(str)
	local titled = p.titleCase(str)
	-- The hyphen must be escaped inside the set: written as [%s_-'] the
	-- sequence _-' is read as a (backwards, hence empty) character range and
	-- none of the three characters is ever matched. '+' avoids pointless
	-- empty matches between every character.
	local joined = titled:gsub("[%s_%-']+", "")
	return joined
end
--- Converts str to camelCase: the p.pascalCase form with its first
-- character lowercased by p.lcfirst.
function p.camelCase(str)
	local pascal = p.pascalCase(str)
	return p.lcfirst(pascal)
end

-- the following functions were taken from https://en.wikipedia.org/wiki/Module:String2 (CC BY-SA 4.0, https://en.wikipedia.org/w/index.php?title=Module:String2&action=history)
--[[--------------------------< H Y P H E N _ T O _ D A S H >--------------------------------------------------

Converts a hyphen to a dash under certain conditions.  The hyphen must separate
like items; unlike items are returned unmodified.  These forms are modified:
	letter - letter (A - B)
	digit - digit (4-5)
	digit separator digit - digit separator digit (4.1-4.5 or 4-1-4-5)
	letterdigit - letterdigit (A1-A5) (an optional separator between letter and
		digit is supported – a.1-a.5 or a-1-a-5)
	digitletter - digitletter (5a - 5d) (an optional separator between letter and
		digit is supported – 5.a-5.d or 5-a-5-d)

any other forms are returned unmodified.

str may be a comma- or semicolon-separated list

]]
-- str:     text to convert; nil and '' are returned unchanged.
-- spacing: text placed after each comma when the items are joined back
--          together; defaults to a single space, the same default
--          p.hyphen2dash applies.
-- Items (or a whole str) wrapped in ((...)) are accepted as written: they
-- are not converted, and markup wrapping the whole result is stripped.
function p.hyphen_to_dash( str, spacing )
	if (str == nil or str == '') then
		return str
	end

	spacing = spacing or ' '													-- Lua callers may omit it; concatenating nil below would raise an error

	local accept

	str = mw.text.decode(str, true )											-- replace html entities with their characters; semicolon mucks up the text.split

	local out = {}
	local list = mw.text.split (str, '%s*[,;]%s*')								-- split str at comma or semicolon separators if there are any

	for _, item in ipairs (list) do												-- for each item in the list
		item = mw.text.trim(item)												-- trim whitespace
		item, accept = item:gsub ('^%(%((.+)%)%)$', '%1')						-- strip ((...)) markup; accept is non-zero when it was present
		if accept == 0 and mw.ustring.match (item, '^%w*[%.%-]?%w+%s*[%-–—]%s*%w*[%.%-]?%w+$') then	-- if a hyphenated range or has endash or emdash separators
			if item:match ('^%a+[%.%-]?%d+%s*%-%s*%a+[%.%-]?%d+$') or			-- letterdigit hyphen letterdigit (optional separator between letter and digit)
				item:match ('^%d+[%.%-]?%a+%s*%-%s*%d+[%.%-]?%a+$') or			-- digitletter hyphen digitletter (optional separator between digit and letter)
				item:match ('^%d+[%.%-]%d+%s*%-%s*%d+[%.%-]%d+$') or			-- digit separator digit hyphen digit separator digit
				item:match ('^%d+%s*%-%s*%d+$') or								-- digit hyphen digit
				item:match ('^%a+%s*%-%s*%a+$') then							-- letter hyphen letter
					item = item:gsub ('(%w*[%.%-]?%w+)%s*%-%s*(%w*[%.%-]?%w+)', '%1–%2')	-- replace hyphen, remove extraneous space characters
			else
				item = mw.ustring.gsub (item, '%s*[–—]%s*', '–')				-- for endash or emdash separated ranges, replace em with en, remove extraneous whitespace
			end
		end
		table.insert (out, item)												-- add the (possibly modified) item to the output table
	end

	local temp_str = table.concat (out, ',' .. spacing)							-- concatenate the output table into a comma separated string
	temp_str, accept = temp_str:gsub ('^%(%((.+)%)%)$', '%1')					-- remove accept-this-as-written markup when it wraps all of concatenated out
	if accept ~= 0 then
		temp_str = str:gsub ('^%(%((.+)%)%)$', '%1')							-- when global markup removed, return original str; do it this way to suppress boolean second return value
	end
	return temp_str
end

--- Frame entry point for p.hyphen_to_dash.
-- frame.args[1] is the text to convert (default '') and frame.args[2] the
-- spacing placed after each comma (default ' ').
function p.hyphen2dash( frame )
	local args = frame.args
	local text = args[1] or ''
	-- space is part of the standard separator for normal spacing (but in
	-- conjunction with templates r/rp/ran we may need a narrower spacing)
	local gap = args[2] or ' '
	return p.hyphen_to_dash(text, gap)
end


return p