Module:TextUtil: Difference between revisions

From Tardis Wiki, the free Doctor Who reference
(from https://lol.fandom.com/wiki/Module:TextUtil)
 
No edit summary
 
(8 intermediate revisions by the same user not shown)
Line 1: Line 1:
-- From https://lol.fandom.com/wiki/Module:TextUtil
-- Originally from https://lol.fandom.com/wiki/Module:TextUtil
-- Source for gsplit, split, and trim: https://phabricator.wikimedia.org/diffusion/ELUA/browse/master/includes/engines/LuaCommon/lualib/mw.text.lua
-- Source for gsplit, split, and trim: https://phabricator.wikimedia.org/diffusion/ELUA/browse/master/includes/engines/LuaCommon/lualib/mw.text.lua


local p = {}
local p = {}
local h = {}


function p.gsplit( text, pattern, plain )
function p.gsplit( text, pattern, plain )
Line 76: Line 77:
link = link or ''
link = link or ''
-- because of gsub not letting you have - unescaped
-- because of gsub not letting you have - unescaped
link = string.gsub(link,'%%','%%%%')
link = string.gsub(link,'%-','%%%-')
link = string.gsub(link,'%-','%%%-')
link = string.gsub(link,'%(','%%%(')
link = string.gsub(link,'%(','%%%(')
link = string.gsub(link,'%)','%%%)')
link = string.gsub(link,'%)','%%%)')
link = string.gsub(link,'%+','%%%+')
link = string.gsub(link,'%+','%%%+')
link = string.gsub(link,'%[','%%%[')
link = string.gsub(link,'%]','%%%]')
link = string.gsub(link,'%.','%%%.')
link = string.gsub(link,'%*','%%%*')
link = string.gsub(link,'%?','%%%?')
link = string.gsub(link,'%^','%%%^')
link = string.gsub(link,'%$','%%%$')
return link
return link
end
end
Line 176: Line 185:
return str .. "'s"
return str .. "'s"
end
end
-- functions below this point were not in the original module from the lol wiki
-- the following 2 functions were adapted from http://lua-users.org/wiki/StringRecipes
function h.tchelper(first, rest)
  return first:upper()..rest:lower()
end
function p.titleCase(str)
-- Add extra characters to the pattern if you need to. _ and ' are
--  found in the middle of identifiers and English words.
-- We must also put %w_' into [%w_'] to make it handle normal stuff
-- and extra stuff the same.
-- This also turns hex numbers into, eg. 0Xa7d4
return str:gsub("(%a)([%w_']*)", h.tchelper)
end
function p.pascalCase(str)
return p.titleCase(str):gsub("[%s_-']*", "")
end
function p.camelCase(str)
return p.lcfirst(p.pascalCase(str))
end
-- the following functions were taken from https://en.wikipedia.org/wiki/Module:String2 (CC BY-SA 4.0, https://en.wikipedia.org/w/index.php?title=Module:String2&action=history)
--[[--------------------------< H Y P H E N _ T O _ D A S H >--------------------------------------------------
Converts a hyphen to a dash under certain conditions.  The hyphen must separate
like items; unlike items are returned unmodified.  These forms are modified:
letter - letter (A - B)
digit - digit (4-5)
digit separator digit - digit separator digit (4.1-4.5 or 4-1-4-5)
letterdigit - letterdigit (A1-A5) (an optional separator between letter and
digit is supported – a.1-a.5 or a-1-a-5)
digitletter - digitletter (5a - 5d) (an optional separator between letter and
digit is supported – 5.a-5.d or 5-a-5-d)
any other forms are returned unmodified.
str may be a comma- or semicolon-separated list
]]
function p.hyphen_to_dash( str, spacing )
if (str == nil or str == '') then
return str
end
local accept
str = mw.text.decode(str, true ) -- replace html entities with their characters; semicolon mucks up the text.split
local out = {}
local list = mw.text.split (str, '%s*[,;]%s*') -- split str at comma or semicolon separators if there are any
for _, item in ipairs (list) do -- for each item in the list
item = mw.text.trim(item) -- trim whitespace
item, accept = item:gsub ('^%(%((.+)%)%)$', '%1')
if accept == 0 and mw.ustring.match (item, '^%w*[%.%-]?%w+%s*[%-–—]%s*%w*[%.%-]?%w+$') then -- if a hyphenated range or has endash or emdash separators
if item:match ('^%a+[%.%-]?%d+%s*%-%s*%a+[%.%-]?%d+$') or -- letterdigit hyphen letterdigit (optional separator between letter and digit)
item:match ('^%d+[%.%-]?%a+%s*%-%s*%d+[%.%-]?%a+$') or -- digitletter hyphen digitletter (optional separator between digit and letter)
item:match ('^%d+[%.%-]%d+%s*%-%s*%d+[%.%-]%d+$') or -- digit separator digit hyphen digit separator digit
item:match ('^%d+%s*%-%s*%d+$') or -- digit hyphen digit
item:match ('^%a+%s*%-%s*%a+$') then -- letter hyphen letter
item = item:gsub ('(%w*[%.%-]?%w+)%s*%-%s*(%w*[%.%-]?%w+)', '%1–%2') -- replace hyphen, remove extraneous space characters
else
item = mw.ustring.gsub (item, '%s*[–—]%s*', '–') -- for endash or emdash separated ranges, replace em with en, remove extraneous whitespace
end
end
table.insert (out, item) -- add the (possibly modified) item to the output table
end
local temp_str = table.concat (out, ',' .. spacing) -- concatenate the output table into a comma separated string
temp_str, accept = temp_str:gsub ('^%(%((.+)%)%)$', '%1') -- remove accept-this-as-written markup when it wraps all of concatenated out
if accept ~= 0 then
temp_str = str:gsub ('^%(%((.+)%)%)$', '%1') -- when global markup removed, return original str; do it this way to suppress boolean second return value
end
return temp_str
end
function p.hyphen2dash( frame )
local str = frame.args[1] or ''
local spacing = frame.args[2] or ' ' -- space is part of the standard separator for normal spacing (but in conjunction with templates r/rp/ran we may need a narrower spacing
return p.hyphen_to_dash(str, spacing)
end


return p
return p

Latest revision as of 20:36, 13 August 2024

Documentation for this module may be created at Module:TextUtil/doc

-- Originally from https://lol.fandom.com/wiki/Module:TextUtil
-- Source for gsplit, split, and trim: https://phabricator.wikimedia.org/diffusion/ELUA/browse/master/includes/engines/LuaCommon/lualib/mw.text.lua

-- Public API table: functions attached to `p` are exported through the module's final `return p`.
local p = {}
-- Private helper table: functions attached to `h` are used internally and are not returned.
local h = {}

-- Returns an iterator yielding the pieces of `text` that lie between matches
-- of a separator.
--   text    : string to split
--   pattern : separator; defaults to ','. Treated as a Lua pattern unless
--             `plain` is truthy
--   plain   : when truthy the separator is matched literally; otherwise the
--             separator is wrapped in '%s*' so surrounding whitespace is
--             consumed together with it
-- Returns the iterator function followed by two nils (generic-for protocol).
function p.gsplit( text, pattern, plain )
	pattern = pattern or ','
	if not plain then
		pattern = '%s*' .. pattern .. '%s*'
	end

	local cursor = 1				-- start of the next piece; nil once the text is exhausted
	local length = text:len()

	local function nextPiece()
		if not cursor then
			return
		end

		local first, last = text:find( pattern, cursor, plain )

		if not first then
			-- no further separator: everything that is left is the final piece
			local tail = text:sub( cursor )
			cursor = nil
			return tail
		end

		if last < first then
			-- the separator matched the empty string: emit up to the match
			-- position and step one byte forward so the loop makes progress
			local piece = text:sub( cursor, first )
			if first < length then
				cursor = first + 1
			else
				cursor = nil
			end
			return piece
		end

		-- ordinary separator: emit what precedes it and continue after it
		local piece = ''
		if first > cursor then
			piece = text:sub( cursor, first - 1 )
		end
		cursor = last + 1
		return piece
	end

	return nextPiece, nil, nil
end

-- Same as p.split, except that an empty string produces an empty table
-- instead of being handed to p.split.
function p.splitNonempty(text, pattern, plain)
	if text ~= '' then
		return p.split(text, pattern, plain)
	end
	return {}
end

-- Same as p.split, except that a missing or empty `text` produces nil
-- rather than a table.
function p.splitOrNil(text, pattern, plain)
	if text and text ~= '' then
		return p.split(text, pattern, plain)
	end
	return nil
end

-- Collects every piece produced by p.gsplit(text, pattern, plain) into a new
-- array. A falsy `text` yields an empty table.
function p.split(text, pattern, plain)
	local pieces = {}
	if text then
		local count = 0
		for piece in p.gsplit(text, pattern, plain) do
			count = count + 1
			pieces[count] = piece
		end
	end
	return pieces
end

-- Accepts either an already-split table or a value to split: tables are
-- returned untouched, anything else is passed to p.split.
function p.splitIfString(str, sep, plain)
	if type(str) ~= 'table' then
		return p.split(str, sep, plain)
	end
	return str
end
	

-- Strips leading and trailing characters from `s`.
--   s       : string to trim
--   charset : characters to strip, spliced verbatim into a pattern character
--             class; defaults to tab, CR, LF, form feed and space
-- Returns only the trimmed string (gsub's match count is discarded).
function p.trim( s, charset )
	local set = '[' .. ( charset or '\t\r\n\f ' ) .. ']*'
	-- the lazy (.-) capture keeps the middle as short as possible so the
	-- trailing class can absorb everything strippable at the end
	return ( s:gsub( '^' .. set .. '(.-)' .. set .. '$', '%1' ) )
end

-- Escapes every Lua pattern magic character in `link` so the result can be
-- used as a literal inside string.find / gsub / match patterns.
--   link : text to escape; nil/false is treated as ''
-- Returns the escaped string only.
function p.escape(link)
	link = link or ''
	-- One pass over all twelve magic characters ( % - ( ) + [ ] . * ? ^ $ ),
	-- each prefixed with '%'. '%0' is the matched character, which keeps the
	-- replacement string valid on every Lua version (sequences such as '%-'
	-- in a replacement are rejected by Lua 5.2 and later).
	return ( string.gsub( link, '[%%%-%(%)%+%[%]%.%*%?%^%$]', '%%%0' ) )
end

-- Applies a caller-supplied set of substitutions to `link`.
--   link : string to transform
--   tbl  : table whose values are rows with `find` (pattern) and `replace`
--          (replacement) fields; rows are visited with pairs(), so the order
--          of application is not guaranteed
-- Returns the transformed string.
function p.escapeCustom(link, tbl)
	local result = link
	for _, rule in pairs(tbl) do
		local replaced = result:gsub(rule.find, rule.replace)
		result = replaced
	end
	return result
end

-- Returns the one-byte string whose byte value is one greater than the first
-- byte of `char` (e.g. 'a' -> 'b').
function p.nextLetter(char)
	local code = char:byte()
	return string.char(code + 1)
end

-- Builds external-link wikitext "[link text]".
--   link : URL; when missing or empty nothing is returned
--   text : label; defaults to 'Link'
function p.extLink(link, text)
	if not link or link == '' then
		return
	end
	local label = text or 'Link'
	return string.format('[%s %s]', link, label)
end

-- Builds internal-link wikitext "[[link|text]]".
--   link : page title; an empty string counts as missing, in which case
--          `text` is used as the target
--   text : label; defaults to 'Link'
-- Returns nothing when neither a link nor a text is available.
function p.intLink(link, text)
	local target = link
	if target == '' then
		target = nil
	end
	if not (target or text) then
		return
	end
	return string.format('[[%s|%s]]', target or text, text or 'Link')
end

-- Builds image wikitext "[[File:name|link=target]]".
--   file   : file name, with or without a leading "File:" prefix; when
--            missing or empty the function returns nil
--   target : value placed after "link=" (required)
function p.fileLink(file, target)
	if not file or file == '' then
		return nil
	end
	-- Strip only a leading namespace prefix; an unanchored pattern would also
	-- delete any "File:" occurring inside the name itself.
	local name = file:gsub('^File:', '')
	return ('[[File:%s|link=%s]]'):format(name, target)
end

-- Chooses between an external and an internal link: a `link` containing the
-- substring 'http' anywhere goes to p.extLink, everything else (including a
-- missing link) goes to p.intLink.
function p.link(link, text)
	if link and link:find('http') then
		return p.extLink(link, text)
	end
	return p.intLink(link, text)
end

-- Like p.link, but an internal link is wrapped in <span class="external">.
-- A missing link is delegated to p.intLink unwrapped; a link containing
-- 'http' anywhere is rendered by p.extLink.
function p.ambiguousLink(link, text)
	if not link then
		return p.intLink(link, text)
	end
	if not link:find('http') then
		return string.format('<span class="external">%s</span>', p.intLink(link, text))
	end
	return p.extLink(link, text)
end

-- Builds external-link wikitext "[link text]", falling back to the bare
-- `text` when `link` is missing or empty. The label defaults to the link.
function p.extLinkOrText(link, text)
	if not link or link == '' then
		return text
	end
	return string.format('[%s %s]', link, text or link)
end

-- Builds internal-link wikitext "[[link|text]]", falling back to the bare
-- `text` when `link` is missing or empty. The label defaults to the link.
function p.intLinkOrText(link, text)
	if not link or link == '' then
		return text
	end
	return string.format('[[%s|%s]]', link, text or link)
end

-- Chooses between p.extLinkOrText and p.intLinkOrText: a `link` containing
-- the substring 'http' anywhere is treated as external.
--   link : URL or page title; when missing or empty, `text` is returned as-is
--          (the same result both delegates give for an absent link)
--   text : label / fallback text
function p.linkOrText(link, text)
	-- guard first: calling link:find on nil would raise an error
	if not link or link == '' then
		return text
	end
	if link:find('http') then
		return p.extLinkOrText(link, text)
	end
	return p.intLinkOrText(link, text)
end

-- Lower-cases `str` and then applies the English language object's ucfirst
-- to it. Returns nothing when `str` is missing.
function p.ucFirstOnly(str)
	if str then
		local lang = mw.getLanguage('en')
		return lang:ucfirst(str:lower())
	end
end

-- Applies the English language object's ucfirst to `str`.
-- Returns nothing when `str` is missing.
function p.ucfirst(str)
	if str then
		local lang = mw.getLanguage('en')
		return lang:ucfirst(str)
	end
end

-- Applies the English language object's lcfirst to `str`.
-- Returns nothing when `str` is missing.
function p.lcfirst(str)
	if str then
		local lang = mw.getLanguage('en')
		return lang:lcfirst(str)
	end
end

-- Appends an English possessive ending to `str`.
--   str     : text that receives the suffix
--   literal : optional text used instead of `str` to decide the suffix
-- The suffix is "'" when the examined text ends in a lowercase 's',
-- otherwise "'s".
function p.possessive(str, literal)
	local examined = literal or str
	local suffix = examined:find('s$') and "'" or "'s"
	return str .. suffix
end

-- functions below this point were not in the original module from the lol wiki

-- the following 2 functions were adapted from http://lua-users.org/wiki/StringRecipes
-- gsub callback for p.titleCase: upper-cases the first letter of a word and
-- lower-cases the remainder.
function h.tchelper(first, rest)
	return first:upper() .. rest:lower()
end

-- Converts every word of `str` to Title Case and returns the converted
-- string only.
function p.titleCase(str)
	-- Add extra characters to the pattern if you need to. _ and ' are
	--  found in the middle of identifiers and English words.
	-- We must also put %w_' into [%w_'] to make it handle normal stuff
	-- and extra stuff the same.
	-- This also turns hex numbers into, eg. 0Xa7d4
	-- The parentheses drop gsub's match count so that callers forwarding the
	-- result into another argument list receive exactly one value.
	return (str:gsub("(%a)([%w_']*)", h.tchelper))
end

-- Converts `str` to PascalCase: title-cases it with p.titleCase, then removes
-- whitespace, underscores, hyphens and apostrophes. Returns the string only.
function p.pascalCase(str)
	-- The hyphen must be escaped inside the set: written as "_-'" it is read
	-- as a range from '_' down to "'", which is empty and matches nothing.
	-- '+' avoids pointless empty matches at every position.
	return (p.titleCase(str):gsub("[%s_%-']+", ""))
end
-- Converts `str` to camelCase: the p.pascalCase form passed through p.lcfirst.
function p.camelCase(str)
	local pascal = p.pascalCase(str)
	return p.lcfirst(pascal)
end

-- the following functions were taken from https://en.wikipedia.org/wiki/Module:String2 (CC BY-SA 4.0, https://en.wikipedia.org/w/index.php?title=Module:String2&action=history)
--[[--------------------------< H Y P H E N _ T O _ D A S H >--------------------------------------------------

Converts a hyphen to a dash under certain conditions.  The hyphen must separate
like items; unlike items are returned unmodified.  These forms are modified:
	letter - letter (A - B)
	digit - digit (4-5)
	digit separator digit - digit separator digit (4.1-4.5 or 4-1-4-5)
	letterdigit - letterdigit (A1-A5) (an optional separator between letter and
		digit is supported – a.1-a.5 or a-1-a-5)
	digitletter - digitletter (5a - 5d) (an optional separator between letter and
		digit is supported – 5.a-5.d or 5-a-5-d)

any other forms are returned unmodified.

str may be a comma- or semicolon-separated list; the items are re-joined with
',' followed by spacing, so semicolon separators come back as commas.

An item wrapped in doubled parentheses, ((like this)), is accepted as written:
the markup is removed and the item is not converted.  When the markup wraps the
whole list, the (entity-decoded) input is returned with only the markup removed.

Parameters:
	str		text to convert; nil or '' is returned unchanged
	spacing	text placed after each comma in the output; defaults to a single
			space (the same default p.hyphen2dash applies)

Returns the converted string.

]]
function p.hyphen_to_dash( str, spacing )
	if (str == nil or str == '') then
		return str
	end

	spacing = spacing or ' '					-- direct Lua callers may omit it; without a default the concatenation below raises an error

	local accept

	str = mw.text.decode(str, true )			-- replace html entities with their characters; semicolon mucks up the text.split

	local out = {}
	local list = mw.text.split (str, '%s*[,;]%s*')	-- split str at comma or semicolon separators if there are any

	for _, item in ipairs (list) do				-- for each item in the list
		item = mw.text.trim(item)				-- trim whitespace
		item, accept = item:gsub ('^%(%((.+)%)%)$', '%1')	-- strip accept-this-as-written markup; accept is the number of removals
		if accept == 0 and mw.ustring.match (item, '^%w*[%.%-]?%w+%s*[%-–—]%s*%w*[%.%-]?%w+$') then	-- if a hyphenated range or has endash or emdash separators
			if item:match ('^%a+[%.%-]?%d+%s*%-%s*%a+[%.%-]?%d+$') or		-- letterdigit hyphen letterdigit (optional separator between letter and digit)
				item:match ('^%d+[%.%-]?%a+%s*%-%s*%d+[%.%-]?%a+$') or		-- digitletter hyphen digitletter (optional separator between digit and letter)
				item:match ('^%d+[%.%-]%d+%s*%-%s*%d+[%.%-]%d+$') or		-- digit separator digit hyphen digit separator digit
				item:match ('^%d+%s*%-%s*%d+$') or							-- digit hyphen digit
				item:match ('^%a+%s*%-%s*%a+$') then						-- letter hyphen letter
					item = item:gsub ('(%w*[%.%-]?%w+)%s*%-%s*(%w*[%.%-]?%w+)', '%1–%2')	-- replace hyphen, remove extraneous space characters
			else
				item = mw.ustring.gsub (item, '%s*[–—]%s*', '–')			-- for endash or emdash separated ranges, replace em with en, remove extraneous whitespace
			end
		end
		table.insert (out, item)				-- add the (possibly modified) item to the output table
	end

	local temp_str = table.concat (out, ',' .. spacing)				-- concatenate the output table into a comma separated string
	temp_str, accept = temp_str:gsub ('^%(%((.+)%)%)$', '%1')		-- remove accept-this-as-written markup when it wraps all of concatenated out
	if accept ~= 0 then
		temp_str = str:gsub ('^%(%((.+)%)%)$', '%1')				-- when global markup removed, return original str; assigning to a single variable drops gsub's second return value (the match count)
	end
	return temp_str
end

-- Frame entry point for p.hyphen_to_dash.
--   frame.args[1] : text to convert; defaults to ''
--   frame.args[2] : spacing placed after each comma; defaults to ' ', the
--                   normal separator spacing (in conjunction with templates
--                   r/rp/ran a narrower spacing may be needed)
function p.hyphen2dash( frame )
	local args = frame.args
	local text = args[1] or ''
	local gap = args[2] or ' '
	return p.hyphen_to_dash(text, gap)
end


return p