-- Module:TextUtil
-- From Tardis Wiki, the free Doctor Who reference.
-- Documentation for this module may be created at Module:TextUtil/doc
-- Originally from https://lol.fandom.com/wiki/Module:TextUtil
-- Source for gsplit, split, and trim: https://phabricator.wikimedia.org/diffusion/ELUA/browse/master/includes/engines/LuaCommon/lualib/mw.text.lua
-- p collects the public functions; it is the value returned at the end of the file.
local p = {}
-- h collects internal helpers (e.g. h.tchelper); it is not returned from the module.
local h = {}
--- Build an iterator that yields the pieces of `text` between separators.
-- Works on bytes (string library), not on Unicode code points.
-- @param text     string to split (required)
-- @param pattern  separator; defaults to ','. Unless `plain` is set it is a
--                 Lua pattern and is wrapped in '%s*' on both sides, so
--                 whitespace around each separator is swallowed.
-- @param plain    when truthy, `pattern` is matched literally and unwrapped
-- @return iterator function (plus two nils, for generic-for compatibility)
function p.gsplit( text, pattern, plain )
    local sep = pattern or ','
    if not plain then
        sep = '%s*' .. sep .. '%s*'
    end

    local pos = 1               -- where the next search starts; nil once exhausted
    local total = text:len()

    local function nextPiece()
        if not pos then
            return
        end

        local first, last = text:find( sep, pos, plain )

        -- No further separator: hand back the remainder and finish.
        if not first then
            local tail = text:sub( pos )
            pos = nil
            return tail
        end

        -- The separator matched the empty string: emit one byte at a time.
        if last < first then
            local single = text:sub( pos, first )
            if first < total then
                pos = first + 1
            else
                pos = nil
            end
            return single
        end

        -- Ordinary separator: emit whatever precedes it (possibly nothing).
        local piece = ''
        if first > pos then
            piece = text:sub( pos, first - 1 )
        end
        pos = last + 1
        return piece
    end

    return nextPiece, nil, nil
end
--- Split `text` with p.split, but map the empty string to an empty table.
-- @param text     string to split
-- @param pattern  separator, forwarded to p.split
-- @param plain    literal-match flag, forwarded to p.split
-- @return table of pieces; {} when `text` is ''
function p.splitNonempty(text, pattern, plain)
    if text ~= '' then
        return p.split(text, pattern, plain)
    end
    return {}
end
--- Split `text` with p.split, or return nil when there is nothing to split.
-- @param text     string to split; nil/false/'' all produce nil
-- @param pattern  separator, forwarded to p.split
-- @param plain    literal-match flag, forwarded to p.split
-- @return table of pieces, or nil
function p.splitOrNil(text, pattern, plain)
    if text and text ~= '' then
        return p.split(text, pattern, plain)
    end
    return nil
end
--- Collect every piece produced by p.gsplit into a sequence.
-- @param text     string to split; a nil/false value yields an empty table
-- @param pattern  separator, forwarded to p.gsplit
-- @param plain    literal-match flag, forwarded to p.gsplit
-- @return table (sequence) of pieces
function p.split(text, pattern, plain)
    local pieces = {}
    if text then
        for piece in p.gsplit(text, pattern, plain) do
            table.insert(pieces, piece)
        end
    end
    return pieces
end
--- Accept either an already-split table or a string that still needs splitting.
-- @param str    table (returned untouched) or value handed to p.split
-- @param sep    separator, forwarded to p.split
-- @param plain  literal-match flag, forwarded to p.split
-- @return the table itself, or the result of p.split
function p.splitIfString(str, sep, plain)
    if type(str) ~= 'table' then
        return p.split(str, sep, plain)
    end
    return str
end
--- Strip leading and trailing characters of `charset` from `s`.
-- @param s        string to trim
-- @param charset  body of a Lua character class (inserted between '[' and ']');
--                 defaults to tab, CR, LF, form feed and space
-- @return the trimmed string (single value)
function p.trim( s, charset )
    local edge = '[' .. (charset or '\t\r\n\f ') .. ']*'
    -- Parentheses drop gsub's substitution count.
    return (s:gsub( '^' .. edge .. '(.-)' .. edge .. '$', '%1' ))
end
--- Escape Lua-pattern magic characters so `link` can be used literally in a pattern.
-- Each of % - ( ) + [ ] . * ? ^ $ is prefixed with '%'.
-- @param link  string to escape; nil/false is treated as ''
-- @return the escaped string
function p.escape(link)
    -- because of gsub not letting you have - unescaped
    -- One pass over a class of every magic character; '%%%0' emits a literal
    -- '%' followed by the character that matched.
    local escaped = string.gsub(link or '', '[%%%-%(%)%+%[%]%.%*%?%^%$]', '%%%0')
    return escaped
end
--- Apply a caller-supplied set of gsub substitutions to `link`.
-- @param link  string to transform
-- @param tbl   table whose values are rows with `find` (pattern) and
--              `replace` (replacement) fields; rows are visited with pairs,
--              so no particular order is guaranteed
-- @return the transformed string
function p.escapeCustom(link, tbl)
    local result = link
    for _, rule in pairs(tbl) do
        local replaced = result:gsub(rule.find, rule.replace)
        result = replaced
    end
    return result
end
--- Return the character whose byte value is one higher than the first byte of `char`.
-- @param char  string; only its first byte is used
-- @return one-character string
function p.nextLetter(char)
    local code = char:byte()
    return string.char(code + 1)
end
--- Format an external wikitext link: [link text].
-- @param link  URL; nil/false/'' produces no return value
-- @param text  label; defaults to 'Link'
-- @return wikitext string, or nothing when there is no link
function p.extLink(link, text)
    if not link or link == '' then
        return
    end
    return string.format('[%s %s]', link, text or 'Link')
end
--- Format an internal wikitext link: [[link|text]].
-- An empty `link` counts as missing; a missing link falls back to `text` as
-- the target, and a missing text falls back to the label 'Link'.
-- @param link  page title (optional)
-- @param text  label (optional)
-- @return wikitext string, or nothing when both arguments are missing
function p.intLink(link, text)
    local target = link
    if target == '' then
        target = nil
    end
    if target or text then
        return string.format('[[%s|%s]]', target or text, text or 'Link')
    end
end
--- Format a wikitext file embed, optionally with a link target.
-- @param file    file name; every occurrence of 'File:' in it is removed
--                before the prefix is re-added. nil/false/'' returns nil.
-- @param target  value for the `link=` parameter. When nil the parameter is
--                omitted entirely (previously a nil target reached
--                string.format's %s, which raises in Lua 5.1 and prints
--                'link=nil' in newer Lua versions).
-- @return wikitext string, or nil when there is no file
function p.fileLink(file, target)
    if file == '' or not file then
        return nil
    end
    -- Single-variable assignment discards gsub's substitution count.
    local name = file:gsub('File:', '')
    if target == nil then
        return ('[[File:%s]]'):format(name)
    end
    return ('[[File:%s|link=%s]]'):format(name, target)
end
--- Format a link, choosing external or internal syntax from the target.
-- A target containing 'http' anywhere is treated as external (p.extLink);
-- everything else, including a missing target, goes to p.intLink.
-- @param link  URL or page title (optional)
-- @param text  label (optional)
-- @return whatever the chosen formatter returns
function p.link(link, text)
    if link and link:find('http') then
        return p.extLink(link, text)
    end
    return p.intLink(link, text)
end
--- Format a link like p.link, but wrap internal links that were given an
-- explicit target in <span class="external">.
-- A target containing 'http' anywhere is passed to p.extLink. A missing
-- target is passed to p.intLink and returned unwrapped.
-- @param link  URL or page title (optional)
-- @param text  label (optional)
-- @return wikitext string, or nil when p.intLink produces nothing
function p.ambiguousLink(link, text)
    if not link then
        return p.intLink(link, text)
    end
    if link:find('http') then
        return p.extLink(link, text)
    end
    local internal = p.intLink(link, text)
    -- p.intLink returns nothing for an empty link with no text; formatting
    -- that with %s would raise, so report "no link" instead.
    if not internal then
        return nil
    end
    return ('<span class="external">%s</span>'):format(internal)
end
--- Format an external link, or fall back to the bare text when there is no URL.
-- @param link  URL; nil/false/'' means "no link"
-- @param text  label; defaults to the URL itself
-- @return '[link text]', or `text` unchanged when there is no link
function p.extLinkOrText(link, text)
    if not link or link == '' then
        return text
    end
    return string.format('[%s %s]', link, text or link)
end
--- Format an internal link, or fall back to the bare text when there is no target.
-- @param link  page title; nil/false/'' means "no link"
-- @param text  label; defaults to the page title itself
-- @return '[[link|text]]', or `text` unchanged when there is no link
function p.intLinkOrText(link, text)
    if not link or link == '' then
        return text
    end
    return string.format('[[%s|%s]]', link, text or link)
end
--- Format a link (external or internal), or fall back to the bare text.
-- A target containing 'http' anywhere goes to p.extLinkOrText; anything
-- else, including a missing target, goes to p.intLinkOrText.
-- @param link  URL or page title (optional)
-- @param text  label (optional)
-- @return whatever the chosen formatter returns
function p.linkOrText(link, text)
    -- Guard before calling link:find so a nil link reaches p.intLinkOrText
    -- (which returns `text`) instead of raising on a nil index.
    if link and link:find('http') then
        return p.extLinkOrText(link, text)
    end
    return p.intLinkOrText(link, text)
end
--- Lowercase `str` with string.lower, then pass it to the 'en' language
-- object's ucfirst.
-- @param str  string (optional)
-- @return transformed string, or nothing when `str` is nil/false
function p.ucFirstOnly(str)
    if str then
        local lang = mw.getLanguage('en')
        return lang:ucfirst(str:lower())
    end
end
--- Pass `str` to the 'en' language object's ucfirst.
-- @param str  string (optional)
-- @return transformed string, or nothing when `str` is nil/false
function p.ucfirst(str)
    if str then
        local lang = mw.getLanguage('en')
        return lang:ucfirst(str)
    end
end
--- Pass `str` to the 'en' language object's lcfirst.
-- @param str  string (optional)
-- @return transformed string, or nothing when `str` is nil/false
function p.lcfirst(str)
    if str then
        local lang = mw.getLanguage('en')
        return lang:lcfirst(str)
    end
end
--- Append an English possessive suffix to `str`.
-- @param str      string to extend (may contain markup)
-- @param literal  optional plain form of the word; when given, it (rather
--                 than `str`) is inspected for a trailing lowercase 's'
-- @return `str` followed by "'" if the inspected word ends in 's', else "'s"
function p.possessive(str, literal)
    local basis = literal or str
    local suffix = basis:find('s$') and "'" or "'s"
    return str .. suffix
end
-- functions below this point were not in the original module from the lol wiki
-- the following 2 functions were adapted from http://lua-users.org/wiki/StringRecipes
--- gsub callback for p.titleCase: uppercase the first capture, lowercase the second.
-- @param first  leading letter of a word
-- @param rest   remainder of the word
-- @return the recombined word
function h.tchelper(first, rest)
    local head = first:upper()
    local tail = rest:lower()
    return head .. tail
end
--- Convert `str` to title case: each word gets an uppercase first letter and
-- a lowercase remainder.
-- @param str  string to convert
-- @return the converted string, as a single value
function p.titleCase(str)
    -- Add extra characters to the pattern if you need to. _ and ' are
    -- found in the middle of identifiers and English words.
    -- We must also put %w_' into [%w_'] to make it handle normal stuff
    -- and extra stuff the same.
    -- This also turns hex numbers into, eg. 0Xa7d4
    --
    -- The outer parentheses discard gsub's second result (the substitution
    -- count) so it cannot leak into a caller's argument list or table
    -- constructor.
    return (str:gsub("(%a)([%w_']*)", h.tchelper))
end
--- Convert `str` to PascalCase: title-case it with p.titleCase, then remove
-- whitespace, underscores, hyphens and apostrophes.
-- @param str  string to convert
-- @return the converted string, as a single value
function p.pascalCase(str)
    -- The hyphen must be escaped inside the class: written as `_-'` it is
    -- read as a range from '_' down to "'", which is empty, so only
    -- whitespace was ever removed. '+' avoids matching the empty string at
    -- every position, and the parentheses drop gsub's substitution count.
    return (p.titleCase(str):gsub("[%s_%-']+", ""))
end
--- Convert `str` to camelCase: run it through p.pascalCase, then pass every
-- result of that call on to p.lcfirst.
-- @param str  string to convert
-- @return whatever p.lcfirst returns
function p.camelCase(str)
    -- Calling pascalCase in argument position forwards all of its results.
    local lowerFirst = p.lcfirst
    return lowerFirst(p.pascalCase(str))
end
-- the following functions were taken from https://en.wikipedia.org/wiki/Module:String2 (CC BY-SA 4.0, https://en.wikipedia.org/w/index.php?title=Module:String2&action=history)
--[[--------------------------< H Y P H E N _ T O _ D A S H >--------------------------------------------------
Converts a hyphen to a dash under certain conditions. The hyphen must separate
like items; unlike items are returned unmodified. These forms are modified:
letter - letter (A - B)
digit - digit (4-5)
digit separator digit - digit separator digit (4.1-4.5 or 4-1-4-5)
letterdigit - letterdigit (A1-A5) (an optional separator between letter and
digit is supported – a.1-a.5 or a-1-a-5)
digitletter - digitletter (5a - 5d) (an optional separator between letter and
digit is supported – 5.a-5.d or 5-a-5-d)
any other forms are returned unmodified.
str may be a comma- or semicolon-separated list
]]
-- @param str      string holding one item or a comma-/semicolon-separated list;
--                 nil and '' are returned unchanged
-- @param spacing  string placed after each comma when the list is rejoined;
--                 defaults to a single space when omitted
-- @return the rejoined string with qualifying hyphens replaced by en dashes
function p.hyphen_to_dash( str, spacing )
    if (str == nil or str == '') then
        return str
    end

    -- Direct Lua callers may omit spacing; without a default the concatenation
    -- ',' .. spacing below raises. Same default as p.hyphen2dash.
    spacing = spacing or ' '

    local accept

    str = mw.text.decode(str, true ) -- replace html entities with their characters; semicolon mucks up the text.split

    local out = {}
    local list = mw.text.split (str, '%s*[,;]%s*') -- split str at comma or semicolon separators if there are any

    for _, item in ipairs (list) do -- for each item in the list
        item = mw.text.trim(item) -- trim whitespace
        item, accept = item:gsub ('^%(%((.+)%)%)$', '%1') -- strip ((...)) accept-as-written markup; accept counts whether it was present
        if accept == 0 and mw.ustring.match (item, '^%w*[%.%-]?%w+%s*[%-–—]%s*%w*[%.%-]?%w+$') then -- if a hyphenated range or has endash or emdash separators
            if item:match ('^%a+[%.%-]?%d+%s*%-%s*%a+[%.%-]?%d+$') or -- letterdigit hyphen letterdigit (optional separator between letter and digit)
                item:match ('^%d+[%.%-]?%a+%s*%-%s*%d+[%.%-]?%a+$') or -- digitletter hyphen digitletter (optional separator between digit and letter)
                item:match ('^%d+[%.%-]%d+%s*%-%s*%d+[%.%-]%d+$') or -- digit separator digit hyphen digit separator digit
                item:match ('^%d+%s*%-%s*%d+$') or -- digit hyphen digit
                item:match ('^%a+%s*%-%s*%a+$') then -- letter hyphen letter
                    item = item:gsub ('(%w*[%.%-]?%w+)%s*%-%s*(%w*[%.%-]?%w+)', '%1–%2') -- replace hyphen, remove extraneous space characters
            else
                item = mw.ustring.gsub (item, '%s*[–—]%s*', '–') -- for endash or emdash separated ranges, replace em with en, remove extraneous whitespace
            end
        end
        table.insert (out, item) -- add the (possibly modified) item to the output table
    end

    local temp_str = table.concat (out, ',' .. spacing) -- concatenate the output table into a comma separated string
    temp_str, accept = temp_str:gsub ('^%(%((.+)%)%)$', '%1') -- remove accept-this-as-written markup when it wraps all of concatenated out
    if accept ~= 0 then
        temp_str = str:gsub ('^%(%((.+)%)%)$', '%1') -- when global markup removed, return original str; do it this way to suppress boolean second return value
    end
    return temp_str
end
--- Frame-based entry point for p.hyphen_to_dash.
-- @param frame  object with an `args` table; args[1] is the text (default '')
--               and args[2] the spacing placed after commas (default ' ')
-- @return the result of p.hyphen_to_dash
function p.hyphen2dash( frame )
    local args = frame.args
    local text = args[1] or ''
    -- space is part of the standard separator for normal spacing (but in
    -- conjunction with templates r/rp/ran we may need a narrower spacing)
    local gap = args[2] or ' '
    return p.hyphen_to_dash(text, gap)
end
-- Hand the table of public functions back to whoever loads this file.
return p