<?xml version="1.0"?>
<feed xmlns="http://www.w3.org/2005/Atom" xml:lang="en">
	<id>https://academia.la/w/index.php?action=history&amp;feed=atom&amp;title=Module%3Astring_utilities</id>
	<title>Module:string utilities - Revision history</title>
	<link rel="self" type="application/atom+xml" href="https://academia.la/w/index.php?action=history&amp;feed=atom&amp;title=Module%3Astring_utilities"/>
	<link rel="alternate" type="text/html" href="https://academia.la/w/index.php?title=Module:string_utilities&amp;action=history"/>
	<updated>2026-04-29T19:53:10Z</updated>
	<subtitle>Revision history for this page on the wiki</subtitle>
	<generator>MediaWiki 1.40.0</generator>
	<entry>
		<id>https://academia.la/w/index.php?title=Module:string_utilities&amp;diff=1548&amp;oldid=prev</id>
		<title>Jācōbus: Created page with &quot;local module_name = &quot;string_utilities&quot; local export = {}  local rfind = mw.ustring.find  local format_escapes = {     [&quot;op&quot;] = &quot;{&quot;,     [&quot;cl&quot;] = &quot;}&quot;, }  function export.format_fun(str, fun)     return (string.gsub(str, &quot;{(\\?)((\\?)[^{}]*)}&quot;, function (p1, name, p2)         if #p1 + #p2 == 1 then             return format_escapes[name] or error(module_name .. &quot;.format: unrecognized escape sequence '{\\&quot; .. name .. &quot;}'&quot;)         else         	if fun(name) and type(fun(nam...&quot;</title>
		<link rel="alternate" type="text/html" href="https://academia.la/w/index.php?title=Module:string_utilities&amp;diff=1548&amp;oldid=prev"/>
		<updated>2023-02-11T19:40:01Z</updated>

		<summary type="html">&lt;p&gt;Created page with &amp;quot;local module_name = &amp;quot;string_utilities&amp;quot; local export = {}  local rfind = mw.ustring.find  local format_escapes = {     [&amp;quot;op&amp;quot;] = &amp;quot;{&amp;quot;,     [&amp;quot;cl&amp;quot;] = &amp;quot;}&amp;quot;, }  function export.format_fun(str, fun)     return (string.gsub(str, &amp;quot;{(\\?)((\\?)[^{}]*)}&amp;quot;, function (p1, name, p2)         if #p1 + #p2 == 1 then             return format_escapes[name] or error(module_name .. &amp;quot;.format: unrecognized escape sequence &amp;#039;{\\&amp;quot; .. name .. &amp;quot;}&amp;#039;&amp;quot;)         else         	if fun(name) and type(fun(nam...&amp;quot;&lt;/p&gt;
&lt;p&gt;&lt;b&gt;New page&lt;/b&gt;&lt;/p&gt;&lt;div&gt;local module_name = &amp;quot;string_utilities&amp;quot;&lt;br /&gt;
local export = {}&lt;br /&gt;
&lt;br /&gt;
local rfind = mw.ustring.find&lt;br /&gt;
&lt;br /&gt;
local format_escapes = {&lt;br /&gt;
    [&amp;quot;op&amp;quot;] = &amp;quot;{&amp;quot;,&lt;br /&gt;
    [&amp;quot;cl&amp;quot;] = &amp;quot;}&amp;quot;,&lt;br /&gt;
}&lt;br /&gt;
&lt;br /&gt;
function export.format_fun(str, fun)&lt;br /&gt;
    return (string.gsub(str, &amp;quot;{(\\?)((\\?)[^{}]*)}&amp;quot;, function (p1, name, p2)&lt;br /&gt;
        if #p1 + #p2 == 1 then&lt;br /&gt;
            return format_escapes[name] or error(module_name .. &amp;quot;.format: unrecognized escape sequence '{\\&amp;quot; .. name .. &amp;quot;}'&amp;quot;)&lt;br /&gt;
        else&lt;br /&gt;
        	if fun(name) and type(fun(name)) ~= &amp;quot;string&amp;quot; then&lt;br /&gt;
        		error(module_name .. &amp;quot;.format: '&amp;quot; .. name .. &amp;quot;' is a &amp;quot; .. type(fun(name)) .. &amp;quot;, not a string&amp;quot;)&lt;br /&gt;
        	end&lt;br /&gt;
            return fun(name) or error(module_name .. &amp;quot;.format: '&amp;quot; .. name .. &amp;quot;' not found in table&amp;quot;)&lt;br /&gt;
        end&lt;br /&gt;
    end))&lt;br /&gt;
end&lt;br /&gt;
&lt;br /&gt;
function export.format(str, tbl)&lt;br /&gt;
    return export.format_fun(str, function (key) return tbl[key] end)&lt;br /&gt;
end&lt;br /&gt;
&lt;br /&gt;
-- Reimplementation of mw.ustring.split() that includes any capturing&lt;br /&gt;
-- groups in the splitting pattern. This works like Python's re.split()&lt;br /&gt;
-- function, except that it has Lua's behavior when the split pattern&lt;br /&gt;
-- is empty (i.e. advancing by one character at a time; Python returns the&lt;br /&gt;
-- whole remainder of the string).&lt;br /&gt;
function export.capturing_split(str, pattern)&lt;br /&gt;
    local ret = {}&lt;br /&gt;
    -- (.-) corresponds to (.*?) in Python or Perl; () captures the&lt;br /&gt;
    -- current position after matching.&lt;br /&gt;
    pattern = &amp;quot;(.-)&amp;quot; .. pattern .. &amp;quot;()&amp;quot;&lt;br /&gt;
    local start = 1&lt;br /&gt;
    while true do&lt;br /&gt;
        -- Did we reach the end of the string?&lt;br /&gt;
        if start &amp;gt; #str then&lt;br /&gt;
            table.insert(ret, &amp;quot;&amp;quot;)&lt;br /&gt;
            return ret&lt;br /&gt;
        end&lt;br /&gt;
        -- match() returns all captures as multiple return values;&lt;br /&gt;
        -- we need to insert into a table to get them all.&lt;br /&gt;
        local captures = {mw.ustring.match(str, pattern, start)}&lt;br /&gt;
        -- If no match, add the remainder of the string.&lt;br /&gt;
        if #captures == 0 then&lt;br /&gt;
            table.insert(ret, mw.ustring.sub(str, start))&lt;br /&gt;
            return ret&lt;br /&gt;
        end&lt;br /&gt;
        local newstart = table.remove(captures)&lt;br /&gt;
        -- Special case: If we don't advance by any characters, then advance&lt;br /&gt;
        -- by one character; this avoids an infinite loop, and makes splitting&lt;br /&gt;
        -- by an empty string work the way mw.ustring.split() does. If we&lt;br /&gt;
        -- reach the end of the string this way, return immediately, so we&lt;br /&gt;
        -- don't get a final empty string.&lt;br /&gt;
        if newstart == start then&lt;br /&gt;
            table.insert(ret, mw.ustring.sub(str, start, start))&lt;br /&gt;
            table.remove(captures, 1)&lt;br /&gt;
            start = start + 1&lt;br /&gt;
            if start &amp;gt; #str then&lt;br /&gt;
            	return ret&lt;br /&gt;
            end&lt;br /&gt;
        else&lt;br /&gt;
            table.insert(ret, table.remove(captures, 1))&lt;br /&gt;
            start = newstart&lt;br /&gt;
        end&lt;br /&gt;
        -- Insert any captures from the splitting pattern.&lt;br /&gt;
        for _, x in ipairs(captures) do&lt;br /&gt;
            table.insert(ret, x)&lt;br /&gt;
        end&lt;br /&gt;
    end&lt;br /&gt;
end&lt;br /&gt;
&lt;br /&gt;
local function uclcfirst(text, dolower)&lt;br /&gt;
	local function douclcfirst(text)&lt;br /&gt;
		-- Actual function to re-case of the first letter.&lt;br /&gt;
		local first_letter = mw.ustring.sub(text, 1, 1)&lt;br /&gt;
		first_letter = dolower and mw.ustring.lower(first_letter) or mw.ustring.upper(first_letter)&lt;br /&gt;
		return first_letter .. mw.ustring.sub(text, 2)&lt;br /&gt;
	end&lt;br /&gt;
	-- If there's a link at the beginning, re-case the first letter of the&lt;br /&gt;
	-- link text. This pattern matches both piped and unpiped links.&lt;br /&gt;
	-- If the link is not piped, the second capture (linktext) will be empty.&lt;br /&gt;
	local link, linktext, remainder = mw.ustring.match(text, &amp;quot;^%[%[([^|%]]+)%|?(.-)%]%](.*)$&amp;quot;)&lt;br /&gt;
	if link then&lt;br /&gt;
		return &amp;quot;[[&amp;quot; .. link .. &amp;quot;|&amp;quot; .. douclcfirst(linktext ~= &amp;quot;&amp;quot; and linktext or link) .. &amp;quot;]]&amp;quot; .. remainder&lt;br /&gt;
	end&lt;br /&gt;
	return douclcfirst(text)&lt;br /&gt;
end&lt;br /&gt;
&lt;br /&gt;
function export.ucfirst(text)&lt;br /&gt;
	return uclcfirst(text, false)&lt;br /&gt;
end&lt;br /&gt;
&lt;br /&gt;
function export.lcfirst(text)&lt;br /&gt;
	return uclcfirst(text, true)&lt;br /&gt;
end&lt;br /&gt;
&lt;br /&gt;
function export.capitalize(text)&lt;br /&gt;
	if type(text) == &amp;quot;table&amp;quot; then&lt;br /&gt;
		-- allow calling from a template&lt;br /&gt;
		text = text.args[1]&lt;br /&gt;
	end&lt;br /&gt;
	-- Capitalize multi-word that is separated by spaces&lt;br /&gt;
	-- by uppercasing the first letter of each part.&lt;br /&gt;
	-- I assume nobody will input all CAP text.&lt;br /&gt;
	w2 = {}&lt;br /&gt;
	for w in mw.ustring.gmatch(text, &amp;quot;%S+&amp;quot;) do&lt;br /&gt;
		table.insert(w2, uclcfirst(w, false))&lt;br /&gt;
	end&lt;br /&gt;
	return table.concat(w2, &amp;quot; &amp;quot;)&lt;br /&gt;
end&lt;br /&gt;
&lt;br /&gt;
function export.pluralize(text)&lt;br /&gt;
	if type(text) == &amp;quot;table&amp;quot; then&lt;br /&gt;
		-- allow calling from a template&lt;br /&gt;
		text = text.args[1]&lt;br /&gt;
	end&lt;br /&gt;
	-- Pluralize a word in a smart fashion, according to normal English rules.&lt;br /&gt;
	-- 1. If word ends in consonant + -y, replace the -y with -ies.&lt;br /&gt;
	-- 2. If the word ends in -s, -x, -z, -sh, -ch, add -es.&lt;br /&gt;
	-- 3. Otherwise, add -s.&lt;br /&gt;
	-- This handles links correctly:&lt;br /&gt;
	-- 1. If a piped link, change the second part appropriately.&lt;br /&gt;
	-- 2. If a non-piped link and rule #1 above applies, convert to a piped link&lt;br /&gt;
	--    with the second part containing the plural.&lt;br /&gt;
	-- 3. If a non-piped link and rules #2 or #3 above apply, add the plural&lt;br /&gt;
	--    outside the link.&lt;br /&gt;
	&lt;br /&gt;
	local function word_ends_in_consonant_plus_y(text)&lt;br /&gt;
		-- FIXME, a subrule of rule #1 above says the -ies ending doesn't&lt;br /&gt;
		-- apply to proper nouns, hence &amp;quot;the Gettys&amp;quot;, &amp;quot;the public Ivys&amp;quot;.&lt;br /&gt;
		-- We should maybe consider applying this rule here; but it may not&lt;br /&gt;
		-- be important as this function is almost always called on common nouns&lt;br /&gt;
		-- (e.g. parts of speech, place types).&lt;br /&gt;
		return text:find(&amp;quot;[^aeiouAEIOU ]y$&amp;quot;)&lt;br /&gt;
	end&lt;br /&gt;
	&lt;br /&gt;
	local function word_takes_es_plural(text)&lt;br /&gt;
		return text:find(&amp;quot;[sxz]$&amp;quot;) or text:find(&amp;quot;[cs]h$&amp;quot;)&lt;br /&gt;
	end&lt;br /&gt;
	&lt;br /&gt;
	local function do_pluralize(text)&lt;br /&gt;
		if word_ends_in_consonant_plus_y(text) then&lt;br /&gt;
			-- avoid returning multiple values&lt;br /&gt;
			local hack_single_retval = text:gsub(&amp;quot;y$&amp;quot;, &amp;quot;ies&amp;quot;)&lt;br /&gt;
			return hack_single_retval&lt;br /&gt;
		elseif word_takes_es_plural(text) then&lt;br /&gt;
			return text .. &amp;quot;es&amp;quot;&lt;br /&gt;
		else&lt;br /&gt;
			return text .. &amp;quot;s&amp;quot;&lt;br /&gt;
		end&lt;br /&gt;
	end&lt;br /&gt;
		&lt;br /&gt;
	-- Check for a link. This pattern matches both piped and unpiped links.&lt;br /&gt;
	-- If the link is not piped, the second capture (linktext) will be empty.&lt;br /&gt;
	local beginning, link, linktext = mw.ustring.match(text, &amp;quot;^(.*)%[%[([^|%]]+)%|?(.-)%]%]$&amp;quot;)&lt;br /&gt;
	if link then&lt;br /&gt;
		if linktext ~= &amp;quot;&amp;quot; then&lt;br /&gt;
			return beginning .. &amp;quot;[[&amp;quot; .. link .. &amp;quot;|&amp;quot; .. do_pluralize(linktext) .. &amp;quot;]]&amp;quot;&lt;br /&gt;
		end&lt;br /&gt;
		if word_ends_in_consonant_plus_y(link) then&lt;br /&gt;
			return beginning .. &amp;quot;[[&amp;quot; .. link .. &amp;quot;|&amp;quot; .. link:gsub(&amp;quot;y$&amp;quot;, &amp;quot;ies&amp;quot;) .. &amp;quot;]]&amp;quot;&lt;br /&gt;
		end&lt;br /&gt;
		return beginning .. &amp;quot;[[&amp;quot; .. link .. &amp;quot;]]&amp;quot; .. (word_takes_es_plural(link) and &amp;quot;es&amp;quot; or &amp;quot;s&amp;quot;)&lt;br /&gt;
	end&lt;br /&gt;
	return do_pluralize(text)&lt;br /&gt;
end&lt;br /&gt;
&lt;br /&gt;
function export.singularize(text)&lt;br /&gt;
	if type(text) == &amp;quot;table&amp;quot; then&lt;br /&gt;
		-- allow calling from a template&lt;br /&gt;
		text = text.args[1]&lt;br /&gt;
	end&lt;br /&gt;
	-- Singularize a word in a smart fashion, according to normal English rules.&lt;br /&gt;
	-- Works analogously to pluralize().&lt;br /&gt;
	-- NOTE: This doesn't always work as well as pluralize(). Beware. It will&lt;br /&gt;
	-- mishandle cases like &amp;quot;passes&amp;quot; -&amp;gt; &amp;quot;passe&amp;quot;, &amp;quot;eyries&amp;quot; -&amp;gt; &amp;quot;eyry&amp;quot;.&lt;br /&gt;
	-- 1. If word ends in -ies, replace -ies with -y.&lt;br /&gt;
	-- 2. If the word ends in -xes, -shes, -ches, remove -es. [Does not affect&lt;br /&gt;
	--    -ses, cf. &amp;quot;houses&amp;quot;, &amp;quot;impasses&amp;quot;.]&lt;br /&gt;
	-- 3. Otherwise, remove -s.&lt;br /&gt;
	-- This handles links correctly:&lt;br /&gt;
	-- 1. If a piped link, change the second part appropriately. Collapse the&lt;br /&gt;
	--    link to a simple link if both parts end up the same.&lt;br /&gt;
	-- 2. If a non-piped link, singularize the link.&lt;br /&gt;
	-- 3. A link like &amp;quot;[[parish]]es&amp;quot; will be handled correctly because the&lt;br /&gt;
	--    code that checks for -shes etc. allows ] characters between the&lt;br /&gt;
	--    'sh' etc. and final -es.&lt;br /&gt;
	local function do_singularize(text)&lt;br /&gt;
		local sing = text:match(&amp;quot;^(.-)ies$&amp;quot;)&lt;br /&gt;
		if sing then&lt;br /&gt;
			return sing .. &amp;quot;y&amp;quot;&lt;br /&gt;
		end&lt;br /&gt;
		-- Handle cases like &amp;quot;[[parish]]es&amp;quot;&lt;br /&gt;
		local sing = text:match(&amp;quot;^(.-[sc]h%]*)es$&amp;quot;)&lt;br /&gt;
		if sing then&lt;br /&gt;
			return sing&lt;br /&gt;
		end&lt;br /&gt;
		-- Handle cases like &amp;quot;[[box]]es&amp;quot;&lt;br /&gt;
		local sing = text:match(&amp;quot;^(.-x%]*)es$&amp;quot;)&lt;br /&gt;
		if sing then&lt;br /&gt;
			return sing&lt;br /&gt;
		end&lt;br /&gt;
		local sing = text:match(&amp;quot;^(.-)s$&amp;quot;)&lt;br /&gt;
		if sing then&lt;br /&gt;
			return sing&lt;br /&gt;
		end&lt;br /&gt;
		return text&lt;br /&gt;
	end&lt;br /&gt;
&lt;br /&gt;
	local function collapse_link(link, linktext)&lt;br /&gt;
		if link == linktext then&lt;br /&gt;
			return &amp;quot;[[&amp;quot; .. link .. &amp;quot;]]&amp;quot;&lt;br /&gt;
		else&lt;br /&gt;
			return &amp;quot;[[&amp;quot; .. link .. &amp;quot;|&amp;quot; .. linktext .. &amp;quot;]]&amp;quot;&lt;br /&gt;
		end&lt;br /&gt;
	end&lt;br /&gt;
&lt;br /&gt;
	-- Check for a link. This pattern matches both piped and unpiped links.&lt;br /&gt;
	-- If the link is not piped, the second capture (linktext) will be empty.&lt;br /&gt;
	local beginning, link, linktext = mw.ustring.match(text, &amp;quot;^(.*)%[%[([^|%]]+)%|?(.-)%]%]$&amp;quot;)&lt;br /&gt;
	if link then&lt;br /&gt;
		if linktext ~= &amp;quot;&amp;quot; then&lt;br /&gt;
			return beginning .. collapse_link(link, do_singularize(linktext))&lt;br /&gt;
		end&lt;br /&gt;
		return beginning .. &amp;quot;[[&amp;quot; .. do_singularize(link) .. &amp;quot;]]&amp;quot;&lt;br /&gt;
	end&lt;br /&gt;
&lt;br /&gt;
	return do_singularize(text)&lt;br /&gt;
end&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
function export.add_indefinite_article(text, uppercase)&lt;br /&gt;
	local is_vowel = false&lt;br /&gt;
	-- If there's a link at the beginning, examine the first letter of the&lt;br /&gt;
	-- link text. This pattern matches both piped and unpiped links.&lt;br /&gt;
	-- If the link is not piped, the second capture (linktext) will be empty.&lt;br /&gt;
	local link, linktext, remainder = mw.ustring.match(text, &amp;quot;^%[%[([^|%]]+)%|?(.-)%]%](.*)$&amp;quot;)&lt;br /&gt;
	if link then&lt;br /&gt;
		is_vowel = rfind(linktext ~= &amp;quot;&amp;quot; and linktext or link, &amp;quot;^[AEIOUaeiou]&amp;quot;)&lt;br /&gt;
	else&lt;br /&gt;
		is_vowel = rfind(text, &amp;quot;^[AEIOUaeiou]&amp;quot;)&lt;br /&gt;
	end&lt;br /&gt;
	return (is_vowel and (uppercase and &amp;quot;An &amp;quot; or &amp;quot;an &amp;quot;) or (uppercase and &amp;quot;A &amp;quot; or &amp;quot;a &amp;quot;)) .. text&lt;br /&gt;
end&lt;br /&gt;
&lt;br /&gt;
return export&lt;/div&gt;</summary>
		<author><name>Jācōbus</name></author>
	</entry>
</feed>