Documentation for this module may be created at Մոդուլ:IPA/doc

local export = {}
local m_data = mw.loadData('Module:IPA/data')

-- Conversion tables built once at module load:
--   i2x_lookup: IPA symbol     -> X-SAMPA symbol (the first one when several are listed)
--   x2i_lookup: X-SAMPA symbol -> IPA symbol (every listed X-SAMPA spelling is registered)
local i2x_lookup, x2i_lookup = {}, {}

for ipa_sym, data in pairs(m_data.symbols[1]) do
	local xsampa = data.XSAMPA
	if type(xsampa) ~= "table" then
		-- Single X-SAMPA equivalent: map it in both directions.
		i2x_lookup[ipa_sym] = xsampa
		x2i_lookup[xsampa] = ipa_sym
	else
		-- Several X-SAMPA equivalents: the first is used when converting from
		-- IPA, and all of them are accepted when converting back.
		i2x_lookup[ipa_sym] = xsampa[1]
		for _, alias in ipairs(xsampa) do
			x2i_lookup[alias] = ipa_sym
		end
	end
end

-- Exception cases where two IPA characters map to one X-SAMPA character.
-- The loop above keeps whichever IPA symbol pairs() happened to visit last,
-- so the reverse mapping for these is set explicitly here.
local x2i_overrides = {
	["_T"] = "˥",
	["_H"] = "˦",
	["_M"] = "˧",
	["_L"] = "˨",
	["_B"] = "˩",
}
for xsampa_sym, ipa_sym in pairs(x2i_overrides) do
	x2i_lookup[xsampa_sym] = ipa_sym
end

--- Converts IPA text to X-SAMPA.
-- @param text either the IPA string itself, or a table with an `args` field
--             (a frame), in which case `args[1]` is converted
-- @return the converted string; when called with a frame the result is
--         additionally passed through mw.text.nowiki
function export.IPA_to_XSAMPA(text)
	local from_frame = type(text) == 'table'

	if from_frame then
		-- Pull the first argument out of the frame, turn the {{=}} and {{!}}
		-- placeholders back into "=" and "|", then decode HTML entities.
		local arg = text.args[1]
		arg = arg:gsub('{{=}}', '=')
		arg = arg:gsub('{{!}}', '|')
		text = mw.text.decode(arg) -- XXX
	end

	-- A doubled length mark collapses to a single ":"
	-- (this basically sums up m_data.symbols[2].XSAMPA).
	local converted = mw.ustring.gsub(text, 'ːː', ':')
	-- Every remaining character is looked up individually; characters without
	-- an entry in i2x_lookup are left as they are.
	converted = mw.ustring.gsub(converted, '.', i2x_lookup)

	if from_frame then
		converted = mw.text.nowiki(converted)
	end
	return converted
end

--- Converts X-SAMPA text to IPA.
-- The input is consumed left to right; at each position the longest prefix
-- (up to four characters) that has an entry in x2i_lookup is replaced, and a
-- character with no entry is copied through unchanged.
-- @param text either the X-SAMPA string itself, or a table with an `args`
--             field (a frame), in which case `args[1]` is HTML-entity-decoded
--             and converted
-- @return the converted string
function export.XSAMPA_to_IPA(text)
	if type(text) == 'table' then -- a frame, extract args
		text = mw.text.decode(text.args[1]) -- XXX
	end

	-- XXX: may not be the most efficient, but at least correct.
	local pieces = {}
	while #text > 0 do
		local emitted, consumed
		-- Try prefix lengths 4, 3, 2, 1 in that order so longer X-SAMPA
		-- symbols win over their own shorter prefixes.
		for len = 4, 1, -1 do
			local prefix = mw.ustring.sub(text, 1, len)
			local ipa = x2i_lookup[prefix]
			if ipa then
				emitted, consumed = ipa, len
				break
			elseif len == 1 then
				-- no match at any length: keep the character as it is
				emitted, consumed = prefix, 1
			end
		end
		pieces[#pieces + 1] = emitted
		text = mw.ustring.sub(text, consumed + 1)
	end

	-- NOTE: the result is not passed through mw.text.nowiki, even for frame
	-- calls; that escaping step is currently disabled.
	return table.concat(pieces)
end


--- Formats several IPA pronunciations and joins them into a single string.
-- Each pronunciation is run through export.format_IPA. When `notes` holds a
-- value at the same index, it is appended wrapped in <ref>...</ref>.
-- @param pronunciations sequence of IPA strings; it is read only and is not
--                       modified
-- @param notes optional table of note strings keyed by the same indices as
--              `pronunciations`; may be nil when there are no notes
-- @return the formatted pronunciations joined with ", "
function export.IPA_multiple(pronunciations, notes)
	-- Allow callers that have no notes to omit the second argument.
	notes = notes or {}

	-- Collect results in a fresh table so the caller's list is left untouched.
	local formatted = {}
	for i, pron in ipairs(pronunciations) do
		pron = export.format_IPA(pron)

		local note = notes[i]
		if note then
			pron = pron .. "<ref>" .. note .. "</ref>"
		end

		formatted[i] = pron
	end

	return table.concat(formatted, ", ")
end

-- Takes an IPA pronunciation and formats it and adds cleanup categories.
--
-- The first and last characters of `text` are treated as the representation
-- marks (expected to be /.../ or [...]); the checks below are run on what lies
-- between them.
-- @param text IPA transcription string, including its enclosing marks
-- @return `text` wrapped in <span class="IPA" lang="">, followed by one
--         [[Category:...]] link for every problem that was detected
function export.format_IPA(text)
	local categories = {}

	-- Fetch the representation type marks and remove them for "inner"
	local left_mark, right_mark = mw.ustring.match(text, '^(.).-(.)$')
	local inner = mw.ustring.sub(text, 2, -2)

	-- Check for obsolete and nonstandard symbols
	local nonstandard = {
		"ɑ̢", "d̂", "t̂", "n̂", "l̂", "k̫", "ɔ̗", "ɔ̖", -- these symbols consist of more than one character, so we can't put them in the line below
		"[ʦʣʧʤʨʥ?ƍσƺƪƻƾƞᶀᶁᶂᶃᶄᶅᶆᶈᶇᶉᶊᶋƫᶌᶍᶎʓʆλƛłščžǰǧǯẋᵻᵿⱻʚɷωıȹȸ∅ØƥƭƈƙʠʇʗʖʞɩɼȣяɿʅʮʯᴀᴀᴇGRŒQȡȶȵȴKPT]",
	}

	for _, symbol in ipairs(nonstandard) do
		-- Use match rather than find: the category sort key should be the
		-- offending symbol itself, not its numeric position in the text.
		local found = mw.ustring.match(inner, symbol)
		if found then
			table.insert(categories, "IPA pronunciations with obsolete or nonstandard characters|" .. found)
			-- only the first hit is reported
			break
		end
	end

	-- Check for invalid symbols
	local valid_symbols = ' %(%)%%{%|%}%-~.!abcdefhijklmnopqrstuvwxyz¡àáâãäæçèéêëìíîïðòóôõöøùúûüýÿāăēĕěħĩīĭŋōŏőœũūŭűŷǀǁǂǃǎǐǒǔǖǘǚǜǟǣǽǿȁȅȉȍȕȫȭȳɐɑɒɓɔɕɖɗɘəɚɛɜɝɞɟɠɡɢɣɤɥɦɧɨɪɫɬɭɮɯɰɱɲɳɴɵɶɸɹɺɻɽɾʀʁʂʃʄʈʉʊʋṽʌʍʎʏʐʑʒʔʕʘʙʛʜʝʟʡʢʬʭ⁻¹²³⁴⁵ᵝʰʱʲʳʴʵʶʷʸʼˀˁˈˌːˑ˞ˠˡˢˣ˥˦˧˨˩ˬ˭̘̙̜̝̞̟̠̣̤̥̩̪̬̯̰̹̺̻̼͇͈͉͍͎͔͕̀́̂̃̄̆̈̋̌̏̽͆͊͋͌̊̌̚͢͡β͜θχᴙᵊᵐᵑᶑᶣᶬᶮᶯᶰᶹ᷽᷄᷅᷆᷇᷈᷉ḁḛḭḯṍṏṳṵṹṻạẹẽịọụỳỵỹ‖․‥…‼‿ⁿ↑↓↗↘ⱱꜛꜜꟸꟹ𝆏𝆑'

	-- Strip every valid symbol; whatever is left over is invalid and becomes
	-- the sort key of the cleanup category.
	local leftover = mw.ustring.gsub(inner, '[' .. valid_symbols .. ']', '')
	if leftover ~= '' then
		table.insert(categories, "IPA pronunciations with invalid IPA characters|" .. leftover)
	end

	-- Check the representation type
	local phonemic = left_mark == '/' and right_mark == '/'
	local phonetic = left_mark == '[' and right_mark == ']'
	if not (phonemic or phonetic) then
		table.insert(categories, "IPA pronunciations with invalid representation marks")
	end

	-- Check for double character (the tone letters ˥˦˧˨˩ may repeat)
	if mw.ustring.match(inner, '([^˥˦˧˨˩])%1') then
		table.insert(categories, "IPA pronunciations with repetition")
	end

	-- Reference inside IPA template usage
	if mw.ustring.find(text, '</ref>') then
		table.insert(categories, "IPA pronunciations with reference")
	end

	-- Format the text
	text = '<span class="IPA" lang="">' .. text .. '</span>'

	-- Add the categories
	for _, cat in ipairs(categories) do
		text = text .. "[[Category:" .. cat .. "]]"
	end

	return text
end

return export