Module:X2i
This module is rated as beta and is ready for widespread use. It is still new and should be used with some caution to ensure the results are as expected.
This module converts X-SAMPA to IPA, and is accessible through {{x2i}}. See there for more.
Testcase
- [ɛɡˈzɑmpɫ̩]
-- Converts X-SAMPA transcriptions to IPA.
--
-- Exported entry points:
--   p.X2IPA(frame)   converts the first template/invocation argument (or a
--                    plain string) from X-SAMPA to IPA.
--   p.example(frame) builds a wikitext fragment showing the {{x2i}} call next
--                    to its converted result.
local p = {}

local U = mw.ustring.char

-- Lookup table from X-SAMPA sequence to IPA.
--   [1]            the IPA string
--   has_descender  the IPA symbol extends below the baseline
--   with_descender alternative form used when the preceding non-diacritic
--                  symbol has a descender
--   is_diacritic   the symbol is skipped when searching backwards for the
--                  preceding base symbol
--
-- Slashes \, apostrophes ', and double quotes " are escaped with \.
-- \\ = \, \' = ', \" = "
local data = {
	["a"] = { "a" },
	["b"] = { "b" },
	-- not in official X-SAMPA; from http://www.kneequickie.com/kq/Z-SAMPA and used by Wiktionary
	["b\\"] = { "ⱱ" },
	["b_<"] = { "ɓ" },
	["c"] = { "c" },
	["d"] = { "d" },
	["d`"] = { "ɖ", has_descender = true },
	["d_<"] = { "ɗ" },
	-- not in official X-SAMPA; Wikipedia-specific
	["d`_<"] = { "ᶑ", has_descender = true },
	["e"] = { "e" },
	["f"] = { "f" },
	["g"] = { "ɡ", has_descender = true },
	["g_<"] = { "ɠ", has_descender = true },
	["h"] = { "h" },
	["h\\"] = { "ɦ" },
	["i"] = { "i" },
	["j"] = { "j", has_descender = true },
	["j\\"] = { "ʝ", has_descender = true },
	["k"] = { "k" },
	["l"] = { "l" },
	["l`"] = { "ɭ", has_descender = true },
	["l\\"] = { "ɺ" },
	["m"] = { "m" },
	["n"] = { "n" },
	["n`"] = { "ɳ", has_descender = true },
	["o"] = { "o" },
	["p"] = { "p", has_descender = true },
	["p\\"] = { "ɸ", has_descender = true },
	["q"] = { "q", has_descender = true },
	["r"] = { "r" },
	["r`"] = { "ɽ", has_descender = true },
	["r\\"] = { "ɹ" },
	["r\\`"] = { "ɻ", has_descender = true },
	["s"] = { "s" },
	["s`"] = { "ʂ", has_descender = true },
	["s\\"] = { "ɕ" },
	["t"] = { "t" },
	["t`"] = { "ʈ" },
	["u"] = { "u" },
	["v"] = { "v" },
	["v\\"] = { "ʋ" },
	["w"] = { "w" },
	["x"] = { "x" },
	["x\\"] = { "ɧ", has_descender = true },
	["y"] = { "y", has_descender = true },
	["z"] = { "z" },
	["z`"] = { "ʐ", has_descender = true },
	["z\\"] = { "ʑ" },
	["A"] = { "ɑ" },
	["B"] = { "β", has_descender = true },
	["B\\"] = { "ʙ" },
	["C"] = { "ç", has_descender = true },
	["D"] = { "ð" },
	["E"] = { "ɛ" },
	["F"] = { "ɱ", has_descender = true },
	["G"] = { "ɣ", has_descender = true },
	["G\\"] = { "ɢ" },
	["G\\_<"] = { "ʛ" },
	["H"] = { "ɥ", has_descender = true },
	["H\\"] = { "ʜ" },
	["I"] = { "ɪ" },
	["I\\"] = { "ɪ̈" },
	["J"] = { "ɲ", has_descender = true },
	["J\\"] = { "ɟ" },
	["J\\_<"] = { "ʄ", has_descender = true },
	["K"] = { "ɬ" },
	["K\\"] = { "ɮ", has_descender = true },
	["L"] = { "ʎ" },
	["L\\"] = { "ʟ" },
	["M"] = { "ɯ" },
	["M\\"] = { "ɰ", has_descender = true },
	["N"] = { "ŋ", has_descender = true },
	["N\\"] = { "ɴ" },
	["O"] = { "ɔ" },
	["O\\"] = { "ʘ" },
	["P"] = { "ʋ" },
	["Q"] = { "ɒ" },
	["R"] = { "ʁ" },
	["R\\"] = { "ʀ" },
	["S"] = { "ʃ", has_descender = true },
	["T"] = { "θ" },
	["U"] = { "ʊ" },
	["U\\"] = { "ʊ̈" },
	["V"] = { "ʌ" },
	["W"] = { "ʍ" },
	["X"] = { "χ", has_descender = true },
	["X\\"] = { "ħ" },
	["Y"] = { "ʏ" },
	["Z"] = { "ʒ", has_descender = true },
	["."] = { "." },
	["\""] = { "ˈ" },
	["%"] = { "ˌ" },
	-- not in official X-SAMPA; from http://www.kneequickie.com/kq/Z-SAMPA and used by Wiktionary
	["%\\"] = { "ᴙ" },
	["'"] = { "ʲ", is_diacritic = true },
	[":"] = { "ː", is_diacritic = true },
	[":\\"] = { "ˑ", is_diacritic = true },
	["@"] = { "ə" },
	["@`"] = { "ɚ" },
	["@\\"] = { "ɘ" },
	["{"] = { "æ" },
	["}"] = { "ʉ" },
	["1"] = { "ɨ" },
	["2"] = { "ø" },
	["3"] = { "ɜ" },
	["3`"] = { "ɝ" },
	["3\\"] = { "ɞ" },
	["4"] = { "ɾ" },
	["5"] = { "ɫ" },
	["6"] = { "ɐ" },
	["7"] = { "ɤ" },
	["8"] = { "ɵ" },
	["9"] = { "œ" },
	["&"] = { "ɶ" },
	["?"] = { "ʔ" },
	["?\\"] = { "ʕ" },
	["<\\"] = { "ʢ" },
	[">\\"] = { "ʡ" },
	["^"] = { "ꜛ" },
	["!"] = { "ꜜ" },
	-- not in official X-SAMPA
	["!!"] = { "‼" },
	["!\\"] = { "ǃ" },
	["|"] = { "|", has_descender = true },
	["|\\"] = { "ǀ", has_descender = true },
	["||"] = { "‖", has_descender = true },
	["|\\|\\"] = { "ǁ", has_descender = true },
	["=\\"] = { "ǂ", has_descender = true },
	-- linking mark, liaison
	["-\\"] = { "‿", is_diacritic = true },
	-- coarticulated; not in official X-SAMPA; used by Wiktionary
	["__"] = { U(0x361) },
	-- fortis, strong articulation; not in official X-SAMPA; used by Wiktionary
	["_:"] = { U(0x348) },
	["_\""] = { U(0x308), is_diacritic = true },
	-- advanced
	["_+"] = { U(0x31F), with_descender = "˖", is_diacritic = true },
	-- retracted
	["_-"] = { U(0x320), with_descender = "˗", is_diacritic = true },
	-- rising tone
	["_/"] = { U(0x30C), is_diacritic = true },
	-- voiceless
	["_0"] = { U(0x325), with_descender = U(0x30A), is_diacritic = true },
	-- syllabic
	["="] = { U(0x329), with_descender = U(0x30D), is_diacritic = true },
	-- syllabic
	["_="] = { U(0x329), with_descender = U(0x30D), is_diacritic = true },
	-- strident: not in official X-SAMPA; from http://www.kneequickie.com/kq/Z-SAMPA and used by Wiktionary
	["_%\\"] = { U(0x1DFD) },
	-- ejective
	["_>"] = { "ʼ", is_diacritic = true },
	-- pharyngealized
	["_?\\"] = { "ˤ", is_diacritic = true },
	-- falling tone
	["_\\"] = { U(0x302), is_diacritic = true },
	-- non-syllabic
	["_^"] = { U(0x32F), with_descender = U(0x311), is_diacritic = true },
	-- no audible release
	["_}"] = { U(0x31A), is_diacritic = true },
	-- r-coloring (colouring), rhotacization
	["`"] = { U(0x2DE), is_diacritic = true },
	-- nasalization
	["~"] = { U(0x303), is_diacritic = true },
	-- advanced tongue root
	["_A"] = { U(0x318), is_diacritic = true },
	-- apical
	["_a"] = { U(0x33A), is_diacritic = true },
	-- extra-low tone
	["_B"] = { U(0x30F), is_diacritic = true },
	-- low rising tone
	["_B_L"] = { U(0x1DC5), is_diacritic = true },
	-- less rounded
	["_c"] = { U(0x31C), is_diacritic = true },
	-- dental
	["_d"] = { U(0x32A), is_diacritic = true },
	-- velarized or pharyngealized (dark)
	["_e"] = { U(0x334), is_diacritic = true },
	-- downstep
	["<F>"] = { "↘" },
	-- falling tone
	["_F"] = { U(0x302), is_diacritic = true },
	-- velarized
	["_G"] = { "ˠ", is_diacritic = true },
	-- high tone
	["_H"] = { U(0x301), is_diacritic = true },
	-- high rising tone
	["_H_T"] = { U(0x1DC4), is_diacritic = true },
	-- aspiration
	["_h"] = { "ʰ", is_diacritic = true },
	-- palatalization
	["_j"] = { "ʲ", is_diacritic = true },
	-- creaky voice, laryngealization, vocal fry
	["_k"] = { U(0x330), is_diacritic = true },
	-- low tone
	["_L"] = { U(0x300), is_diacritic = true },
	-- lateral release
	["_l"] = { "ˡ", is_diacritic = true },
	-- mid tone
	["_M"] = { U(0x304), is_diacritic = true },
	-- laminal
	["_m"] = { U(0x33B), is_diacritic = true },
	-- linguolabial
	["_N"] = { U(0x33C), is_diacritic = true },
	-- nasal release
	["_n"] = { "ⁿ", is_diacritic = true },
	-- more rounded
	["_O"] = { U(0x339), is_diacritic = true },
	-- lowered
	["_o"] = { U(0x31E), with_descender = "˕", is_diacritic = true },
	-- retracted tongue root
	["_q"] = { U(0x319), is_diacritic = true },
	-- global rise
	["<R>"] = { "↗" },
	-- rising tone
	["_R"] = { U(0x30C), is_diacritic = true },
	-- rising falling tone
	["_R_F"] = { U(0x1DC8), is_diacritic = true },
	-- raised
	["_r"] = { U(0x31D), is_diacritic = true },
	-- extra-high tone
	["_T"] = { U(0x30B), is_diacritic = true },
	-- breathy voice, murmured voice, murmur, whispery voice
	["_t"] = { U(0x324), is_diacritic = true },
	-- voiced
	["_v"] = { U(0x32C), is_diacritic = true },
	-- labialized
	["_w"] = { "ʷ", is_diacritic = true },
	-- extra-short
	["_X"] = { U(0x306), is_diacritic = true },
	-- mid-centralized
	["_x"] = { U(0x33D), is_diacritic = true },
	["__T"] = { "˥" },
	["__H"] = { "˦" },
	["__M"] = { "˧" },
	["__L"] = { "˨" },
	["__B"] = { "˩" },
	["0"] = { "◌" }, -- dotted circle
}

-- Longest key in `data`, in characters; the converter tries this length first.
local MAX_KEY_LENGTH = 4

-- Pattern matching the placeholders produced by escape(): a run of "$"
-- followed by "*". The length of the "$" run is the index into the list of
-- escaped strings.
local PLACEHOLDER_PATTERN = "($+)%*"

-- Replaces every match of `pattern` in `text` with a placeholder and stores
-- the protected string (the first capture, or the whole match when the
-- pattern has no capture) in `list`.
--
-- The placeholder index is always #list + 1, so successive calls that share
-- the same `list` never reuse an index.
--
-- @param text     string to process
-- @param pattern  ustring pattern selecting the spans to protect
-- @param list     sequence that receives the protected strings
-- @return text with each match replaced by its placeholder
local function escape(text, pattern, list)
	-- Parentheses drop gsub's second return value (the match count).
	return (mw.ustring.gsub(
		text,
		pattern,
		function(match)
			local index = #list + 1
			list[index] = match
			return string.rep("$", index) .. "*"
		end
	))
end

-- Reports whether the nearest preceding non-diacritic symbol has a descender.
--
-- @param characteristics sequence of { has_descender, is_diacritic } records,
--                        one per symbol already emitted
-- @return true if such a symbol exists and has a descender, false otherwise
--         (including when nothing, or only diacritics, precede)
local function precedingBaseHasDescender(characteristics)
	local pos = #characteristics
	-- Walk backwards over diacritics; stop at the start of the transcription
	-- instead of indexing past it.
	while pos > 0 and characteristics[pos].is_diacritic do
		pos = pos - 1
	end
	return pos > 0 and characteristics[pos].has_descender == true
end

-- Converts an X-SAMPA string to IPA.
--
-- A character preceded by "*" is copied to the output literally, and the
-- sequence ''' is passed through untouched. Everything else is converted by
-- greedy longest-match lookup in `data`; characters with no entry are copied
-- unchanged.
--
-- @param text X-SAMPA string
-- @return the IPA string
local function _XSAMPAtoIPA(text)
	local output = {}
	local characteristics = {}
	local escaped = {}

	-- Order matters: "*x" escapes must be taken out before ''' is examined.
	local toBeEscaped = { "%*(.)", "'''" }
	for _, pattern in ipairs(toBeEscaped) do
		text = escape(text, pattern, escaped)
	end

	while text ~= "" do
		for length = MAX_KEY_LENGTH, 1, -1 do
			local substring = mw.ustring.sub(text, 1, length)
			local entry = data[substring]
			local IPA, has_descender, is_diacritic

			if entry then
				IPA = entry[1]
				has_descender = entry.has_descender
				is_diacritic = entry.is_diacritic

				-- Use the alternative form of the current symbol when the
				-- base symbol it attaches to has a descender.
				if entry.with_descender
					and precedingBaseHasDescender(characteristics)
				then
					IPA = entry.with_descender
				end
			elseif length == 1 then
				-- No entry even for a single character: copy it through.
				IPA = substring
			end

			if IPA then
				text = mw.ustring.sub(text, length + 1)
				table.insert(output, IPA)
				table.insert(
					characteristics,
					{ has_descender = has_descender, is_diacritic = is_diacritic }
				)
				break
			end
		end
	end

	-- Put the protected strings back in place of their placeholders.
	local converted = mw.ustring.gsub(
		table.concat(output),
		PLACEHOLDER_PATTERN,
		function(dollars)
			return escaped[string.len(dollars)]
		end
	)

	return converted
end

-- Converts X-SAMPA to IPA.
--
-- @param frame either a frame-like table (the first argument of the parent
--              frame is preferred, then the frame's own first argument) or
--              the X-SAMPA string itself
-- @return the IPA string
-- Raises an error if no text can be found.
function p.X2IPA(frame)
	local text

	if type(frame) == "table" then
		-- getParent() may return nil; guard before reading its args.
		local parent = frame.getParent and frame:getParent()
		text = parent and parent.args[1] or frame.args and frame.args[1]
	else
		text = frame
	end

	if text == nil then
		error("No text provided")
	end

	return _XSAMPAtoIPA(text)
end

-- Wraps text in a span carrying the "IPA" class.
local function _IPAspan(text)
	return "<span class=\"IPA\">" .. text .. "</span>"
end

-- Builds a wikitext fragment containing the {{x2i}} invocation for the given
-- X-SAMPA text followed by "\n| " and the converted IPA, for use in example
-- tables.
--
-- @param frame a frame-like table (parent's first argument is preferred, then
--              the frame's own) or the X-SAMPA string itself
-- @return the wikitext fragment
-- Raises an error if no text can be found.
function p.example(frame)
	local text

	if type(frame) == "table" then
		local parent = frame.getParent and frame:getParent()
		text = parent and parent.args[1] or frame.args and frame.args[1]
	elseif type(frame) == "string" then
		text = frame
	end

	if not text then
		error("No text provided")
	end

	local output = { " <code>{{[[mw:Manual:Substitution|subst:]][[Template:x2i|x2i]]|" }

	-- An "=" in an unnamed template argument would be parsed as "name=value",
	-- so show the explicit "1=" form.
	if mw.ustring.find(text, "=") then
		table.insert(output, "1=")
	end

	table.insert(output, text)
	table.insert(output, "}}</code>")
	table.insert(output, "\n| ")
	table.insert(output, _IPAspan(p.X2IPA(text)))

	return table.concat(output)
end

return p