Module:IPAc2-mh
Implements {{IPAc2-mh}}
-- This module is primarily maintained at:
-- https://en.wiktionary.org/wiki/Module:mh-pronunc
-- Please direct all technical queries and contributions there.
-- The version of this script on Wikipedia is only a mirror.
--
-- Overview of the pipeline implemented below:
--   parse()                 template text -> pseudo-X-SAMPA phoneme strings
--   toBender()              phoneme strings -> Bender-style orthography
--   toPhonemic()            phoneme strings -> phonemic IPA
--   toPhonetic()            phoneme strings -> phonetic IPA, built from
--                           toPhoneticDialect() + toPhoneticRemainder()
--   toMOD()                 cedilla/macron letters -> dot-below/tilde letters

local export = {}

-- Compile-time switches for the phonetic output.
local MERGED_VOWELS = false
local PARENTHETICAL_EPENTHESIS = true
local PHONETIC_DETAILS = false
local W_OFF_GLIDES = true

-- Combining characters.
local ASYLL = "̯"
local BREVE = "̆"
local CEDILLA = "̧"
local MACRON = "̄"
local TIE = "͡"
local TIE2 = "͜"

-- Pattern fragments for the internal pseudo-X-SAMPA notation:
-- a consonant is a primary letter followed by a secondary letter (j, G or w).
local C1_ = "pbtdSZszkgmnNrlyYhH_"
local C1 = "["..C1_.."]"
local C2_ = "jGw"
local C = ".["..C2_.."]"
local V_ = "aEeiAV7MQOou"
local V = "["..V_.."]"
local VI_ = V_.."I"
local VI = "["..VI_.."]"
local S = "[%s%-]+"
local UTF8_CHAR = "[%z\1-\127\194-\244][\128-\191]*"
local EMPTY = {}

-- Adds elements to a sequence as if it's a set (retains unique elements only).
local function addUnique(seq, value)
	for _, value2 in pairs(seq) do
		if value == value2 then
			return
		end
	end
	seq[#seq + 1] = value
end

-- Intended to work the same as JavaScript's Object.assign() function.
-- Sources are applied strictly left to right so that later sources override
-- earlier ones; non-table arguments (including nil) are skipped.
local function assign(target, ...)
	for index = 1, select("#", ...) do
		local source = select(index, ...)
		if type(source) == "table" then
			for key, value in pairs(source) do
				target[key] = value
			end
		end
	end
	return target
end

-- Returns `text` without leading and trailing whitespace.
local function fastTrim(text)
	return string.match(text, "^%s*(.-)%s*$")
end

-- Interprets a template argument as a boolean.
-- Only strings can be true: after removing every non-alphanumeric character,
-- anything other than "", "0" or "false" (any letter case) counts as true.
local function parseBoolean(text)
	if type(text) == "string" then
		text = string.gsub(text, "[^0-9A-Za-z]", "")
		if text ~= "" and text ~= "0" and string.lower(text) ~= "false" then
			return true
		end
	end
	return false
end

-- Splits `text` into the matches of `pattern` (default: one UTF-8 character
-- per match) and stores them in `chars` (default: a new table), which is
-- returned. When a reused `chars` was longer than the new content, the stale
-- tail is cleared; with a truthy `shorten` only the single element at the
-- first stale position is removed through table.remove().
local function splitChars(text, pattern, chars, shorten)
	chars = chars or {}
	-- Measure before filling: taking the length again while nil holes are
	-- being punched into the table is unspecified and may leave stale items.
	local oldLength = #chars
	local index = 1
	for ch in string.gmatch(text, pattern or UTF8_CHAR) do
		chars[index] = ch
		index = index + 1
	end
	if index <= oldLength then
		if shorten then
			table.remove(chars, index)
		else
			for stale = oldLength, index, -1 do
				chars[stale] = nil
			end
		end
	end
	return chars
end

-- Applies the same string.gsub() substitution twice in a row, so that
-- matches overlapping a first-pass match are also substituted.
local function string_gsub2(text, pattern, subst)
	return string.gsub(string.gsub(text, pattern, subst), pattern, subst)
end

-- Nil-safe nested lookup of up to three keys: value[key1][key2][key3].
-- Returns `value` itself when it is not a table or when key1 is nil, and nil
-- when an intermediate value is not a table.
local function tableGet(value, key1, key2, key3)
	if type(value) ~= "table" or key1 == nil then
		return value
	end
	value = value[key1]
	if key2 == nil then
		return value
	end
	if type(value) ~= "table" then
		return nil
	end
	value = value[key2]
	if key3 == nil then
		return value
	end
	if type(value) ~= "table" then
		return nil
	end
	return value[key3]
end

-- Builds a set (key -> true) from `text` split on `sep` (default: a space).
local function ZTBL(text, sep)
	local tbl = {}
	for key in mw.text.gsplit(text, sep or " ") do
		tbl[key] = true
	end
	return tbl
end

local PARSE_PSEUDO_GLIDE = {
	["y"] = "0",
	["h"] = "0h",
	["w"] = "0w"
}

local PARSE_C_CH_CW = {
	["k"] = "kG",
	["kh"] = "kGh", -- N\A
	["kw"] = "kW",
	["l"] = "lJ",
	["lh"] = "lG",
	["lw"] = "lW",
	["m"] = "mJ",
	["mh"] = "mG",
	["mw"] = "mJw", -- N\A
	["n"] = "nJ",
	["nh"] = "nG",
	["nw"] = "nW",
	["ng"] = "NG",
	["ngh"] = "NGh", -- N\A
	["ngw"] = "NW",
	["r"] = "rG",
	["rh"] = "rGh", -- N\A
	["rw"] = "rW",
	["0"] = "_J",
	["0h"] = "_G",
	["0w"] = "_W"
}

local PARSE_REMAINING = {
	["b"] = "pG",
	["d"] = "rj",
	["e"] = "E",
	["&"] = "e",
	["h"] = "hG",
	["j"] = "tj",
	["J"] = "j",
	["p"] = "pj",
	["t"] = "tG",
	["w"] = "hw",
	["W"] = "w",
	["y"] = "hj",
	["z"] = "yj",
	["Z"] = "Yj",
	["'"] = ""
}

-- Parses the comma-separated template text `code` into a sequence of unique
-- pseudo-X-SAMPA phoneme strings.
-- Raises an error when an item contains unsupported characters or violates
-- the phonotactic checks below.
local function parse(code)
	local outSeq = {}
	code = mw.ustring.gsub(code, "%s+", " ")
	code = string.lower(code)
	for text in mw.text.gsplit(code, " *,[ ,]*") do
		text = fastTrim(text)
		if text ~= "" then
			local temp = string.gsub(text, "[abdeghijklmnprtwy_&'%- ]", "")
			if temp ~= "" then
				error("'"..code.."' contains unsupported characters: "..temp)
			end

			-- Recognize "y_", "h_", "w_", "_y", "_h", "_w" as pseudo-glides.
			text = string.gsub(text, "_*([hwy])_+", PARSE_PSEUDO_GLIDE)
			text = string.gsub(text, "_+([hwy])", PARSE_PSEUDO_GLIDE)
			if string.find(text, "_") then
				error("contains misplaced underscores: "..code)
			end

			-- a plain {i} protected from dialect-specific reflexes
			text = string.gsub(text, "'i", "I")

			-- "yi'y" and "'yiy" sequences
			text = string.gsub(text, "('?)yi('*)y", function(aposA, aposB)
				if aposA ~= "" then
					-- "dwelling upon" i
					return "Z"
				elseif aposB ~= "" then
					-- "passing over lightly" i
					return "z"
				end
			end)

			-- Convert multigraphs to pseudo-X-SAMPA format.
			text = string.gsub(text, "[klmnr0]g?[hw]?", PARSE_C_CH_CW)
			if string.find(text, "g") then
				error("contains g that is not part of ng: "..code)
			end

			-- Convert remaining sequences to pseudo-X-SAMPA format.
			text = string.gsub(text, ".", PARSE_REMAINING)

			-- Enforce CVC, CVCVC, CVCCVC, etc. phonotactics,
			-- but allow VC, CV at affix boundaries
			-- where a vowel may link to another morpheme's consonant.
			temp = string.gsub(text, "[%s%-]+", "")
			if string.find(temp, "_..[jGw]") or string.find(temp, ".[jGw]_.") then
				error("pseudo-glides may not neighbor a consonant: "..code)
			end
			if string.find(temp, VI.."_."..VI) then
				error("pseudo-glides may only be at the beginning or end: "..code)
			end
			if string.find(temp, VI..VI) then
				error("vowels must be separated by a consonant: "..code)
			end
			if string.find(temp, ".[jGw].[jGw].[jGw]") then
				error("each consonant cluster is limited to two: "..code)
			end
			if string.find(temp, ".[jGw].[jGw]$") then
				error("may not end with a consonant cluster: "..code)
			end
			-- gsub is used here only to run the check on the initial cluster.
			string.gsub(temp, "^(.[jGw])(.[jGw])", function(consonX, consonY)
				if consonX ~= consonY then
					error("may only begin with single or geminated consonant: "
						..code)
				end
			end)

			if text ~= "" then
				addUnique(outSeq, text)
			end
		end
	end
	return outSeq
end

local BENDER_1968 = {
	["pj"] = "p",
	["pG"] = "b",
	["tj"] = "j",
	["tG"] = "t",
	["kG"] = "k",
	["kw"] = "q",
	["mj"] = "m",
	["mG"] = "ṁ",
	["nj"] = "n",
	["nG"] = "ṅ",
	["nw"] = "n̈",
	["NG"] = "g",
	["Nw"] = "g̈",
	["rj"] = "d",
	["rG"] = "r",
	["rw"] = "r̈",
	["lj"] = "l",
	["lG"] = "ł",
	["lw"] = "l̈",
	["yj"] = "yi'y",
	["Yj"] = "'yiy",
	["hj"] = "y",
	["hG"] = "h",
	["hw"] = "w",
	["_j"] = "",
	["_G"] = "",
	["_w"] = "",
	["a"] = "a",
	["E"] = "e",
	["e"] = "&",
	["i"] = "i",
	["I"] = "i"
}

local BENDER_MED = assign({}, BENDER_1968, {
	["mG"] = "m̧",
	["nG"] = "ņ",
	["nw"] = "ņ°",
	["Nw"] = "g°",
	["rw"] = "r°",
	["lG"] = "ļ",
	["lw"] = "ļ°",
	["e"] = "ȩ"
})

local BENDER_MOD = assign({}, BENDER_MED, {
	["kw"] = "kʷ",
	["mG"] = "ṃ",
	["nG"] = "ṇ",
	["nw"] = "ṇʷ",
	["Nw"] = "gʷ",
	["rw"] = "rʷ",
	["lG"] = "ḷ",
	["lw"] = "ḷʷ",
	["e"] = "ẹ"
})

local BENDER_DEFAULT = assign({}, BENDER_MOD, {
	["mG"] = "m̧",
	["nG"] = "ņ",
	["nw"] = "ņʷ",
	["lG"] = "ļ",
	["lw"] = "ļʷ",
	["e"] = "ȩ"
})

local BENDER_MAPS = {
	["1968"] = BENDER_1968,
	["med"] = BENDER_MED,
	["mod"] = BENDER_MOD
}

-- Converts parsed phoneme strings to Bender-style orthography.
-- `args.version` (case-insensitive string) selects the mapping; any other
-- value falls back to the default mapping. Returns a sequence of unique
-- strings in input order.
local function toBender(inSeq, args)
	-- "1968" is from "Marshallese Phonology" (1968 by Byron W. Bender).
	-- "med" is from the Marshallese-English Dictionary (1976).
	-- "mod" is from the Marshallese-English Online Dictionary.
	-- "default" is the same as "mod" but with cedillas.
	local version = args and args.version
	local map = BENDER_MAPS[
		type(version) == "string" and string.lower(version) or ""
	] or BENDER_DEFAULT
	local outSeq = {}
	for _, text in ipairs(inSeq) do
		text = string.gsub(text, ".[jGw]?", map)
		addUnique(outSeq, text)
	end
	return outSeq
end

local TO_MOD = {
	["Ȩ"] = "Ẹ",
	["ȩ"] = "ẹ",
	["Ļ"] = "Ḷ",
	["ļ"] = "ḷ",
	["M̧"] = "Ṃ",
	["m̧"] = "ṃ",
	["Ņ"] = "Ṇ",
	["ņ"] = "ṇ",
	["N̄"] = "Ñ",
	["n̄"] = "ñ",
	["O̧"] = "Ọ",
	["o̧"] = "ọ"
}

-- Replaces each letter listed in TO_MOD (optionally followed by a combining
-- cedilla or macron) with its counterpart; other text is left unchanged.
local function toMOD(text)
	text = mw.ustring.gsub(text, ".["..CEDILLA..MACRON.."]?", TO_MOD)
	return text
end

local PHONEMIC_MAP = {
	["pj"] = "pʲ",
	["pG"] = "pˠ",
	["tj"] = "tʲ",
	["tG"] = "tˠ",
	["kG"] = "k",
	["kw"] = "kʷ",
	["mj"] = "mʲ",
	["mG"] = "mˠ",
	["nj"] = "nʲ",
	["nG"] = "nˠ",
	["nw"] = "nʷ",
	["NG"] = "ŋ",
	["Nw"] = "ŋʷ",
	["rj"] = "rʲ",
	["rG"] = "rˠ",
	["rw"] = "rʷ",
	["lj"] = "lʲ",
	["lG"] = "lˠ",
	["lw"] = "lʷ",
	["hj"] = "j",
	["hG"] = "ɰ",
	["hw"] = "w",
	["_j"] = "",
	["_G"] = "",
	["_w"] = "",
	["a"] = "æ",
	["E"] = "ɛ",
	["e"] = "e",
	["i"] = "i",
	["I"] = "i"
}
-- Disabled alternative vowel symbols.
if false then
	assign(PHONEMIC_MAP, {
		["a"] = "ɐ",
		["E"] = "ə",
		["e"] = "ɘ",
		["i"] = "ɨ",
		["I"] = "ɨ"
	})
end
assign(PHONEMIC_MAP, {
	["yj"] = PHONEMIC_MAP.hj..PHONEMIC_MAP.i..ASYLL..PHONEMIC_MAP.hj,
	["Yj"] = PHONEMIC_MAP.hj..PHONEMIC_MAP.i..PHONEMIC_MAP.hj..PHONEMIC_MAP.hj
})

-- Converts parsed phoneme strings to phonemic IPA.
-- Returns a sequence of unique strings in input order.
local function toPhonemic(inSeq)
	local outSeq = {}
	for _, text in ipairs(inSeq) do
		text = string.gsub(text, ".[jGw]?", PHONEMIC_MAP)
		addUnique(outSeq, text)
	end
	return outSeq
end

local VOWEL = { -- VOWELS[f1][f2]
	{ "a", "A", "Q" },
	{ "E", "V", "O" },
	{ "e", "7", "o" },
	{ "i", "M", "u" }
}
-- F1[vowel] is the row and F2[vowel] the column of the vowel in VOWEL.
-- F2 additionally maps each secondary articulation to a column.
local F1 = {}
local F2_FRONT = 1
local F2_BACK = 2
local F2_ROUND = 3
local F2 = {
	["j"] = F2_FRONT,
	["G"] = F2_BACK,
	["w"] = F2_ROUND
}
-- For every vowel, the front / back / round vowel of the same row.
local FRONT_VOWEL = {}
local BACK_VOWEL = {}
local ROUND_VOWEL = {}
for f1, row in ipairs(VOWEL) do
	local front = row[F2_FRONT]
	local back = row[F2_BACK]
	local round = row[F2_ROUND]
	for f2, vowel in ipairs(row) do
		F1[vowel] = f1
		F2[vowel] = f2
		FRONT_VOWEL[vowel] = front
		BACK_VOWEL[vowel] = back
		ROUND_VOWEL[vowel] = round
	end
end

-- Returns the front vowel of the greatest VOWEL row among the given vowels
-- (one, two or three of them).
local function maxF1(a, b, c)
	if c then
		return VOWEL[math.max(F1[a], F1[b], F1[c])][F2_FRONT]
	elseif b then
		return VOWEL[math.max(F1[a], F1[b])][F2_FRONT]
	else
		return FRONT_VOWEL[a]
	end
end

-- Height of the vowel inserted before a geminated initial consonant.
-- The protected "I" has no entry in F1, so it is looked up as a plain "i";
-- without this, maxF1() would raise an error for such input.
local function geminateVowel(vowel)
	return maxF1(vowel == "I" and "i" or vowel, "E")
end

-- Applies the dialect-specific rewrites (initial geminates and initial
-- {yiyV-, yiwV-, wiwV-} sequences) to one phoneme string and returns the
-- result. `isRalik` selects the Rālik reflexes, otherwise the Ratak ones.
-- `config` is accepted but not read here.
local function toPhoneticDialect(text, config, isRalik)

	-- Morphemes can begin with geminated consonants, but spoken words cannot.
	text = string.gsub(text, "^(.[jGw])( *)%1( *)("..VI..")",
		function(conson, _, __, vowel)
			if conson == "hG" then
				if isRalik then
					return "hG"..vowel.._.."hG"..__..vowel
				else
					return "hG".._..__..vowel
				end
			else
				if isRalik then
					return "hj"..geminateVowel(vowel)..conson.._..conson..__..vowel
				else
					return conson..geminateVowel(vowel).._..conson..__..vowel
				end
			end
		end
	)

	-- Initial {yiyV-, yiwV-, wiwV-} sequences have special behavior.
	-- To block this in the template argument, use "'i" instead of "i".
	-- A space is prepended so the pattern can also match at the very start.
	text = " "..text
	text = string.gsub(text,
		"([ jGw])( *)(h[jw])( *)i( *)(h[jw])( *)("..VI..")",
		function(nonVowel, _, consonX, __, ___, consonY, ____, vowel)
			if consonY == "hw" then
				-- {yiwV-, wiwV-} sequences
				if isRalik then
					-- Rālik {wiwV-} becomes {yiwV-}.
					consonX = "hj"
				end
				-- {[yw]iwV-} becomes {[yw]iwwV-} in both dialects.
				return nonVowel.._..consonX..__..
					"I"..___..consonY..____..consonY..vowel
			elseif consonX == "hj" then
				-- {yiyV-} sequences
				if isRalik then
					-- "dwelling upon" i
					return nonVowel.._..__.."Yj"..___..____..vowel
				else
					-- "passing over lightly" i
					return nonVowel.._..__.."yj"..___..____..vowel
				end
			end
		end
	)
	text = string.sub(text, 2)

	-- Restore protected {i}, we won't be checking for it anymore.
	text = string.gsub(text, "I", "i")

	return text
end

local IS_VOWEL = FRONT_VOWEL

local VOWEL_REFLEX
if true then
	-- [f1]
	local aEei = { "a", "E", "e", "i" }
	local AEei = { "A", "E", "e", "i" }
	local AV7i = { "A", "V", "7", "i" }
	local AV7M = { "A", "V", "7", "M" }
	local AV7u = { "A", "V", "7", "u" }
	local AOou = { "A", "O", "o", "u" }
	local QOou = { "Q", "O", "o", "u" }
	-- [F2[secondaryR]][f1]
	local _jv_X = { aEei, AEei, QOou }
	local njv_X = { aEei, AV7i, QOou }
	local hjvtX = { aEei, aEei, QOou }
	local hjvkX = { AV7i, AV7i, QOou }
	local _Gv_X = { AV7i, AV7M, QOou }
	local rGv_X = { AEei, AV7M, QOou } -- not currently used
	local hGv_X = { AV7M, AV7M, AV7M }
	local _wv_X = { AV7u, AOou, QOou }
	local rwv_X = { AOou, AOou, QOou }
	local hwv_X = { AV7M, AOou, QOou }
	local hwvtX = { AV7M, AV7M, QOou }
	-- [F2[secondaryL]][F2[secondaryR]][f1]
	local _Xv__ = { _jv_X, _Gv_X, _wv_X }
	local nXv__ = { njv_X, _Gv_X, hwv_X }
	local rXv__ = { _jv_X, _Gv_X, rwv_X }
	local hXv__ = { _jv_X, hGv_X, hwv_X }
	local hXvt_ = { hjvtX, hGv_X, hwvtX }
	local hXvk_ = { hjvkX, hGv_X, _wv_X }
	local hXvr_ = { hjvtX, hGv_X, hwv_X }
	-- [primaryR][F2[secondaryL]][F2[secondaryR]][f1]
	local __vX_ = {
		["p"] = _Xv__,
		["t"] = _Xv__,
		["k"] = _Xv__,
		["m"] = _Xv__,
		["n"] = _Xv__,
		["N"] = _Xv__,
		["r"] = _Xv__,
		["l"] = _Xv__
	}
	local n_vX_ = {
		["p"] = nXv__,
		["t"] = nXv__,
		["k"] = nXv__,
		["m"] = nXv__,
		["n"] = nXv__,
		["N"] = nXv__,
		["r"] = nXv__,
		["l"] = nXv__
	}
	local r_vX_ = {
		["p"] = rXv__,
		["t"] = rXv__,
		["k"] = rXv__,
		["m"] = rXv__,
		["n"] = rXv__,
		["N"] = rXv__,
		["r"] = rXv__,
		["l"] = _Xv__
	}
	local h_vX_ = {
		["p"] = hXv__,
		["t"] = hXvt_,
		["k"] = hXvk_,
		["m"] = hXv__,
		["n"] = hXv__,
		["N"] = hXvk_,
		["r"] = hXvr_,
		["l"] = hXv__
	}
	-- [primaryL][primaryR][F2[secondaryL]][F2[secondaryR]][f1]
	VOWEL_REFLEX = {
		["p"] = __vX_,
		["t"] = __vX_,
		["k"] = __vX_,
		["m"] = __vX_,
		["n"] = n_vX_,
		["N"] = n_vX_,
		["r"] = r_vX_,
		["l"] = n_vX_,
		["h"] = h_vX_
	}
end

-- CONSON_REFLEX[primary][secondary] gives the primary letter to display.
-- Only t, n, r and l change (to T, J, R, L) and only when palatalized.
local CONSON_REFLEX
if true then
	local map = {
		["t"] = { ["j"] = "T" },
		["n"] = { ["j"] = "J" },
		["r"] = { ["j"] = "R" },
		["l"] = { ["j"] = "L" }
	}
	for primary in mw.text.gsplit("ptkmnNrl", "") do
		local map2 = map[primary]
		if not map2 then
			map2 = {}
			map[primary] = map2
		end
		map2["j"] = map2["j"] or primary
		map2["G"] = map2["G"] or primary
		map2["w"] = map2["w"] or primary
	end
	map["T"] = map["t"]
	map["J"] = map["n"]
	map["R"] = map["r"]
	map["L"] = map["l"]
	CONSON_REFLEX = map
end

local VOICED_PRIMARY = {
	["p"]="b",
	["t"]="d",
	["T"]="D",
	["S"]="Z",
	["s"]="z",
	["k"]="g"
}

local VOICELESS_PRIMARY = {
	["b"]="p",
	["d"]="t",
	["D"]="T",
	["Z"]="S",
	["z"]="s",
	["g"]="k"
}

-- Final lookup table from internal notation to IPA. Besides the literal
-- entries it is filled with every primary+secondary combination and every
-- vowel followed by "@" or "^" that does not already have an entry.
local PHONETIC_IPA
if true then
	local map = {
		["p"] = "p",
		["b"] = "b",
		["B"] = "β̞",
		["t"] = "t",
		["d"] = "d",
		["s"] = "s",
		["z"] = "z",
		["k"] = "k",
		["g"] = "ɡ",
		["m"] = "m",
		["n"] = "n",
		["N"] = "ŋ",
		["r"] = "r",
		["l"] = "l",
		["Hj"] = "j",
		["HG"] = "ʔ",
		["Hw"] = "w",
		["_"] = "‿",
		["j"] = "ʲ",
		["G"] = "ˠ",
		["w"] = "ʷ",
		["a"] = "æ",
		["E"] = "ɛ",
		["e"] = "e",
		["i"] = "i",
		["A"] = "ɑ",
		["V"] = "ʌ",
		["7"] = "ɤ",
		["M"] = "ɯ",
		["Q"] = "ɒ",
		["O"] = "ɔ",
		["o"] = "o",
		["u"] = "u",
		["^"] = ASYLL,
		["@"] = ASYLL,
		["("] = "(",
		[")"] = ")",
		[":"] = "ː",
		["="] = TIE2
	}
	if PHONETIC_DETAILS then
		assign(map, {
			["t"] = "t̪",
			["T"] = "t̠",
			["d"] = "d̪",
			["D"] = "d̠",
			["s"] = "s̠",
			["z"] = "z̠",
			["k"] = "k̠",
			["g"] = "ɡ̠",
			["n"] = "n̠",
			["J"] = "n̪",
			["N"] = "ŋ̠",
			["r"] = "r̠",
			["R"] = "r̪",
			["l"] = "l̠",
			["L"] = "l̪",
			["a"] = "æ̝",
			["E"] = "ɛ̝",
			["E@"] = "e"..map["@"],
			["E^"] = "e"..map["^"],
			["Q"] = "ɒ̝",
			["O"] = "ɔ̝",
			["O@"] = "o"..map["@"],
			["O^"] = "o"..map["^"]
		})
	end
	map["T"] = map["T"] or map["t"]
	map["D"] = map["D"] or map["d"]
	map["S"] = map["S"] or (map["T"]..map["s"])
	map["Z"] = map["Z"] or (map["D"]..map["z"])
	map["kG"] = map["kG"] or map["k"]
	map["gG"] = map["gG"] or map["g"]
	map["J"] = map["J"] or map["n"]
	map["NG"] = map["NG"] or map["N"]
	map["R"] = map["R"] or map["r"]
	map["L"] = map["L"] or map["l"]
	map["Hj"] = map["Hj"] or map["i"]..map["^"]
	local key
	for primary in mw.text.gsplit("pbBtdTDSZszkgmnJNrRlL_", "") do
		for secondary in mw.text.gsplit("jGw", "") do
			key = primary..secondary
			map[key] = map[key] or (map[primary]..map[secondary])
		end
	end
	for vowel in mw.text.gsplit(V_, "") do
		key = vowel.."@"
		map[key] = map[key] or (map[vowel]..map["@"])
		key = vowel.."^"
		map[key] = map[key] or (map[vowel]..map["^"])
	end
	PHONETIC_IPA = map
end

-- Pseudo-glides tried, in this order, at a bare vowel edge.
local PSEUDO_GLIDES = { "_j", "_G", "_w" }

-- Converts one dialect-resolved phoneme string to phonetic IPA and appends
-- the result(s) to config.outSeq (unique values only). Returns nothing.
--   code       phoneme string from toPhoneticDialect()
--   config     table with fields outSeq, diphthongs, initialJ, medialJ,
--              finalJ, noHints, voice (see toPhonetic)
--   leftFlag   when truthy, a vowel after an initial pseudo-glide takes the
--              greater of the two neighboring F2 columns, else the lesser
--   rightFlag  the same for a vowel before a final pseudo-glide
-- The long sequence of substitutions is order-dependent.
local function toPhoneticRemainder(code, config, leftFlag, rightFlag)
	local text = code
	local chars, subst
	local diphthongs = config.diphthongs

	-- If the phrase begins or ends with a bare vowel
	-- and no pseudo-glide, display phrase up to five times
	-- with each of the different pseudo-glides and possible vowel reflexes.
	if IS_VOWEL[string.sub(text, 1, 1)] then
		for _, glide in ipairs(PSEUDO_GLIDES) do
			text = glide..code
			toPhoneticRemainder(text, config, false, rightFlag)
			if not diphthongs then
				toPhoneticRemainder(text, config, true, rightFlag)
			end
		end
		return
	end
	if IS_VOWEL[string.sub(text, -1)] then
		for _, glide in ipairs(PSEUDO_GLIDES) do
			text = code..glide
			toPhoneticRemainder(text, config, leftFlag, false)
			if not diphthongs then
				toPhoneticRemainder(text, config, leftFlag, true)
			end
		end
		return
	end

	local initialJ = config.initialJ
	local medialJ = config.medialJ
	local finalJ = config.finalJ
	local noHints = config.noHints
	local outSeq = config.outSeq
	local voice = config.voice

	-- Mode "x": run once with "t" and once with "s" in every "x" position,
	-- and join the distinct results with " ~ " into a single output item.
	if initialJ == "x" or medialJ == "x" or finalJ == "x" then
		local subSeq = {}
		config.outSeq = subSeq
		if initialJ == "x" then
			config.initialJ = "t"
		end
		if medialJ == "x" then
			config.medialJ = "t"
		end
		if finalJ == "x" then
			config.finalJ = "t"
		end
		-- The flags are forwarded so that the pseudo-glide variants
		-- requested by the caller are not lost in this mode.
		toPhoneticRemainder(code, config, leftFlag, rightFlag)
		if initialJ == "x" then
			config.initialJ = "s"
		end
		if medialJ == "x" then
			config.medialJ = "s"
		end
		if finalJ == "x" then
			config.finalJ = "s"
		end
		toPhoneticRemainder(code, config, leftFlag, rightFlag)
		addUnique(outSeq, table.concat(subSeq, " ~ "))
		-- Restore the caller's configuration.
		config.outSeq = outSeq
		config.initialJ = initialJ
		config.medialJ = medialJ
		config.finalJ = finalJ
		return
	end

	-- Glides always trigger epenthesis, even neighboring other glides.
	text = string_gsub2(text, "([aEei])( *h)(.)( *)(h)%3( *)([aEei])",
		function(vowelL, _, secondary, __, primaryR, ___, vowelR)
			if secondary == "w" then
				primaryR = "H"
			end
			return (
				vowelL.._..secondary..
				maxF1(vowelL, vowelR).."@"..
				__..primaryR..secondary..___..vowelR
			)
		end
	)
	text = string.gsub(text, "([aEei])( *)hG( *.[jGw])", "%1%2hG%1@%3")
	text = string.gsub(text, "(.[jGw])( *)hG( *)([aEei])", "%1%4@%2hG%3%4")
	text = string.gsub(text, "([aEei])( *)h(.)( *.[jGw])", "%1%2h%3%1@%4")
	text = string.gsub(text, "(.[jGw])( *)h(. *)([aEei])", "%1%4@%2h%3%4")
	text = string.gsub(text, "(.[jGw])( *[yY].)", "%1i@%2")

	-- Preserve these exceptionally stable clusters.
	text = string.gsub(text, "l([jG] *)tG", "l%1|tG")

	-- Unstable consonant clusters trigger epenthesis.
	-- Liquids before coronal obstruents.
	text = string.gsub(text, "([rl].)( *)t", "%1v%2t")
	-- Nasals and liquids after coronal obstruents.
	text = string.gsub(text, "t(.)( *[nrl])", "t%1v%2")
	-- Heterorganic clusters.
	-- Labial consonants neighboring coronal or dorsal consonants.
	text = string.gsub(text, "([pm].)( *[tnrlkN])", "%1v%2")
	-- Coronal consonants neighboring labial or dorsal consonants.
	text = string.gsub(text, "([tnrl].)( *[pmkN])", "%1v%2")
	-- Dorsal consonants neighboring labial or coronal consonants.
	text = string.gsub(text, "([kN].)( *[pmtnrl])", "%1v%2")

	-- Organic speech involves certain consonant cluster assimilations.
	-- Forward assimilation of rounded consonants.
	-- There is no rounded coronal obstruent.
	text = string.gsub(text, "(w *[^t])[jG]", "%1w")
	-- Backward assimilation of remaining secondary articulations.
	text = string.gsub(text, "[jGw]( *.)([jGw])", "%2%1%2")
	-- Backward nasal assimilation of primary articulations.
	text = string.gsub(text, "[pkrl](. *)([mnN])", "%2%1%2")

	-- No longer need to protect exceptionally stable consonant clusters.
	text = string.gsub(text, "|", "")

	-- Give a vowel height to all epenthetic vowels that still lack one.
	text = string_gsub2(text, "(.)( *..)v( *.. *)(.)",
		function(vowelL, consonL, consonR, vowelR)
			return vowelL..consonL..
				maxF1(vowelL, vowelR, "E").."@"..
				consonR..vowelR
		end
	)

	-- Tag all vowels for next set of operations.
	text = string.gsub(text, "([aEei])", "/%1")

	-- There is no variation in the surface realizations of vowels
	-- between two identical secondary articulations.
	text = string_gsub2(text, "([jGw])( *)/([aEei])(@? *.)%1",
		function(secondary, _, vowel, infix)
			return (
				secondary.._..VOWEL[F1[vowel]][F2[secondary]]..
				infix..secondary
			)
		end
	)

	if diphthongs then
		text = string_gsub2(text,
			"(.)([jGw])( *)/([aEei])(@?)( *)(.)([jGw])",
			function(
				primaryL, secondaryL, _, vowel, epenth, __,
				primaryR, secondaryR
			)
				local f1 = F1[vowel]
				return (
					primaryL..secondaryL.._..
					VOWEL[f1][F2[secondaryL]]..epenth.."="..
					VOWEL[f1][F2[secondaryR]]..epenth..__..
					primaryR..secondaryR
				)
			end
		)
	else
		-- Vowels neighboring pseudo-glides.
		subst = function(
			primaryL, secondaryL, _, vowel, epenth, __,
			primaryR, secondaryR, flag
		)
			local f2L = F2[secondaryL]
			local f2R = F2[secondaryR]
			local f2
			if flag then
				f2 = math.max(f2L, f2R)
			else
				f2 = math.min(f2L, f2R)
			end
			return (
				primaryL..secondaryL.._..
				VOWEL[F1[vowel]][f2]..epenth..__..
				primaryR..secondaryR
			)
		end
		text = string.gsub(text,
			"(_)([jGw])( *)/("..V..")(@?)( *)(.)([jGw])",
			function(a, b, c, d, e, f, g, h)
				return subst(a, b, c, d, e, f, g, h, leftFlag)
			end
		)
		text = string.gsub(text,
			"(.)([jGw])( *)/("..V..")(@?)( *)(_)([jGw])",
			function(a, b, c, d, e, f, g, h)
				return subst(a, b, c, d, e, f, g, h, rightFlag)
			end
		)

		-- Vowels between two non-glides have the most predictable reflexes.
		text = string_gsub2(text,
			"([ptkmnNrl])(.)( *)/([aEei])(@? *)([ptkmnNrl])(.)",
			function(
				primaryL, secondaryL, _, vowel, infix,
				primaryR, secondaryR
			)
				return primaryL..secondaryL.._..
					VOWEL_REFLEX[primaryL][primaryR]
					[F2[secondaryL]][F2[secondaryR]][F1[vowel]]..
					infix..primaryR..secondaryR
			end
		)

		-- Exceptionally for the single word "rej".
		text = string.gsub(text, "^(rG *)([V7])( *tj)$",
			function(prefix, vowel, suffix)
				return prefix..FRONT_VOWEL[vowel]..suffix
			end
		)

		-- Vowels always claim the secondary articulation
		-- of a neighboring back unrounded glide.
		text = string.gsub(text, "(hG *)/([aEei])", function(prefix, vowel)
			return prefix..BACK_VOWEL[vowel]
		end)
		text = string.gsub(text, "/([aEei])(@? *hG)", function(vowel, suffix)
			return BACK_VOWEL[vowel]..suffix
		end)

		-- Unless already claimed, epenthetic vowels after a glide
		-- always claim the secondary articulation to the left.
		text = string.gsub(text, "([hH])(.)( *)/([aEei])@",
			function(primaryL, secondaryL, _, vowel)
				return (
					primaryL..secondaryL.._..
					VOWEL[F1[vowel]][F2[secondaryL]].."@"
				)
			end
		)

		-- Unless already claimed, vowels before a glide
		-- always claim the secondary articulation to the right.
		text = string.gsub(text, "/([aEei])(@?)( *[hHyY])(.)",
			function(vowel, epenth, primaryR, secondaryR)
				return (
					VOWEL[F1[vowel]][F2[secondaryR]]..epenth..
					primaryR..secondaryR
				)
			end
		)

		-- For now, unless already claimed, vowels before a rounded consonant
		-- claim the secondary articulation to the right.
		text = string.gsub(text, "/([aEei])(@? *.w)", function(vowel, suffix)
			return ROUND_VOWEL[vowel]..suffix
		end)

		-- For now, unless already claimed, remaining vowels
		-- claim the secondary articulation to the left.
		text = string.gsub(text, "([jGw])( *)/([aEei])",
			function(secondaryL, _, vowel)
				return secondaryL.._..VOWEL[F1[vowel]][F2[secondaryL]]
			end
		)

		-- Change certain vowels in a special environment from round to front.
		text = string_gsub2(text, "(hj *)([Oou])( *.w *"..V.." *h[jh])",
			function(prefix, vowel, suffix)
				return prefix..FRONT_VOWEL[vowel]..suffix
			end
		)
		text = string.gsub(text, "(hj *)([Oou])( *)(.w)( *)("..V..")",
			function(prefix, vowelL, _, conson, __, vowelR)
				if conson ~= "hw" or F1[vowelL] ~= F1[vowelR] then
					return prefix..FRONT_VOWEL[vowelL].._..conson..__..vowelR
				end
			end
		)
		text = string.gsub(text, "(hj *)([Oou])( *.w *.w)",
			function(prefix, vowel, suffix)
				return prefix..FRONT_VOWEL[vowel]..suffix
			end
		)
		text = string.gsub(text, "(a@? *hj *)Q( *.w *"..V..")", "%1a%2")
		text = string.gsub(text, "(a@? *hj *)Q( *.w *.w)", "%1a%2")

		-- Tag certain glide-vowel-non-glide sequences for special reflexes.
		text = string.gsub(text,
			"([HyY][jw] *)("..V.." *[ptkmnNrl])", "%1/%2")
		text = string.gsub(text,
			"^ *(h[jw] *)("..V.." *[ptkmnNrl])", "%1/%2")
		text = string.gsub(text,
			"(@ *h[jw] *)("..V.." *[ptkmnNrl])", "%1/%2")
		text = string.gsub(text,
			"([EeiAV7MOou] *h[jw] *)([aAQ] *[ptkmnNrl])", "%1/%2")
		text = string.gsub(text,
			"([iMu] *hj *)([EeV7] *[kN]G)", "%1/%2")
		text = string.gsub(text,
			"(hj *[aEei]@? *hw *)("..V.." *[ptkmnNrl])", "%1/%2")
		-- Untag certain sequences, exempting them from special reflexes.
		text = string.gsub(text, "(hj *)/([aEei] *[knNrl]w)", "%1%2")
		-- Special reflexes.
		text = string.gsub(text, "([jw])( *)/("..V..")( *)(.)([jGw])",
			function(secondaryL, _, vowel, __, primaryR, secondaryR)
				return (
					secondaryL.._..
					VOWEL_REFLEX["h"][primaryR]
					[F2[secondaryL]][F2[secondaryR]][F1[vowel]]..
					__..primaryR..secondaryR
				)
			end
		)

		-- Exceptional phrase-initial reflex.
		text = string.gsub(text, "^ *([Hh]j *)([V7])( *[kN]G)",
			function(prefix, vowel, suffix)
				return prefix..FRONT_VOWEL[vowel]..suffix
			end
		)
		text = string.gsub(text, "^ *([Hh]w *)M( *tG)", "%1u%2")
	end

	-- Temporarily cancel epenthetic {i} neighboring {yi'y}.
	text = string.gsub(text, "i@( *yj)", "%1")

	-- {yi'y} neighboring {i} may now be demoted to {y}.
	text = string.gsub(text, "([iMu]@? *)yj", "%1hj")
	text = string.gsub(text, "yj( *[iMu])", "hj%1")

	-- {'yiy} may now be demoted everywhere.
	text = string.gsub(text, "(i@ *)Yj", "%1hjihj")
	text = string.gsub(text, "Yj", "hjihji@hj")

	-- For the purposes of this template,
	-- surface all glides pronounced in isolation.
	text = string.gsub(text, "^ *h(.) *$", "H%1")

	if not diphthongs then

		-- Opportunistically front these vowels.
		text = string.gsub(text,
			"(hj *)([A7M])( *[kN]G *[kN]?G? *"..V..")",
			function(prefix, vowel, suffix)
				return prefix..FRONT_VOWEL[vowel]..suffix
			end
		)

		-- Surface certain glides.
		text = string.gsub(text, "^ *h(w *[Oou])", "H%1")
		text = string.gsub(text, "h(w *[aEeiAV7M])", "H%1")
		text = string.gsub(text, "^ *h(j *[AV7MQOou])", "H%1")
		text = string.gsub(text, "([ptkmnNrl]..@ *)h(w *[Oou])", "%1H%2")
		text = string.gsub(text, "([ptkmnNrl]..@ *)h(j *"..V..")", "%1H%2")
		text = string.gsub(text,
			"([AV7MQOou]@? *)h(j *[AV7MQOou])", "%1H%2")
		text = string.gsub(text, "([aEeiAV7M])(@? *)hw( *)([QOou])",
			function(vowelL, infix, _, vowelR)
				if F1[vowelL] > F1[vowelR] then
					return vowelL..infix.."Hw".._..vowelR
				end
			end
		)
		text = string.gsub(text, "([AV7MQOou])(@? *)hj( *)([aEei])",
			function(vowelL, infix, _, vowelR)
				if F1[vowelL] > F1[vowelR] then
					return vowelL..infix.."Hj".._..vowelR
				end
			end
		)
		text = string.gsub(text, "([aEei])(@? *)hj( *)([AV7MQOou])",
			function(vowelL, infix, _, vowelR)
				if F1[vowelL] < F1[vowelR] then
					return vowelL..infix.."Hj".._..vowelR
				end
			end
		)
		text = string.gsub(text, "("..V..")( *)h([jw]) *$",
			function(vowel, _, secondary)
				if F2[vowel] ~= F2[secondary] then
					return vowel.._.."H"..secondary
				end
			end
		)

		-- Protect word-final epenthetic vowels after non-glides
		-- from the next operation.
		text = string.gsub(text, "([ptkmnNrl]."..V..")(@ )", "%1/%2")

		-- De-epenthesize vowels if they still neighbor unsurfaced glides.
		text = string.gsub(text, "("..V..")@( *h.)", "%1%2")
		text = string.gsub(text, "(h. *"..V..")@", "%1")

		-- Adjust F1 of currently remaining epenthetic vowels.
		text = string_gsub2(text,
			"("..V..")( *.[jGw])(.)@( *.[jGw] *)("..V..")",
			function(vowelL, infixL, vowel, infixR, vowelR)
				return (
					vowelL..infixL..
					VOWEL[F1[maxF1(vowelL, vowelR, "E")]][F2[vowel]].."/@"..
					infixR..vowelR
				)
			end
		)
		text = string.gsub(text, "/", "")

	end

	-- Delete all remaining unsurfaced glides.
	text = string.gsub(text, "h.", "")

	-- Surface realization for {yi'y}.
	text = string.gsub(text, "yj", "i^")

	if not diphthongs then

		-- Realization for surfaced {y}.
		text = string_gsub2(text, "("..V.."?)(@?)( *)Hj( *)("..V.."?)",
			function(vowelL, epenthL, _, __, vowelR)
				if vowelL ~= "" then
					if vowelR ~= "" then
						if vowelL == vowelR and F2[vowelL] == F2_FRONT then
							return vowelL.._..__..vowelR
						else
							return (
								vowelL..epenthL.._..
								maxF1(vowelL, vowelR, "E").."^"..__..vowelR
							)
						end
					else
						return vowelL.._..epenthL..maxF1(vowelL, "E").."^"..__
					end
				else
					if vowelR ~= "" then
						return _..maxF1(vowelR, "E").."^"..__..vowelR
					else
						return _.."i^"..__
					end
				end
			end
		)

		-- Flatten this epenthetic vowel and surfaced glide.
		text = string_gsub2(text,
			"([aAQ] *"..C..")E@( *)E%^( *)a", "%1a%2%3a")

		-- Collapse this epenthetic vowel and surfaced glide into a semi-vowel.
		text = string.gsub(text, "([aEei])@( *)%1%^", "%2%1^")

	end

	if MERGED_VOWELS then
		text = string.gsub(text, "[EO]", function(vowel)
			return VOWEL[F1[vowel] + 1][F2[vowel]]
		end)
	end

	chars = splitChars(text, ".")

	if not diphthongs then
		-- Geminate long vowels.
		-- Walk from the last character down to the second one. A bounded
		-- loop is used so that text shorter than two characters cannot
		-- make the index run past 1 and loop forever.
		for index = #chars, 2, -1 do
			local ch = chars[index]
			if IS_VOWEL[ch] then
				local ch2 = chars[index + 1]
				if ch2 ~= "@" and ch2 ~= "^" and chars[index - 1] == ch then
					chars[index] = ":"
				end
			end
		end
		text = table.concat(chars, "")
	end

	-- Tweak remaining consonants, using offsets as a guide.
	text = string.gsub(text, "()(.)([jGw])( *)([ptkmnNrl]?)([jGw]?)()",
		function(
			offsetL, primaryL, secondaryL, _,
			primaryR, secondaryR, offsetR
		)
			local isInitial = offsetL == 1
			local isFinal = offsetR == #chars + 1
			if primaryL == "H" or primaryL == "y" then
				return primaryL..secondaryL.._
			end
			if primaryL == "_" then
				if noHints then
					-- Delete pseudo-glide.
					return _
				end
				if isInitial then
					-- Show secondary articulation to the left, not the right.
					return secondaryL..primaryL.._
				end
				return primaryL..secondaryL.._
			end
			local geminated = primaryL == primaryR
			if primaryL ~= "t" and primaryR == "t" then
				-- /tʲ/ is palatalized postalveolar.
				-- /tˠ/ is velarized dental.
				-- /nʲ, rʲ, lʲ/ are palatalized dental.
				-- /nˠ, rˠ, lˠ/ are velarized postalveolar.
				-- Regressively assimilate primary dental or postalveolar.
				-- None of this will be visible unless PHONETIC_DETAILS == true.
				primaryL = CONSON_REFLEX[primaryL]
					[secondaryL == "j" and "G" or "j"]
				primaryR = CONSON_REFLEX[primaryR][secondaryR]
			else
				primaryL = CONSON_REFLEX[primaryL][secondaryL]
				if primaryR ~= "" then
					primaryR = CONSON_REFLEX[primaryR][secondaryR]
				end
			end
			if primaryR == "T" then
				if primaryL == "T" then
					primaryL = finalJ
					primaryR = initialJ
					if primaryL == "S" and primaryR ~= "s" then
						primaryL = "T"
					elseif primaryL == "T" and primaryR == "s" and
						medialJ == "S"
					then
						primaryL = "S"
					end
				else
					primaryR = medialJ
				end
			elseif primaryL == "T" then
				if isInitial then
					primaryL = initialJ
				elseif isFinal then
					primaryL = finalJ
				else
					primaryL = medialJ
				end
			end
			if primaryR ~= "" then
				-- Consonant cluster.
				-- For some reason, the {t} in {lt} and {ļt} is voiceless.
				if not geminated and primaryL ~= "l" and primaryL ~= "L" then
					primaryL = VOICED_PRIMARY[primaryL] or primaryL
					primaryR = VOICED_PRIMARY[primaryR] or primaryR
				end
				-- Display secondary articulation only once for the cluster.
				secondaryL = ""
			elseif not isInitial and not isFinal then
				-- Medial single consonant.
				primaryL = VOICED_PRIMARY[primaryL] or primaryL
			end
			if voice == false then
				primaryL = VOICELESS_PRIMARY[primaryL] or primaryL
				primaryR = VOICELESS_PRIMARY[primaryR] or primaryR
			elseif voice == true then
				primaryL = VOICED_PRIMARY[primaryL] or primaryL
				primaryR = VOICED_PRIMARY[primaryR] or primaryR
			end
			return primaryL..secondaryL.._..primaryR..secondaryR
		end
	)

	if not diphthongs then

		-- Elegantly connect long and epenthetic vowels across word gaps.
		text = string.gsub(text, "(["..V_..":]): +", "%1 : ")
		text = string.gsub(text, "("..V..") +%1([^%^])", "%1 :%2")
		text = string.gsub(text, "("..V..") +%1$", "%1 :")
		text = string.gsub(text, "("..V..")@ +%1", " %1 :")
		text = string.gsub(text, "("..V.."@) +", " %1 ")

		if W_OFF_GLIDES then
			-- Add [w] off-glides after certain consonants.
			subst = function(primary, _, epenth)
				if epenth == "" then
					return primary.."Hw".._
				end
			end
			if false and PHONETIC_DETAILS then
				text = string.gsub(text, "([pbm])(G *[aEei])(@?)",
					function(primary, _, epenth)
						if epenth == "" then
							return primary.."B".._
						end
					end
				)
			else
				text = string.gsub(text, "([pbm])G( *[aEei])(@?)", subst)
			end
			text = string.gsub(text, "([kgnNrl])w( *[aEeiAV7M])(@?)", subst)
			-- Remove [w] off-glides after certain consonants
			-- when they occur after rounded vowels.
			text = string.gsub(text, "([QOou] *[nrl]? *[nrl])Hw", "%1w")
			text = string.gsub(text, "([QOou] *[kgN]? *N)Hw( *M)", "%1w%2")
		end

	end

	if PARENTHETICAL_EPENTHESIS then
		if not diphthongs then
			text = string.gsub(text, "(.)@("..V..")", "%1^%2")
		end
		text = string.gsub(text, "(.)@", "(%1)")
		text = string.gsub(text, "%)(=?)%(", "%1")
		if not diphthongs and W_OFF_GLIDES then
			if false and PHONETIC_DETAILS then
				text = string.gsub(text, "([pbm]G%()([aEei])", "%1BG%2")
			else
				text = string.gsub(text, "([pbm]G%()([aEei])", "%1Hw%2")
			end
			text = string.gsub(text, "([kgnNrl]w%()([aEeiAV7M])", "%1Hw%2")
			text = string.gsub(text, "([QOou] *[nrl]w%()Hw", "%1")
			text = string.gsub(text, "([QOou] *Nw%()HwM", "%1M")
		end
	end

	-- Convert remaining word gaps to liaison.
	text = fastTrim(text)
	text = string.gsub(text, " +", false and "_" or "")

	text = string.gsub(text, ".[jGw@%^]?", PHONETIC_IPA)
	addUnique(outSeq, text)
end

local PHONETIC_ARG_J = {
	["t"] = "T",
	["c"] = "S",
	["s"] = "s",
	["x"] = "x"
}

-- Converts parsed phoneme strings to phonetic IPA.
-- `args` is an optional table of template arguments; the fields read are
-- dialect, diphthongs, J, nohints and voice (described below).
-- Returns a sequence of unique strings in input order.
local function toPhonetic(inSeq, args)
	-- Recognize "ralik" for Rālik Chain (western dialect).
	-- Recognize "ratak" for Ratak Chain (eastern dialect).
	-- For other values, list both possible dialect reflexes where applicable.
	local dialect = args and args.dialect and
		mw.ustring.lower(mw.text.trim(args.dialect)) or ""
	if dialect == "rālik" then
		dialect = "ralik"
	end

	-- If enabled, display full diphthong allophones for short vowels.
	local diphthongs = not not (args and parseBoolean(args.diphthongs))

	-- Argument "J" has format like "tst".
	-- Recognized letters are "t" = plosive, "c" = affricate, "s" = fricative.
	-- Letters for initial, medial and final respectively.
	-- Real-world pronunciation said to vary by sociological factors,
	-- but all realizations may occur in free variation.
	local modeJ = splitChars(args and args.J and string.lower(args.J) or "tst")
	local initialJ = PHONETIC_ARG_J[modeJ[1] or ""] or "t"
	local medialJ = PHONETIC_ARG_J[modeJ[2] or ""] or "s"
	local finalJ = PHONETIC_ARG_J[modeJ[3] or ""] or initialJ

	-- If enabled, do not display pseudo-glide hints at all.
	local noHints = not not (args and parseBoolean(args.nohints))

	-- "false" will display all obstruent allophones as voiceless.
	-- "true" will display all obstruent allophones as voiced.
	-- Empty string or absent by default will display
	-- only medial obstruent allophones as semi-voiced.
	local voice = args and args.voice or ""
	if voice ~= "" then
		voice = parseBoolean(voice)
	end

	local outSeq = {}
	local config = {
		["outSeq"] = outSeq,
		["diphthongs"] = diphthongs,
		["initialJ"] = initialJ,
		["medialJ"] = medialJ,
		["finalJ"] = finalJ,
		["noHints"] = noHints,
		["voice"] = voice
	}
	for _, str in ipairs(inSeq) do
		-- Normalize every run of whitespace/hyphens to one space and trim.
		str = string.gsub(str, S, " ")
		str = string.gsub(str, "^ *", "")
		str = string.gsub(str, " *$", "")
		local isRalik = dialect == "ralik"
		if isRalik or dialect == "ratak" then
			str = toPhoneticDialect(str, config, isRalik)
			toPhoneticRemainder(str, config)
		else
			local ralik = toPhoneticDialect(str, config, true)
			local ratak = toPhoneticDialect(str, config, false)
			-- If both dialect reflexes are the same, display only one of them.
			toPhoneticRemainder(ralik, config)
			if ralik ~= ratak then
				toPhoneticRemainder(ratak, config)
			end
		end
	end
	return outSeq
end

export._parse = parse
export._toBender = toBender
export._toMOD = toMOD
export._toPhonemic = toPhonemic
export._toPhonetic = toPhonetic

-- Template entry point: Bender-style orthography for argument 1.
-- The frame arguments go to toBender() so that "version" is honoured.
function export.bender(frame)
	return table.concat(toBender(parse(frame.args[1]), frame.args), ", ")
end

-- Template entry point: toMOD() applied to argument 1.
function export.MOD(frame)
	return toMOD(frame.args[1])
end

-- Template entry point: the parsed internal notation for argument 1.
function export.parse(frame)
	return table.concat(parse(frame.args[1]), ", ")
end

-- Template entry point: phonemic IPA for argument 1.
function export.phonemic(frame)
	return table.concat(toPhonemic(parse(frame.args[1])), ", ")
end

-- Template entry point: phonetic IPA for argument 1, configured by the
-- remaining frame arguments (see toPhonetic).
function export.phonetic(frame)
	return table.concat(toPhonetic(parse(frame.args[1]), frame.args), ", ")
end

function export.phoneticMED(frame)
	return "DEPRECATED"
end

function export.phoneticChoi(frame)
	return "DEPRECATED"
end

function export.phoneticWillson(frame)
	return "DEPRECATED"
end

return export