Module:ne-IPA

From Wiktionary, the free dictionary
Jump to navigation Jump to search

Nepali IPA pronunciation module. See {{ne-IPA}}.

Testcases

Module:ne-IPA/testcases:

1 of 54 tests failed. (refresh)

TextExpectedActualComments
test_phonetic:
Passedविवाह (vivāha)[bibä̤ː][bibä̤ː]
Passedश्रावण (śrāwaṇ)[sräwʌɽ̃][sräwʌɽ̃]
Passedहलो (halo)[ɦʌlo][ɦʌlo]
Passedजहाज (jahāj)[d͡zä̤ː(d)z][d͡zä̤ː(d)z]
Passedकार्य (kārya)[kärːe][kärːe]
Passedपर्यो (paryo)[pʌɾjo][pʌɾjo]
Passedविश्व (viśwa)[bisːo][bisːo]
Passedअक्षर (akṣar)[ʌk̚t͡sʰer][ʌk̚t͡sʰer]
Passedक्षेत्री (kṣetrī)[t͡sʰet̪ri][t͡sʰet̪ri]
Passedत्यस (tyas)[t̪es][t̪es]
Passedविश्व (viśwa)[bisːo][bisːo]
Passedराज्य (rājya)[räd̚d͡ze][räd̚d͡ze]
Passedअङ्ग्रेजी (aṅgrejī)[ʌŋɡre(d)zi][ʌŋɡre(d)zi]
Passedपेय (peya)[pe(j)ʌ][pe(j)ʌ]
Passedअन्तर्राष्ट्रिय (antarrāṣṭriya)[ʌn̪t̪ʌrːäs̠t̠rie][ʌn̪t̪ʌrːäs̠t̠rie]dental assimilation
Passedसय (saya)[sʌe][sʌe]non-initial ya before consonants
Passedसत्य (satya)[sʌt̪t̪e][sʌt̪t̪e]gemination of preceding consonant and replacement with e
Passedयस (yas)[es][es]Initial ya replacement with e
Passedविश्वास (viśvās)[biswäs][biswäs]w to b before certain vowels and glides
Passedविश्व (viśwa)[bisːo][bisːo]
Passedवरिपरि (waripari)[wʌɾipʌɾi][wʌɾipʌɾi]w elsewhere
Passedघरको (gharko)[ɡʱʌ̤rko][ɡʱʌ̤rko]breathy vowel after voiced aspirates
Passedअर्थ (artha)[ʌrt̪ʰʌ][ʌrt̪ʰʌ]
Passedअर्को (arko)[ʌrko][ʌrko]
Passedतारा (tārā)[t̪äɾä][t̪äɾä]tapped r intervocalically
Passedघोडा (ghoḍā)[ɡʱo̤ɽä][ɡʱo̤ɽä]retroflex tap postvocalically
Passedडर (ḍar)[ɖʌr][ɖʌr]ḍ elsewhere
Passedकस्को (kasko)[kʌsko][kʌsko]
Passedकसको (kasko)[kʌsko][kʌsko]
Passedलर्क (larka)[lʌrkʌ][lʌrkʌ]turned v into schwa final syllable
Passedसम्म (samma)[sʌmːʌ][sʌmːʌ]
Passedसंस्था (sansthā)[sʌnst̪ʰä][sʌnst̪ʰä]
Passedरुची (rucī)[rut͡si][rut͡si]
Passedहाम्रो (hāmro)[ɦämro][ɦämro]
Passedबाह्र (bāhra)[bäɾʌ][bäɾʌ]
Passedबाह्रखरी (bāhrakharī)[bäɾʌk̞ʰʌɾi][bäɾʌk̞ʰʌɾi]
Passedबाह्य (bāhya)[bäi̯e][bäi̯e]
Passedचर्चा (carcā)[t͡sʌrt͡sä][t͡sʌrt͡sä]
Passedरचना (racanā)[rʌt͡sʌnä][rʌt͡sʌnä]
Passedटर्रो (ṭarro)[ʈʌrːo][ʈʌrːo]
Passedआर (ār)[är][är]
Passedईर् (īr)[ir][ir]
Passedरातो (rāto)[rät̪o][rät̪o]
Failedउर (ura)[ur][uɾʌ]
Passedदुई (duī)[d̪ui̯][d̪ui̯]
Passedचप्पल (cappal)[t͡sʌp̚pʌl][t͡sʌp̚pʌl]
Passedपक्का (pakkā)[pʌk̚kä][pʌk̚kä]
Passedअस्ट्रेलिया (asṭreliyā)[ʌs̠t̠relijä][ʌs̠t̠relijä]
Passedभिज़न (bhizan)[bʱi̤zʌn][bʱi̤zʌn]
Passedऑस्ट्रेलिया (ŏsṭreliyā)[ɒs̠t̠relijä][ɒs̠t̠relijä]
Passedॲप (ĕp)[æp][æp]
Passedज़ू ()[zu][zu]
Passedद़िस (ðis)[ðis][ðis]
Passedथ़िक (θik)[θik][θik]

local export = {}

local lang = require("Module:languages").getByCode("ne")
local sc = require("Module:scripts").getByCode("Deva")
local m_IPA = require("Module:IPA")

local find = mw.ustring.find
local gcodepoint = mw.ustring.gcodepoint
local gmatch = mw.ustring.gmatch
local gsub = mw.ustring.gsub
local u = require("Module:string/char")

-- Lookup table from transliteration characters to their IPA output.
-- export.toIPA passes this table as the replacement argument of
-- gsub(translit, ".", correspondences); characters without an entry are left
-- as they are by gsub.
-- NOTE(review): that lookup matches one character at a time, so keys longer
-- than one character (e.g. "ai", "āu") do not appear reachable through that
-- call — confirm before relying on them.
local correspondences = {
    -- consonants, signs and punctuation
    ["ṅ"] = "ŋ",
    ["g"] = "ɡ",
    ["c"] = "t͡s",
    ["j"] = "d͡z",
    ["ċ"] = "t͡s",
    ["j̈"] = "d͡z",
    ["ñ"] = "n",
    ["ṭ"] = "ʈ",
    ["ḍ"] = "ɖ",
    ["ṇ"] = "ɳ",
    ["t"] = "t̪",
    ["d"] = "d̪",
    ["y"] = "j",
    ["v"] = "b",
    ["w"] = "w",
    ["l"] = "l",
    ["ś"] = "s",
    ["ṣ"] = "s",
    ["h"] = "ɦ",
    ["r̥"] = "ri",
    ["ṃ"] = "̃",
    ["ॽ"] = "",
    ["ऽ"] = "ː",
    ["'"] = "",
    [","] = ",",
    ["r"] = "r",
    ["ỹ"] = "j̃",
    ["ⁿ"] = "̃",
    ["ṛ"] = "ɽ",
    ["n"] = "n",
    ["ž"] = "ʒ",
    ["z"] = "z",
    ["ġ"] = "ɣ",
    ["q"] = "q",
    ["x"] = "x",
    ["θ"] = "θ",
    ["ð"] = "ð",
    ["f"] = "f",
    ["ḻ"] = "ɭ",
    ["ṟ"] = "ɹ",
    ["V"] = "v",
    -- vowels, diphthongs and nasalised vowels
    ["a"] = "ʌ",
    ["ā"] = "ä",
    ["i"] = "i",
    ["ī"] = "i",
    ["o"] = "o",
    ["e"] = "e",
    ["u"] = "u",
    ["ū"] = "u",
    ["ai"] = "ʌi̯",
    ["au"] = "ʌu̯",
    ["ŏ"] = "ɒ",
    ["ĕ"] = "æ",
    ["āu"] = "äu̯",
    ["āi"] = "äi̯",
    ["ãi"] = "ʌ̃ĩ̯",
    ["ũ"] = "ũ",
    ["ãu"] = "ʌ̃ũ̯",
    ["õ"] = "õ",
    ["ã"] = "ʌ̃",
    ["ā̃"] = "ä̃",
    ["ẽ"] = "ẽ",
    -- length mark and word separator
    ["ḥ"] = "ː",
    [" "] = "‿"
}

-- Characters treated as vowels; spliced into the character classes of
-- syllabify_pattern below.
local vowels = "aāā̃ẽõiīuūi̯u̯e̞eī̃ū̃ĩoää̃ʌʌ̃ãũŏĕ̤"
-- Consonants after which a following "h" is rewritten to "ʱ" in export.toIPA.
local weak_h_c = "gjdḍd̪ṇɽbṛvrṟwy"
-- Stress marks; not referenced anywhere else in the visible part of this file.
local primary_stress = "ˈ"
local secondary_stress = "ˌ"
-- Pattern: one weak_h_c consonant (captured) immediately followed by "h".
local weak_h = "([" .. weak_h_c .. "])h"
-- Pattern: one consonant (captured) after which a following "h" becomes "ʰ".
local aspirate = "([kctṭt̪p])"
-- Pattern with three captures: a vowel run, a run of characters that are
-- neither vowels nor ".", and another vowel run.
local syllabify_pattern = "([" .. vowels .. "]+)([^" .. vowels .. "%.]+)([" .. vowels .. "]+)"

--- Splits a string into a list of consonant units.
-- A unit is either a single character from the consonant class or such a
-- character followed by "h" (from the aspirable class). Any other character
-- closes the unit collected so far and starts a new one.
-- @param text string to split
-- @return array of strings; concatenating them yields `text` again
local function find_consonants(text)
    local single = "^[kgṅcjñṭḍṇɽtdnpbmɽ̃yrlɳwvɾjwśṣshqxġzžḻṛṟfθðṉ]$"
    local aspirated = "^[kgcjṭḍṇtɽdɽ̃pbṛṟ]h$"
    local units = {}
    local pending = ""
    -- The appended space matches neither pattern, so it flushes the last unit
    -- into the list (the space itself is never stored).
    for codepoint in gcodepoint(text .. " ") do
        local ch = u(codepoint)
        local candidate = pending .. ch
        if find(candidate, single) or find(candidate, aspirated) then
            pending = candidate
        else
            units[#units + 1] = pending
            pending = ch
        end
    end
    return units
end

--- Applies the intervocalic-cluster pass to `text`.
-- Every match of syllabify_pattern (vowel run, non-vowel run, vowel run) has
-- its middle run split by find_consonants and joined back with a separator
-- inserted after the first unit (or before it when there is only one unit).
-- The separator is currently the empty string, so the text content itself is
-- not altered by this step.
-- The pass runs twice; presumably so that clusters skipped by the first
-- non-overlapping gsub pass are handled by the second — confirm.
-- @param text string to process
-- @return the processed string
local function syllabify(text)
    for _ = 1, 2 do
        text =
            gsub(
            text,
            syllabify_pattern,
            function(a, b, c)
                -- Declared local: the original assignment created a global
                -- variable on every callback invocation.
                local b_set = find_consonants(b)
                table.insert(b_set, #b_set > 1 and 2 or 1, "")
                return a .. table.concat(b_set) .. c
            end
        )
    end
    return text
end

-- Characters that are written the same way in transliteration and in IPA:
-- register each one in `correspondences` as mapping to itself.
local identical = "knlsfzθð"
for same in gmatch(identical, ".") do
    correspondences[same] = same
end

--- Transliterates `text` with the language object's transliterate method.
-- Only the first return value is passed on to the caller.
-- @param text string to transliterate
-- @return the first value returned by lang:transliterate(text)
local function transliterate(text)
    local first_result = lang:transliterate(text)
    return first_result
end

--- Builds a link to `term` through Module:links' full_link, using this
-- module's language and script objects.
-- @param term the term to link
-- @return whatever full_link returns
function export.link(term)
    local m_links = require("Module:links")
    local link_data = {term = term, lang = lang, sc = sc}
    return m_links.full_link(link_data)
end

--- Converts a term to an IPA string.
-- Steps, in order:
--   1. If the best script found for `text` reports isTransliterated(), the
--      text is transliterated; otherwise it is used as given.
--   2. When `phonetic` is truthy, a series of ordered allophonic rewrites is
--      applied (assimilation, h-dropping, gemination, schwa handling, ...).
--      The order of these substitutions matters: later rules see the output
--      of earlier ones.
--   3. General rewrites, syllabify() and the aspiration rules are applied.
--   4. Each character is mapped through `correspondences`, and the result is
--      tidied up (diacritic order, doubled marks, placeholder symbols).
-- @param text the term, in native script or already transliterated
-- @param phonetic truthy to apply the phonetic (allophonic) rules
-- @return the IPA string, without surrounding brackets
-- Raises an error if transliteration yields nil/false.
function export.toIPA(text, phonetic)
    local translit = text
    if lang:findBestScript(text):isTransliterated() then
        translit = transliterate(text)
    end
    if not translit then
        error('The term "' .. text .. '" could not be transliterated.')
    end

    if phonetic then
        translit = gsub(translit, "([aʌäāiīuūeoŏĕɔæɛʌ̃ä̃ĩũī̃ū̃ẽõɔ̃e̤])([nl])([td])", "%1%2̪%3") -- dental assimilation
        translit = gsub(translit, "([aʌäāiīuūeoŏĕɔæɛʌ̃ä̃ĩũī̃ū̃ẽõɔ̃e̤])ṇ([ṭyḍ])", "%1ɳ%2")
        translit = gsub(translit, "([aʌäāiīuūeoŏĕɔæɛʌ̃ä̃ĩũī̃ū̃ẽõɔ̃e̤])n([ṭḍ])", "%1ɳ%2") -- retroflex assimilation 2nd
        -- Remaining ṇ becomes the placeholder ʀ, which is turned into ɽ̃ in the
        -- formatting stage at the end of this function.
        translit = gsub(translit, "ṇ", "ʀ")
        -- glottal stop
        translit = gsub(translit, "ॽ", "ʔ")
        translit = gsub(translit, "'", "ʔ")
        translit = gsub(translit, "(h?)r̥", "ɾi") -- hr̥ ligature initially
        translit = gsub(translit, "([aʌäāiīuūeoŏĕɔæɛʌ̃ä̃ĩũī̃ū̃ẽõɔ̃e̤])kṣ", "%1k̚t͡sʰy") -- kṣ ligature
        translit = gsub(translit, "()kṣ", "t͡sʰ") -- kṣ initial
        translit = gsub(translit, "([aʌäāiīuūeoŏĕɔæɛʌ̃ä̃ĩũī̃ū̃ẽõɔ̃e̤])h%1", "%1̤ː") -- h dropping
        translit = gsub(translit, "([aʌäāiīuūeoŏĕɔæɛʌ̃ä̃ĩũī̃ū̃ẽõɔ̃e̤])h([iīuūĩũī̃ū̃])", "%1%2̯")
        translit = gsub(translit, "([uūũū̃])h([aʌäāiīuūeoŏĕɔæɛʌ̃ä̃ĩũī̃ū̃ẽõɔ̃e̤])", "%1%2")
        translit = gsub(translit, "([ʌ̃ʌaã])h([ä̃äāā̃])", "%2̤ː")
        translit = gsub(translit, "([ää̃āā̃])h([ʌʌ̃aã])", "%1̤ː")
        translit = gsub(translit, "([iĩīī̃])h([uũūū̃])", "%1%2")
        translit = gsub(translit, "([uũūū̃])h([iĩīī̃])", "%1%2")
        translit = gsub(translit, "([iīaʌ])h([äāeä̃ā̃ẽ])", "%1(ɦ)%2")

        translit = gsub(translit, "([aʌäāiīuūeoŏĕɔæɛʌ̃ä̃ĩũī̃ū̃ẽõɔ̃e̤])h([nml])", "%1%2%2")
        translit = gsub(translit, "([aʌäāiīuūeoŏĕɔæɛʌ̃ä̃ĩũī̃ū̃ẽõɔ̃e̤])hr([aʌäāiīuūeoŏĕ])", "%1ɾ%2")
        translit = gsub(translit, "([aʌäāiīuūeoŏĕɔæɛʌ̃ä̃ĩũī̃ū̃ẽõɔ̃e̤])hy([aʌäāiīuūeoŏĕ])", "%1i̯y%2")
        translit = gsub(translit, "([ñnl])([cj])", "%1̻%2")
        translit = gsub(translit, "([śṣs])([ṭ])(h)", "s̠t̠ʰ")
        translit = gsub(translit, "([śṣs])([ḍ])(h)", "s̠d̠ʱ")
        translit = gsub(translit, "([śṣs])([ṭ])", "s̠t̠")
        translit = gsub(translit, "([śṣs])([ḍ])", "s̠d̠")
        translit = gsub(translit, "([l])([ṭʈɖḍ])", "ɭ%2")
        translit = gsub(translit, "([ṭḍʈɖ])(h?)([l])", "%1%2ɭ")
        translit = gsub(translit, "([ṭʈḍ])(h?)([n])", "%1%2ɳ")
        translit = gsub(translit, "([td])(h?)([n])", "%1%2%3̪")
        translit = gsub(translit, "([nmṅɳŋɲñṇ])([iīuū])([̃ṃ]?)([iīuū])([̃ṃ]?)", "%1%2̃%4̃")

        translit = gsub(translit, "([ʌʌ̃äaāisīːũuĩī̃ūeẽãū̃ä̃ā̃õo̤])([p])([h])", "%1ɸ")
        translit = gsub(translit, "([ʌʌ̃äaāisīːũuĩī̃ūeẽãū̃ä̃ā̃õo̤])([k])([h])", "%1k̞ʰ")
        -- ʐ is a placeholder that the formatting stage expands to (d)z.
        translit = gsub(translit, "([ʌʌ̃äaāisīːũuĩī̃ūeẽãū̃ä̃ā̃õo̤])([j])([h]?)", "%1ʐ")
        translit = gsub(translit, "([ʌʌ̃äaāisīːũuĩī̃ūeẽãū̃ä̃ā̃õo̤])b(h?)([ʌʌ̃äaāisīːũuĩī̃ūeẽãū̃ä̃ā̃õo̤])", "%1b%3")
        translit = gsub(translit, "([ʌʌ̃äaāisīːũuĩī̃ūeẽãū̃ä̃ā̃õo̤])([d])([h])", "%1%2")
        translit = gsub(translit, "([ʌʌ̃äaāisīːũuĩī̃ūeẽãū̃ä̃ā̃õo̤])g(h?)", "%1ɡ̞")
        translit = gsub(translit, "c(h?)c(h)", "t̚t͡sʰ")
        translit = gsub(translit, "cc", "t̚t͡s")
        translit = gsub(translit, "j(h?)j(h)", "d̚d͡zʱ")
        translit = gsub(translit, "jj", "d̚d͡z")
        translit = gsub(translit, "([kɡtdʈṭpb])(̪?)([ʰʱ]?)([ %.ˈ]?)([kɡtdṭʈpb])([ʰʱ]?)", "%1%2%3̚%4%5%6") -- no audible release
        translit = gsub(translit, "([aʌäāiīuūoŏĕɔæɛʌ̃ä̃ĩũī̃ū̃õɔ̃o̤])ya", "%1e") -- ya to e before vowel
        translit = gsub(translit, "([e])ya", "%1(y)ʌ")
        translit =
            gsub(
            translit,
            "([aʌäāiīuūeoŏĕɔæɛʌ̃ä̃ĩũī̃ū̃ẽõɔ̃e̤])([gɡbtṭdmhncjvwṛṛ̃ṅɽśṣɳszʐlkpḍç])(̞?)([ʰhʱ]?)([wv])a",
            "%1%2%2%4o"
        ) -- gemination of preceding consonant
        translit = gsub(translit, "()([gbtṭdmhncjvwṇṅɽzśṣɳslkpḍ])([wv])a", "%2o")
        translit =
            gsub(
            translit,
            "([aʌäāiīuūeoŏĕɔæɛʌ̃ä̃ĩũī̃ū̃ẽõɔ̃e̤])([rbtṭdmɡgncjvwṇʐzṅśṣɳslkpç])(̞?)([ʰhʱ]?)ya",
            "%1%2%2%4e"
        ) -- gemination of preceding consonant
        translit = gsub(translit, "()([grbtṭdmncjvwṅzśṣɳslkpḍ])ya", "%2e") -- no gemination
        translit = gsub(translit, "()ya", "e") -- ya to e
        translit = gsub(translit, "([aʌäāiīuūeoŏĕɔæɛʌ̃ä̃ĩũī̃ū̃ẽõɔ̃e̤ːˑ])ḍ(h?)(y?)", "%1ɽ%3") -- postvocalic allophone of ḍ and ḍh
        translit = gsub(translit, "([bdgjḍ])h([ʌäaāiīuūeeoo])", "%1ʱ%2̤") -- breathy voice
        -- force final schwa
        translit = gsub(translit, "a~$", "ʌ")
        translit = gsub(translit, "a$", "ʌ") -- final schwa retention
        translit = gsub(translit, "(...)a ", "%1ʌ ") -- final schwa retention before a space
        translit = gsub(translit, "([ʌʌ̃äaāiīuūəãā̃ī̃ĩũū̃ẽõeeo̯o ̤])r([ʌʌ̃äaāiīuūəãā̃ī̃ĩũū̃ẽõeeyo̯o])", "%1ɾ%2")
        translit = gsub(translit, "m̐", "ːm")
    end

    -- The original code ran the `correspondences` lookup here as well and
    -- stored it in a local that was never read before being redeclared below;
    -- that dead pass has been removed.

    translit = gsub(translit, "͠", "̃")
    ---translit = gmatch(translit, '^[^-]') and ('ˈ' .. gsub(translit, ' ', ' ˈ'))
    -- Note: "*" directly after a capture is a literal asterisk in Lua patterns,
    -- so this matches three characters followed by a trailing "*".
    translit = gsub(translit, "(...)*$", "%1ʌ") -- second person mid-respect verb form
    translit = gsub(translit, "m̐", "m")
    translit = gsub(translit, "rr̥", "ri")
    translit =
        gsub(
        translit,
        "()([śsnlcjzkhptdgb])([vw])([aʌäāiīuūeoŏĕɔæɛʌ̃ä̃ĩũī̃ū̃ẽõɔ̃e̤])([cspdtngkbrjyṇṣśṇɾṅñṃ])",
        "%2w%4%5"
    )
    translit =
        gsub(
        translit,
        "([aʌäāiīuūea̤eo̤ʌ̤ä̤ṳṳ̄i̤ī̤oŏĕɔæɛʌ̃ä̃ĩũī̃ū̃ẽõɔ̃e̤])([̤]?)(̃?)([ṃ]?)([iīūu])(̃?)([ṃ]?)",
        "%1%2%3%4%5̯%6%7"
    )
    translit = gsub(translit, "([ʌaʌ̤a̤])([iu])̯̃", "ʌ̃%2̃")
    translit = gsub(translit, "([āā̤ā̃aää̃ä̤ʌuūiīeʌ̃ãʌ̤a̤])̃([̤]?)([iuūī])̯", "%1̃%2%3̯̃")
    translit = gsub(translit, "([aʌäāiīuūea̤eo̤ʌ̤ä̤ṳṳ̄i̤ī̤oŏĕɔæɛe̤])([̤]?)([iuūī])̯(̃)", "%1̃%2%3̯%4")
    translit = gsub(translit, "%-", " ")
    translit = gsub(translit, "r̥", "ri")
    translit = syllabify(translit)

    -- aspiration rules
    translit = gsub(translit, aspirate .. "h", "%1ʰ")
    translit = gsub(translit, weak_h, "%1ʱ")
    translit = gsub(translit, "([" .. weak_h_c .. "])%.h", ".%1ʱ")
    translit = gsub(translit, aspirate .. "%.h", ".%1ʰ")
    translit = gsub(translit, "%.ː", "ː.")

    -- Map every remaining character through the correspondence table;
    -- characters without an entry are kept unchanged.
    local result = gsub(translit, ".", correspondences)

    -- formatting
    result = gsub(result, "ː̃", "̃ː")
    result = gsub(result, "ː.̃", "̃ː.")
    result = gsub(result, "([snmrfv])%1", "%1ː")
    result = gsub(result, "t̪͡s", "t͡s")
    result = gsub(result, "t̪̠", "t̠")
    result = gsub(result, "d̪̠", "d̠")
    result = gsub(result, "i̯̯", "i̯")
    result = gsub(result, "u̯̯", "u̯")
    result = gsub(result, "%. ", " ")
    result = gsub(result, "%.$", " ")
    result = gsub(result, "%.?%-", ".")
    result = gsub(result, "t̪̚t͡s", "t̚t͡s")
    result = gsub(result, "([ʌäaāiīuūe̞eo]̃)̃", "%1")
    result = gsub(result, "ĩ̯̯̃", "ĩ̯")
    result = gsub(result, "ĩ̯̯", "ĩ̯")
    result = gsub(result, "d̪̚d̪͡z", "d̚d͡z")
    -- Expand the placeholders introduced by the phonetic rules above.
    result = gsub(result, "ʐʐ", "d̚d͡z")
    result = gsub(result, "ʐ", "(d)z")
    result = gsub(result, "ʀ", "ɽ̃")
    result = gsub(result, "ː%.̃", "̃ː.")
    return result
end

--- Template entry point: renders the phonetic IPA of one or more terms.
-- The terms are the positional arguments of the parent frame (empty strings
-- are skipped); when no first argument is given, the current page title is
-- used instead.
-- @param frame the Scribunto frame object of the #invoke call
-- @return the value returned by Module:IPA's format_IPA_full
function export.make(frame)
    local args = frame:getParent().args
    local terms = {}

    if args[1] then
        for _, item in ipairs(args) do
            -- Skip empty arguments explicitly instead of relying on
            -- table.insert(t, nil) being a no-op.
            if item ~= "" then
                terms[#terms + 1] = item
            end
        end
    else
        terms = {mw.title.getCurrentTitle().text}
    end

    -- The original loop also computed export.toIPA(term) and
    -- export.toIPA(term, true) a second time, only to guard a commented-out
    -- statement; those redundant conversions have been dropped.
    local results = {}
    for _, Nepali in ipairs(terms) do
        results[#results + 1] = {pron = "[" .. export.toIPA(Nepali, true) .. "]"}
    end

    return m_IPA.format_IPA_full { lang = lang, items = results }
end

return export