Module:mai-Tirh-translit
Jump to navigation
Jump to search
- The following documentation is located at Module:mai-Tirh-translit/documentation. [edit]
- Useful links: subpage list • links • transclusions • testcases • sandbox
This module will transliterate text in the Tirhuta script.
The module should preferably not be called directly from templates or other modules.
To use it from a template, use {{xlit}}.
Within a module, use Module:languages#Language:transliterate.
For testcases, see Module:mai-Tirh-translit/testcases.
Functions
tr(text, lang, sc)
- Transliterates a given piece of `text` written in the script specified by the code `sc`, and language specified by the code `lang`.
- When the transliteration fails, returns `nil`.
-- Transliteration for Maithili in Tirhuta script
-- Table returned at the end of the module; the public `tr` function is attached to it.
local export = {}

-- Local shorthands for the mw.ustring (Scribunto UTF-8 string library)
-- functions that the rest of the module calls as `gsub` and `match`.
local gsub, match = mw.ustring.gsub, mw.ustring.match
local conv = {
-- consonants
["๐"] = "k", ["๐"] = "kh", ["๐"] = "g", ["๐"] = "gh", ["๐"] = "แน
",
["๐"] = "c", ["๐"] = "ch", ["๐"] = "j", ["๐"] = "jh", ["๐"] = "รฑ",
["๐"] = "แนญ", ["๐"] = "แนญ", ["๐"] = "แธ", ["๐"] = "แธh", ["๐"] = "แน",
["๐"] = "t", ["๐"] = "th", ["๐ "] = "d", ["๐ก"] = "dh", ["๐ข"] = "n",
["๐ฃ"] = "p", ["๐ค"] = "ph", ["๐ฅ"] = "b", ["๐ฆ"] = "bh", ["๐ง"] = "m",
["๐จ"] = "y", ["๐ฉ"] = "r", ["๐ช"] = "l", ["๐ซ"] = "v", ["๐ช๐"] = "แธท",
["๐ฌ"] = "ล", ["๐ญ"] = "แนฃ", ["๐ฎ"] = "s", ["๐ฏ"] = "h",
["๐๐"] = "แน", ["๐๐"] = "แนh",
-- vowel diacritics
["๐ฑ"] = "i", ["๐ณ"] = "u", ["๐น"] = "ฤ", ["๐บ"] = "e", ["๐ผ"] = "ล", ["๐ฝ"] = "o", ["๐ฐ"] = "ฤ", ["๐ฒ"] = "ฤซ", ["๐ด"] = "ลซ",
["๐ต"] = "rฬฅ", ["๐ถ"] = "rฬฅฬ", ["๐ป"] = "ai", ["๐พ"] = "au", ["๐ท"] = "lฬฅ", ["๐ธ"] = "lฬฅฬ",
-- vowels
["๐"] = "a", ["๐"] = "ฤ", ["๐"] = "i", ["๐"] = "ฤซ", ["๐
"] = "u", ["๐"] = "ลซ",
["๐"] = "rฬฅ", ["๐"] = "rฬฅฬ", ["๐"] = "lฬฅ", ["๐"] = "lฬฅฬ",
["๐"] = "ฤ", ["๐"] = "ai", ["๐"] = "ล", ["๐"] = "au",
["เฅจ"] = "โ", ["๐๐บ"] = "รช",["๐๐ฝ"] = "รด",
["เคฝ"] = "ยฒ", -- avagraha
["๐ฟ"] = "ฬ", -- chandrabindu
["๐"] = "ฬ", -- anusvara
["๐
"] = "ฬ", -- gvang
["๐"] = "แธฅ", -- visarga
["๐"] = "", -- virama
["๐"] = "omฬ", -- om
-- numerals
["๐"] = "0", ["๐"] = "1", ["๐"] = "2", ["๐"] = "3", ["๐"] = "4", ["๐"] = "5", ["๐"] = "6", ["๐"] = "7", ["๐"] = "8", ["๐"] = "9",
-- punctuation
["เฅค"] = ".", -- danda
["เฅฅ"] = ".", -- double danda
["+"] = "", -- compound separator
-- abbreviation sign
["เฅฐ"] = "."
}
local nasal_assim = {
["๐"] = "๐", ["๐"] = "๐", ["๐"] = "๐", ["๐"] = "๐",
["๐"] = "๐", ["๐"] = "๐", ["๐"] = "๐", ["๐"] = "๐", ["๐"] = "๐",
["๐"] = "๐", ["๐"] = "๐", ["๐"] = "๐", ["๐"] = "๐",
["๐"] = "๐ข", ["๐"] = "๐ข", ["๐ "] = "๐ข", ["๐ก"] = "๐ข", ["๐ข"] = "๐ข",
["๐ฃ"] = "๐ง", ["๐ค"] = "๐ง", ["๐ฅ"] = "๐ง", ["๐ฆ"] = "๐ง", ["๐ง"] = "๐ง"
}
local perm_cl = {
["๐ง๐๐ช"] = true
}
local all_cons, special_cons = "๐๐๐๐๐๐๐๐๐๐๐๐๐๐๐๐๐๐ ๐ก๐ข๐ฃ๐ค๐ฅ๐ฆ๐ง๐จ๐ฉ๐ช๐ซ๐ฎ๐ฌ๐ญ๐ฏ๐ฉ", "๐๐๐๐๐๐ฎ๐ฏ๐จ๐๐๐๐๐ฃ๐ ๐๐ก๐ฉ๐ญ๐๐ช๐ซ๐ฅ๐ฆ๐๐๐ข๐ฌ๐ง"
local vowel, vowel_sign = "a๐ฑ๐ณ๐ต๐น๐ผ๐ฐ๐ฒ๐ด๐ป๐พ๐ฐ๐ป๐ฝ๐ฐ๐ฐ๐ฝ๐บ", "๐๐๐
๐๐๐๐๐๐๐๐๐บ๐๐ฝ๐ซ๐๐จ๐๐๐๐๐๐๐"
local syncope_pattern = '([' .. vowel .. vowel_sign .. '])(๐?[' .. all_cons .. '])a(๐?[' .. all_cons .. '])([๐๐ฟ]?[' .. vowel .. vowel_sign .. '])'
-- Returns `text` with its characters in reverse order.
-- Characters are counted and extracted with mw.ustring.len / mw.ustring.sub
-- rather than the byte-oriented string library, so each multi-byte character
-- is moved as a single unit.
local function rev_string(text)
	local total = mw.ustring.len(text)
	local reversed = {}
	-- Walk from the last character to the first, filling the output
	-- table from slot 1 upwards.
	for pos = total, 1, -1 do
		reversed[total - pos + 1] = mw.ustring.sub(text, pos, pos)
	end
	return table.concat(reversed)
end
function export.tr(text, lang, sc)
text =
gsub(
text,
"([" .. all_cons .. "]๐?)([" .. vowel .. "๐]?)",
function(c, d)
return c .. (d == "" and "a" or d)
end
)
for word in mw.ustring.gmatch(text, "[๐ฟ-เฅฅa]+") do
local orig_word = word
word = rev_string(word)
word = gsub(
word,
'^a(๐?)([' .. all_cons .. '])(.)(.?)',
function(opt, first, second, third)
local a = ""
if match(first, '[' .. special_cons .. ']')
and match(second, '๐')
and not perm_cl[first..second..third]
or match(first .. second, '๐จ[๐ฒ๐บ]') then
a = "a"
end
return a .. opt .. first .. second .. third
end
)
while match(word, syncope_pattern) do
word = gsub(word, syncope_pattern, "%1%2แต%3%4")
end
word =
gsub(
word,
"(.?)๐(.)",
function(succ, prev)
return succ ..
(succ .. prev == "a" and "๐๐ง" or
(succ == "" and match(prev, "[" .. vowel .. "]") and "ฬ" or nasal_assim[succ] or "ฬ")) ..
prev
end
)
local escaped_orig_word = gsub(orig_word, "%+", "")
text = gsub(text, orig_word, rev_string(word))
text = gsub(text, "๐๐๐", "gy")
end
text = gsub(text, "ฤ([iu])ฬ", "ฤอ %1")
text = gsub(text, "uu", "u")
text = gsub(text, "aรข", "รข")
text = gsub(text, "ii", "i")
text = gsub(text, "([iฤซaฤuลซeoรข])a", "%1")
text = gsub(text, "[<>]", "")
text = gsub(text, ".๐?", conv)
return mw.ustring.toNFC(text)
end
return export