Module:tab-translit

From Wiktionary, the free dictionary
Jump to navigation Jump to search

This module will transliterate Tabasaran language text per WT:TAB TR. The module should preferably not be called directly from templates or other modules. To use it from a template, use {{xlit}}. Within a module, use Module:languages#Language:transliterate.

For testcases, see Module:tab-translit/testcases.

Functions

tr(text, lang, sc)
Transliterates a given piece of text written in the script specified by the code sc and in the language specified by the code lang.
When the transliteration fails, returns nil.

local u = require("Module:string/char")

local export = {}

-- Single-character lookup table: one Cyrillic letter -> its Latin rendering.
-- export.tr consults it last, one UTF-8 character at a time, after the
-- multi-letter tables have been applied; characters with no entry here are
-- left as they are.
-- Upper-case letters whose rendering has more than one Latin letter are
-- title-cased ("Šč", "Ju", "Ja"); "ъ"/"Ъ" and "ь"/"Ь" each share a single
-- caseless output.
local tt = {
	-- lower case
	["б"] = "b",
	["п"] = "p",
	["ф"] = "f",
	["в"] = "v",
	["м"] = "m",
	["д"] = "d",
	["т"] = "t",
	["й"] = "j",
	["н"] = "n",
	["з"] = "z",
	["ц"] = "c",
	["с"] = "s",
	["ж"] = "ž",
	["ш"] = "š",
	["щ"] = "šč",
	["л"] = "l",
	["ч"] = "č",
	["р"] = "r",
	["г"] = "g",
	["к"] = "k",
	["х"] = "ꭓ",
	["ъ"] = "ʾ",
	["а"] = "a",
	["е"] = "e",
	["ы"] = "y",
	["и"] = "i",
	["о"] = "o",
	["у"] = "u",
	["ё"] = "ë",
	["ь"] = "ʼ",
	["э"] = "e",
	["ю"] = "ju",
	["я"] = "ja",

	-- upper case
	["Б"] = "B",
	["П"] = "P",
	["Ф"] = "F",
	["В"] = "V",
	["М"] = "M",
	["Д"] = "D",
	["Т"] = "T",
	["Й"] = "J",
	["Н"] = "N",
	["З"] = "Z",
	["Ц"] = "C",
	["С"] = "S",
	["Ж"] = "Ž",
	["Ш"] = "Š",
	["Щ"] = "Šč",
	["Л"] = "L",
	["Ч"] = "Č",
	["Р"] = "R",
	["Г"] = "G",
	["К"] = "K",
	["Х"] = "Ꭓ",
	["Ъ"] = "ʾ",
	["А"] = "A",
	["Е"] = "E",
	["Ы"] = "Y",
	["И"] = "I",
	["О"] = "O",
	["У"] = "U",
	["Ё"] = "Ë",
	["Ь"] = "ʼ",
	["Э"] = "E",
	["Ю"] = "Ju",
	["Я"] = "Ja",
}

-- Three-character Cyrillic sequences and their Latin transliterations.
-- export.tr applies this table before `digraphs` and before the single-letter
-- table, so these longer sequences are consumed first.
-- Every key in this table ends in "в" and every value ends in "°".
-- Capitalised keys are listed only with an initial capital (e.g. "Хъв");
-- fully upper-case sequences have no entry here.
-- NOTE(review): the "кӏв"/"Кӏв" keys presumably contain the lowercase palochka
-- that export.tr normalises the uppercase one to — confirm the exact code point.
local trigraphs = {
	["хъв"] = "q°",
	["Хъв"] = "Q°",
	["къв"] = "q̄°",
	["Къв"] = "Q̄°",
	["кьв"] = "q̇°",
	["Кьв"] = "Q̇°",
	["гъв"] = "ġ°",
	["Гъв"] = "Ġ°",
	["кӏв"] = "ḳ°",
	["Кӏв"] = "Ḳ°",
	["ккв"] = "k̄°",
	["Ккв"] = "K̄°",
}

-- Two-character Cyrillic sequences and their Latin transliterations.
-- export.tr applies this table after `trigraphs` and before the single-letter
-- table.
-- Capitalised keys are listed only with an initial capital (e.g. "Жв");
-- fully upper-case sequences have no entry here.
-- Some keys can overlap in running text: the doubled-letter keys ("пп", "тт",
-- "цц", "чч", "кк") end in a letter that also starts other keys (e.g. "чч" and
-- "чв" both fit inside "ччв"). What such text becomes depends on how export.tr
-- applies this table.
local digraphs = {
	["жв"] = "ž°",
	["Жв"] = "Ž°",
	["чв"] = "č°",
	["Чв"] = "Č°",
	-- NOTE(review): "жъ"/"Жъ" is the only pair in this table whose value ends in
	-- "°" without the key ending in "в", and its base letter looks like the
	-- "чч" output rather than a "ж"-based one — confirm against WT:TAB TR.
	["жъ"] = "č̄°",
	["Жъ"] = "Č̄°",
	-- NOTE(review): "чъ"/"Чъ" appears to produce the same output as "чӏ"/"Чӏ"
	-- further down — confirm this is intended.
	["чъ"] = "č̣",
	["Чъ"] = "Č̣",
	["шв"] = "š°",
	["Шв"] = "Š°",
	["хв"] = "ꭓ°",
	["Хв"] = "Ꭓ°",
	["пп"] = "p̄",
	["Пп"] = "P̄",
	["пӏ"] = "ṗ",
	["Пӏ"] = "Ṗ",
	["тт"] = "t̄",
	["Тт"] = "T̄",
	["цӏ"] = "c̣",
	["Цӏ"] = "C̣",
	["цц"] = "c̄",
	["Цц"] = "C̄",
	["тӏ"] = "ṭ",
	["Тӏ"] = "Ṭ",
	["чч"] = "č̄",
	["Чч"] = "Č̄",
	["чӏ"] = "č̣",
	["Чӏ"] = "Č̣",
	["кь"] = "q̇",
	["Кь"] = "Q̇",
	["кк"] = "k̄",
	["Кк"] = "K̄",
	["кӏ"] = "ḳ",
	["Кӏ"] = "Ḳ",
	["хъ"] = "q",
	["Хъ"] = "Q",
	["къ"] = "q̄",
	["Къ"] = "Q̄",
	["гъ"] = "ġ",
	["Гъ"] = "Ġ",
	["гь"] = "h",
	["Гь"] = "H",
	["уь"] = "u̱",
	["Уь"] = "U̱",
	["хь"] = "x",
	["Хь"] = "X",
	["гв"] = "g°",
	["Гв"] = "G°",
	["кв"] = "k°",
	["Кв"] = "K°",
	["аь"] = "a̱",
	["Аь"] = "A̱",
}

-- Transliterates Tabasaran Cyrillic `text` into Latin script using the
-- module's `trigraphs`, `digraphs` and `tt` tables, in that order of priority.
--
-- Each stage runs over the whole text before the next one starts: trigraphs
-- first, then digraphs, then single letters. Within the trigraph and digraph
-- stages the text is scanned once from left to right and the leftmost matching
-- sequence is always taken. This makes the result deterministic when two keys
-- overlap in the input (e.g. "чч" and "чв" in "ччв"); looping over the tables
-- with pairs() and running one gsub per key would make such cases depend on
-- the table's unspecified traversal order.
--
-- @param text  string to transliterate
-- @param lang  language code; not consulted by this function
-- @param sc    script code; not consulted by this function
-- @return the transliterated string; characters (and stray bytes) that have no
--         entry in any table are passed through unchanged
function export.tr(text, lang, sc)
	local str_find, str_gsub, str_sub = string.find, string.gsub, string.sub
	local concat = table.concat
	local UTF8char = "[\1-\127\194-\244][\128-\191]*"
	-- Same pattern, anchored so that string.find only matches at its init
	-- position instead of searching forward.
	local UTF8char_here = "^" .. UTF8char

	-- Replaces, scanning left to right, every run of `n` consecutive UTF-8
	-- characters of `s` that is a key of `map` with the mapped value.
	-- Everything else is copied through byte-for-byte.
	local function replace_sequences(s, n, map)
		local out, out_len = {}, 0
		local pos, len = 1, #s
		while pos <= len do
			-- Measure the window of n characters starting at pos.
			-- first_end: last byte of the first character (nil if no
			--            character starts at pos).
			-- window_end: last byte of the n-th character (nil if fewer
			--             than n characters are available).
			local first_end, window_end
			local cursor = pos
			for _ = 1, n do
				local _, char_end = str_find(s, UTF8char_here, cursor)
				if not char_end then
					window_end = nil
					break
				end
				first_end = first_end or char_end
				window_end = char_end
				cursor = char_end + 1
			end

			local piece = window_end and map[str_sub(s, pos, window_end)]
			local stop
			if piece then
				stop = window_end
			else
				-- No match here: copy one character, or a single byte when
				-- pos does not start a character the pattern recognises.
				stop = first_end or pos
				piece = str_sub(s, pos, stop)
			end

			out_len = out_len + 1
			out[out_len] = piece
			pos = stop + 1
		end
		return concat(out)
	end

	-- Convert uppercase palochka to lowercase. Lowercase is found in tables
	-- above.
	text = str_gsub(text, u(0x4C0), u(0x4CF))

	text = replace_sequences(text, 3, trigraphs)
	text = replace_sequences(text, 2, digraphs)

	-- Single letters: gsub looks each UTF-8 character up in tt and keeps the
	-- character when there is no entry.
	text = str_gsub(text, UTF8char, tt)

	return text
end

return export