Module:kk-translit

From Wiktionary, the free dictionary
Jump to navigation Jump to search

Notice: This module is not the primary transliteration module for Kazakh; for that, see Module:languages/data/2. This module generates Latin spellings for Template:kk-alt.

For the Yañalif (old Latin spelling) correspondence, see this correspondence chart written in 1931 in the Soviet Union, and this chart on Kazakh Wikipedia.

Transliteration information

This module will transliterate Kazakh language text per WT:KK TR. The module should preferably not be called directly from templates or other modules. To use it from a template, use {{xlit}}. Within a module, use Module:languages#Language:transliterate.

For testcases, see Module:kk-translit/testcases.

Functions

tr(text, lang, sc)
Transliterates a given piece of text written in the script specified by the code sc, and language specified by the code lang.
When the transliteration fails, returns nil.

-- Table of functions returned to callers of this module.
local export = {}
-- Characters treated as vowels by the context rules below; a pattern of the
-- form "[^" .. vowels .. "]" matches any single character NOT in this list.
-- NOTE(review): у/У, ұ/Ұ and э/Э are not listed, so the rules treat them as
-- non-vowels -- confirm this is intentional.
local vowels = "үҮиИеЕәӘөӨӯӮАаоОЫыІіЯяЮюЁё"
-- Short alias for the Unicode-aware gsub from the mw.ustring library.
local rsubn = mw.ustring.gsub

-- Cyrillic -> Latin lookup used by export.tr, and as the fallback pass of the
-- other romanizations in this module. Every key is a single Cyrillic
-- character; the value is its Latin replacement (possibly empty or a digraph).
--
-- Casing convention: dotted "i" capitalizes to "İ" and dotless "ı" capitalizes
-- to "I". Two entries previously broke it: "І" mapped to lowercase "ı" and
-- "И" mapped to "I" (the capital of dotless "ı"); both are corrected here.
-- The duplicated "э"/"Э" keys have been collapsed into a single pair.
local tt = {
	["ү"]="ü", ["Ү"]="Ü", ["т"]="t", ["Т"]="T", ["һ"]="h", ["Һ"]="H", ["р"]="r", ["Р"]="R",
	["ф"]="f", ["Ф"]="F", ["ш"]="ş", ["Ш"]="Ş", ["н"]="n", ["Н"]="N", ["п"]="p", ["П"]="P",
	["й"]="i", ["Й"]="İ", ["л"]="l", ["Л"]="L", ["з"]="z", ["З"]="Z", ["е"]="e", ["Е"]="E",
	["г"]="g", ["Г"]="G", ["б"]="b", ["Б"]="B", ["у"]="u", ["У"]="U", ["с"]="s", ["С"]="S",
	["х"]="x", ["Х"]="X", ["ы"]="y", ["Ы"]="Y", ["м"]="m", ["М"]="M", ["о"]="o", ["О"]="O",
	["и"]="i", ["И"]="İ", ["ж"]="j", ["Ж"]="J", ["к"]="k", ["К"]="K", ["д"]="d", ["Д"]="D",
	["в"]="v", ["В"]="V", ["а"]="a", ["А"]="A", ["ң"]="ñ", ["Ң"]="Ñ", ["ә"]="ä", ["Ә"]="Ä",
	["э"]="e", ["Э"]="E", ["ұ"]="ū", ["Ұ"]="Ū", ["қ"]="q", ["Қ"]="Q", ["ғ"]="ğ", ["Ғ"]="Ğ",
	["і"]="ı", ["І"]="I", ["ө"]="ö", ["Ө"]="Ö",
	--mostly in loanwords from Russian
	-- NOTE(review): "Ц" yields all-caps "TS" while the other capital digraphs
	-- are title-case ("Tş", "Ştş") -- confirm which form is intended.
	["ё"]="io", ["Ё"]="İo", ["ц"]="ts", ["Ц"]="TS", ["ч"]="tş", ["Ч"]="Tş", ["щ"]="ştş", ["Щ"]="Ştş",
	["ъ"]="", ["Ъ"]="", ["ю"]="iu", ["Ю"]="İu",
	["я"]="ia", ["Я"]="İa", ["ӯ"]="u", ["Ӯ"]="U",
};

-- Main translit
-- Romanizes Cyrillic-script Kazakh text using the `tt` table.
--   text: the string to transliterate, or a table whose `args[1]` holds that
--         string (the shape received when the function is invoked with a frame)
--   lang: accepted for interface compatibility; not used here
--   sc:   script code; when nil it is detected from the text
-- Returns the romanized string, or nil when the script is not "Cyrl".
function export.tr(text, lang, sc)
	if type(text) == "table" then
		-- Only the first argument (the text) is used. The previous code also
		-- assigned to the undeclared globals `options` and `script`, which
		-- were never read anywhere; those stray global writes are removed.
		text = text.args[1]
	end

	if not sc then
		sc = require("Module:languages").getByCode("kk"):findBestScript(text):getCode()
	end

	if sc ~= "Cyrl" then
		return nil
	end

	-- Matches one character that is not listed in `vowels`.
	local nonvowel = "[^" .. vowels .. "]"

	-- "нг" directly before a non-vowel, or at the very end, becomes "ң".
	text = rsubn(text, "нг(" .. nonvowel .. ")", "ң%1")
	text = rsubn(text, "Нг(" .. nonvowel .. ")", "Ң%1")
	text = rsubn(text, "нг$", "ң")
	-- "я" directly after a non-vowel character becomes "ә".
	text = rsubn(text, "(" .. nonvowel .. ")я", "%1ә")
	text = rsubn(text, "(" .. nonvowel .. ")Я", "%1Ә")
	-- Drop "и"/"И" when it is immediately followed by "я"/"Я".
	text = rsubn(text, "[иИ]([яЯ])", "%1")
	--currently it is not clear how these two are romanized
	text = rsubn(text, "щ", "ш")
	text = rsubn(text, "Щ", "Ш")
	text = rsubn(text, "[ьЬ]", "")
	--remove them if their romanization changes
	-- Per-character lookup: characters without an entry in `tt` stay as they are.
	text = rsubn(text, '.', tt)
	return text
end


-- Not the main transliteration: alternative Latin spellings (Yañalif, Pinyin)

-- Yañalif 1938 version
-- Overrides applied before the generic `tt` pass in export.yanalif; only the
-- letters whose Yañalif spelling differs from `tt` are listed here.
-- NOTE(review): "ч" gives "c" while "Ч" gives "Tc" -- confirm the asymmetry.
local yanalif = {
	["ү"] = "y",  ["Ү"] = "Y",
	["ш"] = "c",  ["Ш"] = "C",
	["й"] = "i",  ["Й"] = "I",
	["б"] = "ʙ",
	["ы"] = "ь",  ["Ы"] = "Ь",
	["и"] = "i",  ["И"] = "I",
	["ж"] = "ç",  ["Ж"] = "Ç",
	["ң"] = "ꞑ",  ["Ң"] = "Ꞑ",
	["ә"] = "ə",  ["Ә"] = "Ə",
	["ғ"] = "ƣ",  ["Ғ"] = "Ƣ",
	["і"] = "j",  ["І"] = "J",
	["ө"] = "ɵ",  ["Ө"] = "Ɵ",
	--mostly in loanwords from Russian
	["ё"] = "io", ["Ё"] = "Io",
	["ч"] = "c",  ["Ч"] = "Tc",
	["щ"] = "cc", ["Щ"] = "Cc",
	["ъ"] = "ʼ",  ["Ъ"] = "ʼ",
	["ю"] = "iu", ["Ю"] = "Iu",
	["я"] = "ia", ["Я"] = "Ia",
}

--Yañalif 1927 version
-- Overrides applied before the generic `tt` pass in export.oldyan; letters
-- that are absent here fall through to `tt`.
-- NOTE(review): "ч" gives "c" while "Ч" gives "Tc" -- confirm the asymmetry.
local oldyan = {
	["ү"] = "y",  ["Ү"] = "Y",
	["ш"] = "c",  ["Ш"] = "C",
	["й"] = "i",  ["Й"] = "I",
	["б"] = "ʙ",
	["ы"] = "ь",  ["Ы"] = "Ь",
	["и"] = "j",  ["И"] = "J",
	["ж"] = "ç",  ["Ж"] = "Ç",
	["ң"] = "ꞑ",  ["Ң"] = "Ꞑ",
	["ә"] = "ə",  ["Ә"] = "Ə",
	["ғ"] = "ƣ",  ["Ғ"] = "Ƣ",
	["і"] = "i",  ["І"] = "I",
	["ө"] = "ɵ",  ["Ө"] = "Ɵ",
	["в"] = "ʙ",  ["В"] = "B",
	["ф"] = "p",  ["Ф"] = "P",
	["х"] = "h",  ["Х"] = "H",
	["у"] = "v",  ["У"] = "V",
	["ұ"] = "u",  ["Ұ"] = "U",
	--mostly in loanwords from Russian
	["ё"] = "io", ["Ё"] = "Io",
	["ч"] = "c",  ["Ч"] = "Tc",
	["щ"] = "cc", ["Щ"] = "Cc",
	["ъ"] = "ʼ",  ["Ъ"] = "ʼ",
	["ю"] = "iu", ["Ю"] = "Iu",
	["я"] = "ia", ["Я"] = "Ia",
}

--Pinyin
-- Overrides applied before the generic `tt` pass in export.pinyin; letters
-- that are absent here fall through to `tt`.
-- Capital "Е" and "Ң" previously mapped to lowercase "ê" and "ng", which
-- dropped the capitalization of the input; they now map to "Ê" and "Ng",
-- matching the title-case digraphs already used here ("Io", "Iu", "Ia").
local pinyin = {
	["ү"]="ü", ["Ү"]="Ü", ["ш"]="x", ["Ш"]="X", ["й"]="y", ["Й"]="Y",
	["ы"]="e", ["Ы"]="E", ["и"]="y", ["И"]="Y", ["у"]="w", ["У"]="W",
	["ң"]="ng", ["Ң"]="Ng", ["ә"]="ə", ["Ә"]="Ə", ["ғ"]="ƣ", ["Ғ"]="Ƣ",
	["і"]="i", ["І"]="I", ["ө"]="ɵ", ["Ө"]="Ɵ", ["һ"]="ⱨ", ["Һ"]="Ⱨ",
	["х"]="h", ["Х"]="H", ["қ"]="ⱪ", ["Қ"]="Ⱪ", ["е"]="ê", ["Е"]="Ê",
	["ұ"]="u", ["Ұ"]="U",
	--mostly in loanwords from Russian
	["ё"]="io", ["Ё"]="Io", ["ч"]="q", ["Ч"]="Q", ["щ"]="x", ["Щ"]="X",
	["ъ"]="ʼ", ["Ъ"]="ʼ", ["ю"]="iu", ["Ю"]="Iu", ["ь"]="", ["Ь"]="",
	["я"]="ia", ["Я"]="Ia",
}

-- Yanalif 1938 version
-- Converts Cyrillic-script Kazakh text to the spelling defined by the
-- `yanalif` table, with `tt` as the fallback for letters that table omits.
--   text: the string to convert, or a table whose `args[1]` holds that string
--         (the shape received when the function is invoked with a frame)
--   lang: accepted for interface compatibility; not used here
--   sc:   script code; when nil it is detected from the text
-- Returns the converted string, or nil when the script is not "Cyrl".
function export.yanalif(text, lang, sc)
	if type(text) == "table" then
		-- Only the first argument (the text) is used. The previous code also
		-- assigned to the undeclared globals `options` and `script`, which
		-- were never read anywhere; those stray global writes are removed.
		text = text.args[1]
	end

	if not sc then
		sc = require("Module:languages").getByCode("kk"):findBestScript(text):getCode()
	end

	if sc ~= "Cyrl" then
		return nil
	end

	-- Matches one character that is not listed in `vowels`.
	local nonvowel = "[^" .. vowels .. "]"

	-- "нг" directly before a non-vowel, or at the very end, becomes "ң".
	text = rsubn(text, "нг(" .. nonvowel .. ")", "ң%1")
	text = rsubn(text, "Нг(" .. nonvowel .. ")", "Ң%1")
	text = rsubn(text, "нг$", "ң")
	-- "я" directly after a non-vowel character becomes "ә".
	text = rsubn(text, "(" .. nonvowel .. ")я", "%1ә")
	text = rsubn(text, "(" .. nonvowel .. ")Я", "%1Ә")
	-- Drop "и"/"И" when it is immediately followed by "я"/"Я".
	text = rsubn(text, "[иИ]([яЯ])", "%1")
	-- First pass: Yañalif-specific letters. Second pass: whatever Cyrillic is
	-- still left is mapped through the generic table.
	text = rsubn(text, '.', yanalif)
	text = rsubn(text, '.', tt)
	return text
end

-- Vowel sets consulted by the rules in export.oldyan. Judging by the names
-- these are presumably back/front x unrounded/rounded classes -- the code
-- itself only uses them as character sets.
local backur = "АаЫы"
local backround = "ӯӮоОҰұ"
local frontur = "әӘіІ"
local frontround = "үҮөӨ"
-- Pattern for one character that is neither in `vowels` nor one of ".", " "
-- or "-".
local consonant = ("[^%s. -]"):format(vowels)

-- Yanalif pre 1927 version
-- Converts Cyrillic-script Kazakh text to the spelling defined by the
-- `oldyan` table, with `tt` as the fallback for letters that table omits.
-- Before the lookup, "у" and "и" are expanded to a vowel + "у"/"и" sequence
-- whose vowel is chosen from a neighbouring vowel's class.
--   text: the string to convert, or a table whose `args[1]` holds that string
--         (the shape received when the function is invoked with a frame)
--   lang: accepted for interface compatibility; not used here
--   sc:   script code; when nil it is detected from the text
-- Returns the converted string, or nil when the script is not "Cyrl".
function export.oldyan(text, lang, sc)
	if type(text) == "table" then
		-- Only the first argument (the text) is used. The previous code also
		-- assigned to the undeclared globals `options` and `script`, which
		-- were never read anywhere; those stray global writes are removed.
		text = text.args[1]
	end
	
	if not sc then
		sc = require("Module:languages").getByCode("kk"):findBestScript(text):getCode()
	end
	
	if sc ~= "Cyrl" then
		return nil
	end
	-- "нг" directly before a non-vowel, or at the very end, becomes "ң".
	text = rsubn(text, "нг([^" .. vowels .. "])", "ң%1") 
	text = rsubn(text, "Нг([^" .. vowels .. "])", "Ң%1") 
	text = rsubn(text, "нг$", "ң")
	-- "я" directly after a non-vowel character becomes "ә".
	text = rsubn(text, "([^" .. vowels .. "])я", "%1ә") 
	text = rsubn(text, "([^" .. vowels .. "])Я", "%1Ә") 
	-- The rules below are order-dependent: the "preceding vowel" rules run
	-- first, and the "following vowel" rules only see what is left.
	-- check preceding vowel
	-- for w
	-- vowel + one consonant + "у": insert ы / ұ / і / ү before the "у",
	-- depending on which set the preceding vowel belongs to.
	text = rsubn(text, "([" .. backur .. "])(" .. consonant .. ")у", "%1%2ыу")
	text = rsubn(text, "([" .. backround .. "])(" .. consonant .. ")у", "%1%2ұу")
	text = rsubn(text, "([" .. frontur .. "])(" .. consonant .. ")у", "%1%2іу")
	text = rsubn(text, "([" .. frontround .. "])(" .. consonant .. ")у", "%1%2үу")
	-- for j
	-- vowel + optional consonant + "и": insert ы (back sets) or і (front sets).
	text = rsubn(text, "([" .. backur .. backround .. "])(" .. consonant .. "?)и", "%1%2ыи")
	text = rsubn(text, "([" .. frontur .. frontround .. "])(" .. consonant .. "?)и", "%1%2іи")
	-- if none, check following vowel
	-- for w
	-- non-vowel + "у"/"У" + optional consonant + vowel: the inserted vowel is
	-- chosen from the FOLLOWING vowel's set; the capital variant inserts a capital.
	text = rsubn(text, "([^" .. vowels .. "])У(" .. consonant .. "?)([" .. backur .. "])", "%1ЫУ%2%3")
	text = rsubn(text, "([^" .. vowels .. "])у(" .. consonant .. "?)([" .. backur .. "])", "%1ыу%2%3")
	text = rsubn(text, "([^" .. vowels .. "])У(" .. consonant .. "?)([" .. backround .. "])", "%1ҰУ%2%3")
	text = rsubn(text, "([^" .. vowels .. "])у(" .. consonant .. "?)([" .. backround .. "])", "%1ұу%2%3")
	text = rsubn(text, "([^" .. vowels .. "])У(" .. consonant .. "?)([" .. frontur .. "])", "%1ІУ%2%3")
	text = rsubn(text, "([^" .. vowels .. "])у(" .. consonant .. "?)([" .. frontur .. "])", "%1іу%2%3")
	text = rsubn(text, "([^" .. vowels .. "])У(" .. consonant .. "?)([" .. frontround .. "])", "%1ҮУ%2%3")
	text = rsubn(text, "([^" .. vowels .. "])у(" .. consonant .. "?)([" .. frontround .. "])", "%1үу%2%3")
	-- for j
	-- Same idea for "и"/"И"; the last two rules cover a capital "И" at the
	-- very start of the text, where there is no preceding character to match.
	text = rsubn(text, "([^" .. vowels .. "])и(" .. consonant .. "?)([" .. backur .. backround .. "])", "%1ыи%2%3")
	text = rsubn(text, "([^" .. vowels .. "])и(" .. consonant .. "?)([" .. frontur .. frontround .. "])", "%1іи%2%3")
	text = rsubn(text, "([^" .. vowels .. "])И(" .. consonant .. "?)([" .. backur .. backround .. "])", "%1ЫИ%2%3")
	text = rsubn(text, "([^" .. vowels .. "])И(" .. consonant .. "?)([" .. frontur .. frontround .. "])", "%1ІИ%2%3")
	text = rsubn(text, "^И(" .. consonant .. "?)([" .. backur .. backround .. "])", "ЫИ%1%2")
	text = rsubn(text, "^И(" .. consonant .. "?)([" .. frontur .. frontround .. "])", "ІИ%1%2")
	-- final touches
	-- Drop "и"/"И" when it is immediately followed by "я"/"Я".
	text = rsubn(text, "[иИ]([яЯ])", "%1")
	-- First pass: letters specific to this spelling. Second pass: whatever
	-- Cyrillic is still left is mapped through the generic table.
	text = rsubn(text, '.', oldyan)
	text = rsubn(text, '.', tt)
	return text
end

-- Pinyin
-- Converts Cyrillic-script Kazakh text to the spelling defined by the
-- `pinyin` table, with `tt` as the fallback for letters that table omits.
--   text: the string to convert, or a table whose `args[1]` holds that string
--         (the shape received when the function is invoked with a frame)
--   lang: accepted for interface compatibility; not used here
--   sc:   script code; when nil it is detected from the text
-- Returns the converted string, or nil when the script is not "Cyrl".
function export.pinyin(text, lang, sc)
	if type(text) == "table" then
		-- Only the first argument (the text) is used. The previous code also
		-- assigned to the undeclared globals `options` and `script`, which
		-- were never read anywhere; those stray global writes are removed.
		text = text.args[1]
	end

	if not sc then
		sc = require("Module:languages").getByCode("kk"):findBestScript(text):getCode()
	end

	if sc ~= "Cyrl" then
		return nil
	end

	-- Matches one character that is not listed in `vowels`.
	local nonvowel = "[^" .. vowels .. "]"

	-- "я" directly after a non-vowel character becomes "ә".
	text = rsubn(text, "(" .. nonvowel .. ")я", "%1ә")
	text = rsubn(text, "(" .. nonvowel .. ")Я", "%1Ә")
	-- Drop "и"/"И" when it is immediately followed by "я"/"Я".
	text = rsubn(text, "[иИ]([яЯ])", "%1")
	-- Insert "ұ" before "у"/"У" when it follows a non-vowel, or when it starts
	-- the text and is followed by a non-vowel.
	-- NOTE(review): the inserted "ұ" is lowercase even before a capital "У",
	-- so the output can begin with a lowercase letter -- confirm intended casing.
	text = rsubn(text, "(" .. nonvowel .. ")[у]", "%1ұу")
	text = rsubn(text, "(" .. nonvowel .. ")[У]", "%1ұУ")
	text = rsubn(text, "^[у](" .. nonvowel .. ")", "ұу%1")
	text = rsubn(text, "^[У](" .. nonvowel .. ")", "ұУ%1")
	-- First pass: letters specific to this spelling. Second pass: whatever
	-- Cyrillic is still left is mapped through the generic table.
	text = rsubn(text, '.', pinyin)
	text = rsubn(text, '.', tt)
	return text
end

-- Hand the function table (tr, yanalif, oldyan, pinyin) to whoever loads this module.
return export