Mòdul:Kk-trans

De la Viquipèdia, l'enciclopèdia lliure
Icona de documentació de mòdul Documentació del mòdul [ mostra ] [ modifica el codi ] [ mostra l'historial ] [ refresca ]

Mòdul Kk-trans (codi · ús · discussió · proves · tests · casos prova | subpàgines · enllaços)

A continuació es mostra la documentació transclosa de la subpàgina /ús. [salta a la caixa de codi]


Transliteració i transcripció del kazakh al català segons els criteris de Viquipèdia:Transcripció del kazakh. Actualment s'usa només com a eina de consulta, vegeu Viquipèdia:Transcripció del kazakh/Consulta.

El codi original prové de wikt:Mòdul:kk-trans on s'utilitza per generar transcripcions automàtiques. En cas de modificacions és convenient mantenir-lo sincronitzat amb l'original.

-- Table of functions exported by this module.
local p = {}

-- Combining diacritical marks, built from their Unicode code points:
-- grave (U+0300), acute (U+0301) and diaeresis (U+0308).
local u = mw.ustring.char
local GR, AC, DI = u(0x0300), u(0x0301), u(0x0308)

-- Letter-by-letter transliteration table (Kazakh Cyrillic -> Latin).
-- p.translit passes it to gsub as the replacement table, so any character
-- without an entry here is left untouched in the output.
-- Upper-case keys map to upper-case (or title-case) values; the hard and soft
-- signs have no case distinction and map to the same modifier prime.
-- NOTE(review): both "Ы" and "І" map to "I" in upper case although their
-- lower-case forms differ (ı / i) - confirm whether "І" should be "İ".
local tab_tlit = {
	["А"]="A",['а']='a',   ["Ә"]="Ä",['ә']='ä',   ["Б"]="B",['б']='b',   ["В"]="V",['в']='v',   ["Г"]="G",['г']='g',
	["Ғ"]="Ğ",['ғ']='ğ',   ["Д"]="D",['д']='d',   ["Е"]="E",['е']='e',   ["Ё"]="Yo",['ё']='yo', ["Ж"]="J",['ж']='j',
	["З"]="Z",['з']='z',   ["И"]="Ï",['и']='ï',   ["Й"]="Y",['й']='y',   ["К"]="K",['к']='k',   ["Қ"]="Q",['қ']='q',
	["Л"]="L",['л']='l',   ["М"]="M",['м']='m',   ["Н"]="N",['н']='n',   ["Ң"]="Ñ",['ң']='ñ',   ["О"]="O",['о']='o',
	["Ө"]="Ö",['ө']='ö',   ["П"]="P",['п']='p',   ["Р"]="R",['р']='r',   ["С"]="S",['с']='s',   ["Т"]="T",['т']='t',
	["У"]="W",['у']='w',   ["Ұ"]="U",['ұ']='u',   ["Ү"]="Ü",['ү']='ü',   ["Ф"]="F",['ф']='f',   ["Х"]="X",['х']='x',
	-- upper-case Һ must yield an upper-case letter, like every other capital
	["Һ"]="H",['һ']='h',   ["Ц"]="C",['ц']='c',   ["Ч"]="Ç",['ч']='ç',   ["Ш"]="Ş",['ш']='ş',   ["Щ"]="Şş",['щ']='şş',
	["Ъ"]="ʺ",['ъ']='ʺ',   ["Ы"]="I",['ы']='ı',   ["І"]="I",['і']='i',   ["Ь"]="ʹ",['ь']='ʹ',   ["Э"]="É",['э']='é',
	["Ю"]="Yw",['ю']='yw', ["Я"]="Ya",['я']='ya'
}

-- Letter-by-letter base table for the Catalan transcription (Kazakh Cyrillic
-- -> Latin). wtr passes it to gsub as the replacement table after its
-- Cyrillic-level adjustments, so characters without an entry are kept as-is.
-- The hard and soft signs map to the empty string, i.e. they are dropped.
local tab_tcrip = {
	["А"]="A",['а']='a',   ["Ә"]="A",['ә']='a',   ["Б"]="B",['б']='b',   ["В"]="V",['в']='v',   ["Г"]="G",['г']='g',
	["Ғ"]="G",['ғ']='g',   ["Д"]="D",['д']='d',   ["Е"]="E",['е']='e',   ["Ё"]="Io",['ё']='io', ["Ж"]="J",['ж']='j',
	["З"]="Z",['з']='z',   ["И"]="I",['и']='i',   ["Й"]="I",['й']='i',   ["К"]="K",['к']='k',   ["Қ"]="Kh",['қ']='kh',
	["Л"]="L",['л']='l',   ["М"]="M",['м']='m',   ["Н"]="N",['н']='n',   ["Ң"]="Ng",['ң']='ng', ["О"]="O",['о']='o',
	["Ө"]="O",['ө']='o',   ["П"]="P",['п']='p',   ["Р"]="R",['р']='r',   ["С"]="S",['с']='s',   ["Т"]="T",['т']='t',
	["У"]="U",['у']='u',   ["Ұ"]="U",['ұ']='u',   ["Ү"]="U",['ү']='u',   ["Ф"]="F",['ф']='f',   ["Х"]="Kh",['х']='kh',
	-- upper-case Һ must yield an upper-case letter, like every other capital
	["Һ"]="H",['һ']='h',   ["Ц"]="Ts",['ц']='ts', ["Ч"]="Tx",['ч']='tx', ["Ш"]="X",['ш']='x',   ["Щ"]="Sx",['щ']='sx',
	["Ъ"]="",['ъ']='',     ["Ы"]="I",['ы']='i',   ["І"]="I",['і']='i',   ["Ь"]="",['ь']='',     ["Э"]="E",['э']='e',
	["Ю"]="Iu",['ю']='iu', ["Я"]="Ia",['я']='ia'
}
	
-- Pattern character class listing the Cyrillic vowel letters (both cases) plus
-- the modifier primes ʹ and ʺ. wtr uses it to find an "е" that directly follows
-- one of these characters.
local non_consonants = "[АӘЕЁИОӨҰҮЫІЭЮЯаәеёиоөұүыіэюяʹʺ]"

-- gsub replacement callback that turns Cyrillic Е/е into "Ie"/"ie".
-- Called with one capture (the letter itself) it returns the replacement, or
-- nil when the capture is not Е/е. Called with two captures (preceding
-- character, then Е/е) it returns the preceding character followed by the
-- replacement.
local function map_to_je(pre, e)
	local iotated = {["Е"] = "Ie", ["е"] = "ie"}
	if e ~= nil then
		return pre .. iotated[e]
	end
	return iotated[pre]
end

-- Doubled consonants that Catalan spelling does not use. Each pattern captures
-- the first letter (either case) so that the pair collapses to that letter.
local no_dobles = {"([Вв])в", "([Жж])ж", "([Кк])к", "([Ққ])қ", "([Ңң])ң", "([Хх])х", "([Һһ])һ", "([Цц])ц", "([Чч])ч", "([Шш])ш", "([Щщ])щ"}

-- Precomposed letters for "vowel + combining mark" pairs. A stressed a takes
-- the grave accent (à); the other stressed vowels listed here take the acute.
local char_acc = {["A"..AC]="À", ["E"..AC]="É", ["I"..AC]="Í", ["O"..AC]="Ó", ["U"..AC]="Ú",
	["a"..AC]="à", ["e"..AC]="é", ["i"..AC]="í", ["i"..DI]="ï", ["o"..AC]="ó", ["u"..AC]="ú", ["u"..DI]="ü"}

-- Vowel + s + vowel: the context in which a single s is rewritten as ss.
local intervocalic_s = "([AEIOUaeiouÀÉÍÓÚàéíóúü])s([aeiouàéíóú])"

-- Transcribes a single word from Kazakh Cyrillic to Catalan spelling.
-- The word may carry combining stress marks; a grave mark is treated as an
-- acute one and the marks are carried through to the result.
-- @param cyr  the word in Cyrillic script
-- @return     the transcribed word
local function wtr(cyr)
	cyr = mw.ustring.gsub(cyr, GR, AC)

	-- collapse doubled consonants that are not used in Catalan
	for i = 1, #no_dobles do
		cyr = mw.ustring.gsub(cyr, no_dobles[i], "%1")
	end

	-- е after a vowel or at the beginning of a word becomes ie
	cyr = mw.ustring.gsub(cyr, "^([Ее])", map_to_je)
	cyr = mw.ustring.gsub(cyr, "(" .. non_consonants .. ")([Ее])", map_to_je)
	-- need to do it twice in case of sequences of such vowels
	-- NOTE(review): the first pass writes Latin "Ie"/"ie", which are not in
	-- non_consonants, so this second pass does not appear to match an "е" that
	-- follows an already converted one - confirm the intended result for "ее".
	cyr = mw.ustring.gsub(cyr, "(" .. non_consonants .. ")([Ее])", map_to_je)

	-- character-by-character conversion to the Latin base letters
	local latin = mw.ustring.gsub(cyr, '.', tab_tcrip)

	-- simplify two consecutive i (keeping a stress mark on the first one)
	latin = mw.ustring.gsub(latin, "(i" .. AC .. "?)i", "%1")

	-- geminate l is written l·l
	latin = mw.ustring.gsub(latin, "ll", "l·l")

	-- simplify ngg to ng
	latin = mw.ustring.gsub(latin, "ngg", "ng")

	-- g before e/i is written gue/gui
	latin = mw.ustring.gsub(latin, "([Gg])([ei])", "%1u%2")

	-- Catalan accentuation rules (delegated to Mòdul:ca-trans)
	latin = require("Mòdul:ca-trans").accents(latin)

	-- replace "vowel + combining mark" pairs by precomposed letters; pairs that
	-- have no entry in char_acc are left as they are
	latin = mw.ustring.gsub(latin, ".[" .. AC .. DI .. "]", char_acc)

	-- intervocalic s is written ss. gsub matches never overlap, so in a chain
	-- such as "asasa" the vowel that closes one match cannot open the next one;
	-- a second pass catches the s that was skipped. Already doubled "ss" does not
	-- match the pattern, so the second pass never doubles twice.
	latin = mw.ustring.gsub(latin, intervocalic_s, "%1ss%2")
	latin = mw.ustring.gsub(latin, intervocalic_s, "%1ss%2")

	-- x after a vowel other than i is written ix
	latin = mw.ustring.gsub(latin, "([AEOUaeouÀÉÓÚàéóúü])x", "%1ix")

	return latin
end

-- Transliterates a text (a single word or a whole phrase) character by
-- character using tab_tlit.
-- @param text  a string, or a table whose args[1] holds the string
-- @return      the transliterated string
function p.translit(text)
	if type(text) == "table" then
		text = text.args[1]
	end
	-- keep only the first gsub result (the string), not the match count
	local latin = mw.ustring.gsub(text, '.', tab_tlit)
	return latin
end

-- Transcribes a text word by word: the text is split at every whitespace
-- character, each piece goes through wtr, and the results are joined with
-- single spaces.
-- @param text  a string, or a table whose args[1] holds the string
-- @return      the transcribed string
function p.transcrip(text)
	if type(text) == "table" then
		text = text.args[1]
	end

	local pieces = {}
	for chunk in mw.text.gsplit(text, '%s') do
		pieces[#pieces + 1] = wtr(chunk)
	end

	return table.concat(pieces, ' ')
end

-- Module result: the table holding the translit and transcrip functions.
return p