Module:bho-Kthi-translit
- De neikommende dokumintaasje stiet op Module:bho-Kthi-translit/dokumintaasje. [bewurkje]
Oernommen fan en:Module:bho-Kthi-translit.
-- Transliteration for Bhojpuri
local export = {}
local gsub = mw.ustring.gsub
local match = mw.ustring.match
-- Lookup table from source-script strings to their Latin transliteration.
-- export.tr passes this table to gsub as the replacement value, so every
-- matched character (optionally together with a trailing modifier sign) is
-- looked up here; a match that has no entry is left unchanged by gsub.
-- Entries mapped to '' are removed from the output.
-- NOTE(review): many keys below render identically and some string literals
-- are split across two lines, which suggests the non-ASCII literals were
-- damaged by an encoding round-trip. Verify against the upstream module
-- (en:Module:bho-Kthi-translit) before relying on this table.
local conv = {
-- consonants
['๐'] = 'k', ['๐'] = 'kh', ['๐'] = 'g', ['๐'] = 'gh', ['๐'] = 'แน
',
['๐'] = 'c', ['๐'] = 'ch', ['๐'] = 'j', ['๐'] = 'jh', ['๐'] = 'รฑ',
['๐'] = 'แนญ', ['๐'] = 'แนญh', ['๐'] = 'แธ', ['๐'] = 'แธh', ['๐'] = 'แน',
['๐'] = 't', ['๐'] = 'th', ['๐ '] = 'd', ['๐ก'] = 'dh', ['๐ข'] = 'n',
['๐ฃ'] = 'p', ['๐ค'] = 'ph', ['๐ฅ'] = 'b', ['๐ฆ'] = 'bh', ['๐ง'] = 'm',
['๐จ'] = 'y', ['๐ฉ'] = 'r', ['๐ช'] = 'l', ['๐ซ'] = 'v', ['๐ซ'] = 'v', ['เคณ'] = 'แธท',
['๐ฌ'] = 'ล', ['๐ญ'] = 'แนฃ', ['๐ฎ'] = 's', ['๐ฏ'] = 'h',
['๐'] = 'แน', ['๐'] = 'แน', ['๐'] = 'แนh', ['๐'] = 'แนh',
-- Disabled three-character entry; export.tr still runs a final gsub for a
-- three-character sequence against this table.
-- ['๐๐น๐'] = 'gy',
-- vowel diacritics
['๐ฑ'] = 'i', ['๐ณ'] = 'u', ['๐ต'] = 'e', ['๐ท'] = 'o',
['๐ฐ'] = 'ฤ', ['๐ฒ'] = 'ฤซ', ['๐ด'] = 'ลซ',
['๐ถ'] = 'ai', ['๐ธ'] = 'au',
-- vowel signs
['๐'] = 'a', ['๐
'] = 'i', ['๐'] = 'u', ['๐'] = 'e', ['๐'] = 'o',
['๐'] = 'ฤ', ['๐'] = 'ฤซ', ['๐'] = 'ลซ',
['๐'] = 'ai', ['๐'] = 'au',
['เฅ'] = 'om',
-- chandrabindu
['๐'] = 'ฬ',
-- anusvara
['๐'] = 'แน',
-- visarga
['๐'] = 'แธฅ',
-- virama
['๐น'] = '',
-- numerals
['เฅฆ'] = '0', ['เฅง'] = '1', ['เฅจ'] = '2', ['เฅฉ'] = '3', ['เฅช'] = '4',
['เฅซ'] = '5', ['เฅฌ'] = '6', ['เฅญ'] = '7', ['เฅฎ'] = '8', ['เฅฏ'] = '9',
-- punctuation
['๐'] = '.', -- danda
['+'] = '', -- compound separator
-- abbreviation sign
['๐ป'] = '.',
}
-- Maps a character to the nasal character that export.tr inserts in its nasal
-- pass when that character is the one captured as `succ` beside the sign being
-- replaced. Characters with no entry here fall back to the literal "n" there.
-- NOTE(review): keys and values render identically in several entries; the
-- literals may have been damaged by an encoding round-trip. Confirm against
-- the upstream module.
local nasal_assim = {
['๐'] = '๐', ['๐'] = '๐', ['๐'] = '๐', ['๐'] = '๐',
['๐'] = '๐', ['๐'] = '๐', ['๐'] = '๐', ['๐'] = '๐',
['๐'] = '๐', ['๐'] = '๐', ['๐'] = '๐', ['๐'] = '๐',
['๐ฃ'] = '๐ง', ['๐ค'] = '๐ง', ['๐ฅ'] = '๐ง', ['๐ฆ'] = '๐ง', ['๐ง'] = '๐ง',
}
-- Set of three-character sequences used by export.tr when it decides whether
-- a word-final 'a' is kept. The lookup key there is first..second..third taken
-- from the reversed word, so the keys are in reversed character order. A
-- sequence listed here does not trigger the special-consonant rule that would
-- otherwise keep the final 'a'.
local perm_cl = {
['๐ง๐น๐ช'] = true, ['๐ซ๐น๐ช'] = true, ['๐ซ๐น๐ช'] = true, ['๐ข๐น๐ช'] = true,
}
-- Character lists that are spliced into [...] sets when patterns are built.
-- all_cons is the consonant list; special_cons is the list that export.tr
-- tests the last consonant of a word against.
local all_cons, special_cons = '๐๐๐๐๐๐๐๐๐๐๐๐๐๐๐๐๐๐๐ ๐ก๐ฃ๐ค๐ฅ๐ฆ๐ฌ๐ญ๐ฎ๐จ๐ฉ๐ช๐ซ๐ฏ๐๐ข๐ง', '๐จ๐ฉ๐ช๐ฅ๐ซ๐ฏ๐ข๐ง'
-- vowel starts with the Latin letter 'a' (the vowel export.tr inserts after
-- bare consonants) followed by script characters; vowel_sign is a second list
-- of script characters.
-- NOTE(review): the vowel_sign literal is split across two lines, which looks
-- like encoding damage rather than intent. Confirm against the upstream module.
local vowel, vowel_sign = 'a๐ฐ๐ฑ๐ฒ๐ณ๐ด๐ต๐ถ๐ท๐ธ', '๐๐๐
๐๐๐๐๐๐๐'
-- Pattern with four captures, applied by export.tr to reversed words:
--   1. one character from vowel or vowel_sign,
--   2. a consonant, optionally preceded by a sign character,
--   3. (after a literal 'a') a consonant from all_cons with one character
--      removed from the list, again optionally preceded by a sign character,
--   4. an optional sign character followed by a vowel or vowel_sign character.
-- export.tr replaces each match with captures 1-4, which drops the 'a'.
local syncope_pattern = '([' .. vowel .. vowel_sign .. '])(๐บ?[' .. all_cons .. '])a(๐บ?[' .. gsub(all_cons, "๐จ", "") .. '])([๐๐]?[' .. vowel .. vowel_sign .. '])'
-- Returns `text` with its characters in reverse order.
-- Length and indexing go through mw.ustring.len / mw.ustring.sub, so the
-- string is reversed one character at a time as that library counts them.
local function rev_string(text)
  local reversed = {}
  local total = mw.ustring.len(text)
  -- Walk from the last character back to the first, appending each one.
  for pos = total, 1, -1 do
    reversed[#reversed + 1] = mw.ustring.sub(text, pos, pos)
  end
  return table.concat(reversed)
end
--- Transliterates `text` into Latin script.
-- Works in three stages: (1) write the inherent vowel 'a' after bare
-- consonants, (2) adjust each word (final 'a', medial 'a', nasal sign),
-- (3) map the remaining script characters through `conv` and normalise.
-- @param text string to transliterate
-- @param lang accepted for interface compatibility; not used by this function
-- @param sc accepted for interface compatibility; not used by this function
-- @return the transliterated string, passed through mw.ustring.toNFC
function export.tr(text, lang, sc)
  -- Stage 1: every consonant (with its optional trailing sign) that is not
  -- followed by a character from `vowel` or by the virama gets an explicit 'a'.
  text = gsub(
    text,
    '([' .. all_cons .. ']๐บ?)([' .. vowel .. '๐น]?)',
    function(c, d)
      return c .. (d == "" and 'a' or d)
    end
  )

  -- Stage 2: rewrite each word in place through a gsub callback. Writing the
  -- finished word back with a second gsub over the whole text, using the
  -- original word as the pattern, would also rewrite every longer word that
  -- merely contains it, and that longer word would then no longer be found
  -- when its own turn came.
  text = gsub(text, "[๐-๐a]+", function(word)
    -- Work on the reversed word so that end-of-word rules can be anchored
    -- with '^'.
    word = rev_string(word)

    -- Word-final 'a': dropped unless one of the two conditions below holds.
    word = gsub(
      word,
      '^a(๐บ?)([' .. all_cons .. '])(.)(.?)',
      function(opt, first, second, third)
        local a = ""
        -- Keep the 'a' when the last consonant is in special_cons, the
        -- character before it is the virama and the sequence is not listed
        -- in perm_cl; or when first..second matches the final alternative.
        -- `and` binds tighter than `or`, so that alternative applies on its
        -- own.
        if match(first, '[' .. special_cons .. ']')
          and match(second, '๐น')
          and not perm_cl[first .. second .. third]
          or match(first .. second, '๐จ[๐ฒ๐ต๐ถ]') then
          a = "a"
        end
        return a .. opt .. first .. second .. third
      end
    )

    -- Medial 'a': delete it wherever syncope_pattern matches. The loop
    -- repeats because a single gsub pass cannot handle overlapping matches.
    while match(word, syncope_pattern) do
      word = gsub(word, syncope_pattern, '%1%2%3%4')
    end

    -- Nasal sign: replace it according to its neighbours. `succ` is the
    -- character before it in the reversed word (so after it in reading
    -- order) and is empty only when the sign ends the word; `prev` is the
    -- character after it in the reversed word.
    word = gsub(
      word,
      '(.?)๐(.)',
      function(succ, prev)
        local mid = nasal_assim[succ] or "n"
        if succ .. prev == "a" then
          -- Sign at the end of the word, directly after 'a'.
          mid = "๐บ๐ง"
        elseif succ == "" and match(prev, '[' .. vowel .. ']') then
          -- Sign at the end of the word after any other vowel.
          mid = "ฬ"
        end
        return succ .. mid .. prev
      end
    )

    -- Restore reading order; the result replaces exactly this occurrence.
    return rev_string(word)
  end)

  -- Stage 3: map every character (with its optional trailing sign) through
  -- conv; gsub leaves matches without a table entry unchanged.
  text = gsub(text, '.๐บ?', conv)
  text = gsub(text, 'a([iu])ฬ', 'aอ %1')
  -- NOTE(review): conv has no active entry for this three-character
  -- sequence (it is commented out there), so this call appears to change
  -- nothing at present. Kept as in the original.
  text = gsub(text, '๐๐น๐', conv)
  return mw.ustring.toNFC(text)
end
return export