본문으로 이동

모듈:cpx-pron

위키낱말사전, 말과 글의 누리

이 모듈에 대한 설명문서는 모듈:cpx-pron/설명문서에서 만들 수 있습니다

local export = {}
local m_string_utils = require("Module:string utilities")
local m_table = require("Module:table")
local m_data = require("Module:cpx-pron/data")

local sub = m_string_utils.sub
local find = m_string_utils.find
local gsub = m_string_utils.gsub
local match = m_string_utils.match
local toNFD = mw.ustring.toNFD
local toNFC = mw.ustring.toNFC

-- Single-character markers an editor may attach to a syllable in the input.
-- Where each one is looked for is shown by get_syllable_markers and
-- split_syllable further down in this file.
local SPECIAL_MARKERS = {}
-- Leading "*": split_syllable sets the no_assimilation flag.
SPECIAL_MARKERS.NO_ASSIMILATION = "*"
-- Trailing "#": split_syllable sets the no_sandhi flag.
SPECIAL_MARKERS.NO_SANDHI = "#"
-- Separates the original form from the manually changed form ("orig>changed").
SPECIAL_MARKERS.MANUAL_CHANGE = ">"
-- Leading "^": get_syllable_markers sets the capitalize flag.
SPECIAL_MARKERS.CAPITALIZATION = "^"
-- Trailing backslash: get_syllable_markers sets the space_after flag.
SPECIAL_MARKERS.SPACE_AFTER = "\\"

-- Names of the output formatting modes.
-- NOTE(review): no use of these values is visible in this part of the file;
-- presumably they select how much detail is rendered -- confirm at the call sites.
local FORMAT_MODES = { BRIEF = "brief", COMPLETE = "complete", DEMO = "demo" }

-- Supported dialect codes, each mapped to the wikitext link shown for it.
-- split_dialect_codes rejects any code that is not a key of this table.
local dialects = {
	["pt"] = "[[w:en:Putian dialect|푸톈]]",
	["nr"] = "[[w:난르섬|난르]]",
	["jk"] = "[[w:zh:江口鎮 (莆田市)|장커우]]",
	["xy"] = "[[w:en:Xianyou dialect|셴유]]",
	["ft"] = "[[w:zh:楓亭鎮|펑팅]]",
	["yy"] = "[[w:zh:游洋鎮|유양]]",
}

-- Romanized initial -> IPA, per dialect code.
-- Every dialect shares one consonant inventory; xy, yy and ft add "w",
-- and yy additionally realizes "s" differently.
local initials
do
	-- Returns a fresh copy of the shared inventory with `overrides` applied,
	-- so that every dialect owns its own table.
	local function variant(overrides)
		local inventory = {
			b = "p", p = "pʰ", m = "m",
			d = "t", t = "tʰ", n = "n", l = "l",
			z = "t͡s", c = "t͡sʰ", s = "ɬ",
			g = "k", k = "kʰ", ng = "ŋ", h = "h",
			[""] = "",
		}
		for romanized, ipa in pairs(overrides) do
			inventory[romanized] = ipa
		end
		return inventory
	end

	initials = {
		pt = variant({}),
		jk = variant({}),
		nr = variant({}),
		xy = variant({ w = "β" }),
		yy = variant({ s = "θ", w = "β" }),
		ft = variant({ w = "β" }),
	}
end

-- Romanized final -> IPA, per dialect code.
-- Within each dialect the entries are grouped as: plain vowels and diphthongs,
-- "-ng" finals, "-h" finals and (where present) "-nn" finals.
-- The key "or" must stay in bracket form because `or` is a Lua keyword.
local finals = {
	pt = {
		a = "a", ae = "ɛ", e = "e", i = "i", o = "o",
		oe = "ø", ["or"] = "ɒ", u = "u", y = "y",
		ai = "ai", ao = "au", ia = "ia", ieo = "ieu", iu = "iu",
		ou = "ɔu", ua = "ua", uei = "uei", ui = "ui", yor = "yɒ",

		ang = "aŋ", orng = "ɒŋ", eng = "ɛŋ", oeng = "œŋ", ong = "ɔŋ",
		ing = "iŋ", ieng = "iɛŋ", ung = "uŋ", uang = "uaŋ", yng = "yŋ",
		yorng = "yɒŋ", ng = "ŋ̍",

		ah = "aʔ", orh = "ɒʔ", eh = "ɛʔ", oeh = "œʔ", oh = "ɔʔ",
		ih = "iʔ", iah = "iæʔ", uh = "uʔ", uah = "uaʔ", ieh = "iɛʔ",
		uoh = "uoʔ", yh = "yʔ", yorh = "yɒʔ",
	},
	jk = {
		a = "a", e = "e", ae = "ɛ", eo = "ø", oe = "œ",
		["or"] = "ɒ", o = "o", i = "i", u = "u", y = "y",
		ie = "ie", iao = "iɐu", iu = "iu", ai = "ai", ao = "au",
		ou = "ou", uo = "uo", uai = "uɐi", ui = "ui", yoe = "yø",

		ang = "aŋ", eng = "ɛŋ", ing = "iŋ", ung = "uŋ", ieng = "iɛŋ",
		orng = "ɒŋ", oeng = "œŋ", ong = "ɔŋ", ng = "ŋ̍",

		ah = "aʔ", eh = "ɛʔ", ih = "eʔ", oh = "ɔʔ", orh = "ɒʔ",
		oeh = "œʔ",
	},
	nr = {
		a = "a", e = "e", oe = "ø", ["or"] = "ɒ", o = "o",
		i = "i", u = "u", y = "y", ia = "ia", ieo = "ieu",
		iu = "iu", ai = "ai", ao = "au", oo = "ɔ", ua = "ua",
		uei = "uei", ui = "ui",

		ang = "aŋ", eng = "e̞ŋ", ing = "iŋ", ong = "oŋ",
		orng = "ɒŋ", oeng = "œŋ", uerng = "uəŋ", yng = "yŋ", ng = "ŋ̍",

		ah = "aʔ", eh = "e̞ʔ", ih = "iʔ", oh = "oʔ", orh = "ɒʔ",
		oeh = "œʔ", uerh = "uəʔ", yh = "yʔ",
	},
	xy = {
		a = "a", ae = "ɛ", e = "e", i = "i", o = "o",
		oe = "ø", ["or"] = "ɒ", u = "u", y = "y",
		ai = "ai", ao = "au", ia = "ia", ieo = "ieu", iu = "iu",
		ou = "ɔu", ua = "ua", uei = "uei", ui = "ui", ya = "ya",

		ang = "aŋ", orng = "ɒŋ", eng = "ɛŋ",
		ing = "iŋ", ieng = "iɛŋ", yng = "yŋ",
		yoeng = "yøŋ", uong = "uoŋ", ng = "ŋ̍",

		ah = "aʔ", orh = "ɒʔ", eh = "ɛʔ",
		ih = "iʔ", ieh = "iɛʔ", uh = "uʔ",
		uoh = "uoʔ", yh = "yʔ", yoeh = "yøʔ",
		-- iah and uah occur only with the pronoun checked tone (代詞促調)
		iah = "iaʔ", uah = "uaʔ",

		ann = "ã", inn = "ĩ", ynn = "ỹ", ornn = "ɒ̃", ainn = "ãĩ",
		aonn = "ãũ", iann = "ĩã", iunn = "ĩũ", uann = "ũã", uinn = "ũĩ",
		yann = "ỹã",
	},
	yy = {
		a = "a", e = "e", oe = "ø", ["or"] = "ɒ", o = "o",
		i = "i", u = "u", y = "y", ia = "ia", ieo = "iəu",
		iu = "iu", ai = "ai", ao = "au", ou = "ou", ua = "ua",
		uai = "uai", oi = "oi", ui = "ui", ya = "ya",

		ang = "aŋ", eng = "ɛŋ", ing = "iŋ", ong = "oŋ", ung = "uŋ",
		ieng = "iɛŋ", orng = "ɒŋ", oeng = "œŋ", uang = "uaŋ",
		yang = "yɐŋ", yng = "yŋ", ng = "ŋ̍",

		ah = "aʔ", eh = "ɛʔ", ih = "iʔ", oh = "oʔ", orh = "ɒʔ",
		oeh = "œʔ", ieh = "iɛʔ", uah = "uaʔ", yah = "yɐʔ", yh = "yʔ",

		ann = "ã", enn = "ẽ", oenn = "ø̃", ornn = "ɒ̃",
		iann = "ĩã", iunn = "ĩũ", uann = "ũã", uinn = "ũĩ", yann = "ỹã",
	},
	ft = {
		a = "a", e = "e", ["or"] = "ɒ", er = "ɤ", i = "i",
		u = "u", ia = "ia", ieo = "iəu", iu = "iu", ai = "ai",
		ao = "au", ou = "ou", ua = "ua", uei = "uei", ui = "ui",

		ang = "aŋ", eng = "ɛŋ", ing = "iŋ", orng = "ɒŋ",
		ieng = "ieŋ", uerng = "ɯəŋ", ng = "ŋ̍",

		ah = "aʔ", eh = "ɛʔ", ih = "iʔ", orh = "ɒʔ",
		ieh = "ieʔ", uerh = "ɯəʔ",

		ann = "ã", inn = "ĩ", ornn = "ɒ̃",
		iann = "ĩã", iunn = "ĩũ", uann = "ũã", uinn = "ũĩ",
	},
}

-- Tone category -> tone-letter string (superscript digits), per dialect code.
-- Categories:
--   1 dark level (陰平) | 2 light level (陽平) | 3 rising (上聲)
--   4 dark departing (陰去) | 5 light departing (陽去)
--   6A dark entering A (陰入甲) | 6B dark entering B (陰入乙)
--   7A light entering A (陽入甲) | 7B light entering B (陽入乙)
--   S1: variant of tone 1 in non-final position
--   S3: pronoun checked tone (代詞促調); sounds like tone 3 in both Putian
--       and Xianyou after tone sandhi
local tones
do
	-- pt, jk and nr share all values except 7B; each call returns a new table.
	local function putian_group(tone_7b)
		return {
			["1"] = "⁵³³", ["2"] = "¹³", ["3"] = "⁴⁵³", ["4"] = "⁴²",
			["5"] = "¹¹", ["6A"] = "²¹", ["6B"] = "¹¹", ["7A"] = "⁴",
			["7B"] = tone_7b,
			["S1"] = "⁵⁵", ["S3"] = "³²", ["S7"] = "⁴⁵",
		}
	end

	-- xy, yy and ft share every value and define no S7.
	local function xianyou_group()
		return {
			["1"] = "⁵⁴⁴", ["2"] = "²⁴", ["3"] = "³³²", ["4"] = "⁴²",
			["5"] = "²¹", ["6A"] = "²", ["6B"] = "²¹", ["7A"] = "⁴",
			["7B"] = "²⁴",
			["S1"] = "⁵⁵", ["S3"] = "³²",
		}
	end

	tones = {
		pt = putian_group("¹³"),
		jk = putian_group("⁴⁵³"),
		nr = putian_group("¹³"),
		xy = xianyou_group(),
		yy = xianyou_group(),
		ft = xianyou_group(),
	}
end

-- Final-to-final rewrite tables: one `common` table plus one per dialect code.
-- NOTE(review): the code that applies these is not visible in this part of
-- the file; presumably each entry maps a spelling that does not exist in the
-- dialect to the final that should be used instead -- confirm.
local corrections = {
	common = {
		au = "ao", ieu = "ieo", iau = "ieo",
	},
	pt = {
		iao = "ieo", yo = "yor", ue = "uei", uai = "uei",
		yoh = "yorh", yong = "yorng",
	},
	jk = {
		ia = "ie", ua = "uo", ue = "uai", uei = "uai",
		yo = "oe", yor = "oe", ye = "yoe",
	},
	nr = {
		iao = "ieo", ou = "oo", ue = "uei", yo = "ua",
		yor = "ua", ung = "ng", uang = "uerng", uah = "uerh",
	},
	xy = {
		iao = "ieo", ue = "uei", yeh = "yoeh",
		yeng = "yoeng", iang = "ieng", ung = "ng",
	},
	yy = {
		iao = "ieo",
		ue = "oi",  -- or "uai"
		uei = "oi", -- or "uai"
	},
	ft = {
		o = "er", iao = "ieo", ue = "uei", ya = "ia",
		ung = "ng", uong = "uerng", erng = "uerng", erh = "uerh",
	},
}

-- Strips a trailing nasalization marker from a final.
-- Two spellings are recognized: the superscript "ⁿ" and the digraph "nn".
-- Returns three values:
--   1. the final with the marker removed (unchanged when none is present),
--   2. the matched "ⁿ", or nil when the final does not end in it,
--   3. the matched "nn", or nil when the final does not end in it.
local function handle_nasalization(final)
	local old_marker = final:match("ⁿ$")
	local new_marker = final:match("nn$")

	local stripped = final
	if old_marker then
		-- parentheses drop gsub's second result (the substitution count)
		stripped = (final:gsub("ⁿ$", ""))
	elseif new_marker then
		stripped = (final:gsub("nn$", ""))
	end

	return stripped, old_marker, new_marker
end
 
-- Tone sandhi tables: sandhi_rules[dialect][tone][other_tone] -> resulting tone.
-- NOTE(review): the lookup code is not visible in this part of the file;
-- presumably the first key is the tone of the syllable being changed and the
-- second key is the tone of the syllable that follows it -- confirm.
-- pt and jk are identical; nr differs from them in rows "1" and "4";
-- xy, yy and ft are identical to each other.
local sandhi_rules
do
	-- Column order used by row() below.
	local COLUMNS = {"1", "2", "3", "4", "5", "6A", "6B", "7A", "7B"}

	-- Builds one fresh row from nine positional values, in COLUMNS order.
	local function row(...)
		local values = {...}
		local mapping = {}
		for position, column in ipairs(COLUMNS) do
			mapping[column] = values[position]
		end
		return mapping
	end

	-- Rules for pt and jk; also the starting point for nr.
	local function putian_rules()
		return {
			["1"]  = row("5", "5", "5", "2", "2", "2", "2", "5", "5"),
			["2"]  = row("5", "5", "5", "S1", "4", "4", "4", "5", "5"),
			["3"]  = row("5", "2", "5", "5", "2", "2", "2", "5", "2"),
			["4"]  = row("S1", "4", "S1", "S1", "4", "4", "4", "S1", "4"),
			["5"]  = row("5", "5", "5", "S1", "4", "4", "4", "5", "5"),
			["6A"] = row("S7", "S7", "S7", "S7", "4", "4", "4", "S7", "S7"),
			["6B"] = row("S1", "S1", "S1", "S1", "4", "4", "4", "S1", "S1"),
			["7A"] = row("6A", "6A", "6A", "7A", "4", "4", "4", "6A", "6A"),
			["7B"] = row("5", "5", "5", "S1", "4", "4", "4", "5", "5"),
			["S3"] = row("7A", "7A", "7A", "7A", "7A", "7A", "7A", "7A", "7A"),
		}
	end

	-- Rules for xy, yy and ft.
	local function xianyou_rules()
		return {
			["1"]  = row("5", "5", "5", "2", "2", "2", "2", "5", "5"),
			["2"]  = row("5", "5", "5", "S1", "4", "4", "4", "5", "5"),
			["3"]  = row("5", "S1", "5", "5", "2", "2", "2", "5", "S1"),
			["4"]  = row("S1", "S1", "S1", "S1", "4", "4", "4", "S1", "S1"),
			["5"]  = row("5", "5", "5", "S1", "4", "4", "4", "5", "5"),
			["6A"] = row("7A", "7A", "7A", "7A", "7A", "7A", "7A", "7A", "7A"),
			["6B"] = row("S1", "S1", "S1", "S1", "4", "4", "4", "S1", "S1"),
			["7A"] = row("6A", "6A", "6A", "7A", "7A", "7A", "7A", "6A", "6A"),
			["7B"] = row("5", "5", "5", "S1", "4", "4", "4", "5", "5"),
			["S3"] = row("7A", "7A", "7A", "7A", "7A", "7A", "7A", "7A", "7A"),
		}
	end

	-- nr: same as the Putian rules apart from rows "1" and "4".
	local nanri_rules = putian_rules()
	nanri_rules["1"] = row("5", "5", "5", "5", "2", "2", "2", "5", "5")
	nanri_rules["4"] = row("S1", "S1", "S1", "S1", "4", "4", "4", "S1", "S1")

	sandhi_rules = {
		pt = putian_rules(),
		jk = putian_rules(),
		nr = nanri_rules,
		xy = xianyou_rules(),
		yy = xianyou_rules(),
		ft = xianyou_rules(),
	}
end

-- Initial assimilation tables:
-- initial_assimilation_rules[dialect][final_type][initial] -> replacement initial.
-- The final_type keys are the strings returned by get_final_type.
-- NOTE(review): the lookup code is not visible in this part of the file;
-- presumably final_type describes the preceding syllable's final and the
-- innermost key is the initial of the syllable being changed -- confirm.
-- pt, jk and nr share one rule set; xy, yy and ft share another, which adds
-- `nasalized_final` and turns b/p into "w" after other finals.
local initial_assimilation_rules
do
	local function after_nasal_final()
		return {
			b = "m", p = "m", m = "m",
			d = "n", t = "n", n = "n", l = "n", z = "n", c = "n", s = "n",
			g = "ng", k = "ng", h = "ng", ng = "ng", [""] = "ng",
		}
	end

	local function after_nasalized_final()
		return {
			b = "m", m = "m", p = "m",
			d = "n", t = "n", n = "n", l = "n", z = "n", c = "n", s = "n",
			g = "", k = "", h = "",
			ng = "ng",
			[""] = "",
		}
	end

	-- `labial` is the replacement for b and p: "" in pt/jk/nr, "w" in xy/yy/ft.
	local function after_other_final(labial)
		return {
			b = labial, p = labial,
			m = "m", n = "n", l = "l", ng = "ng",
			d = "l", t = "l", z = "l", c = "l", s = "l",
			g = "", k = "", h = "", [""] = "",
		}
	end

	local function putian_group()
		return {
			nasal_final = after_nasal_final(),
			glottal_final = {}, -- remain unchanged
			other_final = after_other_final(""),
		}
	end

	local function xianyou_group()
		return {
			nasal_final = after_nasal_final(),
			nasalized_final = after_nasalized_final(),
			glottal_final = {}, -- remain unchanged
			other_final = after_other_final("w"),
		}
	end

	initial_assimilation_rules = {
		pt = putian_group(),
		jk = putian_group(),
		nr = putian_group(),
		xy = xianyou_group(),
		yy = xianyou_group(),
		ft = xianyou_group(),
	}
end

-- Romanized initial -> BUC initial. Only z -> "c" and c -> "ch" differ
-- from the input spelling; the empty initial stays empty.
local buc_initials = {
	b = "b", p = "p", m = "m",
	d = "d", t = "t", n = "n", l = "l",
	z = "c", c = "ch", s = "s",
	g = "g", k = "k", ng = "ng", h = "h",
	[""] = "",
}

-- Romanized final -> list of candidate BUC finals.
-- Each candidate is {buc_final, tone_position}; tone_position is the 1-based
-- index of the code point after which combine_buc_syllable appends the tone
-- diacritic (code points are counted as the string is written here).
-- The key "or" must stay in bracket form because `or` is a Lua keyword.
local buc_finals = {
	a = {{"a", 1}, {"aⁿ", 1}, {"ah", 1}},
	ae = {{"e", 1}},
	ah = {{"ah", 1}},
	ai = {{"ai", 1}},
	ang = {{"ang", 1}},
	ao = {{"au", 1}},
	e = {{"a̤", 1}, {"a̤ⁿ", 1}, {"a̤h", 1}},
	eh = {{"eh", 1}},
	eng = {{"eng", 1}},
	i = {{"i", 1}, {"ih", 1}},
	ia = {{"ia", 2}, {"iaⁿ", 2}, {"iah", 2}},
	iah = {{"iah", 2}},
	ieh = {{"iah", 2}},
	ieng = {{"iang", 2}},
	-- tone goes on `a̤` for a̤u and a̤uⁿ, but on `u` for a̤uh
	ieo = {{"a̤u", 2}, {"a̤uⁿ", 2}, {"a̤uh", 3}},
	ih = {{"ih", 1}},
	ing = {{"ing", 1}},
	iu = {{"iu", 2}},
	-- the mark actually sits in the middle of `n` and `g`
	ng = {{"ng", 1}},
	o = {{"eo", 2}, {"eoh", 2}},
	oe = {{"e̤", 1}, {"e̤ⁿ", 1}},
	oeh = {{"e̤h", 1}},
	oeng = {{"e̤ng", 1}},
	oh = {{"eoh", 2}},
	ong = {{"eong", 2}},
	["or"] = {{"o̤", 1}, {"o̤ⁿ", 1}, {"o̤h", 1}},
	orh = {{"o̤h", 1}},
	orng = {{"o̤ng", 1}},
	ou = {{"o", 1}},
	u = {{"u", 1}},
	ua = {{"ua", 2}, {"uaⁿ", 2}, {"uah", 2}},
	uah = {{"uah", 2}},
	uang = {{"uang", 2}},
	-- tone goes on `o` for the oi-based forms
	uei = {{"oi", 1}, {"uai", 2}, {"oiⁿ", 1}, {"oih", 1}},
	uh = {{"uh", 1}},
	-- tone goes on `u`
	ui = {{"ui", 1}},
	-- the mark actually sits in the middle of `n` and `g`
	ung = {{"ng", 1}},
	y = {{"ṳ", 1}},
	yh = {{"ṳh", 1}},
	yng = {{"ṳng", 1}},
	yor = {{"io̤", 2}, {"io̤ⁿ", 2}, {"io̤h", 2}},
	yorh = {{"io̤h", 2}},
	yorng = {{"io̤ng", 2}},
}

-- Tone category -> combining diacritic that combine_buc_syllable appends to
-- the BUC final. An empty string means no diacritic is written. Tones that
-- are not keys of this table (e.g. "S1", "S7") make combine_buc_syllable
-- raise "Invalid tone".
local buc_tones = {
	["1"] = "",	  -- dark level (陰平): no mark
	["2"] = "́",	  -- light level (陽平): U+0301
	["3"] = "̂",	  -- rising (上聲): U+0302
	["4"] = "̍",	  -- dark departing (陰去): U+030D
	["5"] = "̄",	  -- light departing (陽去): U+0304
	["6A"] = "",	  -- dark entering A (陰入甲): final -h, no mark
	["6B"] = "̄",	 -- dark entering B (陰入乙): same mark as tone 5
	["7A"] = "̍",	  -- light entering A (陽入甲): final -h plus U+030D
	["7B"] = "̍",	  -- light entering B (陽入乙): final -h plus U+030D
}

-- Splits a comma-separated list of dialect codes into an array.
-- Empty segments are skipped by the pattern. Raises "Unsupported dialect: <code>"
-- for the first code that is not a key of `dialects` (codes are compared
-- exactly as written, including any surrounding whitespace).
local function split_dialect_codes(code_string)
	local accepted = {}
	for entry in code_string:gmatch("[^,]+") do
		if dialects[entry] then
			accepted[#accepted + 1] = entry
		else
			error("Unsupported dialect: " .. entry)
		end
	end
	return accepted
end

-- Peels the BUC-related markers off one input syllable.
-- Checked in this order, each removed from the syllable when found:
--   1. a leading capitalization marker,
--   2. a trailing space-after marker,
--   3. a trailing comma,
--   4. the first "{...}" group, whose content becomes the manual BUC form.
-- Returns the marker table (capitalize, space_after, comma_after, manual_buc)
-- followed by the syllable with those markers removed.
local function get_syllable_markers(syllable)
	local rest = syllable

	local capitalize = rest:sub(1, 1) == SPECIAL_MARKERS.CAPITALIZATION
	if capitalize then
		rest = rest:sub(2)
	end

	local space_after = rest:sub(-1) == SPECIAL_MARKERS.SPACE_AFTER
	if space_after then
		rest = rest:sub(1, -2)
	end

	local comma_after = rest:sub(-1) == ","
	if comma_after then
		rest = rest:sub(1, -2)
	end

	-- Manual BUC: take what is between the braces and cut the whole group out.
	local manual_buc = nil
	local brace_open, brace_close = rest:find("{[^}]+}")
	if brace_open then
		manual_buc = rest:sub(brace_open + 1, brace_close - 1)
		rest = rest:sub(1, brace_open - 1) .. rest:sub(brace_close + 1)
	end

	return {
		capitalize = capitalize,
		space_after = space_after,
		comma_after = comma_after,
		manual_buc = manual_buc,
	}, rest
end

-- Splits a toneless romanized form into its initial and final.
-- options.form (required): the form to split, e.g. "ba", "ngang", "ng".
-- Returns initial, final. The initial is "" when the form starts with none
-- of the recognized consonants; a bare "ng" is treated as a final.
-- Raises when options/form is missing or when nothing is left for the final.
local function split_initial_final(options)
	local form = options and options.form
	if not form then
		error("split_initial_final: form is required")
	end

	local initial, final
	local after_ng = form:match("^ng(.*)$")
	if after_ng == "" then
		-- the whole form is the syllabic nasal
		initial, final = "", form
	elseif after_ng then
		-- "ng" followed by something: "ng" is the initial
		initial, final = "ng", after_ng
	else
		initial = form:match("^[bpmnltdzcsghkw]h?") or ""
		final = form:sub(#initial + 1)
	end

	if not final or final == "" then
		error("Invalid form: " .. form .. " (unable to extract final)")
	end

	return initial, final
end

-- Phonological rule application functions
-- Classifies a final for initial assimilation.
-- options.final   (required): the romanized final.
-- options.initial (optional): the initial of the same syllable; a missing
--                             value is treated as "" (no initial).
-- options.dialect (optional): dialect code; only "xy" changes the result.
-- Returns one of "nasal_final" (ends in "ng"), "glottal_final" (ends in "h"),
-- "nasalized_final" (ends in "nn", or -- in dialect "xy" only -- any other
-- final preceded by an initial containing m or n), or "other_final".
-- Raises when options is not a table or final is missing.
local function get_final_type(options)
	if not options or type(options) ~= "table" then
		error("get_final_type: options must be a table")
	end

	local final = options.final
	if not final then
		error("get_final_type: final cannot be nil")
	end
	local initial = options.initial or ""
	local dialect = options.dialect

	local last_two = sub(final, -2)
	if last_two == "ng" then
		return "nasal_final"
	end
	if sub(final, -1) == "h" then
		return "glottal_final"
	end
	if last_two == "nn" then
		return "nasalized_final"
	end

	-- Default rule for xy: a nasal initial nasalizes an otherwise plain final.
	-- Every final reaching this point is already known to be a plain one
	-- (the "ng", "h" and "nn" endings returned above), so no further
	-- classification of the final is needed here.
	if match(initial, "[mn]g?") and dialect == "xy" then
		require("Module:debug").track('cpx-pron/xy-nasal-initial/default-rule')
		return "nasalized_final"
	end

	return "other_final"
end

-- Records a tracking key of the form "cpx-pron/<reason>" through Module:debug.
local function track_buc_issue(reason)
	local debug_module = require("Module:debug")
	debug_module.track('cpx-pron/' .. reason)
end

-- Assembles one BUC syllable from its parts.
-- options.initial:       BUC initial (may be "").
-- options.final:         BUC final.
-- options.tone:          tone category; must be a key of buc_tones.
-- options.tone_position: 1-based index, counted in code points of `final`,
--                        of the character that receives the tone diacritic.
-- Returns initial .. final with the diacritic inserted, normalized to NFC.
-- Raises "Invalid tone: ..." for an unknown tone and
-- "Invalid tone position: ..." when the position is not an integer within
-- the final.
local function combine_buc_syllable(options)
	local initial = options.initial
	local final = options.final
	local tone = options.tone
	local tone_position = options.tone_position

	local tone_mark = buc_tones[tone]
	if not tone_mark then
		-- tostring: `tone` may be nil here, which cannot be concatenated
		error("Invalid tone: " .. tostring(tone))
	end

	-- Split the final into code points so that combining marks already in
	-- the final count as separate positions.
	local chars = {}
	for char in mw.ustring.gmatch(final, ".") do
		table.insert(chars, char)
	end

	-- Reject anything that does not address an existing code point.
	if type(tone_position) ~= "number"
		or tone_position < 1
		or tone_position > #chars
		or tone_position % 1 ~= 0 then
		error("Invalid tone position: " .. tostring(tone_position))
	end

	-- The diacritic is a combining mark, so it goes right after its base.
	chars[tone_position] = chars[tone_position] .. tone_mark

	return mw.ustring.toNFC(initial .. table.concat(chars))
end

-- Returns the entry stored for `char` in m_data.buc, or nil when there is
-- none (a falsy entry is also reported as nil).
local function lookup_char_readings(char)
	return m_data.buc[char] or nil
end

-- Convert single PSP syllable to BUC.
-- options.syllable_info: table providing manual_buc, original_initial,
--                        original_final and original_tone.
-- options.char:          the character whose readings in m_data.buc are used
--                        to pick between several candidates, or nil to skip
--                        that lookup.
-- Returns the BUC syllable, or nil when no unique form could be determined;
-- every nil return is preceded by a track_buc_issue call naming the reason.
local function convert_to_buc_syllable(options)
	local syllable_info = options.syllable_info
	local char = options.char

	-- If BUC is manually specified, verify it and return it unchanged.
	-- NOTE(review): validate_manual_buc is not defined above this function in
	-- the visible part of the file; if it is declared further down as a
	-- `local`, this call resolves to a global of that name instead -- confirm.
	if syllable_info.manual_buc then
		local is_valid = validate_manual_buc(syllable_info.manual_buc)
		if not is_valid then
			track_buc_issue("manual form incorrect")
			return nil
		end
		return syllable_info.manual_buc
	end

	local lookup_tone = syllable_info.original_tone
	local lookup_final = syllable_info.original_final

	-- Special handling for S3 tone: look it up as tone 3 without the final h.
	if syllable_info.original_tone == "S3" then
		lookup_tone = "3"
		if lookup_final:sub(-1) == "h" then
			lookup_final = lookup_final:sub(1, -2)
		end
	end

	-- Get possible BUC finals
	local possible_finals = buc_finals[lookup_final]
	if not possible_finals then
		track_buc_issue("no final found")
		return nil
	end

	-- Get BUC initial
	local initial = buc_initials[syllable_info.original_initial]
	if not initial then
		track_buc_issue("no initial found")
		return nil
	end

	-- Generate all possible BUC forms.
	-- Special check for BUC tone 7B which merged into tone 2: a BUC final in
	-- -h whose PSP final has no -h is only kept for tones 2 and 7B, is written
	-- with the 7B mark, and gets a trailing "*".
	local psp_has_h = syllable_info.original_final:match("h$")
	local candidates = {}
	for _, final_info in ipairs(possible_finals) do
		local final, tone_position = final_info[1], final_info[2]
		-- evaluated before any "*" is appended to `final`
		local buc_has_h = final:match("h$")
		local should_keep = true
		local use_tone = lookup_tone

		if lookup_tone == "7B" then
			if buc_has_h then
				final = final .. "*"
			else
				should_keep = false
			end
		end

		if buc_has_h and not psp_has_h then
			if lookup_tone == "2" then
				use_tone = "7B"
				final = final .. "*"
			elseif lookup_tone ~= "7B" then
				should_keep = false
			end
		end

		if should_keep then
			table.insert(candidates, combine_buc_syllable({
				initial = initial,
				final = final,
				tone = use_tone,
				tone_position = tone_position
			}))
		end
	end

	if #candidates == 1 then
		return candidates[1]
	end

	-- No need to look up Hanzi-BUC table if hanzi's and PSP's counts don't match
	if not char then
		if #possible_finals > 1 then
			track_buc_issue("contraction and multiple final found")
			return nil
		end
		return combine_buc_syllable({
			initial = initial,
			final = possible_finals[1][1],
			tone = lookup_tone,
			tone_position = possible_finals[1][2]
		})
	end

	local char_readings = lookup_char_readings(char)
	if not char_readings then
		track_buc_issue("cannot look up table")
		return nil
	end

	-- Keep the candidates that are attested as a reading of the character.
	local matches = {}
	for _, candidate in ipairs(candidates) do
		for _, reading in ipairs(char_readings) do
			if candidate == reading then
				table.insert(matches, candidate)
			end
		end
	end

	if #matches == 0 then
		track_buc_issue("no matching reading found")
		return nil
	elseif #matches > 1 then
		track_buc_issue("multiple matching readings found")
		return nil
	end

	-- temp: count the cases where a PSP tone 2 resolved to a BUC 7B form
	if syllable_info.original_tone == "2" and matches[1]:match("h%*$") then
		require("Module:debug").track('cpx-pron/2-to-7B')
	end

	return matches[1]
end

-- Builds the BUC transcription of a whole word.
-- options.syllable_infos (required): array of syllable info tables.
-- options.dialect: BUC is only produced for "pt"; any other value gives nil.
-- options.word: passed through to convert_to_buc_syllable.
-- Returns the joined BUC string, or nil when the dialect is not "pt" or when
-- any syllable could not be converted.
-- Syllables are joined with ", " after a syllable flagged comma_after, with
-- " " after one flagged space_after, and with "-" otherwise.
local function generate_buc(options)
	local syllable_infos = options.syllable_infos
	if not syllable_infos then
		error("Missing required syllable_infos in generate_buc")
	end

	if options.dialect ~= "pt" then
		return nil
	end

	-- The character table is only consulted when the page title (without
	-- whitespace and commas) has exactly one character per syllable.
	local title_text = mw.title.getCurrentTitle().text
	local title_chars = mw.ustring.gsub(title_text, "[\n\r\t ,]", "")
	local use_char_table = (#syllable_infos == mw.ustring.len(title_chars))

	local pieces = {}
	for index, info in ipairs(syllable_infos) do
		-- A manual BUC form is used as written (no capitalization applied).
		local rendered = info.manual_buc
		if not rendered then
			rendered = convert_to_buc_syllable({
				syllable_info = info,
				char = use_char_table and mw.ustring.sub(title_chars, index, index) or nil,
				word = options.word
			})

			-- If any syllable cannot be uniquely identified, give up entirely.
			if not rendered then
				return nil
			end

			if info.capitalize then
				-- Decompose first so only the base letter is upper-cased.
				local decomposed = mw.ustring.toNFD(rendered)
				rendered = mw.ustring.toNFC(
					mw.ustring.upper(mw.ustring.sub(decomposed, 1, 1)) ..
					mw.ustring.sub(decomposed, 2)
				)
			end
		end

		-- The separator is decided by the flags of the preceding syllable.
		if index > 1 then
			local previous = syllable_infos[index - 1]
			if previous.comma_after then
				pieces[#pieces + 1] = ", "
			elseif previous.space_after then
				pieces[#pieces + 1] = " "
			else
				pieces[#pieces + 1] = "-"
			end
		end
		pieces[#pieces + 1] = rendered
	end

	return table.concat(pieces)
end

-- Parses one input syllable into its components.
-- Accepted shape, after the BUC markers handled by get_syllable_markers are
-- removed: [*]form[>changed_form]tone[-sandhi_tone][#]
-- Returns a table with: orig_form, changed_form, tone_part, orig_initial,
-- orig_final, changed_initial, changed_final, orig_tone, manual_sandhi_tone,
-- no_sandhi, no_assimilation, capitalize, space_after, comma_after, manual_buc.
-- Raises when the syllable is empty or cannot be parsed.
local function split_syllable(syllable)
	if not syllable or syllable == "" then
		error("Invalid syllable: " .. tostring(syllable))
	end

	-- BUC-only markers first (only relevant for Putian).
	local markers, body = get_syllable_markers(syllable)

	local no_assimilation = body:sub(1, 1) == SPECIAL_MARKERS.NO_ASSIMILATION
	if no_assimilation then
		body = body:sub(2)
	end

	local no_sandhi = body:sub(-1) == SPECIAL_MARKERS.NO_SANDHI
	if no_sandhi then
		body = body:sub(1, -2)
	end

	-- Separate the form(s) from the tone part, which starts at the first
	-- digit 1-7 or "S". Without a manual change both forms are the same.
	local orig_form, changed_form, tone_part
	if body:find(SPECIAL_MARKERS.MANUAL_CHANGE) then
		orig_form, changed_form, tone_part = body:match("(.-)>(.-)([1-7S]+.*)$")
	else
		orig_form, tone_part = body:match("(.-)([1-7S]+.*)$")
		changed_form = orig_form
	end

	-- If the segmentation is not correct
	if not orig_form or not tone_part then
		error("Invalid syllable format: " .. body)
	end

	local orig_initial, orig_final = split_initial_final({form = orig_form})
	local changed_initial, changed_final = split_initial_final({form = changed_form})

	-- "x-y" in the tone part means original tone x with manual sandhi tone y.
	local orig_tone, manual_sandhi_tone
	if tone_part:find("-") then
		orig_tone, manual_sandhi_tone = tone_part:match("^([1-7S]+)%-([1-7S]+)$")
		require("Module:debug").track('cpx-pron/manual sandhi tone')
	else
		orig_tone = tone_part
	end

	-- Tone 3 on a changed final in -h is the special tone S3.
	if orig_tone == "3" and changed_final:sub(-1) == "h" then
		orig_tone = "S3"
	end
	-- Tones 6 and 7 split into A (original final in -h) and B (otherwise).
	if orig_tone == "6" then
		orig_tone = (orig_final:sub(-1) == "h") and "6A" or "6B"
	elseif orig_tone == "7" then
		orig_tone = (orig_final:sub(-1) == "h") and "7A" or "7B"
	end

	-- final validation
	if not (orig_initial and orig_final and orig_tone) then
		error("Unable to parse syllable: " .. body)
	end

	return {
		orig_form = orig_form,
		changed_form = changed_form,
		tone_part = tone_part,
		orig_initial = orig_initial,
		orig_final = orig_final,
		changed_initial = changed_initial,
		changed_final = changed_final,
		orig_tone = orig_tone,
		manual_sandhi_tone = manual_sandhi_tone,
		no_sandhi = no_sandhi,
		no_assimilation = no_assimilation,
		-- BUC (only for Putian)
		capitalize = markers.capitalize,
		space_after = markers.space_after,
		comma_after = markers.comma_after,
		manual_buc = markers.manual_buc,
	}
end

-- Build the per-syllable record used by the assimilation, sandhi and IPA steps.
--
-- options.syllable          raw syllable text, parsed with split_syllable
-- options.is_first_syllable copied through unchanged
--
-- changed_tone starts out equal to the original tone; the "changed_*" initial
-- and final come straight from split_syllable.
local function create_syllable_info(options)
	local parts = split_syllable(options.syllable)

	local info = {}
	info.original_initial = parts.orig_initial
	info.original_final = parts.orig_final
	info.original_tone = parts.orig_tone
	info.changed_initial = parts.changed_initial
	info.changed_final = parts.changed_final
	info.changed_tone = parts.orig_tone
	info.no_sandhi = parts.no_sandhi
	info.no_assimilation = parts.no_assimilation
	info.is_first_syllable = options.is_first_syllable
	info.manual_sandhi_tone = parts.manual_sandhi_tone
	-- BUC-related fields
	info.capitalize = parts.capitalize
	info.space_after = parts.space_after
	info.comma_after = parts.comma_after
	info.manual_buc = parts.manual_buc
	return info
end

-- Syllable processing functions
-- Split options.word on whitespace and build one syllable-info record per
-- syllable (see create_syllable_info). Only the first record gets
-- is_first_syllable = true.
--
-- Raises an error when the word contains no syllables at all (e.g. it is only
-- whitespace): the later processing steps index the first syllable
-- unconditionally and would otherwise fail with an opaque nil-index error.
local function create_syllable_infos(options)
	local syllable_infos = {}
	for syllable in options.word:gmatch("%S+") do
		local is_first = #syllable_infos == 0
		syllable_infos[#syllable_infos + 1] = create_syllable_info({
			syllable = syllable,
			is_first_syllable = is_first
		})
	end

	if #syllable_infos == 0 then
		error('Invalid input: no syllables found in "' .. options.word .. '"')
	end

	return syllable_infos
end

-- Tidy up options.syllable (modified in place) after assimilation:
--   * when the changed initial starts with "m" or "n", a trailing "nn" on the
--     changed final is dropped;
--   * an "ng" initial in front of an "ng" final is dropped.
local function post_process_nasalization(options)
	local info = options.syllable

	-- A nasal initial makes the "nn" suffix on the final redundant
	if info.changed_initial:find("^[mn]") then
		info.changed_final = (info.changed_final:gsub("nn$", ""))
	end

	-- "ng" + "ng" collapses to the bare syllabic "ng"
	local is_double_ng = info.changed_initial == "ng" and info.changed_final == "ng"
	if is_double_ng then
		info.changed_initial = ""
	end
end

-- Return the sandhi tone of options.curr_syllable in front of
-- options.next_syllable for options.dialect.
--
-- Precedence:
--   1. a manually specified sandhi tone on the current syllable;
--   2. the original tone when there is no following syllable;
--   3. sandhi_rules[dialect][current tone][next tone];
--   4. the original tone when no rule applies.
--
-- The rule lookup is nil-safe: a dialect or tone without a rule row falls back
-- to the original tone instead of raising "attempt to index a nil value".
local function get_sandhi_tone(options)
	local curr_syllable = options.curr_syllable
	local next_syllable = options.next_syllable

	-- Handle manual tone specification
	if curr_syllable.manual_sandhi_tone then
		return curr_syllable.manual_sandhi_tone
	end

	local citation_tone = curr_syllable.original_tone

	-- Handle final syllable
	if not next_syllable then
		return citation_tone
	end

	-- Apply sandhi rules, tolerating missing rows
	local dialect_rules = sandhi_rules[options.dialect]
	local tone_rules = dialect_rules and dialect_rules[citation_tone]

	return tone_rules and tone_rules[next_syllable.original_tone] or citation_tone
end

-- Compute changed_tone for every entry of options.syllable_infos (modified in
-- place) using the sandhi rules of options.dialect.
--
-- For each syllable:
--   * a manual sandhi tone of "6"/"7" is expanded to "6A"/"6B"/"7A"/"7B"
--     depending on whether the changed final ends in "h", and a manual "3" on
--     an h-final becomes "S3";
--   * the tone is left unchanged when the syllable carries the no-sandhi mark,
--     is followed by a comma, or is the last syllable; otherwise
--     get_sandhi_tone decides;
--   * a resulting tone "3" on an h-final is rewritten to "S3";
--   * for non-final syllables a tracking entry is recorded.
local function apply_sandhi(options)
	local dialect = options.dialect
	local syllable_infos = options.syllable_infos

	-- Tone label used in tracking keys: "S1" and "S3" are kept as they are,
	-- any other "S"-prefixed tone loses its prefix.
	local function tracking_label(tone)
		if tone == "S1" or tone == "S3" then
			return tone
		end
		if tone:sub(1, 1) == "S" then
			return tone:sub(2)
		end
		return tone
	end

	for position = 1, #syllable_infos do
		local current = syllable_infos[position]
		local following = syllable_infos[position + 1]
		local citation_tone = current.original_tone

		-- Normalise the manually specified sandhi tone, if any
		local manual = current.manual_sandhi_tone
		if manual == "6" or manual == "7" then
			local suffix = current.changed_final:sub(-1) == "h" and "A" or "B"
			current.manual_sandhi_tone = manual .. suffix
		elseif manual == "3" and current.changed_final:sub(-1) == "h" then
			current.manual_sandhi_tone = "S3"
		end

		-- No sandhi when marked, before a comma, or on the last syllable
		local keeps_citation_tone = current.no_sandhi
			or current.comma_after
			or not following
		if keeps_citation_tone then
			current.changed_tone = current.original_tone
		else
			current.changed_tone = get_sandhi_tone({
				curr_syllable = current,
				next_syllable = following,
				dialect = dialect
			})
		end

		-- Special tone adjustment for glottal finals
		if current.changed_tone == '3' and current.changed_final:sub(-1) == 'h' then
			current.changed_tone = 'S3'
		end

		-- Tracking
		if following then
			local key = 'cpx-pron/sandhi/' .. dialect .. '/' ..
				tracking_label(citation_tone) .. '+' ..
				tracking_label(following.original_tone) .. '/' ..
				tracking_label(current.changed_tone)
			require("Module:debug").track(key)
		end
	end
end

-- Apply initial assimilation to every non-first syllable of
-- options.syllable_infos for options.dialect.
--
-- The returned list holds the same table objects as the input list, so the
-- syllable records are modified in place (changed_initial, changed_final and,
-- for the first syllable, is_first_syllable).
--
-- The rule applied to a syllable is selected by the final type of the
-- preceding syllable (get_final_type, defined elsewhere in this file) and the
-- syllable's own original initial, via initial_assimilation_rules.
local function apply_initial_assimilation(options)
	local dialect = options.dialect
	local syllable_infos = options.syllable_infos
	local result = {}

	-- Handle first syllable: it is copied through and never assimilated
	result[1] = syllable_infos[1]
	result[1].is_first_syllable = true

	-- Process subsequent syllables
	for i = 2, #syllable_infos do
		-- prev_syllable is the already-processed previous entry, so its
		-- changed_initial / changed_final reflect earlier iterations
		local prev_syllable = result[i-1]
		local curr_syllable = syllable_infos[i]
		-- Store original initial for tracking
		local original_initial = curr_syllable.original_initial
		local rule_applied = false

		-- Detect a manually overridden initial in the xy, yy and ft dialects
		-- after a syllable with a nasal initial whose final does not end in
		-- "ng", "h" or "nn". The flag is only used for the tracking call below.
		local is_manual_override = (dialect == "xy" or dialect == "yy" or dialect == "ft") and 
		   prev_syllable.changed_initial:match("^[mn]g?$") and 
		   not (prev_syllable.changed_final:sub(-2) == "ng" or 
				prev_syllable.changed_final:sub(-1) == "h" or 
				prev_syllable.changed_final:sub(-2) == "nn") and
		   curr_syllable.changed_initial ~= curr_syllable.original_initial

		if is_manual_override then
			require("Module:debug").track('cpx-pron/xy-nasal-initial/manual-override')
		end

		-- Final type of the previous syllable; also used in the tracking keys
		local original_final_type = get_final_type({
			initial = prev_syllable.changed_initial,
			final = prev_syllable.changed_final,
			dialect = dialect
		})

		-- No initial assimilation if one of the following conditions are met:
		-- 1. there is a no_assimilation mark
		-- 2. the previous syllable is followed by a comma
		-- 3. the initial was already changed manually (changed ~= original)
		if not curr_syllable.no_assimilation and 
			not prev_syllable.comma_after and
			curr_syllable.changed_initial == curr_syllable.original_initial then

			local final_type = original_final_type

			-- Special rule for nasalized finals: after an "other_final", a
			-- syllable whose original initial starts with one of b p d t z c s
			-- and whose own final is nasalized is treated as following a
			-- "nasal_final"
			local should_apply_nasal_rule = 
				final_type == "other_final" and
				curr_syllable.original_initial:match("^[bpdtzcs]") and
				get_final_type({
					initial = curr_syllable.original_initial,
					final = curr_syllable.original_final,
					dialect = dialect
				}) == "nasalized_final"

			if should_apply_nasal_rule then
				final_type = "nasal_final"
			end

			-- Mark that we are applying an assimilation rule
			rule_applied = true

			-- Apply assimilation rules; the initial is kept when no rule
			-- exists for it
			curr_syllable.changed_initial = 
				initial_assimilation_rules[dialect][final_type][curr_syllable.original_initial] or 
				curr_syllable.original_initial

			-- The tracking key uses the unadjusted final type of the previous syllable
			require("Module:debug").track('cpx-pron/assimilation/' .. dialect .. '/' .. original_final_type .. '/' .. original_initial .. '/' .. curr_syllable.changed_initial)
		end

		-- When no rule ran but the initial still differs from the original
		-- (i.e. it was changed manually), record the same kind of tracking entry
		if not rule_applied and curr_syllable.changed_initial ~= original_initial then
			require("Module:debug").track('cpx-pron/assimilation/' .. dialect .. '/' .. original_final_type .. '/' .. original_initial .. '/' .. curr_syllable.changed_initial)
		end

		-- Post-process nasalization
		local post_process_options = {
			syllable = curr_syllable,
			dialect = dialect
		}
		post_process_nasalization(post_process_options)

		table.insert(result, curr_syllable)
	end
	return result
end

-- Build the "actual" (post-assimilation, post-sandhi) romanisation of a word.
--
-- Each entry of syllable_infos contributes
-- changed_initial .. changed_final .. changed_tone; the syllables are joined
-- with single spaces. An empty list yields the empty string.
local function generate_actual_pronunciation(syllable_infos)
	local pronunciations = {}

	for position, syllable in ipairs(syllable_infos) do
		-- Combine the changed components
		pronunciations[position] = syllable.changed_initial ..
			syllable.changed_final ..
			syllable.changed_tone
	end

	return table.concat(pronunciations, " ")
end

-- Look up the IPA string for one initial, final or tone.
--
-- options.type          "initials", "finals" or "tones"
-- options.dialect       dialect code used as the first-level key
-- options.value         romanised initial / final / tone to look up
-- options.syllable_info optional; when present it is used to suggest a full
--                       corrected syllable in the error for a bad final
--
-- Returns the IPA string. Raises an error for missing parameters, an unknown
-- lookup type, the rejected spellings "bh" (initial) and "S5" (tone), and any
-- value without an entry for the dialect. For finals the error suggests a
-- replacement when the `corrections` table has one or when the final only
-- uses the deprecated nasalisation spelling (as reported by
-- handle_nasalization, defined elsewhere in this file).
local function get_ipa_value(options)
	local kind, dialect, value = options.type, options.dialect, options.value

	-- Validation
	if not kind or not dialect or not value then
		error("Missing required parameter for IPA lookup")
	end

	-- Pick the lookup table for this kind of component
	local lookup
	if kind == "initials" then
		lookup = initials
	elseif kind == "finals" then
		lookup = finals
	elseif kind == "tones" then
		lookup = tones
	end
	if not lookup then
		error("Invalid lookup type: " .. kind)
	end

	if kind == "initials" and value == "bh" then
		error(string.format(
			'Invalid initial "bh" for %s dialect. Please use "w" instead.',
			dialect
		))
	end

	if kind == "tones" and value == "S5" then
		error('Invalid tone S5. Please use "6" instead.')
	end

	local dialect_entries = lookup[dialect]
	local ipa = dialect_entries and dialect_entries[value]

	if kind == "finals" then
		local base_final, has_old_nasal, has_new_nasal = handle_nasalization(value)
		if has_old_nasal then
			require("Module:debug").track('cpx-pron/deprecated-nasalization')
		end

		if not ipa then
			local is_nasal = has_old_nasal or has_new_nasal
			local only_old_nasal = has_old_nasal and not has_new_nasal

			-- Find a suggested spelling: common corrections first, then
			-- dialect-specific ones, then the bare final for old-style nasals
			local replacement = corrections.common[base_final]
			if not replacement then
				local dialect_corrections = corrections[dialect]
				replacement = dialect_corrections and dialect_corrections[base_final]
			end
			if not replacement and only_old_nasal then
				replacement = base_final
			end

			if replacement or only_old_nasal then
				local corrected_value = replacement or base_final
				if is_nasal then
					corrected_value = corrected_value .. "nn"
				end

				-- Prefer suggesting the whole syllable when it is known
				local suggestion = corrected_value
				local info = options.syllable_info
				if info then
					local full_syllable = info.original_initial .. corrected_value .. info.original_tone
					if full_syllable ~= "" then
						suggestion = (full_syllable:gsub("[AB]", ""))
					end
				end

				error(string.format(
					'Invalid final "%s" for %s dialect. Please use "%s" instead.',
					value,
					dialect,
					suggestion
				))
			end
		end
	end

	if not ipa then
		error(string.format(
			"Invalid %s %s for %s.",
			kind:sub(1, -2),
			value,
			dialect
		))
	end

	return ipa
end

-- Return { initial, final, tone } IPA strings for options.syllable_info in
-- options.dialect.
--
-- The initial and final are looked up from the changed (post-assimilation)
-- values; the tone from the original tone. When the changed tone differs from
-- the original one, the tone becomes "<original>⁻<sandhi>".
local function get_ipa_components(options)
	local info = options.syllable_info
	local dialect = options.dialect

	local function lookup(kind, value)
		return get_ipa_value({
			type = kind,
			dialect = dialect,
			value = value,
			syllable_info = info
		})
	end

	-- Get basic components
	local components = {}
	components.initial = lookup("initials", info.changed_initial)
	components.final = lookup("finals", info.changed_final)
	components.tone = lookup("tones", info.original_tone)

	-- Handle tone change
	if info.changed_tone ~= info.original_tone then
		local sandhi_tone = get_ipa_value({
			type = "tones",
			dialect = dialect,
			value = info.changed_tone
		})

		-- Defensive check kept from the original implementation
		if not sandhi_tone then
			error("Invalid sandhi tone: " .. info.changed_tone ..
				  " for dialect: " .. dialect)
		end

		components.tone = components.tone .. "⁻" .. sandhi_tone
	end

	return components
end

-- Return the superscript prefix that shows a syllable's original initial.
--
-- Yields "" for the first syllable or when the initial did not change,
-- "<sup>(Ø-)</sup>" when the original initial is empty, and otherwise
-- "<sup>(IPA-)</sup>" with the IPA of the original initial.
local function get_original_initial_display(options)
	local info = options.syllable_info

	-- Only show original initial for non-first syllables with changes
	if info.is_first_syllable then
		return ""
	end
	if info.original_initial == info.changed_initial then
		return ""
	end

	-- Get IPA for original initial
	local ipa_initial = get_ipa_value({
		type = "initials",
		dialect = options.dialect,
		value = info.original_initial,
		syllable_info = info
	})

	if info.original_initial == "" then
		return "<sup>(Ø-)</sup>"
	end
	return "<sup>(" .. ipa_initial .. "-)</sup>"
end

-- Render one syllable as IPA: optional original-initial prefix, then the
-- initial, final and tone from get_ipa_components.
local function syllable_to_ipa(options)
	local request = {
		syllable_info = options.syllable_info,
		dialect = options.dialect
	}

	-- Components are resolved first so lookup errors surface in the same
	-- order as before
	local parts = get_ipa_components(request)
	local prefix = get_original_initial_display({
		syllable_info = options.syllable_info,
		dialect = options.dialect
	})

	return prefix .. parts.initial .. parts.final .. parts.tone
end

-- Generate IPA for the syllables
-- Convert every entry of options.syllable_infos to IPA for options.dialect and
-- join the syllables with single spaces.
-- Raises an error when options or options.syllable_infos is missing.
local function generate_ipa(options)
	local syllable_infos = options and options.syllable_infos
	if not syllable_infos then
		error("Missing required syllable_infos in generate_ipa")
	end

	local dialect = options.dialect
	local ipa_parts = {}
	for position, info in ipairs(syllable_infos) do
		ipa_parts[position] = syllable_to_ipa({
			syllable_info = info,
			dialect = dialect
		})
	end

	return table.concat(ipa_parts, " ")
end

-- Process a single pronunciation entry
-- Process one "dialects:word" entry.
--
-- options.dialect_codes  dialect code string, expanded by split_dialect_codes
-- options.word           space-separated syllables
-- options.index          position of this entry in the input
--
-- The word is parsed once; each dialect then works on its own deep copy of
-- the syllable records. Returns
-- { dialect_codes, word, index, processed = { per-dialect results } } where
-- each per-dialect result carries dialect, dialect_position, original, actual,
-- ipa, index, syllable_infos and, for "pt" only, buc.
local function process_pronunciation(options)
	local word = options.word
	local entry_index = options.index

	local dialect_list = split_dialect_codes(options.dialect_codes)

	-- Parse the syllables once, independent of dialect
	local parsed_syllables = create_syllable_infos({
		word = word,
		is_first_syllable = true
	})

	local processed = {}
	for position, dialect in ipairs(dialect_list) do
		-- Apply phonological rules on a private copy
		local working_copy = m_table.deepCopy(parsed_syllables)
		local syllables = apply_initial_assimilation({
			dialect = dialect,
			syllable_infos = working_copy
		})
		apply_sandhi({
			dialect = dialect,
			syllable_infos = syllables
		})

		-- Generate the romanised and IPA forms
		local entry = {
			dialect = dialect,
			dialect_position = position,
			original = word,
			index = entry_index,
			syllable_infos = syllables
		}
		entry.actual = generate_actual_pronunciation(syllables)
		entry.ipa = generate_ipa({
			syllable_infos = syllables,
			dialect = dialect
		})

		-- Generate BUC only for Putian
		if dialect == "pt" then
			entry.buc = generate_buc({
				syllable_infos = syllables,
				dialect = dialect,
				word = word
			})
		end

		processed[#processed + 1] = entry
	end

	return {
		dialect_codes = options.dialect_codes,
		word = word,
		processed = processed,
		index = entry_index
	}
end

-- Formatting helper functions
-- Wrap text in the monospace span used for romanisations. The styling comes
-- from the "zhpron-monospace" class rather than an inline font-family.
local function font_consolas(text)
	local open_tag = '<span class="zhpron-monospace">'
	local close_tag = '</span>'
	return open_tag .. text .. close_tag
end

-- Wrap text in slashes inside a span with class "IPA".
local function font_ipa(text)
	local transcription = '/' .. text .. '/'
	return '<span class="IPA">' .. transcription .. '</span>'
end

-- Strip the input markup from a romanisation and superscript its tone digits.
--
-- Removes, in order: "-tone" sandhi suffixes, ">form" manual sound changes,
-- the special symbols # * ^ \, "{...}" manual BUC, the A/B tone suffixes;
-- then wraps every digit in <sup>...</sup> and finally drops the "S" of
-- special tones.
--
-- Returns "" for nil input. Always returns exactly one value: the count that
-- string.gsub returns alongside the string is deliberately not passed on.
local function clear_pinging_format(text)
	if not text then
		return ""
	end

	local cleaned = text:gsub("%-S?%d", "") -- remove tone sandhi
			  :gsub(">[a-zⁿ]+", "") -- remove irregular sound change
			  :gsub("[#*^\\]+", "") -- remove special symbols
			  :gsub("{[^}]+}", "") -- remove manual BUC
			  :gsub("[AB]", "") -- remove A/B
			  :gsub("(%d)", "<sup>%1</sup>") -- superscript tone numbers
			  :gsub("S", "") -- remove "S" in special tones

	return cleaned
end

-- Output formatting functions
-- Render the "demo" view: one line per processed pronunciation of the form
--   <original syllables> → <actual syllables><br/><IPA>
-- with lines separated by blank lines.
--
-- options.results  list of results as produced by process_pronunciation
-- options.type     "assim" bolds the (non-empty) initial of every non-first
--                  syllable, "sandhi" bolds the tone of every non-last
--                  syllable; any other value applies no bolding
local function format_demo_output(options)
	local highlight_type = options.type or "default"

	-- Render one syllable from its initial, final and tone. The "S" prefix and
	-- A/B suffix of the tone are dropped and digits are superscripted.
	local function render(initial, final, tone, bold_initial, bold_tone)
		local tone_text = tone:gsub("S", "")
		local text = initial .. final

		if bold_initial and initial ~= "" then
			text = "'''" .. initial .. "'''" .. final
		elseif bold_tone then
			tone_text = "'''" .. tone_text .. "'''"
		end

		return text .. tone_text:gsub("(%d)", "<sup>%1</sup>"):gsub("[AB]", "")
	end

	local lines = {}
	for _, result in ipairs(options.results) do
		for _, processed in ipairs(result.processed) do
			local syllable_infos = processed.syllable_infos
			local before, after = {}, {}

			for position, info in ipairs(syllable_infos) do
				local bold_initial = highlight_type == "assim" and position ~= 1
				local bold_tone = highlight_type == "sandhi"
					and position ~= #syllable_infos

				table.insert(before, render(
					info.original_initial, info.original_final,
					info.original_tone, bold_initial, bold_tone))
				table.insert(after, render(
					info.changed_initial, info.changed_final,
					info.changed_tone, bold_initial, bold_tone))
			end

			local line = table.concat(before, " ") .. " → " ..
				table.concat(after, " ") ..
				"<br/>" .. font_ipa(processed.ipa)
			table.insert(lines, line)
		end
	end

	return table.concat(lines, "\n\n")
end

-- Render the "brief" view: a small label followed by the distinct cleaned
-- romanisations, in first-seen order, joined with " / ".
--
-- Only the first processed entry of each result supplies the romanisation.
-- The dialect name is included in the label only when exactly one distinct
-- dialect occurs across all results. The output starts with a space because
-- the language name is already written by the caller (zh-pron).
local function format_brief_output(options)
	local pronunciations = {} -- cleaned romanisations in first-seen order
	local seen_pronunciation = {}
	local seen_dialect = {}
	local dialect_count = 0
	local last_new_dialect

	-- Collect pronunciations and dialect codes in their original order
	for _, result in ipairs(options.results) do
		local processed_list = result.processed
		if processed_list and #processed_list > 0 then
			local cleared_text = clear_pinging_format(processed_list[1].original)
			if not seen_pronunciation[cleared_text] then
				seen_pronunciation[cleared_text] = true
				pronunciations[#pronunciations + 1] = cleared_text
			end

			for _, processed in ipairs(processed_list) do
				local code = processed.dialect
				if not seen_dialect[code] then
					seen_dialect[code] = true
					dialect_count = dialect_count + 1
					last_new_dialect = code
				end
			end
		end
	end

	local output = " <small>("
	if dialect_count == 1 then
		output = output .. dialects[last_new_dialect] .. ", "
	end
	output = output .. "[[:en:Wiktionary:About Chinese/Puxian Min|푸셴 병음]]): </small>"

	if #pronunciations > 0 then
		output = output .. font_consolas(table.concat(pronunciations, " / "))
	end

	return output
end

-- Render the "complete" view as wikitext list items.
--
-- Results are emitted in ascending `index` order. Within one result, processed
-- entries with the same original / actual / IPA triple are grouped, and the
-- group lists the names of its dialects ordered by dialect_position. Each
-- group shows the cleaned romanisation, the actual pronunciation when it
-- differs, the BUC form (only when the group's data comes from "pt"), and the
-- IPA.
--
-- Declared `local` so that loading the module does not create a global.
local function format_complete_output(options)
	local results = options.results
	local output = {}

	-- Sort a copy so the caller's list keeps its order
	local ordered_results = {}
	for _, result in ipairs(results) do
		table.insert(ordered_results, result)
	end

	table.sort(ordered_results, function(a, b)
		return a.index < b.index
	end)

	for _, result in ipairs(ordered_results) do
		local grouped = {}
		local group_keys = {} -- group keys in first-seen order

		for _, processed in ipairs(result.processed) do
			local key = processed.original .. "|" .. processed.actual .. "|" .. processed.ipa

			if not grouped[key] then
				grouped[key] = {
					data = {
						original = processed.original,
						actual = processed.actual,
						ipa = processed.ipa,
						buc = processed.buc,
						dialect = processed.dialect
					},
					dialects = {}
				}
				table.insert(group_keys, key)
			elseif not grouped[key].data.buc and processed.buc then
				-- A later member of the group supplies the BUC form; the
				-- group's data then refers to that member's dialect
				grouped[key].data.buc = processed.buc
				grouped[key].data.dialect = processed.dialect
			end

			table.insert(grouped[key].dialects, {
				code = processed.dialect,
				position = processed.dialect_position
			})
		end

		for _, key in ipairs(group_keys) do
			local group = grouped[key]

			table.sort(group.dialects, function(a, b)
				return a.position < b.position
			end)

			local dialect_names = {}
			for _, dialect_info in ipairs(group.dialects) do
				table.insert(dialect_names, dialects[dialect_info.code])
			end

			table.insert(output, "\n** <small>(" .. table.concat(dialect_names, ", ") .. ")</small>")

			-- Pouseng Ping'ing
			table.insert(output, "\n*** <small>[[:en:Wiktionary:About Chinese/Puxian Min|푸셴 병음]]</small>: " ..
				font_consolas(clear_pinging_format(group.data.original)))

			-- Show the actual pronunciation only when it differs from the written one
			if clear_pinging_format(group.data.original) ~= clear_pinging_format(group.data.actual) then
				table.insert(output, font_consolas(
					" [<small>음성 표기</small>: " .. clear_pinging_format(group.data.actual)) ..
					"]")
			end

			-- BUC
			if group.data.dialect == "pt" and group.data.buc then
				local displayed_buc = group.data.buc:gsub("%*", "")
				table.insert(output, "\n*** <small>[[w:en:Hinghwa Romanized|평화자]]</small>: " ..
				font_consolas(displayed_buc))
			end

			-- IPA
			table.insert(output, '\n*** <small>중국학계 관용 [[위키낱말사전:국제 음성 기호|IPA]] ' ..
				'<sup>([[w:푸셴어|표기]])</sup></small>: ' .. font_ipa(group.data.ipa))
		end
	end

	return table.concat(output)
end

-- Main entry point
-- Entry point: format one or more "dialects:word" pronunciations.
--
-- text            either the input string, or a frame-like table whose
--                 args[1] is the input, args[2] the mode and args.type the
--                 highlight type
-- mode            "brief" (default), "complete" or "demo"
-- highlight_type  passed to the demo formatter as `type`; defaults to "default"
--
-- Pronunciations are separated by "/"; each must have the form
-- "<dialect codes>:<word>". Raises an error for empty input, a malformed
-- pronunciation, or an unsupported mode (the mode is checked after all
-- pronunciations have been processed).
function export.rom_display(text, mode, highlight_type)
	if type(text) == "table" then
		local args = text.args
		highlight_type = args.type
		mode = args[2] or mode
		text = args[1]
	end

	-- Parameter validation
	if not text or text == "" then
		error("Invalid input: text must be a non-empty string")
	end

	mode = mode or FORMAT_MODES.BRIEF
	highlight_type = highlight_type or "default"

	-- Process each pronunciation in the input
	local results = {}
	for pronunciation in text:gmatch("[^/]+") do
		local dialect_codes, word = pronunciation:match("^(.+):(.+)$")
		if not dialect_codes or not word then
			error("Invalid input format: " .. pronunciation)
		end

		results[#results + 1] = process_pronunciation({
			dialect_codes = dialect_codes,
			word = word,
			index = #results + 1
		})
	end

	local pronunciation_data = {
		results = results,
		mode = mode,
		type = highlight_type
	}

	-- Format output according to the specified mode
	local formatters = {
		[FORMAT_MODES.BRIEF] = format_brief_output,
		[FORMAT_MODES.COMPLETE] = format_complete_output,
		[FORMAT_MODES.DEMO] = format_demo_output
	}
	local formatter = formatters[mode]
	if not formatter then
		error("Unsupported mode: " .. mode)
	end
	return formatter(pronunciation_data)
end

-- Convert single BUC syllable to PSP
-- Convert a single BUC (Hinghwa Romanized) syllable to PSP.
--
-- input  the syllable string, or a frame-like table whose args[1] is the
--        syllable
--
-- Returns the PSP syllable (initial .. final .. tone digit, lower case), or
-- the input syllable unchanged when it cannot be converted (unknown final or
-- any error raised during conversion). nil and "" are returned as they are.
--
-- The tone is derived from the combining tone mark and the ending:
--   acute -> 2, circumflex -> 3, macron -> 5,
--   vertical line above -> 2 with "h*", 7 with "h", otherwise 4,
--   no mark -> 6 with "h", otherwise 1.
--
-- The syllable is lower-cased after decomposition so that capitalised
-- syllables without a consonant initial (e.g. "Ing", "Ng") are converted too;
-- the finals table only has lower-case keys.
local function syllable_to_psp(input)
	local buc_to_psp_initials = {
		["b"] = "b", ["ch"] = "c", ["c"] = "z",
		["d"] = "d", ["g"] = "g", ["h"] = "h",
		["k"] = "k", ["l"] = "l", ["m"] = "m",
		["ng"] = "ng", ["n"] = "n", ["p"] = "p",
		["s"] = "s", ["t"] = "t", [""] = ""
	}

	local buc_to_psp_finals = {
		["a"] = "a",
		["aⁿ"] = "a",
		["ah"] = "ah",
		["ah*"] = "a",
		["ai"] = "ai",
		["ang"] = "ang",
		["au"] = "ao",
		["a̤"] = "e",
		["a̤ⁿ"] = "e",
		["a̤h"] = "eh",
		["a̤h*"] = "e",
		["e"] = "ae",
		["eh"] = "eh",
		["eng"] = "eng",
		["e̤"] = "oe",
		["e̤ⁿ"] = "oe",
		["e̤h"] = "oeh",
		["e̤ng"] = "oeng",
		["i"] = "i",
		["ih"] = "ih",
		["ih*"] = "i",
		["ing"] = "ing",
		["ia"] = "ia",
		["iaⁿ"] = "ia",
		["iah"] = "iah",
		["iah*"] = "ia",
		["iang"] = "ieng",
		["iu"] = "iu",
		["o"] = "ou",
		["o̤"] = "or",
		["o̤ⁿ"] = "or",
		["o̤h"] = "orh",
		["o̤h*"] = "or",
		["o̤ng"] = "orng",
		["eo"] = "o",
		["eoh"] = "oh",
		["eoh*"] = "o",
		["eong"] = "ong",
		["u"] = "u",
		["uh"] = "uh",
		["ua"] = "ua",
		["uaⁿ"] = "ua",
		["uah"] = "uah",
		["uah*"] = "ua",
		["uang"] = "uang",
		["ui"] = "ui",
		["uai"] = "uei",
		["oi"] = "uei",
		["oiⁿ"] = "uei",
		["oih"] = "uei",
		["oih*"] = "uei",
		["ṳ"] = "y",
		["ṳh"] = "yh",
		["ṳng"] = "yng",
		["io̤"] = "yor",
		["io̤ⁿ"] = "yor",
		["io̤h"] = "yorh",
		["io̤h*"] = "yor",
		["io̤ng"] = "yorng",
		["ng"] = "ng",
		["a̤u"] = "ieo",
		["a̤uⁿ"] = "ieo",
		["a̤uh"] = "ieoh",
		["a̤uh*"] = "ieo"
	}

	-- UTF-8 byte sequences of the combining tone marks, written as escapes so
	-- the otherwise invisible characters are unambiguous in the source
	local ACUTE = "\204\129"         -- U+0301 COMBINING ACUTE ACCENT
	local CIRCUMFLEX = "\204\130"    -- U+0302 COMBINING CIRCUMFLEX ACCENT
	local MACRON = "\204\132"        -- U+0304 COMBINING MACRON
	local VERTICAL_LINE = "\204\141" -- U+030D COMBINING VERTICAL LINE ABOVE

	-- Handle input parameter
	local syllable
	if type(input) == "table" then
		syllable = input.args[1]
	else
		syllable = input
	end

	if not syllable or syllable == "" then
		return syllable
	end

	-- Try to convert the syllable, return original if any error occurs
	local success, result = pcall(function()
		-- Decompose the syllable and check for validity
		local decomposed = mw.ustring.toNFD(syllable)
		if not decomposed then
			return syllable
		end

		-- After decomposition the base letters are plain ASCII, so the
		-- byte-wise string.lower is enough to fold case
		decomposed = decomposed:lower()

		-- Extract and remove tone marks
		local tone
		if decomposed:find(ACUTE, 1, true) then
			tone = "2"
			decomposed = decomposed:gsub(ACUTE, "")
		elseif decomposed:find(CIRCUMFLEX, 1, true) then
			tone = "3"
			decomposed = decomposed:gsub(CIRCUMFLEX, "")
		elseif decomposed:find(VERTICAL_LINE, 1, true) then
			if decomposed:find("h%*$") then   -- Special case: -h* ending -> tone 2
				tone = "2"
			elseif decomposed:find("h$") then
				tone = "7"
			else
				tone = "4"
			end
			decomposed = decomposed:gsub(VERTICAL_LINE, "")
		elseif decomposed:find(MACRON, 1, true) then
			tone = "5"
			decomposed = decomposed:gsub(MACRON, "")
		elseif decomposed:find("h$") then
			-- No tone mark, plain -h ending (an "h*" ending does not match)
			tone = "6"
		else
			tone = "1"
		end

		-- Recompose and check validity
		local normalized = mw.ustring.toNFC(decomposed)
		if not normalized then
			return syllable
		end

		-- Special case: standalone `ng` syllable after tone removal
		if normalized == "ng" then
			return "ng" .. tone
		end

		-- Extract initial; digraphs are tried before single letters
		local initial = ""
		if normalized:match("^ch") or normalized:match("^ng") then
			initial = normalized:sub(1, 2)
			normalized = normalized:sub(3)
		elseif normalized:match("^[bcdfghklmnpst]") then
			initial = normalized:sub(1, 1)
			normalized = normalized:sub(2)
		end

		-- An initial without a table entry (e.g. "f") maps to the empty initial
		local psp_initial = buc_to_psp_initials[initial] or ""

		-- Process final
		-- Remove -h* marker if present (affects tone but not final lookup)
		local final = normalized:gsub("h%*$", "")

		-- Look up PSP final
		local psp_final = buc_to_psp_finals[final]
		if not psp_final then
			return syllable
		end

		-- Combine all parts to form complete PSP syllable
		return (psp_initial .. psp_final .. tone):lower()
	end)

	-- Return original syllable if conversion failed
	return success and result or syllable
end

-- Convert BUC to PSP (both single syllable and text)
-- Convert BUC text (a single syllable or running text) to PSP.
--
-- input  the text, or a frame-like table whose args[1] is the text
--
-- The text is split on whitespace, hyphens and ASCII / full-width
-- punctuation. Delimiters are kept as they are; every stretch between them is
-- converted with syllable_to_psp, which returns unconvertible text unchanged.
-- nil and "" are returned as they are.
--
-- Stretches are converted while splitting, so delimiters never need to be
-- recognised a second time. (A byte-oriented string.match against the
-- delimiter class cannot recognise the multi-byte full-width punctuation.)
function export.buc_to_psp(input)
	-- Handle input parameter
	local text
	if type(input) == "table" then
		text = input.args[1]
	else
		text = input
	end

	if not text or text == "" then
		return text
	end

	local delimiter_pattern = "[%s%-%.,;:!%?,。;:!?「」『』、]"

	-- Split text into parts by delimiters while keeping delimiters.
	-- Positions from mw.ustring.gmatch are character (not byte) offsets,
	-- matching mw.ustring.sub / mw.ustring.len below.
	local parts = {}
	local last_pos = 1

	for pos, delimiter in mw.ustring.gmatch(text, "()(" .. delimiter_pattern .. ")") do
		if pos > last_pos then
			parts[#parts + 1] = syllable_to_psp(mw.ustring.sub(text, last_pos, pos - 1))
		end
		parts[#parts + 1] = delimiter
		last_pos = pos + mw.ustring.len(delimiter)
	end

	-- Handle the last part
	if last_pos <= mw.ustring.len(text) then
		parts[#parts + 1] = syllable_to_psp(mw.ustring.sub(text, last_pos))
	end

	return table.concat(parts)
end

return export