본문으로 이동

모듈:Dolab1

위키낱말사전, 말과 글의 누리

이 모듈에 대한 설명문서는 모듈:Dolab1/설명문서에서 만들 수 있습니다

--Module:Jpan-headword
local m_str_utils = require("Module:string utilities")

local byteoffset = mw.ustring.byteoffset
local concat = table.concat
local insert = table.insert
local maxn = table.maxn
local remove = table.remove
local ufind = m_str_utils.find
local ugmatch = m_str_utils.gmatch
local ugsub = m_str_utils.gsub
local ulen = m_str_utils.len
local usub = m_str_utils.sub

local m_ja = require("Module:ja")
local m_ja_ruby = require('Module:ja-ruby')
local kana_to_romaji = require("Module:Hrkt-translit").tr

local export = {}
local pos_functions = {}

local data_range = mw.loadData('Module:ja/data/range')
local Jpan = require("Module:scripts").getByCode("Jpan")

-- Strips wikilink markup from `text`, keeping only the displayed part:
-- "[[target|label]]" becomes "label" and "[[target]]" becomes "target".
-- Returns exactly one value (the cleaned string).
local function remove_links(text)
    -- drop the "[[target|" prefix of piped links first
    local stripped = text:gsub("%[%[[^|%]]-|", "")
    -- then remove whatever opening and closing brackets remain
    stripped = stripped:gsub("%[%[", "")
    stripped = stripped:gsub("%]%]", "")
    return stripped
end

-- Splits a headword and its kana reading into aligned, '%'-separated
-- segments, using the readings recorded in a kanjitab-style template found in
-- the wikitext of `pagename`.
--
-- head:          headword text; may contain [[wikilinks]]
-- kana:          kana reading given for the headword
-- pagename:      title of the page whose wikitext is scanned
-- template_name: Lua pattern matching the template name (it is concatenated
--                into a gmatch pattern; '%' is stripped for error messages)
--
-- Returns two strings (head, kana). If the page text is unavailable, or no
-- template occurrence produces a match, the arguments are returned unchanged.
-- Raises an error when find_bracket yields no third result for a template
-- occurrence, or when the segment counts derived from head and kana differ.
local function assign_kana_to_kanji(head, kana, pagename, template_name)
    local m_tu = require'Module:template utilities'

    -- {first_byte, last_byte} of every kanji inside `head` (links included);
    -- slot 0 is a sentinel whose end offset is 0
    local kanji_pos = {[0] = { nil, 0}}
    -- pieces of `head` with link markup removed (joined into a string below)
    local head_nolink = {}
    -- byte offset in `head` that precedes the piece currently being scanned
    local link_border = 0

    -- Records `substr` as link-free text and notes the byte range, relative to
    -- `head`, of each kanji (or 々) it contains.
    local function insert_kanji_pos(substr)
        insert(head_nolink, substr)
        for p1, w1 in ugmatch(substr, '()([々' .. data_range.kanji .. '])') do
            -- p1 is a character index; convert to a byte offset within `head`
            p1 = byteoffset(substr, p1) + link_border
            insert(kanji_pos, { p1, p1 + w1:len() - 1 })
        end
    end

    -- Walk the [[...]] spans of `head`.
    -- NOTE(review): gfind_bracket presumably yields start, end and text of
    -- each span — confirm against Module:template utilities.
    for p1, p2, w1 in  m_tu.gfind_bracket(head, {['%[%['] = ']]'}) do
        -- plain text between the previous link and this one
        insert_kanji_pos(head:sub(link_border + 1, p1 - 1))
        -- position of the pipe inside the link, or 2 (the second '[') if none
        local p_pipe = w1:find'|' or 2
        link_border = p1 + p_pipe - 1
        -- displayed text of the link, without the trailing "]]"
        insert_kanji_pos(w1:sub(p_pipe + 1, -3))
        link_border = p2
    end

    -- text after the last link (or the whole head when there are no links)
    insert_kanji_pos(head:sub(link_border + 1))
    head_nolink = concat(head_nolink)

    local pagetext = mw.title.new(pagename):getContent()
    if not pagetext then return head, kana end

    -- non_kanji[i] is the text preceding the i-th kanji of head_nolink; the
    -- final entry is the text after the last kanji (character-indexed)
    local non_kanji = {}
    local last_kanji = 1
    for p1 in ugmatch(head_nolink, '[々' .. data_range.kanji .. ']()') do
        -- p1 is the position just after the kanji, so p1 - 2 is the
        -- character before it
        insert(non_kanji, usub(head_nolink, last_kanji, p1 - 2))
        last_kanji = p1
    end
    insert(non_kanji, usub(head_nolink, last_kanji))

    -- Try every occurrence of the template on the page until one fits.
    for kanjitab in pagetext:gmatch('(){{%s*' .. template_name) do
        -- replace the start position by the third value find_bracket returns
        -- (presumably the full template text — confirm), then parse it
        kanjitab = select(3, m_tu.find_bracket(pagetext, m_tu.brackets_temp, kanjitab))
        if not kanjitab then error('ill-formed [[t:' .. template_name:gsub('%%', '') .. ']] syntax') end
        kanjitab = m_tu.parse_temp(kanjitab)

        -- readings[i]: kana for the i-th reading unit (with its o<i> suffix)
        -- readings_len[i]: number of kanji that unit spans (default 1)
        local readings = {}
        local readings_len = {}

        for i = 1, maxn(kanjitab.args) do
            local r_i = kanjitab.args[i] or ''
            local r_o = kanjitab.args['o' .. i] or ''
            if kanjitab.args['k' .. i] then
                -- k<i> overrides the kana; trailing digits of the positional
                -- argument still supply the span length
                readings[i] = kanjitab.args['k' .. i] .. r_o
                readings_len[i] = tonumber(r_i:match'^%s*%D*(%d*)%s*$') or 1
            else
                -- positional argument is "<kana><optional digits>"
                local r_kana, r_len = r_i:match'^%s*(%D*)(%d*)%s*$'
                readings[i] = r_kana .. r_o
                readings_len[i] = tonumber(r_len) or 1
            end
        end

        -- Interleave the non-kanji text with the readings:
        -- odd slots hold non-kanji text, even slots hold a reading.
        local kana_decom = {}
        local reading_id = 1
        local reading_len = 1
        for i = 1, #non_kanji - 1 do
            if reading_len <= 1 then
                reading_len = readings_len[reading_id] or 1

                insert(kana_decom, non_kanji[i])
                insert(kana_decom, readings[reading_id])

                reading_id = reading_id + 1
            else
                -- this kanji is covered by the previous multi-kanji reading
                reading_len = reading_len - 1
            end
        end
        insert(kana_decom, non_kanji[#non_kanji])

        -- Replaces every run of non-kana characters in `str` by `repl`.
        local function strip_nonkana(str, repl)
            return ugsub(str, '[^' .. data_range.kana .. ']+', repl) or nil
        end
        -- Match the kana-only reading against the decomposition, with each
        -- non-kana run turned into a lazy capture; the captures are the kana
        -- corresponding to those runs.
        local xeno_reading = {strip_nonkana(kana, ''):match('^' .. strip_nonkana(concat(kana_decom), '(.-)') .. '$')}
        if #xeno_reading > 0 then
            -- Build the matching decomposition of `head` from byte ranges.
            local head_decom = {}
            reading_id = 1
            reading_len = 1
            for i = 1, #non_kanji - 1 do
                if reading_len <= 1 then
                    reading_len = readings_len[reading_id] or 1

                    -- text between the previous kanji and this one
                    insert(head_decom, head:sub(kanji_pos[i - 1][2] + 1, kanji_pos[i][1] - 1))
                    -- the kanji (or run of kanji) covered by this reading
                    insert(head_decom, head:sub(kanji_pos[i][1], kanji_pos[i + reading_len - 1][2]))

                    reading_id = reading_id + 1
                else
                    reading_len = reading_len - 1
                end
            end
            -- trailing text after the last kanji
            insert(head_decom, head:sub(kanji_pos[#non_kanji - 1][2] + 1))

            if #head_decom ~= #kana_decom then error('number of parameters in [[t:' .. template_name:gsub('%%', '') .. ']] is incorrect') end

            -- Fill the non-kana runs of the odd (non-kanji) slots with the
            -- captured kana; an empty capture leaves the run untouched.
            local n_xeno_reading = 0
            for i = 1, #kana_decom, 2 do
                kana_decom[i] = ugsub(kana_decom[i], '[^' .. data_range.kana .. ']+', function()
                    n_xeno_reading = n_xeno_reading + 1
                    if xeno_reading[n_xeno_reading] == '' then return nil
                    else return xeno_reading[n_xeno_reading] end
                end)
            end

            return concat(head_decom, '%'), concat(kana_decom, '%')
        end
    end

    -- no template occurrence matched
    return head, kana
end

-- Korean numeral words indexed by kanji count (1-15). add_categories
-- substitutes them into the " %s 글자 한자가 포함된 낱말" category name; index 1
-- is not reached there because a single kanji is handled separately.
-- NOTE(review): entries 3 and 4 use the determiner forms (세, 네) whereas 13
-- and 14 use 열셋/열넷 — confirm this matches the intended category names.
local numerals = {
    "한", "두", "세", "네", "다섯",
    "여섯", "일곱", "여덟", "아홉", "열",
    "열한", "열두", "열셋", "열넷", "열다섯"
}

-- English labels indexed by the value m_ja.kanji_grade returns for a
-- character; add_categories inserts them into
-- "<language> terms spelled with %s kanji".
local en_grades = {
    "first grade", "second grade", "third grade",
    "fourth grade", "fifth grade", "sixth grade",
    "secondary school", "jinmeiyō", "hyōgaiji"
}

-- Long-form argument values mapped to the short codes that add_transitivity
-- ('tr', 'in') and add_inflections ('1', '2', 'irr') compare against.
local aliases = {
    ['transitive']='tr', ['trans']='tr',
    ['intransitive']='in', ['intrans']='in', ['intr']='in',
    ['godan']='1', ['ichidan']='2', ['irregular']='irr'
}

-- Romanises `kana` and adjusts the result for the entry's part of speech.
--
-- kana: kana string to transliterate
-- data: headword state; reads data.headword.pos_category and data.lang_code
-- args: template arguments; reads args['infl']
--
-- Returns "-" for parts of speech that get no transliteration, otherwise the
-- formatted romaji string.
local function kana_to_romaji_with_pos_format(kana, data, args)
    local pos = data.headword.pos_category
    if pos == "combining forms" or pos == "punctuation marks" or pos == "iteration marks" or pos == "symbols" then
        return "-"
    end

    local rom = remove_links(kana_to_romaji(kana, data.lang_code))

    -- Spell out a final long vowel for inflecting words so that the romaji
    -- ending can be matched by the suffix rules in add_inflections.
    local infl = args['infl']
    if infl == '1' or infl == '1s' or infl == 'godan' then
        rom = rom:gsub('ō$', 'ou'):gsub('ū$', 'uu')
    elseif infl == 'i' or infl == 'is' or infl == 'い' then
        rom = rom:gsub('ī$', 'ii')
    end

    if pos == "prefixes" then
        -- Ensure exactly one trailing hyphen. A single gsub('%-?$', '-') is
        -- not safe on Lua 5.1 (the Scribunto runtime): when the string already
        -- ends in '-', the pattern matches the hyphen and then the empty
        -- string at the end, producing "--". Strip first, then append.
        rom = rom:gsub('%-$', '') .. '-'
    elseif pos == "suffixes" or pos == "suffix forms" or pos == "counters" or pos == "classifiers" then
        -- ensure exactly one leading hyphen (anchored at the start, so the
        -- pattern can match only once)
        rom = rom:gsub('^%-?', '-')
    elseif pos == "proper nouns" then
        -- capitalise automatically unless the kana carries explicit '^' marks
        if not kana:match'%^' then
            -- upper-case a lower-case letter at the start of each word
            rom = ugsub(rom, '%f[^%s%c%p]%l', string.uupper)
            -- but not a letter that merely follows an apostrophe inside a word
            rom = ugsub(rom, "%w'%u", string.ulower)
        end
    end

    return rom
end

-- Registers a historical kana spelling for the headword.
--
-- data:        headword state; categories are appended to
--              data.headword.categories and the formatted link to
--              data.info_hist
-- hist_kana:   historical spelling
-- modern_kana: modern spelling, or nil
--
-- A "terms historically spelled with X" category is added for ゐ and ゑ
-- whenever they occur, and for を, ぢ and づ only when the modern spelling
-- does not contain the same character.
local function historical_kana(data, hist_kana, modern_kana)
    -- { character, whether the modern spelling is consulted }
    local tracked = {
        { 'ゐ', false },
        { 'ゑ', false },
        { 'を', true },
        { 'ぢ', true },
        { 'づ', true },
    }

    local cats = data.headword.categories
    for _, entry in ipairs(tracked) do
        local ch, compare_modern = entry[1], entry[2]
        local retained = compare_modern and modern_kana and modern_kana:match(ch)
        if hist_kana:match(ch) and not retained then
            insert(cats, data.lang_name .. " terms historically spelled with " .. ch)
        end
    end

    local link_data = {
        lang = data.headword.lang,
        lemma = hist_kana,
    }
    local link_options = {
        hist = true,
        face = 'head',
        disableSelfLink = true,
    }
    insert(data.info_hist, require('Module:ja-link').link(link_data, link_options))
end

-- Classifies data.pagename by the kana script it is written in.
--
-- data:     headword state; only data.pagename is read
-- digraphs: when truthy, the ligatures ゟ and ヿ also count as kana
--
-- Returns 'hira', 'kata' or 'both' when the title consists solely of
-- punctuation, whitespace, control characters and kana of the respective
-- script(s); returns nothing otherwise. "&" and "@" are never erased, so a
-- title containing either is not classified as kana.
local function detect_pagename_kana(data, digraphs)
    local title = data.pagename

    -- gsub callback: erase the matched character unless it is "&" or "@"
    local function erase_unless_special(ch)
        return ch:match("[&@]") or ""
    end

    -- True when erasing every character of `range` (plus `ligature` when
    -- digraphs are enabled) leaves nothing of the title.
    local function written_only_in(range, ligature)
        local class = '[%p%s%c' .. range .. (digraphs and ligature or "") .. ']'
        return ugsub(title, class, erase_unless_special) == ""
    end

    if written_only_in(data_range.hiragana, "ゟ") then
        return 'hira'
    elseif written_only_in(data_range.katakana, "ヿ") then
        return 'kata'
    elseif written_only_in(data_range.kana, "ゟヿ") then
        return 'both'
    end
end

-- Builds the headword forms (data.headword.heads), the list of kana readings
-- (data.kanas), the base used for inflection (data.inflection_base) and the
-- related categories from the template arguments.
--
-- args: processed template arguments (args[1] kana list, args.head, args.rom,
--       args.label, args.hist, args.infl)
-- data: headword state created by export.show; modified in place
--
-- Raises 'Kana form is required' when the page name is not pure kana, no kana
-- was given and the part of speech is not a punctuation/iteration/symbol one.
local function format_headword(args, data)
    local pagename = data.pagename
    data.pagename_kana = detect_pagename_kana(data)

    -- drop a leading positional argument that contains no non-ASCII byte
    if args[1][1] and not args[1][1]:match'[\128-\255]' then
        remove(args[1], 1)
    end

    local suru_ending = data.headword.pos_category == "suru verbs" and '[[する]]' or ''
    local rom_suru_ending = data.headword.pos_category == "suru verbs" and ' suru' or ''
    if data.pagename_kana then
        -- The page name itself is kana: it serves as the headword.
        if #args.head > 0 or args.head.default then
            insert(data.headword.categories, data.lang_name .. " terms with redundant head parameter")
        end

        -- make sure the page name is the first kana form
        if not args[1][1] then
            args[1][1] = pagename
        elseif remove_links(args[1][1]:gsub("[%^%-%. %%]", "")) ~= pagename then
            insert(args[1], 1, pagename)
        end

        for i, k in ipairs(args[1]) do
            insert(data.headword.heads, {
                -- strip the formatting marks ^ - . space %
                term = k:gsub("[%^%-%. %%]", "") .. suru_ending,
                tr = '-',
                q = args.label[i],
            })
        end

        for i = 1, math.max(args.rom.maxindex, 1) do
            local rom = args.rom[i] or args.rom.default or kana_to_romaji_with_pos_format(args[1][1], data, args)
            -- more romanisations than heads: repeat the previous head term
            if not data.headword.heads[i] then
                data.headword.heads[i] = {term = data.headword.heads[i-1].term}
            end
            if rom == "-" then
                data.headword.heads[i].tr = "-"
            elseif data.headword.lang:link_tr(Jpan) then
                data.headword.heads[i].tr = "[[" .. rom .. "]]" ..
                (rom_suru_ending == "" and "" or "[[" .. rom_suru_ending .. "]]")
            else
                data.headword.heads[i].tr = rom .. rom_suru_ending
            end
        end

        data.kanas[1] = pagename

        data.inflection_base.form = remove_links(args[1][1]:gsub("[%^%-%. %%]", "")) .. suru_ending
        data.inflection_base.romaji = data.headword.heads[1].tr .. rom_suru_ending

        if args.hist[1] then
            historical_kana(data, args.hist[1], args[1][1])
        end
    else
        -- The page name contains non-kana: kana readings come from args[1].
        if #args[1] == 0 and not (data.headword.pos_category == "punctuation marks" or data.headword.pos_category == "iteration marks" or data.headword.pos_category == "symbols") then
            error('Kana form is required')
        end
        if args.head.default == pagename then
            insert(data.headword.categories, data.lang_name .. " terms with redundant head parameter")
        end

        -- romanisations already shown; a repeated one is displayed as "-"
        local rom_repetition_final = {}
        for i, k in ipairs(args[1]) do
            local rom_auto = kana_to_romaji_with_pos_format(k, data, args)
            local head = args.head[i] or args.head.default or pagename
            if args.head[i] == pagename then
                insert(data.headword.categories, data.lang_name .. " terms with redundant head parameter")
            end

            -- Unless the user already segmented head or kana with '%', try to
            -- derive the segmentation from the page's kanjitab template.
            local head_for_ruby, kana_for_ruby
            if ulen(head) > 1 and head:match'%%' == nil and k:match'%%' == nil then
                head_for_ruby, kana_for_ruby = assign_kana_to_kanji(head, k, pagename, data.lang_code .. '%-kanjitab')
            else
                head_for_ruby, kana_for_ruby = head, k
            end
            local format_table = m_ja_ruby.parse_text(head_for_ruby, kana_for_ruby, {
                try = 'force',
                try_force_limit = 10000
            })
            local kana_bare = remove_links(k:gsub("[%^%-%. %%]", ""))
            local rom = args.rom[i] or args.rom.default or rom_auto

            head = {
                -- link every ruby annotation to the bare kana entry
                term = m_ja_ruby.to_wiki(format_table, {
                    break_link = true,
                }):gsub('<rt>(..-)</rt>', "<rt>[[" .. kana_bare .."|%1]]</rt>") .. suru_ending,
                q = args.label[i],
            }
            if rom == "-" or rom_repetition_final[rom] then
                head.tr = "-"
            elseif data.headword.lang:link_tr(Jpan) then
                head.tr = "[[" .. rom .. "]]" ..
                (rom_suru_ending == "" and "" or "[[" .. rom_suru_ending .. "]]")
            else
                head.tr = rom .. rom_suru_ending
            end
            insert(data.headword.heads, head)

            rom_repetition_final[rom] = true
            insert(data.kanas, kana_bare)

            if args.hist[i] then
                historical_kana(data, args.hist[i], k)
            end

            -- the first reading provides the base for inflected forms
            if not data.inflection_base.form then
                data.inflection_base.form = remove_links(m_ja_ruby.to_markup(format_table)) .. suru_ending
                data.inflection_base.romaji = rom .. rom_suru_ending
            end
        end

        if #data.kanas > 1 then
            insert(data.headword.categories, data.lang_name .. " terms with multiple readings")
        else
            -- Only one reading here: look at the other headword templates in
            -- this language's section of the current page for a different one.
            local lang_code = data.lang_code
            local lang_name = data.lang_name
            -- getContent() returns nil when the page has no stored text (for
            -- example while previewing a page that does not exist yet), so
            -- the scan is skipped instead of indexing nil.
            local content = mw.title.getCurrentTitle():getContent()
            local loc1, loc2
            if content then
                loc1, loc2 = content:find("%f[^%z%s]==%s*" .. lang_name:gsub("%-", "%%%-") .. "%s*==()")
            end
            if loc1 then
                -- the section ends where the next level-2 heading starts
                loc2 = content:find("%f[^%z%s]==[^\n=]+==", loc2)
                content = content:sub(loc1, loc2)
                -- `reading` deliberately persists across iterations: a template
                -- that carries no reading must not count as a differing one.
                local reading, first_reading
                for template, t_args in require("Module:template parser").findTemplates(content) do
                    if (
                        template == lang_code .. "-head" or
                        template == lang_code .. "-pos"
                    ) and t_args[2] then
                        reading = remove_links(t_args[2]):gsub("[ %-%.^%%]", "")
                    elseif (
                        template == lang_code .. "-noun" or
                        template == lang_code .. "-verb" or
                        template == lang_code .. "-adj" or
                        template == lang_code .. "-phrase" or
                        template == lang_code .. "-verb form" or
                        template == lang_code .. "-verb-suru"
                    ) and t_args[1] then
                        reading = remove_links(t_args[1]):gsub("[ %-%.^%%]", "")
                    elseif template == lang_code .. "-see" and t_args[1] then
                        reading = remove_links(t_args[1]):gsub("[ %-%.^%%]", "")
                    end
                    reading = reading and kana_to_romaji(reading, lang_code)
                    first_reading = first_reading or reading
                    if reading ~= first_reading then
                        insert(data.headword.categories, lang_name .. " terms with multiple readings")
                        break
                    end
                end
            end
        end
    end
end

-- Adds the transitivity label and categories for a verb.
--
-- data: headword state; appends to data.info_mid and
--       data.headword.categories
-- tr:   'tr', 'in', 'both' or one of their aliases; any other value
--       (including nil) files the entry under "verbs without transitivity"
local function add_transitivity(data, tr)
    local kind = aliases[tr] or tr
    local cats = data.headword.categories
    local name = data.lang_name

    if kind == "tr" or kind == "in" or kind == "both" then
        local labels = {
            tr = 'transitive',
            ['in'] = 'intransitive',
            both = 'transitive or intransitive',
        }
        insert(data.info_mid, labels[kind])
        -- "both" receives the two categories, transitive first
        if kind ~= "in" then
            insert(cats, name .. " transitive verbs")
        end
        if kind ~= "tr" then
            insert(cats, name .. " intransitive verbs")
        end
    else
        insert(cats, name .. " verbs without transitivity")
    end
end

-- Derives inflected forms by swapping the ending of a lemma and of its romaji.
--
-- lemma:       base form, possibly containing non-kana markup between kana
-- romaji:      romanisation of the base form
-- lemma_from:  kana ending to replace (hiragana; the katakana equivalent of
--              each character is accepted as well)
-- lemma_to:    list of replacement kana endings
-- romaji_from: romaji ending to replace (used as a Lua pattern)
-- romaji_to:   list of replacement romaji endings, parallel to lemma_to
--
-- Returns a list of { lemma = ..., romaji = ... } tables, one per entry of
-- lemma_to, or nil as soon as either ending fails to match.
local function replace_suffix(lemma, romaji, lemma_from, lemma_to, romaji_from, romaji_to)
    -- Each character of the old ending becomes a hiragana/katakana class
    -- followed by a capture of whatever non-kana text trails it.
    local function char_to_class(ch)
        return '[' .. ch .. m_ja.hira_to_kata(ch) .. ']([^' .. data_range.kana .. ']*)'
    end
    local source_pattern, capture_count = lemma_from:gsub('.[\128-\191]*', char_to_class)
    source_pattern = source_pattern .. '$'

    local forms = {}
    for idx, target in ipairs(lemma_to) do
        -- Re-insert the captured non-kana text after the corresponding
        -- character of the new ending ...
        local used = 0
        local replacement = target:gsub('.[\128-\191]*', function(ch)
            if used >= capture_count then
                return ch
            end
            used = used + 1
            return ch .. '%' .. used
        end)
        -- ... and append the captures that found no counterpart.
        for k = used + 1, capture_count do
            replacement = replacement .. '%' .. k
        end

        local new_lemma, hits = ugsub(lemma, source_pattern, replacement)
        if hits == 0 then
            return nil
        end

        -- the romaji may end in up to two closing link brackets; keep them
        local new_romaji
        new_romaji, hits = romaji:gsub(romaji_from .. '(%]?%]?)$', romaji_to[idx] .. "%1")
        if hits == 0 then
            return nil
        end

        forms[#forms + 1] = { lemma = new_lemma, romaji = new_romaji }
    end
    return forms
end

-- Appends one labelled inflection to data.headword.inflections.
--
-- data:  headword state
-- label: label of the inflection (e.g. 'stem')
-- ...:   form tables with fields `lemma` (ruby markup) and `romaji`; the list
--        ends at the first nil
local function insert_form(data, label, ...)
    local entry = {label = label}
    for _, form in ipairs{...} do
        local parsed = m_ja_ruby.parse_markup(form.lemma)
        local display = m_ja_ruby.to_wiki(parsed)
        -- link the whole form to its own entry unless it already has links
        local already_linked = display:find'%[%[.+%]%]'
        if not already_linked then
            display = '[[' .. m_ja_ruby.to_text(parsed) .. '#' .. data.lang_name .. '|' .. display .. ']]'
        end
        entry[#entry + 1] = {
            term = display,
            translit = form.romaji,
        }
    end
    insert(data.headword.inflections, entry)
end

-- Adds the inflection-class label, its category and, where they can be
-- derived, the principal inflected forms of the headword.
--
-- data:            headword state; reads data.inflection_base and appends to
--                  data.info_mid, data.headword.categories and
--                  data.headword.inflections
-- inflection_type: inflection class code or alias; unknown values are ignored
-- cat_suffix:      part-of-speech word closing the category name
--                  (e.g. 'verbs'); no category is added when nil
--
-- Nothing is done unless data.lang_code is 'ja'.
local function add_inflections(data, inflection_type, cat_suffix)
    local lemma = data.inflection_base.form
    local romaji = data.inflection_base.romaji
    inflection_type = aliases[inflection_type] or inflection_type

    if data.lang_code ~= 'ja' then
        return
    end

    -- { label markup, category infix (nil when the class has no category) }
    local godan = { '<abbr title="godan (type I) conjugation">godan</abbr>', " type 1 " }
    local adj_i = { '<abbr title="-i (type I) inflection">-i</abbr>', " い-i " }
    local adj_na = { '<abbr title="-na (type II) inflection">-na</abbr>', " な-na " }
    local adj_nari = { '<abbr title="-nari inflection (classical)"><sup><small>†</small></sup>-nari</abbr>', " なり-nari " }
    local adj_tari = { '<abbr title="-tari inflection (classical)"><sup><small>†</small></sup>-tari</abbr>', " たり-tari " }
    local adj_ku = { '<abbr title="-ku inflection (classical)"><sup><small>†</small></sup>-ku</abbr>', " く-ku " }
    local adj_shiku = { '<abbr title="-shiku inflection (classical)"><sup><small>†</small></sup>-shiku</abbr>', " しく-shiku " }
    local adj_ka = { '<abbr title="-ka inflection (dialectal)"><sup><small>†</small></sup>-ka</abbr>', " か-ka " }
    local uninflectable = { 'uninflectable' }

    local annotations = {
        ['1'] = godan, ['1s'] = godan,
        ['2'] = { '<abbr title="ichidan (type II) conjugation">ichidan</abbr>', " type 2 " },
        ['suru'] = { '<abbr title="suru (type III) conjugation">suru</abbr>', " suru " },
        ['kuru'] = { '<abbr title="kuru (type III) conjugation">kuru</abbr>', " kuru " },
        ['i'] = adj_i, ['い'] = adj_i, ['is'] = adj_i,
        ['na'] = adj_na, ['な'] = adj_na,
        ['yo'] = { '<abbr title="yodan conjugation (classical)"><sup><small>†</small></sup>yodan</abbr>', " yodan " },
        ['kami ni'] = { '<abbr title="kami nidan conjugation (classical)"><sup><small>†</small></sup>nidan</abbr>', " kami nidan " },
        ['shimo ni'] = { '<abbr title="shimo nidan conjugation (classical)"><sup><small>†</small></sup>nidan</abbr>', " shimo nidan " },
        ['rahen'] = { '<abbr title="r-special conjugation (classical)"><sup><small>†</small></sup>-ri</abbr>' },
        ['sahen'] = { '<abbr title="s-special conjugation (classical)"><sup><small>†</small></sup>-se</abbr>' },
        ['kahen'] = { '<abbr title="k-special conjugation (classical)"><sup><small>†</small></sup>-ko</abbr>' },
        ['nahen'] = { '<abbr title="n-special conjugation (classical)"><sup><small>†</small></sup>-n</abbr>' },
        ['nari'] = adj_nari, ['なり'] = adj_nari,
        ['tari'] = adj_tari, ['たり'] = adj_tari,
        ['ku'] = adj_ku, ['く'] = adj_ku,
        ['shiku'] = adj_shiku, ['しく'] = adj_shiku,
        ['ka'] = adj_ka, ['か'] = adj_ka,
        ['irr'] = { 'irregular', " irregular " },
        ['-'] = uninflectable, ['un'] = uninflectable,
    }

    local annotation = annotations[inflection_type]
    if not annotation then
        return
    end

    -- label first, then the class category
    insert(data.info_mid, annotation[1])
    if annotation[2] and cat_suffix then
        insert(data.headword.categories, data.lang_name .. annotation[2] .. cat_suffix)
    end

    -- Returns the forms produced by the first rule whose endings match.
    -- Each rule is { lemma_from, lemma_to, romaji_from, romaji_to }.
    local function first_match(rules)
        for _, rule in ipairs(rules) do
            local forms = replace_suffix(lemma, romaji, rule[1], rule[2], rule[3], rule[4])
            if forms then
                return forms
            end
        end
    end

    local function track_failure()
        require'Module:debug'.track'Jpan-headword/inflection failed/ja'
    end

    local function stem_and_past(forms)
        if forms then
            insert_form(data, 'stem', forms[1])
            insert_form(data, 'past', forms[2])
        else
            track_failure()
        end
    end

    local function adverbial(forms)
        if forms then
            insert_form(data, 'adverbial', forms[1])
        else
            track_failure()
        end
    end

    if inflection_type == '1' or inflection_type == '1s' then
        if cat_suffix == 'verbs' and ufind(romaji, '[ieIEīēĪĒ]ru$') then
            insert(data.headword.categories, data.lang_name .. " type 1 verbs that end in -iru or -eru")
        end
        if inflection_type == '1' then
            stem_and_past(first_match{
                { 'く', {'き', 'いた'}, 'ku', {'ki', 'ita'} },
                { 'ぐ', {'ぎ', 'いだ'}, 'gu', {'gi', 'ida'} },
                { 'す', {'し', 'した'}, 'su', {'shi', 'shita'} },
                { 'つ', {'ち', 'った'}, 'tsu', {'chi', 'tta'} },
                { 'ぬ', {'に', 'んだ'}, 'nu', {'ni', 'nda'} },
                { 'ぶ', {'び', 'んだ'}, 'bu', {'bi', 'nda'} },
                { 'む', {'み', 'んだ'}, 'mu', {'mi', 'nda'} },
                { 'る', {'り', 'った'}, 'ru', {'ri', 'tta'} },
                { 'う', {'い', 'った'}, 'u', {'i', 'tta'} },
            })
        else
            -- special godan verbs: only the first rule yields a second stem
            local forms = first_match{
                { 'る', {'り', 'った', 'い'}, 'ru', {'ri', 'tta', 'i'} },
                { 'いく', {'いき', 'いった'}, 'iku', {'iki', 'itta'} },
                { 'う', {'い', 'うた'}, 'ou', {'oi', 'ōta'} },
            }
            if forms then
                insert_form(data, 'stem', forms[1], forms[3])
                insert_form(data, 'past', forms[2])
            else
                track_failure()
            end
        end
    elseif inflection_type == '2' then
        stem_and_past(replace_suffix(lemma, romaji, 'る', {'', 'た'}, 'ru', {'', 'ta'}))
    elseif inflection_type == 'suru' then
        stem_and_past(first_match{
            { 'する', {'し', 'した'}, 'suru', {'shi', 'shita'} },
            { 'ずる', {'じ', 'じた'}, 'zuru', {'ji', 'jita'} },
        })
    elseif inflection_type == 'kuru' then
        stem_and_past(replace_suffix(lemma, romaji, 'くる', {'き', 'きた'}, 'kuru', {'ki', 'kita'}))
    elseif inflection_type == 'i' or inflection_type == 'い' then
        adverbial(replace_suffix(lemma, romaji, 'い', {'く'}, 'i', {'ku'}))
    elseif inflection_type == 'is' then
        adverbial(replace_suffix(lemma, romaji, 'いい', {'よく'}, 'ii', {'yoku'}))
    elseif inflection_type == 'na' or inflection_type == 'な' then
        -- an empty ending: the particles are appended to the lemma
        local forms = replace_suffix(lemma, romaji, '', {'[[な]]', '[[に]]'}, '', {' na', ' ni'})
        insert_form(data, 'adnominal', forms[1])
        insert_form(data, 'adverbial', forms[2])
    elseif inflection_type == 'tari' or inflection_type == 'たり' then
        local forms = replace_suffix(lemma, romaji, '', {'[[とした]]', '[[たる]]', '[[と]]', '[[として]]'}, '', {' to shita', ' taru', ' to', ' to shite'})
        insert_form(data, 'adnominal', forms[1], forms[2])
        insert_form(data, 'adverbial', forms[3], forms[4])
    end
end

-- Adds the spelling-based categories derived from the page name.
--
-- data: headword state; reads data.pagename, data.lang_name and
--       data.headword.pos_category, appends to data.headword.categories and,
--       for all-katakana titles, to data.katakana_category
local function add_categories(data)
    local lang_name = data.lang_name
    local pagename = data.pagename
    local tc = data.headword.categories

    -- one grade category per kanji (or 々) occurrence in the title
    local number_of_kanji = 0
    for c in ugmatch(pagename, '[々' .. data_range.kanji .. ']') do
        number_of_kanji = number_of_kanji + 1
        insert(tc, (lang_name .. " terms spelled with %s kanji"):format(en_grades[m_ja.kanji_grade(c)]))
    end

    if number_of_kanji == 1 then
        -- NOTE(review): unlike the branch below this name carries no numeral
        -- (numerals[1] is unused) — confirm the intended category name.
        insert(tc, lang_name .. " 글자 한자가 포함된 낱말")
        if ulen(pagename) == 1 then
            -- the title is a single kanji; the language name and the
            -- character are separated by a space like in every other category
            insert(tc, lang_name .. " " .. pagename .. " 글자가 포함된 낱말")
            insert(tc, lang_name .. " 단일 한자어")
        end
    elseif numerals[number_of_kanji] then
        insert(tc, (lang_name .. " %s 글자 한자가 포함된 낱말"):format(numerals[number_of_kanji]))
    end

    -- classify the title once (kana ligatures count as kana here)
    local kana_kind = detect_pagename_kana(data, true)
    if kana_kind == 'hira' then insert(tc, lang_name .. " hiragana") end
    if kana_kind == 'kata' then insert(data.katakana_category, lang_name .. " katakana") end

    -- Something was removed as Japanese script and something else is left:
    -- the title mixes Japanese script with other characters.
    local p, n = ugsub(pagename, '[' .. data_range.kana .. data_range.kanji .. data_range.ideograph .. data_range.kana_graph .. data_range.punctuation .. ']+', '')
    if p ~= '' and n > 0 then insert(tc, lang_name .. " terms written in multiple scripts") end

    for _,character in ipairs{'々','〻','〆','ヶ','ゝ','ゞ','ゝ゚','ヽ','ヾ','ヽ゚','ゐ','ヰ','ゑ','ヱ','ゔ','ヷ','ヸ','ヹ','ヺ','・','·','゠','=','-'} do
        -- plain search: the entries are literal characters, not patterns
        if pagename:find(character, 1, true) then
            insert(tc, lang_name .. " terms spelled with " .. character)
        end
    end

    -- Hiragana and katakana both present (each test removes the other script
    -- first); proverbs and phrases are exempt.
    if (
        data.headword.pos_category ~= "proverbs" and
        data.headword.pos_category ~= "phrases" and
        ufind(ugsub(pagename, "[" .. data_range.katakana .. "]+", ""), "[" .. data_range.hiragana .. "]") and
        ufind(ugsub(pagename, "[" .. data_range.hiragana .. "]+", ""), "[" .. data_range.katakana .. "]")
    ) then
        insert(tc, lang_name .. " terms spelled with mixed kana")
    end
end

-- Part-of-speech specific handlers, looked up by export.show under the
-- part-of-speech name. Each receives the processed arguments and the
-- headword state.

-- Verbs: transitivity plus conjugation class.
pos_functions.verbs = function(args, data)
    add_transitivity(data, args.tr)
    add_inflections(data, args.infl, 'verbs')
end

-- Suffixes: conjugation class without a class category.
pos_functions.suffixes = function(args, data)
    add_inflections(data, args.infl)
end

-- Auxiliary verbs: own category, then filed as ordinary verbs.
pos_functions["auxiliary verbs"] = function(args, data)
    insert(data.headword.categories, data.lang_name .. " auxiliary verbs")
    add_inflections(data, args.infl)
    data.headword.pos_category = "verbs"
end

-- Suru verbs: always the suru class, then filed as ordinary verbs.
pos_functions["suru verbs"] = function(args, data)
    add_transitivity(data, args.tr)
    add_inflections(data, 'suru', 'verbs')
    data.headword.pos_category = "verbs"
end

-- Adjectives: inflection class with its category.
pos_functions.adjectives = function(args, data)
    add_inflections(data, args.infl, 'adjectives')
end

-- Nouns: "-" marks the noun uncountable, any other non-empty value is shown
-- as its counter.
pos_functions.nouns = function(args, data)
    local counter = args.count
    if not counter or counter == "" then
        return
    end
    if counter == "-" then
        insert(data.headword.inflections, {label = "uncountable"})
    else
        insert(data.headword.inflections, {label = "counter", counter})
    end
end

-- Adds the spelling-based categories for a page without rendering a headword.
--
-- data: table supplied by the caller; reads data.lang, data.pagename,
--       data.categories and the optional data.pos, and sets data.lang_name,
--       data.pagename_kana, data.katakana_category (when absent) and
--       data.headword. Categories are appended to data.categories; the
--       katakana category, if any, to data.katakana_category.
function export.cat(data)
    data.lang_name = data.lang:getCanonicalName()
    data.pagename_kana = detect_pagename_kana(data)
    -- add_categories appends to this list for all-katakana titles; make sure
    -- it exists (export.show always creates it) so such titles do not raise
    -- an error when the caller did not provide one.
    data.katakana_category = data.katakana_category or {}

    if data.pos then
        -- pluralise the part of speech ("suffix" -> "suffixes")
        local pos = data.pos:gsub('x$', 'xe') .. 's'
        insert(data.categories, data.lang_name .. ' ' .. pos)
        insert(data.categories, data.lang_name .. ' ' .. require'Module:headword'.pos_lemma_or_nonlemma(pos, true) .. 's')
    end

    -- add_categories reads its category list from data.headword.categories
    data.headword = { categories = data.categories }
    add_categories(data)
end

-- Template entry point: renders the headword line.
--
-- frame: invocation frame. frame.args[1] is the language code; the part of
--        speech is frame.args[2], falling back to frame.args[1]. The template
--        arguments are taken from the parent frame.
--
-- Returns the katakana category markup (if any) followed by the headword
-- HTML after post-processing.
function export.show(frame)
    local poscat = frame.args[2] or frame.args[1] or error("Part of speech has not been specified. Please pass parameter 1 to the module invocation.")

    local params = {
        [1] = {list = true},
        ['rom'] = {list = true, allow_holes = true, separate_no_index = true},
        ['head'] = {list = true, allow_holes = true, separate_no_index = true},
        ['label'] = {list = true, allow_holes = true},
        ['hist'] = {list = true}, ['hhira'] = {alias_of = 'hist'}, ['hkata'] = {alias_of = 'hist'},
        ['tr'] = {},
        ['infl'] = {}, ['type'] = {alias_of = 'infl'}, ['decl'] = {alias_of = 'infl'},
        ['count'] = {},
        ['sort'] = {},
        ['pagename'] = {},
    }
    if poscat == "syllables" then
        params["sc"] = {}
    end

    local args = require('Module:parameters').process(frame:getParent().args, params)

    -- table handed to Module:headword
    local headword = {
        pos_category = poscat,
        categories = {},
        heads = {},
        no_redundant_head_cat = true,
        inflections = {},
        -- placeholder gender: its span is replaced by the info text below
        genders = {'m'},
        nogendercat = true
    }
    local data = {
        headword = headword,
        pagename = args.pagename or mw.loadData("Module:headword/data").pagename,
        pagename_kana = nil,
        lang_code = frame.args[1],
        lang_name = nil,
        katakana_category = {},
        info_mid = {},
        info_hist = {},
        inflection_base = {},
        kanas = {},
    }
    headword.lang = require("Module:languages").getByCode(data.lang_code)
    data.lang_name = headword.lang:getCanonicalName()

    format_headword(args, data)

    local handler = pos_functions[poscat]
    if handler then
        handler(args, data)
    end

    add_categories(data)
    local sort_base = args.sort or data.kanas[1] or data.pagename
    headword.sort_key = headword.lang:makeSortKey(sort_base)

    -- the katakana category is formatted separately, with the Kana script
    local katakana_category = ""
    if #data.katakana_category > 0 then
        katakana_category = require("Module:utilities").format_categories(
            data.katakana_category,
            headword.lang,
            nil,
            sort_base,
            nil,
            require("Module:scripts").getByCode("Kana")
        ) or ""
    end

    -- Replacement for the gender span: historical kana (if any) followed by
    -- the italicised inflection/transitivity info.
    local function info_markup()
        local hist = ''
        if #data.info_hist > 0 then
            hist = '<sup>←' .. concat(data.info_hist, ' or ') .. '<sup>[[w:Historical kana orthography|?]]</sup></sup>'
        end
        return hist .. ('<i>' .. concat(data.info_mid, '&nbsp;') .. '</i>')
    end

    -- Wraps the n-th <strong> element in an anchor span named after the n-th
    -- kana reading; elements beyond the readings are left unchanged.
    local kana_index = 0
    local function with_senseid(strong)
        kana_index = kana_index + 1
        local reading = data.kanas[kana_index]
        if reading then
            return '<span id="' .. data.lang_name .. ':_' .. reading .. '" class="senseid">' .. strong .. '</span>'
        end
    end

    local html = require('Module:headword').full_headword(data.headword)
    html = html:gsub('<span class="gender">.-</span>', info_markup)
    html = html:gsub('<strong .->.-</strong>', with_senseid)
    -- merge the two transliteration wrappers into one span ...
    html = html:gsub('<span class="headword%-tr tr" dir="ltr"><span class="Latn" lang="ja">', '<span lang="ja-Latn" class="headword-tr tr Latn" dir="ltr">')
    -- ... and collapse doubled closing tags accordingly
    html = html:gsub('</span></span>', '</span>')

    return katakana_category .. html
end

return export