Jump to content
Main menu
Main menu
move to sidebar
hide
Navigation
Main page
Recent changes
Random page
Help about MediaWiki
Humanipedia
Search
Search
Appearance
Create account
Log in
Personal tools
Create account
Log in
Pages for logged out editors
learn more
Contributions
Talk
Editing
Module:Ko-translit
Module
Discussion
English
Read
Edit source
View history
Tools
Tools
move to sidebar
hide
Actions
Read
Edit source
View history
General
What links here
Related changes
Special pages
Page information
Appearance
move to sidebar
hide
Warning:
You are not logged in. Your IP address will be publicly visible if you make any edits. If you
log in
or
create an account
, your edits will be attributed to your username, along with other benefits.
Anti-spam check. Do
not
fill this in!
local p = {}

local find = mw.ustring.find
local gsub = mw.ustring.gsub
local len = mw.ustring.len
local sub = mw.ustring.sub
local u = mw.ustring.char
local upper = mw.ustring.upper

-- Entry points:
--   p.rr / p._rr      Revised Romanization
--   p.mr / p._mr      McCune–Reischauer
--   p.cleanHangul     stripping the special characters from Korean input

--[[
IMPORTANT NOTE before editing this module:
1. Make sure that you use a font that displays the following characters differently, and that you know the differences of them:
	ᄀ (U+1100)
	ᆨ (U+11A8)
	ㄱ (U+3131)
2. When dealing with decomposed Hangul,
	a. [ᄀ-ᄒ] should not be directly followed by [ᅡ-ᅵ] because MediaWiki uses Unicode Normalization Form C (NFC), which converts any sequence of [ᄀ-ᄒ][ᅡ-ᅵ] into a precomposed character; write ᄀ[ᅡ] or ᄀ(ᅡ)
	b. ᄀ[ᅡ] or ᄀ(ᅡ) at the end of a pattern is equivalent to not just 가 but [가-갛] in precomposed form. To match a syllabic block without a final consonant at the end of a pattern, use both vowel + [^ᆨ-ᇂ] and vowel + $
		For example, to only match 가 (and not [각-갛]) at the end of a pattern, use both ᄀ[ᅡ][^ᆨ-ᇂ] and ᄀ[ᅡ]$
3. Rule tables below are ORDERED: every rule sees the output of the rules before it. Do not sort or regroup them.
--]]

--[[
NOTE(review): the copy of this module that was reviewed had lost every invisible
code point (the U+FDD0..U+FDD2 markers, the unassigned range ends U+A97F and
U+D7FF) and had its HTML character references decoded. They are rebuilt below
from code points / explicit references, following the module's own comments.
Please confirm the rebuilt positions against the page history.
--]]

------------------------------------------------------------------------
-- Internal markers and placeholders
------------------------------------------------------------------------

-- Three noncharacters are used internally. They are built from code points
-- so that they stay visible in the source.
local SYL_SEP = u(0xFDD0)    -- keeps syllabic blocks apart (mostly for given name in RR)
local NAME_BEGIN = u(0xFDD1) -- marks beginning of name
local NAME_END = u(0xFDD2)   -- marks end of name

-- Escaped special characters: { pattern for the escape, placeholder, gsub replacement restoring the character }.
-- The placeholders contain none of the special characters, so they pass through every rule untouched.
-- NOTE(review): numeric character references are presumed to be the original placeholders — confirm.
local ESCAPES = {
	{ "\\%$", "&#36;", "$" },
	{ "\\%%", "&#37;", "%%" },
	{ "\\%*", "&#42;", "*" },
	{ "\\@", "&#64;", "@" },
	{ "\\%^", "&#94;", "^" },
	{ "\\_", "&#95;", "_" },
	{ "\\`", "&#96;", "`" },
}

------------------------------------------------------------------------
-- Private helpers
------------------------------------------------------------------------

-- Applies an ordered list of { pattern, replacement, ... } rules; `column` selects the replacement (default 2).
local function applyRules(text, rules, column)
	column = column or 2
	for _, rule in ipairs(rules) do
		text = gsub(text, rule[1], rule[column])
	end
	return text
end

-- Repeats `step` until the text stops changing.
local function untilStable(text, step)
	local previous
	repeat
		previous = text
		text = step(text)
	until text == previous
	return text
end

-- Unpacks the text to process. When called through #invoke, the module's own first
-- positional argument wins over the calling template's; otherwise `frame` is the text itself.
local function unpackInput(frame)
	if frame == mw.getCurrentFrame() then
		local direct = frame.args[1]
		if direct then
			return direct
		end
		local parent = frame:getParent() -- may be nil when there is no calling template
		return parent and parent.args[1]
	end
	return frame
end

-- Fails with a readable message instead of a gsub type error when nothing was passed.
local function requireText(value)
	local kind = type(value)
	if kind ~= "string" and kind ~= "number" then
		error("No input text given")
	end
	return value
end

-- Final error checking shared by RR and MR.
local function checkNoSyllablesLeft(result)
	if find(result, "[가-힣]") then
		error("Result contains precomposed Hangul; debugging required")
	end
end

------------------------------------------------------------------------
-- Rule tables shared by RR and MR
------------------------------------------------------------------------

-- $ for ㄴ-addition
local N_ADDITION = {
	{ "([ᆨ-ᇂ])%$ᄋ([ᅣᅤᅧᅨᅭᅲᅵ])", "%1ᄂ%2" }, -- 색연필 [생년필], 물엿 [물렫]
	{ "%$", "" },
}

-- for null-init consonant ㅇ (연음)
local LIAISON = {
	{ "ᆨᄋ", "ᄀ" },
	{ "ᆩᄋ", "ᄁ" },
	{ "ᆪᄋ", "ᆨᄉ" },
	{ "ᆬᄋ", "ᆫᄌ" },
	{ "ᆮᄋ", "ᄃ" },
	{ "[ᆯᆶ]ᄋ", "ᄅ" },
	{ "ᆰᄋ", "ᆯᄀ" },
	{ "ᆱᄋ", "ᆯᄆ" },
	{ "ᆲᄋ", "ᆯᄇ" },
	{ "ᆳᄋ", "ᆯᄉ" },
	{ "ᆴᄋ", "ᆯᄐ" },
	{ "ᆵᄋ", "ᆯᄑ" },
	{ "ᆸᄋ", "ᄇ" },
	{ "ᆹᄋ", "ᆸᄉ" },
	{ "ᆺᄋ", "ᄉ" },
	{ "ᆻᄋ", "ᄊ" },
	{ "ᆽᄋ", "ᄌ" },
	{ "ᆾᄋ", "ᄎ" },
	{ "ᆿᄋ", "ᄏ" },
	{ "ᇀᄋ", "ᄐ" },
	{ "ᇁᄋ", "ᄑ" },
	{ "ᇂᄋ", "ᄋ" }, -- silent; 좋아 [조아]
}

-- neutralization of syl-final consonants
local NEUTRALIZATION = {
	{ "[ᆩᆪᆰᆿ]", "ᆨ" },
	{ "[ᆬᆭ]", "ᆫ" },
	{ "[ᆺᆻᆽᆾᇀᇂ]", "ᆮ" },
	{ "[ᆲᆳᆴᆶ]", "ᆯ" },
	{ "ᆱ", "ᆷ" },
	{ "[ᆵᆹᇁ]", "ᆸ" },
}

-- @ for 절음 법칙, ㄴㄹ pronounced [ㄴㄴ]
local AT_SIGN_LIAISON = {
	{ "ᆨ@ᄋ", "ᄀ" },
	{ "ᆮ@ᄋ", "ᄃ" }, -- 웃어른 [우더른], 곧이어 [고디어]
	{ "ᆯ@ᄋ", "ᄅ" },
	{ "ᆸ@ᄋ", "ᄇ" },
	{ "ᆫ@ᄅ", "ᆫᄂ" }, -- 음운론 [으문논]
}

-- consonant assimilations
local ASSIMILATION = {
	{ "[ᆨᆼ][ᄂᄅ]", "ᆼᄂ" },
	{ "ᆨᄆ", "ᆼᄆ" },
	{ "ᆫᄅ", "ᆯᄅ" },
	{ "ᆮ[ᄂᄅ]", "ᆫᄂ" },
	{ "ᆮᄆ", "ᆫᄆ" },
	{ "ᆯᄂ", "ᆯᄅ" },
	{ "[ᆷᆸ][ᄂᄅ]", "ᆷᄂ" },
	{ "ᆸᄆ", "ᆷᄆ" },
}

-- drop y after {ㅈ, ㅉ, ㅊ}
local DROP_Y = {
	{ "([ᄌ-ᄎ])ᅣ", "%1ᅡ" },
	{ "([ᄌ-ᄎ])ᅤ", "%1ᅢ" },
	{ "([ᄌ-ᄎ])ᅧ", "%1ᅥ" },
	{ "([ᄌ-ᄎ])ᅨ", "%1ᅦ" },
	{ "([ᄌ-ᄎ])ᅭ", "%1ᅩ" },
	{ "([ᄌ-ᄎ])ᅲ", "%1ᅮ" },
}

-- vowels: { pattern, RR, MR }
local RR_COLUMN, MR_COLUMN = 2, 3
local VOWELS = {
	{ "[ᅡㅏ]", "a", "a" },
	{ "[ᅢㅐ]", "ae", "ae" },
	{ "[ᅣㅑ]", "ya", "ya" },
	{ "[ᅤㅒ]", "yae", "yae" },
	{ "[ᅥㅓ]", "eo", "ŏ" },
	{ "[ᅦㅔ]", "e", "e" },
	{ "[ᅧㅕ]", "yeo", "yŏ" },
	{ "[ᅨㅖ]", "ye", "ye" },
	{ "[ᅩㅗ]", "o", "o" },
	{ "[ᅪㅘ]", "wa", "wa" },
	{ "[ᅫㅙ]", "wae", "wae" },
	{ "[ᅬㅚ]", "oe", "oe" },
	{ "[ᅭㅛ]", "yo", "yo" },
	{ "[ᅮㅜ]", "u", "u" },
	{ "[ᅯㅝ]", "wo", "wŏ" },
	{ "[ᅰㅞ]", "we", "we" },
	{ "[ᅱㅟ]", "wi", "wi" },
	{ "[ᅲㅠ]", "yu", "yu" },
	{ "[ᅳㅡ]", "eu", "ŭ" },
	{ "[ᅴㅢ]", "ui", "ŭi" },
	{ "[ᅵㅣ]", "i", "i" },
}

------------------------------------------------------------------------
-- Rule tables for RR only
------------------------------------------------------------------------

-- for ㅎ
-- trivia: {ㄶ, ㅀ, ㅎ} + ㅂ doesn't actually exist, but added for completeness (syl-final ㅎ is for aspiration anyway)
local RR_ASPIRATION = {
	{ "ᆭᄀ", "ᆫᄏ" },
	{ "ᆭᄃ", "ᆫᄐ" },
	{ "ᆭᄇ", "ᆫᄑ" },
	{ "ᆭᄌ", "ᆫᄎ" },
	{ "ᆶᄀ", "ᆯᄏ" },
	{ "ᆶᄃ", "ᆯᄐ" },
	{ "ᆶᄇ", "ᆯᄑ" },
	{ "ᆶᄌ", "ᆯᄎ" },
	{ "ᇂᄀ", "ᄏ" },
	{ "ᇂᄃ", "ᄐ" },
	{ "ᇂᄇ", "ᄑ" },
	{ "ᇂᄉ", "ᄉ" },
	{ "ᇂᄌ", "ᄎ" },
}

-- @ for ㄱㅎ/ㄷㅎ/ㅂㅎ → k/t/p
local RR_AT_SIGN_H = {
	{ "ᆨ@ᄒ", "ᄏ" },
	{ "ᆮ@ᄒ", "ᄐ" },
	{ "ᆸ@ᄒ", "ᄑ" },
}

-- single consonants
local RR_CONSONANTS = {
	{ "[ᄀㄱ]", "g" },
	{ "[ᄁㄲ]", "kk" },
	{ "ㄳ", "ks" },
	{ "[ᄂᆫㄴ]", "n" },
	{ "ㄵ", "nj" },
	{ "ㄶ", "nh" },
	{ "[ᄃㄷ]", "d" },
	{ "[ᄄㄸ]", "tt" },
	{ "[ᄅㄹ]", "r" },
	{ "ᆯ", "l" },
	{ "ㄺ", "lg" },
	{ "ㄻ", "lm" },
	{ "ㄼ", "lb" },
	{ "ㄽ", "ls" },
	{ "ㄾ", "lt" },
	{ "ㄿ", "lp" },
	{ "ㅀ", "lh" },
	{ "[ᄆᆷㅁ]", "m" },
	{ "[ᄇㅂ]", "b" },
	{ "[ᄈㅃ]", "pp" },
	{ "ㅄ", "ps" },
	{ "[ᄉㅅ]", "s" },
	{ "[ᄊㅆ]", "ss" },
	{ "[ᄋㅇ]", "" },
	{ "ᆼ", "ng" },
	{ "[ᄌㅈ]", "j" },
	{ "[ᄍㅉ]", "jj" },
	{ "[ᄎㅊ]", "ch" },
	{ "[ᄏᆨㅋ]", "k" },
	{ "[ᄐᆮㅌ]", "t" },
	{ "[ᄑᆸㅍ]", "p" },
	{ "[ᄒㅎ]", "h" },
}

------------------------------------------------------------------------
-- Rule tables for MR only
------------------------------------------------------------------------

local MR_PRELIMINARY = {
	{ "([ᄀᄁᄃ-ᄊᄌ-ᄑ])ᅴ", "%1ᅵ" }, -- syl-init consonant + ㅢ → syl-init consonant + ㅣ (except 의, 늬, 희)
	{ "(ᄋ[ᅧ]ᄃ[ᅥ]ᆲ)([ᄀᄃᄇᄉᄌ])", "%1`%2" }, -- 여덟 + particle (tensification does not occur)
}

-- for ㅎ
-- trivia: {ㄶ, ㅀ, ㅎ} + ㅂ doesn't actually exist, but added for completeness (syl-final ㅎ is for aspiration anyway)
local MR_ASPIRATION = {
	{ "ᆭᄀ", "ᆫᄏ" },
	{ "ᆭᄃ", "ᆫᄐ" },
	{ "ᆭᄇ", "ᆫᄑ" },
	{ "[ᆬᆭ]ᄉ", "ᆫᄊ" },
	{ "ᆭᄌ", "ᆫᄎ" },
	{ "ᆶᄀ", "ᆯᄏ" },
	{ "ᆶᄃ", "ᆯᄐ" },
	{ "ᆶᄇ", "ᆯᄑ" },
	{ "[ᆲᆴᆶ]ᄉ", "ᆯᄊ" },
	{ "ᆶᄌ", "ᆯᄎ" },
	{ "ᇂᄀ", "ᄏ" },
	{ "ᇂᄃ", "ᄐ" },
	{ "ᇂᄇ", "ᄑ" },
	{ "ᇂᄉ", "ᄊ" },
	{ "ᇂᄌ", "ᄎ" },
}

local MR_TENSIFICATION = {
	-- ㄵ, ㄼ, ㄾ cause tensification of following consonant
	-- do not add ㄻ; does not always cause tensification (굶기다 [굼기다], 삶조차 [삼조차])
	{ "([ᆬᆲᆴ])([ᄀᄃᄌ])", "%1@%2" },
	-- ㄺㄱ [ㄹㄲ] (usually verb/adjective stem ending in ㄺ + ending/suffix beginning with ㄱ (맑고 [말꼬], 긁개 [글깨]))
	{ "ᆰᄀ", "ᆯ@ᄀ" },
	-- @ for written 사이시옷 + ㄱ/ㅂ (should be done before neutralization of syl-final consonants)
	{ "ᆺ@ᄀ", "ᄁ" },
	{ "ᆺ@ᄇ", "ᄈ" },
}

-- cases where ㄱ, ㄷ, ㅂ, ㅈ become voiced consonants
-- * is for additional hyphen in romanization only (voicing is retained after hyphen)
local MR_VOICING = {
	{ "ᆫᄀ", "ᆫ'`ᄀ" }, -- n'g
	{ "([ᅡ-ᅵᆫᆯᆷᆼ])([ᄀᄃᄇᄌ])", "%1`%2" },
	{ "([ᅡ-ᅵᆫᆯᆷᆼ])%*([ᄀᄃᄇᄌ])", "%1-`%2" },
	{ "ᆯ%*ᄅ", "ᆯ-l" }, -- ㄹ-ㄹ should probably be l-l rather than l-r
	{ "%*", "-" },
	{ "@", "" },
}

local MR_MISC = {
	-- no {kkk, ttt, ppp, sss/ts/tss, ttch}
	{ "ᆨᄁ", "ᄁ" },
	{ "ᆮᄄ", "ᄄ" },
	{ "ᆸᄈ", "ᄈ" },
	{ "ᆮ[ᄉᄊ]", "ᄊ" },
	{ "ᆮᄍ", "ᄍ" },
	-- other misc conversions
	{ "ᆯᄅ", "ᆯl" },
	{ "ᆯᄒ", "rᄒ" },
	{ "ᄉ[ᅱ]", "shᅱ" },
}

-- single consonants (` marks a voiced consonant, see MR_VOICING)
local MR_CONSONANTS = {
	{ "`ᄀ", "g" },
	{ "`ᄃ", "d" },
	{ "`ᄇ", "b" },
	{ "`ᄌ", "j" },
	{ "[ᄀᆨㄱ]", "k" },
	{ "[ᄁㄲ]", "kk" },
	{ "ㄳ", "ks" },
	{ "[ᄂᆫㄴ]", "n" },
	{ "ㄵ", "nj" },
	{ "ㄶ", "nh" },
	{ "[ᄃᆮㄷ]", "t" },
	{ "[ᄄㄸ]", "tt" },
	{ "[ᄅㄹ]", "r" },
	{ "ᆯ", "l" },
	{ "ㄺ", "lg" },
	{ "ㄻ", "lm" },
	{ "ㄼ", "lb" },
	{ "ㄽ", "ls" },
	{ "ㄾ", "lt'" },
	{ "ㄿ", "lp'" },
	{ "ㅀ", "rh" },
	{ "[ᄆᆷㅁ]", "m" },
	{ "[ᄇᆸㅂ]", "p" },
	{ "[ᄈㅃ]", "pp" },
	{ "ㅄ", "ps" },
	{ "[ᄉㅅ]", "s" },
	{ "[ᄊㅆ]", "ss" },
	{ "[ᄋㅇ]", "" },
	{ "ᆼ", "ng" },
	{ "[ᄌㅈ]", "ch" },
	{ "[ᄍㅉ]", "tch" },
	{ "[ᄎㅊ]", "ch'" },
	{ "[ᄏㅋ]", "k'" },
	{ "[ᄐㅌ]", "t'" },
	{ "[ᄑㅍ]", "p'" },
	{ "[ᄒㅎ]", "h" },
	{ "`", "" },
}

------------------------------------------------------------------------
-- Rule tables for p.parseExceptions
------------------------------------------------------------------------

local EXCEPTIONS_HEAD = {
	-- for linguistic contexts
	{ "ㄴ([ᄀ-ᄒ])", "ᆫ%1" }, -- -ㄴ다
	{ "ㄹ([ᄀ-ᄒ])", "ᆯ%1" }, -- -ㄹ까, -ㄹ래
	{ "ㄹ@([ᄀᄃᄇᄉᄌ])", "ᆯ@%1" }, -- -ㄹ지
	{ "ㅁ([ᄀ-ᄒ])", "ᆷ%1" },
	{ "ㅂ([ᄀ-ᄒ])", "ᆸ%1" }, -- -ㅂ니다, -ㅂ시다
	-- ㄴ-addition always occurs before 윷 and 잎
	{ "([ᆨ-ᇂ])ᄋ(ᅲᆾ)", "%1ᄂ%2" },
	{ "([ᆨ-ᇂ])ᄋ(ᅵᇁ)", "%1ᄂ%2" },
	-- 싫증 [실쯩]
	{ "(ᄉ[ᅵ])ᆶ(ᄌ[ᅳ]ᆼ)", "%1ᆯ@%2" },
	-- cases where ㄺㄱ is pronounced [ㄱㄲ]
	-- not including very rarely used words such as 삼시욹, 안찱, 우줅거리다, etc.
	{ "([ᄃᄉᄐ]ᅡ)ᆰᄀ", "%1ᆨᄀ" }, -- 닭, 삵, 수탉/암탉
	{ "([ᄉᄒ]ᅳ)ᆰᄀ", "%1ᆨᄀ" }, -- 기슭, 흙
	{ "(ᄎ[ᅵ])ᆰᄀ", "%1ᆨᄀ" }, -- 칡
	-- palatalization
	{ "ᆮᄋ(ᅵ[ᆫᆯᆷᆸ])", "ᄌ%1" }, -- 해돋이 [해도지]
	{ "ᆮᄋ(ᅵ)([^ᆨ-ᇂ])", "ᄌ%1%2" },
	{ "ᆮᄋ(ᅵ)$", "ᄌ%1" },
}

-- palatalization before -이-/-히- and {ㄵ, ㄺ, ㄼ} + -히-: { what is written, what is pronounced }
-- Each pair expands to the same four rules (see below), in this order.
local HI_SUFFIX_PAIRS = {
	{ "[ᆮᆽ]ᄒ", "ᄎ" }, -- 굳히다 [구치다], 꽂히다 [꼬치다]
	{ "ᆴᄋ", "ᆯᄎ" }, -- 훑이다 [훌치다]
	{ "ᇀᄋ", "ᄎ" }, -- 붙이다 [부치다]
	{ "ᆬᄒ", "ᆫᄎ" }, -- 앉히다 [안치다]
	{ "ᆰᄒ", "ᆯᄏ" }, -- 밝히다 [발키다]
	{ "ᆲᄒ", "ᆯᄑ" }, -- 넓히다 [널피다], 밟히다 [발피다]
}

local EXCEPTIONS_HI_SUFFIX = {}
for _, pair in ipairs(HI_SUFFIX_PAIRS) do
	local written, spoken = pair[1], pair[2]
	local rules = EXCEPTIONS_HI_SUFFIX
	rules[#rules + 1] = { written .. "(ᅧᆻ)", spoken .. "%1" }
	rules[#rules + 1] = { written .. "(ᅵ[ᆫᆯᆷᆸ])", spoken .. "%1" }
	-- vowel without final consonant: needs both the [^ᆨ-ᇂ] and the $ form (see IMPORTANT NOTE 2b)
	rules[#rules + 1] = { written .. "([ᅧᅵ])([^ᆨ-ᇂ])", spoken .. "%1%2" }
	rules[#rules + 1] = { written .. "([ᅧᅵ])$", spoken .. "%1" }
end

local EXCEPTIONS_TAIL = {
	-- cases where 넓- is pronounced [넙] before consonant
	{ "(ᄂ[ᅥ])ᆲ([ᄁᄄ-ᄈᄊᄍ-ᄒ])", "%1ᆸ%2" },
	{ "(ᄂ[ᅥ])ᆲ(ᄃ[ᅡ]ᄃ[ᅳ]ᆷ)", "%1ᆸ%2" }, -- 넓다듬이
	{ "(ᄂ[ᅥ])ᆲ(ᄃ[ᅮ]ᆼ)", "%1ᆸ%2" }, -- 넓둥글다
	{ "(ᄂ[ᅥ])ᆲ(ᄉ[ᅡ]ᆯᄆ[ᅮ]ᆫ)", "%1ᆸ%2" }, -- 넓살문
	{ "(ᄂ[ᅥ])ᆲ(ᄌ[ᅥᅮ]ᆨ)", "%1ᆸ%2" }, -- 넓적-, 넓죽-
	-- 밟- is [밥] before consonant (except null-init consonant ㅇ)
	{ "(ᄇ[ᅡ])ᆲ([^ᄋ])", "%1ᆸ%2" },
	{ "(ᄇ[ᅡ])ᆲ$", "%1ᆸ" },
	-- automatic 절음 법칙
	{ "(ᄋ[ᅥ])ᆹᄋ(ᅢ[ᆫᆯᆷᆸᆻ])", "%1ᆸᄉ%2" }, -- except 없애다 [업쌔다]
	{ "(ᄋ[ᅥ])ᆹᄋ(ᅢ[^ᆨ-ᇂ])", "%1ᆸᄉ%2" },
	{ "(ᄋ[ᅥ])ᆹᄋ(ᅢ)$", "%1ᆸᄉ%2" },
	{ "(ᄆ[ᅡᅥ])ᆺᄋ(ᅵᆻ)", "%1ᄉ%2" }, -- except 맛있다 and 멋있다 which are usually pronounced [마싣따] and [머싣따] respectively
	{ "([ᆩᆪᆬᆰ-ᆵᆹ-ᆻᆽ-ᇂ])(ᄋ[ᅡᅥᅧ][ᆨ-ᆺᆼ-ᇂ])", "%1@%2" }, -- except 아, 았, 어, 었, 여, 였
	{ "([ᆩᆪᆬᆰ-ᆵᆹ-ᆻᆽ-ᇂ])(ᄋ[ᅦ][ᆨ-ᆪᆬ-ᆮᆰ-ᇂ])", "%1@%2" }, -- except 에, 엔, 엘
	{ "([ᆩᆪᆬᆰ-ᆵᆹ-ᆻᆽ-ᇂ])(ᄋ[ᅳᅵ][ᆨ-ᆪᆬ-ᆮᆰ-ᆶᆹ-ᇂ])", "%1@%2" }, -- except 으, 은, 을, 음, 읍, 이, 인, 일, 임, 입
	{ "([ᆩᆪᆬᆰ-ᆵᆹ-ᆻᆽ-ᇂ])(ᄋ[ᅴ][ᆨ-ᇂ])", "%1@%2" }, -- except 의 (w/o final consonant)
	{ "([ᆩᆪᆬᆰ-ᆵᆹ-ᆻᆽ-ᇂ])(ᄋ[ᅢ-ᅤᅨ-ᅲ])", "%1@%2" },
	-- _ for additional space in romanization only
	{ "_", " " },
}

------------------------------------------------------------------------
-- Rule table for p.parseEnclosedHangul
------------------------------------------------------------------------

-- no distinction is made between parenthesized and circled chars
local ENCLOSED_HANGUL = {
	{ "[㈀㉠]", "(기역)" },
	{ "[㈁㉡]", "(니은)" },
	{ "[㈂㉢]", "(디귿)" },
	{ "[㈃㉣]", "(리을)" },
	{ "[㈄㉤]", "(미음)" },
	{ "[㈅㉥]", "(비읍)" },
	{ "[㈆㉦]", "(시옷)" },
	{ "[㈇㉧]", "(이응)" },
	{ "[㈈㉨]", "(지읒)" },
	{ "[㈉㉩]", "(치읓)" },
	{ "[㈊㉪]", "(키읔)" },
	{ "[㈋㉫]", "(티읕)" },
	{ "[㈌㉬]", "(피읖)" },
	{ "[㈍㉭]", "(히읗)" },
	{ "[㈎㉮]", "(가)" },
	{ "[㈏㉯]", "(나)" },
	{ "[㈐㉰]", "(다)" },
	{ "[㈑㉱]", "(라)" },
	{ "[㈒㉲]", "(마)" },
	{ "[㈓㉳]", "(바)" },
	{ "[㈔㉴]", "(사)" },
	{ "[㈕㉵]", "(아)" },
	{ "[㈖㉶]", "(자)" },
	{ "[㈗㉷]", "(차)" },
	{ "[㈘㉸]", "(카)" },
	{ "[㈙㉹]", "(타)" },
	{ "[㈚㉺]", "(파)" },
	{ "[㈛㉻]", "(하)" },
	{ "㈜", "(주)" },
	{ "㈝", "(오전)" },
	{ "㈞", "(오후)" },
	{ "㉼", "(참고)" },
	{ "㉽", "(주의)" },
	{ "㉾", "(우)" },
}

------------------------------------------------------------------------
-- Revised Romanization
------------------------------------------------------------------------

-- Convert to Revised Romanization
-- This method is just a wrapper that unpacks arguments
-- required by https://www.mediawiki.org/wiki/Manual:Coding_conventions/Lua
function p.rr(frame)
	return p._rr(unpackInput(frame))
end

-- This method is for the actual logic
-- hangul: text containing precomposed Hangul plus the special characters $ % * @ ^ _ `
-- Returns the romanized text; raises an error on invalid input.
function p._rr(hangul)
	local result = p.disallowInvalidInput(hangul)
	result = p.parseName(result)
	result = p.removeLinksAndMarkups(result)
	result = p.checkInvalidSeq1(result)
	result = gsub(result, "`", "") -- ignore ` (only needed for MR; not needed for RR)
	result = p.parseEnclosedHangul(result)
	result = p.decomposeHangul(result)
	result = p.checkInvalidSeq2(result)
	result = p.parseExceptions(result)
	result = gsub(result, "%*", "-") -- * for additional hyphen in romanization only

	result = applyRules(result, N_ADDITION)
	result = applyRules(result, LIAISON)
	result = applyRules(result, RR_ASPIRATION)

	-- ㄺㄱ [ㄹㄲ] (usually verb/adjective stem ending in ㄺ + ending/suffix beginning with ㄱ (맑고 [말꼬], 긁개 [글깨]))
	result = gsub(result, "ᆰᄀ", "ᆯᄀ")

	result = applyRules(result, NEUTRALIZATION)

	-- @ for ㄱㅎ/ㄷㅎ/ㅂㅎ → k/t/p, 절음 법칙, ㄴㄹ pronounced [ㄴㄴ]
	-- other irregularities documented are automatically handled
	result = applyRules(result, RR_AT_SIGN_H)
	result = applyRules(result, AT_SIGN_LIAISON)
	result = gsub(result, "@", "")

	result = applyRules(result, ASSIMILATION)
	result = gsub(result, "ᆯᄅ", "ᆯl")
	result = applyRules(result, DROP_Y)
	result = applyRules(result, VOWELS, RR_COLUMN)
	result = applyRules(result, RR_CONSONANTS)

	-- U+FDD0 for converting each syllabic block in given name separately (e.g. 한복남 Han Boknam, not Han Bongnam)
	result = gsub(result, SYL_SEP, "")

	-- ^ for capitalization
	result = gsub(result, "%^[a-eg-km-pr-uwy]", upper)
	result = gsub(result, "%^", "")

	-- final error checking
	checkNoSyllablesLeft(result)

	-- return orig chars
	return p.returnOrigChars(result)
end

------------------------------------------------------------------------
-- McCune–Reischauer
------------------------------------------------------------------------

-- Convert to McCune–Reischauer Romanization of Korean
-- Wrapper that unpacks arguments, like p.rr
function p.mr(frame)
	return p._mr(unpackInput(frame))
end

-- Actual logic for McCune–Reischauer; same input conventions as p._rr.
function p._mr(hangul)
	local result = p.disallowInvalidInput(hangul)
	result = p.parseName(result)
	result = gsub(result, SYL_SEP, "") -- remove U+FDD0 (only needed for RR; not needed for MR)
	result = p.removeLinksAndMarkups(result)
	result = p.checkInvalidSeq1(result)
	result = p.parseEnclosedHangul(result)
	result = p.decomposeHangul(result)
	result = p.checkInvalidSeq2(result)
	result = p.parseExceptions(result)

	result = applyRules(result, MR_PRELIMINARY)
	result = applyRules(result, N_ADDITION)
	result = applyRules(result, LIAISON)
	result = applyRules(result, MR_ASPIRATION)
	result = applyRules(result, MR_TENSIFICATION)
	result = applyRules(result, NEUTRALIZATION)

	-- @ for tensification, 절음 법칙, ㄴㄹ pronounced [ㄴㄴ]
	-- other irregularities documented are automatically handled
	result = gsub(result, "([ᅡ-ᅵᆫᆷᆼ])@ᄉ", "%1ᄊ")
	result = applyRules(result, AT_SIGN_LIAISON)

	result = applyRules(result, MR_VOICING)
	result = applyRules(result, ASSIMILATION)
	result = applyRules(result, MR_MISC)
	result = applyRules(result, DROP_Y)
	result = applyRules(result, VOWELS, MR_COLUMN)

	-- ㅏ에 (aë) and ㅗ에 (oë)
	result = gsub(result, "([ao])ᄋe", "%1ë")

	result = applyRules(result, MR_CONSONANTS)

	-- ^ for capitalization
	result = gsub(result, "%^[acehikm-pr-uwyŏŭ]", upper)
	result = gsub(result, "%^", "")

	-- replace string-final ' with &#39; (to avoid possible clashes with bold/italic markup)
	result = gsub(result, "([KPThkpt])'$", "%1&#39;")

	-- final error checking
	checkNoSyllablesLeft(result)

	-- return orig chars
	return p.returnOrigChars(result)
end

------------------------------------------------------------------------
-- Names
------------------------------------------------------------------------

-- Handles %name% sections: validates them, marks surname and given name for
-- capitalization (^), joins them with _ (later turned into a space), inserts @ for
-- tensification of ㄹ + {ㄷ, ㅅ, ㅈ} and U+FDD0 between the syllabic blocks of the given name.
function p.parseName(hangul)
	local hanjaReadingsFinalL = "갈걸결골괄굴궐귤글길날녈놜눌닐달돌랄렬률말멸몰물밀발벌별불살설솔술슬실알얼열올왈울월율을일절졸줄즐질찰철촬출칠탈팔필할헐혈홀활훌휼흘힐"
	local hanjaReadingsInitDSJ = "다단달담답당대댁덕도독돈돌동두둔득등사삭산살삼삽상새색생서석선설섬섭성세소속손솔송쇄쇠수숙순술숭쉬슬습승시식신실심십자작잔잠잡장재쟁저적전절점접정제조족존졸종좌죄주죽준줄중즉즐즙증지직진질짐집징"

	-- note: internally uses 3 noncharacters
	-- U+FDD0 (SYL_SEP): mostly for given name in RR
	-- U+FDD1 (NAME_BEGIN): marks beginning of name
	-- U+FDD2 (NAME_END): marks end of name
	local rest = "[^" .. NAME_END .. "]*" -- anything up to the end of the current name
	local nameChar = "[가-힣%$@]"
	local function inName(inner)
		return NAME_BEGIN .. inner .. NAME_END
	end

	-- change % to U+FDD1 and U+FDD2 (end of string also terminates name mode)
	hangul = gsub(hangul, "%%([^%%]*)%%", inName("%1"))
	hangul = gsub(hangul, "%%([^%%]*)$", inName("%1"))

	-- disallow invalid input for name
	if find(hangul, inName("")) then
		error("Name cannot be empty")
	-- NAME_END is excluded from the "invalid" class so that the end marker of one name is never
	-- mistaken for an invalid character when a second name follows later in the text
	elseif find(hangul, inName(rest .. "[^가-힣_ " .. NAME_END .. "]" .. rest)) then
		error("Invalid character in name")
	elseif find(hangul, NAME_BEGIN .. " ") then
		error("Name cannot begin with space")
	elseif find(hangul, " " .. NAME_END) then
		error("Name cannot end with space")
	elseif find(hangul, inName(rest .. "[ _]" .. rest .. "[ _]" .. rest)) then
		error("No more than two components in name")
	-- NOTE(review): anchoring at the end of the name is the lenient reading of the damaged pattern — confirm
	elseif find(hangul, inName("[가-힣]_")) then
		error("No _ after one-syllable surname")
	elseif find(hangul, inName(rest .. "[" .. hanjaReadingsFinalL .. "]@[" .. hanjaReadingsInitDSJ .. "]" .. rest)) then
		error("Contains unnecessary @ in name") -- see below
	end

	-- separate surname and given name
	-- if input contains _ or space, separate there
	hangul = gsub(hangul, inName("(" .. nameChar .. "+)_"), inName("^%1_")) -- for surname-only string
	hangul = gsub(hangul, inName("_(" .. nameChar .. "+)"), inName("_^%1")) -- for mononym
	hangul = gsub(hangul, inName("(" .. nameChar .. "+)[ _](" .. nameChar .. "+)"), inName("^%1_^%2"))
	-- otherwise, separate after first syllabic block
	hangul = gsub(hangul, inName("([가-힣])"), inName("^%1_")) -- for surname-only string
	hangul = gsub(hangul, inName("([가-힣])(" .. nameChar .. "+)"), inName("^%1_^%2"))

	-- check invalid input after separating surname and given name
	if find(hangul, inName(rest .. "_%^[%$@]" .. rest)) then
		error("No @ or $ between surname and given name")
	end

	-- tensification of ㄹ + {ㄷ, ㅅ, ㅈ} (needed for MR; e.g. 홍길동 [홍길똥], 을지문덕 [을찌문덕])
	-- does not occur when same syllable is repeated (e.g. 구구절절 [구구절절], not [구구절쩔]); just using U+FDD0 here too
	-- Each gsub rewrites at most one place per name, hence the repetition until nothing changes.
	local repeatable = { "달", "돌", "살", "설", "솔", "술", "슬", "실", "절", "졸", "줄", "즐", "질" }
	hangul = untilStable(hangul, function(text)
		for _, syllable in ipairs(repeatable) do
			text = gsub(
				text,
				inName("(" .. rest .. ")" .. syllable .. syllable .. "(" .. rest .. ")"),
				inName("%1" .. syllable .. SYL_SEP .. syllable .. "%2")
			)
		end
		return text
	end)

	-- now apply tensification
	local tensePattern = inName("(" .. rest .. ")([" .. hanjaReadingsFinalL .. "])([" .. hanjaReadingsInitDSJ .. "])(" .. rest .. ")")
	hangul = untilStable(hangul, function(text)
		return (gsub(text, tensePattern, inName("%1%2@%3%4")))
	end)

	-- insert U+FDD0 in given name (needed for RR; e.g. 한복남 Han Boknam, not Han Bongnam)
	local givenPattern = inName("(" .. rest .. ")_%^(" .. rest .. ")(" .. nameChar .. ")(" .. nameChar .. ")(" .. rest .. ")")
	hangul = untilStable(hangul, function(text)
		return (gsub(text, givenPattern, inName("%1_^%2%3" .. SYL_SEP .. "%4%5")))
	end)

	-- remove _ which was needed for surname-only string and mononym
	hangul = gsub(hangul, "_" .. NAME_END, NAME_END)
	hangul = gsub(hangul, NAME_BEGIN .. "_%^", NAME_BEGIN .. "^")

	-- remove U+FDD1 and U+FDD2
	hangul = gsub(hangul, "[" .. NAME_BEGIN .. NAME_END .. "]", "")

	return hangul
end

------------------------------------------------------------------------
-- Pre-processing shared by RR and MR
------------------------------------------------------------------------

-- this is for pre-processing exceptions that apply to both RR and MR
--[[
IMPORTANT: Before adding an exception, be sure to check if it can ALWAYS be applied in ALL contexts.
	Good example: 싫증 → 실@증
	Bad example: 문자 → 문@자 (affects words like 방문자 (pronounced [방문자], not [방문짜]))
--]]
function p.parseExceptions(decomposed)
	local exceptions = applyRules(decomposed, EXCEPTIONS_HEAD)
	exceptions = applyRules(exceptions, EXCEPTIONS_HI_SUFFIX)
	exceptions = applyRules(exceptions, EXCEPTIONS_TAIL)
	return exceptions
end

-- Spells out parenthesized/circled Hangul characters as plain Hangul in parentheses.
-- actually not very necessary, but these are also classified as Hangul chars in Unicode
function p.parseEnclosedHangul(hangul)
	return applyRules(hangul, ENCLOSED_HANGUL)
end

-- Removing special chars (except for escaped ones)
-- e.g. $하나\$ -> 하나$
function p.cleanHangul(frame)
	local cleaned = requireText(unpackInput(frame))

	-- Replacing escaped special chars with placeholders
	for _, escape in ipairs(ESCAPES) do
		cleaned = gsub(cleaned, escape[1], escape[2])
	end

	-- Removing non-escaped special chars
	cleaned = gsub(cleaned, "[%$%%%*@%^_`]", "")

	-- Returning orig chars
	cleaned = p.returnOrigChars(cleaned)

	-- Unstripping test
	cleaned = mw.text.unstrip(cleaned)

	return cleaned
end

-- Removes wiki/HTML markup. These either are unnecessary or interfere with assimilation.
function p.removeLinksAndMarkups(hangul)
	-- remove bold/italic
	-- it is not impossible to allow bold/italic when it does not interfere with assimilation, but determining when to allow or disallow that adds complication for little practical gain
	hangul = gsub(hangul, "'''", "")
	hangul = gsub(hangul, "''", "")

	-- remove HTML tags (except br, which is normalized to <br>)
	hangul = gsub(hangul, "</?[A-Za-z][^>]->", function(tag)
		if find(tag, "^<[Bb][Rr] */?>$") then
			return "<br>"
		end
		return ""
	end)

	-- remove wikilinks
	hangul = gsub(hangul, "%[%[[^%|]+%|(..-)%]%]", "%1")
	hangul = gsub(hangul, "%[%[", "")
	hangul = gsub(hangul, "%]%]", "")

	-- remove refs
	-- hangul = gsub(hangul, "<ref.-</ref>", "")
	hangul = mw.text.killMarkers(hangul)

	-- remove templates
	hangul = gsub(hangul, "{{.-}}", "")

	return hangul
end

-- Turns the placeholders for escaped special characters back into the characters themselves.
function p.returnOrigChars(orig)
	for _, escape in ipairs(ESCAPES) do
		orig = gsub(orig, escape[2], escape[3])
	end
	return orig
end

------------------------------------------------------------------------
-- Input validation
------------------------------------------------------------------------

-- preprocessing step (before decomposing Hangul)
-- Replaces escaped special characters with placeholders, then raises an error for input
-- the romanization rules cannot handle. Returns the text with the placeholders in place.
function p.disallowInvalidInput(hangul)
	hangul = requireText(hangul)

	-- process escape chars first
	for _, escape in ipairs(ESCAPES) do
		hangul = gsub(hangul, escape[1], escape[2])
	end

	-- NOTE(review): the two range ends are presumed to be the last code points of the
	-- Hangul Jamo Extended-A (U+A97F) and Extended-B (U+D7FF) blocks — confirm
	local unsupported = "[ᄓ-ᅠᅶ-ᆧᇃ-ᇿ〮〯ㅤ-ㆌㆎꥠ-" .. u(0xA97F) .. "ힰ-" .. u(0xD7FF) .. "]"

	if find(hangul, unsupported) then
		error("Contains Hangul not supported by romanization systems")
	elseif find(hangul, "ㆍ") then
		error("Contains Hangul not supported by romanization systems. For middle dot, use · (U+00B7) instead of ㆍ (U+318D)")
	elseif find(hangul, "[ᄀ-ᄒ]") or find(hangul, "[ᅡ-ᅵᆨ-ᇂ]") then
		error("Do not input conjoining Hangul jamo directly")
	elseif find(hangul, "`%*") then
		error("Use *` instead of `*")
	elseif find(hangul, "@%*") then
		error("Use *@ instead of @*")
	elseif find(hangul, "%^[^가-힣]") then
		error("^ must be immediately followed by Hangul syllabic block")
	elseif find(hangul, "[^%*0-9A-Za-z]`")
		or find(hangul, "[^0-9A-Za-z]%*`")
		or find(hangul, "`[^가-깋다-딯바-빟자-짛]")
	then
		error("Found invalid sequence containing `")
	elseif find(hangul, "[^%*ㄹ가-힣]@")
		or find(hangul, "[^가-힣]%*@")
		or find(hangul, "%*@[^가-깋다-딯바-빟자-짛]")
		or find(hangul, "ㄹ@[^가-깋다-딯바-빟사-싷자-짛]")
		or find(hangul, "@[^가-깋다-딯라-맇바-빟사-싷아어에엔엘여으은을음읍의이인일임입자-짛하-힣]")
	then
		error("Found invalid sequence containing @")
	elseif find(hangul, "[^가-힣]%$") or find(hangul, "%$[^야-얳여-옣요-욯유-윶윸-윻이-잍잏]") then
		error("Found invalid sequence containing $")
	elseif find(hangul, "%%$") then
		error("Remove final %")
	elseif find(hangul, "[ _][ _]") then
		error("No two or more consecutive space characters")
	elseif find(hangul, "^[%$%*@_`]")
		or find(hangul, "^%%[^_가-힣]")
		or find(hangul, "[ _]%*")
		or find(hangul, "%*[ %*%-_]")
		or find(hangul, "%-%*")
		-- the internal marker characters must not come in from outside
		or find(hangul, "[" .. SYL_SEP .. "-" .. NAME_END .. "]")
		or find(hangul, "[%$%*@%^`]$")
	then
		error("Invalid input")
	end

	return hangul
end

-- checked right after removing links and markups (before decomposing Hangul)
function p.checkInvalidSeq1(hangul)
	if find(hangul, "[ _][ _]") then
		error("No two or more consecutive space characters")
	elseif find(hangul, "^[%$%*@_`]")
		or find(hangul, "[ _]%*")
		or find(hangul, "%*[ %*%-_]")
		or find(hangul, "%-%*")
		or find(hangul, "[%$%*@%^_`]$")
	then
		error("Invalid input")
	end

	return hangul
end

-- checked after decomposing Hangul
-- Rejects @ and $ where they would have no effect or no defined meaning.
function p.checkInvalidSeq2(decomposed)
	-- In a given name (RR), U+FDD0 may sit on either side of @ or before $.
	-- NOTE(review): positions rebuilt from the damaged patterns — confirm
	local sep = SYL_SEP .. "?"
	local at = sep .. "@" .. sep

	if find(decomposed, "[ᆨ-ᆪᆬ-ᆮᆴ-ᆶᆸᆹᆻᆽ-ᇂ]%*?" .. at .. "[ᄀᄃᄇᄉᄌ]")
		or find(decomposed, "ᆰ%*?" .. at .. "[ᄀ-ᄊᄌ-ᄑ]")
		or find(decomposed, "ᆲ" .. at .. "[ᄀ-ᄊᄌ-ᄑ]")
		or find(decomposed, "ᆺ%*@[ᄀᄇ]")
		or find(decomposed, "ᆺ%*?" .. at .. "[ᄁ-ᄆᄈ-ᄊᄌ-ᄑ]")
		or find(decomposed, "[ᅡ-ᅵᆨ-ᆪᆬ-ᇂ]" .. at .. "ᄅ")
		or find(decomposed, "[ᅡ-ᅵᆨᆫᆭ-ᆯᆶ-ᆸᆼ]" .. at .. "ᄋ")
		or find(decomposed, "[ᅡ-ᅵᆫ-ᆭᆯᆱ-ᆷᆼ]" .. at .. "ᄒ")
	then
		error("Found invalid sequence containing @")
	elseif find(decomposed, "[ᅡ-ᅵ]" .. sep .. "%$") then
		error("Found invalid sequence containing $")
	end

	return decomposed
end

------------------------------------------------------------------------
-- Decomposition
------------------------------------------------------------------------

-- Split up Hangul blocks into letters
-- e.g. 한 (U+D55C) → ᄒ (U+1112), ᅡ (U+1161), ᆫ (U+11AB)
-- Code points outside U+AC00..U+D7A3 are copied unchanged.
function p.decomposeHangul(hangul)
	local pieces = {} -- joined once at the end instead of growing a string per code point
	for codepoint in mw.ustring.gcodepoint(hangul, 1, -1) do
		if codepoint >= 0xAC00 and codepoint <= 0xD7A3 then
			-- 588 = 21 vowels * 28 final-consonant slots; slot 0 means no final consonant
			local offset = codepoint - 0xAC00
			local jongseongIndex = offset % 28
			pieces[#pieces + 1] = u(0x1100 + math.floor(offset / 588))
			pieces[#pieces + 1] = u(0x1161 + math.floor((offset % 588) / 28))
			if jongseongIndex ~= 0 then
				pieces[#pieces + 1] = u(0x11A7 + jongseongIndex)
			end
		else
			pieces[#pieces + 1] = u(codepoint)
		end
	end
	return table.concat(pieces)
end

return p
Summary:
Please note that all contributions to Humanipedia may be edited, altered, or removed by other contributors. If you do not want your writing to be edited mercilessly, then do not submit it here.
You are also promising us that you wrote this yourself, or copied it from a public domain or similar free resource (see
Humanipedia:Copyrights
for details).
Do not submit copyrighted work without permission!
Cancel
Editing help
(opens in new window)
Templates used on this page:
Template:Error
(
edit
)
Template:Module other
(
edit
)
Template:Module rating
(
edit
)
Template:Ombox
(
edit
)
Template:Pp
(
edit
)
Template:Protection padlock
(
edit
)
Template:Template link
(
edit
)
Template:Tl
(
edit
)
Module:Error
(
edit
)
Module:File link
(
edit
)
Module:Ko-translit/doc
(
edit
)
Module:Message box
(
edit
)
Module:Protection banner
(
edit
)