yaejunyang/packages/utils/saferKorean.ts
2026-05-22 13:19:14 +00:00

533 lines
16 KiB
TypeScript

import CallingNumberKorean from "./callingNumberKorean.js";
import FloatKorean from "./floatKorean.js";
import IntegerKorean from "./integerKorean.js";
import PhoneNumberKorean from "./phoneNumberKorean.js";
import EmojiDescriptions from "./emoji-descriptions.json" with { type: "json" };
/**
 * Strip punctuation that has no spoken form.
 *
 * Removes dots/commas at the very end of the input, removes runs of two or
 * more dots/commas anywhere, turns a single dot/comma followed by whitespace
 * into one space, and turns every round, curly or square bracket into a space.
 *
 * @param input Raw text.
 * @returns The text with the silent punctuation removed or blanked.
 */
export function processUnsounds(input: string): string {
  return (
    input
      // Dots/commas that terminate the input are dropped entirely
      .replace(/[.,]+$/, "")
      // Runs such as "..." or ",," are dropped entirely
      .replace(/[.,]{2,}/g, "")
      // A lone dot/comma before whitespace collapses, with that whitespace, to one space
      .replace(/[.,]\s/g, " ")
      // `]` has to be escaped: unescaped it closes the class early, and the
      // pattern would then only match a bracket immediately followed by "]"
      .replace(/[(){}[\]]/g, " ")
  );
}
// TODO(review): units noted here but not handled below — hectopascal, bar, hectare, AU (acre, inch, foot, yard)
/**
 * Expand shorthand typed in bare Hangul consonants into speakable text.
 *
 * Runs of three or more "아"/"ㅏ" are first collapsed to "아아아". Every run of
 * bare consonants is then rewritten in three passes:
 *   1. mixed abbreviations from `DoubleMixedChoseongMap` (ㅇㅋ => 오키, ㅇㄴ => 아니, ...),
 *      emitted at most twice however often the abbreviation repeats;
 *   2. repeated single consonants from `RepeatedChoseongMap` (ㄴㄴ, ㄱㄱ, ㅋㅋ, ㄷㄷ, ...);
 *   3. any consonant still left, via `ChoseongMap`.
 *
 * @param input Text that may contain consonant-only shorthand.
 * @returns The text with the shorthand expanded.
 */
export function processKorean(input: string): string {
  const collapsed = input.replace(/[아ㅏ]{3,}/g, "아아아");
  return collapsed.replace(/[ㄱ-ㅎㄲㄸㅃㅆㅉ]+/g, (run) =>
    run
      .replace(processKorean.DoubleMixedChoseongMapRegex, (content: string) => {
        const table: Readonly<Record<string, string | undefined>> =
          processKorean.DoubleMixedChoseongMap;
        // The regex matched `(?:key)+`, so `content` is one key repeated.
        // Resolve that key from the table rather than assuming it is exactly
        // two characters long, so longer abbreviations are looked up correctly.
        const key = Object.keys(table).find(
          (candidate) =>
            candidate.length > 0 &&
            content.length % candidate.length === 0 &&
            candidate.repeat(content.length / candidate.length) === content,
        );
        const word = key === undefined ? undefined : table[key];
        // Leave the text untouched instead of throwing on a failed lookup
        if (key === undefined || word === undefined) return content;
        const times = Math.min(Math.floor(content.length / key.length), 2);
        return word.repeat(times);
      })
      .replace(processKorean.RepeatedChoseongMapRegex, (content: string) => {
        // Entries are either a fixed word or a function of the matched run
        const entry =
          processKorean.RepeatedChoseongMap[
            content.charAt(0) as keyof typeof processKorean.RepeatedChoseongMap
          ];
        if (typeof entry === "string") return entry;
        if (typeof entry === "function") return entry(content);
        return content;
      })
      .replace(
        /[ㄱ-ㅎㄲㄸㅃㅆㅉ]/g,
        (char: string) =>
          processKorean.ChoseongMap[
            char as keyof typeof processKorean.ChoseongMap
          ] ?? char,
      ),
  );
}
// Lookup tables and derived regexes read by processKorean().
// NOTE(review): in this view of the file every property key of the three
// object literals below is blank (each entry starts at the colon). Presumably
// the keys are bare Hangul jamo that were lost in extraction — confirm against
// the real source before editing any entry.
export namespace processKorean {
// Replacement words for mixed-consonant abbreviations (the caller's comment
// gives ㅇㅋ => 오키 and ㅇㄴ => 아니 as examples).
export const DoubleMixedChoseongMap = {
: "이지랄",
: "지랄노",
: "하이",
: "싫어",
: "기달",
: "제발",
: "몰라",
: "시바",
: "어디",
: "노잼",
: "바바",
: "바이",
: "죄송",
: "아니",
: "빨리",
: "인정",
: "노노",
: "감사",
: "쯧쯧",
: "지랄",
: "리얼",
: "아하",
: "오키",
: "추카",
: "꺼져",
: "잠깐만",
: "존나",
: "가능",
};
// Global regex with one alternative `(?:key)+` per key of the map above, in
// key order, so a match is one key repeated one or more times.
export const DoubleMixedChoseongMapRegex = new RegExp(
Object.keys(DoubleMixedChoseongMap)
.map((k) => `(?:${k})+`)
.join("|"),
"g",
);
// Replacements for a single jamo repeated two or more times. An entry is
// either a fixed word or a function that receives the whole matched run.
export const RepeatedChoseongMap = {
: "틔틔",
: "덜덜",
: "노노",
: "응응",
: "추추",
: "유유",
: "야야",
// One "크" per character of the matched run
: (content: string) => "크".repeat(content.length),
// One "흐" per character of the matched run
: (content: string) => "흐".repeat(content.length),
// Only runs of exactly two or three are expanded; longer runs are returned unchanged
: (content: string) => {
if (content.length == 2) {
return "고고";
} else if (content.length == 3) {
return "고고고";
}
return content;
},
};
// Global regex with one alternative `key{2,}` per key of the map above.
export const RepeatedChoseongMapRegex = new RegExp(
Object.keys(RepeatedChoseongMap)
.map((k) => `${k}{2,}`)
.join("|"),
"g",
);
// Per-character fallback: the values are the spoken names of the Hangul
// consonant letters (기역, 니은, ... 쌍지읒).
// prettier-ignore
export const ChoseongMap = {
: "기역", : "니은", : "디귿", : "리을", : "미음", : "비읍",
: "시옷", : "이응", : "지읒", : "치읓", : "키읔", : "티읕",
: "피읖", : "히읗", : "쌍기역", : "쌍디귿", : "쌍비읍",
: "쌍시옷", : "쌍지읒",
};
}
/**
 * Rewrite numeric expressions as Korean readings. The passes run in order:
 *   1. hyphenated digit groups (optionally led by "+<digits>") go to PhoneNumberKorean;
 *   2. "<number><prefix><unit>" for bytes, litres and metres (10kib, 10ml, 10km);
 *   3. "<number><counter>" for the listed counter words (10명);
 *   4. comma-grouped integers (1,000);
 *   5. everything still numeric: plain integers, decimals, versions, dotted sequences.
 *
 * @param input Text that may contain numbers.
 * @returns The text with the numbers rewritten.
 */
export function processNumber(input: string): string {
  return input
    .replace(
      // NOTE(review): `[\d-]+` also matches a hyphen with no digits, which is
      // then handed to PhoneNumberKorean.convert — confirm that is intended.
      /(\+\d+[\s-]+)?([\d-]+)/g,
      (_, prefix: string | undefined, phone: string) => {
        const all = (prefix ?? "") + phone;
        // Without a hyphen this is an ordinary number; later passes handle it
        if (!phone.includes("-")) return all;
        return PhoneNumberKorean.convert(all);
      },
    )
    .replace(
      // Positional groups are used on purpose: reusing one group name across
      // alternatives is an ES2025 feature and a SyntaxError on older engines.
      //   2,3 = byte prefix/unit · 4,5 = litre prefix/unit · 6,7 = metre prefix/unit · 8 = tail
      /([\d,]+)(?:([kKMmgGtTpPeEzZyY][iI]?)([bB])|(m?)([lL])|([mck]?)(m))([^a-zA-Z])/g,
      (
        match: string,
        num: string,
        bytePrefix: string | undefined,
        byteUnit: string | undefined,
        literPrefix: string | undefined,
        literUnit: string | undefined,
        meterPrefix: string | undefined,
        meterUnit: string | undefined,
        tail: string,
      ): string => {
        const numStr = IntegerKorean.convertFromString(num);
        if (byteUnit !== undefined) {
          // 10kib => 십키비바이트
          const spoken =
            processNumber.DatasizePrefix[
              (bytePrefix ?? "").toLowerCase() as keyof typeof processNumber.DatasizePrefix
            ];
          return `${numStr} ${spoken}바이트 ${tail}`;
        }
        if (literUnit !== undefined) {
          // 10l => 십리터
          const spoken =
            processNumber.LiterPrefix[
              (literPrefix ?? "").toLowerCase() as keyof typeof processNumber.LiterPrefix
            ];
          return `${numStr} ${spoken}리터 ${tail}`;
        }
        if (meterUnit !== undefined) {
          // 10m => 십미터
          const spoken =
            processNumber.MeterPrefix[
              (meterPrefix ?? "") as keyof typeof processNumber.MeterPrefix
            ];
          return `${numStr} ${spoken}미터 ${tail}`;
        }
        // Not reachable with the pattern above; keep the text as it was
        return match;
      },
    )
    .replace(
      /([\d.,]+)\s*([개살시평명자벌장달병잔번채])/g,
      (_, num: string, counter: string) => {
        // 10명 => 열명
        // Decimals are left for the final pass
        if (num.includes(".")) {
          return num + counter;
        }
        const value = Number.parseInt(num.replace(/,/g, ""), 10);
        const spoken = CallingNumberKorean.canConvert(value)
          ? CallingNumberKorean.convert(value)
          : IntegerKorean.convertFromString(num);
        return spoken + counter;
      },
    )
    .replace(/[\d,]+/g, (num: string) => {
      // 1,000 원 => 천원
      if (!num.includes(",")) return num;
      return IntegerKorean.convertFromString(num);
    })
    .replace(
      /(v?)([\d.]+)([ab]?)/g,
      (_, versionMark: string, num: string, postfix: string) => {
        const dotCount = num.split(".").length - 1;
        const unmarked = versionMark === "";
        if (unmarked && dotCount === 0) {
          // A plain number is read as an integer
          return IntegerKorean.convertFromString(num) + postfix;
        }
        if (unmarked && dotCount === 1) {
          // A decimal: integer reading before the dot, float reading after it
          const [intPart, floatPart] = num.split(".");
          return (
            IntegerKorean.convertFromString(intPart ?? "") +
            "쩜" +
            FloatKorean.convert(floatPart ?? "") +
            postfix
          );
        }
        if ((versionMark === "v" || postfix.length > 0) && dotCount > 1) {
          // Version notation gets a "버전" prefix and a spoken a/b postfix
          return (
            "버전" +
            FloatKorean.convert(num) +
            (processNumber.VersionPostfix[
              postfix as keyof typeof processNumber.VersionPostfix
            ] ?? "")
          );
        }
        // Anything else goes to FloatKorean as-is (e.g. 111.111.111.111 ip address).
        // NOTE(review): a leading "v" with fewer than two dots (v1, v1.2) lands
        // here and the "v" is consumed without being re-emitted — confirm intent.
        return FloatKorean.convert(num) + postfix;
      },
    );
}
// Spoken forms of the unit prefixes and version postfixes read by processNumber().
export namespace processNumber {
  // Decimal (k, m, g, ...) and binary (ki, mi, gi, ...) data-size prefixes,
  // keyed by the lower-cased prefix. "ei" (exbi) follows the same
  // 키비/메비/기비 pattern as its neighbours.
  // prettier-ignore
  export const DatasizePrefix = {
    k: "킬로", ki: "키비", m: "메가", mi: "메비",
    g: "기가", gi: "기비", t: "테라", ti: "테비",
    p: "페타", pi: "페비", e: "엑사", ei: "엑스비",
    z: "제타", zi: "제비", y: "요타", yi: "요비",
  };
  // Litre prefixes; the empty key is the bare unit
  // prettier-ignore
  export const LiterPrefix = { m: "밀리", "": "" };
  // Metre prefixes; the empty key is the bare unit
  // prettier-ignore
  export const MeterPrefix = {
    m: "밀리", c: "센치", "": "", k: "킬로",
  };
  // Spoken form of a trailing a/b on a version string
  // prettier-ignore
  export const VersionPostfix = {
    a: "알파", b: "베타",
  };
}
/**
 * Replace unicode symbols and emoji with Korean descriptions.
 *
 * Symbols listed in `processEmoji.UnicodeSymbols` and pictographs found in
 * the emoji description table are replaced by their description; every emoji
 * character that is still left afterwards is blanked to a space.
 *
 * @param input Text that may contain symbols or emoji.
 * @returns The text with symbols and emoji described or blanked.
 */
export function processEmoji(input: string): string {
  return (
    input
      .replace(
        processEmoji.UnicodeSymbolsRegex,
        (content: string) =>
          processEmoji.UnicodeSymbols[
            content as keyof typeof processEmoji.UnicodeSymbols
          ] ?? content,
      )
      .replace(
        /\p{Extended_Pictographic}/gu,
        (content: string) =>
          EmojiDescriptions[content as keyof typeof EmojiDescriptions] ??
          content,
      )
      // Global, so every leftover emoji is blanked rather than only the first.
      // The Emoji property also covers the ASCII keycap bases (digits, # and *),
      // which are ordinary text here, so the lookahead keeps them.
      .replace(/(?![0-9#*])\p{Emoji}/gu, " ")
  );
}
// Symbol table and derived regex read by processEmoji().
export namespace processEmoji {
  // Non-emoji unicode symbols and their spoken Korean description
  export const UnicodeSymbols = {
    "㎢": "제곱킬로미터",
    "㎡": "제곱미터",
    "↑": "위쪽 화살표",
    "↓": "아래쪽 화살표",
    "←": "왼쪽 화살표",
    "→": "오른쪽 화살표",
    "↔": "좌우 화살표",
    "↖": "왼쪽 위 화살표",
    "↗": "오른쪽 위 화살표",
    "↘": "오른쪽 아래 화살표",
    "↙": "왼쪽 아래 화살표",
    "™": "트레이드마크",
  };
  // Character class holding exactly the keys above. The keys are joined with
  // an empty separator: the default separator of join() is ",", which would
  // add the comma to the class.
  export const UnicodeSymbolsRegex = new RegExp(
    "[" + Object.keys(UnicodeSymbols).join("") + "]",
    "gu",
  );
}
/**
 * Replace fenced code blocks and http(s) links with short spoken placeholders.
 *
 * A fenced block becomes "코드블럭", prefixed by the spoken language name when
 * the text before the first newline inside the fence is a known language tag
 * (compared case-insensitively). A link becomes a per-site label, a specific
 * phrase for the GIFs in `GIFMap`, or "링크 " when nothing else applies.
 *
 * @param input Text that may contain ``` fences or links.
 * @returns The text with fences and links replaced.
 */
export function processMarkdown(input: string): string {
  return input
    .replace(/```([\s\S]*?)```/g, (_, content: string) => {
      // The tag is everything before the first newline. Reading up to the
      // newline (rather than a fixed-width slice) lets the longest tag match too.
      const newlineAt = content.indexOf("\n");
      if (newlineAt === -1 || newlineAt > processMarkdown.LangPrefixMaxLength) {
        return "코드블럭";
      }
      const tag = content.substring(0, newlineAt).toLowerCase();
      // Own-property check so inherited names such as "constructor" never match
      if (
        !Object.prototype.hasOwnProperty.call(processMarkdown.LangPrefixes, tag)
      ) {
        return "코드블럭";
      }
      const spoken =
        processMarkdown.LangPrefixes[
          tag as keyof typeof processMarkdown.LangPrefixes
        ];
      return spoken + " " + "코드블럭";
    })
    .replace(/[hH][tT]{2}[pP][sS]?:\/\/(\S+)/g, (_, url: string) => {
      // `url` is the link without its scheme. Exact GIF matches win first.
      if (Object.prototype.hasOwnProperty.call(processMarkdown.GIFMap, url)) {
        return processMarkdown.GIFMap[
          url as keyof typeof processMarkdown.GIFMap
        ];
      }
      for (const [prefixes, label] of processMarkdown.LinkLabels) {
        if (prefixes.some((prefix) => url.startsWith(prefix))) {
          return label;
        }
      }
      return "링크 ";
    });
}
export namespace processMarkdown {
  // Code-fence language tags (lower case) and their spoken names
  export const LangPrefixes = {
    typescript: "타입스크립트",
    javascript: "자바스크립트",
    java: "자바",
    kotlin: "코틀린",
    rust: "러스트",
    lua: "루아",
    json: "제이슨",
    yaml: "야믈",
    yml: "야믈",
    toml: "토믈",
    xml: "엑스엠엘",
    julia: "줄리아",
    matlab: "매트랩",
    erlang: "얼랭",
    elixir: "엘릭서",
    // Misspelled key kept so existing lookups keep working
    elxir: "엘릭서",
    zig: "지그",
    txt: "텍스트",
    vim: "빔",
    perl: "펄",
    php: "피에이치피",
    lisp: "리스프",
    postscript: "포스트스크립트",
    ghostscript: "고스트스크립트",
    fortran: "포트란",
    algol: "알골",
    scala: "스칼라",
    haskell: "하스켈",
    basic: "베이직",
    cpp: "씨플플",
    "c++": "씨플플",
    csharp: "씨샵",
    cs: "씨샵",
    "c#": "씨샵",
    c: "씨",
    h: "헤더",
    d: "디",
    awk: "에이더블류케이",
    pl: "펄",
    pwsh: "파워쉘",
    powershell: "파워쉘",
    cmd: "씨엠디",
    sh: "쉘",
    // Same reading as pwsh/powershell
    ps1: "파워쉘",
    bat: "배치파일",
    bash: "베시스크립트",
    tex: "텍",
    dart: "다트",
    go: "고랭",
    python: "파이썬",
    swift: "스위프트",
    css: "씨에스에스",
    html: "에이치티엠엘",
    latex: "레이텍",
    md: "마크다운",
    markdown: "마크다운",
    py: "파이썬",
    hs: "하스켈",
    rs: "러스트",
    kt: "코틀린",
    js: "자스",
    ts: "타스",
    tsx: "리액트 타입스크립트",
    jsx: "리액트 자바스크립트",
    an: "에이엔",
    parlance: "팔렌스",
  };
  // Length of the longest language tag; a longer first line cannot be a tag
  export const LangPrefixMaxLength = Math.max(
    0,
    ...Object.keys(LangPrefixes).map((key) => key.length),
  );
  // Scheme-less URL prefixes and the label spoken for them, checked in order
  export const LinkLabels: ReadonlyArray<readonly [readonly string[], string]> =
    [
      [
        ["tenor.com/view", "images-ext-1.discordapp.net/external/"],
        "움짤! ",
      ],
      [["www.youtube.com/", "youtube.com/", "youtu.be/"], "유튜브 영상! "],
      [["www.reddit.com/", "reddit.com/"], "레딧 링크! "],
      [["www.instagram.com/", "instagram.com/"], "인스타 링크! "],
      [["x.com/"], "엑스 링크! "],
      [["github.com/"], "깃허브 링크! "],
      [["store.steampowered.com"], "스팀 스토어 링크! "],
      [["steamcommunity.com"], "스팀 커뮤니티 링크! "],
    ];
  // Exact scheme-less URLs of specific GIFs and the phrase spoken for each
  export const GIFMap = {
    "tenor.com/view/majo-no-tabitabi-the-journey-of-elaina-elaina-windy-hair-gif-19187698":
      "화난 일레이나",
    "tenor.com/view/majo-no-tabitabi-the-journey-of-elaina-elaina-sparkle-amazed-gif-18827847":
      "일레이나 반짝반짝!",
    "images-ext-1.discordapp.net/external/C3xPFuUxs16jY25AR3NvsIDezaOtib9wozhLBWejZk4/https/media.tenor.com/bUd8mk4ufwsAAAPo/anime-disappointment.mp4":
      "일레이나 절래절래",
    "images-ext-1.discordapp.net/external/SXv4qgpy2r1Gx-dNxhcfJle6AXDaH_SToRjEBYYaup0/https/media.tenor.com/nDDxJc4FDwEAAAPo/cute.mp4":
      "일레이나 끄덕",
    "tenor.com/view/majo-no-tabitabi-the-journey-of-elaina-elaina-what-gif-19011602":
      "당황한 일레이나",
    "images-ext-1.discordapp.net/external/2R41WcvNJwYMD69UKls2cDa_hEL-rzCRCFvOi2DDOVo/https/media.tenor.com/sU3RCOixDbgAAAPo/majo-no-tabitabi-the-journey-of-elaina.mp4":
      "일레이나 손짓",
  };
}
/**
 * Replace symbols with their spoken Korean names and tidy what is left.
 *
 * Each character listed in `processSymbol.SymbolMap` is replaced by its name
 * (some map to an empty string and simply vanish), every run of "?"/"!" is
 * reduced to its first character, and runs of spaces, tabs, form feeds and
 * carriage returns collapse to one space.
 *
 * @param input Text that may contain symbols.
 * @returns The text with symbols spelled out.
 */
export function processSymbol(input: string): string {
  const names: Readonly<Record<string, string | undefined>> =
    processSymbol.SymbolMap;
  return (
    input
      // A character with no entry is kept as-is, so the replacer can never
      // hand `undefined` to replace() and produce the text "undefined"
      .replace(processSymbol.SymbolMapRegExp, (symbol) => names[symbol] ?? symbol)
      .replace(/[?!]+/g, (run) => run.charAt(0))
      .replace(/[ \t\f\r]+/g, " ")
  );
}
export namespace processSymbol {
  // Symbol => spoken name; an empty string removes the symbol
  export const SymbolMap = {
    "%": "퍼센트",
    $: "달러",
    "^": "캐럿",
    "&": "엔드",
    "*": "스타",
    "#": "샵",
    "@": "엣",
    ".": "쩜",
    "-": "마이너스",
    "+": "플러스",
    _: "언더바",
    "=": "이퀄",
    "/": "슬래쉬",
    "~": "물결표",
    "\\": "역슬래쉬",
    "♡": "하트 ",
    "|": "",
    ">": "",
    "<": "",
    ":": "콜론",
    ";": "세미콜론",
  };
  // Character class holding exactly the keys above, each backslash-escaped.
  // The pieces are joined with an empty separator: the default separator of
  // join() is ",", which would add the comma to the class.
  export const SymbolMapRegExp = new RegExp(
    "[" +
      Object.keys(SymbolMap)
        .map((key) => "\\" + key)
        .join("") +
      "]",
    "g",
  );
}
/**
 * Read out symbols that stand on their own.
 *
 * When the whole input is nothing but ?, !, ' and " followed by exactly one
 * space, each symbol is replaced by its name from `IsolatedSymbolMap` (the
 * space itself contributes nothing). Independently, "||" and "&&" surrounded
 * by whitespace are replaced by spoken words.
 *
 * @param input Text to inspect.
 * @returns The text with isolated symbols spelled out.
 */
export function processIsolatedSymbol(input: string): string {
  const names: Readonly<Record<string, string | undefined>> =
    processIsolatedSymbol.IsolatedSymbolMap;
  const spellOut = (symbols: string): string => {
    let spoken = "";
    for (const symbol of symbols) {
      // Characters without an entry (the trailing space) add nothing
      spoken += names[symbol] ?? "";
    }
    return spoken;
  };
  const spelled = input.replace(/^[?!'"]+ $/, (whole) => spellOut(whole));
  const withOr = spelled.replace(/\s\|\|\s/g, " 오얼 ");
  return withOr.replace(/\s&&\s/g, " 엔드 ");
}
export namespace processIsolatedSymbol {
  // Spoken names of the symbols handled when they make up the whole input
  export const IsolatedSymbolMap = {
    "?": "물음표",
    "!": "느낌표",
    "'": "쿼트",
    '"': "더블쿼트",
  };
}
/**
 * Run the whole normalisation pipeline over one piece of text.
 *
 * The input is unicode-normalised and given one trailing space, passed through
 * each process* stage in a fixed order, and finally has every whitespace run
 * squeezed to a single space and its ends trimmed.
 *
 * @param input Raw text.
 * @returns The normalised text.
 */
export function saferKorean(input: string): string {
  // Order matters: each stage consumes the output of the one before it
  const stages = [
    processUnsounds,
    processIsolatedSymbol,
    processMarkdown,
    processKorean,
    processNumber,
    processSymbol,
    processEmoji,
  ];
  let text = input.normalize() + " ";
  for (const stage of stages) {
    text = text.let((value) => stage(value));
  }
  return text.replace(/\s+/g, " ").trim();
}