yaejunyang/packages/utils/saferKorean.ts

393 lines
11 KiB
TypeScript

import CallingNumberKorean from "./callingNumberKorean.js";
import FloatKorean from "./floatKorean.js";
import IntegerKorean from "./integerKorean.js";
import PhoneNumberKorean from "./phoneNumberKorean.js";
import EmojiDescriptions from "./emoji-descriptions.json" with { type: "json" };
/**
 * Korean names for punctuation marks, used by `saferKorean` when a whole
 * message consists of nothing but these marks (so they are spelled out
 * instead of being collapsed or dropped).
 */
export const IsolatedSymbolMap = {
  "?": "물음표",
  "!": "느낌표",
  "'": "쿼트",
  "\"": "더블쿼트",
};
/**
 * Korean readings for individual symbols.
 *
 * An empty string means the symbol is removed rather than read. Note that
 * `saferKorean`'s symbol pass only substitutes the characters listed in its
 * own regex, which does not include "~".
 */
export const SymbolMap = {
  "%": "퍼센트",
  "$": "달러",
  "^": "캐럿",
  "&": "엔드",
  "*": "스타",
  "#": "샵",
  "@": "엣",
  ".": "쩜",
  "-": "마이너스",
  "+": "플러스",
  "_": "언더바",
  "=": "이퀄",
  "/": "슬래쉬",
  "~": "물결표",
  "\\": "역슬래쉬",
  // The trailing space after "하트" is part of the value.
  "♡": "하트 ",
  // Silenced symbols.
  "|": "",
  ">": "",
  "<": "",
  ":": "콜론",
  ";": "세미콜론",
};
/**
 * Korean readings for the single-letter pre-release markers that may follow
 * a version number ("a" = alpha, "b" = beta).
 */
export const VersionPostfix = {
  "a": "알파",
  "b": "베타",
};
/**
 * Korean readings for code-fence language tags (the word that directly
 * follows an opening ``` fence), keyed by the lowercase tag.
 *
 * Both full language names and common file extensions are listed.
 */
export const LangPrefixes = {
  typescript: "타입스크립트",
  javascript: "자바스크립트",
  java: "자바",
  kotlin: "코틀린",
  rust: "러스트",
  lua: "루아",
  json: "제이슨",
  yaml: "야믈",
  yml: "야믈",
  toml: "토믈",
  xml: "엑스엠엘",
  julia: "줄리아",
  matlab: "매트랩",
  erlang: "얼랭",
  elixir: "엘릭서",
  // Historical misspelling of "elixir"; kept so existing `keyof` users and
  // lookups keep working.
  elxir: "엘릭서",
  zig: "지그",
  txt: "텍스트",
  vim: "빔",
  perl: "펄",
  php: "피에이치피",
  lisp: "리스프",
  postscript: "포스트스크립트",
  ghostscript: "고스트스크립트",
  fortran: "포트란",
  algol: "알골",
  scala: "스칼라",
  haskell: "하스켈",
  basic: "베이직",
  cpp: "씨플플",
  "c++": "씨플플",
  csharp: "씨샵",
  cs: "씨샵",
  "c#": "씨샵",
  c: "씨",
  h: "헤더",
  d: "디",
  awk: "에이더블류케이",
  pl: "펄",
  pwsh: "파워쉘",
  powershell: "파워쉘",
  cmd: "씨엠디",
  sh: "쉘",
  // Same reading as `pwsh` / `powershell` (was the typo "파워셀").
  ps1: "파워쉘",
  bat: "배치파일",
  bash: "베시스크립트",
  tex: "텍",
  dart: "다트",
  go: "고랭",
  python: "파이썬",
  swift: "스위프트",
  css: "씨에스에스",
  html: "에이치티엠엘",
  latex: "레이텍",
  md: "마크다운",
  markdown: "마크다운",
  py: "파이썬",
  hs: "하스켈",
  rs: "러스트",
  kt: "코틀린",
  js: "자스",
  ts: "타스",
  tsx: "리액트 타입스크립트",
  jsx: "리액트 자바스크립트",
  an: "에이엔",
  parlance: "팔렌스",
};
/**
 * Length of the longest key in `LangPrefixes` (0 if the table is empty),
 * computed once when the module is loaded.
 */
export const LangPrefixMaxLength = Object.keys(LangPrefixes).reduce(
  (longest, tag) => Math.max(tag.length, longest),
  0,
);
/**
 * Korean letter names for stand-alone consonant jamo.
 *
 * Keys are Hangul *compatibility* jamo (the U+3131–U+314E block), which is
 * what `saferKorean` matches with `[ㄱ-ㅎㄲㄸㅃㅆㅉ]`. Compound final
 * consonants such as "ㄳ" have no entry here.
 */
export const ChoseongMap = {
  "ㄱ": "기역",
  "ㄴ": "니은",
  "ㄷ": "디귿",
  "ㄹ": "리을",
  "ㅁ": "미음",
  "ㅂ": "비읍",
  "ㅅ": "시옷",
  "ㅇ": "이응",
  "ㅈ": "지읒",
  "ㅊ": "치읓",
  "ㅋ": "키읔",
  "ㅌ": "티읕",
  "ㅍ": "피읖",
  "ㅎ": "히읗",
  "ㄲ": "쌍기역",
  "ㄸ": "쌍디귿",
  "ㅃ": "쌍비읍",
  "ㅆ": "쌍시옷",
  "ㅉ": "쌍지읒",
};
/**
 * Korean readings for byte-size prefixes, keyed by the lowercase prefix.
 *
 * Single letters are the decimal prefixes (kilo … yotta); the two-letter
 * "<x>i" keys are the binary prefixes (kibi … yobi).
 */
export const SIPrefix = {
  k: "킬로",
  ki: "키비",
  m: "메가",
  mi: "메비",
  g: "기가",
  gi: "기비",
  t: "테라",
  ti: "테비",
  p: "페타",
  pi: "페비",
  e: "엑사",
  // "exbi" — previously the typo "엑시".
  ei: "엑스비",
  z: "제타",
  zi: "제비",
  y: "요타",
  yi: "요비",
};
/**
 * Korean readings for the prefixes accepted in front of a litre unit.
 * The empty key stands for "no prefix".
 */
export const LiterPrefix = {
  "m": "밀리",
  "": "",
};
/**
 * Korean readings for the prefixes accepted in front of a metre unit.
 * The empty key stands for "no prefix".
 */
export const MeterPrefix = {
  "m": "밀리",
  "c": "센치",
  "": "",
  "k": "킬로",
};
/**
 * Spoken descriptions for specific, known GIF/video links.
 *
 * Keys are the URL with the leading "http://" or "https://" removed, which
 * is the form `saferKorean` captures before looking a link up here.
 */
export const GIFMap = {
  // Tenor "view" pages
  "tenor.com/view/majo-no-tabitabi-the-journey-of-elaina-elaina-windy-hair-gif-19187698":
    "화난 일레이나",
  "tenor.com/view/majo-no-tabitabi-the-journey-of-elaina-elaina-sparkle-amazed-gif-18827847":
    "일레이나 반짝반짝!",
  // Discord media-proxy links
  "images-ext-1.discordapp.net/external/C3xPFuUxs16jY25AR3NvsIDezaOtib9wozhLBWejZk4/https/media.tenor.com/bUd8mk4ufwsAAAPo/anime-disappointment.mp4":
    "일레이나 절래절래",
  "images-ext-1.discordapp.net/external/SXv4qgpy2r1Gx-dNxhcfJle6AXDaH_SToRjEBYYaup0/https/media.tenor.com/nDDxJc4FDwEAAAPo/cute.mp4":
    "일레이나 끄덕",
  "tenor.com/view/majo-no-tabitabi-the-journey-of-elaina-elaina-what-gif-19011602":
    "당황한 일레이나",
  "images-ext-1.discordapp.net/external/2R41WcvNJwYMD69UKls2cDa_hEL-rzCRCFvOi2DDOVo/https/media.tenor.com/sU3RCOixDbgAAAPo/majo-no-tabitabi-the-journey-of-elaina.mp4":
    "일레이나 손짓",
};
/**
 * Korean readings for a handful of non-ASCII unit and arrow symbols.
 */
export const UnicodeSymbols = {
  "㎢": "제곱킬로미터",
  "㎡": "제곱미터",
  "↑": "위쪽 화살표",
  "↓": "아래쪽 화살표",
  "←": "왼쪽 화살표",
  "→": "오른쪽 화살표",
  "↔": "좌우 화살표",
  "↖": "왼쪽 위 화살표",
  "↗": "오른쪽 위 화살표",
  "↘": "오른쪽 아래 화살표",
  "↙": "왼쪽 아래 화살표",
};

/**
 * Global, Unicode-aware regex matching any single key of `UnicodeSymbols`.
 *
 * The keys are joined with an explicit empty separator: `Array.prototype.join`
 * defaults to ",", which would silently add the comma to the character class.
 */
export const UnicodeSymbolsRegex = new RegExp(
  "[" + Object.keys(UnicodeSymbols).join("") + "]",
  "gu",
);
/**
 * Strips sentence punctuation made of "." and "," while leaving a single
 * dot or comma that sits between two non-space characters (e.g. "1.5",
 * "1,000") untouched.
 *
 * @param input Text to clean.
 * @returns `input` with, in this order: a trailing run of dots/commas
 *   removed, every run of two or more dots/commas removed, and every
 *   dot/comma that is followed by whitespace replaced — together with that
 *   whitespace character — by one space.
 */
export function processDots(input: string): string {
  const withoutTrailingRun = input.replace(/[.,]+$/, "");
  const withoutEllipses = withoutTrailingRun.replace(/[.,]{2,}/g, "");
  const withoutSentenceMarks = withoutEllipses.replace(/[.,]\s/g, " ");
  return withoutSentenceMarks;
}
/**
 * Builds a `String.prototype.replace` callback that turns a run of a
 * two-character chat abbreviation into `word`, once per repetition but at
 * most twice.
 */
function expandAbbreviation(word: string): (run: string) => string {
  return (run) => word.repeat(Math.min(Math.floor(run.length / 2), 2));
}

/**
 * Rewrites chat-style text into spelled-out Korean words by applying a fixed
 * sequence of replacements. The order of the passes matters: later passes
 * see the output of earlier ones.
 *
 * Passes, in order:
 *  1. Unicode-normalize, append a space, and strip sentence dots/commas
 *     (`processDots`).
 *  2. Spell out messages that consist only of `? ! ' "`; read `||` / `&&`.
 *  3. Replace fenced code blocks and links with short descriptions.
 *  4. Expand jamo-only chat abbreviations and name leftover consonant jamo.
 *  5. Read phone numbers, byte/litre/metre quantities, counted nouns,
 *     comma-grouped numbers, decimals and version strings via the imported
 *     `*Korean` converters (their exact output is defined in those modules).
 *  6. Read remaining symbols, collapse `?`/`!` runs and whitespace.
 *  7. Describe known Unicode symbols and emoji; blank out leftover emoji.
 *
 * @param input Raw message text.
 * @returns The rewritten text, trimmed.
 */
export function saferKorean(input: string): string {
  return (
    // The appended space lets the `\s`-terminated unit patterns below match
    // at the very end of the message; it is trimmed off at the end.
    processDots(input.normalize() + " ")
      // --- Isolated symbols ---------------------------------------------
      // A message made up only of ? ! ' " is spelled out mark by mark. The
      // trailing space has no entry in the map; `join` renders the
      // resulting `undefined` as an empty string.
      .replace(/^[?!'"]+ $/, (marks) =>
        [...marks]
          .map(
            (mark) => IsolatedSymbolMap[mark as keyof typeof IsolatedSymbolMap],
          )
          .join(""),
      )
      .replace(/\s\|\|\s/g, " 오얼 ")
      .replace(/\s&&\s/g, " 엔드 ")
      // --- Code blocks --------------------------------------------------
      // The first line of a fence, when it is a known language tag, is read
      // out in front of "코드블럭". The whole first line is looked up, so
      // even the longest tag in the table can match.
      .replace(/```([\s\S]*?)```/g, (_, content: string) => {
        const firstNewline = content.indexOf("\n");
        if (firstNewline > 0) {
          const tag = content.slice(0, firstNewline).toLowerCase();
          // Own-property check keeps names like "constructor" from matching.
          if (Object.prototype.hasOwnProperty.call(LangPrefixes, tag)) {
            return LangPrefixes[tag as keyof typeof LangPrefixes] + " 코드블럭";
          }
        }
        return "코드블럭";
      })
      // --- Links --------------------------------------------------------
      // `url` is everything after the scheme, up to the next whitespace.
      .replace(/[hH][tT]{2}[pP][sS]?:\/\/(\S+)/g, (_, url: string) => {
        // Own-property check: a URL such as "constructor" must not resolve
        // to something inherited from Object.prototype.
        if (Object.prototype.hasOwnProperty.call(GIFMap, url)) {
          return GIFMap[url as keyof typeof GIFMap];
        }
        if (url.startsWith("tenor.com/view")) {
          return "움짤!";
        }
        return "링크";
      })
      // --- Korean chat shorthand ----------------------------------------
      .replace(/[아ㅏ]{3,}/g, "아아아")
      // Two-jamo abbreviations: each repetition becomes one word, capped at
      // two words per run.
      .replace(/(ㄹㅇ)+/g, expandAbbreviation("리얼"))
      .replace(/(ㅇㄴ)+/g, expandAbbreviation("아니"))
      .replace(/(ㅇㅎ)+/g, expandAbbreviation("아하"))
      .replace(/(ㅇㅋ)+/g, expandAbbreviation("오키"))
      .replace(/(ㅊㅋ)+/g, expandAbbreviation("추카"))
      // Exactly two or three "ㄱ" become "고고" / "고고고"; any other run
      // length is left for the jamo-naming pass below.
      .replace(/ㄱ+/g, (run: string) =>
        run.length === 2 || run.length === 3 ? "고".repeat(run.length) : run,
      )
      .replace(/ㅋ{2,}/g, (run) => "크".repeat(run.length))
      .replace(/ㅌ{2,}/g, "틔틔")
      .replace(/ㄷ{2,}/g, "덜덜")
      .replace(/ㄴ{2,}/g, "노노")
      .replace(/ㅇ{2,}/g, "응응")
      .replace(/ㅊ{2,}/g, "추추")
      .replace(/ㅠ{2,}/g, "유유")
      .replace(/ㅜ{2,}/g, "우우")
      // Remaining consonant jamo are read by letter name. The range ㄱ-ㅎ
      // also covers compound consonants that have no entry in ChoseongMap;
      // those are kept as-is instead of becoming the text "undefined".
      .replace(
        /[ㄱ-ㅎㄲㄸㅃㅆㅉ]/g,
        (char: string) => ChoseongMap[char as keyof typeof ChoseongMap] ?? char,
      )
      // --- Numbers and units --------------------------------------------
      // Digit groups joined by "-" (optionally led by "+<digits>") go to the
      // phone-number converter; anything without a "-" is left unchanged.
      .replace(
        /(\+\d+[\s-]+)?([\d-]+)/g,
        (_, prefix: string | undefined, phone: string) => {
          const all = (prefix ?? "") + phone;
          if (!phone.includes("-")) return all;
          return PhoneNumberKorean.convert(all);
        },
      )
      // Byte sizes, e.g. "10kib" (original author's example: 십키비바이트).
      .replace(
        /([\d,]+)([kKMmgGtTpPeEzZyY][iI]?)[bB]/g,
        (_, num: string, mod: string) => {
          const amount = IntegerKorean.convertFromString(num);
          const prefix = SIPrefix[mod.toLowerCase() as keyof typeof SIPrefix];
          return `${amount} ${prefix}바이트 `;
        },
      )
      // Litres, e.g. "10l" (original author's example: 십리터). Must be
      // followed by whitespace.
      .replace(/([\d,]+)([m]?)[lL]\s/g, (_, num: string, mod: string) => {
        const amount = IntegerKorean.convertFromString(num);
        const prefix = LiterPrefix[mod as keyof typeof LiterPrefix];
        return `${amount} ${prefix}리터 `;
      })
      // Metres, e.g. "10m" (original author's example: 십미터). Must be
      // followed by whitespace.
      .replace(/([\d,]+)([mck]?)m\s/g, (_, num: string, mod: string) => {
        const amount = IntegerKorean.convertFromString(num);
        const prefix = MeterPrefix[mod as keyof typeof MeterPrefix];
        return `${amount} ${prefix}미터 `;
      })
      // Counted nouns, e.g. "10명" (original author's example: 열명). Any
      // whitespace between the number and the counter is dropped.
      .replace(
        /([\d.]+)\s*([개살시평명])/g,
        (_, num: string, postfix: string) => {
          // Values containing a dot are left for the decimal pass below.
          if (num.includes(".")) {
            return num + postfix;
          }
          const count = parseInt(num, 10);
          if (CallingNumberKorean.canConvert(count)) {
            return CallingNumberKorean.convert(count) + postfix;
          }
          return IntegerKorean.convertFromString(num) + postfix;
        },
      )
      // Comma-grouped numbers, e.g. "1,000"; plain digit runs are left for
      // the next pass.
      .replace(/[\d,]+/g, (num: string) => {
        if (!num.includes(",")) return num;
        return IntegerKorean.convertFromString(num);
      })
      // Everything numeric that is left: optional "v", digits and dots,
      // optional "a"/"b" marker.
      .replace(
        /(v?)([\d.]+)([ab]?)/g,
        (_, vPrefix: string, num: string, postfix: string) => {
          const dotCount = [...num.matchAll(/\./g)].length;
          const hasNoPrefix = vPrefix === "";
          if (hasNoPrefix && dotCount === 0) {
            // Plain number: read as an integer.
            return IntegerKorean.convertFromString(num) + postfix;
          } else if (hasNoPrefix && dotCount === 1) {
            // Decimal: integer reading before the dot, "float" reading
            // after it.
            const [intPart, floatPart] = num.split(/\./);
            return (
              IntegerKorean.convertFromString(intPart ?? "") +
              "쩜" +
              FloatKorean.convert(floatPart ?? "") +
              postfix
            );
          } else if ((vPrefix === "v" || postfix.length > 0) && dotCount > 1) {
            // Version notation: prefixed with "버전", marker read as
            // alpha/beta.
            return (
              "버전" +
              FloatKorean.convert(num) +
              (VersionPostfix[postfix as keyof typeof VersionPostfix] ?? "")
            );
          } else {
            // Anything else goes through the "float" converter as a whole
            // (original author's example: an IP address such as
            // 111.111.111.111). A leading "v" is dropped here.
            return FloatKorean.convert(num) + postfix;
          }
        },
      )
      // --- Symbols ------------------------------------------------------
      // Every character in this class has an entry in SymbolMap.
      .replace(
        /[%^&*#@.\-+_=/\\♡$|:;><]/g,
        (symbol) => SymbolMap[symbol as keyof typeof SymbolMap],
      )
      // Runs of ? / ! collapse to their first character.
      .replace(/([?!]+)/g, (_, run: string): string => run[0] ?? "")
      // Collapse horizontal whitespace; newlines are kept.
      .replace(/[ \t\f\r]+/g, " ")
      // --- Unicode symbols and emoji ------------------------------------
      .replace(
        UnicodeSymbolsRegex,
        (symbol: string) =>
          UnicodeSymbols[symbol as keyof typeof UnicodeSymbols] ?? symbol,
      )
      .replace(/\p{Extended_Pictographic}/gu, (emoji: string) => {
        return (
          EmojiDescriptions[emoji as keyof typeof EmojiDescriptions] ?? emoji
        );
      })
      // Blank out every emoji that is still present. `\p{Emoji}` also
      // matches ASCII digits, "#" and "*", which are ordinary text here and
      // are therefore excluded.
      .replace(/(?![\d#*])\p{Emoji}/gu, " ")
      .trim()
  );
}