yaejunyang/packages/utils/saferKorean.ts
2026-05-21 15:15:53 +00:00

291 lines
8.8 KiB
TypeScript

import CallingNumberKorean from "./callingNumberKorean";
import FloatKorean from "./floatKorean";
import IntegerKorean from "./integerKorean";
import PhoneNumberKorean from "./phoneNumberKorean";
/**
 * Korean names for punctuation marks.
 *
 * `saferKorean` consults this table only when the whole message consists of
 * nothing but these characters, in which case each one is read out by name.
 */
export const IsolatedSymbolMap = {
    "?": "물음표",
    "!": "느낌표",
    "'": "쿼트",
    '"': "더블쿼트",
};
/**
 * Korean readings for individual symbol characters.
 *
 * An empty string means the symbol is dropped from the output rather than
 * being read aloud. The value for "♡" intentionally carries a trailing space.
 */
export const SymbolMap = {
    // Keyboard symbols
    "%": "퍼센트",
    "$": "달러",
    "^": "캐럿",
    "&": "엔드",
    "*": "스타",
    "#": "샵",
    "@": "엣",
    ".": "쩜",
    "-": "마이너스",
    "+": "플러스",
    "_": "언더바",
    "=": "이퀄",
    "/": "슬래쉬",
    "~": "물결표",
    "\\": "역슬래쉬",
    "♡": "하트 ",

    // Symbols that are silenced
    "|": "",
    ">": "",
    "<": "",

    // Punctuation
    ":": "콜론",
    ";": "세미콜론",
};
/**
 * Korean readings for the single-letter pre-release marker that may follow a
 * version number (`a` = alpha, `b` = beta).
 */
export const VersionPostfix = {
    a: "알파",
    b: "베타",
};
/**
 * Korean readings for the language tag that can follow the opening fence of a
 * code block (```ts, ```python, ...).
 *
 * Keys are lowercase; `saferKorean` lowercases the tag before comparing.
 */
export const LangPrefixs = {
    "typescript": "타입스크립트",
    "javascript": "자바스크립트",
    "java": "자바",
    "kotlin": "코틀린",
    "rust": "러스트",
    "lua": "루아",
    "json": "제이슨",
    "yaml": "야믈",
    "yml": "야믈",
    "toml": "토믈",
    "julia": "줄리아",
    "matlab": "매트랩",
    "erlang": "얼랭",
    "elixir": "엘릭서",
    // Misspelled key kept so that anything relying on it keeps working.
    "elxir": "엘릭서",
    "cpp": "씨플플",
    "c++": "씨플플",
    "csharp": "씨샵",
    "cs": "씨샵",
    "c#": "씨샵",
    "c": "씨",
    "pwsh": "파워쉘",
    "powershell": "파워쉘",
    "cmd": "씨엠디",
    "sh": "쉘",
    // Same reading as "pwsh" / "powershell".
    "ps1": "파워쉘",
    "bat": "배치파일",
    "bash": "베시스크립트",
    "tex": "텍",
    "latex": "레이텍",
    "md": "마크다운",
    "markdown": "마크다운",
    "rs": "러스트",
    "kt": "코틀린",
    "js": "자바스크립트",
    "ts": "타입스크립트",
    "tsx": "리액트 타입스크립트",
    "jsx": "리액트 자바스크립트",
};
/**
 * Length of the longest key in `LangPrefixs` (0 when the table is empty).
 * Computed once when the module is loaded.
 */
export const LangPrefixMaxLength = Object.keys(LangPrefixs).reduce(
    (longest, tag) => Math.max(tag.length, longest),
    0,
);
/**
 * Korean letter names for standalone consonant jamo: the fourteen basic
 * consonants followed by the five doubled ones.
 */
export const ChoseongMap = {
    // Basic consonants
    "ㄱ": "기역",
    "ㄴ": "니은",
    "ㄷ": "디귿",
    "ㄹ": "리을",
    "ㅁ": "미음",
    "ㅂ": "비읍",
    "ㅅ": "시옷",
    "ㅇ": "이응",
    "ㅈ": "지읒",
    "ㅊ": "치읓",
    "ㅋ": "키읔",
    "ㅌ": "티읕",
    "ㅍ": "피읖",
    "ㅎ": "히읗",

    // Doubled consonants
    "ㄲ": "쌍기역",
    "ㄸ": "쌍디귿",
    "ㅃ": "쌍비읍",
    "ㅆ": "쌍시옷",
    "ㅉ": "쌍지읒",
};
/**
 * Korean readings for the magnitude prefixes of byte units, keyed by the
 * lowercased prefix letters.
 *
 * A single letter is the decimal prefix (kilo, mega, ...); the letter followed
 * by `i` is the matching binary prefix (kibi, mebi, ...). Note that `m` reads
 * as mega here, not milli, because `saferKorean` uses this table for byte
 * sizes only and lowercases the prefix before the lookup.
 */
export const SIPrefix = {
    "k": "킬로",
    "ki": "키비",
    "m": "메가",
    "mi": "메비",
    "g": "기가",
    "gi": "기비",
    "t": "테라",
    "ti": "테비",
    "p": "페타",
    "pi": "페비",
    "e": "엑사",
    // exbi; follows the same "-비" pattern as the other binary prefixes
    "ei": "엑스비",
    "z": "제타",
    "zi": "제비",
    "y": "요타",
    "yi": "요비",
};
/**
 * Korean readings for the prefixes accepted in front of `l` (liter).
 * The empty key stands for "no prefix".
 */
export const LiterPrefix = {
    m: "밀리",
    "": "",
};
/**
 * Korean readings for the prefixes accepted in front of `m` (meter).
 * The empty key stands for "no prefix".
 */
export const MeterPrefix = {
    m: "밀리",
    c: "센치",
    "": "",
    k: "킬로",
};
/**
 * Removes or softens full stops and commas that carry no spoken meaning.
 *
 * Applied in order:
 *  1. a run of `.` / `,` at the very end of the input is removed;
 *  2. any run of two or more `.` / `,` is removed;
 *  3. a single `.` / `,` followed by a whitespace character is replaced,
 *     together with that whitespace character, by one space.
 *
 * A lone `.` or `,` between non-whitespace characters (as in `1.5` or
 * `1,000`) is left alone.
 *
 * @param input Text to clean up.
 * @returns The cleaned text.
 */
export function processDots(input: string): string {
    const withoutTrailingRun = input.replace(/[\.,]+$/, "");
    const withoutRepeats = withoutTrailingRun.replace(/[\.,]{2,}/g, "");
    const separatorsAsSpaces = withoutRepeats.replace(/[\.,]\s/g, " ");
    return separatorsAsSpaces;
}
/**
 * Rewrites a message so that symbols, numbers, units, links, code blocks and
 * bare-jamo chat shorthand are spelled out as Korean words.
 *
 * The input goes through an ordered chain of replacements; every step sees the
 * output of the previous one, so the order of the steps matters:
 *  1. punctuation cleanup (`processDots`), messages consisting only of
 *     `? ! ' "`, and the `||` / `&&` operators;
 *  2. fenced code blocks and http(s) links, each collapsed to a short label;
 *  3. chat shorthand written with bare jamo (ㅋㅋ, ㅇㅋ, ㄱㄱ, ...);
 *  4. phone numbers, byte / liter / meter quantities, counters, and plain,
 *     decimal, version-like and dotted numbers;
 *  5. remaining symbols, emoji and redundant whitespace.
 *
 * @param input Raw message text.
 * @returns The rewritten text with leading and trailing whitespace removed.
 */
export function saferKorean(input: string): string {
    // A space is appended so that patterns ending in `\s` also match at the
    // end of the input; it is trimmed off again in the last step.
    return processDots(input + " ")
        // Process isolated symbols: a message made up solely of ? ! ' " is
        // read out symbol by symbol. The appended space has no entry in the
        // map and contributes an empty string to the join.
        .replace(/^[\?\!\'\"]+ $/, (total) => (
            [...total].map(element => IsolatedSymbolMap[
                element as keyof typeof IsolatedSymbolMap
            ]).join("")
        ))
        .replace(/\s\|\|\s/g, " 오얼 ")
        .replace(/\s\&\&\s/g, " 엔드 ")
        // Process codeblock: the whole block is replaced by a label, prefixed
        // with the language name when the opening fence carries a known tag.
        .replace(/\`\`\`([\s\S]*?)\`\`\`/g, (_, content: string) => {
            // The window must be one character longer than the longest tag,
            // otherwise the newline that terminates that tag is cut off and
            // the `startsWith(key + "\n")` test below can never succeed for it.
            const code = content.substring(0, LangPrefixMaxLength + 1).toLowerCase();
            let lang = "";
            for (const [key, value] of Object.entries(LangPrefixs)) {
                if (code.startsWith(key + "\n")) {
                    lang = value + " ";
                    break;
                }
            }
            return lang + "코드블럭";
        })
        // Process link: tenor.com/view URLs get their own label, every other
        // http(s) URL is replaced by a generic one.
        .replace(/[hH][tT]{2}[pP][sS]?:\/\/(\S+)/g, (_, url: string) => {
            if (url.startsWith("tenor.com/view")) {
                return "움짤";
            }
            return "링크";
        })
        // Process koreans: chat shorthand written with bare jamo.
        .replace(/ㅋ{2,}/g, (content) => "크".repeat(content.length))
        .replace(/[아ㅏ]{3,}/g, "아아아")
        .replace(/ㅌ{2,}/g, "틔틔")
        .replace(/ㄷ{2,}/g, "덜덜")
        .replace(/ㄴ{2,}/g, "노노")
        // Repeated pairs are read once per pair, at most four times.
        .replace(/(ㅇㅋ)+/g, (content: string) => {
            return "오키".repeat(
                Math.min(Math.floor(content.length / 2), 4)
            );
        })
        .replace(/(ㅊㅋ)+/g, (content: string) => {
            return "추카".repeat(
                Math.min(Math.floor(content.length / 2), 4)
            );
        })
        .replace(/ㅊ{2,}/g, "추추")
        // Only runs of exactly two or three ㄱ are shorthand; other lengths
        // fall through to the letter-name step below.
        .replace(/ㄱ+/g, (content: string) => {
            if (content.length === 2) {
                return "고고";
            } else if (content.length === 3) {
                return "고고고";
            }
            return content;
        })
        // Remaining single consonants are read by their letter name. The range
        // ㄱ-ㅎ also contains compound jamo (ㄳ, ㄵ, ㄺ, ...) that have no entry
        // in ChoseongMap; those are kept unchanged, because a callback that
        // returns undefined would insert the text "undefined".
        .replace(/[ㄱ-ㅎㄲㄸㅃㅆㅉ]/g, (char: string) => (
            ChoseongMap[char as keyof typeof ChoseongMap] ?? char
        ))
        // Process number, unit
        // Digit/hyphen runs that contain a hyphen are handed to the phone
        // number converter, together with an optional leading "+NN " part.
        .replace(/(\+\d+[\s\-]+)?([\d\-]+)/g, (_, prefix: string, phone: string) => {
            const all = (prefix ?? "") + phone;
            if (!phone.includes("-")) return all;
            return PhoneNumberKorean.convert(all);
        })
        .replace(/([\d,]+)([kKMmgGtTpP][iI]?)[bB]/g, (_, num: string, mod: string) => {
            // 10kib => 십키비바이트
            num = IntegerKorean.convertFromString(num);
            mod = SIPrefix[mod.toLowerCase() as keyof typeof SIPrefix];
            return `${num} ${mod}바이트 `;
        })
        .replace(/([\d,]+)([m]?)l\s/g, (_, num: string, mod: string) => {
            // 10l => 십리터
            num = IntegerKorean.convertFromString(num);
            mod = LiterPrefix[mod as keyof typeof LiterPrefix];
            return `${num} ${mod}리터 `;
        })
        .replace(/([\d,]+)([mck]?)m\s/g, (_, num: string, mod: string) => {
            // 10m => 십미터
            num = IntegerKorean.convertFromString(num);
            mod = MeterPrefix[mod as keyof typeof MeterPrefix];
            return `${num} ${mod}미터 `;
        })
        .replace(/([\d\.]+)\s*([개살시평명])/g, (_, num: string, postfix: string) => {
            // 10명 => 열명
            // Numbers containing a dot are left for the generic number step
            // further down.
            if (num.includes(".")) {
                return num + postfix;
            }
            const intNum = parseInt(num, 10);
            if (CallingNumberKorean.canConvert(intNum)) {
                return CallingNumberKorean.convert(intNum) + postfix;
            } else {
                return IntegerKorean.convertFromString(num) + postfix;
            }
        })
        .replace(/[\d,]+/g, (num: string) => {
            // 1,000 원 => 천원
            if (!num.includes(",")) return num;
            return IntegerKorean.convertFromString(num);
        })
        .replace(/(v?)([\d\.]+)([ab]?)/g, (_, prefix: string, num: string, postfix: string) => {
            const dotCount = [...num.matchAll(/\./g)].length;
            const hasNoPrefix = prefix === "";
            if (hasNoPrefix && dotCount === 0) {
                // A plain number is read as an integer.
                return IntegerKorean.convertFromString(num) + postfix;
            } else if (hasNoPrefix && dotCount === 1) {
                // A decimal: the part before the dot is read as an integer,
                // the part after it with the float converter.
                const [intPart, floatPart] = num.split(/\./);
                return (
                    IntegerKorean.convertFromString(intPart)
                    + "쩜"
                    + FloatKorean.convert(floatPart)
                    + postfix
                );
            } else if ((prefix === "v" || postfix.length > 0) && dotCount > 1) {
                // Version notation is announced with a leading "버전".
                return (
                    "버전"
                    + FloatKorean.convert(num)
                    + (VersionPostfix[
                        postfix as keyof typeof VersionPostfix
                    ] ?? "")
                );
            } else {
                // Everything else goes through the float converter as a whole
                // (e.g. 111.111.111.111 ip address).
                // NOTE(review): a `v` followed by a number with fewer than two
                // dots (`v2`, `v1.2`) also ends up here and the captured `v`
                // is not part of the result - confirm this is intended.
                return FloatKorean.convert(num) + postfix;
            }
        })
        .replace(/㎡/g, "제곱미터")
        .replace(/㎢/g, "제곱킬로미터")
        // Process symbol
        .replace(/[\%\^\&\*\#\@\.\-\+\_\=\/\\♡\$\|\:\;\>\<]/g, (t) => (
            SymbolMap[t as keyof typeof SymbolMap]
        ))
        // A run of ? / ! is reduced to its first character.
        .replace(/([\?\!]+)/g, (_, content: string) => content[0])
        // Process emoji: every emoji becomes a space (global flag, so not just
        // the first one). This runs before the whitespace collapse so the
        // spaces it introduces are merged as well.
        .replace(/\p{Emoji}/gu, " ")
        .replace(/[ \t\f\r]+/g, " ")
        .trim();
}