diff --git a/packages/tts/index.ts b/packages/tts/index.ts index 17ba50d..5aa7223 100644 --- a/packages/tts/index.ts +++ b/packages/tts/index.ts @@ -5,13 +5,13 @@ import { Readable } from "stream"; import { createHash } from "node:crypto"; import { join } from "node:path"; import { existsSync } from "node:fs"; +import { saferKorean } from "../utils/saferKorean"; export abstract class TTSModelBase { public ttsify(input: string): string { - return input - .replace(/:[^:]+:/g, (text: string): string => (TTSModelBase.EMOJI_MAP[text] ?? "이모지")) - .replace(/[:*"<>|]/g, "") - .replace(/[\t\n]/g, " ") + return saferKorean( + input.replace(/:[^:]+:/g, (text: string): string => (TTSModelBase.EMOJI_MAP[text] ?? "이모지")) + ); } public abstract createRequestId(text: string): RequestId public abstract getVoiceBuffer(id: RequestId): Promise diff --git a/packages/tts/papago.ts b/packages/tts/papago.ts index 7702c84..92803ca 100644 --- a/packages/tts/papago.ts +++ b/packages/tts/papago.ts @@ -2,7 +2,6 @@ import { createHmac } from "crypto"; import { join } from "path"; import fetch from "../utils/fetch"; import TTSModelBase from "."; -import { saferKorean } from "../utils/saferKorean"; export class TTSPapagoModel extends TTSModelBase { protected cachedVoice: Map> @@ -11,9 +10,7 @@ export class TTSPapagoModel extends TTSModelBase { this.cachedVoice = new Map(); } ttsify(input: string): string { - return super.ttsify(saferKorean( - input - )) + return super.ttsify(input) } public getVoicePath(id: TTSPapagoModel.RequestId): string { const audioFileName = TTSModelBase.hashAudioFile(id.text, `.${id.speaker}.${id.speed.replace(/\-/g, "_")}`); diff --git a/packages/tts/supertonic.ts b/packages/tts/supertonic.ts index f23a1f7..28a8663 100644 --- a/packages/tts/supertonic.ts +++ b/packages/tts/supertonic.ts @@ -1,7 +1,6 @@ import { join } from "path"; import fetch from "../utils/fetch"; import TTSModelBase from "."; -import { saferKorean } from "../utils/saferKorean"; export class TTSSupertonicModel extends TTSModelBase { protected override cachedVoice: Map> @@ -10,9 +9,7 @@ export class TTSSupertonicModel extends TTSModelBase { protected cachedVoice: Map> @@ -15,11 +14,8 @@ export class TTSTypecastModel extends TTSModelBase { this.lastUseApiKeyPath = join(cwd(), "cache", "typecast", "lastUseApiToken"); } ttsify(input: string): string { - return super.ttsify( - saferKorean(input) - .replace(/ㄴㄴ/g, "노노") - .replace(/ㅇㅋ/g, "오키") + input .replace(/ㅜㅜ/g, "눙물") .replace(/빵/g, "빵 크크") .replace(/[\?]+ *ㄴ/g, "물음표ㄴ") diff --git a/packages/utils/saferKorean.ts b/packages/utils/saferKorean.ts index a46ecbc..39152c0 100644 --- a/packages/utils/saferKorean.ts +++ b/packages/utils/saferKorean.ts @@ -146,9 +146,9 @@ export function processDots(input: string): string { } export function saferKorean(input: string): string { - return processDots(input) + return processDots(input + " ") // Process isolated symbols - .replace(/^[\?\!\'\"]+$/, (total)=>( + .replace(/^[\?\!\'\"]+ $/, (total)=>( [...total].map(element => IsolatedSymbolMap[ element as keyof typeof IsolatedSymbolMap ]).join("") @@ -169,11 +169,25 @@ export function saferKorean(input: string): string { return lang + "코드블럭"; }) + // Process link + .replace(/[hH][tT]{2}[pP][sS]?:\/\/(\S+)/g, (_, url: string) => { + if (url.startsWith("tenor.com/view")) { + return "움짤"; + } + return "링크"; + }) + // Process koreans .replace(/ㅋ{2,}/g, (content) => "크".repeat(content.length)) .replace(/[아ㅏ]{3,}/g, "아아아") .replace(/ㅌ{2,}/g, "틔틔") .replace(/ㄷ{2,}/g, "덜덜") + .replace(/ㄴ{2,}/g, "노노") + .replace(/(ㅇㅋ)+/g, (content: string) => { + return "오키".repeat( + Math.min(Math.floor(content.length / 2), 4) + ); + }) .replace(/(ㅊㅋ)+/g, (content: string) => { return "추카".repeat( Math.min(Math.floor(content.length / 2), 4) @@ -197,17 +211,17 @@ export function saferKorean(input: string): string { mod = SIPrefix[mod.toLowerCase() as keyof typeof SIPrefix]; return `${num} ${mod}바이트 `; }) - .replace(/([\d,]+)([mck]?)m/g, (_, num: string, mod: string) => { - // 10m => 십미터 - num = IntegerKorean.convertFromString(num); - mod = MeterPrefix[mod as keyof typeof MeterPrefix]; - return `${num} ${mod}미터`; - }) - .replace(/([\d,]+)([m]?)l/g, (_, num: string, mod: string) => { + .replace(/([\d,]+)([m]?)l\s/g, (_, num: string, mod: string) => { // 10l => 십리터 num = IntegerKorean.convertFromString(num); mod = LiterPrefix[mod as keyof typeof LiterPrefix]; - return `${num} ${mod}리터`; + return `${num} ${mod}리터 `; + }) + .replace(/([\d,]+)([mck]?)m\s/g, (_, num: string, mod: string) => { + // 10m => 십미터 + num = IntegerKorean.convertFromString(num); + mod = MeterPrefix[mod as keyof typeof MeterPrefix]; + return `${num} ${mod}미터 `; }) .replace(/([\d\.]+)\s*([개살시평명])/g, (_, num: string, postfix: string)=>{ // 10명 => 열명 @@ -260,18 +274,11 @@ export function saferKorean(input: string): string { .replace(/㎡/g, "제곱미터") .replace(/㎢/g, "제곱킬로미터") - // Process link - .replace(/[hH][tT]{2}[pP][sS]?:\/\/(\S+)/g, (_, url: string) => { - if (url.startsWith("tenor.com/view")) { - return "움짤"; - } - return "링크"; - }) - // Process symbol .replace(/[\%\^\&\*\#\@\.\-\+\_\=\/\\♡\$\|\:\;\>\<]/g, (t) => ( SymbolMap[t as keyof typeof SymbolMap] )) .replace(/([\?\!]+)/g, (_, content: string) => content[0]) .replace(/[ \t\f\r]+/g, " ") + .trim() }