yaejunyang/packages/tts/index.ts

import { writeFile, mkdir, stat, readFile } from "fs/promises";
import { dirname } from "path";
import { AudioResource, createAudioResource, StreamType } from "@discordjs/voice";
import { Readable } from "stream";
import { createHash } from "node:crypto";
import { join } from "node:path";
import { existsSync } from "node:fs";
import { saferKorean } from "../utils/saferKorean";

/**
 * Base class for text-to-speech backends.
 *
 * Subclasses provide the request-id scheme, the call that produces the audio
 * bytes and the location of the on-disk cache file. This class layers a file
 * cache and an in-memory cache of pending/finished results on top of those.
 *
 * @typeParam RequestId - Backend-specific value identifying one voice request.
 */
export abstract class TTSModelBase<RequestId> {
    /**
     * Prepares raw text for synthesis.
     *
     * Each `:...:` token (a colon, one or more non-colon characters, a colon)
     * is replaced by its entry in `TTSModelBase.EMOJI_MAP`, or by the generic
     * word "이모지" when the map has no entry. The result is then passed
     * through `saferKorean`.
     *
     * @param input - Text to prepare.
     * @returns The text returned by `saferKorean`.
     */
    public ttsify(input: string): string {
        return saferKorean(
            input.replace(/:[^:]+:/g, (text: string): string => (TTSModelBase.EMOJI_MAP[text] ?? "이모지"))
        );
    }

    /** Produces the request id for the given text. */
    public abstract createRequestId(text: string): RequestId

    /** Resolves with the raw audio bytes for the given request id. */
    public abstract getVoiceBuffer(id: RequestId): Promise<ArrayBuffer>

    /** Returns the path of the cache file used for the given request id. */
    public abstract getVoicePath(id: RequestId): string

    /**
     * Generates the voice for `id`, stores it in the cache file and returns it.
     *
     * @param id - Request to generate audio for.
     * @param audioPath - Destination file; defaults to `getVoicePath(id)`.
     * @returns The generated audio.
     */
    public async createVoice(id: RequestId, audioPath?: string): Promise<Buffer> {
        const voiceBuffer = await this.getVoiceBuffer(id);
        audioPath ??= this.getVoicePath(id);
        const buffer = Buffer.from(voiceBuffer);

        // The cache directory may not exist yet on first use.
        await mkdir(dirname(audioPath), { recursive: true });
        await writeFile(audioPath, buffer);

        return buffer;
    }

    /**
     * Returns the voice for `id` from the cache file, generating (and caching)
     * it when the file does not exist.
     *
     * @param id - Request to fetch audio for.
     * @param audioPath - Cache file to use; defaults to `getVoicePath(id)`.
     * @returns The cached or freshly generated audio.
     * @throws Any read error other than "file not found", and any error raised
     * by `createVoice`.
     */
    public async getVoice(id: RequestId, audioPath?: string): Promise<Buffer> {
        audioPath ??= this.getVoicePath(id);

        try {
            // Read directly instead of checking for existence first: this avoids
            // a blocking filesystem call and the check-then-read race.
            return await readFile(audioPath);
        } catch (error: unknown) {
            // Only a missing file means "not cached yet"; everything else is a
            // genuine failure the caller needs to see.
            const code = (error as { code?: unknown } | null)?.code;
            if (code !== "ENOENT") {
                throw error;
            }
        }

        return this.createVoice(id, audioPath);
    }

    /**
     * In-memory cache used by `getMemcachedVoice`, keyed by voice file path.
     *
     * NOTE(review): the key type is the boxed `String`; it is left as-is so
     * that existing subclass declarations keep type-checking.
     */
    protected abstract cachedVoice: Map<String, Promise<Buffer>>

    /**
     * Returns the voice for `id` from the in-memory cache, falling back to the
     * file cache and finally to generation.
     *
     * The pending promise itself is cached, so concurrent callers share one
     * request. The entry is dropped after `TTSModelBase.MemCacheTTL`
     * milliseconds, or immediately when the request fails so that a later call
     * can retry instead of receiving the stored failure.
     *
     * @param id - Request to fetch audio for.
     * @returns The audio.
     * @throws Whatever `getVoice` rejects with.
     */
    public async getMemcachedVoice(id: RequestId): Promise<Buffer> {
        const path = this.getVoicePath(id);

        const cached = this.cachedVoice.get(path);
        if (cached) {
            return cached;
        }

        const pending = this.getVoice(id);
        this.cachedVoice.set(path, pending);

        // Remove only our own entry; a newer one stored under the same key
        // must not be dropped by a stale timer or failure handler.
        const evict = (): void => {
            if (this.cachedVoice.get(path) === pending) {
                this.cachedVoice.delete(path);
            }
        };
        const expiry = setTimeout(evict, TTSModelBase.MemCacheTTL);

        try {
            return await pending;
        } catch (error: unknown) {
            clearTimeout(expiry);
            evict();
            throw error;
        }
    }
}
/**
 * Static helpers and constants merged onto `TTSModelBase`.
 */
export namespace TTSModelBase {
    /** Spoken replacement for each known `:name:` emoji token. */
    export const EMOJI_MAP: { [key: string]: string } = {
        ":heart:": "하트",
        ":huck:": "헉헉!",
        ":star:": "초롱초롱!"
    }

    /** Directory `<cwd>/cache/audio`, resolved when the module is loaded. */
    const cacheSegments = ["cache", "audio"];
    export const AudioCachePath = join(process.cwd(), ...cacheSegments);

    /**
     * Wraps an in-memory audio buffer in an `AudioResource` with inline volume
     * enabled and the volume set to 0.3.
     */
    export function bufferToAudioResource(buf: Buffer): AudioResource {
        const options = {
            inlineVolume: true,
            inputType: StreamType.Arbitrary,
        };
        const audio = createAudioResource(Readable.from(buf), options);

        const volume = audio.volume;
        if (volume != null) {
            volume.setVolume(0.3);
        }
        return audio;
    }

    /**
     * Builds a file name of the form `<md5 hex of audio><suffix>.mp3`.
     */
    export function hashAudioFile(audio: string, suffix: string = ""): string {
        const digest = createHash("md5").update(audio).digest("hex");
        return `${digest}${suffix}.mp3`;
    }

    /** Lifetime of an in-memory cache entry, in milliseconds (one hour). */
    export const MemCacheTTL = 1000 * 60 * 60
}
// Also expose TTSModelBase as the module's default export.
export default TTSModelBase;