// yaejunyang/packages/tts/typecast.ts

import { existsSync, mkdirSync, readFileSync, writeFileSync } from "fs";
import { dirname, join } from "path";
import { cwd } from "process";
import { TYPECAST_TOKENS } from "../env";
import fetch from "../utils/fetch";
import TTSModelBase from ".";

/**
 * TTS model backed by the Typecast text-to-speech HTTP API.
 *
 * Several API keys (`TYPECAST_TOKENS`) may be configured. The key to try
 * first is persisted in a small file under `<cwd>/cache/typecast` so the
 * choice is remembered between requests.
 */
export class TTSTypecastModel extends TTSModelBase<TTSTypecastModel.RequestId> {
  protected cachedVoice: Map<string, Promise<Buffer>>;
  /** Path of the file holding the API key to try first on the next request. */
  private readonly lastUseApiKeyPath: string;

  constructor() {
    super();
    this.cachedVoice = new Map();
    this.lastUseApiKeyPath = join(
      cwd(),
      "cache",
      "typecast",
      "lastUseApiToken",
    );
  }

  /**
   * Applies Typecast-specific text substitutions, then delegates to the base
   * class `ttsify`.
   *
   * @param input Raw text to be spoken.
   * @returns The text after substitutions and base-class processing.
   */
  ttsify(input: string): string {
    return super.ttsify(
      input
        .replace(/ㅜㅜ/g, "눙물")
        .replace(/빵/g, "빵 크크")
        .replace(/[?]+ *ㄴ/g, "물음표ㄴ"),
    );
  }

  /**
   * Sends one synthesis request to the Typecast endpoint.
   *
   * @param apiKey Value sent in the `X-API-KEY` header.
   * @param voiceId Text and voice to synthesize.
   * @returns Whatever the project `fetch` helper resolves to for this request.
   */
  private async getTypecastResponse(
    apiKey: string,
    voiceId: TTSTypecastModel.RequestId,
  ) {
    const payload = {
      text: voiceId.text,
      model: "ssfm-v21",
      voice_id: voiceId.voiceId,
      language: "kor",
      prompt: {
        emotion_preset: "happy", // Options: normal, happy, sad, angry, tonemid, toneup
        emotion_intensity: 1, // Range: 0.0 to 2.0
      },
      output: {
        volume: 45, // Range: 0 to 200
        audio_pitch: 1, // Range: -12 to +12 semitones
        audio_tempo: 1, // Range: 0.5x to 2.0x
        audio_format: "mp3", // Options: wav, mp3
      },
      seed: 22, // For reproducible results
    };

    return await fetch(TTSTypecastModel.TypecastApiUrl, {
      method: "POST",
      headers: {
        "X-API-KEY": apiKey,
        "Content-Type": "application/json",
      },
      body: JSON.stringify(payload),
    });
  }

  /**
   * Reads the persisted API key, if any.
   *
   * @returns The trimmed file content, or `undefined` when the file does not
   *   exist or is blank.
   */
  private readLastUsedApiKey(): string | undefined {
    if (!existsSync(this.lastUseApiKeyPath)) return undefined;
    const stored = readFileSync(this.lastUseApiKeyPath, "utf-8").trim();
    return stored.length > 0 ? stored : undefined;
  }

  /**
   * Persists `apiKey` as the key to try first next time, creating the cache
   * directory when it is missing.
   */
  private rememberApiKey(apiKey: string): void {
    mkdirSync(dirname(this.lastUseApiKeyPath), { recursive: true });
    writeFileSync(this.lastUseApiKeyPath, apiKey);
  }

  /**
   * Synthesizes audio for the request, rotating through the configured API
   * keys when a key is rejected with HTTP 402 (treated as "out of credit").
   *
   * The persisted key is tried first, followed by every entry of
   * `TYPECAST_TOKENS` that has not been tried yet, each exactly once. The key
   * that succeeds is persisted so later calls start with it.
   *
   * @param voiceId Text and voice to synthesize.
   * @returns The response body of the first successful request.
   * @throws Error when a request fails with a status other than 402, or when
   *   every candidate key was rejected with 402.
   */
  async getVoiceBuffer(
    voiceId: TTSTypecastModel.RequestId,
  ): Promise<ArrayBuffer> {
    const persistedKey = this.readLastUsedApiKey();
    // A Set keeps insertion order, so the persisted key stays first while
    // duplicates (including the persisted key itself) are dropped.
    const candidateKeys: string[] = Array.from(
      new Set<string>(
        persistedKey === undefined
          ? TYPECAST_TOKENS
          : [persistedKey, ...TYPECAST_TOKENS],
      ),
    );

    for (const apiKey of candidateKeys) {
      const response = (await this.getTypecastResponse(
        apiKey,
        voiceId,
      )) as Response;

      if (response.ok) {
        // Only touch the file when the working key actually changed.
        if (apiKey !== persistedKey) this.rememberApiKey(apiKey);
        return await response.arrayBuffer();
      }

      if (response.status !== 402) {
        throw new Error(
          `TTS makeID request failed: ${response.status}: ${await response.text()}`,
        );
      }
      // 402: this key is exhausted; fall through to the next candidate.
    }

    throw new Error("Typecast Api use all credit");
  }

  /**
   * Builds the cache file path for a request:
   * `<TypecastAudioCachePath>/<voiceId>/<hash of text>`.
   *
   * @param id Request whose audio file path is wanted.
   * @returns The joined path; nothing is created on disk.
   */
  public getVoicePath(id: TTSTypecastModel.RequestId): string {
    const audioFileName = TTSModelBase.hashAudioFile(id.text);
    const audioPath = join(
      TTSTypecastModel.TypecastAudioCachePath,
      id.voiceId,
      audioFileName,
    );
    return audioPath;
  }

  /**
   * Creates a request id from text and an optional voice.
   *
   * @param text Text to synthesize.
   * @param voiceId Voice to use; defaults to `TTSTypecastModel.DefaultVoiceId`.
   * @returns The request id object.
   */
  public createRequestId(
    text: string,
    voiceId?: string,
  ): TTSTypecastModel.RequestId {
    return {
      text,
      voiceId: voiceId ?? TTSTypecastModel.DefaultVoiceId,
    };
  }
}
/**
 * Static members merged onto the `TTSTypecastModel` class: the request-id
 * type, a module-level instance, and the constants the class reads.
 */
export namespace TTSTypecastModel {
  /** One synthesis request: the text to speak and the voice to speak it with. */
  export type RequestId = {
    text: string;
    voiceId: string;
  };

  /** Instance constructed when this module is evaluated. */
  export const instance = new TTSTypecastModel();

  /** Directory under the base audio cache that `getVoicePath` builds paths in. */
  export const TypecastAudioCachePath = join(TTSModelBase.AudioCachePath, "typecast");

  /** Endpoint that synthesis requests are POSTed to. */
  export const TypecastApiUrl = "https://api.typecast.ai/v1/text-to-speech";

  /** Voice used by `createRequestId` when the caller does not pass one. */
  export const DefaultVoiceId = "tc_6731b292d944a485bc406efb";
}

export default TTSTypecastModel;