// yaejunyang/packages/tts/typecast.ts
//
// 113 lines
// 3.2 KiB
// TypeScript

import { dirname, join } from "path";
import { TYPECAST_TOKENS } from "../env";
import fetch from "../utils/fetch";
import TTSModelBase from ".";
import { existsSync, mkdirSync, readFileSync, writeFileSync } from "fs";
import { cwd } from "process";
/**
 * Text-to-speech model that synthesises audio through the Typecast HTTP API.
 *
 * Several API keys can be configured via `TYPECAST_TOKENS`. The key that was
 * used most recently is stored in a small file under `<cwd>/cache/typecast`
 * so later calls start with it instead of re-probing keys that already failed.
 */
export class TTSTypecastModel extends TTSModelBase<TTSTypecastModel.RequestId> {
  /**
   * Map from a string key to a pending or resolved audio buffer. It is only
   * initialised here; presumably the base class reads and fills it — confirm.
   */
  protected cachedVoice: Map<string, Promise<Buffer>>;
  /** Absolute path of the file holding the most recently used API key. */
  private lastUseApiKeyPath: string;

  constructor() {
    super();
    this.cachedVoice = new Map();
    this.lastUseApiKeyPath = join(
      cwd(),
      "cache",
      "typecast",
      "lastUseApiToken",
    );
  }

  /**
   * Rewrites a few chat-style tokens into text that reads better aloud, then
   * hands the result to the base class normalisation.
   *
   * @param input Raw text to be spoken.
   * @returns The text after the local substitutions and `super.ttsify`.
   */
  ttsify(input: string): string {
    return super.ttsify(
      input
        .replace(/ㅜㅜ/g, "눙물")
        .replace(/빵/g, "빵 크크")
        // One or more "?" followed by optional spaces and "ㄴ".
        .replace(/[?]+ *ㄴ/g, "물음표ㄴ"),
    );
  }

  /**
   * Sends a single synthesis request to Typecast.
   *
   * @param apiKey Value sent in the `X-API-KEY` header.
   * @param voiceId Text to speak and the Typecast voice to speak it with.
   * @returns Whatever the project `fetch` helper resolves to for this POST.
   */
  private async getTypecastResponse(
    apiKey: string,
    voiceId: TTSTypecastModel.RequestId,
  ) {
    const payload = {
      text: voiceId.text,
      model: "ssfm-v21",
      voice_id: voiceId.voiceId,
      language: "kor",
      prompt: {
        emotion_preset: "happy", // Options: normal, happy, sad, angry, tonemid, toneup
        emotion_intensity: 1, // Range: 0.0 to 2.0
      },
      output: {
        volume: 45, // Range: 0 to 200
        audio_pitch: 1, // Range: -12 to +12 semitones
        audio_tempo: 1, // Range: 0.5x to 2.0x
        audio_format: "mp3", // Options: wav, mp3
      },
      seed: 22, // For reproducible results
    };
    return await fetch(TTSTypecastModel.TypecastApiUrl, {
      method: "POST",
      headers: {
        "X-API-KEY": apiKey,
        "Content-Type": "application/json",
      },
      body: JSON.stringify(payload),
    });
  }

  /**
   * Reads the persisted API key, if any.
   *
   * @returns The trimmed key, or `undefined` when the file does not exist or
   *   is blank.
   */
  private loadLastUsedApiKey(): string | undefined {
    if (!existsSync(this.lastUseApiKeyPath)) return undefined;
    // Trim so a trailing newline from manual editing never reaches the header.
    const stored = readFileSync(this.lastUseApiKeyPath, "utf-8").trim();
    return stored === "" ? undefined : stored;
  }

  /**
   * Persists `apiKey` as the key to start with next time, creating the cache
   * directory when it is missing.
   */
  private saveLastUsedApiKey(apiKey: string): void {
    mkdirSync(dirname(this.lastUseApiKeyPath), { recursive: true });
    writeFileSync(this.lastUseApiKeyPath, apiKey);
  }

  /**
   * Synthesises audio for `voiceId`, rotating through the configured API keys
   * whenever Typecast answers with HTTP 402.
   *
   * The persisted key is tried first, followed by every token in
   * `TYPECAST_TOKENS` that has not been tried yet, each exactly once.
   *
   * @param voiceId Text and voice to synthesise.
   * @returns The response body of the first successful request.
   * @throws Error when a request fails with a status other than 402, or when
   *   every candidate key was rejected with 402.
   */
  async getVoiceBuffer(
    voiceId: TTSTypecastModel.RequestId,
  ): Promise<ArrayBuffer> {
    const persistedKey = this.loadLastUsedApiKey();
    // Ordered, de-duplicated list: persisted key first, then all configured ones.
    const candidates = Array.from(
      new Set<string>(
        persistedKey === undefined
          ? TYPECAST_TOKENS
          : [persistedKey, ...TYPECAST_TOKENS],
      ),
    );

    for (const apiKey of candidates) {
      // Record the switch before the request so the next call starts here too.
      if (apiKey !== persistedKey) this.saveLastUsedApiKey(apiKey);

      const response = (await this.getTypecastResponse(
        apiKey,
        voiceId,
      )) as Response;
      if (response.ok) return await response.arrayBuffer();
      // 402 means "move on to the next key"; anything else is a hard failure.
      if (response.status !== 402) {
        throw new Error(
          `TTS makeID request failed: ${response.status}: ${await response.text()}`,
        );
      }
    }
    throw new Error("Typecast Api use all credit");
  }

  /**
   * Builds the cache path for a request: `<typecast cache>/<voiceId>/<hash>`.
   *
   * @param id Request whose text is hashed into the file name.
   * @returns Path of the cached audio file for `id`.
   */
  public getVoicePath(id: TTSTypecastModel.RequestId): string {
    const audioFileName = TTSModelBase.hashAudioFile(id.text);
    const audioPath = join(
      TTSTypecastModel.TypecastAudioCachePath,
      id.voiceId,
      audioFileName,
    );
    return audioPath;
  }

  /**
   * Creates a request id from text and an optional voice.
   *
   * @param text Text to synthesise.
   * @param voiceId Typecast voice id; defaults to `DefaultVoiceId` when
   *   `undefined` or `null`.
   * @returns The request id object.
   */
  public createRequestId(
    text: string,
    voiceId?: string,
  ): TTSTypecastModel.RequestId {
    return {
      text,
      voiceId: voiceId ?? TTSTypecastModel.DefaultVoiceId,
    };
  }
}
/**
 * Values and types merged onto the `TTSTypecastModel` class: the shared
 * instance, the request shape, and the Typecast-specific constants.
 */
export namespace TTSTypecastModel {
  /** One synthesis request: the text to speak and the voice to speak it with. */
  export type RequestId = { text: string; voiceId: string };

  /** Instance created once when this module is evaluated. */
  export const instance = new TTSTypecastModel();

  /** Voice used when a caller does not specify one. */
  export const DefaultVoiceId = "tc_6731b292d944a485bc406efb";

  /** Endpoint that synthesis requests are POSTed to. */
  export const TypecastApiUrl = "https://api.typecast.ai/v1/text-to-speech";

  /** Directory under the base audio cache that holds Typecast output. */
  export const TypecastAudioCachePath = join(TTSModelBase.AudioCachePath, "typecast");
}

export default TTSTypecastModel;