113 lines
3.2 KiB
TypeScript
113 lines
3.2 KiB
TypeScript
import { mkdirSync, readFileSync, writeFileSync } from "fs";
import { dirname, join } from "path";
import { cwd } from "process";

import TTSModelBase from ".";
import { TYPECAST_TOKENS } from "../env";
import fetch from "../utils/fetch";
|
|
|
|
/**
 * TTS model that synthesizes speech through the Typecast HTTP API.
 *
 * Several API keys can be configured through `TYPECAST_TOKENS`. The key that
 * last produced audio is remembered in a file under
 * `<cwd>/cache/typecast/lastUseApiToken` and is tried first on the next request.
 */
export class TTSTypecastModel extends TTSModelBase<TTSTypecastModel.RequestId> {
  protected cachedVoice: Map<string, Promise<Buffer>>;

  /** Path of the file holding the API key that should be tried first. */
  private readonly lastUseApiKeyPath: string;

  constructor() {
    super();
    this.cachedVoice = new Map();
    this.lastUseApiKeyPath = join(
      cwd(),
      "cache",
      "typecast",
      "lastUseApiToken",
    );
  }

  /**
   * Rewrites a few patterns in the input before handing it to the base
   * class's `ttsify`.
   *
   * @param input Raw text.
   * @returns The result of `super.ttsify` applied to the rewritten text.
   */
  ttsify(input: string): string {
    return super.ttsify(
      input
        .replace(/ㅜㅜ/g, "눙물")
        .replace(/빵/g, "빵 크크")
        .replace(/[?]+ *ㄴ/g, "물음표ㄴ"),
    );
  }

  /**
   * Sends one text-to-speech request to the Typecast API.
   *
   * @param apiKey Value sent in the `X-API-KEY` header.
   * @param voiceId Request descriptor carrying the text and the voice id.
   * @returns Whatever the project's `fetch` helper resolves to for the POST.
   */
  private async getTypecastResponse(
    apiKey: string,
    voiceId: TTSTypecastModel.RequestId,
  ) {
    const payload = {
      text: voiceId.text,
      model: "ssfm-v21",
      voice_id: voiceId.voiceId,
      language: "kor",
      prompt: {
        emotion_preset: "happy", // Options: normal, happy, sad, angry, tonemid, toneup
        emotion_intensity: 1, // Range: 0.0 to 2.0
      },
      output: {
        volume: 45, // Range: 0 to 200
        audio_pitch: 1, // Range: -12 to +12 semitones
        audio_tempo: 1, // Range: 0.5x to 2.0x
        audio_format: "mp3", // Options: wav, mp3
      },
      seed: 22, // For reproducible results
    };

    return await fetch(TTSTypecastModel.TypecastApiUrl, {
      method: "POST",
      headers: {
        "X-API-KEY": apiKey,
        "Content-Type": "application/json",
      },
      body: JSON.stringify(payload),
    });
  }

  /**
   * Reads the remembered API key, if there is one.
   *
   * @returns The trimmed file content, or `undefined` when the file does not
   *   exist or is empty.
   * @throws Any read error other than "file not found".
   */
  private readLastUsedApiKey(): string | undefined {
    try {
      const stored = readFileSync(this.lastUseApiKeyPath, "utf-8").trim();
      return stored === "" ? undefined : stored;
    } catch (error: unknown) {
      const isMissingFile =
        typeof error === "object" &&
        error !== null &&
        (error as { code?: unknown }).code === "ENOENT";
      // A missing file only means no key has been remembered yet.
      if (isMissingFile) return undefined;
      throw error;
    }
  }

  /**
   * Stores `apiKey` as the key to try first next time, creating the cache
   * directory when it does not exist yet.
   */
  private rememberApiKey(apiKey: string): void {
    mkdirSync(dirname(this.lastUseApiKeyPath), { recursive: true });
    writeFileSync(this.lastUseApiKeyPath, apiKey);
  }

  /**
   * Requests synthesized audio, rotating through the available API keys.
   *
   * The remembered key (if any) is tried first, followed by every other entry
   * of `TYPECAST_TOKENS` in order; each key is tried at most once. A 402
   * response moves on to the next key. The key that succeeds is remembered
   * for subsequent calls.
   *
   * @param voiceId Request descriptor carrying the text and the voice id.
   * @returns The response body of the first successful request.
   * @throws Error when a response is neither OK nor 402, or when every key
   *   answered 402.
   */
  async getVoiceBuffer(
    voiceId: TTSTypecastModel.RequestId,
  ): Promise<ArrayBuffer> {
    const storedKey = this.readLastUsedApiKey();
    const configuredKeys: string[] = [...TYPECAST_TOKENS];
    const candidates: string[] =
      storedKey === undefined
        ? configuredKeys
        : [
            storedKey,
            ...configuredKeys.filter((token: string) => token !== storedKey),
          ];

    for (const apiKey of candidates) {
      const response = (await this.getTypecastResponse(
        apiKey,
        voiceId,
      )) as Response;

      if (response.ok) {
        // Only touch the file when the working key actually changed.
        if (apiKey !== storedKey) this.rememberApiKey(apiKey);
        return await response.arrayBuffer();
      }

      if (response.status !== 402) {
        throw new Error(
          `TTS makeID request failed: ${response.status}: ${await response.text()}`,
        );
      }
      // 402: fall through and try the next candidate key.
    }

    throw new Error("Typecast Api use all credit");
  }

  /**
   * Builds the cache path for a request:
   * `<TypecastAudioCachePath>/<voiceId>/<hash of text>`.
   *
   * @param id Request descriptor carrying the text and the voice id.
   * @returns The joined path.
   */
  public getVoicePath(id: TTSTypecastModel.RequestId): string {
    const audioFileName = TTSModelBase.hashAudioFile(id.text);
    return join(
      TTSTypecastModel.TypecastAudioCachePath,
      id.voiceId,
      audioFileName,
    );
  }

  /**
   * Creates a request descriptor.
   *
   * @param text Text to synthesize.
   * @param voiceId Voice id; `DefaultVoiceId` is used when omitted.
   * @returns The descriptor combining both values.
   */
  public createRequestId(
    text: string,
    voiceId?: string,
  ): TTSTypecastModel.RequestId {
    return {
      text,
      voiceId: voiceId ?? TTSTypecastModel.DefaultVoiceId,
    };
  }
}
|
|
/**
 * Static companions of the `TTSTypecastModel` class, merged onto it through
 * namespace declaration merging.
 */
export namespace TTSTypecastModel {
  /** Instance created once, when this module is evaluated. */
  export const instance = new TTSTypecastModel();

  /** Describes one synthesis request: the text and the voice to use. */
  export type RequestId = {
    text: string;
    voiceId: string;
  };

  /** The "typecast" directory under the base model's audio cache path. */
  export const TypecastAudioCachePath = join(
    TTSModelBase.AudioCachePath,
    "typecast",
  );

  /** Endpoint that text-to-speech requests are posted to. */
  export const TypecastApiUrl = "https://api.typecast.ai/v1/text-to-speech";

  /** Voice id used when a request does not specify one. */
  export const DefaultVoiceId = "tc_6731b292d944a485bc406efb";
}
|
|
export default TTSTypecastModel;
|