text to speech
OmniVoice
Massively multilingual zero-shot voice-cloning TTS from k2-fsa, supporting 600+ languages.
Model details
View repository
Example usage
Clone a voice from a short reference clip, then synthesize speech with OmniVoice's OpenAI-style audio endpoints.
Input
import os

import httpx

# Endpoint of your deployed OmniVoice model (shown on the Baseten model dashboard).
API_BASE = "https://model-<your-model-id>.api.baseten.co/environments/production/sync"
API_KEY = os.environ["BASETEN_API_KEY"]
headers = {"Authorization": f"Bearer {API_KEY}"}

# Step 1: register a cloned voice by uploading a short reference clip
# (cached server-side) along with the transcript of that clip.
voices_url = f"{API_BASE}/v1/audio/voices"
voice_form = {
    "name": "my_voice",
    "consent": "yes",
    "ref_text": "Transcript of the reference clip.",
}
with open("reference.wav", "rb") as ref_clip:
    upload = {"audio_sample": ("reference.wav", ref_clip, "audio/wav")}
    r = httpx.post(
        voices_url,
        headers=headers,
        data=voice_form,
        files=upload,
        timeout=300,
    )
    # Raise on a 4xx/5xx response instead of continuing with a missing voice.
    r.raise_for_status()

# Step 2: synthesize speech using the voice registered above.
speech_url = f"{API_BASE}/v1/audio/speech"
speech_request = {
    "model": "k2-fsa/OmniVoice",
    "voice": "my_voice",
    "input": "Hello from OmniVoice — multilingual, zero-shot voice cloning.",
    "response_format": "wav",
    "extra_params": {"num_step": 16},
}
r = httpx.post(speech_url, headers=headers, json=speech_request, timeout=300)
r.raise_for_status()

# The response body is written to disk unchanged as the output audio file.
with open("output.wav", "wb") as wav_out:
    wav_out.write(r.content)
print("Saved output.wav")
JSON output
{
  "audio": "<binary WAV audio bytes>"
}