transcription

Whisper Streaming Large v3 (OpenAI)

Realtime audio streaming for production workloads using websockets

Model details

Example usage

The script below shows how to stream audio directly from your laptop's microphone.

# Standard library
import asyncio
import json

# Third-party
import numpy as np
import sounddevice as sd
import websockets
6
# --- Audio capture configuration -------------------------------------------
SAMPLE_RATE = 16000  # Hz; must match metadata["streaming_params"]["sample_rate"]
CHUNK_SIZE = 512     # samples delivered per microphone callback
CHUNK_DURATION = CHUNK_SIZE / SAMPLE_RATE  # seconds per chunk (0.032 s)
CHANNELS = 1         # mono capture

# Baseten credentials/routing — replace the placeholders before running.
headers = {"Authorization": "Api-Key BASETEN-API-KEY-HERE"}
model_id = ""  # Baseten model id here

# Session-configuration blob; sent as the first websocket message,
# before any audio bytes.
metadata = {
    # Voice-activity-detection tuning
    "vad_params": {
        "threshold": 0.5,                # speech-probability cutoff
        "min_silence_duration_ms": 300,  # silence length that closes a segment
        "speech_pad_ms": 30,             # padding kept around detected speech
    },
    # Wire format of the audio frames we will stream
    "streaming_params": {
        "encoding": "pcm_s16le",
        "sample_rate": 16000,
        "enable_partial_transcripts": True,
    },
    "whisper_params": {"audio_language": "en"},
}
30
async def stream_microphone_audio(ws_url):
    """Stream microphone audio to the transcription websocket and print results.

    Sends the module-level ``metadata`` JSON blob first, then forwards raw
    pcm_s16le chunks captured from the default input device while
    concurrently printing partial/final transcripts received from the
    server. Runs until cancelled or the connection drops.

    Args:
        ws_url: Full ``wss://`` endpoint of the deployed model.
    """
    loop = asyncio.get_running_loop()
    async with websockets.connect(ws_url, extra_headers=headers) as ws:
        print("Connected to server")

        # The server expects the JSON config message before any audio bytes.
        await ws.send(json.dumps(metadata))
        print("Sent metadata to server")

        send_queue = asyncio.Queue()

        # sounddevice invokes this callback on its own audio thread, so the
        # bytes are handed to the event loop thread-safely rather than
        # touching the (non-thread-safe) asyncio queue directly.
        def audio_callback(indata, frames, time_info, status):
            if status:
                print(f"Audio warning: {status}")
            # float32 in [-1, 1] -> little-endian int16 PCM ("pcm_s16le")
            int16_data = (indata * 32767).astype(np.int16).tobytes()
            loop.call_soon_threadsafe(send_queue.put_nowait, int16_data)

        with sd.InputStream(
                samplerate=SAMPLE_RATE,
                blocksize=CHUNK_SIZE,
                channels=CHANNELS,
                dtype="float32",
                callback=audio_callback,
        ):
            print("Streaming mic audio...")

            async def send_audio():
                # Drain the queue and forward each chunk as a binary frame.
                while True:
                    chunk = await send_queue.get()
                    await ws.send(chunk)

            async def receive_server_messages():
                while True:
                    response = await ws.recv()
                    try:
                        message = json.loads(response)
                        is_final = message.get("is_final")
                        transcript = message.get("transcript")

                        # BUG FIX: the original tested `not is_final` first,
                        # which made the "[unknown type]" branch unreachable
                        # and printed "[partial] None" for messages missing
                        # these fields. Treat missing fields as unknown.
                        if is_final is None or transcript is None:
                            print(f"[unknown type] {message}")
                        elif is_final:
                            print(f"[final] {transcript}")
                        else:
                            print(f"[partial] {transcript}")
                    except Exception as e:
                        print("Non-JSON message or parse error:", response, "| Error:", str(e))

            # Run send + receive tasks concurrently
            await asyncio.gather(send_audio(), receive_server_messages())
82
83
# Change this to your actual WebSocket URL
ws_url = f"wss://model-{model_id}.api.baseten.co/v1/websocket"

# Guarded entry point so importing this module does not open a connection.
if __name__ == "__main__":
    asyncio.run(stream_microphone_audio(ws_url))
Input
JSON output
null

transcription models

See all
OpenAI logo
Transcription

Whisper Streaming Large v3

H100 MIG 40GB
OpenAI logo
Transcription

Whisper (best performance)

V3 - H100 MIG 40GB
OpenAI logo
Transcription

WhisperX

L4

OpenAI models

See all
OpenAI logo
Transcription

Whisper Streaming Large v3

H100 MIG 40GB
OpenAI logo
Transcription

Whisper (best performance)

V3 - H100 MIG 40GB
OpenAI logo
Transcription

WhisperX

L4

🔥 Trending models