Transcribe audio files with Streaming

This guide shows you how to transcribe WAV audio files with varying sample rates using our Streaming API.

Quickstart

Here is the complete AssemblyAI Python SDK script to transcribe a WAV audio file using the Streaming API.

1import assemblyai as aai
2from assemblyai.streaming.v3 import (
3 BeginEvent,
4 StreamingClient,
5 StreamingClientOptions,
6 StreamingError,
7 StreamingEvents,
8 StreamingParameters,
9 TerminationEvent,
10 TurnEvent
11)
12from typing import Type
13import sys
14
15YOUR_API_KEY = "YOUR_API_KEY" # Replace with your AssemblyAI API key
16AUDIO_FILE = "audio.wav" # Path to your audio file
17SAMPLE_RATE = 48000 # Change to match the sample rate of your audio file
18SAVE_TRANSCRIPT_TO_FILE = True # Set to False to disable saving transcript to file
19PLAY_AUDIO = True # Set to False to disable audio playback
20
21# Track session data for output file
22session_data = {
23 "session_id": None,
24 "parameters": None,
25 "audio_file": AUDIO_FILE,
26 "audio_duration_seconds": None,
27 "turns": []
28}
29
30def on_begin(self: Type[StreamingClient], event: BeginEvent):
31 "This function is called when the connection has been established."
32
33 session_data["session_id"] = event.id
34 print("Session ID:", event.id, "\n")
35
36def on_turn(self: Type[StreamingClient], event: TurnEvent):
37 "This function is called when a new transcript has been received."
38
39 # Skip empty transcripts
40 if not event.transcript:
41 return
42
43 # Determine status label
44 if not event.end_of_turn:
45 status = "[Partial]"
46 else:
47 status = "[Final]"
48
49 print(f"{status}: {event.transcript}")
50
51 # Track final turns
52 if event.end_of_turn:
53 session_data["turns"].append(event.transcript)
54 print() # Add blank line after final turn for cleaner output
55
56def on_terminated(self: Type[StreamingClient], event: TerminationEvent):
57 "This function is called when the session has ended."
58
59 session_data["audio_duration_seconds"] = event.audio_duration_seconds
60 print(
61 f"Session terminated: {event.audio_duration_seconds} seconds of audio processed"
62 )
63
64def on_error(self: Type[StreamingClient], error: StreamingError):
65 "This function is called when an error occurs."
66
67 print(f"Error occurred: {error}")
68
69def save_transcript():
70 "Save the transcript to a file in the same directory as the script."
71 from pathlib import Path
72
73 # Get the audio file name (handles both absolute and relative paths)
74 audio_name = Path(session_data["audio_file"]).stem
75
76 # Generate filename: {file_name}_{session_id}.txt in the same directory as script
77 session_id = session_data["session_id"] or "unknown"
78 output_file = f"{audio_name}_{session_id}.txt"
79
80 with open(output_file, "w") as f:
81 f.write(f"AssemblyAI Session ID: {session_data['session_id']}\n")
82 f.write(f"Audio file: {session_data['audio_file']}\n")
83 f.write(f"Audio duration: {session_data['audio_duration_seconds']} seconds\n")
84 f.write(f"Parameters used: {session_data['parameters']}\n")
85 f.write("See all available parameters and defaults at https://www.assemblyai.com/docs/api-reference/streaming-api/streaming-api#request.query\n\n")
86
87 f.write("\nTranscription Output\n")
88 for i, turn in enumerate(session_data["turns"], 1):
89 f.write(f"[Turn #{i}]: {turn}\n")
90
91 print(f"Transcript saved to {output_file}")
92
93
94# Create the streaming client
95client = StreamingClient(
96 StreamingClientOptions(
97 api_key=YOUR_API_KEY
98 )
99)
100
101client.on(StreamingEvents.Begin, on_begin)
102client.on(StreamingEvents.Turn, on_turn)
103client.on(StreamingEvents.Termination, on_terminated)
104client.on(StreamingEvents.Error, on_error)
105
106def validate_audio_file(filepath: str, sample_rate: int):
107 """Validate audio file before streaming"""
108 import wave
109 from pathlib import Path
110
111 # Check file extension
112 file_ext = Path(filepath).suffix.lower()
113 if file_ext != ".wav":
114 print(f"Error: Only WAV files are supported. Got: {file_ext}", file=sys.stderr)
115 print(f"Convert your file to WAV using: ffmpeg -i {filepath} -ar {sample_rate} -ac 1 output.wav", file=sys.stderr)
116 sys.exit(1)
117
118 with wave.open(filepath, 'rb') as wav_file:
119 if wav_file.getnchannels() != 1:
120 print("Error: Only mono audio is supported", file=sys.stderr)
121 print(f"Convert your file to mono using: ffmpeg -i {filepath} -ar {sample_rate} -ac 1 output.wav", file=sys.stderr)
122 sys.exit(1)
123
124 file_sample_rate = wav_file.getframerate()
125 if file_sample_rate != sample_rate:
126 print(f"Error: File sample rate ({file_sample_rate}) doesn't match expected rate ({sample_rate})", file=sys.stderr)
127 print(f"Either update SAMPLE_RATE to {file_sample_rate}, or convert your file using: ffmpeg -i {filepath} -ar {sample_rate} -ac 1 output.wav", file=sys.stderr)
128 sys.exit(1)
129
130def stream_file(filepath: str, sample_rate: int, play_audio: bool = False):
131 """Stream audio file in 50ms chunks, optionally playing audio"""
132 import time
133 import wave
134
135 chunk_duration = 0.05
136 audio_player = None
137
138 if play_audio:
139 try:
140 import pyaudio
141 p = pyaudio.PyAudio()
142 with wave.open(filepath, 'rb') as wav_file:
143 audio_player = p.open(
144 format=p.get_format_from_width(wav_file.getsampwidth()),
145 channels=wav_file.getnchannels(),
146 rate=wav_file.getframerate(),
147 output=True
148 )
149 except ImportError:
150 print("Warning: pyaudio not installed. Audio playback disabled.", file=sys.stderr)
151 print("Install with: pip install pyaudio", file=sys.stderr)
152 play_audio = False
153
154 try:
155 with wave.open(filepath, 'rb') as wav_file:
156 frames_per_chunk = int(sample_rate * chunk_duration)
157
158 while True:
159 frames = wav_file.readframes(frames_per_chunk)
160
161 if not frames:
162 break
163
164 if audio_player:
165 audio_player.write(frames)
166 else:
167 time.sleep(chunk_duration)
168
169 yield frames
170 finally:
171 if audio_player:
172 audio_player.stop_stream()
173 audio_player.close()
174 p.terminate()
175
176# Validate audio file before connecting
177validate_audio_file(AUDIO_FILE, SAMPLE_RATE)
178
179file_stream = stream_file(
180 filepath=AUDIO_FILE,
181 sample_rate=SAMPLE_RATE,
182 play_audio=PLAY_AUDIO,
183)
184
185# Configure streaming parameters
186streaming_params = StreamingParameters(
187 sample_rate=SAMPLE_RATE,
188 format_turns=True,
189 speech_model="universal-streaming-english",
190)
191
192# Store parameters for output file (dynamically capture all set parameters)
193session_data["parameters"] = ", ".join(
194 f"{k}={v}" for k, v in streaming_params.__dict__.items() if v is not None
195)
196
197# Warn if using default turn detection parameters
198turn_params = ["end_of_turn_confidence_threshold", "min_turn_silence", "max_turn_silence"]
199if not any(getattr(streaming_params, p, None) is not None for p in turn_params):
200 print("Warning: Using default turn detection parameters. For best results, fine-tune to your use case:")
201 print("https://www.assemblyai.com/docs/streaming/universal-streaming/turn-detection#quick-start-configurations\n")
202
203client.connect(streaming_params)
204
205try:
206 client.stream(file_stream)
207finally:
208 client.disconnect(terminate=True)
209 if SAVE_TRANSCRIPT_TO_FILE:
210 save_transcript()

Step-by-step guide

Before we begin, make sure you have an AssemblyAI account and an API key. You can sign up and get your API key from your dashboard.

Install and import packages

Install the AssemblyAI Python SDK and pyaudio for audio playback.

$pip install assemblyai
$pip install pyaudio

Import packages.

1import assemblyai as aai
2from assemblyai.streaming.v3 import (
3 BeginEvent,
4 StreamingClient,
5 StreamingClientOptions,
6 StreamingError,
7 StreamingEvents,
8 StreamingParameters,
9 TerminationEvent,
10 TurnEvent
11)
12from typing import Type
13import sys

Configure settings

Replace YOUR_API_KEY with your API key.

Set AUDIO_FILE to the relative or absolute path of your audio file, and set SAMPLE_RATE to match your file’s sample rate.

See the Connect the client section below to configure all other streaming parameters.

1YOUR_API_KEY = "YOUR_API_KEY" # Replace with your AssemblyAI API key
2AUDIO_FILE = "audio.wav" # Path to your audio file
3SAMPLE_RATE = 48000 # Change to match the sample rate of your audio file
4SAVE_TRANSCRIPT_TO_FILE = True # Set to False to disable saving transcript to file
5PLAY_AUDIO = True # Set to False to disable audio playback
6
7# Track session data for output file
8session_data = {
9 "session_id": None,
10 "parameters": None,
11 "audio_file": AUDIO_FILE,
12 "audio_duration_seconds": None,
13 "turns": []
14}

WebSocket event handlers

1def on_begin(self: Type[StreamingClient], event: BeginEvent):
2 "This function is called when the connection has been established."
3
4 session_data["session_id"] = event.id
5 print("Session ID:", event.id, "\n")
6
7def on_turn(self: Type[StreamingClient], event: TurnEvent):
8 "This function is called when a new transcript has been received."
9
10 # Skip empty transcripts
11 if not event.transcript:
12 return
13
14 # Determine status label
15 if not event.end_of_turn:
16 status = "[Partial]"
17 else:
18 status = "[Final]"
19
20 print(f"{status}: {event.transcript}")
21
22 # Track final turns
23 if event.end_of_turn:
24 session_data["turns"].append(event.transcript)
25 print() # Add blank line after final turn for cleaner output
26
27def on_terminated(self: Type[StreamingClient], event: TerminationEvent):
28 "This function is called when the session has ended."
29
30 session_data["audio_duration_seconds"] = event.audio_duration_seconds
31 print(
32 f"Session terminated: {event.audio_duration_seconds} seconds of audio processed"
33 )
34
35def on_error(self: Type[StreamingClient], error: StreamingError):
36 "This function is called when an error occurs."
37
38 print(f"Error occurred: {error}")

Create the streaming client

1# Create the streaming client
2client = StreamingClient(
3 StreamingClientOptions(
4 api_key=YOUR_API_KEY
5 )
6)
7
8client.on(StreamingEvents.Begin, on_begin)
9client.on(StreamingEvents.Turn, on_turn)
10client.on(StreamingEvents.Termination, on_terminated)
11client.on(StreamingEvents.Error, on_error)

Helper functions for streaming files

The following helper functions are used to validate audio files, stream audio in chunks, and save the transcript output:

  • validate_audio_file() - Validates that the audio file is a mono WAV file with the expected sample rate.
  • stream_file() - Streams the audio file in 50ms chunks, optionally playing audio through speakers.
  • save_transcript() - Saves the transcript to a text file after the session ends.
1def save_transcript():
2 "Save the transcript to a file in the same directory as the script."
3 from pathlib import Path
4
5 # Get the audio file name (handles both absolute and relative paths)
6 audio_name = Path(session_data["audio_file"]).stem
7
8 # Generate filename: {file_name}_{session_id}.txt in the same directory as script
9 session_id = session_data["session_id"] or "unknown"
10 output_file = f"{audio_name}_{session_id}.txt"
11
12 with open(output_file, "w") as f:
13 f.write(f"AssemblyAI Session ID: {session_data['session_id']}\n")
14 f.write(f"Audio file: {session_data['audio_file']}\n")
15 f.write(f"Audio duration: {session_data['audio_duration_seconds']} seconds\n")
16 f.write(f"Parameters used: {session_data['parameters']}\n")
17 f.write("See all available parameters and defaults at https://www.assemblyai.com/docs/api-reference/streaming-api/streaming-api#request.query\n\n")
18
19 f.write("\nTranscription Output\n")
20 for i, turn in enumerate(session_data["turns"], 1):
21 f.write(f"[Turn #{i}]: {turn}\n")
22
23 print(f"Transcript saved to {output_file}")
24
25def validate_audio_file(filepath: str, sample_rate: int):
26 """Validate audio file before streaming"""
27 import wave
28 from pathlib import Path
29
30 # Check file extension
31 file_ext = Path(filepath).suffix.lower()
32 if file_ext != ".wav":
33 print(f"Error: Only WAV files are supported. Got: {file_ext}", file=sys.stderr)
34 print(f"Convert your file to WAV using: ffmpeg -i {filepath} -ar {sample_rate} -ac 1 output.wav", file=sys.stderr)
35 sys.exit(1)
36
37 with wave.open(filepath, 'rb') as wav_file:
38 if wav_file.getnchannels() != 1:
39 print("Error: Only mono audio is supported", file=sys.stderr)
40 print(f"Convert your file to mono using: ffmpeg -i {filepath} -ar {sample_rate} -ac 1 output.wav", file=sys.stderr)
41 sys.exit(1)
42
43 file_sample_rate = wav_file.getframerate()
44 if file_sample_rate != sample_rate:
45 print(f"Error: File sample rate ({file_sample_rate}) doesn't match expected rate ({sample_rate})", file=sys.stderr)
46 print(f"Either update SAMPLE_RATE to {file_sample_rate}, or convert your file using: ffmpeg -i {filepath} -ar {sample_rate} -ac 1 output.wav", file=sys.stderr)
47 sys.exit(1)
48
49def stream_file(filepath: str, sample_rate: int, play_audio: bool = False):
50 """Stream audio file in 50ms chunks, optionally playing audio"""
51 import time
52 import wave
53
54 chunk_duration = 0.05
55 audio_player = None
56
57 if play_audio:
58 try:
59 import pyaudio
60 p = pyaudio.PyAudio()
61 with wave.open(filepath, 'rb') as wav_file:
62 audio_player = p.open(
63 format=p.get_format_from_width(wav_file.getsampwidth()),
64 channels=wav_file.getnchannels(),
65 rate=wav_file.getframerate(),
66 output=True
67 )
68 except ImportError:
69 print("Warning: pyaudio not installed. Audio playback disabled.", file=sys.stderr)
70 print("Install with: pip install pyaudio", file=sys.stderr)
71 play_audio = False
72
73 try:
74 with wave.open(filepath, 'rb') as wav_file:
75 frames_per_chunk = int(sample_rate * chunk_duration)
76
77 while True:
78 frames = wav_file.readframes(frames_per_chunk)
79
80 if not frames:
81 break
82
83 if audio_player:
84 audio_player.write(frames)
85 else:
86 time.sleep(chunk_duration)
87
88 yield frames
89 finally:
90 if audio_player:
91 audio_player.stop_stream()
92 audio_player.close()
93 p.terminate()
94
95# Validate audio file before connecting
96validate_audio_file(AUDIO_FILE, SAMPLE_RATE)
97
98file_stream = stream_file(
99 filepath=AUDIO_FILE,
100 sample_rate=SAMPLE_RATE,
101 play_audio=PLAY_AUDIO,
102)

Connect the client

A warning is printed if default turn detection parameters are used. This is fine for testing, but for best accuracy and optimal performance, see our recommended settings.

1# Configure streaming parameters
2streaming_params = StreamingParameters(
3 sample_rate=SAMPLE_RATE,
4 format_turns=True,
5 speech_model="universal-streaming-english",
6)
7
8# Store parameters for output file (dynamically capture all set parameters)
9session_data["parameters"] = ", ".join(
10 f"{k}={v}" for k, v in streaming_params.__dict__.items() if v is not None
11)
12
13# Warn if using default turn detection parameters
14turn_params = ["end_of_turn_confidence_threshold", "min_turn_silence", "max_turn_silence"]
15if not any(getattr(streaming_params, p, None) is not None for p in turn_params):
16 print("Warning: Using default turn detection parameters. For best results, fine-tune to your use case:")
17 print("https://www.assemblyai.com/docs/streaming/universal-streaming/turn-detection#quick-start-configurations\n")
18
19client.connect(streaming_params)

Stream the file

1try:
2 client.stream(file_stream)
3finally:
4 client.disconnect(terminate=True)
5 if SAVE_TRANSCRIPT_TO_FILE:
6 save_transcript()

The session will terminate once the file is finished streaming. If SAVE_TRANSCRIPT_TO_FILE is enabled (default), the transcript will be saved to {audio_filename}_{session_id}.txt in the current working directory.

The AUDIO_FILE path can be either relative (e.g., audio.wav) or absolute (e.g., /path/to/audio.wav).

Example output

Here’s an example of what the console output looks like when streaming an audio file:

1Warning: Using default turn detection parameters. For best results, fine-tune to your use case:
2https://www.assemblyai.com/docs/streaming/universal-streaming/turn-detection#quick-start-configurations
3
4Session ID: f37d7c4e-6be9-47ed-b6fc-7600fc78e34d
5
6[Partial]: the
7[Partial]: the quick
8[Partial]: the quick brown
9[Partial]: the quick brown fox
10[Partial]: the quick brown fox jumps
11[Partial]: the quick brown fox jumps over
12[Partial]: the quick brown fox jumps over the
13[Partial]: the quick brown fox jumps over the lazy
14[Partial]: The quick brown fox jumps over the lazy dog
15[Final]: The quick brown fox jumps over the lazy dog.
16
17[Partial]: It
18[Partial]: It is
19[Partial]: It is a
20[Partial]: It is a common
21[Partial]: It is a common typing
22[Partial]: It is a common typing test
23[Final]: It is a common typing test.
24
25Session terminated: 7.52 seconds of audio processed
26Transcript saved to audio_f37d7c4e-6be9-47ed-b6fc-7600fc78e34d.txt

The output shows:

  • Partial transcripts: Real-time updates as words are recognized (formatted when format_turns=True)
  • Final transcripts: The complete turn with proper capitalization and punctuation