Transcribe audio files with Streaming
This guide shows you how to transcribe WAV audio files with varying sample rates using our Streaming API.
Quickstart
Here is the complete AssemblyAI Python SDK script to transcribe a WAV audio file using the Streaming API.
import assemblyai as aai
from assemblyai.streaming.v3 import (
    BeginEvent,
    StreamingClient,
    StreamingClientOptions,
    StreamingError,
    StreamingEvents,
    StreamingParameters,
    TerminationEvent,
    TurnEvent
)
from typing import Type
import sys

YOUR_API_KEY = "YOUR_API_KEY"  # Replace with your AssemblyAI API key
AUDIO_FILE = "audio.wav"  # Path to your audio file
SAMPLE_RATE = 48000  # Change to match the sample rate of your audio file
SAVE_TRANSCRIPT_TO_FILE = True  # Set to False to disable saving transcript to file
PLAY_AUDIO = True  # Set to False to disable audio playback

# Track session data for output file
session_data = {
    "session_id": None,
    "parameters": None,
    "audio_file": AUDIO_FILE,
    "audio_duration_seconds": None,
    "turns": []
}


def on_begin(self: StreamingClient, event: BeginEvent):
    """Record and print the session ID once the connection has been established."""

    session_data["session_id"] = event.id
    print("Session ID:", event.id, "\n")


def on_turn(self: StreamingClient, event: TurnEvent):
    """Print each received transcript and remember the end-of-turn ones."""

    # Skip empty transcripts
    if not event.transcript:
        return

    # Determine status label
    status = "[Final]" if event.end_of_turn else "[Partial]"
    print(f"{status}: {event.transcript}")

    # Track final turns
    if event.end_of_turn:
        session_data["turns"].append(event.transcript)
        print()  # Add blank line after final turn for cleaner output


def on_terminated(self: StreamingClient, event: TerminationEvent):
    """Record and print the processed audio duration when the session has ended."""

    session_data["audio_duration_seconds"] = event.audio_duration_seconds
    print(
        f"Session terminated: {event.audio_duration_seconds} seconds of audio processed"
    )


def on_error(self: StreamingClient, error: StreamingError):
    """Print any error reported for the session."""

    print(f"Error occurred: {error}")


def save_transcript():
    """Save the collected transcript to a text file in the current working directory.

    The file is named ``{audio_file_stem}_{session_id}.txt`` and contains the
    session metadata held in ``session_data`` followed by one line per final turn.
    """
    from pathlib import Path

    # Get the audio file name (handles both absolute and relative paths)
    audio_name = Path(session_data["audio_file"]).stem

    # Generate filename: {file_name}_{session_id}.txt (relative, so it lands in the
    # current working directory)
    session_id = session_data["session_id"] or "unknown"
    output_file = f"{audio_name}_{session_id}.txt"

    # Explicit UTF-8 so transcripts with non-ASCII characters can be written
    # regardless of the platform's default encoding.
    with open(output_file, "w", encoding="utf-8") as f:
        f.write(f"AssemblyAI Session ID: {session_data['session_id']}\n")
        f.write(f"Audio file: {session_data['audio_file']}\n")
        f.write(f"Audio duration: {session_data['audio_duration_seconds']} seconds\n")
        f.write(f"Parameters used: {session_data['parameters']}\n")
        f.write("See all available parameters and defaults at https://www.assemblyai.com/docs/api-reference/streaming-api/streaming-api#request.query\n\n")

        f.write("\nTranscription Output\n")
        for i, turn in enumerate(session_data["turns"], 1):
            f.write(f"[Turn #{i}]: {turn}\n")

    print(f"Transcript saved to {output_file}")


# Create the streaming client
client = StreamingClient(
    StreamingClientOptions(
        api_key=YOUR_API_KEY
    )
)

client.on(StreamingEvents.Begin, on_begin)
client.on(StreamingEvents.Turn, on_turn)
client.on(StreamingEvents.Termination, on_terminated)
client.on(StreamingEvents.Error, on_error)


def validate_audio_file(filepath: str, sample_rate: int):
    """Validate the audio file before streaming; exit with status 1 if unusable.

    Checks, in order: ``.wav`` extension, the file exists and parses as WAV,
    mono audio, 16-bit samples, and a frame rate equal to ``sample_rate``.
    Each failure prints an explanation (and an ffmpeg hint) to stderr.

    Args:
        filepath: Path to the audio file.
        sample_rate: Sample rate (Hz) the stream will be configured with.

    Raises:
        SystemExit: If any check fails.
    """
    import wave
    from pathlib import Path

    ffmpeg_cmd = f"ffmpeg -i {filepath} -ar {sample_rate} -ac 1 output.wav"

    # Check file extension
    file_ext = Path(filepath).suffix.lower()
    if file_ext != ".wav":
        print(f"Error: Only WAV files are supported. Got: {file_ext}", file=sys.stderr)
        print(f"Convert your file to WAV using: {ffmpeg_cmd}", file=sys.stderr)
        sys.exit(1)

    # Report a missing or corrupt file as a readable error instead of a traceback
    try:
        wav_file = wave.open(filepath, 'rb')
    except FileNotFoundError:
        print(f"Error: Audio file not found: {filepath}", file=sys.stderr)
        sys.exit(1)
    except (wave.Error, EOFError) as exc:
        print(f"Error: Could not read {filepath} as a WAV file: {exc}", file=sys.stderr)
        print(f"Convert your file to WAV using: {ffmpeg_cmd}", file=sys.stderr)
        sys.exit(1)

    with wav_file:
        if wav_file.getnchannels() != 1:
            print("Error: Only mono audio is supported", file=sys.stderr)
            print(f"Convert your file to mono using: {ffmpeg_cmd}", file=sys.stderr)
            sys.exit(1)

        # The raw frames are sent as-is and no encoding is set on the stream,
        # so they must be 16-bit PCM (the API's default encoding).
        sample_width = wav_file.getsampwidth()
        if sample_width != 2:
            print(f"Error: Only 16-bit PCM audio is supported. Got: {sample_width * 8}-bit", file=sys.stderr)
            print(f"Convert your file using: ffmpeg -i {filepath} -ar {sample_rate} -ac 1 -c:a pcm_s16le output.wav", file=sys.stderr)
            sys.exit(1)

        file_sample_rate = wav_file.getframerate()
        if file_sample_rate != sample_rate:
            print(f"Error: File sample rate ({file_sample_rate}) doesn't match expected rate ({sample_rate})", file=sys.stderr)
            print(f"Either update SAMPLE_RATE to {file_sample_rate}, or convert your file using: {ffmpeg_cmd}", file=sys.stderr)
            sys.exit(1)


def stream_file(filepath: str, sample_rate: int, play_audio: bool = False):
    """Stream audio file in 50ms chunks, optionally playing audio.

    This is a generator: nothing is opened until the first chunk is requested.
    Chunks are paced in real time, either by the blocking playback write or by
    sleeping for one chunk duration.

    Args:
        filepath: Path to the WAV file to read.
        sample_rate: Sample rate (Hz) used to size each 50ms chunk.
        play_audio: If True, also play each chunk through pyaudio. Playback is
            disabled with a warning if pyaudio is missing or no output stream
            can be opened.

    Yields:
        Raw frame bytes for each chunk.
    """
    import time
    import wave

    chunk_duration = 0.05
    frames_per_chunk = int(sample_rate * chunk_duration)
    pa = None
    audio_player = None

    try:
        with wave.open(filepath, 'rb') as wav_file:
            if play_audio:
                try:
                    import pyaudio
                except ImportError:
                    print("Warning: pyaudio not installed. Audio playback disabled.", file=sys.stderr)
                    print("Install with: pip install pyaudio", file=sys.stderr)
                else:
                    pa = pyaudio.PyAudio()
                    try:
                        audio_player = pa.open(
                            format=pa.get_format_from_width(wav_file.getsampwidth()),
                            channels=wav_file.getnchannels(),
                            rate=wav_file.getframerate(),
                            output=True
                        )
                    except (OSError, ValueError) as exc:
                        # e.g. no output device: keep transcribing without playback
                        print(f"Warning: Could not open audio output ({exc}). Audio playback disabled.", file=sys.stderr)

            while True:
                frames = wav_file.readframes(frames_per_chunk)

                if not frames:
                    break

                if audio_player:
                    audio_player.write(frames)
                else:
                    time.sleep(chunk_duration)

                yield frames
    finally:
        if audio_player:
            audio_player.stop_stream()
            audio_player.close()
        # Terminate PyAudio even when opening the output stream failed
        if pa:
            pa.terminate()


# Validate audio file before connecting
validate_audio_file(AUDIO_FILE, SAMPLE_RATE)

file_stream = stream_file(
    filepath=AUDIO_FILE,
    sample_rate=SAMPLE_RATE,
    play_audio=PLAY_AUDIO,
)

# Configure streaming parameters
streaming_params = StreamingParameters(
    sample_rate=SAMPLE_RATE,
    format_turns=True,
    speech_model="universal-streaming-english",
)

# Store parameters for output file (dynamically capture all set parameters)
session_data["parameters"] = ", ".join(
    f"{k}={v}" for k, v in streaming_params.__dict__.items() if v is not None
)

# Warn if using default turn detection parameters
turn_params = ["end_of_turn_confidence_threshold", "min_turn_silence", "max_turn_silence"]
if not any(getattr(streaming_params, p, None) is not None for p in turn_params):
    print("Warning: Using default turn detection parameters. For best results, fine-tune to your use case:")
    print("https://www.assemblyai.com/docs/streaming/universal-streaming/turn-detection#quick-start-configurations\n")

client.connect(streaming_params)

try:
    client.stream(file_stream)
finally:
    # Always close the session, then save whatever was transcribed
    client.disconnect(terminate=True)
    if SAVE_TRANSCRIPT_TO_FILE:
        save_transcript()
Step-by-step guide
Before we begin, make sure you have an AssemblyAI account and an API key. You can sign up and get your API key from your dashboard.
Install and import packages
Install the AssemblyAI Python SDK and pyaudio for audio playback.
$ pip install assemblyai
$ pip install pyaudio
Import packages.
1 import assemblyai as aai 2 from assemblyai.streaming.v3 import ( 3 BeginEvent, 4 StreamingClient, 5 StreamingClientOptions, 6 StreamingError, 7 StreamingEvents, 8 StreamingParameters, 9 TerminationEvent, 10 TurnEvent 11 ) 12 from typing import Type 13 import sys
Configure settings
Replace YOUR_API_KEY with your API key.
Set AUDIO_FILE to the relative or absolute path of your audio file, and set SAMPLE_RATE to match your file’s sample rate.
See the "Connect the client" section below to configure all other streaming parameters.
# Replace with your AssemblyAI API key
YOUR_API_KEY = "YOUR_API_KEY"
# Path to your audio file
AUDIO_FILE = "audio.wav"
# Change to match the sample rate of your audio file
SAMPLE_RATE = 48000
# Set to False to disable saving transcript to file
SAVE_TRANSCRIPT_TO_FILE = True
# Set to False to disable audio playback
PLAY_AUDIO = True

# Mutable record of the session, filled in by the event handlers and
# written out to the transcript file at the end
session_data = dict(
    session_id=None,
    parameters=None,
    audio_file=AUDIO_FILE,
    audio_duration_seconds=None,
    turns=[],
)
Websocket Event Handlers
def on_begin(self: StreamingClient, event: BeginEvent):
    """Record and print the session ID once the connection has been established."""

    session_data["session_id"] = event.id
    print("Session ID:", event.id, "\n")


def on_turn(self: StreamingClient, event: TurnEvent):
    """Print each received transcript and remember the end-of-turn ones."""

    # Skip empty transcripts
    if not event.transcript:
        return

    # Determine status label
    status = "[Final]" if event.end_of_turn else "[Partial]"
    print(f"{status}: {event.transcript}")

    # Track final turns
    if event.end_of_turn:
        session_data["turns"].append(event.transcript)
        print()  # Add blank line after final turn for cleaner output


def on_terminated(self: StreamingClient, event: TerminationEvent):
    """Record and print the processed audio duration when the session has ended."""

    session_data["audio_duration_seconds"] = event.audio_duration_seconds
    print(
        f"Session terminated: {event.audio_duration_seconds} seconds of audio processed"
    )


def on_error(self: StreamingClient, error: StreamingError):
    """Print any error reported for the session."""

    print(f"Error occurred: {error}")
Create the streaming client
# Build the client options first, then the streaming client itself
client_options = StreamingClientOptions(api_key=YOUR_API_KEY)
client = StreamingClient(client_options)

# Attach one handler per event type, in the same order as they are defined
for event_type, handler in (
    (StreamingEvents.Begin, on_begin),
    (StreamingEvents.Turn, on_turn),
    (StreamingEvents.Termination, on_terminated),
    (StreamingEvents.Error, on_error),
):
    client.on(event_type, handler)
Helper functions for streaming files
The following helper functions are used to validate audio files, stream audio in chunks, and save the transcript output:
- validate_audio_file() - Validates that the audio file is a mono WAV file with the expected sample rate.
- stream_file() - Streams the audio file in 50ms chunks, optionally playing audio through speakers.
- save_transcript() - Saves the transcript to a text file after the session ends.
def save_transcript():
    """Save the collected transcript to a text file in the current working directory.

    The file is named ``{audio_file_stem}_{session_id}.txt`` and contains the
    session metadata held in ``session_data`` followed by one line per final turn.
    """
    from pathlib import Path

    # Get the audio file name (handles both absolute and relative paths)
    audio_name = Path(session_data["audio_file"]).stem

    # Generate filename: {file_name}_{session_id}.txt (relative, so it lands in the
    # current working directory)
    session_id = session_data["session_id"] or "unknown"
    output_file = f"{audio_name}_{session_id}.txt"

    # Explicit UTF-8 so transcripts with non-ASCII characters can be written
    # regardless of the platform's default encoding.
    with open(output_file, "w", encoding="utf-8") as f:
        f.write(f"AssemblyAI Session ID: {session_data['session_id']}\n")
        f.write(f"Audio file: {session_data['audio_file']}\n")
        f.write(f"Audio duration: {session_data['audio_duration_seconds']} seconds\n")
        f.write(f"Parameters used: {session_data['parameters']}\n")
        f.write("See all available parameters and defaults at https://www.assemblyai.com/docs/api-reference/streaming-api/streaming-api#request.query\n\n")

        f.write("\nTranscription Output\n")
        for i, turn in enumerate(session_data["turns"], 1):
            f.write(f"[Turn #{i}]: {turn}\n")

    print(f"Transcript saved to {output_file}")


def validate_audio_file(filepath: str, sample_rate: int):
    """Validate the audio file before streaming; exit with status 1 if unusable.

    Checks, in order: ``.wav`` extension, the file exists and parses as WAV,
    mono audio, 16-bit samples, and a frame rate equal to ``sample_rate``.
    Each failure prints an explanation (and an ffmpeg hint) to stderr.

    Args:
        filepath: Path to the audio file.
        sample_rate: Sample rate (Hz) the stream will be configured with.

    Raises:
        SystemExit: If any check fails.
    """
    import wave
    from pathlib import Path

    ffmpeg_cmd = f"ffmpeg -i {filepath} -ar {sample_rate} -ac 1 output.wav"

    # Check file extension
    file_ext = Path(filepath).suffix.lower()
    if file_ext != ".wav":
        print(f"Error: Only WAV files are supported. Got: {file_ext}", file=sys.stderr)
        print(f"Convert your file to WAV using: {ffmpeg_cmd}", file=sys.stderr)
        sys.exit(1)

    # Report a missing or corrupt file as a readable error instead of a traceback
    try:
        wav_file = wave.open(filepath, 'rb')
    except FileNotFoundError:
        print(f"Error: Audio file not found: {filepath}", file=sys.stderr)
        sys.exit(1)
    except (wave.Error, EOFError) as exc:
        print(f"Error: Could not read {filepath} as a WAV file: {exc}", file=sys.stderr)
        print(f"Convert your file to WAV using: {ffmpeg_cmd}", file=sys.stderr)
        sys.exit(1)

    with wav_file:
        if wav_file.getnchannels() != 1:
            print("Error: Only mono audio is supported", file=sys.stderr)
            print(f"Convert your file to mono using: {ffmpeg_cmd}", file=sys.stderr)
            sys.exit(1)

        # The raw frames are sent as-is and no encoding is set on the stream,
        # so they must be 16-bit PCM (the API's default encoding).
        sample_width = wav_file.getsampwidth()
        if sample_width != 2:
            print(f"Error: Only 16-bit PCM audio is supported. Got: {sample_width * 8}-bit", file=sys.stderr)
            print(f"Convert your file using: ffmpeg -i {filepath} -ar {sample_rate} -ac 1 -c:a pcm_s16le output.wav", file=sys.stderr)
            sys.exit(1)

        file_sample_rate = wav_file.getframerate()
        if file_sample_rate != sample_rate:
            print(f"Error: File sample rate ({file_sample_rate}) doesn't match expected rate ({sample_rate})", file=sys.stderr)
            print(f"Either update SAMPLE_RATE to {file_sample_rate}, or convert your file using: {ffmpeg_cmd}", file=sys.stderr)
            sys.exit(1)


def stream_file(filepath: str, sample_rate: int, play_audio: bool = False):
    """Stream audio file in 50ms chunks, optionally playing audio.

    This is a generator: nothing is opened until the first chunk is requested.
    Chunks are paced in real time, either by the blocking playback write or by
    sleeping for one chunk duration.

    Args:
        filepath: Path to the WAV file to read.
        sample_rate: Sample rate (Hz) used to size each 50ms chunk.
        play_audio: If True, also play each chunk through pyaudio. Playback is
            disabled with a warning if pyaudio is missing or no output stream
            can be opened.

    Yields:
        Raw frame bytes for each chunk.
    """
    import time
    import wave

    chunk_duration = 0.05
    frames_per_chunk = int(sample_rate * chunk_duration)
    pa = None
    audio_player = None

    try:
        with wave.open(filepath, 'rb') as wav_file:
            if play_audio:
                try:
                    import pyaudio
                except ImportError:
                    print("Warning: pyaudio not installed. Audio playback disabled.", file=sys.stderr)
                    print("Install with: pip install pyaudio", file=sys.stderr)
                else:
                    pa = pyaudio.PyAudio()
                    try:
                        audio_player = pa.open(
                            format=pa.get_format_from_width(wav_file.getsampwidth()),
                            channels=wav_file.getnchannels(),
                            rate=wav_file.getframerate(),
                            output=True
                        )
                    except (OSError, ValueError) as exc:
                        # e.g. no output device: keep transcribing without playback
                        print(f"Warning: Could not open audio output ({exc}). Audio playback disabled.", file=sys.stderr)

            while True:
                frames = wav_file.readframes(frames_per_chunk)

                if not frames:
                    break

                if audio_player:
                    audio_player.write(frames)
                else:
                    time.sleep(chunk_duration)

                yield frames
    finally:
        if audio_player:
            audio_player.stop_stream()
            audio_player.close()
        # Terminate PyAudio even when opening the output stream failed
        if pa:
            pa.terminate()


# Validate audio file before connecting
validate_audio_file(AUDIO_FILE, SAMPLE_RATE)

file_stream = stream_file(
    filepath=AUDIO_FILE,
    sample_rate=SAMPLE_RATE,
    play_audio=PLAY_AUDIO,
)
Connect the client
A warning is printed if default turn detection parameters are used. This is fine for testing, but for best accuracy and optimal performance, see our recommended settings.
# Parameters sent when the streaming session is opened
streaming_params = StreamingParameters(
    sample_rate=SAMPLE_RATE,
    format_turns=True,
    speech_model="universal-streaming-english",
)

# Keep a "name=value" summary of every parameter that has a value, for the output file
explicit_params = {
    name: value
    for name, value in vars(streaming_params).items()
    if value is not None
}
session_data["parameters"] = ", ".join(
    f"{name}={value}" for name, value in explicit_params.items()
)

# If none of the turn detection parameters has a value, the defaults are in use: warn
turn_params = ["end_of_turn_confidence_threshold", "min_turn_silence", "max_turn_silence"]
if all(getattr(streaming_params, name, None) is None for name in turn_params):
    print("Warning: Using default turn detection parameters. For best results, fine-tune to your use case:")
    print("https://www.assemblyai.com/docs/streaming/universal-streaming/turn-detection#quick-start-configurations\n")

client.connect(streaming_params)
Stream the file
# Send every chunk produced by file_stream over the open connection.
try:
    client.stream(file_stream)
finally:
    # Runs whether or not streaming raised, so the session is always closed.
    client.disconnect(terminate=True)
    # Saved after disconnecting: audio_duration_seconds is only filled in by the
    # termination handler (presumably fired by the disconnect above - verify).
    if SAVE_TRANSCRIPT_TO_FILE:
        save_transcript()
The session will terminate once the file is finished streaming. If SAVE_TRANSCRIPT_TO_FILE is enabled (default), the transcript will be saved to {audio_filename}_{session_id}.txt in the current working directory.
The AUDIO_FILE path can be either relative (e.g., audio.wav) or absolute (e.g., /path/to/audio.wav).
Example output
Here’s an example of what the console output looks like when streaming an audio file:
Warning: Using default turn detection parameters. For best results, fine-tune to your use case:
https://www.assemblyai.com/docs/streaming/universal-streaming/turn-detection#quick-start-configurations

Session ID: f37d7c4e-6be9-47ed-b6fc-7600fc78e34d

[Partial]: the
[Partial]: the quick
[Partial]: the quick brown
[Partial]: the quick brown fox
[Partial]: the quick brown fox jumps
[Partial]: the quick brown fox jumps over
[Partial]: the quick brown fox jumps over the
[Partial]: the quick brown fox jumps over the lazy
[Partial]: The quick brown fox jumps over the lazy dog
[Final]: The quick brown fox jumps over the lazy dog.

[Partial]: It
[Partial]: It is
[Partial]: It is a
[Partial]: It is a common
[Partial]: It is a common typing
[Partial]: It is a common typing test
[Final]: It is a common typing test.

Session terminated: 7.52 seconds of audio processed
Transcript saved to audio_f37d7c4e-6be9-47ed-b6fc-7600fc78e34d.txt
The output shows:
- Partial transcripts: Real-time updates as words are recognized (formatted when format_turns=true)
- Final transcripts: The complete turn with proper capitalization and punctuation