Transcribe audio files with Streaming

This guide shows you how to transcribe WAV audio files with varying sample rates using our Streaming API.

Quickstart

Here is the complete AssemblyAI Python SDK script to transcribe a WAV audio file using the Streaming API.

1import assemblyai as aai
2from assemblyai.streaming.v3 import (
3 BeginEvent,
4 StreamingClient,
5 StreamingClientOptions,
6 StreamingError,
7 StreamingEvents,
8 StreamingParameters,
9 TerminationEvent,
10 TurnEvent
11)
12from typing import Type
13import sys
14
15YOUR_API_KEY = "YOUR_API_KEY" # Replace with your AssemblyAI API key
16AUDIO_FILE = "audio.wav" # Path to your audio file
17SAMPLE_RATE = 48000 # Change to match the sample rate of your audio file
18SAVE_TRANSCRIPT_TO_FILE = True # Set to False to disable saving transcript to file
19PLAY_AUDIO = True # Set to False to disable audio playback
20
21# Track session data for output file
22session_data = {
23 "session_id": None,
24 "parameters": None,
25 "audio_file": AUDIO_FILE,
26 "audio_duration_seconds": None,
27 "turns": []
28}
29
30def on_begin(self: Type[StreamingClient], event: BeginEvent):
31 "This function is called when the connection has been established."
32
33 session_data["session_id"] = event.id
34 print("Session ID:", event.id, "\n")
35
36def on_turn(self: Type[StreamingClient], event: TurnEvent):
37 "This function is called when a new transcript has been received."
38
39 # Skip empty transcripts
40 if not event.transcript:
41 return
42
43 # Determine status label
44 if not event.end_of_turn:
45 status = "[Partial]"
46 else:
47 status = "[Final]"
48
49 print(f"{status}: {event.transcript}")
50
51 # Track final turns
52 if event.end_of_turn:
53 session_data["turns"].append(event.transcript)
54 print() # Add blank line after final turn for cleaner output
55
56def on_terminated(self: Type[StreamingClient], event: TerminationEvent):
57 "This function is called when the session has ended."
58
59 session_data["audio_duration_seconds"] = event.audio_duration_seconds
60 print(
61 f"Session terminated: {event.audio_duration_seconds} seconds of audio processed"
62 )
63
64def on_error(self: Type[StreamingClient], error: StreamingError):
65 "This function is called when an error occurs."
66
67 print(f"Error occurred: {error}")
68
69def save_transcript():
70 "Save the transcript to a file in the same directory as the script."
71 from pathlib import Path
72
73 # Get the audio file name (handles both absolute and relative paths)
74 audio_name = Path(session_data["audio_file"]).stem
75
76 # Generate filename: {file_name}_{session_id}.txt in the same directory as script
77 session_id = session_data["session_id"] or "unknown"
78 output_file = f"{audio_name}_{session_id}.txt"
79
80 with open(output_file, "w") as f:
81 f.write(f"AssemblyAI Session ID: {session_data['session_id']}\n")
82 f.write(f"Audio file: {session_data['audio_file']}\n")
83 f.write(f"Audio duration: {session_data['audio_duration_seconds']} seconds\n")
84 f.write(f"Parameters used: {session_data['parameters']}\n")
85 f.write("See all available parameters and defaults at https://www.assemblyai.com/docs/api-reference/streaming-api/streaming-api#request.query\n\n")
86
87 f.write("\nTranscription Output\n")
88 for i, turn in enumerate(session_data["turns"], 1):
89 f.write(f"[Turn #{i}]: {turn}\n")
90
91 print(f"Transcript saved to {output_file}")
92
93
94# Create the streaming client
95client = StreamingClient(
96 StreamingClientOptions(
97 api_key=YOUR_API_KEY
98 )
99)
100
101client.on(StreamingEvents.Begin, on_begin)
102client.on(StreamingEvents.Turn, on_turn)
103client.on(StreamingEvents.Termination, on_terminated)
104client.on(StreamingEvents.Error, on_error)
105
106def validate_audio_file(filepath: str, sample_rate: int):
107 """Validate audio file before streaming"""
108 import wave
109 from pathlib import Path
110
111 # Check file extension
112 file_ext = Path(filepath).suffix.lower()
113 if file_ext != ".wav":
114 print(f"Error: Only WAV files are supported. Got: {file_ext}", file=sys.stderr)
115 print(f"Convert your file to WAV using: ffmpeg -i {filepath} -ar {sample_rate} -ac 1 output.wav", file=sys.stderr)
116 sys.exit(1)
117
118 with wave.open(filepath, 'rb') as wav_file:
119 if wav_file.getnchannels() != 1:
120 print("Error: Only mono audio is supported", file=sys.stderr)
121 print(f"Convert your file to mono using: ffmpeg -i {filepath} -ar {sample_rate} -ac 1 output.wav", file=sys.stderr)
122 sys.exit(1)
123
124 file_sample_rate = wav_file.getframerate()
125 if file_sample_rate != sample_rate:
126 print(f"Error: File sample rate ({file_sample_rate}) doesn't match expected rate ({sample_rate})", file=sys.stderr)
127 print(f"Either update SAMPLE_RATE to {file_sample_rate}, or convert your file using: ffmpeg -i {filepath} -ar {sample_rate} -ac 1 output.wav", file=sys.stderr)
128 sys.exit(1)
129
130def stream_file(filepath: str, sample_rate: int, play_audio: bool = False):
131 """Stream audio file in 50ms chunks, optionally playing audio"""
132 import time
133 import wave
134
135 chunk_duration = 0.05
136 audio_player = None
137
138 if play_audio:
139 try:
140 import pyaudio
141 p = pyaudio.PyAudio()
142 with wave.open(filepath, 'rb') as wav_file:
143 audio_player = p.open(
144 format=p.get_format_from_width(wav_file.getsampwidth()),
145 channels=wav_file.getnchannels(),
146 rate=wav_file.getframerate(),
147 output=True
148 )
149 except ImportError:
150 print("Warning: pyaudio not installed. Audio playback disabled.", file=sys.stderr)
151 print("Install with: pip install pyaudio", file=sys.stderr)
152 play_audio = False
153
154 try:
155 with wave.open(filepath, 'rb') as wav_file:
156 frames_per_chunk = int(sample_rate * chunk_duration)
157
158 while True:
159 frames = wav_file.readframes(frames_per_chunk)
160
161 if not frames:
162 break
163
164 if audio_player:
165 audio_player.write(frames)
166 else:
167 time.sleep(chunk_duration)
168
169 yield frames
170 finally:
171 if audio_player:
172 audio_player.stop_stream()
173 audio_player.close()
174 p.terminate()
175
176# Validate audio file before connecting
177validate_audio_file(AUDIO_FILE, SAMPLE_RATE)
178
179file_stream = stream_file(
180 filepath=AUDIO_FILE,
181 sample_rate=SAMPLE_RATE,
182 play_audio=PLAY_AUDIO,
183)
184
185# Configure streaming parameters
186streaming_params = StreamingParameters(
187 sample_rate=SAMPLE_RATE,
188 format_turns=True,
189 speech_model="universal-streaming-english",
190)
191
192# Store parameters for output file (dynamically capture all set parameters)
193session_data["parameters"] = ", ".join(
194 f"{k}={v}" for k, v in streaming_params.__dict__.items() if v is not None
195)
196
197# Warn if using default turn detection parameters
198turn_params = ["end_of_turn_confidence_threshold", "min_turn_silence", "max_turn_silence"]
199if not any(getattr(streaming_params, p, None) is not None for p in turn_params):
200 print("Warning: Using default turn detection parameters. For best results, fine-tune to your use case:")
201 print("https://www.assemblyai.com/docs/streaming/universal-streaming/turn-detection#quick-start-configurations\n")
202
203client.connect(streaming_params)
204
205try:
206 client.stream(file_stream)
207finally:
208 client.disconnect(terminate=True)
209 if SAVE_TRANSCRIPT_TO_FILE:
210 save_transcript()

Step-by-step guide

Before we begin, make sure you have an AssemblyAI account and an API key. You can sign up and get your API key from your dashboard.

Install and import packages

Install the AssemblyAI Python SDK and pyaudio for audio playback.

$pip install assemblyai
$pip install pyaudio

Import packages.

1import assemblyai as aai
2from assemblyai.streaming.v3 import (
3 BeginEvent,
4 StreamingClient,
5 StreamingClientOptions,
6 StreamingError,
7 StreamingEvents,
8 StreamingParameters,
9 TerminationEvent,
10 TurnEvent
11)
12from typing import Type
13import sys

Configure settings

Replace YOUR_API_KEY with your API key.

Set AUDIO_FILE to the relative or absolute path of your audio file, and set SAMPLE_RATE to match your file’s sample rate.

See the Connect the client section below to configure all other streaming parameters.

1YOUR_API_KEY = "YOUR_API_KEY" # Replace with your AssemblyAI API key
2AUDIO_FILE = "audio.wav" # Path to your audio file
3SAMPLE_RATE = 48000 # Change to match the sample rate of your audio file
4SAVE_TRANSCRIPT_TO_FILE = True # Set to False to disable saving transcript to file
5PLAY_AUDIO = True # Set to False to disable audio playback
6
7# Track session data for output file
8session_data = {
9 "session_id": None,
10 "parameters": None,
11 "audio_file": AUDIO_FILE,
12 "audio_duration_seconds": None,
13 "turns": []
14}

WebSocket event handlers

1def on_begin(self: Type[StreamingClient], event: BeginEvent):
2 "This function is called when the connection has been established."
3
4 session_data["session_id"] = event.id
5 print("Session ID:", event.id, "\n")
6
7def on_turn(self: Type[StreamingClient], event: TurnEvent):
8 "This function is called when a new transcript has been received."
9
10 # Skip empty transcripts
11 if not event.transcript:
12 return
13
14 # Determine status label
15 if not event.end_of_turn:
16 status = "[Partial]"
17 else:
18 status = "[Final]"
19
20 print(f"{status}: {event.transcript}")
21
22 # Track final turns
23 if event.end_of_turn:
24 session_data["turns"].append(event.transcript)
25 print() # Add blank line after final turn for cleaner output
26
27def on_terminated(self: Type[StreamingClient], event: TerminationEvent):
28 "This function is called when the session has ended."
29
30 session_data["audio_duration_seconds"] = event.audio_duration_seconds
31 print(
32 f"Session terminated: {event.audio_duration_seconds} seconds of audio processed"
33 )
34
35def on_error(self: Type[StreamingClient], error: StreamingError):
36 "This function is called when an error occurs."
37
38 print(f"Error occurred: {error}")

Create the streaming client

1# Create the streaming client
2client = StreamingClient(
3 StreamingClientOptions(
4 api_key=YOUR_API_KEY
5 )
6)
7
8client.on(StreamingEvents.Begin, on_begin)
9client.on(StreamingEvents.Turn, on_turn)
10client.on(StreamingEvents.Termination, on_terminated)
11client.on(StreamingEvents.Error, on_error)

Helper functions for streaming files

The following helper functions are used to validate audio files, stream audio in chunks, and save the transcript output:

  • validate_audio_file() - Validates that the audio file is a mono WAV file with the expected sample rate.
  • stream_file() - Streams the audio file in 50ms chunks, optionally playing audio through speakers.
  • save_transcript() - Saves the transcript to a text file after the session ends.
1def save_transcript():
2 "Save the transcript to a file in the same directory as the script."
3 from pathlib import Path
4
5 # Get the audio file name (handles both absolute and relative paths)
6 audio_name = Path(session_data["audio_file"]).stem
7
8 # Generate filename: {file_name}_{session_id}.txt in the same directory as script
9 session_id = session_data["session_id"] or "unknown"
10 output_file = f"{audio_name}_{session_id}.txt"
11
12 with open(output_file, "w") as f:
13 f.write(f"AssemblyAI Session ID: {session_data['session_id']}\n")
14 f.write(f"Audio file: {session_data['audio_file']}\n")
15 f.write(f"Audio duration: {session_data['audio_duration_seconds']} seconds\n")
16 f.write(f"Parameters used: {session_data['parameters']}\n")
17 f.write("See all available parameters and defaults at https://www.assemblyai.com/docs/api-reference/streaming-api/streaming-api#request.query\n\n")
18
19 f.write("\nTranscription Output\n")
20 for i, turn in enumerate(session_data["turns"], 1):
21 f.write(f"[Turn #{i}]: {turn}\n")
22
23 print(f"Transcript saved to {output_file}")
24
25def validate_audio_file(filepath: str, sample_rate: int):
26 """Validate audio file before streaming"""
27 import wave
28 from pathlib import Path
29
30 # Check file extension
31 file_ext = Path(filepath).suffix.lower()
32 if file_ext != ".wav":
33 print(f"Error: Only WAV files are supported. Got: {file_ext}", file=sys.stderr)
34 print(f"Convert your file to WAV using: ffmpeg -i {filepath} -ar {sample_rate} -ac 1 output.wav", file=sys.stderr)
35 sys.exit(1)
36
37 with wave.open(filepath, 'rb') as wav_file:
38 if wav_file.getnchannels() != 1:
39 print("Error: Only mono audio is supported", file=sys.stderr)
40 print(f"Convert your file to mono using: ffmpeg -i {filepath} -ar {sample_rate} -ac 1 output.wav", file=sys.stderr)
41 sys.exit(1)
42
43 file_sample_rate = wav_file.getframerate()
44 if file_sample_rate != sample_rate:
45 print(f"Error: File sample rate ({file_sample_rate}) doesn't match expected rate ({sample_rate})", file=sys.stderr)
46 print(f"Either update SAMPLE_RATE to {file_sample_rate}, or convert your file using: ffmpeg -i {filepath} -ar {sample_rate} -ac 1 output.wav", file=sys.stderr)
47 sys.exit(1)
48
49def stream_file(filepath: str, sample_rate: int, play_audio: bool = False):
50 """Stream audio file in 50ms chunks, optionally playing audio"""
51 import time
52 import wave
53
54 chunk_duration = 0.05
55 audio_player = None
56
57 if play_audio:
58 try:
59 import pyaudio
60 p = pyaudio.PyAudio()
61 with wave.open(filepath, 'rb') as wav_file:
62 audio_player = p.open(
63 format=p.get_format_from_width(wav_file.getsampwidth()),
64 channels=wav_file.getnchannels(),
65 rate=wav_file.getframerate(),
66 output=True
67 )
68 except ImportError:
69 print("Warning: pyaudio not installed. Audio playback disabled.", file=sys.stderr)
70 print("Install with: pip install pyaudio", file=sys.stderr)
71 play_audio = False
72
73 try:
74 with wave.open(filepath, 'rb') as wav_file:
75 frames_per_chunk = int(sample_rate * chunk_duration)
76
77 while True:
78 frames = wav_file.readframes(frames_per_chunk)
79
80 if not frames:
81 break
82
83 if audio_player:
84 audio_player.write(frames)
85 else:
86 time.sleep(chunk_duration)
87
88 yield frames
89 finally:
90 if audio_player:
91 audio_player.stop_stream()
92 audio_player.close()
93 p.terminate()
94
95# Validate audio file before connecting
96validate_audio_file(AUDIO_FILE, SAMPLE_RATE)
97
98file_stream = stream_file(
99 filepath=AUDIO_FILE,
100 sample_rate=SAMPLE_RATE,
101 play_audio=PLAY_AUDIO,
102)

Connect the client

A warning is printed if default turn detection parameters are used. This is fine for testing, but for best accuracy and optimal performance, see our recommended settings.

1# Configure streaming parameters
2streaming_params = StreamingParameters(
3 sample_rate=SAMPLE_RATE,
4 format_turns=True,
5 speech_model="universal-streaming-english",
6)
7
8# Store parameters for output file (dynamically capture all set parameters)
9session_data["parameters"] = ", ".join(
10 f"{k}={v}" for k, v in streaming_params.__dict__.items() if v is not None
11)
12
13# Warn if using default turn detection parameters
14turn_params = ["end_of_turn_confidence_threshold", "min_turn_silence", "max_turn_silence"]
15if not any(getattr(streaming_params, p, None) is not None for p in turn_params):
16 print("Warning: Using default turn detection parameters. For best results, fine-tune to your use case:")
17 print("https://www.assemblyai.com/docs/streaming/universal-streaming/turn-detection#quick-start-configurations\n")
18
19client.connect(streaming_params)

Stream the file

1try:
2 client.stream(file_stream)
3finally:
4 client.disconnect(terminate=True)
5 if SAVE_TRANSCRIPT_TO_FILE:
6 save_transcript()

The session will terminate once the file is finished streaming. If SAVE_TRANSCRIPT_TO_FILE is enabled (default), the transcript will be saved to {audio_filename}_{session_id}.txt in the current working directory.

The AUDIO_FILE path can be either relative (e.g., audio.wav) or absolute (e.g., /path/to/audio.wav).

Example output

Here’s an example of what the console output looks like when streaming an audio file:

1Warning: Using default turn detection parameters. For best results, fine-tune to your use case:
2https://www.assemblyai.com/docs/streaming/universal-streaming/turn-detection#quick-start-configurations
3
4Session ID: f37d7c4e-6be9-47ed-b6fc-7600fc78e34d
5
6[Partial]: the
7[Partial]: the quick
8[Partial]: the quick brown
9[Partial]: the quick brown fox
10[Partial]: the quick brown fox jumps
11[Partial]: the quick brown fox jumps over
12[Partial]: the quick brown fox jumps over the
13[Partial]: the quick brown fox jumps over the lazy
14[Partial]: The quick brown fox jumps over the lazy dog
15[Final]: The quick brown fox jumps over the lazy dog.
16
17[Partial]: It
18[Partial]: It is
19[Partial]: It is a
20[Partial]: It is a common
21[Partial]: It is a common typing
22[Partial]: It is a common typing test
23[Final]: It is a common typing test.
24
25Session terminated: 7.52 seconds of audio processed
26Transcript saved to audio_f37d7c4e-6be9-47ed-b6fc-7600fc78e34d.txt

The output shows:

  • Partial transcripts: Real-time updates as words are recognized (formatted when format_turns=True)
  • Final transcripts: The complete turn with proper capitalization and punctuation