Transcribe audio files with Streaming

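This guide shows you how to transcribe audio files using our Streaming API. The example below streams WAV audio files, which must be mono (single-channel). It can handle audio files with different sample rates; the helper prints a warning if the file's sample rate differs from the one you specify.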

Quickstart

1import assemblyai as aai
2from assemblyai.streaming.v3 import (
3 BeginEvent,
4 StreamingClient,
5 StreamingClientOptions,
6 StreamingError,
7 StreamingEvents,
8 StreamingParameters,
9 TerminationEvent,
10 TurnEvent
11)
12from typing import Type
13
def on_begin(self: Type[StreamingClient], event: BeginEvent):
    """Handle the Begin event, fired once the connection has been established.

    Prints the ID of the session carried by the event.
    """
    session_id = event.id
    print("Session ID:", session_id)
18
def on_turn(self: Type[StreamingClient], event: TurnEvent):
    """Handle a Turn event, fired whenever a new transcript has been received.

    Prints the transcript text, ending the line with an explicit CRLF.
    """
    transcript = event.transcript
    print(transcript, end="\r\n")
23
def on_terminated(self: Type[StreamingClient], event: TerminationEvent):
    """Handle the Termination event, fired when the session has been terminated.

    Prints how many seconds of audio were processed during the session.
    """
    print(
        f"Session terminated: {event.audio_duration_seconds} seconds of audio processed"
    )
30
def on_error(self: Type[StreamingClient], error: StreamingError):
    """Handle the Error event, fired when an error occurs during streaming.

    Prints the error that was reported.
    """
    print(f"Error occurred: {error}")
35
36
# Create the streaming client (replace YOUR_API_KEY with your own API key)
client = StreamingClient(StreamingClientOptions(api_key="YOUR_API_KEY"))

# Register each handler for the event it responds to
for _event_type, _handler in (
    (StreamingEvents.Begin, on_begin),
    (StreamingEvents.Turn, on_turn),
    (StreamingEvents.Termination, on_terminated),
    (StreamingEvents.Error, on_error),
):
    client.on(_event_type, _handler)
48
def stream_file(filepath: str, sample_rate: int, chunk_duration: float = 0.1):
    """Yield a mono WAV file's audio in fixed-duration chunks, paced in real time.

    Args:
        filepath: Path of the WAV file to read.
        sample_rate: Sample rate (Hz) the caller expects the file to have. A
            mismatch only prints a warning; chunks are always sized from the
            file's own sample rate.
        chunk_duration: Length of each chunk in seconds (100 ms by default).

    Yields:
        Raw frame bytes, one chunk at a time. The final chunk may be shorter.

    Raises:
        ValueError: If ``chunk_duration`` is not positive or the file is not
            mono. Because this is a generator, the error surfaces on the
            first iteration rather than when the function is called.
    """
    import time
    import wave

    if chunk_duration <= 0:
        raise ValueError("chunk_duration must be positive")

    with wave.open(filepath, 'rb') as wav_file:
        if wav_file.getnchannels() != 1:
            raise ValueError("Only mono audio is supported")

        file_sample_rate = wav_file.getframerate()
        if file_sample_rate != sample_rate:
            print(f"Warning: File sample rate ({file_sample_rate}) doesn't match expected rate ({sample_rate})")

        # Never request zero frames: readframes(0) returns b"" and would end
        # the stream before any audio is yielded.
        frames_per_chunk = max(1, int(file_sample_rate * chunk_duration))

        while True:
            frames = wav_file.readframes(frames_per_chunk)

            if not frames:
                break

            yield frames

            # Wait one chunk's duration so audio is emitted at roughly
            # real-time speed.
            time.sleep(chunk_duration)
75
# Sample rate of audio.wav; the same value is declared to the API on connect.
SAMPLE_RATE = 44100

file_stream = stream_file(
    filepath="audio.wav",
    sample_rate=SAMPLE_RATE,
)

# Open the streaming session before sending any audio, declaring the sample
# rate of the audio that will be streamed.
client.connect(
    StreamingParameters(
        sample_rate=SAMPLE_RATE,
    )
)

try:
    client.stream(file_stream)
finally:
    # Always end the session, even if streaming fails or is interrupted
    # with Ctrl+C.
    client.disconnect(terminate=True)

Step-by-step guide

Before we begin, make sure you have an AssemblyAI account and an API key. You can sign up and get your API key from your dashboard.

Install/import packages & set API key

Install the package assemblyai.

$ pip install assemblyai

Import packages.

1import assemblyai as aai
2from assemblyai.streaming.v3 import (
3 BeginEvent,
4 StreamingClient,
5 StreamingClientOptions,
6 StreamingError,
7 StreamingEvents,
8 StreamingParameters,
9 TerminationEvent,
10 TurnEvent
11)
12from typing import Type

Websocket Event Handlers

def on_begin(self: Type[StreamingClient], event: BeginEvent):
    """Handle the Begin event, fired once the connection has been established.

    Prints the ID of the session carried by the event.
    """
    session_id = event.id
    print("Session ID:", session_id)
5
def on_turn(self: Type[StreamingClient], event: TurnEvent):
    """Handle a Turn event, fired whenever a new transcript has been received.

    Prints the transcript text, ending the line with an explicit CRLF.
    """
    transcript = event.transcript
    print(transcript, end="\r\n")
10
def on_terminated(self: Type[StreamingClient], event: TerminationEvent):
    """Handle the Termination event, fired when the session has been terminated.

    Prints how many seconds of audio were processed during the session.
    """
    print(
        f"Session terminated: {event.audio_duration_seconds} seconds of audio processed"
    )
17
def on_error(self: Type[StreamingClient], error: StreamingError):
    """Handle the Error event, fired when an error occurs during streaming.

    Prints the error that was reported.
    """
    print(f"Error occurred: {error}")

Create the streaming client

# Create the streaming client (replace YOUR_API_KEY with your own API key)
client = StreamingClient(StreamingClientOptions(api_key="YOUR_API_KEY"))

# Register each handler for the event it responds to
for _event_type, _handler in (
    (StreamingEvents.Begin, on_begin),
    (StreamingEvents.Turn, on_turn),
    (StreamingEvents.Termination, on_terminated),
    (StreamingEvents.Error, on_error),
):
    client.on(_event_type, _handler)

Helper functions for streaming files

Create a helper function to stream your file.

def stream_file(filepath: str, sample_rate: int, chunk_duration: float = 0.1):
    """Yield a mono WAV file's audio in fixed-duration chunks, paced in real time.

    Args:
        filepath: Path of the WAV file to read.
        sample_rate: Sample rate (Hz) the caller expects the file to have. A
            mismatch only prints a warning; chunks are always sized from the
            file's own sample rate.
        chunk_duration: Length of each chunk in seconds (100 ms by default).

    Yields:
        Raw frame bytes, one chunk at a time. The final chunk may be shorter.

    Raises:
        ValueError: If ``chunk_duration`` is not positive or the file is not
            mono. Because this is a generator, the error surfaces on the
            first iteration rather than when the function is called.
    """
    import time
    import wave

    if chunk_duration <= 0:
        raise ValueError("chunk_duration must be positive")

    with wave.open(filepath, 'rb') as wav_file:
        if wav_file.getnchannels() != 1:
            raise ValueError("Only mono audio is supported")

        file_sample_rate = wav_file.getframerate()
        if file_sample_rate != sample_rate:
            print(f"Warning: File sample rate ({file_sample_rate}) doesn't match expected rate ({sample_rate})")

        # Never request zero frames: readframes(0) returns b"" and would end
        # the stream before any audio is yielded.
        frames_per_chunk = max(1, int(file_sample_rate * chunk_duration))

        while True:
            frames = wav_file.readframes(frames_per_chunk)

            if not frames:
                break

            yield frames

            # Wait one chunk's duration so audio is emitted at roughly
            # real-time speed.
            time.sleep(chunk_duration)
27
# Build the generator that will feed audio.wav to the client (expected rate: 44100 Hz)
file_stream = stream_file(filepath="audio.wav", sample_rate=44100)

Stream the file

# Open the streaming session before sending audio; sample_rate must match the
# audio being streamed (audio.wav is expected to be 44100 Hz).
client.connect(StreamingParameters(sample_rate=44100))

client.stream(file_stream)

Disconnect the client

# terminate=True asks the server to end the session before the connection is closed
client.disconnect(terminate=True)

You can press Ctrl+C to stop the transcription.