Transcribe audio files with Streaming | AssemblyAI

This guide shows you how to transcribe audio files using our Streaming API. The Streaming API can transcribe WAV audio files, including files with different sample rates. The helper function in this guide expects mono (single-channel) WAV files and raises an error otherwise.

Quickstart

1 import assemblyai as aai
2 from assemblyai.streaming.v3 import (
3     BeginEvent,
4     StreamingClient,
5     StreamingClientOptions,
6     StreamingError,
7     StreamingEvents,
8     StreamingParameters,
9     TerminationEvent,
10     TurnEvent
11 )
12 from typing import Type
13 
def on_begin(self: Type[StreamingClient], event: BeginEvent):
    """Print the session ID once the streaming connection is established."""
    session_id = event.id
    print(f"Session ID: {session_id!s}")
18 
def on_turn(self: Type[StreamingClient], event: TurnEvent):
    """Print each newly received transcript on its own CRLF-terminated line."""
    transcript = event.transcript
    print(transcript, end="\r\n")
23 
def on_terminated(self: Type[StreamingClient], event: TerminationEvent):
    """Handle the termination event for the streaming session.

    Prints how many seconds of audio the session reports as processed.
    """
    print(
        f"Session terminated: {event.audio_duration_seconds} seconds of audio processed"
    )
30 
def on_error(self: Type[StreamingClient], error: StreamingError):
    """Handle a streaming error by printing it."""
    print(f"Error occurred: {error}")
35 
36 
# Create the streaming client
client_options = StreamingClientOptions(api_key="YOUR_API_KEY")
client = StreamingClient(client_options)

# Register one handler per streaming event type.
event_handlers = (
    (StreamingEvents.Begin, on_begin),
    (StreamingEvents.Turn, on_turn),
    (StreamingEvents.Termination, on_terminated),
    (StreamingEvents.Error, on_error),
)
for streaming_event, handler in event_handlers:
    client.on(streaming_event, handler)
48 
def stream_file(filepath: str, sample_rate: int, chunk_duration: float = 0.1):
    """Yield a mono WAV file's audio in fixed-duration chunks, paced by sleeping.

    Args:
        filepath: Path of the WAV file to read.
        sample_rate: Sample rate in Hz that the caller expects the file to
            have. A mismatch only prints a warning; chunks are always sized
            from the file's own frame rate.
        chunk_duration: Length of each chunk in seconds (default 0.1, i.e.
            100 ms). The generator also sleeps this long after each chunk.

    Yields:
        bytes: Raw audio frames as returned by ``Wave_read.readframes``.

    Raises:
        ValueError: If the file is not mono, or ``chunk_duration`` is not
            positive.
    """
    import time
    import wave

    if chunk_duration <= 0:
        raise ValueError("chunk_duration must be positive")

    with wave.open(filepath, 'rb') as wav_file:
        if wav_file.getnchannels() != 1:
            raise ValueError("Only mono audio is supported")

        file_sample_rate = wav_file.getframerate()
        if file_sample_rate != sample_rate:
            print(f"Warning: File sample rate ({file_sample_rate}) doesn't match expected rate ({sample_rate})")

        # Never request zero frames: readframes(0) returns b"" and would end
        # the stream before any audio is sent.
        frames_per_chunk = max(1, int(file_sample_rate * chunk_duration))

        # readframes() returns b"" once the file is exhausted, which stops
        # the iteration.
        for frames in iter(lambda: wav_file.readframes(frames_per_chunk), b""):
            yield frames

            # Pause for the chunk's duration so audio is emitted at roughly
            # playback speed.
            time.sleep(chunk_duration)
75 
file_stream = stream_file(
    filepath="audio.wav",
    sample_rate=44100,
)

# Open the streaming session before sending audio: stream() sends over the
# connection that connect() establishes. The session sample rate is set to
# the same value the file is expected to have.
client.connect(
    StreamingParameters(
        sample_rate=44100,
    )
)

try:
    client.stream(file_stream)
finally:
    # Always close the session, even if streaming is interrupted (for example
    # with Ctrl+C). terminate=True requests session termination so the
    # registered Termination handler can run.
    client.disconnect(terminate=True)

Step-by-step guide

Before we begin, make sure you have an AssemblyAI account and an API key. You can sign up and get your API key from your dashboard.

Install/import packages & set API key

Install the package assemblyai.

$ pip install assemblyai

Import packages.

1 import assemblyai as aai
2 from assemblyai.streaming.v3 import (
3     BeginEvent,
4     StreamingClient,
5     StreamingClientOptions,
6     StreamingError,
7     StreamingEvents,
8     StreamingParameters,
9     TerminationEvent,
10     TurnEvent
11 )
12 from typing import Type

WebSocket event handlers

def on_begin(self: Type[StreamingClient], event: BeginEvent):
    """Print the session ID once the streaming connection is established."""
    session_id = event.id
    print(f"Session ID: {session_id!s}")
5 
def on_turn(self: Type[StreamingClient], event: TurnEvent):
    """Print each newly received transcript on its own CRLF-terminated line."""
    transcript = event.transcript
    print(transcript, end="\r\n")
10 
def on_terminated(self: Type[StreamingClient], event: TerminationEvent):
    """Handle the termination event for the streaming session.

    Prints how many seconds of audio the session reports as processed.
    """
    print(
        f"Session terminated: {event.audio_duration_seconds} seconds of audio processed"
    )
17 
def on_error(self: Type[StreamingClient], error: StreamingError):
    """Handle a streaming error by printing it."""
    print(f"Error occurred: {error}")

Create the streaming client

# Create the streaming client
client_options = StreamingClientOptions(api_key="YOUR_API_KEY")
client = StreamingClient(client_options)

# Register one handler per streaming event type.
event_handlers = (
    (StreamingEvents.Begin, on_begin),
    (StreamingEvents.Turn, on_turn),
    (StreamingEvents.Termination, on_terminated),
    (StreamingEvents.Error, on_error),
)
for streaming_event, handler in event_handlers:
    client.on(streaming_event, handler)

Helper functions for streaming files

Create a helper function to stream your file.

def stream_file(filepath: str, sample_rate: int, chunk_duration: float = 0.1):
    """Yield a mono WAV file's audio in fixed-duration chunks, paced by sleeping.

    Args:
        filepath: Path of the WAV file to read.
        sample_rate: Sample rate in Hz that the caller expects the file to
            have. A mismatch only prints a warning; chunks are always sized
            from the file's own frame rate.
        chunk_duration: Length of each chunk in seconds (default 0.1, i.e.
            100 ms). The generator also sleeps this long after each chunk.

    Yields:
        bytes: Raw audio frames as returned by ``Wave_read.readframes``.

    Raises:
        ValueError: If the file is not mono, or ``chunk_duration`` is not
            positive.
    """
    import time
    import wave

    if chunk_duration <= 0:
        raise ValueError("chunk_duration must be positive")

    with wave.open(filepath, 'rb') as wav_file:
        if wav_file.getnchannels() != 1:
            raise ValueError("Only mono audio is supported")

        file_sample_rate = wav_file.getframerate()
        if file_sample_rate != sample_rate:
            print(f"Warning: File sample rate ({file_sample_rate}) doesn't match expected rate ({sample_rate})")

        # Never request zero frames: readframes(0) returns b"" and would end
        # the stream before any audio is sent.
        frames_per_chunk = max(1, int(file_sample_rate * chunk_duration))

        # readframes() returns b"" once the file is exhausted, which stops
        # the iteration.
        for frames in iter(lambda: wav_file.readframes(frames_per_chunk), b""):
            yield frames

            # Pause for the chunk's duration so audio is emitted at roughly
            # playback speed.
            time.sleep(chunk_duration)
27 
# Build the chunk generator for the example file; nothing is read until the
# generator is iterated.
file_stream = stream_file(filepath="audio.wav", sample_rate=44100)

Stream the file

# Open the streaming session first: stream() sends audio over the connection
# that connect() establishes. Use the same sample rate as the audio file.
client.connect(StreamingParameters(sample_rate=44100))

client.stream(file_stream)

Disconnect the client

# terminate=True requests session termination before closing, so the
# registered Termination handler can report the processed audio duration.
client.disconnect(terminate=True)

You can press Ctrl+C to stop the transcription.