Apply Noise Reduction to Audio for Streaming Speech-to-Text

This guide demonstrates how to implement a noise reduction system for real-time audio transcription using AssemblyAI’s Streaming STT and the noisereduce library. You’ll learn how to create a custom audio stream that preprocesses incoming audio to remove background noise before it reaches the transcription service.

This solution is particularly valuable for:

  • Voice assistants operating in noisy environments
  • Customer service applications processing calls
  • Meeting transcription tools
  • Voice-enabled applications requiring high accuracy

The implementation uses Python and combines proven audio processing techniques with AssemblyAI’s powerful transcription capabilities. While our example focuses on microphone input, the principles can be applied to any real-time audio stream.

Quickstart

1import logging
2import numpy as np
3import noisereduce as nr
4import assemblyai as aai
5from typing import Type
6from assemblyai.streaming.v3 import (
7 BeginEvent,
8 StreamingClient,
9 StreamingClientOptions,
10 StreamingError,
11 StreamingEvents,
12 StreamingParameters,
13 TerminationEvent,
14 TurnEvent,
15)
16
# Configure the root logger to emit records at INFO level and above.
logging.basicConfig(level=logging.INFO)

# Placeholder: replace with your own AssemblyAI API key and keep it private.
api_key = "<YOUR_API_KEY>"
20
21# --- Noise-reduced microphone stream ---
def noise_reduced_mic_stream(sample_rate=16000):
    """Yield noise-reduced 16-bit PCM audio captured from the microphone.

    Microphone audio is accumulated until about half a second is buffered,
    denoised with ``noisereduce`` and yielded as raw ``int16`` bytes that
    can be passed to ``StreamingClient.stream``.

    The last ``overlap`` samples of every processed buffer are kept so the
    next pass has some context, but only samples that have not been yielded
    before are emitted, so no audio is sent to the service twice.

    Args:
        sample_rate: Capture rate in Hz. Use the same value for
            ``StreamingParameters(sample_rate=...)``.

    Yields:
        bytes: Denoised signed 16-bit samples, in capture order.
    """
    n_fft = 1024
    overlap = 1024  # already-sent samples kept as context for the next pass
    buffer_size = int(sample_rate * 0.5)  # 0.5 seconds

    def denoise(samples):
        """Return a denoised int16 version of ``samples``."""
        float_audio = samples.astype(np.float32) / 32768.0
        denoised = nr.reduce_noise(
            y=float_audio,
            sr=sample_rate,
            prop_decrease=0.75,
            n_fft=n_fft,
        )
        # Clip before casting: a value at or beyond full scale would
        # otherwise fall outside the int16 range and wrap around.
        return np.clip(denoised * 32768.0, -32768, 32767).astype(np.int16)

    mic = aai.extras.MicrophoneStream(sample_rate=sample_rate)
    buffer = np.array([], dtype=np.int16)
    sent = 0  # leading samples of `buffer` that were yielded on an earlier pass

    for raw_audio in mic:
        audio_data = np.frombuffer(raw_audio, dtype=np.int16)
        buffer = np.concatenate((buffer, audio_data))
        if len(buffer) < buffer_size:
            continue

        # Relies on reduce_noise returning one output sample per input
        # sample, so the first `sent` outputs are the overlap region.
        yield denoise(buffer)[sent:].tobytes()
        buffer = buffer[-overlap:]
        sent = len(buffer)

    # The source ended: flush whatever has been captured but not yet sent
    # instead of silently dropping up to half a second of audio.
    pending = buffer[sent:]
    if len(pending):
        if len(buffer) >= n_fft:
            pending = denoise(buffer)[sent:]
        # Shorter than one FFT frame: pass the samples through unprocessed.
        yield pending.tobytes()
42
43
44# --- Event Handlers ---
def on_begin(self: Type[StreamingClient], event: BeginEvent):
    """Print the session id carried by the begin event.

    Registered for ``StreamingEvents.Begin``; ``self`` is not used.
    """
    message = f" Session started: {event.id}"
    print(message)
47
def on_turn(self: Type[StreamingClient], event: TurnEvent):
    """Print the turn's transcript followed by its end-of-turn flag.

    Registered for ``StreamingEvents.Turn``; ``self`` is not used.
    """
    line = f"{event.transcript} ({event.end_of_turn})"
    print(line)
50
def on_terminated(self: Type[StreamingClient], event: TerminationEvent):
    """Print the audio duration reported by the termination event.

    Registered for ``StreamingEvents.Termination``; ``self`` is not used.
    """
    summary = f" Session terminated after {event.audio_duration_seconds} seconds"
    print(summary)
53
def on_error(self: Type[StreamingClient], error: StreamingError):
    """Print the streaming error that was reported.

    Registered for ``StreamingEvents.Error``; ``self`` is not used.
    """
    report = f" Error occurred: {error}"
    print(report)
56
57# --- Main Function ---
def main():
    """Open a streaming session and transcribe noise-reduced microphone audio.

    Builds the client, registers the four event handlers, connects, and
    streams the generator's output until the source ends or an exception
    (such as Ctrl+C) interrupts it. The session is always disconnected.
    """
    sample_rate = 16000  # used for both the capture and the session

    options = StreamingClientOptions(
        api_key=api_key,
        api_host="streaming.assemblyai.com",
    )
    client = StreamingClient(options)

    # One callback per streaming event type.
    handlers = (
        (StreamingEvents.Begin, on_begin),
        (StreamingEvents.Turn, on_turn),
        (StreamingEvents.Termination, on_terminated),
        (StreamingEvents.Error, on_error),
    )
    for event_type, handler in handlers:
        client.on(event_type, handler)

    client.connect(StreamingParameters(sample_rate=sample_rate))

    try:
        client.stream(noise_reduced_mic_stream(sample_rate=sample_rate))
    finally:
        # Runs on normal completion and on interruption alike.
        client.disconnect(terminate=True)


if __name__ == "__main__":
    main()

Step-by-step guide

First, install the required packages: assemblyai (with its extras option, which provides the MicrophoneStream helper used below), noisereduce, and numpy.

$ pip install "assemblyai[extras]" noisereduce numpy
1import logging
2import numpy as np
3import noisereduce as nr
4import assemblyai as aai
5from typing import Type
6from assemblyai.streaming.v3 import (
7 BeginEvent,
8 StreamingClient,
9 StreamingClientOptions,
10 StreamingError,
11 StreamingEvents,
12 StreamingParameters,
13 TerminationEvent,
14 TurnEvent,
15)

Before we begin, make sure you have an AssemblyAI account and an API key. You can sign up for a free account and get your API key from your dashboard. Please note that Streaming Speech-to-text is available for upgraded accounts only. If you’re on the free plan, you’ll need to upgrade your account by adding a credit card.

# Placeholder: replace with your own AssemblyAI API key and keep it private.
api_key = "<YOUR_API_KEY>"

Do not share this API key with anyone — it is a private key that is uniquely associated with your account.

Create a generator function that buffers microphone audio, applies noise reduction to each buffered chunk, and yields the cleaned audio as 16-bit PCM bytes.

def noise_reduced_mic_stream(sample_rate=16000):
    """Yield noise-reduced 16-bit PCM audio captured from the microphone.

    Microphone audio is accumulated until about half a second is buffered,
    denoised with ``noisereduce`` and yielded as raw ``int16`` bytes that
    can be passed to ``StreamingClient.stream``.

    The last ``overlap`` samples of every processed buffer are kept so the
    next pass has some context, but only samples that have not been yielded
    before are emitted, so no audio is sent to the service twice.

    Args:
        sample_rate: Capture rate in Hz. Use the same value for
            ``StreamingParameters(sample_rate=...)``.

    Yields:
        bytes: Denoised signed 16-bit samples, in capture order.
    """
    n_fft = 1024
    overlap = 1024  # already-sent samples kept as context for the next pass
    buffer_size = int(sample_rate * 0.5)  # 0.5 seconds

    def denoise(samples):
        """Return a denoised int16 version of ``samples``."""
        float_audio = samples.astype(np.float32) / 32768.0
        denoised = nr.reduce_noise(
            y=float_audio,
            sr=sample_rate,
            prop_decrease=0.75,
            n_fft=n_fft,
        )
        # Clip before casting: a value at or beyond full scale would
        # otherwise fall outside the int16 range and wrap around.
        return np.clip(denoised * 32768.0, -32768, 32767).astype(np.int16)

    mic = aai.extras.MicrophoneStream(sample_rate=sample_rate)
    buffer = np.array([], dtype=np.int16)
    sent = 0  # leading samples of `buffer` that were yielded on an earlier pass

    for raw_audio in mic:
        audio_data = np.frombuffer(raw_audio, dtype=np.int16)
        buffer = np.concatenate((buffer, audio_data))
        if len(buffer) < buffer_size:
            continue

        # Relies on reduce_noise returning one output sample per input
        # sample, so the first `sent` outputs are the overlap region.
        yield denoise(buffer)[sent:].tobytes()
        buffer = buffer[-overlap:]
        sent = len(buffer)

    # The source ended: flush whatever has been captured but not yet sent
    # instead of silently dropping up to half a second of audio.
    pending = buffer[sent:]
    if len(pending):
        if len(buffer) >= n_fft:
            pending = denoise(buffer)[sent:]
        # Shorter than one FFT frame: pass the samples through unprocessed.
        yield pending.tobytes()

Create functions to handle different events during transcription.

def on_begin(self: Type[StreamingClient], event: BeginEvent):
    """Print the session id carried by the begin event.

    Registered for ``StreamingEvents.Begin``; ``self`` is not used.
    """
    message = f" Session started: {event.id}"
    print(message)
3
def on_turn(self: Type[StreamingClient], event: TurnEvent):
    """Print the turn's transcript followed by its end-of-turn flag.

    Registered for ``StreamingEvents.Turn``; ``self`` is not used.
    """
    line = f"{event.transcript} ({event.end_of_turn})"
    print(line)
6
def on_terminated(self: Type[StreamingClient], event: TerminationEvent):
    """Print the audio duration reported by the termination event.

    Registered for ``StreamingEvents.Termination``; ``self`` is not used.
    """
    summary = f" Session terminated after {event.audio_duration_seconds} seconds"
    print(summary)
9
def on_error(self: Type[StreamingClient], error: StreamingError):
    """Print the streaming error that was reported.

    Registered for ``StreamingEvents.Error``; ``self`` is not used.
    """
    report = f" Error occurred: {error}"
    print(report)

Now create the streaming client, register the event handlers, and stream the output of noise_reduced_mic_stream to AssemblyAI.

def main():
    """Open a streaming session and transcribe noise-reduced microphone audio.

    Builds the client, registers the four event handlers, connects, and
    streams the generator's output until the source ends or an exception
    (such as Ctrl+C) interrupts it. The session is always disconnected.
    """
    sample_rate = 16000  # used for both the capture and the session

    options = StreamingClientOptions(
        api_key=api_key,
        api_host="streaming.assemblyai.com",
    )
    client = StreamingClient(options)

    # One callback per streaming event type.
    handlers = (
        (StreamingEvents.Begin, on_begin),
        (StreamingEvents.Turn, on_turn),
        (StreamingEvents.Termination, on_terminated),
        (StreamingEvents.Error, on_error),
    )
    for event_type, handler in handlers:
        client.on(event_type, handler)

    client.connect(StreamingParameters(sample_rate=sample_rate))

    try:
        client.stream(noise_reduced_mic_stream(sample_rate=sample_rate))
    finally:
        # Runs on normal completion and on interruption alike.
        client.disconnect(terminate=True)


if __name__ == "__main__":
    main()

You can press Ctrl+C to stop the transcription.