For AI agents: a documentation index is available at the root level at /llms.txt and /llms-full.txt. Append /llms.txt to any URL for a page-level index, or .md for the markdown version of any page.
PlaygroundChangelogSign In
OverviewAPI ReferencePre-recorded STTStreaming STTVoice AgentsSpeech UnderstandingGuardrailsLLM GatewayFAQ
OverviewAPI ReferencePre-recorded STTStreaming STTVoice AgentsSpeech UnderstandingGuardrailsLLM GatewayFAQ
  • Getting started
    • Transcribe streaming audio
    • Model selection
    • View model benchmarks
    • Evaluate model accuracy
    • Cloud endpoints & data residency
    • Manage concurrent sessions
    • Webhooks
    • Self-hosted streaming
  • Models
    • Whisper Streaming
    • Medical Mode
  • Features
    • Boost specific terms
    • Label speakers and separate channels
    • PII redaction
    • Filter profanity
    • Authenticate with a temporary token
    • Common session errors and closures
  • Integrations
    • LiveKit
    • Pipecat
  • Guides
      • Build a meeting notetaker
      • Build a medical scribe
      • Build a voice agent
      • Build a contact center application
      • Apply LLM Gateway to streaming audio
        • Use LLM Gateway with Streaming Speech-to-Text (STT)
        • Translate Streaming STT Transcripts with LLM Gateway
LogoLogo
PlaygroundChangelogSign In
On this page
  • Quickstart
  • Step-by-Step Instructions
  • Install Dependencies
  • Import Packages & Set API Key
  • Audio Configuration & Global Variables
  • Define Translate Text Function
  • Websocket Event Handlers
  • Open Websocket
  • Handle Websocket Messages
  • Close Websocket
  • Websocket Error Handling
  • Begin Streaming STT Transcription
GuidesTutorialsStreaming with LLM Gateway

Translate Streaming STT Transcripts with LLM Gateway

Was this page helpful?
Previous

Apply Noise Reduction to Audio for Streaming Speech-to-Text

Next
Built with

In this guide, you’ll learn how to implement real-time translation of final transcripts using AssemblyAI’s Streaming API and LLM Gateway.

Quickstart

Python
JavaScript
1import pyaudio
2import websocket
3import json
4import threading
5import time
6import requests
7from urllib.parse import urlencode
8
YOUR_API_KEY = "YOUR_API_KEY" # Replace with your actual API key

# Query-string parameters appended to the streaming endpoint URL.
CONNECTION_PARAMS = {
    "sample_rate": 16000,
    "speech_model": "u3-rt-pro",
}
API_ENDPOINT_BASE_URL = "wss://streaming.assemblyai.com/v3/ws"
API_ENDPOINT = API_ENDPOINT_BASE_URL + "?" + urlencode(CONNECTION_PARAMS)

# Microphone capture settings. The capture rate is taken from
# CONNECTION_PARAMS so the audio that is recorded always matches the rate
# announced in the endpoint URL.
FRAMES_PER_BUFFER = 800  # 800 frames at 16000 Hz = 50 ms of audio per read
SAMPLE_RATE = CONNECTION_PARAMS["sample_rate"]
CHANNELS = 1
FORMAT = pyaudio.paInt16

# Shared state, assigned in run() / on_open() and read by the callbacks.
audio = None
stream = None
ws_app = None
audio_thread = None
stop_event = threading.Event()  # set to ask the audio sender loop to exit
28
def translate_text(text):
    """Translate a final transcript into Spanish using the LLM Gateway.

    Args:
        text: The transcript text to translate.

    Returns:
        The message content of the first choice in the gateway response.

    Raises:
        requests.exceptions.Timeout: If the gateway does not answer in time.
        requests.exceptions.HTTPError: If the gateway returns an error status.
        KeyError, IndexError: If a successful response has no choices.
    """
    headers = {
        "authorization": YOUR_API_KEY
    }

    llm_gateway_data = {
        "model": "gemini-2.5-flash-lite",
        "messages": [
            {"role": "user", "content": f"Translate the following text into Spanish. Do not write a preamble. Just return the translated text.\n\nText: {text}"}
        ],
        "max_tokens": 1000
    }

    # requests waits forever by default. This function is called from the
    # WebSocket message callback, so an unanswered request would stall all
    # further transcript handling; bound the wait instead.
    result = requests.post(
        "https://llm-gateway.assemblyai.com/v1/chat/completions",
        headers=headers,
        json=llm_gateway_data,
        timeout=30,
    )
    # Report HTTP failures (bad key, rate limit, ...) as such rather than as
    # a confusing KeyError on "choices" further down.
    result.raise_for_status()
    return result.json()["choices"][0]["message"]["content"]
49
def on_open(ws):
    """Start the background thread that sends microphone audio.

    Called when the WebSocket connection opens. The thread is stored in the
    module-level ``audio_thread`` and runs as a daemon.

    Args:
        ws: The open WebSocket connection to send audio on.
    """
    global audio_thread

    print("WebSocket connection opened.")

    def stream_audio():
        # Read fixed-size chunks from the microphone and forward each one as
        # a binary frame until stop_event is set or an error occurs.
        while not stop_event.is_set():
            try:
                chunk = stream.read(FRAMES_PER_BUFFER, exception_on_overflow=False)
                ws.send(chunk, websocket.ABNF.OPCODE_BINARY)
            except Exception as exc:
                print(f"Error streaming audio: {exc}")
                break

    audio_thread = threading.Thread(target=stream_audio, daemon=True)
    audio_thread.start()
66
def on_message(ws, message):
    """Handle one JSON message received from the streaming WebSocket.

    Dispatches on the message's ``type`` field:

    * ``Begin``: prints the session ID.
    * ``Turn``: prints the partial transcript in place, or, when
      ``end_of_turn`` is set, clears the line and prints the translation of
      the final transcript.
    * ``Termination``: prints the reported audio duration.

    Any exception raised while handling the message is caught and printed so
    that it does not propagate out of the callback.

    Args:
        ws: The WebSocket connection (unused).
        message: The raw JSON text of the message.
    """
    try:
        data = json.loads(message)
        msg_type = data.get("type")

        if msg_type == "Begin":
            print(f"Session began: ID={data.get('id')}")
        elif msg_type == "Turn":
            transcript = data.get("transcript", "")
            if data.get("end_of_turn"):
                # Erase the partial transcript that was drawn in place.
                print(f"\r{' ' * 80}\r", end="")
                # An empty final transcript has nothing to translate; skip
                # the request rather than asking the LLM to translate "".
                if transcript and transcript.strip():
                    print(translate_text(transcript))
            else:
                print(f"\r{transcript}", end="")
        elif msg_type == "Termination":
            print(f"\nSession terminated: {data.get('audio_duration_seconds', 0)}s of audio")
    except Exception as e:
        print(f"Error handling message: {e}")
85
def on_error(ws, error):
    """Print a WebSocket error and set the shared stop event.

    Args:
        ws: The WebSocket connection (unused).
        error: The error reported for the connection.
    """
    report = f"\nWebSocket Error: {error}"
    print(report)
    stop_event.set()
89
def on_close(ws, close_status_code, close_msg):
    """Stop audio capture and release audio resources when the socket closes.

    Sets the shared stop event, waits briefly for the audio sender thread to
    leave its read loop, then stops and closes the microphone stream and
    terminates PyAudio.

    Args:
        ws: The WebSocket connection (unused).
        close_status_code: The close status code, printed for diagnostics.
        close_msg: The close message (unused).
    """
    print(f"\nWebSocket Disconnected: Status={close_status_code}")
    stop_event.set()

    # The sender thread may still be inside stream.read(); closing the stream
    # underneath it is not safe, so let it finish first. The timeout keeps
    # shutdown from hanging, and the identity check avoids joining ourselves.
    sender = audio_thread
    if sender is not None and sender.is_alive() and sender is not threading.current_thread():
        sender.join(timeout=1.0)

    try:
        if stream:
            if stream.is_active():
                stream.stop_stream()
            stream.close()
    finally:
        # Terminate PyAudio even if stopping or closing the stream raised.
        if audio:
            audio.terminate()
100
def run():
    """Open the microphone, connect to the streaming API, and run until done.

    Runs the WebSocket in a daemon thread and keeps the main thread polling
    so that Ctrl+C is handled. On Ctrl+C it sends a ``Terminate`` message,
    gives the session up to two seconds to end, and closes the socket.

    Raises:
        Exception: Whatever ``audio.open`` raises if the microphone cannot be
            opened; PyAudio is terminated before the error propagates.
    """
    global audio, stream, ws_app

    # A previous run in the same process leaves the event set, which would
    # make the audio sender loop exit immediately.
    stop_event.clear()

    audio = pyaudio.PyAudio()
    try:
        stream = audio.open(
            input=True,
            frames_per_buffer=FRAMES_PER_BUFFER,
            channels=CHANNELS,
            format=FORMAT,
            rate=SAMPLE_RATE,
        )
    except Exception:
        # on_close never runs if we fail here, so release PyAudio ourselves.
        audio.terminate()
        audio = None
        raise
    print("Speak into your microphone. Press Ctrl+C to stop.")

    ws_app = websocket.WebSocketApp(
        API_ENDPOINT,
        header={"Authorization": YOUR_API_KEY},
        on_open=on_open,
        on_message=on_message,
        on_error=on_error,
        on_close=on_close,
    )

    ws_thread = threading.Thread(target=ws_app.run_forever)
    ws_thread.daemon = True
    ws_thread.start()

    try:
        # Poll instead of a bare join() so KeyboardInterrupt is delivered.
        while ws_thread.is_alive():
            time.sleep(0.1)
    except KeyboardInterrupt:
        print("\nStopping...")
        stop_event.set()
        if ws_app and ws_app.sock and ws_app.sock.connected:
            try:
                ws_app.send(json.dumps({"type": "Terminate"}))
            except Exception as e:
                # The socket can drop between the check and the send; keep
                # shutting down instead of aborting before close().
                print(f"Error sending terminate message: {e}")
            else:
                # Wait up to two seconds for the session to end, returning
                # early if the WebSocket thread finishes sooner.
                ws_thread.join(timeout=2.0)
        if ws_app:
            ws_app.close()
        ws_thread.join(timeout=2.0)


if __name__ == "__main__":
    run()

Step-by-Step Instructions

Before we begin, make sure you have an AssemblyAI account and an API key. You can sign up and get your API key from your dashboard.

Install Dependencies

Python
JavaScript
$ pip install websocket-client pyaudio requests

Import Packages & Set API Key

Python
JavaScript
1import pyaudio
2import websocket
3import json
4import threading
5import time
6import requests
7from urllib.parse import urlencode
8
9YOUR_API_KEY = "YOUR_API_KEY" # Replace with your actual API key

Audio Configuration & Global Variables

Set all of your audio configurations and global variables.

Python
JavaScript
# Query-string parameters appended to the streaming endpoint URL.
CONNECTION_PARAMS = {
    "sample_rate": 16000,
    "speech_model": "u3-rt-pro",
}
API_ENDPOINT_BASE_URL = "wss://streaming.assemblyai.com/v3/ws"
API_ENDPOINT = API_ENDPOINT_BASE_URL + "?" + urlencode(CONNECTION_PARAMS)

# Microphone capture settings. The capture rate is taken from
# CONNECTION_PARAMS so the audio that is recorded always matches the rate
# announced in the endpoint URL.
FRAMES_PER_BUFFER = 800  # 800 frames at 16000 Hz = 50 ms of audio per read
SAMPLE_RATE = CONNECTION_PARAMS["sample_rate"]
CHANNELS = 1
FORMAT = pyaudio.paInt16

# Shared state, assigned in run() / on_open() and read by the callbacks.
audio = None
stream = None
ws_app = None
audio_thread = None
stop_event = threading.Event()  # set to ask the audio sender loop to exit

Define Translate Text Function

Define a function called translate_text (Python) or translateText (JavaScript), which uses LLM Gateway to translate the final English transcripts into another language. This example translates the text into Spanish. To use a different language, replace “Spanish” in the prompt with your language of choice.

Python
JavaScript
def translate_text(text):
    """Translate a final transcript into Spanish using the LLM Gateway.

    Args:
        text: The transcript text to translate.

    Returns:
        The message content of the first choice in the gateway response.

    Raises:
        requests.exceptions.Timeout: If the gateway does not answer in time.
        requests.exceptions.HTTPError: If the gateway returns an error status.
        KeyError, IndexError: If a successful response has no choices.
    """
    headers = {
        "authorization": YOUR_API_KEY
    }

    llm_gateway_data = {
        "model": "gemini-2.5-flash-lite",
        "messages": [
            {"role": "user", "content": f"Translate the following text into Spanish. Do not write a preamble. Just return the translated text.\n\nText: {text}"}
        ],
        "max_tokens": 1000
    }

    # requests waits forever by default. This function is called from the
    # WebSocket message callback, so an unanswered request would stall all
    # further transcript handling; bound the wait instead.
    result = requests.post(
        "https://llm-gateway.assemblyai.com/v1/chat/completions",
        headers=headers,
        json=llm_gateway_data,
        timeout=30,
    )
    # Report HTTP failures (bad key, rate limit, ...) as such rather than as
    # a confusing KeyError on "choices" further down.
    result.raise_for_status()
    return result.json()["choices"][0]["message"]["content"]

Websocket Event Handlers

Open Websocket

Python
JavaScript
def on_open(ws):
    """Start the background thread that sends microphone audio.

    Called when the WebSocket connection opens. The thread is stored in the
    module-level ``audio_thread`` and runs as a daemon.

    Args:
        ws: The open WebSocket connection to send audio on.
    """
    global audio_thread

    print("WebSocket connection opened.")

    def stream_audio():
        # Read fixed-size chunks from the microphone and forward each one as
        # a binary frame until stop_event is set or an error occurs.
        while not stop_event.is_set():
            try:
                chunk = stream.read(FRAMES_PER_BUFFER, exception_on_overflow=False)
                ws.send(chunk, websocket.ABNF.OPCODE_BINARY)
            except Exception as exc:
                print(f"Error streaming audio: {exc}")
                break

    audio_thread = threading.Thread(target=stream_audio, daemon=True)
    audio_thread.start()

Handle Websocket Messages

In this function, use the previously defined translate_text / translateText to translate all final transcripts.

Python
JavaScript
def on_message(ws, message):
    """Handle one JSON message received from the streaming WebSocket.

    Dispatches on the message's ``type`` field:

    * ``Begin``: prints the session ID.
    * ``Turn``: prints the partial transcript in place, or, when
      ``end_of_turn`` is set, clears the line and prints the translation of
      the final transcript.
    * ``Termination``: prints the reported audio duration.

    Any exception raised while handling the message is caught and printed so
    that it does not propagate out of the callback.

    Args:
        ws: The WebSocket connection (unused).
        message: The raw JSON text of the message.
    """
    try:
        data = json.loads(message)
        msg_type = data.get("type")

        if msg_type == "Begin":
            print(f"Session began: ID={data.get('id')}")
        elif msg_type == "Turn":
            transcript = data.get("transcript", "")
            if data.get("end_of_turn"):
                # Erase the partial transcript that was drawn in place.
                print(f"\r{' ' * 80}\r", end="")
                # An empty final transcript has nothing to translate; skip
                # the request rather than asking the LLM to translate "".
                if transcript and transcript.strip():
                    print(translate_text(transcript))
            else:
                print(f"\r{transcript}", end="")
        elif msg_type == "Termination":
            print(f"\nSession terminated: {data.get('audio_duration_seconds', 0)}s of audio")
    except Exception as e:
        print(f"Error handling message: {e}")

Close Websocket

Python
JavaScript
def on_close(ws, close_status_code, close_msg):
    """Stop audio capture and release audio resources when the socket closes.

    Sets the shared stop event, waits briefly for the audio sender thread to
    leave its read loop, then stops and closes the microphone stream and
    terminates PyAudio.

    Args:
        ws: The WebSocket connection (unused).
        close_status_code: The close status code, printed for diagnostics.
        close_msg: The close message (unused).
    """
    print(f"\nWebSocket Disconnected: Status={close_status_code}")
    stop_event.set()

    # The sender thread may still be inside stream.read(); closing the stream
    # underneath it is not safe, so let it finish first. The timeout keeps
    # shutdown from hanging, and the identity check avoids joining ourselves.
    sender = audio_thread
    if sender is not None and sender.is_alive() and sender is not threading.current_thread():
        sender.join(timeout=1.0)

    try:
        if stream:
            if stream.is_active():
                stream.stop_stream()
            stream.close()
    finally:
        # Terminate PyAudio even if stopping or closing the stream raised.
        if audio:
            audio.terminate()

Websocket Error Handling

Python
JavaScript
def on_error(ws, error):
    """Print a WebSocket error and set the shared stop event.

    Args:
        ws: The WebSocket connection (unused).
        error: The error reported for the connection.
    """
    report = f"\nWebSocket Error: {error}"
    print(report)
    stop_event.set()

Begin Streaming STT Transcription

Python
JavaScript
def run():
    """Open the microphone, connect to the streaming API, and run until done.

    Runs the WebSocket in a daemon thread and keeps the main thread polling
    so that Ctrl+C is handled. On Ctrl+C it sends a ``Terminate`` message,
    gives the session up to two seconds to end, and closes the socket.

    Raises:
        Exception: Whatever ``audio.open`` raises if the microphone cannot be
            opened; PyAudio is terminated before the error propagates.
    """
    global audio, stream, ws_app

    # A previous run in the same process leaves the event set, which would
    # make the audio sender loop exit immediately.
    stop_event.clear()

    audio = pyaudio.PyAudio()
    try:
        stream = audio.open(
            input=True,
            frames_per_buffer=FRAMES_PER_BUFFER,
            channels=CHANNELS,
            format=FORMAT,
            rate=SAMPLE_RATE,
        )
    except Exception:
        # on_close never runs if we fail here, so release PyAudio ourselves.
        audio.terminate()
        audio = None
        raise
    print("Speak into your microphone. Press Ctrl+C to stop.")

    ws_app = websocket.WebSocketApp(
        API_ENDPOINT,
        header={"Authorization": YOUR_API_KEY},
        on_open=on_open,
        on_message=on_message,
        on_error=on_error,
        on_close=on_close,
    )

    ws_thread = threading.Thread(target=ws_app.run_forever)
    ws_thread.daemon = True
    ws_thread.start()

    try:
        # Poll instead of a bare join() so KeyboardInterrupt is delivered.
        while ws_thread.is_alive():
            time.sleep(0.1)
    except KeyboardInterrupt:
        print("\nStopping...")
        stop_event.set()
        if ws_app and ws_app.sock and ws_app.sock.connected:
            try:
                ws_app.send(json.dumps({"type": "Terminate"}))
            except Exception as e:
                # The socket can drop between the check and the send; keep
                # shutting down instead of aborting before close().
                print(f"Error sending terminate message: {e}")
            else:
                # Wait up to two seconds for the session to end, returning
                # early if the WebSocket thread finishes sooner.
                ws_thread.join(timeout=2.0)
        if ws_app:
            ws_app.close()
        ws_thread.join(timeout=2.0)


if __name__ == "__main__":
    run()