In this guide, you’ll learn how to implement real-time translation of final transcripts using AssemblyAI’s Streaming API and LLM Gateway.
import pyaudio
import websocket
import json
import threading
import time
import requests
from urllib.parse import urlencode

YOUR_API_KEY = "YOUR_API_KEY"  # Replace with your actual API key

# Query parameters appended to the streaming WebSocket URL.
CONNECTION_PARAMS = {
    "sample_rate": 16000,
    "speech_model": "u3-rt-pro",
}
API_ENDPOINT_BASE_URL = "wss://streaming.assemblyai.com/v3/ws"
API_ENDPOINT = f"{API_ENDPOINT_BASE_URL}?{urlencode(CONNECTION_PARAMS)}"

# Microphone capture settings.
FRAMES_PER_BUFFER = 800
SAMPLE_RATE = CONNECTION_PARAMS["sample_rate"]
CHANNELS = 1
FORMAT = pyaudio.paInt16

# Shared state, populated by run() and on_open().
audio = None
stream = None
ws_app = None
audio_thread = None
stop_event = threading.Event()


def translate_text(text):
    """Translate a final transcript into Spanish via the LLM Gateway.

    Args:
        text: The transcript text to translate.

    Returns:
        The message content of the first choice in the gateway response.
        Empty or whitespace-only input is returned unchanged without
        making a request.

    Raises:
        requests.HTTPError: If the gateway responds with an error status.
        requests.RequestException: On connection failures or timeouts.
    """
    # Nothing to translate; skip the network round trip.
    if not text or not text.strip():
        return text

    headers = {
        "authorization": YOUR_API_KEY
    }

    llm_gateway_data = {
        "model": "gemini-2.5-flash-lite",
        "messages": [
            {"role": "user", "content": f"Translate the following text into Spanish. Do not write a preamble. Just return the translated text.\n\nText: {text}"}
        ],
        "max_tokens": 1000
    }

    result = requests.post(
        "https://llm-gateway.assemblyai.com/v1/chat/completions",
        headers=headers,
        json=llm_gateway_data,
        # Without a timeout a stalled request would block the WebSocket
        # callback that calls this function indefinitely.
        timeout=30,
    )
    # Surface HTTP errors directly instead of an opaque KeyError below.
    result.raise_for_status()
    return result.json()["choices"][0]["message"]["content"]


def on_open(ws):
    """Announce the connection and start streaming microphone audio.

    Args:
        ws: The connected WebSocket app; audio chunks are sent through it.
    """
    global audio_thread

    print("WebSocket connection opened.")

    def stream_audio():
        # Forward microphone chunks until stop_event is set or an error occurs.
        while not stop_event.is_set():
            try:
                audio_data = stream.read(FRAMES_PER_BUFFER, exception_on_overflow=False)
                ws.send(audio_data, websocket.ABNF.OPCODE_BINARY)
            except Exception as e:
                print(f"Error streaming audio: {e}")
                break

    audio_thread = threading.Thread(target=stream_audio, daemon=True)
    audio_thread.start()


def on_message(ws, message):
    """Handle one JSON message received from the streaming API.

    "Begin" and "Termination" messages are reported, partial "Turn"
    transcripts are echoed on the current line, and final turns are
    translated and printed. Any exception is caught and printed.

    Args:
        ws: The WebSocket app that received the message (unused).
        message: The raw JSON text of the message.
    """
    try:
        data = json.loads(message)
        msg_type = data.get("type")

        if msg_type == "Begin":
            print(f"Session began: ID={data.get('id')}")
        elif msg_type == "Turn":
            transcript = data.get("transcript", "")
            if data.get("end_of_turn"):
                # Blank out the partial transcript before printing the translation.
                print(f"\r{' ' * 80}\r", end="")
                print(translate_text(transcript))
            else:
                print(f"\r{transcript}", end="")
        elif msg_type == "Termination":
            print(f"\nSession terminated: {data.get('audio_duration_seconds', 0)}s of audio")
    except Exception as e:
        print(f"Error handling message: {e}")


def on_error(ws, error):
    """Report a WebSocket error and set the stop event.

    Args:
        ws: The WebSocket app that reported the error (unused).
        error: The error reported by the WebSocket client.
    """
    print(f"\nWebSocket Error: {error}")
    stop_event.set()


def on_close(ws, close_status_code, close_msg):
    """Report the disconnect and release the microphone resources.

    Args:
        ws: The WebSocket app that closed (unused).
        close_status_code: Status code reported for the close.
        close_msg: Close message (unused).
    """
    print(f"\nWebSocket Disconnected: Status={close_status_code}")
    stop_event.set()
    if stream:
        if stream.is_active():
            stream.stop_stream()
        stream.close()
    if audio:
        audio.terminate()


def run():
    """Open the microphone, connect to the streaming API, and run until stopped.

    Blocks until the WebSocket thread exits or the user presses Ctrl+C.
    On Ctrl+C a "Terminate" message is sent (when still connected)
    before the connection is closed.

    Raises:
        Exception: Whatever ``audio.open`` raises when the input stream
            cannot be opened; the PyAudio instance is terminated first.
    """
    global audio, stream, ws_app

    audio = pyaudio.PyAudio()
    try:
        stream = audio.open(
            input=True,
            frames_per_buffer=FRAMES_PER_BUFFER,
            channels=CHANNELS,
            format=FORMAT,
            rate=SAMPLE_RATE,
        )
    except Exception:
        # Release the PyAudio instance before propagating the failure.
        audio.terminate()
        audio = None
        raise
    print("Speak into your microphone. Press Ctrl+C to stop.")

    ws_app = websocket.WebSocketApp(
        API_ENDPOINT,
        header={"Authorization": YOUR_API_KEY},
        on_open=on_open,
        on_message=on_message,
        on_error=on_error,
        on_close=on_close,
    )

    ws_thread = threading.Thread(target=ws_app.run_forever, daemon=True)
    ws_thread.start()

    try:
        while ws_thread.is_alive():
            time.sleep(0.1)
    except KeyboardInterrupt:
        print("\nStopping...")
        stop_event.set()
        if ws_app and ws_app.sock and ws_app.sock.connected:
            try:
                ws_app.send(json.dumps({"type": "Terminate"}))
            except (websocket.WebSocketException, OSError) as e:
                # The socket can close between the check and the send;
                # still fall through to close() and join() below.
                print(f"Error sending termination message: {e}")
            else:
                # Presumably leaves time for the "Termination" reply.
                time.sleep(2)
        if ws_app:
            ws_app.close()
        ws_thread.join(timeout=2.0)


if __name__ == "__main__":
    run()
Before we begin, make sure you have an AssemblyAI account and an API key. You can sign up and get your API key from your dashboard.
$ pip install websocket-client pyaudio requests
import pyaudio
import websocket
import json
import threading
import time
import requests
from urllib.parse import urlencode

# Sent as the authorization header value for both the streaming WebSocket
# connection and the LLM Gateway request.
YOUR_API_KEY = "YOUR_API_KEY" # Replace with your actual API key
Set all of your audio configurations and global variables.
# Query parameters appended to the streaming WebSocket URL.
CONNECTION_PARAMS = dict(sample_rate=16000, speech_model="u3-rt-pro")
API_ENDPOINT_BASE_URL = "wss://streaming.assemblyai.com/v3/ws"
API_ENDPOINT = API_ENDPOINT_BASE_URL + "?" + urlencode(CONNECTION_PARAMS)

# Microphone capture settings; the sample rate matches the one sent to the API.
FRAMES_PER_BUFFER = 800
SAMPLE_RATE = CONNECTION_PARAMS["sample_rate"]
CHANNELS = 1
FORMAT = pyaudio.paInt16

# Shared state, all unset until the session is started.
audio = stream = ws_app = audio_thread = None
stop_event = threading.Event()
Define a function called translate_text (Python) or translateText (JavaScript), which uses LLM Gateway to translate the English final transcripts into another language. This example is translating the text into Spanish. To set this to a different language, just replace “Spanish” in the prompt with your language of choice.
def translate_text(text):
    """Translate a final transcript into Spanish via the LLM Gateway.

    Args:
        text: The transcript text to translate.

    Returns:
        The message content of the first choice in the gateway response.
        Empty or whitespace-only input is returned unchanged without
        making a request.

    Raises:
        requests.HTTPError: If the gateway responds with an error status.
        requests.RequestException: On connection failures or timeouts.
    """
    # Nothing to translate; skip the network round trip.
    if not text or not text.strip():
        return text

    headers = {
        "authorization": YOUR_API_KEY
    }

    llm_gateway_data = {
        "model": "gemini-2.5-flash-lite",
        "messages": [
            {"role": "user", "content": f"Translate the following text into Spanish. Do not write a preamble. Just return the translated text.\n\nText: {text}"}
        ],
        "max_tokens": 1000
    }

    result = requests.post(
        "https://llm-gateway.assemblyai.com/v1/chat/completions",
        headers=headers,
        json=llm_gateway_data,
        # Without a timeout a stalled request would block the caller
        # indefinitely.
        timeout=30,
    )
    # Surface HTTP errors directly instead of an opaque KeyError below.
    result.raise_for_status()
    return result.json()["choices"][0]["message"]["content"]
def on_open(ws):
    """Announce the connection and start streaming microphone audio.

    A daemon thread reads chunks from the global input stream and sends
    them as binary frames until ``stop_event`` is set or an error occurs.

    Args:
        ws: The connected WebSocket app; audio chunks are sent through it.
    """
    global audio_thread

    print("WebSocket connection opened.")

    def stream_audio():
        # Pump microphone chunks to the server until asked to stop.
        while True:
            if stop_event.is_set():
                return
            try:
                chunk = stream.read(FRAMES_PER_BUFFER, exception_on_overflow=False)
                ws.send(chunk, websocket.ABNF.OPCODE_BINARY)
            except Exception as e:
                print(f"Error streaming audio: {e}")
                return

    audio_thread = threading.Thread(target=stream_audio, daemon=True)
    audio_thread.start()
In the on_message handler, use the previously defined translate_text / translateText function to translate every final transcript (a Turn message whose end_of_turn flag is set).
def on_message(ws, message):
    """Handle one JSON message received from the streaming API.

    "Begin" and "Termination" messages are reported, partial "Turn"
    transcripts are echoed on the current line, and final turns are
    translated and printed. Any exception is caught and printed.

    Args:
        ws: The WebSocket app that received the message (unused).
        message: The raw JSON text of the message.
    """
    try:
        payload = json.loads(message)
        kind = payload.get("type")

        if kind == "Begin":
            print(f"Session began: ID={payload.get('id')}")
            return
        if kind == "Termination":
            print(f"\nSession terminated: {payload.get('audio_duration_seconds', 0)}s of audio")
            return
        if kind != "Turn":
            return

        transcript = payload.get("transcript", "")
        if not payload.get("end_of_turn"):
            # Partial result: overwrite the current line in place.
            print(f"\r{transcript}", end="")
            return

        # Final result: blank out the partial text, then print the translation.
        print(f"\r{' ' * 80}\r", end="")
        print(translate_text(transcript))
    except Exception as e:
        print(f"Error handling message: {e}")
def on_close(ws, close_status_code, close_msg):
    """Report the disconnect and release the microphone resources.

    Sets ``stop_event``, stops and closes the input stream if one exists,
    and terminates the PyAudio instance if one exists.

    Args:
        ws: The WebSocket app that closed (unused).
        close_status_code: Status code reported for the close.
        close_msg: Close message (unused).
    """
    print(f"\nWebSocket Disconnected: Status={close_status_code}")
    stop_event.set()

    # Stop an active stream before closing it.
    if stream and stream.is_active():
        stream.stop_stream()
    if stream:
        stream.close()

    if audio:
        audio.terminate()
def on_error(ws, error):
    """Report a WebSocket error and set the stop event.

    Args:
        ws: The WebSocket app that reported the error (unused).
        error: The error reported by the WebSocket client.
    """
    print(f"\nWebSocket Error: {error}")
    stop_event.set()
def run():
    """Open the microphone, connect to the streaming API, and run until stopped.

    Blocks until the WebSocket thread exits or the user presses Ctrl+C.
    On Ctrl+C a "Terminate" message is sent (when still connected)
    before the connection is closed.

    Raises:
        Exception: Whatever ``audio.open`` raises when the input stream
            cannot be opened; the PyAudio instance is terminated first.
    """
    global audio, stream, ws_app

    audio = pyaudio.PyAudio()
    try:
        stream = audio.open(
            input=True,
            frames_per_buffer=FRAMES_PER_BUFFER,
            channels=CHANNELS,
            format=FORMAT,
            rate=SAMPLE_RATE,
        )
    except Exception:
        # Release the PyAudio instance before propagating the failure.
        audio.terminate()
        audio = None
        raise
    print("Speak into your microphone. Press Ctrl+C to stop.")

    ws_app = websocket.WebSocketApp(
        API_ENDPOINT,
        header={"Authorization": YOUR_API_KEY},
        on_open=on_open,
        on_message=on_message,
        on_error=on_error,
        on_close=on_close,
    )

    ws_thread = threading.Thread(target=ws_app.run_forever, daemon=True)
    ws_thread.start()

    try:
        while ws_thread.is_alive():
            time.sleep(0.1)
    except KeyboardInterrupt:
        print("\nStopping...")
        stop_event.set()
        if ws_app and ws_app.sock and ws_app.sock.connected:
            try:
                ws_app.send(json.dumps({"type": "Terminate"}))
            except (websocket.WebSocketException, OSError) as e:
                # The socket can close between the check and the send;
                # still fall through to close() and join() below.
                print(f"Error sending termination message: {e}")
            else:
                # Presumably leaves time for the "Termination" reply.
                time.sleep(2)
        if ws_app:
            ws_app.close()
        ws_thread.join(timeout=2.0)


if __name__ == "__main__":
    run()