Translate Real-time STT Transcripts with LLM Gateway

In this guide, you’ll learn how to implement real-time translation of final transcripts using AssemblyAI’s Streaming API and LLM Gateway.

Quickstart

Python
JavaScript

import pyaudio
import websocket
import json
import threading
import time
import requests
from urllib.parse import urlencode

# AssemblyAI API key, sent with both the streaming and the LLM Gateway requests.
YOUR_API_KEY = "YOUR_API_KEY"  # Replace with your actual API key

# Query parameters appended to the streaming WebSocket URL.
CONNECTION_PARAMS = dict(
    sample_rate=16000,
    speech_model="u3-rt-pro",
)
API_ENDPOINT_BASE_URL = "wss://streaming.assemblyai.com/v3/ws"
API_ENDPOINT = API_ENDPOINT_BASE_URL + "?" + urlencode(CONNECTION_PARAMS)

# Microphone capture settings; the capture rate mirrors the rate sent to the API.
SAMPLE_RATE = CONNECTION_PARAMS["sample_rate"]
FRAMES_PER_BUFFER = 800  # 800 frames at 16000 Hz = 50 ms of audio per read
CHANNELS = 1
FORMAT = pyaudio.paInt16

# State shared between the main thread, the WebSocket callbacks and the
# audio-streaming thread. Everything starts unset and is filled in by run()
# and on_open().
stop_event = threading.Event()
audio = stream = ws_app = audio_thread = None

def translate_text(text):
    """Translate a final transcript into Spanish using LLM Gateway.

    Args:
        text: The transcript text to translate.

    Returns:
        The translated text from the first choice of the chat completion.

    Raises:
        requests.HTTPError: If LLM Gateway answers with a 4xx/5xx status.
        requests.Timeout: If no response arrives within the timeout.
    """
    headers = {
        "authorization": YOUR_API_KEY
    }

    llm_gateway_data = {
        "model": "gemini-2.5-flash-lite",
        "messages": [
            {"role": "user", "content": f"Translate the following text into Spanish. Do not write a preamble. Just return the translated text.\n\nText: {text}"}
        ],
        "max_tokens": 1000
    }

    # This runs inside the WebSocket message callback, so a request without a
    # timeout could stall transcript handling indefinitely.
    result = requests.post(
        "https://llm-gateway.assemblyai.com/v1/chat/completions",
        headers=headers,
        json=llm_gateway_data,
        timeout=30,
    )
    # Surface auth/quota/server errors as a clear HTTP error instead of a
    # KeyError on the missing "choices" field below.
    result.raise_for_status()
    return result.json()["choices"][0]["message"]["content"]

def on_open(ws):
    """Start forwarding microphone audio once the WebSocket is connected.

    Launches a daemon thread that reads chunks from the global ``stream`` and
    sends each one as a binary frame until ``stop_event`` is set or an error
    occurs. The thread is stored in the global ``audio_thread``.
    """
    global audio_thread

    print("WebSocket connection opened.")

    def stream_audio():
        # Keep pumping audio until asked to stop; any failure ends the loop.
        while True:
            if stop_event.is_set():
                return
            try:
                chunk = stream.read(FRAMES_PER_BUFFER, exception_on_overflow=False)
                ws.send(chunk, websocket.ABNF.OPCODE_BINARY)
            except Exception as exc:
                print(f"Error streaming audio: {exc}")
                return

    audio_thread = threading.Thread(target=stream_audio, daemon=True)
    audio_thread.start()

def on_message(ws, message):
    """Handle one JSON message received from the streaming API.

    ``Begin`` and ``Termination`` messages are logged. For ``Turn`` messages,
    partial transcripts are printed in place on the current line; when
    ``end_of_turn`` is set the line is cleared and the translated transcript
    is printed instead.

    Args:
        ws: The WebSocket app that received the message (unused).
        message: The raw JSON text of the message.
    """
    try:
        data = json.loads(message)
        msg_type = data.get("type")

        if msg_type == "Begin":
            print(f"Session began: ID={data.get('id')}")
        elif msg_type == "Turn":
            # Treat a missing or null transcript as empty text.
            transcript = data.get("transcript") or ""
            if data.get("end_of_turn"):
                # Overwrite the partial transcript shown on the current line.
                print(f"\r{' ' * 80}\r", end="")
                # Nothing to translate for an empty turn; skipping avoids a
                # wasted LLM request (and a possibly made-up "translation").
                if transcript.strip():
                    print(translate_text(transcript))
            else:
                print(f"\r{transcript}", end="")
        elif msg_type == "Termination":
            print(f"\nSession terminated: {data.get('audio_duration_seconds', 0)}s of audio")
    except Exception as e:
        # Keep the socket callback alive: report the problem and carry on.
        print(f"Error handling message: {e}")

def on_error(ws, error):
    """Report a WebSocket error and signal the audio thread to stop."""
    report = f"\nWebSocket Error: {error}"
    print(report)
    # Setting the event ends the streaming loop started in on_open().
    stop_event.set()

def on_close(ws, close_status_code, close_msg):
    """Stop audio streaming and release the microphone when the socket closes."""
    global stream, audio

    print(f"\nWebSocket Disconnected: Status={close_status_code}")
    stop_event.set()

    # Shut down the input stream first, then the PyAudio instance that owns it.
    if stream:
        still_capturing = stream.is_active()
        if still_capturing:
            stream.stop_stream()
        stream.close()

    if not audio:
        return
    audio.terminate()

def run():
    """Capture microphone audio and stream it for transcription until stopped.

    Opens the PyAudio input stream, runs the WebSocket client on a background
    daemon thread, and waits for that thread to finish. On Ctrl+C a
    ``Terminate`` message is sent (if still connected) before the socket is
    closed.
    """
    global audio, stream, ws_app

    audio = pyaudio.PyAudio()
    stream = audio.open(
        rate=SAMPLE_RATE,
        channels=CHANNELS,
        format=FORMAT,
        input=True,
        frames_per_buffer=FRAMES_PER_BUFFER,
    )
    print("Speak into your microphone. Press Ctrl+C to stop.")

    callbacks = dict(
        on_open=on_open,
        on_message=on_message,
        on_error=on_error,
        on_close=on_close,
    )
    ws_app = websocket.WebSocketApp(
        API_ENDPOINT,
        header={"Authorization": YOUR_API_KEY},
        **callbacks,
    )

    socket_thread = threading.Thread(target=ws_app.run_forever, daemon=True)
    socket_thread.start()

    try:
        # Poll instead of join() so Ctrl+C is noticed promptly.
        while socket_thread.is_alive():
            time.sleep(0.1)
    except KeyboardInterrupt:
        print("\nStopping...")
        stop_event.set()
        still_connected = ws_app and ws_app.sock and ws_app.sock.connected
        if still_connected:
            ws_app.send(json.dumps({"type": "Terminate"}))
            # Presumably leaves time for the server's Termination message.
            time.sleep(2)
        if ws_app:
            ws_app.close()
        socket_thread.join(timeout=2.0)


if __name__ == "__main__":
    run()

import WebSocket from "ws";
import mic from "mic";

// AssemblyAI API key; replace the placeholder with your own key.
const YOUR_API_KEY = "YOUR_API_KEY";

// Query parameters appended to the streaming WebSocket URL.
const CONNECTION_PARAMS = {
  sample_rate: 16000,
  speech_model: "u3-rt-pro",
};
const API_ENDPOINT_BASE_URL = "wss://streaming.assemblyai.com/v3/ws";
const API_ENDPOINT = [
  API_ENDPOINT_BASE_URL,
  new URLSearchParams(CONNECTION_PARAMS),
].join("?");

// The microphone is recorded at the same rate that is announced to the API.
const { sample_rate: SAMPLE_RATE } = CONNECTION_PARAMS;

// Shared handles, assigned once the connection is set up in run().
let micInstance = null;
let ws = null;

/**
 * Translate a final transcript into Spanish using LLM Gateway.
 *
 * @param {string} text - The transcript text to translate.
 * @returns {Promise<string>} The translated text from the first choice.
 * @throws {Error} If LLM Gateway responds with a non-2xx status.
 */
async function translateText(text) {
  const response = await fetch(
    "https://llm-gateway.assemblyai.com/v1/chat/completions",
    {
      method: "POST",
      headers: {
        Authorization: YOUR_API_KEY,
        "Content-Type": "application/json",
      },
      body: JSON.stringify({
        model: "gemini-2.5-flash-lite",
        messages: [
          { role: "user", content: `Translate the following text into Spanish. Do not write a preamble. Just return the translated text.\n\nText: ${text}` },
        ],
        max_tokens: 1000,
      }),
    }
  );

  // Report auth/quota/server errors explicitly instead of failing later with
  // a TypeError when the error body has no `choices` array.
  if (!response.ok) {
    const details = await response.text();
    throw new Error(
      `LLM Gateway request failed (${response.status}): ${details}`
    );
  }

  const data = await response.json();
  return data.choices[0].message.content;
}

/**
 * Open the streaming WebSocket, forward microphone audio to it, and print a
 * translation of every final transcript. Ctrl+C sends a Terminate message
 * before closing the socket and exiting.
 */
function run() {
  ws = new WebSocket(API_ENDPOINT, {
    headers: { Authorization: YOUR_API_KEY },
  });

  // Start recording only once the socket is ready to accept audio.
  ws.on("open", () => {
    console.log("WebSocket connection opened.");

    micInstance = mic({
      rate: String(SAMPLE_RATE),
      channels: "1",
      bitwidth: "16",
      encoding: "signed-integer",
      endian: "little",
    });

    const micInputStream = micInstance.getAudioStream();
    micInputStream.on("data", (data) => {
      // Drop audio that arrives while the socket is not open.
      if (ws.readyState === WebSocket.OPEN) {
        ws.send(data);
      }
    });

    micInstance.start();
    console.log("Speak into your microphone. Press Ctrl+C to stop.");
  });

  ws.on("message", async (data) => {
    try {
      const msg = JSON.parse(data);
      if (msg.type === "Begin") {
        console.log(`Session began: ID=${msg.id}`);
      } else if (msg.type === "Turn") {
        const transcript = msg.transcript || "";
        if (msg.end_of_turn) {
          // Overwrite the partial transcript shown on the current line.
          process.stdout.write("\r" + " ".repeat(80) + "\r");
          // Nothing to translate for an empty turn; skipping avoids a wasted
          // LLM request (and a possibly made-up "translation").
          if (transcript.trim()) {
            const translated = await translateText(transcript);
            console.log(translated);
          }
        } else {
          process.stdout.write(`\r${transcript}`);
        }
      } else if (msg.type === "Termination") {
        console.log(
          `\nSession terminated: ${msg.audio_duration_seconds}s of audio`
        );
      }
    } catch (e) {
      // Report the problem but keep handling subsequent messages.
      console.error("Error handling message:", e);
    }
  });

  ws.on("error", (error) => {
    console.error("WebSocket error:", error);
  });

  ws.on("close", (code) => {
    console.log(`WebSocket closed: ${code}`);
    if (micInstance) micInstance.stop();
  });

  process.on("SIGINT", () => {
    console.log("\nStopping...");
    if (micInstance) micInstance.stop();
    if (ws && ws.readyState === WebSocket.OPEN) {
      ws.send(JSON.stringify({ type: "Terminate" }));
      // Delay the close; presumably this leaves time for the server's
      // Termination message to arrive.
      setTimeout(() => {
        ws.close();
        process.exit(0);
      }, 2000);
    } else {
      process.exit(0);
    }
  });
}

run();

Step-by-Step Instructions

Before we begin, make sure you have an AssemblyAI account and an API key. You can sign up and get your API key from your dashboard.

Install Dependencies

Python
JavaScript

pip install websocket-client pyaudio requests

npm install ws mic

Import Packages & Set API Key

Python
JavaScript

import pyaudio
import websocket
import json
import threading
import time
import requests
from urllib.parse import urlencode

# API key sent in the authorization header of the requests made in this guide.
YOUR_API_KEY = "YOUR_API_KEY"  # Replace with your actual API key

import WebSocket from "ws";
import mic from "mic";

// API key sent in the Authorization header of the requests made in this guide.
// Replace the placeholder with your actual API key.
const YOUR_API_KEY = "YOUR_API_KEY";

Audio Configuration & Global Variables

Set all of your audio configurations and global variables.

Python
JavaScript

# Query parameters appended to the streaming WebSocket URL.
CONNECTION_PARAMS = dict(
    sample_rate=16000,
    speech_model="u3-rt-pro",
)
API_ENDPOINT_BASE_URL = "wss://streaming.assemblyai.com/v3/ws"
API_ENDPOINT = API_ENDPOINT_BASE_URL + "?" + urlencode(CONNECTION_PARAMS)

# Microphone capture settings; the capture rate mirrors the rate sent to the API.
SAMPLE_RATE = CONNECTION_PARAMS["sample_rate"]
FRAMES_PER_BUFFER = 800  # 800 frames at 16000 Hz = 50 ms of audio per read
CHANNELS = 1
FORMAT = pyaudio.paInt16

# State shared between the main thread, the WebSocket callbacks and the
# audio-streaming thread. Everything starts unset and is filled in later.
stop_event = threading.Event()
audio = stream = ws_app = audio_thread = None

// Query parameters appended to the streaming WebSocket URL.
const CONNECTION_PARAMS = {
  sample_rate: 16000,
  speech_model: "u3-rt-pro",
};
const API_ENDPOINT_BASE_URL = "wss://streaming.assemblyai.com/v3/ws";
const API_ENDPOINT = [
  API_ENDPOINT_BASE_URL,
  new URLSearchParams(CONNECTION_PARAMS),
].join("?");

// The microphone is recorded at the same rate that is announced to the API.
const { sample_rate: SAMPLE_RATE } = CONNECTION_PARAMS;

// Shared handles, assigned once the connection is set up.
let micInstance = null;
let ws = null;

Define Translate Text Function

Define a function called translate_text (Python) or translateText (JavaScript) that uses LLM Gateway to translate final transcripts into another language. This example translates the text into Spanish. To translate into a different language, replace “Spanish” in the prompt with your language of choice.

Python
JavaScript

def translate_text(text):
    """Translate a final transcript into Spanish using LLM Gateway.

    Args:
        text: The transcript text to translate.

    Returns:
        The translated text from the first choice of the chat completion.

    Raises:
        requests.HTTPError: If LLM Gateway answers with a 4xx/5xx status.
        requests.Timeout: If no response arrives within the timeout.
    """
    headers = {
        "authorization": YOUR_API_KEY
    }

    llm_gateway_data = {
        "model": "gemini-2.5-flash-lite",
        "messages": [
            {"role": "user", "content": f"Translate the following text into Spanish. Do not write a preamble. Just return the translated text.\n\nText: {text}"}
        ],
        "max_tokens": 1000
    }

    # This runs inside the WebSocket message callback, so a request without a
    # timeout could stall transcript handling indefinitely.
    result = requests.post(
        "https://llm-gateway.assemblyai.com/v1/chat/completions",
        headers=headers,
        json=llm_gateway_data,
        timeout=30,
    )
    # Surface auth/quota/server errors as a clear HTTP error instead of a
    # KeyError on the missing "choices" field below.
    result.raise_for_status()
    return result.json()["choices"][0]["message"]["content"]

/**
 * Translate a final transcript into Spanish using LLM Gateway.
 *
 * @param {string} text - The transcript text to translate.
 * @returns {Promise<string>} The translated text from the first choice.
 * @throws {Error} If LLM Gateway responds with a non-2xx status.
 */
async function translateText(text) {
  const response = await fetch(
    "https://llm-gateway.assemblyai.com/v1/chat/completions",
    {
      method: "POST",
      headers: {
        Authorization: YOUR_API_KEY,
        "Content-Type": "application/json",
      },
      body: JSON.stringify({
        model: "gemini-2.5-flash-lite",
        messages: [
          { role: "user", content: `Translate the following text into Spanish. Do not write a preamble. Just return the translated text.\n\nText: ${text}` },
        ],
        max_tokens: 1000,
      }),
    }
  );

  // Report auth/quota/server errors explicitly instead of failing later with
  // a TypeError when the error body has no `choices` array.
  if (!response.ok) {
    const details = await response.text();
    throw new Error(
      `LLM Gateway request failed (${response.status}): ${details}`
    );
  }

  const data = await response.json();
  return data.choices[0].message.content;
}

Websocket Event Handlers

Open Websocket

Python
JavaScript

def on_open(ws):
    """Start forwarding microphone audio once the WebSocket is connected.

    Launches a daemon thread that reads chunks from the global ``stream`` and
    sends each one as a binary frame until ``stop_event`` is set or an error
    occurs. The thread is stored in the global ``audio_thread``.
    """
    global audio_thread

    print("WebSocket connection opened.")

    def stream_audio():
        # Keep pumping audio until asked to stop; any failure ends the loop.
        while True:
            if stop_event.is_set():
                return
            try:
                chunk = stream.read(FRAMES_PER_BUFFER, exception_on_overflow=False)
                ws.send(chunk, websocket.ABNF.OPCODE_BINARY)
            except Exception as exc:
                print(f"Error streaming audio: {exc}")
                return

    audio_thread = threading.Thread(target=stream_audio, daemon=True)
    audio_thread.start()

// Once the socket is open, start the microphone and forward its audio.
ws.on("open", () => {
  console.log("WebSocket connection opened.");

  const micOptions = {
    rate: String(SAMPLE_RATE),
    channels: "1",
    bitwidth: "16",
    encoding: "signed-integer",
    endian: "little",
  };
  micInstance = mic(micOptions);

  micInstance.getAudioStream().on("data", (chunk) => {
    // Drop audio that arrives while the socket is not open.
    if (ws.readyState !== WebSocket.OPEN) return;
    ws.send(chunk);
  });

  micInstance.start();
  console.log("Speak into your microphone. Press Ctrl+C to stop.");
});

Handle Websocket Messages

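In this function, use the previously defined translate_text / translateText to translate all final transcripts. A Turn message is final when its end_of_turn field is true; until then, the partial transcript is printed in place on the current line.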

Python
JavaScript

def on_message(ws, message):
    """Handle one JSON message received from the streaming API.

    ``Begin`` and ``Termination`` messages are logged. For ``Turn`` messages,
    partial transcripts are printed in place on the current line; when
    ``end_of_turn`` is set the line is cleared and the translated transcript
    is printed instead.

    Args:
        ws: The WebSocket app that received the message (unused).
        message: The raw JSON text of the message.
    """
    try:
        data = json.loads(message)
        msg_type = data.get("type")

        if msg_type == "Begin":
            print(f"Session began: ID={data.get('id')}")
        elif msg_type == "Turn":
            # Treat a missing or null transcript as empty text.
            transcript = data.get("transcript") or ""
            if data.get("end_of_turn"):
                # Overwrite the partial transcript shown on the current line.
                print(f"\r{' ' * 80}\r", end="")
                # Nothing to translate for an empty turn; skipping avoids a
                # wasted LLM request (and a possibly made-up "translation").
                if transcript.strip():
                    print(translate_text(transcript))
            else:
                print(f"\r{transcript}", end="")
        elif msg_type == "Termination":
            print(f"\nSession terminated: {data.get('audio_duration_seconds', 0)}s of audio")
    except Exception as e:
        # Keep the socket callback alive: report the problem and carry on.
        print(f"Error handling message: {e}")

// Log session events, show partial transcripts in place, and print a
// translation for each final (end-of-turn) transcript.
ws.on("message", async (data) => {
  try {
    const msg = JSON.parse(data);
    if (msg.type === "Begin") {
      console.log(`Session began: ID=${msg.id}`);
    } else if (msg.type === "Turn") {
      const transcript = msg.transcript || "";
      if (msg.end_of_turn) {
        // Overwrite the partial transcript shown on the current line.
        process.stdout.write("\r" + " ".repeat(80) + "\r");
        // Nothing to translate for an empty turn; skipping avoids a wasted
        // LLM request (and a possibly made-up "translation").
        if (transcript.trim()) {
          const translated = await translateText(transcript);
          console.log(translated);
        }
      } else {
        process.stdout.write(`\r${transcript}`);
      }
    } else if (msg.type === "Termination") {
      console.log(
        `\nSession terminated: ${msg.audio_duration_seconds}s of audio`
      );
    }
  } catch (e) {
    // Report the problem but keep handling subsequent messages.
    console.error("Error handling message:", e);
  }
});

Close Websocket

Python
JavaScript

def on_close(ws, close_status_code, close_msg):
    """Stop audio streaming and release the microphone when the socket closes."""
    global stream, audio

    print(f"\nWebSocket Disconnected: Status={close_status_code}")
    stop_event.set()

    # Shut down the input stream first, then the PyAudio instance that owns it.
    if stream:
        still_capturing = stream.is_active()
        if still_capturing:
            stream.stop_stream()
        stream.close()

    if not audio:
        return
    audio.terminate()

// Log the close code and stop the microphone if it was started.
ws.on("close", (code) => {
  console.log(`WebSocket closed: ${code}`);
  if (!micInstance) return;
  micInstance.stop();
});

Websocket Error Handling

Python
JavaScript

def on_error(ws, error):
    """Report a WebSocket error and signal the audio thread to stop."""
    report = f"\nWebSocket Error: {error}"
    print(report)
    # Setting the event ends the audio streaming loop.
    stop_event.set()

// Log WebSocket errors as they are reported.
ws.on("error", (error) => console.error("WebSocket error:", error));

Begin Real-time STT Transcription

Python
JavaScript

def run():
    """Capture microphone audio and stream it for transcription until stopped.

    Opens the PyAudio input stream, runs the WebSocket client on a background
    daemon thread, and waits for that thread to finish. On Ctrl+C a
    ``Terminate`` message is sent (if still connected) before the socket is
    closed.
    """
    global audio, stream, ws_app

    audio = pyaudio.PyAudio()
    stream = audio.open(
        rate=SAMPLE_RATE,
        channels=CHANNELS,
        format=FORMAT,
        input=True,
        frames_per_buffer=FRAMES_PER_BUFFER,
    )
    print("Speak into your microphone. Press Ctrl+C to stop.")

    callbacks = dict(
        on_open=on_open,
        on_message=on_message,
        on_error=on_error,
        on_close=on_close,
    )
    ws_app = websocket.WebSocketApp(
        API_ENDPOINT,
        header={"Authorization": YOUR_API_KEY},
        **callbacks,
    )

    socket_thread = threading.Thread(target=ws_app.run_forever, daemon=True)
    socket_thread.start()

    try:
        # Poll instead of join() so Ctrl+C is noticed promptly.
        while socket_thread.is_alive():
            time.sleep(0.1)
    except KeyboardInterrupt:
        print("\nStopping...")
        stop_event.set()
        still_connected = ws_app and ws_app.sock and ws_app.sock.connected
        if still_connected:
            ws_app.send(json.dumps({"type": "Terminate"}))
            # Presumably leaves time for the server's Termination message.
            time.sleep(2)
        if ws_app:
            ws_app.close()
        socket_thread.join(timeout=2.0)


if __name__ == "__main__":
    run()

/**
 * Open the streaming WebSocket, attach the event handlers, and install a
 * Ctrl+C handler that ends the session before exiting.
 */
function run() {
  ws = new WebSocket(API_ENDPOINT, {
    headers: { Authorization: YOUR_API_KEY },
  });

  // ... event handlers defined above ...

  // On Ctrl+C: stop recording, ask the server to terminate the session,
  // then close the socket and exit after a short delay.
  process.on("SIGINT", () => {
    console.log("\nStopping...");
    if (micInstance) micInstance.stop();
    if (ws && ws.readyState === WebSocket.OPEN) {
      ws.send(JSON.stringify({ type: "Terminate" }));
      // Presumably the delay leaves time for the server's Termination message.
      setTimeout(() => {
        ws.close();
        process.exit(0);
      }, 2000);
    } else {
      // Socket is not open, so there is no session to terminate.
      process.exit(0);
    }
  });
}

run();

Quickstart

Step-by-Step Instructions

Install Dependencies

Import Packages & Set API Key

Audio Configuration & Global Variables

Define Translate Text Function

Websocket Event Handlers

Open Websocket

Handle Websocket Messages

Close Websocket

Websocket Error Handling

Begin Real-time STT Transcription