Migrating from Streaming v2 to Streaming v3 (JavaScript)

This cookbook guides you through migrating from AssemblyAI’s legacy Streaming STT model (v2) to our latest Universal Streaming STT model (v3), which provides ultra-low latency for faster transcription, intelligent endpointing for more natural speech detection, and improved accuracy across various audio conditions.

Check out this blog post to learn more about this new model!

Overview of changes

The migration involves several key improvements:

  • API Version: Upgrade from v2 (/v2/realtime/ws) to v3 (/v3/ws)
  • Enhanced Error Handling: Robust cleanup and resource management
  • Modern Message Format: Updated message types and structure
  • Configuration Options: More flexible connection parameters
  • Graceful Shutdown: Proper termination handling

You can follow the step-by-step guide below to make changes to your existing code, but here is what your code should look like in the end:

1const WebSocket = require("ws");
2const mic = require("mic");
3const querystring = require("querystring");
4const fs = require("fs");
5
// --- Configuration ---
const YOUR_API_KEY = "YOUR-API-KEY"; // Replace with your actual API key
// v3 takes its configuration as URL query parameters (serialized below).
const CONNECTION_PARAMS = {
  sample_rate: 16000,
  format_turns: true, // Request formatted final transcripts
};
const API_ENDPOINT_BASE_URL = "wss://streaming.assemblyai.com/v3/ws";
const API_ENDPOINT = `${API_ENDPOINT_BASE_URL}?${querystring.stringify(CONNECTION_PARAMS)}`;

// Audio Configuration
const SAMPLE_RATE = CONNECTION_PARAMS.sample_rate; // mic capture rate must match the session rate
const CHANNELS = 1; // mono capture

// Global variables (mutated by run(), startMicrophone(), and cleanup())
let micInstance = null;
let micInputStream = null;
let ws = null;
let stopRequested = false; // set by cleanup() so the mic handler stops forwarding audio

// WAV recording variables
let recordedFrames = []; // Store audio frames for WAV file
28// --- Helper functions ---
// Erase the current terminal line (used to wipe an in-progress partial
// transcript before printing the final one) and return the cursor to column 0.
function clearLine() {
  const blank = " ".repeat(80);
  process.stdout.write(`\r${blank}\r`);
}
32
// Convert a Unix timestamp in seconds (as sent in v3 `expires_at` fields)
// to an ISO-8601 UTC string.
function formatTimestamp(timestamp) {
  const millis = timestamp * 1000;
  return new Date(millis).toISOString();
}
36
// Build the 44-byte canonical RIFF/WAVE header for uncompressed 16-bit PCM.
//
// @param {number} sampleRate - Samples per second (e.g. 16000).
// @param {number} channels   - Channel count (1 = mono).
// @param {number} dataLength - Size of the PCM payload in bytes.
// @returns {Buffer} Header to prepend to raw PCM data to form a WAV file.
function createWavHeader(sampleRate, channels, dataLength) {
  const bitsPerSample = 16;
  const blockAlign = channels * (bitsPerSample / 8); // bytes per sample frame
  const byteRate = sampleRate * blockAlign;

  const header = Buffer.alloc(44);

  // RIFF chunk descriptor
  header.write("RIFF", 0);
  header.writeUInt32LE(36 + dataLength, 4); // file size minus the 8-byte RIFF preamble
  header.write("WAVE", 8);

  // "fmt " sub-chunk: PCM format description
  header.write("fmt ", 12);
  header.writeUInt32LE(16, 16); // fmt chunk size for PCM
  header.writeUInt16LE(1, 20); // audio format 1 = uncompressed PCM
  header.writeUInt16LE(channels, 22);
  header.writeUInt32LE(sampleRate, 24);
  header.writeUInt32LE(byteRate, 28);
  header.writeUInt16LE(blockAlign, 32);
  header.writeUInt16LE(bitsPerSample, 34);

  // "data" sub-chunk: payload descriptor
  header.write("data", 36);
  header.writeUInt32LE(dataLength, 40);

  return header;
}
61
// Persist all buffered microphone audio as a timestamped 16-bit PCM WAV file
// in the current working directory. No-op (with a message) when nothing was
// recorded. Reads the module-level `recordedFrames`, `SAMPLE_RATE`, `CHANNELS`.
function saveWavFile() {
  if (recordedFrames.length === 0) {
    console.log("No audio data recorded.");
    return;
  }

  // Generate filename with timestamp (strip ':' and '.', invalid in filenames)
  const timestamp = new Date().toISOString().replace(/[:.]/g, "-").slice(0, 19);
  const filename = `recorded_audio_${timestamp}.wav`;

  try {
    // Combine all recorded frames
    const audioData = Buffer.concat(recordedFrames);
    const dataLength = audioData.length;

    // Create WAV header
    const wavHeader = createWavHeader(SAMPLE_RATE, CHANNELS, dataLength);

    // Write WAV file
    const wavFile = Buffer.concat([wavHeader, audioData]);
    fs.writeFileSync(filename, wavFile);

    // FIX: previously logged a broken placeholder (`$(unknown)`) instead of
    // interpolating the generated filename.
    console.log(`Audio saved to: ${filename}`);
    console.log(
      // duration = bytes / (rate * channels * 2 bytes per 16-bit sample)
      `Duration: ${(dataLength / (SAMPLE_RATE * CHANNELS * 2)).toFixed(2)} seconds`
    );
  } catch (error) {
    console.error(`Error saving WAV file: ${error}`);
  }
}
92
// --- Main function ---
// Open the Universal Streaming (v3) WebSocket, wire up its lifecycle
// handlers, and start streaming microphone audio once the socket is live.
async function run() {
  console.log("Starting AssemblyAI streaming transcription...");
  console.log("Audio will be saved to a WAV file when the session ends.");

  // Initialize WebSocket connection (auth via header, config via URL params)
  ws = new WebSocket(API_ENDPOINT, {
    headers: { Authorization: YOUR_API_KEY },
  });

  ws.on("open", () => {
    console.log("WebSocket connection opened.");
    console.log(`Connected to: ${API_ENDPOINT}`);
    // Only begin capturing once the connection is established
    startMicrophone();
  });

  ws.on("message", (message) => {
    try {
      const msg = JSON.parse(message);

      switch (msg.type) {
        case "Begin":
          console.log(
            `\nSession began: ID=${msg.id}, ExpiresAt=${formatTimestamp(msg.expires_at)}`
          );
          break;
        case "Turn": {
          const transcript = msg.transcript || "";
          if (msg.end_of_turn) {
            // Final transcript for this turn: wipe the partial line first
            clearLine();
            console.log(transcript);
          } else {
            // Partial transcript: redraw in place on the same line
            process.stdout.write(`\r${transcript}`);
          }
          break;
        }
        case "Termination":
          console.log(
            `\nSession Terminated: Audio Duration=${msg.audio_duration_seconds}s, Session Duration=${msg.session_duration_seconds}s`
          );
          break;
      }
    } catch (error) {
      console.error(`\nError handling message: ${error}`);
      console.error(`Message data: ${message}`);
    }
  });

  ws.on("error", (error) => {
    console.error(`\nWebSocket Error: ${error}`);
    cleanup();
  });

  ws.on("close", (code, reason) => {
    console.log(`\nWebSocket Disconnected: Status=${code}, Msg=${reason}`);
    cleanup();
  });

  // Handle process termination
  setupTerminationHandlers();
}
158
// Open the system microphone and forward raw PCM chunks both to the
// in-memory WAV buffer and to the live transcription WebSocket.
function startMicrophone() {
  try {
    micInstance = mic({
      rate: String(SAMPLE_RATE),
      channels: String(CHANNELS),
      debug: false,
      exitOnSilence: 6, // This won't actually exit, just a parameter for mic
    });

    micInputStream = micInstance.getAudioStream();

    micInputStream.on("data", (chunk) => {
      // Drop audio when the socket isn't open or shutdown has begun
      const socketReady = ws && ws.readyState === WebSocket.OPEN;
      if (!socketReady || stopRequested) return;

      // Store audio data for WAV recording
      recordedFrames.push(Buffer.from(chunk));
      // Send audio data to WebSocket
      ws.send(chunk);
    });

    micInputStream.on("error", (err) => {
      console.error(`Microphone Error: ${err}`);
      cleanup();
    });

    micInstance.start();
    console.log("Microphone stream opened successfully.");
    console.log("Speak into your microphone. Press Ctrl+C to stop.");
  } catch (error) {
    console.error(`Error opening microphone stream: ${error}`);
    cleanup();
  }
}
193
// Tear everything down: persist the recording, stop the microphone, and
// close the WebSocket — sending a graceful "Terminate" first when the
// socket is still open. Every step is individually guarded so cleanup can
// run safely from any error path.
function cleanup() {
  stopRequested = true;

  // Save recorded audio to WAV file
  saveWavFile();

  // Stop microphone if it's running
  if (micInstance) {
    try {
      micInstance.stop();
    } catch (error) {
      console.error(`Error stopping microphone: ${error}`);
    }
    micInstance = null;
  }

  // Close WebSocket connection if it's open
  const closable =
    ws && [WebSocket.OPEN, WebSocket.CONNECTING].includes(ws.readyState);
  if (closable) {
    try {
      // Send termination message if possible
      if (ws.readyState === WebSocket.OPEN) {
        const terminateMessage = { type: "Terminate" };
        console.log(
          `Sending termination message: ${JSON.stringify(terminateMessage)}`
        );
        ws.send(JSON.stringify(terminateMessage));
      }
      ws.close();
    } catch (error) {
      console.error(`Error closing WebSocket: ${error}`);
    }
    ws = null;
  }

  console.log("Cleanup complete.");
}
230
// Install process-level handlers so Ctrl+C, SIGTERM, and uncaught
// exceptions all run cleanup() and then exit after a short grace period.
function setupTerminationHandlers() {
  // Shared shutdown path for the two termination signals
  const exitAfterCleanup = (message, exitCode) => {
    console.log(message);
    cleanup();
    // Give time for cleanup before exiting
    setTimeout(() => process.exit(exitCode), 1000);
  };

  // Handle Ctrl+C and other termination signals
  process.on("SIGINT", () =>
    exitAfterCleanup("\nCtrl+C received. Stopping...", 0)
  );
  process.on("SIGTERM", () =>
    exitAfterCleanup("\nTermination signal received. Stopping...", 0)
  );

  // Handle uncaught exceptions (logged to stderr, exit code 1)
  process.on("uncaughtException", (error) => {
    console.error(`\nUncaught exception: ${error}`);
    cleanup();
    // Give time for cleanup before exiting
    setTimeout(() => process.exit(1), 1000);
  });
}
255
// Start the application. Top-level entry point; runtime errors surface via
// the uncaughtException handler installed inside run() -> setupTerminationHandlers().
run();

For more information on our Universal Streaming feature, see this section of our official documentation.

Step-by-step migration guide

1. Update API endpoint and configuration

Before (v2):

1const API_KEY = "<YOUR_API_KEY>";
2const SAMPLE_RATE = 16000; // 16kHz sample rate
3
4const ws = new WebSocket(
5 `wss://api.assemblyai.com/v2/realtime/ws?sample_rate=${SAMPLE_RATE}`,
6 {
7 headers: {
8 Authorization: API_KEY,
9 },
10 }
11);

After (v3):

1// --- Configuration ---
2const YOUR_API_KEY = "YOUR-API-KEY"; // Replace with your actual API key
3const CONNECTION_PARAMS = {
4 sample_rate: 16000,
5 format_turns: true, // Request formatted final transcripts
6};
7const API_ENDPOINT_BASE_URL = "wss://streaming.assemblyai.com/v3/ws";
8const API_ENDPOINT = `${API_ENDPOINT_BASE_URL}?${querystring.stringify(CONNECTION_PARAMS)}`;
9
10// Initialize WebSocket connection
11ws = new WebSocket(API_ENDPOINT, {
12 headers: {
13 Authorization: YOUR_API_KEY,
14 },
15});

Key Changes:

  • New base URL: streaming.assemblyai.com instead of api.assemblyai.com
  • Version upgrade: /v3/ws instead of /v2/realtime/ws
  • Configuration via URL parameters using querystring
  • Added format_turns option for better transcript formatting

2. Audio configuration

Before (v2):

1const SAMPLE_RATE = 16000;
2const CHANNELS = 1;

After (v3):

1const SAMPLE_RATE = CONNECTION_PARAMS.sample_rate;
2const CHANNELS = 1;

Key Changes:

  • Sample rate now references the configuration parameter

3. Update message handling schema

Before (v2):

1ws.on("message", (message) => {
2 try {
3 const msg = JSON.parse(message);
4 const msgType = msg.message_type;
5
6 if (msgType === "SessionBegins") {
7 const sessionId = msg.session_id;
8 console.log("Session ID:", sessionId);
9 return;
10 }
11
12 const text = msg.text || "";
13 if (!text) {
14 return;
15 }
16
17 if (msgType === "PartialTranscript") {
18 console.log("Partial:", text);
19 } else if (msgType === "FinalTranscript") {
20 console.log("Final:", text);
21 } else if (msgType === "error") {
22 console.error("Error:", msg.error);
23 }
24 } catch (error) {
25 console.error("Error handling message:", error);
26 }
27});

After (v3):

1ws.on("message", (message) => {
2 try {
3 const data = JSON.parse(message);
4 const msgType = data.type;
5 if (msgType === "Begin") {
6 const sessionId = data.id;
7 const expiresAt = data.expires_at;
8 console.log(
9 `\nSession began: ID=${sessionId}, ExpiresAt=${formatTimestamp(expiresAt)}`
10 );
11 } else if (msgType === "Turn") {
12 const transcript = data.transcript || "";
13 if (data.end_of_turn) {
14 clearLine();
15 console.log(transcript);
16 } else {
17 process.stdout.write(`\r${transcript}`);
18 }
19 } else if (msgType === "Termination") {
20 const audioDuration = data.audio_duration_seconds;
21 const sessionDuration = data.session_duration_seconds;
22 console.log(
23 `\nSession Terminated: Audio Duration=${audioDuration}s, Session Duration=${sessionDuration}s`
24 );
25 }
26 } catch (error) {
27 console.error(`\nError handling message: ${error}`);
28 console.error(`Message data: ${message}`);
29 }
30});

Key Changes:

  • Message types renamed: SessionBegins → Begin, PartialTranscript/FinalTranscript → Turn
  • Field names updated: message_type → type, session_id → id, text → transcript
  • Added session expiration timestamp handling (expires_at)
  • New transcript formatting with turn_is_formatted flag
  • Added turn tracking with turn_order and end_of_turn fields
  • New confidence scoring with end_of_turn_confidence
  • Added Termination message with session statistics
  • Error handling moved from message-based to WebSocket events

4. Add graceful shutdown handling and improve error handling and logging

Before (v2):

1ws.on("close", (code, reason) => onClose(ws, code, reason));
2
3function onClose(ws, code, reason) {
4 if (recording) {
5 recording.end();
6 }
7 console.log("Disconnected");
8}
9
10process.on("SIGINT", async function () {
11 console.log();
12 console.log("Stopping recording");
13 if (recording) {
14 recording.end();
15 }
16 console.log("Closing real-time transcript connection");
17 if (ws.readyState === WebSocket.OPEN) {
18 ws.close();
19 }
20 process.exit();
21});

After (v3):

1ws.on("close", (code, reason) => {
2 console.log(`\nWebSocket Disconnected: Status=${code}, Msg=${reason}`);
3 cleanup();
4});
5
6function cleanup() {
7 stopRequested = true;
8 // Save recorded audio to WAV file
9 saveWavFile();
10 // Stop microphone if it's running
11 if (micInstance) {
12 try {
13 micInstance.stop();
14 } catch (error) {
15 console.error(`Error stopping microphone: ${error}`);
16 }
17 micInstance = null;
18 }
19 // Close WebSocket connection if it's open
20 if (ws && [WebSocket.OPEN, WebSocket.CONNECTING].includes(ws.readyState)) {
21 try {
22 // Send termination message if possible
23 if (ws.readyState === WebSocket.OPEN) {
24 const terminateMessage = { type: "Terminate" };
25 console.log(
26 `Sending termination message: ${JSON.stringify(terminateMessage)}`
27 );
28 ws.send(JSON.stringify(terminateMessage));
29 }
30 ws.close();
31 } catch (error) {
32 console.error(`Error closing WebSocket: ${error}`);
33 }
34 ws = null;
35 }
36 console.log("Cleanup complete.");
37}
38
39function setupTerminationHandlers() {
40 // Handle Ctrl+C and other termination signals
41 process.on("SIGINT", () => {
42 console.log("\nCtrl+C received. Stopping...");
43 cleanup();
44 // Give time for cleanup before exiting
45 setTimeout(() => process.exit(0), 1000);
46 });
47 process.on("SIGTERM", () => {
48 console.log("\nTermination signal received. Stopping...");
49 cleanup();
50 // Give time for cleanup before exiting
51 setTimeout(() => process.exit(0), 1000);
52 });
53 // Handle uncaught exceptions
54 process.on("uncaughtException", (error) => {
55 console.error(`\nUncaught exception: ${error}`);
56 cleanup();
57 // Give time for cleanup before exiting
58 setTimeout(() => process.exit(1), 1000);
59 });
60}

Key Changes:

  • Proper KeyboardInterrupt handling
  • Graceful termination message sending
  • Detailed error context and timestamps
  • Proper exception type handling
  • Resource cleanup on all error paths
  • Connection status checking before operations

Note: Pricing is based on session duration so it is very important to close sessions properly to avoid unexpected usage and cost.

Migration checklist

  • Update API endpoint from v2 to v3
  • Update message type handling (Begin, Turn, Termination)
  • Add proper resource cleanup in all code paths
  • Update field names in message parsing
  • Add graceful shutdown with termination messages
  • Add detailed error logging with context
  • Test KeyboardInterrupt handling
  • Verify audio resource cleanup
  • Test connection failure scenarios

Testing your migration

  1. Basic Functionality: Verify transcription works with simple speech
  2. Error Handling: Test with invalid API keys or network issues
  3. Graceful Shutdown: Test Ctrl+C interruption
  4. Resource Cleanup: Monitor for memory leaks during extended use
  5. Message Formatting: Test with format_turns enabled/disabled

Common migration issues

Issue: “WebSocket connection failed”

Solution: Verify you’re using the new v3 endpoint URL and proper authentication header format.

Issue: “Message type not recognized”

Solution: Update message type handling from old names (SessionBegins, PartialTranscript) to new ones (Begin, Turn).

Benefits of migration

  • Improved Reliability: Better error handling and recovery
  • Lower Latency: Reduced buffer sizes for faster response
  • Enhanced Features: Formatted transcripts and session statistics
  • Better Resource Management: Proper cleanup prevents memory leaks
  • Graceful Shutdown: Clean termination with proper cleanup

Conclusion

This migration provides a more robust, maintainable, and feature-rich streaming transcription implementation. The enhanced error handling, resource management, and modern API features make it suitable for production use cases where reliability and performance are critical.