Migrating from Streaming v2 to Streaming v3 (JavaScript)

This cookbook guides you through migrating from AssemblyAI’s legacy Streaming STT model (v2) to our latest Universal Streaming STT model (v3), which provides ultra-low latency for faster transcription, intelligent endpointing for more natural speech detection, and improved accuracy across various audio conditions.

Check out this blog post to learn more about this new model!

Overview of changes

The migration involves several key improvements:

  • API Version: Upgrade from v2 (/v2/realtime/ws) to v3 (/v3/ws)
  • Enhanced Error Handling: Robust cleanup and resource management
  • Modern Message Format: Updated message types and structure
  • Configuration Options: More flexible connection parameters
  • Graceful Shutdown: Proper termination handling

You can follow the step-by-step guide below to make changes to your existing code, but here is what your code should look like in the end:

1const WebSocket = require("ws");
2const mic = require("mic");
3const querystring = require("querystring");
4const fs = require("fs");
5
// --- Configuration ---
const YOUR_API_KEY = "YOUR-API-KEY"; // Replace with your actual API key
// v3 takes its configuration as URL query parameters (serialized below).
const CONNECTION_PARAMS = {
  sample_rate: 16000,
  format_turns: true, // Request formatted final transcripts
};
const API_ENDPOINT_BASE_URL = "wss://streaming.assemblyai.com/v3/ws";
const API_ENDPOINT = `${API_ENDPOINT_BASE_URL}?${querystring.stringify(CONNECTION_PARAMS)}`;

// Audio Configuration
const SAMPLE_RATE = CONNECTION_PARAMS.sample_rate; // mic capture rate must match the session rate
const CHANNELS = 1; // mono capture

// Global variables (mutated by run(), startMicrophone(), and cleanup())
let micInstance = null;
let micInputStream = null;
let ws = null;
let stopRequested = false; // set by cleanup() so the mic handler stops forwarding audio

// WAV recording variables
let recordedFrames = []; // Store audio frames for WAV file
28// --- Helper functions ---
// Erase the current terminal line (used to wipe an in-progress partial
// transcript before printing the final one) and return the cursor to column 0.
function clearLine() {
  const blank = " ".repeat(80);
  process.stdout.write(`\r${blank}\r`);
}
32
// Convert a Unix timestamp in seconds (as sent in v3 `expires_at` fields)
// to an ISO-8601 UTC string.
function formatTimestamp(timestamp) {
  const millis = timestamp * 1000;
  return new Date(millis).toISOString();
}
36
// Build the 44-byte canonical RIFF/WAVE header for uncompressed 16-bit PCM.
//
// @param {number} sampleRate - Samples per second (e.g. 16000).
// @param {number} channels   - Channel count (1 = mono).
// @param {number} dataLength - Size of the PCM payload in bytes.
// @returns {Buffer} Header to prepend to raw PCM data to form a WAV file.
function createWavHeader(sampleRate, channels, dataLength) {
  const bitsPerSample = 16;
  const blockAlign = channels * (bitsPerSample / 8); // bytes per sample frame
  const byteRate = sampleRate * blockAlign;

  const header = Buffer.alloc(44);

  // RIFF chunk descriptor
  header.write("RIFF", 0);
  header.writeUInt32LE(36 + dataLength, 4); // file size minus the 8-byte RIFF preamble
  header.write("WAVE", 8);

  // "fmt " sub-chunk: PCM format description
  header.write("fmt ", 12);
  header.writeUInt32LE(16, 16); // fmt chunk size for PCM
  header.writeUInt16LE(1, 20); // audio format 1 = uncompressed PCM
  header.writeUInt16LE(channels, 22);
  header.writeUInt32LE(sampleRate, 24);
  header.writeUInt32LE(byteRate, 28);
  header.writeUInt16LE(blockAlign, 32);
  header.writeUInt16LE(bitsPerSample, 34);

  // "data" sub-chunk: payload descriptor
  header.write("data", 36);
  header.writeUInt32LE(dataLength, 40);

  return header;
}
61
// Persist all buffered microphone audio as a timestamped 16-bit PCM WAV file
// in the current working directory. No-op (with a message) when nothing was
// recorded. Reads the module-level `recordedFrames`, `SAMPLE_RATE`, `CHANNELS`.
function saveWavFile() {
  if (recordedFrames.length === 0) {
    console.log("No audio data recorded.");
    return;
  }

  // Generate filename with timestamp (strip ':' and '.', invalid in filenames)
  const timestamp = new Date().toISOString().replace(/[:.]/g, "-").slice(0, 19);
  const filename = `recorded_audio_${timestamp}.wav`;

  try {
    // Combine all recorded frames
    const audioData = Buffer.concat(recordedFrames);
    const dataLength = audioData.length;

    // Create WAV header
    const wavHeader = createWavHeader(SAMPLE_RATE, CHANNELS, dataLength);

    // Write WAV file
    const wavFile = Buffer.concat([wavHeader, audioData]);
    fs.writeFileSync(filename, wavFile);

    // FIX: previously logged a broken placeholder (`$(unknown)`) instead of
    // interpolating the generated filename.
    console.log(`Audio saved to: ${filename}`);
    console.log(
      // duration = bytes / (rate * channels * 2 bytes per 16-bit sample)
      `Duration: ${(dataLength / (SAMPLE_RATE * CHANNELS * 2)).toFixed(2)} seconds`
    );
  } catch (error) {
    console.error(`Error saving WAV file: ${error}`);
  }
}
92
// --- Main function ---
// Open the Universal Streaming (v3) WebSocket, wire up its lifecycle
// handlers, and start streaming microphone audio once the socket is live.
async function run() {
  console.log("Starting AssemblyAI streaming transcription...");
  console.log("Audio will be saved to a WAV file when the session ends.");

  // Initialize WebSocket connection (auth via header, config via URL params)
  ws = new WebSocket(API_ENDPOINT, {
    headers: { Authorization: YOUR_API_KEY },
  });

  ws.on("open", () => {
    console.log("WebSocket connection opened.");
    console.log(`Connected to: ${API_ENDPOINT}`);
    // Only begin capturing once the connection is established
    startMicrophone();
  });

  ws.on("message", (message) => {
    try {
      const msg = JSON.parse(message);

      switch (msg.type) {
        case "Begin":
          console.log(
            `\nSession began: ID=${msg.id}, ExpiresAt=${formatTimestamp(msg.expires_at)}`
          );
          break;
        case "Turn": {
          const transcript = msg.transcript || "";
          if (msg.end_of_turn) {
            // Final transcript for this turn: wipe the partial line first
            clearLine();
            console.log(transcript);
          } else {
            // Partial transcript: redraw in place on the same line
            process.stdout.write(`\r${transcript}`);
          }
          break;
        }
        case "Termination":
          console.log(
            `\nSession Terminated: Audio Duration=${msg.audio_duration_seconds}s, Session Duration=${msg.session_duration_seconds}s`
          );
          break;
      }
    } catch (error) {
      console.error(`\nError handling message: ${error}`);
      console.error(`Message data: ${message}`);
    }
  });

  ws.on("error", (error) => {
    console.error(`\nWebSocket Error: ${error}`);
    cleanup();
  });

  ws.on("close", (code, reason) => {
    console.log(`\nWebSocket Disconnected: Status=${code}, Msg=${reason}`);
    cleanup();
  });

  // Handle process termination
  setupTerminationHandlers();
}
158
// Open the system microphone and forward raw PCM chunks both to the
// in-memory WAV buffer and to the live transcription WebSocket.
function startMicrophone() {
  try {
    micInstance = mic({
      rate: String(SAMPLE_RATE),
      channels: String(CHANNELS),
      debug: false,
      exitOnSilence: 6, // This won't actually exit, just a parameter for mic
    });

    micInputStream = micInstance.getAudioStream();

    micInputStream.on("data", (chunk) => {
      // Drop audio when the socket isn't open or shutdown has begun
      const socketReady = ws && ws.readyState === WebSocket.OPEN;
      if (!socketReady || stopRequested) return;

      // Store audio data for WAV recording
      recordedFrames.push(Buffer.from(chunk));
      // Send audio data to WebSocket
      ws.send(chunk);
    });

    micInputStream.on("error", (err) => {
      console.error(`Microphone Error: ${err}`);
      cleanup();
    });

    micInstance.start();
    console.log("Microphone stream opened successfully.");
    console.log("Speak into your microphone. Press Ctrl+C to stop.");
  } catch (error) {
    console.error(`Error opening microphone stream: ${error}`);
    cleanup();
  }
}
193
// Tear everything down: persist the recording, stop the microphone, and
// close the WebSocket — sending a graceful "Terminate" first when the
// socket is still open. Every step is individually guarded so cleanup can
// run safely from any error path.
function cleanup() {
  stopRequested = true;

  // Save recorded audio to WAV file
  saveWavFile();

  // Stop microphone if it's running
  if (micInstance) {
    try {
      micInstance.stop();
    } catch (error) {
      console.error(`Error stopping microphone: ${error}`);
    }
    micInstance = null;
  }

  // Close WebSocket connection if it's open
  const closable =
    ws && [WebSocket.OPEN, WebSocket.CONNECTING].includes(ws.readyState);
  if (closable) {
    try {
      // Send termination message if possible
      if (ws.readyState === WebSocket.OPEN) {
        const terminateMessage = { type: "Terminate" };
        console.log(
          `Sending termination message: ${JSON.stringify(terminateMessage)}`
        );
        ws.send(JSON.stringify(terminateMessage));
      }
      ws.close();
    } catch (error) {
      console.error(`Error closing WebSocket: ${error}`);
    }
    ws = null;
  }

  console.log("Cleanup complete.");
}
230
// Install process-level handlers so Ctrl+C, SIGTERM, and uncaught
// exceptions all run cleanup() and then exit after a short grace period.
function setupTerminationHandlers() {
  // Shared shutdown path for the two termination signals
  const exitAfterCleanup = (message, exitCode) => {
    console.log(message);
    cleanup();
    // Give time for cleanup before exiting
    setTimeout(() => process.exit(exitCode), 1000);
  };

  // Handle Ctrl+C and other termination signals
  process.on("SIGINT", () =>
    exitAfterCleanup("\nCtrl+C received. Stopping...", 0)
  );
  process.on("SIGTERM", () =>
    exitAfterCleanup("\nTermination signal received. Stopping...", 0)
  );

  // Handle uncaught exceptions (logged to stderr, exit code 1)
  process.on("uncaughtException", (error) => {
    console.error(`\nUncaught exception: ${error}`);
    cleanup();
    // Give time for cleanup before exiting
    setTimeout(() => process.exit(1), 1000);
  });
}
255
// Start the application. Top-level entry point; runtime errors surface via
// the uncaughtException handler installed inside run() -> setupTerminationHandlers().
run();

For more information on our Universal Streaming feature, see this section of our official documentation.

Step-by-step migration guide

1. Update API endpoint and configuration

Before (v2):

1const API_KEY = "<YOUR_API_KEY>";
2const SAMPLE_RATE = 16000; // 16kHz sample rate
3
4const ws = new WebSocket(
5 `wss://api.assemblyai.com/v2/realtime/ws?sample_rate=${SAMPLE_RATE}`,
6 {
7 headers: {
8 Authorization: API_KEY,
9 },
10 }
11);

After (v3):

1// --- Configuration ---
2const YOUR_API_KEY = "YOUR-API-KEY"; // Replace with your actual API key
3const CONNECTION_PARAMS = {
4 sample_rate: 16000,
5 format_turns: true, // Request formatted final transcripts
6};
7const API_ENDPOINT_BASE_URL = "wss://streaming.assemblyai.com/v3/ws";
8const API_ENDPOINT = `${API_ENDPOINT_BASE_URL}?${querystring.stringify(CONNECTION_PARAMS)}`;
9
10// Initialize WebSocket connection
11ws = new WebSocket(API_ENDPOINT, {
12 headers: {
13 Authorization: YOUR_API_KEY,
14 },
15});

Key Changes:

  • New base URL: streaming.assemblyai.com instead of api.assemblyai.com
  • Version upgrade: /v3/ws instead of /v2/realtime/ws
  • Configuration via URL parameters using querystring
  • Added format_turns option for better transcript formatting

2. Audio configuration

Before (v2):

1const SAMPLE_RATE = 16000;
2const CHANNELS = 1;

After (v3):

1const SAMPLE_RATE = CONNECTION_PARAMS.sample_rate;
2const CHANNELS = 1;

Key Changes:

  • Sample rate now references the configuration parameter

3. Update message handling schema

Before (v2):

1ws.on("message", (message) => {
2 try {
3 const msg = JSON.parse(message);
4 const msgType = msg.message_type;
5
6 if (msgType === "SessionBegins") {
7 const sessionId = msg.session_id;
8 console.log("Session ID:", sessionId);
9 return;
10 }
11
12 const text = msg.text || "";
13 if (!text) {
14 return;
15 }
16
17 if (msgType === "PartialTranscript") {
18 console.log("Partial:", text);
19 } else if (msgType === "FinalTranscript") {
20 console.log("Final:", text);
21 } else if (msgType === "error") {
22 console.error("Error:", msg.error);
23 }
24 } catch (error) {
25 console.error("Error handling message:", error);
26 }
27});

After (v3):

1ws.on("message", (message) => {
2 try {
3 const data = JSON.parse(message);
4 const msgType = data.type;
5 if (msgType === "Begin") {
6 const sessionId = data.id;
7 const expiresAt = data.expires_at;
8 console.log(
9 `\nSession began: ID=${sessionId}, ExpiresAt=${formatTimestamp(expiresAt)}`
10 );
11 } else if (msgType === "Turn") {
12 const transcript = data.transcript || "";
13 if (data.end_of_turn) {
14 clearLine();
15 console.log(transcript);
16 } else {
17 process.stdout.write(`\r${transcript}`);
18 }
19 } else if (msgType === "Termination") {
20 const audioDuration = data.audio_duration_seconds;
21 const sessionDuration = data.session_duration_seconds;
22 console.log(
23 `\nSession Terminated: Audio Duration=${audioDuration}s, Session Duration=${sessionDuration}s`
24 );
25 }
26 } catch (error) {
27 console.error(`\nError handling message: ${error}`);
28 console.error(`Message data: ${message}`);
29 }
30});

Key Changes:

  • Message types renamed: SessionBegins → Begin, PartialTranscript/FinalTranscript → Turn
  • Field names updated: message_type → type, session_id → id, text → transcript
  • Added session expiration timestamp handling (expires_at)
  • New transcript formatting with turn_is_formatted flag
  • Added turn tracking with turn_order and end_of_turn fields
  • New confidence scoring with end_of_turn_confidence
  • Added Termination message with session statistics
  • Error handling moved from message-based to WebSocket events

4. Add graceful shutdown handling and improve error handling and logging

Before (v2):

1ws.on("close", (code, reason) => onClose(ws, code, reason));
2
3function onClose(ws, code, reason) {
4 if (recording) {
5 recording.end();
6 }
7 console.log("Disconnected");
8}
9
10process.on("SIGINT", async function () {
11 console.log();
12 console.log("Stopping recording");
13 if (recording) {
14 recording.end();
15 }
16 console.log("Closing real-time transcript connection");
17 if (ws.readyState === WebSocket.OPEN) {
18 ws.close();
19 }
20 process.exit();
21});

After (v3):

1ws.on("close", (code, reason) => {
2 console.log(`\nWebSocket Disconnected: Status=${code}, Msg=${reason}`);
3 cleanup();
4});
5
6function cleanup() {
7 stopRequested = true;
8 // Save recorded audio to WAV file
9 saveWavFile();
10 // Stop microphone if it's running
11 if (micInstance) {
12 try {
13 micInstance.stop();
14 } catch (error) {
15 console.error(`Error stopping microphone: ${error}`);
16 }
17 micInstance = null;
18 }
19 // Close WebSocket connection if it's open
20 if (ws && [WebSocket.OPEN, WebSocket.CONNECTING].includes(ws.readyState)) {
21 try {
22 // Send termination message if possible
23 if (ws.readyState === WebSocket.OPEN) {
24 const terminateMessage = { type: "Terminate" };
25 console.log(
26 `Sending termination message: ${JSON.stringify(terminateMessage)}`
27 );
28 ws.send(JSON.stringify(terminateMessage));
29 }
30 ws.close();
31 } catch (error) {
32 console.error(`Error closing WebSocket: ${error}`);
33 }
34 ws = null;
35 }
36 console.log("Cleanup complete.");
37}
38
39function setupTerminationHandlers() {
40 // Handle Ctrl+C and other termination signals
41 process.on("SIGINT", () => {
42 console.log("\nCtrl+C received. Stopping...");
43 cleanup();
44 // Give time for cleanup before exiting
45 setTimeout(() => process.exit(0), 1000);
46 });
47 process.on("SIGTERM", () => {
48 console.log("\nTermination signal received. Stopping...");
49 cleanup();
50 // Give time for cleanup before exiting
51 setTimeout(() => process.exit(0), 1000);
52 });
53 // Handle uncaught exceptions
54 process.on("uncaughtException", (error) => {
55 console.error(`\nUncaught exception: ${error}`);
56 cleanup();
57 // Give time for cleanup before exiting
58 setTimeout(() => process.exit(1), 1000);
59 });
60}

Key Changes:

  • Proper KeyboardInterrupt handling
  • Graceful termination message sending
  • Detailed error context and timestamps
  • Proper exception type handling
  • Resource cleanup on all error paths
  • Connection status checking before operations

Note: Pricing is based on session duration so it is very important to close sessions properly to avoid unexpected usage and cost.

Migration checklist

  • Update API endpoint from v2 to v3
  • Update message type handling (Begin, Turn, Termination)
  • Add proper resource cleanup in all code paths
  • Update field names in message parsing
  • Add graceful shutdown with termination messages
  • Add detailed error logging with context
  • Test KeyboardInterrupt handling
  • Verify audio resource cleanup
  • Test connection failure scenarios

Testing your migration

  1. Basic Functionality: Verify transcription works with simple speech
  2. Error Handling: Test with invalid API keys or network issues
  3. Graceful Shutdown: Test Ctrl+C interruption
  4. Resource Cleanup: Monitor for memory leaks during extended use
  5. Message Formatting: Test with format_turns enabled/disabled

Common migration issues

Issue: “WebSocket connection failed”

Solution: Verify you’re using the new v3 endpoint URL and proper authentication header format.

Issue: “Message type not recognized”

Solution: Update message type handling from old names (SessionBegins, PartialTranscript) to new ones (Begin, Turn).

Benefits of migration

  • Improved Reliability: Better error handling and recovery
  • Lower Latency: Reduced buffer sizes for faster response
  • Enhanced Features: Formatted transcripts and session statistics
  • Better Resource Management: Proper cleanup prevents memory leaks
  • Graceful Shutdown: Clean termination with proper cleanup

Conclusion

This migration provides a more robust, maintainable, and feature-rich streaming transcription implementation. The enhanced error handling, resource management, and modern API features make it suitable for production use cases where reliability and performance are critical.