| 1 | const WebSocket = require("ws"); |
| 2 | const mic = require("mic"); |
| 3 | const querystring = require("querystring"); |
| 4 | const fs = require("fs"); |
| 5 | |
// --- Configuration ---
// The API key is taken from the ASSEMBLYAI_API_KEY environment variable when
// it is set (and non-empty), so the secret does not have to live in the
// source file. The literal is only a placeholder fallback; replace it if you
// prefer to hard-code the key.
const YOUR_API_KEY = process.env.ASSEMBLYAI_API_KEY || "YOUR-API-KEY";

// Query-string parameters appended to the streaming endpoint URL.
const CONNECTION_PARAMS = {
  sample_rate: 16000,
  format_turns: true, // Request formatted final transcripts
};
const API_ENDPOINT_BASE_URL = "wss://streaming.assemblyai.com/v3/ws";
const API_ENDPOINT = `${API_ENDPOINT_BASE_URL}?${querystring.stringify(CONNECTION_PARAMS)}`;

// Audio Configuration
// The capture rate reuses the rate requested in CONNECTION_PARAMS so the two
// cannot drift apart.
const SAMPLE_RATE = CONNECTION_PARAMS.sample_rate;
const CHANNELS = 1;

// Global variables
let micInstance = null; // microphone handle, assigned when capture starts
let micInputStream = null; // audio stream obtained from micInstance
let ws = null; // WebSocket connection to the streaming endpoint
let stopRequested = false; // set to true once shutdown has begun

// WAV recording variables
let recordedFrames = []; // Store audio frames for WAV file
| 27 | |
| 28 | // --- Helper functions --- |
/**
 * Blank out the current terminal line.
 *
 * Emits a carriage return, 80 spaces, and another carriage return in a single
 * write, leaving the cursor at the start of the (now blank) line.
 */
function clearLine() {
  const blanks = "".padEnd(80, " ");
  process.stdout.write(`\r${blanks}\r`);
}
| 32 | |
/**
 * Convert a Unix timestamp expressed in seconds to an ISO-8601 string.
 *
 * @param {number} timestamp - Seconds since the Unix epoch.
 * @returns {string} The ISO-8601 representation, or "unknown" when the value
 *   does not describe a valid date (missing, non-numeric, or out of range).
 */
function formatTimestamp(timestamp) {
  const date = new Date(timestamp * 1000);

  // Date#toISOString throws a RangeError on an invalid date; return a
  // placeholder instead so a malformed field cannot abort the caller's log line.
  if (Number.isNaN(date.getTime())) {
    return "unknown";
  }

  return date.toISOString();
}
| 36 | |
/**
 * Build the 44-byte RIFF/WAVE header for uncompressed PCM audio.
 *
 * @param {number} sampleRate - Samples per second.
 * @param {number} channels - Number of interleaved channels.
 * @param {number} dataLength - Size of the audio payload in bytes.
 * @param {number} [bitsPerSample=16] - Sample width in bits; must be a
 *   positive multiple of 8.
 * @returns {Buffer} A 44-byte buffer holding the header.
 * @throws {RangeError} If bitsPerSample is not a positive multiple of 8.
 */
function createWavHeader(sampleRate, channels, dataLength, bitsPerSample = 16) {
  const bytesPerSample = bitsPerSample / 8;
  if (!Number.isInteger(bytesPerSample) || bytesPerSample <= 0) {
    throw new RangeError(
      `bitsPerSample must be a positive multiple of 8, got ${bitsPerSample}`
    );
  }

  const blockAlign = channels * bytesPerSample; // bytes per sample frame
  const header = Buffer.alloc(44);

  // RIFF header
  header.write("RIFF", 0);
  header.writeUInt32LE(36 + dataLength, 4); // bytes that follow this field
  header.write("WAVE", 8);

  // fmt chunk
  header.write("fmt ", 12);
  header.writeUInt32LE(16, 16); // fmt chunk size
  header.writeUInt16LE(1, 20); // PCM format
  header.writeUInt16LE(channels, 22);
  header.writeUInt32LE(sampleRate, 24);
  header.writeUInt32LE(sampleRate * blockAlign, 28); // byte rate
  header.writeUInt16LE(blockAlign, 32); // block align
  header.writeUInt16LE(bitsPerSample, 34); // bits per sample

  // data chunk
  header.write("data", 36);
  header.writeUInt32LE(dataLength, 40);

  return header;
}
| 61 | |
/**
 * Write every captured audio frame to a timestamped WAV file in the current
 * working directory.
 *
 * Logs a notice and returns without writing when nothing has been recorded.
 * Failures while building or writing the file are logged rather than thrown.
 */
function saveWavFile() {
  if (recordedFrames.length === 0) {
    console.log("No audio data recorded.");
    return;
  }

  // Generate filename with timestamp; ":" and "." are replaced so the name is
  // valid on file systems that reject them, and slice() drops the
  // millisecond/zone suffix.
  const timestamp = new Date().toISOString().replace(/[:.]/g, "-").slice(0, 19);
  const filename = `recorded_audio_${timestamp}.wav`;

  try {
    // Sum the frame sizes first so header and audio can be joined in a single
    // Buffer.concat, instead of copying the whole recording twice.
    const dataLength = recordedFrames.reduce(
      (total, frame) => total + frame.length,
      0
    );
    const wavHeader = createWavHeader(SAMPLE_RATE, CHANNELS, dataLength);
    const wavFile = Buffer.concat(
      [wavHeader, ...recordedFrames],
      wavHeader.length + dataLength
    );
    fs.writeFileSync(filename, wavFile);

    console.log(`Audio saved to: ${filename}`);
    // Duration assumes 2 bytes per sample.
    console.log(
      `Duration: ${(dataLength / (SAMPLE_RATE * CHANNELS * 2)).toFixed(2)} seconds`
    );
  } catch (error) {
    console.error(`Error saving WAV file: ${error}`);
  }
}
| 92 | |
| 93 | // --- Main function --- |
/**
 * Entry point: open the streaming WebSocket, attach its event handlers, and
 * register the process-termination handlers.
 *
 * Microphone capture is started from the socket's "open" event. Nothing is
 * awaited here, so the returned promise settles once the handlers are wired.
 *
 * @returns {Promise<void>}
 */
async function run() {
  console.log("Starting AssemblyAI streaming transcription...");
  console.log("Audio will be saved to a WAV file when the session ends.");

  // The API key travels in the Authorization header of the upgrade request.
  const socketOptions = { headers: { Authorization: YOUR_API_KEY } };
  ws = new WebSocket(API_ENDPOINT, socketOptions);

  ws.on("open", () => {
    console.log("WebSocket connection opened.");
    console.log(`Connected to: ${API_ENDPOINT}`);
    // Audio capture only begins once the socket is usable.
    startMicrophone();
  });

  ws.on("message", (message) => {
    try {
      const payload = JSON.parse(message);

      switch (payload.type) {
        case "Begin": {
          const { id, expires_at: expiresAt } = payload;
          console.log(
            `\nSession began: ID=${id}, ExpiresAt=${formatTimestamp(expiresAt)}`
          );
          break;
        }
        case "Turn": {
          const text = payload.transcript || "";
          if (!payload.end_of_turn) {
            // Partial result: redraw it in place on the current line.
            process.stdout.write(`\r${text}`);
            break;
          }
          // Final result: wipe the partial text and print the turn on its own line.
          clearLine();
          console.log(text);
          break;
        }
        case "Termination": {
          const {
            audio_duration_seconds: audioSeconds,
            session_duration_seconds: sessionSeconds,
          } = payload;
          console.log(
            `\nSession Terminated: Audio Duration=${audioSeconds}s, Session Duration=${sessionSeconds}s`
          );
          break;
        }
        default:
          // Other message types are ignored.
          break;
      }
    } catch (error) {
      console.error(`\nError handling message: ${error}`);
      console.error(`Message data: ${message}`);
    }
  });

  ws.on("error", (error) => {
    console.error(`\nWebSocket Error: ${error}`);
    cleanup();
  });

  ws.on("close", (code, reason) => {
    console.log(`\nWebSocket Disconnected: Status=${code}, Msg=${reason}`);
    cleanup();
  });

  // Handle process termination
  setupTerminationHandlers();
}
| 158 | |
/**
 * Create the microphone capture stream and forward its audio to the WebSocket.
 *
 * While the socket is open and no stop has been requested, each chunk is
 * copied into `recordedFrames` and then sent over the socket. A stream error,
 * or an exception thrown during setup, is logged and triggers cleanup().
 */
function startMicrophone() {
  try {
    const micOptions = {
      rate: SAMPLE_RATE.toString(),
      channels: CHANNELS.toString(),
      debug: false,
      exitOnSilence: 6, // This won't actually exit, just a parameter for mic
    };

    micInstance = mic(micOptions);
    micInputStream = micInstance.getAudioStream();

    micInputStream.on("data", (chunk) => {
      const socketReady = ws && ws.readyState === WebSocket.OPEN;
      if (!socketReady || stopRequested) {
        return;
      }

      // Store a copy of the chunk for the WAV recording.
      recordedFrames.push(Buffer.from(chunk));

      // Stream the chunk to the transcription service.
      ws.send(chunk);
    });

    micInputStream.on("error", (err) => {
      console.error(`Microphone Error: ${err}`);
      cleanup();
    });

    micInstance.start();
    console.log("Microphone stream opened successfully.");
    console.log("Speak into your microphone. Press Ctrl+C to stop.");
  } catch (error) {
    console.error(`Error opening microphone stream: ${error}`);
    cleanup();
  }
}
| 193 | |
// Set once cleanup() has started so that every later call is a no-op.
let cleanupStarted = false;

/**
 * Shut the session down: save the recording, stop the microphone, and close
 * the WebSocket (sending a Terminate message first when the socket is open).
 *
 * Safe to call repeatedly; only the first call does any work. Cleanup is
 * reachable from several paths (signals, socket "error", socket "close",
 * microphone errors), and closing the socket here raises "close", which calls
 * cleanup() again. Without the guard the WAV file would be written twice.
 */
function cleanup() {
  if (cleanupStarted) {
    return;
  }
  cleanupStarted = true;
  stopRequested = true;

  // Save recorded audio to WAV file
  saveWavFile();

  // Stop microphone if it's running
  if (micInstance) {
    try {
      micInstance.stop();
    } catch (error) {
      console.error(`Error stopping microphone: ${error}`);
    }
    micInstance = null;
  }

  // Close WebSocket connection if it's open
  if (ws && [WebSocket.OPEN, WebSocket.CONNECTING].includes(ws.readyState)) {
    try {
      // Send termination message if possible
      if (ws.readyState === WebSocket.OPEN) {
        const terminateMessage = { type: "Terminate" };
        console.log(
          `Sending termination message: ${JSON.stringify(terminateMessage)}`
        );
        ws.send(JSON.stringify(terminateMessage));
      }
      ws.close();
    } catch (error) {
      console.error(`Error closing WebSocket: ${error}`);
    }
    ws = null;
  }

  console.log("Cleanup complete.");
}
| 230 | |
/**
 * Register process-level handlers for SIGINT, SIGTERM, and uncaught
 * exceptions. Each handler logs what happened, runs cleanup(), and exits one
 * second later: status 0 for the signals, status 1 for an uncaught exception.
 */
function setupTerminationHandlers() {
  const EXIT_DELAY_MS = 1000;

  // Run cleanup now, then exit after a short delay so it has time to finish.
  const stopAndExit = (exitCode) => {
    cleanup();
    setTimeout(() => process.exit(exitCode), EXIT_DELAY_MS);
  };

  // Ctrl+C
  process.on("SIGINT", () => {
    console.log("\nCtrl+C received. Stopping...");
    stopAndExit(0);
  });

  // Termination request
  process.on("SIGTERM", () => {
    console.log("\nTermination signal received. Stopping...");
    stopAndExit(0);
  });

  // Uncaught exceptions are reported on stderr and exit with a failure status.
  process.on("uncaughtException", (error) => {
    console.error(`\nUncaught exception: ${error}`);
    stopAndExit(1);
  });
}
| 255 | |
// Start the application.
// run() is async, so an exception thrown inside it (for example while
// constructing the WebSocket) surfaces as a rejected promise. Report it,
// release anything that was opened, and mark the process as failed rather
// than leaving the rejection unhandled.
run().catch((error) => {
  console.error(`Fatal error: ${error}`);
  cleanup();
  process.exitCode = 1;
});