Getting started › End-to-end examples
Real-time live captioner
Real-time live captioner
Stream audio from your microphone with keyterms prompting for domain-specific accuracy, ideal for live events, accessibility, and broadcast captioning.
Products used: Streaming STT + Universal-3 Pro + keyterms prompting
Model selection: Uses u3-rt-pro for sub-300ms latency with format_turns enabled for clean, readable captions.
Python
JavaScript
# pip install pyaudio websocket-client
"""Real-time live captioner using AssemblyAI Streaming STT (Universal-3 Pro).

Streams 16 kHz mono microphone audio to the streaming websocket endpoint
and prints captions as they arrive: partial (in-progress) captions are
redrawn in place on one line, finalized captions get their own numbered
line. Keyterms prompting boosts recognition of domain-specific terms.
Press Ctrl+C to stop.
"""
import pyaudio
import websocket
import json
import threading
import time
from urllib.parse import urlencode

# ── Config ────────────────────────────────────────────────────
YOUR_API_KEY = "YOUR_API_KEY"

# Add domain-specific terms to boost recognition accuracy
KEYTERMS = ["AssemblyAI", "Universal-3 Pro", "LLM Gateway", "speech-to-text"]

CONNECTION_PARAMS = {
    "sample_rate": 16000,
    "speech_model": "u3-rt-pro",   # low-latency streaming model
    "format_turns": True,          # punctuated/formatted final captions
    "keyterms_prompt": KEYTERMS,
}

# doseq=True expands the KEYTERMS list into repeated query parameters.
API_ENDPOINT = (
    f"wss://streaming.assemblyai.com/v3/ws?{urlencode(CONNECTION_PARAMS, doseq=True)}"
)

# Audio settings: 800 frames at 16 kHz = 50 ms per chunk.
FRAMES_PER_BUFFER = 800
SAMPLE_RATE = 16000
stop_event = threading.Event()  # signals the audio thread to stop
caption_count = 0               # running count of finalized captions

def on_open(ws):
    """Start forwarding microphone audio once the websocket is connected."""
    print(f"Live captioning started — keyterms: {', '.join(KEYTERMS)}")
    print("Speak into your microphone. Press Ctrl+C to stop.\n")
    print("-" * 60)

    def stream_audio():
        # Capture 16-bit mono PCM and send raw frames as binary messages.
        audio = pyaudio.PyAudio()
        stream = audio.open(
            input=True, frames_per_buffer=FRAMES_PER_BUFFER,
            channels=1, format=pyaudio.paInt16, rate=SAMPLE_RATE,
        )
        try:
            while not stop_event.is_set():
                try:
                    data = stream.read(FRAMES_PER_BUFFER, exception_on_overflow=False)
                    ws.send(data, websocket.ABNF.OPCODE_BINARY)
                except Exception:
                    # Socket closed or audio device error — stop streaming.
                    break
        finally:
            # Always release the audio device, even on an unexpected exit.
            stream.stop_stream()
            stream.close()
            audio.terminate()

    threading.Thread(target=stream_audio, daemon=True).start()

def on_message(ws, message):
    """Render partial captions in place; print finals on their own line."""
    global caption_count
    data = json.loads(message)

    if data.get("type") == "Turn":
        transcript = data.get("transcript", "")
        if data.get("end_of_turn") and transcript:
            caption_count += 1
            # Blank the line first: '\r' only moves the cursor, so a final
            # caption shorter than the last partial would otherwise leave
            # stale characters visible at the end of the line.
            print(f"\r{' ' * 78}", end="")
            print(f"\r[{caption_count:03d}] {transcript}")
        elif transcript:
            # Show partial (live) caption, trimmed to the last 70 chars
            print(f"\r >> {transcript[-70:]}", end="", flush=True)

    elif data.get("type") == "Termination":
        # Server's final message after a Terminate request: session stats.
        duration = data.get("audio_duration_seconds", 0)
        print(f"\n{'=' * 60}")
        print(f"Session ended — {caption_count} captions, {duration}s of audio")

def on_error(ws, error):
    """Surface websocket errors and signal the audio thread to stop."""
    print(f"\nError: {error}")
    stop_event.set()

def on_close(ws, code, msg):
    """Stop the audio thread when the connection closes for any reason."""
    stop_event.set()

# The API key goes in the Authorization header, not the URL.
ws_app = websocket.WebSocketApp(
    API_ENDPOINT,
    header={"Authorization": YOUR_API_KEY},
    on_open=on_open, on_message=on_message,
    on_error=on_error, on_close=on_close,
)

# Run the websocket client on a daemon thread so the main thread stays
# free to catch Ctrl+C.
ws_thread = threading.Thread(target=ws_app.run_forever, daemon=True)
ws_thread.start()

try:
    while ws_thread.is_alive():
        time.sleep(0.1)
except KeyboardInterrupt:
    print("\n\nStopping...")
    stop_event.set()
    if ws_app.sock and ws_app.sock.connected:
        # Ask the server to flush remaining audio; wait briefly so the
        # Termination message (with session stats) can arrive.
        ws_app.send(json.dumps({"type": "Terminate"}))
        time.sleep(2)
    ws_app.close()
Example output
Live captioning started — keyterms: AssemblyAI, Universal-3 Pro, LLM Gateway, speech-to-text Speak into your microphone. Press Ctrl+C to stop. ------------------------------------------------------------ [001] Welcome everyone to today's demo of AssemblyAI's speech-to-text platform. [002] We'll be showing you how Universal-3 Pro handles real-time transcription. [003] The LLM Gateway integration lets you add AI analysis on top of your transcripts without switching providers. ============================================================ Session ended — 3 captions, 24s of audio
See the End-to-end examples overview for all available pipelines.