Medical Mode

Universal-3 RT Pro (u3-rt-pro)
Universal Streaming English (universal-streaming-english)
Universal Streaming Multilingual (universal-streaming-multilingual)

English (en)
Spanish (es)
German (de)
French (fr)

US & EU

Medical Mode is an add-on that enhances streaming transcription accuracy for medical terminology — including medication names, procedures, conditions, and dosages. It is optimized for medical entity recognition to correct terms that other models frequently get wrong.

Medical Mode can be used with all of our Streaming STT models.

Enable Medical Mode by setting the domain connection parameter to "medical-v1". No other changes to your existing pipeline are required.

Medical Mode is billed as a separate add-on. See the pricing page for details.

Quickstart

$ pip install websocket-client pyaudio
import json
import threading
import time
from datetime import datetime
from urllib.parse import urlencode

import pyaudio
import websocket

# --- Configuration ---
YOUR_API_KEY = "YOUR-API-KEY"  # Replace with your actual API key

# Query-string parameters for the streaming session.
# Setting "domain" to "medical-v1" is what enables Medical Mode.
CONNECTION_PARAMS = {
    "sample_rate": 16000,
    "speech_model": "u3-rt-pro",
    "domain": "medical-v1",
}
API_ENDPOINT_BASE_URL = "wss://streaming.assemblyai.com/v3/ws"
API_ENDPOINT = f"{API_ENDPOINT_BASE_URL}?{urlencode(CONNECTION_PARAMS)}"

# Audio Configuration
SAMPLE_RATE = CONNECTION_PARAMS["sample_rate"]
CHUNK_DURATION_MS = 50
# Frames per microphone read, derived from the sample rate so each chunk stays
# 50 ms long (800 frames at 16000 Hz) even if "sample_rate" is changed above.
FRAMES_PER_BUFFER = SAMPLE_RATE * CHUNK_DURATION_MS // 1000
CHANNELS = 1
FORMAT = pyaudio.paInt16

# Global variables for audio stream and websocket
audio = None
stream = None
ws_app = None
audio_thread = None
stop_event = threading.Event()  # To signal the audio thread to stop
# --- WebSocket Event Handlers ---


def on_open(ws):
    """Called when the WebSocket connection is established.

    Prints the connection details, then starts a daemon thread that reads
    microphone audio from the module-level ``stream`` and forwards each chunk
    over ``ws`` as a binary message. The thread exits when ``stop_event`` is
    set or when a read/send raises.

    Args:
        ws: The WebSocket connection object passed in by the WebSocket client.
    """
    global audio_thread

    print("WebSocket connection opened.")
    print(f"Connected to: {API_ENDPOINT}")

    def stream_audio():
        """Read microphone chunks and send them until told to stop."""
        print("Starting audio streaming...")
        while not stop_event.is_set():
            try:
                audio_data = stream.read(FRAMES_PER_BUFFER, exception_on_overflow=False)

                # Send audio data as binary message
                ws.send(audio_data, websocket.ABNF.OPCODE_BINARY)
            except Exception as e:
                # Any read/send failure ends the streaming loop.
                print(f"Error streaming audio: {e}")
                break
        print("Audio streaming stopped.")

    # Start sending audio data in a separate thread
    audio_thread = threading.Thread(target=stream_audio, daemon=True)
    audio_thread.start()
def on_message(ws, message):
    """Called when a message is received from the WebSocket.

    Parses ``message`` as JSON and prints according to its ``type`` field:

    * ``"Begin"``: the session id and expiry time.
    * ``"Turn"``: the transcript. A formatted turn is printed on its own line;
      an unformatted (partial) turn overwrites the current line in place.
    * ``"Termination"``: the audio and session durations.

    Other message types are ignored. Errors are printed, never raised, so a
    malformed message cannot break the receive loop.

    Args:
        ws: The WebSocket connection object (unused).
        message: The raw JSON text received from the server.
    """
    try:
        data = json.loads(message)
        msg_type = data.get('type')

        if msg_type == "Begin":
            session_id = data.get('id')
            expires_at = data.get('expires_at')
            # A missing expiry is reported as "unknown" rather than raising
            # inside datetime.fromtimestamp(None).
            if expires_at is None:
                expires = "unknown"
            else:
                expires = datetime.fromtimestamp(expires_at)
            print(f"\nSession began: ID={session_id}, ExpiresAt={expires}")
        elif msg_type == "Turn":
            transcript = data.get('transcript', '')
            formatted = data.get('turn_is_formatted', False)

            if formatted:
                # Blank out the partial line before printing the final text.
                print('\r' + ' ' * 80 + '\r', end='')
                print(transcript)
            else:
                # No trailing newline here, so flush explicitly; otherwise
                # line-buffered stdout holds partials back until the next newline.
                print(f"\r{transcript}", end='', flush=True)
        elif msg_type == "Termination":
            audio_duration = data.get('audio_duration_seconds', 0)
            session_duration = data.get('session_duration_seconds', 0)
            print(f"\nSession Terminated: Audio Duration={audio_duration}s, Session Duration={session_duration}s")
    except json.JSONDecodeError as e:
        print(f"Error decoding message: {e}")
    except Exception as e:
        print(f"Error handling message: {e}")
def on_error(ws, error):
    """Called when a WebSocket error occurs.

    Prints the error and sets ``stop_event`` so the audio streaming loop
    stops.

    Args:
        ws: The WebSocket connection object (unused).
        error: The error reported by the WebSocket client.
    """
    print(f"\nWebSocket Error: {error}")
    stop_event.set()
def on_close(ws, close_status_code, close_msg):
    """Called when the WebSocket connection is closed.

    Signals the audio thread to stop, waits briefly for it to finish, and then
    releases the microphone stream and the PyAudio instance. The module-level
    ``stream`` and ``audio`` are reset to ``None`` once released.

    Args:
        ws: The WebSocket connection object (unused).
        close_status_code: Close status code reported by the WebSocket client.
        close_msg: Close message reported by the WebSocket client.
    """
    global stream, audio

    print(f"\nWebSocket Disconnected: Status={close_status_code}, Msg={close_msg}")

    stop_event.set()

    # Let the audio thread leave its read loop before the stream it reads from
    # is closed. The wait is bounded, so a stuck thread cannot block cleanup.
    if audio_thread and audio_thread.is_alive():
        audio_thread.join(timeout=1.0)

    if stream:
        if stream.is_active():
            stream.stop_stream()
        stream.close()
        stream = None
    if audio:
        audio.terminate()
        audio = None
# --- Main Execution ---
def run():
    """Open the microphone, connect to the streaming endpoint, and run until stopped.

    Opens the input stream, starts the WebSocket client in a daemon thread, and
    polls until that thread ends. On Ctrl+C it sends a ``Terminate`` message,
    waits for the connection to wind down, and closes the socket. The audio
    resources are released in all cases.

    Returns:
        None. Returns early, after releasing PyAudio, if the microphone cannot
        be opened.
    """
    global audio, stream, ws_app

    audio = pyaudio.PyAudio()

    try:
        stream = audio.open(
            input=True,
            frames_per_buffer=FRAMES_PER_BUFFER,
            channels=CHANNELS,
            format=FORMAT,
            rate=SAMPLE_RATE,
        )
        print("Microphone stream opened successfully.")
        print("Speak into your microphone. Press Ctrl+C to stop.")
    except Exception as e:
        print(f"Error opening microphone stream: {e}")
        if audio:
            audio.terminate()
        return

    ws_app = websocket.WebSocketApp(
        API_ENDPOINT,
        header={"Authorization": YOUR_API_KEY},
        on_open=on_open,
        on_message=on_message,
        on_error=on_error,
        on_close=on_close,
    )

    # Run the WebSocket client in the background so this thread can catch Ctrl+C.
    ws_thread = threading.Thread(target=ws_app.run_forever, daemon=True)
    ws_thread.start()

    try:
        while ws_thread.is_alive():
            time.sleep(0.1)
    except KeyboardInterrupt:
        print("\nCtrl+C received. Stopping...")
        stop_event.set()

        if ws_app and ws_app.sock and ws_app.sock.connected:
            try:
                terminate_message = {"type": "Terminate"}
                print(f"Sending termination message: {json.dumps(terminate_message)}")
                ws_app.send(json.dumps(terminate_message))
                # Wait up to 5 seconds for the session to wind down, but return
                # as soon as the WebSocket thread exits.
                ws_thread.join(timeout=5)
            except Exception as e:
                print(f"Error sending termination message: {e}")

        if ws_app:
            ws_app.close()

        ws_thread.join(timeout=2.0)

    except Exception as e:
        print(f"\nAn unexpected error occurred: {e}")
        stop_event.set()
        if ws_app:
            ws_app.close()
        ws_thread.join(timeout=2.0)

    finally:
        # on_close resets these globals to None after releasing them, so these
        # checks skip anything it has already cleaned up.
        if stream and stream.is_active():
            stream.stop_stream()
        if stream:
            stream.close()
        if audio:
            audio.terminate()
        print("Cleanup complete. Exiting.")


if __name__ == "__main__":
    run()

Example output

Without Medical Mode:

I have here insulin to be used for both prandial mealtime and sliding scale is
insulin lisprohumalog subcutaneously.

With Medical Mode, lisprohumalog is updated to Lispro (Humalog) - following the standard medical convention of writing the generic name first, with the brand name in parentheses.

I have here insulin to be used for both prandial mealtime and sliding scale is
insulin Lispro (Humalog) subcutaneously.

Use cases

Medical Mode is designed for healthcare AI applications where accurate medical terminology is critical:

  • Ambient clinical documentation — Capture medication names, dosages, and clinical terms correctly during live patient encounters.
  • Real-time medical scribes — Deliver accurate transcripts to clinicians during or immediately after a consult.
  • Front-office voice agents — Handle drug names, provider names, and clinic-specific terminology in scheduling calls and insurance verification.
  • Medical contact centers — Transcribe calls with correct medical vocabulary for downstream processing and quality assurance.

Combine with other features

Medical Mode works alongside other streaming features. You can combine it with other connection parameters, such as speaker labels and keyterms prompting:

# Medical Mode ("domain") combined with speaker labels and a keyterms prompt.
# The keyterms list is JSON-encoded so it travels as a single string value.
CONNECTION_PARAMS = {
    "sample_rate": 16000,
    "speech_model": "u3-rt-pro",
    "domain": "medical-v1",
    "speaker_labels": "true",
    "keyterms_prompt": json.dumps(["Lisinopril", "Metformin", "Humalog"]),
}

HIPAA compliance

AssemblyAI offers a Business Associate Agreement (BAA) for customers who need to process Protected Health Information (PHI). AssemblyAI is SOC 2 Type 2, ISO 27001:2022, and PCI DSS v4.0 certified. Medical Mode does not change existing data handling or retention policies.

For BAA setup or enterprise pricing, contact our sales team.