Quickstart
- Python
- JavaScript
import requests
import time
# End-to-end script: upload an audio file, transcribe it, fetch the VTT
# captions, and ask the LLM Gateway to split the call into phases.
base_url = "https://api.assemblyai.com"
headers = {
    "authorization": "<YOUR_API_KEY>"
}

# Step 1: Transcribe and get VTT subtitles
# Upload the local audio file; the JSON reply carries the "upload_url" to transcribe.
with open("./my-audio.mp3", "rb") as f:
    response = requests.post(base_url + "/v2/upload", headers=headers, data=f)
# Surface HTTP errors (bad API key, quota, ...) here rather than as a KeyError below.
response.raise_for_status()
upload_url = response.json()["upload_url"]

# Submit the transcription request for the uploaded audio.
data = {"audio_url": upload_url, "speech_models": ["universal-3-pro"]}
response = requests.post(base_url + "/v2/transcript", json=data, headers=headers)
response.raise_for_status()
transcript_id = response.json()['id']
polling_endpoint = base_url + "/v2/transcript/" + transcript_id

# Poll every 3 seconds until the transcript is completed or has failed.
while True:
    poll_response = requests.get(polling_endpoint, headers=headers)
    poll_response.raise_for_status()
    transcription_result = poll_response.json()
    if transcription_result['status'] == 'completed':
        break
    elif transcription_result['status'] == 'error':
        raise RuntimeError(f"Transcription failed: {transcription_result['error']}")
    else:
        time.sleep(3)

# Get VTT subtitles
vtt_response = requests.get(f"{polling_endpoint}/vtt", headers=headers)
vtt_response.raise_for_status()
vtt_content = vtt_response.text

# Step 2: Define phases and analyze with LLM Gateway
phases = ["Introduction", "Complaint", "Resolution", "Goodbye"]

# Doubled braces ({{ }}) are literal braces inside the f-string.
prompt = f'''
Analyze the following transcript of a phone call conversation and divide it into the following phases:
{', '.join(phases)}
You will be given the transcript in the format of VTT captions.
For each phase:
1. Identify the start and end timestamps (in seconds)
2. Provide a brief summary of what happened in that phase
Format your response as a JSON object with the following structure:
{{
"phases": [
{{
"name": "Phase Name",
"start_time": start_time_in_seconds,
"end_time": end_time_in_seconds,
"summary": "Brief summary of the phase"
}},
...
]
}}
Ensure that all parts of the conversation are covered by a phase, using "Other" for any parts that don't fit into the specified phases.
'''

# Single user message: the instructions followed by the raw VTT captions.
llm_gateway_data = {
    "model": "claude-sonnet-4-5-20250929",
    "messages": [
        {"role": "user", "content": f"{prompt}\n\nVTT Transcript:\n{vtt_content}"}
    ],
    "max_tokens": 2000
}
response = requests.post(
    "https://llm-gateway.assemblyai.com/v1/chat/completions",
    headers=headers,
    json=llm_gateway_data
)
# Check the status before indexing into the reply.
response.raise_for_status()
result = response.json()["choices"][0]["message"]["content"]
print(result)
import fs from "fs-extra";
// End-to-end script: upload an audio file, transcribe it, fetch the VTT
// captions, and ask the LLM Gateway to split the call into phases.
const baseUrl = "https://api.assemblyai.com";
const headers = { authorization: "<YOUR_API_KEY>" };
const jsonHeaders = { ...headers, "Content-Type": "application/json" };

// Throw on any non-OK response; otherwise pass the response through.
const ensureOk = (response) => {
  if (!response.ok) throw new Error(`Error: ${response.status}`);
  return response;
};

// Resolve after the given number of milliseconds.
const pause = (ms) => new Promise((resolve) => setTimeout(resolve, ms));

// Step 1: Transcribe and get VTT subtitles
const audioBytes = await fs.readFile("./my-audio.mp3");
const uploadRes = ensureOk(
  await fetch(`${baseUrl}/v2/upload`, {
    method: "POST",
    headers,
    body: audioBytes,
  }),
);
const { upload_url: uploadUrl } = await uploadRes.json();

// Submit the transcription request for the uploaded audio.
const transcriptRes = ensureOk(
  await fetch(`${baseUrl}/v2/transcript`, {
    method: "POST",
    headers: jsonHeaders,
    body: JSON.stringify({
      audio_url: uploadUrl,
      speech_models: ["universal-3-pro"],
    }),
  }),
);
const { id: transcriptId } = await transcriptRes.json();
const pollingEndpoint = `${baseUrl}/v2/transcript/${transcriptId}`;

// Poll every 3 seconds until the transcript is completed or has failed.
for (;;) {
  const pollRes = ensureOk(await fetch(pollingEndpoint, { headers }));
  const { status, error } = await pollRes.json();
  if (status === "completed") break;
  if (status === "error") throw new Error(`Transcription failed: ${error}`);
  await pause(3000);
}

// Get VTT subtitles
const vttRes = ensureOk(await fetch(`${pollingEndpoint}/vtt`, { headers }));
const vttContent = await vttRes.text();

// Step 2: Define phases and analyze with LLM Gateway
const phases = ["Introduction", "Complaint", "Resolution", "Goodbye"];
const prompt = `
Analyze the following transcript of a phone call conversation and divide it into the following phases:
${phases.join(", ")}
You will be given the transcript in the format of VTT captions.
For each phase:
1. Identify the start and end timestamps (in seconds)
2. Provide a brief summary of what happened in that phase
Format your response as a JSON object with the following structure:
{
"phases": [
{
"name": "Phase Name",
"start_time": start_time_in_seconds,
"end_time": end_time_in_seconds,
"summary": "Brief summary of the phase"
},
...
]
}
Ensure that all parts of the conversation are covered by a phase, using "Other" for any parts that don't fit into the specified phases.
`;

// Single user message: the instructions followed by the raw VTT captions.
const llmGatewayData = {
  model: "claude-sonnet-4-5-20250929",
  messages: [
    { role: "user", content: `${prompt}\n\nVTT Transcript:\n${vttContent}` },
  ],
  max_tokens: 2000,
};
const gatewayRes = ensureOk(
  await fetch("https://llm-gateway.assemblyai.com/v1/chat/completions", {
    method: "POST",
    headers: jsonHeaders,
    body: JSON.stringify(llmGatewayData),
  }),
);
const { choices } = await gatewayRes.json();
console.log(choices[0].message.content);
Get Started
Before we begin, make sure you have an AssemblyAI account and an API key. You can sign up for an AssemblyAI account and get your API key from your dashboard.

Step-by-Step Instructions
Install the required packages:
- Python
- JavaScript
pip install requests
npm install fs-extra
- Python
- JavaScript
import requests
import time
# Upload an audio file, transcribe it, then download and save the VTT captions.
base_url = "https://api.assemblyai.com"
headers = {
    "authorization": "<YOUR_API_KEY>"
}

# Upload the local audio file; the JSON reply carries the "upload_url" to transcribe.
with open("./my-audio.mp3", "rb") as f:
    response = requests.post(base_url + "/v2/upload",
                             headers=headers,
                             data=f)
# Surface HTTP errors (bad API key, quota, ...) here rather than as a KeyError below.
response.raise_for_status()
upload_url = response.json()["upload_url"]

data = {
    "audio_url": upload_url,  # You can also use a URL to an audio or video file on the web
    "speech_models": ["universal-3-pro"]
}
url = base_url + "/v2/transcript"
response = requests.post(url, json=data, headers=headers)
response.raise_for_status()
transcript_id = response.json()['id']
polling_endpoint = base_url + "/v2/transcript/" + transcript_id

# Poll every 3 seconds until the transcript is completed or has failed.
while True:
    poll_response = requests.get(polling_endpoint, headers=headers)
    poll_response.raise_for_status()
    transcription_result = poll_response.json()
    if transcription_result['status'] == 'completed':
        print(f"Transcript ID: {transcript_id}")
        break
    elif transcription_result['status'] == 'error':
        raise RuntimeError(f"Transcription failed: {transcription_result['error']}")
    else:
        time.sleep(3)

# Get VTT subtitles for timestamp information
vtt_response = requests.get(f"{polling_endpoint}/vtt", headers=headers)
vtt_response.raise_for_status()
vtt_content = vtt_response.text
# Write as UTF-8 explicitly: the platform default encoding (e.g. cp1252 on
# Windows) can fail on non-ASCII caption text.
with open(f"transcript_{transcript_id}.vtt", "w", encoding="utf-8") as vtt_file:
    vtt_file.write(vtt_content)
import fs from "fs-extra";
// Upload an audio file, transcribe it, then download and save the VTT captions.
const baseUrl = "https://api.assemblyai.com";
const headers = { authorization: "<YOUR_API_KEY>" };

// Upload the local audio file.
const audioBytes = await fs.readFile("./my-audio.mp3");
let res = await fetch(`${baseUrl}/v2/upload`, {
  method: "POST",
  headers,
  body: audioBytes,
});
if (!res.ok) throw new Error(`Error: ${res.status}`);
const { upload_url: uploadUrl } = await res.json();

// Submit the transcription request for the uploaded audio.
const transcriptRequest = {
  audio_url: uploadUrl, // You can also use a URL to an audio or video file on the web
  speech_models: ["universal-3-pro"],
};
res = await fetch(`${baseUrl}/v2/transcript`, {
  method: "POST",
  headers: { ...headers, "Content-Type": "application/json" },
  body: JSON.stringify(transcriptRequest),
});
if (!res.ok) throw new Error(`Error: ${res.status}`);
const { id: transcriptId } = await res.json();
const pollingEndpoint = `${baseUrl}/v2/transcript/${transcriptId}`;

// Poll every 3 seconds until the transcript is completed or has failed.
for (;;) {
  res = await fetch(pollingEndpoint, { headers });
  if (!res.ok) throw new Error(`Error: ${res.status}`);
  const { status, error } = await res.json();
  if (status === "error") {
    throw new Error(`Transcription failed: ${error}`);
  }
  if (status === "completed") {
    console.log("Transcript ID:", transcriptId);
    break;
  }
  await new Promise((resolve) => setTimeout(resolve, 3000));
}

// Get VTT subtitles for timestamp information
res = await fetch(`${pollingEndpoint}/vtt`, { headers });
if (!res.ok) throw new Error(`Error: ${res.status}`);
const vttContent = await res.text();
fs.writeFileSync(`transcript_${transcriptId}.vtt`, vttContent);
Define the phases you want to identify. Here is an example of phases that can be used for customer support calls:
- Python
- JavaScript
# Names of the call phases, in the order they are listed.
phases = ["Introduction", "Complaint", "Resolution", "Goodbye"]
// Names of the call phases, in the order they are listed.
const phases = ["Introduction", "Complaint", "Resolution", "Goodbye"];
- Python
- JavaScript
# Build the instructions for the LLM. Doubled braces ({{ }}) are literal
# braces inside the f-string; {', '.join(phases)} inserts the phase names.
prompt = f'''
Analyze the following transcript of a phone call conversation and divide it into the following phases:
{', '.join(phases)}
You will be given the transcript in the format of VTT captions.
For each phase:
1. Identify the start and end timestamps (in seconds)
2. Provide a brief summary of what happened in that phase
Format your response as a JSON object with the following structure:
{{
"phases": [
{{
"name": "Phase Name",
"start_time": start_time_in_seconds,
"end_time": end_time_in_seconds,
"summary": "Brief summary of the phase"
}},
...
]
}}
Ensure that all parts of the conversation are covered by a phase, using "Other" for any parts that don't fit into the specified phases.
'''

# Single user message: the instructions followed by the raw VTT captions.
llm_gateway_data = {
    "model": "claude-sonnet-4-5-20250929",
    "messages": [
        {"role": "user", "content": f"{prompt}\n\nVTT Transcript:\n{vtt_content}"}
    ],
    "max_tokens": 2000
}
response = requests.post(
    "https://llm-gateway.assemblyai.com/v1/chat/completions",
    headers=headers,
    json=llm_gateway_data
)
# Check the status before indexing into the reply, so an API error is
# reported as an HTTP error rather than a KeyError on "choices".
response.raise_for_status()
result = response.json()["choices"][0]["message"]["content"]
print(result)
// Build the instructions for the LLM; ${phases.join(", ")} inserts the phase names.
const prompt = `
Analyze the following transcript of a phone call conversation and divide it into the following phases:
${phases.join(", ")}
You will be given the transcript in the format of VTT captions.
For each phase:
1. Identify the start and end timestamps (in seconds)
2. Provide a brief summary of what happened in that phase
Format your response as a JSON object with the following structure:
{
"phases": [
{
"name": "Phase Name",
"start_time": start_time_in_seconds,
"end_time": end_time_in_seconds,
"summary": "Brief summary of the phase"
},
...
]
}
Ensure that all parts of the conversation are covered by a phase, using "Other" for any parts that don't fit into the specified phases.
`;

// Single user message: the instructions followed by the raw VTT captions.
const llmGatewayData = {
  model: "claude-sonnet-4-5-20250929",
  messages: [
    { role: "user", content: `${prompt}\n\nVTT Transcript:\n${vttContent}` },
  ],
  max_tokens: 2000,
};

// Use a fresh binding: the transcription step already declares `res` with
// `let`, so declaring it again here would be a SyntaxError once the
// snippets are combined into one script.
const gatewayRes = await fetch(
  "https://llm-gateway.assemblyai.com/v1/chat/completions",
  {
    method: "POST",
    headers: { ...headers, "Content-Type": "application/json" },
    body: JSON.stringify(llmGatewayData),
  },
);
if (!gatewayRes.ok) throw new Error(`Error: ${gatewayRes.status}`);
const response = await gatewayRes.json();
const result = response.choices[0].message.content;
console.log(result);
{
"phases": [
{
"name": "Introduction",
"start_time": 1.52,
"end_time": 15.57,
"summary": "The customer service representative greets the caller and asks how they can help. The caller states they want to know the status of their order refund."
},
{
"name": "Complaint",
"start_time": 15.57,
"end_time": 59.41,
"summary": "The representative asks for the order ID, which the caller provides. The representative confirms the order details and that it was cancelled. The caller mentions they couldn't complete their test."
},
{
"name": "Resolution",
"start_time": 59.41,
"end_time": 210.01,
"summary": "The representative informs the caller that the refund was initiated on April 8th and will be credited by April 21st. They explain the refund timeline and bank processing days. The caller expresses some confusion about the timeline, and the representative clarifies the process."
},
{
"name": "Goodbye",
"start_time": 210.01,
"end_time": 235.8,
"summary": "The caller accepts the explanation. The representative asks if there's anything else they can help with, requests feedback, and concludes the call with a farewell."
}
]
}