Create Subtitles with Speaker Labels

Quickstart

import assemblyai as aai

# SETTINGS
aai.settings.api_key = "YOUR-API-KEY"
filename = "YOUR-FILE-NAME"

# Maximum number of words per subtitle
max_words_per_subtitle = 6

# Transcribe with speaker labels enabled so every sentence carries a speaker code
transcriber = aai.Transcriber(config=aai.TranscriptionConfig(speaker_labels=True))
transcript = transcriber.transcribe(filename)

# Stop here if the transcription failed, rather than failing later with a
# less helpful error when the sentences are requested.
if transcript.status == aai.TranscriptStatus.error:
    raise RuntimeError(f"Transcription failed: {transcript.error}")
11
# Subtitle color for each speaker label; labels not listed here are handled
# by the caller's fallback color.
speaker_colors = dict(
    A="red",
    B="orange",
    C="yellow",
    D="yellowgreen",
    E="green",
    F="lightskyblue",
    G="purple",
    H="mediumpurple",
    I="pink",
    J="brown",
)
25
# Process transcription segments
def process_segments(segments, max_words=None):
    """Build the SRT text for a sequence of transcript segments.

    The words of each segment are split into consecutive chunks of at most
    ``max_words`` words. Every chunk becomes one numbered subtitle, colored
    according to the segment's speaker.

    Args:
        segments: Iterable of objects exposing ``speaker`` and ``words``;
            each word must expose ``start``, ``end`` and ``text``.
        max_words: Maximum number of words per subtitle. When omitted, the
            module-level ``max_words_per_subtitle`` is used.

    Returns:
        All subtitle entries concatenated into one string (empty when there
        are no words to render).
    """
    if max_words is None:
        max_words = max_words_per_subtitle

    entries = []
    for segment in segments:
        # Speakers without an assigned color fall back to black.
        color = speaker_colors.get(segment.speaker, "black")

        words = segment.words
        for offset in range(0, len(words), max_words):
            chunk = words[offset:offset + max_words]
            # A subtitle runs from its first word's start to its last word's
            # end; subtitles are numbered consecutively starting at 1.
            entries.append(
                create_subtitle(
                    len(entries) + 1,
                    chunk[0].start,
                    chunk[-1].end,
                    chunk,
                    color,
                )
            )

    return "".join(entries)
44
45
# Create a single subtitle
def create_subtitle(index, start_time, end_time, words, color):
    """Render one SRT entry.

    Args:
        index: Sequence number printed on the entry's first line.
        start_time: Start of the subtitle, passed to ``format_time``.
        end_time: End of the subtitle, passed to ``format_time``.
        words: Iterable of word objects exposing ``text``.
        color: Value placed in the ``<font color="...">`` attribute.

    Returns:
        The entry as a string: index line, timing line, colored text line,
        and a terminating blank line.
    """
    # Join with single spaces so no trailing space ends up inside the tag.
    text = " ".join(word.text for word in words)
    start_srt = format_time(start_time)
    end_srt = format_time(end_time)
    return (
        f"{index}\n"
        f"{start_srt} --> {end_srt}\n"
        f'<font color="{color}">{text}</font>\n\n'
    )
54
# Format time in SRT style
def format_time(milliseconds):
    """Convert a millisecond offset into an SRT timestamp (``HH:MM:SS,mmm``)."""
    # Peel off units from the smallest upwards.
    total_seconds, millis = divmod(milliseconds, 1000)
    total_minutes, secs = divmod(total_seconds, 60)
    hrs, mins = divmod(total_minutes, 60)
    return "{:02}:{:02}:{:02},{:03}".format(
        int(hrs), int(mins), int(secs), int(millis)
    )
61
# Generate SRT content
sentences = transcript.get_sentences()
srt_content = process_segments(sentences)

# Save to SRT file
# Write as UTF-8 explicitly: the platform's default encoding may not be able
# to represent every character that appears in the transcript.
srt_path = filename + '.srt'
with open(srt_path, 'w', encoding='utf-8') as srt_file:
    srt_file.write(srt_content)

print(f"SRT file generated: {srt_path}")

This Colab demonstrates how to use AssemblyAI’s Speaker Diarization model to format subtitles with a distinct color for each speaker.

Step-by-step guide

Before we begin, make sure you have an AssemblyAI account and an API key. You can sign up for an AssemblyAI account and get your API key from your dashboard.

$ pip install assemblyai

First, we will configure our API key as well as our file to be transcribed. Then, we decide on a number of words we want to have per subtitle.

Lastly, we transcribe our file.

import assemblyai as aai

# SETTINGS
aai.settings.api_key = "YOUR-API-KEY"
filename = "YOUR-FILE-NAME"

# Maximum number of words per subtitle
max_words_per_subtitle = 6

# Transcribe with speaker labels enabled so every sentence carries a speaker code
transcriber = aai.Transcriber(config=aai.TranscriptionConfig(speaker_labels=True))
transcript = transcriber.transcribe(filename)

# Stop here if the transcription failed, rather than failing later with a
# less helpful error when the sentences are requested.
if transcript.status == aai.TranscriptStatus.error:
    raise RuntimeError(f"Transcription failed: {transcript.error}")

How the code works

speaker_colors is a dictionary that maps speaker identifiers (like “A”, “B”, “C”, etc.) to specific colors. Each of the first ten speakers (“A” through “J”) in the transcription is associated with a unique color in the subtitles; any other speaker label falls back to black.

When Speaker Diarization is enabled, sentences in our API response have a speaker code under the speaker key. We use the speaker code to determine the color of the subtitle text.

# Subtitle color for each speaker label; labels not listed here are handled
# by the caller's fallback color.
speaker_colors = dict(
    A="red",
    B="orange",
    C="yellow",
    D="yellowgreen",
    E="green",
    F="lightskyblue",
    G="purple",
    H="mediumpurple",
    I="pink",
    J="brown",
)
14
# Process transcription segments
def process_segments(segments, max_words=None):
    """Build the SRT text for a sequence of transcript segments.

    The words of each segment are split into consecutive chunks of at most
    ``max_words`` words. Every chunk becomes one numbered subtitle, colored
    according to the segment's speaker.

    Args:
        segments: Iterable of objects exposing ``speaker`` and ``words``;
            each word must expose ``start``, ``end`` and ``text``.
        max_words: Maximum number of words per subtitle. When omitted, the
            module-level ``max_words_per_subtitle`` is used.

    Returns:
        All subtitle entries concatenated into one string (empty when there
        are no words to render).
    """
    if max_words is None:
        max_words = max_words_per_subtitle

    entries = []
    for segment in segments:
        # Speakers without an assigned color fall back to black.
        color = speaker_colors.get(segment.speaker, "black")

        words = segment.words
        for offset in range(0, len(words), max_words):
            chunk = words[offset:offset + max_words]
            # A subtitle runs from its first word's start to its last word's
            # end; subtitles are numbered consecutively starting at 1.
            entries.append(
                create_subtitle(
                    len(entries) + 1,
                    chunk[0].start,
                    chunk[-1].end,
                    chunk,
                    color,
                )
            )

    return "".join(entries)
33
# Create a single subtitle
def create_subtitle(index, start_time, end_time, words, color):
    """Render one SRT entry.

    Args:
        index: Sequence number printed on the entry's first line.
        start_time: Start of the subtitle, passed to ``format_time``.
        end_time: End of the subtitle, passed to ``format_time``.
        words: Iterable of word objects exposing ``text``.
        color: Value placed in the ``<font color="...">`` attribute.

    Returns:
        The entry as a string: index line, timing line, colored text line,
        and a terminating blank line.
    """
    # Join with single spaces so no trailing space ends up inside the tag.
    text = " ".join(word.text for word in words)
    start_srt = format_time(start_time)
    end_srt = format_time(end_time)
    return (
        f"{index}\n"
        f"{start_srt} --> {end_srt}\n"
        f'<font color="{color}">{text}</font>\n\n'
    )
42
# Format time in SRT style
def format_time(milliseconds):
    """Convert a millisecond offset into an SRT timestamp (``HH:MM:SS,mmm``)."""
    # Peel off units from the smallest upwards.
    total_seconds, millis = divmod(milliseconds, 1000)
    total_minutes, secs = divmod(total_seconds, 60)
    hrs, mins = divmod(total_minutes, 60)
    return "{:02}:{:02}:{:02},{:03}".format(
        int(hrs), int(mins), int(secs), int(millis)
    )

Our last step is to generate and save our subtitle file!

# Generate SRT content
sentences = transcript.get_sentences()
srt_content = process_segments(sentences)

# Save to SRT file
# Write as UTF-8 explicitly: the platform's default encoding may not be able
# to represent every character that appears in the transcript.
srt_path = filename + '.srt'
with open(srt_path, 'w', encoding='utf-8') as srt_file:
    srt_file.write(srt_content)

print(f"SRT file generated: {srt_path}")