Merged
1 change: 1 addition & 0 deletions .gitignore
@@ -5,3 +5,4 @@ output
.vscode
dist
.turbo
.DS_Store
20 changes: 15 additions & 5 deletions bot_async_transcription_hybrid_diarization/README.md
@@ -6,17 +6,25 @@ This example demonstrates how to get accurate speaker attribution in your transc

Standard transcription diarization has a tradeoff:

- **Machine diarization** (from providers like Deepgram) distinguishes different voices, but only gives you anonymous labels like "Speaker 0" and "Speaker 1".
- **Speaker timeline diarization** (from Recall.ai) uses participant speaker-change events from the meeting platform to determine who is speaking, but cannot distinguish participants who speak from the same participant tile (e.g. calling from the same device/room).

**Hybrid diarization combines both approaches.** It uses machine diarization to detect distinct voices per participant, then maps them to real participant names when there's a clear 1-to-1 match. When multiple people share a device (i.e. a participant has more than one anonymous speaker label), it falls back to anonymous speaker labels.

## How It Works

The server listens for webhook events from Recall.ai:

1. When `recording.done` is received, it triggers async transcript creation via Recall's API
2. When `transcript.done` is received, it downloads both the transcript and the participants list, then merges them using the hybrid diarization algorithm
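The two webhook steps above amount to a simple dispatch on the event type. Here is a minimal sketch; the payload shape and handler function names are illustrative assumptions, not this repo's actual types:

```typescript
// Illustrative sketch of the webhook dispatch described above.
// The payload shape and handler names are assumptions for this example.
type RecallWebhook = {
  event: "recording.done" | "transcript.done";
  data: { recording: { id: string } };
};

async function handleWebhook(
  msg: RecallWebhook,
  handlers: {
    // Triggers async transcript creation via Recall's API.
    createAsyncTranscript: (recordingId: string) => Promise<void>;
    // Downloads transcript + participants, then merges them.
    mergeHybridTranscript: (recordingId: string) => Promise<void>;
  },
): Promise<void> {
  switch (msg.event) {
    case "recording.done":
      await handlers.createAsyncTranscript(msg.data.recording.id);
      break;
    case "transcript.done":
      await handlers.mergeHybridTranscript(msg.data.recording.id);
      break;
  }
}
```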

### Hybrid Diarization Algorithm

Each transcript part has a participant name in the format `{participant_id}-{anonymous_label}` (e.g. `200-0` means participant ID 200, anonymous label 0). The algorithm:

1. Builds a map of `participant_id → Set<anonymous_label>` from all transcript parts
2. If a participant has **exactly one** anonymous label, we can confidently attribute all their segments to a single speaker — so we replace the anonymous label with the real participant name and metadata
3. If a participant has **multiple** anonymous labels (e.g. `200-0` and `200-1`), multiple people are sharing that device, so we leave those segments with their anonymous labels
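The mapping steps above can be sketched as follows. This is a minimal illustration, not the repo's actual implementation: the `TranscriptPart` and `Participant` shapes are simplified assumptions.

```typescript
// Sketch of the hybrid diarization mapping. The shapes below are
// simplified assumptions, not the repo's actual schemas.
type TranscriptPart = {
  // e.g. "200-0" = participant ID 200, anonymous machine label 0
  speaker: string;
  text: string;
};

type Participant = { id: number; name: string };

function applyHybridDiarization(
  parts: TranscriptPart[],
  participants: Participant[],
): TranscriptPart[] {
  // 1. Build participant_id -> Set<anonymous_label> from all parts.
  const labels = new Map<string, Set<string>>();
  for (const part of parts) {
    const [participantId, anonymousLabel] = part.speaker.split("-");
    if (!labels.has(participantId)) labels.set(participantId, new Set());
    labels.get(participantId)!.add(anonymousLabel);
  }

  const namesById = new Map(participants.map((p) => [String(p.id), p.name]));

  return parts.map((part) => {
    const [participantId] = part.speaker.split("-");
    const name = namesById.get(participantId);
    // 2. Exactly one anonymous label: attribute all of this participant's
    //    segments to the real participant name.
    if (labels.get(participantId)!.size === 1 && name) {
      return { ...part, speaker: name };
    }
    // 3. Multiple anonymous labels: several people share the device,
    //    so keep the anonymous label.
    return part;
  });
}
```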

## Prerequisites

@@ -103,5 +111,7 @@ Replace `RECALL_REGION`, `RECALL_API_KEY`, and `YOUR_MEETING_URL` with your own

After the call ends and the transcript is processed, you'll find the output files in the `output/` folder, organized by recording ID:

- `participants.json` — The list of participants in the meeting
- `transcript.json` — The raw transcript parts (before hybrid diarization)
- `hybrid_diarization_transcript.json` — The transcript with hybrid diarization applied
- `hybrid_diarization_transcript.txt` — A human-readable version of the hybrid diarized transcript
@@ -4,8 +4,8 @@ import { z } from "zod";
import { env } from "./config/env";
import { convert_to_hybrid_diarized_transcript_parts } from "./convert_to_hybrid_diarized_transcript_parts";
import { convert_to_readable_transcript } from "./convert_to_readable_transcript";
import { ParticipantPartSchema } from "./schemas/ParticipantPartSchema";
import { RecordingArtifactSchema } from "./schemas/RecordingArtifactSchema";
import { TranscriptArtifactEventSchema, type TranscriptArtifactEventType } from "./schemas/TranscriptArtifactEventSchema";
import { TranscriptArtifactSchema } from "./schemas/TranscriptArtifactSchema";
import { TranscriptPartSchema } from "./schemas/TranscriptPartSchema";
@@ -24,6 +24,7 @@ export async function create_async_transcript(args: { recording_id: string }) {
},
body: JSON.stringify({
provider: { deepgram_async: { diarize: true } },
diarization: { use_separate_streams_when_available: true },
}),
});
if (!response.ok) throw new Error(await response.text());
@@ -42,47 +43,44 @@ export async function bot_async_transcription(args: { msg: TranscriptArtifactEve
if (!recording.media_shortcuts?.transcript?.data?.download_url) {
throw new Error("Transcript download URL is null");
}
if (!recording.media_shortcuts.participant_events?.data?.participants_download_url) {
throw new Error("Participants download URL is null");
}

// Retrieve and format transcript data.
const transcript_parts = await retrieve_transcript_parts({
download_url: recording.media_shortcuts.transcript.data.download_url,
});
console.log(`Retrieved ${transcript_parts.length} transcript parts`);
const participants = await retrieve_participants({
download_url: recording.media_shortcuts.participant_events.data.participants_download_url,
});
console.log(`Retrieved ${participants.length} participants`);
const hybrid_transcript_parts = convert_to_hybrid_diarized_transcript_parts({
transcript_parts,
participants,
});
console.log(`Formatted ${hybrid_transcript_parts.length} hybrid transcript parts`);
const readable_hybrid_transcript_parts = convert_to_readable_transcript({ transcript_parts: hybrid_transcript_parts });
console.log(`Formatted ${readable_hybrid_transcript_parts.length} readable hybrid transcript parts`);

const output_dir = path.join(process.cwd(), `output/recording-${msg.data.recording.id}`);
fs.mkdirSync(output_dir, { recursive: true });

// Write the participants list to a file.
const output_path_participants = path.join(output_dir, "participants.json");
fs.writeFileSync(output_path_participants, JSON.stringify(participants, null, 2), { flag: "w+" });

// Write the raw transcript parts to a file.
const output_path_transcript = path.join(output_dir, "transcript.json");
fs.writeFileSync(output_path_transcript, JSON.stringify(transcript_parts, null, 2), { flag: "w+" });

// Write the hybrid diarized transcript parts to a file.
const output_path_hybrid = path.join(output_dir, "hybrid_diarization_transcript.json");
fs.writeFileSync(output_path_hybrid, JSON.stringify(hybrid_transcript_parts, null, 2), { flag: "w+" });

// Write the readable hybrid diarized transcript to a file.
const output_path_readable = path.join(output_dir, "hybrid_diarization_transcript.txt");
fs.writeFileSync(output_path_readable, readable_hybrid_transcript_parts.map((t) => t ? `${t.speaker}: ${t.paragraph}` : "").join("\n"), { flag: "w+" });

// Return the transcript parts and readable transcript.
@@ -121,13 +119,13 @@ async function retrieve_transcript_parts(args: { download_url: string }) {
}

/**
* Retrieve the participants list from the participant events artifact's `participants_download_url`.
*/
async function retrieve_participants(args: { download_url: string }) {
const { download_url } = z.object({ download_url: z.string() }).parse(args);

const response = await fetch(download_url);
if (!response.ok) throw new Error(await response.text());

return ParticipantPartSchema.array().parse(await response.json());
}