recallai · gerrysaporito · Feb 18, 2026 · Feb 18, 2026 · Feb 18, 2026 · Feb 18, 2026
diff --git a/bot_retranscribe_failed_transcription_jobs/.env.sample b/bot_retranscribe_failed_transcription_jobs/.env.sample
@@ -0,0 +1,2 @@
+RECALL_API_KEY=RECALL_API_KEY
+RECALL_REGION=RECALL_REGION # e.g. us-west-2, us-east-1, eu-central-1, ap-northeast-1
diff --git a/bot_retranscribe_failed_transcription_jobs/README.md b/bot_retranscribe_failed_transcription_jobs/README.md
@@ -0,0 +1,138 @@
+# Retranscribe Failed Bot Transcription Jobs
+
+This example demonstrates how to bulk retranscribe recordings from bots using the Recall.ai API.
+
+This script lists bots by date range and metadata filters (only bots with status `done` or `analysis_failed`) and creates new async transcript jobs for each recording. This is useful for retrying failed transcription jobs or re-transcribing with different settings.
+
+## Pre-requisites
+
+-   [Node.js](https://nodejs.org/en/download)
+-   [NPM](https://docs.npmjs.com/downloading-and-installing-node-js-and-npm)
+
+## Quickstart
+
+### 1. Set up environment variables
+
+Copy the `.env.sample` file and rename it to `.env`:
+
+```bash
+cp .env.sample .env
+```
+
+Then fill out the variables in the `.env` file:
+
+-   `RECALL_API_KEY` - Your Recall.ai API key
+-   `RECALL_REGION` - Your Recall.ai region (e.g., `us-west-2`)
+
+### 2. Install dependencies
+
+Open this directory in a terminal and run:
+
+```bash
+npm install
+```
+
+### 3. Run the script
+
+Retranscribe all recordings from bots that joined after a specific date using Recall.ai async transcription:
+
+```bash
+npx ts-node src/index.ts \
+  --start_date_utc "2025-12-15 00:00:00" \
+  --transcript_config '{"provider":{"recallai_async":{}}}'
+```
+
+Retranscribe recordings within a date range using AssemblyAI:
+
+```bash
+npx ts-node src/index.ts \
+  --start_date_utc "2025-12-15 00:00:00" \
+  --end_date_utc "2025-12-31 00:00:00" \
+  --transcript_config '{"provider":{"assembly_ai_async":{"language_code":"en_us"}}}'
+```
+
+Filter by custom metadata to retranscribe only a specific customer's recordings:
+
+```bash
+npx ts-node src/index.ts \
+  --start_date_utc "2025-12-15 00:00:00" \
+  --metadata '{"team_id":"1872"}' \
+  --transcript_config '{"provider":{"recallai_async":{"language_code":"en"}}}'
+```
+
+Retranscribe with custom vocabulary/key terms:
+
+```bash
+npx ts-node src/index.ts \
+  --start_date_utc "2025-12-15 00:00:00" \
+  --transcript_config '{"provider":{"recallai_async":{"key_terms":["Recall","API","transcription"]}}}'
+```
+
+### 4. View the output
+
+The script will output progress and final count:
+
+```
+Retranscribing recordings from bots: 2025-12-15 00:00:00 → 2025-12-31 00:00:00
+
+Transcript config: {"provider":{"recallai_async":{}}}
+
+{ pageCount: 5, nextPage: null }
+Created transcript job for recording: rec_abc123 (bot: bot_xyz789)
+Created transcript job for recording: rec_def456 (bot: bot_uvw012)
+Bot bot_nop345 has no recordings, skipping
+...
+
+Created 4 transcript jobs (skipped 1 bots with no recordings)
+```
+
+## CLI Options
+
+| Option                | Required | Description                                                                 |
+| --------------------- | -------- | --------------------------------------------------------------------------- |
+| `--start_date_utc`    | Yes      | Process bots that joined after this date                                    |
+| `--end_date_utc`      | No       | Process bots that joined before this date                                   |
+| `--metadata`          | No       | JSON object to filter by custom bot metadata (e.g., `'{"team_id":"1872"}'`) |
+| `--transcript_config` | Yes      | JSON object with transcript configuration                                   |
+| `--help`              | No       | Show help message                                                           |
+
+**Note:** The script only processes bots with status `done` or `analysis_failed` (i.e., bots that have finished their calls). Bots that have no recordings are skipped.
+
+## Transcript Configuration
+
+The `--transcript_config` option accepts a JSON object with the following structure:
+
+```json
+{
+    "metadata": {}, // Optional: custom metadata for the transcript
+    "diarization": {}, // Optional: diarization settings
+    "provider": {
+        // Required: transcription provider config
+        "recallai_async": {}, // OR
+        "assembly_ai_async": {}
+    }
+}
+```
+
+### Recall.ai Async Provider Options
+
+| Option             | Type    | Default | Description                                    |
+| ------------------ | ------- | ------- | ---------------------------------------------- |
+| `language_code`    | string  | "auto"  | Language code (e.g., "en", "es", "fr", "auto") |
+| `spelling`         | array   | []      | Find/replace text in transcript                |
+| `key_terms`        | array   | []      | Boost recognition of specific terms            |
+| `filter_profanity` | boolean | false   | Filter profane words                           |
+
+### AssemblyAI Async Provider Options
+
+| Option          | Type    | Default | Description                    |
+| --------------- | ------- | ------- | ------------------------------ |
+| `language_code` | string  | "en_us" | Language code                  |
+| `punctuate`     | boolean | true    | Enable automatic punctuation   |
+| `format_text`   | boolean | true    | Enable text formatting         |
+| `disfluencies`  | boolean | false   | Include filler words (umm, uh) |
+
+## API Reference
+
+-   [Create Async Transcript](https://docs.recall.ai/reference/recording_create_transcript_create)
+-   [List Bots](https://docs.recall.ai/reference/bot_list)
diff --git a/bot_retranscribe_failed_transcription_jobs/package.json b/bot_retranscribe_failed_transcription_jobs/package.json
@@ -0,0 +1,22 @@
+{
+    "name": "bot_retranscribe_failed_transcription_jobs",
+    "version": "1.0.0",
+    "description": "Retranscribe failed transcription jobs by creating async transcript jobs for recordings",
+    "main": "index.ts",
+    "scripts": {
+        "dev": "ts-node src/index.ts"
+    },
+    "author": "Gerry Saporito",
+    "license": "MIT",
+    "devDependencies": {
+        "@types/mri": "^1.1.4",
+        "@types/node": "^24.10.1",
+        "ts-node": "^10.9.2",
+        "typescript": "^5.9.3"
+    },
+    "dependencies": {
+        "dotenv": "^17.2.3",
+        "mri": "^1.2.0",
+        "zod": "^4.1.13"
+    }
+}
diff --git a/bot_retranscribe_failed_transcription_jobs/src/bot_retranscribe_failed_transcription_jobs.ts b/bot_retranscribe_failed_transcription_jobs/src/bot_retranscribe_failed_transcription_jobs.ts
@@ -0,0 +1,126 @@
+import { z } from "zod";
+import { env } from "./config/env";
+import { fetch_with_retry } from "./fetch_with_retry";
+import { BotArtifactSchema } from "./schemas/BotArtifactSchema";
+
+/**
+ * Retranscribe failed transcription jobs by creating async transcript jobs for recordings.
+ *
+ * Pages through the bots returned by `list_bots` (filtered by join date and optional
+ * metadata) and creates one async transcript job per recording using `transcript_config`.
+ * Bots on the same page are processed concurrently; the recordings of a single bot are
+ * processed one after another. A failure to create a job for one recording is logged and
+ * counted, and does not stop the remaining recordings from being processed.
+ *
+ * @param args Untrusted input validated at runtime. Expected shape:
+ *   `{ start_date_utc: string, end_date_utc?: string, metadata?: Record<string, string>, transcript_config: Record<string, unknown> }`.
+ * @returns `count` – number of transcript jobs created; `skipped` – number of bots that
+ *   had no recordings; `failed` – number of recordings whose job creation threw.
+ * @throws If `args` does not match the expected shape, or if listing a page of bots fails.
+ */
+export async function bot_retranscribe_failed_transcription_jobs(args: unknown) {
+    const { start_date_utc, end_date_utc, metadata, transcript_config } = z.object({
+        start_date_utc: z.string(),
+        end_date_utc: z.string().optional(),
+        metadata: z.record(z.string(), z.string()).optional(),
+        transcript_config: z.record(z.string(), z.unknown()),
+    }).parse(args);
+
+    let count = 0;
+    let skipped = 0;
+    let failed = 0;
+    let next: string | null = null;
+    do {
+        const page = await list_bots({
+            join_at_after: start_date_utc,
+            join_at_before: end_date_utc,
+            metadata,
+            next,
+        });
+        console.log({ pageCount: page.results.length, nextPage: page.next });
+
+        await Promise.all(page.results.map(async (bot) => {
+            const recordings = bot.recordings || [];
+            if (recordings.length === 0) {
+                console.log(`Bot ${bot.id} has no recordings, skipping`);
+                skipped++;
+                return;
+            }
+
+            for (const recording of recordings) {
+                const recording_id = recording.id;
+                try {
+                    await create_async_transcript_job({ recording_id, transcript_config });
+                    console.log(`Created transcript job for recording: ${recording_id} (bot: ${bot.id})`);
+                    count++;
+                } catch (error) {
+                    // Keep going: one bad recording should not abort the whole bulk run,
+                    // but record it so the caller can tell the run was only partially successful.
+                    console.error(`Failed to create transcript job for recording ${recording_id}: ${error}`);
+                    failed++;
+                }
+            }
+        }));
+        next = page.next;
+    } while (next);
+
+    return { count, skipped, failed };
+}
+
+/**
+ * Fetches one page of bots from the Recall.ai List Bots endpoint.
+ *
+ * On the first call (no `next`), the request is built from the filters: status is
+ * restricted to `done` and `analysis_failed`, and the optional join-date and metadata
+ * filters are added as query parameters. On later calls, the `next` URL is requested
+ * as-is and the filters are not re-applied (presumably the next-page URL returned by the
+ * API already carries them — confirm against the API docs).
+ *
+ * @returns The bots on this page and the URL of the next page (`null` when there is none).
+ * @throws Error whose message is the response body when the response is not OK.
+ */
+async function list_bots(args: {
+    next?: string | null; // next page URL; omit or pass null for the first page
+    join_at_after?: string; // ISO 8601, e.g. "2025-12-15 00:00:00"
+    join_at_before?: string; // ISO 8601, e.g. "2025-12-15 00:25:00"
+    metadata?: Record<string, string>; // each pair is sent as a `metadata__<key>=<value>` filter
+}) {
+    const { next, join_at_after, join_at_before, metadata } = z.object({
+        // `nullish` (null | undefined) so the runtime check matches the optional `next?` in the signature.
+        next: z.string().nullish(),
+        join_at_after: z.string().optional(),
+        join_at_before: z.string().optional(),
+        metadata: z.record(z.string(), z.string()).optional(),
+    }).parse(args);
+
+    const url = next
+        ? new URL(next)
+        : new URL(`https://${env.RECALL_REGION}.recall.ai/api/v1/bot`);
+    if (!next) {
+        // `append` (not `set`) so both status values are sent as repeated query parameters.
+        ["done", "analysis_failed"].forEach((status) => {
+            url.searchParams.append("status", status);
+        });
+        if (join_at_after) url.searchParams.set("join_at_after", join_at_after);
+        if (join_at_before) url.searchParams.set("join_at_before", join_at_before);
+        if (metadata) {
+            for (const [key, value] of Object.entries(metadata)) {
+                url.searchParams.set(`metadata__${key}`, value);
+            }
+        }
+    }
+
+    const response = await fetch_with_retry(url.toString(), {
+        method: "GET",
+        headers: {
+            "Authorization": `${env.RECALL_API_KEY}`,
+            "Content-Type": "application/json",
+        },
+    });
+    if (!response.ok) throw new Error(await response.text());
+
+    // Validate the response shape before handing it to the caller.
+    return z.object({
+        results: BotArtifactSchema.array(),
+        next: z.string().nullable(),
+    }).parse(await response.json());
+}
+
+/**
+ * Starts an async transcript job for a single recording.
+ * Sends `transcript_config` as the JSON body of a POST to the recording's
+ * `create_transcript` endpoint and resolves with the parsed JSON response.
+ * Throws an Error carrying the response body when the response is not OK.
+ * API Docs: https://docs.recall.ai/reference/recording_create_transcript_create
+ */
+async function create_async_transcript_job(args: {
+    recording_id: string;
+    transcript_config: Record<string, unknown>;
+}) {
+    const args_schema = z.object({
+        recording_id: z.string(),
+        transcript_config: z.record(z.string(), z.unknown()),
+    });
+    const validated = args_schema.parse(args);
+
+    const endpoint = `https://${env.RECALL_REGION}.recall.ai/api/v1/recording/${validated.recording_id}/create_transcript/`;
+    const request_init: RequestInit = {
+        method: "POST",
+        headers: {
+            "Authorization": `${env.RECALL_API_KEY}`,
+            "Content-Type": "application/json",
+        },
+        body: JSON.stringify(validated.transcript_config),
+    };
+
+    const response = await fetch_with_retry(endpoint, request_init);
+    if (response.ok) {
+        return response.json();
+    }
+    throw new Error(await response.text());
+}
diff --git a/bot_retranscribe_failed_transcription_jobs/src/config/env.ts b/bot_retranscribe_failed_transcription_jobs/src/config/env.ts
@@ -0,0 +1,6 @@
+import dotenv from "dotenv";
+import { EnvSchema } from "../schemas/EnvSchema";
+
+// Load variables via dotenv before process.env is validated below; the order matters.
+dotenv.config();
+
+// Parse process.env with EnvSchema once, when this module is first imported.
+// NOTE(review): EnvSchema is defined in ../schemas/EnvSchema (not shown here); presumably
+// its parse throws when a required variable is missing — confirm.
+export const env = EnvSchema.parse(process.env);
diff --git a/bot_retranscribe_failed_transcription_jobs/src/fetch_with_retry.ts b/bot_retranscribe_failed_transcription_jobs/src/fetch_with_retry.ts
@@ -0,0 +1,24 @@
+/**
+ * Helper function to fetch with retry.
+ *
+ * Retries only when the server answers HTTP 429; any other response (including other
+ * error statuses) is returned to the caller unchanged. Between attempts it waits for the
+ * duration given by the Retry-After header plus 1-5 seconds of random jitter.
+ *
+ * @param url Absolute URL to request.
+ * @param options Options passed to `fetch` unchanged on every attempt.
+ * @param max_attempts Maximum number of requests to make before giving up.
+ * @returns The first response whose status is not 429.
+ * @throws Error when every attempt was answered with 429. Errors thrown by `fetch`
+ *   itself are not caught and propagate to the caller.
+ */
+export async function fetch_with_retry(url: string, options: RequestInit, max_attempts: number = 5): Promise<Response> {
+    for (let attempt = 1; attempt <= max_attempts; attempt++) {
+        const response = await fetch(url, options);
+        if (response.status !== 429) return response;
+
+        // No attempt is left to benefit from the wait, so fail immediately.
+        if (attempt === max_attempts) break;
+
+        const retry_after = parse_retry_after_seconds(response.headers.get("Retry-After"));
+        if (retry_after === null) {
+            console.error("Retry-After header not found or not parseable");
+        }
+        // Jitter spreads out concurrent callers so they do not all retry at the same instant.
+        const delay_seconds = Math.ceil(retry_after ?? 0) + Math.ceil(Math.random() * 5);
+        console.log(`Rate limit exceeded, retrying in ${delay_seconds} seconds`);
+        await new Promise((resolve) => setTimeout(resolve, 1000 * delay_seconds));
+    }
+    // Deliberately do not serialize `options`: its headers carry the Authorization value.
+    throw new Error(`Max attempts (${max_attempts}) reached while fetching ${url}. method=${options.method ?? "GET"}`);
+}
+
+/**
+ * Converts a Retry-After header value into a non-negative number of seconds.
+ * Accepts both forms of the header: a number of seconds or an HTTP date.
+ * Returns null when the header is absent or cannot be parsed.
+ */
+function parse_retry_after_seconds(header: string | null): number | null {
+    if (header === null || header.trim() === "") return null;
+
+    const seconds = Number(header);
+    if (Number.isFinite(seconds)) return Math.max(0, seconds);
+
+    const date_ms = Date.parse(header);
+    if (Number.isNaN(date_ms)) return null;
+    return Math.max(0, (date_ms - Date.now()) / 1000);
+}
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1,2 @@
		RECALL_API_KEY=RECALL_API_KEY
		RECALL_REGION=RECALL_REGION # e.g. us-west-2, us-east-1, eu-central-1, ap-northeast-1