Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions bot_retranscribe_failed_transcription_jobs/.env.sample
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
RECALL_API_KEY=RECALL_API_KEY
RECALL_REGION=RECALL_REGION # e.g. us-west-2, us-east-1, eu-central-1, ap-northeast-1
138 changes: 138 additions & 0 deletions bot_retranscribe_failed_transcription_jobs/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,138 @@
# Retranscribe Failed Bot Transcription Jobs

This example demonstrates how to bulk retranscribe recordings from bots using the Recall.ai API.

This script lists bots by date range and metadata filters (only bots with status `done` or `analysis_failed`) and creates new async transcript jobs for each recording. This is useful for retrying failed transcription jobs or re-transcribing with different settings.

## Pre-requisites

- [Node.js](https://nodejs.org/en/download)
- [NPM](https://docs.npmjs.com/downloading-and-installing-node-js-and-npm)

## Quickstart

### 1. Set up environment variables

Copy the `.env.sample` file and rename it to `.env`:

```bash
cp .env.sample .env
```

Then fill out the variables in the `.env` file:

- `RECALL_API_KEY` - Your Recall.ai API key
- `RECALL_REGION` - Your Recall.ai region (e.g., `us-west-2`)

### 2. Install dependencies

Open this directory in a terminal and run:

```bash
npm install
```

### 3. Run the script

Retranscribe all recordings from bots that joined after a specific date using Recall.ai async transcription:

```bash
npx ts-node src/index.ts \
--start_date_utc "2025-12-15 00:00:00" \
--transcript_config '{"provider":{"recallai_async":{}}}'
```

Retranscribe recordings within a date range using AssemblyAI:

```bash
npx ts-node src/index.ts \
--start_date_utc "2025-12-15 00:00:00" \
--end_date_utc "2025-12-31 00:00:00" \
--transcript_config '{"provider":{"assembly_ai_async":{"language_code":"en_us"}}}'
```

Filter by custom metadata to retranscribe only a specific customer's recordings:

```bash
npx ts-node src/index.ts \
--start_date_utc "2025-12-15 00:00:00" \
--metadata '{"team_id":"1872"}' \
--transcript_config '{"provider":{"recallai_async":{"language_code":"en"}}}'
```

Retranscribe with custom vocabulary/key terms:

```bash
npx ts-node src/index.ts \
--start_date_utc "2025-12-15 00:00:00" \
--transcript_config '{"provider":{"recallai_async":{"key_terms":["Recall","API","transcription"]}}}'
```

### 4. View the output

The script will output progress and a final count:

```
Retranscribing recordings from bots: 2025-12-15 00:00:00 → 2025-12-31 00:00:00

Transcript config: {"provider":{"recallai_async":{}}}

{ pageCount: 5, nextPage: null }
Created transcript job for recording: rec_abc123 (bot: bot_xyz789)
Created transcript job for recording: rec_def456 (bot: bot_uvw012)
Bot bot_nop345 has no recordings, skipping
...

Created 4 transcript jobs (skipped 1 bots with no recordings)
```

## CLI Options

| Option | Required | Description |
| --------------------- | -------- | --------------------------------------------------------------------------- |
| `--start_date_utc` | Yes | Process bots that joined after this date |
| `--end_date_utc` | No | Process bots that joined before this date |
| `--metadata` | No | JSON object to filter by custom bot metadata (e.g., `'{"team_id":"1872"}'`) |
| `--transcript_config` | Yes | JSON object with transcript configuration |
| `--help` | No | Show help message |

**Note:** The script only processes bots with status `done` or `analysis_failed` (i.e., bots that have completed and have recordings available).

## Transcript Configuration

The `--transcript_config` option accepts a JSON object with the following structure:

```jsonc
{
"metadata": {}, // Optional: custom metadata for the transcript
"diarization": {}, // Optional: diarization settings
"provider": {
// Required: transcription provider config
"recallai_async": {}, // OR
"assembly_ai_async": {}
}
}
```

### Recall.ai Async Provider Options

| Option | Type | Default | Description |
| ------------------ | ------- | ------- | ---------------------------------------------- |
| `language_code` | string | "auto" | Language code (e.g., "en", "es", "fr", "auto") |
| `spelling` | array | [] | Find/replace text in transcript |
| `key_terms` | array | [] | Boost recognition of specific terms |
| `filter_profanity` | boolean | false | Filter profane words |

### AssemblyAI Async Provider Options

| Option | Type | Default | Description |
| --------------- | ------- | ------- | ------------------------------ |
| `language_code` | string | "en_us" | Language code |
| `punctuate` | boolean | true | Enable automatic punctuation |
| `format_text` | boolean | true | Enable text formatting |
| `disfluencies` | boolean | false | Include filler words (umm, uh) |

## API Reference

- [Create Async Transcript](https://docs.recall.ai/reference/recording_create_transcript_create)
- [List Bots](https://docs.recall.ai/reference/bot_list)
22 changes: 22 additions & 0 deletions bot_retranscribe_failed_transcription_jobs/package.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
{
"name": "bot_retranscribe_failed_transcription_jobs",
"version": "1.0.0",
"description": "Retranscribe failed transcription jobs by creating async transcript jobs for recordings",
"main": "src/index.ts",
"scripts": {
"dev": "ts-node src/index.ts"
},
"author": "Gerry Saporito",
"license": "MIT",
"devDependencies": {
"@types/mri": "^1.1.4",
"@types/node": "^24.10.1",
"ts-node": "^10.9.2",
"typescript": "^5.9.3"
},
"dependencies": {
"dotenv": "^17.2.3",
"mri": "^1.2.0",
"zod": "^4.1.13"
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,126 @@
import { z } from "zod";
import { env } from "./config/env";
import { fetch_with_retry } from "./fetch_with_retry";
import { BotArtifactSchema } from "./schemas/BotArtifactSchema";

/**
 * Retranscribe failed transcription jobs by creating async transcript jobs for recordings.
 *
 * Pages through the bots returned by `list_bots` for the given filters and, for every
 * recording on every bot, calls `create_async_transcript_job` with `transcript_config`.
 * Bots on the same page are processed concurrently; recordings of a single bot are
 * processed one after another.
 *
 * @param args - Unvalidated input (e.g. parsed CLI flags). Validated with zod before use:
 *   - `start_date_utc` (required string): forwarded as `join_at_after`.
 *   - `end_date_utc` (optional string): forwarded as `join_at_before`.
 *   - `metadata` (optional string→string record): forwarded as metadata filters.
 *   - `transcript_config` (required record): sent as the body of every transcript job.
 * @returns `count` — jobs created; `skipped` — bots that had no recordings;
 *   `failed` — recordings whose job creation threw (the error is logged, not rethrown).
 * @throws If `args` fails validation, or if listing a page of bots fails.
 */
export async function bot_retranscribe_failed_transcription_jobs(args: unknown) {
  const { start_date_utc, end_date_utc, metadata, transcript_config } = z.object({
    start_date_utc: z.string(),
    end_date_utc: z.string().optional(),
    metadata: z.record(z.string(), z.string()).optional(),
    transcript_config: z.record(z.string(), z.unknown()),
  }).parse(args);

  let count = 0;
  let skipped = 0;
  // Failures are tolerated per recording so one bad recording does not abort the run,
  // but they are counted so the caller can tell a clean run from a partial one.
  let failed = 0;
  let next: string | null = null;
  do {
    const page = await list_bots({
      join_at_after: start_date_utc,
      join_at_before: end_date_utc,
      metadata,
      next,
    });
    console.log({ pageCount: page.results.length, nextPage: page.next });

    await Promise.all(page.results.map(async (bot) => {
      const recordings = bot.recordings || [];
      if (recordings.length === 0) {
        console.log(`Bot ${bot.id} has no recordings, skipping`);
        skipped++;
        return;
      }

      for (const recording of recordings) {
        const recording_id = recording.id;
        try {
          await create_async_transcript_job({ recording_id, transcript_config });
          console.log(`Created transcript job for recording: ${recording_id} (bot: ${bot.id})`);
          count++;
        } catch (error) {
          console.error(`Failed to create transcript job for recording ${recording_id}: ${error}`);
          failed++;
        }
      }
    }));
    next = page.next;
  } while (next);

  return { count, skipped, failed };
}

/**
 * Fetches one page of bots matching the given filters.
 *
 * On the first call (no `next`), the request is built from the filters and restricted
 * to bots with status `done` or `analysis_failed`. On later calls the `next` URL is
 * requested as-is and the filter arguments are ignored — presumably the pagination URL
 * returned by the API already encodes them (confirm against the List Bots API docs).
 *
 * @param args.next - Pagination URL from the previous page; omit or pass `null` for the first page.
 * @param args.join_at_after - Lower bound on bot join time, e.g. "2025-12-15 00:00:00".
 * @param args.join_at_before - Upper bound on bot join time, e.g. "2025-12-15 00:25:00".
 * @param args.metadata - Key/value pairs; each is sent as a `metadata__<key>=<value>` query parameter.
 * @returns The parsed page: `results` (bots) and `next` (URL of the next page, or `null`).
 * @throws If the arguments or the response body fail validation, or the response is not OK.
 */
async function list_bots(args: {
  next?: string | null; // next page URL
  join_at_after?: string; // UTC timestamp, e.g. "2025-12-15 00:00:00"
  join_at_before?: string; // UTC timestamp, e.g. "2025-12-15 00:25:00"
  metadata?: Record<string, string>; // each key-value pair becomes one filter
}) {
  const { next, join_at_after, join_at_before, metadata } = z.object({
    // `nullish` (not `nullable`) so that omitting `next`, which the signature allows, validates.
    next: z.string().nullish(),
    join_at_after: z.string().optional(),
    join_at_before: z.string().optional(),
    metadata: z.record(z.string(), z.string()).optional(),
  }).parse(args);

  // Trailing slash matches the other endpoint used in this file and avoids a redirect round trip.
  const url = next
    ? new URL(next)
    : new URL(`https://${env.RECALL_REGION}.recall.ai/api/v1/bot/`);
  if (!next) {
    for (const status of ["done", "analysis_failed"]) {
      // `append`, not `set`: both statuses must be present as repeated query parameters.
      url.searchParams.append("status", status);
    }
    if (join_at_after) url.searchParams.set("join_at_after", join_at_after);
    if (join_at_before) url.searchParams.set("join_at_before", join_at_before);
    if (metadata) {
      for (const [key, value] of Object.entries(metadata)) {
        url.searchParams.set(`metadata__${key}`, value);
      }
    }
  }

  const response = await fetch_with_retry(url.toString(), {
    method: "GET",
    headers: {
      "Authorization": `${env.RECALL_API_KEY}`,
      "Content-Type": "application/json",
    },
  });
  if (!response.ok) {
    // Include the status so an empty error body still yields a useful message.
    throw new Error(`Failed to list bots (HTTP ${response.status}): ${await response.text()}`);
  }

  return z.object({
    results: BotArtifactSchema.array(),
    next: z.string().nullable(),
  }).parse(await response.json());
}

/**
 * Starts an asynchronous transcription job for a single recording.
 * API Docs: https://docs.recall.ai/reference/recording_create_transcript_create
 *
 * @param args.recording_id - ID of the recording to transcribe; interpolated into the request path.
 * @param args.transcript_config - Sent verbatim (JSON-encoded) as the request body.
 * @returns The parsed JSON body of a successful response.
 * @throws If the arguments fail validation, or with the response text when the response is not OK.
 */
async function create_async_transcript_job(args: {
  recording_id: string;
  transcript_config: Record<string, unknown>;
}) {
  const args_schema = z.object({
    recording_id: z.string(),
    transcript_config: z.record(z.string(), z.unknown()),
  });
  const validated = args_schema.parse(args);

  const endpoint = `https://${env.RECALL_REGION}.recall.ai/api/v1/recording/${validated.recording_id}/create_transcript/`;
  const request = {
    method: "POST",
    headers: {
      "Authorization": `${env.RECALL_API_KEY}`,
      "Content-Type": "application/json",
    },
    body: JSON.stringify(validated.transcript_config),
  };

  const response = await fetch_with_retry(endpoint, request);
  if (response.ok) {
    return response.json();
  }

  // Surface the API's own error body to the caller.
  throw new Error(await response.text());
}
6 changes: 6 additions & 0 deletions bot_retranscribe_failed_transcription_jobs/src/config/env.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
import dotenv from "dotenv";
import { EnvSchema } from "../schemas/EnvSchema";

// Load variables from a local `.env` file (if present) into `process.env`.
// This must run before the parse below so those variables are visible to it.
dotenv.config();

/**
 * Process environment validated against `EnvSchema` at module load time.
 * Other modules read `env.RECALL_API_KEY` and `env.RECALL_REGION` from it.
 * NOTE(review): `parse` presumably throws on missing/invalid variables, which
 * would fail at import time — confirm against `EnvSchema`.
 */
export const env = EnvSchema.parse(process.env);
24 changes: 24 additions & 0 deletions bot_retranscribe_failed_transcription_jobs/src/fetch_with_retry.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
/**
 * Helper function to fetch with retry.
 *
 * Retries only on HTTP 429. Between attempts it waits for the server-provided
 * `Retry-After` (seconds or HTTP-date form; treated as 0 when absent or unparseable)
 * plus 1-5 seconds of random jitter. Any other response, successful or not, is
 * returned to the caller unchanged.
 *
 * @param url - Absolute URL to request.
 * @param options - Passed to `fetch` unchanged on every attempt.
 * @param max_attempts - Maximum number of requests to make (default 5).
 * @returns The first response whose status is not 429.
 * @throws Error when every attempt was answered with 429. The message deliberately
 *   omits headers and body so credentials in `options` never reach logs.
 */
export async function fetch_with_retry(url: string, options: RequestInit, max_attempts: number = 5): Promise<Response> {
  for (let attempt = 1; attempt <= max_attempts; attempt++) {
    const response = await fetch(url, options);
    if (response.status !== 429) return response;

    // The 429 body is never read; cancel it so the underlying connection can be released.
    try {
      await response.body?.cancel();
    } catch {
      // Best effort only: failing to cancel must not mask the rate-limit handling.
    }

    // No point waiting after the last attempt; fail immediately.
    if (attempt === max_attempts) break;

    const header = response.headers.get("Retry-After");
    const retry_after = parse_retry_after_seconds(header);
    if (header === null) console.error("Retry-After header not found");

    // Jitter keeps concurrent callers from retrying in lockstep.
    const delay_seconds = retry_after + Math.ceil(Math.random() * 5);
    console.log(`Rate limit exceeded, retrying in ${delay_seconds} seconds`);
    await new Promise((resolve) => setTimeout(resolve, 1000 * delay_seconds));
  }
  throw new Error(`Max attempts (${max_attempts}) reached while fetching ${url}. method=${options.method ?? "GET"}`);
}

/**
 * Converts a `Retry-After` header value into a non-negative number of seconds.
 * Accepts both delay-seconds ("120") and HTTP-date forms; returns 0 when the
 * header is missing, empty, or unparseable.
 */
function parse_retry_after_seconds(header: string | null, now_ms: number = Date.now()): number {
  if (header === null) return 0;
  const trimmed = header.trim();
  if (trimmed === "") return 0;

  const seconds = Number(trimmed);
  if (Number.isFinite(seconds)) return Math.max(0, seconds);

  const date_ms = Date.parse(trimmed);
  if (Number.isNaN(date_ms)) return 0;
  return Math.max(0, (date_ms - now_ms) / 1000);
}
Loading