From 8bf8fd1682575fbc305f167d7a099a3c5629df45 Mon Sep 17 00:00:00 2001 From: Shahar Carmi Date: Mon, 8 Jun 2026 14:09:01 +0300 Subject: [PATCH] fix: discover() ergonomics, dataFormat/env aliases, and Transport listener leak discover(): - DiscoverResult.data is now always an array (empty on failure) and gains a .results alias plus Symbol.iterator, so documented usage never throws. - Failed discover() surfaces server-provided error/message detail in the thrown APIError instead of a bare status. DX: - dataFormat accepts 'md' as an alias for 'markdown'. - BRIGHTDATA_API_KEY accepted as a fallback env var after BRIGHTDATA_API_TOKEN. Transport: - Replace the per-instance beforeExit listener with a single shared listener tracked by an open-transport counter, fixing MaxListenersExceededWarning when many clients are created. Warning message is now count-aware. README updated for all of the above. New unit tests cover each change (309 unit tests green; lint, typecheck, and build clean). Co-Authored-By: Claude Opus 4.8 (1M context) --- .gitignore | 4 +++ README.md | 19 +++++++++++-- src/api/discover/job.ts | 4 ++- src/api/discover/result.ts | 17 +++++++++++- src/client.ts | 3 +- src/core/transport.ts | 48 ++++++++++++++++++++++++-------- src/schemas/discover.ts | 3 ++ src/schemas/request.ts | 6 +++- src/types/discover.ts | 2 ++ tests/client.test.ts | 35 +++++++++++++++++++++++ tests/discover.test.ts | 57 ++++++++++++++++++++++++++++++++++++++ tests/transport.test.ts | 20 +++++++++++-- 12 files changed, 197 insertions(+), 21 deletions(-) diff --git a/.gitignore b/.gitignore index f5acc81..72134a8 100644 --- a/.gitignore +++ b/.gitignore @@ -94,3 +94,7 @@ logs *.log test.mjs test.js + +# Local-only agent context files (not for commit) +CLAUDE.md +AGENTS.md diff --git a/README.md b/README.md index 549df16..40e1674 100644 --- a/README.md +++ b/README.md @@ -89,7 +89,7 @@ const screenshot = await client.scrapeUrl('https://example.com', { // Full options const result = await client.scrapeUrl('https://example.com', { format: 'raw', // 'raw' (default) or 'json' - dataFormat: 'html', // 'html' (default), 'markdown', 'screenshot' + dataFormat: 'html', // 'html' (default), 'markdown' (alias: 'md'), 'screenshot' country: 'gb', // two-letter country code method: 'GET', // HTTP method }); @@ -177,10 +177,23 @@ console.log(result.rowCount); AI-powered web search with relevance ranking based on intent. +`discover()` resolves to a `DiscoverResult` wrapper (not a bare array). The items +are on `result.data` (or its alias `result.results`), and the result is iterable. +On failure `result.success` is `false`, `result.error` carries the reason, and +`result.data` / `result.results` stay an empty array — so iterating never throws. + ```javascript // Basic search const result = await client.discover('artificial intelligence trends 2026'); -console.log(result.data); // [{ link, title, description, relevance_score }, ...] + +if (!result.success) { + console.error('discover failed:', result.error); +} else { + console.log(result.results); // [{ link, title, description, relevance_score }, ...] + for (const item of result) { + console.log(`[${item.relevance_score}] ${item.title}`); + } +} // With intent for semantic ranking const result = await client.discover('Tesla battery technology', { @@ -317,7 +330,7 @@ Get your API token from [Bright Data Control Panel](https://brightdata.com/cp/se ### Environment Variables ```env -BRIGHTDATA_API_TOKEN=your_api_token +BRIGHTDATA_API_TOKEN=your_api_token # BRIGHTDATA_API_KEY also accepted BRIGHTDATA_WEB_UNLOCKER_ZONE=your_web_unlocker_zone # Optional BRIGHTDATA_SERP_ZONE=your_serp_zone # Optional BRIGHTDATA_BROWSERAPI_USERNAME=your_browser_username # Optional, for Browser API diff --git a/src/api/discover/job.ts b/src/api/discover/job.ts index fc53036..57178f6 100644 --- a/src/api/discover/job.ts +++ b/src/api/discover/job.ts @@ -77,8 +77,10 @@ export class DiscoverJob { } if (response.status === 'error' || response.status === 'failed') { + const detail = response.error || response.message; throw new APIError( - `Discover task ${this.taskId} failed with status: ${response.status}`, + `Discover task ${this.taskId} failed with status: ${response.status}` + + (detail ? ` — ${detail}` : ''), ); } diff --git a/src/api/discover/result.ts b/src/api/discover/result.ts index 6ca1a37..7463d4a 100644 --- a/src/api/discover/result.ts +++ b/src/api/discover/result.ts @@ -24,7 +24,9 @@ export class DiscoverResult extends BaseResult { readonly taskId: string | null; constructor(fields: DiscoverResultFields) { - super(fields); + // `data` is always an array (empty on failure) so callers can safely + // iterate `result.data` / `result.results` without a null check. + super({ ...fields, data: fields.data ?? [] }); this.query = fields.query; this.intent = fields.intent ?? null; this.durationSeconds = fields.durationSeconds ?? null; @@ -32,6 +34,19 @@ export class DiscoverResult extends BaseResult { this.taskId = fields.taskId ?? null; } + /** + * The discovered items. Alias for `data`, kept in sync with the raw API's + * `results` field. Always an array (empty when the search failed). + */ + get results(): DiscoverResultItem[] { + return this.data ?? []; + } + + /** Iterate the discovered items directly: `for (const item of result)`. */ + [Symbol.iterator](): Iterator { + return this.results[Symbol.iterator](); + } + override toJSON(): Record { return { ...super.toJSON(), diff --git a/src/client.ts b/src/client.ts index 2f57f67..5a14643 100644 --- a/src/client.ts +++ b/src/client.ts @@ -104,6 +104,7 @@ export class bdclient { ); const { BRIGHTDATA_API_TOKEN, + BRIGHTDATA_API_KEY, BRIGHTDATA_VERBOSE, BRIGHTDATA_WEB_UNLOCKER_ZONE, BRIGHTDATA_SERP_ZONE, @@ -123,7 +124,7 @@ export class bdclient { const apiKey = assertSchema( ApiKeySchema, - opt.apiKey || BRIGHTDATA_API_TOKEN, + opt.apiKey || BRIGHTDATA_API_TOKEN || BRIGHTDATA_API_KEY, 'bdclient.options.apiKey', ); diff --git a/src/core/transport.ts b/src/core/transport.ts index 06b55f3..e67cc9d 100644 --- a/src/core/transport.ts +++ b/src/core/transport.ts @@ -44,6 +44,40 @@ function isAbortTimeout(err: Error): boolean { ); } +// A single shared `beforeExit` listener tracks every open Transport via a +// counter, instead of one listener per instance. Registering a listener per +// Transport tripped Node's MaxListenersExceededWarning once an app created +// more than ~10 clients. The listener is installed lazily on the first open +// Transport and removed again once the last one is closed. +let openTransportCount = 0; +let beforeExitListener: (() => void) | null = null; + +function warnUnclosedTransports(): void { + if (openTransportCount > 0) { + console.warn( + `[brightdata-sdk] ${openTransportCount} Transport(s) were not closed. ` + + 'Call client.close() or use "await using client = new bdclient()" ' + + 'to avoid keeping the process alive.', + ); + } +} + +function registerOpenTransport(): void { + openTransportCount++; + if (!beforeExitListener) { + beforeExitListener = warnUnclosedTransports; + process.on('beforeExit', beforeExitListener); + } +} + +function unregisterOpenTransport(): void { + if (openTransportCount > 0) openTransportCount--; + if (openTransportCount === 0 && beforeExitListener) { + process.removeListener('beforeExit', beforeExitListener); + beforeExitListener = null; + } +} + export interface TransportOptions { apiKey: string; timeout?: number; @@ -76,16 +110,6 @@ export class Transport { private closed = false; private logger = getLogger('transport'); - private onBeforeExit = () => { - if (!this.closed) { - console.warn( - '[brightdata-sdk] Transport was not closed. ' + - 'Call client.close() or use "await using client = new bdclient()" ' + - 'to avoid keeping the process alive.', - ); - } - }; - constructor(opts: TransportOptions) { this.authHeaders = getAuthHeaders(opts.apiKey); this.defaultTimeout = opts.timeout ?? DEFAULT_TIMEOUT; @@ -109,7 +133,7 @@ export class Transport { errorCodes: RETRY_ERROR_CODES, }), ); - process.on('beforeExit', this.onBeforeExit); + registerOpenTransport(); } get headers(): Record { @@ -235,7 +259,7 @@ export class Transport { async close(): Promise { if (this.closed) return; this.closed = true; - process.removeListener('beforeExit', this.onBeforeExit); + unregisterOpenTransport(); this.rateLimiter?.destroy(); await (this.agent as Agent).close(); } diff --git a/src/schemas/discover.ts b/src/schemas/discover.ts index 0393cb1..2e434d4 100644 --- a/src/schemas/discover.ts +++ b/src/schemas/discover.ts @@ -30,5 +30,8 @@ export const DiscoverTriggerResponseSchema = z export const DiscoverPollResponseSchema = z .object({ status: z.enum(['processing', 'done', 'error', 'failed']), + // Surfaced when status is 'error'/'failed' — server provides a reason + error: z.string().optional(), + message: z.string().optional(), }) .passthrough(); diff --git a/src/schemas/request.ts b/src/schemas/request.ts index de75bda..8d5d8f9 100644 --- a/src/schemas/request.ts +++ b/src/schemas/request.ts @@ -17,7 +17,11 @@ export const RequestOptionsBaseSchema = z.object({ .transform((v) => v.toUpperCase() as 'GET' | 'POST') .optional(), format: z.enum(['json', 'raw']).optional(), - dataFormat: z.enum(['html', 'markdown', 'screenshot']).optional(), + dataFormat: z + // 'md' is accepted as a convenience alias for 'markdown' + .enum(['html', 'markdown', 'md', 'screenshot']) + .transform((v) => (v === 'md' ? 'markdown' : v)) + .optional(), }); export const FetchingOptionsSchema = z.object({ diff --git a/src/types/discover.ts b/src/types/discover.ts index 5f0a7a8..292cf42 100644 --- a/src/types/discover.ts +++ b/src/types/discover.ts @@ -8,5 +8,7 @@ export interface DiscoverOperations { status: string; results?: unknown[]; duration_seconds?: number; + error?: string; + message?: string; }>; } diff --git a/tests/client.test.ts b/tests/client.test.ts index e61e043..2f9e51b 100644 --- a/tests/client.test.ts +++ b/tests/client.test.ts @@ -199,3 +199,38 @@ describe('bdclient - Lazy initialization', () => { expect(keys).toContain('datasets'); }); }); + +describe('bdclient - dataFormat md alias', () => { + test("dataFormat: 'md' is sent to the API as 'markdown'", async () => { + vi.mocked(Transport.prototype.request).mockClear(); + const client = new bdclient({ + apiKey: 'test_token_1234567890abcdef', + autoCreateZones: false, + }); + await client.scrapeUrl('https://example.com', { dataFormat: 'md' }); + + const body = JSON.parse( + vi.mocked(Transport.prototype.request).mock.calls[0][1] + ?.body as string, + ); + expect(body.data_format).toBe('markdown'); + }); +}); + +describe('bdclient - API key env fallback', () => { + test('accepts BRIGHTDATA_API_KEY when BRIGHTDATA_API_TOKEN is unset', () => { + const { BRIGHTDATA_API_TOKEN, BRIGHTDATA_API_KEY } = process.env; + delete process.env.BRIGHTDATA_API_TOKEN; + process.env.BRIGHTDATA_API_KEY = 'env_key_1234567890abcdef'; + try { + expect(() => new bdclient({ autoCreateZones: false })).not.toThrow(); + } finally { + if (BRIGHTDATA_API_TOKEN === undefined) + delete process.env.BRIGHTDATA_API_TOKEN; + else process.env.BRIGHTDATA_API_TOKEN = BRIGHTDATA_API_TOKEN; + if (BRIGHTDATA_API_KEY === undefined) + delete process.env.BRIGHTDATA_API_KEY; + else process.env.BRIGHTDATA_API_KEY = BRIGHTDATA_API_KEY; + } + }); +}); diff --git a/tests/discover.test.ts b/tests/discover.test.ts index 23ec5de..24752d5 100644 --- a/tests/discover.test.ts +++ b/tests/discover.test.ts @@ -125,6 +125,39 @@ describe('DiscoverService.search', () => { expect(result.success).toBe(false); expect(result.error).toContain('failed with status: error'); }); + + test('failed result keeps data/results as an empty array (never null)', async () => { + mockRequestSequence([ + { statusCode: 200, body: JSON.stringify({ task_id: 'task_fail' }) }, + { statusCode: 200, body: JSON.stringify({ status: 'failed' }) }, + ]); + + const result = await service.search('test'); + expect(result.success).toBe(false); + expect(result.data).toEqual([]); + expect(result.results).toEqual([]); + // documented usage must not throw on failure + expect(() => { + for (const _ of result) void _; + }).not.toThrow(); + }); + + test('surfaces server-provided error detail on failure', async () => { + mockRequestSequence([ + { statusCode: 200, body: JSON.stringify({ task_id: 'task_fail' }) }, + { + statusCode: 200, + body: JSON.stringify({ + status: 'error', + error: 'quota exceeded', + }), + }, + ]); + + const result = await service.search('test'); + expect(result.success).toBe(false); + expect(result.error).toContain('quota exceeded'); + }); }); // --- DiscoverJob --- @@ -287,4 +320,28 @@ describe('DiscoverResult', () => { expect(result.totalResults).toBeNull(); expect(result.taskId).toBeNull(); }); + + test('data and results default to an empty array when omitted', () => { + const result = new DiscoverResult({ success: false, query: 'q' }); + expect(result.data).toEqual([]); + expect(result.results).toEqual([]); + }); + + test('.results aliases .data', () => { + const items = [ + { link: 'https://x.com', title: 'X', description: 'd', relevance_score: 0.8 }, + ]; + const result = new DiscoverResult({ success: true, data: items, query: 'q' }); + expect(result.results).toBe(result.data); + expect(result.results).toEqual(items); + }); + + test('is iterable over its items', () => { + const items = [ + { link: 'https://a.com', title: 'A', description: 'd', relevance_score: 1 }, + { link: 'https://b.com', title: 'B', description: 'd', relevance_score: 0.5 }, + ]; + const result = new DiscoverResult({ success: true, data: items, query: 'q' }); + expect([...result]).toEqual(items); + }); }); diff --git a/tests/transport.test.ts b/tests/transport.test.ts index 36414f4..2bae2b9 100644 --- a/tests/transport.test.ts +++ b/tests/transport.test.ts @@ -125,7 +125,7 @@ describe('Transport lifecycle', () => { process.emit('beforeExit', 0); expect(warnSpy).toHaveBeenCalledWith( - expect.stringContaining('Transport was not closed'), + expect.stringContaining('were not closed'), ); warnSpy.mockRestore(); }); @@ -138,10 +138,26 @@ describe('Transport lifecycle', () => { process.emit('beforeExit', 0); expect(warnSpy).not.toHaveBeenCalledWith( - expect.stringContaining('Transport was not closed'), + expect.stringContaining('were not closed'), ); warnSpy.mockRestore(); }); + + it('registers a single shared beforeExit listener for many transports', async () => { + const before = process.listenerCount('beforeExit'); + const transports = Array.from( + { length: 15 }, + () => new Transport({ apiKey: API_KEY }), + ); + + // one shared listener regardless of how many transports are open + expect(process.listenerCount('beforeExit')).toBe(before + 1); + + for (const t of transports) await t.close(); + + // listener is removed once the last transport closes + expect(process.listenerCount('beforeExit')).toBe(before); + }); }); });