diff --git a/packages/connectors/reddit/package.json b/packages/connectors/reddit/package.json
new file mode 100644
index 0000000..8860417
--- /dev/null
+++ b/packages/connectors/reddit/package.json
@@ -0,0 +1,46 @@
+{
+  "name": "@spool-lab/connector-reddit",
+  "version": "0.1.0",
+  "description": "Reddit Saved and Upvoted posts for Spool",
+  "type": "module",
+  "main": "./dist/index.js",
+  "types": "./dist/index.d.ts",
+  "files": ["dist"],
+  "keywords": ["spool-connector", "reddit"],
+  "scripts": {
+    "build": "tsc",
+    "clean": "rm -rf dist",
+    "prepack": "pnpm run build"
+  },
+  "peerDependencies": {
+    "@spool/connector-sdk": "workspace:^"
+  },
+  "devDependencies": {
+    "@spool/connector-sdk": "workspace:^",
+    "@types/node": "^22.15.3",
+    "typescript": "^5.7.3"
+  },
+  "spool": {
+    "type": "connector",
+    "connectors": [
+      {
+        "id": "reddit-saved",
+        "platform": "reddit",
+        "label": "Reddit Saved",
+        "description": "Posts and comments you saved on Reddit",
+        "color": "#FF4500",
+        "ephemeral": false,
+        "capabilities": ["fetch", "cookies:chrome", "log"]
+      },
+      {
+        "id": "reddit-upvoted",
+        "platform": "reddit",
+        "label": "Reddit Upvoted",
+        "description": "Posts you upvoted on Reddit",
+        "color": "#FF4500",
+        "ephemeral": false,
+        "capabilities": ["fetch", "cookies:chrome", "log"]
+      }
+    ]
+  }
+}
diff --git a/packages/connectors/reddit/src/fetch.ts b/packages/connectors/reddit/src/fetch.ts
new file mode 100644
index 0000000..b994c97
--- /dev/null
+++ b/packages/connectors/reddit/src/fetch.ts
@@ -0,0 +1,286 @@
+import type { FetchCapability, Cookie, CapturedItem } from '@spool/connector-sdk'
+import { SyncError, SyncErrorCode, abortableSleep } from '@spool/connector-sdk'
+
+const USER_AGENT =
+  'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/146.0.0.0 Safari/537.36'
+
+const PAGE_SIZE = 100
+/** Total number of requests made for one URL before giving up on 429/5xx responses. */
+const MAX_ATTEMPTS = 4
+const RELEVANT_COOKIE_NAMES = new Set(['reddit_session', 'loid', 'token_v2', 'edgebucket'])
+
+/** Cookie-based credentials sent with every Reddit request. */
+export interface RedditAuth {
+  cookieHeader: string
+}
+
+/**
+ * Builds the `Cookie` header value from the cookies named in RELEVANT_COOKIE_NAMES.
+ *
+ * @param cookies - Browser cookies to filter.
+ * @returns The joined header, or `null` when no `reddit_session` cookie is present.
+ */
+export function buildAuth(cookies: Cookie[]): RedditAuth | null {
+  const parts: string[] = []
+  let hasSession = false
+  for (const c of cookies) {
+    if (!RELEVANT_COOKIE_NAMES.has(c.name)) continue
+    if (c.name === 'reddit_session') hasSession = true
+    parts.push(`${c.name}=${c.value}`)
+  }
+  return hasSession ? { cookieHeader: parts.join('; ') } : null
+}
+
+/** Everything a request needs: credentials, the fetch capability and an abort signal. */
+export interface RedditClient {
+  cookieHeader: string
+  fetch: FetchCapability
+  signal: AbortSignal
+}
+
+/** One listing entry; `thingToItem` maps kind `t3` to a post and `t1` to a comment. */
+interface RedditThing {
+  kind: string
+  data: Record<string, unknown>
+}
+
+/** One page of captured items plus the cursor for the next page (`null` when there is none). */
+export interface RedditPage {
+  items: CapturedItem[]
+  nextCursor: string | null
+}
+
+/** Narrows an unknown JSON value to a non-null object so its properties can be read safely. */
+function isRecord(value: unknown): value is Record<string, unknown> {
+  return typeof value === 'object' && value !== null
+}
+
+/** Checks that a listing child has the `{ kind, data }` shape `thingToItem` reads. */
+function isRedditThing(value: unknown): value is RedditThing {
+  return isRecord(value) && typeof value.kind === 'string' && isRecord(value.data)
+}
+
+/** Request headers: session cookies, a browser-like user agent and a JSON accept type. */
+function headers(cookieHeader: string): Record<string, string> {
+  return {
+    cookie: cookieHeader,
+    'user-agent': USER_AGENT,
+    accept: 'application/json',
+  }
+}
+
+/**
+ * GETs `url` and returns the parsed JSON body, retrying 429 and 5xx responses with backoff.
+ *
+ * @param url - Absolute URL to request.
+ * @param client - Credentials, fetch capability and abort signal for the request.
+ * @returns The parsed JSON body, unvalidated.
+ * @throws SyncError with a network, auth, status, parse, rate-limit or server-error code.
+ * @throws `signal.reason` when the signal is already aborted or aborts during the request.
+ */
+async function fetchJson(url: string, client: RedditClient): Promise<unknown> {
+  const { cookieHeader, fetch: fetchFn, signal } = client
+  let lastCause: 'rate-limit' | 'server-error' | null = null
+
+  for (let attempt = 0; attempt < MAX_ATTEMPTS; attempt++) {
+    if (signal.aborted) throw signal.reason
+
+    let response: Response
+    try {
+      response = await fetchFn(url, { headers: headers(cookieHeader), signal })
+    } catch (err) {
+      if (signal.aborted) throw signal.reason
+      const message = err instanceof Error ? err.message : String(err)
+      if (message.includes('ENOTFOUND') || message.includes('ENETUNREACH')) {
+        throw new SyncError(SyncErrorCode.NETWORK_OFFLINE, message, err)
+      }
+      if (message.includes('ETIMEDOUT') || message.includes('timeout')) {
+        throw new SyncError(SyncErrorCode.NETWORK_TIMEOUT, message, err)
+      }
+      throw new SyncError(SyncErrorCode.CONNECTOR_ERROR, message, err)
+    }
+
+    const rateLimited = response.status === 429
+    if (rateLimited || response.status >= 500) {
+      lastCause = rateLimited ? 'rate-limit' : 'server-error'
+      // Back off only when another attempt follows; sleeping after the last one just delays the error.
+      if (attempt < MAX_ATTEMPTS - 1) {
+        const delayMs = rateLimited ? Math.min(15 * Math.pow(2, attempt), 120) * 1000 : 5000 * (attempt + 1)
+        await abortableSleep(delayMs, signal)
+      }
+      continue
+    }
+    if (response.status === 401 || response.status === 403) {
+      throw new SyncError(
+        SyncErrorCode.AUTH_SESSION_EXPIRED,
+        `Reddit returned ${response.status}. Your session may have expired — open reddit.com in Chrome and log in again.`,
+      )
+    }
+    if (!response.ok) {
+      const text = await response.text().catch(() => '')
+      throw new SyncError(
+        SyncErrorCode.API_UNEXPECTED_STATUS,
+        `Reddit returned ${response.status}: ${text.slice(0, 300)}`,
+      )
+    }
+
+    try {
+      return await response.json()
+    } catch (err) {
+      throw new SyncError(SyncErrorCode.API_PARSE_ERROR, 'Failed to parse Reddit response as JSON', err)
+    }
+  }
+
+  throw new SyncError(
+    lastCause === 'rate-limit' ? SyncErrorCode.API_RATE_LIMITED : SyncErrorCode.API_SERVER_ERROR,
+    `${lastCause === 'rate-limit' ? 'Rate limited' : 'Server errors'} after ${MAX_ATTEMPTS} retry attempts.`,
+  )
+}
+
+/**
+ * Looks up the logged-in account name via `/api/me.json`.
+ *
+ * @param client - Credentials, fetch capability and abort signal for the request.
+ * @returns The account name.
+ * @throws SyncError `AUTH_NOT_LOGGED_IN` when the response has no non-empty string `data.name`.
+ */
+export async function fetchUsername(client: RedditClient): Promise<string> {
+  const json = await fetchJson('https://old.reddit.com/api/me.json', client)
+  const data = isRecord(json) ? json.data : undefined
+  const name = isRecord(data) ? data.name : undefined
+  if (typeof name !== 'string' || !name) {
+    throw new SyncError(
+      SyncErrorCode.AUTH_NOT_LOGGED_IN,
+      'Reddit did not return a username — you may not be logged in. Open reddit.com in Chrome, log in, then retry.',
+    )
+  }
+  return name
+}
+
+/**
+ * Returns `url` only when it is a string starting with `http`. Reddit uses sentinel strings
+ * like 'self', 'default', 'nsfw', 'spoiler', 'image' in the thumbnail field when there is no
+ * preview; those are mapped to `null`.
+ */
+function validThumbnail(url: unknown): string | null {
+  if (typeof url !== 'string') return null
+  if (!url.startsWith('http')) return null
+  return url
+}
+
+/**
+ * Maps one listing entry to a captured item.
+ *
+ * @param thing - Listing entry; kind `t3` becomes a post and `t1` a comment.
+ * @returns The item, or `null` when `data.name` is not a non-empty string or the kind is neither.
+ */
+function thingToItem(thing: RedditThing): CapturedItem | null {
+  const d = thing.data
+  const platformId = typeof d.name === 'string' ? d.name : null
+  if (!platformId) return null
+
+  const permalink = typeof d.permalink === 'string' ? `https://www.reddit.com${d.permalink}` : null
+  // Fall back to the current time when the entry has no numeric creation timestamp.
+  const capturedAt = typeof d.created_utc === 'number'
+    ? new Date(d.created_utc * 1000).toISOString()
+    : new Date().toISOString()
+  const author = typeof d.author === 'string' ? d.author : null
+
+  const baseMetadata = {
+    subreddit: d.subreddit,
+    subredditPrefixed: d.subreddit_name_prefixed,
+    score: d.score,
+    permalink,
+  }
+
+  if (thing.kind === 't3') {
+    const title = typeof d.title === 'string' ? d.title : '(untitled)'
+    const selftext = typeof d.selftext === 'string' ? d.selftext : ''
+    const externalUrl = typeof d.url === 'string' ? d.url : null
+    return {
+      url: externalUrl ?? permalink ?? `https://www.reddit.com/${platformId}`,
+      title,
+      contentText: selftext || title,
+      author,
+      platform: 'reddit',
+      platformId,
+      contentType: 'post',
+      thumbnailUrl: validThumbnail(d.thumbnail),
+      metadata: {
+        ...baseMetadata,
+        numComments: d.num_comments,
+        externalUrl,
+        isSelf: d.is_self,
+        over18: d.over_18,
+        domain: d.domain,
+      },
+      capturedAt,
+      rawJson: JSON.stringify(thing),
+    }
+  }
+
+  if (thing.kind === 't1') {
+    const body = typeof d.body === 'string' ? d.body : ''
+    const linkTitle = typeof d.link_title === 'string' ? d.link_title : ''
+    // Comments have no title: use the body (cut to 120 chars with an ellipsis), else the post title.
+    const title = body.length > 120 ? body.slice(0, 117) + '...' : body || linkTitle || '(comment)'
+    return {
+      url: permalink ?? `https://www.reddit.com/${platformId}`,
+      title,
+      contentText: body,
+      author,
+      platform: 'reddit',
+      platformId,
+      contentType: 'comment',
+      thumbnailUrl: null,
+      metadata: {
+        ...baseMetadata,
+        linkTitle,
+        linkId: d.link_id,
+        linkPermalink: d.link_permalink,
+      },
+      capturedAt,
+      rawJson: JSON.stringify(thing),
+    }
+  }
+
+  return null
+}
+
+/**
+ * Converts a raw listing response into captured items and the next-page cursor.
+ * Anything that does not match the expected `{ data: { children, after } }` shape is skipped
+ * instead of throwing, so an unexpected payload yields an empty page.
+ */
+function parseListing(json: unknown): RedditPage {
+  const data: Record<string, unknown> = isRecord(json) && isRecord(json.data) ? json.data : {}
+  const children: unknown[] = Array.isArray(data.children) ? data.children : []
+  const items: CapturedItem[] = []
+  for (const child of children) {
+    if (!isRedditThing(child)) continue
+    const item = thingToItem(child)
+    if (item) items.push(item)
+  }
+  return { items, nextCursor: typeof data.after === 'string' ? data.after : null }
+}
+
+/**
+ * Fetches one page of the user's `saved` or `upvoted` listing.
+ *
+ * @param listing - Which listing to read.
+ * @param username - Account name the listing belongs to; URL-encoded before use.
+ * @param cursor - `after` cursor from the previous page, or `null` for the first page.
+ * @param client - Credentials, fetch capability and abort signal for the request.
+ * @returns The page's items and the cursor for the following page.
+ */
+export async function fetchListingPage(
+  listing: 'saved' | 'upvoted',
+  username: string,
+  cursor: string | null,
+  client: RedditClient,
+): Promise<RedditPage> {
+  const params = new URLSearchParams({ limit: String(PAGE_SIZE), raw_json: '1' })
+  if (cursor) params.set('after', cursor)
+  const url = `https://old.reddit.com/user/${encodeURIComponent(username)}/${listing}.json?${params}`
+  return parseListing(await fetchJson(url, client))
+}
diff --git a/packages/connectors/reddit/src/index.ts b/packages/connectors/reddit/src/index.ts
new file mode 100644
index 0000000..a401a9f
--- /dev/null
+++ b/packages/connectors/reddit/src/index.ts
@@ -0,0 +1,124 @@
+import type {
+  Connector,
+  ConnectorCapabilities,
+  AuthStatus,
+  PageResult,
+  FetchContext,
+} from '@spool/connector-sdk'
+import { SyncError, SyncErrorCode } from '@spool/connector-sdk'
+import { buildAuth, fetchUsername, fetchListingPage } from './fetch.js'
+
+/** Credentials and account name resolved once and reused across pages. */
+interface RedditSession {
+  cookieHeader: string
+  username: string
+}
+
+/**
+ * Reads Chrome's reddit.com cookies and builds the `Cookie` header from them.
+ *
+ * @throws SyncError `AUTH_NOT_LOGGED_IN` when there is no `reddit_session` cookie.
+ */
+async function readCookieHeader(caps: ConnectorCapabilities): Promise<string> {
+  const cookies = await caps.cookies.get({ browser: 'chrome', url: 'https://reddit.com' })
+  const auth = buildAuth(cookies)
+  if (!auth) {
+    throw new SyncError(
+      SyncErrorCode.AUTH_NOT_LOGGED_IN,
+      'No reddit_session cookie found in Chrome. Log into reddit.com in Chrome and retry.',
+    )
+  }
+  return auth.cookieHeader
+}
+
+/** Shared implementation for the Reddit listings; subclasses only pick the listing and labels. */
+abstract class RedditListingConnector implements Connector {
+  abstract readonly id: string
+  abstract readonly label: string
+  abstract readonly description: string
+  abstract readonly listing: 'saved' | 'upvoted'
+
+  readonly platform = 'reddit'
+  readonly color = '#FF4500'
+  readonly ephemeral = false
+
+  // Cleared when a request fails with an error that needs re-authentication.
+  private cached: RedditSession | null = null
+
+  constructor(protected readonly caps: ConnectorCapabilities) {}
+
+  /** Reports whether the session cookie can be read; the session itself is not validated here. */
+  async checkAuth(): Promise<AuthStatus> {
+    try {
+      await readCookieHeader(this.caps)
+      return { ok: true }
+    } catch (err) {
+      if (err instanceof SyncError) {
+        return { ok: false, error: err.code, message: err.message, hint: err.message }
+      }
+      return {
+        ok: false,
+        error: SyncErrorCode.AUTH_UNKNOWN,
+        message: err instanceof Error ? err.message : String(err),
+        hint: 'Check that Chrome is installed and you are logged into reddit.com.',
+      }
+    }
+  }
+
+  /**
+   * Fetches one listing page. In the `forward` phase the page is cut off just before
+   * `ctx.sinceItemId` and paging stops there.
+   */
+  async fetchPage(ctx: FetchContext): Promise<PageResult> {
+    const signal = ctx.signal ?? new AbortController().signal
+    try {
+      let session = this.cached
+      if (!session) {
+        const cookieHeader = await readCookieHeader(this.caps)
+        const username = await fetchUsername({ cookieHeader, fetch: this.caps.fetch, signal })
+        session = { cookieHeader, username }
+        this.cached = session
+      }
+      // Work from local copies: a concurrent failing call may reset `this.cached` while this one awaits.
+      const client = { cookieHeader: session.cookieHeader, fetch: this.caps.fetch, signal }
+      const accountName = session.username
+
+      const page = await this.caps.log.span(
+        'fetchPage',
+        () => fetchListingPage(this.listing, accountName, ctx.cursor, client),
+        { attributes: { 'reddit.listing': this.listing, 'reddit.phase': ctx.phase, 'reddit.cursor': ctx.cursor ?? 'initial' } },
+      )
+
+      if (ctx.phase === 'forward' && ctx.sinceItemId) {
+        const anchorIdx = page.items.findIndex(i => i.platformId === ctx.sinceItemId)
+        if (anchorIdx >= 0) {
+          return { items: page.items.slice(0, anchorIdx), nextCursor: null }
+        }
+      }
+
+      return page
+    } catch (err) {
+      if (err instanceof SyncError && err.needsReauth) this.cached = null
+      throw err
+    }
+  }
+}
+
+/** Connector for the user's saved posts and comments. */
+export class RedditSavedConnector extends RedditListingConnector {
+  readonly id = 'reddit-saved'
+  readonly label = 'Reddit Saved'
+  readonly description = 'Posts and comments you saved on Reddit'
+  readonly listing = 'saved'
+}
+
+/** Connector for the user's upvoted posts. */
+export class RedditUpvotedConnector extends RedditListingConnector {
+  readonly id = 'reddit-upvoted'
+  readonly label = 'Reddit Upvoted'
+  readonly description = 'Posts you upvoted on Reddit'
+  readonly listing = 'upvoted'
+}
+
+/** Connector classes exported by this package. */
+export const connectors = [RedditSavedConnector, RedditUpvotedConnector]
diff --git a/packages/connectors/reddit/tsconfig.json b/packages/connectors/reddit/tsconfig.json
new file mode 100644
index 0000000..a84e1f5
--- /dev/null
+++ b/packages/connectors/reddit/tsconfig.json
@@ -0,0 +1,21 @@
+{
+  "compilerOptions": {
+    "target": "es2022",
+    "module": "nodenext",
+    "moduleResolution": "nodenext",
+    "lib": ["es2022"],
+    "outDir": "./dist",
+    "rootDir": "./src",
+    "strict": true,
+    "declaration": true,
+    "declarationMap": true,
+    "sourceMap": true,
+    "esModuleInterop": true,
+    "skipLibCheck": true,
+    "forceConsistentCasingInFileNames": true,
+    "isolatedModules": true,
+    "types": ["node"]
+  },
+  "include": ["src/**/*"],
+  "exclude": ["dist", "node_modules"]
+}
diff --git a/packages/landing/public/registry.json b/packages/landing/public/registry.json
index e00f4a5..3225747 100644
--- a/packages/landing/public/registry.json
+++ b/packages/landing/public/registry.json
@@ -64,6 +64,32 @@
       "npm": "https://www.npmjs.com/package/@spool-lab/connector-github",
       "packageDescription": "What you star and the notifications GitHub sends you."
     },
+    {
+      "name": "@spool-lab/connector-reddit",
+      "id": "reddit-saved",
+      "platform": "reddit",
+      "label": "Reddit Saved",
+      "description": "Posts and comments you saved on Reddit",
+      "color": "#FF4500",
+      "author": "spool-lab",
+      "category": "social",
+      "firstParty": true,
+      "npm": "https://www.npmjs.com/package/@spool-lab/connector-reddit",
+      "packageDescription": "Your saved and upvoted posts from Reddit."
+    },
+    {
+      "name": "@spool-lab/connector-reddit",
+      "id": "reddit-upvoted",
+      "platform": "reddit",
+      "label": "Reddit Upvoted",
+      "description": "Posts you upvoted on Reddit",
+      "color": "#FF4500",
+      "author": "spool-lab",
+      "category": "social",
+      "firstParty": true,
+      "npm": "https://www.npmjs.com/package/@spool-lab/connector-reddit",
+      "packageDescription": "Your saved and upvoted posts from Reddit."
+    },
     {
       "name": "@spool-lab/connector-xiaohongshu",
       "id": "xiaohongshu-notes",
diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml
index 87563be..f0480d3 100644
--- a/pnpm-lock.yaml
+++ b/pnpm-lock.yaml
@@ -174,6 +174,18 @@ importers:
         specifier: ^5.7.3
         version: 5.9.3
 
+  packages/connectors/reddit:
+    devDependencies:
+      '@spool/connector-sdk':
+        specifier: workspace:^
+        version: link:../../connector-sdk
+      '@types/node':
+        specifier: ^22.15.3
+        version: 22.19.17
+      typescript:
+        specifier: ^5.7.3
+        version: 5.9.3
+
   packages/connectors/twitter-bookmarks:
     devDependencies:
       '@spool/connector-sdk':