diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index c5f6bbd..b404665 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -14,10 +14,6 @@ jobs: - uses: actions/checkout@v4 - uses: oven-sh/setup-bun@v2 - run: bun install - # scrapegraph-js is pinned to a GitHub commit (PR #13 head, not yet on npm) - # and ships without a prebuilt dist/ — build it in-place so module - # resolution works. - - run: cd node_modules/scrapegraph-js && bun install && bun run build - run: bun test lint: @@ -27,5 +23,4 @@ jobs: - uses: actions/checkout@v4 - uses: oven-sh/setup-bun@v2 - run: bun install - - run: cd node_modules/scrapegraph-js && bun install && bun run build - run: bun run check diff --git a/bun.lock b/bun.lock index 5b4c862..3b4f736 100644 --- a/bun.lock +++ b/bun.lock @@ -9,7 +9,7 @@ "chalk": "^5.4.1", "citty": "^0.1.6", "dotenv": "^17.2.4", - "scrapegraph-js": "github:ScrapeGraphAI/scrapegraph-js#096c110", + "scrapegraph-js": "^2.1.0", }, "devDependencies": { "@biomejs/biome": "^1.9.4", @@ -229,7 +229,7 @@ "rollup": ["rollup@4.57.1", "", { "dependencies": { "@types/estree": "1.0.8" }, "optionalDependencies": { "@rollup/rollup-android-arm-eabi": "4.57.1", "@rollup/rollup-android-arm64": "4.57.1", "@rollup/rollup-darwin-arm64": "4.57.1", "@rollup/rollup-darwin-x64": "4.57.1", "@rollup/rollup-freebsd-arm64": "4.57.1", "@rollup/rollup-freebsd-x64": "4.57.1", "@rollup/rollup-linux-arm-gnueabihf": "4.57.1", "@rollup/rollup-linux-arm-musleabihf": "4.57.1", "@rollup/rollup-linux-arm64-gnu": "4.57.1", "@rollup/rollup-linux-arm64-musl": "4.57.1", "@rollup/rollup-linux-loong64-gnu": "4.57.1", "@rollup/rollup-linux-loong64-musl": "4.57.1", "@rollup/rollup-linux-ppc64-gnu": "4.57.1", "@rollup/rollup-linux-ppc64-musl": "4.57.1", "@rollup/rollup-linux-riscv64-gnu": "4.57.1", "@rollup/rollup-linux-riscv64-musl": "4.57.1", "@rollup/rollup-linux-s390x-gnu": "4.57.1", "@rollup/rollup-linux-x64-gnu": "4.57.1", "@rollup/rollup-linux-x64-musl": "4.57.1", "@rollup/rollup-openbsd-x64": "4.57.1", "@rollup/rollup-openharmony-arm64": "4.57.1", "@rollup/rollup-win32-arm64-msvc": "4.57.1", "@rollup/rollup-win32-ia32-msvc": "4.57.1", "@rollup/rollup-win32-x64-gnu": "4.57.1", "@rollup/rollup-win32-x64-msvc": "4.57.1", "fsevents": "~2.3.2" }, "bin": { "rollup": "dist/bin/rollup" } }, "sha512-oQL6lgK3e2QZeQ7gcgIkS2YZPg5slw37hYufJ3edKlfQSGGm8ICoxswK15ntSzF/a8+h7ekRy7k7oWc3BQ7y8A=="], - "scrapegraph-js": ["scrapegraph-js@github:ScrapeGraphAI/scrapegraph-js#096c110", { "dependencies": { "zod": "^4.3.6" } }, "ScrapeGraphAI-scrapegraph-js-096c110"], + "scrapegraph-js": ["scrapegraph-js@2.1.0", "", { "dependencies": { "zod": "^4.3.6" } }, "sha512-A2U0pK3fsd9vhKdONkSHptssPB+V0E/X5JbMIVcwWzKFlrmRFUT2kjiWya3StoCgxvmZKPokrxC5ZxOE4WQJMQ=="], "sisteransi": ["sisteransi@1.0.5", "", {}, "sha512-bLGGlR1QxBcynn2d5YmDX4MGjlZvy2MRBDRNHLJ8VI6l6+9FUiyTFNJ0IveOSP0bcXgVDPRcfGqA0pjaqUpfVg=="], diff --git a/package.json b/package.json index c3d5793..3d8c75c 100644 --- a/package.json +++ b/package.json @@ -28,7 +28,7 @@ "chalk": "^5.4.1", "citty": "^0.1.6", "dotenv": "^17.2.4", - "scrapegraph-js": "github:ScrapeGraphAI/scrapegraph-js#096c110" + "scrapegraph-js": "^2.1.0" }, "devDependencies": { "@biomejs/biome": "^1.9.4", diff --git a/src/commands/crawl.ts b/src/commands/crawl.ts index 45036dd..f61c486 100644 --- a/src/commands/crawl.ts +++ b/src/commands/crawl.ts @@ -1,27 +1,13 @@ import { defineCommand } from "citty"; import { crawl } from "scrapegraph-js"; -import type { ApiCrawlRequest, ApiScrapeFormatEntry } from "scrapegraph-js"; +import type { CrawlRequest, FetchConfig } from "scrapegraph-js"; import { resolveApiKey } from "../lib/folders.js"; +import { BASE_FORMATS, type BaseFormat, buildBaseFormat } from "../lib/formats.js"; import * as log from "../lib/log.js"; - -const FORMATS = [ - "markdown", - "html", - "screenshot", - "branding", - "links", - "images", - "summary", -] as const; -type Format = (typeof FORMATS)[number]; +import { parseIntArg, parseJsonArg } from "../lib/parse.js"; const POLL_INTERVAL_MS = 3000; -function buildFormat(f: Format): ApiScrapeFormatEntry { - if (f === "markdown" || f === "html") return { type: f, mode: "normal" }; - return { type: f } as ApiScrapeFormatEntry; -} - export default defineCommand({ meta: { name: "crawl", @@ -36,7 +22,7 @@ export default defineCommand({ format: { type: "string", alias: "f", - description: `Per-page format(s), comma-separated: ${FORMATS.join(", ")} (default: markdown)`, + description: `Per-page format(s), comma-separated: ${BASE_FORMATS.join(", ")} (default: markdown)`, }, "max-pages": { type: "string", description: "Maximum pages to crawl (default 50, max 1000)" }, "max-depth": { type: "string", description: "Crawl depth (default 2)" }, @@ -64,24 +50,40 @@ export default defineCommand({ .map((f) => f.trim()) .filter(Boolean); for (const f of requested) { - if (!FORMATS.includes(f as Format)) - out.error(`Unknown format: ${f}. Valid: ${FORMATS.join(", ")}`); + if (!BASE_FORMATS.includes(f as BaseFormat)) + out.error(`Unknown format: ${f}. Valid: ${BASE_FORMATS.join(", ")}`); } - const formats = requested.map((f) => buildFormat(f as Format)); - - const params: ApiCrawlRequest = { url: args.url, formats }; - const mut = params as Record; - if (args["max-pages"]) mut.maxPages = Number(args["max-pages"]); - if (args["max-depth"]) mut.maxDepth = Number(args["max-depth"]); - if (args["max-links-per-page"]) mut.maxLinksPerPage = Number(args["max-links-per-page"]); - if (args["allow-external"]) mut.allowExternal = true; - if (args["include-patterns"]) mut.includePatterns = JSON.parse(args["include-patterns"]); - if (args["exclude-patterns"]) mut.excludePatterns = JSON.parse(args["exclude-patterns"]); + const formats = requested.map((f) => buildBaseFormat(f as BaseFormat)); const fetchConfig: Record = {}; if (args.mode) fetchConfig.mode = args.mode; if (args.stealth) fetchConfig.stealth = true; - if (Object.keys(fetchConfig).length > 0) mut.fetchConfig = fetchConfig; + + const params: CrawlRequest = { + url: args.url, + formats, + ...(args["max-pages"] && { maxPages: parseIntArg(args["max-pages"], "max-pages", out) }), + ...(args["max-depth"] && { maxDepth: parseIntArg(args["max-depth"], "max-depth", out) }), + ...(args["max-links-per-page"] && { + maxLinksPerPage: parseIntArg(args["max-links-per-page"], "max-links-per-page", out), + }), + ...(args["allow-external"] && { allowExternal: true }), + ...(args["include-patterns"] && { + includePatterns: parseJsonArg( + args["include-patterns"], + "include-patterns", + out, + ) as string[], + }), + ...(args["exclude-patterns"] && { + excludePatterns: parseJsonArg( + args["exclude-patterns"], + "exclude-patterns", + out, + ) as string[], + }), + ...(Object.keys(fetchConfig).length > 0 && { fetchConfig: fetchConfig as FetchConfig }), + }; out.start("Starting crawl"); const job = await crawl.start(apiKey, params); diff --git a/src/commands/extract.ts b/src/commands/extract.ts index 0f116fe..755fc48 100644 --- a/src/commands/extract.ts +++ b/src/commands/extract.ts @@ -1,8 +1,9 @@ import { defineCommand } from "citty"; import { extract } from "scrapegraph-js"; -import type { ApiExtractRequest } from "scrapegraph-js"; +import type { ExtractRequest, FetchConfig } from "scrapegraph-js"; import { resolveApiKey } from "../lib/folders.js"; import * as log from "../lib/log.js"; +import { parseIntArg, parseJsonArg } from "../lib/parse.js"; export default defineCommand({ meta: { @@ -42,16 +43,20 @@ export default defineCommand({ const fetchConfig: Record = {}; if (args.mode) fetchConfig.mode = args.mode; if (args.stealth) fetchConfig.stealth = true; - if (args.scrolls) fetchConfig.scrolls = Number(args.scrolls); - if (args.cookies) fetchConfig.cookies = JSON.parse(args.cookies); - if (args.headers) fetchConfig.headers = JSON.parse(args.headers); + if (args.scrolls) fetchConfig.scrolls = parseIntArg(args.scrolls, "scrolls", out); + if (args.cookies) fetchConfig.cookies = parseJsonArg(args.cookies, "cookies", out); + if (args.headers) fetchConfig.headers = parseJsonArg(args.headers, "headers", out); if (args.country) fetchConfig.country = args.country; - const params: ApiExtractRequest = { url: args.url, prompt: args.prompt }; - if (args.schema) (params as Record).schema = JSON.parse(args.schema); - if (args["html-mode"]) (params as Record).mode = args["html-mode"]; - if (Object.keys(fetchConfig).length > 0) - (params as Record).fetchConfig = fetchConfig; + const params: ExtractRequest = { + url: args.url, + prompt: args.prompt, + ...(args.schema && { + schema: parseJsonArg(args.schema, "schema", out) as Record, + }), + ...(args["html-mode"] && { mode: args["html-mode"] as "normal" | "reader" | "prune" }), + ...(Object.keys(fetchConfig).length > 0 && { fetchConfig: fetchConfig as FetchConfig }), + }; out.start("Extracting"); const result = await extract(apiKey, params); diff --git a/src/commands/history.ts b/src/commands/history.ts index 97c8a76..c8ff8b9 100644 --- a/src/commands/history.ts +++ b/src/commands/history.ts @@ -2,20 +2,21 @@ import * as p from "@clack/prompts"; import chalk from "chalk"; import { defineCommand } from "citty"; import { history } from "scrapegraph-js"; -import type { ApiHistoryEntry, ApiHistoryService } from "scrapegraph-js"; +import type { HistoryEntry, Service } from "scrapegraph-js"; import { resolveApiKey } from "../lib/folders.js"; import * as log from "../lib/log.js"; +import { parseIntArg } from "../lib/parse.js"; const SERVICES = ["scrape", "extract", "search", "monitor", "crawl"] as const; const VALID = SERVICES.join(", "); const LOAD_MORE = "__load_more__"; -function entryUrl(e: ApiHistoryEntry): string { +function entryUrl(e: HistoryEntry): string { const params = e.params as Record; return String(params.url ?? params.query ?? ""); } -function entryLabel(e: ApiHistoryEntry): string { +function entryLabel(e: HistoryEntry): string { const short = e.id.length > 12 ? `${e.id.slice(0, 12)}…` : e.id; const url = entryUrl(e); const urlShort = url.length > 50 ? `${url.slice(0, 49)}…` : url; @@ -24,7 +25,7 @@ function entryLabel(e: ApiHistoryEntry): string { return `${chalk.dim(short)} ${color(e.status)} ${urlShort}`; } -function entryHint(e: ApiHistoryEntry): string { +function entryHint(e: HistoryEntry): string { if (!e.createdAt) return ""; const d = new Date(e.createdAt); return Number.isNaN(d.getTime()) ? e.createdAt : d.toLocaleString(); @@ -49,11 +50,14 @@ export default defineCommand({ const quiet = !!args.json; const out = log.create(quiet); const apiKey = await resolveApiKey(quiet); - const service = args.service as ApiHistoryService | undefined; - if (service && !SERVICES.includes(service)) out.error(`Invalid service. Valid: ${VALID}`); + const rawService = args.service; + if (rawService && !SERVICES.includes(rawService as Service)) { + out.error(`Invalid service. Valid: ${VALID}`); + } + const service = rawService as Service | undefined; const requestId = (args as { _: string[] })._.at(1); - const limit = args["page-size"] ? Number(args["page-size"]) : 20; - let page = args.page ? Number(args.page) : 1; + const limit = args["page-size"] ? parseIntArg(args["page-size"], "page-size", out) : 20; + let page = args.page ? parseIntArg(args.page, "page", out) : 1; const fetchPage = async (pg: number) => { const r = await history.list(apiKey, { @@ -62,7 +66,7 @@ export default defineCommand({ ...(service ? { service } : {}), }); if (!r.data) out.error(r.error); - const d = r.data as { data: ApiHistoryEntry[]; pagination: { total: number } }; + const d = r.data as { data: HistoryEntry[]; pagination: { total: number } }; return { rows: d.data ?? [], hasMore: (d.pagination?.total ?? 0) > pg * limit, diff --git a/src/commands/monitor.ts b/src/commands/monitor.ts index fbdfe6b..70342ed 100644 --- a/src/commands/monitor.ts +++ b/src/commands/monitor.ts @@ -3,12 +3,15 @@ import chalk from "chalk"; import { defineCommand } from "citty"; import { monitor } from "scrapegraph-js"; import type { - ApiMonitorCreateInput, - ApiMonitorUpdateInput, - ApiScrapeFormatEntry, + FetchConfig, + FormatConfig, + MonitorCreateRequest, + MonitorUpdateRequest, } from "scrapegraph-js"; import { resolveApiKey } from "../lib/folders.js"; +import { BASE_FORMATS, type BaseFormat, buildBaseFormat } from "../lib/formats.js"; import * as log from "../lib/log.js"; +import { parseIntArg } from "../lib/parse.js"; const ACTIONS = [ "create", @@ -22,24 +25,14 @@ const ACTIONS = [ ] as const; type Action = (typeof ACTIONS)[number]; -const FORMATS = [ - "markdown", - "html", - "screenshot", - "branding", - "links", - "images", - "summary", -] as const; - -function buildFormats(raw: string): ApiScrapeFormatEntry[] { +function buildFormats(raw: string, onInvalid: (f: string) => never): FormatConfig[] { return raw .split(",") .map((f) => f.trim()) .filter(Boolean) .map((f) => { - if (f === "markdown" || f === "html") return { type: f, mode: "normal" as const }; - return { type: f } as ApiScrapeFormatEntry; + if (!BASE_FORMATS.includes(f as BaseFormat)) onInvalid(f); + return buildBaseFormat(f as BaseFormat); }); } @@ -64,7 +57,7 @@ export default defineCommand({ format: { type: "string", alias: "f", - description: `Formats to track, comma-separated: ${FORMATS.join(", ")} (default: markdown)`, + description: `Formats to track, comma-separated: ${BASE_FORMATS.join(", ")} (default: markdown)`, }, "webhook-url": { type: "string", description: "Webhook URL for change notifications" }, mode: { type: "string", alias: "m", description: "Fetch mode: auto (default), fast, js" }, @@ -81,33 +74,33 @@ export default defineCommand({ if (!ACTIONS.includes(action)) { out.error(`Unknown action: ${action}. Valid: ${ACTIONS.join(", ")}`); - return; } const needsId: Action[] = ["get", "update", "delete", "pause", "resume", "activity"]; if (needsId.includes(action) && !args.id) { out.error(`--id is required for ${action}`); - return; } const fetchConfig: Record = {}; if (args.mode) fetchConfig.mode = args.mode; if (args.stealth) fetchConfig.stealth = true; + const onInvalidFormat = (f: string): never => + out.error(`Unknown format: ${f}. Valid: ${BASE_FORMATS.join(", ")}`); + switch (action) { case "create": { - if (!args.url) return out.error("--url is required for create"); - if (!args.interval) return out.error("--interval is required for create"); + if (!args.url) out.error("--url is required for create"); + if (!args.interval) out.error("--interval is required for create"); - const params: ApiMonitorCreateInput = { + const params: MonitorCreateRequest = { url: args.url, interval: args.interval, - formats: buildFormats(args.format ?? "markdown"), + formats: buildFormats(args.format ?? "markdown", onInvalidFormat), + ...(args.name && { name: args.name }), + ...(args["webhook-url"] && { webhookUrl: args["webhook-url"] }), + ...(Object.keys(fetchConfig).length > 0 && { fetchConfig: fetchConfig as FetchConfig }), }; - const mut = params as Record; - if (args.name) mut.name = args.name; - if (args["webhook-url"]) mut.webhookUrl = args["webhook-url"]; - if (Object.keys(fetchConfig).length > 0) mut.fetchConfig = fetchConfig; out.start("Creating monitor"); const result = await monitor.create(apiKey, params); @@ -146,13 +139,13 @@ export default defineCommand({ } case "update": { - const params: ApiMonitorUpdateInput = {}; - const mut = params as Record; - if (args.name) mut.name = args.name; - if (args.interval) mut.interval = args.interval; - if (args["webhook-url"]) mut.webhookUrl = args["webhook-url"]; - if (args.format) mut.formats = buildFormats(args.format); - if (Object.keys(fetchConfig).length > 0) mut.fetchConfig = fetchConfig; + const params: MonitorUpdateRequest = { + ...(args.name && { name: args.name }), + ...(args.interval && { interval: args.interval }), + ...(args["webhook-url"] && { webhookUrl: args["webhook-url"] }), + ...(args.format && { formats: buildFormats(args.format, onInvalidFormat) }), + ...(Object.keys(fetchConfig).length > 0 && { fetchConfig: fetchConfig as FetchConfig }), + }; out.start("Updating monitor"); const result = await monitor.update(apiKey, args.id as string, params); @@ -190,9 +183,10 @@ export default defineCommand({ } case "activity": { - const qp: { limit?: number; cursor?: string } = {}; - if (args.limit) qp.limit = Number(args.limit); - if (args.cursor) qp.cursor = args.cursor; + const qp: { limit?: number; cursor?: string } = { + ...(args.limit && { limit: parseIntArg(args.limit, "limit", out) }), + ...(args.cursor && { cursor: args.cursor }), + }; out.start("Fetching monitor activity"); const result = await monitor.activity(apiKey, args.id as string, qp); diff --git a/src/commands/scrape.ts b/src/commands/scrape.ts index 6ce0106..4b48ae5 100644 --- a/src/commands/scrape.ts +++ b/src/commands/scrape.ts @@ -1,19 +1,12 @@ import { defineCommand } from "citty"; import { scrape } from "scrapegraph-js"; -import type { ApiScrapeFormatEntry, ApiScrapeRequest } from "scrapegraph-js"; +import type { FetchConfig, FormatConfig, ScrapeRequest } from "scrapegraph-js"; import { resolveApiKey } from "../lib/folders.js"; +import { BASE_FORMATS, type BaseFormat, type HtmlMode, buildBaseFormat } from "../lib/formats.js"; import * as log from "../lib/log.js"; +import { parseIntArg, parseJsonArg } from "../lib/parse.js"; -const FORMATS = [ - "markdown", - "html", - "screenshot", - "branding", - "links", - "images", - "summary", - "json", -] as const; +const FORMATS = [...BASE_FORMATS, "json"] as const; type Format = (typeof FORMATS)[number]; export default defineCommand({ @@ -53,60 +46,43 @@ export default defineCommand({ out.docs("https://docs.scrapegraphai.com/api-reference/scrape"); const apiKey = await resolveApiKey(!!args.json); - const htmlMode = (args["html-mode"] ?? "normal") as "normal" | "reader" | "prune"; + const htmlMode = (args["html-mode"] ?? "normal") as HtmlMode; const requested = (args.format ?? "markdown") .split(",") .map((f) => f.trim()) .filter(Boolean); - const formats: ApiScrapeFormatEntry[] = []; + const formats: FormatConfig[] = []; for (const f of requested) { if (!FORMATS.includes(f as Format)) { out.error(`Unknown format: ${f}. Valid: ${FORMATS.join(", ")}`); } - switch (f as Format) { - case "markdown": - formats.push({ type: "markdown", mode: htmlMode }); - break; - case "html": - formats.push({ type: "html", mode: htmlMode }); - break; - case "json": - if (!args.prompt) out.error("--prompt is required when format includes json"); - formats.push({ - type: "json", - prompt: args.prompt as string, - ...(args.schema ? { schema: JSON.parse(args.schema) } : {}), - mode: htmlMode, - }); - break; - case "screenshot": - formats.push({ type: "screenshot" }); - break; - case "branding": - formats.push({ type: "branding" }); - break; - case "links": - formats.push({ type: "links" }); - break; - case "images": - formats.push({ type: "images" }); - break; - case "summary": - formats.push({ type: "summary" }); - break; + if (f === "json") { + if (!args.prompt) out.error("--prompt is required when format includes json"); + formats.push({ + type: "json", + prompt: args.prompt as string, + ...(args.schema && { + schema: parseJsonArg(args.schema, "schema", out) as Record, + }), + mode: htmlMode, + }); + } else { + formats.push(buildBaseFormat(f as BaseFormat, htmlMode)); } } const fetchConfig: Record = {}; if (args.mode) fetchConfig.mode = args.mode; if (args.stealth) fetchConfig.stealth = true; - if (args.scrolls) fetchConfig.scrolls = Number(args.scrolls); + if (args.scrolls) fetchConfig.scrolls = parseIntArg(args.scrolls, "scrolls", out); if (args.country) fetchConfig.country = args.country; - const params: ApiScrapeRequest = { url: args.url, formats }; - if (Object.keys(fetchConfig).length > 0) - (params as unknown as Record).fetchConfig = fetchConfig; + const params: ScrapeRequest = { + url: args.url, + formats, + ...(Object.keys(fetchConfig).length > 0 && { fetchConfig: fetchConfig as FetchConfig }), + }; out.start("Scraping"); const result = await scrape(apiKey, params); diff --git a/src/commands/search.ts b/src/commands/search.ts index b574942..53c52fe 100644 --- a/src/commands/search.ts +++ b/src/commands/search.ts @@ -1,8 +1,12 @@ import { defineCommand } from "citty"; import { search } from "scrapegraph-js"; -import type { ApiSearchRequest } from "scrapegraph-js"; +import type { FetchConfig, SearchRequest } from "scrapegraph-js"; import { resolveApiKey } from "../lib/folders.js"; import * as log from "../lib/log.js"; +import { parseIntArg, parseJsonArg } from "../lib/parse.js"; + +type TimeRange = NonNullable; +type SearchFormat = NonNullable; export default defineCommand({ meta: { @@ -43,19 +47,24 @@ export default defineCommand({ out.docs("https://docs.scrapegraphai.com/api-reference/search"); const apiKey = await resolveApiKey(!!args.json); - const params: ApiSearchRequest = { query: args.query }; - const mut = params as Record; - if (args["num-results"]) mut.numResults = Number(args["num-results"]); - if (args.prompt) mut.prompt = args.prompt; - if (args.schema) mut.schema = JSON.parse(args.schema); - if (args.format) mut.format = args.format; - if (args.country) mut.locationGeoCode = args.country; - if (args["time-range"]) mut.timeRange = args["time-range"]; - const fetchConfig: Record = {}; if (args.stealth) fetchConfig.stealth = true; - if (args.headers) fetchConfig.headers = JSON.parse(args.headers); - if (Object.keys(fetchConfig).length > 0) mut.fetchConfig = fetchConfig; + if (args.headers) fetchConfig.headers = parseJsonArg(args.headers, "headers", out); + + const params: SearchRequest = { + query: args.query, + ...(args["num-results"] && { + numResults: parseIntArg(args["num-results"], "num-results", out), + }), + ...(args.prompt && { prompt: args.prompt }), + ...(args.schema && { + schema: parseJsonArg(args.schema, "schema", out) as Record, + }), + ...(args.format && { format: args.format as SearchFormat }), + ...(args.country && { locationGeoCode: args.country }), + ...(args["time-range"] && { timeRange: args["time-range"] as TimeRange }), + ...(Object.keys(fetchConfig).length > 0 && { fetchConfig: fetchConfig as FetchConfig }), + }; out.start("Searching"); const result = await search(apiKey, params); diff --git a/src/lib/formats.ts b/src/lib/formats.ts new file mode 100644 index 0000000..64edcac --- /dev/null +++ b/src/lib/formats.ts @@ -0,0 +1,33 @@ +import type { FormatConfig } from "scrapegraph-js"; + +export const BASE_FORMATS = [ + "markdown", + "html", + "screenshot", + "branding", + "links", + "images", + "summary", +] as const; + +export type BaseFormat = (typeof BASE_FORMATS)[number]; +export type HtmlMode = "normal" | "reader" | "prune"; + +export function buildBaseFormat(f: BaseFormat, mode: HtmlMode = "normal"): FormatConfig { + switch (f) { + case "markdown": + return { type: "markdown", mode }; + case "html": + return { type: "html", mode }; + case "screenshot": + return { type: "screenshot" }; + case "branding": + return { type: "branding" }; + case "links": + return { type: "links" }; + case "images": + return { type: "images" }; + case "summary": + return { type: "summary" }; + } +} diff --git a/src/lib/log.ts b/src/lib/log.ts index 66c7467..d5bdeda 100644 --- a/src/lib/log.ts +++ b/src/lib/log.ts @@ -33,7 +33,7 @@ export function create(quiet = false) { if (quiet) console.log(JSON.stringify(data)); else console.log(`\n${highlight(JSON.stringify(data, null, 2))}\n`); }, - error(message?: string) { + error(message?: string): never { p.log.error(message ?? "Unknown error"); process.exit(1); }, diff --git a/src/lib/parse.ts b/src/lib/parse.ts new file mode 100644 index 0000000..0056a61 --- /dev/null +++ b/src/lib/parse.ts @@ -0,0 +1,18 @@ +import type { Logger } from "./log.js"; + +export function parseJsonArg(raw: string, field: string, out: Logger): unknown { + try { + return JSON.parse(raw); + } catch (e) { + const msg = e instanceof Error ? e.message : String(e); + out.error(`--${field}: invalid JSON (${msg})`); + } +} + +export function parseIntArg(raw: string, field: string, out: Logger): number { + const n = Number(raw); + if (!Number.isFinite(n)) { + out.error(`--${field}: expected a number, got "${raw}"`); + } + return n; +}