From 5b5df8db6011e2e2776c301e09a0d19fb1ba0923 Mon Sep 17 00:00:00 2001 From: Juha Kangas <42040080+valuecodes@users.noreply.github.com> Date: Fri, 23 Jan 2026 18:43:07 +0200 Subject: [PATCH 01/11] refactor: clarify project structure --- .codex/skills/docs-sync/SKILL.md | 1 + AGENTS.md | 57 ++++++++++--------- README.md | 51 +++++++---------- .../name-explorer/{ => clients}/database.ts | 2 +- .../{ => clients}/parse-names.ts | 0 .../name-explorer/{ => clients}/pipeline.ts | 6 +- .../{ => clients}/stats-generator.ts | 7 +-- .../{ => clients}/stats-page-generator.ts | 4 +- .../{decades.ts => constants.ts} | 3 + src/cli/name-explorer/main.ts | 13 +++-- .../{ => tools}/fetch-name-tool.ts | 2 +- src/cli/name-explorer/tools/index.ts | 2 + src/cli/name-explorer/{ => tools}/sql-tool.ts | 2 +- .../{types.ts => types/ai-output.ts} | 0 src/cli/name-explorer/types/index.ts | 2 + .../{stats-types.ts => types/stats.ts} | 0 .../clients/publication-pipeline.ts | 6 +- .../clients/publication-scraper.ts | 2 +- .../clients/review-page-generator.ts | 2 +- src/cli/scrape-publications/main.ts | 2 +- .../scrape-publications}/types/index.ts | 0 21 files changed, 83 insertions(+), 81 deletions(-) rename src/cli/name-explorer/{ => clients}/database.ts (99%) rename src/cli/name-explorer/{ => clients}/parse-names.ts (100%) rename src/cli/name-explorer/{ => clients}/pipeline.ts (98%) rename src/cli/name-explorer/{ => clients}/stats-generator.ts (99%) rename src/cli/name-explorer/{ => clients}/stats-page-generator.ts (99%) rename src/cli/name-explorer/{decades.ts => constants.ts} (67%) rename src/cli/name-explorer/{ => tools}/fetch-name-tool.ts (99%) create mode 100644 src/cli/name-explorer/tools/index.ts rename src/cli/name-explorer/{ => tools}/sql-tool.ts (97%) rename src/cli/name-explorer/{types.ts => types/ai-output.ts} (100%) create mode 100644 src/cli/name-explorer/types/index.ts rename src/cli/name-explorer/{stats-types.ts => types/stats.ts} (100%) rename src/{ => cli/scrape-publications}/clients/publication-pipeline.ts (98%) rename src/{ => cli/scrape-publications}/clients/publication-scraper.ts (99%) rename src/{ => cli/scrape-publications}/clients/review-page-generator.ts (98%) rename src/{ => cli/scrape-publications}/types/index.ts (100%) diff --git a/.codex/skills/docs-sync/SKILL.md b/.codex/skills/docs-sync/SKILL.md index 5ba78b3..834fc8e 100644 --- a/.codex/skills/docs-sync/SKILL.md +++ b/.codex/skills/docs-sync/SKILL.md @@ -36,6 +36,7 @@ Documentation files to consider: - When adding new commands, include both the command and a brief explanation - Do not introduce instructions that conflict with `AGENTS.md` - Do not edit `CLAUDE.md` directly; update `AGENTS.md` instead +- Mermaid: wrap node text in quotes like `A["Label"]` and `B{"Question?"}` to avoid parse issues with punctuation ## Output Requirements diff --git a/AGENTS.md b/AGENTS.md index 8868f57..e66047e 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -31,33 +31,36 @@ Available pnpm scripts for development and testing: ## Project layout -| Path | Description | -| ----------------------------------------- | ----------------------------------------------- | -| `src/cli/guestbook/main.ts` | Guestbook CLI entry point | -| `src/cli/guestbook/README.md` | Guestbook CLI docs | -| `src/cli/name-explorer/main.ts` | Name Explorer CLI entry point | -| `src/cli/name-explorer/README.md` | Name Explorer CLI docs | -| `src/cli/scrape-publications/main.ts` | Publication scraping CLI entry point | -| `src/cli/scrape-publications/README.md` | Publication scraping CLI docs | -| `src/clients/*` | Publication scraping pipeline clients | -| `src/utils/parse-args.ts` | Shared CLI argument parsing helper | -| `src/utils/question-handler.ts` | Shared CLI prompt + validation helper | -| `src/tools/index.ts` | Tool exports | -| `src/tools/fetch-url/fetch-url-tool.ts` | Safe HTTP fetch tool with SSRF protection | -| `src/tools/read-file/read-file-tool.ts` | Agent tool for reading files under `tmp` | -| `src/tools/write-file/write-file-tool.ts` | Agent tool for writing files under `tmp` | -| `src/tools/list-files/list-files-tool.ts` | Agent tool for listing files under `tmp` | -| `src/tools/utils/fs.ts` | Path safety utilities | -| `src/tools/utils/html-processing.ts` | HTML sanitization + extraction helpers | -| `src/tools/utils/url-safety.ts` | URL safety + SSRF protection helpers | -| `src/tools/utils/test-utils.ts` | Shared test helpers | -| `src/tools/*/*.test.ts` | Vitest tests for tools and safety utils | -| `src/types/index.ts` | Zod schemas for publication pipeline | -| `eslint.config.ts` | ESLint configuration | -| `prettier.config.ts` | Prettier configuration | -| `tsconfig.json` | TypeScript configuration | -| `vitest.config.ts` | Vitest configuration | -| `tmp/` | Runtime scratch space for tool + scraper output | +| Path | Description | +| -------------------------------------------- | ----------------------------------------------- | +| `src/cli/guestbook/main.ts` | Guestbook CLI entry point | +| `src/cli/guestbook/README.md` | Guestbook CLI docs | +| `src/cli/name-explorer/main.ts` | Name Explorer CLI entry point | +| `src/cli/name-explorer/README.md` | Name Explorer CLI docs | +| `src/cli/scrape-publications/main.ts` | Publication scraping CLI entry point | +| `src/cli/scrape-publications/README.md` | Publication scraping CLI docs | +| `src/cli/scrape-publications/clients/*` | Publication scraping pipeline clients | +| `src/cli/scrape-publications/types/index.ts` | Publication Zod schemas | +| `src/clients/logger.ts` | Shared console logger | +| `src/clients/fetch.ts` | Shared HTTP fetch + sanitization helpers | +| `src/clients/playwright-scraper.ts` | Playwright-based web scraper | +| `src/utils/parse-args.ts` | Shared CLI argument parsing helper | +| `src/utils/question-handler.ts` | Shared CLI prompt + validation helper | +| `src/tools/index.ts` | Tool exports | +| `src/tools/fetch-url/fetch-url-tool.ts` | Safe HTTP fetch tool with SSRF protection | +| `src/tools/read-file/read-file-tool.ts` | Agent tool for reading files under `tmp` | +| `src/tools/write-file/write-file-tool.ts` | Agent tool for writing files under `tmp` | +| `src/tools/list-files/list-files-tool.ts` | Agent tool for listing files under `tmp` | +| `src/tools/utils/fs.ts` | Path safety utilities | +| `src/tools/utils/html-processing.ts` | HTML sanitization + extraction helpers | +| `src/tools/utils/url-safety.ts` | URL safety + SSRF protection helpers | +| `src/tools/utils/test-utils.ts` | Shared test helpers | +| `src/tools/*/*.test.ts` | Vitest tests for tools and safety utils | +| `eslint.config.ts` | ESLint configuration | +| `prettier.config.ts` | Prettier configuration | +| `tsconfig.json` | TypeScript configuration | +| `vitest.config.ts` | Vitest configuration | +| `tmp/` | Runtime scratch space for tool + scraper output | ## Tools diff --git a/README.md b/README.md index 7d4066b..2eb7d51 100644 --- a/README.md +++ b/README.md @@ -78,42 +78,33 @@ src/ │ │ ├── main.ts # Name Explorer CLI entry point │ │ └── README.md # Name Explorer CLI docs │ └── scrape-publications/ -│ ├── main.ts # Publication scraping CLI -│ └── README.md # Publication scraping docs +│ ├── main.ts # Publication scraping CLI entry point +│ ├── README.md # Publication scraping docs +│ ├── clients/ # Publication-specific clients +│ │ ├── publication-pipeline.ts # Pipeline orchestration +│ │ ├── publication-scraper.ts # Link discovery + selector inference +│ │ └── review-page-generator.ts # Review HTML generator +│ └── types/ +│ └── index.ts # Publication Zod schemas ├── clients/ -│ ├── fetch.ts # HTTP fetch + sanitization helpers -│ ├── logger.ts # Console logger -│ ├── playwright-scraper.ts # Playwright-based scraper for JS-rendered pages -│ ├── publication-pipeline.ts # Pipeline orchestration -│ ├── publication-scraper.ts # Link discovery + selector inference -│ └── review-page-generator.ts # Review HTML generator +│ ├── fetch.ts # Shared HTTP fetch + sanitization +│ ├── logger.ts # Shared console logger +│ └── playwright-scraper.ts # Playwright-based web scraper ├── utils/ │ ├── parse-args.ts # Shared CLI arg parsing helper │ └── question-handler.ts # Shared CLI prompt + validation helper ├── tools/ -│ ├── fetch-url/ -│ │ ├── fetch-url-tool.ts # Safe fetch tool -│ │ └── fetch-url-tool.test.ts # Fetch tool tests -│ ├── index.ts # Tool exports -│ ├── list-files/ -│ │ ├── list-files-tool.ts # List tool implementation -│ │ └── list-files-tool.test.ts # List tool tests -│ ├── read-file/ -│ │ ├── read-file-tool.ts # Read tool implementation -│ │ └── read-file-tool.test.ts # Read tool tests -│ ├── write-file/ -│ │ ├── write-file-tool.ts # Write tool implementation -│ │ └── write-file-tool.test.ts # Write tool tests +│ ├── index.ts # Tool exports +│ ├── fetch-url/ # Safe fetch tool +│ ├── list-files/ # List files tool +│ ├── read-file/ # Read file tool +│ ├── write-file/ # Write file tool │ └── utils/ -│ ├── fs.ts # Path safety utilities -│ ├── html-processing.ts # HTML sanitization + extraction helpers -│ ├── html-processing.test.ts # HTML processing tests -│ ├── url-safety.ts # SSRF protection helpers -│ ├── url-safety.test.ts # URL safety tests -│ └── test-utils.ts # Shared test helpers -└── types/ - └── index.ts # Zod schemas for publication pipeline -tmp/ # Runtime scratch space (tool I/O) +│ ├── fs.ts # Path safety utilities +│ ├── html-processing.ts # HTML sanitization + extraction helpers +│ ├── url-safety.ts # SSRF protection helpers +│ └── test-utils.ts # Shared test helpers +tmp/ # Runtime scratch space (tool I/O) ``` ## CLI conventions diff --git a/src/cli/name-explorer/database.ts b/src/cli/name-explorer/clients/database.ts similarity index 99% rename from src/cli/name-explorer/database.ts rename to src/cli/name-explorer/clients/database.ts index a083fdc..0cfb543 100644 --- a/src/cli/name-explorer/database.ts +++ b/src/cli/name-explorer/clients/database.ts @@ -1,7 +1,7 @@ import { DatabaseSync } from "node:sqlite"; import type { SQLInputValue } from "node:sqlite"; import fs from "node:fs"; -import type { Logger } from "../../clients/logger"; +import type { Logger } from "../../../clients/logger"; import type { NameEntry } from "./parse-names"; export interface NameRow { diff --git a/src/cli/name-explorer/parse-names.ts b/src/cli/name-explorer/clients/parse-names.ts similarity index 100% rename from src/cli/name-explorer/parse-names.ts rename to src/cli/name-explorer/clients/parse-names.ts diff --git a/src/cli/name-explorer/pipeline.ts b/src/cli/name-explorer/clients/pipeline.ts similarity index 98% rename from src/cli/name-explorer/pipeline.ts rename to src/cli/name-explorer/clients/pipeline.ts index e09c6d0..f1a380b 100644 --- a/src/cli/name-explorer/pipeline.ts +++ b/src/cli/name-explorer/clients/pipeline.ts @@ -1,10 +1,10 @@ import fs from "node:fs/promises"; import path from "node:path"; -import { Fetch } from "../../clients/fetch"; -import type { Logger } from "../../clients/logger"; +import { Fetch } from "../../../clients/fetch"; +import type { Logger } from "../../../clients/logger"; import type { ConsolidatedData } from "./database"; import { AggregatedNameDatabase, NameDatabase } from "./database"; -import { FETCH_DECADES } from "./decades"; +import { FETCH_DECADES } from "../constants"; import type { ParsedNames } from "./parse-names"; import { parseNamesHtml } from "./parse-names"; diff --git a/src/cli/name-explorer/stats-generator.ts b/src/cli/name-explorer/clients/stats-generator.ts similarity index 99% rename from src/cli/name-explorer/stats-generator.ts rename to src/cli/name-explorer/clients/stats-generator.ts index 7ffee30..1a85224 100644 --- a/src/cli/name-explorer/stats-generator.ts +++ b/src/cli/name-explorer/clients/stats-generator.ts @@ -1,5 +1,5 @@ import type { NameDatabase } from "./database"; -import { DECADES } from "./decades"; +import { DECADES, FIRST_DECADE, LAST_DECADE } from "../constants"; import type { AllStats, ChurnMetrics, @@ -15,10 +15,7 @@ import type { SuffixStats, TopName, UnisexName, -} from "./stats-types"; - -const FIRST_DECADE = DECADES[0] ?? "1889"; -const LAST_DECADE = DECADES[DECADES.length - 1] ?? "2020"; +} from "../types"; export class StatsGenerator { constructor(private db: NameDatabase) {} diff --git a/src/cli/name-explorer/stats-page-generator.ts b/src/cli/name-explorer/clients/stats-page-generator.ts similarity index 99% rename from src/cli/name-explorer/stats-page-generator.ts rename to src/cli/name-explorer/clients/stats-page-generator.ts index 4ce5c39..4a2a4f9 100644 --- a/src/cli/name-explorer/stats-page-generator.ts +++ b/src/cli/name-explorer/clients/stats-page-generator.ts @@ -1,5 +1,5 @@ -import type { Logger } from "../../clients/logger"; -import type { AllStats, LetterStats, TopName, UnisexName } from "./stats-types"; +import type { Logger } from "../../../clients/logger"; +import type { AllStats, LetterStats, TopName, UnisexName } from "../types"; export interface StatsPageGeneratorConfig { logger: Logger; diff --git a/src/cli/name-explorer/decades.ts b/src/cli/name-explorer/constants.ts similarity index 67% rename from src/cli/name-explorer/decades.ts rename to src/cli/name-explorer/constants.ts index 518ac63..5307486 100644 --- a/src/cli/name-explorer/decades.ts +++ b/src/cli/name-explorer/constants.ts @@ -16,3 +16,6 @@ export const DECADES: string[] = [ ]; export const FETCH_DECADES: string[] = DECADES.slice().reverse(); + +export const FIRST_DECADE = DECADES[0] ?? "1889"; +export const LAST_DECADE = DECADES[DECADES.length - 1] ?? "2020"; diff --git a/src/cli/name-explorer/main.ts b/src/cli/name-explorer/main.ts index fd0c2c0..06a1cad 100644 --- a/src/cli/name-explorer/main.ts +++ b/src/cli/name-explorer/main.ts @@ -6,11 +6,14 @@ import { writeFile } from "fs/promises"; import { z } from "zod"; import { Agent, MemorySession, Runner } from "@openai/agents"; import { Logger } from "../../clients/logger"; -import { NameSuggesterPipeline } from "./pipeline"; -import { StatsGenerator } from "./stats-generator"; -import { StatsPageGenerator } from "./stats-page-generator"; -import { createFetchNameTool } from "./fetch-name-tool"; -import { createAggregatedSqlQueryTool, createSqlQueryTool } from "./sql-tool"; +import { NameSuggesterPipeline } from "./clients/pipeline"; +import { StatsGenerator } from "./clients/stats-generator"; +import { StatsPageGenerator } from "./clients/stats-page-generator"; +import { createFetchNameTool } from "./tools/fetch-name-tool"; +import { + createAggregatedSqlQueryTool, + createSqlQueryTool, +} from "./tools/sql-tool"; import { parseArgs } from "../../utils/parse-args"; import { QuestionHandler } from "../../utils/question-handler"; import { diff --git a/src/cli/name-explorer/fetch-name-tool.ts b/src/cli/name-explorer/tools/fetch-name-tool.ts similarity index 99% rename from src/cli/name-explorer/fetch-name-tool.ts rename to src/cli/name-explorer/tools/fetch-name-tool.ts index 62b4b31..bb972c9 100644 --- a/src/cli/name-explorer/fetch-name-tool.ts +++ b/src/cli/name-explorer/tools/fetch-name-tool.ts @@ -3,7 +3,7 @@ import path from "node:path"; import { tool } from "@openai/agents"; import { JSDOM } from "jsdom"; import { z } from "zod"; -import { resolveAndValidateUrl } from "../../tools/utils/url-safety"; +import { resolveAndValidateUrl } from "../../../tools/utils/url-safety"; /** * Statistics for a single decade row diff --git a/src/cli/name-explorer/tools/index.ts b/src/cli/name-explorer/tools/index.ts new file mode 100644 index 0000000..6088e3c --- /dev/null +++ b/src/cli/name-explorer/tools/index.ts @@ -0,0 +1,2 @@ +export * from "./sql-tool"; +export * from "./fetch-name-tool"; diff --git a/src/cli/name-explorer/sql-tool.ts b/src/cli/name-explorer/tools/sql-tool.ts similarity index 97% rename from src/cli/name-explorer/sql-tool.ts rename to src/cli/name-explorer/tools/sql-tool.ts index d3e2d27..81a2c21 100644 --- a/src/cli/name-explorer/sql-tool.ts +++ b/src/cli/name-explorer/tools/sql-tool.ts @@ -1,6 +1,6 @@ import { tool } from "@openai/agents"; import { z } from "zod"; -import type { AggregatedNameDatabase, NameDatabase } from "./database"; +import type { AggregatedNameDatabase, NameDatabase } from "../clients/database"; const DANGEROUS_KEYWORDS = [ "DROP", diff --git a/src/cli/name-explorer/types.ts b/src/cli/name-explorer/types/ai-output.ts similarity index 100% rename from src/cli/name-explorer/types.ts rename to src/cli/name-explorer/types/ai-output.ts diff --git a/src/cli/name-explorer/types/index.ts b/src/cli/name-explorer/types/index.ts new file mode 100644 index 0000000..3d0ff02 --- /dev/null +++ b/src/cli/name-explorer/types/index.ts @@ -0,0 +1,2 @@ +export * from "./ai-output"; +export * from "./stats"; diff --git a/src/cli/name-explorer/stats-types.ts b/src/cli/name-explorer/types/stats.ts similarity index 100% rename from src/cli/name-explorer/stats-types.ts rename to src/cli/name-explorer/types/stats.ts diff --git a/src/clients/publication-pipeline.ts b/src/cli/scrape-publications/clients/publication-pipeline.ts similarity index 98% rename from src/clients/publication-pipeline.ts rename to src/cli/scrape-publications/clients/publication-pipeline.ts index 755bacc..dafc124 100644 --- a/src/clients/publication-pipeline.ts +++ b/src/cli/scrape-publications/clients/publication-pipeline.ts @@ -4,11 +4,11 @@ import crypto from "node:crypto"; import slug from "slug"; import { NodeHtmlMarkdown } from "node-html-markdown"; import type { z } from "zod"; -import { Fetch } from "./fetch"; -import { PlaywrightScraper } from "./playwright-scraper"; +import { Fetch } from "../../../clients/fetch"; +import { PlaywrightScraper } from "../../../clients/playwright-scraper"; import { PublicationScraper } from "./publication-scraper"; import { ReviewPageGenerator } from "./review-page-generator"; -import type { Logger } from "./logger"; +import type { Logger } from "../../../clients/logger"; import type { PublicationLink, LinkCandidate, diff --git a/src/clients/publication-scraper.ts b/src/cli/scrape-publications/clients/publication-scraper.ts similarity index 99% rename from src/clients/publication-scraper.ts rename to src/cli/scrape-publications/clients/publication-scraper.ts index bd2937d..31fc4cb 100644 --- a/src/clients/publication-scraper.ts +++ b/src/cli/scrape-publications/clients/publication-scraper.ts @@ -8,7 +8,7 @@ import { ContentSelectorResult, } from "../types/index"; import type { LinkCandidate } from "../types/index"; -import type { Logger } from "./logger"; +import type { Logger } from "../../../clients/logger"; type SelectorAgent = Agent; type ContentSelectorAgent = Agent; diff --git a/src/clients/review-page-generator.ts b/src/cli/scrape-publications/clients/review-page-generator.ts similarity index 98% rename from src/clients/review-page-generator.ts rename to src/cli/scrape-publications/clients/review-page-generator.ts index cbcb29c..0d5d5ac 100644 --- a/src/clients/review-page-generator.ts +++ b/src/cli/scrape-publications/clients/review-page-generator.ts @@ -1,7 +1,7 @@ import { marked } from "marked"; import type { z } from "zod"; import type { Publication } from "../types/index"; -import type { Logger } from "./logger"; +import type { Logger } from "../../../clients/logger"; export interface ReviewPageGeneratorConfig { logger: Logger; diff --git a/src/cli/scrape-publications/main.ts b/src/cli/scrape-publications/main.ts index c38173b..7b51ae0 100644 --- a/src/cli/scrape-publications/main.ts +++ b/src/cli/scrape-publications/main.ts @@ -8,7 +8,7 @@ import slug from "slug"; import path from "node:path"; import { z } from "zod"; import { Logger } from "../../clients/logger"; -import { PublicationPipeline } from "../../clients/publication-pipeline"; +import { PublicationPipeline } from "./clients/publication-pipeline"; import { parseArgs } from "../../utils/parse-args"; const logger = new Logger({ level: "info", useColors: true }); diff --git a/src/types/index.ts b/src/cli/scrape-publications/types/index.ts similarity index 100% rename from src/types/index.ts rename to src/cli/scrape-publications/types/index.ts From a52b5e36f8961f31f77a96b63eb0321547082879 Mon Sep 17 00:00:00 2001 From: Juha Kangas <42040080+valuecodes@users.noreply.github.com> Date: Fri, 23 Jan 2026 22:22:32 +0200 Subject: [PATCH 02/11] refactor: add new lint rules --- README.md | 1 + eslint.config.ts | 15 ++- package.json | 1 + src/cli/name-explorer/clients/database.ts | 28 +++-- src/cli/name-explorer/clients/parse-names.ts | 38 ++++--- src/cli/name-explorer/clients/pipeline.ts | 16 +-- .../name-explorer/clients/stats-generator.ts | 24 +++- .../clients/stats-page-generator.ts | 8 +- src/cli/name-explorer/main.ts | 30 ++--- .../name-explorer/tools/fetch-name-tool.ts | 66 ++++++----- src/cli/name-explorer/tools/sql-tool.ts | 16 +-- src/cli/name-explorer/types/index.ts | 2 +- src/cli/name-explorer/types/stats.ts | 56 +++++----- .../clients/publication-pipeline.ts | 24 ++-- .../clients/publication-scraper.ts | 103 +++++++++++++----- .../clients/review-page-generator.ts | 4 +- src/clients/fetch.ts | 4 +- src/clients/logger.ts | 4 +- src/clients/playwright-scraper.ts | 12 +- src/tools/fetch-url/fetch-url-tool.test.ts | 11 +- src/tools/fetch-url/fetch-url-tool.ts | 24 ++-- src/tools/utils/html-processing.ts | 71 ++++++------ src/tools/utils/url-safety.ts | 78 ++++++++----- src/utils/parse-args.ts | 10 +- src/utils/question-handler.ts | 12 +- 25 files changed, 388 insertions(+), 270 deletions(-) diff --git a/README.md b/README.md index 2eb7d51..aff86ca 100644 --- a/README.md +++ b/README.md @@ -21,6 +21,7 @@ A minimal TypeScript CLI sandbox for testing agent workflows and safe web scrapi | `pnpm run:scrape-publications` | Scrape publication links and build a review page | | `pnpm typecheck` | Run TypeScript type checking | | `pnpm lint` | Run ESLint for code quality | +| `pnpm lint:fix` | Run ESLint and auto-fix issues | | `pnpm format` | Format code with Prettier | | `pnpm format:check` | Check code formatting | | `pnpm test` | Run Vitest test suite | diff --git a/eslint.config.ts b/eslint.config.ts index 1e541d8..bac74ff 100644 --- a/eslint.config.ts +++ b/eslint.config.ts @@ -39,7 +39,20 @@ export default defineConfig( allowConstantLoopConditions: true, }, ], - "@typescript-eslint/no-non-null-assertion": "error", + // Enforce arrow functions over function declarations + "func-style": ["error", "expression"], + "@typescript-eslint/no-floating-promises": [ + "error", + { ignoreVoid: true }, + ], + "@typescript-eslint/switch-exhaustiveness-check": "error", + "@typescript-eslint/no-non-null-assertion": "error", // Disallow non-null assertions + "@typescript-eslint/consistent-type-exports": "error", // Ensure consistent usage of type exports + "@typescript-eslint/consistent-type-definitions": ["error", "type"], // Use 'type' over 'interface' + "prefer-const": "error", + "no-var": "error", + eqeqeq: ["error", "smart"], + curly: ["error", "all"], "import/consistent-type-specifier-style": ["error", "prefer-top-level"], }, }, diff --git a/package.json b/package.json index 4846b37..08ea5ea 100644 --- a/package.json +++ b/package.json @@ -10,6 +10,7 @@ "node:tsx": "node --disable-warning=ExperimentalWarning --import tsx", "typecheck": "tsc --noEmit", "lint": "eslint .", + "lint:fix": "eslint . --fix", "format": "prettier --write .", "format:check": "prettier --check .", "test": "vitest" diff --git a/src/cli/name-explorer/clients/database.ts b/src/cli/name-explorer/clients/database.ts index 0cfb543..9e5a5fc 100644 --- a/src/cli/name-explorer/clients/database.ts +++ b/src/cli/name-explorer/clients/database.ts @@ -4,24 +4,24 @@ import fs from "node:fs"; import type { Logger } from "../../../clients/logger"; import type { NameEntry } from "./parse-names"; -export interface NameRow { +export type NameRow = { id: number; decade: string; gender: "boy" | "girl"; rank: number; name: string; count: number; -} +}; -export interface DecadeData { +export type DecadeData = { decade: string; boys: NameEntry[]; girls: NameEntry[]; -} +}; -export interface ConsolidatedData { +export type ConsolidatedData = { decades: DecadeData[]; -} +}; export class NameDatabase { private db: DatabaseSync; @@ -134,12 +134,12 @@ export class NameDatabase { } } -export interface AggregatedNameRow { +export type AggregatedNameRow = { id: number; name: string; count: number; gender: "male" | "female"; -} +}; export class AggregatedNameDatabase { private db: DatabaseSync; @@ -183,13 +183,19 @@ export class AggregatedNameDatabase { try { for (const line of dataLines) { const [name, countStr] = line.split(","); - if (!name || !countStr) continue; + if (!name || !countStr) { + continue; + } // Parse count with thousand separators like "43.276" or "43,276" const normalizedCount = countStr.replace(/[^\d]/g, ""); - if (!normalizedCount) continue; + if (!normalizedCount) { + continue; + } const count = Number.parseInt(normalizedCount, 10); - if (Number.isNaN(count)) continue; + if (Number.isNaN(count)) { + continue; + } insert.run(name.trim(), count, gender); } diff --git a/src/cli/name-explorer/clients/parse-names.ts b/src/cli/name-explorer/clients/parse-names.ts index cb37c18..d711be6 100644 --- a/src/cli/name-explorer/clients/parse-names.ts +++ b/src/cli/name-explorer/clients/parse-names.ts @@ -1,30 +1,21 @@ import { JSDOM } from "jsdom"; -export interface NameEntry { +export type NameEntry = { rank: number; name: string; count: number; -} +}; -export interface ParsedNames { +export type ParsedNames = { decade: string; boys: NameEntry[]; girls: NameEntry[]; -} +}; -export function parseNamesHtml(html: string, decade: string): ParsedNames { - const dom = new JSDOM(html); - const tables = dom.window.document.querySelectorAll("table"); - - // First table is boys (Miehet), second is girls (Naiset) - const boys = parseTable(tables[0]); - const girls = parseTable(tables[1]); - - return { decade, boys, girls }; -} - -function parseTable(table: Element | undefined): NameEntry[] { - if (!table) return []; +const parseTable = (table: Element | undefined): NameEntry[] => { + if (!table) { + return []; + } const rows = table.querySelectorAll("tbody tr"); return Array.from(rows).map((row) => { @@ -34,4 +25,15 @@ function parseTable(table: Element | undefined): NameEntry[] { const count = parseInt(cells[2]?.textContent.replace(/\s/g, "") ?? "0", 10); return { rank, name, count }; }); -} +}; + +export const parseNamesHtml = (html: string, decade: string): ParsedNames => { + const dom = new JSDOM(html); + const tables = dom.window.document.querySelectorAll("table"); + + // First table is boys (Miehet), second is girls (Naiset) + const boys = parseTable(tables[0]); + const girls = parseTable(tables[1]); + + return { decade, boys, girls }; +}; diff --git a/src/cli/name-explorer/clients/pipeline.ts b/src/cli/name-explorer/clients/pipeline.ts index f1a380b..be2df77 100644 --- a/src/cli/name-explorer/clients/pipeline.ts +++ b/src/cli/name-explorer/clients/pipeline.ts @@ -8,35 +8,35 @@ import { FETCH_DECADES } from "../constants"; import type { ParsedNames } from "./parse-names"; import { parseNamesHtml } from "./parse-names"; -export interface NameSuggesterPipelineConfig { +export type NameSuggesterPipelineConfig = { logger: Logger; outputDir: string; refetch?: boolean; -} +}; export type { DecadeData, ConsolidatedData } from "./database"; -export interface FetchDecadePageResult { +export type FetchDecadePageResult = { html: string; markdown: string; parsedNames: ParsedNames; fromCache: boolean; -} +}; -export interface ProcessAllDecadesResult { +export type ProcessAllDecadesResult = { totalPages: number; cachedPages: number; fetchedPages: number; -} +}; -export interface SetupResult { +export type SetupResult = { outputPath: string; totalPages: number; cachedPages: number; fetchedPages: number; db: NameDatabase; aggregatedDb: AggregatedNameDatabase | null; -} +}; const BASE_URL = "https://nimipalvelu.dvv.fi/suosituimmat-etunimet"; const REQUEST_DELAY_MS = 500; diff --git a/src/cli/name-explorer/clients/stats-generator.ts b/src/cli/name-explorer/clients/stats-generator.ts index 1a85224..e2b0efa 100644 --- a/src/cli/name-explorer/clients/stats-generator.ts +++ b/src/cli/name-explorer/clients/stats-generator.ts @@ -33,7 +33,9 @@ export class StatsGenerator { const total = totalRow?.total ?? 0; const nameCount = totalRow?.cnt ?? 0; - if (total === 0) continue; + if (total === 0) { + continue; + } // Top-N concentration const topConcentration = { @@ -245,7 +247,9 @@ export class StatsGenerator { for (let i = 1; i < DECADES.length; i++) { const fromDecade = DECADES[i - 1]; const toDecade = DECADES[i]; - if (!fromDecade || !toDecade) continue; + if (!fromDecade || !toDecade) { + continue; + } const rows = this.db.query<{ name: string; @@ -292,7 +296,9 @@ export class StatsGenerator { for (let i = 1; i < DECADES.length; i++) { const prevDecade = DECADES[i - 1]; const currDecade = DECADES[i]; - if (!prevDecade || !currDecade) continue; + if (!prevDecade || !currDecade) { + continue; + } const rows = this.db.query<{ name: string; @@ -356,12 +362,16 @@ export class StatsGenerator { const parts = key.split("|"); const name = parts[0]; const gender = parts[1]; - if (!name || !gender) continue; + if (!name || !gender) { + continue; + } for (let i = 1; i < decadeList.length; i++) { const prevEntry = decadeList[i - 1]; const currEntry = decadeList[i]; - if (!prevEntry || !currEntry) continue; + if (!prevEntry || !currEntry) { + continue; + } const prevIdx = DECADES.indexOf(prevEntry.decade); const currIdx = DECADES.indexOf(currEntry.decade); @@ -389,7 +399,9 @@ export class StatsGenerator { for (let i = 1; i < DECADES.length; i++) { const fromDecade = DECADES[i - 1]; const toDecade = DECADES[i]; - if (!fromDecade || !toDecade) continue; + if (!fromDecade || !toDecade) { + continue; + } for (const gender of ["boy", "girl"] as const) { // Get name sets diff --git a/src/cli/name-explorer/clients/stats-page-generator.ts b/src/cli/name-explorer/clients/stats-page-generator.ts index 4a2a4f9..813956d 100644 --- a/src/cli/name-explorer/clients/stats-page-generator.ts +++ b/src/cli/name-explorer/clients/stats-page-generator.ts @@ -1,9 +1,9 @@ import type { Logger } from "../../../clients/logger"; import type { AllStats, LetterStats, TopName, UnisexName } from "../types"; -export interface StatsPageGeneratorConfig { +export type StatsPageGeneratorConfig = { logger: Logger; -} +}; export class StatsPageGenerator { private logger: Logger; @@ -786,7 +786,9 @@ details > div { padding: 1rem; } ${decades .map((decade) => { const names = unisexByDecade.get(decade) ?? []; - if (names.length === 0) return ""; + if (names.length === 0) { + return ""; + } return `
diff --git a/src/cli/name-explorer/main.ts b/src/cli/name-explorer/main.ts index 06a1cad..26c2961 100644 --- a/src/cli/name-explorer/main.ts +++ b/src/cli/name-explorer/main.ts @@ -41,18 +41,8 @@ const pipeline = new NameSuggesterPipeline({ const { db, aggregatedDb } = await pipeline.setup(); -// --- Run selected mode --- -if (mode === "stats") { - await runStatsMode(); -} else { - await runAiMode(); -} - -db.close(); -aggregatedDb?.close(); - // --- Stats Mode: Generate HTML statistics page --- -async function runStatsMode() { +const runStatsMode = async () => { logger.info("Computing statistics..."); const statsGenerator = new StatsGenerator(db); const stats = statsGenerator.computeAll(); @@ -64,10 +54,10 @@ async function runStatsMode() { const outputPath = "tmp/name-explorer/statistics.html"; await writeFile(outputPath, html, "utf-8"); logger.info(`Statistics page written to ${outputPath}`); -} +}; // --- AI Mode: Interactive Q&A with SQL agent --- -async function runAiMode() { +const runAiMode = async () => { logger.info("Starting AI mode..."); const tools = [ @@ -112,7 +102,9 @@ When answering, do not include any questions. Do not include markdown or extra k runner.on("agent_tool_start", (_context, _agent, tool, details) => { const toolCall = details.toolCall as Record; const callId = toolCall.id as string; - if (toolsInProgress.has(callId)) return; + if (toolsInProgress.has(callId)) { + return; + } toolsInProgress.add(callId); const args = String(toolCall.arguments); @@ -163,4 +155,14 @@ When answering, do not include any questions. Do not include markdown or extra k logger.answer(output.content); break; } +}; + +// --- Run selected mode --- +if (mode === "stats") { + await runStatsMode(); +} else { + await runAiMode(); } + +db.close(); +aggregatedDb?.close(); diff --git a/src/cli/name-explorer/tools/fetch-name-tool.ts b/src/cli/name-explorer/tools/fetch-name-tool.ts index bb972c9..885fa93 100644 --- a/src/cli/name-explorer/tools/fetch-name-tool.ts +++ b/src/cli/name-explorer/tools/fetch-name-tool.ts @@ -8,7 +8,7 @@ import { resolveAndValidateUrl } from "../../../tools/utils/url-safety"; /** * Statistics for a single decade row */ -export interface NameStatRow { +export type NameStatRow = { decade: string; men: number | null; women: number | null; @@ -16,12 +16,12 @@ export interface NameStatRow { menUnder5?: boolean; womenUnder5?: boolean; totalUnder5?: boolean; -} +}; /** * Complete result from fetching name statistics */ -export interface NameStatResult { +export type NameStatResult = { name: string; rows: NameStatRow[]; totals: { @@ -33,20 +33,20 @@ export interface NameStatResult { totalUnder5?: boolean; }; fetchedAt: string; -} +}; /** * Error result when fetch fails */ -export interface NameStatError { +export type NameStatError = { error: string; name: string; -} +}; -interface ParsedValue { +type ParsedValue = { value: number | null; isUnder5: boolean; -} +}; const DVV_BASE_URL = "https://nimipalvelu.dvv.fi/etunimihaku"; const USER_AGENT = "cli-agent-sandbox/1.0"; @@ -59,7 +59,7 @@ const DEFAULT_TIMEOUT_MS = 15000; * - "alle X" (under X, privacy-protected) -> returns null with flag * - "0" -> 0 */ -function parseTableValue(rawValue: string): ParsedValue { +const parseTableValue = (rawValue: string): ParsedValue => { const trimmed = rawValue.trim(); // Handle "alle X" (Finnish for "under X") - privacy protection for small counts @@ -90,12 +90,12 @@ function parseTableValue(rawValue: string): ParsedValue { } return { value: parsed, isUnder5: false }; -} +}; /** * Parse a single table row into a NameStatRow */ -function parseTableRow(cells: Element[]): NameStatRow | null { +const parseTableRow = (cells: Element[]): NameStatRow | null => { const [decadeCell, menCell, womenCell, totalCell] = cells; if (!decadeCell || !menCell || !womenCell || !totalCell) { return null; @@ -113,20 +113,26 @@ function parseTableRow(cells: Element[]): NameStatRow | null { total: totalParsed.value, }; - if (menParsed.isUnder5) row.menUnder5 = true; - if (womenParsed.isUnder5) row.womenUnder5 = true; - if (totalParsed.isUnder5) row.totalUnder5 = true; + if (menParsed.isUnder5) { + row.menUnder5 = true; + } + if (womenParsed.isUnder5) { + row.womenUnder5 = true; + } + if (totalParsed.isUnder5) { + row.totalUnder5 = true; + } return row; -} +}; /** * Extract name statistics from the DVV HTML page */ -function extractNameStatistics( +const extractNameStatistics = ( html: string, name: string -): NameStatResult | NameStatError { +): NameStatResult | NameStatError => { const dom = new JSDOM(html); const document = dom.window.document; @@ -199,9 +205,15 @@ function extractNameStatistics( total: totalParsed.value, }; - if (menParsed.isUnder5) totals.menUnder5 = true; - if (womenParsed.isUnder5) totals.womenUnder5 = true; - if (totalParsed.isUnder5) totals.totalUnder5 = true; + if (menParsed.isUnder5) { + totals.menUnder5 = true; + } + if (womenParsed.isUnder5) { + totals.womenUnder5 = true; + } + if (totalParsed.isUnder5) { + totals.totalUnder5 = true; + } } } @@ -211,27 +223,27 @@ function extractNameStatistics( totals, fetchedAt: new Date().toISOString(), }; -} +}; -async function fileExists(filePath: string): Promise { +const fileExists = async (filePath: string): Promise => { try { await fs.access(filePath); return true; } catch { return false; } -} +}; -export interface FetchNameToolOptions { +export type FetchNameToolOptions = { cacheDir: string; refetch?: boolean; maxRequests?: number; -} +}; /** * Create a tool for fetching individual name statistics from DVV */ -export function createFetchNameTool(options: FetchNameToolOptions) { +export const createFetchNameTool = (options: FetchNameToolOptions) => { const { cacheDir, refetch = false, maxRequests = 3 } = options; let requestCount = 0; @@ -352,4 +364,4 @@ For aggregate statistics across top 100 names per decade, use the SQL database t } }, }); -} +}; diff --git a/src/cli/name-explorer/tools/sql-tool.ts b/src/cli/name-explorer/tools/sql-tool.ts index 81a2c21..65dc57c 100644 --- a/src/cli/name-explorer/tools/sql-tool.ts +++ b/src/cli/name-explorer/tools/sql-tool.ts @@ -14,10 +14,12 @@ const DANGEROUS_KEYWORDS = [ "EXECUTE", ]; -function validateReadOnlyQuery(sql: string): { +const validateReadOnlyQuery = ( + sql: string +): { valid: boolean; error?: string; -} { +} => { const trimmedSql = sql.trim(); // Must start with SELECT @@ -39,9 +41,9 @@ function validateReadOnlyQuery(sql: string): { } return { valid: true }; -} +}; -export function createSqlQueryTool(db: NameDatabase) { +export const createSqlQueryTool = (db: NameDatabase) => { return tool({ name: "query_names_database", description: `Execute a read-only SQL query against the Finnish names database (decade-based data). @@ -68,9 +70,9 @@ Example queries: } }, }); -} +}; -export function createAggregatedSqlQueryTool(db: AggregatedNameDatabase) { +export const createAggregatedSqlQueryTool = (db: AggregatedNameDatabase) => { return tool({ name: "query_aggregated_names", description: `Execute a read-only SQL query against the aggregated Finnish names database (total counts across all time). @@ -98,4 +100,4 @@ Example queries: } }, }); -} +}; diff --git a/src/cli/name-explorer/types/index.ts b/src/cli/name-explorer/types/index.ts index 3d0ff02..e89da03 100644 --- a/src/cli/name-explorer/types/index.ts +++ b/src/cli/name-explorer/types/index.ts @@ -1,2 +1,2 @@ export * from "./ai-output"; -export * from "./stats"; +export type * from "./stats"; diff --git a/src/cli/name-explorer/types/stats.ts b/src/cli/name-explorer/types/stats.ts index e713fac..73f394e 100644 --- a/src/cli/name-explorer/types/stats.ts +++ b/src/cli/name-explorer/types/stats.ts @@ -1,6 +1,6 @@ // TypeScript interfaces for name statistics -export interface DecadeGenderStats { +export type DecadeGenderStats = { decade: string; gender: "boy" | "girl"; totalBirths: number; @@ -17,18 +17,18 @@ export interface DecadeGenderStats { hhi: number; effectiveNames: number; entropy: number; -} +}; -export interface TopName { +export type TopName = { decade: string; gender: "boy" | "girl"; rank: number; name: string; count: number; share: number; -} +}; -export interface NameDynamics { +export type NameDynamics = { name: string; gender: "boy" | "girl"; peakDecade: string; @@ -39,9 +39,9 @@ export interface NameDynamics { longevity: number; avgRank: number; rankStddev: number; -} +}; -export interface RankChange { +export type RankChange = { name: string; gender: "boy" | "girl"; fromDecade: string; @@ -49,26 +49,26 @@ export interface RankChange { fromRank: number; toRank: number; change: number; -} +}; -export interface NewEntry { +export type NewEntry = { name: string; gender: "boy" | "girl"; decade: string; rank: number; count: number; -} +}; -export interface Comeback { +export type Comeback = { name: string; gender: "boy" | "girl"; comebackDecade: string; previousDecade: string; gapDecades: number; comebackRank: number; -} +}; -export interface ChurnMetrics { +export type ChurnMetrics = { fromDecade: string; toDecade: string; gender: "boy" | "girl"; @@ -76,52 +76,52 @@ export interface ChurnMetrics { newNames: number; exitedNames: number; jaccardSimilarity: number; -} +}; -export interface UnisexName { +export type UnisexName = { name: string; decade: string; boyRank: number; girlRank: number; boyCount: number; girlCount: number; -} +}; -export interface EvergreenName { +export type EvergreenName = { name: string; gender: "boy" | "girl"; decadesPresent: number; avgRank: number; totalCount: number; -} +}; -export interface LetterStats { +export type LetterStats = { decade: string; gender: "boy" | "girl"; letter: string; nameCount: number; totalBirths: number; share: number; -} +}; -export interface SuffixStats { +export type SuffixStats = { decade: string; gender: "boy" | "girl"; suffix: string; nameCount: number; totalBirths: number; share: number; -} +}; -export interface NameLengthStats { +export type NameLengthStats = { decade: string; gender: "boy" | "girl"; avgLength: number; minLength: number; maxLength: number; -} +}; -export interface SpecialCharStats { +export type SpecialCharStats = { decade: string; gender: "boy" | "girl"; namesWithUmlautA: number; @@ -129,9 +129,9 @@ export interface SpecialCharStats { totalNames: number; umlautAShare: number; umlautOShare: number; -} +}; -export interface AllStats { +export type AllStats = { generatedAt: string; dataSource: string; decadeRange: { first: string; last: string }; @@ -152,4 +152,4 @@ export interface AllStats { suffixStats: SuffixStats[]; nameLengthStats: NameLengthStats[]; specialCharStats: SpecialCharStats[]; -} +}; diff --git a/src/cli/scrape-publications/clients/publication-pipeline.ts b/src/cli/scrape-publications/clients/publication-pipeline.ts index dafc124..ed8f0af 100644 --- a/src/cli/scrape-publications/clients/publication-pipeline.ts +++ b/src/cli/scrape-publications/clients/publication-pipeline.ts @@ -18,39 +18,39 @@ import type { export type FetchSource = "playwright" | "basic-fetch"; -export interface PublicationPipelineConfig { +export type PublicationPipelineConfig = { logger: Logger; outputDir: string; refetch?: boolean; -} +}; -export interface FetchSourceResult { +export type FetchSourceResult = { markdown: string; html: string; fromCache: { markdown: boolean; html: boolean }; source: FetchSource; -} +}; -export interface DiscoverLinksResult { +export type DiscoverLinksResult = { allLinks: string[]; filteredLinks: string[]; linkCandidates: z.infer[]; source: FetchSource; usedFallback: boolean; -} +}; -export interface IdentifyAndExtractResult { +export type IdentifyAndExtractResult = { selectors: z.infer; publications: z.infer[]; -} +}; -export interface FetchPublicationsResult { +export type FetchPublicationsResult = { fetchedCount: number; skippedCount: number; markdownCount: number; -} +}; -export interface ExtractContentResult { +export type ExtractContentResult = { publications: z.infer[]; report: { total: number; @@ -58,7 +58,7 @@ export interface ExtractContentResult { failed: number; results: { success: boolean; filename: string; error?: string }[]; }; -} +}; const MAX_TITLE_SLUG_LENGTH = 80; diff --git a/src/cli/scrape-publications/clients/publication-scraper.ts b/src/cli/scrape-publications/clients/publication-scraper.ts index 31fc4cb..63fc6ca 100644 --- a/src/cli/scrape-publications/clients/publication-scraper.ts +++ b/src/cli/scrape-publications/clients/publication-scraper.ts @@ -13,11 +13,11 @@ import type { Logger } from "../../../clients/logger"; type SelectorAgent = Agent; type ContentSelectorAgent = Agent; -export interface PublicationScraperConfig { +export type PublicationScraperConfig = { logger: Logger; selectorAgent?: SelectorAgent; contentSelectorAgent?: ContentSelectorAgent; -} +}; export class PublicationScraper { private logger: Logger; @@ -138,7 +138,9 @@ IMPORTANT: Respond with ONLY a valid JSON object: private getStructureSignature(html: string): string { const dom = new JSDOM(html); const root = dom.window.document.body.firstElementChild; - if (!root) return "unknown"; + if (!root) { + return "unknown"; + } const tag = root.tagName.toLowerCase(); const hasImage = !!root.querySelector("img"); @@ -156,10 +158,18 @@ IMPORTANT: Respond with ONLY a valid JSON object: */ private scoreStructureSignature(signature: string): number { let score = 0; - if (signature.includes("h=true")) score += 10; // Has heading - strong signal - if (signature.includes("img=true")) score += 5; // Has image - if (signature.includes("date=true")) score += 5; // Has date - if (signature.startsWith("article:")) score += 5; // Semantic article tag + if (signature.includes("h=true")) { + score += 10; + } // Has heading - strong signal + if (signature.includes("img=true")) { + score += 5; + } // Has image + if (signature.includes("date=true")) { + score += 5; + } // Has date + if (signature.startsWith("article:")) { + score += 5; + } // Semantic article tag return score; } @@ -231,7 +241,9 @@ IMPORTANT: Respond with ONLY a valid JSON object: const anchors = doc.querySelectorAll("a[href]"); for (const anchor of anchors) { const href = anchor.getAttribute("href"); - if (!href) continue; + if (!href) { + continue; + } // Check if the href matches (could be relative or absolute) if ( @@ -263,39 +275,54 @@ IMPORTANT: Respond with ONLY a valid JSON object: if (targetAnchor) { const titleElement = targetAnchor.querySelector(selectors.titleSelector); let title = titleElement?.textContent.trim(); - if (title && title.length > 3) return this.cleanTitle(title); + if (title && title.length > 3) { + return this.cleanTitle(title); + } // Strategy 2: Anchor title attribute const anchorTitle = targetAnchor.getAttribute("title")?.trim(); - if (anchorTitle && anchorTitle.length > 3) + if (anchorTitle && anchorTitle.length > 3) { return this.cleanTitle(anchorTitle); + } // Strategy 3: Heading inside the anchor (h1-h6) const heading = targetAnchor.querySelector("h1, h2, h3, h4, h5, h6"); title = heading?.textContent.trim(); - if (title && title.length > 3) return this.cleanTitle(title); + if (title && title.length > 3) { + return this.cleanTitle(title); + } // Strategy 4: Direct anchor text title = targetAnchor.textContent.trim(); - if (title && title.length > 3) return this.cleanTitle(title); + if (title && title.length > 3) { + return this.cleanTitle(title); + } } // Fallback: Try document-level selectors if no target anchor found const titleElement = doc.querySelector(selectors.titleSelector); let title = titleElement?.textContent.trim(); - if (title && title.length > 3) return this.cleanTitle(title); + if (title && title.length > 3) { + return this.cleanTitle(title); + } const anchor = doc.querySelector("a[title]"); title = anchor?.getAttribute("title")?.trim(); - if (title && title.length > 3) return this.cleanTitle(title); + if (title && title.length > 3) { + return this.cleanTitle(title); + } const heading = doc.querySelector("a h1, a h2, a h3, a h4, a h5, a h6"); title = heading?.textContent.trim(); - if (title && title.length > 3) return this.cleanTitle(title); + if (title && title.length > 3) { + return this.cleanTitle(title); + } const mainAnchor = doc.querySelector("a[href]"); title = mainAnchor?.textContent.trim(); - if (title && title.length > 3) return this.cleanTitle(title); + if (title && title.length > 3) { + return this.cleanTitle(title); + } return null; } @@ -304,7 +331,9 @@ IMPORTANT: Respond with ONLY a valid JSON object: * Parses a date from an element, checking datetime attribute first, then text content. */ private parseDateFromElement(el: Element | null): string | undefined { - if (!el) return undefined; + if (!el) { + return undefined; + } const raw = el.getAttribute("datetime") ?? el.textContent.trim(); return raw ? this.parseToIsoDate(raw) : undefined; } @@ -330,36 +359,48 @@ IMPORTANT: Respond with ONLY a valid JSON object: if (selectors.dateSelector) { const dateEl = targetAnchor.querySelector(selectors.dateSelector); const date = this.parseDateFromElement(dateEl); - if (date) return date; + if (date) { + return date; + } } // Strategy 2: