From 8392dab2bbb215ae9d8edb43770c3188774906b3 Mon Sep 17 00:00:00 2001 From: Angel Baez Date: Sat, 4 Apr 2026 01:26:58 -0500 Subject: [PATCH 1/5] fix: chunk byline IN clauses to stay within D1 SQL variable limit MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fixes #219. hydrateEntryBylines builds unbounded IN (?, ?, …) clauses that exceed Cloudflare D1's bound-parameter limit on large collections. Adds a chunks() utility and applies it defense-in-depth at the repository level: getContentBylinesMany, findByUserIds, and getAuthorIds now batch IDs in groups of 50. --- packages/core/src/bylines/index.ts | 19 ++-- .../core/src/database/repositories/byline.ts | 89 ++++++++++--------- packages/core/src/utils/chunks.ts | 17 ++++ .../unit/database/repositories/byline.test.ts | 65 ++++++++++++++ packages/core/tests/unit/utils/chunks.test.ts | 38 ++++++++ 5 files changed, 178 insertions(+), 50 deletions(-) create mode 100644 packages/core/src/utils/chunks.ts create mode 100644 packages/core/tests/unit/utils/chunks.test.ts diff --git a/packages/core/src/bylines/index.ts b/packages/core/src/bylines/index.ts index 6f86eb02b..32280a24e 100644 --- a/packages/core/src/bylines/index.ts +++ b/packages/core/src/bylines/index.ts @@ -12,6 +12,7 @@ import { BylineRepository } from "../database/repositories/byline.js"; import type { BylineSummary, ContentBylineCredit } from "../database/repositories/types.js"; import { validateIdentifier } from "../database/validate.js"; import { getDb } from "../loader.js"; +import { chunks, SQL_BATCH_SIZE } from "../utils/chunks.js"; /** * Get a byline by ID. 
@@ -222,15 +223,17 @@ async function getAuthorIds( const tableName = `ec_${collection}`; validateIdentifier(tableName, "content table"); - const result = await sql<{ id: string; author_id: string | null }>` - SELECT id, author_id FROM ${sql.ref(tableName)} - WHERE id IN (${sql.join(entryIds.map((id) => sql`${id}`))}) - `.execute(db); - const map = new Map(); - for (const row of result.rows) { - if (row.author_id) { - map.set(row.id, row.author_id); + for (const chunk of chunks(entryIds, SQL_BATCH_SIZE)) { + const result = await sql<{ id: string; author_id: string | null }>` + SELECT id, author_id FROM ${sql.ref(tableName)} + WHERE id IN (${sql.join(chunk.map((id) => sql`${id}`))}) + `.execute(db); + + for (const row of result.rows) { + if (row.author_id) { + map.set(row.id, row.author_id); + } } } return map; diff --git a/packages/core/src/database/repositories/byline.ts b/packages/core/src/database/repositories/byline.ts index b78f54235..9d51fb940 100644 --- a/packages/core/src/database/repositories/byline.ts +++ b/packages/core/src/database/repositories/byline.ts @@ -1,6 +1,7 @@ import { sql, type Kysely, type Selectable } from "kysely"; import { ulid } from "ulidx"; +import { chunks, SQL_BATCH_SIZE } from "../../utils/chunks.js"; import { listTablesLike } from "../dialect-helpers.js"; import type { BylineTable, Database } from "../types.js"; import { validateIdentifier } from "../validate.js"; @@ -259,41 +260,43 @@ export class BylineRepository { const result = new Map(); if (contentIds.length === 0) return result; - const rows = await this.db - .selectFrom("_emdash_content_bylines as cb") - .innerJoin("_emdash_bylines as b", "b.id", "cb.byline_id") - .select([ - "cb.content_id as content_id", - "cb.sort_order as sort_order", - "cb.role_label as role_label", - "b.id as id", - "b.slug as slug", - "b.display_name as display_name", - "b.bio as bio", - "b.avatar_media_id as avatar_media_id", - "b.website_url as website_url", - "b.user_id as user_id", - "b.is_guest 
as is_guest", - "b.created_at as created_at", - "b.updated_at as updated_at", - ]) - .where("cb.collection_slug", "=", collectionSlug) - .where("cb.content_id", "in", contentIds) - .orderBy("cb.sort_order", "asc") - .execute(); + for (const chunk of chunks(contentIds, SQL_BATCH_SIZE)) { + const rows = await this.db + .selectFrom("_emdash_content_bylines as cb") + .innerJoin("_emdash_bylines as b", "b.id", "cb.byline_id") + .select([ + "cb.content_id as content_id", + "cb.sort_order as sort_order", + "cb.role_label as role_label", + "b.id as id", + "b.slug as slug", + "b.display_name as display_name", + "b.bio as bio", + "b.avatar_media_id as avatar_media_id", + "b.website_url as website_url", + "b.user_id as user_id", + "b.is_guest as is_guest", + "b.created_at as created_at", + "b.updated_at as updated_at", + ]) + .where("cb.collection_slug", "=", collectionSlug) + .where("cb.content_id", "in", chunk) + .orderBy("cb.sort_order", "asc") + .execute(); - for (const row of rows) { - const contentId = row.content_id; - const credit: ContentBylineCredit = { - byline: rowToByline(row), - sortOrder: row.sort_order, - roleLabel: row.role_label, - }; - const existing = result.get(contentId); - if (existing) { - existing.push(credit); - } else { - result.set(contentId, [credit]); + for (const row of rows) { + const contentId = row.content_id; + const credit: ContentBylineCredit = { + byline: rowToByline(row), + sortOrder: row.sort_order, + roleLabel: row.role_label, + }; + const existing = result.get(contentId); + if (existing) { + existing.push(credit); + } else { + result.set(contentId, [credit]); + } } } @@ -308,15 +311,17 @@ export class BylineRepository { const result = new Map(); if (userIds.length === 0) return result; - const rows = await this.db - .selectFrom("_emdash_bylines") - .selectAll() - .where("user_id", "in", userIds) - .execute(); + for (const chunk of chunks(userIds, SQL_BATCH_SIZE)) { + const rows = await this.db + .selectFrom("_emdash_bylines") + 
.selectAll() + .where("user_id", "in", chunk) + .execute(); - for (const row of rows) { - if (row.user_id) { - result.set(row.user_id, rowToByline(row)); + for (const row of rows) { + if (row.user_id) { + result.set(row.user_id, rowToByline(row)); + } } } return result; diff --git a/packages/core/src/utils/chunks.ts b/packages/core/src/utils/chunks.ts new file mode 100644 index 000000000..9ff9f0f40 --- /dev/null +++ b/packages/core/src/utils/chunks.ts @@ -0,0 +1,17 @@ +/** + * Split an array into chunks of at most `size` elements. + * + * Used to keep SQL `IN (?, ?, …)` clauses within Cloudflare D1's + * bound-parameter limit (~100 per statement). + */ +export function chunks(arr: T[], size: number): T[][] { + if (arr.length === 0) return []; + const result: T[][] = []; + for (let i = 0; i < arr.length; i += size) { + result.push(arr.slice(i, i + size)); + } + return result; +} + +/** Conservative default chunk size for SQL IN clauses (well within D1's limit). */ +export const SQL_BATCH_SIZE = 50; diff --git a/packages/core/tests/unit/database/repositories/byline.test.ts b/packages/core/tests/unit/database/repositories/byline.test.ts index 121322cd5..f72d58591 100644 --- a/packages/core/tests/unit/database/repositories/byline.test.ts +++ b/packages/core/tests/unit/database/repositories/byline.test.ts @@ -4,6 +4,7 @@ import { describe, it, expect, beforeEach, afterEach } from "vitest"; import { BylineRepository } from "../../../../src/database/repositories/byline.js"; import { ContentRepository } from "../../../../src/database/repositories/content.js"; import type { Database } from "../../../../src/database/types.js"; +import { SQL_BATCH_SIZE } from "../../../../src/utils/chunks.js"; import { setupTestDatabaseWithCollections, teardownTestDatabase } from "../../../utils/test-db.js"; describe("BylineRepository", () => { @@ -139,6 +140,70 @@ describe("BylineRepository", () => { expect(bylines[1]?.byline.id).toBe(first.id); }); + it("getContentBylinesMany handles more 
IDs than SQL_BATCH_SIZE", async () => { + const byline = await bylineRepo.create({ + slug: "batch-author", + displayName: "Batch Author", + }); + + // Create a few real content entries with bylines + const realIds: string[] = []; + for (let i = 0; i < 3; i++) { + const content = await contentRepo.create({ + type: "post", + slug: `batch-post-${i}`, + data: { title: `Batch Post ${i}` }, + }); + await bylineRepo.setContentBylines("post", content.id, [{ bylineId: byline.id }]); + realIds.push(content.id); + } + + // Build an ID list larger than SQL_BATCH_SIZE with the real IDs spread across chunks + const ids: string[] = []; + for (let i = 0; i < SQL_BATCH_SIZE + 10; i++) { + ids.push(`fake-id-${i}`); + } + // Place real IDs so they span different chunks + ids[0] = realIds[0]!; + ids[SQL_BATCH_SIZE - 1] = realIds[1]!; + ids[SQL_BATCH_SIZE + 5] = realIds[2]!; + + const result = await bylineRepo.getContentBylinesMany("post", ids); + + // All 3 real entries should have their byline resolved + expect(result.get(realIds[0]!)).toHaveLength(1); + expect(result.get(realIds[1]!)).toHaveLength(1); + expect(result.get(realIds[2]!)).toHaveLength(1); + expect(result.get(realIds[0]!)![0]!.byline.id).toBe(byline.id); + }); + + it("findByUserIds handles more IDs than SQL_BATCH_SIZE", async () => { + // Create a real user so the FK constraint is satisfied + const userId = "user-batch-test"; + await db + .insertInto("users" as any) + .values({ id: userId, email: "batch@test.com", name: "Batch", role: 50 }) + .execute(); + + const byline = await bylineRepo.create({ + slug: "user-batch", + displayName: "User Batch", + userId, + }); + + // Build a user ID list larger than SQL_BATCH_SIZE + const userIds: string[] = []; + for (let i = 0; i < SQL_BATCH_SIZE + 10; i++) { + userIds.push(`user-fake-${i}`); + } + userIds[SQL_BATCH_SIZE + 5] = userId; + + const result = await bylineRepo.findByUserIds(userIds); + + expect(result.size).toBe(1); + expect(result.get(userId)?.id).toBe(byline.id); + }); 
+ it("deletes byline, removes links, and nulls primary_byline_id", async () => { const byline = await bylineRepo.create({ slug: "delete-me", diff --git a/packages/core/tests/unit/utils/chunks.test.ts b/packages/core/tests/unit/utils/chunks.test.ts new file mode 100644 index 000000000..7b604de67 --- /dev/null +++ b/packages/core/tests/unit/utils/chunks.test.ts @@ -0,0 +1,38 @@ +import { describe, expect, it } from "vitest"; + +import { chunks, SQL_BATCH_SIZE } from "../../../src/utils/chunks.js"; + +describe("chunks", () => { + it("returns empty array for empty input", () => { + expect(chunks([], 10)).toEqual([]); + }); + + it("returns single chunk when array fits within size", () => { + expect(chunks([1, 2, 3], 5)).toEqual([[1, 2, 3]]); + }); + + it("splits array into even chunks", () => { + expect(chunks([1, 2, 3, 4], 2)).toEqual([ + [1, 2], + [3, 4], + ]); + }); + + it("handles remainder in last chunk", () => { + expect(chunks([1, 2, 3, 4, 5], 2)).toEqual([[1, 2], [3, 4], [5]]); + }); + + it("handles chunk size of 1", () => { + expect(chunks([1, 2, 3], 1)).toEqual([[1], [2], [3]]); + }); + + it("handles array exactly equal to chunk size", () => { + expect(chunks([1, 2, 3], 3)).toEqual([[1, 2, 3]]); + }); +}); + +describe("SQL_BATCH_SIZE", () => { + it("is 50", () => { + expect(SQL_BATCH_SIZE).toBe(50); + }); +}); From 709058956772b2217df91f4d738057a548df79ce Mon Sep 17 00:00:00 2001 From: Angel Baez Date: Sat, 4 Apr 2026 01:27:47 -0500 Subject: [PATCH 2/5] chore: add changeset for byline chunking fix --- .changeset/cute-eagles-rescue.md | 5 +++++ 1 file changed, 5 insertions(+) create mode 100644 .changeset/cute-eagles-rescue.md diff --git a/.changeset/cute-eagles-rescue.md b/.changeset/cute-eagles-rescue.md new file mode 100644 index 000000000..b1725c6a7 --- /dev/null +++ b/.changeset/cute-eagles-rescue.md @@ -0,0 +1,5 @@ +--- +"emdash": patch +--- + +Fixes byline hydration exceeding D1 SQL variable limit on large collections by chunking IN clauses. 
From cbebe8ef7368fdfad50fb7c84f6c02919b9888b2 Mon Sep 17 00:00:00 2001 From: Angel Baez Date: Sat, 4 Apr 2026 01:39:07 -0500 Subject: [PATCH 3/5] fix: deduplicate content IDs before chunking and add integration tests Deduplicates contentIds in getContentBylinesMany to prevent duplicate credits when the same ID appears across chunk boundaries. Adds tests for the duplication edge case and an end-to-end getBylinesForEntries test spanning both explicit and inferred byline paths. --- .../core/src/database/repositories/byline.ts | 3 +- .../tests/unit/bylines/bylines-query.test.ts | 61 +++++++++++++++++++ .../unit/database/repositories/byline.test.ts | 26 ++++++++ 3 files changed, 89 insertions(+), 1 deletion(-) diff --git a/packages/core/src/database/repositories/byline.ts b/packages/core/src/database/repositories/byline.ts index 9d51fb940..800bf03a0 100644 --- a/packages/core/src/database/repositories/byline.ts +++ b/packages/core/src/database/repositories/byline.ts @@ -260,7 +260,8 @@ export class BylineRepository { const result = new Map(); if (contentIds.length === 0) return result; - for (const chunk of chunks(contentIds, SQL_BATCH_SIZE)) { + const uniqueContentIds = [...new Set(contentIds)]; + for (const chunk of chunks(uniqueContentIds, SQL_BATCH_SIZE)) { const rows = await this.db .selectFrom("_emdash_content_bylines as cb") .innerJoin("_emdash_bylines as b", "b.id", "cb.byline_id") diff --git a/packages/core/tests/unit/bylines/bylines-query.test.ts b/packages/core/tests/unit/bylines/bylines-query.test.ts index 77e6d0125..416f81ea7 100644 --- a/packages/core/tests/unit/bylines/bylines-query.test.ts +++ b/packages/core/tests/unit/bylines/bylines-query.test.ts @@ -5,6 +5,7 @@ import { BylineRepository } from "../../../src/database/repositories/byline.js"; import { ContentRepository } from "../../../src/database/repositories/content.js"; import { UserRepository } from "../../../src/database/repositories/user.js"; import type { Database } from 
"../../../src/database/types.js"; +import { SQL_BATCH_SIZE } from "../../../src/utils/chunks.js"; import { setupTestDatabaseWithCollections, teardownTestDatabase } from "../../utils/test-db.js"; // Mock the loader's getDb to return our test database @@ -230,6 +231,66 @@ describe("Byline query functions", () => { expect(result.get(post.id)?.[0]?.byline.displayName).toBe("Batch Author"); }); + it("handles batches larger than SQL_BATCH_SIZE across explicit and inferred bylines", async () => { + const userRepo = new UserRepository(db); + const explicitByline = await bylineRepo.create({ + slug: "large-batch-explicit", + displayName: "Large Batch Explicit", + }); + + const explicitPost1 = await contentRepo.create({ + type: "post", + slug: "large-batch-explicit-1", + data: { title: "Large Batch Explicit 1" }, + }); + await bylineRepo.setContentBylines("post", explicitPost1.id, [{ bylineId: explicitByline.id }]); + + const inferredPostIds: string[] = []; + for (let i = 0; i < SQL_BATCH_SIZE + 2; i++) { + const user = await userRepo.create({ + email: `large-batch-${i}@example.com`, + displayName: `Large Batch ${i}`, + role: "editor", + }); + + await bylineRepo.create({ + slug: `large-batch-${i}`, + displayName: `Large Batch ${i}`, + userId: user.id, + }); + + const post = await contentRepo.create({ + type: "post", + slug: `large-batch-post-${i}`, + data: { title: `Large Batch Post ${i}` }, + authorId: user.id, + }); + inferredPostIds.push(post.id); + } + + const explicitPost2 = await contentRepo.create({ + type: "post", + slug: "large-batch-explicit-2", + data: { title: "Large Batch Explicit 2" }, + }); + await bylineRepo.setContentBylines("post", explicitPost2.id, [{ bylineId: explicitByline.id }]); + + const entryIds = [explicitPost1.id, ...inferredPostIds, explicitPost2.id]; + const result = await getBylinesForEntries("post", entryIds); + + expect(result.size).toBe(entryIds.length); + expect(result.get(explicitPost1.id)?.[0]?.source).toBe("explicit"); + 
expect(result.get(explicitPost1.id)?.[0]?.byline.displayName).toBe("Large Batch Explicit"); + expect(result.get(explicitPost2.id)?.[0]?.source).toBe("explicit"); + expect(result.get(explicitPost2.id)?.[0]?.byline.displayName).toBe("Large Batch Explicit"); + expect(result.get(inferredPostIds[0]!)?.[0]?.source).toBe("inferred"); + expect(result.get(inferredPostIds[0]!)?.[0]?.byline.displayName).toBe("Large Batch 0"); + expect(result.get(inferredPostIds[SQL_BATCH_SIZE + 1]!)?.[0]?.source).toBe("inferred"); + expect(result.get(inferredPostIds[SQL_BATCH_SIZE + 1]!)?.[0]?.byline.displayName).toBe( + `Large Batch ${SQL_BATCH_SIZE + 1}`, + ); + }); + it("returns empty map for empty input", async () => { const result = await getBylinesForEntries("post", []); expect(result.size).toBe(0); diff --git a/packages/core/tests/unit/database/repositories/byline.test.ts b/packages/core/tests/unit/database/repositories/byline.test.ts index f72d58591..d6fa4d7dd 100644 --- a/packages/core/tests/unit/database/repositories/byline.test.ts +++ b/packages/core/tests/unit/database/repositories/byline.test.ts @@ -177,6 +177,32 @@ describe("BylineRepository", () => { expect(result.get(realIds[0]!)![0]!.byline.id).toBe(byline.id); }); + it("getContentBylinesMany does not duplicate credits for repeated content IDs", async () => { + const byline = await bylineRepo.create({ + slug: "duplicate-batch-author", + displayName: "Duplicate Batch Author", + }); + + const content = await contentRepo.create({ + type: "post", + slug: "duplicate-batch-post", + data: { title: "Duplicate Batch Post" }, + }); + await bylineRepo.setContentBylines("post", content.id, [{ bylineId: byline.id }]); + + const ids: string[] = []; + for (let i = 0; i < SQL_BATCH_SIZE + 10; i++) { + ids.push(`fake-id-${i}`); + } + ids[0] = content.id; + ids[SQL_BATCH_SIZE + 5] = content.id; + + const result = await bylineRepo.getContentBylinesMany("post", ids); + + expect(result.get(content.id)).toHaveLength(1); + 
expect(result.get(content.id)?.[0]?.byline.id).toBe(byline.id); + }); + it("findByUserIds handles more IDs than SQL_BATCH_SIZE", async () => { // Create a real user so the FK constraint is satisfied const userId = "user-batch-test"; From e3d056bd6a7c72b592f72ca0a59e50553b054958 Mon Sep 17 00:00:00 2001 From: Angel Baez Date: Thu, 9 Apr 2026 18:46:52 -0500 Subject: [PATCH 4/5] fix: chunk SEO IN clause to stay within D1 SQL variable limit SeoRepository.getMany builds a WHERE content_id IN (?, ?, ...) clause alongside a collection = ? filter. On Cloudflare D1, which caps bound parameters at 100 per query, passing 100 content ids produces 101 parameters and trips the limit: D1_ERROR: too many SQL variables at offset 369: SQLITE_ERROR This is the same root cause as the byline hydration fix in the sibling commit, but on a different repository that wasn't covered there. SeoRepository.getMany is called from handleContentList before hydrateBylinesMany, so on any collection with has_seo = 1 and >= 100 items, it's the first function to fail on the admin content list endpoint. Apply the same chunking pattern using the shared chunks() helper and SQL_BATCH_SIZE constant. Deduplicate contentIds before chunking for consistency with the byline fix. Pre-fill result with defaults so the two-pass resolve-then-fill-missing logic collapses to a single pass. Adds unit tests covering: - input size larger than SQL_BATCH_SIZE, real ids spread across chunks - all-missing ids get defaults - duplicate input ids resolve cleanly without duplicate rows Repro of the underlying D1 limit for the record: wrangler d1 execute --remote --command \ "SELECT 1 WHERE 'x' = ?
AND 1 IN (?,?,...x100)" -> too many SQL variables at offset 231: SQLITE_ERROR [code: 7500] --- .changeset/brave-seals-hydrate.md | 5 + .../core/src/database/repositories/seo.ts | 38 +++--- .../unit/database/repositories/seo.test.ts | 114 ++++++++++++++++++ 3 files changed, 140 insertions(+), 17 deletions(-) create mode 100644 .changeset/brave-seals-hydrate.md create mode 100644 packages/core/tests/unit/database/repositories/seo.test.ts diff --git a/.changeset/brave-seals-hydrate.md b/.changeset/brave-seals-hydrate.md new file mode 100644 index 000000000..067ebde58 --- /dev/null +++ b/.changeset/brave-seals-hydrate.md @@ -0,0 +1,5 @@ +--- +"emdash": patch +--- + +Fixes SEO hydration exceeding D1 SQL variable limit on large collections by chunking the `content_id IN (...)` clause in `SeoRepository.getMany`. diff --git a/packages/core/src/database/repositories/seo.ts b/packages/core/src/database/repositories/seo.ts index 1af31b4f8..4cc21bd44 100644 --- a/packages/core/src/database/repositories/seo.ts +++ b/packages/core/src/database/repositories/seo.ts @@ -1,5 +1,6 @@ import { sql, type Kysely } from "kysely"; +import { chunks, SQL_BATCH_SIZE } from "../../utils/chunks.js"; import type { Database } from "../types.js"; import type { ContentSeo, ContentSeoInput } from "./types.js"; @@ -61,37 +62,40 @@ export class SeoRepository { } /** - * Get SEO data for multiple content items in a single query. + * Get SEO data for multiple content items. * Returns a Map keyed by content_id. Items without SEO rows get defaults. + * + * Chunks the `content_id IN (…)` clause so the total bound-parameter count + * per statement (ids + the `collection = ?` filter) stays within Cloudflare + * D1's 100-variable limit regardless of how many content items are passed. 
*/ async getMany(collection: string, contentIds: string[]): Promise> { const result = new Map(); if (contentIds.length === 0) return result; - // Batch query — single SELECT with IN clause - const rows = await this.db - .selectFrom("_emdash_seo") - .selectAll() - .where("collection", "=", collection) - .where("content_id", "in", contentIds) - .execute(); - - // Index fetched rows by content_id - const rowMap = new Map(rows.map((r) => [r.content_id, r])); - + // Pre-fill with defaults so every input id has an entry even if no row exists. for (const id of contentIds) { - const row = rowMap.get(id); - if (row) { - result.set(id, { + result.set(id, { ...SEO_DEFAULTS }); + } + + const uniqueContentIds = [...new Set(contentIds)]; + for (const chunk of chunks(uniqueContentIds, SQL_BATCH_SIZE)) { + const rows = await this.db + .selectFrom("_emdash_seo") + .selectAll() + .where("collection", "=", collection) + .where("content_id", "in", chunk) + .execute(); + + for (const row of rows) { + result.set(row.content_id, { title: row.seo_title ?? null, description: row.seo_description ?? null, image: row.seo_image ?? null, canonical: row.seo_canonical ?? 
null, noIndex: row.seo_no_index === 1, }); - } else { - result.set(id, { ...SEO_DEFAULTS }); } } diff --git a/packages/core/tests/unit/database/repositories/seo.test.ts b/packages/core/tests/unit/database/repositories/seo.test.ts new file mode 100644 index 000000000..8e7649d6d --- /dev/null +++ b/packages/core/tests/unit/database/repositories/seo.test.ts @@ -0,0 +1,114 @@ +import type { Kysely } from "kysely"; +import { describe, it, expect, beforeEach, afterEach } from "vitest"; + +import { ContentRepository } from "../../../../src/database/repositories/content.js"; +import { SeoRepository } from "../../../../src/database/repositories/seo.js"; +import type { Database } from "../../../../src/database/types.js"; +import { SQL_BATCH_SIZE } from "../../../../src/utils/chunks.js"; +import { setupTestDatabaseWithCollections, teardownTestDatabase } from "../../../utils/test-db.js"; + +describe("SeoRepository", () => { + let db: Kysely; + let seoRepo: SeoRepository; + let contentRepo: ContentRepository; + + beforeEach(async () => { + db = await setupTestDatabaseWithCollections(); + // Enable SEO on the post collection — createCollection defaults has_seo to 0. 
+ await db + .updateTable("_emdash_collections") + .set({ has_seo: 1 }) + .where("slug", "=", "post") + .execute(); + seoRepo = new SeoRepository(db); + contentRepo = new ContentRepository(db); + }); + + afterEach(async () => { + await teardownTestDatabase(db); + }); + + it("getMany handles more IDs than SQL_BATCH_SIZE", async () => { + // Create a few real content entries with SEO rows + const realIds: string[] = []; + for (let i = 0; i < 3; i++) { + const content = await contentRepo.create({ + type: "post", + slug: `seo-batch-post-${i}`, + data: { title: `SEO Batch Post ${i}` }, + }); + await seoRepo.upsert("post", content.id, { + title: `SEO Title ${i}`, + description: `SEO Description ${i}`, + }); + realIds.push(content.id); + } + + // Build an ID list larger than SQL_BATCH_SIZE with real IDs spread across chunks + const ids: string[] = []; + for (let i = 0; i < SQL_BATCH_SIZE + 10; i++) { + ids.push(`fake-id-${i}`); + } + ids[0] = realIds[0]!; + ids[SQL_BATCH_SIZE - 1] = realIds[1]!; + ids[SQL_BATCH_SIZE + 5] = realIds[2]!; + + const result = await seoRepo.getMany("post", ids); + + // All input IDs should be present in the result Map + expect(result.size).toBe(ids.length); + + // Real IDs should have their SEO data resolved + expect(result.get(realIds[0]!)?.title).toBe("SEO Title 0"); + expect(result.get(realIds[1]!)?.title).toBe("SEO Title 1"); + expect(result.get(realIds[2]!)?.title).toBe("SEO Title 2"); + + // Fake IDs should get default values + expect(result.get("fake-id-5")?.title).toBeNull(); + expect(result.get("fake-id-5")?.description).toBeNull(); + expect(result.get("fake-id-5")?.noIndex).toBe(false); + }); + + it("getMany returns defaults for every input id when no rows exist", async () => { + const ids: string[] = []; + for (let i = 0; i < SQL_BATCH_SIZE + 10; i++) { + ids.push(`missing-id-${i}`); + } + + const result = await seoRepo.getMany("post", ids); + + expect(result.size).toBe(ids.length); + for (const id of ids) { + const entry = 
result.get(id); + expect(entry).toBeDefined(); + expect(entry?.title).toBeNull(); + expect(entry?.description).toBeNull(); + expect(entry?.image).toBeNull(); + expect(entry?.canonical).toBeNull(); + expect(entry?.noIndex).toBe(false); + } + }); + + it("getMany deduplicates repeated content IDs without duplicate rows", async () => { + const content = await contentRepo.create({ + type: "post", + slug: "seo-duplicate-post", + data: { title: "SEO Duplicate" }, + }); + await seoRepo.upsert("post", content.id, { + title: "Duplicate SEO", + }); + + const ids: string[] = []; + for (let i = 0; i < SQL_BATCH_SIZE + 10; i++) { + ids.push(`fake-id-${i}`); + } + ids[0] = content.id; + ids[SQL_BATCH_SIZE + 5] = content.id; + + const result = await seoRepo.getMany("post", ids); + + // The real entry should resolve to its SEO row regardless of the duplicate input + expect(result.get(content.id)?.title).toBe("Duplicate SEO"); + }); +}); From d8a56cc3da4e9d514f40443205fe72b0db8163c3 Mon Sep 17 00:00:00 2001 From: "emdashbot[bot]" Date: Thu, 9 Apr 2026 23:48:09 +0000 Subject: [PATCH 5/5] style: format --- packages/core/tests/unit/bylines/bylines-query.test.ts | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/packages/core/tests/unit/bylines/bylines-query.test.ts b/packages/core/tests/unit/bylines/bylines-query.test.ts index 416f81ea7..0c0b5d965 100644 --- a/packages/core/tests/unit/bylines/bylines-query.test.ts +++ b/packages/core/tests/unit/bylines/bylines-query.test.ts @@ -243,7 +243,9 @@ describe("Byline query functions", () => { slug: "large-batch-explicit-1", data: { title: "Large Batch Explicit 1" }, }); - await bylineRepo.setContentBylines("post", explicitPost1.id, [{ bylineId: explicitByline.id }]); + await bylineRepo.setContentBylines("post", explicitPost1.id, [ + { bylineId: explicitByline.id }, + ]); const inferredPostIds: string[] = []; for (let i = 0; i < SQL_BATCH_SIZE + 2; i++) { @@ -273,7 +275,9 @@ describe("Byline query functions", () => { 
slug: "large-batch-explicit-2", data: { title: "Large Batch Explicit 2" }, }); - await bylineRepo.setContentBylines("post", explicitPost2.id, [{ bylineId: explicitByline.id }]); + await bylineRepo.setContentBylines("post", explicitPost2.id, [ + { bylineId: explicitByline.id }, + ]); const entryIds = [explicitPost1.id, ...inferredPostIds, explicitPost2.id]; const result = await getBylinesForEntries("post", entryIds);