diff --git a/.changeset/brave-seals-hydrate.md b/.changeset/brave-seals-hydrate.md new file mode 100644 index 000000000..067ebde58 --- /dev/null +++ b/.changeset/brave-seals-hydrate.md @@ -0,0 +1,5 @@ +--- +"emdash": patch +--- + +Fixes SEO hydration exceeding D1 SQL variable limit on large collections by chunking the `content_id IN (...)` clause in `SeoRepository.getMany`. diff --git a/packages/core/src/database/repositories/seo.ts b/packages/core/src/database/repositories/seo.ts index 1af31b4f8..4cc21bd44 100644 --- a/packages/core/src/database/repositories/seo.ts +++ b/packages/core/src/database/repositories/seo.ts @@ -1,5 +1,6 @@ import { sql, type Kysely } from "kysely"; +import { chunks, SQL_BATCH_SIZE } from "../../utils/chunks.js"; import type { Database } from "../types.js"; import type { ContentSeo, ContentSeoInput } from "./types.js"; @@ -61,37 +62,40 @@ export class SeoRepository { } /** - * Get SEO data for multiple content items in a single query. + * Get SEO data for multiple content items. * Returns a Map keyed by content_id. Items without SEO rows get defaults. + * + * Chunks the `content_id IN (…)` clause so the total bound-parameter count + * per statement (ids + the `collection = ?` filter) stays within Cloudflare + * D1's 100-variable limit regardless of how many content items are passed. */ async getMany(collection: string, contentIds: string[]): Promise> { const result = new Map(); if (contentIds.length === 0) return result; - // Batch query — single SELECT with IN clause - const rows = await this.db - .selectFrom("_emdash_seo") - .selectAll() - .where("collection", "=", collection) - .where("content_id", "in", contentIds) - .execute(); - - // Index fetched rows by content_id - const rowMap = new Map(rows.map((r) => [r.content_id, r])); - + // Pre-fill with defaults so every input id has an entry even if no row exists. for (const id of contentIds) { - const row = rowMap.get(id); - if (row) { - result.set(id, { + result.set(id, { ...SEO_DEFAULTS }); + } + + const uniqueContentIds = [...new Set(contentIds)]; + for (const chunk of chunks(uniqueContentIds, SQL_BATCH_SIZE)) { + const rows = await this.db + .selectFrom("_emdash_seo") + .selectAll() + .where("collection", "=", collection) + .where("content_id", "in", chunk) + .execute(); + + for (const row of rows) { + result.set(row.content_id, { title: row.seo_title ?? null, description: row.seo_description ?? null, image: row.seo_image ?? null, canonical: row.seo_canonical ?? null, noIndex: row.seo_no_index === 1, }); - } else { - result.set(id, { ...SEO_DEFAULTS }); } } diff --git a/packages/core/tests/unit/database/repositories/seo.test.ts b/packages/core/tests/unit/database/repositories/seo.test.ts new file mode 100644 index 000000000..8e7649d6d --- /dev/null +++ b/packages/core/tests/unit/database/repositories/seo.test.ts @@ -0,0 +1,114 @@ +import type { Kysely } from "kysely"; +import { describe, it, expect, beforeEach, afterEach } from "vitest"; + +import { ContentRepository } from "../../../../src/database/repositories/content.js"; +import { SeoRepository } from "../../../../src/database/repositories/seo.js"; +import type { Database } from "../../../../src/database/types.js"; +import { SQL_BATCH_SIZE } from "../../../../src/utils/chunks.js"; +import { setupTestDatabaseWithCollections, teardownTestDatabase } from "../../../utils/test-db.js"; + +describe("SeoRepository", () => { + let db: Kysely; + let seoRepo: SeoRepository; + let contentRepo: ContentRepository; + + beforeEach(async () => { + db = await setupTestDatabaseWithCollections(); + // Enable SEO on the post collection — createCollection defaults has_seo to 0. + await db + .updateTable("_emdash_collections") + .set({ has_seo: 1 }) + .where("slug", "=", "post") + .execute(); + seoRepo = new SeoRepository(db); + contentRepo = new ContentRepository(db); + }); + + afterEach(async () => { + await teardownTestDatabase(db); + }); + + it("getMany handles more IDs than SQL_BATCH_SIZE", async () => { + // Create a few real content entries with SEO rows + const realIds: string[] = []; + for (let i = 0; i < 3; i++) { + const content = await contentRepo.create({ + type: "post", + slug: `seo-batch-post-${i}`, + data: { title: `SEO Batch Post ${i}` }, + }); + await seoRepo.upsert("post", content.id, { + title: `SEO Title ${i}`, + description: `SEO Description ${i}`, + }); + realIds.push(content.id); + } + + // Build an ID list larger than SQL_BATCH_SIZE with real IDs spread across chunks + const ids: string[] = []; + for (let i = 0; i < SQL_BATCH_SIZE + 10; i++) { + ids.push(`fake-id-${i}`); + } + ids[0] = realIds[0]!; + ids[SQL_BATCH_SIZE - 1] = realIds[1]!; + ids[SQL_BATCH_SIZE + 5] = realIds[2]!; + + const result = await seoRepo.getMany("post", ids); + + // All input IDs should be present in the result Map + expect(result.size).toBe(ids.length); + + // Real IDs should have their SEO data resolved + expect(result.get(realIds[0]!)?.title).toBe("SEO Title 0"); + expect(result.get(realIds[1]!)?.title).toBe("SEO Title 1"); + expect(result.get(realIds[2]!)?.title).toBe("SEO Title 2"); + + // Fake IDs should get default values + expect(result.get("fake-id-5")?.title).toBeNull(); + expect(result.get("fake-id-5")?.description).toBeNull(); + expect(result.get("fake-id-5")?.noIndex).toBe(false); + }); + + it("getMany returns defaults for every input id when no rows exist", async () => { + const ids: string[] = []; + for (let i = 0; i < SQL_BATCH_SIZE + 10; i++) { + ids.push(`missing-id-${i}`); + } + + const result = await seoRepo.getMany("post", ids); + + expect(result.size).toBe(ids.length); + for (const id of ids) { + const entry = result.get(id); + expect(entry).toBeDefined(); + expect(entry?.title).toBeNull(); + expect(entry?.description).toBeNull(); + expect(entry?.image).toBeNull(); + expect(entry?.canonical).toBeNull(); + expect(entry?.noIndex).toBe(false); + } + }); + + it("getMany deduplicates repeated content IDs without duplicate rows", async () => { + const content = await contentRepo.create({ + type: "post", + slug: "seo-duplicate-post", + data: { title: "SEO Duplicate" }, + }); + await seoRepo.upsert("post", content.id, { + title: "Duplicate SEO", + }); + + const ids: string[] = []; + for (let i = 0; i < SQL_BATCH_SIZE + 10; i++) { + ids.push(`fake-id-${i}`); + } + ids[0] = content.id; + ids[SQL_BATCH_SIZE + 5] = content.id; + + const result = await seoRepo.getMany("post", ids); + + // The real entry should resolve to its SEO row regardless of the duplicate input + expect(result.get(content.id)?.title).toBe("Duplicate SEO"); + }); +});