From e65a85e7c4edd5fe84c70c3c928d090fb3117900 Mon Sep 17 00:00:00 2001 From: Darien Kindlund Date: Wed, 1 Apr 2026 17:38:04 -0400 Subject: [PATCH] perf: bound trigram search index size with LEFT() truncation Teable auto-creates GIN trigram indexes (idx_trgm_*) on every field for search. On large tables with many fields, this causes massive index bloat (e.g., 3.6 GB of indexes on 117 MB of data) and severe write amplification on every INSERT/UPDATE. This commit wraps index expressions with LEFT(expression, N) to bound index size to the first N characters per field value. N is configurable via SEARCH_INDEX_TRUNCATE_LENGTH env var (default: 1000). Setting it to 0 disables truncation (preserving current behavior). For short fields (< N chars), LEFT() is a no-op. For large JSON/HTML fields, it dramatically reduces index size. The intent is to preserve search functionality by having PostgreSQL use the truncated index for candidate selection and then apply the full-column WHERE clause for filtering. Note that PostgreSQL only considers an expression index when the query predicate contains the same expression, so the search query must also filter on LEFT(expression, N) for these indexes to be used, and a match located entirely beyond the first N characters cannot be found through the index. Existing indexes are automatically rebuilt on the next index reconciliation cycle when getAbnormalIndex() detects the definition mismatch. If the env var changes between reboots, the same mechanism triggers a rebuild with the new threshold. Production data that motivated this change: - Articles table: 117 MB data, 5.8 GB total (3.6 GB indexes) - 70 trigram indexes, 13 largest with zero search scans (1.9 GB) - html_content index: 731 MB, 0 scans - Formula field backfill (34K rows): 90+ min, 3 container crashes Follow-on enhancement: per-field configurable truncate length via field metadata, rather than a single global threshold. 
Co-Authored-By: Claude Opus 4.6 (1M context) --- .../src/configs/threshold.config.ts | 1 + .../src/db-provider/db.provider.interface.ts | 2 +- .../src/db-provider/postgres.provider.ts | 4 +- .../search-index-builder.postgres.spec.ts | 199 ++++++++++++++++++ .../search-index-builder.postgres.ts | 16 +- .../src/db-provider/sqlite.provider.ts | 2 +- .../src/features/table/table-index.service.ts | 20 +- 7 files changed, 228 insertions(+), 16 deletions(-) create mode 100644 apps/nestjs-backend/src/db-provider/search-query/search-index-builder.postgres.spec.ts diff --git a/apps/nestjs-backend/src/configs/threshold.config.ts b/apps/nestjs-backend/src/configs/threshold.config.ts index b6a017c0d2..1c0b37e6d0 100644 --- a/apps/nestjs-backend/src/configs/threshold.config.ts +++ b/apps/nestjs-backend/src/configs/threshold.config.ts @@ -21,6 +21,7 @@ export const thresholdConfig = registerAs('threshold', () => ({ process.env.BIG_TRANSACTION_TIMEOUT ?? 10 * 60 * 1000 /* 10 mins */ ), automationGap: Number(process.env.AUTOMATION_GAP ?? 200), + searchIndexTruncateLength: Number(process.env.SEARCH_INDEX_TRUNCATE_LENGTH ?? 1000), maxAttachmentUploadSize: Number(process.env.MAX_ATTACHMENT_UPLOAD_SIZE ?? Infinity), maxOpenapiAttachmentUploadSize: Number( process.env.MAX_OPENAPI_ATTACHMENT_UPLOAD_SIZE ?? 
Infinity diff --git a/apps/nestjs-backend/src/db-provider/db.provider.interface.ts b/apps/nestjs-backend/src/db-provider/db.provider.interface.ts index ea3107dfdc..ee3de639e0 100644 --- a/apps/nestjs-backend/src/db-provider/db.provider.interface.ts +++ b/apps/nestjs-backend/src/db-provider/db.provider.interface.ts @@ -227,7 +227,7 @@ export interface IDbProvider { context?: IRecordQueryFilterContext ): Knex.QueryBuilder; - searchIndex(): IndexBuilderAbstract; + searchIndex(truncateLength?: number): IndexBuilderAbstract; duplicateTableQuery(queryBuilder: Knex.QueryBuilder): DuplicateTableQueryAbstract; diff --git a/apps/nestjs-backend/src/db-provider/postgres.provider.ts b/apps/nestjs-backend/src/db-provider/postgres.provider.ts index fb035c4236..568f0cff9a 100644 --- a/apps/nestjs-backend/src/db-provider/postgres.provider.ts +++ b/apps/nestjs-backend/src/db-provider/postgres.provider.ts @@ -654,8 +654,8 @@ WHERE tc.constraint_type = 'FOREIGN KEY' ).getSearchIndexQuery(); } - searchIndex() { - return new IndexBuilderPostgres(); + searchIndex(truncateLength?: number) { + return new IndexBuilderPostgres(truncateLength); } duplicateTableQuery(queryBuilder: Knex.QueryBuilder) { diff --git a/apps/nestjs-backend/src/db-provider/search-query/search-index-builder.postgres.spec.ts b/apps/nestjs-backend/src/db-provider/search-query/search-index-builder.postgres.spec.ts new file mode 100644 index 0000000000..8bc4977b32 --- /dev/null +++ b/apps/nestjs-backend/src/db-provider/search-query/search-index-builder.postgres.spec.ts @@ -0,0 +1,199 @@ +import { CellValueType, FieldType } from '@teable/core'; +import type { IFieldInstance } from '../../features/field/model/factory'; +import { FieldFormatter, IndexBuilderPostgres } from './search-index-builder.postgres'; + +function createMockField(overrides: Partial = {}): IFieldInstance { + return { + id: 'fldTestField123', + dbFieldName: 'test_field', + cellValueType: CellValueType.String, + type: FieldType.SingleLineText, + options: 
{}, + isStructuredCellValue: false, + isMultipleCellValue: false, + ...overrides, + } as IFieldInstance; +} + +// --- FieldFormatter.getIndexExpression --- + +describe('FieldFormatter.getIndexExpression', () => { + describe('with truncation', () => { + it('wraps string field expression with LEFT()', () => { + const field = createMockField(); + const result = FieldFormatter.getIndexExpression(field, 1000); + expect(result).toBe('LEFT(("test_field")::text, 1000)'); + }); + + it('wraps LongText field expression with LEFT()', () => { + const field = createMockField({ type: FieldType.LongText }); + const result = FieldFormatter.getIndexExpression(field, 500); + expect(result).toContain('LEFT('); + expect(result).toContain('500)'); + expect(result).toContain('REPLACE'); + }); + + it('wraps Number field expression with LEFT()', () => { + const field = createMockField({ + cellValueType: CellValueType.Number, + options: { formatting: { precision: 2 } }, + }); + const result = FieldFormatter.getIndexExpression(field, 1000); + expect(result).toBe('LEFT((ROUND("test_field"::numeric, 2)::text)::text, 1000)'); + }); + + it('returns null for DateTime fields regardless of truncation', () => { + const field = createMockField({ cellValueType: CellValueType.DateTime }); + expect(FieldFormatter.getIndexExpression(field, 1000)).toBeNull(); + }); + + it('returns null for Boolean fields regardless of truncation', () => { + const field = createMockField({ cellValueType: CellValueType.Boolean }); + expect(FieldFormatter.getIndexExpression(field, 1000)).toBeNull(); + }); + + it('wraps structured cell value expression with LEFT()', () => { + const field = createMockField({ isStructuredCellValue: true }); + const result = FieldFormatter.getIndexExpression(field, 1000); + expect(result).toContain('LEFT('); + expect(result).toContain("title"); + expect(result).toContain('1000)'); + }); + + it('wraps array field expression with LEFT()', () => { + const field = createMockField({ 
isMultipleCellValue: true }); + const result = FieldFormatter.getIndexExpression(field, 1000); + expect(result).toBe('LEFT(("test_field"::text)::text, 1000)'); + }); + + it('uses specified truncate length', () => { + const field = createMockField(); + expect(FieldFormatter.getIndexExpression(field, 500)).toContain('500)'); + expect(FieldFormatter.getIndexExpression(field, 2000)).toContain('2000)'); + }); + }); + + describe('without truncation', () => { + it('returns raw expression when truncateLength is undefined', () => { + const field = createMockField(); + const result = FieldFormatter.getIndexExpression(field); + expect(result).toBe('"test_field"'); + expect(result).not.toContain('LEFT'); + }); + + it('returns raw expression when truncateLength is 0 (escape hatch)', () => { + const field = createMockField(); + const result = FieldFormatter.getIndexExpression(field, 0); + expect(result).toBe('"test_field"'); + expect(result).not.toContain('LEFT'); + }); + + it('returns raw expression when truncateLength is negative', () => { + const field = createMockField(); + const result = FieldFormatter.getIndexExpression(field, -1); + expect(result).toBe('"test_field"'); + expect(result).not.toContain('LEFT'); + }); + }); +}); + +// --- IndexBuilderPostgres.createSingleIndexSql --- + +describe('IndexBuilderPostgres.createSingleIndexSql', () => { + it('generates SQL with LEFT() when truncateLength is set', () => { + const builder = new IndexBuilderPostgres(1000); + const field = createMockField(); + const sql = builder.createSingleIndexSql('schema.table', field); + + expect(sql).toContain('CREATE INDEX IF NOT EXISTS'); + expect(sql).toContain('USING gin'); + expect(sql).toContain('gin_trgm_ops'); + expect(sql).toContain('LEFT('); + expect(sql).toContain('1000)'); + }); + + it('generates SQL without LEFT() when truncateLength is undefined', () => { + const builder = new IndexBuilderPostgres(); + const field = createMockField(); + const sql = 
builder.createSingleIndexSql('schema.table', field); + + expect(sql).toContain('CREATE INDEX IF NOT EXISTS'); + expect(sql).toContain('USING gin'); + expect(sql).not.toContain('LEFT('); + }); + + it('generates SQL without LEFT() when truncateLength is 0', () => { + const builder = new IndexBuilderPostgres(0); + const field = createMockField(); + const sql = builder.createSingleIndexSql('schema.table', field); + + expect(sql).not.toContain('LEFT('); + }); + + it('returns null for unsupported field types', () => { + const builder = new IndexBuilderPostgres(1000); + const field = createMockField({ cellValueType: CellValueType.DateTime }); + expect(builder.createSingleIndexSql('schema.table', field)).toBeNull(); + }); +}); + +// --- IndexBuilderPostgres.getAbnormalIndex --- + +describe('IndexBuilderPostgres.getAbnormalIndex', () => { + it('detects old-format indexes (without LEFT()) as abnormal', () => { + const builder = new IndexBuilderPostgres(1000); + const field = createMockField(); + + // Simulate an existing index WITHOUT LEFT() truncation + const existingIndexes = [ + { + schemaname: 'schema', + tablename: 'table', + indexname: `idx_trgm_table_test_field_${field.id}`, + tablespace: '', + indexdef: `CREATE INDEX idx_trgm_table_test_field_${field.id} ON schema.table USING gin (("test_field") gin_trgm_ops)`, + }, + ]; + + const abnormal = builder.getAbnormalIndex('schema.table', [field], existingIndexes); + expect(abnormal.length).toBeGreaterThan(0); + }); + + it('does not flag matching indexes (with LEFT()) as abnormal', () => { + const builder = new IndexBuilderPostgres(1000); + const field = createMockField(); + + // Simulate an existing index WITH LEFT() truncation (matching current config) + const existingIndexes = [ + { + schemaname: 'schema', + tablename: 'table', + indexname: `idx_trgm_table_test_field_${field.id}`, + tablespace: '', + indexdef: `CREATE INDEX idx_trgm_table_test_field_${field.id} ON schema.table USING gin ((LEFT(("test_field")::text, 
1000)) gin_trgm_ops)`, + }, + ]; + + const abnormal = builder.getAbnormalIndex('schema.table', [field], existingIndexes); + expect(abnormal).toHaveLength(0); + }); + + it('detects abnormal indexes when truncate length changes', () => { + // Config says 500, but existing indexes were built with 1000 + const builder = new IndexBuilderPostgres(500); + const field = createMockField(); + + const existingIndexes = [ + { + schemaname: 'schema', + tablename: 'table', + indexname: `idx_trgm_table_test_field_${field.id}`, + tablespace: '', + indexdef: `CREATE INDEX idx_trgm_table_test_field_${field.id} ON schema.table USING gin ((LEFT(("test_field")::text, 1000)) gin_trgm_ops)`, + }, + ]; + + const abnormal = builder.getAbnormalIndex('schema.table', [field], existingIndexes); + expect(abnormal.length).toBeGreaterThan(0); + }); +}); diff --git a/apps/nestjs-backend/src/db-provider/search-query/search-index-builder.postgres.ts b/apps/nestjs-backend/src/db-provider/search-query/search-index-builder.postgres.ts index 7da90c63b8..cfa3a145bd 100644 --- a/apps/nestjs-backend/src/db-provider/search-query/search-index-builder.postgres.ts +++ b/apps/nestjs-backend/src/db-provider/search-query/search-index-builder.postgres.ts @@ -66,8 +66,12 @@ export class FieldFormatter { } // expression for generating index - static getIndexExpression(field: IFieldInstance): string | null { - return this.getSearchableExpression(field, field.isMultipleCellValue); + static getIndexExpression(field: IFieldInstance, truncateLength?: number): string | null { + const expression = this.getSearchableExpression(field, field.isMultipleCellValue); + if (expression === null || !truncateLength || truncateLength <= 0) { + return expression; + } + return `LEFT((${expression})::text, ${truncateLength})`; } } @@ -75,6 +79,10 @@ export class IndexBuilderPostgres extends IndexBuilderAbstract { static PG_MAX_INDEX_LEN = 63; static DELIMITER_LEN = 3; + constructor(private readonly truncateLength?: number) { + super(); + 
} + private getIndexPrefix() { return `idx_trgm`; } @@ -108,7 +116,7 @@ export class IndexBuilderPostgres extends IndexBuilderAbstract { createSingleIndexSql(dbTableName: string, field: IFieldInstance): string | null { const [schema, table] = dbTableName.split('.'); const indexName = this.getIndexName(table, field); - const expression = FieldFormatter.getIndexExpression(field); + const expression = FieldFormatter.getIndexExpression(field, this.truncateLength); if (expression === null) { return null; } @@ -141,7 +149,7 @@ export class IndexBuilderPostgres extends IndexBuilderAbstract { const fieldSql = searchFields .filter(({ cellValueType }) => !unSupportCellValueType.includes(cellValueType)) .map((field) => { - const expression = FieldFormatter.getIndexExpression(field); + const expression = FieldFormatter.getIndexExpression(field, this.truncateLength); return expression ? this.createSingleIndexSql(dbTableName, field) : null; }) .filter((sql): sql is string => sql !== null); diff --git a/apps/nestjs-backend/src/db-provider/sqlite.provider.ts b/apps/nestjs-backend/src/db-provider/sqlite.provider.ts index 4532ec63a6..2c8ae43990 100644 --- a/apps/nestjs-backend/src/db-provider/sqlite.provider.ts +++ b/apps/nestjs-backend/src/db-provider/sqlite.provider.ts @@ -526,7 +526,7 @@ export class SqliteProvider implements IDbProvider { ).getSearchIndexQuery(); } - searchIndex() { + searchIndex(_truncateLength?: number) { return new IndexBuilderSqlite(); } diff --git a/apps/nestjs-backend/src/features/table/table-index.service.ts b/apps/nestjs-backend/src/features/table/table-index.service.ts index 879bfb8050..d1f2b7fa40 100644 --- a/apps/nestjs-backend/src/features/table/table-index.service.ts +++ b/apps/nestjs-backend/src/features/table/table-index.service.ts @@ -29,6 +29,10 @@ export class TableIndexService { @InjectModel('CUSTOM_KNEX') private readonly knex: Knex ) {} + private getSearchIndexBuilder() { + return 
this.dbProvider.searchIndex(this.thresholdConfig.searchIndexTruncateLength); + } + async getSearchIndexFields(tableId: string): Promise { const fieldsRaw = await this.prismaService.field.findMany({ where: { @@ -62,7 +66,7 @@ export class TableIndexService { }); if (type === TableIndex.search) { - const searchIndexSql = this.dbProvider.searchIndex().getExistTableIndexSql(dbTableName); + const searchIndexSql = this.getSearchIndexBuilder().getExistTableIndexSql(dbTableName); const [{ exists: searchIndexExist }] = await this.prismaService.$queryRawUnsafe< { exists: boolean; @@ -121,7 +125,7 @@ export class TableIndexService { async toggleSearchIndex(dbTableName: string, fields: IFieldInstance[], toEnable: boolean) { if (toEnable) { - const sqls = this.dbProvider.searchIndex().getCreateIndexSql(dbTableName, fields); + const sqls = this.getSearchIndexBuilder().getCreateIndexSql(dbTableName, fields); return await this.prismaService.$tx( async (prisma) => { for (let i = 0; i < sqls.length; i++) { @@ -146,7 +150,7 @@ export class TableIndexService { ); } - const sql = this.dbProvider.searchIndex().getDropIndexSql(dbTableName); + const sql = this.getSearchIndexBuilder().getDropIndexSql(dbTableName); try { return await this.prismaService.$executeRawUnsafe(sql); } catch (error) { @@ -171,7 +175,7 @@ export class TableIndexService { const { dbTableName } = tableRaw; const index = await this.getActivatedTableIndexes(tableId); if (index.includes(TableIndex.search)) { - const sql = this.dbProvider.searchIndex().getDeleteSingleIndexSql(dbTableName, field); + const sql = this.getSearchIndexBuilder().getDeleteSingleIndexSql(dbTableName, field); // Execute within current transaction if present to keep boundaries consistent await this.prismaService.txClient().$executeRawUnsafe(sql); } @@ -190,7 +194,7 @@ export class TableIndexService { }); const { dbTableName } = tableRaw; const index = await this.getActivatedTableIndexes(tableId); - const sql = 
this.dbProvider.searchIndex().createSingleIndexSql(dbTableName, fieldInstance); + const sql = this.getSearchIndexBuilder().createSingleIndexSql(dbTableName, fieldInstance); if (index.includes(TableIndex.search) && sql) { await this.prismaService.txClient().$executeRawUnsafe(sql); } @@ -222,7 +226,7 @@ export class TableIndexService { }); const { dbTableName } = tableRaw; - const sql = this.dbProvider.searchIndex().getIndexInfoSql(dbTableName); + const sql = this.getSearchIndexBuilder().getIndexInfoSql(dbTableName); return this.prismaService.$queryRawUnsafe(sql); } @@ -273,9 +277,9 @@ export class TableIndexService { }); const { dbTableName } = tableRaw; - const dropSql = this.dbProvider.searchIndex().getDropIndexSql(dbTableName); + const dropSql = this.getSearchIndexBuilder().getDropIndexSql(dbTableName); const fieldInstances = await this.getSearchIndexFields(tableId); - const createSqls = this.dbProvider.searchIndex().getCreateIndexSql(dbTableName, fieldInstances); + const createSqls = this.getSearchIndexBuilder().getCreateIndexSql(dbTableName, fieldInstances); await this.prismaService.$tx( async (prisma) => { await prisma.$executeRawUnsafe(dropSql);