From 72f6df33fc690b1c6be8fd05f960c7e7ece54ab6 Mon Sep 17 00:00:00 2001 From: Claude Date: Sun, 26 Apr 2026 20:54:11 +0000 Subject: [PATCH] fix(profiler): N+1 / missing-index regression on /tables/.../columns?fields=profile (#3488) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Root cause ---------- The 1.9.9 migration introduced two separate index regressions on `profiler_data_time_series`: 1. **PostgreSQL**: `schemaChanges.sql` explicitly dropped the unique constraint `profiler_data_time_series_unique_hash_extension_ts` (entityFQNHash, extension, operation, timestamp) to allow altering the generated `operation` column expression, but never recreated it. After the migration the table kept only the `(extension, timestamp)` index, which is useless for queries filtering by `entityFQNHash`. 2. **MySQL/both**: `postDataMigrationSQLScript.sql` created temporary indexes (idx_pdts_entityFQNHash, idx_pdts_composite, etc.) for its bulk UPDATE pass and then dropped **all** of them, including the only index covering `entityFQNHash`. The batch query issued by `getLatestExtensionsBatch()` when `fields=profile` is requested: SELECT entityFQNHash, MAX(timestamp) FROM profiler_data_time_series WHERE entityFQNHash IN (...N hashes...) AND extension = 'table.columnProfile' GROUP BY entityFQNHash required an `(entityFQNHash, extension, timestamp)` index. Without it the database performs a full table scan. On production deployments with millions of profiler rows this caused 100+ second response times (Grafana: 106 770 ms; 99 % in DB; 93 dbOps). Without `profile` in the fields param the same endpoint returned in ~150-220 ms. A secondary N+1 bug existed independently of the index: `customMetrics` in fields called `getCustomMetrics(table, column)` once per paginated column, issuing up to N identical queries against `entity_extension` and then filtering in Java. 
Fix --- * **migration 2.0.2** (MySQL + PostgreSQL): create `idx_pdts_fqnhash_ext_ts` on `profiler_data_time_series(entityFQNHash, extension, timestamp)`. PostgreSQL uses `CREATE INDEX IF NOT EXISTS`, making that script safe to re-run; MySQL does not support `IF NOT EXISTS` on `CREATE INDEX`, so its script issues a plain `CREATE INDEX` (the native migration runner records each applied version and does not re-run it). Both upgrade and fresh-install paths end up with the index. * **`getTableColumnsInternal`** — `customMetrics` block: fetch all column custom metrics for the table in one query, group by column name in Java, then distribute. Reduces N queries to 1. * **`getTableColumnsInternal`** — `profile` block: skip the duplicate `populateEntityFieldTags` call when `tags` was already fetched earlier in the same request, saving one prefix-scan on `tag_usage` per request. Related: PR #26855 (fixed N+1 tag queries on the list-tables path but left the profiler-index and customMetrics N+1 untouched on the columns sub-path). --- .../native/2.0.2/mysql/schemaChanges.sql | 19 +++ .../native/2.0.2/postgres/schemaChanges.sql | 23 +++ .../it/tests/TableResourceIT.java | 134 ++++++++++++++++++ .../service/jdbi3/TableRepository.java | 19 ++- 4 files changed, 193 insertions(+), 2 deletions(-) create mode 100644 bootstrap/sql/migrations/native/2.0.2/mysql/schemaChanges.sql create mode 100644 bootstrap/sql/migrations/native/2.0.2/postgres/schemaChanges.sql diff --git a/bootstrap/sql/migrations/native/2.0.2/mysql/schemaChanges.sql b/bootstrap/sql/migrations/native/2.0.2/mysql/schemaChanges.sql new file mode 100644 index 000000000000..47e3b6867d6d --- /dev/null +++ b/bootstrap/sql/migrations/native/2.0.2/mysql/schemaChanges.sql @@ -0,0 +1,19 @@ +-- Restore a composite index on profiler_data_time_series(entityFQNHash, extension, timestamp). +-- +-- Root cause: the 1.9.9 postDataMigrationSQLScript.sql created several temporary indexes for +-- the data migration and then dropped ALL of them at the end, including the only index that +-- covered entityFQNHash. 
After that migration the table retains only the unique constraint +-- (entityFQNHash, extension, operation, timestamp) where `operation` sits between `extension` +-- and `timestamp`. Queries of the form +-- +-- SELECT entityFQNHash, MAX(timestamp) FROM profiler_data_time_series +-- WHERE entityFQNHash IN (...) AND extension = 'table.columnProfile' +-- GROUP BY entityFQNHash +-- +-- cannot use that index efficiently for MAX(timestamp) because `operation` (nullable) breaks +-- the prefix. On a large table (millions of profile rows) this causes a full table scan and +-- 100+ second response times on the columns API when `fields=profile` is requested. +-- +-- The new index covers the exact predicate pattern used by getLatestExtensionsBatch(). +-- NOTE: MySQL does not support IF NOT EXISTS on CREATE INDEX; the native migration runner +-- applies each versioned script exactly once, so a plain CREATE INDEX is safe here. +CREATE INDEX idx_pdts_fqnhash_ext_ts + ON profiler_data_time_series (entityFQNHash, extension, timestamp); diff --git a/bootstrap/sql/migrations/native/2.0.2/postgres/schemaChanges.sql b/bootstrap/sql/migrations/native/2.0.2/postgres/schemaChanges.sql new file mode 100644 index 000000000000..67bcb2e63488 --- /dev/null +++ b/bootstrap/sql/migrations/native/2.0.2/postgres/schemaChanges.sql @@ -0,0 +1,23 @@ +-- Restore a composite index on profiler_data_time_series(entityFQNHash, extension, timestamp). +-- +-- Root cause: the 1.9.9 schemaChanges.sql explicitly dropped the unique constraint +-- profiler_data_time_series_unique_hash_extension_ts (entityFQNHash, extension, operation, timestamp) +-- to allow changing the `operation` generated-column expression, but never recreated it. +-- After the 1.9.9 migration the table retains only +-- profiler_data_time_series_combined_id_ts (extension, timestamp) +-- which is useless for queries that filter by entityFQNHash. +-- +-- The 1.9.9 postDataMigrationSQLScript.sql also created temporary indexes +-- (idx_pdts_entityFQNHash, idx_pdts_composite, etc.) during its bulk UPDATE pass and then +-- dropped them all, leaving no index on entityFQNHash. 
+-- +-- Queries of the form +-- +-- SELECT entityFQNHash, MAX(timestamp) FROM profiler_data_time_series +-- WHERE entityFQNHash IN (...) AND extension = 'table.columnProfile' +-- GROUP BY entityFQNHash +-- +-- issued by getLatestExtensionsBatch() perform a full table scan without this index, +-- causing 100+ second response times on the columns API when `fields=profile` is requested. +CREATE INDEX IF NOT EXISTS idx_pdts_fqnhash_ext_ts + ON profiler_data_time_series (entityFQNHash, extension, timestamp); diff --git a/openmetadata-integration-tests/src/test/java/org/openmetadata/it/tests/TableResourceIT.java b/openmetadata-integration-tests/src/test/java/org/openmetadata/it/tests/TableResourceIT.java index bf725c071b5b..4c1d6ba677bf 100644 --- a/openmetadata-integration-tests/src/test/java/org/openmetadata/it/tests/TableResourceIT.java +++ b/openmetadata-integration-tests/src/test/java/org/openmetadata/it/tests/TableResourceIT.java @@ -5900,4 +5900,138 @@ void test_listTablesWithColumnTags_performance(TestNamespace ns) { assertFalse(table.getTags().isEmpty(), "Table tags should not be empty"); } } + + // =================================================================== + // REGRESSION TEST - columns API with fields=profile (collate#3488) + // =================================================================== + + @Test + @Execution(ExecutionMode.SAME_THREAD) + void test_getColumnsWithProfileField_correctnessAndNoBatchRegression(TestNamespace ns) { + OpenMetadataClient client = SdkClients.adminClient(); + + DatabaseService service = DatabaseServiceTestFactory.createPostgres(ns); + DatabaseSchema schema = DatabaseSchemaTestFactory.createSimple(ns, service); + + CreateClassification createClassification = + new CreateClassification() + .withName(ns.prefix("profile_test_cls")) + .withDescription("Classification for profile regression test"); + Classification cls = client.classifications().create(createClassification); + + CreateTag createTag = + new CreateTag() + 
.withName(ns.prefix("profile_test_tag")) + .withDescription("Tag for profile regression test") + .withClassification(cls.getName()); + Tag tag = client.tags().create(createTag); + + TagLabel tagLabel = + new TagLabel() + .withTagFQN(tag.getFullyQualifiedName()) + .withSource(TagLabel.TagSource.CLASSIFICATION); + + Column idCol = ColumnBuilder.of("id", "BIGINT").primaryKey().notNull().build(); + idCol.setTags(List.of(tagLabel)); + Column emailCol = ColumnBuilder.of("email", "VARCHAR").dataLength(255).build(); + emailCol.setTags(List.of(tagLabel)); + Column nameCol = ColumnBuilder.of("name", "VARCHAR").dataLength(255).build(); + + CreateTable createRequest = createRequest(ns.prefix("profile_regression_table"), ns); + createRequest.setDatabaseSchema(schema.getFullyQualifiedName()); + createRequest.setColumns(List.of(idCol, emailCol, nameCol)); + Table table = client.tables().create(createRequest); + + Long timestamp = System.currentTimeMillis(); + ColumnProfile idProfile = + new ColumnProfile() + .withName("id") + .withMin(1.0) + .withMax(999.0) + .withUniqueCount(100.0) + .withTimestamp(timestamp); + ColumnProfile emailProfile = + new ColumnProfile() + .withName("email") + .withNullCount(5.0) + .withNullProportion(0.05) + .withTimestamp(timestamp); + + TableProfile tableProfile = + new TableProfile().withRowCount(100.0).withColumnCount(3.0).withTimestamp(timestamp); + + CreateTableProfile createProfile = + new CreateTableProfile() + .withTableProfile(tableProfile) + .withColumnProfile(List.of(idProfile, emailProfile)); + client.tables().updateTableProfile(table.getId(), createProfile); + + // Verify all four field combinations don't regress: + // (a) fields=profile — profile data returned, no full-table-scan on profiler_data_time_series + TableColumnList withProfile = + assertTimeout( + Duration.ofSeconds(30), + () -> client.tables().getColumns(table.getId(), "profile"), + "columns?fields=profile should complete within 30s"); + + assertEquals(3, 
withProfile.getData().size()); + Column returnedId = + withProfile.getData().stream() + .filter(c -> "id".equals(c.getName())) + .findFirst() + .orElse(null); + Column returnedName = + withProfile.getData().stream() + .filter(c -> "name".equals(c.getName())) + .findFirst() + .orElse(null); + assertNotNull(returnedId, "id column should be present"); + assertNotNull(returnedId.getProfile(), "id column should have profile data"); + assertEquals(1.0, returnedId.getProfile().getMin(), "id column min should match"); + assertEquals(999.0, returnedId.getProfile().getMax(), "id column max should match"); + assertNotNull(returnedName, "name column should be present"); + assertNull(returnedName.getProfile(), "name column has no profile, should be null"); + + // (b) fields=tags,customMetrics,extension,profile — the exact production query + TableColumnList withAllFields = + assertTimeout( + Duration.ofSeconds(30), + () -> client.tables().getColumns(table.getId(), "tags,customMetrics,extension,profile"), + "columns?fields=tags,customMetrics,extension,profile should complete within 30s"); + + assertEquals(3, withAllFields.getData().size()); + + Column idResult = + withAllFields.getData().stream() + .filter(c -> "id".equals(c.getName())) + .findFirst() + .orElse(null); + assertNotNull(idResult, "id column must be present"); + assertNotNull(idResult.getProfile(), "id column must have profile"); + assertNotNull(idResult.getTags(), "id column must have tags"); + assertFalse(idResult.getTags().isEmpty(), "id column tags must not be empty"); + assertTrue( + idResult.getTags().stream() + .anyMatch(t -> tag.getFullyQualifiedName().equals(t.getTagFQN())), + "id column should carry the test tag"); + + // (c) fields=tags,profile — duplicate populateEntityFieldTags must not run twice + TableColumnList withTagsAndProfile = + assertTimeout( + Duration.ofSeconds(30), + () -> client.tables().getColumns(table.getId(), "tags,profile"), + "columns?fields=tags,profile should complete within 30s"); + 
+ assertEquals(3, withTagsAndProfile.getData().size()); + Column idTagsProfile = + withTagsAndProfile.getData().stream() + .filter(c -> "id".equals(c.getName())) + .findFirst() + .orElse(null); + assertNotNull(idTagsProfile); + assertNotNull(idTagsProfile.getTags()); + assertFalse( + idTagsProfile.getTags().isEmpty(), "Tags must be present even when profile requested"); + assertNotNull(idTagsProfile.getProfile(), "Profile must be present when profile requested"); + } } diff --git a/openmetadata-service/src/main/java/org/openmetadata/service/jdbi3/TableRepository.java b/openmetadata-service/src/main/java/org/openmetadata/service/jdbi3/TableRepository.java index cbdc052523a1..05aeb37af9cc 100644 --- a/openmetadata-service/src/main/java/org/openmetadata/service/jdbi3/TableRepository.java +++ b/openmetadata-service/src/main/java/org/openmetadata/service/jdbi3/TableRepository.java @@ -2891,8 +2891,21 @@ private ResultList<Column> getTableColumnsInternal( } if (fieldsParam != null && fieldsParam.contains("customMetrics")) { + List<ExtensionRecord> allColumnMetricRecords = + daoCollection + .entityExtensionDAO() + .getExtensions(table.getId(), CUSTOM_METRICS_EXTENSION + TABLE_COLUMN_EXTENSION); + Map<String, List<CustomMetric>> metricsByColumn = new HashMap<>(); + for (ExtensionRecord record : allColumnMetricRecords) { + CustomMetric metric = JsonUtils.readValue(record.extensionJson(), CustomMetric.class); + if (metric != null && metric.getColumnName() != null) { + metricsByColumn + .computeIfAbsent(metric.getColumnName(), k -> new ArrayList<>()) + .add(metric); + } + } for (Column column : paginatedColumns) { - column.setCustomMetrics(getCustomMetrics(table, column.getName())); + column.setCustomMetrics(metricsByColumn.getOrDefault(column.getName(), new ArrayList<>())); } } @@ -2904,7 +2917,9 @@ private ResultList<Column> getTableColumnsInternal( if (fieldsParam != null && fieldsParam.contains("profile")) { setColumnProfile(paginatedColumns); - populateEntityFieldTags(entityType, paginatedColumns, table.getFullyQualifiedName(), 
true); + if (!fieldsParam.contains("tags")) { + populateEntityFieldTags(entityType, paginatedColumns, table.getFullyQualifiedName(), true); + } paginatedColumns = piiOwners != null ? PIIMasker.getTableProfile(piiOwners, paginatedColumns, authorizer, securityContext)